dynahash.c (PostgreSQL source code, git master)
1/*-------------------------------------------------------------------------
2 *
3 * dynahash.c
4 * dynamic chained hash tables
5 *
6 * dynahash.c supports both local-to-a-backend hash tables and hash tables in
7 * shared memory. For shared hash tables, it is the caller's responsibility
8 * to provide appropriate access interlocking. The simplest convention is
9 * that a single LWLock protects the whole hash table. Searches (HASH_FIND or
10 * hash_seq_search) need only shared lock, but any update requires exclusive
11 * lock. For heavily-used shared tables, the single-lock approach creates a
12 * concurrency bottleneck, so we also support "partitioned" locking wherein
13 * there are multiple LWLocks guarding distinct subsets of the table. To use
14 * a hash table in partitioned mode, the HASH_PARTITION flag must be given
15 * to hash_create. This prevents any attempt to split buckets on-the-fly.
16 * Therefore, each hash bucket chain operates independently, and no fields
17 * of the hash header change after init except nentries and freeList.
18 * (A partitioned table uses multiple copies of those fields, guarded by
19 * spinlocks, for additional concurrency.)
20 * This lets any subset of the hash buckets be treated as a separately
21 * lockable partition. We expect callers to use the low-order bits of a
22 * lookup key's hash value as a partition number --- this will work because
23 * of the way calc_bucket() maps hash values to bucket numbers.
24 *
25 * For hash tables in shared memory, the memory allocator function should
26 * match malloc's semantics of returning NULL on failure. For hash tables
27 * in local memory, we typically use palloc() which will throw error on
28 * failure. The code in this file has to cope with both cases.
29 *
30 * dynahash.c provides support for these types of lookup keys:
31 *
32 * 1. Null-terminated C strings (truncated if necessary to fit in keysize),
33 * compared as though by strcmp(). This is selected by specifying the
34 * HASH_STRINGS flag to hash_create.
35 *
36 * 2. Arbitrary binary data of size keysize, compared as though by memcmp().
37 * (Caller must ensure there are no undefined padding bits in the keys!)
38 * This is selected by specifying the HASH_BLOBS flag to hash_create.
39 *
40 * 3. More complex key behavior can be selected by specifying user-supplied
41 * hashing, comparison, and/or key-copying functions. At least a hashing
42 * function must be supplied; comparison defaults to memcmp() and key copying
43 * to memcpy() when a user-defined hashing function is selected.
44 *
45 * Compared to simplehash, dynahash has the following benefits:
46 *
47 * - It supports partitioning, which is useful for shared memory access using
48 * locks.
49 * - Shared memory hashes are allocated in a fixed size area at startup and
50 * are discoverable by name from other processes.
51 * - Because entries don't need to be moved in the case of hash conflicts,
52 * dynahash has better performance for large entries.
53 * - Guarantees stable pointers to entries.
54 *
55 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
56 * Portions Copyright (c) 1994, Regents of the University of California
57 *
58 *
59 * IDENTIFICATION
60 * src/backend/utils/hash/dynahash.c
61 *
62 *-------------------------------------------------------------------------
63 */
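
To make the flag combinations above concrete, here is a minimal usage sketch (an editorial example, not part of dynahash.c; the entry type, table name, and sizes are hypothetical): a backend-local table keyed by an Oid-sized binary key, allocated under CacheMemoryContext.

    typedef struct MyEntry
    {
        Oid         key;        /* hash key: must be the first field */
        int         count;      /* caller-defined payload */
    } MyEntry;

    static HTAB *my_table = NULL;

    static void
    my_table_init(void)
    {
        HASHCTL     ctl;

        ctl.keysize = sizeof(Oid);
        ctl.entrysize = sizeof(MyEntry);
        ctl.hcxt = CacheMemoryContext;

        my_table = hash_create("my hypothetical cache",
                               128,     /* initial size estimate */
                               &ctl,
                               HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
    }

Only the HASHCTL fields selected by the flags need to be set; per the hash_create() comments below, the rest of *info may be left uninitialized.
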
64
65/*
66 * Original comments:
67 *
68 * Dynamic hashing, after CACM April 1988 pp 446-457, by Per-Ake Larson.
69 * Coded into C, with minor code improvements, and with hsearch(3) interface,
70 * by ejp@ausmelb.oz, Jul 26, 1988: 13:16;
71 * also, hcreate/hdestroy routines added to simulate hsearch(3).
72 *
73 * These routines simulate hsearch(3) and family, with the important
74 * difference that the hash table is dynamic - can grow indefinitely
75 * beyond its original size (as supplied to hcreate()).
76 *
77 * Performance appears to be comparable to that of hsearch(3).
78 * The 'source-code' options referred to in hsearch(3)'s 'man' page
79 * are not implemented; otherwise functionality is identical.
80 *
81 * Compilation controls:
82 * HASH_DEBUG controls some informative traces, mainly for debugging.
83 * HASH_STATISTICS causes HashAccesses and HashCollisions to be maintained;
84 * when combined with HASH_DEBUG, these are displayed by hdestroy().
85 *
86 * Problems & fixes to ejp@ausmelb.oz. WARNING: relies on pre-processor
87 * concatenation property, in probably unnecessary code 'optimization'.
88 *
89 * Modified margo@postgres.berkeley.edu February 1990
90 * added multiple table interface
91 * Modified by sullivan@postgres.berkeley.edu April 1990
92 * changed ctl structure for shared memory
93 */
94
95#include "postgres.h"
96
97#include <limits.h>
98
99#include "access/xact.h"
100#include "common/hashfn.h"
101#include "port/pg_bitutils.h"
102#include "storage/shmem.h"
103#include "storage/spin.h"
104#include "utils/dynahash.h"
105#include "utils/memutils.h"
106
107
108/*
109 * Constants
110 *
111 * A hash table has a top-level "directory", each of whose entries points
112 * to a "segment" of ssize bucket headers. The maximum number of hash
113 * buckets is thus dsize * ssize (but dsize may be expansible). Of course,
114 * the number of records in the table can be larger, but we don't want a
115 * whole lot of records per bucket or performance goes down.
116 *
117 * In a hash table allocated in shared memory, the directory cannot be
118 * expanded because it must stay at a fixed address. The directory size
119 * should be selected using hash_select_dirsize (and you'd better have
120 * a good idea of the maximum number of entries!). For non-shared hash
121 * tables, the initial directory size can be left at the default.
122 */
123#define DEF_SEGSIZE 256
124#define DEF_SEGSIZE_SHIFT 8 /* must be log2(DEF_SEGSIZE) */
125#define DEF_DIRSIZE 256
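
As a worked example of these defaults: the initial directory can address DEF_DIRSIZE * DEF_SEGSIZE = 256 * 256 = 65,536 buckets. A non-shared table can grow past that because dir_realloc() doubles dsize on demand, while a shared table is capped at the directory size chosen up front via hash_select_dirsize().
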
126
127/* Number of freelists to be used for a partitioned hash table. */
128#define NUM_FREELISTS 32
129
130/* A hash bucket is a linked list of HASHELEMENTs */
131typedef HASHELEMENT *HASHBUCKET;
132
133/* A hash segment is an array of bucket headers */
134typedef HASHBUCKET *HASHSEGMENT;
135
136/*
137 * Per-freelist data.
138 *
139 * In a partitioned hash table, each freelist is associated with a specific
140 * set of hashcodes, as determined by the FREELIST_IDX() macro below.
141 * nentries tracks the number of live hashtable entries having those hashcodes
142 * (NOT the number of entries in the freelist, as you might expect).
143 *
144 * The coverage of a freelist might be more or less than one partition, so it
145 * needs its own lock rather than relying on caller locking. Relying on that
146 * wouldn't work even if the coverage was the same, because of the occasional
147 * need to "borrow" entries from another freelist; see get_hash_entry().
148 *
149 * Using an array of FreeListData instead of separate arrays of mutexes,
150 * nentries and freeLists helps to reduce sharing of cache lines between
151 * different mutexes.
152 */
153typedef struct
154{
155 slock_t mutex; /* spinlock for this freelist */
156 long nentries; /* number of entries in associated buckets */
157 HASHELEMENT *freeList; /* chain of free elements */
158} FreeListData;
159
160/*
161 * Header structure for a hash table --- contains all changeable info
162 *
163 * In a shared-memory hash table, the HASHHDR is in shared memory, while
164 * each backend has a local HTAB struct. For a non-shared table, there isn't
165 * any functional difference between HASHHDR and HTAB, but we separate them
166 * anyway to share code between shared and non-shared tables.
167 */
168struct HASHHDR
169{
170 /*
171 * The freelist can become a point of contention in high-concurrency hash
172 * tables, so we use an array of freelists, each with its own mutex and
173 * nentries count, instead of just a single one. Although the freelists
174 * normally operate independently, we will scavenge entries from freelists
175 * other than a hashcode's default freelist when necessary.
176 *
177 * If the hash table is not partitioned, only freeList[0] is used and its
178 * spinlock is not used at all; callers' locking is assumed sufficient.
179 */
180 FreeListData freeList[NUM_FREELISTS];
181
182 /* These fields can change, but not in a partitioned table */
183 /* Also, dsize can't change in a shared table, even if unpartitioned */
184 long dsize; /* directory size */
185 long nsegs; /* number of allocated segments (<= dsize) */
186 uint32 max_bucket; /* ID of maximum bucket in use */
187 uint32 high_mask; /* mask to modulo into entire table */
188 uint32 low_mask; /* mask to modulo into lower half of table */
189
190 /* These fields are fixed at hashtable creation */
191 Size keysize; /* hash key length in bytes */
192 Size entrysize; /* total user element size in bytes */
193 long num_partitions; /* # partitions (must be power of 2), or 0 */
194 long max_dsize; /* 'dsize' limit if directory is fixed size */
195 long ssize; /* segment size --- must be power of 2 */
196 int sshift; /* segment shift = log2(ssize) */
197 int nelem_alloc; /* number of entries to allocate at once */
198
199#ifdef HASH_STATISTICS
200
201 /*
202 * Count statistics here. NB: stats code doesn't bother with mutex, so
203 * counts could be corrupted a bit in a partitioned table.
204 */
205 long accesses;
206 long collisions;
207#endif
208};
209
210#define IS_PARTITIONED(hctl) ((hctl)->num_partitions != 0)
211
212#define FREELIST_IDX(hctl, hashcode) \
213 (IS_PARTITIONED(hctl) ? (hashcode) % NUM_FREELISTS : 0)
214
215/*
216 * Top control structure for a hashtable --- in a shared table, each backend
217 * has its own copy (OK since no fields change at runtime)
218 */
219struct HTAB
220{
221 HASHHDR *hctl; /* => shared control information */
222 HASHSEGMENT *dir; /* directory of segment starts */
223 HashValueFunc hash; /* hash function */
224 HashCompareFunc match; /* key comparison function */
225 HashCopyFunc keycopy; /* key copying function */
226 HashAllocFunc alloc; /* memory allocator */
227 MemoryContext hcxt; /* memory context if default allocator used */
228 char *tabname; /* table name (for error messages) */
229 bool isshared; /* true if table is in shared memory */
230 bool isfixed; /* if true, don't enlarge */
231
232 /* freezing a shared table isn't allowed, so we can keep state here */
233 bool frozen; /* true = no more inserts allowed */
234
235 /* We keep local copies of these fixed values to reduce contention */
236 Size keysize; /* hash key length in bytes */
237 long ssize; /* segment size --- must be power of 2 */
238 int sshift; /* segment shift = log2(ssize) */
239};
240
241/*
242 * Key (also entry) part of a HASHELEMENT
243 */
244#define ELEMENTKEY(helem) (((char *)(helem)) + MAXALIGN(sizeof(HASHELEMENT)))
245
246/*
247 * Obtain element pointer given pointer to key
248 */
249#define ELEMENT_FROM_KEY(key) \
250 ((HASHELEMENT *) (((char *) (key)) - MAXALIGN(sizeof(HASHELEMENT))))
251
252/*
253 * Fast MOD arithmetic, assuming that y is a power of 2 !
254 */
255#define MOD(x,y) ((x) & ((y)-1))
256
257#ifdef HASH_STATISTICS
258static long hash_accesses,
259 hash_collisions,
260 hash_expansions;
261#endif
262
263/*
264 * Private function prototypes
265 */
266static void *DynaHashAlloc(Size size);
267static HASHSEGMENT seg_alloc(HTAB *hashp);
268static bool element_alloc(HTAB *hashp, int nelem, int freelist_idx);
269static bool dir_realloc(HTAB *hashp);
270static bool expand_table(HTAB *hashp);
271static HASHBUCKET get_hash_entry(HTAB *hashp, int freelist_idx);
272static void hdefault(HTAB *hashp);
273static int choose_nelem_alloc(Size entrysize);
274static bool init_htab(HTAB *hashp, long nelem);
275pg_noreturn static void hash_corrupted(HTAB *hashp);
276static uint32 hash_initial_lookup(HTAB *hashp, uint32 hashvalue,
277 HASHBUCKET **bucketptr);
278static long next_pow2_long(long num);
279static int next_pow2_int(long num);
280static void register_seq_scan(HTAB *hashp);
281static void deregister_seq_scan(HTAB *hashp);
282static bool has_seq_scans(HTAB *hashp);
283
284
285/*
286 * memory allocation support
287 */
288static MemoryContext CurrentDynaHashCxt = NULL;
289
290static void *
291DynaHashAlloc(Size size)
292{
293 Assert(MemoryContextIsValid(CurrentDynaHashCxt));
294 return MemoryContextAllocExtended(CurrentDynaHashCxt, size,
295 MCXT_ALLOC_NO_OOM);
296}
297
298
299/*
300 * HashCompareFunc for string keys
301 *
302 * Because we copy keys with strlcpy(), they will be truncated at keysize-1
303 * bytes, so we can only compare that many ... hence strncmp is almost but
304 * not quite the right thing.
305 */
306static int
307string_compare(const char *key1, const char *key2, Size keysize)
308{
309 return strncmp(key1, key2, keysize - 1);
310}
311
312
313/************************** CREATE ROUTINES **********************/
314
315/*
316 * hash_create -- create a new dynamic hash table
317 *
318 * tabname: a name for the table (for debugging purposes)
319 * nelem: maximum number of elements expected
320 * *info: additional table parameters, as indicated by flags
321 * flags: bitmask indicating which parameters to take from *info
322 *
323 * The flags value *must* include HASH_ELEM. (Formerly, this was nominally
324 * optional, but the default keysize and entrysize values were useless.)
325 * The flags value must also include exactly one of HASH_STRINGS, HASH_BLOBS,
326 * or HASH_FUNCTION, to define the key hashing semantics (C strings,
327 * binary blobs, or custom, respectively). Callers specifying a custom
328 * hash function will likely also want to use HASH_COMPARE, and perhaps
329 * also HASH_KEYCOPY, to control key comparison and copying.
330 * Another often-used flag is HASH_CONTEXT, to allocate the hash table
331 * under info->hcxt rather than under TopMemoryContext; the default
332 * behavior is only suitable for session-lifespan hash tables.
333 * Other flags bits are special-purpose and seldom used, except for those
334 * associated with shared-memory hash tables, for which see ShmemInitHash().
335 *
336 * Fields in *info are read only when the associated flags bit is set.
337 * It is not necessary to initialize other fields of *info.
338 * Neither tabname nor *info need persist after the hash_create() call.
339 *
340 * Note: It is deprecated for callers of hash_create() to explicitly specify
341 * string_hash, tag_hash, uint32_hash, or oid_hash. Just set HASH_STRINGS or
342 * HASH_BLOBS. Use HASH_FUNCTION only when you want something other than
343 * one of these.
344 *
345 * Note: for a shared-memory hashtable, nelem needs to be a pretty good
346 * estimate, since we can't expand the table on the fly. But an unshared
347 * hashtable can be expanded on-the-fly, so it's better for nelem to be
348 * on the small side and let the table grow if it's exceeded. An overly
349 * large nelem will penalize hash_seq_search speed without buying much.
350 */
351HTAB *
352hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
353{
354 HTAB *hashp;
355 HASHHDR *hctl;
356
357 /*
358 * Hash tables now allocate space for key and data, but you have to say
359 * how much space to allocate.
360 */
361 Assert(flags & HASH_ELEM);
362 Assert(info->keysize > 0);
363 Assert(info->entrysize >= info->keysize);
364
365 /*
366 * For shared hash tables, we have a local hash header (HTAB struct) that
367 * we allocate in TopMemoryContext; all else is in shared memory.
368 *
369 * For non-shared hash tables, everything including the hash header is in
370 * a memory context created specially for the hash table --- this makes
371 * hash_destroy very simple. The memory context is made a child of either
372 * a context specified by the caller, or TopMemoryContext if nothing is
373 * specified.
374 */
375 if (flags & HASH_SHARED_MEM)
376 {
377 /* Set up to allocate the hash header */
378 CurrentDynaHashCxt = TopMemoryContext;
379 }
380 else
381 {
382 /* Create the hash table's private memory context */
383 if (flags & HASH_CONTEXT)
384 CurrentDynaHashCxt = info->hcxt;
385 else
386 CurrentDynaHashCxt = TopMemoryContext;
387 CurrentDynaHashCxt = AllocSetContextCreate(CurrentDynaHashCxt,
388 "dynahash",
389 ALLOCSET_DEFAULT_SIZES);
390 }
391
392 /* Initialize the hash header, plus a copy of the table name */
393 hashp = (HTAB *) DynaHashAlloc(sizeof(HTAB) + strlen(tabname) + 1);
394 MemSet(hashp, 0, sizeof(HTAB));
395
396 hashp->tabname = (char *) (hashp + 1);
397 strcpy(hashp->tabname, tabname);
398
399 /* If we have a private context, label it with hashtable's name */
400 if (!(flags & HASH_SHARED_MEM))
401 MemoryContextSetIdentifier(CurrentDynaHashCxt, hashp->tabname);
402
403 /*
404 * Select the appropriate hash function (see comments at head of file).
405 */
406 if (flags & HASH_FUNCTION)
407 {
408 Assert(!(flags & (HASH_BLOBS | HASH_STRINGS)));
409 hashp->hash = info->hash;
410 }
411 else if (flags & HASH_BLOBS)
412 {
413 Assert(!(flags & HASH_STRINGS));
414 /* We can optimize hashing for common key sizes */
415 if (info->keysize == sizeof(uint32))
416 hashp->hash = uint32_hash;
417 else
418 hashp->hash = tag_hash;
419 }
420 else
421 {
422 /*
423 * string_hash used to be considered the default hash method, and in a
424 * non-assert build it effectively still is. But we now consider it
425 * an assertion error to not say HASH_STRINGS explicitly. To help
426 * catch mistaken usage of HASH_STRINGS, we also insist on a
427 * reasonably long string length: if the keysize is only 4 or 8 bytes,
428 * it's almost certainly an integer or pointer not a string.
429 */
430 Assert(flags & HASH_STRINGS);
431 Assert(info->keysize > 8);
432
433 hashp->hash = string_hash;
434 }
435
436 /*
437 * If you don't specify a match function, it defaults to string_compare if
438 * you used string_hash, and to memcmp otherwise.
439 *
440 * Note: explicitly specifying string_hash is deprecated, because this
441 * might not work for callers in loadable modules on some platforms due to
442 * referencing a trampoline instead of the string_hash function proper.
443 * Specify HASH_STRINGS instead.
444 */
445 if (flags & HASH_COMPARE)
446 hashp->match = info->match;
447 else if (hashp->hash == string_hash)
448 hashp->match = (HashCompareFunc) string_compare;
449 else
450 hashp->match = memcmp;
451
452 /*
453 * Similarly, the key-copying function defaults to strlcpy or memcpy.
454 */
455 if (flags & HASH_KEYCOPY)
456 hashp->keycopy = info->keycopy;
457 else if (hashp->hash == string_hash)
458 {
459 /*
460 * The signature of keycopy is meant for memcpy(), which returns
461 * void*, but strlcpy() returns size_t. Since we never use the return
462 * value of keycopy, and size_t is pretty much always the same size as
463 * void *, this should be safe. The extra cast in the middle is to
464 * avoid warnings from -Wcast-function-type.
465 */
466 hashp->keycopy = (HashCopyFunc) (pg_funcptr_t) strlcpy;
467 }
468 else
469 hashp->keycopy = memcpy;
470
471 /* And select the entry allocation function, too. */
472 if (flags & HASH_ALLOC)
473 hashp->alloc = info->alloc;
474 else
475 hashp->alloc = DynaHashAlloc;
476
477 if (flags & HASH_SHARED_MEM)
478 {
479 /*
480 * ctl structure and directory are preallocated for shared memory
481 * tables. Note that HASH_DIRSIZE and HASH_ALLOC had better be set as
482 * well.
483 */
484 hashp->hctl = info->hctl;
485 hashp->dir = (HASHSEGMENT *) (((char *) info->hctl) + sizeof(HASHHDR));
486 hashp->hcxt = NULL;
487 hashp->isshared = true;
488
489 /* hash table already exists, we're just attaching to it */
490 if (flags & HASH_ATTACH)
491 {
492 /* make local copies of some heavily-used values */
493 hctl = hashp->hctl;
494 hashp->keysize = hctl->keysize;
495 hashp->ssize = hctl->ssize;
496 hashp->sshift = hctl->sshift;
497
498 return hashp;
499 }
500 }
501 else
502 {
503 /* setup hash table defaults */
504 hashp->hctl = NULL;
505 hashp->dir = NULL;
506 hashp->hcxt = CurrentDynaHashCxt;
507 hashp->isshared = false;
508 }
509
510 if (!hashp->hctl)
511 {
512 hashp->hctl = (HASHHDR *) hashp->alloc(sizeof(HASHHDR));
513 if (!hashp->hctl)
514 ereport(ERROR,
515 (errcode(ERRCODE_OUT_OF_MEMORY),
516 errmsg("out of memory")));
517 }
518
519 hashp->frozen = false;
520
521 hdefault(hashp);
522
523 hctl = hashp->hctl;
524
525 if (flags & HASH_PARTITION)
526 {
527 /* Doesn't make sense to partition a local hash table */
528 Assert(flags & HASH_SHARED_MEM);
529
530 /*
531 * The number of partitions had better be a power of 2. Also, it must
532 * be less than INT_MAX (see init_htab()), so call the int version of
533 * next_pow2.
534 */
535 Assert(info->num_partitions == next_pow2_int(info->num_partitions));
536
537 hctl->num_partitions = info->num_partitions;
538 }
539
540 if (flags & HASH_SEGMENT)
541 {
542 hctl->ssize = info->ssize;
543 hctl->sshift = my_log2(info->ssize);
544 /* ssize had better be a power of 2 */
545 Assert(hctl->ssize == (1L << hctl->sshift));
546 }
547
548 /*
549 * SHM hash tables have fixed directory size passed by the caller.
550 */
551 if (flags & HASH_DIRSIZE)
552 {
553 hctl->max_dsize = info->max_dsize;
554 hctl->dsize = info->dsize;
555 }
556
557 /* remember the entry sizes, too */
558 hctl->keysize = info->keysize;
559 hctl->entrysize = info->entrysize;
560
561 /* make local copies of heavily-used constant fields */
562 hashp->keysize = hctl->keysize;
563 hashp->ssize = hctl->ssize;
564 hashp->sshift = hctl->sshift;
565
566 /* Build the hash directory structure */
567 if (!init_htab(hashp, nelem))
568 elog(ERROR, "failed to initialize hash table \"%s\"", hashp->tabname);
569
570 /*
571 * For a shared hash table, preallocate the requested number of elements.
572 * This reduces problems with run-time out-of-shared-memory conditions.
573 *
574 * For a non-shared hash table, preallocate the requested number of
575 * elements if it's less than our chosen nelem_alloc. This avoids wasting
576 * space if the caller correctly estimates a small table size.
577 */
578 if ((flags & HASH_SHARED_MEM) ||
579 nelem < hctl->nelem_alloc)
580 {
581 int i,
582 freelist_partitions,
583 nelem_alloc,
584 nelem_alloc_first;
585
586 /*
587 * If hash table is partitioned, give each freelist an equal share of
588 * the initial allocation. Otherwise only freeList[0] is used.
589 */
590 if (IS_PARTITIONED(hashp->hctl))
591 freelist_partitions = NUM_FREELISTS;
592 else
593 freelist_partitions = 1;
594
595 nelem_alloc = nelem / freelist_partitions;
596 if (nelem_alloc <= 0)
597 nelem_alloc = 1;
598
599 /*
600 * Make sure we'll allocate all the requested elements; freeList[0]
601 * gets the excess if the request isn't divisible by NUM_FREELISTS.
602 */
603 if (nelem_alloc * freelist_partitions < nelem)
604 nelem_alloc_first =
605 nelem - nelem_alloc * (freelist_partitions - 1);
606 else
607 nelem_alloc_first = nelem_alloc;
608
609 for (i = 0; i < freelist_partitions; i++)
610 {
611 int temp = (i == 0) ? nelem_alloc_first : nelem_alloc;
612
613 if (!element_alloc(hashp, temp, i))
614 ereport(ERROR,
615 (errcode(ERRCODE_OUT_OF_MEMORY),
616 errmsg("out of memory")));
617 }
618 }
619
620 if (flags & HASH_FIXED_SIZE)
621 hashp->isfixed = true;
622 return hashp;
623}
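
For the shared-memory path, callers normally go through ShmemInitHash() (see src/backend/storage/ipc/shmem.c), which supplies HASH_SHARED_MEM, HASH_ALLOC, and HASH_DIRSIZE before calling hash_create(). A hedged sketch of creating a partitioned shared table; the entry type, table name, sizes, and partition count are hypothetical:

    typedef struct SharedEntry
    {
        uint32      key;
        int         value;
    } SharedEntry;

    static HTAB *shared_table = NULL;

    #define MY_NUM_PARTITIONS 16    /* must be a power of 2 */

    static void
    shared_table_init(void)
    {
        HASHCTL     info;

        info.keysize = sizeof(uint32);
        info.entrysize = sizeof(SharedEntry);
        info.num_partitions = MY_NUM_PARTITIONS;

        shared_table = ShmemInitHash("my hypothetical shared table",
                                     1024,      /* init_size */
                                     1024,      /* max_size */
                                     &info,
                                     HASH_ELEM | HASH_BLOBS | HASH_PARTITION);
    }

Because a shared table cannot grow its directory, init_size should be a good estimate of max_size, per the nelem note above.
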
624
625/*
626 * Set default HASHHDR parameters.
627 */
628static void
629hdefault(HTAB *hashp)
630{
631 HASHHDR *hctl = hashp->hctl;
632
633 MemSet(hctl, 0, sizeof(HASHHDR));
634
635 hctl->dsize = DEF_DIRSIZE;
636 hctl->nsegs = 0;
637
638 hctl->num_partitions = 0; /* not partitioned */
639
640 /* table has no fixed maximum size */
641 hctl->max_dsize = NO_MAX_DSIZE;
642
643 hctl->ssize = DEF_SEGSIZE;
644 hctl->sshift = DEF_SEGSIZE_SHIFT;
645
646#ifdef HASH_STATISTICS
647 hctl->accesses = hctl->collisions = 0;
648#endif
649}
650
651/*
652 * Given the user-specified entry size, choose nelem_alloc, ie, how many
653 * elements to add to the hash table when we need more.
654 */
655static int
656choose_nelem_alloc(Size entrysize)
657{
658 int nelem_alloc;
659 Size elementSize;
660 Size allocSize;
661
662 /* Each element has a HASHELEMENT header plus user data. */
663 /* NB: this had better match element_alloc() */
664 elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(entrysize);
665
666 /*
667 * The idea here is to choose nelem_alloc at least 32, but round up so
668 * that the allocation request will be a power of 2 or just less. This
669 * makes little difference for hash tables in shared memory, but for hash
670 * tables managed by palloc, the allocation request will be rounded up to
671 * a power of 2 anyway. If we fail to take this into account, we'll waste
672 * as much as half the allocated space.
673 */
674 allocSize = 32 * 4; /* assume elementSize at least 8 */
675 do
676 {
677 allocSize <<= 1;
678 nelem_alloc = allocSize / elementSize;
679 } while (nelem_alloc < 32);
680
681 return nelem_alloc;
682}
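
Worked example (assuming 8-byte MAXALIGN and a 16-byte HASHELEMENT): for entrysize = 64, elementSize = 16 + 64 = 80 bytes. allocSize doubles through 256, 512, 1024, 2048, 4096, and 4096 / 80 = 51 is the first quotient >= 32, so nelem_alloc = 51 and each allocation request is 51 * 80 = 4080 bytes, just under a power of 2 as the comment intends.
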
683
684/*
685 * Compute derived fields of hctl and build the initial directory/segment
686 * arrays
687 */
688static bool
689init_htab(HTAB *hashp, long nelem)
690{
691 HASHHDR *hctl = hashp->hctl;
692 HASHSEGMENT *segp;
693 int nbuckets;
694 int nsegs;
695 int i;
696
697 /*
698 * initialize mutexes if it's a partitioned table
699 */
700 if (IS_PARTITIONED(hctl))
701 for (i = 0; i < NUM_FREELISTS; i++)
702 SpinLockInit(&(hctl->freeList[i].mutex));
703
704 /*
705 * Allocate space for the next greater power of two number of buckets,
706 * assuming a desired maximum load factor of 1.
707 */
708 nbuckets = next_pow2_int(nelem);
709
710 /*
711 * In a partitioned table, nbuckets must be at least equal to
712 * num_partitions; were it less, keys with apparently different partition
713 * numbers would map to the same bucket, breaking partition independence.
714 * (Normally nbuckets will be much bigger; this is just a safety check.)
715 */
716 while (nbuckets < hctl->num_partitions)
717 nbuckets <<= 1;
718
719 hctl->max_bucket = hctl->low_mask = nbuckets - 1;
720 hctl->high_mask = (nbuckets << 1) - 1;
721
722 /*
723 * Figure number of directory segments needed, round up to a power of 2
724 */
725 nsegs = (nbuckets - 1) / hctl->ssize + 1;
726 nsegs = next_pow2_int(nsegs);
727
728 /*
729 * Make sure directory is big enough. If pre-allocated directory is too
730 * small, choke (caller screwed up).
731 */
732 if (nsegs > hctl->dsize)
733 {
734 if (!(hashp->dir))
735 hctl->dsize = nsegs;
736 else
737 return false;
738 }
739
740 /* Allocate a directory */
741 if (!(hashp->dir))
742 {
743 CurrentDynaHashCxt = hashp->hcxt;
744 hashp->dir = (HASHSEGMENT *)
745 hashp->alloc(hctl->dsize * sizeof(HASHSEGMENT));
746 if (!hashp->dir)
747 return false;
748 }
749
750 /* Allocate initial segments */
751 for (segp = hashp->dir; hctl->nsegs < nsegs; hctl->nsegs++, segp++)
752 {
753 *segp = seg_alloc(hashp);
754 if (*segp == NULL)
755 return false;
756 }
757
758 /* Choose number of entries to allocate at a time */
759 hctl->nelem_alloc = choose_nelem_alloc(hctl->entrysize);
760
761#ifdef HASH_DEBUG
762 fprintf(stderr, "init_htab:\n%s%p\n%s%ld\n%s%ld\n%s%d\n%s%ld\n%s%u\n%s%x\n%s%x\n%s%ld\n",
763 "TABLE POINTER ", hashp,
764 "DIRECTORY SIZE ", hctl->dsize,
765 "SEGMENT SIZE ", hctl->ssize,
766 "SEGMENT SHIFT ", hctl->sshift,
767 "MAX BUCKET ", hctl->max_bucket,
768 "HIGH MASK ", hctl->high_mask,
769 "LOW MASK ", hctl->low_mask,
770 "NSEGS ", hctl->nsegs);
771#endif
772 return true;
773}
774
775/*
776 * Estimate the space needed for a hashtable containing the given number
777 * of entries of given size.
778 * NOTE: this is used to estimate the footprint of hashtables in shared
779 * memory; therefore it does not count HTAB which is in local memory.
780 * NB: assumes that all hash structure parameters have default values!
781 */
782Size
783hash_estimate_size(long num_entries, Size entrysize)
784{
785 Size size;
786 long nBuckets,
787 nSegments,
788 nDirEntries,
789 nElementAllocs,
790 elementSize,
791 elementAllocCnt;
792
793 /* estimate number of buckets wanted */
794 nBuckets = next_pow2_long(num_entries);
795 /* # of segments needed for nBuckets */
796 nSegments = next_pow2_long((nBuckets - 1) / DEF_SEGSIZE + 1);
797 /* directory entries */
798 nDirEntries = DEF_DIRSIZE;
799 while (nDirEntries < nSegments)
800 nDirEntries <<= 1; /* dir_alloc doubles dsize at each call */
801
802 /* fixed control info */
803 size = MAXALIGN(sizeof(HASHHDR)); /* but not HTAB, per above */
804 /* directory */
805 size = add_size(size, mul_size(nDirEntries, sizeof(HASHSEGMENT)));
806 /* segments */
807 size = add_size(size, mul_size(nSegments,
808 MAXALIGN(DEF_SEGSIZE * sizeof(HASHBUCKET))));
809 /* elements --- allocated in groups of choose_nelem_alloc() entries */
810 elementAllocCnt = choose_nelem_alloc(entrysize);
811 nElementAllocs = (num_entries - 1) / elementAllocCnt + 1;
812 elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(entrysize);
813 size = add_size(size,
814 mul_size(nElementAllocs,
815 mul_size(elementAllocCnt, elementSize)));
816
817 return size;
818}
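
A typical use, sketched with the hypothetical SharedEntry type from the earlier example, is computing a shared-memory space request during startup:

    /* hypothetical sizing helper for a shared table of up to 1024 entries */
    static Size
    my_shared_table_size(void)
    {
        return hash_estimate_size(1024, sizeof(SharedEntry));
    }
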
819
820/*
821 * Select an appropriate directory size for a hashtable with the given
822 * maximum number of entries.
823 * This is only needed for hashtables in shared memory, whose directories
824 * cannot be expanded dynamically.
825 * NB: assumes that all hash structure parameters have default values!
826 *
827 * XXX this had better agree with the behavior of init_htab()...
828 */
829long
830hash_select_dirsize(long num_entries)
831{
832 long nBuckets,
833 nSegments,
834 nDirEntries;
835
836 /* estimate number of buckets wanted */
837 nBuckets = next_pow2_long(num_entries);
838 /* # of segments needed for nBuckets */
839 nSegments = next_pow2_long((nBuckets - 1) / DEF_SEGSIZE + 1);
840 /* directory entries */
841 nDirEntries = DEF_DIRSIZE;
842 while (nDirEntries < nSegments)
843 nDirEntries <<= 1; /* dir_alloc doubles dsize at each call */
844
845 return nDirEntries;
846}
847
848/*
849 * Compute the required initial memory allocation for a shared-memory
850 * hashtable with the given parameters. We need space for the HASHHDR
851 * and for the (non expansible) directory.
852 */
853Size
854hash_get_shared_size(HASHCTL *info, int flags)
855{
856 Assert(flags & HASH_DIRSIZE);
857 Assert(info->dsize == info->max_dsize);
858 return sizeof(HASHHDR) + info->dsize * sizeof(HASHSEGMENT);
859}
860
861
862/********************** DESTROY ROUTINES ************************/
863
864void
865hash_destroy(HTAB *hashp)
866{
867 if (hashp != NULL)
868 {
869 /* allocation method must be one we know how to free, too */
870 Assert(hashp->alloc == DynaHashAlloc);
871 /* so this hashtable must have its own context */
872 Assert(hashp->hcxt != NULL);
873
874 hash_stats("destroy", hashp);
875
876 /*
877 * Free everything by destroying the hash table's memory context.
878 */
879 MemoryContextDelete(hashp->hcxt);
880 }
881}
882
883void
884hash_stats(const char *where, HTAB *hashp)
885{
886#ifdef HASH_STATISTICS
887 fprintf(stderr, "%s: this HTAB -- accesses %ld collisions %ld\n",
888 where, hashp->hctl->accesses, hashp->hctl->collisions);
889
890 fprintf(stderr, "hash_stats: entries %ld keysize %ld maxp %u segmentcount %ld\n",
891 hash_get_num_entries(hashp), (long) hashp->hctl->keysize,
892 hashp->hctl->max_bucket, hashp->hctl->nsegs);
893 fprintf(stderr, "%s: total accesses %ld total collisions %ld\n",
894 where, hash_accesses, hash_collisions);
895 fprintf(stderr, "hash_stats: total expansions %ld\n",
896 hash_expansions);
897#endif
898}
899
900/****************************** SEARCH ROUTINES *****************************/
901
902
903/*
904 * get_hash_value -- exported routine to calculate a key's hash value
905 *
906 * We export this because for partitioned tables, callers need to compute
907 * the partition number (from the low-order bits of the hash value) before
908 * searching.
909 */
910uint32
911get_hash_value(HTAB *hashp, const void *keyPtr)
912{
913 return hashp->hash(keyPtr, hashp->keysize);
914}
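
A hedged sketch of the partitioned-table calling pattern described in the file header. Here my_partition_lock() is a hypothetical mapping from the hash value's low-order bits to an LWLock, in the style of BufMappingPartitionLock(); shared_table and SharedEntry are the hypothetical names from the earlier sketch:

    /* hypothetical helper: look up or create the entry for "key" */
    static SharedEntry *
    shared_table_enter(uint32 key)
    {
        uint32      hashcode = get_hash_value(shared_table, &key);
        LWLock     *lock = my_partition_lock(hashcode);  /* hypothetical */
        SharedEntry *entry;
        bool        found;

        LWLockAcquire(lock, LW_EXCLUSIVE);
        entry = (SharedEntry *) hash_search_with_hash_value(shared_table, &key,
                                                            hashcode, HASH_ENTER,
                                                            &found);
        if (!found)
            entry->value = 0;   /* initialize non-key fields on first insert */
        LWLockRelease(lock);
        return entry;
    }
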
915
916/* Convert a hash value to a bucket number */
917static inline uint32
918calc_bucket(HASHHDR *hctl, uint32 hash_val)
919{
920 uint32 bucket;
921
922 bucket = hash_val & hctl->high_mask;
923 if (bucket > hctl->max_bucket)
924 bucket = bucket & hctl->low_mask;
925
926 return bucket;
927}
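
Worked example: with max_bucket = 5, high_mask = 7, and low_mask = 3, a hash value of 14 masks to 14 & 7 = 6; that exceeds max_bucket, so it is reduced to 6 & 3 = 2. A hash value of 12 masks to 12 & 7 = 4, which is <= max_bucket and is used directly. Once expand_table() creates bucket 6, entries hashing to 6 migrate out of bucket 2, which is exactly the split relationship expand_table() relies on below.
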
928
929/*
930 * hash_search -- look up key in table and perform action
931 * hash_search_with_hash_value -- same, with key's hash value already computed
932 *
933 * action is one of:
934 * HASH_FIND: look up key in table
935 * HASH_ENTER: look up key in table, creating entry if not present
936 * HASH_ENTER_NULL: same, but return NULL if out of memory
937 * HASH_REMOVE: look up key in table, remove entry if present
938 *
939 * Return value is a pointer to the element found/entered/removed if any,
940 * or NULL if no match was found. (NB: in the case of the REMOVE action,
941 * the result is a dangling pointer that shouldn't be dereferenced!)
942 *
943 * HASH_ENTER will normally ereport a generic "out of memory" error if
944 * it is unable to create a new entry. The HASH_ENTER_NULL operation is
945 * the same except it will return NULL if out of memory.
946 *
947 * If foundPtr isn't NULL, then *foundPtr is set true if we found an
948 * existing entry in the table, false otherwise. This is needed in the
949 * HASH_ENTER case, but is redundant with the return value otherwise.
950 *
951 * For hash_search_with_hash_value, the hashvalue parameter must have been
952 * calculated with get_hash_value().
953 */
954void *
955hash_search(HTAB *hashp,
956 const void *keyPtr,
957 HASHACTION action,
958 bool *foundPtr)
959{
960 return hash_search_with_hash_value(hashp,
961 keyPtr,
962 hashp->hash(keyPtr, hashp->keysize),
963 action,
964 foundPtr);
965}
966
967void *
968hash_search_with_hash_value(HTAB *hashp,
969 const void *keyPtr,
970 uint32 hashvalue,
971 HASHACTION action,
972 bool *foundPtr)
973{
974 HASHHDR *hctl = hashp->hctl;
975 int freelist_idx = FREELIST_IDX(hctl, hashvalue);
976 Size keysize;
977 HASHBUCKET currBucket;
978 HASHBUCKET *prevBucketPtr;
979 HashCompareFunc match;
980
981#ifdef HASH_STATISTICS
982 hash_accesses++;
983 hctl->accesses++;
984#endif
985
986 /*
987 * If inserting, check if it is time to split a bucket.
988 *
989 * NOTE: failure to expand table is not a fatal error, it just means we
990 * have to run at higher fill factor than we wanted. However, if we're
991 * using the palloc allocator then it will throw error anyway on
992 * out-of-memory, so we must do this before modifying the table.
993 */
994 if (action == HASH_ENTER || action == HASH_ENTER_NULL)
995 {
996 /*
997 * Can't split if running in partitioned mode, nor if frozen, nor if
998 * table is the subject of any active hash_seq_search scans.
999 */
1000 if (hctl->freeList[0].nentries > (long) hctl->max_bucket &&
1001 !IS_PARTITIONED(hctl) && !hashp->frozen &&
1002 !has_seq_scans(hashp))
1003 (void) expand_table(hashp);
1004 }
1005
1006 /*
1007 * Do the initial lookup
1008 */
1009 (void) hash_initial_lookup(hashp, hashvalue, &prevBucketPtr);
1010 currBucket = *prevBucketPtr;
1011
1012 /*
1013 * Follow collision chain looking for matching key
1014 */
1015 match = hashp->match; /* save one fetch in inner loop */
1016 keysize = hashp->keysize; /* ditto */
1017
1018 while (currBucket != NULL)
1019 {
1020 if (currBucket->hashvalue == hashvalue &&
1021 match(ELEMENTKEY(currBucket), keyPtr, keysize) == 0)
1022 break;
1023 prevBucketPtr = &(currBucket->link);
1024 currBucket = *prevBucketPtr;
1025#ifdef HASH_STATISTICS
1026 hash_collisions++;
1027 hctl->collisions++;
1028#endif
1029 }
1030
1031 if (foundPtr)
1032 *foundPtr = (bool) (currBucket != NULL);
1033
1034 /*
1035 * OK, now what?
1036 */
1037 switch (action)
1038 {
1039 case HASH_FIND:
1040 if (currBucket != NULL)
1041 return ELEMENTKEY(currBucket);
1042 return NULL;
1043
1044 case HASH_REMOVE:
1045 if (currBucket != NULL)
1046 {
1047 /* if partitioned, must lock to touch nentries and freeList */
1048 if (IS_PARTITIONED(hctl))
1049 SpinLockAcquire(&(hctl->freeList[freelist_idx].mutex));
1050
1051 /* delete the record from the appropriate nentries counter. */
1052 Assert(hctl->freeList[freelist_idx].nentries > 0);
1053 hctl->freeList[freelist_idx].nentries--;
1054
1055 /* remove record from hash bucket's chain. */
1056 *prevBucketPtr = currBucket->link;
1057
1058 /* add the record to the appropriate freelist. */
1059 currBucket->link = hctl->freeList[freelist_idx].freeList;
1060 hctl->freeList[freelist_idx].freeList = currBucket;
1061
1062 if (IS_PARTITIONED(hctl))
1063 SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
1064
1065 /*
1066 * better hope the caller is synchronizing access to this
1067 * element, because someone else is going to reuse it the next
1068 * time something is added to the table
1069 */
1070 return ELEMENTKEY(currBucket);
1071 }
1072 return NULL;
1073
1074 case HASH_ENTER:
1075 case HASH_ENTER_NULL:
1076 /* Return existing element if found, else create one */
1077 if (currBucket != NULL)
1078 return ELEMENTKEY(currBucket);
1079
1080 /* disallow inserts if frozen */
1081 if (hashp->frozen)
1082 elog(ERROR, "cannot insert into frozen hashtable \"%s\"",
1083 hashp->tabname);
1084
1085 currBucket = get_hash_entry(hashp, freelist_idx);
1086 if (currBucket == NULL)
1087 {
1088 /* out of memory */
1089 if (action == HASH_ENTER_NULL)
1090 return NULL;
1091 /* report a generic message */
1092 if (hashp->isshared)
1093 ereport(ERROR,
1094 (errcode(ERRCODE_OUT_OF_MEMORY),
1095 errmsg("out of shared memory")));
1096 else
1097 ereport(ERROR,
1098 (errcode(ERRCODE_OUT_OF_MEMORY),
1099 errmsg("out of memory")));
1100 }
1101
1102 /* link into hashbucket chain */
1103 *prevBucketPtr = currBucket;
1104 currBucket->link = NULL;
1105
1106 /* copy key into record */
1107 currBucket->hashvalue = hashvalue;
1108 hashp->keycopy(ELEMENTKEY(currBucket), keyPtr, keysize);
1109
1110 /*
1111 * Caller is expected to fill the data field on return. DO NOT
1112 * insert any code that could possibly throw error here, as doing
1113 * so would leave the table entry incomplete and hence corrupt the
1114 * caller's data structure.
1115 */
1116
1117 return ELEMENTKEY(currBucket);
1118 }
1119
1120 elog(ERROR, "unrecognized hash action code: %d", (int) action);
1121
1122 return NULL; /* keep compiler quiet */
1123}
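
The canonical HASH_ENTER pattern, sketched with the hypothetical my_table/MyEntry names from the first example; the caller initializes non-key fields exactly when *foundPtr comes back false:

    static void
    my_table_bump(Oid key)
    {
        MyEntry    *entry;
        bool        found;

        entry = (MyEntry *) hash_search(my_table, &key, HASH_ENTER, &found);
        if (!found)
            entry->count = 0;   /* fresh entry: key is copied in, the rest is ours */
        entry->count++;
    }
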
1124
1125/*
1126 * hash_update_hash_key -- change the hash key of an existing table entry
1127 *
1128 * This is equivalent to removing the entry, making a new entry, and copying
1129 * over its data, except that the entry never goes to the table's freelist.
1130 * Therefore this cannot suffer an out-of-memory failure, even if there are
1131 * other processes operating in other partitions of the hashtable.
1132 *
1133 * Returns true if successful, false if the requested new hash key is already
1134 * present. Throws error if the specified entry pointer isn't actually a
1135 * table member.
1136 *
1137 * NB: currently, there is no special case for old and new hash keys being
1138 * identical, which means we'll report false for that situation. This is
1139 * preferable for existing uses.
1140 *
1141 * NB: for a partitioned hashtable, caller must hold lock on both relevant
1142 * partitions, if the new hash key would belong to a different partition.
1143 */
1144bool
1145hash_update_hash_key(HTAB *hashp,
1146 void *existingEntry,
1147 const void *newKeyPtr)
1148{
1149 HASHELEMENT *existingElement = ELEMENT_FROM_KEY(existingEntry);
1150 uint32 newhashvalue;
1151 Size keysize;
1152 uint32 bucket;
1153 uint32 newbucket;
1154 HASHBUCKET currBucket;
1155 HASHBUCKET *prevBucketPtr;
1156 HASHBUCKET *oldPrevPtr;
1157 HashCompareFunc match;
1158
1159#ifdef HASH_STATISTICS
1160 hash_accesses++;
1161 hctl->accesses++;
1162#endif
1163
1164 /* disallow updates if frozen */
1165 if (hashp->frozen)
1166 elog(ERROR, "cannot update in frozen hashtable \"%s\"",
1167 hashp->tabname);
1168
1169 /*
1170 * Lookup the existing element using its saved hash value. We need to do
1171 * this to be able to unlink it from its hash chain, but as a side benefit
1172 * we can verify the validity of the passed existingEntry pointer.
1173 */
1174 bucket = hash_initial_lookup(hashp, existingElement->hashvalue,
1175 &prevBucketPtr);
1176 currBucket = *prevBucketPtr;
1177
1178 while (currBucket != NULL)
1179 {
1180 if (currBucket == existingElement)
1181 break;
1182 prevBucketPtr = &(currBucket->link);
1183 currBucket = *prevBucketPtr;
1184 }
1185
1186 if (currBucket == NULL)
1187 elog(ERROR, "hash_update_hash_key argument is not in hashtable \"%s\"",
1188 hashp->tabname);
1189
1190 oldPrevPtr = prevBucketPtr;
1191
1192 /*
1193 * Now perform the equivalent of a HASH_ENTER operation to locate the hash
1194 * chain we want to put the entry into.
1195 */
1196 newhashvalue = hashp->hash(newKeyPtr, hashp->keysize);
1197 newbucket = hash_initial_lookup(hashp, newhashvalue, &prevBucketPtr);
1198 currBucket = *prevBucketPtr;
1199
1200 /*
1201 * Follow collision chain looking for matching key
1202 */
1203 match = hashp->match; /* save one fetch in inner loop */
1204 keysize = hashp->keysize; /* ditto */
1205
1206 while (currBucket != NULL)
1207 {
1208 if (currBucket->hashvalue == newhashvalue &&
1209 match(ELEMENTKEY(currBucket), newKeyPtr, keysize) == 0)
1210 break;
1211 prevBucketPtr = &(currBucket->link);
1212 currBucket = *prevBucketPtr;
1213#ifdef HASH_STATISTICS
1214 hash_collisions++;
1215 hctl->collisions++;
1216#endif
1217 }
1218
1219 if (currBucket != NULL)
1220 return false; /* collision with an existing entry */
1221
1222 currBucket = existingElement;
1223
1224 /*
1225 * If old and new hash values belong to the same bucket, we need not
1226 * change any chain links, and indeed should not since this simplistic
1227 * update will corrupt the list if currBucket is the last element. (We
1228 * cannot fall out earlier, however, since we need to scan the bucket to
1229 * check for duplicate keys.)
1230 */
1231 if (bucket != newbucket)
1232 {
1233 /* OK to remove record from old hash bucket's chain. */
1234 *oldPrevPtr = currBucket->link;
1235
1236 /* link into new hashbucket chain */
1237 *prevBucketPtr = currBucket;
1238 currBucket->link = NULL;
1239 }
1240
1241 /* copy new key into record */
1242 currBucket->hashvalue = newhashvalue;
1243 hashp->keycopy(ELEMENTKEY(currBucket), newKeyPtr, keysize);
1244
1245 /* rest of record is untouched */
1246
1247 return true;
1248}
1249
1250/*
1251 * Allocate a new hashtable entry if possible; return NULL if out of memory.
1252 * (Or, if the underlying space allocator throws error for out-of-memory,
1253 * we won't return at all.)
1254 */
1255static HASHBUCKET
1256get_hash_entry(HTAB *hashp, int freelist_idx)
1257{
1258 HASHHDR *hctl = hashp->hctl;
1259 HASHBUCKET newElement;
1260
1261 for (;;)
1262 {
1263 /* if partitioned, must lock to touch nentries and freeList */
1264 if (IS_PARTITIONED(hctl))
1265 SpinLockAcquire(&hctl->freeList[freelist_idx].mutex);
1266
1267 /* try to get an entry from the freelist */
1268 newElement = hctl->freeList[freelist_idx].freeList;
1269
1270 if (newElement != NULL)
1271 break;
1272
1273 if (IS_PARTITIONED(hctl))
1274 SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
1275
1276 /*
1277 * No free elements in this freelist. In a partitioned table, there
1278 * might be entries in other freelists, but to reduce contention we
1279 * prefer to first try to get another chunk of buckets from the main
1280 * shmem allocator. If that fails, though, we *MUST* root through all
1281 * the other freelists before giving up. There are multiple callers
1282 * that assume that they can allocate every element in the initially
1283 * requested table size, or that deleting an element guarantees they
1284 * can insert a new element, even if shared memory is entirely full.
1285 * Failing because the needed element is in a different freelist is
1286 * not acceptable.
1287 */
1288 if (!element_alloc(hashp, hctl->nelem_alloc, freelist_idx))
1289 {
1290 int borrow_from_idx;
1291
1292 if (!IS_PARTITIONED(hctl))
1293 return NULL; /* out of memory */
1294
1295 /* try to borrow element from another freelist */
1296 borrow_from_idx = freelist_idx;
1297 for (;;)
1298 {
1299 borrow_from_idx = (borrow_from_idx + 1) % NUM_FREELISTS;
1300 if (borrow_from_idx == freelist_idx)
1301 break; /* examined all freelists, fail */
1302
1303 SpinLockAcquire(&(hctl->freeList[borrow_from_idx].mutex));
1304 newElement = hctl->freeList[borrow_from_idx].freeList;
1305
1306 if (newElement != NULL)
1307 {
1308 hctl->freeList[borrow_from_idx].freeList = newElement->link;
1309 SpinLockRelease(&(hctl->freeList[borrow_from_idx].mutex));
1310
1311 /* careful: count the new element in its proper freelist */
1312 SpinLockAcquire(&hctl->freeList[freelist_idx].mutex);
1313 hctl->freeList[freelist_idx].nentries++;
1314 SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
1315
1316 return newElement;
1317 }
1318
1319 SpinLockRelease(&(hctl->freeList[borrow_from_idx].mutex));
1320 }
1321
1322 /* no elements available to borrow either, so out of memory */
1323 return NULL;
1324 }
1325 }
1326
1327 /* remove entry from freelist, bump nentries */
1328 hctl->freeList[freelist_idx].freeList = newElement->link;
1329 hctl->freeList[freelist_idx].nentries++;
1330
1331 if (IS_PARTITIONED(hctl))
1332 SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
1333
1334 return newElement;
1335}
1336
1337/*
1338 * hash_get_num_entries -- get the number of entries in a hashtable
1339 */
1340long
1341hash_get_num_entries(HTAB *hashp)
1342{
1343 int i;
1344 long sum = hashp->hctl->freeList[0].nentries;
1345
1346 /*
1347 * We currently don't bother with acquiring the mutexes; it's only
1348 * sensible to call this function if you've got lock on all partitions of
1349 * the table.
1350 */
1351 if (IS_PARTITIONED(hashp->hctl))
1352 {
1353 for (i = 1; i < NUM_FREELISTS; i++)
1354 sum += hashp->hctl->freeList[i].nentries;
1355 }
1356
1357 return sum;
1358}
1359
1360/*
1361 * hash_seq_init/_search/_term
1362 * Sequentially search through hash table and return
1363 * all the elements one by one, return NULL when no more.
1364 *
1365 * hash_seq_term should be called if and only if the scan is abandoned before
1366 * completion; if hash_seq_search returns NULL then it has already done the
1367 * end-of-scan cleanup.
1368 *
1369 * NOTE: caller may delete the returned element before continuing the scan.
1370 * However, deleting any other element while the scan is in progress is
1371 * UNDEFINED (it might be the one that curIndex is pointing at!). Also,
1372 * if elements are added to the table while the scan is in progress, it is
1373 * unspecified whether they will be visited by the scan or not.
1374 *
1375 * NOTE: it is possible to use hash_seq_init/hash_seq_search without any
1376 * worry about hash_seq_term cleanup, if the hashtable is first locked against
1377 * further insertions by calling hash_freeze.
1378 *
1379 * NOTE: to use this with a partitioned hashtable, caller had better hold
1380 * at least shared lock on all partitions of the table throughout the scan!
1381 * We can cope with insertions or deletions by our own backend, but *not*
1382 * with concurrent insertions or deletions by another.
1383 */
1384void
1385hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
1386{
1387 status->hashp = hashp;
1388 status->curBucket = 0;
1389 status->curEntry = NULL;
1390 status->hasHashvalue = false;
1391 if (!hashp->frozen)
1392 register_seq_scan(hashp);
1393}
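
A complete-scan sketch (hypothetical names as above). Because the scan runs until hash_seq_search() returns NULL, no hash_seq_term() call is needed, and deleting the just-returned element mid-scan is explicitly allowed:

    static void
    my_table_prune(void)
    {
        HASH_SEQ_STATUS status;
        MyEntry    *entry;

        hash_seq_init(&status, my_table);
        while ((entry = (MyEntry *) hash_seq_search(&status)) != NULL)
        {
            if (entry->count == 0)
                (void) hash_search(my_table, &entry->key, HASH_REMOVE, NULL);
        }
        /* scan ran to completion, so no hash_seq_term() is required */
    }
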
1394
1395/*
1396 * Same as above but scan by the given hash value.
1397 * See also hash_seq_search().
1398 *
1399 * NOTE: the default hash function doesn't match the syscache hash function.
1400 * Thus, if you're going to use this function in a syscache callback, make
1401 * sure you're using a custom hash function. See relatt_cache_syshash()
1402 * for an example.
1403 */
1404void
1405hash_seq_init_with_hash_value(HASH_SEQ_STATUS *status, HTAB *hashp,
1406 uint32 hashvalue)
1407{
1408 HASHBUCKET *bucketPtr;
1409
1410 hash_seq_init(status, hashp);
1411
1412 status->hasHashvalue = true;
1413 status->hashvalue = hashvalue;
1414
1415 status->curBucket = hash_initial_lookup(hashp, hashvalue, &bucketPtr);
1416 status->curEntry = *bucketPtr;
1417}
1418
1419void *
1420hash_seq_search(HASH_SEQ_STATUS *status)
1421{
1422 HTAB *hashp;
1423 HASHHDR *hctl;
1424 uint32 max_bucket;
1425 long ssize;
1426 long segment_num;
1427 long segment_ndx;
1428 HASHSEGMENT segp;
1429 uint32 curBucket;
1430 HASHELEMENT *curElem;
1431
1432 if (status->hasHashvalue)
1433 {
1434 /*
1435 * Scan entries only in the current bucket because only this bucket
1436 * can contain entries with the given hash value.
1437 */
1438 while ((curElem = status->curEntry) != NULL)
1439 {
1440 status->curEntry = curElem->link;
1441 if (status->hashvalue != curElem->hashvalue)
1442 continue;
1443 return (void *) ELEMENTKEY(curElem);
1444 }
1445
1446 hash_seq_term(status);
1447 return NULL;
1448 }
1449
1450 if ((curElem = status->curEntry) != NULL)
1451 {
1452 /* Continuing scan of curBucket... */
1453 status->curEntry = curElem->link;
1454 if (status->curEntry == NULL) /* end of this bucket */
1455 ++status->curBucket;
1456 return ELEMENTKEY(curElem);
1457 }
1458
1459 /*
1460 * Search for next nonempty bucket starting at curBucket.
1461 */
1462 curBucket = status->curBucket;
1463 hashp = status->hashp;
1464 hctl = hashp->hctl;
1465 ssize = hashp->ssize;
1466 max_bucket = hctl->max_bucket;
1467
1468 if (curBucket > max_bucket)
1469 {
1470 hash_seq_term(status);
1471 return NULL; /* search is done */
1472 }
1473
1474 /*
1475 * first find the right segment in the table directory.
1476 */
1477 segment_num = curBucket >> hashp->sshift;
1478 segment_ndx = MOD(curBucket, ssize);
1479
1480 segp = hashp->dir[segment_num];
1481
1482 /*
1483 * Pick up the first item in this bucket's chain. If chain is not empty
1484 * we can begin searching it. Otherwise we have to advance to find the
1485 * next nonempty bucket. We try to optimize that case since searching a
1486 * near-empty hashtable has to iterate this loop a lot.
1487 */
1488 while ((curElem = segp[segment_ndx]) == NULL)
1489 {
1490 /* empty bucket, advance to next */
1491 if (++curBucket > max_bucket)
1492 {
1493 status->curBucket = curBucket;
1494 hash_seq_term(status);
1495 return NULL; /* search is done */
1496 }
1497 if (++segment_ndx >= ssize)
1498 {
1499 segment_num++;
1500 segment_ndx = 0;
1501 segp = hashp->dir[segment_num];
1502 }
1503 }
1504
1505 /* Begin scan of curBucket... */
1506 status->curEntry = curElem->link;
1507 if (status->curEntry == NULL) /* end of this bucket */
1508 ++curBucket;
1509 status->curBucket = curBucket;
1510 return ELEMENTKEY(curElem);
1511}
1512
1513void
1514hash_seq_term(HASH_SEQ_STATUS *status)
1515{
1516 if (!status->hashp->frozen)
1517 deregister_seq_scan(status->hashp);
1518}
1519
1520/*
1521 * hash_freeze
1522 * Freeze a hashtable against future insertions (deletions are
1523 * still allowed)
1524 *
1525 * The reason for doing this is that by preventing any more bucket splits,
1526 * we no longer need to worry about registering hash_seq_search scans,
1527 * and thus caller need not be careful about ensuring hash_seq_term gets
1528 * called at the right times.
1529 *
1530 * Multiple calls to hash_freeze() are allowed, but you can't freeze a table
1531 * with active scans (since hash_seq_term would then do the wrong thing).
1532 */
1533void
1534hash_freeze(HTAB *hashp)
1535{
1536 if (hashp->isshared)
1537 elog(ERROR, "cannot freeze shared hashtable \"%s\"", hashp->tabname);
1538 if (!hashp->frozen && has_seq_scans(hashp))
1539 elog(ERROR, "cannot freeze hashtable \"%s\" because it has active scans",
1540 hashp->tabname);
1541 hashp->frozen = true;
1542}
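
A sketch of the pattern this enables (hypothetical names and threshold): after hash_freeze(), a scan may be abandoned early without a matching hash_seq_term():

    static bool
    my_table_has_hot_entry(void)
    {
        HASH_SEQ_STATUS status;
        MyEntry    *entry;

        hash_freeze(my_table);  /* no more inserts, so scans need no cleanup */

        hash_seq_init(&status, my_table);
        while ((entry = (MyEntry *) hash_seq_search(&status)) != NULL)
        {
            if (entry->count > 100)
                return true;    /* abandoning the scan early is safe now */
        }
        return false;
    }
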
1543
1544
1545/********************************* UTILITIES ************************/
1546
1547/*
1548 * Expand the table by adding one more hash bucket.
1549 */
1550static bool
1551expand_table(HTAB *hashp)
1552{
1553 HASHHDR *hctl = hashp->hctl;
1554 HASHSEGMENT old_seg,
1555 new_seg;
1556 long old_bucket,
1557 new_bucket;
1558 long new_segnum,
1559 new_segndx;
1560 long old_segnum,
1561 old_segndx;
1562 HASHBUCKET *oldlink,
1563 *newlink;
1564 HASHBUCKET currElement,
1565 nextElement;
1566
1567 Assert(!IS_PARTITIONED(hctl));
1568
1569#ifdef HASH_STATISTICS
1570 hash_expansions++;
1571#endif
1572
1573 new_bucket = hctl->max_bucket + 1;
1574 new_segnum = new_bucket >> hashp->sshift;
1575 new_segndx = MOD(new_bucket, hashp->ssize);
1576
1577 if (new_segnum >= hctl->nsegs)
1578 {
1579 /* Allocate new segment if necessary -- could fail if dir full */
1580 if (new_segnum >= hctl->dsize)
1581 if (!dir_realloc(hashp))
1582 return false;
1583 if (!(hashp->dir[new_segnum] = seg_alloc(hashp)))
1584 return false;
1585 hctl->nsegs++;
1586 }
1587
1588 /* OK, we created a new bucket */
1589 hctl->max_bucket++;
1590
1591 /*
1592 * *Before* changing masks, find old bucket corresponding to same hash
1593 * values; values in that bucket may need to be relocated to new bucket.
1594 * Note that new_bucket is certainly larger than low_mask at this point,
1595 * so we can skip the first step of the regular hash mask calc.
1596 */
1597 old_bucket = (new_bucket & hctl->low_mask);
1598
1599 /*
1600 * If we crossed a power of 2, readjust masks.
1601 */
1602 if ((uint32) new_bucket > hctl->high_mask)
1603 {
1604 hctl->low_mask = hctl->high_mask;
1605 hctl->high_mask = (uint32) new_bucket | hctl->low_mask;
1606 }
1607
1608 /*
1609 * Relocate records to the new bucket. NOTE: because of the way the hash
1610 * masking is done in calc_bucket, only one old bucket can need to be
1611 * split at this point. With a different way of reducing the hash value,
1612 * that might not be true!
1613 */
1614 old_segnum = old_bucket >> hashp->sshift;
1615 old_segndx = MOD(old_bucket, hashp->ssize);
1616
1617 old_seg = hashp->dir[old_segnum];
1618 new_seg = hashp->dir[new_segnum];
1619
1620 oldlink = &old_seg[old_segndx];
1621 newlink = &new_seg[new_segndx];
1622
1623 for (currElement = *oldlink;
1624 currElement != NULL;
1625 currElement = nextElement)
1626 {
1627 nextElement = currElement->link;
1628 if ((long) calc_bucket(hctl, currElement->hashvalue) == old_bucket)
1629 {
1630 *oldlink = currElement;
1631 oldlink = &currElement->link;
1632 }
1633 else
1634 {
1635 *newlink = currElement;
1636 newlink = &currElement->link;
1637 }
1638 }
1639 /* don't forget to terminate the rebuilt hash chains... */
1640 *oldlink = NULL;
1641 *newlink = NULL;
1642
1643 return true;
1644}
1645
1646
1647static bool
1648dir_realloc(HTAB *hashp)
1649{
1650 HASHSEGMENT *p;
1651 HASHSEGMENT *old_p;
1652 long new_dsize;
1653 long old_dirsize;
1654 long new_dirsize;
1655
1656 if (hashp->hctl->max_dsize != NO_MAX_DSIZE)
1657 return false;
1658
1659 /* Reallocate directory */
1660 new_dsize = hashp->hctl->dsize << 1;
1661 old_dirsize = hashp->hctl->dsize * sizeof(HASHSEGMENT);
1662 new_dirsize = new_dsize * sizeof(HASHSEGMENT);
1663
1664 old_p = hashp->dir;
1665 CurrentDynaHashCxt = hashp->hcxt;
1666 p = (HASHSEGMENT *) hashp->alloc((Size) new_dirsize);
1667
1668 if (p != NULL)
1669 {
1670 memcpy(p, old_p, old_dirsize);
1671 MemSet(((char *) p) + old_dirsize, 0, new_dirsize - old_dirsize);
1672 hashp->dir = p;
1673 hashp->hctl->dsize = new_dsize;
1674
1675 /* XXX assume the allocator is palloc, so we know how to free */
1676 Assert(hashp->alloc == DynaHashAlloc);
1677 pfree(old_p);
1678
1679 return true;
1680 }
1681
1682 return false;
1683}
1684
1685
1686static HASHSEGMENT
1687seg_alloc(HTAB *hashp)
1688{
1689 HASHSEGMENT segp;
1690
1691 CurrentDynaHashCxt = hashp->hcxt;
1692 segp = (HASHSEGMENT) hashp->alloc(sizeof(HASHBUCKET) * hashp->ssize);
1693
1694 if (!segp)
1695 return NULL;
1696
1697 MemSet(segp, 0, sizeof(HASHBUCKET) * hashp->ssize);
1698
1699 return segp;
1700}
1701
1702/*
1703 * allocate some new elements and link them into the indicated free list
1704 */
1705static bool
1706element_alloc(HTAB *hashp, int nelem, int freelist_idx)
1707{
1708 HASHHDR *hctl = hashp->hctl;
1709 Size elementSize;
1710 HASHELEMENT *firstElement;
1711 HASHELEMENT *tmpElement;
1712 HASHELEMENT *prevElement;
1713 int i;
1714
1715 if (hashp->isfixed)
1716 return false;
1717
1718 /* Each element has a HASHELEMENT header plus user data. */
1719 elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(hctl->entrysize);
1720
1721 CurrentDynaHashCxt = hashp->hcxt;
1722 firstElement = (HASHELEMENT *) hashp->alloc(nelem * elementSize);
1723
1724 if (!firstElement)
1725 return false;
1726
1727 /* prepare to link all the new entries into the freelist */
1728 prevElement = NULL;
1729 tmpElement = firstElement;
1730 for (i = 0; i < nelem; i++)
1731 {
1732 tmpElement->link = prevElement;
1733 prevElement = tmpElement;
1734 tmpElement = (HASHELEMENT *) (((char *) tmpElement) + elementSize);
1735 }
1736
1737 /* if partitioned, must lock to touch freeList */
1738 if (IS_PARTITIONED(hctl))
1739 SpinLockAcquire(&hctl->freeList[freelist_idx].mutex);
1740
1741 /* freelist could be nonempty if two backends did this concurrently */
1742 firstElement->link = hctl->freeList[freelist_idx].freeList;
1743 hctl->freeList[freelist_idx].freeList = prevElement;
1744
1745 if (IS_PARTITIONED(hctl))
1746 SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
1747
1748 return true;
1749}
1750
1751/*
1752 * Do initial lookup of a bucket for the given hash value, retrieving its
1753 * bucket number and its hash bucket.
1754 */
1755static inline uint32
1756hash_initial_lookup(HTAB *hashp, uint32 hashvalue, HASHBUCKET **bucketptr)
1757{
1758 HASHHDR *hctl = hashp->hctl;
1759 HASHSEGMENT segp;
1760 long segment_num;
1761 long segment_ndx;
1762 uint32 bucket;
1763
1764 bucket = calc_bucket(hctl, hashvalue);
1765
1766 segment_num = bucket >> hashp->sshift;
1767 segment_ndx = MOD(bucket, hashp->ssize);
1768
1769 segp = hashp->dir[segment_num];
1770
1771 if (segp == NULL)
1772 hash_corrupted(hashp);
1773
1774 *bucketptr = &segp[segment_ndx];
1775 return bucket;
1776}
1777
1778/* complain when we have detected a corrupted hashtable */
1779static void
1780hash_corrupted(HTAB *hashp)
1781{
1782 /*
1783 * If the corruption is in a shared hashtable, we'd better force a
1784 * systemwide restart. Otherwise, just shut down this one backend.
1785 */
1786 if (hashp->isshared)
1787 elog(PANIC, "hash table \"%s\" corrupted", hashp->tabname);
1788 else
1789 elog(FATAL, "hash table \"%s\" corrupted", hashp->tabname);
1790}
1791
1792/* calculate ceil(log base 2) of num */
1793int
1794my_log2(long num)
1795{
1796 /*
1797 * guard against too-large input, which would be invalid for
1798 * pg_ceil_log2_*()
1799 */
1800 if (num > LONG_MAX / 2)
1801 num = LONG_MAX / 2;
1802
1803#if SIZEOF_LONG < 8
1804 return pg_ceil_log2_32(num);
1805#else
1806 return pg_ceil_log2_64(num);
1807#endif
1808}
1809
1810/* calculate first power of 2 >= num, bounded to what will fit in a long */
1811static long
1812next_pow2_long(long num)
1813{
1814 /* my_log2's internal range check is sufficient */
1815 return 1L << my_log2(num);
1816}
1817
1818/* calculate first power of 2 >= num, bounded to what will fit in an int */
1819static int
1820next_pow2_int(long num)
1821{
1822 if (num > INT_MAX / 2)
1823 num = INT_MAX / 2;
1824 return 1 << my_log2(num);
1825}
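/*
 * Worked example (editorial): my_log2 returns ceil(log2(num)), so the
 * next_pow2_* helpers round up to a power of two:
 *
 *		my_log2(1000)        = 10		(2^9 = 512 < 1000 <= 1024 = 2^10)
 *		next_pow2_long(1000) = 1024
 *		next_pow2_int(1024)  = 1024		(exact powers map to themselves)
 */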
1826
1827
1828/************************* SEQ SCAN TRACKING ************************/
1829
1830/*
1831 * We track active hash_seq_search scans here. The need for this mechanism
1832 * comes from the fact that a scan will get confused if a bucket split occurs
1833 * while it's in progress: it might visit entries twice, or even miss some
1834 * entirely (if it's partway through the same bucket that splits). Hence
1835 * we want to inhibit bucket splits if there are any active scans on the
1836 * table being inserted into. This is a fairly rare case in current usage,
1837 * so just postponing the split until the next insertion seems sufficient.
1838 *
1839 * Given present usages of the function, only a few scans are likely to be
1840 * open concurrently; so a finite-size stack of open scans seems sufficient,
1841 * and we don't worry that linear search is too slow. Note that we do
1842 * allow multiple scans of the same hashtable to be open concurrently.
1843 *
1844 * This mechanism can support concurrent scan and insertion in a shared
1845 * hashtable if it's the same backend doing both. It would fail otherwise,
1846 * but locking reasons seem to preclude any such scenario anyway, so we don't
1847 * worry.
1848 *
1849 * This arrangement is reasonably robust if a transient hashtable is deleted
1850 * without notifying us. The absolute worst case is we might inhibit splits
1851 * in another table created later at exactly the same address. We will give
1852 * a warning at transaction end for reference leaks, so any bugs leading to
1853 * lack of notification should be easy to catch.
1854 */
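/*
 * Editorial sketch (caller-side usage, not part of dynahash.c): the scan
 * API whose bookkeeping lives below.  "my_htab" and "MyEntry" are
 * hypothetical caller names.
 */
HASH_SEQ_STATUS status;
MyEntry    *entry;

hash_seq_init(&status, my_htab);
while ((entry = (MyEntry *) hash_seq_search(&status)) != NULL)
{
	/* ... may inspect, or HASH_REMOVE, the entry just returned ... */
}
/* running to NULL deregisters the scan automatically; a scan abandoned
 * before that point must call hash_seq_term(&status) instead */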
1855
1856#define MAX_SEQ_SCANS 100
1857
1858static HTAB *seq_scan_tables[MAX_SEQ_SCANS]; /* tables being scanned */
1859static int seq_scan_level[MAX_SEQ_SCANS]; /* subtransaction nest level */
1860static int num_seq_scans = 0;
1861
1862
1863/* Register a table as having an active hash_seq_search scan */
1864static void
1865register_seq_scan(HTAB *hashp)
1866{
1867 if (num_seq_scans >= MAX_SEQ_SCANS)
1868 elog(ERROR, "too many active hash_seq_search scans, cannot start one on \"%s\"",
1869 hashp->tabname);
1870 seq_scan_tables[num_seq_scans] = hashp;
1871 seq_scan_level[num_seq_scans] = GetCurrentTransactionNestLevel();
1872 num_seq_scans++;
1873}
1874
1875/* Deregister an active scan */
1876static void
1877deregister_seq_scan(HTAB *hashp)
1878{
1879 int i;
1880
1881 /* Search backward since it's most likely at the stack top */
1882 for (i = num_seq_scans - 1; i >= 0; i--)
1883 {
1884 if (seq_scan_tables[i] == hashp)
1885 {
1886 seq_scan_tables[i] = seq_scan_tables[num_seq_scans - 1];
1887 seq_scan_level[i] = seq_scan_level[num_seq_scans - 1];
1888 num_seq_scans--;
1889 return;
1890 }
1891 }
1892 elog(ERROR, "no hash_seq_search scan for hash table \"%s\"",
1893 hashp->tabname);
1894}
1895
1896/* Check if a table has any active scan */
1897static bool
1898has_seq_scans(HTAB *hashp)
1899{
1900 int i;
1901
1902 for (i = 0; i < num_seq_scans; i++)
1903 {
1904 if (seq_scan_tables[i] == hashp)
1905 return true;
1906 }
1907 return false;
1908}
1909
1910/* Clean up any open scans at end of transaction */
1911void
1912AtEOXact_HashTables(bool isCommit)
1913{
1914 /*
1915 * During abort cleanup, open scans are expected; just silently clean 'em
1916 * out. An open scan at commit means someone forgot a hash_seq_term()
1917 * call, so complain.
1918 *
1919 * Note: it's tempting to try to print the tabname here, but refrain for
1920 * fear of touching deallocated memory. This isn't a user-facing message
1921 * anyway, so it needn't be pretty.
1922 */
1923 if (isCommit)
1924 {
1925 int i;
1926
1927 for (i = 0; i < num_seq_scans; i++)
1928 {
1929 elog(WARNING, "leaked hash_seq_search scan for hash table %p",
1930 seq_scan_tables[i]);
1931 }
1932 }
1933 num_seq_scans = 0;
1934}
1935
1936/* Clean up any open scans at end of subtransaction */
1937void
1938AtEOSubXact_HashTables(bool isCommit, int nestDepth)
1939{
1940 int i;
1941
1942 /*
1943 * Search backward to make cleanup easy. Note we must check all entries,
1944 * not only those at the end of the array, because deletion technique
1945 * doesn't keep them in order.
1946 */
1947 for (i = num_seq_scans - 1; i >= 0; i--)
1948 {
1949 if (seq_scan_level[i] >= nestDepth)
1950 {
1951 if (isCommit)
1952 elog(WARNING, "leaked hash_seq_search scan for hash table %p",
1953 seq_scan_tables[i]);
1954 seq_scan_tables[i] = seq_scan_tables[num_seq_scans - 1];
1955 seq_scan_level[i] = seq_scan_level[num_seq_scans - 1];
1956 num_seq_scans--;
1957 }
1958 }
1959}