PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
simplehash.h
Go to the documentation of this file.
1 /*
2  * simplehash.h
3  *
4  * Hash table implementation which will be specialized to user-defined
5  * types, by including this file to generate the required code. It's
6  * probably not worthwhile to do so for hash tables that aren't performance
7  * or space sensitive.
8  *
9  * Usage notes:
10  *
11  * To generate a hash-table and associated functions for a use case several
12  * macros have to be #define'ed before this file is included. Including
13  * the file #undef's all those, so a new hash table can be generated
14  * afterwards.
15  * The relevant parameters are:
16  * - SH_PREFIX - prefix for all symbol names generated. A prefix of 'foo'
17  * will result in hash table type 'foo_hash' and functions like
18  * 'foo_insert'/'foo_lookup' and so forth.
19  * - SH_ELEMENT_TYPE - type of the contained elements
20  * - SH_KEY_TYPE - type of the hashtable's key
21  * - SH_DECLARE - if defined function prototypes and type declarations are
22  * generated
23  * - SH_DEFINE - if defined function definitions are generated
24  * - SH_SCOPE - in which scope (e.g. extern, static inline) do function
25  * declarations reside
26  * - SH_USE_NONDEFAULT_ALLOCATOR - if defined no element allocator functions
27  * are defined, so you can supply your own
28  * The following parameters are only relevant when SH_DEFINE is defined:
29  * - SH_KEY - name of the element in SH_ELEMENT_TYPE containing the hash key
30  * - SH_EQUAL(table, a, b) - compare two table keys
31  * - SH_HASH_KEY(table, key) - generate hash for the key
32  * - SH_STORE_HASH - if defined the hash is stored in the elements
33  * - SH_GET_HASH(tb, a) - return the field to store the hash in
34  *
35  * For examples of usage look at simplehash.c (file local definition) and
36  * execnodes.h/execGrouping.c (exposed declaration, file local
37  * implementation).
38  *
39  * Hash table design:
40  *
41  * The hash table design chosen is a variant of linear open-addressing. The
42  * reason for doing so is that linear addressing is CPU cache & pipeline
43  * friendly. The biggest disadvantage of simple linear addressing schemes
44  * are highly variable lookup times due to clustering, and deletions
45  * leaving a lot of tombstones around. To address these issues a variant
46  * of "robin hood" hashing is employed. Robin hood hashing optimizes
47  * chaining lengths by moving elements close to their optimal bucket
48  * ("rich" elements), out of the way if a to-be-inserted element is further
49  * away from its optimal position (i.e. it's "poor"). While that can make
50  * insertions slower, the average lookup performance is a lot better, and
51  * higher fill factors can be used in a still performant manner. To avoid
52  * tombstones - which normally solve the issue that a deleted node's
53  * presence is relevant to determine whether a lookup needs to continue
54  * looking or is done - buckets following a deleted element are shifted
55  * backwards, unless they're empty or already at their optimal position.
56  */
57 
58 /* helpers */
59 #define SH_MAKE_PREFIX(a) CppConcat(a,_)
60 #define SH_MAKE_NAME(name) SH_MAKE_NAME_(SH_MAKE_PREFIX(SH_PREFIX),name)
61 #define SH_MAKE_NAME_(a,b) CppConcat(a,b)
62 
63 /* name macros for: */
64 
65 /* type declarations */
66 #define SH_TYPE SH_MAKE_NAME(hash)
67 #define SH_STATUS SH_MAKE_NAME(status)
68 #define SH_STATUS_EMPTY SH_MAKE_NAME(EMPTY)
69 #define SH_STATUS_IN_USE SH_MAKE_NAME(IN_USE)
70 #define SH_ITERATOR SH_MAKE_NAME(iterator)
71 
72 /* function declarations */
73 #define SH_CREATE SH_MAKE_NAME(create)
74 #define SH_DESTROY SH_MAKE_NAME(destroy)
75 #define SH_INSERT SH_MAKE_NAME(insert)
76 #define SH_DELETE SH_MAKE_NAME(delete)
77 #define SH_LOOKUP SH_MAKE_NAME(lookup)
78 #define SH_GROW SH_MAKE_NAME(grow)
79 #define SH_START_ITERATE SH_MAKE_NAME(start_iterate)
80 #define SH_START_ITERATE_AT SH_MAKE_NAME(start_iterate_at)
81 #define SH_ITERATE SH_MAKE_NAME(iterate)
82 #define SH_ALLOCATE SH_MAKE_NAME(allocate)
83 #define SH_FREE SH_MAKE_NAME(free)
84 #define SH_STAT SH_MAKE_NAME(stat)
85 
86 /* internal helper functions (no externally visible prototypes) */
87 #define SH_COMPUTE_PARAMETERS SH_MAKE_NAME(compute_parameters)
88 #define SH_NEXT SH_MAKE_NAME(next)
89 #define SH_PREV SH_MAKE_NAME(prev)
90 #define SH_DISTANCE_FROM_OPTIMAL SH_MAKE_NAME(distance)
91 #define SH_INITIAL_BUCKET SH_MAKE_NAME(initial_bucket)
92 #define SH_ENTRY_HASH SH_MAKE_NAME(entry_hash)
93 
94 /* generate forward declarations necessary to use the hash table */
95 #ifdef SH_DECLARE
96 
97 /* type definitions */
98 typedef struct SH_TYPE
99 {
100  /*
101  * Size of data / bucket array, 64 bits to handle UINT32_MAX sized hash
102  * tables. Note that the maximum number of elements is lower
103  * (SH_MAX_FILLFACTOR)
104  */
105  uint64 size;
106 
107  /* how many elements have valid contents */
108  uint32 members;
109 
110  /* mask for bucket and size calculations, based on size */
111  uint32 sizemask;
112 
113  /* boundary after which to grow hashtable */
114  uint32 grow_threshold;
115 
116  /* hash buckets */
117  SH_ELEMENT_TYPE *data;
118 
119  /* memory context to use for allocations */
120  MemoryContext ctx;
121 
122  /* user defined data, useful for callbacks */
123  void *private_data;
124 } SH_TYPE;
125 
126 typedef enum SH_STATUS
127 {
128  SH_STATUS_EMPTY = 0x00,
129  SH_STATUS_IN_USE = 0x01
130 } SH_STATUS;
131 
132 typedef struct SH_ITERATOR
133 {
134  uint32 cur; /* current element */
135  uint32 end;
136  bool done; /* iterator exhausted? */
137 } SH_ITERATOR;
138 
139 /* externally visible function prototypes */
141  void *private_data);
142 SH_SCOPE void SH_DESTROY(SH_TYPE *tb);
143 SH_SCOPE void SH_GROW(SH_TYPE *tb, uint32 newsize);
144 SH_SCOPE SH_ELEMENT_TYPE *SH_INSERT(SH_TYPE *tb, SH_KEY_TYPE key, bool *found);
146 SH_SCOPE bool SH_DELETE(SH_TYPE *tb, SH_KEY_TYPE key);
150 SH_SCOPE void SH_STAT(SH_TYPE *tb);
151 
152 #endif /* SH_DECLARE */
153 
154 
155 /* generate implementation of the hash table */
156 #ifdef SH_DEFINE
157 
158 #include "utils/memutils.h"
159 
160 /* conservative fillfactor for a robin hood table, might want to adjust */
161 #define SH_FILLFACTOR (0.8)
162 /* increase fillfactor if we otherwise would error out */
163 #define SH_MAX_FILLFACTOR (0.95)
164 /* max data array size,we allow up to PG_UINT32_MAX buckets, including 0 */
165 #define SH_MAX_SIZE (((uint64) PG_UINT32_MAX) + 1)
166 
167 #ifdef SH_STORE_HASH
168 #define SH_COMPARE_KEYS(tb, ahash, akey, b) (ahash == SH_GET_HASH(tb, b) && SH_EQUAL(tb, b->SH_KEY, akey))
169 #else
170 #define SH_COMPARE_KEYS(tb, ahash, akey, b) (SH_EQUAL(tb, b->SH_KEY, akey))
171 #endif
172 
173 /* FIXME: can we move these to a central location? */
174 
175 /* calculate ceil(log base 2) of num */
176 static inline uint64
177 sh_log2(uint64 num)
178 {
179  int i;
180  uint64 limit;
181 
182  for (i = 0, limit = 1; limit < num; i++, limit <<= 1)
183  ;
184  return i;
185 }
186 
187 /* calculate first power of 2 >= num */
188 static inline uint64
189 sh_pow2(uint64 num)
190 {
191  return ((uint64) 1) << sh_log2(num);
192 }
193 
194 /*
195  * Compute sizing parameters for hashtable. Called when creating and growing
196  * the hashtable.
197  */
198 static inline void
200 {
201  uint64 size;
202 
203  /* supporting zero sized hashes would complicate matters */
204  size = Max(newsize, 2);
205 
206  /* round up size to the next power of 2, that's the bucketing works */
207  size = sh_pow2(size);
208  Assert(size <= SH_MAX_SIZE);
209 
210  /*
211  * Verify allocation of ->data is possible on platform, without
212  * overflowing Size.
213  */
214  if ((((uint64) sizeof(SH_ELEMENT_TYPE)) * size) >= MaxAllocHugeSize)
215  elog(ERROR, "hash table too large");
216 
217  /* now set size */
218  tb->size = size;
219 
220  if (tb->size == SH_MAX_SIZE)
221  tb->sizemask = 0;
222  else
223  tb->sizemask = tb->size - 1;
224 
225  /*
226  * Compute growth threshold here and after growing the table, to make
227  * computations during insert cheaper.
228  */
229  if (tb->size == SH_MAX_SIZE)
230  tb->grow_threshold = ((double) tb->size) * SH_MAX_FILLFACTOR;
231  else
232  tb->grow_threshold = ((double) tb->size) * SH_FILLFACTOR;
233 }
234 
235 /* return the optimal bucket for the hash */
236 static inline uint32
238 {
239  return hash & tb->sizemask;
240 }
241 
242 /* return next bucket after the current, handling wraparound */
243 static inline uint32
244 SH_NEXT(SH_TYPE *tb, uint32 curelem, uint32 startelem)
245 {
246  curelem = (curelem + 1) & tb->sizemask;
247 
248  Assert(curelem != startelem);
249 
250  return curelem;
251 }
252 
253 /* return bucket before the current, handling wraparound */
254 static inline uint32
255 SH_PREV(SH_TYPE *tb, uint32 curelem, uint32 startelem)
256 {
257  curelem = (curelem - 1) & tb->sizemask;
258 
259  Assert(curelem != startelem);
260 
261  return curelem;
262 }
263 
264 /* return distance between bucket and its optimal position */
265 static inline uint32
266 SH_DISTANCE_FROM_OPTIMAL(SH_TYPE *tb, uint32 optimal, uint32 bucket)
267 {
268  if (optimal <= bucket)
269  return bucket - optimal;
270  else
271  return (tb->size + bucket) - optimal;
272 }
273 
274 static inline uint32
276 {
277 #ifdef SH_STORE_HASH
278  return SH_GET_HASH(tb, entry);
279 #else
280  return SH_HASH_KEY(tb, entry->SH_KEY);
281 #endif
282 }
283 
284 /* default memory allocator function */
285 static inline void *SH_ALLOCATE(SH_TYPE *type, Size size);
286 static inline void SH_FREE(SH_TYPE *type, void *pointer);
287 
288 #ifndef SH_USE_NONDEFAULT_ALLOCATOR
289 
290 /* default memory allocator function */
291 static inline void *
292 SH_ALLOCATE(SH_TYPE *type, Size size)
293 {
294  return MemoryContextAllocExtended(type->ctx, size,
296 }
297 
298 /* default memory free function */
299 static inline void
300 SH_FREE(SH_TYPE *type, void *pointer)
301 {
302  pfree(pointer);
303 }
304 
305 #endif
306 
307 /*
308  * Create a hash table with enough space for `nelements` distinct members.
309  * Memory for the hash table is allocated from the passed-in context. If
310  * desired, the array of elements can be allocated using a passed-in allocator;
311  * this could be useful in order to place the array of elements in a shared
312  * memory, or in a context that will outlive the rest of the hash table.
313  * Memory other than for the array of elements will still be allocated from
314  * the passed-in context.
315  */
317 SH_CREATE(MemoryContext ctx, uint32 nelements, void *private_data)
318 {
319  SH_TYPE *tb;
320  uint64 size;
321 
322  tb = MemoryContextAllocZero(ctx, sizeof(SH_TYPE));
323  tb->ctx = ctx;
324  tb->private_data = private_data;
325 
326  /* increase nelements by fillfactor, want to store nelements elements */
327  size = Min((double) SH_MAX_SIZE, ((double) nelements) / SH_FILLFACTOR);
328 
329  SH_COMPUTE_PARAMETERS(tb, size);
330 
331  tb->data = SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * tb->size);
332 
333  return tb;
334 }
335 
336 /* destroy a previously created hash table */
337 SH_SCOPE void
338 SH_DESTROY(SH_TYPE *tb)
339 {
340  SH_FREE(tb, tb->data);
341  pfree(tb);
342 }
343 
344 /*
345  * Grow a hash table to at least `newsize` buckets.
346  *
347  * Usually this will automatically be called by insertions/deletions, when
348  * necessary. But resizing to the exact input size can be advantageous
349  * performance-wise, when known at some point.
350  */
351 SH_SCOPE void
352 SH_GROW(SH_TYPE *tb, uint32 newsize)
353 {
354  uint64 oldsize = tb->size;
355  SH_ELEMENT_TYPE *olddata = tb->data;
356  SH_ELEMENT_TYPE *newdata;
357  uint32 i;
358  uint32 startelem = 0;
359  uint32 copyelem;
360 
361  Assert(oldsize == sh_pow2(oldsize));
362  Assert(oldsize != SH_MAX_SIZE);
363  Assert(oldsize < newsize);
364 
365  /* compute parameters for new table */
366  SH_COMPUTE_PARAMETERS(tb, newsize);
367 
368  tb->data = SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * tb->size);
369 
370  newdata = tb->data;
371 
372  /*
373  * Copy entries from the old data to newdata. We theoretically could use
374  * SH_INSERT here, to avoid code duplication, but that's more general than
375  * we need. We neither want tb->members increased, nor do we need to do
376  * deal with deleted elements, nor do we need to compare keys. So a
377  * special-cased implementation is lot faster. As resizing can be time
378  * consuming and frequent, that's worthwhile to optimize.
379  *
380  * To be able to simply move entries over, we have to start not at the
381  * first bucket (i.e olddata[0]), but find the first bucket that's either
382  * empty, or is occupied by an entry at its optimal position. Such a
383  * bucket has to exist in any table with a load factor under 1, as not all
384  * buckets are occupied, i.e. there always has to be an empty bucket. By
385  * starting at such a bucket we can move the entries to the larger table,
386  * without having to deal with conflicts.
387  */
388 
389  /* search for the first element in the hash that's not wrapped around */
390  for (i = 0; i < oldsize; i++)
391  {
392  SH_ELEMENT_TYPE *oldentry = &olddata[i];
393  uint32 hash;
394  uint32 optimal;
395 
396  if (oldentry->status != SH_STATUS_IN_USE)
397  {
398  startelem = i;
399  break;
400  }
401 
402  hash = SH_ENTRY_HASH(tb, oldentry);
403  optimal = SH_INITIAL_BUCKET(tb, hash);
404 
405  if (optimal == i)
406  {
407  startelem = i;
408  break;
409  }
410  }
411 
412  /* and copy all elements in the old table */
413  copyelem = startelem;
414  for (i = 0; i < oldsize; i++)
415  {
416  SH_ELEMENT_TYPE *oldentry = &olddata[copyelem];
417 
418  if (oldentry->status == SH_STATUS_IN_USE)
419  {
420  uint32 hash;
421  uint32 startelem;
422  uint32 curelem;
423  SH_ELEMENT_TYPE *newentry;
424 
425  hash = SH_ENTRY_HASH(tb, oldentry);
426  startelem = SH_INITIAL_BUCKET(tb, hash);
427  curelem = startelem;
428 
429  /* find empty element to put data into */
430  while (true)
431  {
432  newentry = &newdata[curelem];
433 
434  if (newentry->status == SH_STATUS_EMPTY)
435  {
436  break;
437  }
438 
439  curelem = SH_NEXT(tb, curelem, startelem);
440  }
441 
442  /* copy entry to new slot */
443  memcpy(newentry, oldentry, sizeof(SH_ELEMENT_TYPE));
444  }
445 
446  /* can't use SH_NEXT here, would use new size */
447  copyelem++;
448  if (copyelem >= oldsize)
449  {
450  copyelem = 0;
451  }
452  }
453 
454  SH_FREE(tb, olddata);
455 }
456 
457 /*
458  * Insert the key key into the hash-table, set *found to true if the key
459  * already exists, false otherwise. Returns the hash-table entry in either
460  * case.
461  */
463 SH_INSERT(SH_TYPE *tb, SH_KEY_TYPE key, bool *found)
464 {
465  uint32 hash = SH_HASH_KEY(tb, key);
466  uint32 startelem;
467  uint32 curelem;
468  SH_ELEMENT_TYPE *data;
469  uint32 insertdist = 0;
470 
471  /*
472  * We do the grow check even if the key is actually present, to avoid
473  * doing the check inside the loop. This also lets us avoid having to
474  * re-find our position in the hashtable after resizing.
475  */
476  if (unlikely(tb->members >= tb->grow_threshold))
477  {
478  if (tb->size == SH_MAX_SIZE)
479  {
480  elog(ERROR, "hash table size exceeded");
481  }
482 
483  /*
484  * When optimizing, it can be very useful to print these out.
485  */
486  /* SH_STAT(tb); */
487  SH_GROW(tb, tb->size * 2);
488  /* SH_STAT(tb); */
489  }
490 
491  /* perform insert, start bucket search at optimal location */
492  data = tb->data;
493  startelem = SH_INITIAL_BUCKET(tb, hash);
494  curelem = startelem;
495  while (true)
496  {
497  uint32 curdist;
498  uint32 curhash;
499  uint32 curoptimal;
500  SH_ELEMENT_TYPE *entry = &data[curelem];
501 
502  /* any empty bucket can directly be used */
503  if (entry->status == SH_STATUS_EMPTY)
504  {
505  tb->members++;
506  entry->SH_KEY = key;
507 #ifdef SH_STORE_HASH
508  SH_GET_HASH(tb, entry) = hash;
509 #endif
510  entry->status = SH_STATUS_IN_USE;
511  *found = false;
512  return entry;
513  }
514 
515  /*
516  * If the bucket is not empty, we either found a match (in which case
517  * we're done), or we have to decide whether to skip over or move the
518  * colliding entry. When the colliding element's distance to its
519  * optimal position is smaller than the to-be-inserted entry's, we
520  * shift the colliding entry (and its followers) forward by one.
521  */
522 
523  if (SH_COMPARE_KEYS(tb, hash, key, entry))
524  {
525  Assert(entry->status == SH_STATUS_IN_USE);
526  *found = true;
527  return entry;
528  }
529 
530  curhash = SH_ENTRY_HASH(tb, entry);
531  curoptimal = SH_INITIAL_BUCKET(tb, curhash);
532  curdist = SH_DISTANCE_FROM_OPTIMAL(tb, curoptimal, curelem);
533 
534  if (insertdist > curdist)
535  {
536  SH_ELEMENT_TYPE *lastentry = entry;
537  uint32 emptyelem = curelem;
538  uint32 moveelem;
539 
540  /* find next empty bucket */
541  while (true)
542  {
543  SH_ELEMENT_TYPE *emptyentry;
544 
545  emptyelem = SH_NEXT(tb, emptyelem, startelem);
546  emptyentry = &data[emptyelem];
547 
548  if (emptyentry->status == SH_STATUS_EMPTY)
549  {
550  lastentry = emptyentry;
551  break;
552  }
553  }
554 
555  /* shift forward, starting at last occupied element */
556 
557  /*
558  * TODO: This could be optimized to be one memcpy in may cases,
559  * excepting wrapping around at the end of ->data. Hasn't shown up
560  * in profiles so far though.
561  */
562  moveelem = emptyelem;
563  while (moveelem != curelem)
564  {
565  SH_ELEMENT_TYPE *moveentry;
566 
567  moveelem = SH_PREV(tb, moveelem, startelem);
568  moveentry = &data[moveelem];
569 
570  memcpy(lastentry, moveentry, sizeof(SH_ELEMENT_TYPE));
571  lastentry = moveentry;
572  }
573 
574  /* and fill the now empty spot */
575  tb->members++;
576 
577  entry->SH_KEY = key;
578 #ifdef SH_STORE_HASH
579  SH_GET_HASH(tb, entry) = hash;
580 #endif
581  entry->status = SH_STATUS_IN_USE;
582  *found = false;
583  return entry;
584  }
585 
586  curelem = SH_NEXT(tb, curelem, startelem);
587  insertdist++;
588  }
589 }
590 
591 /*
592  * Lookup up entry in hash table. Returns NULL if key not present.
593  */
595 SH_LOOKUP(SH_TYPE *tb, SH_KEY_TYPE key)
596 {
597  uint32 hash = SH_HASH_KEY(tb, key);
598  const uint32 startelem = SH_INITIAL_BUCKET(tb, hash);
599  uint32 curelem = startelem;
600 
601  while (true)
602  {
603  SH_ELEMENT_TYPE *entry = &tb->data[curelem];
604 
605  if (entry->status == SH_STATUS_EMPTY)
606  {
607  return NULL;
608  }
609 
610  Assert(entry->status == SH_STATUS_IN_USE);
611 
612  if (SH_COMPARE_KEYS(tb, hash, key, entry))
613  return entry;
614 
615  /*
616  * TODO: we could stop search based on distance. If the current
617  * buckets's distance-from-optimal is smaller than what we've skipped
618  * already, the entry doesn't exist. Probably only do so if
619  * SH_STORE_HASH is defined, to avoid re-computing hashes?
620  */
621 
622  curelem = SH_NEXT(tb, curelem, startelem);
623  }
624 }
625 
626 /*
627  * Delete entry from hash table. Returns whether to-be-deleted key was
628  * present.
629  */
630 SH_SCOPE bool
631 SH_DELETE(SH_TYPE *tb, SH_KEY_TYPE key)
632 {
633  uint32 hash = SH_HASH_KEY(tb, key);
634  uint32 startelem = SH_INITIAL_BUCKET(tb, hash);
635  uint32 curelem = startelem;
636 
637  while (true)
638  {
639  SH_ELEMENT_TYPE *entry = &tb->data[curelem];
640 
641  if (entry->status == SH_STATUS_EMPTY)
642  return false;
643 
644  if (entry->status == SH_STATUS_IN_USE &&
645  SH_COMPARE_KEYS(tb, hash, key, entry))
646  {
647  SH_ELEMENT_TYPE *lastentry = entry;
648 
649  tb->members--;
650 
651  /*
652  * Backward shift following elements till either an empty element
653  * or an element at its optimal position is encountered.
654  *
655  * While that sounds expensive, the average chain length is short,
656  * and deletions would otherwise require toombstones.
657  */
658  while (true)
659  {
660  SH_ELEMENT_TYPE *curentry;
661  uint32 curhash;
662  uint32 curoptimal;
663 
664  curelem = SH_NEXT(tb, curelem, startelem);
665  curentry = &tb->data[curelem];
666 
667  if (curentry->status != SH_STATUS_IN_USE)
668  {
669  lastentry->status = SH_STATUS_EMPTY;
670  break;
671  }
672 
673  curhash = SH_ENTRY_HASH(tb, curentry);
674  curoptimal = SH_INITIAL_BUCKET(tb, curhash);
675 
676  /* current is at optimal position, done */
677  if (curoptimal == curelem)
678  {
679  lastentry->status = SH_STATUS_EMPTY;
680  break;
681  }
682 
683  /* shift */
684  memcpy(lastentry, curentry, sizeof(SH_ELEMENT_TYPE));
685 
686  lastentry = curentry;
687  }
688 
689  return true;
690  }
691 
692  /* TODO: return false; if distance too big */
693 
694  curelem = SH_NEXT(tb, curelem, startelem);
695  }
696 }
697 
698 /*
699  * Initialize iterator.
700  */
701 SH_SCOPE void
703 {
704  int i;
705  uint64 startelem = PG_UINT64_MAX;
706 
707  /*
708  * Search for the first empty element. As deletions during iterations are
709  * supported, we want to start/end at an element that cannot be affected
710  * by elements being shifted.
711  */
712  for (i = 0; i < tb->size; i++)
713  {
714  SH_ELEMENT_TYPE *entry = &tb->data[i];
715 
716  if (entry->status != SH_STATUS_IN_USE)
717  {
718  startelem = i;
719  break;
720  }
721  }
722 
723  Assert(startelem < SH_MAX_SIZE);
724 
725  /*
726  * Iterate backwards, that allows the current element to be deleted, even
727  * if there are backward shifts
728  */
729  iter->cur = startelem;
730  iter->end = iter->cur;
731  iter->done = false;
732 }
733 
734 /*
735  * Initialize iterator to a specific bucket. That's really only useful for
736  * cases where callers are partially iterating over the hashspace, and that
737  * iteration deletes and inserts elements based on visited entries. Doing that
738  * repeatedly could lead to an unbalanced keyspace when always starting at the
739  * same position.
740  */
741 SH_SCOPE void
743 {
744  /*
745  * Iterate backwards, that allows the current element to be deleted, even
746  * if there are backward shifts.
747  */
748  iter->cur = at & tb->sizemask; /* ensure at is within a valid range */
749  iter->end = iter->cur;
750  iter->done = false;
751 }
752 
753 /*
754  * Iterate over all entries in the hash-table. Return the next occupied entry,
755  * or NULL if done.
756  *
757  * During iteration the current entry in the hash table may be deleted,
758  * without leading to elements being skipped or returned twice. Additionally
759  * the rest of the table may be modified (i.e. there can be insertions or
760  * deletions), but if so, there's neither a guarantee that all nodes are
761  * visited at least once, nor a guarantee that a node is visited at most once.
762  */
764 SH_ITERATE(SH_TYPE *tb, SH_ITERATOR *iter)
765 {
766  while (!iter->done)
767  {
768  SH_ELEMENT_TYPE *elem;
769 
770  elem = &tb->data[iter->cur];
771 
772  /* next element in backward direction */
773  iter->cur = (iter->cur - 1) & tb->sizemask;
774 
775  if ((iter->cur & tb->sizemask) == (iter->end & tb->sizemask))
776  iter->done = true;
777  if (elem->status == SH_STATUS_IN_USE)
778  {
779  return elem;
780  }
781  }
782 
783  return NULL;
784 }
785 
786 /*
787  * Report some statistics about the state of the hashtable. For
788  * debugging/profiling purposes only.
789  */
790 SH_SCOPE void
791 SH_STAT(SH_TYPE *tb)
792 {
793  uint32 max_chain_length = 0;
794  uint32 total_chain_length = 0;
795  double avg_chain_length;
796  double fillfactor;
797  uint32 i;
798 
799  uint32 *collisions = palloc0(tb->size * sizeof(uint32));
800  uint32 total_collisions = 0;
801  uint32 max_collisions = 0;
802  double avg_collisions;
803 
804  for (i = 0; i < tb->size; i++)
805  {
806  uint32 hash;
807  uint32 optimal;
808  uint32 dist;
809  SH_ELEMENT_TYPE *elem;
810 
811  elem = &tb->data[i];
812 
813  if (elem->status != SH_STATUS_IN_USE)
814  continue;
815 
816  hash = SH_ENTRY_HASH(tb, elem);
817  optimal = SH_INITIAL_BUCKET(tb, hash);
818  dist = SH_DISTANCE_FROM_OPTIMAL(tb, optimal, i);
819 
820  if (dist > max_chain_length)
821  max_chain_length = dist;
822  total_chain_length += dist;
823 
824  collisions[optimal]++;
825  }
826 
827  for (i = 0; i < tb->size; i++)
828  {
829  uint32 curcoll = collisions[i];
830 
831  if (curcoll == 0)
832  continue;
833 
834  /* single contained element is not a collision */
835  curcoll--;
836  total_collisions += curcoll;
837  if (curcoll > max_collisions)
838  max_collisions = curcoll;
839  }
840 
841  if (tb->members > 0)
842  {
843  fillfactor = tb->members / ((double) tb->size);
844  avg_chain_length = ((double) total_chain_length) / tb->members;
845  avg_collisions = ((double) total_collisions) / tb->members;
846  }
847  else
848  {
849  fillfactor = 0;
850  avg_chain_length = 0;
851  avg_collisions = 0;
852  }
853 
854  elog(LOG, "size: " UINT64_FORMAT ", members: %u, filled: %f, total chain: %u, max chain: %u, avg chain: %f, total_collisions: %u, max_collisions: %i, avg_collisions: %f",
855  tb->size, tb->members, fillfactor, total_chain_length, max_chain_length, avg_chain_length,
856  total_collisions, max_collisions, avg_collisions);
857 }
858 
859 #endif /* SH_DEFINE */
860 
861 
862 /* undefine external paramters, so next hash table can be defined */
863 #undef SH_PREFIX
864 #undef SH_KEY_TYPE
865 #undef SH_KEY
866 #undef SH_ELEMENT_TYPE
867 #undef SH_HASH_KEY
868 #undef SH_SCOPE
869 #undef SH_DECLARE
870 #undef SH_DEFINE
871 #undef SH_GET_HASH
872 #undef SH_STORE_HASH
873 #undef SH_USE_NONDEFAULT_ALLOCATOR
874 
875 /* undefine locally declared macros */
876 #undef SH_MAKE_PREFIX
877 #undef SH_MAKE_NAME
878 #undef SH_MAKE_NAME_
879 #undef SH_FILLFACTOR
880 #undef SH_MAX_FILLFACTOR
881 #undef SH_MAX_SIZE
882 
883 /* types */
884 #undef SH_TYPE
885 #undef SH_STATUS
886 #undef SH_STATUS_EMPTY
887 #undef SH_STATUS_IN_USE
888 #undef SH_ITERTOR
889 
890 /* external function names */
891 #undef SH_CREATE
892 #undef SH_DESTROY
893 #undef SH_INSERT
894 #undef SH_DELETE
895 #undef SH_LOOKUP
896 #undef SH_GROW
897 #undef SH_START_ITERATE
898 #undef SH_START_ITERATE_AT
899 #undef SH_ITERATE
900 #undef SH_ALLOCATE
901 #undef SH_FREE
902 #undef SH_STAT
903 
904 /* internal function names */
905 #undef SH_COMPUTE_PARAMETERS
906 #undef SH_COMPARE_KEYS
907 #undef SH_INITIAL_BUCKET
908 #undef SH_NEXT
909 #undef SH_PREV
910 #undef SH_DISTANCE_FROM_OPTIMAL
911 #undef SH_ENTRY_HASH
#define SH_ALLOCATE
Definition: simplehash.h:82
#define SH_COMPUTE_PARAMETERS
Definition: simplehash.h:87
#define SH_KEY_TYPE
Definition: execGrouping.c:38
#define SH_START_ITERATE
Definition: simplehash.h:79
#define PG_UINT64_MAX
Definition: c.h:341
#define MCXT_ALLOC_HUGE
Definition: fe_memutils.h:16
void * MemoryContextAllocExtended(MemoryContext context, Size size, int flags)
Definition: mcxt.c:855
#define SH_DELETE
Definition: simplehash.h:76
#define SH_START_ITERATE_AT
Definition: simplehash.h:80
#define Min(x, y)
Definition: c.h:802
struct cursor * cur
Definition: ecpg.c:28
#define SH_STATUS_EMPTY
Definition: simplehash.h:68
#define SH_LOOKUP
Definition: simplehash.h:77
#define LOG
Definition: elog.h:26
#define SH_GROW
Definition: simplehash.h:78
#define SH_NEXT
Definition: simplehash.h:88
#define SH_STAT
Definition: simplehash.h:84
#define SH_PREV
Definition: simplehash.h:89
#define MaxAllocHugeSize
Definition: memutils.h:44
#define SH_STATUS_IN_USE
Definition: simplehash.h:69
#define SH_ITERATOR
Definition: simplehash.h:70
void pfree(void *pointer)
Definition: mcxt.c:992
#define ERROR
Definition: elog.h:43
int fillfactor
Definition: pgbench.c:112
#define SH_DESTROY
Definition: simplehash.h:74
unsigned int uint32
Definition: c.h:265
#define SH_FREE
Definition: simplehash.h:83
#define SH_GET_HASH(tb, a)
Definition: execGrouping.c:44
void * palloc0(Size size)
Definition: mcxt.c:920
#define SH_STATUS
Definition: simplehash.h:67
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition: mcxt.c:784
#define SH_DISTANCE_FROM_OPTIMAL
Definition: simplehash.h:90
#define Max(x, y)
Definition: c.h:796
#define NULL
Definition: c.h:226
#define Assert(condition)
Definition: c.h:671
#define SH_INSERT
Definition: simplehash.h:75
#define MCXT_ALLOC_ZERO
Definition: fe_memutils.h:19
size_t Size
Definition: c.h:353
#define SH_ITERATE
Definition: simplehash.h:81
#define SH_TYPE
Definition: simplehash.h:66
#define SH_ELEMENT_TYPE
Definition: execGrouping.c:37
#define SH_INITIAL_BUCKET
Definition: simplehash.h:91
int i
#define unlikely(x)
Definition: c.h:958
#define SH_CREATE
Definition: simplehash.h:73
#define SH_ENTRY_HASH
Definition: simplehash.h:92
#define elog
Definition: elog.h:219
#define SH_HASH_KEY(tb, key)
Definition: execGrouping.c:40
static unsigned hash(unsigned *uv, int n)
Definition: rege_dfa.c:541
#define UINT64_FORMAT
Definition: c.h:313
#define SH_SCOPE
Definition: execGrouping.c:42