hash.c
1 /*-------------------------------------------------------------------------
2  *
3  * hash.c
4  * Implementation of Margo Seltzer's Hashing package for postgres.
5  *
6  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/access/hash/hash.c
12  *
13  * NOTES
14  * This file contains only the public interface routines.
15  *
16  *-------------------------------------------------------------------------
17  */
18 
19 #include "postgres.h"
20 
21 #include "access/hash.h"
22 #include "access/hash_xlog.h"
23 #include "access/relscan.h"
24 #include "access/tableam.h"
25 #include "catalog/index.h"
26 #include "commands/progress.h"
27 #include "commands/vacuum.h"
28 #include "miscadmin.h"
29 #include "optimizer/plancat.h"
30 #include "pgstat.h"
31 #include "utils/builtins.h"
32 #include "utils/index_selfuncs.h"
33 #include "utils/rel.h"
34 #include "miscadmin.h"
35 
36 
37 /* Working state for hashbuild and its callback */
38 typedef struct
39 {
40  HSpool *spool; /* NULL if not using spooling */
41  double indtuples; /* # tuples accepted into index */
42  Relation heapRel; /* heap relation descriptor */
43 } HashBuildState;
44 
45 static void hashbuildCallback(Relation index,
46  HeapTuple htup,
47  Datum *values,
48  bool *isnull,
49  bool tupleIsAlive,
50  void *state);
51 
52 
53 /*
54  * Hash handler function: return IndexAmRoutine with access method parameters
55  * and callbacks.
56  */
57 Datum
58 hashhandler(PG_FUNCTION_ARGS)
59 {
60  IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
61 
62  amroutine->amstrategies = HTMaxStrategyNumber;
63  amroutine->amsupport = HASHNProcs;
64  amroutine->amcanorder = false;
65  amroutine->amcanorderbyop = false;
66  amroutine->amcanbackward = true;
67  amroutine->amcanunique = false;
68  amroutine->amcanmulticol = false;
69  amroutine->amoptionalkey = false;
70  amroutine->amsearcharray = false;
71  amroutine->amsearchnulls = false;
72  amroutine->amstorage = false;
73  amroutine->amclusterable = false;
74  amroutine->ampredlocks = true;
75  amroutine->amcanparallel = false;
76  amroutine->amcaninclude = false;
77  amroutine->amkeytype = INT4OID;
78 
79  amroutine->ambuild = hashbuild;
80  amroutine->ambuildempty = hashbuildempty;
81  amroutine->aminsert = hashinsert;
82  amroutine->ambulkdelete = hashbulkdelete;
83  amroutine->amvacuumcleanup = hashvacuumcleanup;
84  amroutine->amcanreturn = NULL;
85  amroutine->amcostestimate = hashcostestimate;
86  amroutine->amoptions = hashoptions;
87  amroutine->amproperty = NULL;
88  amroutine->ambuildphasename = NULL;
89  amroutine->amvalidate = hashvalidate;
90  amroutine->ambeginscan = hashbeginscan;
91  amroutine->amrescan = hashrescan;
92  amroutine->amgettuple = hashgettuple;
93  amroutine->amgetbitmap = hashgetbitmap;
94  amroutine->amendscan = hashendscan;
95  amroutine->ammarkpos = NULL;
96  amroutine->amrestrpos = NULL;
97  amroutine->amestimateparallelscan = NULL;
98  amroutine->aminitparallelscan = NULL;
99  amroutine->amparallelrescan = NULL;
100 
101  PG_RETURN_POINTER(amroutine);
102 }
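
The routine built above is how an index access method advertises its capabilities and callbacks to the core code. A minimal sketch of how a caller might fetch and sanity-check it, assuming "amhandler_oid" holds pg_am.amhandler for the hash AM (GetIndexAmRoutine() is the real helper in src/backend/access/index/amapi.c; the function name and the Asserts below are illustrative only, not part of hash.c):

static void
example_fetch_hash_am_routine(Oid amhandler_oid)
{
	/* calls the handler function (hashhandler above) and returns its IndexAmRoutine */
	IndexAmRoutine *routine = GetIndexAmRoutine(amhandler_oid);

	/* hash stores only a 32-bit hash code: one key column, never UNIQUE */
	Assert(routine->amkeytype == INT4OID);
	Assert(!routine->amcanmulticol && !routine->amcanunique);
}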
103 
104 /*
105  * hashbuild() -- build a new hash index.
106  */
107 IndexBuildResult *
108 hashbuild(Relation heap, Relation index, IndexInfo *indexInfo)
109 {
110  IndexBuildResult *result;
111  BlockNumber relpages;
112  double reltuples;
113  double allvisfrac;
114  uint32 num_buckets;
115  long sort_threshold;
116  HashBuildState buildstate;
117 
118  /*
119  * We expect to be called exactly once for any index relation. If that's
120  * not the case, big trouble's what we have.
121  */
122  if (RelationGetNumberOfBlocks(index) != 0)
123  elog(ERROR, "index \"%s\" already contains data",
124  RelationGetRelationName(index));
125 
126  /* Estimate the number of rows currently present in the table */
127  estimate_rel_size(heap, NULL, &relpages, &reltuples, &allvisfrac);
128 
129  /* Initialize the hash index metadata page and initial buckets */
130  num_buckets = _hash_init(index, reltuples, MAIN_FORKNUM);
131 
132  /*
133  * If we just insert the tuples into the index in scan order, then
134  * (assuming their hash codes are pretty random) there will be no locality
135  * of access to the index, and if the index is bigger than available RAM
136  * then we'll thrash horribly. To prevent that scenario, we can sort the
137  * tuples by (expected) bucket number. However, such a sort is useless
138  * overhead when the index does fit in RAM. We choose to sort if the
139  * initial index size exceeds maintenance_work_mem, or the number of
140  * buffers usable for the index, whichever is less. (Limiting by the
141  * number of buffers should reduce thrashing between PG buffers and kernel
142  * buffers, which seems useful even if no physical I/O results. Limiting
143  * by maintenance_work_mem is useful to allow easy testing of the sort
144  * code path, and may be useful to DBAs as an additional control knob.)
145  *
146  * NOTE: this test will need adjustment if a bucket is ever different from
147  * one page. Also, "initial index size" accounting does not include the
148  * metapage, nor the first bitmap page.
149  */
150  sort_threshold = (maintenance_work_mem * 1024L) / BLCKSZ;
151  if (index->rd_rel->relpersistence != RELPERSISTENCE_TEMP)
152  sort_threshold = Min(sort_threshold, NBuffers);
153  else
154  sort_threshold = Min(sort_threshold, NLocBuffer);
155 
156  if (num_buckets >= (uint32) sort_threshold)
157  buildstate.spool = _h_spoolinit(heap, index, num_buckets);
158  else
159  buildstate.spool = NULL;
160 
161  /* prepare to build the index */
162  buildstate.indtuples = 0;
163  buildstate.heapRel = heap;
164 
165  /* do the heap scan */
166  reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
167  hashbuildCallback,
168  (void *) &buildstate, NULL);
169  pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_TOTAL,
170  buildstate.indtuples);
171 
172  if (buildstate.spool)
173  {
174  /* sort the tuples and insert them into the index */
175  _h_indexbuild(buildstate.spool, buildstate.heapRel);
176  _h_spooldestroy(buildstate.spool);
177  }
178 
179  /*
180  * Return statistics
181  */
182  result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
183 
184  result->heap_tuples = reltuples;
185  result->index_tuples = buildstate.indtuples;
186 
187  return result;
188 }
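
A worked example of the sort_threshold arithmetic in hashbuild() above, using assumed settings rather than values taken from this file: maintenance_work_mem = 65536 (KB, i.e. 64MB) with BLCKSZ = 8192 gives 8192 pages, and with NBuffers = 16384 (128MB of shared_buffers) the buffer cap does not bite, so spool-and-sort is used only when _hash_init() creates at least 8192 initial buckets. A sketch of the same computation with those assumptions hard-coded:

/* Sketch only; mirrors hashbuild()'s computation with BLCKSZ assumed to be 8192. */
static long
example_sort_threshold(long maintenance_work_mem_kb, long nbuffers)
{
	long	threshold = (maintenance_work_mem_kb * 1024L) / 8192;	/* KB -> index pages */

	/* Min(threshold, NBuffers), as done for a non-temporary index */
	return (threshold < nbuffers) ? threshold : nbuffers;
}

/* example_sort_threshold(65536, 16384) == 8192 */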
189 
190 /*
191  * hashbuildempty() -- build an empty hash index in the initialization fork
192  */
193 void
194 hashbuildempty(Relation index)
195 {
196  _hash_init(index, 0, INIT_FORKNUM);
197 }
198 
199 /*
200  * Per-tuple callback for table_index_build_scan
201  */
202 static void
203 hashbuildCallback(Relation index,
204  HeapTuple htup,
205  Datum *values,
206  bool *isnull,
207  bool tupleIsAlive,
208  void *state)
209 {
210  HashBuildState *buildstate = (HashBuildState *) state;
211  Datum index_values[1];
212  bool index_isnull[1];
213  IndexTuple itup;
214 
215  /* convert data to a hash key; on failure, do not insert anything */
216  if (!_hash_convert_tuple(index,
217  values, isnull,
218  index_values, index_isnull))
219  return;
220 
221  /* Either spool the tuple for sorting, or just put it into the index */
222  if (buildstate->spool)
223  _h_spool(buildstate->spool, &htup->t_self,
224  index_values, index_isnull);
225  else
226  {
227  /* form an index tuple and point it at the heap tuple */
228  itup = index_form_tuple(RelationGetDescr(index),
229  index_values, index_isnull);
230  itup->t_tid = htup->t_self;
231  _hash_doinsert(index, itup, buildstate->heapRel);
232  pfree(itup);
233  }
234 
235  buildstate->indtuples += 1;
236 }
237 
238 /*
239  * hashinsert() -- insert an index tuple into a hash table.
240  *
241  * Hash on the heap tuple's key, form an index tuple with hash code.
242  * Find the appropriate location for the new tuple, and put it there.
243  */
244 bool
245 hashinsert(Relation rel, Datum *values, bool *isnull,
246  ItemPointer ht_ctid, Relation heapRel,
247  IndexUniqueCheck checkUnique,
248  IndexInfo *indexInfo)
249 {
250  Datum index_values[1];
251  bool index_isnull[1];
252  IndexTuple itup;
253 
254  /* convert data to a hash key; on failure, do not insert anything */
255  if (!_hash_convert_tuple(rel,
256  values, isnull,
257  index_values, index_isnull))
258  return false;
259 
260  /* form an index tuple and point it at the heap tuple */
261  itup = index_form_tuple(RelationGetDescr(rel), index_values, index_isnull);
262  itup->t_tid = *ht_ctid;
263 
264  _hash_doinsert(rel, itup, heapRel);
265 
266  pfree(itup);
267 
268  return false;
269 }
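
What the single stored index column contains is the crux of both hashinsert() above and the xs_recheck logic in hashgettuple() below: only the 32-bit hash code of the user key is kept, never the key itself. A conceptual sketch of that idea (the real conversion is _hash_convert_tuple() in hashutil.c, which goes through the opclass hash support function; hash_uint32() from PostgreSQL's generic hashing support is used here purely as a stand-in for an int4 key, and the function name is illustrative):

static Datum
example_stored_hash_key(uint32 user_key)
{
	/* the index tuple's only attribute is the hash code, so any match found
	 * during a scan must be rechecked against the heap tuple */
	return hash_uint32(user_key);
}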
270 
271 
272 /*
273  * hashgettuple() -- Get the next tuple in the scan.
274  */
275 bool
276 hashgettuple(IndexScanDesc scan, ScanDirection dir)
277 {
278  HashScanOpaque so = (HashScanOpaque) scan->opaque;
279  bool res;
280 
281  /* Hash indexes are always lossy since we store only the hash code */
282  scan->xs_recheck = true;
283 
284  /*
285  * If we've already initialized this scan, we can just advance it in the
286  * appropriate direction. If we haven't done so yet, we call a routine to
287  * get the first item in the scan.
288  */
289  if (!HashScanPosIsValid(so->currPos))
290  res = _hash_first(scan, dir);
291  else
292  {
293  /*
294  * Check to see if we should kill the previously-fetched tuple.
295  */
296  if (scan->kill_prior_tuple)
297  {
298  /*
299  * Yes, so remember it for later. (We'll deal with all such tuples
300  * at once right after leaving the index page or at end of scan.)
301  * If the caller reverses the indexscan direction, it is quite
302  * possible that the same item might get entered multiple times.
303  * But, we don't detect that; instead, we just forget any excess
304  * entries.
305  */
306  if (so->killedItems == NULL)
307  so->killedItems = (int *)
308  palloc(MaxIndexTuplesPerPage * sizeof(int));
309 
310  if (so->numKilled < MaxIndexTuplesPerPage)
311  so->killedItems[so->numKilled++] = so->currPos.itemIndex;
312  }
313 
314  /*
315  * Now continue the scan.
316  */
317  res = _hash_next(scan, dir);
318  }
319 
320  return res;
321 }
322 
323 
324 /*
325  * hashgetbitmap() -- get all tuples at once
326  */
327 int64
328 hashgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
329 {
330  HashScanOpaque so = (HashScanOpaque) scan->opaque;
331  bool res;
332  int64 ntids = 0;
333  HashScanPosItem *currItem;
334 
335  res = _hash_first(scan, ForwardScanDirection);
336 
337  while (res)
338  {
339  currItem = &so->currPos.items[so->currPos.itemIndex];
340 
341  /*
342  * _hash_first and _hash_next eliminate dead index entries
343  * whenever scan->ignore_killed_tuples is true. Therefore, there's
344  * nothing to do here except add the results to the TIDBitmap.
345  */
346  tbm_add_tuples(tbm, &(currItem->heapTid), 1, true);
347  ntids++;
348 
349  res = _hash_next(scan, ForwardScanDirection);
350  }
351 
352  return ntids;
353 }
354 
355 
356 /*
357  * hashbeginscan() -- start a scan on a hash index
358  */
359 IndexScanDesc
360 hashbeginscan(Relation rel, int nkeys, int norderbys)
361 {
362  IndexScanDesc scan;
363  HashScanOpaque so;
364 
365  /* no order by operators allowed */
366  Assert(norderbys == 0);
367 
368  scan = RelationGetIndexScan(rel, nkeys, norderbys);
369 
370  so = (HashScanOpaque) palloc(sizeof(HashScanOpaqueData));
371  HashScanPosInvalidate(so->currPos);
372  so->hashso_bucket_buf = InvalidBuffer;
373  so->hashso_split_bucket_buf = InvalidBuffer;
374 
375  so->hashso_buc_populated = false;
376  so->hashso_buc_split = false;
377 
378  so->killedItems = NULL;
379  so->numKilled = 0;
380 
381  scan->opaque = so;
382 
383  return scan;
384 }
385 
386 /*
387  * hashrescan() -- rescan an index relation
388  */
389 void
390 hashrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
391  ScanKey orderbys, int norderbys)
392 {
393  HashScanOpaque so = (HashScanOpaque) scan->opaque;
394  Relation rel = scan->indexRelation;
395 
396  if (HashScanPosIsValid(so->currPos))
397  {
398  /* Before leaving current page, deal with any killed items */
399  if (so->numKilled > 0)
400  _hash_kill_items(scan);
401  }
402 
403  _hash_dropscanbuf(rel, so);
404 
405  /* set position invalid (this will cause _hash_first call) */
406  HashScanPosInvalidate(so->currPos);
407 
408  /* Update scan key, if a new one is given */
409  if (scankey && scan->numberOfKeys > 0)
410  {
411  memmove(scan->keyData,
412  scankey,
413  scan->numberOfKeys * sizeof(ScanKeyData));
414  }
415 
416  so->hashso_buc_populated = false;
417  so->hashso_buc_split = false;
418 }
419 
420 /*
421  * hashendscan() -- close down a scan
422  */
423 void
424 hashendscan(IndexScanDesc scan)
425 {
426  HashScanOpaque so = (HashScanOpaque) scan->opaque;
427  Relation rel = scan->indexRelation;
428 
429  if (HashScanPosIsValid(so->currPos))
430  {
431  /* Before leaving current page, deal with any killed items */
432  if (so->numKilled > 0)
433  _hash_kill_items(scan);
434  }
435 
436  _hash_dropscanbuf(rel, so);
437 
438  if (so->killedItems != NULL)
439  pfree(so->killedItems);
440  pfree(so);
441  scan->opaque = NULL;
442 }
443 
444 /*
445  * Bulk deletion of all index entries pointing to a set of heap tuples.
446  * The set of target tuples is specified via a callback routine that tells
447  * whether any given heap tuple (identified by ItemPointer) is being deleted.
448  *
449  * This function also deletes the tuples that are moved by split to another
450  * bucket.
451  *
452  * Result: a palloc'd struct containing statistical info for VACUUM displays.
453  */
454 IndexBulkDeleteResult *
455 hashbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
456  IndexBulkDeleteCallback callback, void *callback_state)
457 {
458  Relation rel = info->index;
459  double tuples_removed;
460  double num_index_tuples;
461  double orig_ntuples;
462  Bucket orig_maxbucket;
463  Bucket cur_maxbucket;
464  Bucket cur_bucket;
465  Buffer metabuf = InvalidBuffer;
466  HashMetaPage metap;
467  HashMetaPage cachedmetap;
468 
469  tuples_removed = 0;
470  num_index_tuples = 0;
471 
472  /*
473  * We need a copy of the metapage so that we can use its hashm_spares[]
474  * values to compute bucket page addresses, but a cached copy should be
475  * good enough. (If not, we'll detect that further down and refresh the
476  * cache as necessary.)
477  */
478  cachedmetap = _hash_getcachedmetap(rel, &metabuf, false);
479  Assert(cachedmetap != NULL);
480 
481  orig_maxbucket = cachedmetap->hashm_maxbucket;
482  orig_ntuples = cachedmetap->hashm_ntuples;
483 
484  /* Scan the buckets that we know exist */
485  cur_bucket = 0;
486  cur_maxbucket = orig_maxbucket;
487 
488 loop_top:
489  while (cur_bucket <= cur_maxbucket)
490  {
491  BlockNumber bucket_blkno;
492  BlockNumber blkno;
493  Buffer bucket_buf;
494  Buffer buf;
495  HashPageOpaque bucket_opaque;
496  Page page;
497  bool split_cleanup = false;
498 
499  /* Get address of bucket's start page */
500  bucket_blkno = BUCKET_TO_BLKNO(cachedmetap, cur_bucket);
501 
502  blkno = bucket_blkno;
503 
504  /*
505  * We need to acquire a cleanup lock on the primary bucket page to wait
506  * out concurrent scans before deleting the dead tuples.
507  */
508  buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, info->strategy);
509  LockBufferForCleanup(buf);
510  _hash_checkpage(rel, buf, LH_BUCKET_PAGE);
511 
512  page = BufferGetPage(buf);
513  bucket_opaque = (HashPageOpaque) PageGetSpecialPointer(page);
514 
515  /*
516  * If the bucket contains tuples that are moved by split, then we need
517  * to delete such tuples. We can't delete such tuples if the split
518  * operation on the bucket is not finished, as those are needed by scans.
519  */
520  if (!H_BUCKET_BEING_SPLIT(bucket_opaque) &&
521  H_NEEDS_SPLIT_CLEANUP(bucket_opaque))
522  {
523  split_cleanup = true;
524 
525  /*
526  * This bucket might have been split since we last held a lock on
527  * the metapage. If so, hashm_maxbucket, hashm_highmask and
528  * hashm_lowmask might be old enough to cause us to fail to remove
529  * tuples left behind by the most recent split. To prevent that,
530  * now that the primary page of the target bucket has been locked
531  * (and thus can't be further split), check whether we need to
532  * update our cached metapage data.
533  */
534  Assert(bucket_opaque->hasho_prevblkno != InvalidBlockNumber);
535  if (bucket_opaque->hasho_prevblkno > cachedmetap->hashm_maxbucket)
536  {
537  cachedmetap = _hash_getcachedmetap(rel, &metabuf, true);
538  Assert(cachedmetap != NULL);
539  }
540  }
541 
542  bucket_buf = buf;
543 
544  hashbucketcleanup(rel, cur_bucket, bucket_buf, blkno, info->strategy,
545  cachedmetap->hashm_maxbucket,
546  cachedmetap->hashm_highmask,
547  cachedmetap->hashm_lowmask, &tuples_removed,
548  &num_index_tuples, split_cleanup,
549  callback, callback_state);
550 
551  _hash_dropbuf(rel, bucket_buf);
552 
553  /* Advance to next bucket */
554  cur_bucket++;
555  }
556 
557  if (BufferIsInvalid(metabuf))
558  metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_NOLOCK, LH_META_PAGE);
559 
560  /* Write-lock metapage and check for split since we started */
561  LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
562  metap = HashPageGetMeta(BufferGetPage(metabuf));
563 
564  if (cur_maxbucket != metap->hashm_maxbucket)
565  {
566  /* There's been a split, so process the additional bucket(s) */
567  LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
568  cachedmetap = _hash_getcachedmetap(rel, &metabuf, true);
569  Assert(cachedmetap != NULL);
570  cur_maxbucket = cachedmetap->hashm_maxbucket;
571  goto loop_top;
572  }
573 
574  /* Okay, we're really done. Update tuple count in metapage. */
575  START_CRIT_SECTION();
576 
577  if (orig_maxbucket == metap->hashm_maxbucket &&
578  orig_ntuples == metap->hashm_ntuples)
579  {
580  /*
581  * No one has split or inserted anything since start of scan, so
582  * believe our count as gospel.
583  */
584  metap->hashm_ntuples = num_index_tuples;
585  }
586  else
587  {
588  /*
589  * Otherwise, our count is untrustworthy since we may have
590  * double-scanned tuples in split buckets. Proceed by dead-reckoning.
591  * (Note: we still return estimated_count = false, because using this
592  * count is better than not updating reltuples at all.)
593  */
594  if (metap->hashm_ntuples > tuples_removed)
595  metap->hashm_ntuples -= tuples_removed;
596  else
597  metap->hashm_ntuples = 0;
598  num_index_tuples = metap->hashm_ntuples;
599  }
600 
601  MarkBufferDirty(metabuf);
602 
603  /* XLOG stuff */
604  if (RelationNeedsWAL(rel))
605  {
606  xl_hash_update_meta_page xlrec;
607  XLogRecPtr recptr;
608 
609  xlrec.ntuples = metap->hashm_ntuples;
610 
611  XLogBeginInsert();
612  XLogRegisterData((char *) &xlrec, SizeOfHashUpdateMetaPage);
613 
614  XLogRegisterBuffer(0, metabuf, REGBUF_STANDARD);
615 
616  recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_UPDATE_META_PAGE);
617  PageSetLSN(BufferGetPage(metabuf), recptr);
618  }
619 
620  END_CRIT_SECTION();
621 
622  _hash_relbuf(rel, metabuf);
623 
624  /* return statistics */
625  if (stats == NULL)
626  stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
627  stats->estimated_count = false;
628  stats->num_index_tuples = num_index_tuples;
629  stats->tuples_removed += tuples_removed;
630  /* hashvacuumcleanup will fill in num_pages */
631 
632  return stats;
633 }
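
The callback contract described in hashbulkdelete()'s header comment is easy to miss: VACUUM supplies a function that answers, per heap TID, "is this tuple being deleted?", and the AM removes the matching index entries. A hypothetical callback of that shape (illustrative only; the state layout is assumed, and the callback actually used by VACUUM lives in vacuumlazy.c and consults its array of dead TIDs):

static bool
example_tid_is_dead(ItemPointer itemptr, void *state)
{
	/* assumed state layout: a TID array terminated by an invalid TID */
	ItemPointerData *dead_tids = (ItemPointerData *) state;
	int			i;

	for (i = 0; ItemPointerIsValid(&dead_tids[i]); i++)
	{
		if (ItemPointerEquals(itemptr, &dead_tids[i]))
			return true;	/* index entry pointing here should be removed */
	}
	return false;
}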
634 
635 /*
636  * Post-VACUUM cleanup.
637  *
638  * Result: a palloc'd struct containing statistical info for VACUUM displays.
639  */
640 IndexBulkDeleteResult *
641 hashvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
642 {
643  Relation rel = info->index;
644  BlockNumber num_pages;
645 
646  /* If hashbulkdelete wasn't called, return NULL signifying no change */
647  /* Note: this covers the analyze_only case too */
648  if (stats == NULL)
649  return NULL;
650 
651  /* update statistics */
652  num_pages = RelationGetNumberOfBlocks(rel);
653  stats->num_pages = num_pages;
654 
655  return stats;
656 }
657 
658 /*
659  * Helper function to perform deletion of index entries from a bucket.
660  *
661  * This function expects that the caller has acquired a cleanup lock on the
662  * primary bucket page, and will return with a write lock again held on the
663  * primary bucket page. The lock won't necessarily be held continuously,
664  * though, because we'll release it when visiting overflow pages.
665  *
666  * There can't be any concurrent scans in progress when we first enter this
667  * function because of the cleanup lock we hold on the primary bucket page,
668  * but as soon as we release that lock, there might be. If those scans got
669  * ahead of our cleanup scan, they might see a tuple before we kill it and
670  * wake up only after VACUUM has completed and the TID has been recycled for
671  * an unrelated tuple. To avoid that calamity, we prevent scans from passing
672  * our cleanup scan by locking the next page in the bucket chain before
673  * releasing the lock on the previous page. (This type of lock chaining is not
674  * ideal, so we might want to look for a better solution at some point.)
675  *
676  * We need to retain a pin on the primary bucket to ensure that no concurrent
677  * split can start.
678  */
679 void
680 hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf,
681  BlockNumber bucket_blkno, BufferAccessStrategy bstrategy,
682  uint32 maxbucket, uint32 highmask, uint32 lowmask,
683  double *tuples_removed, double *num_index_tuples,
684  bool split_cleanup,
685  IndexBulkDeleteCallback callback, void *callback_state)
686 {
687  BlockNumber blkno;
688  Buffer buf;
689  Bucket new_bucket PG_USED_FOR_ASSERTS_ONLY = InvalidBucket;
690  bool bucket_dirty = false;
691 
692  blkno = bucket_blkno;
693  buf = bucket_buf;
694 
695  if (split_cleanup)
696  new_bucket = _hash_get_newbucket_from_oldbucket(rel, cur_bucket,
697  lowmask, maxbucket);
698 
699  /* Scan each page in bucket */
700  for (;;)
701  {
702  HashPageOpaque opaque;
703  OffsetNumber offno;
704  OffsetNumber maxoffno;
705  Buffer next_buf;
706  Page page;
707  OffsetNumber deletable[MaxOffsetNumber];
708  int ndeletable = 0;
709  bool retain_pin = false;
710  bool clear_dead_marking = false;
711 
712  vacuum_delay_point();
713 
714  page = BufferGetPage(buf);
715  opaque = (HashPageOpaque) PageGetSpecialPointer(page);
716 
717  /* Scan each tuple in page */
718  maxoffno = PageGetMaxOffsetNumber(page);
719  for (offno = FirstOffsetNumber;
720  offno <= maxoffno;
721  offno = OffsetNumberNext(offno))
722  {
723  ItemPointer htup;
724  IndexTuple itup;
725  Bucket bucket;
726  bool kill_tuple = false;
727 
728  itup = (IndexTuple) PageGetItem(page,
729  PageGetItemId(page, offno));
730  htup = &(itup->t_tid);
731 
732  /*
733  * To remove the dead tuples, we strictly want to rely on the results
734  * of the callback function; refer to btvacuumpage for the detailed reason.
735  */
736  if (callback && callback(htup, callback_state))
737  {
738  kill_tuple = true;
739  if (tuples_removed)
740  *tuples_removed += 1;
741  }
742  else if (split_cleanup)
743  {
744  /* delete the tuples that are moved by split. */
745  bucket = _hash_hashkey2bucket(_hash_get_indextuple_hashkey(itup),
746  maxbucket,
747  highmask,
748  lowmask);
749  /* mark the item for deletion */
750  if (bucket != cur_bucket)
751  {
752  /*
753  * We expect tuples to either belong to current bucket or
754  * new_bucket. This is ensured because we don't allow
755  * further splits from bucket that contains garbage. See
756  * comments in _hash_expandtable.
757  */
758  Assert(bucket == new_bucket);
759  kill_tuple = true;
760  }
761  }
762 
763  if (kill_tuple)
764  {
765  /* mark the item for deletion */
766  deletable[ndeletable++] = offno;
767  }
768  else
769  {
770  /* we're keeping it, so count it */
771  if (num_index_tuples)
772  *num_index_tuples += 1;
773  }
774  }
775 
776  /* retain the pin on primary bucket page till end of bucket scan */
777  if (blkno == bucket_blkno)
778  retain_pin = true;
779  else
780  retain_pin = false;
781 
782  blkno = opaque->hasho_nextblkno;
783 
784  /*
785  * Apply deletions, advance to next page and write page if needed.
786  */
787  if (ndeletable > 0)
788  {
789  /* No ereport(ERROR) until changes are logged */
790  START_CRIT_SECTION();
791 
792  PageIndexMultiDelete(page, deletable, ndeletable);
793  bucket_dirty = true;
794 
795  /*
796  * Let us mark the page as clean if vacuum removes the DEAD tuples
797  * from an index page. We do this by clearing
798  * LH_PAGE_HAS_DEAD_TUPLES flag.
799  */
800  if (tuples_removed && *tuples_removed > 0 &&
801  H_HAS_DEAD_TUPLES(opaque))
802  {
803  opaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
804  clear_dead_marking = true;
805  }
806 
807  MarkBufferDirty(buf);
808 
809  /* XLOG stuff */
810  if (RelationNeedsWAL(rel))
811  {
812  xl_hash_delete xlrec;
813  XLogRecPtr recptr;
814 
815  xlrec.clear_dead_marking = clear_dead_marking;
816  xlrec.is_primary_bucket_page = (buf == bucket_buf) ? true : false;
817 
818  XLogBeginInsert();
819  XLogRegisterData((char *) &xlrec, SizeOfHashDelete);
820 
821  /*
822  * bucket buffer needs to be registered to ensure that we can
823  * acquire a cleanup lock on it during replay.
824  */
825  if (!xlrec.is_primary_bucket_page)
826  XLogRegisterBuffer(0, bucket_buf, REGBUF_STANDARD | REGBUF_NO_IMAGE);
827 
828  XLogRegisterBuffer(1, buf, REGBUF_STANDARD);
829  XLogRegisterBufData(1, (char *) deletable,
830  ndeletable * sizeof(OffsetNumber));
831 
832  recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_DELETE);
833  PageSetLSN(BufferGetPage(buf), recptr);
834  }
835 
836  END_CRIT_SECTION();
837  }
838 
839  /* bail out if there are no more pages to scan. */
840  if (!BlockNumberIsValid(blkno))
841  break;
842 
843  next_buf = _hash_getbuf_with_strategy(rel, blkno, HASH_WRITE,
844  LH_OVERFLOW_PAGE,
845  bstrategy);
846 
847  /*
848  * release the lock on previous page after acquiring the lock on next
849  * page
850  */
851  if (retain_pin)
852  LockBuffer(buf, BUFFER_LOCK_UNLOCK);
853  else
854  _hash_relbuf(rel, buf);
855 
856  buf = next_buf;
857  }
858 
859  /*
860  * Lock the bucket page to clear the garbage flag and squeeze the bucket.
861  * If the current buffer is the same as the bucket buffer, then we already
862  * have a lock on the bucket page.
863  */
864  if (buf != bucket_buf)
865  {
866  _hash_relbuf(rel, buf);
867  LockBuffer(bucket_buf, BUFFER_LOCK_EXCLUSIVE);
868  }
869 
870  /*
871  * Clear the garbage flag from bucket after deleting the tuples that are
872  * moved by split. We purposefully clear the flag before squeezing the
873  * bucket, so that after a restart, vacuum won't again try to delete the
874  * moved-by-split tuples.
875  */
876  if (split_cleanup)
877  {
878  HashPageOpaque bucket_opaque;
879  Page page;
880 
881  page = BufferGetPage(bucket_buf);
882  bucket_opaque = (HashPageOpaque) PageGetSpecialPointer(page);
883 
884  /* No ereport(ERROR) until changes are logged */
885  START_CRIT_SECTION();
886 
887  bucket_opaque->hasho_flag &= ~LH_BUCKET_NEEDS_SPLIT_CLEANUP;
888  MarkBufferDirty(bucket_buf);
889 
890  /* XLOG stuff */
891  if (RelationNeedsWAL(rel))
892  {
893  XLogRecPtr recptr;
894 
895  XLogBeginInsert();
896  XLogRegisterBuffer(0, bucket_buf, REGBUF_STANDARD);
897 
898  recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_SPLIT_CLEANUP);
899  PageSetLSN(page, recptr);
900  }
901 
902  END_CRIT_SECTION();
903  }
904 
905  /*
906  * If we have deleted anything, try to compact free space. For squeezing
907  * the bucket, we must have a cleanup lock, else it can impact the
908  * ordering of tuples for a scan that has started before it.
909  */
910  if (bucket_dirty && IsBufferCleanupOK(bucket_buf))
911  _hash_squeezebucket(rel, cur_bucket, bucket_blkno, bucket_buf,
912  bstrategy);
913  else
914  LockBuffer(bucket_buf, BUFFER_LOCK_UNLOCK);
915 }