PostgreSQL Source Code  git master
nbtree.c
1 /*-------------------------------------------------------------------------
2  *
3  * nbtree.c
4  * Implementation of Lehman and Yao's btree management algorithm for
5  * Postgres.
6  *
7  * NOTES
8  * This file contains only the public interface routines.
9  *
10  *
11  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
12  * Portions Copyright (c) 1994, Regents of the University of California
13  *
14  * IDENTIFICATION
15  * src/backend/access/nbtree/nbtree.c
16  *
17  *-------------------------------------------------------------------------
18  */
19 #include "postgres.h"
20 
21 #include "access/nbtree.h"
22 #include "access/nbtxlog.h"
23 #include "access/relscan.h"
24 #include "access/xlog.h"
25 #include "commands/progress.h"
26 #include "commands/vacuum.h"
27 #include "miscadmin.h"
28 #include "nodes/execnodes.h"
29 #include "pgstat.h"
30 #include "postmaster/autovacuum.h"
31 #include "storage/condition_variable.h"
32 #include "storage/indexfsm.h"
33 #include "storage/ipc.h"
34 #include "storage/lmgr.h"
35 #include "storage/smgr.h"
36 #include "utils/builtins.h"
37 #include "utils/index_selfuncs.h"
38 #include "utils/memutils.h"
39 
40 
41 /* Working state needed by btvacuumpage */
42 typedef struct
43 {
44  IndexVacuumInfo *info;
45  IndexBulkDeleteResult *stats;
46  IndexBulkDeleteCallback callback;
47  void *callback_state;
48  BTCycleId cycleid;
49  BlockNumber totFreePages; /* true total # of free pages */
50  TransactionId oldestBtpoXact;
51  MemoryContext pagedelcontext;
52 } BTVacState;
53 
54 /*
55  * BTPARALLEL_NOT_INITIALIZED indicates that the scan has not started.
56  *
57  * BTPARALLEL_ADVANCING indicates that some process is advancing the scan to
58  * a new page; others must wait.
59  *
60  * BTPARALLEL_IDLE indicates that no backend is currently advancing the scan
61  * to a new page; some process can start doing that.
62  *
63  * BTPARALLEL_DONE indicates that the scan is complete (including error exit).
64  * We reach this state once for every distinct combination of array keys.
65  */
66 typedef enum
67 {
68  BTPARALLEL_NOT_INITIALIZED,
69  BTPARALLEL_ADVANCING,
70  BTPARALLEL_IDLE,
71  BTPARALLEL_DONE
72 } BTPS_State;
73 
74 /*
75  * BTParallelScanDescData contains btree specific shared information required
76  * for parallel scan.
77  */
78 typedef struct BTParallelScanDescData
79 {
80  BlockNumber btps_scanPage; /* latest or next page to be scanned */
81  BTPS_State btps_pageStatus; /* indicates whether next page is
82  * available for scan. see above for
83  * possible states of parallel scan. */
84  int btps_arrayKeyCount; /* count indicating number of array scan
85  * keys processed by parallel scan */
86  slock_t btps_mutex; /* protects above variables */
87  ConditionVariable btps_cv; /* used to synchronize parallel scan */
88 } BTParallelScanDescData;
89 
90 typedef struct BTParallelScanDescData *BTParallelScanDesc;
91 
92 
93 static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
94  IndexBulkDeleteCallback callback, void *callback_state,
95  BTCycleId cycleid, TransactionId *oldestBtpoXact);
96 static void btvacuumpage(BTVacState *vstate, BlockNumber blkno,
97  BlockNumber orig_blkno);
98 
99 
100 /*
101  * Btree handler function: return IndexAmRoutine with access method parameters
102  * and callbacks.
103  */
104 Datum
105 bthandler(PG_FUNCTION_ARGS)
106 {
107  IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
108 
109  amroutine->amstrategies = BTMaxStrategyNumber;
110  amroutine->amsupport = BTNProcs;
111  amroutine->amcanorder = true;
112  amroutine->amcanorderbyop = false;
113  amroutine->amcanbackward = true;
114  amroutine->amcanunique = true;
115  amroutine->amcanmulticol = true;
116  amroutine->amoptionalkey = true;
117  amroutine->amsearcharray = true;
118  amroutine->amsearchnulls = true;
119  amroutine->amstorage = false;
120  amroutine->amclusterable = true;
121  amroutine->ampredlocks = true;
122  amroutine->amcanparallel = true;
123  amroutine->amcaninclude = true;
124  amroutine->amusemaintenanceworkmem = false;
125  amroutine->amparallelvacuumoptions =
126   VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_COND_CLEANUP;
127  amroutine->amkeytype = InvalidOid;
128 
129  amroutine->ambuild = btbuild;
130  amroutine->ambuildempty = btbuildempty;
131  amroutine->aminsert = btinsert;
132  amroutine->ambulkdelete = btbulkdelete;
133  amroutine->amvacuumcleanup = btvacuumcleanup;
134  amroutine->amcanreturn = btcanreturn;
135  amroutine->amcostestimate = btcostestimate;
136  amroutine->amoptions = btoptions;
137  amroutine->amproperty = btproperty;
138  amroutine->ambuildphasename = btbuildphasename;
139  amroutine->amvalidate = btvalidate;
140  amroutine->ambeginscan = btbeginscan;
141  amroutine->amrescan = btrescan;
142  amroutine->amgettuple = btgettuple;
143  amroutine->amgetbitmap = btgetbitmap;
144  amroutine->amendscan = btendscan;
145  amroutine->ammarkpos = btmarkpos;
146  amroutine->amrestrpos = btrestrpos;
147  amroutine->amestimateparallelscan = btestimateparallelscan;
148  amroutine->aminitparallelscan = btinitparallelscan;
149  amroutine->amparallelrescan = btparallelrescan;
150 
151  PG_RETURN_POINTER(amroutine);
152 }
153 
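The routine built above is how every btree callback reaches the rest of the server. As a rough, hedged sketch (not taken from this file), core code resolves a handler function's OID into the routine with GetIndexAmRoutine() and then calls through the function pointers; amhandler_oid here is a hypothetical variable:

    /* Sketch only: obtain the AM routine from a handler function's OID. */
    IndexAmRoutine *routine = GetIndexAmRoutine(amhandler_oid);

    if (routine->amcanorder)
        /* the planner may use this index to satisfy ORDER BY */ ;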
154 /*
155  * btbuildempty() -- build an empty btree index in the initialization fork
156  */
157 void
158 btbuildempty(Relation index)
159 {
160  Page metapage;
161 
162  /* Construct metapage. */
163  metapage = (Page) palloc(BLCKSZ);
164  _bt_initmetapage(metapage, P_NONE, 0);
165 
166  /*
167  * Write the page and log it. It might seem that an immediate sync would
168  * be sufficient to guarantee that the file exists on disk, but recovery
169  * itself might remove it while replaying, for example, an
170  * XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record. Therefore, we need
171  * this even when wal_level=minimal.
172  */
173  PageSetChecksumInplace(metapage, BTREE_METAPAGE);
174  smgrwrite(index->rd_smgr, INIT_FORKNUM, BTREE_METAPAGE,
175  (char *) metapage, true);
176  log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
177  BTREE_METAPAGE, metapage, true);
178 
179  /*
180  * An immediate sync is required even if we xlog'd the page, because the
181  * write did not go through shared_buffers and therefore a concurrent
182  * checkpoint may have moved the redo pointer past our xlog record.
183  */
184  smgrimmedsync(index->rd_smgr, INIT_FORKNUM);
185 }
186 
187 /*
188  * btinsert() -- insert an index tuple into a btree.
189  *
190  * Descend the tree recursively, find the appropriate location for our
191  * new tuple, and put it there.
192  */
193 bool
194 btinsert(Relation rel, Datum *values, bool *isnull,
195  ItemPointer ht_ctid, Relation heapRel,
196  IndexUniqueCheck checkUnique,
197  IndexInfo *indexInfo)
198 {
199  bool result;
200  IndexTuple itup;
201 
202  /* generate an index tuple */
203  itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
204  itup->t_tid = *ht_ctid;
205 
206  result = _bt_doinsert(rel, itup, checkUnique, heapRel);
207 
208  pfree(itup);
209 
210  return result;
211 }
212 
213 /*
214  * btgettuple() -- Get the next tuple in the scan.
215  */
216 bool
217 btgettuple(IndexScanDesc scan, ScanDirection dir)
218 {
219  BTScanOpaque so = (BTScanOpaque) scan->opaque;
220  bool res;
221 
222  /* btree indexes are never lossy */
223  scan->xs_recheck = false;
224 
225  /*
226  * If we have any array keys, initialize them during first call for a
227  * scan. We can't do this in btrescan because we don't know the scan
228  * direction at that time.
229  */
230  if (so->numArrayKeys && !BTScanPosIsValid(so->currPos))
231  {
232  /* punt if we have any unsatisfiable array keys */
233  if (so->numArrayKeys < 0)
234  return false;
235 
236  _bt_start_array_keys(scan, dir);
237  }
238 
239  /* This loop handles advancing to the next array elements, if any */
240  do
241  {
242  /*
243  * If we've already initialized this scan, we can just advance it in
244  * the appropriate direction. If we haven't done so yet, we call
245  * _bt_first() to get the first item in the scan.
246  */
247  if (!BTScanPosIsValid(so->currPos))
248  res = _bt_first(scan, dir);
249  else
250  {
251  /*
252  * Check to see if we should kill the previously-fetched tuple.
253  */
254  if (scan->kill_prior_tuple)
255  {
256  /*
257  * Yes, remember it for later. (We'll deal with all such
258  * tuples at once right before leaving the index page.) The
259  * test for numKilled overrun is not just paranoia: if the
260  * caller reverses direction in the indexscan then the same
261  * item might get entered multiple times. It's not worth
262  * trying to optimize that, so we don't detect it, but instead
263  * just forget any excess entries.
264  */
265  if (so->killedItems == NULL)
266  so->killedItems = (int *)
267  palloc(MaxIndexTuplesPerPage * sizeof(int));
268  if (so->numKilled < MaxIndexTuplesPerPage)
269  so->killedItems[so->numKilled++] = so->currPos.itemIndex;
270  }
271 
272  /*
273  * Now continue the scan.
274  */
275  res = _bt_next(scan, dir);
276  }
277 
278  /* If we have a tuple, return it ... */
279  if (res)
280  break;
281  /* ... otherwise see if we have more array keys to deal with */
282  } while (so->numArrayKeys && _bt_advance_array_keys(scan, dir));
283 
284  return res;
285 }
286 
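Callers do not invoke btgettuple() directly; they go through the indexam layer, which dispatches to amgettuple. A minimal, hedged sketch of that driving loop, assuming an already-initialized scan descriptor:

    /* Sketch only: fetch matching TIDs one at a time in forward order. */
    ItemPointer tid;

    while ((tid = index_getnext_tid(scan, ForwardScanDirection)) != NULL)
    {
        /* use *tid; scan->xs_recheck is always false for btree, per above */
    }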
287 /*
288  * btgetbitmap() -- gets all matching tuples, and adds them to a bitmap
289  */
290 int64
291 btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
292 {
293  BTScanOpaque so = (BTScanOpaque) scan->opaque;
294  int64 ntids = 0;
295  ItemPointer heapTid;
296 
297  /*
298  * If we have any array keys, initialize them.
299  */
300  if (so->numArrayKeys)
301  {
302  /* punt if we have any unsatisfiable array keys */
303  if (so->numArrayKeys < 0)
304  return ntids;
305 
306  _bt_start_array_keys(scan, ForwardScanDirection);
307  }
308 
309  /* This loop handles advancing to the next array elements, if any */
310  do
311  {
312  /* Fetch the first page & tuple */
313  if (_bt_first(scan, ForwardScanDirection))
314  {
315  /* Save tuple ID, and continue scanning */
316  heapTid = &scan->xs_heaptid;
317  tbm_add_tuples(tbm, heapTid, 1, false);
318  ntids++;
319 
320  for (;;)
321  {
322  /*
323  * Advance to next tuple within page. This is the same as the
324  * easy case in _bt_next().
325  */
326  if (++so->currPos.itemIndex > so->currPos.lastItem)
327  {
328  /* let _bt_next do the heavy lifting */
329  if (!_bt_next(scan, ForwardScanDirection))
330  break;
331  }
332 
333  /* Save tuple ID, and continue scanning */
334  heapTid = &so->currPos.items[so->currPos.itemIndex].heapTid;
335  tbm_add_tuples(tbm, heapTid, 1, false);
336  ntids++;
337  }
338  }
339  /* Now see if we have more array keys to deal with */
340  } while (so->numArrayKeys && _bt_advance_array_keys(scan, ForwardScanDirection));
341 
342  return ntids;
343 }
344 
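For bitmap scans the executor makes a single call through the indexam layer rather than iterating tuple by tuple. A hedged sketch, assuming an already-initialized scan descriptor:

    /* Sketch only: collect all matching TIDs into a bitmap in one call. */
    TIDBitmap  *tbm = tbm_create(work_mem * 1024L, NULL);
    int64       ntids = index_getbitmap(scan, tbm);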
345 /*
346  * btbeginscan() -- start a scan on a btree index
347  */
348 IndexScanDesc
349 btbeginscan(Relation rel, int nkeys, int norderbys)
350 {
351  IndexScanDesc scan;
352  BTScanOpaque so;
353 
354  /* no order by operators allowed */
355  Assert(norderbys == 0);
356 
357  /* get the scan */
358  scan = RelationGetIndexScan(rel, nkeys, norderbys);
359 
360  /* allocate private workspace */
361  so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
362  BTScanPosInvalidate(so->currPos);
363  BTScanPosInvalidate(so->markPos);
364  if (scan->numberOfKeys > 0)
365  so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
366  else
367  so->keyData = NULL;
368 
369  so->arrayKeyData = NULL; /* assume no array keys for now */
370  so->numArrayKeys = 0;
371  so->arrayKeys = NULL;
372  so->arrayContext = NULL;
373 
374  so->killedItems = NULL; /* until needed */
375  so->numKilled = 0;
376 
377  /*
378  * We don't know yet whether the scan will be index-only, so we do not
379  * allocate the tuple workspace arrays until btrescan. However, we set up
380  * scan->xs_itupdesc whether we'll need it or not, since that's so cheap.
381  */
382  so->currTuples = so->markTuples = NULL;
383 
384  scan->xs_itupdesc = RelationGetDescr(rel);
385 
386  scan->opaque = so;
387 
388  return scan;
389 }
390 
391 /*
392  * btrescan() -- rescan an index relation
393  */
394 void
395 btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
396  ScanKey orderbys, int norderbys)
397 {
398  BTScanOpaque so = (BTScanOpaque) scan->opaque;
399 
400  /* we aren't holding any read locks, but gotta drop the pins */
401  if (BTScanPosIsValid(so->currPos))
402  {
403  /* Before leaving current page, deal with any killed items */
404  if (so->numKilled > 0)
405  _bt_killitems(scan);
406  BTScanPosUnpinIfPinned(so->currPos);
407  BTScanPosInvalidate(so->currPos);
408  }
409 
410  so->markItemIndex = -1;
411  so->arrayKeyCount = 0;
412  BTScanPosUnpinIfPinned(so->markPos);
413  BTScanPosInvalidate(so->markPos);
414 
415  /*
416  * Allocate tuple workspace arrays, if needed for an index-only scan and
417  * not already done in a previous rescan call. To save on palloc
418  * overhead, both workspaces are allocated as one palloc block; only this
419  * function and btendscan know that.
420  *
421  * NOTE: this data structure also makes it safe to return data from a
422  * "name" column, even though btree name_ops uses an underlying storage
423  * datatype of cstring. The risk there is that "name" is supposed to be
424  * padded to NAMEDATALEN, but the actual index tuple is probably shorter.
425  * However, since we only return data out of tuples sitting in the
426  * currTuples array, a fetch of NAMEDATALEN bytes can at worst pull some
427  * data out of the markTuples array --- running off the end of memory for
428  * a SIGSEGV is not possible. Yeah, this is ugly as sin, but it beats
429  * adding special-case treatment for name_ops elsewhere.
430  */
431  if (scan->xs_want_itup && so->currTuples == NULL)
432  {
433  so->currTuples = (char *) palloc(BLCKSZ * 2);
434  so->markTuples = so->currTuples + BLCKSZ;
435  }
436 
437  /*
438  * Reset the scan keys. Note that keys ordering stuff moved to _bt_first.
439  * - vadim 05/05/97
440  */
441  if (scankey && scan->numberOfKeys > 0)
442  memmove(scan->keyData,
443  scankey,
444  scan->numberOfKeys * sizeof(ScanKeyData));
445  so->numberOfKeys = 0; /* until _bt_preprocess_keys sets it */
446 
447  /* If any keys are SK_SEARCHARRAY type, set up array-key info */
448  _bt_preprocess_array_keys(scan);
449 }
450 
451 /*
452  * btendscan() -- close down a scan
453  */
454 void
455 btendscan(IndexScanDesc scan)
456 {
457  BTScanOpaque so = (BTScanOpaque) scan->opaque;
458 
459  /* we aren't holding any read locks, but gotta drop the pins */
460  if (BTScanPosIsValid(so->currPos))
461  {
462  /* Before leaving current page, deal with any killed items */
463  if (so->numKilled > 0)
464  _bt_killitems(scan);
465  BTScanPosUnpinIfPinned(so->currPos);
466  }
467 
468  so->markItemIndex = -1;
469  BTScanPosUnpinIfPinned(so->markPos);
470 
471  /* No need to invalidate positions, the RAM is about to be freed. */
472 
473  /* Release storage */
474  if (so->keyData != NULL)
475  pfree(so->keyData);
476  /* so->arrayKeyData and so->arrayKeys are in arrayContext */
477  if (so->arrayContext != NULL)
478  MemoryContextDelete(so->arrayContext);
479  if (so->killedItems != NULL)
480  pfree(so->killedItems);
481  if (so->currTuples != NULL)
482  pfree(so->currTuples);
483  /* so->markTuples should not be pfree'd, see btrescan */
484  pfree(so);
485 }
486 
487 /*
488  * btmarkpos() -- save current scan position
489  */
490 void
491 btmarkpos(IndexScanDesc scan)
492 {
493  BTScanOpaque so = (BTScanOpaque) scan->opaque;
494 
495  /* There may be an old mark with a pin (but no lock). */
496  BTScanPosUnpinIfPinned(so->markPos);
497 
498  /*
499  * Just record the current itemIndex. If we later step to next page
500  * before releasing the marked position, _bt_steppage makes a full copy of
501  * the currPos struct in markPos. If (as often happens) the mark is moved
502  * before we leave the page, we don't have to do that work.
503  */
504  if (BTScanPosIsValid(so->currPos))
505  so->markItemIndex = so->currPos.itemIndex;
506  else
507  {
508  BTScanPosInvalidate(so->markPos);
509  so->markItemIndex = -1;
510  }
511 
512  /* Also record the current positions of any array keys */
513  if (so->numArrayKeys)
514  _bt_mark_array_keys(scan);
515 }
516 
517 /*
518  * btrestrpos() -- restore scan to last saved position
519  */
520 void
521 btrestrpos(IndexScanDesc scan)
522 {
523  BTScanOpaque so = (BTScanOpaque) scan->opaque;
524 
525  /* Restore the marked positions of any array keys */
526  if (so->numArrayKeys)
527  _bt_restore_array_keys(scan);
528 
529  if (so->markItemIndex >= 0)
530  {
531  /*
532  * The scan has never moved to a new page since the last mark. Just
533  * restore the itemIndex.
534  *
535  * NB: In this case we can't count on anything in so->markPos to be
536  * accurate.
537  */
538  so->currPos.itemIndex = so->markItemIndex;
539  }
540  else
541  {
542  /*
543  * The scan moved to a new page after last mark or restore, and we are
544  * now restoring to the marked page. We aren't holding any read
545  * locks, but if we're still holding the pin for the current position,
546  * we must drop it.
547  */
548  if (BTScanPosIsValid(so->currPos))
549  {
550  /* Before leaving current page, deal with any killed items */
551  if (so->numKilled > 0)
552  _bt_killitems(scan);
553  BTScanPosUnpinIfPinned(so->currPos);
554  }
555 
556  if (BTScanPosIsValid(so->markPos))
557  {
558  /* bump pin on mark buffer for assignment to current buffer */
559  if (BTScanPosIsPinned(so->markPos))
560  IncrBufferRefCount(so->markPos.buf);
561  memcpy(&so->currPos, &so->markPos,
562  offsetof(BTScanPosData, items[1]) +
563  so->markPos.lastItem * sizeof(BTScanPosItem));
564  if (so->currTuples)
565  memcpy(so->currTuples, so->markTuples,
566  so->markPos.nextTupleOffset);
567  }
568  else
569  BTScanPosInvalidate(so->currPos);
570  }
571 }
572 
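btmarkpos()/btrestrpos() are reached through the indexam wrappers; merge join is their main consumer, re-reading a run of duplicate outer keys. A hedged sketch of that usage, assuming an open index scan:

    /* Sketch only: remember a position, read ahead, then come back to it. */
    index_markpos(scan);        /* ends up in btmarkpos() */
    /* ... fetch some tuples with index_getnext_tid() ... */
    index_restrpos(scan);       /* ends up in btrestrpos() */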
573 /*
574  * btestimateparallelscan -- estimate storage for BTParallelScanDescData
575  */
576 Size
577 btestimateparallelscan(void)
578 {
579  return sizeof(BTParallelScanDescData);
580 }
581 
582 /*
583  * btinitparallelscan -- initialize BTParallelScanDesc for parallel btree scan
584  */
585 void
586 btinitparallelscan(void *target)
587 {
588  BTParallelScanDesc bt_target = (BTParallelScanDesc) target;
589 
590  SpinLockInit(&bt_target->btps_mutex);
591  bt_target->btps_scanPage = InvalidBlockNumber;
592  bt_target->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
593  bt_target->btps_arrayKeyCount = 0;
594  ConditionVariableInit(&bt_target->btps_cv);
595 }
596 
597 /*
598  * btparallelrescan() -- reset parallel scan
599  */
600 void
601 btparallelrescan(IndexScanDesc scan)
602 {
603  BTParallelScanDesc btscan;
604  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
605 
606  Assert(parallel_scan);
607 
608  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
609  parallel_scan->ps_offset);
610 
611  /*
612  * In theory, we don't need to acquire the spinlock here, because there
613  * shouldn't be any other workers running at this point, but we do so for
614  * consistency.
615  */
616  SpinLockAcquire(&btscan->btps_mutex);
617  btscan->btps_scanPage = InvalidBlockNumber;
618  btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
619  btscan->btps_arrayKeyCount = 0;
620  SpinLockRelease(&btscan->btps_mutex);
621 }
622 
623 /*
624  * _bt_parallel_seize() -- Begin the process of advancing the scan to a new
625  * page. Other scans must wait until we call _bt_parallel_release()
626  * or _bt_parallel_done().
627  *
628  * The return value is true if we successfully seized the scan and false
629  * if we did not. The latter case occurs if no pages remain for the current
630  * set of scankeys.
631  *
632  * If the return value is true, *pageno returns the next or current page
633  * of the scan (depending on the scan direction). An invalid block number
634  * means the scan hasn't yet started, and P_NONE means we've reached the end.
635  * The first time a participating process reaches the last page, it will return
636  * true and set *pageno to P_NONE; after that, further attempts to seize the
637  * scan will return false.
638  *
639  * Callers should ignore the value of pageno if the return value is false.
640  */
641 bool
642 _bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno)
643 {
644  BTScanOpaque so = (BTScanOpaque) scan->opaque;
645  BTPS_State pageStatus;
646  bool exit_loop = false;
647  bool status = true;
648  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
649  BTParallelScanDesc btscan;
650 
651  *pageno = P_NONE;
652 
653  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
654  parallel_scan->ps_offset);
655 
656  while (1)
657  {
658  SpinLockAcquire(&btscan->btps_mutex);
659  pageStatus = btscan->btps_pageStatus;
660 
661  if (so->arrayKeyCount < btscan->btps_arrayKeyCount)
662  {
663  /* Parallel scan has already advanced to a new set of scankeys. */
664  status = false;
665  }
666  else if (pageStatus == BTPARALLEL_DONE)
667  {
668  /*
669  * We're done with this set of scankeys. This may be the end, or
670  * there could be more sets to try.
671  */
672  status = false;
673  }
674  else if (pageStatus != BTPARALLEL_ADVANCING)
675  {
676  /*
677  * We have successfully seized control of the scan for the purpose
678  * of advancing it to a new page!
679  */
680  btscan->btps_pageStatus = BTPARALLEL_ADVANCING;
681  *pageno = btscan->btps_scanPage;
682  exit_loop = true;
683  }
684  SpinLockRelease(&btscan->btps_mutex);
685  if (exit_loop || !status)
686  break;
687  ConditionVariableSleep(&btscan->btps_cv, WAIT_EVENT_BTREE_PAGE);
688  }
689  ConditionVariableCancelSleep();
690 
691  return status;
692 }
693 
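A hedged sketch of how a participating backend typically uses the seize/release protocol when stepping to the next page (simplified from the scan code in nbtsearch.c; next_blkno is a hypothetical variable holding the right sibling found while reading the page):

    /* Sketch only: one advance step of a parallel btree scan. */
    BlockNumber blkno;

    if (!_bt_parallel_seize(scan, &blkno))
        return false;               /* nothing left for this set of array keys */
    if (blkno == P_NONE)
    {
        _bt_parallel_done(scan);    /* hit the end; wake any waiters */
        return false;
    }
    /* ... read the page at blkno, note its right sibling in next_blkno ... */
    _bt_parallel_release(scan, next_blkno);     /* let another worker advance */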
694 /*
695  * _bt_parallel_release() -- Complete the process of advancing the scan to a
696  * new page. We now have the new value btps_scanPage; some other backend
697  * can now begin advancing the scan.
698  */
699 void
700 _bt_parallel_release(IndexScanDesc scan, BlockNumber scan_page)
701 {
702  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
703  BTParallelScanDesc btscan;
704 
705  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
706  parallel_scan->ps_offset);
707 
708  SpinLockAcquire(&btscan->btps_mutex);
709  btscan->btps_scanPage = scan_page;
710  btscan->btps_pageStatus = BTPARALLEL_IDLE;
711  SpinLockRelease(&btscan->btps_mutex);
712  ConditionVariableSignal(&btscan->btps_cv);
713 }
714 
715 /*
716  * _bt_parallel_done() -- Mark the parallel scan as complete.
717  *
718  * When there are no pages left to scan, this function should be called to
719  * notify other workers. Otherwise, they might wait forever for the scan to
720  * advance to the next page.
721  */
722 void
723 _bt_parallel_done(IndexScanDesc scan)
724 {
725  BTScanOpaque so = (BTScanOpaque) scan->opaque;
726  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
727  BTParallelScanDesc btscan;
728  bool status_changed = false;
729 
730  /* Do nothing, for non-parallel scans */
731  if (parallel_scan == NULL)
732  return;
733 
734  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
735  parallel_scan->ps_offset);
736 
737  /*
738  * Mark the parallel scan as done for this combination of scan keys,
739  * unless some other process already did so. See also
740  * _bt_advance_array_keys.
741  */
742  SpinLockAcquire(&btscan->btps_mutex);
743  if (so->arrayKeyCount >= btscan->btps_arrayKeyCount &&
744  btscan->btps_pageStatus != BTPARALLEL_DONE)
745  {
746  btscan->btps_pageStatus = BTPARALLEL_DONE;
747  status_changed = true;
748  }
749  SpinLockRelease(&btscan->btps_mutex);
750 
751  /* wake up all the workers associated with this parallel scan */
752  if (status_changed)
753  ConditionVariableBroadcast(&btscan->btps_cv);
754 }
755 
756 /*
757  * _bt_parallel_advance_array_keys() -- Advances the parallel scan for array
758  * keys.
759  *
760  * Updates the count of array keys processed for both local and parallel
761  * scans.
762  */
763 void
764 _bt_parallel_advance_array_keys(IndexScanDesc scan)
765 {
766  BTScanOpaque so = (BTScanOpaque) scan->opaque;
767  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
768  BTParallelScanDesc btscan;
769 
770  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
771  parallel_scan->ps_offset);
772 
773  so->arrayKeyCount++;
774  SpinLockAcquire(&btscan->btps_mutex);
775  if (btscan->btps_pageStatus == BTPARALLEL_DONE)
776  {
777  btscan->btps_scanPage = InvalidBlockNumber;
778  btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
779  btscan->btps_arrayKeyCount++;
780  }
781  SpinLockRelease(&btscan->btps_mutex);
782 }
783 
784 /*
785  * _bt_vacuum_needs_cleanup() -- Checks if index needs cleanup assuming that
786  * btbulkdelete() wasn't called.
787  */
788 static bool
789 _bt_vacuum_needs_cleanup(IndexVacuumInfo *info)
790 {
791  Buffer metabuf;
792  Page metapg;
793  BTMetaPageData *metad;
794  bool result = false;
795 
796  metabuf = _bt_getbuf(info->index, BTREE_METAPAGE, BT_READ);
797  metapg = BufferGetPage(metabuf);
798  metad = BTPageGetMeta(metapg);
799 
800  if (metad->btm_version < BTREE_NOVAC_VERSION)
801  {
802  /*
803  * Do cleanup if metapage needs upgrade, because we don't have
804  * cleanup-related meta-information yet.
805  */
806  result = true;
807  }
808  else if (TransactionIdIsValid(metad->btm_oldest_btpo_xact) &&
809  TransactionIdPrecedes(metad->btm_oldest_btpo_xact,
810  RecentGlobalXmin))
811  {
812  /*
813  * If oldest btpo.xact in the deleted pages is older than
814  * RecentGlobalXmin, then at least one deleted page can be recycled.
815  */
816  result = true;
817  }
818  else
819  {
820  BTOptions *relopts;
821  float8 cleanup_scale_factor;
822  float8 prev_num_heap_tuples;
823 
824  /*
825  * If the table has received enough insertions and no cleanup was
826  * performed, then the index would appear to have stale statistics. If the
827  * scale factor is set, we avoid that by performing cleanup when the number
828  * of inserted tuples exceeds a vacuum_cleanup_index_scale_factor fraction
829  * of the original tuple count.
830  */
831  relopts = (BTOptions *) info->index->rd_options;
832  cleanup_scale_factor = (relopts &&
833  relopts->vacuum_cleanup_index_scale_factor >= 0)
834  ? relopts->vacuum_cleanup_index_scale_factor
835  : vacuum_cleanup_index_scale_factor;
836  prev_num_heap_tuples = metad->btm_last_cleanup_num_heap_tuples;
837 
838  if (cleanup_scale_factor <= 0 ||
839  prev_num_heap_tuples <= 0 ||
840  (info->num_heap_tuples - prev_num_heap_tuples) /
841  prev_num_heap_tuples >= cleanup_scale_factor)
842  result = true;
843  }
844 
845  _bt_relbuf(info->index, metabuf);
846  return result;
847 }
848 
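To make the scale-factor branch above concrete, a worked example with illustrative numbers:

    /*
     * With vacuum_cleanup_index_scale_factor = 0.1, a previous cleanup
     * recorded at prev_num_heap_tuples = 1,000,000, and
     * info->num_heap_tuples = 1,150,000 now:
     *
     *     (1,150,000 - 1,000,000) / 1,000,000 = 0.15 >= 0.1
     *
     * so _bt_vacuum_needs_cleanup() returns true and a cleanup-only scan of
     * the index is performed.
     */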
849 /*
850  * Bulk deletion of all index entries pointing to a set of heap tuples.
851  * The set of target tuples is specified via a callback routine that tells
852  * whether any given heap tuple (identified by ItemPointer) is being deleted.
853  *
854  * Result: a palloc'd struct containing statistical info for VACUUM displays.
855  */
856 IndexBulkDeleteResult *
857 btbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
858  IndexBulkDeleteCallback callback, void *callback_state)
859 {
860  Relation rel = info->index;
861  BTCycleId cycleid;
862 
863  /* allocate stats if first time through, else re-use existing struct */
864  if (stats == NULL)
865  stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
866 
867  /* Establish the vacuum cycle ID to use for this scan */
868  /* The ENSURE stuff ensures we clean up shared memory on failure */
869  PG_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
870  {
871  TransactionId oldestBtpoXact;
872 
873  cycleid = _bt_start_vacuum(rel);
874 
875  btvacuumscan(info, stats, callback, callback_state, cycleid,
876  &oldestBtpoXact);
877 
878  /*
879  * Update cleanup-related information in the metapage. This information
880  * is used only for cleanup, but keeping it up to date can avoid an
881  * unnecessary cleanup even after bulkdelete.
882  */
883  _bt_update_meta_cleanup_info(info->index, oldestBtpoXact,
884  info->num_heap_tuples);
885  }
886  PG_END_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
887  _bt_end_vacuum(rel);
888 
889  return stats;
890 }
891 
892 /*
893  * Post-VACUUM cleanup.
894  *
895  * Result: a palloc'd struct containing statistical info for VACUUM displays.
896  */
897 IndexBulkDeleteResult *
898 btvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
899 {
900  /* No-op in ANALYZE ONLY mode */
901  if (info->analyze_only)
902  return stats;
903 
904  /*
905  * If btbulkdelete was called, we need not do anything, just return the
906  * stats from the latest btbulkdelete call. If it wasn't called, we might
907  * still need to do a pass over the index, to recycle any newly-recyclable
908  * pages or to obtain index statistics. _bt_vacuum_needs_cleanup
909  * determines if either are needed.
910  *
911  * Since we aren't going to actually delete any leaf items, there's no
912  * need to go through all the vacuum-cycle-ID pushups.
913  */
914  if (stats == NULL)
915  {
916  TransactionId oldestBtpoXact;
917 
918  /* Check if we need a cleanup */
919  if (!_bt_vacuum_needs_cleanup(info))
920  return NULL;
921 
922  stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
923  btvacuumscan(info, stats, NULL, NULL, 0, &oldestBtpoXact);
924 
925  /* Update cleanup-related information in the metapage */
926  _bt_update_meta_cleanup_info(info->index, oldestBtpoXact,
927  info->num_heap_tuples);
928  }
929 
930  /*
931  * It's quite possible for us to be fooled by concurrent page splits into
932  * double-counting some index tuples, so disbelieve any total that exceeds
933  * the underlying heap's count ... if we know that accurately. Otherwise
934  * this might just make matters worse.
935  */
936  if (!info->estimated_count)
937  {
938  if (stats->num_index_tuples > info->num_heap_tuples)
939  stats->num_index_tuples = info->num_heap_tuples;
940  }
941 
942  return stats;
943 }
944 
945 /*
946  * btvacuumscan --- scan the index for VACUUMing purposes
947  *
948  * This combines the functions of looking for leaf tuples that are deletable
949  * according to the vacuum callback, looking for empty pages that can be
950  * deleted, and looking for old deleted pages that can be recycled. Both
951  * btbulkdelete and btvacuumcleanup invoke this (the latter only if no
952  * btbulkdelete call occurred).
953  *
954  * The caller is responsible for initially allocating/zeroing a stats struct
955  * and for obtaining a vacuum cycle ID if necessary.
956  */
957 static void
958 btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
959  IndexBulkDeleteCallback callback, void *callback_state,
960  BTCycleId cycleid, TransactionId *oldestBtpoXact)
961 {
962  Relation rel = info->index;
963  BTVacState vstate;
964  BlockNumber num_pages;
965  BlockNumber blkno;
966  bool needLock;
967 
968  /*
969  * Reset counts that will be incremented during the scan; needed in case
970  * of multiple scans during a single VACUUM command
971  */
972  stats->estimated_count = false;
973  stats->num_index_tuples = 0;
974  stats->pages_deleted = 0;
975 
976  /* Set up info to pass down to btvacuumpage */
977  vstate.info = info;
978  vstate.stats = stats;
979  vstate.callback = callback;
980  vstate.callback_state = callback_state;
981  vstate.cycleid = cycleid;
982  vstate.totFreePages = 0;
983  vstate.oldestBtpoXact = InvalidTransactionId;
984 
985  /* Create a temporary memory context to run _bt_pagedel in */
986  vstate.pagedelcontext = AllocSetContextCreate(CurrentMemoryContext,
987  "_bt_pagedel",
988  ALLOCSET_DEFAULT_SIZES);
989 
990  /*
991  * The outer loop iterates over all index pages except the metapage, in
992  * physical order (we hope the kernel will cooperate in providing
993  * read-ahead for speed). It is critical that we visit all leaf pages,
994  * including ones added after we start the scan, else we might fail to
995  * delete some deletable tuples. Hence, we must repeatedly check the
996  * relation length. We must acquire the relation-extension lock while
997  * doing so to avoid a race condition: if someone else is extending the
998  * relation, there is a window where bufmgr/smgr have created a new
999  * all-zero page but it hasn't yet been write-locked by _bt_getbuf(). If
1000  * we manage to scan such a page here, we'll improperly assume it can be
1001  * recycled. Taking the lock synchronizes things enough to prevent a
1002  * problem: either num_pages won't include the new page, or _bt_getbuf
1003  * already has write lock on the buffer and it will be fully initialized
1004  * before we can examine it. (See also vacuumlazy.c, which has the same
1005  * issue.) Also, we need not worry if a page is added immediately after
1006  * we look; the page splitting code already has write-lock on the left
1007  * page before it adds a right page, so we must already have processed any
1008  * tuples due to be moved into such a page.
1009  *
1010  * We can skip locking for new or temp relations, however, since no one
1011  * else could be accessing them.
1012  */
1013  needLock = !RELATION_IS_LOCAL(rel);
1014 
1015  blkno = BTREE_METAPAGE + 1;
1016  for (;;)
1017  {
1018  /* Get the current relation length */
1019  if (needLock)
1020  LockRelationForExtension(rel, ExclusiveLock);
1021  num_pages = RelationGetNumberOfBlocks(rel);
1022  if (needLock)
1023  UnlockRelationForExtension(rel, ExclusiveLock);
1024 
1025  if (info->report_progress)
1026  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
1027  num_pages);
1028 
1029  /* Quit if we've scanned the whole relation */
1030  if (blkno >= num_pages)
1031  break;
1032  /* Iterate over pages, then loop back to recheck length */
1033  for (; blkno < num_pages; blkno++)
1034  {
1035  btvacuumpage(&vstate, blkno, blkno);
1036  if (info->report_progress)
1037  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1038  blkno);
1039  }
1040  }
1041 
1042  MemoryContextDelete(vstate.pagedelcontext);
1043 
1044  /*
1045  * If we found any recyclable pages (and recorded them in the FSM), then
1046  * forcibly update the upper-level FSM pages to ensure that searchers can
1047  * find them. It's possible that the pages were also found during
1048  * previous scans and so this is a waste of time, but it's cheap enough
1049  * relative to scanning the index that it shouldn't matter much, and
1050  * making sure that free pages are available sooner not later seems
1051  * worthwhile.
1052  *
1053  * Note that if no recyclable pages exist, we don't bother vacuuming the
1054  * FSM at all.
1055  */
1056  if (vstate.totFreePages > 0)
1057  IndexFreeSpaceMapVacuum(rel);
1058 
1059  /* update statistics */
1060  stats->num_pages = num_pages;
1061  stats->pages_free = vstate.totFreePages;
1062 
1063  if (oldestBtpoXact)
1064  *oldestBtpoXact = vstate.oldestBtpoXact;
1065 }
1066 
1067 /*
1068  * btvacuumpage --- VACUUM one page
1069  *
1070  * This processes a single page for btvacuumscan(). In some cases we
1071  * must go back and re-examine previously-scanned pages; this routine
1072  * recurses when necessary to handle that case.
1073  *
1074  * blkno is the page to process. orig_blkno is the highest block number
1075  * reached by the outer btvacuumscan loop (the same as blkno, unless we
1076  * are recursing to re-examine a previous page).
1077  */
1078 static void
1079 btvacuumpage(BTVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno)
1080 {
1081  IndexVacuumInfo *info = vstate->info;
1082  IndexBulkDeleteResult *stats = vstate->stats;
1083  IndexBulkDeleteCallback callback = vstate->callback;
1084  void *callback_state = vstate->callback_state;
1085  Relation rel = info->index;
1086  bool delete_now;
1087  BlockNumber recurse_to;
1088  Buffer buf;
1089  Page page;
1090  BTPageOpaque opaque = NULL;
1091 
1092 restart:
1093  delete_now = false;
1094  recurse_to = P_NONE;
1095 
1096  /* call vacuum_delay_point while not holding any buffer lock */
1097  vacuum_delay_point();
1098 
1099  /*
1100  * We can't use _bt_getbuf() here because it always applies
1101  * _bt_checkpage(), which will barf on an all-zero page. We want to
1102  * recycle all-zero pages, not fail. Also, we want to use a nondefault
1103  * buffer access strategy.
1104  */
1105  buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
1106  info->strategy);
1107  LockBuffer(buf, BT_READ);
1108  page = BufferGetPage(buf);
1109  if (!PageIsNew(page))
1110  {
1111  _bt_checkpage(rel, buf);
1112  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1113  }
1114 
1115  /*
1116  * If we are recursing, the only case we want to do anything with is a
1117  * live leaf page having the current vacuum cycle ID. Any other state
1118  * implies we already saw the page (eg, deleted it as being empty).
1119  */
1120  if (blkno != orig_blkno)
1121  {
1122  if (_bt_page_recyclable(page) ||
1123  P_IGNORE(opaque) ||
1124  !P_ISLEAF(opaque) ||
1125  opaque->btpo_cycleid != vstate->cycleid)
1126  {
1127  _bt_relbuf(rel, buf);
1128  return;
1129  }
1130  }
1131 
1132  /* Page is valid, see what to do with it */
1133  if (_bt_page_recyclable(page))
1134  {
1135  /* Okay to recycle this page */
1136  RecordFreeIndexPage(rel, blkno);
1137  vstate->totFreePages++;
1138  stats->pages_deleted++;
1139  }
1140  else if (P_ISDELETED(opaque))
1141  {
1142  /* Already deleted, but can't recycle yet */
1143  stats->pages_deleted++;
1144 
1145  /* Update the oldest btpo.xact */
1146  if (!TransactionIdIsValid(vstate->oldestBtpoXact) ||
1147  TransactionIdPrecedes(opaque->btpo.xact, vstate->oldestBtpoXact))
1148  vstate->oldestBtpoXact = opaque->btpo.xact;
1149  }
1150  else if (P_ISHALFDEAD(opaque))
1151  {
1152  /* Half-dead, try to delete */
1153  delete_now = true;
1154  }
1155  else if (P_ISLEAF(opaque))
1156  {
1157  OffsetNumber deletable[MaxOffsetNumber];
1158  int ndeletable;
1159  OffsetNumber offnum,
1160  minoff,
1161  maxoff;
1162 
1163  /*
1164  * Trade in the initial read lock for a super-exclusive write lock on
1165  * this page. We must get such a lock on every leaf page over the
1166  * course of the vacuum scan, whether or not it actually contains any
1167  * deletable tuples --- see nbtree/README.
1168  */
1169  LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1170  LockBufferForCleanup(buf);
1171 
1172  /*
1173  * Check whether we need to recurse back to earlier pages. What we
1174  * are concerned about is a page split that happened since we started
1175  * the vacuum scan. If the split moved some tuples to a lower page
1176  * then we might have missed 'em. If so, set up for tail recursion.
1177  * (Must do this before possibly clearing btpo_cycleid below!)
1178  */
1179  if (vstate->cycleid != 0 &&
1180  opaque->btpo_cycleid == vstate->cycleid &&
1181  !(opaque->btpo_flags & BTP_SPLIT_END) &&
1182  !P_RIGHTMOST(opaque) &&
1183  opaque->btpo_next < orig_blkno)
1184  recurse_to = opaque->btpo_next;
1185 
1186  /*
1187  * When each VACUUM begins, it determines an OldestXmin cutoff value.
1188  * Tuples before the cutoff are removed by VACUUM. Scan over all
1189  * items to see which ones need to be deleted according to the cutoff
1190  * point, using the callback.
1191  */
1192  ndeletable = 0;
1193  minoff = P_FIRSTDATAKEY(opaque);
1194  maxoff = PageGetMaxOffsetNumber(page);
1195  if (callback)
1196  {
1197  for (offnum = minoff;
1198  offnum <= maxoff;
1199  offnum = OffsetNumberNext(offnum))
1200  {
1201  IndexTuple itup;
1202  ItemPointer htup;
1203 
1204  itup = (IndexTuple) PageGetItem(page,
1205  PageGetItemId(page, offnum));
1206  htup = &(itup->t_tid);
1207 
1208  /*
1209  * Hot Standby assumes that it's okay that XLOG_BTREE_VACUUM
1210  * records do not produce their own conflicts. This is safe
1211  * as long as the callback function only considers whether the
1212  * index tuple refers to pre-cutoff heap tuples that were
1213  * certainly already pruned away during VACUUM's initial heap
1214  * scan by the time we get here. (XLOG_HEAP2_CLEANUP_INFO
1215  * records produce conflicts using a latestRemovedXid value
1216  * for the entire VACUUM, so there is no need to produce our
1217  * own conflict now.)
1218  *
1219  * Backends with snapshots acquired after a VACUUM starts but
1220  * before it finishes could have a RecentGlobalXmin with a
1221  * later xid than the VACUUM's OldestXmin cutoff. These
1222  * backends might happen to opportunistically mark some index
1223  * tuples LP_DEAD before we reach them, even though they may
1224  * be after our cutoff. We don't try to kill these "extra"
1225  * index tuples in _bt_delitems_vacuum(). This keeps things simple,
1226  * simple, and allows us to always avoid generating our own
1227  * conflicts.
1228  */
1229  if (callback(htup, callback_state))
1230  deletable[ndeletable++] = offnum;
1231  }
1232  }
1233 
1234  /*
1235  * Apply any needed deletes. We issue just one _bt_delitems_vacuum()
1236  * call per page, so as to minimize WAL traffic.
1237  */
1238  if (ndeletable > 0)
1239  {
1240  _bt_delitems_vacuum(rel, buf, deletable, ndeletable);
1241 
1242  stats->tuples_removed += ndeletable;
1243  /* must recompute maxoff */
1244  maxoff = PageGetMaxOffsetNumber(page);
1245  }
1246  else
1247  {
1248  /*
1249  * If the page has been split during this vacuum cycle, it seems
1250  * worth expending a write to clear btpo_cycleid even if we don't
1251  * have any deletions to do. (If we do, _bt_delitems_vacuum takes
1252  * care of this.) This ensures we won't process the page again.
1253  *
1254  * We treat this like a hint-bit update because there's no need to
1255  * WAL-log it.
1256  */
1257  if (vstate->cycleid != 0 &&
1258  opaque->btpo_cycleid == vstate->cycleid)
1259  {
1260  opaque->btpo_cycleid = 0;
1261  MarkBufferDirtyHint(buf, true);
1262  }
1263  }
1264 
1265  /*
1266  * If it's now empty, try to delete; else count the live tuples. We
1267  * don't delete when recursing, though, to avoid putting entries into
1268  * freePages out-of-order (doesn't seem worth any extra code to handle
1269  * the case).
1270  */
1271  if (minoff > maxoff)
1272  delete_now = (blkno == orig_blkno);
1273  else
1274  stats->num_index_tuples += maxoff - minoff + 1;
1275  }
1276 
1277  if (delete_now)
1278  {
1279  MemoryContext oldcontext;
1280  int ndel;
1281 
1282  /* Run pagedel in a temp context to avoid memory leakage */
1283  MemoryContextReset(vstate->pagedelcontext);
1284  oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
1285 
1286  ndel = _bt_pagedel(rel, buf);
1287 
1288  /* count only this page, else may double-count parent */
1289  if (ndel)
1290  {
1291  stats->pages_deleted++;
1292  if (!TransactionIdIsValid(vstate->oldestBtpoXact) ||
1293  TransactionIdPrecedes(opaque->btpo.xact, vstate->oldestBtpoXact))
1294  vstate->oldestBtpoXact = opaque->btpo.xact;
1295  }
1296 
1297  MemoryContextSwitchTo(oldcontext);
1298  /* pagedel released buffer, so we shouldn't */
1299  }
1300  else
1301  _bt_relbuf(rel, buf);
1302 
1303  /*
1304  * This is really tail recursion, but if the compiler is too stupid to
1305  * optimize it as such, we'd eat an uncomfortably large amount of stack
1306  * space per recursion level (due to the deletable[] array). A failure is
1307  * improbable since the number of levels isn't likely to be large ... but
1308  * just in case, let's hand-optimize into a loop.
1309  */
1310  if (recurse_to != P_NONE)
1311  {
1312  blkno = recurse_to;
1313  goto restart;
1314  }
1315 }
1316 
1317 /*
1318  * btcanreturn() -- Check whether btree indexes support index-only scans.
1319  *
1320  * btrees always do, so this is trivial.
1321  */
1322 bool
1323 btcanreturn(Relation index, int attno)
1324 {
1325  return true;
1326 }