PostgreSQL Source Code  git master
nbtree.c
1 /*-------------------------------------------------------------------------
2  *
3  * nbtree.c
4  * Implementation of Lehman and Yao's btree management algorithm for
5  * Postgres.
6  *
7  * NOTES
8  * This file contains only the public interface routines.
9  *
10  *
11  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
12  * Portions Copyright (c) 1994, Regents of the University of California
13  *
14  * IDENTIFICATION
15  * src/backend/access/nbtree/nbtree.c
16  *
17  *-------------------------------------------------------------------------
18  */
19 #include "postgres.h"
20 
21 #include "access/nbtree.h"
22 #include "access/relscan.h"
23 #include "access/xlog.h"
24 #include "commands/vacuum.h"
25 #include "nodes/execnodes.h"
26 #include "pgstat.h"
27 #include "storage/condition_variable.h"
28 #include "storage/indexfsm.h"
29 #include "storage/ipc.h"
30 #include "storage/lmgr.h"
31 #include "storage/smgr.h"
32 #include "utils/builtins.h"
33 #include "utils/index_selfuncs.h"
34 #include "utils/memutils.h"
35 
36 
37 /* Working state needed by btvacuumpage */
38 typedef struct
39 {
40  IndexVacuumInfo *info;
41  IndexBulkDeleteResult *stats;
42  IndexBulkDeleteCallback callback;
43  void *callback_state;
44  BTCycleId cycleid;
45  BlockNumber lastBlockVacuumed; /* highest blkno actually vacuumed */
46  BlockNumber lastBlockLocked; /* highest blkno we've cleanup-locked */
47  BlockNumber totFreePages; /* true total # of free pages */
48  MemoryContext pagedelcontext;
49 } BTVacState;
50 
51 /*
52  * BTPARALLEL_NOT_INITIALIZED indicates that the scan has not started.
53  *
54  * BTPARALLEL_ADVANCING indicates that some process is advancing the scan to
55  * a new page; others must wait.
56  *
57  * BTPARALLEL_IDLE indicates that no backend is currently advancing the scan
58  * to a new page; some process can start doing that.
59  *
60  * BTPARALLEL_DONE indicates that the scan is complete (including error exit).
61  * We reach this state once for every distinct combination of array keys.
62  */
63 typedef enum
64 {
65  BTPARALLEL_NOT_INITIALIZED,
66  BTPARALLEL_ADVANCING,
67  BTPARALLEL_IDLE,
68  BTPARALLEL_DONE
69 } BTPS_State;
70 
71 /*
72  * BTParallelScanDescData contains btree specific shared information required
73  * for parallel scan.
74  */
75 typedef struct BTParallelScanDescData
76 {
77  BlockNumber btps_scanPage; /* latest or next page to be scanned */
78  BTPS_State btps_pageStatus; /* indicates whether next page is
79  * available for scan. see above for
80  * possible states of parallel scan. */
81  int btps_arrayKeyCount; /* count indicating number of array scan
82  * keys processed by parallel scan */
83  slock_t btps_mutex; /* protects above variables */
84  ConditionVariable btps_cv; /* used to synchronize parallel scan */
85 } BTParallelScanDescData;
86 
87 typedef struct BTParallelScanDescData *BTParallelScanDesc;
88 
89 
90 static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
91  IndexBulkDeleteCallback callback, void *callback_state,
92  BTCycleId cycleid);
93 static void btvacuumpage(BTVacState *vstate, BlockNumber blkno,
94  BlockNumber orig_blkno);
95 
96 
97 /*
98  * Btree handler function: return IndexAmRoutine with access method parameters
99  * and callbacks.
100  */
101 Datum
102 bthandler(PG_FUNCTION_ARGS)
103 {
104  IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
105 
106  amroutine->amstrategies = BTMaxStrategyNumber;
107  amroutine->amsupport = BTNProcs;
108  amroutine->amcanorder = true;
109  amroutine->amcanorderbyop = false;
110  amroutine->amcanbackward = true;
111  amroutine->amcanunique = true;
112  amroutine->amcanmulticol = true;
113  amroutine->amoptionalkey = true;
114  amroutine->amsearcharray = true;
115  amroutine->amsearchnulls = true;
116  amroutine->amstorage = false;
117  amroutine->amclusterable = true;
118  amroutine->ampredlocks = true;
119  amroutine->amcanparallel = true;
120  amroutine->amkeytype = InvalidOid;
121 
122  amroutine->ambuild = btbuild;
123  amroutine->ambuildempty = btbuildempty;
124  amroutine->aminsert = btinsert;
125  amroutine->ambulkdelete = btbulkdelete;
126  amroutine->amvacuumcleanup = btvacuumcleanup;
127  amroutine->amcanreturn = btcanreturn;
128  amroutine->amcostestimate = btcostestimate;
129  amroutine->amoptions = btoptions;
130  amroutine->amproperty = btproperty;
131  amroutine->amvalidate = btvalidate;
132  amroutine->ambeginscan = btbeginscan;
133  amroutine->amrescan = btrescan;
134  amroutine->amgettuple = btgettuple;
135  amroutine->amgetbitmap = btgetbitmap;
136  amroutine->amendscan = btendscan;
137  amroutine->ammarkpos = btmarkpos;
138  amroutine->amrestrpos = btrestrpos;
139  amroutine->amestimateparallelscan = btestimateparallelscan;
140  amroutine->aminitparallelscan = btinitparallelscan;
141  amroutine->amparallelrescan = btparallelrescan;
142 
143  PG_RETURN_POINTER(amroutine);
144 }
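
As a quick orientation (not part of nbtree.c): core code never calls bthandler() directly; it looks up the access method's amhandler in pg_am and invokes it through GetIndexAmRoutine() in src/backend/access/index/amapi.c. A minimal sketch, with a hypothetical caller name and an OID argument whose lookup is elided:

/* Illustrative sketch only -- example_fetch_btree_routine is hypothetical. */
static IndexAmRoutine *
example_fetch_btree_routine(Oid btree_amhandler_oid)
{
	/* Invokes bthandler() via the fmgr and validates the returned node. */
	return GetIndexAmRoutine(btree_amhandler_oid);
}
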
145 
146 /*
147  * btbuildempty() -- build an empty btree index in the initialization fork
148  */
149 void
150 btbuildempty(Relation index)
151 {
152  Page metapage;
153 
154  /* Construct metapage. */
155  metapage = (Page) palloc(BLCKSZ);
156  _bt_initmetapage(metapage, P_NONE, 0);
157 
158  /*
159  * Write the page and log it. It might seem that an immediate sync would
160  * be sufficient to guarantee that the file exists on disk, but recovery
161  * itself might remove it while replaying, for example, an
162  * XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record. Therefore, we need
163  * this even when wal_level=minimal.
164  */
165  PageSetChecksumInplace(metapage, BTREE_METAPAGE);
166  smgrwrite(index->rd_smgr, INIT_FORKNUM, BTREE_METAPAGE,
167  (char *) metapage, true);
168  log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
169  BTREE_METAPAGE, metapage, true);
170 
171  /*
172  * An immediate sync is required even if we xlog'd the page, because the
173  * write did not go through shared_buffers and therefore a concurrent
174  * checkpoint may have moved the redo pointer past our xlog record.
175  */
176  smgrimmedsync(index->rd_smgr, INIT_FORKNUM);
177 }
178 
179 /*
180  * btinsert() -- insert an index tuple into a btree.
181  *
182  * Descend the tree recursively, find the appropriate location for our
183  * new tuple, and put it there.
184  */
185 bool
186 btinsert(Relation rel, Datum *values, bool *isnull,
187  ItemPointer ht_ctid, Relation heapRel,
188  IndexUniqueCheck checkUnique,
189  IndexInfo *indexInfo)
190 {
191  bool result;
192  IndexTuple itup;
193 
194  /* generate an index tuple */
195  itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
196  itup->t_tid = *ht_ctid;
197 
198  result = _bt_doinsert(rel, itup, checkUnique, heapRel);
199 
200  pfree(itup);
201 
202  return result;
203 }
204 
205 /*
206  * btgettuple() -- Get the next tuple in the scan.
207  */
208 bool
209 btgettuple(IndexScanDesc scan, ScanDirection dir)
210 {
211  BTScanOpaque so = (BTScanOpaque) scan->opaque;
212  bool res;
213 
214  /* btree indexes are never lossy */
215  scan->xs_recheck = false;
216 
217  /*
218  * If we have any array keys, initialize them during first call for a
219  * scan. We can't do this in btrescan because we don't know the scan
220  * direction at that time.
221  */
222  if (so->numArrayKeys && !BTScanPosIsValid(so->currPos))
223  {
224  /* punt if we have any unsatisfiable array keys */
225  if (so->numArrayKeys < 0)
226  return false;
227 
228  _bt_start_array_keys(scan, dir);
229  }
230 
231  /* This loop handles advancing to the next array elements, if any */
232  do
233  {
234  /*
235  * If we've already initialized this scan, we can just advance it in
236  * the appropriate direction. If we haven't done so yet, we call
237  * _bt_first() to get the first item in the scan.
238  */
239  if (!BTScanPosIsValid(so->currPos))
240  res = _bt_first(scan, dir);
241  else
242  {
243  /*
244  * Check to see if we should kill the previously-fetched tuple.
245  */
246  if (scan->kill_prior_tuple)
247  {
248  /*
249  * Yes, remember it for later. (We'll deal with all such
250  * tuples at once right before leaving the index page.) The
251  * test for numKilled overrun is not just paranoia: if the
252  * caller reverses direction in the indexscan then the same
253  * item might get entered multiple times. It's not worth
254  * trying to optimize that, so we don't detect it, but instead
255  * just forget any excess entries.
256  */
257  if (so->killedItems == NULL)
258  so->killedItems = (int *)
259  palloc(MaxIndexTuplesPerPage * sizeof(int));
260  if (so->numKilled < MaxIndexTuplesPerPage)
261  so->killedItems[so->numKilled++] = so->currPos.itemIndex;
262  }
263 
264  /*
265  * Now continue the scan.
266  */
267  res = _bt_next(scan, dir);
268  }
269 
270  /* If we have a tuple, return it ... */
271  if (res)
272  break;
273  /* ... otherwise see if we have more array keys to deal with */
274  } while (so->numArrayKeys && _bt_advance_array_keys(scan, dir));
275 
276  return res;
277 }
278 
279 /*
280  * btgetbitmap() -- gets all matching tuples, and adds them to a bitmap
281  */
282 int64
283 btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
284 {
285  BTScanOpaque so = (BTScanOpaque) scan->opaque;
286  int64 ntids = 0;
287  ItemPointer heapTid;
288 
289  /*
290  * If we have any array keys, initialize them.
291  */
292  if (so->numArrayKeys)
293  {
294  /* punt if we have any unsatisfiable array keys */
295  if (so->numArrayKeys < 0)
296  return ntids;
297 
299  }
300 
301  /* This loop handles advancing to the next array elements, if any */
302  do
303  {
304  /* Fetch the first page & tuple */
305  if (_bt_first(scan, ForwardScanDirection))
306  {
307  /* Save tuple ID, and continue scanning */
308  heapTid = &scan->xs_ctup.t_self;
309  tbm_add_tuples(tbm, heapTid, 1, false);
310  ntids++;
311 
312  for (;;)
313  {
314  /*
315  * Advance to next tuple within page. This is the same as the
316  * easy case in _bt_next().
317  */
318  if (++so->currPos.itemIndex > so->currPos.lastItem)
319  {
320  /* let _bt_next do the heavy lifting */
321  if (!_bt_next(scan, ForwardScanDirection))
322  break;
323  }
324 
325  /* Save tuple ID, and continue scanning */
326  heapTid = &so->currPos.items[so->currPos.itemIndex].heapTid;
327  tbm_add_tuples(tbm, heapTid, 1, false);
328  ntids++;
329  }
330  }
331  /* Now see if we have more array keys to deal with */
332  } while (so->numArrayKeys && _bt_advance_array_keys(scan, ForwardScanDirection));
333 
334  return ntids;
335 }
336 
337 /*
338  * btbeginscan() -- start a scan on a btree index
339  */
339  */
340 IndexScanDesc
341 btbeginscan(Relation rel, int nkeys, int norderbys)
342 {
343  IndexScanDesc scan;
344  BTScanOpaque so;
345 
346  /* no order by operators allowed */
347  Assert(norderbys == 0);
348 
349  /* get the scan */
350  scan = RelationGetIndexScan(rel, nkeys, norderbys);
351 
352  /* allocate private workspace */
353  so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
354  BTScanPosInvalidate(so->currPos);
355  BTScanPosInvalidate(so->markPos);
356  if (scan->numberOfKeys > 0)
357  so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
358  else
359  so->keyData = NULL;
360 
361  so->arrayKeyData = NULL; /* assume no array keys for now */
362  so->numArrayKeys = 0;
363  so->arrayKeys = NULL;
364  so->arrayContext = NULL;
365 
366  so->killedItems = NULL; /* until needed */
367  so->numKilled = 0;
368 
369  /*
370  * We don't know yet whether the scan will be index-only, so we do not
371  * allocate the tuple workspace arrays until btrescan. However, we set up
372  * scan->xs_itupdesc whether we'll need it or not, since that's so cheap.
373  */
374  so->currTuples = so->markTuples = NULL;
375 
376  scan->xs_itupdesc = RelationGetDescr(rel);
377 
378  scan->opaque = so;
379 
380  return scan;
381 }
382 
383 /*
384  * btrescan() -- rescan an index relation
385  */
386 void
387 btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
388  ScanKey orderbys, int norderbys)
389 {
390  BTScanOpaque so = (BTScanOpaque) scan->opaque;
391 
392  /* we aren't holding any read locks, but gotta drop the pins */
393  if (BTScanPosIsValid(so->currPos))
394  {
395  /* Before leaving current page, deal with any killed items */
396  if (so->numKilled > 0)
397  _bt_killitems(scan);
398  BTScanPosUnpinIfPinned(so->currPos);
399  BTScanPosInvalidate(so->currPos);
400  }
401 
402  so->markItemIndex = -1;
403  so->arrayKeyCount = 0;
404  BTScanPosUnpinIfPinned(so->markPos);
405  BTScanPosInvalidate(so->markPos);
406 
407  /*
408  * Allocate tuple workspace arrays, if needed for an index-only scan and
409  * not already done in a previous rescan call. To save on palloc
410  * overhead, both workspaces are allocated as one palloc block; only this
411  * function and btendscan know that.
412  *
413  * NOTE: this data structure also makes it safe to return data from a
414  * "name" column, even though btree name_ops uses an underlying storage
415  * datatype of cstring. The risk there is that "name" is supposed to be
416  * padded to NAMEDATALEN, but the actual index tuple is probably shorter.
417  * However, since we only return data out of tuples sitting in the
418  * currTuples array, a fetch of NAMEDATALEN bytes can at worst pull some
419  * data out of the markTuples array --- running off the end of memory for
420  * a SIGSEGV is not possible. Yeah, this is ugly as sin, but it beats
421  * adding special-case treatment for name_ops elsewhere.
422  */
423  if (scan->xs_want_itup && so->currTuples == NULL)
424  {
425  so->currTuples = (char *) palloc(BLCKSZ * 2);
426  so->markTuples = so->currTuples + BLCKSZ;
427  }
428 
429  /*
430  * Reset the scan keys. Note that keys ordering stuff moved to _bt_first.
431  * - vadim 05/05/97
432  */
433  if (scankey && scan->numberOfKeys > 0)
434  memmove(scan->keyData,
435  scankey,
436  scan->numberOfKeys * sizeof(ScanKeyData));
437  so->numberOfKeys = 0; /* until _bt_preprocess_keys sets it */
438 
439  /* If any keys are SK_SEARCHARRAY type, set up array-key info */
440  _bt_preprocess_array_keys(scan);
441 }
442 
443 /*
444  * btendscan() -- close down a scan
445  */
446 void
447 btendscan(IndexScanDesc scan)
448 {
449  BTScanOpaque so = (BTScanOpaque) scan->opaque;
450 
451  /* we aren't holding any read locks, but gotta drop the pins */
452  if (BTScanPosIsValid(so->currPos))
453  {
454  /* Before leaving current page, deal with any killed items */
455  if (so->numKilled > 0)
456  _bt_killitems(scan);
457  BTScanPosUnpinIfPinned(so->currPos);
458  }
459 
460  so->markItemIndex = -1;
461  BTScanPosUnpinIfPinned(so->markPos);
462 
463  /* No need to invalidate positions, the RAM is about to be freed. */
464 
465  /* Release storage */
466  if (so->keyData != NULL)
467  pfree(so->keyData);
468  /* so->arrayKeyData and so->arrayKeys are in arrayContext */
469  if (so->arrayContext != NULL)
470  MemoryContextDelete(so->arrayContext);
471  if (so->killedItems != NULL)
472  pfree(so->killedItems);
473  if (so->currTuples != NULL)
474  pfree(so->currTuples);
475  /* so->markTuples should not be pfree'd, see btrescan */
476  pfree(so);
477 }
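
Taken together, the scan entry points above (btbeginscan, btrescan, btgettuple, btendscan) are always driven through the generic index AM layer. A minimal sketch (not part of nbtree.c) of that calling sequence, using the indexam.c wrappers; the function name, snapshot, and keys setup below are hypothetical:

static int64
example_count_matches(Relation heaprel, Relation indexrel,
                      Snapshot snapshot, ScanKey keys, int nkeys)
{
	IndexScanDesc scan;
	int64		nmatches = 0;

	scan = index_beginscan(heaprel, indexrel, snapshot, nkeys, 0); /* -> btbeginscan() */
	index_rescan(scan, keys, nkeys, NULL, 0);                      /* -> btrescan() */
	while (index_getnext(scan, ForwardScanDirection) != NULL)      /* -> btgettuple() */
		nmatches++;
	index_endscan(scan);                                           /* -> btendscan() */
	return nmatches;
}
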
478 
479 /*
480  * btmarkpos() -- save current scan position
481  */
482 void
483 btmarkpos(IndexScanDesc scan)
484 {
485  BTScanOpaque so = (BTScanOpaque) scan->opaque;
486 
487  /* There may be an old mark with a pin (but no lock). */
488  BTScanPosUnpinIfPinned(so->markPos);
489 
490  /*
491  * Just record the current itemIndex. If we later step to next page
492  * before releasing the marked position, _bt_steppage makes a full copy of
493  * the currPos struct in markPos. If (as often happens) the mark is moved
494  * before we leave the page, we don't have to do that work.
495  */
496  if (BTScanPosIsValid(so->currPos))
497  so->markItemIndex = so->currPos.itemIndex;
498  else
499  {
500  BTScanPosInvalidate(so->markPos);
501  so->markItemIndex = -1;
502  }
503 
504  /* Also record the current positions of any array keys */
505  if (so->numArrayKeys)
506  _bt_mark_array_keys(scan);
507 }
508 
509 /*
510  * btrestrpos() -- restore scan to last saved position
511  */
512 void
513 btrestrpos(IndexScanDesc scan)
514 {
515  BTScanOpaque so = (BTScanOpaque) scan->opaque;
516 
517  /* Restore the marked positions of any array keys */
518  if (so->numArrayKeys)
519  _bt_restore_array_keys(scan);
520 
521  if (so->markItemIndex >= 0)
522  {
523  /*
524  * The scan has never moved to a new page since the last mark. Just
525  * restore the itemIndex.
526  *
527  * NB: In this case we can't count on anything in so->markPos to be
528  * accurate.
529  */
530  so->currPos.itemIndex = so->markItemIndex;
531  }
532  else
533  {
534  /*
535  * The scan moved to a new page after last mark or restore, and we are
536  * now restoring to the marked page. We aren't holding any read
537  * locks, but if we're still holding the pin for the current position,
538  * we must drop it.
539  */
540  if (BTScanPosIsValid(so->currPos))
541  {
542  /* Before leaving current page, deal with any killed items */
543  if (so->numKilled > 0)
544  _bt_killitems(scan);
545  BTScanPosUnpinIfPinned(so->currPos);
546  }
547 
548  if (BTScanPosIsValid(so->markPos))
549  {
550  /* bump pin on mark buffer for assignment to current buffer */
551  if (BTScanPosIsPinned(so->markPos))
552  IncrBufferRefCount(so->markPos.buf);
553  memcpy(&so->currPos, &so->markPos,
554  offsetof(BTScanPosData, items[1]) +
555  so->markPos.lastItem * sizeof(BTScanPosItem));
556  if (so->currTuples)
557  memcpy(so->currTuples, so->markTuples,
558  so->markPos.nextTupleOffset);
559  }
560  else
561  BTScanPosInvalidate(so->currPos);
562  }
563 }
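
A small usage sketch (not part of nbtree.c): mark/restore is driven through the AM API, so callers such as merge join go through index_markpos()/index_restrpos() in indexam.c, which dispatch to the two routines above. The function name below is hypothetical.

static void
example_mark_then_restore(IndexScanDesc scan)
{
	index_markpos(scan);        /* dispatches to btmarkpos() */
	/* ... fetch some more tuples, then back up ... */
	index_restrpos(scan);       /* dispatches to btrestrpos() */
}
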
564 
565 /*
566  * btestimateparallelscan -- estimate storage for BTParallelScanDescData
567  */
568 Size
569 btestimateparallelscan(void)
570 {
571  return sizeof(BTParallelScanDescData);
572 }
573 
574 /*
575  * btinitparallelscan -- initialize BTParallelScanDesc for parallel btree scan
576  */
577 void
578 btinitparallelscan(void *target)
579 {
580  BTParallelScanDesc bt_target = (BTParallelScanDesc) target;
581 
582  SpinLockInit(&bt_target->btps_mutex);
583  bt_target->btps_scanPage = InvalidBlockNumber;
584  bt_target->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
585  bt_target->btps_arrayKeyCount = 0;
586  ConditionVariableInit(&bt_target->btps_cv);
587 }
588 
589 /*
590  * btparallelrescan() -- reset parallel scan
591  */
592 void
593 btparallelrescan(IndexScanDesc scan)
594 {
595  BTParallelScanDesc btscan;
596  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
597 
598  Assert(parallel_scan);
599 
600  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
601  parallel_scan->ps_offset);
602 
603  /*
604  * In theory, we don't need to acquire the spinlock here, because there
605  * shouldn't be any other workers running at this point, but we do so for
606  * consistency.
607  */
608  SpinLockAcquire(&btscan->btps_mutex);
609  btscan->btps_scanPage = InvalidBlockNumber;
610  btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
611  btscan->btps_arrayKeyCount = 0;
612  SpinLockRelease(&btscan->btps_mutex);
613 }
614 
615 /*
616  * _bt_parallel_seize() -- Begin the process of advancing the scan to a new
617  * page. Other scans must wait until we call _bt_parallel_release() or
618  * _bt_parallel_done().
619  *
620  * The return value is true if we successfully seized the scan and false
621  * if we did not. The latter case occurs if no pages remain for the current
622  * set of scankeys.
623  *
624  * If the return value is true, *pageno returns the next or current page
625  * of the scan (depending on the scan direction). An invalid block number
626  * means the scan hasn't yet started, and P_NONE means we've reached the end.
627  * The first time a participating process reaches the last page, it will return
628  * true and set *pageno to P_NONE; after that, further attempts to seize the
629  * scan will return false.
630  *
631  * Callers should ignore the value of pageno if the return value is false.
632  */
633 bool
634 _bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno)
635 {
636  BTScanOpaque so = (BTScanOpaque) scan->opaque;
637  BTPS_State pageStatus;
638  bool exit_loop = false;
639  bool status = true;
640  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
641  BTParallelScanDesc btscan;
642 
643  *pageno = P_NONE;
644 
645  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
646  parallel_scan->ps_offset);
647 
648  while (1)
649  {
650  SpinLockAcquire(&btscan->btps_mutex);
651  pageStatus = btscan->btps_pageStatus;
652 
653  if (so->arrayKeyCount < btscan->btps_arrayKeyCount)
654  {
655  /* Parallel scan has already advanced to a new set of scankeys. */
656  status = false;
657  }
658  else if (pageStatus == BTPARALLEL_DONE)
659  {
660  /*
661  * We're done with this set of scankeys. This may be the end, or
662  * there could be more sets to try.
663  */
664  status = false;
665  }
666  else if (pageStatus != BTPARALLEL_ADVANCING)
667  {
668  /*
669  * We have successfully seized control of the scan for the purpose
670  * of advancing it to a new page!
671  */
672  btscan->btps_pageStatus = BTPARALLEL_ADVANCING;
673  *pageno = btscan->btps_scanPage;
674  exit_loop = true;
675  }
676  SpinLockRelease(&btscan->btps_mutex);
677  if (exit_loop || !status)
678  break;
679  ConditionVariableSleep(&btscan->btps_cv, WAIT_EVENT_BTREE_PAGE);
680  }
681  ConditionVariableCancelSleep();
682 
683  return status;
684 }
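
To make the seize/release protocol above concrete, here is a minimal sketch (not part of nbtree.c) of how one participant advances the scan; the real callers live in nbtsearch.c (_bt_first() and _bt_readnextpage()), and the function and variable names below are hypothetical.

static bool
example_advance_parallel_scan(IndexScanDesc scan)
{
	BlockNumber blkno;
	BlockNumber next_blkno;

	if (!_bt_parallel_seize(scan, &blkno))
		return false;           /* scan already finished for these keys */
	if (blkno == P_NONE)
	{
		_bt_parallel_done(scan);    /* no pages left; wake any waiters */
		return false;
	}

	/*
	 * Read page blkno here; in the real callers the page's right sibling
	 * (btpo_next) becomes the next page to hand out.
	 */
	next_blkno = P_NONE;        /* placeholder for the sibling link */
	_bt_parallel_release(scan, next_blkno);
	return true;
}
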
685 
686 /*
687  * _bt_parallel_release() -- Complete the process of advancing the scan to a
688  * new page. We now have the new value btps_scanPage; some other backend
689  * can now begin advancing the scan.
690  */
691 void
692 _bt_parallel_release(IndexScanDesc scan, BlockNumber scan_page)
693 {
694  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
695  BTParallelScanDesc btscan;
696 
697  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
698  parallel_scan->ps_offset);
699 
700  SpinLockAcquire(&btscan->btps_mutex);
701  btscan->btps_scanPage = scan_page;
702  btscan->btps_pageStatus = BTPARALLEL_IDLE;
703  SpinLockRelease(&btscan->btps_mutex);
704  ConditionVariableSignal(&btscan->btps_cv);
705 }
706 
707 /*
708  * _bt_parallel_done() -- Mark the parallel scan as complete.
709  *
710  * When there are no pages left to scan, this function should be called to
711  * notify other workers. Otherwise, they might wait forever for the scan to
712  * advance to the next page.
713  */
714 void
715 _bt_parallel_done(IndexScanDesc scan)
716 {
717  BTScanOpaque so = (BTScanOpaque) scan->opaque;
718  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
719  BTParallelScanDesc btscan;
720  bool status_changed = false;
721 
722  /* Do nothing, for non-parallel scans */
723  if (parallel_scan == NULL)
724  return;
725 
726  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
727  parallel_scan->ps_offset);
728 
729  /*
730  * Mark the parallel scan as done for this combination of scan keys,
731  * unless some other process already did so. See also
732  * _bt_advance_array_keys.
733  */
734  SpinLockAcquire(&btscan->btps_mutex);
735  if (so->arrayKeyCount >= btscan->btps_arrayKeyCount &&
736  btscan->btps_pageStatus != BTPARALLEL_DONE)
737  {
738  btscan->btps_pageStatus = BTPARALLEL_DONE;
739  status_changed = true;
740  }
741  SpinLockRelease(&btscan->btps_mutex);
742 
743  /* wake up all the workers associated with this parallel scan */
744  if (status_changed)
745  ConditionVariableBroadcast(&btscan->btps_cv);
746 }
747 
748 /*
749  * _bt_parallel_advance_array_keys() -- Advances the parallel scan for array
750  * keys.
751  *
752  * Updates the count of array keys processed for both local and parallel
753  * scans.
754  */
755 void
756 _bt_parallel_advance_array_keys(IndexScanDesc scan)
757 {
758  BTScanOpaque so = (BTScanOpaque) scan->opaque;
759  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
760  BTParallelScanDesc btscan;
761 
762  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
763  parallel_scan->ps_offset);
764 
765  so->arrayKeyCount++;
766  SpinLockAcquire(&btscan->btps_mutex);
767  if (btscan->btps_pageStatus == BTPARALLEL_DONE)
768  {
769  btscan->btps_scanPage = InvalidBlockNumber;
770  btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
771  btscan->btps_arrayKeyCount++;
772  }
773  SpinLockRelease(&btscan->btps_mutex);
774 }
775 
776 /*
777  * Bulk deletion of all index entries pointing to a set of heap tuples.
778  * The set of target tuples is specified via a callback routine that tells
779  * whether any given heap tuple (identified by ItemPointer) is being deleted.
780  *
781  * Result: a palloc'd struct containing statistical info for VACUUM displays.
782  */
783 IndexBulkDeleteResult *
784 btbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
785  IndexBulkDeleteCallback callback, void *callback_state)
786 {
787  Relation rel = info->index;
788  BTCycleId cycleid;
789 
790  /* allocate stats if first time through, else re-use existing struct */
791  if (stats == NULL)
792  stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
793 
794  /* Establish the vacuum cycle ID to use for this scan */
795  /* The ENSURE stuff ensures we clean up shared memory on failure */
796  PG_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
797  {
798  cycleid = _bt_start_vacuum(rel);
799 
800  btvacuumscan(info, stats, callback, callback_state, cycleid);
801  }
802  PG_END_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
803  _bt_end_vacuum(rel);
804 
805  return stats;
806 }
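
For reference (not part of nbtree.c): the callback passed to btbulkdelete() has the IndexBulkDeleteCallback shape declared in genam.h; during a regular VACUUM it is lazy_tid_reaped() from vacuumlazy.c. A minimal sketch with a hypothetical name and state:

/* Returns true if the index entry pointing at itemptr should be removed. */
static bool
example_tid_is_dead(ItemPointer itemptr, void *state)
{
	/* A real callback would look itemptr up in the dead-TID list in state. */
	return false;
}
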
807 
808 /*
809  * Post-VACUUM cleanup.
810  *
811  * Result: a palloc'd struct containing statistical info for VACUUM displays.
812  */
813 IndexBulkDeleteResult *
814 btvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
815 {
816  /* No-op in ANALYZE ONLY mode */
817  if (info->analyze_only)
818  return stats;
819 
820  /*
821  * If btbulkdelete was called, we need not do anything, just return the
822  * stats from the latest btbulkdelete call. If it wasn't called, we must
823  * still do a pass over the index, to recycle any newly-recyclable pages
824  * and to obtain index statistics.
825  *
826  * Since we aren't going to actually delete any leaf items, there's no
827  * need to go through all the vacuum-cycle-ID pushups.
828  */
829  if (stats == NULL)
830  {
831  stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
832  btvacuumscan(info, stats, NULL, NULL, 0);
833  }
834 
835  /* Finally, vacuum the FSM */
836  IndexFreeSpaceMapVacuum(info->index);
837 
838  /*
839  * It's quite possible for us to be fooled by concurrent page splits into
840  * double-counting some index tuples, so disbelieve any total that exceeds
841  * the underlying heap's count ... if we know that accurately. Otherwise
842  * this might just make matters worse.
843  */
844  if (!info->estimated_count)
845  {
846  if (stats->num_index_tuples > info->num_heap_tuples)
847  stats->num_index_tuples = info->num_heap_tuples;
848  }
849 
850  return stats;
851 }
852 
853 /*
854  * btvacuumscan --- scan the index for VACUUMing purposes
855  *
856  * This combines the functions of looking for leaf tuples that are deletable
857  * according to the vacuum callback, looking for empty pages that can be
858  * deleted, and looking for old deleted pages that can be recycled. Both
859  * btbulkdelete and btvacuumcleanup invoke this (the latter only if no
860  * btbulkdelete call occurred).
861  *
862  * The caller is responsible for initially allocating/zeroing a stats struct
863  * and for obtaining a vacuum cycle ID if necessary.
864  */
865 static void
866 btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
867  IndexBulkDeleteCallback callback, void *callback_state,
868  BTCycleId cycleid)
869 {
870  Relation rel = info->index;
871  BTVacState vstate;
872  BlockNumber num_pages;
873  BlockNumber blkno;
874  bool needLock;
875 
876  /*
877  * Reset counts that will be incremented during the scan; needed in case
878  * of multiple scans during a single VACUUM command
879  */
880  stats->estimated_count = false;
881  stats->num_index_tuples = 0;
882  stats->pages_deleted = 0;
883 
884  /* Set up info to pass down to btvacuumpage */
885  vstate.info = info;
886  vstate.stats = stats;
887  vstate.callback = callback;
888  vstate.callback_state = callback_state;
889  vstate.cycleid = cycleid;
890  vstate.lastBlockVacuumed = BTREE_METAPAGE; /* Initialise at first block */
891  vstate.lastBlockLocked = BTREE_METAPAGE;
892  vstate.totFreePages = 0;
893 
894  /* Create a temporary memory context to run _bt_pagedel in */
895  vstate.pagedelcontext = AllocSetContextCreate(CurrentMemoryContext,
896  "_bt_pagedel",
897  ALLOCSET_DEFAULT_SIZES);
898 
899  /*
900  * The outer loop iterates over all index pages except the metapage, in
901  * physical order (we hope the kernel will cooperate in providing
902  * read-ahead for speed). It is critical that we visit all leaf pages,
903  * including ones added after we start the scan, else we might fail to
904  * delete some deletable tuples. Hence, we must repeatedly check the
905  * relation length. We must acquire the relation-extension lock while
906  * doing so to avoid a race condition: if someone else is extending the
907  * relation, there is a window where bufmgr/smgr have created a new
908  * all-zero page but it hasn't yet been write-locked by _bt_getbuf(). If
909  * we manage to scan such a page here, we'll improperly assume it can be
910  * recycled. Taking the lock synchronizes things enough to prevent a
911  * problem: either num_pages won't include the new page, or _bt_getbuf
912  * already has write lock on the buffer and it will be fully initialized
913  * before we can examine it. (See also vacuumlazy.c, which has the same
914  * issue.) Also, we need not worry if a page is added immediately after
915  * we look; the page splitting code already has write-lock on the left
916  * page before it adds a right page, so we must already have processed any
917  * tuples due to be moved into such a page.
918  *
919  * We can skip locking for new or temp relations, however, since no one
920  * else could be accessing them.
921  */
922  needLock = !RELATION_IS_LOCAL(rel);
923 
924  blkno = BTREE_METAPAGE + 1;
925  for (;;)
926  {
927  /* Get the current relation length */
928  if (needLock)
929  LockRelationForExtension(rel, ExclusiveLock);
930  num_pages = RelationGetNumberOfBlocks(rel);
931  if (needLock)
932  UnlockRelationForExtension(rel, ExclusiveLock);
933 
934  /* Quit if we've scanned the whole relation */
935  if (blkno >= num_pages)
936  break;
937  /* Iterate over pages, then loop back to recheck length */
938  for (; blkno < num_pages; blkno++)
939  {
940  btvacuumpage(&vstate, blkno, blkno);
941  }
942  }
943 
944  /*
945  * Check to see if we need to issue one final WAL record for this index,
946  * which may be needed for correctness on a hot standby node when non-MVCC
947  * index scans could take place.
948  *
949  * If the WAL is replayed in hot standby, the replay process needs to get
950  * cleanup locks on all index leaf pages, just as we've been doing here.
951  * However, we won't issue any WAL records about pages that have no items
952  * to be deleted. For pages between pages we've vacuumed, the replay code
953  * will take locks under the direction of the lastBlockVacuumed fields in
954  * the XLOG_BTREE_VACUUM WAL records. To cover pages after the last one
955  * we vacuum, we need to issue a dummy XLOG_BTREE_VACUUM WAL record
956  * against the last leaf page in the index, if that one wasn't vacuumed.
957  */
958  if (XLogStandbyInfoActive() &&
959  vstate.lastBlockVacuumed < vstate.lastBlockLocked)
960  {
961  Buffer buf;
962 
963  /*
964  * The page should be valid, but we can't use _bt_getbuf() because we
965  * want to use a nondefault buffer access strategy. Since we aren't
966  * going to delete any items, getting cleanup lock again is probably
967  * overkill, but for consistency do that anyway.
968  */
969  buf = ReadBufferExtended(rel, MAIN_FORKNUM, vstate.lastBlockLocked,
970  RBM_NORMAL, info->strategy);
971  LockBufferForCleanup(buf);
972  _bt_checkpage(rel, buf);
973  _bt_delitems_vacuum(rel, buf, NULL, 0, vstate.lastBlockVacuumed);
974  _bt_relbuf(rel, buf);
975  }
976 
977  MemoryContextDelete(vstate.pagedelcontext);
978 
979  /* update statistics */
980  stats->num_pages = num_pages;
981  stats->pages_free = vstate.totFreePages;
982 }
983 
984 /*
985  * btvacuumpage --- VACUUM one page
986  *
987  * This processes a single page for btvacuumscan(). In some cases we
988  * must go back and re-examine previously-scanned pages; this routine
989  * recurses when necessary to handle that case.
990  *
991  * blkno is the page to process. orig_blkno is the highest block number
992  * reached by the outer btvacuumscan loop (the same as blkno, unless we
993  * are recursing to re-examine a previous page).
994  */
995 static void
996 btvacuumpage(BTVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno)
997 {
998  IndexVacuumInfo *info = vstate->info;
999  IndexBulkDeleteResult *stats = vstate->stats;
1000  IndexBulkDeleteCallback callback = vstate->callback;
1001  void *callback_state = vstate->callback_state;
1002  Relation rel = info->index;
1003  bool delete_now;
1004  BlockNumber recurse_to;
1005  Buffer buf;
1006  Page page;
1007  BTPageOpaque opaque = NULL;
1008 
1009 restart:
1010  delete_now = false;
1011  recurse_to = P_NONE;
1012 
1013  /* call vacuum_delay_point while not holding any buffer lock */
1014  vacuum_delay_point();
1015 
1016  /*
1017  * We can't use _bt_getbuf() here because it always applies
1018  * _bt_checkpage(), which will barf on an all-zero page. We want to
1019  * recycle all-zero pages, not fail. Also, we want to use a nondefault
1020  * buffer access strategy.
1021  */
1022  buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
1023  info->strategy);
1024  LockBuffer(buf, BT_READ);
1025  page = BufferGetPage(buf);
1026  if (!PageIsNew(page))
1027  {
1028  _bt_checkpage(rel, buf);
1029  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1030  }
1031 
1032  /*
1033  * If we are recursing, the only case we want to do anything with is a
1034  * live leaf page having the current vacuum cycle ID. Any other state
1035  * implies we already saw the page (eg, deleted it as being empty).
1036  */
1037  if (blkno != orig_blkno)
1038  {
1039  if (_bt_page_recyclable(page) ||
1040  P_IGNORE(opaque) ||
1041  !P_ISLEAF(opaque) ||
1042  opaque->btpo_cycleid != vstate->cycleid)
1043  {
1044  _bt_relbuf(rel, buf);
1045  return;
1046  }
1047  }
1048 
1049  /* Page is valid, see what to do with it */
1050  if (_bt_page_recyclable(page))
1051  {
1052  /* Okay to recycle this page */
1053  RecordFreeIndexPage(rel, blkno);
1054  vstate->totFreePages++;
1055  stats->pages_deleted++;
1056  }
1057  else if (P_ISDELETED(opaque))
1058  {
1059  /* Already deleted, but can't recycle yet */
1060  stats->pages_deleted++;
1061  }
1062  else if (P_ISHALFDEAD(opaque))
1063  {
1064  /* Half-dead, try to delete */
1065  delete_now = true;
1066  }
1067  else if (P_ISLEAF(opaque))
1068  {
1069  OffsetNumber deletable[MaxOffsetNumber];
1070  int ndeletable;
1071  OffsetNumber offnum,
1072  minoff,
1073  maxoff;
1074 
1075  /*
1076  * Trade in the initial read lock for a super-exclusive write lock on
1077  * this page. We must get such a lock on every leaf page over the
1078  * course of the vacuum scan, whether or not it actually contains any
1079  * deletable tuples --- see nbtree/README.
1080  */
1081  LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1082  LockBufferForCleanup(buf);
1083 
1084  /*
1085  * Remember highest leaf page number we've taken cleanup lock on; see
1086  * notes in btvacuumscan
1087  */
1088  if (blkno > vstate->lastBlockLocked)
1089  vstate->lastBlockLocked = blkno;
1090 
1091  /*
1092  * Check whether we need to recurse back to earlier pages. What we
1093  * are concerned about is a page split that happened since we started
1094  * the vacuum scan. If the split moved some tuples to a lower page
1095  * then we might have missed 'em. If so, set up for tail recursion.
1096  * (Must do this before possibly clearing btpo_cycleid below!)
1097  */
1098  if (vstate->cycleid != 0 &&
1099  opaque->btpo_cycleid == vstate->cycleid &&
1100  !(opaque->btpo_flags & BTP_SPLIT_END) &&
1101  !P_RIGHTMOST(opaque) &&
1102  opaque->btpo_next < orig_blkno)
1103  recurse_to = opaque->btpo_next;
1104 
1105  /*
1106  * Scan over all items to see which ones need to be deleted according
1107  * to the callback function.
1108  */
1109  ndeletable = 0;
1110  minoff = P_FIRSTDATAKEY(opaque);
1111  maxoff = PageGetMaxOffsetNumber(page);
1112  if (callback)
1113  {
1114  for (offnum = minoff;
1115  offnum <= maxoff;
1116  offnum = OffsetNumberNext(offnum))
1117  {
1118  IndexTuple itup;
1119  ItemPointer htup;
1120 
1121  itup = (IndexTuple) PageGetItem(page,
1122  PageGetItemId(page, offnum));
1123  htup = &(itup->t_tid);
1124 
1125  /*
1126  * During Hot Standby we currently assume that
1127  * XLOG_BTREE_VACUUM records do not produce conflicts. That is
1128  * only true as long as the callback function depends only
1129  * upon whether the index tuple refers to heap tuples removed
1130  * in the initial heap scan. When vacuum starts it derives a
1131  * value of OldestXmin. Backends taking later snapshots could
1132  * have a RecentGlobalXmin with a later xid than the vacuum's
1133  * OldestXmin, so it is possible that row versions deleted
1134  * after OldestXmin could be marked as killed by other
1135  * backends. The callback function *could* look at the index
1136  * tuple state in isolation and decide to delete the index
1137  * tuple, though currently it does not. If it ever did, we
1138  * would need to reconsider whether XLOG_BTREE_VACUUM records
1139  * should cause conflicts. If they did cause conflicts they
1140  * would be fairly harsh conflicts, since we haven't yet
1141  * worked out a way to pass a useful value for
1142  * latestRemovedXid on the XLOG_BTREE_VACUUM records. This
1143  * applies to *any* type of index that marks index tuples as
1144  * killed.
1145  */
1146  if (callback(htup, callback_state))
1147  deletable[ndeletable++] = offnum;
1148  }
1149  }
1150 
1151  /*
1152  * Apply any needed deletes. We issue just one _bt_delitems_vacuum()
1153  * call per page, so as to minimize WAL traffic.
1154  */
1155  if (ndeletable > 0)
1156  {
1157  /*
1158  * Notice that the issued XLOG_BTREE_VACUUM WAL record includes
1159  * all the information the replay code needs to get a cleanup
1160  * lock on all pages between the previous lastBlockVacuumed and
1161  * this page. This ensures that WAL replay locks all leaf pages at
1162  * some point, which is important should non-MVCC scans be
1163  * requested. This is currently unused on standby, but we record
1164  * it anyway, so that the WAL contains the required information.
1165  *
1166  * Since we can visit leaf pages out-of-order when recursing,
1167  * replay might end up locking such pages an extra time, but it
1168  * doesn't seem worth the amount of bookkeeping it'd take to avoid
1169  * that.
1170  */
1171  _bt_delitems_vacuum(rel, buf, deletable, ndeletable,
1172  vstate->lastBlockVacuumed);
1173 
1174  /*
1175  * Remember highest leaf page number we've issued a
1176  * XLOG_BTREE_VACUUM WAL record for.
1177  */
1178  if (blkno > vstate->lastBlockVacuumed)
1179  vstate->lastBlockVacuumed = blkno;
1180 
1181  stats->tuples_removed += ndeletable;
1182  /* must recompute maxoff */
1183  maxoff = PageGetMaxOffsetNumber(page);
1184  }
1185  else
1186  {
1187  /*
1188  * If the page has been split during this vacuum cycle, it seems
1189  * worth expending a write to clear btpo_cycleid even if we don't
1190  * have any deletions to do. (If we do, _bt_delitems_vacuum takes
1191  * care of this.) This ensures we won't process the page again.
1192  *
1193  * We treat this like a hint-bit update because there's no need to
1194  * WAL-log it.
1195  */
1196  if (vstate->cycleid != 0 &&
1197  opaque->btpo_cycleid == vstate->cycleid)
1198  {
1199  opaque->btpo_cycleid = 0;
1200  MarkBufferDirtyHint(buf, true);
1201  }
1202  }
1203 
1204  /*
1205  * If it's now empty, try to delete; else count the live tuples. We
1206  * don't delete when recursing, though, to avoid putting entries into
1207  * freePages out-of-order (doesn't seem worth any extra code to handle
1208  * the case).
1209  */
1210  if (minoff > maxoff)
1211  delete_now = (blkno == orig_blkno);
1212  else
1213  stats->num_index_tuples += maxoff - minoff + 1;
1214  }
1215 
1216  if (delete_now)
1217  {
1218  MemoryContext oldcontext;
1219  int ndel;
1220 
1221  /* Run pagedel in a temp context to avoid memory leakage */
1222  MemoryContextReset(vstate->pagedelcontext);
1223  oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
1224 
1225  ndel = _bt_pagedel(rel, buf);
1226 
1227  /* count only this page, else may double-count parent */
1228  if (ndel)
1229  stats->pages_deleted++;
1230 
1231  MemoryContextSwitchTo(oldcontext);
1232  /* pagedel released buffer, so we shouldn't */
1233  }
1234  else
1235  _bt_relbuf(rel, buf);
1236 
1237  /*
1238  * This is really tail recursion, but if the compiler is too stupid to
1239  * optimize it as such, we'd eat an uncomfortably large amount of stack
1240  * space per recursion level (due to the deletable[] array). A failure is
1241  * improbable since the number of levels isn't likely to be large ... but
1242  * just in case, let's hand-optimize into a loop.
1243  */
1244  if (recurse_to != P_NONE)
1245  {
1246  blkno = recurse_to;
1247  goto restart;
1248  }
1249 }
1250 
1251 /*
1252  * btcanreturn() -- Check whether btree indexes support index-only scans.
1253  *
1254  * btrees always do, so this is trivial.
1255  */
1256 bool
1257 btcanreturn(Relation index, int attno)
1258 {
1259  return true;
1260 }