PostgreSQL Source Code (git master)
nbtree.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "access/relscan.h"
#include "access/xloginsert.h"
#include "commands/progress.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "nodes/execnodes.h"
#include "pgstat.h"
#include "storage/bulk_write.h"
#include "storage/condition_variable.h"
#include "storage/indexfsm.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/smgr.h"
#include "utils/fmgrprotos.h"
#include "utils/index_selfuncs.h"
#include "utils/memutils.h"

Data Structures

struct  BTParallelScanDescData
 

Typedefs

typedef struct BTParallelScanDescData BTParallelScanDescData
 
typedef struct BTParallelScanDescData * BTParallelScanDesc
 

Enumerations

enum  BTPS_State {
  BTPARALLEL_NOT_INITIALIZED , BTPARALLEL_NEED_PRIMSCAN , BTPARALLEL_ADVANCING , BTPARALLEL_IDLE ,
  BTPARALLEL_DONE
}
 

Functions

static void btvacuumscan (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid)
 
static void btvacuumpage (BTVacState *vstate, BlockNumber scanblkno)
 
static BTVacuumPosting btreevacuumposting (BTVacState *vstate, IndexTuple posting, OffsetNumber updatedoffset, int *nremaining)
 
Datum bthandler (PG_FUNCTION_ARGS)
 
void btbuildempty (Relation index)
 
bool btinsert (Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo)
 
bool btgettuple (IndexScanDesc scan, ScanDirection dir)
 
int64 btgetbitmap (IndexScanDesc scan, TIDBitmap *tbm)
 
IndexScanDesc btbeginscan (Relation rel, int nkeys, int norderbys)
 
void btrescan (IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
 
void btendscan (IndexScanDesc scan)
 
void btmarkpos (IndexScanDesc scan)
 
void btrestrpos (IndexScanDesc scan)
 
Size btestimateparallelscan (int nkeys, int norderbys)
 
void btinitparallelscan (void *target)
 
void btparallelrescan (IndexScanDesc scan)
 
bool _bt_parallel_seize (IndexScanDesc scan, BlockNumber *pageno, bool first)
 
void _bt_parallel_release (IndexScanDesc scan, BlockNumber scan_page)
 
void _bt_parallel_done (IndexScanDesc scan)
 
void _bt_parallel_primscan_schedule (IndexScanDesc scan, BlockNumber prev_scan_page)
 
IndexBulkDeleteResult * btbulkdelete (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
 
IndexBulkDeleteResult * btvacuumcleanup (IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 
bool btcanreturn (Relation index, int attno)
 

Typedef Documentation

◆ BTParallelScanDesc

Definition at line 83 of file nbtree.c.

◆ BTParallelScanDescData

Enumeration Type Documentation

◆ BTPS_State

enum BTPS_State
Enumerator
BTPARALLEL_NOT_INITIALIZED 
BTPARALLEL_NEED_PRIMSCAN 
BTPARALLEL_ADVANCING 
BTPARALLEL_IDLE 
BTPARALLEL_DONE 

Definition at line 54 of file nbtree.c.

55 {
56  BTPARALLEL_NOT_INITIALIZED,
57  BTPARALLEL_NEED_PRIMSCAN,
58  BTPARALLEL_ADVANCING,
59  BTPARALLEL_IDLE,
60  BTPARALLEL_DONE,
61 } BTPS_State;
BTPS_State
Definition: nbtree.c:55
@ BTPARALLEL_ADVANCING
Definition: nbtree.c:58
@ BTPARALLEL_NEED_PRIMSCAN
Definition: nbtree.c:57
@ BTPARALLEL_NOT_INITIALIZED
Definition: nbtree.c:56
@ BTPARALLEL_IDLE
Definition: nbtree.c:59
@ BTPARALLEL_DONE
Definition: nbtree.c:60
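
Taken together with the parallel-scan helpers documented under "Function Documentation" below, these enumerators form a small state machine kept in BTParallelScanDescData::btps_pageStatus. The following diagram is a reading aid inferred from _bt_parallel_seize(), _bt_parallel_release(), _bt_parallel_primscan_schedule(), and _bt_parallel_done(); it is not code from nbtree.c.

/*
 * Sketch of btps_pageStatus transitions:
 *
 *   BTPARALLEL_NOT_INITIALIZED   set by btinitparallelscan()/btparallelrescan()
 *        |  _bt_parallel_seize()
 *        v
 *   BTPARALLEL_ADVANCING         exactly one backend advances the scan
 *        |  _bt_parallel_release()
 *        v
 *   BTPARALLEL_IDLE              next page published; any backend may seize
 *        |  _bt_parallel_primscan_schedule()   (array keys only)
 *        v
 *   BTPARALLEL_NEED_PRIMSCAN     a new primitive index scan must be started
 *
 *   BTPARALLEL_DONE              set by _bt_parallel_done(); terminal state
 */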

Function Documentation

◆ _bt_parallel_done()

void _bt_parallel_done ( IndexScanDesc  scan)

Definition at line 732 of file nbtree.c.

733 {
734  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
735  BTParallelScanDesc btscan;
736  bool status_changed = false;
737 
738  /* Do nothing, for non-parallel scans */
739  if (parallel_scan == NULL)
740  return;
741 
742  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
743  parallel_scan->ps_offset);
744 
745  /*
746  * Mark the parallel scan as done, unless some other process did so
747  * already
748  */
749  SpinLockAcquire(&btscan->btps_mutex);
750  if (btscan->btps_pageStatus != BTPARALLEL_DONE)
751  {
752  btscan->btps_pageStatus = BTPARALLEL_DONE;
753  status_changed = true;
754  }
755  SpinLockRelease(&btscan->btps_mutex);
756 
757  /* wake up all the workers associated with this parallel scan */
758  if (status_changed)
759  ConditionVariableBroadcast(&btscan->btps_cv);
760 }
#define OffsetToPointer(base, offset)
Definition: c.h:772
void ConditionVariableBroadcast(ConditionVariable *cv)
struct BTParallelScanDescData * BTParallelScanDesc
Definition: nbtree.c:83
#define SpinLockRelease(lock)
Definition: spin.h:64
#define SpinLockAcquire(lock)
Definition: spin.h:62
slock_t btps_mutex
Definition: nbtree.c:73
BTPS_State btps_pageStatus
Definition: nbtree.c:70
ConditionVariable btps_cv
Definition: nbtree.c:74
struct ParallelIndexScanDescData * parallel_scan
Definition: relscan.h:166

References BTPARALLEL_DONE, BTParallelScanDescData::btps_cv, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, ConditionVariableBroadcast(), OffsetToPointer, IndexScanDescData::parallel_scan, ParallelIndexScanDescData::ps_offset, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_first(), _bt_readnextpage(), and _bt_start_prim_scan().

◆ _bt_parallel_primscan_schedule()

void _bt_parallel_primscan_schedule ( IndexScanDesc  scan,
BlockNumber  prev_scan_page 
)

Definition at line 771 of file nbtree.c.

772 {
773  BTScanOpaque so = (BTScanOpaque) scan->opaque;
774  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
775  BTParallelScanDesc btscan;
776 
777  Assert(so->numArrayKeys);
778 
779  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
780  parallel_scan->ps_offset);
781 
782  SpinLockAcquire(&btscan->btps_mutex);
783  if (btscan->btps_scanPage == prev_scan_page &&
784  btscan->btps_pageStatus == BTPARALLEL_IDLE)
785  {
786  btscan->btps_scanPage = InvalidBlockNumber;
787  btscan->btps_pageStatus = BTPARALLEL_NEED_PRIMSCAN;
788 
789  /* Serialize scan's current array keys */
790  for (int i = 0; i < so->numArrayKeys; i++)
791  {
792  BTArrayKeyInfo *array = &so->arrayKeys[i];
793 
794  btscan->btps_arrElems[i] = array->cur_elem;
795  }
796  }
797  SpinLockRelease(&btscan->btps_mutex);
798 }
#define InvalidBlockNumber
Definition: block.h:33
#define Assert(condition)
Definition: c.h:858
int i
Definition: isn.c:73
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:1081
BTArrayKeyInfo * arrayKeys
Definition: nbtree.h:1051

References BTScanOpaqueData::arrayKeys, Assert, BTPARALLEL_IDLE, BTPARALLEL_NEED_PRIMSCAN, BTArrayKeyInfo::cur_elem, i, InvalidBlockNumber, BTScanOpaqueData::numArrayKeys, OffsetToPointer, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_advance_array_keys().

◆ _bt_parallel_release()

void _bt_parallel_release ( IndexScanDesc  scan,
BlockNumber  scan_page 
)

Definition at line 709 of file nbtree.c.

710 {
711  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
712  BTParallelScanDesc btscan;
713 
714  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
715  parallel_scan->ps_offset);
716 
717  SpinLockAcquire(&btscan->btps_mutex);
718  btscan->btps_scanPage = scan_page;
719  btscan->btps_pageStatus = BTPARALLEL_IDLE;
720  SpinLockRelease(&btscan->btps_mutex);
721  ConditionVariableSignal(&btscan->btps_cv);
722 }
void ConditionVariableSignal(ConditionVariable *cv)
BlockNumber btps_scanPage
Definition: nbtree.c:69

References BTPARALLEL_IDLE, BTParallelScanDescData::btps_cv, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, ConditionVariableSignal(), OffsetToPointer, IndexScanDescData::parallel_scan, ParallelIndexScanDescData::ps_offset, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_readnextpage(), and _bt_readpage().

◆ _bt_parallel_seize()

bool _bt_parallel_seize ( IndexScanDesc  scan,
BlockNumber pageno,
bool  first 
)

Definition at line 605 of file nbtree.c.

606 {
607  BTScanOpaque so = (BTScanOpaque) scan->opaque;
608  bool exit_loop = false;
609  bool status = true;
610  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
611  BTParallelScanDesc btscan;
612 
613  *pageno = P_NONE;
614 
615  if (first)
616  {
617  /*
618  * Initialize array related state when called from _bt_first, assuming
619  * that this will either be the first primitive index scan for the
620  * scan, or a previous explicitly scheduled primitive scan.
621  *
622  * Note: so->needPrimScan is only set when a scheduled primitive index
623  * scan is set to be performed in caller's worker process. It should
624  * not be set here by us for the first primitive scan, nor should we
625  * ever set it for a parallel scan that has no array keys.
626  */
627  so->needPrimScan = false;
628  so->scanBehind = false;
629  }
630  else
631  {
632  /*
633  * Don't attempt to seize the scan when backend requires another
634  * primitive index scan unless we're in a position to start it now
635  */
636  if (so->needPrimScan)
637  return false;
638  }
639 
640  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
641  parallel_scan->ps_offset);
642 
643  while (1)
644  {
645  SpinLockAcquire(&btscan->btps_mutex);
646 
647  if (btscan->btps_pageStatus == BTPARALLEL_DONE)
648  {
649  /* We're done with this parallel index scan */
650  status = false;
651  }
652  else if (btscan->btps_pageStatus == BTPARALLEL_NEED_PRIMSCAN)
653  {
654  Assert(so->numArrayKeys);
655 
656  /*
657  * If we can start another primitive scan right away, do so.
658  * Otherwise just wait.
659  */
660  if (first)
661  {
662  btscan->btps_pageStatus = BTPARALLEL_ADVANCING;
663  for (int i = 0; i < so->numArrayKeys; i++)
664  {
665  BTArrayKeyInfo *array = &so->arrayKeys[i];
666  ScanKey skey = &so->keyData[array->scan_key];
667 
668  array->cur_elem = btscan->btps_arrElems[i];
669  skey->sk_argument = array->elem_values[array->cur_elem];
670  }
671  so->needPrimScan = true;
672  so->scanBehind = false;
673  *pageno = InvalidBlockNumber;
674  exit_loop = true;
675  }
676  }
677  else if (btscan->btps_pageStatus != BTPARALLEL_ADVANCING)
678  {
679  /*
680  * We have successfully seized control of the scan for the purpose
681  * of advancing it to a new page!
682  */
683  btscan->btps_pageStatus = BTPARALLEL_ADVANCING;
684  *pageno = btscan->btps_scanPage;
685  exit_loop = true;
686  }
687  SpinLockRelease(&btscan->btps_mutex);
688  if (exit_loop || !status)
689  break;
690  ConditionVariableSleep(&btscan->btps_cv, WAIT_EVENT_BTREE_PAGE);
691  }
692  ConditionVariableCancelSleep();
693 
694  return status;
695 }
bool ConditionVariableCancelSleep(void)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
#define P_NONE
Definition: nbtree.h:212
Datum * elem_values
Definition: nbtree.h:1037
bool needPrimScan
Definition: nbtree.h:1049
ScanKey keyData
Definition: nbtree.h:1045
Datum sk_argument
Definition: skey.h:72

References BTScanOpaqueData::arrayKeys, Assert, BTPARALLEL_ADVANCING, BTPARALLEL_DONE, BTPARALLEL_NEED_PRIMSCAN, ConditionVariableCancelSleep(), ConditionVariableSleep(), BTArrayKeyInfo::cur_elem, BTArrayKeyInfo::elem_values, i, if(), InvalidBlockNumber, BTScanOpaqueData::keyData, BTScanOpaqueData::needPrimScan, BTScanOpaqueData::numArrayKeys, OffsetToPointer, IndexScanDescData::opaque, P_NONE, IndexScanDescData::parallel_scan, BTArrayKeyInfo::scan_key, BTScanOpaqueData::scanBehind, ScanKeyData::sk_argument, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_first(), _bt_readnextpage(), and _bt_steppage().
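
Pieced together from the signatures above, a typical participant drives the scan roughly as sketched below. This is illustrative only: the real callers are _bt_first() and _bt_readnextpage(), it assumes a scan without array keys (so _bt_parallel_seize() returns false only once the scan is BTPARALLEL_DONE), and get_next_page() is a hypothetical stand-in for reading the seized page and finding its right sibling.

static void
parallel_scan_sketch(IndexScanDesc scan)
{
    BlockNumber pageno;
    bool        first = true;

    while (_bt_parallel_seize(scan, &pageno, first))
    {
        BlockNumber next_page;

        first = false;
        /* this backend holds BTPARALLEL_ADVANCING until it releases */
        next_page = get_next_page(scan, pageno);    /* hypothetical helper */
        if (next_page == P_NONE)
        {
            /* rightmost page reached: mark the whole scan finished */
            _bt_parallel_done(scan);
            break;
        }
        /* publish the next page; state becomes BTPARALLEL_IDLE */
        _bt_parallel_release(scan, next_page);
    }
}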

◆ btbeginscan()

IndexScanDesc btbeginscan ( Relation  rel,
int  nkeys,
int  norderbys 
)

Definition at line 312 of file nbtree.c.

313 {
314  IndexScanDesc scan;
315  BTScanOpaque so;
316 
317  /* no order by operators allowed */
318  Assert(norderbys == 0);
319 
320  /* get the scan */
321  scan = RelationGetIndexScan(rel, nkeys, norderbys);
322 
323  /* allocate private workspace */
324  so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
325  BTScanPosInvalidate(so->currPos);
326  BTScanPosInvalidate(so->markPos);
327  if (scan->numberOfKeys > 0)
328  so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
329  else
330  so->keyData = NULL;
331 
332  so->needPrimScan = false;
333  so->scanBehind = false;
334  so->arrayKeys = NULL;
335  so->orderProcs = NULL;
336  so->arrayContext = NULL;
337 
338  so->killedItems = NULL; /* until needed */
339  so->numKilled = 0;
340 
341  /*
342  * We don't know yet whether the scan will be index-only, so we do not
343  * allocate the tuple workspace arrays until btrescan. However, we set up
344  * scan->xs_itupdesc whether we'll need it or not, since that's so cheap.
345  */
346  so->currTuples = so->markTuples = NULL;
347 
348  scan->xs_itupdesc = RelationGetDescr(rel);
349 
350  scan->opaque = so;
351 
352  return scan;
353 }
IndexScanDesc RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
Definition: genam.c:78
void * palloc(Size size)
Definition: mcxt.c:1316
#define BTScanPosInvalidate(scanpos)
Definition: nbtree.h:1022
#define RelationGetDescr(relation)
Definition: rel.h:531
ScanKeyData * ScanKey
Definition: skey.h:75
char * markTuples
Definition: nbtree.h:1065
FmgrInfo * orderProcs
Definition: nbtree.h:1052
BTScanPosData currPos
Definition: nbtree.h:1077
int * killedItems
Definition: nbtree.h:1056
char * currTuples
Definition: nbtree.h:1064
BTScanPosData markPos
Definition: nbtree.h:1078
MemoryContext arrayContext
Definition: nbtree.h:1053
struct TupleDescData * xs_itupdesc
Definition: relscan.h:143

References BTScanOpaqueData::arrayContext, BTScanOpaqueData::arrayKeys, Assert, BTScanPosInvalidate, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanOpaqueData::keyData, BTScanOpaqueData::killedItems, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanOpaqueData::needPrimScan, IndexScanDescData::numberOfKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, BTScanOpaqueData::orderProcs, palloc(), RelationGetDescr, RelationGetIndexScan(), BTScanOpaqueData::scanBehind, and IndexScanDescData::xs_itupdesc.

Referenced by bthandler().
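
Note that btbeginscan() leaves the scan-key storage allocated but unfilled; per the index AM contract, amrescan must run before the first fetch. A minimal sketch (direct calls shown for clarity; real callers go through index_beginscan()/index_rescan() in indexam.c):

IndexScanDesc scan = btbeginscan(rel, nkeys, 0);

btrescan(scan, scankeys, nkeys, NULL, 0);   /* install the scan keys */
/* ... btgettuple()/btgetbitmap() may now be used ... */
btendscan(scan);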

◆ btbuildempty()

void btbuildempty ( Relation  index)

Definition at line 159 of file nbtree.c.

160 {
161  bool allequalimage = _bt_allequalimage(index, false);
162  BulkWriteState *bulkstate;
163  BulkWriteBuffer metabuf;
164 
165  bulkstate = smgr_bulk_start_rel(index, INIT_FORKNUM);
166 
167  /* Construct metapage. */
168  metabuf = smgr_bulk_get_buf(bulkstate);
169  _bt_initmetapage((Page) metabuf, P_NONE, 0, allequalimage);
170  smgr_bulk_write(bulkstate, BTREE_METAPAGE, metabuf, true);
171 
172  smgr_bulk_finish(bulkstate);
173 }
Pointer Page
Definition: bufpage.h:78
void smgr_bulk_write(BulkWriteState *bulkstate, BlockNumber blocknum, BulkWriteBuffer buf, bool page_std)
Definition: bulk_write.c:271
BulkWriteBuffer smgr_bulk_get_buf(BulkWriteState *bulkstate)
Definition: bulk_write.c:295
void smgr_bulk_finish(BulkWriteState *bulkstate)
Definition: bulk_write.c:129
BulkWriteState * smgr_bulk_start_rel(Relation rel, ForkNumber forknum)
Definition: bulk_write.c:86
void _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level, bool allequalimage)
Definition: nbtpage.c:67
#define BTREE_METAPAGE
Definition: nbtree.h:148
bool _bt_allequalimage(Relation rel, bool debugmessage)
Definition: nbtutils.c:5143
@ INIT_FORKNUM
Definition: relpath.h:53

References _bt_allequalimage(), _bt_initmetapage(), BTREE_METAPAGE, INIT_FORKNUM, P_NONE, smgr_bulk_finish(), smgr_bulk_get_buf(), smgr_bulk_start_rel(), and smgr_bulk_write().

Referenced by bthandler().

◆ btbulkdelete()

IndexBulkDeleteResult* btbulkdelete ( IndexVacuumInfo info,
IndexBulkDeleteResult stats,
IndexBulkDeleteCallback  callback,
void *  callback_state 
)

Definition at line 808 of file nbtree.c.

810 {
811  Relation rel = info->index;
812  BTCycleId cycleid;
813 
814  /* allocate stats if first time through, else re-use existing struct */
815  if (stats == NULL)
816  stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
817 
818  /* Establish the vacuum cycle ID to use for this scan */
819  /* The ENSURE stuff ensures we clean up shared memory on failure */
820  PG_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
821  {
822  cycleid = _bt_start_vacuum(rel);
823 
824  btvacuumscan(info, stats, callback, callback_state, cycleid);
825  }
826  PG_END_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
827  _bt_end_vacuum(rel);
828 
829  return stats;
830 }
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:47
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:52
void * palloc0(Size size)
Definition: mcxt.c:1346
static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid)
Definition: nbtree.c:926
uint16 BTCycleId
Definition: nbtree.h:29
void _bt_end_vacuum(Relation rel)
Definition: nbtutils.c:4487
void _bt_end_vacuum_callback(int code, Datum arg)
Definition: nbtutils.c:4515
BTCycleId _bt_start_vacuum(Relation rel)
Definition: nbtutils.c:4430
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
Relation index
Definition: genam.h:46
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
Definition: test_ifaddrs.c:46

References _bt_end_vacuum(), _bt_end_vacuum_callback(), _bt_start_vacuum(), btvacuumscan(), callback(), IndexVacuumInfo::index, palloc0(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, and PointerGetDatum().

Referenced by bthandler().

◆ btcanreturn()

bool btcanreturn ( Relation  index,
int  attno 
)

Definition at line 1444 of file nbtree.c.

1445 {
1446  return true;
1447 }

Referenced by bthandler().

◆ btendscan()

void btendscan ( IndexScanDesc  scan)

Definition at line 417 of file nbtree.c.

418 {
419  BTScanOpaque so = (BTScanOpaque) scan->opaque;
420 
421  /* we aren't holding any read locks, but gotta drop the pins */
422  if (BTScanPosIsValid(so->currPos))
423  {
424  /* Before leaving current page, deal with any killed items */
425  if (so->numKilled > 0)
426  _bt_killitems(scan);
427  BTScanPosUnpinIfPinned(so->currPos);
428  }
429 
430  so->markItemIndex = -1;
431  BTScanPosUnpinIfPinned(so->markPos);
432 
433  /* No need to invalidate positions, the RAM is about to be freed. */
434 
435  /* Release storage */
436  if (so->keyData != NULL)
437  pfree(so->keyData);
438  /* so->arrayKeys and so->orderProcs are in arrayContext */
439  if (so->arrayContext != NULL)
440  MemoryContextDelete(so->arrayContext);
441  if (so->killedItems != NULL)
442  pfree(so->killedItems);
443  if (so->currTuples != NULL)
444  pfree(so->currTuples);
445  /* so->markTuples should not be pfree'd, see btrescan */
446  pfree(so);
447 }
void pfree(void *pointer)
Definition: mcxt.c:1520
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:454
#define BTScanPosIsValid(scanpos)
Definition: nbtree.h:1016
#define BTScanPosUnpinIfPinned(scanpos)
Definition: nbtree.h:1010
void _bt_killitems(IndexScanDesc scan)
Definition: nbtutils.c:4179

References _bt_killitems(), BTScanOpaqueData::arrayContext, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, if(), BTScanOpaqueData::keyData, BTScanOpaqueData::killedItems, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, MemoryContextDelete(), BTScanOpaqueData::numKilled, IndexScanDescData::opaque, and pfree().

Referenced by bthandler().

◆ btestimateparallelscan()

Size btestimateparallelscan ( int  nkeys,
int  norderbys 
)

Definition at line 537 of file nbtree.c.

538 {
539  /* Pessimistically assume all input scankeys will be output with arrays */
540  return offsetof(BTParallelScanDescData, btps_arrElems) + sizeof(int) * nkeys;
541 }

References BTParallelScanDescData::btps_arrElems.

Referenced by bthandler().
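
The offsetof()-based result reserves room for one btps_arrElems slot per scan key, sized for the worst case where every key has an array. A hedged sketch of how the estimate/init pair fits together (shared_area is a hypothetical shared-memory allocation; in core PostgreSQL the driver is index_parallelscan_estimate()/index_parallelscan_initialize() in indexam.c):

Size  size = btestimateparallelscan(nkeys, norderbys);
void *target = shared_area;     /* hypothetical: at least "size" bytes */

btinitparallelscan(target);     /* starts out BTPARALLEL_NOT_INITIALIZED */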

◆ btgetbitmap()

int64 btgetbitmap ( IndexScanDesc  scan,
TIDBitmap tbm 
)

Definition at line 266 of file nbtree.c.

267 {
268  BTScanOpaque so = (BTScanOpaque) scan->opaque;
269  int64 ntids = 0;
270  ItemPointer heapTid;
271 
272  /* Each loop iteration performs another primitive index scan */
273  do
274  {
275  /* Fetch the first page & tuple */
276  if (_bt_first(scan, ForwardScanDirection))
277  {
278  /* Save tuple ID, and continue scanning */
279  heapTid = &scan->xs_heaptid;
280  tbm_add_tuples(tbm, heapTid, 1, false);
281  ntids++;
282 
283  for (;;)
284  {
285  /*
286  * Advance to next tuple within page. This is the same as the
287  * easy case in _bt_next().
288  */
289  if (++so->currPos.itemIndex > so->currPos.lastItem)
290  {
291  /* let _bt_next do the heavy lifting */
292  if (!_bt_next(scan, ForwardScanDirection))
293  break;
294  }
295 
296  /* Save tuple ID, and continue scanning */
297  heapTid = &so->currPos.items[so->currPos.itemIndex].heapTid;
298  tbm_add_tuples(tbm, heapTid, 1, false);
299  ntids++;
300  }
301  }
302  /* Now see if we need another primitive index scan */
303  } while (so->numArrayKeys && _bt_start_prim_scan(scan, ForwardScanDirection));
304 
305  return ntids;
306 }
bool _bt_first(IndexScanDesc scan, ScanDirection dir)
Definition: nbtsearch.c:876
bool _bt_next(IndexScanDesc scan, ScanDirection dir)
Definition: nbtsearch.c:1492
bool _bt_start_prim_scan(IndexScanDesc scan, ScanDirection dir)
Definition: nbtutils.c:1669
@ ForwardScanDirection
Definition: sdir.h:28
int lastItem
Definition: nbtree.h:991
BTScanPosItem items[MaxTIDsPerBTreePage]
Definition: nbtree.h:994
int itemIndex
Definition: nbtree.h:992
ItemPointerData heapTid
Definition: nbtree.h:946
ItemPointerData xs_heaptid
Definition: relscan.h:147
void tbm_add_tuples(TIDBitmap *tbm, const ItemPointer tids, int ntids, bool recheck)
Definition: tidbitmap.c:377

References _bt_first(), _bt_next(), BTScanOpaqueData::currPos, ForwardScanDirection, BTScanPosItem::heapTid, if(), BTScanPosData::itemIndex, BTScanPosData::items, BTScanPosData::lastItem, IndexScanDescData::opaque, tbm_add_tuples(), and IndexScanDescData::xs_heaptid.

Referenced by bthandler().

◆ btgettuple()

bool btgettuple ( IndexScanDesc  scan,
ScanDirection  dir 
)

Definition at line 206 of file nbtree.c.

207 {
208  BTScanOpaque so = (BTScanOpaque) scan->opaque;
209  bool res;
210 
211  /* btree indexes are never lossy */
212  scan->xs_recheck = false;
213 
214  /* Each loop iteration performs another primitive index scan */
215  do
216  {
217  /*
218  * If we've already initialized this scan, we can just advance it in
219  * the appropriate direction. If we haven't done so yet, we call
220  * _bt_first() to get the first item in the scan.
221  */
222  if (!BTScanPosIsValid(so->currPos))
223  res = _bt_first(scan, dir);
224  else
225  {
226  /*
227  * Check to see if we should kill the previously-fetched tuple.
228  */
229  if (scan->kill_prior_tuple)
230  {
231  /*
232  * Yes, remember it for later. (We'll deal with all such
233  * tuples at once right before leaving the index page.) The
234  * test for numKilled overrun is not just paranoia: if the
235  * caller reverses direction in the indexscan then the same
236  * item might get entered multiple times. It's not worth
237  * trying to optimize that, so we don't detect it, but instead
238  * just forget any excess entries.
239  */
240  if (so->killedItems == NULL)
241  so->killedItems = (int *)
242  palloc(MaxTIDsPerBTreePage * sizeof(int));
243  if (so->numKilled < MaxTIDsPerBTreePage)
244  so->killedItems[so->numKilled++] = so->currPos.itemIndex;
245  }
246 
247  /*
248  * Now continue the scan.
249  */
250  res = _bt_next(scan, dir);
251  }
252 
253  /* If we have a tuple, return it ... */
254  if (res)
255  break;
256  /* ... otherwise see if we need another primitive index scan */
257  } while (so->numArrayKeys && _bt_start_prim_scan(scan, dir));
258 
259  return res;
260 }
#define MaxTIDsPerBTreePage
Definition: nbtree.h:185
bool kill_prior_tuple
Definition: relscan.h:128

References _bt_first(), _bt_next(), BTScanPosIsValid, BTScanOpaqueData::currPos, if(), BTScanPosData::itemIndex, IndexScanDescData::kill_prior_tuple, BTScanOpaqueData::killedItems, MaxTIDsPerBTreePage, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), res, and IndexScanDescData::xs_recheck.

Referenced by bthandler().
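
For context, a hedged sketch of a caller-side loop: btgettuple() is normally reached through the generic wrappers in indexam.c (index_beginscan(), index_rescan(), index_getnext_tid(), index_endscan()); process_tid() is a hypothetical consumer.

IndexScanDesc scan = index_beginscan(heapRel, indexRel, snapshot, nkeys, 0);
ItemPointer   tid;

index_rescan(scan, scankeys, nkeys, NULL, 0);
while ((tid = index_getnext_tid(scan, ForwardScanDirection)) != NULL)
{
    /* setting scan->kill_prior_tuple here would feed so->killedItems */
    process_tid(tid);           /* hypothetical */
}
index_endscan(scan);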

◆ bthandler()

Datum bthandler ( PG_FUNCTION_ARGS  )

Definition at line 101 of file nbtree.c.

102 {
103  IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
104 
105  amroutine->amstrategies = BTMaxStrategyNumber;
106  amroutine->amsupport = BTNProcs;
107  amroutine->amoptsprocnum = BTOPTIONS_PROC;
108  amroutine->amcanorder = true;
109  amroutine->amcanorderbyop = false;
110  amroutine->amcanbackward = true;
111  amroutine->amcanunique = true;
112  amroutine->amcanmulticol = true;
113  amroutine->amoptionalkey = true;
114  amroutine->amsearcharray = true;
115  amroutine->amsearchnulls = true;
116  amroutine->amstorage = false;
117  amroutine->amclusterable = true;
118  amroutine->ampredlocks = true;
119  amroutine->amcanparallel = true;
120  amroutine->amcanbuildparallel = true;
121  amroutine->amcaninclude = true;
122  amroutine->amusemaintenanceworkmem = false;
123  amroutine->amsummarizing = false;
124  amroutine->amparallelvacuumoptions =
125  VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_COND_CLEANUP;
126  amroutine->amkeytype = InvalidOid;
127 
128  amroutine->ambuild = btbuild;
129  amroutine->ambuildempty = btbuildempty;
130  amroutine->aminsert = btinsert;
131  amroutine->aminsertcleanup = NULL;
132  amroutine->ambulkdelete = btbulkdelete;
133  amroutine->amvacuumcleanup = btvacuumcleanup;
134  amroutine->amcanreturn = btcanreturn;
135  amroutine->amcostestimate = btcostestimate;
136  amroutine->amoptions = btoptions;
137  amroutine->amproperty = btproperty;
138  amroutine->ambuildphasename = btbuildphasename;
139  amroutine->amvalidate = btvalidate;
140  amroutine->amadjustmembers = btadjustmembers;
141  amroutine->ambeginscan = btbeginscan;
142  amroutine->amrescan = btrescan;
143  amroutine->amgettuple = btgettuple;
144  amroutine->amgetbitmap = btgetbitmap;
145  amroutine->amendscan = btendscan;
146  amroutine->ammarkpos = btmarkpos;
147  amroutine->amrestrpos = btrestrpos;
148  amroutine->amestimateparallelscan = btestimateparallelscan;
149  amroutine->aminitparallelscan = btinitparallelscan;
150  amroutine->amparallelrescan = btparallelrescan;
151 
152  PG_RETURN_POINTER(amroutine);
153 }
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
bool btcanreturn(Relation index, int attno)
Definition: nbtree.c:1444
IndexScanDesc btbeginscan(Relation rel, int nkeys, int norderbys)
Definition: nbtree.c:312
IndexBulkDeleteResult * btbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
Definition: nbtree.c:808
IndexBulkDeleteResult * btvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
Definition: nbtree.c:838
bool btgettuple(IndexScanDesc scan, ScanDirection dir)
Definition: nbtree.c:206
void btparallelrescan(IndexScanDesc scan)
Definition: nbtree.c:561
bool btinsert(Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo)
Definition: nbtree.c:182
void btbuildempty(Relation index)
Definition: nbtree.c:159
void btinitparallelscan(void *target)
Definition: nbtree.c:547
Size btestimateparallelscan(int nkeys, int norderbys)
Definition: nbtree.c:537
int64 btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
Definition: nbtree.c:266
void btmarkpos(IndexScanDesc scan)
Definition: nbtree.c:453
void btendscan(IndexScanDesc scan)
Definition: nbtree.c:417
void btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
Definition: nbtree.c:359
void btrestrpos(IndexScanDesc scan)
Definition: nbtree.c:479
#define BTNProcs
Definition: nbtree.h:712
#define BTOPTIONS_PROC
Definition: nbtree.h:711
IndexBuildResult * btbuild(Relation heap, Relation index, IndexInfo *indexInfo)
Definition: nbtsort.c:293
char * btbuildphasename(int64 phasenum)
Definition: nbtutils.c:4611
bytea * btoptions(Datum reloptions, bool validate)
Definition: nbtutils.c:4565
bool btproperty(Oid index_oid, int attno, IndexAMProperty prop, const char *propname, bool *res, bool *isnull)
Definition: nbtutils.c:4588
bool btvalidate(Oid opclassoid)
Definition: nbtvalidate.c:41
void btadjustmembers(Oid opfamilyoid, Oid opclassoid, List *operators, List *functions)
Definition: nbtvalidate.c:293
#define makeNode(_type_)
Definition: nodes.h:155
#define InvalidOid
Definition: postgres_ext.h:36
void btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
Definition: selfuncs.c:6788
#define BTMaxStrategyNumber
Definition: stratnum.h:35
ambuildphasename_function ambuildphasename
Definition: amapi.h:277
ambuildempty_function ambuildempty
Definition: amapi.h:268
amvacuumcleanup_function amvacuumcleanup
Definition: amapi.h:272
bool amclusterable
Definition: amapi.h:242
amoptions_function amoptions
Definition: amapi.h:275
amestimateparallelscan_function amestimateparallelscan
Definition: amapi.h:289
amrestrpos_function amrestrpos
Definition: amapi.h:286
aminsert_function aminsert
Definition: amapi.h:269
amendscan_function amendscan
Definition: amapi.h:284
uint16 amoptsprocnum
Definition: amapi.h:222
amparallelrescan_function amparallelrescan
Definition: amapi.h:291
Oid amkeytype
Definition: amapi.h:258
bool ampredlocks
Definition: amapi.h:244
uint16 amsupport
Definition: amapi.h:220
amcostestimate_function amcostestimate
Definition: amapi.h:274
bool amcanorderbyop
Definition: amapi.h:226
amadjustmembers_function amadjustmembers
Definition: amapi.h:279
ambuild_function ambuild
Definition: amapi.h:267
bool amstorage
Definition: amapi.h:240
uint16 amstrategies
Definition: amapi.h:218
bool amoptionalkey
Definition: amapi.h:234
amgettuple_function amgettuple
Definition: amapi.h:282
amcanreturn_function amcanreturn
Definition: amapi.h:273
bool amcanunique
Definition: amapi.h:230
amgetbitmap_function amgetbitmap
Definition: amapi.h:283
amproperty_function amproperty
Definition: amapi.h:276
ambulkdelete_function ambulkdelete
Definition: amapi.h:271
bool amsearcharray
Definition: amapi.h:236
bool amsummarizing
Definition: amapi.h:254
amvalidate_function amvalidate
Definition: amapi.h:278
ammarkpos_function ammarkpos
Definition: amapi.h:285
bool amcanmulticol
Definition: amapi.h:232
bool amusemaintenanceworkmem
Definition: amapi.h:252
ambeginscan_function ambeginscan
Definition: amapi.h:280
bool amcanparallel
Definition: amapi.h:246
amrescan_function amrescan
Definition: amapi.h:281
bool amcanorder
Definition: amapi.h:224
bool amcanbuildparallel
Definition: amapi.h:248
aminitparallelscan_function aminitparallelscan
Definition: amapi.h:290
uint8 amparallelvacuumoptions
Definition: amapi.h:256
aminsertcleanup_function aminsertcleanup
Definition: amapi.h:270
bool amcanbackward
Definition: amapi.h:228
bool amcaninclude
Definition: amapi.h:250
bool amsearchnulls
Definition: amapi.h:238
#define VACUUM_OPTION_PARALLEL_BULKDEL
Definition: vacuum.h:48
#define VACUUM_OPTION_PARALLEL_COND_CLEANUP
Definition: vacuum.h:55

References IndexAmRoutine::amadjustmembers, IndexAmRoutine::ambeginscan, IndexAmRoutine::ambuild, IndexAmRoutine::ambuildempty, IndexAmRoutine::ambuildphasename, IndexAmRoutine::ambulkdelete, IndexAmRoutine::amcanbackward, IndexAmRoutine::amcanbuildparallel, IndexAmRoutine::amcaninclude, IndexAmRoutine::amcanmulticol, IndexAmRoutine::amcanorder, IndexAmRoutine::amcanorderbyop, IndexAmRoutine::amcanparallel, IndexAmRoutine::amcanreturn, IndexAmRoutine::amcanunique, IndexAmRoutine::amclusterable, IndexAmRoutine::amcostestimate, IndexAmRoutine::amendscan, IndexAmRoutine::amestimateparallelscan, IndexAmRoutine::amgetbitmap, IndexAmRoutine::amgettuple, IndexAmRoutine::aminitparallelscan, IndexAmRoutine::aminsert, IndexAmRoutine::aminsertcleanup, IndexAmRoutine::amkeytype, IndexAmRoutine::ammarkpos, IndexAmRoutine::amoptionalkey, IndexAmRoutine::amoptions, IndexAmRoutine::amoptsprocnum, IndexAmRoutine::amparallelrescan, IndexAmRoutine::amparallelvacuumoptions, IndexAmRoutine::ampredlocks, IndexAmRoutine::amproperty, IndexAmRoutine::amrescan, IndexAmRoutine::amrestrpos, IndexAmRoutine::amsearcharray, IndexAmRoutine::amsearchnulls, IndexAmRoutine::amstorage, IndexAmRoutine::amstrategies, IndexAmRoutine::amsummarizing, IndexAmRoutine::amsupport, IndexAmRoutine::amusemaintenanceworkmem, IndexAmRoutine::amvacuumcleanup, IndexAmRoutine::amvalidate, btadjustmembers(), btbeginscan(), btbuild(), btbuildempty(), btbuildphasename(), btbulkdelete(), btcanreturn(), btcostestimate(), btendscan(), btestimateparallelscan(), btgetbitmap(), btgettuple(), btinitparallelscan(), btinsert(), btmarkpos(), BTMaxStrategyNumber, BTNProcs, btoptions(), BTOPTIONS_PROC, btparallelrescan(), btproperty(), btrescan(), btrestrpos(), btvacuumcleanup(), btvalidate(), InvalidOid, makeNode, PG_RETURN_POINTER, VACUUM_OPTION_PARALLEL_BULKDEL, and VACUUM_OPTION_PARALLEL_COND_CLEANUP.
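
Core code never calls bthandler() directly; it is reached through pg_am.amhandler. A minimal sketch (amhandler_oid is assumed to have been looked up beforehand; GetIndexAmRoutine() lives in access/index/amapi.c):

IndexAmRoutine *routine = GetIndexAmRoutine(amhandler_oid);

/* the struct now carries the btree entry points, e.g.: */
Assert(routine->ambeginscan == btbeginscan);
Assert(routine->amgettuple == btgettuple);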

◆ btinitparallelscan()

void btinitparallelscan ( void *  target)

Definition at line 547 of file nbtree.c.

548 {
549  BTParallelScanDesc bt_target = (BTParallelScanDesc) target;
550 
551  SpinLockInit(&bt_target->btps_mutex);
552  bt_target->btps_scanPage = InvalidBlockNumber;
553  bt_target->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
554  ConditionVariableInit(&bt_target->btps_cv);
555 }
void ConditionVariableInit(ConditionVariable *cv)
#define SpinLockInit(lock)
Definition: spin.h:60

References BTPARALLEL_NOT_INITIALIZED, BTParallelScanDescData::btps_cv, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, ConditionVariableInit(), InvalidBlockNumber, and SpinLockInit.

Referenced by bthandler().

◆ btinsert()

bool btinsert ( Relation  rel,
Datum values,
bool isnull,
ItemPointer  ht_ctid,
Relation  heapRel,
IndexUniqueCheck  checkUnique,
bool  indexUnchanged,
IndexInfo indexInfo 
)

Definition at line 182 of file nbtree.c.

187 {
188  bool result;
189  IndexTuple itup;
190 
191  /* generate an index tuple */
192  itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
193  itup->t_tid = *ht_ctid;
194 
195  result = _bt_doinsert(rel, itup, checkUnique, indexUnchanged, heapRel);
196 
197  pfree(itup);
198 
199  return result;
200 }
static Datum values[MAXATTR]
Definition: bootstrap.c:152
IndexTuple index_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition: indextuple.c:44
bool _bt_doinsert(Relation rel, IndexTuple itup, IndexUniqueCheck checkUnique, bool indexUnchanged, Relation heapRel)
Definition: nbtinsert.c:102
ItemPointerData t_tid
Definition: itup.h:37

References _bt_doinsert(), index_form_tuple(), pfree(), RelationGetDescr, IndexTupleData::t_tid, and values.

Referenced by bthandler().
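
btinsert() is installed as amroutine->aminsert and reached through the generic wrapper index_insert() in indexam.c. A hedged sketch (itup_tid, heapRel, indexRel, and indexInfo are assumed to come from the caller's context):

index_insert(indexRel, values, isnull, itup_tid,
             heapRel, UNIQUE_CHECK_YES,  /* enforce a unique index */
             false,                      /* indexUnchanged */
             indexInfo);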

◆ btmarkpos()

void btmarkpos ( IndexScanDesc  scan)

Definition at line 453 of file nbtree.c.

454 {
455  BTScanOpaque so = (BTScanOpaque) scan->opaque;
456 
457  /* There may be an old mark with a pin (but no lock). */
458  BTScanPosUnpinIfPinned(so->markPos);
459 
460  /*
461  * Just record the current itemIndex. If we later step to next page
462  * before releasing the marked position, _bt_steppage makes a full copy of
463  * the currPos struct in markPos. If (as often happens) the mark is moved
464  * before we leave the page, we don't have to do that work.
465  */
466  if (BTScanPosIsValid(so->currPos))
467  so->markItemIndex = so->currPos.itemIndex;
468  else
469  {
470  BTScanPosInvalidate(so->markPos);
471  so->markItemIndex = -1;
472  }
473 }

References BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanPosData::itemIndex, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, and IndexScanDescData::opaque.

Referenced by bthandler().

◆ btparallelrescan()

void btparallelrescan ( IndexScanDesc  scan)

Definition at line 561 of file nbtree.c.

562 {
563  BTParallelScanDesc btscan;
564  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
565 
566  Assert(parallel_scan);
567 
568  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
569  parallel_scan->ps_offset);
570 
571  /*
572  * In theory, we don't need to acquire the spinlock here, because there
573  * shouldn't be any other workers running at this point, but we do so for
574  * consistency.
575  */
576  SpinLockAcquire(&btscan->btps_mutex);
577  btscan->btps_scanPage = InvalidBlockNumber;
578  btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
579  SpinLockRelease(&btscan->btps_mutex);
580 }

References Assert, BTPARALLEL_NOT_INITIALIZED, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, InvalidBlockNumber, OffsetToPointer, IndexScanDescData::parallel_scan, ParallelIndexScanDescData::ps_offset, SpinLockAcquire, and SpinLockRelease.

Referenced by bthandler().

◆ btreevacuumposting()

static BTVacuumPosting btreevacuumposting ( BTVacState vstate,
IndexTuple  posting,
OffsetNumber  updatedoffset,
int *  nremaining 
)
static

Definition at line 1395 of file nbtree.c.

1397 {
1398  int live = 0;
1399  int nitem = BTreeTupleGetNPosting(posting);
1400  ItemPointer items = BTreeTupleGetPosting(posting);
1401  BTVacuumPosting vacposting = NULL;
1402 
1403  for (int i = 0; i < nitem; i++)
1404  {
1405  if (!vstate->callback(items + i, vstate->callback_state))
1406  {
1407  /* Live table TID */
1408  live++;
1409  }
1410  else if (vacposting == NULL)
1411  {
1412  /*
1413  * First dead table TID encountered.
1414  *
1415  * It's now clear that we need to delete one or more dead table
1416  * TIDs, so start maintaining metadata describing how to update
1417  * existing posting list tuple.
1418  */
1419  vacposting = palloc(offsetof(BTVacuumPostingData, deletetids) +
1420  nitem * sizeof(uint16));
1421 
1422  vacposting->itup = posting;
1423  vacposting->updatedoffset = updatedoffset;
1424  vacposting->ndeletedtids = 0;
1425  vacposting->deletetids[vacposting->ndeletedtids++] = i;
1426  }
1427  else
1428  {
1429  /* Second or subsequent dead table TID */
1430  vacposting->deletetids[vacposting->ndeletedtids++] = i;
1431  }
1432  }
1433 
1434  *nremaining = live;
1435  return vacposting;
1436 }
unsigned short uint16
Definition: c.h:505
static uint16 BTreeTupleGetNPosting(IndexTuple posting)
Definition: nbtree.h:518
static ItemPointer BTreeTupleGetPosting(IndexTuple posting)
Definition: nbtree.h:537
IndexBulkDeleteCallback callback
Definition: nbtree.h:334
void * callback_state
Definition: nbtree.h:335
uint16 deletetids[FLEXIBLE_ARRAY_MEMBER]
Definition: nbtree.h:911
uint16 ndeletedtids
Definition: nbtree.h:910
IndexTuple itup
Definition: nbtree.h:906
OffsetNumber updatedoffset
Definition: nbtree.h:907
static ItemArray items
Definition: test_tidstore.c:49

References BTreeTupleGetNPosting(), BTreeTupleGetPosting(), BTVacState::callback, BTVacState::callback_state, BTVacuumPostingData::deletetids, i, items, BTVacuumPostingData::itup, BTVacuumPostingData::ndeletedtids, palloc(), and BTVacuumPostingData::updatedoffset.

Referenced by btvacuumpage().
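
A worked example, with invented values: for a posting list tuple holding five table TIDs where the callback reports the TIDs at offsets 1 and 3 as dead, the function returns metadata describing how to shrink the tuple; if every TID survives it returns NULL and the tuple is left alone.

/* Illustrative only:
 *   posting list TIDs:  [t0, t1, t2, t3, t4]       nitem = 5
 *   callback says dead:      t1,     t3
 * result:
 *   vacposting->ndeletedtids = 2
 *   vacposting->deletetids   = {1, 3}   (offsets into the posting list)
 *   *nremaining              = 3
 */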

◆ btrescan()

void btrescan ( IndexScanDesc  scan,
ScanKey  scankey,
int  nscankeys,
ScanKey  orderbys,
int  norderbys 
)

Definition at line 359 of file nbtree.c.

361 {
362  BTScanOpaque so = (BTScanOpaque) scan->opaque;
363 
364  /* we aren't holding any read locks, but gotta drop the pins */
365  if (BTScanPosIsValid(so->currPos))
366  {
367  /* Before leaving current page, deal with any killed items */
368  if (so->numKilled > 0)
369  _bt_killitems(scan);
370  BTScanPosUnpinIfPinned(so->currPos);
371  BTScanPosInvalidate(so->currPos);
372  }
373 
374  so->markItemIndex = -1;
375  so->needPrimScan = false;
376  so->scanBehind = false;
377  BTScanPosUnpinIfPinned(so->markPos);
378  BTScanPosInvalidate(so->markPos);
379 
380  /*
381  * Allocate tuple workspace arrays, if needed for an index-only scan and
382  * not already done in a previous rescan call. To save on palloc
383  * overhead, both workspaces are allocated as one palloc block; only this
384  * function and btendscan know that.
385  *
386  * NOTE: this data structure also makes it safe to return data from a
387  * "name" column, even though btree name_ops uses an underlying storage
388  * datatype of cstring. The risk there is that "name" is supposed to be
389  * padded to NAMEDATALEN, but the actual index tuple is probably shorter.
390  * However, since we only return data out of tuples sitting in the
391  * currTuples array, a fetch of NAMEDATALEN bytes can at worst pull some
392  * data out of the markTuples array --- running off the end of memory for
393  * a SIGSEGV is not possible. Yeah, this is ugly as sin, but it beats
394  * adding special-case treatment for name_ops elsewhere.
395  */
396  if (scan->xs_want_itup && so->currTuples == NULL)
397  {
398  so->currTuples = (char *) palloc(BLCKSZ * 2);
399  so->markTuples = so->currTuples + BLCKSZ;
400  }
401 
402  /*
403  * Reset the scan keys
404  */
405  if (scankey && scan->numberOfKeys > 0)
406  memmove(scan->keyData,
407  scankey,
408  scan->numberOfKeys * sizeof(ScanKeyData));
409  so->numberOfKeys = 0; /* until _bt_preprocess_keys sets it */
410  so->numArrayKeys = 0; /* ditto */
411 }
struct ScanKeyData * keyData
Definition: relscan.h:122

References _bt_killitems(), BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, if(), IndexScanDescData::keyData, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanOpaqueData::needPrimScan, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numberOfKeys, IndexScanDescData::numberOfKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), BTScanOpaqueData::scanBehind, and IndexScanDescData::xs_want_itup.

Referenced by bthandler().

◆ btrestrpos()

void btrestrpos ( IndexScanDesc  scan)

Definition at line 479 of file nbtree.c.

480 {
481  BTScanOpaque so = (BTScanOpaque) scan->opaque;
482 
483  if (so->markItemIndex >= 0)
484  {
485  /*
486  * The scan has never moved to a new page since the last mark. Just
487  * restore the itemIndex.
488  *
489  * NB: In this case we can't count on anything in so->markPos to be
490  * accurate.
491  */
492  so->currPos.itemIndex = so->markItemIndex;
493  }
494  else
495  {
496  /*
497  * The scan moved to a new page after last mark or restore, and we are
498  * now restoring to the marked page. We aren't holding any read
499  * locks, but if we're still holding the pin for the current position,
500  * we must drop it.
501  */
502  if (BTScanPosIsValid(so->currPos))
503  {
504  /* Before leaving current page, deal with any killed items */
505  if (so->numKilled > 0)
506  _bt_killitems(scan);
507  BTScanPosUnpinIfPinned(so->currPos);
508  }
509 
510  if (BTScanPosIsValid(so->markPos))
511  {
512  /* bump pin on mark buffer for assignment to current buffer */
513  if (BTScanPosIsPinned(so->markPos))
514  IncrBufferRefCount(so->markPos.buf);
515  memcpy(&so->currPos, &so->markPos,
516  offsetof(BTScanPosData, items[1]) +
517  so->markPos.lastItem * sizeof(BTScanPosItem));
518  if (so->currTuples)
519  memcpy(so->currTuples, so->markTuples,
520  so->markPos.nextTupleOffset);
521  /* Reset the scan's array keys (see _bt_steppage for why) */
522  if (so->numArrayKeys)
523  {
524  _bt_start_array_keys(scan, so->currPos.dir);
525  so->needPrimScan = false;
526  }
527  }
528  else
529  BTScanPosInvalidate(so->currPos);
530  }
531 }
void IncrBufferRefCount(Buffer buffer)
Definition: bufmgr.c:4882
#define BTScanPosIsPinned(scanpos)
Definition: nbtree.h:999
void _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir)
Definition: nbtutils.c:1344
Buffer buf
Definition: nbtree.h:953
int nextTupleOffset
Definition: nbtree.h:981
ScanDirection dir
Definition: nbtree.h:975

References _bt_killitems(), _bt_start_array_keys(), BTScanPosInvalidate, BTScanPosIsPinned, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanPosData::buf, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanPosData::dir, if(), IncrBufferRefCount(), BTScanPosData::itemIndex, items, BTScanPosData::lastItem, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanOpaqueData::needPrimScan, BTScanPosData::nextTupleOffset, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numKilled, and IndexScanDescData::opaque.

Referenced by bthandler().
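
btmarkpos() and btrestrpos() are exercised through the generic wrappers index_markpos() and index_restrpos() in indexam.c, most notably by merge join. A hedged sketch (in_new_group and need_group_again are hypothetical caller state):

if (in_new_group)
    index_markpos(scan);        /* dispatches to btmarkpos() */

if (need_group_again)
    index_restrpos(scan);       /* dispatches to btrestrpos() */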

◆ btvacuumcleanup()

IndexBulkDeleteResult* btvacuumcleanup ( IndexVacuumInfo info,
IndexBulkDeleteResult stats 
)

Definition at line 838 of file nbtree.c.

839 {
840  BlockNumber num_delpages;
841 
842  /* No-op in ANALYZE ONLY mode */
843  if (info->analyze_only)
844  return stats;
845 
846  /*
847  * If btbulkdelete was called, we need not do anything (we just maintain
848  * the information used within _bt_vacuum_needs_cleanup() by calling
849  * _bt_set_cleanup_info() below).
850  *
851  * If btbulkdelete was _not_ called, then we have a choice to make: we
852  * must decide whether or not a btvacuumscan() call is needed now (i.e.
853  * whether the ongoing VACUUM operation can entirely avoid a physical scan
854  * of the index). A call to _bt_vacuum_needs_cleanup() decides it for us
855  * now.
856  */
857  if (stats == NULL)
858  {
859  /* Check if VACUUM operation can entirely avoid btvacuumscan() call */
860  if (!_bt_vacuum_needs_cleanup(info->index))
861  return NULL;
862 
863  /*
864  * Since we aren't going to actually delete any leaf items, there's no
865  * need to go through all the vacuum-cycle-ID pushups here.
866  *
867  * Posting list tuples are a source of inaccuracy for cleanup-only
868  * scans. btvacuumscan() will assume that the number of index tuples
869  * from each page can be used as num_index_tuples, even though
870  * num_index_tuples is supposed to represent the number of TIDs in the
871  * index. This naive approach can underestimate the number of tuples
872  * in the index significantly.
873  *
874  * We handle the problem by making num_index_tuples an estimate in
875  * cleanup-only case.
876  */
877  stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
878  btvacuumscan(info, stats, NULL, NULL, 0);
879  stats->estimated_count = true;
880  }
881 
882  /*
883  * Maintain num_delpages value in metapage for _bt_vacuum_needs_cleanup().
884  *
885  * num_delpages is the number of deleted pages now in the index that were
886  * not safe to place in the FSM to be recycled just yet. num_delpages is
887  * greater than 0 only when _bt_pagedel() actually deleted pages during
888  * our call to btvacuumscan(). Even then, _bt_pendingfsm_finalize() must
889  * have failed to place any newly deleted pages in the FSM just moments
890  * ago. (Actually, there are edge cases where recycling of the current
891  * VACUUM's newly deleted pages does not even become safe by the time the
892  * next VACUUM comes around. See nbtree/README.)
893  */
894  Assert(stats->pages_deleted >= stats->pages_free);
895  num_delpages = stats->pages_deleted - stats->pages_free;
896  _bt_set_cleanup_info(info->index, num_delpages);
897 
898  /*
899  * It's quite possible for us to be fooled by concurrent page splits into
900  * double-counting some index tuples, so disbelieve any total that exceeds
901  * the underlying heap's count ... if we know that accurately. Otherwise
902  * this might just make matters worse.
903  */
904  if (!info->estimated_count)
905  {
906  if (stats->num_index_tuples > info->num_heap_tuples)
907  stats->num_index_tuples = info->num_heap_tuples;
908  }
909 
910  return stats;
911 }
uint32 BlockNumber
Definition: block.h:31
void _bt_set_cleanup_info(Relation rel, BlockNumber num_delpages)
Definition: nbtpage.c:232
bool _bt_vacuum_needs_cleanup(Relation rel)
Definition: nbtpage.c:179
bool estimated_count
Definition: genam.h:78
BlockNumber pages_deleted
Definition: genam.h:82
BlockNumber pages_free
Definition: genam.h:83
double num_index_tuples
Definition: genam.h:79
double num_heap_tuples
Definition: genam.h:52
bool analyze_only
Definition: genam.h:48
bool estimated_count
Definition: genam.h:50

References _bt_set_cleanup_info(), _bt_vacuum_needs_cleanup(), IndexVacuumInfo::analyze_only, Assert, btvacuumscan(), IndexVacuumInfo::estimated_count, IndexBulkDeleteResult::estimated_count, IndexVacuumInfo::index, IndexVacuumInfo::num_heap_tuples, IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, and palloc0().

Referenced by bthandler().
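
To make the num_delpages arithmetic concrete (numbers invented): if the scan deleted pages_deleted = 10 pages and _bt_pendingfsm_finalize() managed to place pages_free = 7 of them in the FSM, then num_delpages = 10 - 7 = 3 is recorded in the metapage, and a later _bt_vacuum_needs_cleanup() call uses that value to decide whether the next VACUUM must scan the index.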

◆ btvacuumpage()

static void btvacuumpage ( BTVacState vstate,
BlockNumber  scanblkno 
)
static

Definition at line 1060 of file nbtree.c.

1061 {
1062  IndexVacuumInfo *info = vstate->info;
1063  IndexBulkDeleteResult *stats = vstate->stats;
1064  IndexBulkDeleteCallback callback = vstate->callback;
1065  void *callback_state = vstate->callback_state;
1066  Relation rel = info->index;
1067  Relation heaprel = info->heaprel;
1068  bool attempt_pagedel;
1069  BlockNumber blkno,
1070  backtrack_to;
1071  Buffer buf;
1072  Page page;
1073  BTPageOpaque opaque;
1074 
1075  blkno = scanblkno;
1076 
1077 backtrack:
1078 
1079  attempt_pagedel = false;
1080  backtrack_to = P_NONE;
1081 
1082  /* call vacuum_delay_point while not holding any buffer lock */
1083  vacuum_delay_point();
1084 
1085  /*
1086  * We can't use _bt_getbuf() here because it always applies
1087  * _bt_checkpage(), which will barf on an all-zero page. We want to
1088  * recycle all-zero pages, not fail. Also, we want to use a nondefault
1089  * buffer access strategy.
1090  */
1091  buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
1092  info->strategy);
1093  _bt_lockbuf(rel, buf, BT_READ);
1094  page = BufferGetPage(buf);
1095  opaque = NULL;
1096  if (!PageIsNew(page))
1097  {
1098  _bt_checkpage(rel, buf);
1099  opaque = BTPageGetOpaque(page);
1100  }
1101 
1102  Assert(blkno <= scanblkno);
1103  if (blkno != scanblkno)
1104  {
1105  /*
1106  * We're backtracking.
1107  *
1108  * We followed a right link to a sibling leaf page (a page that
1109  * happens to be from a block located before scanblkno). The only
1110  * case we want to do anything with is a live leaf page having the
1111  * current vacuum cycle ID.
1112  *
1113  * The page had better be in a state that's consistent with what we
1114  * expect. Check for conditions that imply corruption in passing. It
1115  * can't be half-dead because only an interrupted VACUUM process can
1116  * leave pages in that state, so we'd definitely have dealt with it
1117  * back when the page was the scanblkno page (half-dead pages are
1118  * always marked fully deleted by _bt_pagedel(), barring corruption).
1119  */
1120  if (!opaque || !P_ISLEAF(opaque) || P_ISHALFDEAD(opaque))
1121  {
1122  Assert(false);
1123  ereport(LOG,
1124  (errcode(ERRCODE_INDEX_CORRUPTED),
1125  errmsg_internal("right sibling %u of scanblkno %u unexpectedly in an inconsistent state in index \"%s\"",
1126  blkno, scanblkno, RelationGetRelationName(rel))));
1127  _bt_relbuf(rel, buf);
1128  return;
1129  }
1130 
1131  /*
1132  * We may have already processed the page in an earlier call, when the
1133  * page was scanblkno. This happens when the leaf page split occurred
1134  * after the scan began, but before the right sibling page became the
1135  * scanblkno.
1136  *
1137  * Page may also have been deleted by current btvacuumpage() call,
1138  * since _bt_pagedel() sometimes deletes the right sibling page of
1139  * scanblkno in passing (it does so after we decided where to
1140  * backtrack to). We don't need to process this page as a deleted
1141  * page a second time now (in fact, it would be wrong to count it as a
1142  * deleted page in the bulk delete statistics a second time).
1143  */
1144  if (opaque->btpo_cycleid != vstate->cycleid || P_ISDELETED(opaque))
1145  {
1146  /* Done with current scanblkno (and all lower split pages) */
1147  _bt_relbuf(rel, buf);
1148  return;
1149  }
1150  }
1151 
1152  if (!opaque || BTPageIsRecyclable(page, heaprel))
1153  {
1154  /* Okay to recycle this page (which could be leaf or internal) */
1155  RecordFreeIndexPage(rel, blkno);
1156  stats->pages_deleted++;
1157  stats->pages_free++;
1158  }
1159  else if (P_ISDELETED(opaque))
1160  {
1161  /*
1162  * Already deleted page (which could be leaf or internal). Can't
1163  * recycle yet.
1164  */
1165  stats->pages_deleted++;
1166  }
1167  else if (P_ISHALFDEAD(opaque))
1168  {
1169  /* Half-dead leaf page (from interrupted VACUUM) -- finish deleting */
1170  attempt_pagedel = true;
1171 
1172  /*
1173  * _bt_pagedel() will increment both pages_newly_deleted and
1174  * pages_deleted stats in all cases (barring corruption)
1175  */
1176  }
1177  else if (P_ISLEAF(opaque))
1178  {
1179  OffsetNumber deletable[MaxIndexTuplesPerPage];
1180  int ndeletable;
1181  BTVacuumPosting updatable[MaxIndexTuplesPerPage];
1182  int nupdatable;
1183  OffsetNumber offnum,
1184  minoff,
1185  maxoff;
1186  int nhtidsdead,
1187  nhtidslive;
1188 
1189  /*
1190  * Trade in the initial read lock for a full cleanup lock on this
1191  * page. We must get such a lock on every leaf page over the course
1192  * of the vacuum scan, whether or not it actually contains any
1193  * deletable tuples --- see nbtree/README.
1194  */
1195  _bt_upgradelockbufcleanup(rel, buf);
1196 
1197  /*
1198  * Check whether we need to backtrack to earlier pages. What we are
1199  * concerned about is a page split that happened since we started the
1200  * vacuum scan. If the split moved tuples on the right half of the
1201  * split (i.e. the tuples that sort high) to a block that we already
1202  * passed over, then we might have missed the tuples. We need to
1203  * backtrack now. (Must do this before possibly clearing btpo_cycleid
1204  * or deleting scanblkno page below!)
1205  */
1206  if (vstate->cycleid != 0 &&
1207  opaque->btpo_cycleid == vstate->cycleid &&
1208  !(opaque->btpo_flags & BTP_SPLIT_END) &&
1209  !P_RIGHTMOST(opaque) &&
1210  opaque->btpo_next < scanblkno)
1211  backtrack_to = opaque->btpo_next;
1212 
1213  ndeletable = 0;
1214  nupdatable = 0;
1215  minoff = P_FIRSTDATAKEY(opaque);
1216  maxoff = PageGetMaxOffsetNumber(page);
1217  nhtidsdead = 0;
1218  nhtidslive = 0;
1219  if (callback)
1220  {
1221  /* btbulkdelete callback tells us what to delete (or update) */
1222  for (offnum = minoff;
1223  offnum <= maxoff;
1224  offnum = OffsetNumberNext(offnum))
1225  {
1226  IndexTuple itup;
1227 
1228  itup = (IndexTuple) PageGetItem(page,
1229  PageGetItemId(page, offnum));
1230 
1231  Assert(!BTreeTupleIsPivot(itup));
1232  if (!BTreeTupleIsPosting(itup))
1233  {
1234  /* Regular tuple, standard table TID representation */
1235  if (callback(&itup->t_tid, callback_state))
1236  {
1237  deletable[ndeletable++] = offnum;
1238  nhtidsdead++;
1239  }
1240  else
1241  nhtidslive++;
1242  }
1243  else
1244  {
1245  BTVacuumPosting vacposting;
1246  int nremaining;
1247 
1248  /* Posting list tuple */
1249  vacposting = btreevacuumposting(vstate, itup, offnum,
1250  &nremaining);
1251  if (vacposting == NULL)
1252  {
1253  /*
1254  * All table TIDs from the posting tuple remain, so no
1255  * delete or update required
1256  */
1257  Assert(nremaining == BTreeTupleGetNPosting(itup));
1258  }
1259  else if (nremaining > 0)
1260  {
1261 
1262  /*
1263  * Store metadata about posting list tuple in
1264  * updatable array for entire page. Existing tuple
1265  * will be updated during the later call to
1266  * _bt_delitems_vacuum().
1267  */
1268  Assert(nremaining < BTreeTupleGetNPosting(itup));
1269  updatable[nupdatable++] = vacposting;
1270  nhtidsdead += BTreeTupleGetNPosting(itup) - nremaining;
1271  }
1272  else
1273  {
1274  /*
1275  * All table TIDs from the posting list must be
1276  * deleted. We'll delete the index tuple completely
1277  * (no update required).
1278  */
1279  Assert(nremaining == 0);
1280  deletable[ndeletable++] = offnum;
1281  nhtidsdead += BTreeTupleGetNPosting(itup);
1282  pfree(vacposting);
1283  }
1284 
1285  nhtidslive += nremaining;
1286  }
1287  }
1288  }
1289 
1290  /*
1291  * Apply any needed deletes or updates. We issue just one
1292  * _bt_delitems_vacuum() call per page, so as to minimize WAL traffic.
1293  */
1294  if (ndeletable > 0 || nupdatable > 0)
1295  {
1296  Assert(nhtidsdead >= ndeletable + nupdatable);
1297  _bt_delitems_vacuum(rel, buf, deletable, ndeletable, updatable,
1298  nupdatable);
1299 
1300  stats->tuples_removed += nhtidsdead;
1301  /* must recompute maxoff */
1302  maxoff = PageGetMaxOffsetNumber(page);
1303 
1304  /* can't leak memory here */
1305  for (int i = 0; i < nupdatable; i++)
1306  pfree(updatable[i]);
1307  }
1308  else
1309  {
1310  /*
1311  * If the leaf page has been split during this vacuum cycle, it
1312  * seems worth expending a write to clear btpo_cycleid even if we
1313  * don't have any deletions to do. (If we do, _bt_delitems_vacuum
1314  * takes care of this.) This ensures we won't process the page
1315  * again.
1316  *
1317  * We treat this like a hint-bit update because there's no need to
1318  * WAL-log it.
1319  */
1320  Assert(nhtidsdead == 0);
1321  if (vstate->cycleid != 0 &&
1322  opaque->btpo_cycleid == vstate->cycleid)
1323  {
1324  opaque->btpo_cycleid = 0;
1325  MarkBufferDirtyHint(buf, true);
1326  }
1327  }
1328 
1329  /*
1330  * If the leaf page is now empty, try to delete it; else count the
1331  * live tuples (live table TIDs in posting lists are counted as
1332  * separate live tuples). We don't delete when backtracking, though,
1333  * since that would require teaching _bt_pagedel() about backtracking
1334  * (doesn't seem worth adding more complexity to deal with that).
1335  *
1336  * We don't count the number of live TIDs during cleanup-only calls to
1337  * btvacuumscan (i.e. when callback is not set). We count the number
1338  * of index tuples directly instead. This avoids the expense of
1339  * directly examining all of the tuples on each page. VACUUM will
1340  * treat num_index_tuples as an estimate in cleanup-only case, so it
1341  * doesn't matter that this underestimates num_index_tuples
1342  * significantly in some cases.
1343  */
1344  if (minoff > maxoff)
1345  attempt_pagedel = (blkno == scanblkno);
1346  else if (callback)
1347  stats->num_index_tuples += nhtidslive;
1348  else
1349  stats->num_index_tuples += maxoff - minoff + 1;
1350 
1351  Assert(!attempt_pagedel || nhtidslive == 0);
1352  }
1353 
1354  if (attempt_pagedel)
1355  {
1356  MemoryContext oldcontext;
1357 
1358  /* Run pagedel in a temp context to avoid memory leakage */
1359  MemoryContextReset(vstate->pagedelcontext);
1360  oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
1361 
1362  /*
1363  * _bt_pagedel maintains the bulk delete stats on our behalf;
1364  * pages_newly_deleted and pages_deleted are likely to be incremented
1365  * during the call
1366  */
1367  Assert(blkno == scanblkno);
1368  _bt_pagedel(rel, buf, vstate);
1369 
1370  MemoryContextSwitchTo(oldcontext);
1371  /* pagedel released buffer, so we shouldn't */
1372  }
1373  else
1374  _bt_relbuf(rel, buf);
1375 
1376  if (backtrack_to != P_NONE)
1377  {
1378  blkno = backtrack_to;
1379  goto backtrack;
1380  }
1381 }
int Buffer
Definition: buf.h:23
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition: bufmgr.c:4914
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:792
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:408
@ RBM_NORMAL
Definition: bufmgr.h:45
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:351
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:240
static bool PageIsNew(Page page)
Definition: bufpage.h:230
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:369
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1159
int errcode(int sqlerrcode)
Definition: elog.c:859
#define LOG
Definition: elog.h:31
#define ereport(elevel,...)
Definition: elog.h:149
bool(* IndexBulkDeleteCallback)(ItemPointer itemptr, void *state)
Definition: genam.h:87
void RecordFreeIndexPage(Relation rel, BlockNumber freeBlock)
Definition: indexfsm.c:52
IndexTupleData * IndexTuple
Definition: itup.h:53
#define MaxIndexTuplesPerPage
Definition: itup.h:165
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:383
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:1023
void _bt_pagedel(Relation rel, Buffer leafbuf, BTVacState *vstate)
Definition: nbtpage.c:1802
void _bt_delitems_vacuum(Relation rel, Buffer buf, OffsetNumber *deletable, int ndeletable, BTVacuumPosting *updatable, int nupdatable)
Definition: nbtpage.c:1154
void _bt_checkpage(Relation rel, Buffer buf)
Definition: nbtpage.c:797
void _bt_upgradelockbufcleanup(Relation rel, Buffer buf)
Definition: nbtpage.c:1109
void _bt_lockbuf(Relation rel, Buffer buf, int access)
Definition: nbtpage.c:1039
static BTVacuumPosting btreevacuumposting(BTVacState *vstate, IndexTuple posting, OffsetNumber updatedoffset, int *nremaining)
Definition: nbtree.c:1395
#define P_ISHALFDEAD(opaque)
Definition: nbtree.h:224
static bool BTreeTupleIsPivot(IndexTuple itup)
Definition: nbtree.h:480
#define P_ISLEAF(opaque)
Definition: nbtree.h:220
#define BTPageGetOpaque(page)
Definition: nbtree.h:73
#define P_ISDELETED(opaque)
Definition: nbtree.h:222
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:369
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:219
#define BT_READ
Definition: nbtree.h:719
static bool BTPageIsRecyclable(Page page, Relation heaprel)
Definition: nbtree.h:291
static bool BTreeTupleIsPosting(IndexTuple itup)
Definition: nbtree.h:492
#define BTP_SPLIT_END
Definition: nbtree.h:81
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
static char * buf
Definition: pg_test_fsync.c:73
MemoryContextSwitchTo(old_ctx)
#define RelationGetRelationName(relation)
Definition: rel.h:539
@ MAIN_FORKNUM
Definition: relpath.h:50
BlockNumber btpo_next
Definition: nbtree.h:65
uint16 btpo_flags
Definition: nbtree.h:67
BTCycleId btpo_cycleid
Definition: nbtree.h:68
IndexBulkDeleteResult * stats
Definition: nbtree.h:333
BTCycleId cycleid
Definition: nbtree.h:336
MemoryContext pagedelcontext
Definition: nbtree.h:337
IndexVacuumInfo * info
Definition: nbtree.h:332
double tuples_removed
Definition: genam.h:80
BufferAccessStrategy strategy
Definition: genam.h:53
Relation heaprel
Definition: genam.h:47
void vacuum_delay_point(void)
Definition: vacuum.c:2336

References _bt_checkpage(), _bt_delitems_vacuum(), _bt_lockbuf(), _bt_pagedel(), _bt_relbuf(), _bt_upgradelockbufcleanup(), Assert, BT_READ, BTP_SPLIT_END, BTPageGetOpaque, BTPageIsRecyclable(), BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTreeTupleGetNPosting(), BTreeTupleIsPivot(), BTreeTupleIsPosting(), btreevacuumposting(), buf, BufferGetPage(), BTVacState::callback, callback(), BTVacState::callback_state, BTVacState::cycleid, ereport, errcode(), errmsg_internal(), IndexVacuumInfo::heaprel, i, IndexVacuumInfo::index, BTVacState::info, LOG, MAIN_FORKNUM, MarkBufferDirtyHint(), MaxIndexTuplesPerPage, MemoryContextReset(), MemoryContextSwitchTo(), IndexBulkDeleteResult::num_index_tuples, OffsetNumberNext, P_FIRSTDATAKEY, P_ISDELETED, P_ISHALFDEAD, P_ISLEAF, P_NONE, P_RIGHTMOST, BTVacState::pagedelcontext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageIsNew(), IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, pfree(), RBM_NORMAL, ReadBufferExtended(), RecordFreeIndexPage(), RelationGetRelationName, BTVacState::stats, IndexVacuumInfo::strategy, IndexTupleData::t_tid, IndexBulkDeleteResult::tuples_removed, and vacuum_delay_point().

Referenced by btvacuumscan().
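
The posting-list handling in the listing above reduces to a three-way decision on the nremaining count returned by btreevacuumposting(): keep the tuple untouched, queue it in updatable[] for an in-place rewrite, or queue its offset in deletable[] for removal. The following standalone C sketch (PostingAction and classify_posting are hypothetical illustration names, not part of the PostgreSQL API) shows just that triage logic:

#include <assert.h>
#include <stdio.h>

/* Hypothetical three-way outcome for one posting-list tuple */
typedef enum
{
    POSTING_KEEP,               /* all heap TIDs survive: nothing to do */
    POSTING_UPDATE,             /* some TIDs dead: rewrite via updatable[] */
    POSTING_DELETE              /* all TIDs dead: offset goes in deletable[] */
} PostingAction;

/*
 * Classify a posting tuple by comparing the number of surviving TIDs
 * (nremaining) against the tuple's original TID count (nposting),
 * mirroring the if/else chain in btvacuumpage().
 */
static PostingAction
classify_posting(int nremaining, int nposting)
{
    assert(nremaining >= 0 && nremaining <= nposting);

    if (nremaining == nposting)
        return POSTING_KEEP;
    return (nremaining > 0) ? POSTING_UPDATE : POSTING_DELETE;
}

int
main(void)
{
    /* 2 of 5 TIDs survive: the tuple must be rewritten in place */
    printf("%d\n", classify_posting(2, 5) == POSTING_UPDATE);
    /* no TIDs survive: the tuple is deleted outright */
    printf("%d\n", classify_posting(0, 5) == POSTING_DELETE);
    return 0;
}

The live/dead accounting follows directly from this split: nremaining always feeds nhtidslive, while the dead count (nposting - nremaining) feeds nhtidsdead, which is why the listing can assert nhtidsdead >= ndeletable + nupdatable.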

◆ btvacuumscan()

static void btvacuumscan ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats,
IndexBulkDeleteCallback  callback,
void *  callback_state,
BTCycleId  cycleid 
)
static

Definition at line 926 of file nbtree.c.

929 {
930  Relation rel = info->index;
931  BTVacState vstate;
932  BlockNumber num_pages;
933  BlockNumber scanblkno;
934  bool needLock;
935 
936  /*
937  * Reset fields that track information about the entire index now. This
938  * avoids double-counting in the case where a single VACUUM command
939  * requires multiple scans of the index.
940  *
941  * Avoid resetting the tuples_removed and pages_newly_deleted fields here,
942  * since they track information about the VACUUM command, and so must last
943  * across each call to btvacuumscan().
944  *
945  * (Note that pages_free is treated as state about the whole index, not
946  * the current VACUUM. This is appropriate because RecordFreeIndexPage()
947  * calls are idempotent, and get repeated for the same deleted pages in
948  * some scenarios. The point for us is to track the number of recyclable
949  * pages in the index at the end of the VACUUM command.)
950  */
951  stats->num_pages = 0;
952  stats->num_index_tuples = 0;
953  stats->pages_deleted = 0;
954  stats->pages_free = 0;
955 
956  /* Set up info to pass down to btvacuumpage */
957  vstate.info = info;
958  vstate.stats = stats;
959  vstate.callback = callback;
960  vstate.callback_state = callback_state;
961  vstate.cycleid = cycleid;
962 
963  /* Create a temporary memory context to run _bt_pagedel in */
964  vstate.pagedelcontext = AllocSetContextCreate(CurrentMemoryContext,
965  "_bt_pagedel",
966  ALLOCSET_DEFAULT_SIZES);
967 
968  /* Initialize vstate fields used by _bt_pendingfsm_finalize */
969  vstate.bufsize = 0;
970  vstate.maxbufsize = 0;
971  vstate.pendingpages = NULL;
972  vstate.npendingpages = 0;
973  /* Consider applying _bt_pendingfsm_finalize optimization */
974  _bt_pendingfsm_init(rel, &vstate, (callback == NULL));
975 
976  /*
977  * The outer loop iterates over all index pages except the metapage, in
978  * physical order (we hope the kernel will cooperate in providing
979  * read-ahead for speed). It is critical that we visit all leaf pages,
980  * including ones added after we start the scan, else we might fail to
981  * delete some deletable tuples. Hence, we must repeatedly check the
982  * relation length. We must acquire the relation-extension lock while
983  * doing so to avoid a race condition: if someone else is extending the
984  * relation, there is a window where bufmgr/smgr have created a new
985  * all-zero page but it hasn't yet been write-locked by _bt_getbuf(). If
986  * we manage to scan such a page here, we'll improperly assume it can be
987  * recycled. Taking the lock synchronizes things enough to prevent a
988  * problem: either num_pages won't include the new page, or _bt_getbuf
989  * already has write lock on the buffer and it will be fully initialized
990  * before we can examine it. Also, we need not worry if a page is added
991  * immediately after we look; the page splitting code already has
992  * write-lock on the left page before it adds a right page, so we must
993  * already have processed any tuples due to be moved into such a page.
994  *
995  * XXX: Now that new pages are locked with RBM_ZERO_AND_LOCK, I don't
996  * think the use of the extension lock is still required.
997  *
998  * We can skip locking for new or temp relations, however, since no one
999  * else could be accessing them.
1000  */
1001  needLock = !RELATION_IS_LOCAL(rel);
1002 
1003  scanblkno = BTREE_METAPAGE + 1;
1004  for (;;)
1005  {
1006  /* Get the current relation length */
1007  if (needLock)
1008  LockRelationForExtension(rel, ExclusiveLock);
1009  num_pages = RelationGetNumberOfBlocks(rel);
1010  if (needLock)
1011  UnlockRelationForExtension(rel, ExclusiveLock);
1012 
1013  if (info->report_progress)
1014  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
1015  num_pages);
1016 
1017  /* Quit if we've scanned the whole relation */
1018  if (scanblkno >= num_pages)
1019  break;
1020  /* Iterate over pages, then loop back to recheck length */
1021  for (; scanblkno < num_pages; scanblkno++)
1022  {
1023  btvacuumpage(&vstate, scanblkno);
1024  if (info->report_progress)
1025  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1026  scanblkno);
1027  }
1028  }
1029 
1030  /* Set statistics num_pages field to final size of index */
1031  stats->num_pages = num_pages;
1032 
1033  MemoryContextDelete(vstate.pagedelcontext);
1034 
1035  /*
1036  * If there were any calls to _bt_pagedel() during scan of the index then
1037  * see if any of the resulting pages can be placed in the FSM now. When
1038  * it's not safe we'll have to leave it up to a future VACUUM operation.
1039  *
1040  * Finally, if we placed any pages in the FSM (either just now or during
1041  * the scan), forcibly update the upper-level FSM pages to ensure that
1042  * searchers can find them.
1043  */
1044  _bt_pendingfsm_finalize(rel, &vstate);
1045  if (stats->pages_free > 0)
1046  IndexFreeSpaceMapVacuum(rel);
1047 }
void pgstat_progress_update_param(int index, int64 val)
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:281
void IndexFreeSpaceMapVacuum(Relation rel)
Definition: indexfsm.c:71
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:430
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:480
#define ExclusiveLock
Definition: lockdefs.h:42
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
void _bt_pendingfsm_finalize(Relation rel, BTVacState *vstate)
Definition: nbtpage.c:2995
void _bt_pendingfsm_init(Relation rel, BTVacState *vstate, bool cleanuponly)
Definition: nbtpage.c:2954
static void btvacuumpage(BTVacState *vstate, BlockNumber scanblkno)
Definition: nbtree.c:1060
#define PROGRESS_SCAN_BLOCKS_DONE
Definition: progress.h:122
#define PROGRESS_SCAN_BLOCKS_TOTAL
Definition: progress.h:121
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:648
BTPendingFSM * pendingpages
Definition: nbtree.h:344
int npendingpages
Definition: nbtree.h:345
int bufsize
Definition: nbtree.h:342
int maxbufsize
Definition: nbtree.h:343
BlockNumber num_pages
Definition: genam.h:77
bool report_progress
Definition: genam.h:49

References _bt_pendingfsm_finalize(), _bt_pendingfsm_init(), ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, BTREE_METAPAGE, btvacuumpage(), BTVacState::bufsize, BTVacState::callback, callback(), BTVacState::callback_state, CurrentMemoryContext, BTVacState::cycleid, ExclusiveLock, IndexVacuumInfo::index, IndexFreeSpaceMapVacuum(), BTVacState::info, LockRelationForExtension(), BTVacState::maxbufsize, MemoryContextDelete(), BTVacState::npendingpages, IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::num_pages, BTVacState::pagedelcontext, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, BTVacState::pendingpages, pgstat_progress_update_param(), PROGRESS_SCAN_BLOCKS_DONE, PROGRESS_SCAN_BLOCKS_TOTAL, RELATION_IS_LOCAL, RelationGetNumberOfBlocks, IndexVacuumInfo::report_progress, BTVacState::stats, and UnlockRelationForExtension().

Referenced by btbulkdelete(), and btvacuumcleanup().
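
The outer loop of btvacuumscan() illustrates a reusable pattern: scan in physical order up to the current relation end, then re-read the length and loop again in case the index grew mid-scan, stopping only once a pass finds nothing new. A minimal standalone C sketch of that loop shape (get_num_blocks and process_block are stand-in stubs, not the PostgreSQL buffer-manager API):

#include <stdint.h>
#include <stdio.h>

typedef uint32_t BlockNumber;

/* Stub standing in for RelationGetNumberOfBlocks(); a concurrent
 * extender could increase this between passes. */
static BlockNumber nblocks = 8;

static BlockNumber
get_num_blocks(void)
{
    return nblocks;
}

/* Stub standing in for btvacuumpage() */
static void
process_block(BlockNumber blkno)
{
    printf("vacuuming block %u\n", (unsigned) blkno);
}

int
main(void)
{
    BlockNumber scanblkno = 1;  /* skip block 0, as nbtree skips its metapage */

    for (;;)
    {
        /* Re-read the length on every pass; the relation may have grown */
        BlockNumber num_pages = get_num_blocks();

        if (scanblkno >= num_pages)
            break;              /* a full pass found no new blocks: done */
        for (; scanblkno < num_pages; scanblkno++)
            process_block(scanblkno);
        /* fall through and recheck the length before concluding */
    }
    return 0;
}

In the real function the length re-read is bracketed by LockRelationForExtension()/UnlockRelationForExtension() so that a half-created page at the end of the relation cannot be mistaken for a recyclable one; the sketch omits that locking.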