PostgreSQL Source Code (git master)
nbtree.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "access/relscan.h"
#include "commands/progress.h"
#include "commands/vacuum.h"
#include "nodes/execnodes.h"
#include "pgstat.h"
#include "storage/bulk_write.h"
#include "storage/condition_variable.h"
#include "storage/indexfsm.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "utils/fmgrprotos.h"
#include "utils/index_selfuncs.h"
#include "utils/memutils.h"
[Include dependency graph for nbtree.c]

Data Structures

struct  BTParallelScanDescData
 

Typedefs

typedef struct BTParallelScanDescData BTParallelScanDescData
 
typedef struct BTParallelScanDescData * BTParallelScanDesc
 

Enumerations

enum  BTPS_State {
  BTPARALLEL_NOT_INITIALIZED , BTPARALLEL_NEED_PRIMSCAN , BTPARALLEL_ADVANCING , BTPARALLEL_IDLE ,
  BTPARALLEL_DONE
}
 

Functions

static void btvacuumscan (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid)
 
static void btvacuumpage (BTVacState *vstate, BlockNumber scanblkno)
 
static BTVacuumPosting btreevacuumposting (BTVacState *vstate, IndexTuple posting, OffsetNumber updatedoffset, int *nremaining)
 
Datum bthandler (PG_FUNCTION_ARGS)
 
void btbuildempty (Relation index)
 
bool btinsert (Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo)
 
bool btgettuple (IndexScanDesc scan, ScanDirection dir)
 
int64 btgetbitmap (IndexScanDesc scan, TIDBitmap *tbm)
 
IndexScanDesc btbeginscan (Relation rel, int nkeys, int norderbys)
 
void btrescan (IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
 
void btendscan (IndexScanDesc scan)
 
void btmarkpos (IndexScanDesc scan)
 
void btrestrpos (IndexScanDesc scan)
 
Size btestimateparallelscan (int nkeys, int norderbys)
 
void btinitparallelscan (void *target)
 
void btparallelrescan (IndexScanDesc scan)
 
bool _bt_parallel_seize (IndexScanDesc scan, BlockNumber *next_scan_page, BlockNumber *last_curr_page, bool first)
 
void _bt_parallel_release (IndexScanDesc scan, BlockNumber next_scan_page, BlockNumber curr_page)
 
void _bt_parallel_done (IndexScanDesc scan)
 
void _bt_parallel_primscan_schedule (IndexScanDesc scan, BlockNumber curr_page)
 
IndexBulkDeleteResult * btbulkdelete (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
 
IndexBulkDeleteResult * btvacuumcleanup (IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 
bool btcanreturn (Relation index, int attno)
 
int btgettreeheight (Relation rel)
 

Typedef Documentation

◆ BTParallelScanDesc

Definition at line 82 of file nbtree.c.

◆ BTParallelScanDescData

Enumeration Type Documentation

◆ BTPS_State

enum BTPS_State
Enumerator
BTPARALLEL_NOT_INITIALIZED 
BTPARALLEL_NEED_PRIMSCAN 
BTPARALLEL_ADVANCING 
BTPARALLEL_IDLE 
BTPARALLEL_DONE 

Definition at line 51 of file nbtree.c.

52 {
53   BTPARALLEL_NOT_INITIALIZED,
54   BTPARALLEL_NEED_PRIMSCAN,
55   BTPARALLEL_ADVANCING,
56   BTPARALLEL_IDLE,
57   BTPARALLEL_DONE,
58 } BTPS_State;
BTPS_State
Definition: nbtree.c:52
@ BTPARALLEL_ADVANCING
Definition: nbtree.c:55
@ BTPARALLEL_NEED_PRIMSCAN
Definition: nbtree.c:54
@ BTPARALLEL_NOT_INITIALIZED
Definition: nbtree.c:53
@ BTPARALLEL_IDLE
Definition: nbtree.c:56
@ BTPARALLEL_DONE
Definition: nbtree.c:57
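
The five states implement a small hand-off protocol among the backends sharing one parallel scan. The sketch below is inferred from the functions documented on this page; it is not a diagram that appears in nbtree.c itself.

/*
 * BTPS_State transitions (inferred, not verbatim):
 *
 *   BTPARALLEL_NOT_INITIALIZED   set by btinitparallelscan()/btparallelrescan()
 *        |
 *        v  _bt_parallel_seize()
 *   BTPARALLEL_ADVANCING         exactly one backend owns the scan
 *        |
 *        v  _bt_parallel_release()
 *   BTPARALLEL_IDLE              next page published; any backend may seize
 *        |               \
 *        |                \ _bt_parallel_primscan_schedule()
 *        v                 v
 *   BTPARALLEL_DONE      BTPARALLEL_NEED_PRIMSCAN
 *   (_bt_parallel_done)  (consumed by the next _bt_parallel_seize(first=true))
 */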

Function Documentation

◆ _bt_parallel_done()

void _bt_parallel_done ( IndexScanDesc  scan)

Definition at line 774 of file nbtree.c.

775 {
776  BTScanOpaque so = (BTScanOpaque) scan->opaque;
777  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
778  BTParallelScanDesc btscan;
779  bool status_changed = false;
780 
781  Assert(BTScanPosIsValid(so->currPos));
782 
783  /* Do nothing, for non-parallel scans */
784  if (parallel_scan == NULL)
785  return;
786 
787  /*
788  * Should not mark parallel scan done when there's still a pending
789  * primitive index scan
790  */
791  if (so->needPrimScan)
792  return;
793 
794  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
795  parallel_scan->ps_offset);
796 
797  /*
798  * Mark the parallel scan as done, unless some other process did so
799  * already
800  */
801  SpinLockAcquire(&btscan->btps_mutex);
802  Assert(btscan->btps_pageStatus != BTPARALLEL_NEED_PRIMSCAN);
803  if (btscan->btps_pageStatus != BTPARALLEL_DONE)
804  {
805  btscan->btps_pageStatus = BTPARALLEL_DONE;
806  status_changed = true;
807  }
808  SpinLockRelease(&btscan->btps_mutex);
809 
810  /* wake up all the workers associated with this parallel scan */
811  if (status_changed)
812  ConditionVariableBroadcast(&btscan->btps_cv);
813 }
#define OffsetToPointer(base, offset)
Definition: c.h:777
#define Assert(condition)
Definition: c.h:863
void ConditionVariableBroadcast(ConditionVariable *cv)
struct BTParallelScanDescData * BTParallelScanDesc
Definition: nbtree.c:82
#define BTScanPosIsValid(scanpos)
Definition: nbtree.h:1010
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:1073
#define SpinLockRelease(lock)
Definition: spin.h:61
#define SpinLockAcquire(lock)
Definition: spin.h:59
bool needPrimScan
Definition: nbtree.h:1040
BTScanPosData currPos
Definition: nbtree.h:1069
struct ParallelIndexScanDescData * parallel_scan
Definition: relscan.h:189

References Assert, BTPARALLEL_DONE, BTPARALLEL_NEED_PRIMSCAN, BTScanPosIsValid, ConditionVariableBroadcast(), BTScanOpaqueData::currPos, BTScanOpaqueData::needPrimScan, OffsetToPointer, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_endpoint(), _bt_first(), _bt_parallel_seize(), _bt_readnextpage(), and _bt_start_prim_scan().

◆ _bt_parallel_primscan_schedule()

void _bt_parallel_primscan_schedule ( IndexScanDesc  scan,
BlockNumber  curr_page 
)

Definition at line 824 of file nbtree.c.

825 {
826  BTScanOpaque so = (BTScanOpaque) scan->opaque;
827  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
828  BTParallelScanDesc btscan;
829 
830  Assert(so->numArrayKeys);
831 
832  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
833  parallel_scan->ps_offset);
834 
835  SpinLockAcquire(&btscan->btps_mutex);
836  if (btscan->btps_lastCurrPage == curr_page &&
837  btscan->btps_pageStatus == BTPARALLEL_IDLE)
838  {
839  btscan->btps_nextScanPage = InvalidBlockNumber;
840  btscan->btps_lastCurrPage = InvalidBlockNumber;
841  btscan->btps_pageStatus = BTPARALLEL_NEED_PRIMSCAN;
842 
843  /* Serialize scan's current array keys */
844  for (int i = 0; i < so->numArrayKeys; i++)
845  {
846  BTArrayKeyInfo *array = &so->arrayKeys[i];
847 
848  btscan->btps_arrElems[i] = array->cur_elem;
849  }
850  }
851  SpinLockRelease(&btscan->btps_mutex);
852 }
#define InvalidBlockNumber
Definition: block.h:33
int i
Definition: isn.c:72
BTArrayKeyInfo * arrayKeys
Definition: nbtree.h:1043

References BTScanOpaqueData::arrayKeys, Assert, BTPARALLEL_IDLE, BTPARALLEL_NEED_PRIMSCAN, BTArrayKeyInfo::cur_elem, i, InvalidBlockNumber, BTScanOpaqueData::numArrayKeys, OffsetToPointer, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_advance_array_keys().

◆ _bt_parallel_release()

void _bt_parallel_release ( IndexScanDesc  scan,
BlockNumber  next_scan_page,
BlockNumber  curr_page 
)

Definition at line 747 of file nbtree.c.

749 {
750  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
751  BTParallelScanDesc btscan;
752 
753  Assert(BlockNumberIsValid(next_scan_page));
754 
755  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
756  parallel_scan->ps_offset);
757 
758  SpinLockAcquire(&btscan->btps_mutex);
759  btscan->btps_nextScanPage = next_scan_page;
760  btscan->btps_lastCurrPage = curr_page;
761  btscan->btps_pageStatus = BTPARALLEL_IDLE;
762  SpinLockRelease(&btscan->btps_mutex);
763  ConditionVariableSignal(&btscan->btps_cv);
764 }
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition: block.h:71
void ConditionVariableSignal(ConditionVariable *cv)
slock_t btps_mutex
Definition: nbtree.c:72
BTPS_State btps_pageStatus
Definition: nbtree.c:69
BlockNumber btps_lastCurrPage
Definition: nbtree.c:67
ConditionVariable btps_cv
Definition: nbtree.c:73
BlockNumber btps_nextScanPage
Definition: nbtree.c:66

References Assert, BlockNumberIsValid(), BTPARALLEL_IDLE, BTParallelScanDescData::btps_cv, BTParallelScanDescData::btps_lastCurrPage, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_nextScanPage, BTParallelScanDescData::btps_pageStatus, ConditionVariableSignal(), OffsetToPointer, IndexScanDescData::parallel_scan, ParallelIndexScanDescData::ps_offset, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_readnextpage(), and _bt_readpage().

◆ _bt_parallel_seize()

bool _bt_parallel_seize ( IndexScanDesc  scan,
BlockNumber next_scan_page,
BlockNumber last_curr_page,
bool  first 
)

Definition at line 605 of file nbtree.c.

607 {
608  BTScanOpaque so = (BTScanOpaque) scan->opaque;
609  bool exit_loop = false,
610  status = true,
611  endscan = false;
612  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
613  BTParallelScanDesc btscan;
614 
615  *next_scan_page = InvalidBlockNumber;
616  *last_curr_page = InvalidBlockNumber;
617 
618  /*
619  * Reset so->currPos, and initialize moreLeft/moreRight such that the next
620  * call to _bt_readnextpage treats this backend similarly to a serial
621  * backend that steps from *last_curr_page to *next_scan_page (unless this
622  * backend's so->currPos is initialized by _bt_readfirstpage before then).
623  */
624  BTScanPosInvalidate(so->currPos);
625  so->currPos.moreLeft = so->currPos.moreRight = true;
626 
627  if (first)
628  {
629  /*
630  * Initialize array related state when called from _bt_first, assuming
631  * that this will be the first primitive index scan for the scan
632  */
633  so->needPrimScan = false;
634  so->scanBehind = false;
635  so->oppositeDirCheck = false;
636  }
637  else
638  {
639  /*
640  * Don't attempt to seize the scan when it requires another primitive
641  * index scan, since caller's backend cannot start it right now
642  */
643  if (so->needPrimScan)
644  return false;
645  }
646 
647  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
648  parallel_scan->ps_offset);
649 
650  while (1)
651  {
652  SpinLockAcquire(&btscan->btps_mutex);
653 
654  if (btscan->btps_pageStatus == BTPARALLEL_DONE)
655  {
656  /* We're done with this parallel index scan */
657  status = false;
658  }
659  else if (btscan->btps_pageStatus == BTPARALLEL_IDLE &&
660  btscan->btps_nextScanPage == P_NONE)
661  {
662  /* End this parallel index scan */
663  status = false;
664  endscan = true;
665  }
666  else if (btscan->btps_pageStatus == BTPARALLEL_NEED_PRIMSCAN)
667  {
668  Assert(so->numArrayKeys);
669 
670  if (first)
671  {
672  /* Can start scheduled primitive scan right away, so do so */
673  btscan->btps_pageStatus = BTPARALLEL_ADVANCING;
674  for (int i = 0; i < so->numArrayKeys; i++)
675  {
676  BTArrayKeyInfo *array = &so->arrayKeys[i];
677  ScanKey skey = &so->keyData[array->scan_key];
678 
679  array->cur_elem = btscan->btps_arrElems[i];
680  skey->sk_argument = array->elem_values[array->cur_elem];
681  }
682  exit_loop = true;
683  }
684  else
685  {
686  /*
687  * Don't attempt to seize the scan when it requires another
688  * primitive index scan, since caller's backend cannot start
689  * it right now
690  */
691  status = false;
692  }
693 
694  /*
695  * Either way, update backend local state to indicate that a
696  * pending primitive scan is required
697  */
698  so->needPrimScan = true;
699  so->scanBehind = false;
700  so->oppositeDirCheck = false;
701  }
702  else if (btscan->btps_pageStatus != BTPARALLEL_ADVANCING)
703  {
704  /*
705  * We have successfully seized control of the scan for the purpose
706  * of advancing it to a new page!
707  */
708  btscan->btps_pageStatus = BTPARALLEL_ADVANCING;
709  Assert(btscan->btps_nextScanPage != P_NONE);
710  *next_scan_page = btscan->btps_nextScanPage;
711  *last_curr_page = btscan->btps_lastCurrPage;
712  exit_loop = true;
713  }
714  SpinLockRelease(&btscan->btps_mutex);
715  if (exit_loop || !status)
716  break;
717  ConditionVariableSleep(&btscan->btps_cv, WAIT_EVENT_BTREE_PAGE);
718  }
719  ConditionVariableCancelSleep();
720 
721  /* When the scan has reached the rightmost (or leftmost) page, end it */
722  if (endscan)
723  _bt_parallel_done(scan);
724 
725  return status;
726 }
bool ConditionVariableCancelSleep(void)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
void _bt_parallel_done(IndexScanDesc scan)
Definition: nbtree.c:774
#define P_NONE
Definition: nbtree.h:212
#define BTScanPosInvalidate(scanpos)
Definition: nbtree.h:1016
Datum * elem_values
Definition: nbtree.h:1028
bool oppositeDirCheck
Definition: nbtree.h:1042
ScanKey keyData
Definition: nbtree.h:1036
bool moreRight
Definition: nbtree.h:975
bool moreLeft
Definition: nbtree.h:974
Datum sk_argument
Definition: skey.h:72

References _bt_parallel_done(), BTScanOpaqueData::arrayKeys, Assert, BTPARALLEL_ADVANCING, BTPARALLEL_DONE, BTPARALLEL_IDLE, BTPARALLEL_NEED_PRIMSCAN, BTScanPosInvalidate, ConditionVariableCancelSleep(), ConditionVariableSleep(), BTArrayKeyInfo::cur_elem, BTScanOpaqueData::currPos, BTArrayKeyInfo::elem_values, i, InvalidBlockNumber, BTScanOpaqueData::keyData, BTScanPosData::moreLeft, BTScanPosData::moreRight, BTScanOpaqueData::needPrimScan, BTScanOpaqueData::numArrayKeys, OffsetToPointer, IndexScanDescData::opaque, BTScanOpaqueData::oppositeDirCheck, P_NONE, IndexScanDescData::parallel_scan, BTArrayKeyInfo::scan_key, BTScanOpaqueData::scanBehind, ScanKeyData::sk_argument, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_first(), and _bt_readnextpage().
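
Together with _bt_parallel_release(), the seize call implements a simple ownership hand-off. A hedged sketch of the calling pattern follows; the variable sibling and the read_page_and_get_right_link() step are illustrative placeholders, not code lifted from nbtsearch.c.

BlockNumber next_scan_page;
BlockNumber last_curr_page;
BlockNumber sibling;

/* Try to become the backend that advances the shared scan */
if (!_bt_parallel_seize(scan, &next_scan_page, &last_curr_page, false))
    return false;   /* scan is done, or a primitive scan is pending */

/*
 * We now own the scan (state is BTPARALLEL_ADVANCING).  Read
 * next_scan_page, note its right sibling, and publish the sibling so
 * other workers can proceed while we process this page's items.
 */
sibling = read_page_and_get_right_link(scan, next_scan_page);  /* illustrative */
_bt_parallel_release(scan, sibling, next_scan_page);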

◆ btbeginscan()

IndexScanDesc btbeginscan ( Relation  rel,
int  nkeys,
int  norderbys 
)

Definition at line 312 of file nbtree.c.

313 {
314  IndexScanDesc scan;
315  BTScanOpaque so;
316 
317  /* no order by operators allowed */
318  Assert(norderbys == 0);
319 
320  /* get the scan */
321  scan = RelationGetIndexScan(rel, nkeys, norderbys);
322 
323  /* allocate private workspace */
324  so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
325  BTScanPosInvalidate(so->currPos);
326  BTScanPosInvalidate(so->markPos);
327  if (scan->numberOfKeys > 0)
328  so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
329  else
330  so->keyData = NULL;
331 
332  so->needPrimScan = false;
333  so->scanBehind = false;
334  so->oppositeDirCheck = false;
335  so->arrayKeys = NULL;
336  so->orderProcs = NULL;
337  so->arrayContext = NULL;
338 
339  so->killedItems = NULL; /* until needed */
340  so->numKilled = 0;
341 
342  /*
343  * We don't know yet whether the scan will be index-only, so we do not
344  * allocate the tuple workspace arrays until btrescan. However, we set up
345  * scan->xs_itupdesc whether we'll need it or not, since that's so cheap.
346  */
347  so->currTuples = so->markTuples = NULL;
348 
349  scan->xs_itupdesc = RelationGetDescr(rel);
350 
351  scan->opaque = so;
352 
353  return scan;
354 }
IndexScanDesc RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
Definition: genam.c:80
void * palloc(Size size)
Definition: mcxt.c:1317
#define RelationGetDescr(relation)
Definition: rel.h:531
ScanKeyData * ScanKey
Definition: skey.h:75
char * markTuples
Definition: nbtree.h:1057
FmgrInfo * orderProcs
Definition: nbtree.h:1044
int * killedItems
Definition: nbtree.h:1048
char * currTuples
Definition: nbtree.h:1056
BTScanPosData markPos
Definition: nbtree.h:1070
MemoryContext arrayContext
Definition: nbtree.h:1045
struct TupleDescData * xs_itupdesc
Definition: relscan.h:166

References BTScanOpaqueData::arrayContext, BTScanOpaqueData::arrayKeys, Assert, BTScanPosInvalidate, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanOpaqueData::keyData, BTScanOpaqueData::killedItems, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanOpaqueData::needPrimScan, IndexScanDescData::numberOfKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, BTScanOpaqueData::oppositeDirCheck, BTScanOpaqueData::orderProcs, palloc(), RelationGetDescr, RelationGetIndexScan(), BTScanOpaqueData::scanBehind, and IndexScanDescData::xs_itupdesc.

Referenced by bthandler().

◆ btbuildempty()

void btbuildempty ( Relation  index)

Definition at line 159 of file nbtree.c.

160 {
161  bool allequalimage = _bt_allequalimage(index, false);
162  BulkWriteState *bulkstate;
163  BulkWriteBuffer metabuf;
164 
165  bulkstate = smgr_bulk_start_rel(index, INIT_FORKNUM);
166 
167  /* Construct metapage. */
168  metabuf = smgr_bulk_get_buf(bulkstate);
169  _bt_initmetapage((Page) metabuf, P_NONE, 0, allequalimage);
170  smgr_bulk_write(bulkstate, BTREE_METAPAGE, metabuf, true);
171 
172  smgr_bulk_finish(bulkstate);
173 }
Pointer Page
Definition: bufpage.h:81
void smgr_bulk_write(BulkWriteState *bulkstate, BlockNumber blocknum, BulkWriteBuffer buf, bool page_std)
Definition: bulk_write.c:323
BulkWriteBuffer smgr_bulk_get_buf(BulkWriteState *bulkstate)
Definition: bulk_write.c:347
void smgr_bulk_finish(BulkWriteState *bulkstate)
Definition: bulk_write.c:130
BulkWriteState * smgr_bulk_start_rel(Relation rel, ForkNumber forknum)
Definition: bulk_write.c:87
void _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level, bool allequalimage)
Definition: nbtpage.c:67
#define BTREE_METAPAGE
Definition: nbtree.h:148
bool _bt_allequalimage(Relation rel, bool debugmessage)
Definition: nbtutils.c:5142
@ INIT_FORKNUM
Definition: relpath.h:61
Definition: type.h:95

References _bt_allequalimage(), _bt_initmetapage(), BTREE_METAPAGE, INIT_FORKNUM, P_NONE, smgr_bulk_finish(), smgr_bulk_get_buf(), smgr_bulk_start_rel(), and smgr_bulk_write().

Referenced by bthandler().

◆ btbulkdelete()

IndexBulkDeleteResult* btbulkdelete ( IndexVacuumInfo info,
IndexBulkDeleteResult stats,
IndexBulkDeleteCallback  callback,
void *  callback_state 
)

Definition at line 862 of file nbtree.c.

864 {
865  Relation rel = info->index;
866  BTCycleId cycleid;
867 
868  /* allocate stats if first time through, else re-use existing struct */
869  if (stats == NULL)
870  stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
871 
872  /* Establish the vacuum cycle ID to use for this scan */
873  /* The ENSURE stuff ensures we clean up shared memory on failure */
874  PG_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
875  {
876  cycleid = _bt_start_vacuum(rel);
877 
878  btvacuumscan(info, stats, callback, callback_state, cycleid);
879  }
880  PG_END_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
881  _bt_end_vacuum(rel);
882 
883  return stats;
884 }
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:47
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:52
void * palloc0(Size size)
Definition: mcxt.c:1347
static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid)
Definition: nbtree.c:980
uint16 BTCycleId
Definition: nbtree.h:29
void _bt_end_vacuum(Relation rel)
Definition: nbtutils.c:4486
void _bt_end_vacuum_callback(int code, Datum arg)
Definition: nbtutils.c:4514
BTCycleId _bt_start_vacuum(Relation rel)
Definition: nbtutils.c:4429
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
Relation index
Definition: genam.h:46
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
Definition: test_ifaddrs.c:46

References _bt_end_vacuum(), _bt_end_vacuum_callback(), _bt_start_vacuum(), btvacuumscan(), callback(), IndexVacuumInfo::index, palloc0(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, and PointerGetDatum().

Referenced by bthandler().

◆ btcanreturn()

bool btcanreturn ( Relation  index,
int  attno 
)

Definition at line 1498 of file nbtree.c.

1499 {
1500  return true;
1501 }

Referenced by bthandler().

◆ btendscan()

void btendscan ( IndexScanDesc  scan)

Definition at line 417 of file nbtree.c.

418 {
419  BTScanOpaque so = (BTScanOpaque) scan->opaque;
420 
421  /* we aren't holding any read locks, but gotta drop the pins */
422  if (BTScanPosIsValid(so->currPos))
423  {
424  /* Before leaving current page, deal with any killed items */
425  if (so->numKilled > 0)
426  _bt_killitems(scan);
427  BTScanPosUnpinIfPinned(so->currPos);
428  }
429 
430  so->markItemIndex = -1;
431  BTScanPosUnpinIfPinned(so->markPos);
432 
433  /* No need to invalidate positions, the RAM is about to be freed. */
434 
435  /* Release storage */
436  if (so->keyData != NULL)
437  pfree(so->keyData);
438  /* so->arrayKeys and so->orderProcs are in arrayContext */
439  if (so->arrayContext != NULL)
440  MemoryContextDelete(so->arrayContext);
441  if (so->killedItems != NULL)
442  pfree(so->killedItems);
443  if (so->currTuples != NULL)
444  pfree(so->currTuples);
445  /* so->markTuples should not be pfree'd, see btrescan */
446  pfree(so);
447 }
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:76
void pfree(void *pointer)
Definition: mcxt.c:1521
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:454
#define BTScanPosUnpinIfPinned(scanpos)
Definition: nbtree.h:1004
void _bt_killitems(IndexScanDesc scan)
Definition: nbtutils.c:4178

References _bt_killitems(), BTScanOpaqueData::arrayContext, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, if(), BTScanOpaqueData::keyData, BTScanOpaqueData::killedItems, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, MemoryContextDelete(), BTScanOpaqueData::numKilled, IndexScanDescData::opaque, and pfree().

Referenced by bthandler().

◆ btestimateparallelscan()

Size btestimateparallelscan ( int  nkeys,
int  norderbys 
)

Definition at line 537 of file nbtree.c.

538 {
539  /* Pessimistically assume all input scankeys will be output with arrays */
540  return offsetof(BTParallelScanDescData, btps_arrElems) + sizeof(int) * nkeys;
541 }

References BTParallelScanDescData::btps_arrElems.

Referenced by bthandler().
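
The arithmetic is worth spelling out: the estimate is the fixed-size header of BTParallelScanDescData plus one btps_arrElems slot per scan key, on the pessimistic assumption that every key turns out to be an array key. For example (illustrative values only):

/* For a parallel scan with two scan keys and no ordering operators: */
Size space = btestimateparallelscan(2, 0);
/*
 * space == offsetof(BTParallelScanDescData, btps_arrElems)
 *          + 2 * sizeof(int)
 * i.e. room to serialize cur_elem for up to two array keys.
 */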

◆ btgetbitmap()

int64 btgetbitmap ( IndexScanDesc  scan,
TIDBitmap tbm 
)

Definition at line 266 of file nbtree.c.

267 {
268  BTScanOpaque so = (BTScanOpaque) scan->opaque;
269  int64 ntids = 0;
270  ItemPointer heapTid;
271 
272  /* Each loop iteration performs another primitive index scan */
273  do
274  {
275  /* Fetch the first page & tuple */
276  if (_bt_first(scan, ForwardScanDirection))
277  {
278  /* Save tuple ID, and continue scanning */
279  heapTid = &scan->xs_heaptid;
280  tbm_add_tuples(tbm, heapTid, 1, false);
281  ntids++;
282 
283  for (;;)
284  {
285  /*
286  * Advance to next tuple within page. This is the same as the
287  * easy case in _bt_next().
288  */
289  if (++so->currPos.itemIndex > so->currPos.lastItem)
290  {
291  /* let _bt_next do the heavy lifting */
292  if (!_bt_next(scan, ForwardScanDirection))
293  break;
294  }
295 
296  /* Save tuple ID, and continue scanning */
297  heapTid = &so->currPos.items[so->currPos.itemIndex].heapTid;
298  tbm_add_tuples(tbm, heapTid, 1, false);
299  ntids++;
300  }
301  }
302  /* Now see if we need another primitive index scan */
303  } while (so->numArrayKeys && _bt_start_prim_scan(scan, ForwardScanDirection));
304 
305  return ntids;
306 }
bool _bt_first(IndexScanDesc scan, ScanDirection dir)
Definition: nbtsearch.c:882
bool _bt_next(IndexScanDesc scan, ScanDirection dir)
Definition: nbtsearch.c:1461
bool _bt_start_prim_scan(IndexScanDesc scan, ScanDirection dir)
Definition: nbtutils.c:1682
@ ForwardScanDirection
Definition: sdir.h:28
int lastItem
Definition: nbtree.h:985
BTScanPosItem items[MaxTIDsPerBTreePage]
Definition: nbtree.h:988
int itemIndex
Definition: nbtree.h:986
ItemPointerData heapTid
Definition: nbtree.h:946
ItemPointerData xs_heaptid
Definition: relscan.h:170
void tbm_add_tuples(TIDBitmap *tbm, const ItemPointer tids, int ntids, bool recheck)
Definition: tidbitmap.c:377

References _bt_first(), _bt_next(), BTScanOpaqueData::currPos, ForwardScanDirection, BTScanPosItem::heapTid, if(), BTScanPosData::itemIndex, BTScanPosData::items, BTScanPosData::lastItem, IndexScanDescData::opaque, tbm_add_tuples(), and IndexScanDescData::xs_heaptid.

Referenced by bthandler().

◆ btgettreeheight()

int btgettreeheight ( Relation  rel)

Definition at line 1507 of file nbtree.c.

1508 {
1509  return _bt_getrootheight(rel);
1510 }
int _bt_getrootheight(Relation rel)
Definition: nbtpage.c:675

References _bt_getrootheight().

Referenced by bthandler().

◆ btgettuple()

bool btgettuple ( IndexScanDesc  scan,
ScanDirection  dir 
)

Definition at line 206 of file nbtree.c.

207 {
208  BTScanOpaque so = (BTScanOpaque) scan->opaque;
209  bool res;
210 
211  /* btree indexes are never lossy */
212  scan->xs_recheck = false;
213 
214  /* Each loop iteration performs another primitive index scan */
215  do
216  {
217  /*
218  * If we've already initialized this scan, we can just advance it in
219  * the appropriate direction. If we haven't done so yet, we call
220  * _bt_first() to get the first item in the scan.
221  */
222  if (!BTScanPosIsValid(so->currPos))
223  res = _bt_first(scan, dir);
224  else
225  {
226  /*
227  * Check to see if we should kill the previously-fetched tuple.
228  */
229  if (scan->kill_prior_tuple)
230  {
231  /*
232  * Yes, remember it for later. (We'll deal with all such
233  * tuples at once right before leaving the index page.) The
234  * test for numKilled overrun is not just paranoia: if the
235  * caller reverses direction in the indexscan then the same
236  * item might get entered multiple times. It's not worth
237  * trying to optimize that, so we don't detect it, but instead
238  * just forget any excess entries.
239  */
240  if (so->killedItems == NULL)
241  so->killedItems = (int *)
242  palloc(MaxTIDsPerBTreePage * sizeof(int));
243  if (so->numKilled < MaxTIDsPerBTreePage)
244  so->killedItems[so->numKilled++] = so->currPos.itemIndex;
245  }
246 
247  /*
248  * Now continue the scan.
249  */
250  res = _bt_next(scan, dir);
251  }
252 
253  /* If we have a tuple, return it ... */
254  if (res)
255  break;
256  /* ... otherwise see if we need another primitive index scan */
257  } while (so->numArrayKeys && _bt_start_prim_scan(scan, dir));
258 
259  return res;
260 }
#define MaxTIDsPerBTreePage
Definition: nbtree.h:185
bool kill_prior_tuple
Definition: relscan.h:151

References _bt_first(), _bt_next(), BTScanPosIsValid, BTScanOpaqueData::currPos, if(), BTScanPosData::itemIndex, IndexScanDescData::kill_prior_tuple, BTScanOpaqueData::killedItems, MaxTIDsPerBTreePage, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), res, and IndexScanDescData::xs_recheck.

Referenced by bthandler().
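
btgettuple() is reached through the generic index AM interface rather than called directly. A hedged sketch of the caller side, using the stable indexam.c entry point index_getnext_tid() (scan-descriptor setup elided):

ItemPointer tid;

/* index_getnext_tid() dispatches to amgettuple, i.e. btgettuple() here */
while ((tid = index_getnext_tid(scan, ForwardScanDirection)) != NULL)
{
    /*
     * *tid is the heap TID of the next match; fetch and
     * visibility-check the corresponding heap tuple as appropriate.
     */
}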

◆ bthandler()

Datum bthandler ( PG_FUNCTION_ARGS  )

Definition at line 100 of file nbtree.c.

101 {
102  IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
103 
104  amroutine->amstrategies = BTMaxStrategyNumber;
105  amroutine->amsupport = BTNProcs;
106  amroutine->amoptsprocnum = BTOPTIONS_PROC;
107  amroutine->amcanorder = true;
108  amroutine->amcanorderbyop = false;
109  amroutine->amcanbackward = true;
110  amroutine->amcanunique = true;
111  amroutine->amcanmulticol = true;
112  amroutine->amoptionalkey = true;
113  amroutine->amsearcharray = true;
114  amroutine->amsearchnulls = true;
115  amroutine->amstorage = false;
116  amroutine->amclusterable = true;
117  amroutine->ampredlocks = true;
118  amroutine->amcanparallel = true;
119  amroutine->amcanbuildparallel = true;
120  amroutine->amcaninclude = true;
121  amroutine->amusemaintenanceworkmem = false;
122  amroutine->amsummarizing = false;
123  amroutine->amparallelvacuumoptions =
124  VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_COND_CLEANUP;
125  amroutine->amkeytype = InvalidOid;
126 
127  amroutine->ambuild = btbuild;
128  amroutine->ambuildempty = btbuildempty;
129  amroutine->aminsert = btinsert;
130  amroutine->aminsertcleanup = NULL;
131  amroutine->ambulkdelete = btbulkdelete;
132  amroutine->amvacuumcleanup = btvacuumcleanup;
133  amroutine->amcanreturn = btcanreturn;
134  amroutine->amcostestimate = btcostestimate;
135  amroutine->amgettreeheight = btgettreeheight;
136  amroutine->amoptions = btoptions;
137  amroutine->amproperty = btproperty;
138  amroutine->ambuildphasename = btbuildphasename;
139  amroutine->amvalidate = btvalidate;
140  amroutine->amadjustmembers = btadjustmembers;
141  amroutine->ambeginscan = btbeginscan;
142  amroutine->amrescan = btrescan;
143  amroutine->amgettuple = btgettuple;
144  amroutine->amgetbitmap = btgetbitmap;
145  amroutine->amendscan = btendscan;
146  amroutine->ammarkpos = btmarkpos;
147  amroutine->amrestrpos = btrestrpos;
148  amroutine->amestimateparallelscan = btestimateparallelscan;
149  amroutine->aminitparallelscan = btinitparallelscan;
150  amroutine->amparallelrescan = btparallelrescan;
151 
152  PG_RETURN_POINTER(amroutine);
153 }
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
bool btcanreturn(Relation index, int attno)
Definition: nbtree.c:1498
IndexScanDesc btbeginscan(Relation rel, int nkeys, int norderbys)
Definition: nbtree.c:312
IndexBulkDeleteResult * btbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
Definition: nbtree.c:862
IndexBulkDeleteResult * btvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
Definition: nbtree.c:892
bool btgettuple(IndexScanDesc scan, ScanDirection dir)
Definition: nbtree.c:206
void btparallelrescan(IndexScanDesc scan)
Definition: nbtree.c:562
bool btinsert(Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo)
Definition: nbtree.c:182
void btbuildempty(Relation index)
Definition: nbtree.c:159
int btgettreeheight(Relation rel)
Definition: nbtree.c:1507
void btinitparallelscan(void *target)
Definition: nbtree.c:547
Size btestimateparallelscan(int nkeys, int norderbys)
Definition: nbtree.c:537
int64 btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
Definition: nbtree.c:266
void btmarkpos(IndexScanDesc scan)
Definition: nbtree.c:453
void btendscan(IndexScanDesc scan)
Definition: nbtree.c:417
void btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
Definition: nbtree.c:360
void btrestrpos(IndexScanDesc scan)
Definition: nbtree.c:479
#define BTNProcs
Definition: nbtree.h:712
#define BTOPTIONS_PROC
Definition: nbtree.h:711
IndexBuildResult * btbuild(Relation heap, Relation index, IndexInfo *indexInfo)
Definition: nbtsort.c:295
char * btbuildphasename(int64 phasenum)
Definition: nbtutils.c:4610
bytea * btoptions(Datum reloptions, bool validate)
Definition: nbtutils.c:4564
bool btproperty(Oid index_oid, int attno, IndexAMProperty prop, const char *propname, bool *res, bool *isnull)
Definition: nbtutils.c:4587
bool btvalidate(Oid opclassoid)
Definition: nbtvalidate.c:41
void btadjustmembers(Oid opfamilyoid, Oid opclassoid, List *operators, List *functions)
Definition: nbtvalidate.c:293
#define makeNode(_type_)
Definition: nodes.h:155
#define InvalidOid
Definition: postgres_ext.h:36
void btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
Definition: selfuncs.c:6799
#define BTMaxStrategyNumber
Definition: stratnum.h:35
ambuildphasename_function ambuildphasename
Definition: amapi.h:285
ambuildempty_function ambuildempty
Definition: amapi.h:275
amvacuumcleanup_function amvacuumcleanup
Definition: amapi.h:279
bool amclusterable
Definition: amapi.h:249
amoptions_function amoptions
Definition: amapi.h:283
amestimateparallelscan_function amestimateparallelscan
Definition: amapi.h:297
amrestrpos_function amrestrpos
Definition: amapi.h:294
aminsert_function aminsert
Definition: amapi.h:276
amendscan_function amendscan
Definition: amapi.h:292
uint16 amoptsprocnum
Definition: amapi.h:229
amparallelrescan_function amparallelrescan
Definition: amapi.h:299
Oid amkeytype
Definition: amapi.h:265
bool ampredlocks
Definition: amapi.h:251
uint16 amsupport
Definition: amapi.h:227
amcostestimate_function amcostestimate
Definition: amapi.h:281
bool amcanorderbyop
Definition: amapi.h:233
amadjustmembers_function amadjustmembers
Definition: amapi.h:287
ambuild_function ambuild
Definition: amapi.h:274
bool amstorage
Definition: amapi.h:247
uint16 amstrategies
Definition: amapi.h:225
bool amoptionalkey
Definition: amapi.h:241
amgettuple_function amgettuple
Definition: amapi.h:290
amcanreturn_function amcanreturn
Definition: amapi.h:280
bool amcanunique
Definition: amapi.h:237
amgetbitmap_function amgetbitmap
Definition: amapi.h:291
amproperty_function amproperty
Definition: amapi.h:284
ambulkdelete_function ambulkdelete
Definition: amapi.h:278
bool amsearcharray
Definition: amapi.h:243
bool amsummarizing
Definition: amapi.h:261
amvalidate_function amvalidate
Definition: amapi.h:286
ammarkpos_function ammarkpos
Definition: amapi.h:293
bool amcanmulticol
Definition: amapi.h:239
bool amusemaintenanceworkmem
Definition: amapi.h:259
ambeginscan_function ambeginscan
Definition: amapi.h:288
bool amcanparallel
Definition: amapi.h:253
amrescan_function amrescan
Definition: amapi.h:289
bool amcanorder
Definition: amapi.h:231
bool amcanbuildparallel
Definition: amapi.h:255
aminitparallelscan_function aminitparallelscan
Definition: amapi.h:298
uint8 amparallelvacuumoptions
Definition: amapi.h:263
aminsertcleanup_function aminsertcleanup
Definition: amapi.h:277
bool amcanbackward
Definition: amapi.h:235
amgettreeheight_function amgettreeheight
Definition: amapi.h:282
bool amcaninclude
Definition: amapi.h:257
bool amsearchnulls
Definition: amapi.h:245
#define VACUUM_OPTION_PARALLEL_BULKDEL
Definition: vacuum.h:48
#define VACUUM_OPTION_PARALLEL_COND_CLEANUP
Definition: vacuum.h:55

References IndexAmRoutine::amadjustmembers, IndexAmRoutine::ambeginscan, IndexAmRoutine::ambuild, IndexAmRoutine::ambuildempty, IndexAmRoutine::ambuildphasename, IndexAmRoutine::ambulkdelete, IndexAmRoutine::amcanbackward, IndexAmRoutine::amcanbuildparallel, IndexAmRoutine::amcaninclude, IndexAmRoutine::amcanmulticol, IndexAmRoutine::amcanorder, IndexAmRoutine::amcanorderbyop, IndexAmRoutine::amcanparallel, IndexAmRoutine::amcanreturn, IndexAmRoutine::amcanunique, IndexAmRoutine::amclusterable, IndexAmRoutine::amcostestimate, IndexAmRoutine::amendscan, IndexAmRoutine::amestimateparallelscan, IndexAmRoutine::amgetbitmap, IndexAmRoutine::amgettreeheight, IndexAmRoutine::amgettuple, IndexAmRoutine::aminitparallelscan, IndexAmRoutine::aminsert, IndexAmRoutine::aminsertcleanup, IndexAmRoutine::amkeytype, IndexAmRoutine::ammarkpos, IndexAmRoutine::amoptionalkey, IndexAmRoutine::amoptions, IndexAmRoutine::amoptsprocnum, IndexAmRoutine::amparallelrescan, IndexAmRoutine::amparallelvacuumoptions, IndexAmRoutine::ampredlocks, IndexAmRoutine::amproperty, IndexAmRoutine::amrescan, IndexAmRoutine::amrestrpos, IndexAmRoutine::amsearcharray, IndexAmRoutine::amsearchnulls, IndexAmRoutine::amstorage, IndexAmRoutine::amstrategies, IndexAmRoutine::amsummarizing, IndexAmRoutine::amsupport, IndexAmRoutine::amusemaintenanceworkmem, IndexAmRoutine::amvacuumcleanup, IndexAmRoutine::amvalidate, btadjustmembers(), btbeginscan(), btbuild(), btbuildempty(), btbuildphasename(), btbulkdelete(), btcanreturn(), btcostestimate(), btendscan(), btestimateparallelscan(), btgetbitmap(), btgettreeheight(), btgettuple(), btinitparallelscan(), btinsert(), btmarkpos(), BTMaxStrategyNumber, BTNProcs, btoptions(), BTOPTIONS_PROC, btparallelrescan(), btproperty(), btrescan(), btrestrpos(), btvacuumcleanup(), btvalidate(), InvalidOid, makeNode, PG_RETURN_POINTER, VACUUM_OPTION_PARALLEL_BULKDEL, and VACUUM_OPTION_PARALLEL_COND_CLEANUP.
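
Callers never invoke bthandler() directly; core code looks up the handler OID in pg_am and calls it through the fmgr. A hedged sketch of that path, using the existing GetIndexAmRoutine() helper from access/amapi.h; the wrapper function inspect_btree_am() is hypothetical.

#include "postgres.h"
#include "access/amapi.h"
#include "utils/fmgroids.h"     /* for F_BTHANDLER, bthandler's pg_proc OID */

/* Hypothetical helper: fetch and inspect the nbtree AM routine */
static void
inspect_btree_am(void)
{
    IndexAmRoutine *amroutine = GetIndexAmRoutine(F_BTHANDLER);

    /* The struct is freshly palloc'd by bthandler() on every call */
    elog(LOG, "btree: %u strategies, %u support procs",
         amroutine->amstrategies, amroutine->amsupport);
}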

◆ btinitparallelscan()

void btinitparallelscan ( void *  target)

Definition at line 547 of file nbtree.c.

Referenced by bthandler().

◆ btinsert()

bool btinsert ( Relation  rel,
Datum values,
bool isnull,
ItemPointer  ht_ctid,
Relation  heapRel,
IndexUniqueCheck  checkUnique,
bool  indexUnchanged,
IndexInfo indexInfo 
)

Definition at line 182 of file nbtree.c.

187 {
188  bool result;
189  IndexTuple itup;
190 
191  /* generate an index tuple */
192  itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
193  itup->t_tid = *ht_ctid;
194 
195  result = _bt_doinsert(rel, itup, checkUnique, indexUnchanged, heapRel);
196 
197  pfree(itup);
198 
199  return result;
200 }
static Datum values[MAXATTR]
Definition: bootstrap.c:151
IndexTuple index_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition: indextuple.c:44
bool _bt_doinsert(Relation rel, IndexTuple itup, IndexUniqueCheck checkUnique, bool indexUnchanged, Relation heapRel)
Definition: nbtinsert.c:102
ItemPointerData t_tid
Definition: itup.h:37

References _bt_doinsert(), index_form_tuple(), pfree(), RelationGetDescr, IndexTupleData::t_tid, and values.

Referenced by bthandler().

◆ btmarkpos()

void btmarkpos ( IndexScanDesc  scan)

Definition at line 453 of file nbtree.c.

454 {
455  BTScanOpaque so = (BTScanOpaque) scan->opaque;
456 
457  /* There may be an old mark with a pin (but no lock). */
458  BTScanPosUnpinIfPinned(so->markPos);
459 
460  /*
461  * Just record the current itemIndex. If we later step to next page
462  * before releasing the marked position, _bt_steppage makes a full copy of
463  * the currPos struct in markPos. If (as often happens) the mark is moved
464  * before we leave the page, we don't have to do that work.
465  */
466  if (BTScanPosIsValid(so->currPos))
467  so->markItemIndex = so->currPos.itemIndex;
468  else
469  {
470  BTScanPosInvalidate(so->markPos);
471  so->markItemIndex = -1;
472  }
473 }

References BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanPosData::itemIndex, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, and IndexScanDescData::opaque.

Referenced by bthandler().

◆ btparallelrescan()

void btparallelrescan ( IndexScanDesc  scan)

Definition at line 562 of file nbtree.c.

563 {
564  BTParallelScanDesc btscan;
565  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
566 
567  Assert(parallel_scan);
568 
569  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
570  parallel_scan->ps_offset);
571 
572  /*
573  * In theory, we don't need to acquire the spinlock here, because there
574  * shouldn't be any other workers running at this point, but we do so for
575  * consistency.
576  */
577  SpinLockAcquire(&btscan->btps_mutex);
578  btscan->btps_nextScanPage = InvalidBlockNumber;
579  btscan->btps_lastCurrPage = InvalidBlockNumber;
580  btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
581  SpinLockRelease(&btscan->btps_mutex);
582 }

References Assert, BTPARALLEL_NOT_INITIALIZED, BTParallelScanDescData::btps_lastCurrPage, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_nextScanPage, BTParallelScanDescData::btps_pageStatus, InvalidBlockNumber, OffsetToPointer, IndexScanDescData::parallel_scan, ParallelIndexScanDescData::ps_offset, SpinLockAcquire, and SpinLockRelease.

Referenced by bthandler().

◆ btreevacuumposting()

static BTVacuumPosting btreevacuumposting ( BTVacState vstate,
IndexTuple  posting,
OffsetNumber  updatedoffset,
int *  nremaining 
)
static

Definition at line 1449 of file nbtree.c.

1451 {
1452  int live = 0;
1453  int nitem = BTreeTupleGetNPosting(posting);
1454  ItemPointer items = BTreeTupleGetPosting(posting);
1455  BTVacuumPosting vacposting = NULL;
1456 
1457  for (int i = 0; i < nitem; i++)
1458  {
1459  if (!vstate->callback(items + i, vstate->callback_state))
1460  {
1461  /* Live table TID */
1462  live++;
1463  }
1464  else if (vacposting == NULL)
1465  {
1466  /*
1467  * First dead table TID encountered.
1468  *
1469  * It's now clear that we need to delete one or more dead table
1470  * TIDs, so start maintaining metadata describing how to update
1471  * existing posting list tuple.
1472  */
1473  vacposting = palloc(offsetof(BTVacuumPostingData, deletetids) +
1474  nitem * sizeof(uint16));
1475 
1476  vacposting->itup = posting;
1477  vacposting->updatedoffset = updatedoffset;
1478  vacposting->ndeletedtids = 0;
1479  vacposting->deletetids[vacposting->ndeletedtids++] = i;
1480  }
1481  else
1482  {
1483  /* Second or subsequent dead table TID */
1484  vacposting->deletetids[vacposting->ndeletedtids++] = i;
1485  }
1486  }
1487 
1488  *nremaining = live;
1489  return vacposting;
1490 }
unsigned short uint16
Definition: c.h:517
static uint16 BTreeTupleGetNPosting(IndexTuple posting)
Definition: nbtree.h:518
static ItemPointer BTreeTupleGetPosting(IndexTuple posting)
Definition: nbtree.h:537
IndexBulkDeleteCallback callback
Definition: nbtree.h:334
void * callback_state
Definition: nbtree.h:335
uint16 deletetids[FLEXIBLE_ARRAY_MEMBER]
Definition: nbtree.h:911
uint16 ndeletedtids
Definition: nbtree.h:910
IndexTuple itup
Definition: nbtree.h:906
OffsetNumber updatedoffset
Definition: nbtree.h:907
static ItemArray items
Definition: test_tidstore.c:48

References BTreeTupleGetNPosting(), BTreeTupleGetPosting(), BTVacState::callback, BTVacState::callback_state, BTVacuumPostingData::deletetids, i, items, BTVacuumPostingData::itup, BTVacuumPostingData::ndeletedtids, palloc(), and BTVacuumPostingData::updatedoffset.

Referenced by btvacuumpage().
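
A small worked example may help (values illustrative): suppose posting contains three heap TIDs and the bulk-delete callback reports the first and third dead.

int             nremaining;
BTVacuumPosting vp = btreevacuumposting(vstate, posting, offnum, &nremaining);

/*
 * With TIDs 0 and 2 dead out of 3:
 *   vp->ndeletedtids == 2, vp->deletetids == {0, 2}, nremaining == 1
 * vp == NULL would instead mean every TID is still live, so the caller
 * needs neither a delete nor an update for this posting list tuple.
 */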

◆ btrescan()

void btrescan ( IndexScanDesc  scan,
ScanKey  scankey,
int  nscankeys,
ScanKey  orderbys,
int  norderbys 
)

Definition at line 360 of file nbtree.c.

362 {
363  BTScanOpaque so = (BTScanOpaque) scan->opaque;
364 
365  /* we aren't holding any read locks, but gotta drop the pins */
366  if (BTScanPosIsValid(so->currPos))
367  {
368  /* Before leaving current page, deal with any killed items */
369  if (so->numKilled > 0)
370  _bt_killitems(scan);
371  BTScanPosUnpinIfPinned(so->currPos);
372  BTScanPosInvalidate(so->currPos);
373  }
374 
375  so->markItemIndex = -1;
376  so->needPrimScan = false;
377  so->scanBehind = false;
378  so->oppositeDirCheck = false;
379  BTScanPosUnpinIfPinned(so->markPos);
380  BTScanPosInvalidate(so->markPos);
381 
382  /*
383  * Allocate tuple workspace arrays, if needed for an index-only scan and
384  * not already done in a previous rescan call. To save on palloc
385  * overhead, both workspaces are allocated as one palloc block; only this
386  * function and btendscan know that.
387  *
388  * NOTE: this data structure also makes it safe to return data from a
389  * "name" column, even though btree name_ops uses an underlying storage
390  * datatype of cstring. The risk there is that "name" is supposed to be
391  * padded to NAMEDATALEN, but the actual index tuple is probably shorter.
392  * However, since we only return data out of tuples sitting in the
393  * currTuples array, a fetch of NAMEDATALEN bytes can at worst pull some
394  * data out of the markTuples array --- running off the end of memory for
395  * a SIGSEGV is not possible. Yeah, this is ugly as sin, but it beats
396  * adding special-case treatment for name_ops elsewhere.
397  */
398  if (scan->xs_want_itup && so->currTuples == NULL)
399  {
400  so->currTuples = (char *) palloc(BLCKSZ * 2);
401  so->markTuples = so->currTuples + BLCKSZ;
402  }
403 
404  /*
405  * Reset the scan keys
406  */
407  if (scankey && scan->numberOfKeys > 0)
408  memcpy(scan->keyData, scankey, scan->numberOfKeys * sizeof(ScanKeyData));
409  so->numberOfKeys = 0; /* until _bt_preprocess_keys sets it */
410  so->numArrayKeys = 0; /* ditto */
411 }
struct ScanKeyData * keyData
Definition: relscan.h:145

References _bt_killitems(), BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, if(), IndexScanDescData::keyData, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanOpaqueData::needPrimScan, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numberOfKeys, IndexScanDescData::numberOfKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, BTScanOpaqueData::oppositeDirCheck, palloc(), BTScanOpaqueData::scanBehind, and IndexScanDescData::xs_want_itup.

Referenced by bthandler().
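
The scankey array that btrescan() copies is normally built by the caller with ScanKeyInit() and handed over through index_rescan(). A hedged sketch for a single equality qual on the first index column; scan setup is elided, and F_INT4EQ assumes an int4 key column.

#include "access/genam.h"
#include "access/skey.h"
#include "access/stratnum.h"
#include "utils/fmgroids.h"

ScanKeyData skey;

ScanKeyInit(&skey,
            1,                      /* first index attribute */
            BTEqualStrategyNumber,  /* "=" strategy */
            F_INT4EQ,               /* comparison function */
            Int32GetDatum(42));

index_rescan(scan, &skey, 1, NULL, 0);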

◆ btrestrpos()

void btrestrpos ( IndexScanDesc  scan)

Definition at line 479 of file nbtree.c.

480 {
481  BTScanOpaque so = (BTScanOpaque) scan->opaque;
482 
483  if (so->markItemIndex >= 0)
484  {
485  /*
486  * The scan has never moved to a new page since the last mark. Just
487  * restore the itemIndex.
488  *
489  * NB: In this case we can't count on anything in so->markPos to be
490  * accurate.
491  */
492  so->currPos.itemIndex = so->markItemIndex;
493  }
494  else
495  {
496  /*
497  * The scan moved to a new page after last mark or restore, and we are
498  * now restoring to the marked page. We aren't holding any read
499  * locks, but if we're still holding the pin for the current position,
500  * we must drop it.
501  */
502  if (BTScanPosIsValid(so->currPos))
503  {
504  /* Before leaving current page, deal with any killed items */
505  if (so->numKilled > 0)
506  _bt_killitems(scan);
507  BTScanPosUnpinIfPinned(so->currPos);
508  }
509 
510  if (BTScanPosIsValid(so->markPos))
511  {
512  /* bump pin on mark buffer for assignment to current buffer */
513  if (BTScanPosIsPinned(so->markPos))
514  IncrBufferRefCount(so->markPos.buf);
515  memcpy(&so->currPos, &so->markPos,
516  offsetof(BTScanPosData, items[1]) +
517  so->markPos.lastItem * sizeof(BTScanPosItem));
518  if (so->currTuples)
519  memcpy(so->currTuples, so->markTuples,
520  so->markPos.nextTupleOffset);
521  /* Reset the scan's array keys (see _bt_steppage for why) */
522  if (so->numArrayKeys)
523  {
524  _bt_start_array_keys(scan, so->currPos.dir);
525  so->needPrimScan = false;
526  }
527  }
528  else
529  BTScanPosInvalidate(so->currPos);
530  }
531 }
void IncrBufferRefCount(Buffer buffer)
Definition: bufmgr.c:4956
#define BTScanPosIsPinned(scanpos)
Definition: nbtree.h:993
void _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir)
Definition: nbtutils.c:1352
Buffer buf
Definition: nbtree.h:953
int nextTupleOffset
Definition: nbtree.h:968
ScanDirection dir
Definition: nbtree.h:962

References _bt_killitems(), _bt_start_array_keys(), BTScanPosInvalidate, BTScanPosIsPinned, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanPosData::buf, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanPosData::dir, if(), IncrBufferRefCount(), BTScanPosData::itemIndex, items, BTScanPosData::lastItem, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanOpaqueData::needPrimScan, BTScanPosData::nextTupleOffset, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numKilled, and IndexScanDescData::opaque.

Referenced by bthandler().

◆ btvacuumcleanup()

IndexBulkDeleteResult* btvacuumcleanup ( IndexVacuumInfo info,
IndexBulkDeleteResult stats 
)

Definition at line 892 of file nbtree.c.

893 {
894  BlockNumber num_delpages;
895 
896  /* No-op in ANALYZE ONLY mode */
897  if (info->analyze_only)
898  return stats;
899 
900  /*
901  * If btbulkdelete was called, we need not do anything (we just maintain
902  * the information used within _bt_vacuum_needs_cleanup() by calling
903  * _bt_set_cleanup_info() below).
904  *
905  * If btbulkdelete was _not_ called, then we have a choice to make: we
906  * must decide whether or not a btvacuumscan() call is needed now (i.e.
907  * whether the ongoing VACUUM operation can entirely avoid a physical scan
908  * of the index). A call to _bt_vacuum_needs_cleanup() decides it for us
909  * now.
910  */
911  if (stats == NULL)
912  {
913  /* Check if VACUUM operation can entirely avoid btvacuumscan() call */
914  if (!_bt_vacuum_needs_cleanup(info->index))
915  return NULL;
916 
917  /*
918  * Since we aren't going to actually delete any leaf items, there's no
919  * need to go through all the vacuum-cycle-ID pushups here.
920  *
921  * Posting list tuples are a source of inaccuracy for cleanup-only
922  * scans. btvacuumscan() will assume that the number of index tuples
923  * from each page can be used as num_index_tuples, even though
924  * num_index_tuples is supposed to represent the number of TIDs in the
925  * index. This naive approach can underestimate the number of tuples
926  * in the index significantly.
927  *
928  * We handle the problem by making num_index_tuples an estimate in
929  * cleanup-only case.
930  */
931  stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
932  btvacuumscan(info, stats, NULL, NULL, 0);
933  stats->estimated_count = true;
934  }
935 
936  /*
937  * Maintain num_delpages value in metapage for _bt_vacuum_needs_cleanup().
938  *
939  * num_delpages is the number of deleted pages now in the index that were
940  * not safe to place in the FSM to be recycled just yet. num_delpages is
941  * greater than 0 only when _bt_pagedel() actually deleted pages during
942  * our call to btvacuumscan(). Even then, _bt_pendingfsm_finalize() must
943  * have failed to place any newly deleted pages in the FSM just moments
944  * ago. (Actually, there are edge cases where recycling of the current
945  * VACUUM's newly deleted pages does not even become safe by the time the
946  * next VACUUM comes around. See nbtree/README.)
947  */
948  Assert(stats->pages_deleted >= stats->pages_free);
949  num_delpages = stats->pages_deleted - stats->pages_free;
950  _bt_set_cleanup_info(info->index, num_delpages);
951 
952  /*
953  * It's quite possible for us to be fooled by concurrent page splits into
954  * double-counting some index tuples, so disbelieve any total that exceeds
955  * the underlying heap's count ... if we know that accurately. Otherwise
956  * this might just make matters worse.
957  */
958  if (!info->estimated_count)
959  {
960  if (stats->num_index_tuples > info->num_heap_tuples)
961  stats->num_index_tuples = info->num_heap_tuples;
962  }
963 
964  return stats;
965 }
uint32 BlockNumber
Definition: block.h:31
void _bt_set_cleanup_info(Relation rel, BlockNumber num_delpages)
Definition: nbtpage.c:232
bool _bt_vacuum_needs_cleanup(Relation rel)
Definition: nbtpage.c:179
bool estimated_count
Definition: genam.h:78
BlockNumber pages_deleted
Definition: genam.h:82
BlockNumber pages_free
Definition: genam.h:83
double num_index_tuples
Definition: genam.h:79
double num_heap_tuples
Definition: genam.h:52
bool analyze_only
Definition: genam.h:48
bool estimated_count
Definition: genam.h:50

References _bt_set_cleanup_info(), _bt_vacuum_needs_cleanup(), IndexVacuumInfo::analyze_only, Assert, btvacuumscan(), IndexVacuumInfo::estimated_count, IndexBulkDeleteResult::estimated_count, IndexVacuumInfo::index, IndexVacuumInfo::num_heap_tuples, IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, and palloc0().

Referenced by bthandler().

◆ btvacuumpage()

static void btvacuumpage ( BTVacState vstate,
BlockNumber  scanblkno 
)
static

Definition at line 1114 of file nbtree.c.

1115 {
1116  IndexVacuumInfo *info = vstate->info;
1117  IndexBulkDeleteResult *stats = vstate->stats;
1118  IndexBulkDeleteCallback callback = vstate->callback;
1119  void *callback_state = vstate->callback_state;
1120  Relation rel = info->index;
1121  Relation heaprel = info->heaprel;
1122  bool attempt_pagedel;
1123  BlockNumber blkno,
1124  backtrack_to;
1125  Buffer buf;
1126  Page page;
1127  BTPageOpaque opaque;
1128 
1129  blkno = scanblkno;
1130 
1131 backtrack:
1132 
1133  attempt_pagedel = false;
1134  backtrack_to = P_NONE;
1135 
1136  /* call vacuum_delay_point while not holding any buffer lock */
1137  vacuum_delay_point();
1138 
1139  /*
1140  * We can't use _bt_getbuf() here because it always applies
1141  * _bt_checkpage(), which will barf on an all-zero page. We want to
1142  * recycle all-zero pages, not fail. Also, we want to use a nondefault
1143  * buffer access strategy.
1144  */
1145  buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
1146  info->strategy);
1147  _bt_lockbuf(rel, buf, BT_READ);
1148  page = BufferGetPage(buf);
1149  opaque = NULL;
1150  if (!PageIsNew(page))
1151  {
1152  _bt_checkpage(rel, buf);
1153  opaque = BTPageGetOpaque(page);
1154  }
1155 
1156  Assert(blkno <= scanblkno);
1157  if (blkno != scanblkno)
1158  {
1159  /*
1160  * We're backtracking.
1161  *
1162  * We followed a right link to a sibling leaf page (a page that
1163  * happens to be from a block located before scanblkno). The only
1164  * case we want to do anything with is a live leaf page having the
1165  * current vacuum cycle ID.
1166  *
1167  * The page had better be in a state that's consistent with what we
1168  * expect. Check for conditions that imply corruption in passing. It
1169  * can't be half-dead because only an interrupted VACUUM process can
1170  * leave pages in that state, so we'd definitely have dealt with it
1171  * back when the page was the scanblkno page (half-dead pages are
1172  * always marked fully deleted by _bt_pagedel(), barring corruption).
1173  */
1174  if (!opaque || !P_ISLEAF(opaque) || P_ISHALFDEAD(opaque))
1175  {
1176  Assert(false);
1177  ereport(LOG,
1178  (errcode(ERRCODE_INDEX_CORRUPTED),
1179  errmsg_internal("right sibling %u of scanblkno %u unexpectedly in an inconsistent state in index \"%s\"",
1180  blkno, scanblkno, RelationGetRelationName(rel))));
1181  _bt_relbuf(rel, buf);
1182  return;
1183  }
1184 
1185  /*
1186  * We may have already processed the page in an earlier call, when the
1187  * page was scanblkno. This happens when the leaf page split occurred
1188  * after the scan began, but before the right sibling page became the
1189  * scanblkno.
1190  *
1191  * Page may also have been deleted by current btvacuumpage() call,
1192  * since _bt_pagedel() sometimes deletes the right sibling page of
1193  * scanblkno in passing (it does so after we decided where to
1194  * backtrack to). We don't need to process this page as a deleted
1195  * page a second time now (in fact, it would be wrong to count it as a
1196  * deleted page in the bulk delete statistics a second time).
1197  */
1198  if (opaque->btpo_cycleid != vstate->cycleid || P_ISDELETED(opaque))
1199  {
1200  /* Done with current scanblkno (and all lower split pages) */
1201  _bt_relbuf(rel, buf);
1202  return;
1203  }
1204  }
1205 
1206  if (!opaque || BTPageIsRecyclable(page, heaprel))
1207  {
1208  /* Okay to recycle this page (which could be leaf or internal) */
1209  RecordFreeIndexPage(rel, blkno);
1210  stats->pages_deleted++;
1211  stats->pages_free++;
1212  }
1213  else if (P_ISDELETED(opaque))
1214  {
1215  /*
1216  * Already deleted page (which could be leaf or internal). Can't
1217  * recycle yet.
1218  */
1219  stats->pages_deleted++;
1220  }
1221  else if (P_ISHALFDEAD(opaque))
1222  {
1223  /* Half-dead leaf page (from interrupted VACUUM) -- finish deleting */
1224  attempt_pagedel = true;
1225 
1226  /*
1227  * _bt_pagedel() will increment both pages_newly_deleted and
1228  * pages_deleted stats in all cases (barring corruption)
1229  */
1230  }
1231  else if (P_ISLEAF(opaque))
1232  {
1233  OffsetNumber deletable[MaxIndexTuplesPerPage];
1234  int ndeletable;
1235  BTVacuumPosting updatable[MaxIndexTuplesPerPage];
1236  int nupdatable;
1237  OffsetNumber offnum,
1238  minoff,
1239  maxoff;
1240  int nhtidsdead,
1241  nhtidslive;
1242 
1243  /*
1244  * Trade in the initial read lock for a full cleanup lock on this
1245  * page. We must get such a lock on every leaf page over the course
1246  * of the vacuum scan, whether or not it actually contains any
1247  * deletable tuples --- see nbtree/README.
1248  */
1249  _bt_upgradelockbufcleanup(rel, buf);
1250 
1251  /*
1252  * Check whether we need to backtrack to earlier pages. What we are
1253  * concerned about is a page split that happened since we started the
1254  * vacuum scan. If the split moved tuples on the right half of the
1255  * split (i.e. the tuples that sort high) to a block that we already
1256  * passed over, then we might have missed the tuples. We need to
1257  * backtrack now. (Must do this before possibly clearing btpo_cycleid
1258  * or deleting scanblkno page below!)
1259  */
1260  if (vstate->cycleid != 0 &&
1261  opaque->btpo_cycleid == vstate->cycleid &&
1262  !(opaque->btpo_flags & BTP_SPLIT_END) &&
1263  !P_RIGHTMOST(opaque) &&
1264  opaque->btpo_next < scanblkno)
1265  backtrack_to = opaque->btpo_next;
1266 
1267  ndeletable = 0;
1268  nupdatable = 0;
1269  minoff = P_FIRSTDATAKEY(opaque);
1270  maxoff = PageGetMaxOffsetNumber(page);
1271  nhtidsdead = 0;
1272  nhtidslive = 0;
1273  if (callback)
1274  {
1275  /* btbulkdelete callback tells us what to delete (or update) */
1276  for (offnum = minoff;
1277  offnum <= maxoff;
1278  offnum = OffsetNumberNext(offnum))
1279  {
1280  IndexTuple itup;
1281 
1282  itup = (IndexTuple) PageGetItem(page,
1283  PageGetItemId(page, offnum));
1284 
1285  Assert(!BTreeTupleIsPivot(itup));
1286  if (!BTreeTupleIsPosting(itup))
1287  {
1288  /* Regular tuple, standard table TID representation */
1289  if (callback(&itup->t_tid, callback_state))
1290  {
1291  deletable[ndeletable++] = offnum;
1292  nhtidsdead++;
1293  }
1294  else
1295  nhtidslive++;
1296  }
1297  else
1298  {
1299  BTVacuumPosting vacposting;
1300  int nremaining;
1301 
1302  /* Posting list tuple */
1303  vacposting = btreevacuumposting(vstate, itup, offnum,
1304  &nremaining);
1305  if (vacposting == NULL)
1306  {
1307  /*
1308  * All table TIDs from the posting tuple remain, so no
1309  * delete or update required
1310  */
1311  Assert(nremaining == BTreeTupleGetNPosting(itup));
1312  }
1313  else if (nremaining > 0)
1314  {
1315 
1316  /*
1317  * Store metadata about posting list tuple in
1318  * updatable array for entire page. Existing tuple
1319  * will be updated during the later call to
1320  * _bt_delitems_vacuum().
1321  */
1322  Assert(nremaining < BTreeTupleGetNPosting(itup));
1323  updatable[nupdatable++] = vacposting;
1324  nhtidsdead += BTreeTupleGetNPosting(itup) - nremaining;
1325  }
1326  else
1327  {
1328  /*
1329  * All table TIDs from the posting list must be
1330  * deleted. We'll delete the index tuple completely
1331  * (no update required).
1332  */
1333  Assert(nremaining == 0);
1334  deletable[ndeletable++] = offnum;
1335  nhtidsdead += BTreeTupleGetNPosting(itup);
1336  pfree(vacposting);
1337  }
1338 
1339  nhtidslive += nremaining;
1340  }
1341  }
1342  }
1343 
1344  /*
1345  * Apply any needed deletes or updates. We issue just one
1346  * _bt_delitems_vacuum() call per page, so as to minimize WAL traffic.
1347  */
1348  if (ndeletable > 0 || nupdatable > 0)
1349  {
1350  Assert(nhtidsdead >= ndeletable + nupdatable);
1351  _bt_delitems_vacuum(rel, buf, deletable, ndeletable, updatable,
1352  nupdatable);
1353 
1354  stats->tuples_removed += nhtidsdead;
1355  /* must recompute maxoff */
1356  maxoff = PageGetMaxOffsetNumber(page);
1357 
1358  /* can't leak memory here */
1359  for (int i = 0; i < nupdatable; i++)
1360  pfree(updatable[i]);
1361  }
1362  else
1363  {
1364  /*
1365  * If the leaf page has been split during this vacuum cycle, it
1366  * seems worth expending a write to clear btpo_cycleid even if we
1367  * don't have any deletions to do. (If we do, _bt_delitems_vacuum
1368  * takes care of this.) This ensures we won't process the page
1369  * again.
1370  *
1371  * We treat this like a hint-bit update because there's no need to
1372  * WAL-log it.
1373  */
1374  Assert(nhtidsdead == 0);
1375  if (vstate->cycleid != 0 &&
1376  opaque->btpo_cycleid == vstate->cycleid)
1377  {
1378  opaque->btpo_cycleid = 0;
1379  MarkBufferDirtyHint(buf, true);
1380  }
1381  }
1382 
1383  /*
1384  * If the leaf page is now empty, try to delete it; else count the
1385  * live tuples (live table TIDs in posting lists are counted as
1386  * separate live tuples). We don't delete when backtracking, though,
1387  * since that would require teaching _bt_pagedel() about backtracking
1388  * (doesn't seem worth adding more complexity to deal with that).
1389  *
1390  * We don't count the number of live TIDs during cleanup-only calls to
1391  * btvacuumscan (i.e. when callback is not set). We count the number
1392  * of index tuples directly instead. This avoids the expense of
1393  * directly examining all of the tuples on each page. VACUUM will
1394  * treat num_index_tuples as an estimate in the cleanup-only case, so it
1395  * doesn't matter that this underestimates num_index_tuples
1396  * significantly in some cases.
1397  */
1398  if (minoff > maxoff)
1399  attempt_pagedel = (blkno == scanblkno);
1400  else if (callback)
1401  stats->num_index_tuples += nhtidslive;
1402  else
1403  stats->num_index_tuples += maxoff - minoff + 1;
1404 
1405  Assert(!attempt_pagedel || nhtidslive == 0);
1406  }
1407 
1408  if (attempt_pagedel)
1409  {
1410  MemoryContext oldcontext;
1411 
1412  /* Run pagedel in a temp context to avoid memory leakage */
1413  MemoryContextReset(vstate->pagedelcontext);
1414  oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
1415 
1416  /*
1417  * _bt_pagedel maintains the bulk delete stats on our behalf;
1418  * pages_newly_deleted and pages_deleted are likely to be incremented
1419  * during call
1420  */
1421  Assert(blkno == scanblkno);
1422  _bt_pagedel(rel, buf, vstate);
1423 
1424  MemoryContextSwitchTo(oldcontext);
1425  /* pagedel released buffer, so we shouldn't */
1426  }
1427  else
1428  _bt_relbuf(rel, buf);
1429 
1430  if (backtrack_to != P_NONE)
1431  {
1432  blkno = backtrack_to;
1433  goto backtrack;
1434  }
1435 }
int Buffer
Definition: buf.h:23
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition: bufmgr.c:4988
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:793
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:400
@ RBM_NORMAL
Definition: bufmgr.h:45
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:354
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:243
static bool PageIsNew(Page page)
Definition: bufpage.h:233
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:372
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1157
int errcode(int sqlerrcode)
Definition: elog.c:853
#define LOG
Definition: elog.h:31
#define ereport(elevel,...)
Definition: elog.h:149
bool(* IndexBulkDeleteCallback)(ItemPointer itemptr, void *state)
Definition: genam.h:87
void RecordFreeIndexPage(Relation rel, BlockNumber freeBlock)
Definition: indexfsm.c:52
IndexTupleData * IndexTuple
Definition: itup.h:53
#define MaxIndexTuplesPerPage
Definition: itup.h:165
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:383
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:1023
void _bt_pagedel(Relation rel, Buffer leafbuf, BTVacState *vstate)
Definition: nbtpage.c:1802
void _bt_delitems_vacuum(Relation rel, Buffer buf, OffsetNumber *deletable, int ndeletable, BTVacuumPosting *updatable, int nupdatable)
Definition: nbtpage.c:1154
void _bt_checkpage(Relation rel, Buffer buf)
Definition: nbtpage.c:797
void _bt_upgradelockbufcleanup(Relation rel, Buffer buf)
Definition: nbtpage.c:1109
void _bt_lockbuf(Relation rel, Buffer buf, int access)
Definition: nbtpage.c:1039
static BTVacuumPosting btreevacuumposting(BTVacState *vstate, IndexTuple posting, OffsetNumber updatedoffset, int *nremaining)
Definition: nbtree.c:1449
#define P_ISHALFDEAD(opaque)
Definition: nbtree.h:224
static bool BTreeTupleIsPivot(IndexTuple itup)
Definition: nbtree.h:480
#define P_ISLEAF(opaque)
Definition: nbtree.h:220
#define BTPageGetOpaque(page)
Definition: nbtree.h:73
#define P_ISDELETED(opaque)
Definition: nbtree.h:222
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:369
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:219
#define BT_READ
Definition: nbtree.h:719
static bool BTPageIsRecyclable(Page page, Relation heaprel)
Definition: nbtree.h:291
static bool BTreeTupleIsPosting(IndexTuple itup)
Definition: nbtree.h:492
#define BTP_SPLIT_END
Definition: nbtree.h:81
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
static char * buf
Definition: pg_test_fsync.c:72
MemoryContextSwitchTo(old_ctx)
#define RelationGetRelationName(relation)
Definition: rel.h:539
@ MAIN_FORKNUM
Definition: relpath.h:58
BlockNumber btpo_next
Definition: nbtree.h:65
uint16 btpo_flags
Definition: nbtree.h:67
BTCycleId btpo_cycleid
Definition: nbtree.h:68
IndexBulkDeleteResult * stats
Definition: nbtree.h:333
BTCycleId cycleid
Definition: nbtree.h:336
MemoryContext pagedelcontext
Definition: nbtree.h:337
IndexVacuumInfo * info
Definition: nbtree.h:332
double tuples_removed
Definition: genam.h:80
BufferAccessStrategy strategy
Definition: genam.h:53
Relation heaprel
Definition: genam.h:47
void vacuum_delay_point(void)
Definition: vacuum.c:2361

References _bt_checkpage(), _bt_delitems_vacuum(), _bt_lockbuf(), _bt_pagedel(), _bt_relbuf(), _bt_upgradelockbufcleanup(), Assert, BT_READ, BTP_SPLIT_END, BTPageGetOpaque, BTPageIsRecyclable(), BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTreeTupleGetNPosting(), BTreeTupleIsPivot(), BTreeTupleIsPosting(), btreevacuumposting(), buf, BufferGetPage(), BTVacState::callback, callback(), BTVacState::callback_state, BTVacState::cycleid, ereport, errcode(), errmsg_internal(), IndexVacuumInfo::heaprel, i, IndexVacuumInfo::index, BTVacState::info, LOG, MAIN_FORKNUM, MarkBufferDirtyHint(), MaxIndexTuplesPerPage, MemoryContextReset(), MemoryContextSwitchTo(), IndexBulkDeleteResult::num_index_tuples, OffsetNumberNext, P_FIRSTDATAKEY, P_ISDELETED, P_ISHALFDEAD, P_ISLEAF, P_NONE, P_RIGHTMOST, BTVacState::pagedelcontext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageIsNew(), IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, pfree(), RBM_NORMAL, ReadBufferExtended(), RecordFreeIndexPage(), RelationGetRelationName, BTVacState::stats, IndexVacuumInfo::strategy, IndexTupleData::t_tid, IndexBulkDeleteResult::tuples_removed, and vacuum_delay_point().

Referenced by btvacuumscan().
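The posting-list handling above (source lines 1299-1340) resolves each posting tuple into one of three outcomes: no work when every table TID survives the callback, an in-place update recorded in the updatable[] array when only some survive, and outright deletion of the index tuple when none do. Below is a minimal, self-contained sketch of that decision structure. It is illustrative only: tid_t, PostingUpdate, check_tid, and triage_posting are hypothetical stand-ins, not nbtree's ItemPointerData, BTVacuumPosting, the IndexBulkDeleteCallback, or btreevacuumposting() itself.

#include <stdio.h>
#include <stdlib.h>

typedef unsigned long tid_t;    /* hypothetical stand-in for ItemPointerData */

typedef struct PostingUpdate    /* hypothetical stand-in for BTVacuumPostingData */
{
    int ndeletedtids;
    int deletetids[16];         /* offsets (within the posting list) of dead TIDs */
} PostingUpdate;

/* stand-in for the bulk-delete callback: returns nonzero if the TID is dead */
static int
check_tid(tid_t tid)
{
    return tid % 2 == 0;        /* arbitrary rule, just for the demo */
}

/*
 * Classify one posting list.  Returns NULL when nothing must change or when
 * the whole tuple should go (*delete_whole distinguishes the two); returns
 * an update descriptor when only some TIDs are dead.
 */
static PostingUpdate *
triage_posting(const tid_t *tids, int ntids, int *nremaining, int *delete_whole)
{
    PostingUpdate *upd = NULL;
    int live = 0;

    for (int i = 0; i < ntids; i++)
    {
        if (check_tid(tids[i]))
        {
            if (upd == NULL)
                upd = calloc(1, sizeof(PostingUpdate));
            upd->deletetids[upd->ndeletedtids++] = i;
        }
        else
            live++;
    }

    *nremaining = live;
    *delete_whole = (ntids > 0 && live == 0);
    if (live == ntids || live == 0)
    {
        free(upd);              /* all live or all dead: no in-place update */
        return NULL;
    }
    return upd;                 /* partial: caller updates the tuple in place */
}

int
main(void)
{
    tid_t tids[] = {1, 2, 3, 4};
    int nremaining;
    int delete_whole;
    PostingUpdate *upd = triage_posting(tids, 4, &nremaining, &delete_whole);

    printf("remaining=%d delete_whole=%d update=%s\n",
           nremaining, delete_whole, upd ? "yes" : "no");
    free(upd);
    return 0;
}

One deliberate simplification: in the real code the all-dead case still hands the caller a descriptor that it immediately pfrees before deleting the whole tuple (source lines 1326-1337), whereas the sketch folds that outcome into the delete_whole flag.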

◆ btvacuumscan()

static void btvacuumscan ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats,
IndexBulkDeleteCallback  callback,
void *  callback_state,
BTCycleId  cycleid 
)
static

Definition at line 980 of file nbtree.c.

983 {
984  Relation rel = info->index;
985  BTVacState vstate;
986  BlockNumber num_pages;
987  BlockNumber scanblkno;
988  bool needLock;
989 
990  /*
991  * Reset fields that track information about the entire index now. This
992  * avoids double-counting in the case where a single VACUUM command
993  * requires multiple scans of the index.
994  *
995  * Avoid resetting the tuples_removed and pages_newly_deleted fields here,
996  * since they track information about the VACUUM command, and so must last
997  * across each call to btvacuumscan().
998  *
999  * (Note that pages_free is treated as state about the whole index, not
1000  * the current VACUUM. This is appropriate because RecordFreeIndexPage()
1001  * calls are idempotent, and get repeated for the same deleted pages in
1002  * some scenarios. The point for us is to track the number of recyclable
1003  * pages in the index at the end of the VACUUM command.)
1004  */
1005  stats->num_pages = 0;
1006  stats->num_index_tuples = 0;
1007  stats->pages_deleted = 0;
1008  stats->pages_free = 0;
1009 
1010  /* Set up info to pass down to btvacuumpage */
1011  vstate.info = info;
1012  vstate.stats = stats;
1013  vstate.callback = callback;
1014  vstate.callback_state = callback_state;
1015  vstate.cycleid = cycleid;
1016 
1017  /* Create a temporary memory context to run _bt_pagedel in */
1018  vstate.pagedelcontext = AllocSetContextCreate(CurrentMemoryContext,
1019   "_bt_pagedel",
1020   ALLOCSET_DEFAULT_SIZES);
1021 
1022  /* Initialize vstate fields used by _bt_pendingfsm_finalize */
1023  vstate.bufsize = 0;
1024  vstate.maxbufsize = 0;
1025  vstate.pendingpages = NULL;
1026  vstate.npendingpages = 0;
1027  /* Consider applying _bt_pendingfsm_finalize optimization */
1028  _bt_pendingfsm_init(rel, &vstate, (callback == NULL));
1029 
1030  /*
1031  * The outer loop iterates over all index pages except the metapage, in
1032  * physical order (we hope the kernel will cooperate in providing
1033  * read-ahead for speed). It is critical that we visit all leaf pages,
1034  * including ones added after we start the scan, else we might fail to
1035  * delete some deletable tuples. Hence, we must repeatedly check the
1036  * relation length. We must acquire the relation-extension lock while
1037  * doing so to avoid a race condition: if someone else is extending the
1038  * relation, there is a window where bufmgr/smgr have created a new
1039  * all-zero page but it hasn't yet been write-locked by _bt_getbuf(). If
1040  * we manage to scan such a page here, we'll improperly assume it can be
1041  * recycled. Taking the lock synchronizes things enough to prevent a
1042  * problem: either num_pages won't include the new page, or _bt_getbuf
1043  * already has write lock on the buffer and it will be fully initialized
1044  * before we can examine it. Also, we need not worry if a page is added
1045  * immediately after we look; the page splitting code already has
1046  * write-lock on the left page before it adds a right page, so we must
1047  * already have processed any tuples due to be moved into such a page.
1048  *
1049  * XXX: Now that new pages are locked with RBM_ZERO_AND_LOCK, I don't
1050  * think the use of the extension lock is still required.
1051  *
1052  * We can skip locking for new or temp relations, however, since no one
1053  * else could be accessing them.
1054  */
1055  needLock = !RELATION_IS_LOCAL(rel);
1056 
1057  scanblkno = BTREE_METAPAGE + 1;
1058  for (;;)
1059  {
1060  /* Get the current relation length */
1061  if (needLock)
1062   LockRelationForExtension(rel, ExclusiveLock);
1063  num_pages = RelationGetNumberOfBlocks(rel);
1064  if (needLock)
1065   UnlockRelationForExtension(rel, ExclusiveLock);
1066 
1067  if (info->report_progress)
1068   pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
1069   num_pages);
1070 
1071  /* Quit if we've scanned the whole relation */
1072  if (scanblkno >= num_pages)
1073  break;
1074  /* Iterate over pages, then loop back to recheck length */
1075  for (; scanblkno < num_pages; scanblkno++)
1076  {
1077  btvacuumpage(&vstate, scanblkno);
1078  if (info->report_progress)
1079   pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1080   scanblkno);
1081  }
1082  }
1083 
1084  /* Set statistics num_pages field to final size of index */
1085  stats->num_pages = num_pages;
1086 
1087  MemoryContextDelete(vstate.pagedelcontext);
1088 
1089  /*
1090  * If there were any calls to _bt_pagedel() during scan of the index then
1091  * see if any of the resulting pages can be placed in the FSM now. When
1092  * it's not safe we'll have to leave it up to a future VACUUM operation.
1093  *
1094  * Finally, if we placed any pages in the FSM (either just now or during
1095  * the scan), forcibly update the upper-level FSM pages to ensure that
1096  * searchers can find them.
1097  */
1098  _bt_pendingfsm_finalize(rel, &vstate);
1099  if (stats->pages_free > 0)
1100   IndexFreeSpaceMapVacuum(rel);
1101 }
void pgstat_progress_update_param(int index, int64 val)
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:273
void IndexFreeSpaceMapVacuum(Relation rel)
Definition: indexfsm.c:71
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:419
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:469
#define ExclusiveLock
Definition: lockdefs.h:42
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
void _bt_pendingfsm_finalize(Relation rel, BTVacState *vstate)
Definition: nbtpage.c:2995
void _bt_pendingfsm_init(Relation rel, BTVacState *vstate, bool cleanuponly)
Definition: nbtpage.c:2954
static void btvacuumpage(BTVacState *vstate, BlockNumber scanblkno)
Definition: nbtree.c:1114
#define PROGRESS_SCAN_BLOCKS_DONE
Definition: progress.h:123
#define PROGRESS_SCAN_BLOCKS_TOTAL
Definition: progress.h:122
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:648
BTPendingFSM * pendingpages
Definition: nbtree.h:344
int npendingpages
Definition: nbtree.h:345
int bufsize
Definition: nbtree.h:342
int maxbufsize
Definition: nbtree.h:343
BlockNumber num_pages
Definition: genam.h:77
bool report_progress
Definition: genam.h:49

References _bt_pendingfsm_finalize(), _bt_pendingfsm_init(), ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, BTREE_METAPAGE, btvacuumpage(), BTVacState::bufsize, BTVacState::callback, callback(), BTVacState::callback_state, CurrentMemoryContext, BTVacState::cycleid, ExclusiveLock, IndexVacuumInfo::index, IndexFreeSpaceMapVacuum(), BTVacState::info, LockRelationForExtension(), BTVacState::maxbufsize, MemoryContextDelete(), BTVacState::npendingpages, IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::num_pages, BTVacState::pagedelcontext, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, BTVacState::pendingpages, pgstat_progress_update_param(), PROGRESS_SCAN_BLOCKS_DONE, PROGRESS_SCAN_BLOCKS_TOTAL, RELATION_IS_LOCAL, RelationGetNumberOfBlocks, IndexVacuumInfo::report_progress, BTVacState::stats, and UnlockRelationForExtension().

Referenced by btbulkdelete(), and btvacuumcleanup().
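The outer loop of btvacuumscan() (source lines 1058-1082) embodies a reusable pattern: scan up to the relation length you last observed, then loop back and re-read the length so that pages added by concurrent splits are still visited, holding the relation-extension lock only while reading the length. A minimal sketch of that loop shape follows; lock_extension(), unlock_extension(), get_num_blocks(), and visit_page() are hypothetical stand-ins for the LockRelationForExtension(), RelationGetNumberOfBlocks(), and btvacuumpage() calls, with the "concurrent extension" simulated inside get_num_blocks().

#include <stdio.h>

static unsigned nblocks = 10;   /* simulated relation length; may grow */

static void lock_extension(void)   { /* would block concurrent extension */ }
static void unlock_extension(void) { }

static unsigned
get_num_blocks(void)
{
    /* simulate another backend extending the relation mid-scan */
    static int calls = 0;
    if (++calls == 2)
        nblocks += 3;
    return nblocks;
}

static void
visit_page(unsigned blkno)
{
    printf("visit block %u\n", blkno);
}

int
main(void)
{
    unsigned scanblkno = 1;     /* skip block 0, as nbtree skips its metapage */

    for (;;)
    {
        unsigned num_pages;

        /* hold the extension lock only long enough to read the length */
        lock_extension();
        num_pages = get_num_blocks();
        unlock_extension();

        if (scanblkno >= num_pages)
            break;              /* whole relation scanned */

        /* scan up to the length we saw, then loop back and recheck */
        for (; scanblkno < num_pages; scanblkno++)
            visit_page(scanblkno);
    }
    return 0;
}

Run as-is, this visits blocks 1 through 12: the second length check picks up the three blocks "added" during the first pass, which is exactly why btvacuumscan() rechecks the length on every iteration instead of caching it once.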