PostgreSQL Source Code (git master)
nbtree.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "access/nbtxlog.h"
#include "access/relscan.h"
#include "access/xlog.h"
#include "commands/progress.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "nodes/execnodes.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "storage/condition_variable.h"
#include "storage/indexfsm.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/smgr.h"
#include "utils/builtins.h"
#include "utils/index_selfuncs.h"
#include "utils/memutils.h"

Data Structures

struct  BTParallelScanDescData
 

Typedefs

typedef struct BTParallelScanDescData BTParallelScanDescData
 
typedef struct BTParallelScanDescData * BTParallelScanDesc
 

Enumerations

enum  BTPS_State { BTPARALLEL_NOT_INITIALIZED, BTPARALLEL_ADVANCING, BTPARALLEL_IDLE, BTPARALLEL_DONE }
 

Functions

static void btvacuumscan (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid)
 
static void btvacuumpage (BTVacState *vstate, BlockNumber scanblkno)
 
static BTVacuumPosting btreevacuumposting (BTVacState *vstate, IndexTuple posting, OffsetNumber updatedoffset, int *nremaining)
 
Datum bthandler (PG_FUNCTION_ARGS)
 
void btbuildempty (Relation index)
 
bool btinsert (Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo)
 
bool btgettuple (IndexScanDesc scan, ScanDirection dir)
 
int64 btgetbitmap (IndexScanDesc scan, TIDBitmap *tbm)
 
IndexScanDesc btbeginscan (Relation rel, int nkeys, int norderbys)
 
void btrescan (IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
 
void btendscan (IndexScanDesc scan)
 
void btmarkpos (IndexScanDesc scan)
 
void btrestrpos (IndexScanDesc scan)
 
Size btestimateparallelscan (void)
 
void btinitparallelscan (void *target)
 
void btparallelrescan (IndexScanDesc scan)
 
bool _bt_parallel_seize (IndexScanDesc scan, BlockNumber *pageno)
 
void _bt_parallel_release (IndexScanDesc scan, BlockNumber scan_page)
 
void _bt_parallel_done (IndexScanDesc scan)
 
void _bt_parallel_advance_array_keys (IndexScanDesc scan)
 
IndexBulkDeleteResult * btbulkdelete (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
 
IndexBulkDeleteResult * btvacuumcleanup (IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 
bool btcanreturn (Relation index, int attno)
 

Typedef Documentation

◆ BTParallelScanDesc

Definition at line 77 of file nbtree.c.

◆ BTParallelScanDescData

Enumeration Type Documentation

◆ BTPS_State

enum BTPS_State
Enumerator
BTPARALLEL_NOT_INITIALIZED 
BTPARALLEL_ADVANCING 
BTPARALLEL_IDLE 
BTPARALLEL_DONE 

Definition at line 53 of file nbtree.c.
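The enumerators track who currently owns advancement of a parallel scan. The transitions below are a rough sketch pieced together from the functions documented on this page; the comments in nbtree.c itself are authoritative.

 /*
  * Sketch of btps_pageStatus transitions (not authoritative):
  *
  *   BTPARALLEL_NOT_INITIALIZED   set by btinitparallelscan()/btparallelrescan()
  *        |
  *        v  _bt_parallel_seize(): one worker takes over page advancement
  *   BTPARALLEL_ADVANCING
  *        |
  *        v  _bt_parallel_release(): next page number published for others
  *   BTPARALLEL_IDLE              (cycles back to ADVANCING on the next seize)
  *        |
  *        v  _bt_parallel_done(): no pages left for this set of scan keys
  *   BTPARALLEL_DONE              (_bt_parallel_advance_array_keys() may reset
  *                                 it to NOT_INITIALIZED for the next array keys)
  */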

Function Documentation

◆ _bt_parallel_advance_array_keys()

void _bt_parallel_advance_array_keys ( IndexScanDesc  scan)

Definition at line 756 of file nbtree.c.

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_DONE, BTPARALLEL_NOT_INITIALIZED, InvalidBlockNumber, OffsetToPointer, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_advance_array_keys().

757 {
758  BTScanOpaque so = (BTScanOpaque) scan->opaque;
759  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
760  BTParallelScanDesc btscan;
761 
762  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
763  parallel_scan->ps_offset);
764 
765  so->arrayKeyCount++;
766  SpinLockAcquire(&btscan->btps_mutex);
767  if (btscan->btps_pageStatus == BTPARALLEL_DONE)
768  {
769  btscan->btps_scanPage = InvalidBlockNumber;
770  btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
771  btscan->btps_arrayKeyCount++;
772  }
773  SpinLockRelease(&btscan->btps_mutex);
774 }
#define SpinLockAcquire(lock)
Definition: spin.h:62
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:1072
#define OffsetToPointer(base, offset)
Definition: c.h:707
#define SpinLockRelease(lock)
Definition: spin.h:64
struct ParallelIndexScanDescData * parallel_scan
Definition: relscan.h:166
#define InvalidBlockNumber
Definition: block.h:33

◆ _bt_parallel_done()

void _bt_parallel_done ( IndexScanDesc  scan)

Definition at line 715 of file nbtree.c.

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_DONE, ConditionVariableBroadcast(), OffsetToPointer, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_first(), and _bt_readnextpage().

716 {
717  BTScanOpaque so = (BTScanOpaque) scan->opaque;
718  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
719  BTParallelScanDesc btscan;
720  bool status_changed = false;
721 
722  /* Do nothing, for non-parallel scans */
723  if (parallel_scan == NULL)
724  return;
725 
726  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
727  parallel_scan->ps_offset);
728 
729  /*
730  * Mark the parallel scan as done for this combination of scan keys,
731  * unless some other process already did so. See also
732  * _bt_advance_array_keys.
733  */
734  SpinLockAcquire(&btscan->btps_mutex);
735  if (so->arrayKeyCount >= btscan->btps_arrayKeyCount &&
736  btscan->btps_pageStatus != BTPARALLEL_DONE)
737  {
738  btscan->btps_pageStatus = BTPARALLEL_DONE;
739  status_changed = true;
740  }
741  SpinLockRelease(&btscan->btps_mutex);
742 
743  /* wake up all the workers associated with this parallel scan */
744  if (status_changed)
745  ConditionVariableBroadcast(&btscan->btps_cv);
746 }
void ConditionVariableBroadcast(ConditionVariable *cv)
struct BTParallelScanDescData * BTParallelScanDesc
Definition: nbtree.c:77
#define SpinLockAcquire(lock)
Definition: spin.h:62
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:1072
#define OffsetToPointer(base, offset)
Definition: c.h:707
#define SpinLockRelease(lock)
Definition: spin.h:64
struct ParallelIndexScanDescData * parallel_scan
Definition: relscan.h:166

◆ _bt_parallel_release()

void _bt_parallel_release ( IndexScanDesc  scan,
BlockNumber  scan_page 
)

Definition at line 692 of file nbtree.c.

References BTPARALLEL_IDLE, BTParallelScanDescData::btps_cv, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, ConditionVariableSignal(), OffsetToPointer, IndexScanDescData::parallel_scan, ParallelIndexScanDescData::ps_offset, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_readnextpage(), and _bt_readpage().

693 {
694  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
695  BTParallelScanDesc btscan;
696 
697  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
698  parallel_scan->ps_offset);
699 
700  SpinLockAcquire(&btscan->btps_mutex);
701  btscan->btps_scanPage = scan_page;
702  btscan->btps_pageStatus = BTPARALLEL_IDLE;
703  SpinLockRelease(&btscan->btps_mutex);
704  ConditionVariableSignal(&btscan->btps_cv);
705 }
slock_t btps_mutex
Definition: nbtree.c:73
struct BTParallelScanDescData * BTParallelScanDesc
Definition: nbtree.c:77
#define SpinLockAcquire(lock)
Definition: spin.h:62
void ConditionVariableSignal(ConditionVariable *cv)
ConditionVariable btps_cv
Definition: nbtree.c:74
#define OffsetToPointer(base, offset)
Definition: c.h:707
#define SpinLockRelease(lock)
Definition: spin.h:64
struct ParallelIndexScanDescData * parallel_scan
Definition: relscan.h:166
BlockNumber btps_scanPage
Definition: nbtree.c:67
BTPS_State btps_pageStatus
Definition: nbtree.c:68

◆ _bt_parallel_seize()

bool _bt_parallel_seize ( IndexScanDesc  scan,
BlockNumber *  pageno 
)

Definition at line 634 of file nbtree.c.

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_ADVANCING, BTPARALLEL_DONE, ConditionVariableCancelSleep(), ConditionVariableSleep(), OffsetToPointer, IndexScanDescData::opaque, P_NONE, IndexScanDescData::parallel_scan, SpinLockAcquire, SpinLockRelease, status(), and WAIT_EVENT_BTREE_PAGE.

Referenced by _bt_first(), _bt_readnextpage(), and _bt_steppage().

635 {
636  BTScanOpaque so = (BTScanOpaque) scan->opaque;
637  BTPS_State pageStatus;
638  bool exit_loop = false;
639  bool status = true;
640  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
641  BTParallelScanDesc btscan;
642 
643  *pageno = P_NONE;
644 
645  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
646  parallel_scan->ps_offset);
647 
648  while (1)
649  {
650  SpinLockAcquire(&btscan->btps_mutex);
651  pageStatus = btscan->btps_pageStatus;
652 
653  if (so->arrayKeyCount < btscan->btps_arrayKeyCount)
654  {
655  /* Parallel scan has already advanced to a new set of scankeys. */
656  status = false;
657  }
658  else if (pageStatus == BTPARALLEL_DONE)
659  {
660  /*
661  * We're done with this set of scankeys. This may be the end, or
662  * there could be more sets to try.
663  */
664  status = false;
665  }
666  else if (pageStatus != BTPARALLEL_ADVANCING)
667  {
668  /*
669  * We have successfully seized control of the scan for the purpose
670  * of advancing it to a new page!
671  */
672  btscan->btps_pageStatus = BTPARALLEL_ADVANCING;
673  *pageno = btscan->btps_scanPage;
674  exit_loop = true;
675  }
676  SpinLockRelease(&btscan->btps_mutex);
677  if (exit_loop || !status)
678  break;
679  ConditionVariableSleep(&btscan->btps_cv, WAIT_EVENT_BTREE_PAGE);
680  }
681  ConditionVariableCancelSleep();
682 
683  return status;
684 }
#define P_NONE
Definition: nbtree.h:211
#define SpinLockAcquire(lock)
Definition: spin.h:62
void ConditionVariableCancelSleep(void)
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:1072
BTPS_State
Definition: nbtree.c:53
#define OffsetToPointer(base, offset)
Definition: c.h:707
#define SpinLockRelease(lock)
Definition: spin.h:64
struct ParallelIndexScanDescData * parallel_scan
Definition: relscan.h:166
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
static void static void status(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:229
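For orientation, here is a simplified sketch of how callers such as _bt_first() and _bt_readnextpage() are expected to combine the three parallel-scan primitives. The helper read_page_rightlink() is hypothetical, and the real callers interleave this with their normal page-reading logic.

 BlockNumber pageno;

 while (_bt_parallel_seize(scan, &pageno))   /* we now own advancement */
 {
     /* hypothetical helper: read the page, remember its right sibling */
     BlockNumber next = read_page_rightlink(pageno);

     if (next == P_NONE)
     {
         _bt_parallel_done(scan);            /* end of scan: wake all workers */
         break;
     }
     _bt_parallel_release(scan, next);       /* hand advancement to others */

     /* ... process the tuples of pageno locally ... */
 }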

◆ btbeginscan()

IndexScanDesc btbeginscan ( Relation  rel,
int  nkeys,
int  norderbys 
)

Definition at line 342 of file nbtree.c.

References BTScanOpaqueData::arrayContext, BTScanOpaqueData::arrayKeyData, BTScanOpaqueData::arrayKeys, Assert, BTScanPosInvalidate, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanOpaqueData::keyData, BTScanOpaqueData::killedItems, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanOpaqueData::numArrayKeys, IndexScanDescData::numberOfKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), RelationGetDescr, RelationGetIndexScan(), and IndexScanDescData::xs_itupdesc.

Referenced by bthandler().

343 {
344  IndexScanDesc scan;
345  BTScanOpaque so;
346 
347  /* no order by operators allowed */
348  Assert(norderbys == 0);
349 
350  /* get the scan */
351  scan = RelationGetIndexScan(rel, nkeys, norderbys);
352 
353  /* allocate private workspace */
354  so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
355  BTScanPosInvalidate(so->currPos);
356  BTScanPosInvalidate(so->markPos);
357  if (scan->numberOfKeys > 0)
358  so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
359  else
360  so->keyData = NULL;
361 
362  so->arrayKeyData = NULL; /* assume no array keys for now */
363  so->numArrayKeys = 0;
364  so->arrayKeys = NULL;
365  so->arrayContext = NULL;
366 
367  so->killedItems = NULL; /* until needed */
368  so->numKilled = 0;
369 
370  /*
371  * We don't know yet whether the scan will be index-only, so we do not
372  * allocate the tuple workspace arrays until btrescan. However, we set up
373  * scan->xs_itupdesc whether we'll need it or not, since that's so cheap.
374  */
375  so->currTuples = so->markTuples = NULL;
376 
377  scan->xs_itupdesc = RelationGetDescr(rel);
378 
379  scan->opaque = so;
380 
381  return scan;
382 }
#define RelationGetDescr(relation)
Definition: rel.h:503
MemoryContext arrayContext
Definition: nbtree.h:1044
char * currTuples
Definition: nbtree.h:1055
struct TupleDescData * xs_itupdesc
Definition: relscan.h:143
BTScanPosData markPos
Definition: nbtree.h:1069
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:1072
ScanKeyData * ScanKey
Definition: skey.h:75
char * markTuples
Definition: nbtree.h:1056
ScanKey arrayKeyData
Definition: nbtree.h:1038
int * killedItems
Definition: nbtree.h:1047
#define Assert(condition)
Definition: c.h:804
#define BTScanPosInvalidate(scanpos)
Definition: nbtree.h:1011
BTScanPosData currPos
Definition: nbtree.h:1068
void * palloc(Size size)
Definition: mcxt.c:1062
ScanKey keyData
Definition: nbtree.h:1035
BTArrayKeyInfo * arrayKeys
Definition: nbtree.h:1043
IndexScanDesc RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
Definition: genam.c:81

◆ btbuildempty()

void btbuildempty ( Relation  index)

Definition at line 150 of file nbtree.c.

References _bt_allequalimage(), _bt_initmetapage(), BTREE_METAPAGE, INIT_FORKNUM, log_newpage(), SMgrRelationData::node, P_NONE, PageSetChecksumInplace(), palloc(), RelationGetSmgr(), smgrimmedsync(), and smgrwrite().

Referenced by bthandler().

151 {
152  Page metapage;
153 
154  /* Construct metapage. */
155  metapage = (Page) palloc(BLCKSZ);
156  _bt_initmetapage(metapage, P_NONE, 0, _bt_allequalimage(index, false));
157 
158  /*
159  * Write the page and log it. It might seem that an immediate sync would
160  * be sufficient to guarantee that the file exists on disk, but recovery
161  * itself might remove it while replaying, for example, an
162  * XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record. Therefore, we need
163  * this even when wal_level=minimal.
164  */
165  PageSetChecksumInplace(metapage, BTREE_METAPAGE);
166  smgrwrite(RelationGetSmgr(index), INIT_FORKNUM, BTREE_METAPAGE,
167  (char *) metapage, true);
168  log_newpage(&RelationGetSmgr(index)->smgr_rnode.node, INIT_FORKNUM,
169  BTREE_METAPAGE, metapage, true);
170 
171  /*
172  * An immediate sync is required even if we xlog'd the page, because the
173  * write did not go through shared_buffers and therefore a concurrent
174  * checkpoint may have moved the redo pointer past our xlog record.
175  */
176  smgrimmedsync(RelationGetSmgr(index), INIT_FORKNUM);
177 }
#define P_NONE
Definition: nbtree.h:211
void _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level, bool allequalimage)
Definition: nbtpage.c:69
void smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync)
Definition: smgr.c:523
#define BTREE_METAPAGE
Definition: nbtree.h:146
static SMgrRelation RelationGetSmgr(Relation rel)
Definition: rel.h:544
void PageSetChecksumInplace(Page page, BlockNumber blkno)
Definition: bufpage.c:1532
void * palloc(Size size)
Definition: mcxt.c:1062
XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno, Page page, bool page_std)
Definition: xloginsert.c:1050
bool _bt_allequalimage(Relation rel, bool debugmessage)
Definition: nbtutils.c:2692
void smgrimmedsync(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:660
Pointer Page
Definition: bufpage.h:78
dlist_node node
Definition: smgr.h:72

◆ btbulkdelete()

IndexBulkDeleteResult* btbulkdelete ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats,
IndexBulkDeleteCallback  callback,
void *  callback_state 
)

Definition at line 784 of file nbtree.c.

References _bt_end_vacuum(), _bt_end_vacuum_callback(), _bt_start_vacuum(), btvacuumscan(), IndexVacuumInfo::index, palloc0(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, and PointerGetDatum.

Referenced by bthandler().

786 {
787  Relation rel = info->index;
788  BTCycleId cycleid;
789 
790  /* allocate stats if first time through, else re-use existing struct */
791  if (stats == NULL)
792  stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
793 
794  /* Establish the vacuum cycle ID to use for this scan */
795  /* The ENSURE stuff ensures we clean up shared memory on failure */
796  PG_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
797  {
798  cycleid = _bt_start_vacuum(rel);
799 
800  btvacuumscan(info, stats, callback, callback_state, cycleid);
801  }
802  PG_END_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
803  _bt_end_vacuum(rel);
804 
805  return stats;
806 }
#define PointerGetDatum(X)
Definition: postgres.h:600
Relation index
Definition: genam.h:46
void _bt_end_vacuum_callback(int code, Datum arg)
Definition: nbtutils.c:2054
static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid)
Definition: nbtree.c:902
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:47
uint16 BTCycleId
Definition: nbtree.h:29
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
Definition: test_ifaddrs.c:48
void _bt_end_vacuum(Relation rel)
Definition: nbtutils.c:2026
void * palloc0(Size size)
Definition: mcxt.c:1093
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:52
BTCycleId _bt_start_vacuum(Relation rel)
Definition: nbtutils.c:1969

◆ btcanreturn()

bool btcanreturn ( Relation  index,
int  attno 
)

Definition at line 1417 of file nbtree.c.

Referenced by bthandler().

1418 {
1419  return true;
1420 }

◆ btendscan()

void btendscan ( IndexScanDesc  scan)

Definition at line 447 of file nbtree.c.

References _bt_killitems(), BTScanOpaqueData::arrayContext, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanOpaqueData::keyData, BTScanOpaqueData::killedItems, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, MemoryContextDelete(), BTScanOpaqueData::numKilled, IndexScanDescData::opaque, and pfree().

Referenced by bthandler().

448 {
449  BTScanOpaque so = (BTScanOpaque) scan->opaque;
450 
451  /* we aren't holding any read locks, but gotta drop the pins */
452  if (BTScanPosIsValid(so->currPos))
453  {
454  /* Before leaving current page, deal with any killed items */
455  if (so->numKilled > 0)
456  _bt_killitems(scan);
457  BTScanPosUnpinIfPinned(so->currPos);
458  }
459 
460  so->markItemIndex = -1;
461  BTScanPosUnpinIfPinned(so->markPos);
462 
463  /* No need to invalidate positions, the RAM is about to be freed. */
464 
465  /* Release storage */
466  if (so->keyData != NULL)
467  pfree(so->keyData);
468  /* so->arrayKeyData and so->arrayKeys are in arrayContext */
469  if (so->arrayContext != NULL)
470  MemoryContextDelete(so->arrayContext);
471  if (so->killedItems != NULL)
472  pfree(so->killedItems);
473  if (so->currTuples != NULL)
474  pfree(so->currTuples);
475  /* so->markTuples should not be pfree'd, see btrescan */
476  pfree(so);
477 }
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:218
MemoryContext arrayContext
Definition: nbtree.h:1044
char * currTuples
Definition: nbtree.h:1055
#define BTScanPosIsValid(scanpos)
Definition: nbtree.h:1005
void _bt_killitems(IndexScanDesc scan)
Definition: nbtutils.c:1718
BTScanPosData markPos
Definition: nbtree.h:1069
void pfree(void *pointer)
Definition: mcxt.c:1169
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:1072
int * killedItems
Definition: nbtree.h:1047
BTScanPosData currPos
Definition: nbtree.h:1068
ScanKey keyData
Definition: nbtree.h:1035
#define BTScanPosUnpinIfPinned(scanpos)
Definition: nbtree.h:999

◆ btestimateparallelscan()

Size btestimateparallelscan ( void  )

Definition at line 569 of file nbtree.c.

Referenced by bthandler().

570 {
571  return sizeof(BTParallelScanDescData);
572 }
struct BTParallelScanDescData BTParallelScanDescData
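btestimateparallelscan() reports how much space the AM needs in the parallel DSM segment; the generic index-AM machinery allocates that space and hands it to btinitparallelscan() (documented below). A minimal sketch of the calling pattern, with the allocation reduced to a plain palloc for illustration:

 /* Leader-side sketch; real code places this in the parallel DSM segment. */
 Size  size   = btestimateparallelscan();
 void *target = palloc(size);

 btinitparallelscan(target);   /* slot starts out BTPARALLEL_NOT_INITIALIZED */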

◆ btgetbitmap()

int64 btgetbitmap ( IndexScanDesc  scan,
TIDBitmap *  tbm 
)

Definition at line 284 of file nbtree.c.

References _bt_advance_array_keys(), _bt_first(), _bt_next(), _bt_start_array_keys(), BTScanOpaqueData::currPos, ForwardScanDirection, BTScanPosItem::heapTid, BTScanPosData::itemIndex, BTScanPosData::items, BTScanPosData::lastItem, BTScanOpaqueData::numArrayKeys, IndexScanDescData::opaque, tbm_add_tuples(), and IndexScanDescData::xs_heaptid.

Referenced by bthandler().

285 {
286  BTScanOpaque so = (BTScanOpaque) scan->opaque;
287  int64 ntids = 0;
288  ItemPointer heapTid;
289 
290  /*
291  * If we have any array keys, initialize them.
292  */
293  if (so->numArrayKeys)
294  {
295  /* punt if we have any unsatisfiable array keys */
296  if (so->numArrayKeys < 0)
297  return ntids;
298 
299  _bt_start_array_keys(scan, ForwardScanDirection);
300  }
301 
302  /* This loop handles advancing to the next array elements, if any */
303  do
304  {
305  /* Fetch the first page & tuple */
306  if (_bt_first(scan, ForwardScanDirection))
307  {
308  /* Save tuple ID, and continue scanning */
309  heapTid = &scan->xs_heaptid;
310  tbm_add_tuples(tbm, heapTid, 1, false);
311  ntids++;
312 
313  for (;;)
314  {
315  /*
316  * Advance to next tuple within page. This is the same as the
317  * easy case in _bt_next().
318  */
319  if (++so->currPos.itemIndex > so->currPos.lastItem)
320  {
321  /* let _bt_next do the heavy lifting */
322  if (!_bt_next(scan, ForwardScanDirection))
323  break;
324  }
325 
326  /* Save tuple ID, and continue scanning */
327  heapTid = &so->currPos.items[so->currPos.itemIndex].heapTid;
328  tbm_add_tuples(tbm, heapTid, 1, false);
329  ntids++;
330  }
331  }
332  /* Now see if we have more array keys to deal with */
333  } while (so->numArrayKeys && _bt_advance_array_keys(scan, ForwardScanDirection));
334 
335  return ntids;
336 }
bool _bt_advance_array_keys(IndexScanDesc scan, ScanDirection dir)
Definition: nbtutils.c:544
void _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir)
Definition: nbtutils.c:518
int itemIndex
Definition: nbtree.h:981
void tbm_add_tuples(TIDBitmap *tbm, const ItemPointer tids, int ntids, bool recheck)
Definition: tidbitmap.c:376
bool _bt_first(IndexScanDesc scan, ScanDirection dir)
Definition: nbtsearch.c:848
int lastItem
Definition: nbtree.h:980
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:1072
ItemPointerData xs_heaptid
Definition: relscan.h:147
ItemPointerData heapTid
Definition: nbtree.h:944
BTScanPosItem items[MaxTIDsPerBTreePage]
Definition: nbtree.h:983
BTScanPosData currPos
Definition: nbtree.h:1068
bool _bt_next(IndexScanDesc scan, ScanDirection dir)
Definition: nbtsearch.c:1454

◆ btgettuple()

bool btgettuple ( IndexScanDesc  scan,
ScanDirection  dir 
)

Definition at line 210 of file nbtree.c.

References _bt_advance_array_keys(), _bt_first(), _bt_next(), _bt_start_array_keys(), BTScanPosIsValid, BTScanOpaqueData::currPos, BTScanPosData::itemIndex, IndexScanDescData::kill_prior_tuple, BTScanOpaqueData::killedItems, MaxTIDsPerBTreePage, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), and IndexScanDescData::xs_recheck.

Referenced by bthandler().

211 {
212  BTScanOpaque so = (BTScanOpaque) scan->opaque;
213  bool res;
214 
215  /* btree indexes are never lossy */
216  scan->xs_recheck = false;
217 
218  /*
219  * If we have any array keys, initialize them during first call for a
220  * scan. We can't do this in btrescan because we don't know the scan
221  * direction at that time.
222  */
223  if (so->numArrayKeys && !BTScanPosIsValid(so->currPos))
224  {
225  /* punt if we have any unsatisfiable array keys */
226  if (so->numArrayKeys < 0)
227  return false;
228 
229  _bt_start_array_keys(scan, dir);
230  }
231 
232  /* This loop handles advancing to the next array elements, if any */
233  do
234  {
235  /*
236  * If we've already initialized this scan, we can just advance it in
237  * the appropriate direction. If we haven't done so yet, we call
238  * _bt_first() to get the first item in the scan.
239  */
240  if (!BTScanPosIsValid(so->currPos))
241  res = _bt_first(scan, dir);
242  else
243  {
244  /*
245  * Check to see if we should kill the previously-fetched tuple.
246  */
247  if (scan->kill_prior_tuple)
248  {
249  /*
250  * Yes, remember it for later. (We'll deal with all such
251  * tuples at once right before leaving the index page.) The
252  * test for numKilled overrun is not just paranoia: if the
253  * caller reverses direction in the indexscan then the same
254  * item might get entered multiple times. It's not worth
255  * trying to optimize that, so we don't detect it, but instead
256  * just forget any excess entries.
257  */
258  if (so->killedItems == NULL)
259  so->killedItems = (int *)
260  palloc(MaxTIDsPerBTreePage * sizeof(int));
261  if (so->numKilled < MaxTIDsPerBTreePage)
262  so->killedItems[so->numKilled++] = so->currPos.itemIndex;
263  }
264 
265  /*
266  * Now continue the scan.
267  */
268  res = _bt_next(scan, dir);
269  }
270 
271  /* If we have a tuple, return it ... */
272  if (res)
273  break;
274  /* ... otherwise see if we have more array keys to deal with */
275  } while (so->numArrayKeys && _bt_advance_array_keys(scan, dir));
276 
277  return res;
278 }
bool _bt_advance_array_keys(IndexScanDesc scan, ScanDirection dir)
Definition: nbtutils.c:544
void _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir)
Definition: nbtutils.c:518
int itemIndex
Definition: nbtree.h:981
#define BTScanPosIsValid(scanpos)
Definition: nbtree.h:1005
#define MaxTIDsPerBTreePage
Definition: nbtree.h:184
bool _bt_first(IndexScanDesc scan, ScanDirection dir)
Definition: nbtsearch.c:848
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:1072
int * killedItems
Definition: nbtree.h:1047
BTScanPosData currPos
Definition: nbtree.h:1068
void * palloc(Size size)
Definition: mcxt.c:1062
bool kill_prior_tuple
Definition: relscan.h:128
bool _bt_next(IndexScanDesc scan, ScanDirection dir)
Definition: nbtsearch.c:1454
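btgettuple() is only reached through the AM-independent wrappers in indexam.c. A hedged sketch of a typical scan lifecycle as the executor drives it (snapshot setup, locking, and error handling omitted; the int4 equality key is just an example):

 ScanKeyData key;
 IndexScanDesc scan;
 ItemPointer tid;

 scan = index_beginscan(heapRel, indexRel, snapshot, 1, 0);  /* btbeginscan() */
 ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(42));
 index_rescan(scan, &key, 1, NULL, 0);                       /* btrescan() */

 while ((tid = index_getnext_tid(scan, ForwardScanDirection)) != NULL)
 {
     /* each iteration is one btgettuple() call returning one TID */
 }
 index_endscan(scan);                                        /* btendscan() */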

◆ bthandler()

Datum bthandler ( PG_FUNCTION_ARGS  )

Definition at line 95 of file nbtree.c.

References IndexAmRoutine::amadjustmembers, IndexAmRoutine::ambeginscan, IndexAmRoutine::ambuild, IndexAmRoutine::ambuildempty, IndexAmRoutine::ambuildphasename, IndexAmRoutine::ambulkdelete, IndexAmRoutine::amcanbackward, IndexAmRoutine::amcaninclude, IndexAmRoutine::amcanmulticol, IndexAmRoutine::amcanorder, IndexAmRoutine::amcanorderbyop, IndexAmRoutine::amcanparallel, IndexAmRoutine::amcanreturn, IndexAmRoutine::amcanunique, IndexAmRoutine::amclusterable, IndexAmRoutine::amcostestimate, IndexAmRoutine::amendscan, IndexAmRoutine::amestimateparallelscan, IndexAmRoutine::amgetbitmap, IndexAmRoutine::amgettuple, IndexAmRoutine::aminitparallelscan, IndexAmRoutine::aminsert, IndexAmRoutine::amkeytype, IndexAmRoutine::ammarkpos, IndexAmRoutine::amoptionalkey, IndexAmRoutine::amoptions, IndexAmRoutine::amoptsprocnum, IndexAmRoutine::amparallelrescan, IndexAmRoutine::amparallelvacuumoptions, IndexAmRoutine::ampredlocks, IndexAmRoutine::amproperty, IndexAmRoutine::amrescan, IndexAmRoutine::amrestrpos, IndexAmRoutine::amsearcharray, IndexAmRoutine::amsearchnulls, IndexAmRoutine::amstorage, IndexAmRoutine::amstrategies, IndexAmRoutine::amsupport, IndexAmRoutine::amusemaintenanceworkmem, IndexAmRoutine::amvacuumcleanup, IndexAmRoutine::amvalidate, btadjustmembers(), btbeginscan(), btbuild(), btbuildempty(), btbuildphasename(), btbulkdelete(), btcanreturn(), btcostestimate(), btendscan(), btestimateparallelscan(), btgetbitmap(), btgettuple(), btinitparallelscan(), btinsert(), btmarkpos(), BTMaxStrategyNumber, BTNProcs, btoptions(), BTOPTIONS_PROC, btparallelrescan(), btproperty(), btrescan(), btrestrpos(), btvacuumcleanup(), btvalidate(), InvalidOid, makeNode, PG_RETURN_POINTER, VACUUM_OPTION_PARALLEL_BULKDEL, and VACUUM_OPTION_PARALLEL_COND_CLEANUP.

96 {
97  IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
98 
99  amroutine->amstrategies = BTMaxStrategyNumber;
100  amroutine->amsupport = BTNProcs;
101  amroutine->amoptsprocnum = BTOPTIONS_PROC;
102  amroutine->amcanorder = true;
103  amroutine->amcanorderbyop = false;
104  amroutine->amcanbackward = true;
105  amroutine->amcanunique = true;
106  amroutine->amcanmulticol = true;
107  amroutine->amoptionalkey = true;
108  amroutine->amsearcharray = true;
109  amroutine->amsearchnulls = true;
110  amroutine->amstorage = false;
111  amroutine->amclusterable = true;
112  amroutine->ampredlocks = true;
113  amroutine->amcanparallel = true;
114  amroutine->amcaninclude = true;
115  amroutine->amusemaintenanceworkmem = false;
116  amroutine->amparallelvacuumoptions =
117  VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_COND_CLEANUP;
118  amroutine->amkeytype = InvalidOid;
119 
120  amroutine->ambuild = btbuild;
121  amroutine->ambuildempty = btbuildempty;
122  amroutine->aminsert = btinsert;
123  amroutine->ambulkdelete = btbulkdelete;
124  amroutine->amvacuumcleanup = btvacuumcleanup;
125  amroutine->amcanreturn = btcanreturn;
126  amroutine->amcostestimate = btcostestimate;
127  amroutine->amoptions = btoptions;
128  amroutine->amproperty = btproperty;
129  amroutine->ambuildphasename = btbuildphasename;
130  amroutine->amvalidate = btvalidate;
131  amroutine->amadjustmembers = btadjustmembers;
132  amroutine->ambeginscan = btbeginscan;
133  amroutine->amrescan = btrescan;
134  amroutine->amgettuple = btgettuple;
135  amroutine->amgetbitmap = btgetbitmap;
136  amroutine->amendscan = btendscan;
137  amroutine->ammarkpos = btmarkpos;
138  amroutine->amrestrpos = btrestrpos;
139  amroutine->amestimateparallelscan = btestimateparallelscan;
140  amroutine->aminitparallelscan = btinitparallelscan;
141  amroutine->amparallelrescan = btparallelrescan;
142 
143  PG_RETURN_POINTER(amroutine);
144 }
ambeginscan_function ambeginscan
Definition: amapi.h:271
uint8 amparallelvacuumoptions
Definition: amapi.h:248
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
ambulkdelete_function ambulkdelete
Definition: amapi.h:262
bool amcanmulticol
Definition: amapi.h:228
uint16 amsupport
Definition: amapi.h:216
void btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
Definition: nbtree.c:388
amgettuple_function amgettuple
Definition: amapi.h:273
bool amcanorderbyop
Definition: amapi.h:222
amproperty_function amproperty
Definition: amapi.h:267
char * btbuildphasename(int64 phasenum)
Definition: nbtutils.c:2152
amparallelrescan_function amparallelrescan
Definition: amapi.h:282
bool amstorage
Definition: amapi.h:236
bool ampredlocks
Definition: amapi.h:240
IndexScanDesc btbeginscan(Relation rel, int nkeys, int norderbys)
Definition: nbtree.c:342
aminsert_function aminsert
Definition: amapi.h:261
Oid amkeytype
Definition: amapi.h:250
bool amoptionalkey
Definition: amapi.h:230
amvalidate_function amvalidate
Definition: amapi.h:269
Size btestimateparallelscan(void)
Definition: nbtree.c:569
#define BTOPTIONS_PROC
Definition: nbtree.h:704
bool btvalidate(Oid opclassoid)
Definition: nbtvalidate.c:41
void btbuildempty(Relation index)
Definition: nbtree.c:150
IndexBulkDeleteResult * btvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
Definition: nbtree.c:814
amgetbitmap_function amgetbitmap
Definition: amapi.h:274
ambuild_function ambuild
Definition: amapi.h:259
amoptions_function amoptions
Definition: amapi.h:266
int64 btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
Definition: nbtree.c:284
IndexBuildResult * btbuild(Relation heap, Relation index, IndexInfo *indexInfo)
Definition: nbtsort.c:298
void btadjustmembers(Oid opfamilyoid, Oid opclassoid, List *operators, List *functions)
Definition: nbtvalidate.c:293
void btendscan(IndexScanDesc scan)
Definition: nbtree.c:447
bool amcaninclude
Definition: amapi.h:244
amcostestimate_function amcostestimate
Definition: amapi.h:265
bool amcanunique
Definition: amapi.h:226
void btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
Definition: selfuncs.c:6611
amvacuumcleanup_function amvacuumcleanup
Definition: amapi.h:263
amendscan_function amendscan
Definition: amapi.h:275
bool amcanbackward
Definition: amapi.h:224
amrescan_function amrescan
Definition: amapi.h:272
bool amcanparallel
Definition: amapi.h:242
bool amsearchnulls
Definition: amapi.h:234
bool btgettuple(IndexScanDesc scan, ScanDirection dir)
Definition: nbtree.c:210
void btmarkpos(IndexScanDesc scan)
Definition: nbtree.c:483
bool amclusterable
Definition: amapi.h:238
bool amsearcharray
Definition: amapi.h:232
#define InvalidOid
Definition: postgres_ext.h:36
bool amusemaintenanceworkmem
Definition: amapi.h:246
#define makeNode(_type_)
Definition: nodes.h:584
amadjustmembers_function amadjustmembers
Definition: amapi.h:270
#define VACUUM_OPTION_PARALLEL_COND_CLEANUP
Definition: vacuum.h:52
bool btproperty(Oid index_oid, int attno, IndexAMProperty prop, const char *propname, bool *res, bool *isnull)
Definition: nbtutils.c:2129
void btrestrpos(IndexScanDesc scan)
Definition: nbtree.c:513
bool btinsert(Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo)
Definition: nbtree.c:186
ammarkpos_function ammarkpos
Definition: amapi.h:276
bool amcanorder
Definition: amapi.h:220
ambuildphasename_function ambuildphasename
Definition: amapi.h:268
#define VACUUM_OPTION_PARALLEL_BULKDEL
Definition: vacuum.h:45
bool btcanreturn(Relation index, int attno)
Definition: nbtree.c:1417
void btparallelrescan(IndexScanDesc scan)
Definition: nbtree.c:593
amestimateparallelscan_function amestimateparallelscan
Definition: amapi.h:280
#define BTNProcs
Definition: nbtree.h:705
uint16 amstrategies
Definition: amapi.h:214
void btinitparallelscan(void *target)
Definition: nbtree.c:578
uint16 amoptsprocnum
Definition: amapi.h:218
ambuildempty_function ambuildempty
Definition: amapi.h:260
bytea * btoptions(Datum reloptions, bool validate)
Definition: nbtutils.c:2104
#define BTMaxStrategyNumber
Definition: stratnum.h:35
amcanreturn_function amcanreturn
Definition: amapi.h:264
aminitparallelscan_function aminitparallelscan
Definition: amapi.h:281
IndexBulkDeleteResult * btbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
Definition: nbtree.c:784
amrestrpos_function amrestrpos
Definition: amapi.h:277
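bthandler() is the function pg_am.amhandler points at for the btree AM; core code normally obtains the routine struct through the amapi.h helpers rather than calling the handler directly. A small sketch, assuming the standard helpers:

 #include "access/amapi.h"
 #include "catalog/pg_am_d.h"

 /* Sketch: fetch the btree IndexAmRoutine the way core code does and
  * check a couple of capability flags. */
 IndexAmRoutine *amroutine = GetIndexAmRoutineByAmId(BTREE_AM_OID, false);

 Assert(amroutine->amcanorder);     /* btree supports ordered scans */
 Assert(amroutine->amcanparallel);  /* ... and parallel scans */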

◆ btinitparallelscan()

void btinitparallelscan ( void *  target)

Definition at line 578 of file nbtree.c.

References BTPARALLEL_NOT_INITIALIZED, BTParallelScanDescData::btps_arrayKeyCount, BTParallelScanDescData::btps_cv, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, ConditionVariableInit(), InvalidBlockNumber, and SpinLockInit.

Referenced by bthandler().

579 {
580  BTParallelScanDesc bt_target = (BTParallelScanDesc) target;
581 
582  SpinLockInit(&bt_target->btps_mutex);
583  bt_target->btps_scanPage = InvalidBlockNumber;
584  bt_target->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
585  bt_target->btps_arrayKeyCount = 0;
586  ConditionVariableInit(&bt_target->btps_cv);
587 }
slock_t btps_mutex
Definition: nbtree.c:73
#define SpinLockInit(lock)
Definition: spin.h:60
struct BTParallelScanDescData * BTParallelScanDesc
Definition: nbtree.c:77
void ConditionVariableInit(ConditionVariable *cv)
ConditionVariable btps_cv
Definition: nbtree.c:74
BlockNumber btps_scanPage
Definition: nbtree.c:67
BTPS_State btps_pageStatus
Definition: nbtree.c:68
#define InvalidBlockNumber
Definition: block.h:33

◆ btinsert()

bool btinsert ( Relation  rel,
Datum *  values,
bool *  isnull,
ItemPointer  ht_ctid,
Relation  heapRel,
IndexUniqueCheck  checkUnique,
bool  indexUnchanged,
IndexInfo indexInfo 
)

Definition at line 186 of file nbtree.c.

References _bt_doinsert(), index_form_tuple(), pfree(), RelationGetDescr, and IndexTupleData::t_tid.

Referenced by bthandler().

191 {
192  bool result;
193  IndexTuple itup;
194 
195  /* generate an index tuple */
196  itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
197  itup->t_tid = *ht_ctid;
198 
199  result = _bt_doinsert(rel, itup, checkUnique, indexUnchanged, heapRel);
200 
201  pfree(itup);
202 
203  return result;
204 }
#define RelationGetDescr(relation)
Definition: rel.h:503
ItemPointerData t_tid
Definition: itup.h:37
IndexTuple index_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull)
Definition: indextuple.c:47
void pfree(void *pointer)
Definition: mcxt.c:1169
bool _bt_doinsert(Relation rel, IndexTuple itup, IndexUniqueCheck checkUnique, bool indexUnchanged, Relation heapRel)
Definition: nbtinsert.c:99
static Datum values[MAXATTR]
Definition: bootstrap.c:156

◆ btmarkpos()

void btmarkpos ( IndexScanDesc  scan)

Definition at line 483 of file nbtree.c.

References _bt_mark_array_keys(), BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanPosData::itemIndex, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, and IndexScanDescData::opaque.

Referenced by bthandler().

484 {
485  BTScanOpaque so = (BTScanOpaque) scan->opaque;
486 
487  /* There may be an old mark with a pin (but no lock). */
488  BTScanPosUnpinIfPinned(so->markPos);
489 
490  /*
491  * Just record the current itemIndex. If we later step to next page
492  * before releasing the marked position, _bt_steppage makes a full copy of
493  * the currPos struct in markPos. If (as often happens) the mark is moved
494  * before we leave the page, we don't have to do that work.
495  */
496  if (BTScanPosIsValid(so->currPos))
497  so->markItemIndex = so->currPos.itemIndex;
498  else
499  {
500  BTScanPosInvalidate(so->markPos);
501  so->markItemIndex = -1;
502  }
503 
504  /* Also record the current positions of any array keys */
505  if (so->numArrayKeys)
506  _bt_mark_array_keys(scan);
507 }
int itemIndex
Definition: nbtree.h:981
#define BTScanPosIsValid(scanpos)
Definition: nbtree.h:1005
BTScanPosData markPos
Definition: nbtree.h:1069
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:1072
void _bt_mark_array_keys(IndexScanDesc scan)
Definition: nbtutils.c:603
#define BTScanPosInvalidate(scanpos)
Definition: nbtree.h:1011
BTScanPosData currPos
Definition: nbtree.h:1068
#define BTScanPosUnpinIfPinned(scanpos)
Definition: nbtree.h:999
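Mark/restore is exercised mainly by merge join, again through the indexam.c wrappers. A hedged sketch of the pattern:

 index_markpos(scan);           /* btmarkpos(): remember current itemIndex */

 while (index_getnext_tid(scan, ForwardScanDirection) != NULL)
     ;                          /* scan ahead over some tuples */

 index_restrpos(scan);          /* btrestrpos(): back to the marked position */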

◆ btparallelrescan()

void btparallelrescan ( IndexScanDesc  scan)

Definition at line 593 of file nbtree.c.

References Assert, BTPARALLEL_NOT_INITIALIZED, BTParallelScanDescData::btps_arrayKeyCount, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, InvalidBlockNumber, OffsetToPointer, IndexScanDescData::parallel_scan, ParallelIndexScanDescData::ps_offset, SpinLockAcquire, and SpinLockRelease.

Referenced by bthandler().

594 {
595  BTParallelScanDesc btscan;
596  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
597 
598  Assert(parallel_scan);
599 
600  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
601  parallel_scan->ps_offset);
602 
603  /*
604  * In theory, we don't need to acquire the spinlock here, because there
605  * shouldn't be any other workers running at this point, but we do so for
606  * consistency.
607  */
608  SpinLockAcquire(&btscan->btps_mutex);
609  btscan->btps_scanPage = InvalidBlockNumber;
610  btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
611  btscan->btps_arrayKeyCount = 0;
612  SpinLockRelease(&btscan->btps_mutex);
613 }
slock_t btps_mutex
Definition: nbtree.c:73
struct BTParallelScanDescData * BTParallelScanDesc
Definition: nbtree.c:77
#define SpinLockAcquire(lock)
Definition: spin.h:62
#define OffsetToPointer(base, offset)
Definition: c.h:707
#define SpinLockRelease(lock)
Definition: spin.h:64
struct ParallelIndexScanDescData * parallel_scan
Definition: relscan.h:166
BlockNumber btps_scanPage
Definition: nbtree.c:67
#define Assert(condition)
Definition: c.h:804
BTPS_State btps_pageStatus
Definition: nbtree.c:68
#define InvalidBlockNumber
Definition: block.h:33

◆ btreevacuumposting()

static BTVacuumPosting btreevacuumposting ( BTVacState *  vstate,
IndexTuple  posting,
OffsetNumber  updatedoffset,
int *  nremaining 
)
static

Definition at line 1368 of file nbtree.c.

References BTreeTupleGetNPosting(), BTreeTupleGetPosting(), BTVacState::callback, BTVacState::callback_state, BTVacuumPostingData::deletetids, i, BTVacuumPostingData::itup, BTVacuumPostingData::ndeletedtids, offsetof, palloc(), and BTVacuumPostingData::updatedoffset.

Referenced by btvacuumpage().

1370 {
1371  int live = 0;
1372  int nitem = BTreeTupleGetNPosting(posting);
1373  ItemPointer items = BTreeTupleGetPosting(posting);
1374  BTVacuumPosting vacposting = NULL;
1375 
1376  for (int i = 0; i < nitem; i++)
1377  {
1378  if (!vstate->callback(items + i, vstate->callback_state))
1379  {
1380  /* Live table TID */
1381  live++;
1382  }
1383  else if (vacposting == NULL)
1384  {
1385  /*
1386  * First dead table TID encountered.
1387  *
1388  * It's now clear that we need to delete one or more dead table
1389  * TIDs, so start maintaining metadata describing how to update
1390  * existing posting list tuple.
1391  */
1392  vacposting = palloc(offsetof(BTVacuumPostingData, deletetids) +
1393  nitem * sizeof(uint16));
1394 
1395  vacposting->itup = posting;
1396  vacposting->updatedoffset = updatedoffset;
1397  vacposting->ndeletedtids = 0;
1398  vacposting->deletetids[vacposting->ndeletedtids++] = i;
1399  }
1400  else
1401  {
1402  /* Second or subsequent dead table TID */
1403  vacposting->deletetids[vacposting->ndeletedtids++] = i;
1404  }
1405  }
1406 
1407  *nremaining = live;
1408  return vacposting;
1409 }
uint16 ndeletedtids
Definition: nbtree.h:908
void * callback_state
Definition: nbtree.h:335
static ItemPointer BTreeTupleGetPosting(IndexTuple posting)
Definition: nbtree.h:530
OffsetNumber updatedoffset
Definition: nbtree.h:905
IndexBulkDeleteCallback callback
Definition: nbtree.h:334
IndexTuple itup
Definition: nbtree.h:904
unsigned short uint16
Definition: c.h:440
uint16 deletetids[FLEXIBLE_ARRAY_MEMBER]
Definition: nbtree.h:909
void * palloc(Size size)
Definition: mcxt.c:1062
int i
static uint16 BTreeTupleGetNPosting(IndexTuple posting)
Definition: nbtree.h:511
#define offsetof(type, field)
Definition: c.h:727
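A worked example of the return contract may help; note that deletetids holds 0-based offsets into the posting list, not the TIDs themselves.

 /*
  * Illustration: a posting list tuple with five table TIDs, where the
  * bulk-delete callback reports the TIDs at offsets 1 and 3 as dead.
  *
  *   nitem        = 5
  *   ndeletedtids = 2
  *   deletetids[] = {1, 3}   -- offsets within the posting list
  *   *nremaining  = 3        -- live TIDs kept by the updated tuple
  *
  * Had all five TIDs been live, the function would return NULL instead
  * (no update needed); had all five been dead, the caller deletes the
  * whole tuple, as btvacuumpage() shows.
  */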

◆ btrescan()

void btrescan ( IndexScanDesc  scan,
ScanKey  scankey,
int  nscankeys,
ScanKey  orderbys,
int  norderbys 
)

Definition at line 388 of file nbtree.c.

References _bt_killitems(), _bt_preprocess_array_keys(), BTScanOpaqueData::arrayKeyCount, BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, IndexScanDescData::keyData, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, IndexScanDescData::numberOfKeys, BTScanOpaqueData::numberOfKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), and IndexScanDescData::xs_want_itup.

Referenced by bthandler().

390 {
391  BTScanOpaque so = (BTScanOpaque) scan->opaque;
392 
393  /* we aren't holding any read locks, but gotta drop the pins */
394  if (BTScanPosIsValid(so->currPos))
395  {
396  /* Before leaving current page, deal with any killed items */
397  if (so->numKilled > 0)
398  _bt_killitems(scan);
399  BTScanPosUnpinIfPinned(so->currPos);
400  BTScanPosInvalidate(so->currPos);
401  }
402 
403  so->markItemIndex = -1;
404  so->arrayKeyCount = 0;
405  BTScanPosUnpinIfPinned(so->markPos);
406  BTScanPosInvalidate(so->markPos);
407 
408  /*
409  * Allocate tuple workspace arrays, if needed for an index-only scan and
410  * not already done in a previous rescan call. To save on palloc
411  * overhead, both workspaces are allocated as one palloc block; only this
412  * function and btendscan know that.
413  *
414  * NOTE: this data structure also makes it safe to return data from a
415  * "name" column, even though btree name_ops uses an underlying storage
416  * datatype of cstring. The risk there is that "name" is supposed to be
417  * padded to NAMEDATALEN, but the actual index tuple is probably shorter.
418  * However, since we only return data out of tuples sitting in the
419  * currTuples array, a fetch of NAMEDATALEN bytes can at worst pull some
420  * data out of the markTuples array --- running off the end of memory for
421  * a SIGSEGV is not possible. Yeah, this is ugly as sin, but it beats
422  * adding special-case treatment for name_ops elsewhere.
423  */
424  if (scan->xs_want_itup && so->currTuples == NULL)
425  {
426  so->currTuples = (char *) palloc(BLCKSZ * 2);
427  so->markTuples = so->currTuples + BLCKSZ;
428  }
429 
430  /*
431  * Reset the scan keys
432  */
433  if (scankey && scan->numberOfKeys > 0)
434  memmove(scan->keyData,
435  scankey,
436  scan->numberOfKeys * sizeof(ScanKeyData));
437  so->numberOfKeys = 0; /* until _bt_preprocess_keys sets it */
438 
439  /* If any keys are SK_SEARCHARRAY type, set up array-key info */
440  _bt_preprocess_array_keys(scan);
441 }
char * currTuples
Definition: nbtree.h:1055
#define BTScanPosIsValid(scanpos)
Definition: nbtree.h:1005
void _bt_killitems(IndexScanDesc scan)
Definition: nbtutils.c:1718
BTScanPosData markPos
Definition: nbtree.h:1069
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:1072
char * markTuples
Definition: nbtree.h:1056
#define BTScanPosInvalidate(scanpos)
Definition: nbtree.h:1011
struct ScanKeyData * keyData
Definition: relscan.h:122
BTScanPosData currPos
Definition: nbtree.h:1068
void * palloc(Size size)
Definition: mcxt.c:1062
void _bt_preprocess_array_keys(IndexScanDesc scan)
Definition: nbtutils.c:203
#define BTScanPosUnpinIfPinned(scanpos)
Definition: nbtree.h:999

◆ btrestrpos()

void btrestrpos ( IndexScanDesc  scan)

Definition at line 513 of file nbtree.c.

References _bt_killitems(), _bt_restore_array_keys(), BTScanPosInvalidate, BTScanPosIsPinned, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanPosData::buf, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, IncrBufferRefCount(), BTScanPosData::itemIndex, BTScanPosData::lastItem, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanPosData::nextTupleOffset, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numKilled, offsetof, and IndexScanDescData::opaque.

Referenced by bthandler().

514 {
515  BTScanOpaque so = (BTScanOpaque) scan->opaque;
516 
517  /* Restore the marked positions of any array keys */
518  if (so->numArrayKeys)
519  _bt_restore_array_keys(scan);
520 
521  if (so->markItemIndex >= 0)
522  {
523  /*
524  * The scan has never moved to a new page since the last mark. Just
525  * restore the itemIndex.
526  *
527  * NB: In this case we can't count on anything in so->markPos to be
528  * accurate.
529  */
530  so->currPos.itemIndex = so->markItemIndex;
531  }
532  else
533  {
534  /*
535  * The scan moved to a new page after last mark or restore, and we are
536  * now restoring to the marked page. We aren't holding any read
537  * locks, but if we're still holding the pin for the current position,
538  * we must drop it.
539  */
540  if (BTScanPosIsValid(so->currPos))
541  {
542  /* Before leaving current page, deal with any killed items */
543  if (so->numKilled > 0)
544  _bt_killitems(scan);
545  BTScanPosUnpinIfPinned(so->currPos);
546  }
547 
548  if (BTScanPosIsValid(so->markPos))
549  {
550  /* bump pin on mark buffer for assignment to current buffer */
551  if (BTScanPosIsPinned(so->markPos))
552  IncrBufferRefCount(so->markPos.buf);
553  memcpy(&so->currPos, &so->markPos,
554  offsetof(BTScanPosData, items[1]) +
555  so->markPos.lastItem * sizeof(BTScanPosItem));
556  if (so->currTuples)
557  memcpy(so->currTuples, so->markTuples,
558  so->markPos.nextTupleOffset);
559  }
560  else
561  BTScanPosInvalidate(so->currPos);
562  }
563 }
int itemIndex
Definition: nbtree.h:981
char * currTuples
Definition: nbtree.h:1055
#define BTScanPosIsValid(scanpos)
Definition: nbtree.h:1005
void _bt_restore_array_keys(IndexScanDesc scan)
Definition: nbtutils.c:622
int nextTupleOffset
Definition: nbtree.h:970
int lastItem
Definition: nbtree.h:980
void _bt_killitems(IndexScanDesc scan)
Definition: nbtutils.c:1718
BTScanPosData markPos
Definition: nbtree.h:1069
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:1072
#define BTScanPosIsPinned(scanpos)
Definition: nbtree.h:988
char * markTuples
Definition: nbtree.h:1056
#define BTScanPosInvalidate(scanpos)
Definition: nbtree.h:1011
BTScanPosData currPos
Definition: nbtree.h:1068
Buffer buf
Definition: nbtree.h:951
void IncrBufferRefCount(Buffer buffer)
Definition: bufmgr.c:3806
#define offsetof(type, field)
Definition: c.h:727
#define BTScanPosUnpinIfPinned(scanpos)
Definition: nbtree.h:999

◆ btvacuumcleanup()

IndexBulkDeleteResult* btvacuumcleanup ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats 
)

Definition at line 814 of file nbtree.c.

References _bt_set_cleanup_info(), _bt_vacuum_needs_cleanup(), IndexVacuumInfo::analyze_only, Assert, btvacuumscan(), IndexVacuumInfo::estimated_count, IndexBulkDeleteResult::estimated_count, IndexVacuumInfo::index, IndexVacuumInfo::num_heap_tuples, IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, and palloc0().

Referenced by bthandler().

815 {
816  BlockNumber num_delpages;
817 
818  /* No-op in ANALYZE ONLY mode */
819  if (info->analyze_only)
820  return stats;
821 
822  /*
823  * If btbulkdelete was called, we need not do anything (we just maintain
824  * the information used within _bt_vacuum_needs_cleanup() by calling
825  * _bt_set_cleanup_info() below).
826  *
827  * If btbulkdelete was _not_ called, then we have a choice to make: we
828  * must decide whether or not a btvacuumscan() call is needed now (i.e.
829  * whether the ongoing VACUUM operation can entirely avoid a physical scan
830  * of the index). A call to _bt_vacuum_needs_cleanup() decides it for us
831  * now.
832  */
833  if (stats == NULL)
834  {
835  /* Check if VACUUM operation can entirely avoid btvacuumscan() call */
836  if (!_bt_vacuum_needs_cleanup(info->index))
837  return NULL;
838 
839  /*
840  * Since we aren't going to actually delete any leaf items, there's no
841  * need to go through all the vacuum-cycle-ID pushups here.
842  *
843  * Posting list tuples are a source of inaccuracy for cleanup-only
844  * scans. btvacuumscan() will assume that the number of index tuples
845  * from each page can be used as num_index_tuples, even though
846  * num_index_tuples is supposed to represent the number of TIDs in the
847  * index. This naive approach can underestimate the number of tuples
848  * in the index significantly.
849  *
850  * We handle the problem by making num_index_tuples an estimate in
851  * cleanup-only case.
852  */
853  stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
854  btvacuumscan(info, stats, NULL, NULL, 0);
855  stats->estimated_count = true;
856  }
857 
858  /*
859  * Maintain num_delpages value in metapage for _bt_vacuum_needs_cleanup().
860  *
861  * num_delpages is the number of deleted pages now in the index that were
862  * not safe to place in the FSM to be recycled just yet. num_delpages is
863  * greater than 0 only when _bt_pagedel() actually deleted pages during
864  * our call to btvacuumscan(). Even then, _bt_pendingfsm_finalize() must
865  * have failed to place any newly deleted pages in the FSM just moments
866  * ago. (Actually, there are edge cases where recycling of the current
867  * VACUUM's newly deleted pages does not even become safe by the time the
868  * next VACUUM comes around. See nbtree/README.)
869  */
870  Assert(stats->pages_deleted >= stats->pages_free);
871  num_delpages = stats->pages_deleted - stats->pages_free;
872  _bt_set_cleanup_info(info->index, num_delpages);
873 
874  /*
875  * It's quite possible for us to be fooled by concurrent page splits into
876  * double-counting some index tuples, so disbelieve any total that exceeds
877  * the underlying heap's count ... if we know that accurately. Otherwise
878  * this might just make matters worse.
879  */
880  if (!info->estimated_count)
881  {
882  if (stats->num_index_tuples > info->num_heap_tuples)
883  stats->num_index_tuples = info->num_heap_tuples;
884  }
885 
886  return stats;
887 }
void _bt_set_cleanup_info(Relation rel, BlockNumber num_delpages)
Definition: nbtpage.c:234
bool analyze_only
Definition: genam.h:47
Relation index
Definition: genam.h:46
uint32 BlockNumber
Definition: block.h:31
static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid)
Definition: nbtree.c:902
BlockNumber pages_free
Definition: genam.h:82
bool _bt_vacuum_needs_cleanup(Relation rel)
Definition: nbtpage.c:181
BlockNumber pages_deleted
Definition: genam.h:81
void * palloc0(Size size)
Definition: mcxt.c:1093
double num_heap_tuples
Definition: genam.h:51
#define Assert(condition)
Definition: c.h:804
double num_index_tuples
Definition: genam.h:78
bool estimated_count
Definition: genam.h:77
bool estimated_count
Definition: genam.h:49

◆ btvacuumpage()

static void btvacuumpage ( BTVacState *  vstate,
BlockNumber  scanblkno 
)
static

Definition at line 1033 of file nbtree.c.

References _bt_checkpage(), _bt_delitems_vacuum(), _bt_lockbuf(), _bt_pagedel(), _bt_relbuf(), _bt_upgradelockbufcleanup(), Assert, BT_READ, BTP_SPLIT_END, BTPageIsRecyclable(), BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTreeTupleGetNPosting(), BTreeTupleIsPivot(), BTreeTupleIsPosting(), btreevacuumposting(), buf, BufferGetPage, callback(), BTVacState::callback, BTVacState::callback_state, BTVacState::cycleid, ereport, errcode(), errmsg_internal(), i, IndexVacuumInfo::index, BTVacState::info, LOG, MAIN_FORKNUM, MarkBufferDirtyHint(), MaxIndexTuplesPerPage, MemoryContextReset(), MemoryContextSwitchTo(), IndexBulkDeleteResult::num_index_tuples, OffsetNumberNext, P_FIRSTDATAKEY, P_ISDELETED, P_ISHALFDEAD, P_ISLEAF, P_NONE, P_RIGHTMOST, BTVacState::pagedelcontext, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageGetSpecialPointer, PageIsNew, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, pfree(), RBM_NORMAL, ReadBufferExtended(), RecordFreeIndexPage(), RelationGetRelationName, BTVacState::stats, IndexVacuumInfo::strategy, IndexTupleData::t_tid, IndexBulkDeleteResult::tuples_removed, and vacuum_delay_point().

Referenced by btvacuumscan().

1034 {
1035  IndexVacuumInfo *info = vstate->info;
1036  IndexBulkDeleteResult *stats = vstate->stats;
1037  IndexBulkDeleteCallback callback = vstate->callback;
1038  void *callback_state = vstate->callback_state;
1039  Relation rel = info->index;
1040  bool attempt_pagedel;
1041  BlockNumber blkno,
1042  backtrack_to;
1043  Buffer buf;
1044  Page page;
1045  BTPageOpaque opaque;
1046 
1047  blkno = scanblkno;
1048 
1049 backtrack:
1050 
1051  attempt_pagedel = false;
1052  backtrack_to = P_NONE;
1053 
1054  /* call vacuum_delay_point while not holding any buffer lock */
1055  vacuum_delay_point();
1056 
1057  /*
1058  * We can't use _bt_getbuf() here because it always applies
1059  * _bt_checkpage(), which will barf on an all-zero page. We want to
1060  * recycle all-zero pages, not fail. Also, we want to use a nondefault
1061  * buffer access strategy.
1062  */
1063  buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
1064  info->strategy);
1065  _bt_lockbuf(rel, buf, BT_READ);
1066  page = BufferGetPage(buf);
1067  opaque = NULL;
1068  if (!PageIsNew(page))
1069  {
1070  _bt_checkpage(rel, buf);
1071  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1072  }
1073 
1074  Assert(blkno <= scanblkno);
1075  if (blkno != scanblkno)
1076  {
1077  /*
1078  * We're backtracking.
1079  *
1080  * We followed a right link to a sibling leaf page (a page that
1081  * happens to be from a block located before scanblkno). The only
1082  * case we want to do anything with is a live leaf page having the
1083  * current vacuum cycle ID.
1084  *
1085  * The page had better be in a state that's consistent with what we
1086  * expect. Check for conditions that imply corruption in passing. It
1087  * can't be half-dead because only an interrupted VACUUM process can
1088  * leave pages in that state, so we'd definitely have dealt with it
1089  * back when the page was the scanblkno page (half-dead pages are
1090  * always marked fully deleted by _bt_pagedel()). This assumes that
1091  * there can be only one vacuum process running at a time.
1092  */
1093  if (!opaque || !P_ISLEAF(opaque) || P_ISHALFDEAD(opaque))
1094  {
1095  Assert(false);
1096  ereport(LOG,
1097  (errcode(ERRCODE_INDEX_CORRUPTED),
1098  errmsg_internal("right sibling %u of scanblkno %u unexpectedly in an inconsistent state in index \"%s\"",
1099  blkno, scanblkno, RelationGetRelationName(rel))));
1100  _bt_relbuf(rel, buf);
1101  return;
1102  }
1103 
1104  /*
1105  * We may have already processed the page in an earlier call, when the
1106  * page was scanblkno. This happens when the leaf page split occurred
1107  * after the scan began, but before the right sibling page became the
1108  * scanblkno.
1109  *
1110  * Page may also have been deleted by current btvacuumpage() call,
1111  * since _bt_pagedel() sometimes deletes the right sibling page of
1112  * scanblkno in passing (it does so after we decided where to
1113  * backtrack to). We don't need to process this page as a deleted
1114  * page a second time now (in fact, it would be wrong to count it as a
1115  * deleted page in the bulk delete statistics a second time).
1116  */
1117  if (opaque->btpo_cycleid != vstate->cycleid || P_ISDELETED(opaque))
1118  {
1119  /* Done with current scanblkno (and all lower split pages) */
1120  _bt_relbuf(rel, buf);
1121  return;
1122  }
1123  }
1124 
1125  if (!opaque || BTPageIsRecyclable(page))
1126  {
1127  /* Okay to recycle this page (which could be leaf or internal) */
1128  RecordFreeIndexPage(rel, blkno);
1129  stats->pages_deleted++;
1130  stats->pages_free++;
1131  }
1132  else if (P_ISDELETED(opaque))
1133  {
1134  /*
1135  * Already deleted page (which could be leaf or internal). Can't
1136  * recycle yet.
1137  */
1138  stats->pages_deleted++;
1139  }
1140  else if (P_ISHALFDEAD(opaque))
1141  {
1142  /* Half-dead leaf page (from interrupted VACUUM) -- finish deleting */
1143  attempt_pagedel = true;
1144 
1145  /*
1146  * _bt_pagedel() will increment both pages_newly_deleted and
1147  * pages_deleted stats in all cases (barring corruption)
1148  */
1149  }
1150  else if (P_ISLEAF(opaque))
1151  {
1152  OffsetNumber deletable[MaxIndexTuplesPerPage];
1153  int ndeletable;
1154  BTVacuumPosting updatable[MaxIndexTuplesPerPage];
1155  int nupdatable;
1156  OffsetNumber offnum,
1157  minoff,
1158  maxoff;
1159  int nhtidsdead,
1160  nhtidslive;
1161 
1162  /*
1163  * Trade in the initial read lock for a super-exclusive write lock on
1164  * this page. We must get such a lock on every leaf page over the
1165  * course of the vacuum scan, whether or not it actually contains any
1166  * deletable tuples --- see nbtree/README.
1167  */
1168  _bt_upgradelockbufcleanup(rel, buf);
1169 
1170  /*
1171  * Check whether we need to backtrack to earlier pages. What we are
1172  * concerned about is a page split that happened since we started the
1173  * vacuum scan. If the split moved tuples on the right half of the
1174  * split (i.e. the tuples that sort high) to a block that we already
1175  * passed over, then we might have missed the tuples. We need to
1176  * backtrack now. (Must do this before possibly clearing btpo_cycleid
1177  * or deleting scanblkno page below!)
1178  */
1179  if (vstate->cycleid != 0 &&
1180  opaque->btpo_cycleid == vstate->cycleid &&
1181  !(opaque->btpo_flags & BTP_SPLIT_END) &&
1182  !P_RIGHTMOST(opaque) &&
1183  opaque->btpo_next < scanblkno)
1184  backtrack_to = opaque->btpo_next;
1185 
1186  ndeletable = 0;
1187  nupdatable = 0;
1188  minoff = P_FIRSTDATAKEY(opaque);
1189  maxoff = PageGetMaxOffsetNumber(page);
1190  nhtidsdead = 0;
1191  nhtidslive = 0;
1192  if (callback)
1193  {
1194  /* btbulkdelete callback tells us what to delete (or update) */
1195  for (offnum = minoff;
1196  offnum <= maxoff;
1197  offnum = OffsetNumberNext(offnum))
1198  {
1199  IndexTuple itup;
1200 
1201  itup = (IndexTuple) PageGetItem(page,
1202  PageGetItemId(page, offnum));
1203 
1204  Assert(!BTreeTupleIsPivot(itup));
1205  if (!BTreeTupleIsPosting(itup))
1206  {
1207  /* Regular tuple, standard table TID representation */
1208  if (callback(&itup->t_tid, callback_state))
1209  {
1210  deletable[ndeletable++] = offnum;
1211  nhtidsdead++;
1212  }
1213  else
1214  nhtidslive++;
1215  }
1216  else
1217  {
1218  BTVacuumPosting vacposting;
1219  int nremaining;
1220 
1221  /* Posting list tuple */
1222  vacposting = btreevacuumposting(vstate, itup, offnum,
1223  &nremaining);
1224  if (vacposting == NULL)
1225  {
1226  /*
1227  * All table TIDs from the posting tuple remain, so no
1228  * delete or update required
1229  */
1230  Assert(nremaining == BTreeTupleGetNPosting(itup));
1231  }
1232  else if (nremaining > 0)
1233  {
1234 
1235  /*
1236  * Store metadata about posting list tuple in
1237  * updatable array for entire page. Existing tuple
1238  * will be updated during the later call to
1239  * _bt_delitems_vacuum().
1240  */
1241  Assert(nremaining < BTreeTupleGetNPosting(itup));
1242  updatable[nupdatable++] = vacposting;
1243  nhtidsdead += BTreeTupleGetNPosting(itup) - nremaining;
1244  }
1245  else
1246  {
1247  /*
1248  * All table TIDs from the posting list must be
1249  * deleted. We'll delete the index tuple completely
1250  * (no update required).
1251  */
1252  Assert(nremaining == 0);
1253  deletable[ndeletable++] = offnum;
1254  nhtidsdead += BTreeTupleGetNPosting(itup);
1255  pfree(vacposting);
1256  }
1257 
1258  nhtidslive += nremaining;
1259  }
1260  }
1261  }
1262 
1263  /*
1264  * Apply any needed deletes or updates. We issue just one
1265  * _bt_delitems_vacuum() call per page, so as to minimize WAL traffic.
1266  */
1267  if (ndeletable > 0 || nupdatable > 0)
1268  {
1269  Assert(nhtidsdead >= ndeletable + nupdatable);
1270  _bt_delitems_vacuum(rel, buf, deletable, ndeletable, updatable,
1271  nupdatable);
1272 
1273  stats->tuples_removed += nhtidsdead;
1274  /* must recompute maxoff */
1275  maxoff = PageGetMaxOffsetNumber(page);
1276 
1277  /* can't leak memory here */
1278  for (int i = 0; i < nupdatable; i++)
1279  pfree(updatable[i]);
1280  }
1281  else
1282  {
1283  /*
1284  * If the leaf page has been split during this vacuum cycle, it
1285  * seems worth expending a write to clear btpo_cycleid even if we
1286  * don't have any deletions to do. (If we do, _bt_delitems_vacuum
1287  * takes care of this.) This ensures we won't process the page
1288  * again.
1289  *
1290  * We treat this like a hint-bit update because there's no need to
1291  * WAL-log it.
1292  */
1293  Assert(nhtidsdead == 0);
1294  if (vstate->cycleid != 0 &&
1295  opaque->btpo_cycleid == vstate->cycleid)
1296  {
1297  opaque->btpo_cycleid = 0;
1298  MarkBufferDirtyHint(buf, true);
1299  }
1300  }
1301 
1302  /*
1303  * If the leaf page is now empty, try to delete it; else count the
1304  * live tuples (live table TIDs in posting lists are counted as
1305  * separate live tuples). We don't delete when backtracking, though,
1306  * since that would require teaching _bt_pagedel() about backtracking
1307  * (doesn't seem worth adding more complexity to deal with that).
1308  *
1309  * We don't count the number of live TIDs during cleanup-only calls to
1310  * btvacuumscan (i.e. when callback is not set). We count the number
1311  * of index tuples directly instead. This avoids the expense of
1312  * directly examining all of the tuples on each page. VACUUM will
1313  * treat num_index_tuples as an estimate in cleanup-only case, so it
1314  * doesn't matter that this underestimates num_index_tuples
1315  * significantly in some cases.
1316  */
1317  if (minoff > maxoff)
1318  attempt_pagedel = (blkno == scanblkno);
1319  else if (callback)
1320  stats->num_index_tuples += nhtidslive;
1321  else
1322  stats->num_index_tuples += maxoff - minoff + 1;
1323 
1324  Assert(!attempt_pagedel || nhtidslive == 0);
1325  }
1326 
1327  if (attempt_pagedel)
1328  {
1329  MemoryContext oldcontext;
1330 
1331  /* Run pagedel in a temp context to avoid memory leakage */
1332  MemoryContextReset(vstate->pagedelcontext);
1333  oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
1334 
1335  /*
1336  * _bt_pagedel maintains the bulk delete stats on our behalf;
1337  * pages_newly_deleted and pages_deleted are likely to be incremented
1338  * during call
1339  */
1340  Assert(blkno == scanblkno);
1341  _bt_pagedel(rel, buf, vstate);
1342 
1343  MemoryContextSwitchTo(oldcontext);
1344  /* pagedel released buffer, so we shouldn't */
1345  }
1346  else
1347  _bt_relbuf(rel, buf);
1348 
1349  if (backtrack_to != P_NONE)
1350  {
1351  blkno = backtrack_to;
1352  goto backtrack;
1353  }
1354 }
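
To make the callback-driven classification above concrete, here is a minimal sketch of an IndexBulkDeleteCallback of the kind btbulkdelete() passes down to this loop: it returns true for table TIDs that are known dead, which is what routes an offset into the deletable[] array (or trims a posting-list tuple) above. The DeadTidState struct and the linear search are illustrative assumptions only; the in-core VACUUM caller instead searches the dead-TID array it collected while scanning the heap.

#include "postgres.h"
#include "access/genam.h"
#include "storage/itemptr.h"

/* Hypothetical callback state: a plain array of known-dead table TIDs */
typedef struct DeadTidState
{
    ItemPointerData *deadtids;
    int         ndeadtids;
} DeadTidState;

/*
 * Matches the IndexBulkDeleteCallback signature: return true iff the
 * index entry pointing at itemptr should be removed.
 */
static bool
example_tid_reaped(ItemPointer itemptr, void *state)
{
    DeadTidState *ds = (DeadTidState *) state;

    for (int i = 0; i < ds->ndeadtids; i++)
    {
        if (ItemPointerEquals(itemptr, &ds->deadtids[i]))
            return true;    /* dead: delete entry, or trim posting list */
    }
    return false;           /* live: keep the index entry */
}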

◆ btvacuumscan()

static void btvacuumscan ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats,
IndexBulkDeleteCallback  callback,
void *  callback_state,
BTCycleId  cycleid 
)
static

Definition at line 902 of file nbtree.c.

References _bt_pendingfsm_finalize(), _bt_pendingfsm_init(), ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, BTREE_METAPAGE, btvacuumpage(), BTVacState::bufsize, callback(), BTVacState::callback, BTVacState::callback_state, CurrentMemoryContext, BTVacState::cycleid, ExclusiveLock, IndexVacuumInfo::index, IndexFreeSpaceMapVacuum(), BTVacState::info, LockRelationForExtension(), BTVacState::maxbufsize, MemoryContextDelete(), BTVacState::npendingpages, IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::num_pages, BTVacState::pagedelcontext, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, BTVacState::pendingpages, pgstat_progress_update_param(), PROGRESS_SCAN_BLOCKS_DONE, PROGRESS_SCAN_BLOCKS_TOTAL, RELATION_IS_LOCAL, RelationGetNumberOfBlocks, IndexVacuumInfo::report_progress, BTVacState::stats, and UnlockRelationForExtension().

Referenced by btbulkdelete(), and btvacuumcleanup().

905 {
906  Relation rel = info->index;
907  BTVacState vstate;
908  BlockNumber num_pages;
909  BlockNumber scanblkno;
910  bool needLock;
911 
912  /*
913  * Reset fields that track information about the entire index now. This
914  * avoids double-counting in the case where a single VACUUM command
915  * requires multiple scans of the index.
916  *
917  * Avoid resetting the tuples_removed and pages_newly_deleted fields here,
918  * since they track information about the VACUUM command, and so must last
919  * across each call to btvacuumscan().
920  *
921  * (Note that pages_free is treated as state about the whole index, not
922  * the current VACUUM. This is appropriate because RecordFreeIndexPage()
923  * calls are idempotent, and get repeated for the same deleted pages in
924  * some scenarios. The point for us is to track the number of recyclable
925  * pages in the index at the end of the VACUUM command.)
926  */
927  stats->num_pages = 0;
928  stats->num_index_tuples = 0;
929  stats->pages_deleted = 0;
930  stats->pages_free = 0;
931 
932  /* Set up info to pass down to btvacuumpage */
933  vstate.info = info;
934  vstate.stats = stats;
935  vstate.callback = callback;
936  vstate.callback_state = callback_state;
937  vstate.cycleid = cycleid;
938 
939  /* Create a temporary memory context to run _bt_pagedel in */
940  vstate.pagedelcontext = AllocSetContextCreate(CurrentMemoryContext,
941  "_bt_pagedel",
942  ALLOCSET_DEFAULT_SIZES);
943 
944  /* Initialize vstate fields used by _bt_pendingfsm_finalize */
945  vstate.bufsize = 0;
946  vstate.maxbufsize = 0;
947  vstate.pendingpages = NULL;
948  vstate.npendingpages = 0;
949  /* Consider applying _bt_pendingfsm_finalize optimization */
950  _bt_pendingfsm_init(rel, &vstate, (callback == NULL));
951 
952  /*
953  * The outer loop iterates over all index pages except the metapage, in
954  * physical order (we hope the kernel will cooperate in providing
955  * read-ahead for speed). It is critical that we visit all leaf pages,
956  * including ones added after we start the scan, else we might fail to
957  * delete some deletable tuples. Hence, we must repeatedly check the
958  * relation length. We must acquire the relation-extension lock while
959  * doing so to avoid a race condition: if someone else is extending the
960  * relation, there is a window where bufmgr/smgr have created a new
961  * all-zero page but it hasn't yet been write-locked by _bt_getbuf(). If
962  * we manage to scan such a page here, we'll improperly assume it can be
963  * recycled. Taking the lock synchronizes things enough to prevent a
964  * problem: either num_pages won't include the new page, or _bt_getbuf
965  * already has write lock on the buffer and it will be fully initialized
966  * before we can examine it. Also, we need not worry if a page is added
967  * immediately after we look; the page splitting code already has
968  * write-lock on the left page before it adds a right page, so we must
969  * already have processed any tuples due to be moved into such a page.
970  *
971  * We can skip locking for new or temp relations, however, since no one
972  * else could be accessing them.
973  */
974  needLock = !RELATION_IS_LOCAL(rel);
975 
976  scanblkno = BTREE_METAPAGE + 1;
977  for (;;)
978  {
979  /* Get the current relation length */
980  if (needLock)
981  LockRelationForExtension(rel, ExclusiveLock);
982  num_pages = RelationGetNumberOfBlocks(rel);
983  if (needLock)
984  UnlockRelationForExtension(rel, ExclusiveLock);
985 
986  if (info->report_progress)
987  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
988  num_pages);
989 
990  /* Quit if we've scanned the whole relation */
991  if (scanblkno >= num_pages)
992  break;
993  /* Iterate over pages, then loop back to recheck length */
994  for (; scanblkno < num_pages; scanblkno++)
995  {
996  btvacuumpage(&vstate, scanblkno);
997  if (info->report_progress)
998  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
999  scanblkno);
1000  }
1001  }
1002 
1003  /* Set statistics num_pages field to final size of index */
1004  stats->num_pages = num_pages;
1005 
1006  MemoryContextDelete(vstate.pagedelcontext);
1007 
1008  /*
1009  * If there were any calls to _bt_pagedel() during scan of the index then
1010  * see if any of the resulting pages can be placed in the FSM now. When
1011  * it's not safe we'll have to leave it up to a future VACUUM operation.
1012  *
1013  * Finally, if we placed any pages in the FSM (either just now or during
1014  * the scan), forcibly update the upper-level FSM pages to ensure that
1015  * searchers can find them.
1016  */
1017  _bt_pendingfsm_finalize(rel, &vstate);
1018  if (stats->pages_free > 0)
1019  IndexFreeSpaceMapVacuum(rel);
1020 }
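
The long comment above explains why the relation length must be re-read under the relation-extension lock on each outer iteration. Reduced to its skeleton (process_block() is a hypothetical stand-in for btvacuumpage()), the loop structure is:

#include "postgres.h"
#include "access/nbtree.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"

static void process_block(Relation rel, BlockNumber blkno);    /* stand-in */

static void
scan_to_end(Relation rel, bool needLock)
{
    BlockNumber scanblkno = BTREE_METAPAGE + 1;

    for (;;)
    {
        BlockNumber num_pages;

        /* re-read the length under the extension lock to close the race */
        if (needLock)
            LockRelationForExtension(rel, ExclusiveLock);
        num_pages = RelationGetNumberOfBlocks(rel);
        if (needLock)
            UnlockRelationForExtension(rel, ExclusiveLock);

        if (scanblkno >= num_pages)
            break;              /* index did not grow; scan is complete */

        for (; scanblkno < num_pages; scanblkno++)
            process_block(rel, scanblkno);
    }
}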