PostgreSQL Source Code (git master)
nbtree.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "access/nbtxlog.h"
#include "access/relscan.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "commands/progress.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "nodes/execnodes.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "storage/condition_variable.h"
#include "storage/indexfsm.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/smgr.h"
#include "utils/builtins.h"
#include "utils/index_selfuncs.h"
#include "utils/memutils.h"
Include dependency graph for nbtree.c:


Data Structures

struct  BTParallelScanDescData
 

Typedefs

typedef struct BTParallelScanDescData BTParallelScanDescData
 
typedef struct BTParallelScanDescData * BTParallelScanDesc
 

Enumerations

enum  BTPS_State { BTPARALLEL_NOT_INITIALIZED , BTPARALLEL_ADVANCING , BTPARALLEL_IDLE , BTPARALLEL_DONE }
 

Functions

static void btvacuumscan (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid)
 
static void btvacuumpage (BTVacState *vstate, BlockNumber scanblkno)
 
static BTVacuumPosting btreevacuumposting (BTVacState *vstate, IndexTuple posting, OffsetNumber updatedoffset, int *nremaining)
 
Datum bthandler (PG_FUNCTION_ARGS)
 
void btbuildempty (Relation index)
 
bool btinsert (Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo)
 
bool btgettuple (IndexScanDesc scan, ScanDirection dir)
 
int64 btgetbitmap (IndexScanDesc scan, TIDBitmap *tbm)
 
IndexScanDesc btbeginscan (Relation rel, int nkeys, int norderbys)
 
void btrescan (IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
 
void btendscan (IndexScanDesc scan)
 
void btmarkpos (IndexScanDesc scan)
 
void btrestrpos (IndexScanDesc scan)
 
Size btestimateparallelscan (void)
 
void btinitparallelscan (void *target)
 
void btparallelrescan (IndexScanDesc scan)
 
bool _bt_parallel_seize (IndexScanDesc scan, BlockNumber *pageno)
 
void _bt_parallel_release (IndexScanDesc scan, BlockNumber scan_page)
 
void _bt_parallel_done (IndexScanDesc scan)
 
void _bt_parallel_advance_array_keys (IndexScanDesc scan)
 
IndexBulkDeleteResult * btbulkdelete (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
 
IndexBulkDeleteResult * btvacuumcleanup (IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 
bool btcanreturn (Relation index, int attno)
 

Typedef Documentation

◆ BTParallelScanDesc

Definition at line 78 of file nbtree.c.

◆ BTParallelScanDescData

Enumeration Type Documentation

◆ BTPS_State

enum BTPS_State
Enumerator
BTPARALLEL_NOT_INITIALIZED 
BTPARALLEL_ADVANCING 
BTPARALLEL_IDLE 
BTPARALLEL_DONE 

Definition at line 54 of file nbtree.c.

55 {
56  BTPARALLEL_NOT_INITIALIZED,
57  BTPARALLEL_ADVANCING,
58  BTPARALLEL_IDLE,
59  BTPARALLEL_DONE
60 } BTPS_State;
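
These four values implement a small hand-off protocol among the backends participating in a parallel B-tree scan. A summary of the transitions, as implemented by the functions documented below:

 BTPARALLEL_NOT_INITIALIZED -> BTPARALLEL_ADVANCING         _bt_parallel_seize()
 BTPARALLEL_IDLE            -> BTPARALLEL_ADVANCING         _bt_parallel_seize()
 BTPARALLEL_ADVANCING       -> BTPARALLEL_IDLE              _bt_parallel_release()
 any state other than DONE  -> BTPARALLEL_DONE              _bt_parallel_done()
 BTPARALLEL_DONE            -> BTPARALLEL_NOT_INITIALIZED   _bt_parallel_advance_array_keys()

At most one participant holds BTPARALLEL_ADVANCING at any instant; the others either process pages they have already claimed or sleep on the scan's condition variable inside _bt_parallel_seize().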

Function Documentation

◆ _bt_parallel_advance_array_keys()

void _bt_parallel_advance_array_keys ( IndexScanDesc  scan)

Definition at line 761 of file nbtree.c.

762 {
763  BTScanOpaque so = (BTScanOpaque) scan->opaque;
764  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
765  BTParallelScanDesc btscan;
766 
767  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
768  parallel_scan->ps_offset);
769 
770  so->arrayKeyCount++;
771  SpinLockAcquire(&btscan->btps_mutex);
772  if (btscan->btps_pageStatus == BTPARALLEL_DONE)
773  {
774  btscan->btps_scanPage = InvalidBlockNumber;
775  btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
776  btscan->btps_arrayKeyCount++;
777  }
778  SpinLockRelease(&btscan->btps_mutex);
779 }

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_DONE, BTPARALLEL_NOT_INITIALIZED, InvalidBlockNumber, OffsetToPointer, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_advance_array_keys().

◆ _bt_parallel_done()

void _bt_parallel_done ( IndexScanDesc  scan)

Definition at line 720 of file nbtree.c.

721 {
722  BTScanOpaque so = (BTScanOpaque) scan->opaque;
723  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
724  BTParallelScanDesc btscan;
725  bool status_changed = false;
726 
727  /* Do nothing, for non-parallel scans */
728  if (parallel_scan == NULL)
729  return;
730 
731  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
732  parallel_scan->ps_offset);
733 
734  /*
735  * Mark the parallel scan as done for this combination of scan keys,
736  * unless some other process already did so. See also
737  * _bt_advance_array_keys.
738  */
739  SpinLockAcquire(&btscan->btps_mutex);
740  if (so->arrayKeyCount >= btscan->btps_arrayKeyCount &&
741  btscan->btps_pageStatus != BTPARALLEL_DONE)
742  {
743  btscan->btps_pageStatus = BTPARALLEL_DONE;
744  status_changed = true;
745  }
746  SpinLockRelease(&btscan->btps_mutex);
747 
748  /* wake up all the workers associated with this parallel scan */
749  if (status_changed)
750  ConditionVariableBroadcast(&btscan->btps_cv);
751 }
void ConditionVariableBroadcast(ConditionVariable *cv)
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
struct BTParallelScanDescData * BTParallelScanDesc
Definition: nbtree.c:78

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_DONE, ConditionVariableBroadcast(), OffsetToPointer, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_first(), and _bt_readnextpage().
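
Note the so->arrayKeyCount >= btscan->btps_arrayKeyCount test above: a backend that is still finishing an older set of array keys (its local count lagging the shared one) must not mark the scan done for the newer key set that the other participants have already advanced to.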

◆ _bt_parallel_release()

void _bt_parallel_release ( IndexScanDesc  scan,
BlockNumber  scan_page 
)

Definition at line 697 of file nbtree.c.

698 {
699  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
700  BTParallelScanDesc btscan;
701 
702  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
703  parallel_scan->ps_offset);
704 
705  SpinLockAcquire(&btscan->btps_mutex);
706  btscan->btps_scanPage = scan_page;
707  btscan->btps_pageStatus = BTPARALLEL_IDLE;
708  SpinLockRelease(&btscan->btps_mutex);
709  ConditionVariableSignal(&btscan->btps_cv);
710 }

References BTPARALLEL_IDLE, BTParallelScanDescData::btps_cv, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, ConditionVariableSignal(), OffsetToPointer, IndexScanDescData::parallel_scan, ParallelIndexScanDescData::ps_offset, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_readnextpage(), and _bt_readpage().

◆ _bt_parallel_seize()

bool _bt_parallel_seize ( IndexScanDesc  scan,
BlockNumber *  pageno 
)

Definition at line 639 of file nbtree.c.

640 {
641  BTScanOpaque so = (BTScanOpaque) scan->opaque;
642  BTPS_State pageStatus;
643  bool exit_loop = false;
644  bool status = true;
645  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
646  BTParallelScanDesc btscan;
647 
648  *pageno = P_NONE;
649 
650  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
651  parallel_scan->ps_offset);
652 
653  while (1)
654  {
655  SpinLockAcquire(&btscan->btps_mutex);
656  pageStatus = btscan->btps_pageStatus;
657 
658  if (so->arrayKeyCount < btscan->btps_arrayKeyCount)
659  {
660  /* Parallel scan has already advanced to a new set of scankeys. */
661  status = false;
662  }
663  else if (pageStatus == BTPARALLEL_DONE)
664  {
665  /*
666  * We're done with this set of scankeys. This may be the end, or
667  * there could be more sets to try.
668  */
669  status = false;
670  }
671  else if (pageStatus != BTPARALLEL_ADVANCING)
672  {
673  /*
674  * We have successfully seized control of the scan for the purpose
675  * of advancing it to a new page!
676  */
677  btscan->btps_pageStatus = BTPARALLEL_ADVANCING;
678  *pageno = btscan->btps_scanPage;
679  exit_loop = true;
680  }
681  SpinLockRelease(&btscan->btps_mutex);
682  if (exit_loop || !status)
683  break;
684  ConditionVariableSleep(&btscan->btps_cv, WAIT_EVENT_BTREE_PAGE);
685  }
686  ConditionVariableCancelSleep();
687 
688  return status;
689 }

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_ADVANCING, BTPARALLEL_DONE, ConditionVariableCancelSleep(), ConditionVariableSleep(), OffsetToPointer, IndexScanDescData::opaque, P_NONE, IndexScanDescData::parallel_scan, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_first(), _bt_readnextpage(), and _bt_steppage().
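
To make the seize/advance/release cycle concrete, below is a minimal standalone model of the protocol (illustrative scaffolding only, not PostgreSQL code): a pthread mutex and condition variable stand in for the spinlock and ConditionVariable, an int stands in for btps_scanPage, and seize()/release()/mark_done() mirror _bt_parallel_seize(), _bt_parallel_release(), and _bt_parallel_done(); the array-key bookkeeping is omitted.

/* parallel_scan_model.c: standalone sketch of the nbtree parallel-scan
 * hand-off.  Build with: cc -pthread parallel_scan_model.c */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

typedef enum { NOT_INITIALIZED, ADVANCING, IDLE, DONE } State;

static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
static State status = NOT_INITIALIZED;  /* models btps_pageStatus */
static int scan_page = -1;              /* models btps_scanPage */

/* Mirrors _bt_parallel_seize(): wait until no other worker is advancing,
 * then claim the right to choose the next page; false once the scan is
 * done. */
static bool
seize(int *page)
{
    bool ok = true;

    pthread_mutex_lock(&mutex);
    while (status == ADVANCING)
        pthread_cond_wait(&cv, &mutex);
    if (status == DONE)
        ok = false;
    else
    {
        status = ADVANCING;
        *page = scan_page;
    }
    pthread_mutex_unlock(&mutex);
    return ok;
}

/* Mirrors _bt_parallel_release(): publish the next page to scan and wake
 * one waiter, as ConditionVariableSignal() does. */
static void
release(int next_page)
{
    pthread_mutex_lock(&mutex);
    scan_page = next_page;
    status = IDLE;
    pthread_mutex_unlock(&mutex);
    pthread_cond_signal(&cv);
}

/* Mirrors _bt_parallel_done(): mark the whole scan finished and wake all
 * waiters, as ConditionVariableBroadcast() does. */
static void
mark_done(void)
{
    pthread_mutex_lock(&mutex);
    status = DONE;
    pthread_mutex_unlock(&mutex);
    pthread_cond_broadcast(&cv);
}

static void *
worker(void *arg)
{
    int page;

    while (seize(&page))
    {
        int next = page + 1;    /* stand-in for following the right-link */

        if (next > 8)           /* pretend the "index" has eight pages */
        {
            mark_done();
            break;
        }
        release(next);
        printf("worker %ld claimed page %d\n", (long) arg, next);
    }
    return NULL;
}

int
main(void)
{
    pthread_t threads[3];

    for (long i = 0; i < 3; i++)
        pthread_create(&threads[i], NULL, worker, (void *) i);
    for (int i = 0; i < 3; i++)
        pthread_join(threads[i], NULL);
    return 0;
}

As in the real functions, the worker that is "advancing" publishes the next page for its peers before going off to process the page it claimed, so only the brief page-selection step is serialized.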

◆ btbeginscan()

IndexScanDesc btbeginscan ( Relation  rel,
int  nkeys,
int  norderbys 
)

Definition at line 346 of file nbtree.c.

347 {
348  IndexScanDesc scan;
349  BTScanOpaque so;
350 
351  /* no order by operators allowed */
352  Assert(norderbys == 0);
353 
354  /* get the scan */
355  scan = RelationGetIndexScan(rel, nkeys, norderbys);
356 
357  /* allocate private workspace */
358  so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
359  BTScanPosInvalidate(so->currPos);
360  BTScanPosInvalidate(so->markPos);
361  if (scan->numberOfKeys > 0)
362  so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
363  else
364  so->keyData = NULL;
365 
366  so->arrayKeyData = NULL; /* assume no array keys for now */
367  so->arraysStarted = false;
368  so->numArrayKeys = 0;
369  so->arrayKeys = NULL;
370  so->arrayContext = NULL;
371 
372  so->killedItems = NULL; /* until needed */
373  so->numKilled = 0;
374 
375  /*
376  * We don't know yet whether the scan will be index-only, so we do not
377  * allocate the tuple workspace arrays until btrescan. However, we set up
378  * scan->xs_itupdesc whether we'll need it or not, since that's so cheap.
379  */
380  so->currTuples = so->markTuples = NULL;
381 
382  scan->xs_itupdesc = RelationGetDescr(rel);
383 
384  scan->opaque = so;
385 
386  return scan;
387 }

References BTScanOpaqueData::arrayContext, BTScanOpaqueData::arrayKeyData, BTScanOpaqueData::arrayKeys, BTScanOpaqueData::arraysStarted, Assert(), BTScanPosInvalidate, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanOpaqueData::keyData, BTScanOpaqueData::killedItems, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanOpaqueData::numArrayKeys, IndexScanDescData::numberOfKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), RelationGetDescr, RelationGetIndexScan(), and IndexScanDescData::xs_itupdesc.

Referenced by bthandler().

◆ btbuildempty()

void btbuildempty ( Relation  index)

Definition at line 152 of file nbtree.c.

153 {
154  bool allequalimage = _bt_allequalimage(index, false);
155  Buffer metabuf;
156  Page metapage;
157 
158  /*
159  * Initialize the metapage.
160  *
161  * Regular index build bypasses the buffer manager and uses smgr functions
162  * directly, with an smgrimmedsync() call at the end. That makes sense
163  * when the index is large, but for an empty index, it's better to use the
164  * buffer cache to avoid the smgrimmedsync().
165  */
166  metabuf = ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
167  Assert(BufferGetBlockNumber(metabuf) == BTREE_METAPAGE);
168  _bt_lockbuf(index, metabuf, BT_WRITE);
169 
170  START_CRIT_SECTION();
171 
172  metapage = BufferGetPage(metabuf);
173  _bt_initmetapage(metapage, P_NONE, 0, allequalimage);
174  MarkBufferDirty(metabuf);
175  log_newpage_buffer(metabuf, true);
176 
177  END_CRIT_SECTION();
178 
179  _bt_unlockbuf(index, metabuf);
180  ReleaseBuffer(metabuf);
181 }

References _bt_allequalimage(), _bt_initmetapage(), _bt_lockbuf(), _bt_unlockbuf(), Assert(), BT_WRITE, BTREE_METAPAGE, BufferGetBlockNumber(), BufferGetPage(), END_CRIT_SECTION, INIT_FORKNUM, log_newpage_buffer(), MarkBufferDirty(), P_NEW, P_NONE, RBM_NORMAL, ReadBufferExtended(), ReleaseBuffer(), and START_CRIT_SECTION.

Referenced by bthandler().
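
btbuildempty() is btree's implementation of the ambuildempty callback, which core code invokes to create the empty initialization fork of an unlogged index. Writing the metapage through the buffer manager and WAL-logging it with log_newpage_buffer() keeps the init fork crash-safe without the smgrimmedsync() call that a regular index build would need, as the comment above explains.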

◆ btbulkdelete()

IndexBulkDeleteResult * btbulkdelete ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats,
IndexBulkDeleteCallback  callback,
void *  callback_state 
)

Definition at line 789 of file nbtree.c.

791 {
792  Relation rel = info->index;
793  BTCycleId cycleid;
794 
795  /* allocate stats if first time through, else re-use existing struct */
796  if (stats == NULL)
797  stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
798 
799  /* Establish the vacuum cycle ID to use for this scan */
800  /* The ENSURE stuff ensures we clean up shared memory on failure */
801  PG_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
802  {
803  cycleid = _bt_start_vacuum(rel);
804 
805  btvacuumscan(info, stats, callback, callback_state, cycleid);
806  }
807  PG_END_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
808  _bt_end_vacuum(rel);
809 
810  return stats;
811 }

References _bt_end_vacuum(), _bt_end_vacuum_callback(), _bt_start_vacuum(), btvacuumscan(), IndexVacuumInfo::index, palloc0(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, and PointerGetDatum().

Referenced by bthandler().

◆ btcanreturn()

bool btcanreturn ( Relation  index,
int  attno 
)

Definition at line 1425 of file nbtree.c.

1426 {
1427  return true;
1428 }

Referenced by bthandler().

◆ btendscan()

void btendscan ( IndexScanDesc  scan)

Definition at line 452 of file nbtree.c.

453 {
454  BTScanOpaque so = (BTScanOpaque) scan->opaque;
455 
456  /* we aren't holding any read locks, but gotta drop the pins */
457  if (BTScanPosIsValid(so->currPos))
458  {
459  /* Before leaving current page, deal with any killed items */
460  if (so->numKilled > 0)
461  _bt_killitems(scan);
462  BTScanPosUnpinIfPinned(so->currPos);
463  }
464 
465  so->markItemIndex = -1;
466  BTScanPosUnpinIfPinned(so->markPos);
467 
468  /* No need to invalidate positions, the RAM is about to be freed. */
469 
470  /* Release storage */
471  if (so->keyData != NULL)
472  pfree(so->keyData);
473  /* so->arrayKeyData and so->arrayKeys are in arrayContext */
474  if (so->arrayContext != NULL)
475  MemoryContextDelete(so->arrayContext);
476  if (so->killedItems != NULL)
477  pfree(so->killedItems);
478  if (so->currTuples != NULL)
479  pfree(so->currTuples);
480  /* so->markTuples should not be pfree'd, see btrescan */
481  pfree(so);
482 }

References _bt_killitems(), BTScanOpaqueData::arrayContext, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanOpaqueData::keyData, BTScanOpaqueData::killedItems, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, MemoryContextDelete(), BTScanOpaqueData::numKilled, IndexScanDescData::opaque, and pfree().

Referenced by bthandler().

◆ btestimateparallelscan()

Size btestimateparallelscan ( void  )

Definition at line 574 of file nbtree.c.

575 {
576  return sizeof(BTParallelScanDescData);
577 }

Referenced by bthandler().
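
The size returned here tells the executor how much shared memory to reserve for the btree-specific portion of a parallel index scan; btinitparallelscan() is subsequently called on that same chunk to initialize it. Both functions are installed by bthandler() as the amestimateparallelscan and aminitparallelscan callbacks.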

◆ btgetbitmap()

int64 btgetbitmap ( IndexScanDesc  scan,
TIDBitmap *  tbm 
)

Definition at line 288 of file nbtree.c.

289 {
290  BTScanOpaque so = (BTScanOpaque) scan->opaque;
291  int64 ntids = 0;
292  ItemPointer heapTid;
293 
294  /*
295  * If we have any array keys, initialize them.
296  */
297  if (so->numArrayKeys)
298  {
299  /* punt if we have any unsatisfiable array keys */
300  if (so->numArrayKeys < 0)
301  return ntids;
302 
303  _bt_start_array_keys(scan, ForwardScanDirection);
304  }
305 
306  /* This loop handles advancing to the next array elements, if any */
307  do
308  {
309  /* Fetch the first page & tuple */
310  if (_bt_first(scan, ForwardScanDirection))
311  {
312  /* Save tuple ID, and continue scanning */
313  heapTid = &scan->xs_heaptid;
314  tbm_add_tuples(tbm, heapTid, 1, false);
315  ntids++;
316 
317  for (;;)
318  {
319  /*
320  * Advance to next tuple within page. This is the same as the
321  * easy case in _bt_next().
322  */
323  if (++so->currPos.itemIndex > so->currPos.lastItem)
324  {
325  /* let _bt_next do the heavy lifting */
326  if (!_bt_next(scan, ForwardScanDirection))
327  break;
328  }
329 
330  /* Save tuple ID, and continue scanning */
331  heapTid = &so->currPos.items[so->currPos.itemIndex].heapTid;
332  tbm_add_tuples(tbm, heapTid, 1, false);
333  ntids++;
334  }
335  }
336  /* Now see if we have more array keys to deal with */
337  } while (so->numArrayKeys && _bt_advance_array_keys(scan, ForwardScanDirection));
338 
339  return ntids;
340 }

References _bt_advance_array_keys(), _bt_first(), _bt_next(), _bt_start_array_keys(), BTScanOpaqueData::currPos, ForwardScanDirection, BTScanPosItem::heapTid, BTScanPosData::itemIndex, BTScanPosData::items, BTScanPosData::lastItem, BTScanOpaqueData::numArrayKeys, IndexScanDescData::opaque, tbm_add_tuples(), and IndexScanDescData::xs_heaptid.

Referenced by bthandler().

◆ btgettuple()

bool btgettuple ( IndexScanDesc  scan,
ScanDirection  dir 
)

Definition at line 214 of file nbtree.c.

215 {
216  BTScanOpaque so = (BTScanOpaque) scan->opaque;
217  bool res;
218 
219  /* btree indexes are never lossy */
220  scan->xs_recheck = false;
221 
222  /*
223  * If we have any array keys, initialize them during first call for a
224  * scan. We can't do this in btrescan because we don't know the scan
225  * direction at that time.
226  */
227  if (so->numArrayKeys && !BTScanPosIsValid(so->currPos))
228  {
229  /* punt if we have any unsatisfiable array keys */
230  if (so->numArrayKeys < 0)
231  return false;
232 
233  _bt_start_array_keys(scan, dir);
234  }
235 
236  /* This loop handles advancing to the next array elements, if any */
237  do
238  {
239  /*
240  * If we've already initialized this scan, we can just advance it in
241  * the appropriate direction. If we haven't done so yet, we call
242  * _bt_first() to get the first item in the scan.
243  */
244  if (!BTScanPosIsValid(so->currPos))
245  res = _bt_first(scan, dir);
246  else
247  {
248  /*
249  * Check to see if we should kill the previously-fetched tuple.
250  */
251  if (scan->kill_prior_tuple)
252  {
253  /*
254  * Yes, remember it for later. (We'll deal with all such
255  * tuples at once right before leaving the index page.) The
256  * test for numKilled overrun is not just paranoia: if the
257  * caller reverses direction in the indexscan then the same
258  * item might get entered multiple times. It's not worth
259  * trying to optimize that, so we don't detect it, but instead
260  * just forget any excess entries.
261  */
262  if (so->killedItems == NULL)
263  so->killedItems = (int *)
264  palloc(MaxTIDsPerBTreePage * sizeof(int));
265  if (so->numKilled < MaxTIDsPerBTreePage)
266  so->killedItems[so->numKilled++] = so->currPos.itemIndex;
267  }
268 
269  /*
270  * Now continue the scan.
271  */
272  res = _bt_next(scan, dir);
273  }
274 
275  /* If we have a tuple, return it ... */
276  if (res)
277  break;
278  /* ... otherwise see if we have more array keys to deal with */
279  } while (so->numArrayKeys && _bt_advance_array_keys(scan, dir));
280 
281  return res;
282 }

References _bt_advance_array_keys(), _bt_first(), _bt_next(), _bt_start_array_keys(), BTScanPosIsValid, BTScanOpaqueData::currPos, BTScanPosData::itemIndex, IndexScanDescData::kill_prior_tuple, BTScanOpaqueData::killedItems, MaxTIDsPerBTreePage, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), and IndexScanDescData::xs_recheck.

Referenced by bthandler().

◆ bthandler()

Datum bthandler ( PG_FUNCTION_ARGS  )

Definition at line 96 of file nbtree.c.

97 {
98  IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
99 
100  amroutine->amstrategies = BTMaxStrategyNumber;
101  amroutine->amsupport = BTNProcs;
102  amroutine->amoptsprocnum = BTOPTIONS_PROC;
103  amroutine->amcanorder = true;
104  amroutine->amcanorderbyop = false;
105  amroutine->amcanbackward = true;
106  amroutine->amcanunique = true;
107  amroutine->amcanmulticol = true;
108  amroutine->amoptionalkey = true;
109  amroutine->amsearcharray = true;
110  amroutine->amsearchnulls = true;
111  amroutine->amstorage = false;
112  amroutine->amclusterable = true;
113  amroutine->ampredlocks = true;
114  amroutine->amcanparallel = true;
115  amroutine->amcaninclude = true;
116  amroutine->amusemaintenanceworkmem = false;
117  amroutine->amsummarizing = false;
118  amroutine->amparallelvacuumoptions =
119  VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_COND_CLEANUP;
120  amroutine->amkeytype = InvalidOid;
121 
122  amroutine->ambuild = btbuild;
123  amroutine->ambuildempty = btbuildempty;
124  amroutine->aminsert = btinsert;
125  amroutine->ambulkdelete = btbulkdelete;
126  amroutine->amvacuumcleanup = btvacuumcleanup;
127  amroutine->amcanreturn = btcanreturn;
128  amroutine->amcostestimate = btcostestimate;
129  amroutine->amoptions = btoptions;
130  amroutine->amproperty = btproperty;
131  amroutine->ambuildphasename = btbuildphasename;
132  amroutine->amvalidate = btvalidate;
133  amroutine->amadjustmembers = btadjustmembers;
134  amroutine->ambeginscan = btbeginscan;
135  amroutine->amrescan = btrescan;
136  amroutine->amgettuple = btgettuple;
137  amroutine->amgetbitmap = btgetbitmap;
138  amroutine->amendscan = btendscan;
139  amroutine->ammarkpos = btmarkpos;
140  amroutine->amrestrpos = btrestrpos;
141  amroutine->amestimateparallelscan = btestimateparallelscan;
142  amroutine->aminitparallelscan = btinitparallelscan;
143  amroutine->amparallelrescan = btparallelrescan;
144 
145  PG_RETURN_POINTER(amroutine);
146 }

References IndexAmRoutine::amadjustmembers, IndexAmRoutine::ambeginscan, IndexAmRoutine::ambuild, IndexAmRoutine::ambuildempty, IndexAmRoutine::ambuildphasename, IndexAmRoutine::ambulkdelete, IndexAmRoutine::amcanbackward, IndexAmRoutine::amcaninclude, IndexAmRoutine::amcanmulticol, IndexAmRoutine::amcanorder, IndexAmRoutine::amcanorderbyop, IndexAmRoutine::amcanparallel, IndexAmRoutine::amcanreturn, IndexAmRoutine::amcanunique, IndexAmRoutine::amclusterable, IndexAmRoutine::amcostestimate, IndexAmRoutine::amendscan, IndexAmRoutine::amestimateparallelscan, IndexAmRoutine::amgetbitmap, IndexAmRoutine::amgettuple, IndexAmRoutine::aminitparallelscan, IndexAmRoutine::aminsert, IndexAmRoutine::amkeytype, IndexAmRoutine::ammarkpos, IndexAmRoutine::amoptionalkey, IndexAmRoutine::amoptions, IndexAmRoutine::amoptsprocnum, IndexAmRoutine::amparallelrescan, IndexAmRoutine::amparallelvacuumoptions, IndexAmRoutine::ampredlocks, IndexAmRoutine::amproperty, IndexAmRoutine::amrescan, IndexAmRoutine::amrestrpos, IndexAmRoutine::amsearcharray, IndexAmRoutine::amsearchnulls, IndexAmRoutine::amstorage, IndexAmRoutine::amstrategies, IndexAmRoutine::amsummarizing, IndexAmRoutine::amsupport, IndexAmRoutine::amusemaintenanceworkmem, IndexAmRoutine::amvacuumcleanup, IndexAmRoutine::amvalidate, btadjustmembers(), btbeginscan(), btbuild(), btbuildempty(), btbuildphasename(), btbulkdelete(), btcanreturn(), btcostestimate(), btendscan(), btestimateparallelscan(), btgetbitmap(), btgettuple(), btinitparallelscan(), btinsert(), btmarkpos(), BTMaxStrategyNumber, BTNProcs, btoptions(), BTOPTIONS_PROC, btparallelrescan(), btproperty(), btrescan(), btrestrpos(), btvacuumcleanup(), btvalidate(), InvalidOid, makeNode, PG_RETURN_POINTER, VACUUM_OPTION_PARALLEL_BULKDEL, and VACUUM_OPTION_PARALLEL_COND_CLEANUP.
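
For context, the routine table built above is normally obtained through GetIndexAmRoutine() (declared in access/amapi.h), which invokes an access method handler such as bthandler() by its pg_proc OID. A minimal sketch, assuming backend code built against the server headers (the wrapper function name is invented for illustration):

#include "postgres.h"
#include "access/amapi.h"
#include "access/nbtree.h"
#include "utils/fmgroids.h"  /* for F_BTHANDLER */

/* Illustrative only: fetch btree's IndexAmRoutine and sanity-check a
 * couple of the fields that bthandler() fills in. */
static void
inspect_btree_routine(void)
{
    IndexAmRoutine *amroutine = GetIndexAmRoutine(F_BTHANDLER);

    Assert(amroutine->amcanorder);   /* btree produces ordered output */
    Assert(amroutine->ambeginscan == btbeginscan);
    pfree(amroutine);
}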

◆ btinitparallelscan()

void btinitparallelscan ( void *  target)

Definition at line 583 of file nbtree.c.

584 {
585  BTParallelScanDesc bt_target = (BTParallelScanDesc) target;
586 
587  SpinLockInit(&bt_target->btps_mutex);
588  bt_target->btps_scanPage = InvalidBlockNumber;
589  bt_target->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
590  bt_target->btps_arrayKeyCount = 0;
591  ConditionVariableInit(&bt_target->btps_cv);
592 }

References BTPARALLEL_NOT_INITIALIZED, BTParallelScanDescData::btps_arrayKeyCount, BTParallelScanDescData::btps_cv, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, ConditionVariableInit(), InvalidBlockNumber, and SpinLockInit.

Referenced by bthandler().

◆ btinsert()

bool btinsert ( Relation  rel,
Datum *  values,
bool *  isnull,
ItemPointer  ht_ctid,
Relation  heapRel,
IndexUniqueCheck  checkUnique,
bool  indexUnchanged,
IndexInfo indexInfo 
)

Definition at line 190 of file nbtree.c.

195 {
196  bool result;
197  IndexTuple itup;
198 
199  /* generate an index tuple */
200  itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
201  itup->t_tid = *ht_ctid;
202 
203  result = _bt_doinsert(rel, itup, checkUnique, indexUnchanged, heapRel);
204 
205  pfree(itup);
206 
207  return result;
208 }

References _bt_doinsert(), index_form_tuple(), pfree(), RelationGetDescr, and IndexTupleData::t_tid.

Referenced by bthandler().

◆ btmarkpos()

void btmarkpos ( IndexScanDesc  scan)

Definition at line 488 of file nbtree.c.

489 {
490  BTScanOpaque so = (BTScanOpaque) scan->opaque;
491 
492  /* There may be an old mark with a pin (but no lock). */
493  BTScanPosUnpinIfPinned(so->markPos);
494 
495  /*
496  * Just record the current itemIndex. If we later step to next page
497  * before releasing the marked position, _bt_steppage makes a full copy of
498  * the currPos struct in markPos. If (as often happens) the mark is moved
499  * before we leave the page, we don't have to do that work.
500  */
501  if (BTScanPosIsValid(so->currPos))
502  so->markItemIndex = so->currPos.itemIndex;
503  else
504  {
505  BTScanPosInvalidate(so->markPos);
506  so->markItemIndex = -1;
507  }
508 
509  /* Also record the current positions of any array keys */
510  if (so->numArrayKeys)
511  _bt_mark_array_keys(scan);
512 }

References BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanPosData::itemIndex, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, and IndexScanDescData::opaque.

Referenced by bthandler().

◆ btparallelrescan()

void btparallelrescan ( IndexScanDesc  scan)

Definition at line 598 of file nbtree.c.

599 {
600  BTParallelScanDesc btscan;
601  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
602 
603  Assert(parallel_scan);
604 
605  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
606  parallel_scan->ps_offset);
607 
608  /*
609  * In theory, we don't need to acquire the spinlock here, because there
610  * shouldn't be any other workers running at this point, but we do so for
611  * consistency.
612  */
613  SpinLockAcquire(&btscan->btps_mutex);
614  btscan->btps_scanPage = InvalidBlockNumber;
615  btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
616  btscan->btps_arrayKeyCount = 0;
617  SpinLockRelease(&btscan->btps_mutex);
618 }

References Assert(), BTPARALLEL_NOT_INITIALIZED, BTParallelScanDescData::btps_arrayKeyCount, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, InvalidBlockNumber, OffsetToPointer, IndexScanDescData::parallel_scan, ParallelIndexScanDescData::ps_offset, SpinLockAcquire, and SpinLockRelease.

Referenced by bthandler().

◆ btreevacuumposting()

static BTVacuumPosting btreevacuumposting ( BTVacState *  vstate,
IndexTuple  posting,
OffsetNumber  updatedoffset,
int *  nremaining 
)
static

Definition at line 1376 of file nbtree.c.

1378 {
1379  int live = 0;
1380  int nitem = BTreeTupleGetNPosting(posting);
1381  ItemPointer items = BTreeTupleGetPosting(posting);
1382  BTVacuumPosting vacposting = NULL;
1383 
1384  for (int i = 0; i < nitem; i++)
1385  {
1386  if (!vstate->callback(items + i, vstate->callback_state))
1387  {
1388  /* Live table TID */
1389  live++;
1390  }
1391  else if (vacposting == NULL)
1392  {
1393  /*
1394  * First dead table TID encountered.
1395  *
1396  * It's now clear that we need to delete one or more dead table
1397  * TIDs, so start maintaining metadata describing how to update
1398  * existing posting list tuple.
1399  */
1400  vacposting = palloc(offsetof(BTVacuumPostingData, deletetids) +
1401  nitem * sizeof(uint16));
1402 
1403  vacposting->itup = posting;
1404  vacposting->updatedoffset = updatedoffset;
1405  vacposting->ndeletedtids = 0;
1406  vacposting->deletetids[vacposting->ndeletedtids++] = i;
1407  }
1408  else
1409  {
1410  /* Second or subsequent dead table TID */
1411  vacposting->deletetids[vacposting->ndeletedtids++] = i;
1412  }
1413  }
1414 
1415  *nremaining = live;
1416  return vacposting;
1417 }

References BTreeTupleGetNPosting(), BTreeTupleGetPosting(), BTVacState::callback, BTVacState::callback_state, BTVacuumPostingData::deletetids, BTVacuumPostingData::itup, BTVacuumPostingData::ndeletedtids, palloc(), and BTVacuumPostingData::updatedoffset.

Referenced by btvacuumpage().
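
As a worked example: given a posting list tuple holding five table TIDs of which vstate->callback reports the second and fourth dead, the loop above returns a BTVacuumPosting with ndeletedtids = 2 and deletetids[] = {1, 3} (offsets into the posting list), and sets *nremaining to 3. If all five TIDs were live, it would return NULL and set *nremaining to 5.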

◆ btrescan()

void btrescan ( IndexScanDesc  scan,
ScanKey  scankey,
int  nscankeys,
ScanKey  orderbys,
int  norderbys 
)

Definition at line 393 of file nbtree.c.

395 {
396  BTScanOpaque so = (BTScanOpaque) scan->opaque;
397 
398  /* we aren't holding any read locks, but gotta drop the pins */
399  if (BTScanPosIsValid(so->currPos))
400  {
401  /* Before leaving current page, deal with any killed items */
402  if (so->numKilled > 0)
403  _bt_killitems(scan);
404  BTScanPosUnpinIfPinned(so->currPos);
405  BTScanPosInvalidate(so->currPos);
406  }
407 
408  so->markItemIndex = -1;
409  so->arrayKeyCount = 0;
410  BTScanPosUnpinIfPinned(so->markPos);
411  BTScanPosInvalidate(so->markPos);
412 
413  /*
414  * Allocate tuple workspace arrays, if needed for an index-only scan and
415  * not already done in a previous rescan call. To save on palloc
416  * overhead, both workspaces are allocated as one palloc block; only this
417  * function and btendscan know that.
418  *
419  * NOTE: this data structure also makes it safe to return data from a
420  * "name" column, even though btree name_ops uses an underlying storage
421  * datatype of cstring. The risk there is that "name" is supposed to be
422  * padded to NAMEDATALEN, but the actual index tuple is probably shorter.
423  * However, since we only return data out of tuples sitting in the
424  * currTuples array, a fetch of NAMEDATALEN bytes can at worst pull some
425  * data out of the markTuples array --- running off the end of memory for
426  * a SIGSEGV is not possible. Yeah, this is ugly as sin, but it beats
427  * adding special-case treatment for name_ops elsewhere.
428  */
429  if (scan->xs_want_itup && so->currTuples == NULL)
430  {
431  so->currTuples = (char *) palloc(BLCKSZ * 2);
432  so->markTuples = so->currTuples + BLCKSZ;
433  }
434 
435  /*
436  * Reset the scan keys
437  */
438  if (scankey && scan->numberOfKeys > 0)
439  memmove(scan->keyData,
440  scankey,
441  scan->numberOfKeys * sizeof(ScanKeyData));
442  so->numberOfKeys = 0; /* until _bt_preprocess_keys sets it */
443 
444  /* If any keys are SK_SEARCHARRAY type, set up array-key info */
445  _bt_preprocess_array_keys(scan);
446 }

References _bt_killitems(), _bt_preprocess_array_keys(), BTScanOpaqueData::arrayKeyCount, BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, IndexScanDescData::keyData, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanOpaqueData::numberOfKeys, IndexScanDescData::numberOfKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), and IndexScanDescData::xs_want_itup.

Referenced by bthandler().

◆ btrestrpos()

void btrestrpos ( IndexScanDesc  scan)

Definition at line 518 of file nbtree.c.

519 {
520  BTScanOpaque so = (BTScanOpaque) scan->opaque;
521 
522  /* Restore the marked positions of any array keys */
523  if (so->numArrayKeys)
524  _bt_restore_array_keys(scan);
525 
526  if (so->markItemIndex >= 0)
527  {
528  /*
529  * The scan has never moved to a new page since the last mark. Just
530  * restore the itemIndex.
531  *
532  * NB: In this case we can't count on anything in so->markPos to be
533  * accurate.
534  */
535  so->currPos.itemIndex = so->markItemIndex;
536  }
537  else
538  {
539  /*
540  * The scan moved to a new page after last mark or restore, and we are
541  * now restoring to the marked page. We aren't holding any read
542  * locks, but if we're still holding the pin for the current position,
543  * we must drop it.
544  */
545  if (BTScanPosIsValid(so->currPos))
546  {
547  /* Before leaving current page, deal with any killed items */
548  if (so->numKilled > 0)
549  _bt_killitems(scan);
550  BTScanPosUnpinIfPinned(so->currPos);
551  }
552 
553  if (BTScanPosIsValid(so->markPos))
554  {
555  /* bump pin on mark buffer for assignment to current buffer */
556  if (BTScanPosIsPinned(so->markPos))
557  IncrBufferRefCount(so->markPos.buf);
558  memcpy(&so->currPos, &so->markPos,
559  offsetof(BTScanPosData, items[1]) +
560  so->markPos.lastItem * sizeof(BTScanPosItem));
561  if (so->currTuples)
562  memcpy(so->currTuples, so->markTuples,
563  so->markPos.nextTupleOffset);
564  }
565  else
566  BTScanPosInvalidate(so->currPos);
567  }
568 }

References _bt_killitems(), _bt_restore_array_keys(), BTScanPosInvalidate, BTScanPosIsPinned, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanPosData::buf, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, IncrBufferRefCount(), BTScanPosData::itemIndex, BTScanPosData::lastItem, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanPosData::nextTupleOffset, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numKilled, and IndexScanDescData::opaque.

Referenced by bthandler().

◆ btvacuumcleanup()

IndexBulkDeleteResult * btvacuumcleanup ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats 
)

Definition at line 819 of file nbtree.c.

820 {
821  BlockNumber num_delpages;
822 
823  /* No-op in ANALYZE ONLY mode */
824  if (info->analyze_only)
825  return stats;
826 
827  /*
828  * If btbulkdelete was called, we need not do anything (we just maintain
829  * the information used within _bt_vacuum_needs_cleanup() by calling
830  * _bt_set_cleanup_info() below).
831  *
832  * If btbulkdelete was _not_ called, then we have a choice to make: we
833  * must decide whether or not a btvacuumscan() call is needed now (i.e.
834  * whether the ongoing VACUUM operation can entirely avoid a physical scan
835  * of the index). A call to _bt_vacuum_needs_cleanup() decides it for us
836  * now.
837  */
838  if (stats == NULL)
839  {
840  /* Check if VACUUM operation can entirely avoid btvacuumscan() call */
841  if (!_bt_vacuum_needs_cleanup(info->index))
842  return NULL;
843 
844  /*
845  * Since we aren't going to actually delete any leaf items, there's no
846  * need to go through all the vacuum-cycle-ID pushups here.
847  *
848  * Posting list tuples are a source of inaccuracy for cleanup-only
849  * scans. btvacuumscan() will assume that the number of index tuples
850  * from each page can be used as num_index_tuples, even though
851  * num_index_tuples is supposed to represent the number of TIDs in the
852  * index. This naive approach can underestimate the number of tuples
853  * in the index significantly.
854  *
855  * We handle the problem by making num_index_tuples an estimate in
856  * cleanup-only case.
857  */
858  stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
859  btvacuumscan(info, stats, NULL, NULL, 0);
860  stats->estimated_count = true;
861  }
862 
863  /*
864  * Maintain num_delpages value in metapage for _bt_vacuum_needs_cleanup().
865  *
866  * num_delpages is the number of deleted pages now in the index that were
867  * not safe to place in the FSM to be recycled just yet. num_delpages is
868  * greater than 0 only when _bt_pagedel() actually deleted pages during
869  * our call to btvacuumscan(). Even then, _bt_pendingfsm_finalize() must
870  * have failed to place any newly deleted pages in the FSM just moments
871  * ago. (Actually, there are edge cases where recycling of the current
872  * VACUUM's newly deleted pages does not even become safe by the time the
873  * next VACUUM comes around. See nbtree/README.)
874  */
875  Assert(stats->pages_deleted >= stats->pages_free);
876  num_delpages = stats->pages_deleted - stats->pages_free;
877  _bt_set_cleanup_info(info->index, num_delpages);
878 
879  /*
880  * It's quite possible for us to be fooled by concurrent page splits into
881  * double-counting some index tuples, so disbelieve any total that exceeds
882  * the underlying heap's count ... if we know that accurately. Otherwise
883  * this might just make matters worse.
884  */
885  if (!info->estimated_count)
886  {
887  if (stats->num_index_tuples > info->num_heap_tuples)
888  stats->num_index_tuples = info->num_heap_tuples;
889  }
890 
891  return stats;
892 }

References _bt_set_cleanup_info(), _bt_vacuum_needs_cleanup(), IndexVacuumInfo::analyze_only, Assert(), btvacuumscan(), IndexVacuumInfo::estimated_count, IndexBulkDeleteResult::estimated_count, IndexVacuumInfo::index, IndexVacuumInfo::num_heap_tuples, IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, and palloc0().

Referenced by bthandler().
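
A concrete instance of the num_delpages arithmetic above: if the scan deleted ten pages (stats->pages_deleted = 10) but only seven could be placed in the FSM (stats->pages_free = 7), then num_delpages = 3 is stored in the metapage, and _bt_vacuum_needs_cleanup() weighs those three not-yet-recyclable pages when the next VACUUM decides whether to scan the index.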

◆ btvacuumpage()

static void btvacuumpage ( BTVacState *  vstate,
BlockNumber  scanblkno 
)
static

Definition at line 1041 of file nbtree.c.

1042 {
1043  IndexVacuumInfo *info = vstate->info;
1044  IndexBulkDeleteResult *stats = vstate->stats;
1045  IndexBulkDeleteCallback callback = vstate->callback;
1046  void *callback_state = vstate->callback_state;
1047  Relation rel = info->index;
1048  Relation heaprel = info->heaprel;
1049  bool attempt_pagedel;
1050  BlockNumber blkno,
1051  backtrack_to;
1052  Buffer buf;
1053  Page page;
1054  BTPageOpaque opaque;
1055 
1056  blkno = scanblkno;
1057 
1058 backtrack:
1059 
1060  attempt_pagedel = false;
1061  backtrack_to = P_NONE;
1062 
1063  /* call vacuum_delay_point while not holding any buffer lock */
1064  vacuum_delay_point();
1065 
1066  /*
1067  * We can't use _bt_getbuf() here because it always applies
1068  * _bt_checkpage(), which will barf on an all-zero page. We want to
1069  * recycle all-zero pages, not fail. Also, we want to use a nondefault
1070  * buffer access strategy.
1071  */
1072  buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
1073  info->strategy);
1074  _bt_lockbuf(rel, buf, BT_READ);
1075  page = BufferGetPage(buf);
1076  opaque = NULL;
1077  if (!PageIsNew(page))
1078  {
1079  _bt_checkpage(rel, buf);
1080  opaque = BTPageGetOpaque(page);
1081  }
1082 
1083  Assert(blkno <= scanblkno);
1084  if (blkno != scanblkno)
1085  {
1086  /*
1087  * We're backtracking.
1088  *
1089  * We followed a right link to a sibling leaf page (a page that
1090  * happens to be from a block located before scanblkno). The only
1091  * case we want to do anything with is a live leaf page having the
1092  * current vacuum cycle ID.
1093  *
1094  * The page had better be in a state that's consistent with what we
1095  * expect. Check for conditions that imply corruption in passing. It
1096  * can't be half-dead because only an interrupted VACUUM process can
1097  * leave pages in that state, so we'd definitely have dealt with it
1098  * back when the page was the scanblkno page (half-dead pages are
1099  * always marked fully deleted by _bt_pagedel(), barring corruption).
1100  */
1101  if (!opaque || !P_ISLEAF(opaque) || P_ISHALFDEAD(opaque))
1102  {
1103  Assert(false);
1104  ereport(LOG,
1105  (errcode(ERRCODE_INDEX_CORRUPTED),
1106  errmsg_internal("right sibling %u of scanblkno %u unexpectedly in an inconsistent state in index \"%s\"",
1107  blkno, scanblkno, RelationGetRelationName(rel))));
1108  _bt_relbuf(rel, buf);
1109  return;
1110  }
1111 
1112  /*
1113  * We may have already processed the page in an earlier call, when the
1114  * page was scanblkno. This happens when the leaf page split occurred
1115  * after the scan began, but before the right sibling page became the
1116  * scanblkno.
1117  *
1118  * Page may also have been deleted by current btvacuumpage() call,
1119  * since _bt_pagedel() sometimes deletes the right sibling page of
1120  * scanblkno in passing (it does so after we decided where to
1121  * backtrack to). We don't need to process this page as a deleted
1122  * page a second time now (in fact, it would be wrong to count it as a
1123  * deleted page in the bulk delete statistics a second time).
1124  */
1125  if (opaque->btpo_cycleid != vstate->cycleid || P_ISDELETED(opaque))
1126  {
1127  /* Done with current scanblkno (and all lower split pages) */
1128  _bt_relbuf(rel, buf);
1129  return;
1130  }
1131  }
1132 
1133  if (!opaque || BTPageIsRecyclable(page, heaprel))
1134  {
1135  /* Okay to recycle this page (which could be leaf or internal) */
1136  RecordFreeIndexPage(rel, blkno);
1137  stats->pages_deleted++;
1138  stats->pages_free++;
1139  }
1140  else if (P_ISDELETED(opaque))
1141  {
1142  /*
1143  * Already deleted page (which could be leaf or internal). Can't
1144  * recycle yet.
1145  */
1146  stats->pages_deleted++;
1147  }
1148  else if (P_ISHALFDEAD(opaque))
1149  {
1150  /* Half-dead leaf page (from interrupted VACUUM) -- finish deleting */
1151  attempt_pagedel = true;
1152 
1153  /*
1154  * _bt_pagedel() will increment both pages_newly_deleted and
1155  * pages_deleted stats in all cases (barring corruption)
1156  */
1157  }
1158  else if (P_ISLEAF(opaque))
1159  {
1160  OffsetNumber deletable[MaxIndexTuplesPerPage];
1161  int ndeletable;
1162  BTVacuumPosting updatable[MaxIndexTuplesPerPage];
1163  int nupdatable;
1164  OffsetNumber offnum,
1165  minoff,
1166  maxoff;
1167  int nhtidsdead,
1168  nhtidslive;
1169 
1170  /*
1171  * Trade in the initial read lock for a full cleanup lock on this
1172  * page. We must get such a lock on every leaf page over the course
1173  * of the vacuum scan, whether or not it actually contains any
1174  * deletable tuples --- see nbtree/README.
1175  */
1176  _bt_upgradelockbufcleanup(rel, buf);
1177 
1178  /*
1179  * Check whether we need to backtrack to earlier pages. What we are
1180  * concerned about is a page split that happened since we started the
1181  * vacuum scan. If the split moved tuples on the right half of the
1182  * split (i.e. the tuples that sort high) to a block that we already
1183  * passed over, then we might have missed the tuples. We need to
1184  * backtrack now. (Must do this before possibly clearing btpo_cycleid
1185  * or deleting scanblkno page below!)
1186  */
1187  if (vstate->cycleid != 0 &&
1188  opaque->btpo_cycleid == vstate->cycleid &&
1189  !(opaque->btpo_flags & BTP_SPLIT_END) &&
1190  !P_RIGHTMOST(opaque) &&
1191  opaque->btpo_next < scanblkno)
1192  backtrack_to = opaque->btpo_next;
1193 
1194  ndeletable = 0;
1195  nupdatable = 0;
1196  minoff = P_FIRSTDATAKEY(opaque);
1197  maxoff = PageGetMaxOffsetNumber(page);
1198  nhtidsdead = 0;
1199  nhtidslive = 0;
1200  if (callback)
1201  {
1202  /* btbulkdelete callback tells us what to delete (or update) */
1203  for (offnum = minoff;
1204  offnum <= maxoff;
1205  offnum = OffsetNumberNext(offnum))
1206  {
1207  IndexTuple itup;
1208 
1209  itup = (IndexTuple) PageGetItem(page,
1210  PageGetItemId(page, offnum));
1211 
1212  Assert(!BTreeTupleIsPivot(itup));
1213  if (!BTreeTupleIsPosting(itup))
1214  {
1215  /* Regular tuple, standard table TID representation */
1216  if (callback(&itup->t_tid, callback_state))
1217  {
1218  deletable[ndeletable++] = offnum;
1219  nhtidsdead++;
1220  }
1221  else
1222  nhtidslive++;
1223  }
1224  else
1225  {
1226  BTVacuumPosting vacposting;
1227  int nremaining;
1228 
1229  /* Posting list tuple */
1230  vacposting = btreevacuumposting(vstate, itup, offnum,
1231  &nremaining);
1232  if (vacposting == NULL)
1233  {
1234  /*
1235  * All table TIDs from the posting tuple remain, so no
1236  * delete or update required
1237  */
1238  Assert(nremaining == BTreeTupleGetNPosting(itup));
1239  }
1240  else if (nremaining > 0)
1241  {
1242 
1243  /*
1244  * Store metadata about posting list tuple in
1245  * updatable array for entire page. Existing tuple
1246  * will be updated during the later call to
1247  * _bt_delitems_vacuum().
1248  */
1249  Assert(nremaining < BTreeTupleGetNPosting(itup));
1250  updatable[nupdatable++] = vacposting;
1251  nhtidsdead += BTreeTupleGetNPosting(itup) - nremaining;
1252  }
1253  else
1254  {
1255  /*
1256  * All table TIDs from the posting list must be
1257  * deleted. We'll delete the index tuple completely
1258  * (no update required).
1259  */
1260  Assert(nremaining == 0);
1261  deletable[ndeletable++] = offnum;
1262  nhtidsdead += BTreeTupleGetNPosting(itup);
1263  pfree(vacposting);
1264  }
1265 
1266  nhtidslive += nremaining;
1267  }
1268  }
1269  }
1270 
1271  /*
1272  * Apply any needed deletes or updates. We issue just one
1273  * _bt_delitems_vacuum() call per page, so as to minimize WAL traffic.
1274  */
1275  if (ndeletable > 0 || nupdatable > 0)
1276  {
1277  Assert(nhtidsdead >= ndeletable + nupdatable);
1278  _bt_delitems_vacuum(rel, buf, deletable, ndeletable, updatable,
1279  nupdatable);
1280 
1281  stats->tuples_removed += nhtidsdead;
1282  /* must recompute maxoff */
1283  maxoff = PageGetMaxOffsetNumber(page);
1284 
1285  /* can't leak memory here */
1286  for (int i = 0; i < nupdatable; i++)
1287  pfree(updatable[i]);
1288  }
1289  else
1290  {
1291  /*
1292  * If the leaf page has been split during this vacuum cycle, it
1293  * seems worth expending a write to clear btpo_cycleid even if we
1294  * don't have any deletions to do. (If we do, _bt_delitems_vacuum
1295  * takes care of this.) This ensures we won't process the page
1296  * again.
1297  *
1298  * We treat this like a hint-bit update because there's no need to
1299  * WAL-log it.
1300  */
1301  Assert(nhtidsdead == 0);
1302  if (vstate->cycleid != 0 &&
1303  opaque->btpo_cycleid == vstate->cycleid)
1304  {
1305  opaque->btpo_cycleid = 0;
1306  MarkBufferDirtyHint(buf, true);
1307  }
1308  }
1309 
1310  /*
1311  * If the leaf page is now empty, try to delete it; else count the
1312  * live tuples (live table TIDs in posting lists are counted as
1313  * separate live tuples). We don't delete when backtracking, though,
1314  * since that would require teaching _bt_pagedel() about backtracking
1315  * (doesn't seem worth adding more complexity to deal with that).
1316  *
1317  * We don't count the number of live TIDs during cleanup-only calls to
1318  * btvacuumscan (i.e. when callback is not set). We count the number
1319  * of index tuples directly instead. This avoids the expense of
1320  * directly examining all of the tuples on each page. VACUUM will
1321  * treat num_index_tuples as an estimate in cleanup-only case, so it
1322  * doesn't matter that this underestimates num_index_tuples
1323  * significantly in some cases.
1324  */
1325  if (minoff > maxoff)
1326  attempt_pagedel = (blkno == scanblkno);
1327  else if (callback)
1328  stats->num_index_tuples += nhtidslive;
1329  else
1330  stats->num_index_tuples += maxoff - minoff + 1;
1331 
1332  Assert(!attempt_pagedel || nhtidslive == 0);
1333  }
1334 
1335  if (attempt_pagedel)
1336  {
1337  MemoryContext oldcontext;
1338 
1339  /* Run pagedel in a temp context to avoid memory leakage */
1340  MemoryContextReset(vstate->pagedelcontext);
1341  oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
1342 
1343  /*
1344  * _bt_pagedel maintains the bulk delete stats on our behalf;
1345  * pages_newly_deleted and pages_deleted are likely to be incremented
1346  * during call
1347  */
1348  Assert(blkno == scanblkno);
1349  _bt_pagedel(rel, buf, vstate);
1350 
1351  MemoryContextSwitchTo(oldcontext);
1352  /* pagedel released buffer, so we shouldn't */
1353  }
1354  else
1355  _bt_relbuf(rel, buf);
1356 
1357  if (backtrack_to != P_NONE)
1358  {
1359  blkno = backtrack_to;
1360  goto backtrack;
1361  }
1362 }

References _bt_checkpage(), _bt_delitems_vacuum(), _bt_lockbuf(), _bt_pagedel(), _bt_relbuf(), _bt_upgradelockbufcleanup(), Assert(), BT_READ, BTP_SPLIT_END, BTPageGetOpaque, BTPageIsRecyclable(), BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTreeTupleGetNPosting(), BTreeTupleIsPivot(), BTreeTupleIsPosting(), btreevacuumposting(), buf, BufferGetPage(), BTVacState::callback, callback(), BTVacState::callback_state, BTVacState::cycleid, ereport, errcode(), errmsg_internal(), IndexVacuumInfo::heaprel, i, IndexVacuumInfo::index, BTVacState::info, LOG, MAIN_FORKNUM, MarkBufferDirtyHint(), MaxIndexTuplesPerPage, MemoryContextReset(), MemoryContextSwitchTo(), IndexBulkDeleteResult::num_index_tuples, OffsetNumberNext, P_FIRSTDATAKEY, P_ISDELETED, P_ISHALFDEAD, P_ISLEAF, P_NONE, P_RIGHTMOST, BTVacState::pagedelcontext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageIsNew(), IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, pfree(), RBM_NORMAL, ReadBufferExtended(), RecordFreeIndexPage(), RelationGetRelationName, BTVacState::stats, IndexVacuumInfo::strategy, IndexTupleData::t_tid, IndexBulkDeleteResult::tuples_removed, and vacuum_delay_point().

Referenced by btvacuumscan().
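btreevacuumposting() (nbtree.c:1376 per the cross-references above) performs the posting-list triage whose all-dead outcome is visible at lines 1258–1264 of btvacuumpage(): it returns NULL when every table TID in the posting tuple is still live, and otherwise a palloc'd BTVacuumPosting naming the posting-list offsets to delete, with *nremaining reporting how many TIDs survive (zero means the caller deletes the whole tuple). The following is a condensed sketch of that logic with assertions and comments stripped; the actual definition at nbtree.c:1376 is authoritative and may differ in detail.

static BTVacuumPosting
btreevacuumposting_sketch(BTVacState *vstate, IndexTuple posting,
                          OffsetNumber updatedoffset, int *nremaining)
{
    int         live = 0;
    int         nitem = BTreeTupleGetNPosting(posting);
    ItemPointer items = BTreeTupleGetPosting(posting);
    BTVacuumPosting vacposting = NULL;

    for (int i = 0; i < nitem; i++)
    {
        if (!vstate->callback(&items[i], vstate->callback_state))
        {
            live++;             /* live table TID */
        }
        else if (vacposting == NULL)
        {
            /* first dead TID: allocate the workspace lazily */
            vacposting = palloc(offsetof(BTVacuumPostingData, deletetids) +
                                nitem * sizeof(uint16));
            vacposting->itup = posting;
            vacposting->updatedoffset = updatedoffset;
            vacposting->ndeletedtids = 0;
            vacposting->deletetids[vacposting->ndeletedtids++] = i;
        }
        else
        {
            /* second or subsequent dead TID */
            vacposting->deletetids[vacposting->ndeletedtids++] = i;
        }
    }

    *nremaining = live;
    return vacposting;          /* NULL: no delete or update required */
}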

◆ btvacuumscan()

static void btvacuumscan ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats,
IndexBulkDeleteCallback  callback,
void *  callback_state,
BTCycleId  cycleid 
)
static

Definition at line 907 of file nbtree.c.

910 {
911  Relation rel = info->index;
912  BTVacState vstate;
913  BlockNumber num_pages;
914  BlockNumber scanblkno;
915  bool needLock;
916 
917  /*
918  * Reset fields that track information about the entire index now. This
919  * avoids double-counting in the case where a single VACUUM command
920  * requires multiple scans of the index.
921  *
922  * Avoid resetting the tuples_removed and pages_newly_deleted fields here,
923  * since they track information about the VACUUM command, and so must last
924  * across each call to btvacuumscan().
925  *
926  * (Note that pages_free is treated as state about the whole index, not
927  * the current VACUUM. This is appropriate because RecordFreeIndexPage()
928  * calls are idempotent, and get repeated for the same deleted pages in
929  * some scenarios. The point for us is to track the number of recyclable
930  * pages in the index at the end of the VACUUM command.)
931  */
932  stats->num_pages = 0;
933  stats->num_index_tuples = 0;
934  stats->pages_deleted = 0;
935  stats->pages_free = 0;
936 
937  /* Set up info to pass down to btvacuumpage */
938  vstate.info = info;
939  vstate.stats = stats;
940  vstate.callback = callback;
941  vstate.callback_state = callback_state;
942  vstate.cycleid = cycleid;
943 
944  /* Create a temporary memory context to run _bt_pagedel in */
945  vstate.pagedelcontext = AllocSetContextCreate(CurrentMemoryContext,
946  "_bt_pagedel",
947  ALLOCSET_DEFAULT_SIZES);
948 
949  /* Initialize vstate fields used by _bt_pendingfsm_finalize */
950  vstate.bufsize = 0;
951  vstate.maxbufsize = 0;
952  vstate.pendingpages = NULL;
953  vstate.npendingpages = 0;
954  /* Consider applying _bt_pendingfsm_finalize optimization */
955  _bt_pendingfsm_init(rel, &vstate, (callback == NULL));
956 
957  /*
958  * The outer loop iterates over all index pages except the metapage, in
959  * physical order (we hope the kernel will cooperate in providing
960  * read-ahead for speed). It is critical that we visit all leaf pages,
961  * including ones added after we start the scan, else we might fail to
962  * delete some deletable tuples. Hence, we must repeatedly check the
963  * relation length. We must acquire the relation-extension lock while
964  * doing so to avoid a race condition: if someone else is extending the
965  * relation, there is a window where bufmgr/smgr have created a new
966  * all-zero page but it hasn't yet been write-locked by _bt_getbuf(). If
967  * we manage to scan such a page here, we'll improperly assume it can be
968  * recycled. Taking the lock synchronizes things enough to prevent a
969  * problem: either num_pages won't include the new page, or _bt_getbuf
970  * already has write lock on the buffer and it will be fully initialized
971  * before we can examine it. Also, we need not worry if a page is added
972  * immediately after we look; the page splitting code already has
973  * write-lock on the left page before it adds a right page, so we must
974  * already have processed any tuples due to be moved into such a page.
975  *
976  * XXX: Now that new pages are locked with RBM_ZERO_AND_LOCK, I don't
977  * think the use of the extension lock is still required.
978  *
979  * We can skip locking for new or temp relations, however, since no one
980  * else could be accessing them.
981  */
982  needLock = !RELATION_IS_LOCAL(rel);
983 
984  scanblkno = BTREE_METAPAGE + 1;
985  for (;;)
986  {
987  /* Get the current relation length */
988  if (needLock)
989  LockRelationForExtension(rel, ExclusiveLock);
990  num_pages = RelationGetNumberOfBlocks(rel);
991  if (needLock)
992  UnlockRelationForExtension(rel, ExclusiveLock);
993 
994  if (info->report_progress)
995  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
996  num_pages);
997 
998  /* Quit if we've scanned the whole relation */
999  if (scanblkno >= num_pages)
1000  break;
1001  /* Iterate over pages, then loop back to recheck length */
1002  for (; scanblkno < num_pages; scanblkno++)
1003  {
1004  btvacuumpage(&vstate, scanblkno);
1005  if (info->report_progress)
1006  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1007  scanblkno);
1008  }
1009  }
1010 
1011  /* Set statistics num_pages field to final size of index */
1012  stats->num_pages = num_pages;
1013 
1014  MemoryContextDelete(vstate.pagedelcontext);
1015 
1016  /*
1017  * If there were any calls to _bt_pagedel() during scan of the index then
1018  * see if any of the resulting pages can be placed in the FSM now. When
1019  * it's not safe we'll have to leave it up to a future VACUUM operation.
1020  *
1021  * Finally, if we placed any pages in the FSM (either just now or during
1022  * the scan), forcibly update the upper-level FSM pages to ensure that
1023  * searchers can find them.
1024  */
1025  _bt_pendingfsm_finalize(rel, &vstate);
1026  if (stats->pages_free > 0)
1027  IndexFreeSpaceMapVacuum(rel);
1028 }
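Stripped of the vacuuming work itself, the outer loop above (lines 985–1009) follows a reusable recheck-the-length idiom for scanning a relation that may grow underneath the scan. A skeleton of that idiom, with process_page() as a hypothetical stand-in for btvacuumpage():

static void
scan_all_blocks(Relation rel)
{
    bool        needLock = !RELATION_IS_LOCAL(rel);
    BlockNumber num_pages;
    BlockNumber scanblkno = BTREE_METAPAGE + 1;

    for (;;)
    {
        /* recheck length under the relation-extension lock (see comment above) */
        if (needLock)
            LockRelationForExtension(rel, ExclusiveLock);
        num_pages = RelationGetNumberOfBlocks(rel);
        if (needLock)
            UnlockRelationForExtension(rel, ExclusiveLock);

        if (scanblkno >= num_pages)
            break;              /* whole relation scanned */
        for (; scanblkno < num_pages; scanblkno++)
            process_page(rel, scanblkno);   /* btvacuumpage() in the real code */
    }
}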
void pgstat_progress_update_param(int index, int64 val)
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:227
void IndexFreeSpaceMapVacuum(Relation rel)
Definition: indexfsm.c:71
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:431
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:481
#define ExclusiveLock
Definition: lockdefs.h:42
MemoryContext CurrentMemoryContext
Definition: mcxt.c:135
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:153
void _bt_pendingfsm_finalize(Relation rel, BTVacState *vstate)
Definition: nbtpage.c:2990
void _bt_pendingfsm_init(Relation rel, BTVacState *vstate, bool cleanuponly)
Definition: nbtpage.c:2949
static void btvacuumpage(BTVacState *vstate, BlockNumber scanblkno)
Definition: nbtree.c:1041
#define PROGRESS_SCAN_BLOCKS_DONE
Definition: progress.h:122
#define PROGRESS_SCAN_BLOCKS_TOTAL
Definition: progress.h:121
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:649
BTPendingFSM * pendingpages
Definition: nbtree.h:344
int npendingpages
Definition: nbtree.h:345
int bufsize
Definition: nbtree.h:342
int maxbufsize
Definition: nbtree.h:343
BlockNumber num_pages
Definition: genam.h:77
bool report_progress
Definition: genam.h:49

References _bt_pendingfsm_finalize(), _bt_pendingfsm_init(), ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, BTREE_METAPAGE, btvacuumpage(), BTVacState::bufsize, BTVacState::callback, callback(), BTVacState::callback_state, CurrentMemoryContext, BTVacState::cycleid, ExclusiveLock, IndexVacuumInfo::index, IndexFreeSpaceMapVacuum(), BTVacState::info, LockRelationForExtension(), BTVacState::maxbufsize, MemoryContextDelete(), BTVacState::npendingpages, IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::num_pages, BTVacState::pagedelcontext, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, BTVacState::pendingpages, pgstat_progress_update_param(), PROGRESS_SCAN_BLOCKS_DONE, PROGRESS_SCAN_BLOCKS_TOTAL, RELATION_IS_LOCAL, RelationGetNumberOfBlocks, IndexVacuumInfo::report_progress, BTVacState::stats, and UnlockRelationForExtension().

Referenced by btbulkdelete(), and btvacuumcleanup().
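For orientation, the two callers reach btvacuumscan() differently: btbulkdelete() always scans, under a vacuum cycle ID, while btvacuumcleanup() passes callback == NULL (the cleanup-only mode discussed at lines 1317–1323 above) and only when no btbulkdelete() call occurred during the VACUUM. The following is a simplified paraphrase of btbulkdelete(); the PG_ENSURE_ERROR_CLEANUP bookkeeping that guards _bt_start_vacuum()/_bt_end_vacuum() against errors is omitted, so treat this as an outline rather than the verbatim source.

IndexBulkDeleteResult *
btbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
             IndexBulkDeleteCallback callback, void *callback_state)
{
    Relation    rel = info->index;
    BTCycleId   cycleid;

    /* allocate stats if first time through, else re-use existing struct */
    if (stats == NULL)
        stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));

    /* establish the vacuum cycle ID to use for this scan */
    cycleid = _bt_start_vacuum(rel);

    btvacuumscan(info, stats, callback, callback_state, cycleid);

    _bt_end_vacuum(rel);

    return stats;
}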