PostgreSQL Source Code (git master)
nbtree.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "access/nbtxlog.h"
#include "access/relscan.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "commands/progress.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "nodes/execnodes.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "storage/condition_variable.h"
#include "storage/indexfsm.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/smgr.h"
#include "utils/builtins.h"
#include "utils/index_selfuncs.h"
#include "utils/memutils.h"

Data Structures

struct  BTParallelScanDescData
 

Typedefs

typedef struct BTParallelScanDescData BTParallelScanDescData
 
typedef struct BTParallelScanDescData * BTParallelScanDesc
 

Enumerations

enum  BTPS_State { BTPARALLEL_NOT_INITIALIZED , BTPARALLEL_ADVANCING , BTPARALLEL_IDLE , BTPARALLEL_DONE }
 

Functions

static void btvacuumscan (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid)
 
static void btvacuumpage (BTVacState *vstate, BlockNumber scanblkno)
 
static BTVacuumPosting btreevacuumposting (BTVacState *vstate, IndexTuple posting, OffsetNumber updatedoffset, int *nremaining)
 
Datum bthandler (PG_FUNCTION_ARGS)
 
void btbuildempty (Relation index)
 
bool btinsert (Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo)
 
bool btgettuple (IndexScanDesc scan, ScanDirection dir)
 
int64 btgetbitmap (IndexScanDesc scan, TIDBitmap *tbm)
 
IndexScanDesc btbeginscan (Relation rel, int nkeys, int norderbys)
 
void btrescan (IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
 
void btendscan (IndexScanDesc scan)
 
void btmarkpos (IndexScanDesc scan)
 
void btrestrpos (IndexScanDesc scan)
 
Size btestimateparallelscan (void)
 
void btinitparallelscan (void *target)
 
void btparallelrescan (IndexScanDesc scan)
 
bool _bt_parallel_seize (IndexScanDesc scan, BlockNumber *pageno)
 
void _bt_parallel_release (IndexScanDesc scan, BlockNumber scan_page)
 
void _bt_parallel_done (IndexScanDesc scan)
 
void _bt_parallel_advance_array_keys (IndexScanDesc scan)
 
IndexBulkDeleteResult * btbulkdelete (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
 
IndexBulkDeleteResult * btvacuumcleanup (IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 
bool btcanreturn (Relation index, int attno)
 

Typedef Documentation

◆ BTParallelScanDesc

Definition at line 78 of file nbtree.c.

◆ BTParallelScanDescData

Enumeration Type Documentation

◆ BTPS_State

enum BTPS_State
Enumerator
BTPARALLEL_NOT_INITIALIZED 
BTPARALLEL_ADVANCING 
BTPARALLEL_IDLE 
BTPARALLEL_DONE 

Definition at line 54 of file nbtree.c.

54 typedef enum
55 {
56  BTPARALLEL_NOT_INITIALIZED,
57  BTPARALLEL_ADVANCING,
58  BTPARALLEL_IDLE,
59  BTPARALLEL_DONE
60 } BTPS_State;

Function Documentation

◆ _bt_parallel_advance_array_keys()

void _bt_parallel_advance_array_keys ( IndexScanDesc  scan)

Definition at line 757 of file nbtree.c.

758 {
759  BTScanOpaque so = (BTScanOpaque) scan->opaque;
760  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
761  BTParallelScanDesc btscan;
762 
763  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
764  parallel_scan->ps_offset);
765 
766  so->arrayKeyCount++;
767  SpinLockAcquire(&btscan->btps_mutex);
768  if (btscan->btps_pageStatus == BTPARALLEL_DONE)
769  {
770  btscan->btps_scanPage = InvalidBlockNumber;
771  btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
772  btscan->btps_arrayKeyCount++;
773  }
774  SpinLockRelease(&btscan->btps_mutex);
775 }

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_DONE, BTPARALLEL_NOT_INITIALIZED, InvalidBlockNumber, OffsetToPointer, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_advance_array_keys().

◆ _bt_parallel_done()

void _bt_parallel_done ( IndexScanDesc  scan)

Definition at line 716 of file nbtree.c.

717 {
718  BTScanOpaque so = (BTScanOpaque) scan->opaque;
719  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
720  BTParallelScanDesc btscan;
721  bool status_changed = false;
722 
723  /* Do nothing, for non-parallel scans */
724  if (parallel_scan == NULL)
725  return;
726 
727  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
728  parallel_scan->ps_offset);
729 
730  /*
731  * Mark the parallel scan as done for this combination of scan keys,
732  * unless some other process already did so. See also
733  * _bt_advance_array_keys.
734  */
735  SpinLockAcquire(&btscan->btps_mutex);
736  if (so->arrayKeyCount >= btscan->btps_arrayKeyCount &&
737  btscan->btps_pageStatus != BTPARALLEL_DONE)
738  {
739  btscan->btps_pageStatus = BTPARALLEL_DONE;
740  status_changed = true;
741  }
742  SpinLockRelease(&btscan->btps_mutex);
743 
744  /* wake up all the workers associated with this parallel scan */
745  if (status_changed)
746  ConditionVariableBroadcast(&btscan->btps_cv);
747 }

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_DONE, ConditionVariableBroadcast(), OffsetToPointer, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_first(), and _bt_readnextpage().

◆ _bt_parallel_release()

void _bt_parallel_release ( IndexScanDesc  scan,
BlockNumber  scan_page 
)

Definition at line 693 of file nbtree.c.

694 {
695  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
696  BTParallelScanDesc btscan;
697 
698  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
699  parallel_scan->ps_offset);
700 
701  SpinLockAcquire(&btscan->btps_mutex);
702  btscan->btps_scanPage = scan_page;
703  btscan->btps_pageStatus = BTPARALLEL_IDLE;
704  SpinLockRelease(&btscan->btps_mutex);
705  ConditionVariableSignal(&btscan->btps_cv);
706 }

References BTPARALLEL_IDLE, BTParallelScanDescData::btps_cv, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, ConditionVariableSignal(), OffsetToPointer, IndexScanDescData::parallel_scan, ParallelIndexScanDescData::ps_offset, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_readnextpage(), and _bt_readpage().

◆ _bt_parallel_seize()

bool _bt_parallel_seize ( IndexScanDesc  scan,
BlockNumber *  pageno 
)

Definition at line 635 of file nbtree.c.

636 {
637  BTScanOpaque so = (BTScanOpaque) scan->opaque;
638  BTPS_State pageStatus;
639  bool exit_loop = false;
640  bool status = true;
641  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
642  BTParallelScanDesc btscan;
643 
644  *pageno = P_NONE;
645 
646  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
647  parallel_scan->ps_offset);
648 
649  while (1)
650  {
651  SpinLockAcquire(&btscan->btps_mutex);
652  pageStatus = btscan->btps_pageStatus;
653 
654  if (so->arrayKeyCount < btscan->btps_arrayKeyCount)
655  {
656  /* Parallel scan has already advanced to a new set of scankeys. */
657  status = false;
658  }
659  else if (pageStatus == BTPARALLEL_DONE)
660  {
661  /*
662  * We're done with this set of scankeys. This may be the end, or
663  * there could be more sets to try.
664  */
665  status = false;
666  }
667  else if (pageStatus != BTPARALLEL_ADVANCING)
668  {
669  /*
670  * We have successfully seized control of the scan for the purpose
671  * of advancing it to a new page!
672  */
673  btscan->btps_pageStatus = BTPARALLEL_ADVANCING;
674  *pageno = btscan->btps_scanPage;
675  exit_loop = true;
676  }
677  SpinLockRelease(&btscan->btps_mutex);
678  if (exit_loop || !status)
679  break;
680  ConditionVariableSleep(&btscan->btps_cv, WAIT_EVENT_BTREE_PAGE);
681  }
682  ConditionVariableCancelSleep();
683 
684  return status;
685 }

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_ADVANCING, BTPARALLEL_DONE, ConditionVariableCancelSleep(), ConditionVariableSleep(), OffsetToPointer, IndexScanDescData::opaque, P_NONE, IndexScanDescData::parallel_scan, SpinLockAcquire, SpinLockRelease, and WAIT_EVENT_BTREE_PAGE.

Referenced by _bt_first(), _bt_readnextpage(), and _bt_steppage().
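Taken together, _bt_parallel_seize(), _bt_parallel_release(), and _bt_parallel_done() form the page hand-off protocol for parallel btree scans. The sketch below is illustrative only: the function and the next_blkno variable are hypothetical stand-ins, and the real control flow lives in _bt_first() and _bt_readnextpage().

static void
parallel_worker_step(IndexScanDesc scan)
{
    BlockNumber pageno;
    BlockNumber next_blkno = P_NONE;    /* stand-in for the btpo_next value
                                         * found while reading the page */

    /* returns false once the scan is finished for this set of scankeys */
    if (!_bt_parallel_seize(scan, &pageno))
        return;

    /*
     * Status is now BTPARALLEL_ADVANCING: only this worker may move the
     * scan.  Read the page, remember its right sibling in next_blkno, and
     * hand the scan back before processing the tuples just read.
     */
    if (next_blkno != P_NONE)
        _bt_parallel_release(scan, next_blkno); /* back to BTPARALLEL_IDLE */
    else
        _bt_parallel_done(scan);    /* BTPARALLEL_DONE; wakes sleeping workers */
}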

◆ btbeginscan()

IndexScanDesc btbeginscan ( Relation  rel,
int  nkeys,
int  norderbys 
)

Definition at line 343 of file nbtree.c.

344 {
345  IndexScanDesc scan;
346  BTScanOpaque so;
347 
348  /* no order by operators allowed */
349  Assert(norderbys == 0);
350 
351  /* get the scan */
352  scan = RelationGetIndexScan(rel, nkeys, norderbys);
353 
354  /* allocate private workspace */
355  so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
356  BTScanPosInvalidate(so->currPos);
357  BTScanPosInvalidate(so->markPos);
358  if (scan->numberOfKeys > 0)
359  so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
360  else
361  so->keyData = NULL;
362 
363  so->arrayKeyData = NULL; /* assume no array keys for now */
364  so->numArrayKeys = 0;
365  so->arrayKeys = NULL;
366  so->arrayContext = NULL;
367 
368  so->killedItems = NULL; /* until needed */
369  so->numKilled = 0;
370 
371  /*
372  * We don't know yet whether the scan will be index-only, so we do not
373  * allocate the tuple workspace arrays until btrescan. However, we set up
374  * scan->xs_itupdesc whether we'll need it or not, since that's so cheap.
375  */
376  so->currTuples = so->markTuples = NULL;
377 
378  scan->xs_itupdesc = RelationGetDescr(rel);
379 
380  scan->opaque = so;
381 
382  return scan;
383 }

References BTScanOpaqueData::arrayContext, BTScanOpaqueData::arrayKeyData, BTScanOpaqueData::arrayKeys, Assert(), BTScanPosInvalidate, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanOpaqueData::keyData, BTScanOpaqueData::killedItems, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanOpaqueData::numArrayKeys, IndexScanDescData::numberOfKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), RelationGetDescr, RelationGetIndexScan(), and IndexScanDescData::xs_itupdesc.

Referenced by bthandler().
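For orientation: callers never invoke btbeginscan() directly but reach it through the generic index API in access/genam.h. The following is a hedged sketch of the scan lifecycle (index_beginscan(), index_rescan(), index_getnext_tid(), and index_endscan() are real functions from that layer; the enclosing function, its arguments, and the int4 equality key are illustrative assumptions).

static void
example_int4_eq_scan(Relation heapRel, Relation indexRel, Snapshot snapshot)
{
    ScanKeyData skey;
    IndexScanDesc scan;
    ItemPointer tid;

    /* equality qual on the first index column (assumes an int4 column) */
    ScanKeyInit(&skey, 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(42));

    scan = index_beginscan(heapRel, indexRel, snapshot, 1, 0); /* -> btbeginscan() */
    index_rescan(scan, &skey, 1, NULL, 0);                     /* -> btrescan() */

    while ((tid = index_getnext_tid(scan, ForwardScanDirection)) != NULL)
    {
        /* each iteration advances the scan via btgettuple() */
    }

    index_endscan(scan);                                       /* -> btendscan() */
}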

◆ btbuildempty()

void btbuildempty ( Relation  index)

Definition at line 151 of file nbtree.c.

152 {
153  Page metapage;
154 
155  /* Construct metapage. */
156  metapage = (Page) palloc(BLCKSZ);
157  _bt_initmetapage(metapage, P_NONE, 0, _bt_allequalimage(index, false));
158 
159  /*
160  * Write the page and log it. It might seem that an immediate sync would
161  * be sufficient to guarantee that the file exists on disk, but recovery
162  * itself might remove it while replaying, for example, an
163  * XLOG_DBASE_CREATE* or XLOG_TBLSPC_CREATE record. Therefore, we need
164  * this even when wal_level=minimal.
165  */
166  PageSetChecksumInplace(metapage, BTREE_METAPAGE);
167  smgrwrite(RelationGetSmgr(index), INIT_FORKNUM, BTREE_METAPAGE,
168  (char *) metapage, true);
169  log_newpage(&RelationGetSmgr(index)->smgr_rlocator.locator, INIT_FORKNUM,
170  BTREE_METAPAGE, metapage, true);
171 
172  /*
173  * An immediate sync is required even if we xlog'd the page, because the
174  * write did not go through shared_buffers and therefore a concurrent
175  * checkpoint may have moved the redo pointer past our xlog record.
176  */
177  smgrimmedsync(RelationGetSmgr(index), INIT_FORKNUM);
178 }

References _bt_allequalimage(), _bt_initmetapage(), BTREE_METAPAGE, INIT_FORKNUM, log_newpage(), P_NONE, PageSetChecksumInplace(), palloc(), RelationGetSmgr(), smgrimmedsync(), and smgrwrite().

Referenced by bthandler().

◆ btbulkdelete()

IndexBulkDeleteResult * btbulkdelete ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats,
IndexBulkDeleteCallback  callback,
void *  callback_state 
)

Definition at line 785 of file nbtree.c.

787 {
788  Relation rel = info->index;
789  BTCycleId cycleid;
790 
791  /* allocate stats if first time through, else re-use existing struct */
792  if (stats == NULL)
793  stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
794 
795  /* Establish the vacuum cycle ID to use for this scan */
796  /* The ENSURE stuff ensures we clean up shared memory on failure */
797  PG_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
798  {
799  cycleid = _bt_start_vacuum(rel);
800 
801  btvacuumscan(info, stats, callback, callback_state, cycleid);
802  }
803  PG_END_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
804  _bt_end_vacuum(rel);
805 
806  return stats;
807 }

References _bt_end_vacuum(), _bt_end_vacuum_callback(), _bt_start_vacuum(), btvacuumscan(), callback(), IndexVacuumInfo::index, palloc0(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, and PointerGetDatum().

Referenced by bthandler().
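The callback argument follows the IndexBulkDeleteCallback contract from access/genam.h: it must return true for each heap TID whose index entries should be removed. A minimal hedged sketch follows; the state type and lookup helper are hypothetical (in core VACUUM, the dead-TID lookup in vacuumlazy.c plays this role).

static bool
example_tid_reaped(ItemPointer itemptr, void *state)
{
    MyDeadTidSet *dead = (MyDeadTidSet *) state;    /* hypothetical type */

    /* true => TID is dead, so btbulkdelete() removes its index entries */
    return my_dead_tid_member(dead, itemptr);       /* hypothetical lookup */
}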

◆ btcanreturn()

bool btcanreturn ( Relation  index,
int  attno 
)

Definition at line 1418 of file nbtree.c.

1419 {
1420  return true;
1421 }

Referenced by bthandler().

◆ btendscan()

void btendscan ( IndexScanDesc  scan)

Definition at line 448 of file nbtree.c.

449 {
450  BTScanOpaque so = (BTScanOpaque) scan->opaque;
451 
452  /* we aren't holding any read locks, but gotta drop the pins */
453  if (BTScanPosIsValid(so->currPos))
454  {
455  /* Before leaving current page, deal with any killed items */
456  if (so->numKilled > 0)
457  _bt_killitems(scan);
458  BTScanPosUnpinIfPinned(so->currPos);
459  }
460 
461  so->markItemIndex = -1;
462  BTScanPosUnpinIfPinned(so->markPos);
463 
464  /* No need to invalidate positions, the RAM is about to be freed. */
465 
466  /* Release storage */
467  if (so->keyData != NULL)
468  pfree(so->keyData);
469  /* so->arrayKeyData and so->arrayKeys are in arrayContext */
470  if (so->arrayContext != NULL)
471  MemoryContextDelete(so->arrayContext);
472  if (so->killedItems != NULL)
473  pfree(so->killedItems);
474  if (so->currTuples != NULL)
475  pfree(so->currTuples);
476  /* so->markTuples should not be pfree'd, see btrescan */
477  pfree(so);
478 }

References _bt_killitems(), BTScanOpaqueData::arrayContext, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanOpaqueData::keyData, BTScanOpaqueData::killedItems, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, MemoryContextDelete(), BTScanOpaqueData::numKilled, IndexScanDescData::opaque, and pfree().

Referenced by bthandler().

◆ btestimateparallelscan()

Size btestimateparallelscan ( void  )

Definition at line 570 of file nbtree.c.

571 {
572  return sizeof(BTParallelScanDescData);
573 }

Referenced by bthandler().
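A hedged note on how the size is consumed: the generic layer in access/genam.c reserves the returned bytes in the parallel DSM segment and later points btinitparallelscan() at them. index_parallelscan_estimate() and index_parallelscan_initialize() are real functions there; the enclosing function and its arguments are illustrative.

static void
example_setup_shared_scan(Relation heapRel, Relation indexRel,
                          Snapshot snapshot, ParallelIndexScanDesc target)
{
    /* sizing: includes the btestimateparallelscan() contribution */
    Size sz = index_parallelscan_estimate(indexRel, snapshot);

    /* ... 'sz' bytes would be reserved in the DSM segment here ... */
    (void) sz;

    /* initialization reaches btinitparallelscan() for a btree index */
    index_parallelscan_initialize(heapRel, indexRel, snapshot, target);
}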

◆ btgetbitmap()

int64 btgetbitmap ( IndexScanDesc  scan,
TIDBitmap tbm 
)

Definition at line 285 of file nbtree.c.

286 {
287  BTScanOpaque so = (BTScanOpaque) scan->opaque;
288  int64 ntids = 0;
289  ItemPointer heapTid;
290 
291  /*
292  * If we have any array keys, initialize them.
293  */
294  if (so->numArrayKeys)
295  {
296  /* punt if we have any unsatisfiable array keys */
297  if (so->numArrayKeys < 0)
298  return ntids;
299 
300  _bt_start_array_keys(scan, ForwardScanDirection);
301  }
302 
303  /* This loop handles advancing to the next array elements, if any */
304  do
305  {
306  /* Fetch the first page & tuple */
307  if (_bt_first(scan, ForwardScanDirection))
308  {
309  /* Save tuple ID, and continue scanning */
310  heapTid = &scan->xs_heaptid;
311  tbm_add_tuples(tbm, heapTid, 1, false);
312  ntids++;
313 
314  for (;;)
315  {
316  /*
317  * Advance to next tuple within page. This is the same as the
318  * easy case in _bt_next().
319  */
320  if (++so->currPos.itemIndex > so->currPos.lastItem)
321  {
322  /* let _bt_next do the heavy lifting */
323  if (!_bt_next(scan, ForwardScanDirection))
324  break;
325  }
326 
327  /* Save tuple ID, and continue scanning */
328  heapTid = &so->currPos.items[so->currPos.itemIndex].heapTid;
329  tbm_add_tuples(tbm, heapTid, 1, false);
330  ntids++;
331  }
332  }
333  /* Now see if we have more array keys to deal with */
334  } while (so->numArrayKeys && _bt_advance_array_keys(scan, ForwardScanDirection));
335 
336  return ntids;
337 }

References _bt_advance_array_keys(), _bt_first(), _bt_next(), _bt_start_array_keys(), BTScanOpaqueData::currPos, ForwardScanDirection, BTScanPosItem::heapTid, BTScanPosData::itemIndex, BTScanPosData::items, BTScanPosData::lastItem, BTScanOpaqueData::numArrayKeys, IndexScanDescData::opaque, tbm_add_tuples(), and IndexScanDescData::xs_heaptid.

Referenced by bthandler().

◆ btgettuple()

bool btgettuple ( IndexScanDesc  scan,
ScanDirection  dir 
)

Definition at line 211 of file nbtree.c.

212 {
213  BTScanOpaque so = (BTScanOpaque) scan->opaque;
214  bool res;
215 
216  /* btree indexes are never lossy */
217  scan->xs_recheck = false;
218 
219  /*
220  * If we have any array keys, initialize them during first call for a
221  * scan. We can't do this in btrescan because we don't know the scan
222  * direction at that time.
223  */
224  if (so->numArrayKeys && !BTScanPosIsValid(so->currPos))
225  {
226  /* punt if we have any unsatisfiable array keys */
227  if (so->numArrayKeys < 0)
228  return false;
229 
230  _bt_start_array_keys(scan, dir);
231  }
232 
233  /* This loop handles advancing to the next array elements, if any */
234  do
235  {
236  /*
237  * If we've already initialized this scan, we can just advance it in
238  * the appropriate direction. If we haven't done so yet, we call
239  * _bt_first() to get the first item in the scan.
240  */
241  if (!BTScanPosIsValid(so->currPos))
242  res = _bt_first(scan, dir);
243  else
244  {
245  /*
246  * Check to see if we should kill the previously-fetched tuple.
247  */
248  if (scan->kill_prior_tuple)
249  {
250  /*
251  * Yes, remember it for later. (We'll deal with all such
252  * tuples at once right before leaving the index page.) The
253  * test for numKilled overrun is not just paranoia: if the
254  * caller reverses direction in the indexscan then the same
255  * item might get entered multiple times. It's not worth
256  * trying to optimize that, so we don't detect it, but instead
257  * just forget any excess entries.
258  */
259  if (so->killedItems == NULL)
260  so->killedItems = (int *)
261  palloc(MaxTIDsPerBTreePage * sizeof(int));
262  if (so->numKilled < MaxTIDsPerBTreePage)
263  so->killedItems[so->numKilled++] = so->currPos.itemIndex;
264  }
265 
266  /*
267  * Now continue the scan.
268  */
269  res = _bt_next(scan, dir);
270  }
271 
272  /* If we have a tuple, return it ... */
273  if (res)
274  break;
275  /* ... otherwise see if we have more array keys to deal with */
276  } while (so->numArrayKeys && _bt_advance_array_keys(scan, dir));
277 
278  return res;
279 }

References _bt_advance_array_keys(), _bt_first(), _bt_next(), _bt_start_array_keys(), BTScanPosIsValid, BTScanOpaqueData::currPos, BTScanPosData::itemIndex, IndexScanDescData::kill_prior_tuple, BTScanOpaqueData::killedItems, MaxTIDsPerBTreePage, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), and IndexScanDescData::xs_recheck.

Referenced by bthandler().

◆ bthandler()

Datum bthandler ( PG_FUNCTION_ARGS  )

Definition at line 96 of file nbtree.c.

97 {
98  IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
99 
100  amroutine->amstrategies = BTMaxStrategyNumber;
101  amroutine->amsupport = BTNProcs;
102  amroutine->amoptsprocnum = BTOPTIONS_PROC;
103  amroutine->amcanorder = true;
104  amroutine->amcanorderbyop = false;
105  amroutine->amcanbackward = true;
106  amroutine->amcanunique = true;
107  amroutine->amcanmulticol = true;
108  amroutine->amoptionalkey = true;
109  amroutine->amsearcharray = true;
110  amroutine->amsearchnulls = true;
111  amroutine->amstorage = false;
112  amroutine->amclusterable = true;
113  amroutine->ampredlocks = true;
114  amroutine->amcanparallel = true;
115  amroutine->amcaninclude = true;
116  amroutine->amusemaintenanceworkmem = false;
117  amroutine->amparallelvacuumoptions =
118  VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_COND_CLEANUP;
119  amroutine->amkeytype = InvalidOid;
120 
121  amroutine->ambuild = btbuild;
122  amroutine->ambuildempty = btbuildempty;
123  amroutine->aminsert = btinsert;
124  amroutine->ambulkdelete = btbulkdelete;
125  amroutine->amvacuumcleanup = btvacuumcleanup;
126  amroutine->amcanreturn = btcanreturn;
127  amroutine->amcostestimate = btcostestimate;
128  amroutine->amoptions = btoptions;
129  amroutine->amproperty = btproperty;
130  amroutine->ambuildphasename = btbuildphasename;
131  amroutine->amvalidate = btvalidate;
132  amroutine->amadjustmembers = btadjustmembers;
133  amroutine->ambeginscan = btbeginscan;
134  amroutine->amrescan = btrescan;
135  amroutine->amgettuple = btgettuple;
136  amroutine->amgetbitmap = btgetbitmap;
137  amroutine->amendscan = btendscan;
138  amroutine->ammarkpos = btmarkpos;
139  amroutine->amrestrpos = btrestrpos;
140  amroutine->amestimateparallelscan = btestimateparallelscan;
141  amroutine->aminitparallelscan = btinitparallelscan;
142  amroutine->amparallelrescan = btparallelrescan;
143 
144  PG_RETURN_POINTER(amroutine);
145 }

References IndexAmRoutine::amadjustmembers, IndexAmRoutine::ambeginscan, IndexAmRoutine::ambuild, IndexAmRoutine::ambuildempty, IndexAmRoutine::ambuildphasename, IndexAmRoutine::ambulkdelete, IndexAmRoutine::amcanbackward, IndexAmRoutine::amcaninclude, IndexAmRoutine::amcanmulticol, IndexAmRoutine::amcanorder, IndexAmRoutine::amcanorderbyop, IndexAmRoutine::amcanparallel, IndexAmRoutine::amcanreturn, IndexAmRoutine::amcanunique, IndexAmRoutine::amclusterable, IndexAmRoutine::amcostestimate, IndexAmRoutine::amendscan, IndexAmRoutine::amestimateparallelscan, IndexAmRoutine::amgetbitmap, IndexAmRoutine::amgettuple, IndexAmRoutine::aminitparallelscan, IndexAmRoutine::aminsert, IndexAmRoutine::amkeytype, IndexAmRoutine::ammarkpos, IndexAmRoutine::amoptionalkey, IndexAmRoutine::amoptions, IndexAmRoutine::amoptsprocnum, IndexAmRoutine::amparallelrescan, IndexAmRoutine::amparallelvacuumoptions, IndexAmRoutine::ampredlocks, IndexAmRoutine::amproperty, IndexAmRoutine::amrescan, IndexAmRoutine::amrestrpos, IndexAmRoutine::amsearcharray, IndexAmRoutine::amsearchnulls, IndexAmRoutine::amstorage, IndexAmRoutine::amstrategies, IndexAmRoutine::amsupport, IndexAmRoutine::amusemaintenanceworkmem, IndexAmRoutine::amvacuumcleanup, IndexAmRoutine::amvalidate, btadjustmembers(), btbeginscan(), btbuild(), btbuildempty(), btbuildphasename(), btbulkdelete(), btcanreturn(), btcostestimate(), btendscan(), btestimateparallelscan(), btgetbitmap(), btgettuple(), btinitparallelscan(), btinsert(), btmarkpos(), BTMaxStrategyNumber, BTNProcs, btoptions(), BTOPTIONS_PROC, btparallelrescan(), btproperty(), btrescan(), btrestrpos(), btvacuumcleanup(), btvalidate(), InvalidOid, makeNode, PG_RETURN_POINTER, VACUUM_OPTION_PARALLEL_BULKDEL, and VACUUM_OPTION_PARALLEL_COND_CLEANUP.
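For orientation, a hedged sketch of how the handler's result is consumed: core code obtains the struct through GetIndexAmRoutine() (a real function declared in access/amapi.h) rather than calling bthandler() directly; amhandler_oid below is a hypothetical variable holding pg_am.amhandler for btree.

IndexAmRoutine *routine = GetIndexAmRoutine(amhandler_oid);

/* the callbacks installed above are then invoked through the struct */
Assert(routine->amcanorder && routine->ambeginscan == btbeginscan);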

◆ btinitparallelscan()

void btinitparallelscan ( void *  target)

Definition at line 579 of file nbtree.c.

580 {
581  BTParallelScanDesc bt_target = (BTParallelScanDesc) target;
582 
583  SpinLockInit(&bt_target->btps_mutex);
584  bt_target->btps_scanPage = InvalidBlockNumber;
585  bt_target->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
586  bt_target->btps_arrayKeyCount = 0;
587  ConditionVariableInit(&bt_target->btps_cv);
588 }

References BTPARALLEL_NOT_INITIALIZED, BTParallelScanDescData::btps_arrayKeyCount, BTParallelScanDescData::btps_cv, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, ConditionVariableInit(), InvalidBlockNumber, and SpinLockInit.

Referenced by bthandler().

◆ btinsert()

bool btinsert ( Relation  rel,
Datum *  values,
bool *  isnull,
ItemPointer  ht_ctid,
Relation  heapRel,
IndexUniqueCheck  checkUnique,
bool  indexUnchanged,
IndexInfo indexInfo 
)

Definition at line 187 of file nbtree.c.

192 {
193  bool result;
194  IndexTuple itup;
195 
196  /* generate an index tuple */
197  itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
198  itup->t_tid = *ht_ctid;
199 
200  result = _bt_doinsert(rel, itup, checkUnique, indexUnchanged, heapRel);
201 
202  pfree(itup);
203 
204  return result;
205 }

References _bt_doinsert(), index_form_tuple(), pfree(), RelationGetDescr, IndexTupleData::t_tid, and values.

Referenced by bthandler().

◆ btmarkpos()

void btmarkpos ( IndexScanDesc  scan)

Definition at line 484 of file nbtree.c.

485 {
486  BTScanOpaque so = (BTScanOpaque) scan->opaque;
487 
488  /* There may be an old mark with a pin (but no lock). */
489  BTScanPosUnpinIfPinned(so->markPos);
490 
491  /*
492  * Just record the current itemIndex. If we later step to next page
493  * before releasing the marked position, _bt_steppage makes a full copy of
494  * the currPos struct in markPos. If (as often happens) the mark is moved
495  * before we leave the page, we don't have to do that work.
496  */
497  if (BTScanPosIsValid(so->currPos))
498  so->markItemIndex = so->currPos.itemIndex;
499  else
500  {
501  BTScanPosInvalidate(so->markPos);
502  so->markItemIndex = -1;
503  }
504 
505  /* Also record the current positions of any array keys */
506  if (so->numArrayKeys)
507  _bt_mark_array_keys(scan);
508 }

References BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanPosData::itemIndex, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, and IndexScanDescData::opaque.

Referenced by bthandler().
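A hedged usage note: mark and restore are reached through the generic wrappers index_markpos() and index_restrpos() in access/genam.c (both real functions; merge join is their main consumer), so a typical caller looks roughly like this:

index_markpos(scan);    /* -> btmarkpos(): just records currPos.itemIndex */

/* ... fetch a few more tuples with index_getnext_tid() ... */

index_restrpos(scan);   /* -> btrestrpos(): return to the marked item */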

◆ btparallelrescan()

void btparallelrescan ( IndexScanDesc  scan)

Definition at line 594 of file nbtree.c.

595 {
596  BTParallelScanDesc btscan;
597  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
598 
599  Assert(parallel_scan);
600 
601  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
602  parallel_scan->ps_offset);
603 
604  /*
605  * In theory, we don't need to acquire the spinlock here, because there
606  * shouldn't be any other workers running at this point, but we do so for
607  * consistency.
608  */
609  SpinLockAcquire(&btscan->btps_mutex);
610  btscan->btps_scanPage = InvalidBlockNumber;
611  btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
612  btscan->btps_arrayKeyCount = 0;
613  SpinLockRelease(&btscan->btps_mutex);
614 }

References Assert(), BTPARALLEL_NOT_INITIALIZED, BTParallelScanDescData::btps_arrayKeyCount, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, InvalidBlockNumber, OffsetToPointer, IndexScanDescData::parallel_scan, ParallelIndexScanDescData::ps_offset, SpinLockAcquire, and SpinLockRelease.

Referenced by bthandler().

◆ btreevacuumposting()

static BTVacuumPosting btreevacuumposting ( BTVacState vstate,
IndexTuple  posting,
OffsetNumber  updatedoffset,
int *  nremaining 
)
static

Definition at line 1369 of file nbtree.c.

1371 {
1372  int live = 0;
1373  int nitem = BTreeTupleGetNPosting(posting);
1374  ItemPointer items = BTreeTupleGetPosting(posting);
1375  BTVacuumPosting vacposting = NULL;
1376 
1377  for (int i = 0; i < nitem; i++)
1378  {
1379  if (!vstate->callback(items + i, vstate->callback_state))
1380  {
1381  /* Live table TID */
1382  live++;
1383  }
1384  else if (vacposting == NULL)
1385  {
1386  /*
1387  * First dead table TID encountered.
1388  *
1389  * It's now clear that we need to delete one or more dead table
1390  * TIDs, so start maintaining metadata describing how to update
1391  * existing posting list tuple.
1392  */
1393  vacposting = palloc(offsetof(BTVacuumPostingData, deletetids) +
1394  nitem * sizeof(uint16));
1395 
1396  vacposting->itup = posting;
1397  vacposting->updatedoffset = updatedoffset;
1398  vacposting->ndeletedtids = 0;
1399  vacposting->deletetids[vacposting->ndeletedtids++] = i;
1400  }
1401  else
1402  {
1403  /* Second or subsequent dead table TID */
1404  vacposting->deletetids[vacposting->ndeletedtids++] = i;
1405  }
1406  }
1407 
1408  *nremaining = live;
1409  return vacposting;
1410 }

References BTreeTupleGetNPosting(), BTreeTupleGetPosting(), BTVacState::callback, BTVacState::callback_state, BTVacuumPostingData::deletetids, i, BTVacuumPostingData::itup, BTVacuumPostingData::ndeletedtids, palloc(), and BTVacuumPostingData::updatedoffset.

Referenced by btvacuumpage().
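For example, if a posting list tuple holds five table TIDs and the callback reports two of them dead, the returned BTVacuumPosting records those two positions in deletetids (ndeletedtids = 2) and *nremaining is set to 3; if all five TIDs remain live, the function returns NULL and no update is needed.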

◆ btrescan()

void btrescan ( IndexScanDesc  scan,
ScanKey  scankey,
int  nscankeys,
ScanKey  orderbys,
int  norderbys 
)

Definition at line 389 of file nbtree.c.

391 {
392  BTScanOpaque so = (BTScanOpaque) scan->opaque;
393 
394  /* we aren't holding any read locks, but gotta drop the pins */
395  if (BTScanPosIsValid(so->currPos))
396  {
397  /* Before leaving current page, deal with any killed items */
398  if (so->numKilled > 0)
399  _bt_killitems(scan);
400  BTScanPosUnpinIfPinned(so->currPos);
401  BTScanPosInvalidate(so->currPos);
402  }
403 
404  so->markItemIndex = -1;
405  so->arrayKeyCount = 0;
406  BTScanPosUnpinIfPinned(so->markPos);
407  BTScanPosInvalidate(so->markPos);
408 
409  /*
410  * Allocate tuple workspace arrays, if needed for an index-only scan and
411  * not already done in a previous rescan call. To save on palloc
412  * overhead, both workspaces are allocated as one palloc block; only this
413  * function and btendscan know that.
414  *
415  * NOTE: this data structure also makes it safe to return data from a
416  * "name" column, even though btree name_ops uses an underlying storage
417  * datatype of cstring. The risk there is that "name" is supposed to be
418  * padded to NAMEDATALEN, but the actual index tuple is probably shorter.
419  * However, since we only return data out of tuples sitting in the
420  * currTuples array, a fetch of NAMEDATALEN bytes can at worst pull some
421  * data out of the markTuples array --- running off the end of memory for
422  * a SIGSEGV is not possible. Yeah, this is ugly as sin, but it beats
423  * adding special-case treatment for name_ops elsewhere.
424  */
425  if (scan->xs_want_itup && so->currTuples == NULL)
426  {
427  so->currTuples = (char *) palloc(BLCKSZ * 2);
428  so->markTuples = so->currTuples + BLCKSZ;
429  }
430 
431  /*
432  * Reset the scan keys
433  */
434  if (scankey && scan->numberOfKeys > 0)
435  memmove(scan->keyData,
436  scankey,
437  scan->numberOfKeys * sizeof(ScanKeyData));
438  so->numberOfKeys = 0; /* until _bt_preprocess_keys sets it */
439 
440  /* If any keys are SK_SEARCHARRAY type, set up array-key info */
441  _bt_preprocess_array_keys(scan);
442 }

References _bt_killitems(), _bt_preprocess_array_keys(), BTScanOpaqueData::arrayKeyCount, BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, IndexScanDescData::keyData, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanOpaqueData::numberOfKeys, IndexScanDescData::numberOfKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), and IndexScanDescData::xs_want_itup.

Referenced by bthandler().

◆ btrestrpos()

void btrestrpos ( IndexScanDesc  scan)

Definition at line 514 of file nbtree.c.

515 {
516  BTScanOpaque so = (BTScanOpaque) scan->opaque;
517 
518  /* Restore the marked positions of any array keys */
519  if (so->numArrayKeys)
520  _bt_restore_array_keys(scan);
521 
522  if (so->markItemIndex >= 0)
523  {
524  /*
525  * The scan has never moved to a new page since the last mark. Just
526  * restore the itemIndex.
527  *
528  * NB: In this case we can't count on anything in so->markPos to be
529  * accurate.
530  */
531  so->currPos.itemIndex = so->markItemIndex;
532  }
533  else
534  {
535  /*
536  * The scan moved to a new page after last mark or restore, and we are
537  * now restoring to the marked page. We aren't holding any read
538  * locks, but if we're still holding the pin for the current position,
539  * we must drop it.
540  */
541  if (BTScanPosIsValid(so->currPos))
542  {
543  /* Before leaving current page, deal with any killed items */
544  if (so->numKilled > 0)
545  _bt_killitems(scan);
546  BTScanPosUnpinIfPinned(so->currPos);
547  }
548 
549  if (BTScanPosIsValid(so->markPos))
550  {
551  /* bump pin on mark buffer for assignment to current buffer */
552  if (BTScanPosIsPinned(so->markPos))
553  IncrBufferRefCount(so->markPos.buf);
554  memcpy(&so->currPos, &so->markPos,
555  offsetof(BTScanPosData, items[1]) +
556  so->markPos.lastItem * sizeof(BTScanPosItem));
557  if (so->currTuples)
558  memcpy(so->currTuples, so->markTuples,
559  so->markPos.nextTupleOffset);
560  }
561  else
562  BTScanPosInvalidate(so->currPos);
563  }
564 }

References _bt_killitems(), _bt_restore_array_keys(), BTScanPosInvalidate, BTScanPosIsPinned, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanPosData::buf, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, IncrBufferRefCount(), BTScanPosData::itemIndex, BTScanPosData::lastItem, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanPosData::nextTupleOffset, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numKilled, and IndexScanDescData::opaque.

Referenced by bthandler().

◆ btvacuumcleanup()

IndexBulkDeleteResult * btvacuumcleanup ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats 
)

Definition at line 815 of file nbtree.c.

816 {
817  BlockNumber num_delpages;
818 
819  /* No-op in ANALYZE ONLY mode */
820  if (info->analyze_only)
821  return stats;
822 
823  /*
824  * If btbulkdelete was called, we need not do anything (we just maintain
825  * the information used within _bt_vacuum_needs_cleanup() by calling
826  * _bt_set_cleanup_info() below).
827  *
828  * If btbulkdelete was _not_ called, then we have a choice to make: we
829  * must decide whether or not a btvacuumscan() call is needed now (i.e.
830  * whether the ongoing VACUUM operation can entirely avoid a physical scan
831  * of the index). A call to _bt_vacuum_needs_cleanup() decides it for us
832  * now.
833  */
834  if (stats == NULL)
835  {
836  /* Check if VACUUM operation can entirely avoid btvacuumscan() call */
837  if (!_bt_vacuum_needs_cleanup(info->index))
838  return NULL;
839 
840  /*
841  * Since we aren't going to actually delete any leaf items, there's no
842  * need to go through all the vacuum-cycle-ID pushups here.
843  *
844  * Posting list tuples are a source of inaccuracy for cleanup-only
845  * scans. btvacuumscan() will assume that the number of index tuples
846  * from each page can be used as num_index_tuples, even though
847  * num_index_tuples is supposed to represent the number of TIDs in the
848  * index. This naive approach can underestimate the number of tuples
849  * in the index significantly.
850  *
851  * We handle the problem by making num_index_tuples an estimate in
852  * cleanup-only case.
853  */
854  stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
855  btvacuumscan(info, stats, NULL, NULL, 0);
856  stats->estimated_count = true;
857  }
858 
859  /*
860  * Maintain num_delpages value in metapage for _bt_vacuum_needs_cleanup().
861  *
862  * num_delpages is the number of deleted pages now in the index that were
863  * not safe to place in the FSM to be recycled just yet. num_delpages is
864  * greater than 0 only when _bt_pagedel() actually deleted pages during
865  * our call to btvacuumscan(). Even then, _bt_pendingfsm_finalize() must
866  * have failed to place any newly deleted pages in the FSM just moments
867  * ago. (Actually, there are edge cases where recycling of the current
868  * VACUUM's newly deleted pages does not even become safe by the time the
869  * next VACUUM comes around. See nbtree/README.)
870  */
871  Assert(stats->pages_deleted >= stats->pages_free);
872  num_delpages = stats->pages_deleted - stats->pages_free;
873  _bt_set_cleanup_info(info->index, num_delpages);
874 
875  /*
876  * It's quite possible for us to be fooled by concurrent page splits into
877  * double-counting some index tuples, so disbelieve any total that exceeds
878  * the underlying heap's count ... if we know that accurately. Otherwise
879  * this might just make matters worse.
880  */
881  if (!info->estimated_count)
882  {
883  if (stats->num_index_tuples > info->num_heap_tuples)
884  stats->num_index_tuples = info->num_heap_tuples;
885  }
886 
887  return stats;
888 }

References _bt_set_cleanup_info(), _bt_vacuum_needs_cleanup(), IndexVacuumInfo::analyze_only, Assert(), btvacuumscan(), IndexVacuumInfo::estimated_count, IndexBulkDeleteResult::estimated_count, IndexVacuumInfo::index, IndexVacuumInfo::num_heap_tuples, IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, and palloc0().

Referenced by bthandler().

◆ btvacuumpage()

static void btvacuumpage ( BTVacState vstate,
BlockNumber  scanblkno 
)
static

Definition at line 1034 of file nbtree.c.

1035 {
1036  IndexVacuumInfo *info = vstate->info;
1037  IndexBulkDeleteResult *stats = vstate->stats;
1038  IndexBulkDeleteCallback callback = vstate->callback;
1039  void *callback_state = vstate->callback_state;
1040  Relation rel = info->index;
1041  bool attempt_pagedel;
1042  BlockNumber blkno,
1043  backtrack_to;
1044  Buffer buf;
1045  Page page;
1046  BTPageOpaque opaque;
1047 
1048  blkno = scanblkno;
1049 
1050 backtrack:
1051 
1052  attempt_pagedel = false;
1053  backtrack_to = P_NONE;
1054 
1055  /* call vacuum_delay_point while not holding any buffer lock */
1056  vacuum_delay_point();
1057 
1058  /*
1059  * We can't use _bt_getbuf() here because it always applies
1060  * _bt_checkpage(), which will barf on an all-zero page. We want to
1061  * recycle all-zero pages, not fail. Also, we want to use a nondefault
1062  * buffer access strategy.
1063  */
1064  buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
1065  info->strategy);
1066  _bt_lockbuf(rel, buf, BT_READ);
1067  page = BufferGetPage(buf);
1068  opaque = NULL;
1069  if (!PageIsNew(page))
1070  {
1071  _bt_checkpage(rel, buf);
1072  opaque = BTPageGetOpaque(page);
1073  }
1074 
1075  Assert(blkno <= scanblkno);
1076  if (blkno != scanblkno)
1077  {
1078  /*
1079  * We're backtracking.
1080  *
1081  * We followed a right link to a sibling leaf page (a page that
1082  * happens to be from a block located before scanblkno). The only
1083  * case we want to do anything with is a live leaf page having the
1084  * current vacuum cycle ID.
1085  *
1086  * The page had better be in a state that's consistent with what we
1087  * expect. Check for conditions that imply corruption in passing. It
1088  * can't be half-dead because only an interrupted VACUUM process can
1089  * leave pages in that state, so we'd definitely have dealt with it
1090  * back when the page was the scanblkno page (half-dead pages are
1091  * always marked fully deleted by _bt_pagedel()). This assumes that
1092  * there can be only one vacuum process running at a time.
1093  */
1094  if (!opaque || !P_ISLEAF(opaque) || P_ISHALFDEAD(opaque))
1095  {
1096  Assert(false);
1097  ereport(LOG,
1098  (errcode(ERRCODE_INDEX_CORRUPTED),
1099  errmsg_internal("right sibling %u of scanblkno %u unexpectedly in an inconsistent state in index \"%s\"",
1100  blkno, scanblkno, RelationGetRelationName(rel))));
1101  _bt_relbuf(rel, buf);
1102  return;
1103  }
1104 
1105  /*
1106  * We may have already processed the page in an earlier call, when the
1107  * page was scanblkno. This happens when the leaf page split occurred
1108  * after the scan began, but before the right sibling page became the
1109  * scanblkno.
1110  *
1111  * Page may also have been deleted by current btvacuumpage() call,
1112  * since _bt_pagedel() sometimes deletes the right sibling page of
1113  * scanblkno in passing (it does so after we decided where to
1114  * backtrack to). We don't need to process this page as a deleted
1115  * page a second time now (in fact, it would be wrong to count it as a
1116  * deleted page in the bulk delete statistics a second time).
1117  */
1118  if (opaque->btpo_cycleid != vstate->cycleid || P_ISDELETED(opaque))
1119  {
1120  /* Done with current scanblkno (and all lower split pages) */
1121  _bt_relbuf(rel, buf);
1122  return;
1123  }
1124  }
1125 
1126  if (!opaque || BTPageIsRecyclable(page))
1127  {
1128  /* Okay to recycle this page (which could be leaf or internal) */
1129  RecordFreeIndexPage(rel, blkno);
1130  stats->pages_deleted++;
1131  stats->pages_free++;
1132  }
1133  else if (P_ISDELETED(opaque))
1134  {
1135  /*
1136  * Already deleted page (which could be leaf or internal). Can't
1137  * recycle yet.
1138  */
1139  stats->pages_deleted++;
1140  }
1141  else if (P_ISHALFDEAD(opaque))
1142  {
1143  /* Half-dead leaf page (from interrupted VACUUM) -- finish deleting */
1144  attempt_pagedel = true;
1145 
1146  /*
1147  * _bt_pagedel() will increment both pages_newly_deleted and
1148  * pages_deleted stats in all cases (barring corruption)
1149  */
1150  }
1151  else if (P_ISLEAF(opaque))
1152  {
1153  OffsetNumber deletable[MaxIndexTuplesPerPage];
1154  int ndeletable;
1155  BTVacuumPosting updatable[MaxIndexTuplesPerPage];
1156  int nupdatable;
1157  OffsetNumber offnum,
1158  minoff,
1159  maxoff;
1160  int nhtidsdead,
1161  nhtidslive;
1162 
1163  /*
1164  * Trade in the initial read lock for a full cleanup lock on this
1165  * page. We must get such a lock on every leaf page over the course
1166  * of the vacuum scan, whether or not it actually contains any
1167  * deletable tuples --- see nbtree/README.
1168  */
1169  _bt_upgradelockbufcleanup(rel, buf);
1170 
1171  /*
1172  * Check whether we need to backtrack to earlier pages. What we are
1173  * concerned about is a page split that happened since we started the
1174  * vacuum scan. If the split moved tuples on the right half of the
1175  * split (i.e. the tuples that sort high) to a block that we already
1176  * passed over, then we might have missed the tuples. We need to
1177  * backtrack now. (Must do this before possibly clearing btpo_cycleid
1178  * or deleting scanblkno page below!)
1179  */
1180  if (vstate->cycleid != 0 &&
1181  opaque->btpo_cycleid == vstate->cycleid &&
1182  !(opaque->btpo_flags & BTP_SPLIT_END) &&
1183  !P_RIGHTMOST(opaque) &&
1184  opaque->btpo_next < scanblkno)
1185  backtrack_to = opaque->btpo_next;
1186 
1187  ndeletable = 0;
1188  nupdatable = 0;
1189  minoff = P_FIRSTDATAKEY(opaque);
1190  maxoff = PageGetMaxOffsetNumber(page);
1191  nhtidsdead = 0;
1192  nhtidslive = 0;
1193  if (callback)
1194  {
1195  /* btbulkdelete callback tells us what to delete (or update) */
1196  for (offnum = minoff;
1197  offnum <= maxoff;
1198  offnum = OffsetNumberNext(offnum))
1199  {
1200  IndexTuple itup;
1201 
1202  itup = (IndexTuple) PageGetItem(page,
1203  PageGetItemId(page, offnum));
1204 
1205  Assert(!BTreeTupleIsPivot(itup));
1206  if (!BTreeTupleIsPosting(itup))
1207  {
1208  /* Regular tuple, standard table TID representation */
1209  if (callback(&itup->t_tid, callback_state))
1210  {
1211  deletable[ndeletable++] = offnum;
1212  nhtidsdead++;
1213  }
1214  else
1215  nhtidslive++;
1216  }
1217  else
1218  {
1219  BTVacuumPosting vacposting;
1220  int nremaining;
1221 
1222  /* Posting list tuple */
1223  vacposting = btreevacuumposting(vstate, itup, offnum,
1224  &nremaining);
1225  if (vacposting == NULL)
1226  {
1227  /*
1228  * All table TIDs from the posting tuple remain, so no
1229  * delete or update required
1230  */
1231  Assert(nremaining == BTreeTupleGetNPosting(itup));
1232  }
1233  else if (nremaining > 0)
1234  {
1235 
1236  /*
1237  * Store metadata about posting list tuple in
1238  * updatable array for entire page. Existing tuple
1239  * will be updated during the later call to
1240  * _bt_delitems_vacuum().
1241  */
1242  Assert(nremaining < BTreeTupleGetNPosting(itup));
1243  updatable[nupdatable++] = vacposting;
1244  nhtidsdead += BTreeTupleGetNPosting(itup) - nremaining;
1245  }
1246  else
1247  {
1248  /*
1249  * All table TIDs from the posting list must be
1250  * deleted. We'll delete the index tuple completely
1251  * (no update required).
1252  */
1253  Assert(nremaining == 0);
1254  deletable[ndeletable++] = offnum;
1255  nhtidsdead += BTreeTupleGetNPosting(itup);
1256  pfree(vacposting);
1257  }
1258 
1259  nhtidslive += nremaining;
1260  }
1261  }
1262  }
1263 
1264  /*
1265  * Apply any needed deletes or updates. We issue just one
1266  * _bt_delitems_vacuum() call per page, so as to minimize WAL traffic.
1267  */
1268  if (ndeletable > 0 || nupdatable > 0)
1269  {
1270  Assert(nhtidsdead >= ndeletable + nupdatable);
1271  _bt_delitems_vacuum(rel, buf, deletable, ndeletable, updatable,
1272  nupdatable);
1273 
1274  stats->tuples_removed += nhtidsdead;
1275  /* must recompute maxoff */
1276  maxoff = PageGetMaxOffsetNumber(page);
1277 
1278  /* can't leak memory here */
1279  for (int i = 0; i < nupdatable; i++)
1280  pfree(updatable[i]);
1281  }
1282  else
1283  {
1284  /*
1285  * If the leaf page has been split during this vacuum cycle, it
1286  * seems worth expending a write to clear btpo_cycleid even if we
1287  * don't have any deletions to do. (If we do, _bt_delitems_vacuum
1288  * takes care of this.) This ensures we won't process the page
1289  * again.
1290  *
1291  * We treat this like a hint-bit update because there's no need to
1292  * WAL-log it.
1293  */
1294  Assert(nhtidsdead == 0);
1295  if (vstate->cycleid != 0 &&
1296  opaque->btpo_cycleid == vstate->cycleid)
1297  {
1298  opaque->btpo_cycleid = 0;
1299  MarkBufferDirtyHint(buf, true);
1300  }
1301  }
1302 
1303  /*
1304  * If the leaf page is now empty, try to delete it; else count the
1305  * live tuples (live table TIDs in posting lists are counted as
1306  * separate live tuples). We don't delete when backtracking, though,
1307  * since that would require teaching _bt_pagedel() about backtracking
1308  * (doesn't seem worth adding more complexity to deal with that).
1309  *
1310  * We don't count the number of live TIDs during cleanup-only calls to
1311  * btvacuumscan (i.e. when callback is not set). We count the number
1312  * of index tuples directly instead. This avoids the expense of
1313  * directly examining all of the tuples on each page. VACUUM will
1314  * treat num_index_tuples as an estimate in cleanup-only case, so it
1315  * doesn't matter that this underestimates num_index_tuples
1316  * significantly in some cases.
1317  */
1318  if (minoff > maxoff)
1319  attempt_pagedel = (blkno == scanblkno);
1320  else if (callback)
1321  stats->num_index_tuples += nhtidslive;
1322  else
1323  stats->num_index_tuples += maxoff - minoff + 1;
1324 
1325  Assert(!attempt_pagedel || nhtidslive == 0);
1326  }
1327 
1328  if (attempt_pagedel)
1329  {
1330  MemoryContext oldcontext;
1331 
1332  /* Run pagedel in a temp context to avoid memory leakage */
1333  MemoryContextReset(vstate->pagedelcontext);
1334  oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
1335 
1336  /*
1337  * _bt_pagedel maintains the bulk delete stats on our behalf;
1338  * pages_newly_deleted and pages_deleted are likely to be incremented
1339  * during call
1340  */
1341  Assert(blkno == scanblkno);
1342  _bt_pagedel(rel, buf, vstate);
1343 
1344  MemoryContextSwitchTo(oldcontext);
1345  /* pagedel released buffer, so we shouldn't */
1346  }
1347  else
1348  _bt_relbuf(rel, buf);
1349 
1350  if (backtrack_to != P_NONE)
1351  {
1352  blkno = backtrack_to;
1353  goto backtrack;
1354  }
1355 }

References _bt_checkpage(), _bt_delitems_vacuum(), _bt_lockbuf(), _bt_pagedel(), _bt_relbuf(), _bt_upgradelockbufcleanup(), Assert(), BT_READ, BTP_SPLIT_END, BTPageGetOpaque, BTPageIsRecyclable(), BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTreeTupleGetNPosting(), BTreeTupleIsPivot(), BTreeTupleIsPosting(), btreevacuumposting(), buf, BufferGetPage(), BTVacState::callback, callback(), BTVacState::callback_state, BTVacState::cycleid, ereport, errcode(), errmsg_internal(), i, IndexVacuumInfo::index, BTVacState::info, LOG, MAIN_FORKNUM, MarkBufferDirtyHint(), MaxIndexTuplesPerPage, MemoryContextReset(), MemoryContextSwitchTo(), IndexBulkDeleteResult::num_index_tuples, OffsetNumberNext, P_FIRSTDATAKEY, P_ISDELETED, P_ISHALFDEAD, P_ISLEAF, P_NONE, P_RIGHTMOST, BTVacState::pagedelcontext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageIsNew(), IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, pfree(), RBM_NORMAL, ReadBufferExtended(), RecordFreeIndexPage(), RelationGetRelationName, BTVacState::stats, IndexVacuumInfo::strategy, IndexTupleData::t_tid, IndexBulkDeleteResult::tuples_removed, and vacuum_delay_point().

Referenced by btvacuumscan().
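The else-branch in the listing above (source lines 1282-1301) is an example of a hint-bit-style update: btpo_cycleid is cleared in place and the buffer is marked dirty with MarkBufferDirtyHint() rather than being WAL-logged. The following minimal sketch isolates that pattern; clear_split_cycleid_hint() is a hypothetical helper written for illustration, not a function in nbtree.c.

#include "postgres.h"
#include "access/nbtree.h"
#include "storage/bufmgr.h"

/*
 * Hypothetical helper showing the hint-bit-style update used above: the
 * change is applied in place and the buffer is marked dirty with
 * MarkBufferDirtyHint() instead of MarkBufferDirty(), so no WAL record
 * is written.  Losing the update in a crash is harmless, because
 * btpo_cycleid only tells a concurrent VACUUM whether the page was
 * split during its cycle; correctness never depends on it.
 */
static void
clear_split_cycleid_hint(Buffer buf, BTCycleId cycleid)
{
	Page		page = BufferGetPage(buf);
	BTPageOpaque opaque = BTPageGetOpaque(page);

	if (cycleid != 0 && opaque->btpo_cycleid == cycleid)
	{
		opaque->btpo_cycleid = 0;
		/* buffer_std = true: the page uses the standard page layout */
		MarkBufferDirtyHint(buf, true);
	}
}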
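Likewise, the attempt_pagedel block runs _bt_pagedel() inside vstate->pagedelcontext, resetting the context before each use so that per-page allocations cannot accumulate across the scan. A sketch of that reset/switch/switch-back discipline, under assumed names (run_step_in_temp_context() and leaky_operation() are illustrative stand-ins, not PostgreSQL APIs):

#include "postgres.h"
#include "utils/memutils.h"

extern void leaky_operation(void);	/* hypothetical; stands in for _bt_pagedel() */

/*
 * Run one potentially leaky step inside a long-lived temporary context.
 * The context is reset before each use, so garbage from the previous
 * step is freed wholesale instead of being pfree'd piecemeal.
 */
static void
run_step_in_temp_context(MemoryContext tmpcxt)
{
	MemoryContext oldcontext;

	MemoryContextReset(tmpcxt);		/* discard allocations from the last step */
	oldcontext = MemoryContextSwitchTo(tmpcxt);

	leaky_operation();

	MemoryContextSwitchTo(oldcontext);
}

The owning code creates the context once with AllocSetContextCreate(CurrentMemoryContext, "_bt_pagedel", ALLOCSET_DEFAULT_SIZES) and destroys it with MemoryContextDelete() when the scan completes, as btvacuumscan() does below.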

◆ btvacuumscan()

static void btvacuumscan ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats,
IndexBulkDeleteCallback  callback,
void *  callback_state,
BTCycleId  cycleid 
)
static

Definition at line 903 of file nbtree.c.

906 {
907  Relation rel = info->index;
908  BTVacState vstate;
909  BlockNumber num_pages;
910  BlockNumber scanblkno;
911  bool needLock;
912 
913  /*
914  * Reset fields that track information about the entire index now. This
915  * avoids double-counting in the case where a single VACUUM command
916  * requires multiple scans of the index.
917  *
918  * Avoid resetting the tuples_removed and pages_newly_deleted fields here,
919  * since they track information about the VACUUM command, and so must last
920  * across each call to btvacuumscan().
921  *
922  * (Note that pages_free is treated as state about the whole index, not
923  * the current VACUUM. This is appropriate because RecordFreeIndexPage()
924  * calls are idempotent, and get repeated for the same deleted pages in
925  * some scenarios. The point for us is to track the number of recyclable
926  * pages in the index at the end of the VACUUM command.)
927  */
928  stats->num_pages = 0;
929  stats->num_index_tuples = 0;
930  stats->pages_deleted = 0;
931  stats->pages_free = 0;
932 
933  /* Set up info to pass down to btvacuumpage */
934  vstate.info = info;
935  vstate.stats = stats;
936  vstate.callback = callback;
937  vstate.callback_state = callback_state;
938  vstate.cycleid = cycleid;
939 
940  /* Create a temporary memory context to run _bt_pagedel in */
942  "_bt_pagedel",
944 
945  /* Initialize vstate fields used by _bt_pendingfsm_finalize */
946  vstate.bufsize = 0;
947  vstate.maxbufsize = 0;
948  vstate.pendingpages = NULL;
949  vstate.npendingpages = 0;
950  /* Consider applying _bt_pendingfsm_finalize optimization */
951  _bt_pendingfsm_init(rel, &vstate, (callback == NULL));
952 
953  /*
954  * The outer loop iterates over all index pages except the metapage, in
955  * physical order (we hope the kernel will cooperate in providing
956  * read-ahead for speed). It is critical that we visit all leaf pages,
957  * including ones added after we start the scan, else we might fail to
958  * delete some deletable tuples. Hence, we must repeatedly check the
959  * relation length. We must acquire the relation-extension lock while
960  * doing so to avoid a race condition: if someone else is extending the
961  * relation, there is a window where bufmgr/smgr have created a new
962  * all-zero page but it hasn't yet been write-locked by _bt_getbuf(). If
963  * we manage to scan such a page here, we'll improperly assume it can be
964  * recycled. Taking the lock synchronizes things enough to prevent a
965  * problem: either num_pages won't include the new page, or _bt_getbuf
966  * already has write lock on the buffer and it will be fully initialized
967  * before we can examine it. Also, we need not worry if a page is added
968  * immediately after we look; the page splitting code already has
969  * write-lock on the left page before it adds a right page, so we must
970  * already have processed any tuples due to be moved into such a page.
971  *
972  * We can skip locking for new or temp relations, however, since no one
973  * else could be accessing them.
974  */
975  needLock = !RELATION_IS_LOCAL(rel);
976 
977  scanblkno = BTREE_METAPAGE + 1;
978  for (;;)
979  {
980  /* Get the current relation length */
 981  if (needLock)
 982  LockRelationForExtension(rel, ExclusiveLock);
 983  num_pages = RelationGetNumberOfBlocks(rel);
 984  if (needLock)
 985  UnlockRelationForExtension(rel, ExclusiveLock);
 986 
 987  if (info->report_progress)
 988  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
 989  num_pages);
990 
991  /* Quit if we've scanned the whole relation */
992  if (scanblkno >= num_pages)
993  break;
994  /* Iterate over pages, then loop back to recheck length */
995  for (; scanblkno < num_pages; scanblkno++)
996  {
997  btvacuumpage(&vstate, scanblkno);
 998  if (info->report_progress)
 999  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1000  scanblkno);
1001  }
1002  }
1003 
1004  /* Set statistics num_pages field to final size of index */
1005  stats->num_pages = num_pages;
1006 
1007  MemoryContextDelete(vstate.pagedelcontext);
1008 
1009  /*
1010  * If there were any calls to _bt_pagedel() during scan of the index then
1011  * see if any of the resulting pages can be placed in the FSM now. When
1012  * it's not safe we'll have to leave it up to a future VACUUM operation.
1013  *
1014  * Finally, if we placed any pages in the FSM (either just now or during
1015  * the scan), forcibly update the upper-level FSM pages to ensure that
1016  * searchers can find them.
1017  */
1018  _bt_pendingfsm_finalize(rel, &vstate);
1019  if (stats->pages_free > 0)
1020  IndexFreeSpaceMapVacuum(rel);
1021 }
void pgstat_progress_update_param(int index, int64 val)
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:156
void IndexFreeSpaceMapVacuum(Relation rel)
Definition: indexfsm.c:71
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:431
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:481
#define ExclusiveLock
Definition: lockdefs.h:42
MemoryContext CurrentMemoryContext
Definition: mcxt.c:124
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:153
void _bt_pendingfsm_finalize(Relation rel, BTVacState *vstate)
Definition: nbtpage.c:2956
void _bt_pendingfsm_init(Relation rel, BTVacState *vstate, bool cleanuponly)
Definition: nbtpage.c:2915
static void btvacuumpage(BTVacState *vstate, BlockNumber scanblkno)
Definition: nbtree.c:1034
#define PROGRESS_SCAN_BLOCKS_DONE
Definition: progress.h:120
#define PROGRESS_SCAN_BLOCKS_TOTAL
Definition: progress.h:119
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:646
BTPendingFSM * pendingpages
Definition: nbtree.h:345
int npendingpages
Definition: nbtree.h:346
int bufsize
Definition: nbtree.h:343
int maxbufsize
Definition: nbtree.h:344
BlockNumber num_pages
Definition: genam.h:76
bool report_progress
Definition: genam.h:48

References _bt_pendingfsm_finalize(), _bt_pendingfsm_init(), ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, BTREE_METAPAGE, btvacuumpage(), BTVacState::bufsize, BTVacState::callback, callback(), BTVacState::callback_state, CurrentMemoryContext, BTVacState::cycleid, ExclusiveLock, IndexVacuumInfo::index, IndexFreeSpaceMapVacuum(), BTVacState::info, LockRelationForExtension(), BTVacState::maxbufsize, MemoryContextDelete(), BTVacState::npendingpages, IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::num_pages, BTVacState::pagedelcontext, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, BTVacState::pendingpages, pgstat_progress_update_param(), PROGRESS_SCAN_BLOCKS_DONE, PROGRESS_SCAN_BLOCKS_TOTAL, RELATION_IS_LOCAL, RelationGetNumberOfBlocks, IndexVacuumInfo::report_progress, BTVacState::stats, and UnlockRelationForExtension().

Referenced by btbulkdelete(), and btvacuumcleanup().
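The length-recheck protocol described in the comment at source lines 953-974 reduces to the following skeleton. This is a simplified sketch under stated assumptions, not the PostgreSQL implementation: process_one_block() is a hypothetical stand-in for btvacuumpage(), and progress reporting and error handling are omitted.

#include "postgres.h"
#include "access/nbtree.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "utils/rel.h"

extern void process_one_block(Relation rel, BlockNumber blkno);	/* hypothetical */

/*
 * Visit every block after the metapage, re-reading the relation length
 * whenever the scan catches up.  Taking the relation-extension lock
 * around RelationGetNumberOfBlocks() closes the race against a
 * concurrent extension that has allocated, but not yet initialized, a
 * new page.
 */
static void
scan_with_length_recheck(Relation rel)
{
	bool		needLock = !RELATION_IS_LOCAL(rel);	/* skip for new/temp rels */
	BlockNumber scanblkno = BTREE_METAPAGE + 1;

	for (;;)
	{
		BlockNumber num_pages;

		if (needLock)
			LockRelationForExtension(rel, ExclusiveLock);
		num_pages = RelationGetNumberOfBlocks(rel);
		if (needLock)
			UnlockRelationForExtension(rel, ExclusiveLock);

		if (scanblkno >= num_pages)
			break;				/* no pages were added; scan is complete */
		for (; scanblkno < num_pages; scanblkno++)
			process_one_block(rel, scanblkno);
	}
}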