PostgreSQL Source Code (git master)
nbtree.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "access/nbtxlog.h"
#include "access/relscan.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "commands/progress.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "nodes/execnodes.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "storage/condition_variable.h"
#include "storage/indexfsm.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/smgr.h"
#include "utils/builtins.h"
#include "utils/index_selfuncs.h"
#include "utils/memutils.h"
Include dependency graph for nbtree.c:

Go to the source code of this file.

Data Structures

struct  BTParallelScanDescData
 

Typedefs

typedef struct BTParallelScanDescData BTParallelScanDescData
 
typedef struct BTParallelScanDescData * BTParallelScanDesc
 

Enumerations

enum  BTPS_State { BTPARALLEL_NOT_INITIALIZED , BTPARALLEL_ADVANCING , BTPARALLEL_IDLE , BTPARALLEL_DONE }
 

Functions

static void btvacuumscan (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid)
 
static void btvacuumpage (BTVacState *vstate, BlockNumber scanblkno)
 
static BTVacuumPosting btreevacuumposting (BTVacState *vstate, IndexTuple posting, OffsetNumber updatedoffset, int *nremaining)
 
Datum bthandler (PG_FUNCTION_ARGS)
 
void btbuildempty (Relation index)
 
bool btinsert (Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo)
 
bool btgettuple (IndexScanDesc scan, ScanDirection dir)
 
int64 btgetbitmap (IndexScanDesc scan, TIDBitmap *tbm)
 
IndexScanDesc btbeginscan (Relation rel, int nkeys, int norderbys)
 
void btrescan (IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
 
void btendscan (IndexScanDesc scan)
 
void btmarkpos (IndexScanDesc scan)
 
void btrestrpos (IndexScanDesc scan)
 
Size btestimateparallelscan (void)
 
void btinitparallelscan (void *target)
 
void btparallelrescan (IndexScanDesc scan)
 
bool _bt_parallel_seize (IndexScanDesc scan, BlockNumber *pageno)
 
void _bt_parallel_release (IndexScanDesc scan, BlockNumber scan_page)
 
void _bt_parallel_done (IndexScanDesc scan)
 
void _bt_parallel_advance_array_keys (IndexScanDesc scan)
 
IndexBulkDeleteResult * btbulkdelete (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
 
IndexBulkDeleteResult * btvacuumcleanup (IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 
bool btcanreturn (Relation index, int attno)
 

Typedef Documentation

◆ BTParallelScanDesc

Definition at line 78 of file nbtree.c.

◆ BTParallelScanDescData

Enumeration Type Documentation

◆ BTPS_State

enum BTPS_State
Enumerator
BTPARALLEL_NOT_INITIALIZED 
BTPARALLEL_ADVANCING 
BTPARALLEL_IDLE 
BTPARALLEL_DONE 

Definition at line 54 of file nbtree.c.

55 {
56  BTPARALLEL_NOT_INITIALIZED,
57  BTPARALLEL_ADVANCING,
58  BTPARALLEL_IDLE,
59  BTPARALLEL_DONE
60 } BTPS_State;
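
The four states gate which backend may advance the shared scan. A rough sketch of the transitions, inferred from the _bt_parallel_* functions documented below (informal, not taken verbatim from nbtree.c):

 BTPARALLEL_NOT_INITIALIZED   initial state, set by btinitparallelscan()
        | _bt_parallel_seize()
        v
 BTPARALLEL_ADVANCING         exactly one worker may advance the scan
        | _bt_parallel_release()
        v
 BTPARALLEL_IDLE              next page published; any worker may seize
        | _bt_parallel_seize()
        +--> back to BTPARALLEL_ADVANCING

 Any state --> BTPARALLEL_DONE via _bt_parallel_done() once the scan is
 exhausted for the current set of array keys; _bt_parallel_advance_array_keys()
 resets DONE back to NOT_INITIALIZED when another array-key set remains.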

Function Documentation

◆ _bt_parallel_advance_array_keys()

void _bt_parallel_advance_array_keys ( IndexScanDesc  scan)

Definition at line 758 of file nbtree.c.

759 {
760  BTScanOpaque so = (BTScanOpaque) scan->opaque;
761  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
762  BTParallelScanDesc btscan;
763 
764  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
765  parallel_scan->ps_offset);
766 
767  so->arrayKeyCount++;
768  SpinLockAcquire(&btscan->btps_mutex);
769  if (btscan->btps_pageStatus == BTPARALLEL_DONE)
770  {
771  btscan->btps_scanPage = InvalidBlockNumber;
772  btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
773  btscan->btps_arrayKeyCount++;
774  }
775  SpinLockRelease(&btscan->btps_mutex);
776 }

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_DONE, BTPARALLEL_NOT_INITIALIZED, InvalidBlockNumber, OffsetToPointer, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_advance_array_keys().

◆ _bt_parallel_done()

void _bt_parallel_done ( IndexScanDesc  scan)

Definition at line 717 of file nbtree.c.

718 {
719  BTScanOpaque so = (BTScanOpaque) scan->opaque;
720  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
721  BTParallelScanDesc btscan;
722  bool status_changed = false;
723 
724  /* Do nothing, for non-parallel scans */
725  if (parallel_scan == NULL)
726  return;
727 
728  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
729  parallel_scan->ps_offset);
730 
731  /*
732  * Mark the parallel scan as done for this combination of scan keys,
733  * unless some other process already did so. See also
734  * _bt_advance_array_keys.
735  */
736  SpinLockAcquire(&btscan->btps_mutex);
737  if (so->arrayKeyCount >= btscan->btps_arrayKeyCount &&
738  btscan->btps_pageStatus != BTPARALLEL_DONE)
739  {
740  btscan->btps_pageStatus = BTPARALLEL_DONE;
741  status_changed = true;
742  }
743  SpinLockRelease(&btscan->btps_mutex);
744 
745  /* wake up all the workers associated with this parallel scan */
746  if (status_changed)
747  ConditionVariableBroadcast(&btscan->btps_cv);
748 }

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_DONE, ConditionVariableBroadcast(), OffsetToPointer, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_first(), and _bt_readnextpage().

◆ _bt_parallel_release()

void _bt_parallel_release ( IndexScanDesc  scan,
BlockNumber  scan_page 
)

Definition at line 694 of file nbtree.c.

695 {
696  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
697  BTParallelScanDesc btscan;
698 
699  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
700  parallel_scan->ps_offset);
701 
702  SpinLockAcquire(&btscan->btps_mutex);
703  btscan->btps_scanPage = scan_page;
704  btscan->btps_pageStatus = BTPARALLEL_IDLE;
705  SpinLockRelease(&btscan->btps_mutex);
706  ConditionVariableSignal(&btscan->btps_cv);
707 }

References BTPARALLEL_IDLE, BTParallelScanDescData::btps_cv, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, ConditionVariableSignal(), OffsetToPointer, IndexScanDescData::parallel_scan, ParallelIndexScanDescData::ps_offset, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_readnextpage(), and _bt_readpage().

◆ _bt_parallel_seize()

bool _bt_parallel_seize ( IndexScanDesc  scan,
BlockNumber *  pageno 
)

Definition at line 636 of file nbtree.c.

637 {
638  BTScanOpaque so = (BTScanOpaque) scan->opaque;
639  BTPS_State pageStatus;
640  bool exit_loop = false;
641  bool status = true;
642  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
643  BTParallelScanDesc btscan;
644 
645  *pageno = P_NONE;
646 
647  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
648  parallel_scan->ps_offset);
649 
650  while (1)
651  {
652  SpinLockAcquire(&btscan->btps_mutex);
653  pageStatus = btscan->btps_pageStatus;
654 
655  if (so->arrayKeyCount < btscan->btps_arrayKeyCount)
656  {
657  /* Parallel scan has already advanced to a new set of scankeys. */
658  status = false;
659  }
660  else if (pageStatus == BTPARALLEL_DONE)
661  {
662  /*
663  * We're done with this set of scankeys. This may be the end, or
664  * there could be more sets to try.
665  */
666  status = false;
667  }
668  else if (pageStatus != BTPARALLEL_ADVANCING)
669  {
670  /*
671  * We have successfully seized control of the scan for the purpose
672  * of advancing it to a new page!
673  */
674  btscan->btps_pageStatus = BTPARALLEL_ADVANCING;
675  *pageno = btscan->btps_scanPage;
676  exit_loop = true;
677  }
678  SpinLockRelease(&btscan->btps_mutex);
679  if (exit_loop || !status)
680  break;
681  ConditionVariableSleep(&btscan->btps_cv, WAIT_EVENT_BTREE_PAGE);
682  }
683  ConditionVariableCancelSleep();
684 
685  return status;
686 }

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_ADVANCING, BTPARALLEL_DONE, ConditionVariableCancelSleep(), ConditionVariableSleep(), OffsetToPointer, IndexScanDescData::opaque, P_NONE, IndexScanDescData::parallel_scan, SpinLockAcquire, SpinLockRelease, and WAIT_EVENT_BTREE_PAGE.

Referenced by _bt_first(), _bt_readnextpage(), and _bt_steppage().
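
Taken together, _bt_parallel_seize(), _bt_parallel_release(), and _bt_parallel_done() form a simple hand-off protocol. The following is a hypothetical sketch of the loop a participating backend runs (not code from nbtree.c; read_page_and_find_next() is an invented stand-in for the page-reading logic in nbtsearch.c):

#include "postgres.h"
#include "access/genam.h"
#include "access/nbtree.h"

/* Hypothetical helper: process block blkno and return its right sibling. */
extern BlockNumber read_page_and_find_next(IndexScanDesc scan, BlockNumber blkno);

static void
drive_parallel_btscan(IndexScanDesc scan)
{
    BlockNumber pageno;

    /* _bt_parallel_seize() returns false once this array-key set is done */
    while (_bt_parallel_seize(scan, &pageno))
    {
        BlockNumber next = read_page_and_find_next(scan, pageno);

        if (next == P_NONE)
        {
            /* No right sibling: mark the scan done and wake all workers */
            _bt_parallel_done(scan);
            break;
        }

        /* Publish the successor page and wake one sleeping worker */
        _bt_parallel_release(scan, next);
    }
}

In the real code the release happens as soon as the successor block is known (in _bt_readpage()/_bt_readnextpage()), before the seized page's tuples are processed, so that other workers can proceed concurrently.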

◆ btbeginscan()

IndexScanDesc btbeginscan ( Relation  rel,
int  nkeys,
int  norderbys 
)

Definition at line 344 of file nbtree.c.

345 {
346  IndexScanDesc scan;
347  BTScanOpaque so;
348 
349  /* no order by operators allowed */
350  Assert(norderbys == 0);
351 
352  /* get the scan */
353  scan = RelationGetIndexScan(rel, nkeys, norderbys);
354 
355  /* allocate private workspace */
356  so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
357  BTScanPosInvalidate(so->currPos);
358  BTScanPosInvalidate(so->markPos);
359  if (scan->numberOfKeys > 0)
360  so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
361  else
362  so->keyData = NULL;
363 
364  so->arrayKeyData = NULL; /* assume no array keys for now */
365  so->numArrayKeys = 0;
366  so->arrayKeys = NULL;
367  so->arrayContext = NULL;
368 
369  so->killedItems = NULL; /* until needed */
370  so->numKilled = 0;
371 
372  /*
373  * We don't know yet whether the scan will be index-only, so we do not
374  * allocate the tuple workspace arrays until btrescan. However, we set up
375  * scan->xs_itupdesc whether we'll need it or not, since that's so cheap.
376  */
377  so->currTuples = so->markTuples = NULL;
378 
379  scan->xs_itupdesc = RelationGetDescr(rel);
380 
381  scan->opaque = so;
382 
383  return scan;
384 }

References BTScanOpaqueData::arrayContext, BTScanOpaqueData::arrayKeyData, BTScanOpaqueData::arrayKeys, Assert(), BTScanPosInvalidate, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanOpaqueData::keyData, BTScanOpaqueData::killedItems, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanOpaqueData::numArrayKeys, IndexScanDescData::numberOfKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), RelationGetDescr, RelationGetIndexScan(), and IndexScanDescData::xs_itupdesc.

Referenced by bthandler().

◆ btbuildempty()

void btbuildempty ( Relation  index)

Definition at line 152 of file nbtree.c.

153 {
154  Page metapage;
155 
156  /* Construct metapage. */
157  metapage = (Page) palloc(BLCKSZ);
158  _bt_initmetapage(metapage, P_NONE, 0, _bt_allequalimage(index, false));
159 
160  /*
161  * Write the page and log it. It might seem that an immediate sync would
162  * be sufficient to guarantee that the file exists on disk, but recovery
163  * itself might remove it while replaying, for example, an
164  * XLOG_DBASE_CREATE* or XLOG_TBLSPC_CREATE record. Therefore, we need
165  * this even when wal_level=minimal.
166  */
167  PageSetChecksumInplace(metapage, BTREE_METAPAGE);
168  smgrwrite(RelationGetSmgr(index), INIT_FORKNUM, BTREE_METAPAGE,
169  metapage, true);
170  log_newpage(&RelationGetSmgr(index)->smgr_rlocator.locator, INIT_FORKNUM,
171  BTREE_METAPAGE, metapage, true);
172 
173  /*
174  * An immediate sync is required even if we xlog'd the page, because the
175  * write did not go through shared_buffers and therefore a concurrent
176  * checkpoint may have moved the redo pointer past our xlog record.
177  */
178  smgrimmedsync(RelationGetSmgr(index), INIT_FORKNUM);
179 }

References _bt_allequalimage(), _bt_initmetapage(), BTREE_METAPAGE, INIT_FORKNUM, log_newpage(), P_NONE, PageSetChecksumInplace(), palloc(), RelationGetSmgr(), smgrimmedsync(), and smgrwrite().

Referenced by bthandler().

◆ btbulkdelete()

IndexBulkDeleteResult * btbulkdelete ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats,
IndexBulkDeleteCallback  callback,
void *  callback_state 
)

Definition at line 786 of file nbtree.c.

788 {
789  Relation rel = info->index;
790  BTCycleId cycleid;
791 
792  /* allocate stats if first time through, else re-use existing struct */
793  if (stats == NULL)
794  stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
795 
796  /* Establish the vacuum cycle ID to use for this scan */
797  /* The ENSURE stuff ensures we clean up shared memory on failure */
798  PG_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
799  {
800  cycleid = _bt_start_vacuum(rel);
801 
802  btvacuumscan(info, stats, callback, callback_state, cycleid);
803  }
804  PG_END_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
805  _bt_end_vacuum(rel);
806 
807  return stats;
808 }

References _bt_end_vacuum(), _bt_end_vacuum_callback(), _bt_start_vacuum(), btvacuumscan(), callback(), IndexVacuumInfo::index, palloc0(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, and PointerGetDatum().

Referenced by bthandler().
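
The callback argument has type IndexBulkDeleteCallback (see genam.h): it is invoked for every heap TID stored in the index and returns true if the corresponding index entry should be removed. A minimal sketch of such a callback, where DeadSet and deadset_contains() are hypothetical caller-provided pieces:

#include "postgres.h"
#include "access/genam.h"
#include "storage/itemptr.h"

typedef struct DeadSet DeadSet;                               /* hypothetical */
extern bool deadset_contains(DeadSet *set, ItemPointer tid);  /* hypothetical */

/* Return true for TIDs whose heap tuples are dead; btvacuumpage() then
 * deletes (or, for posting lists, updates) the matching index entries. */
static bool
dead_tid_callback(ItemPointer itemptr, void *state)
{
    DeadSet *deadset = (DeadSet *) state;

    return deadset_contains(deadset, itemptr);
}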

◆ btcanreturn()

bool btcanreturn ( Relation  index,
int  attno 
)

Definition at line 1419 of file nbtree.c.

1420 {
1421  return true;
1422 }

Referenced by bthandler().

◆ btendscan()

void btendscan ( IndexScanDesc  scan)

Definition at line 449 of file nbtree.c.

450 {
451  BTScanOpaque so = (BTScanOpaque) scan->opaque;
452 
453  /* we aren't holding any read locks, but gotta drop the pins */
454  if (BTScanPosIsValid(so->currPos))
455  {
456  /* Before leaving current page, deal with any killed items */
457  if (so->numKilled > 0)
458  _bt_killitems(scan);
459  BTScanPosUnpinIfPinned(so->currPos);
460  }
461 
462  so->markItemIndex = -1;
463  BTScanPosUnpinIfPinned(so->markPos);
464 
465  /* No need to invalidate positions, the RAM is about to be freed. */
466 
467  /* Release storage */
468  if (so->keyData != NULL)
469  pfree(so->keyData);
470  /* so->arrayKeyData and so->arrayKeys are in arrayContext */
471  if (so->arrayContext != NULL)
472  MemoryContextDelete(so->arrayContext);
473  if (so->killedItems != NULL)
474  pfree(so->killedItems);
475  if (so->currTuples != NULL)
476  pfree(so->currTuples);
477  /* so->markTuples should not be pfree'd, see btrescan */
478  pfree(so);
479 }

References _bt_killitems(), BTScanOpaqueData::arrayContext, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanOpaqueData::keyData, BTScanOpaqueData::killedItems, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, MemoryContextDelete(), BTScanOpaqueData::numKilled, IndexScanDescData::opaque, and pfree().

Referenced by bthandler().

◆ btestimateparallelscan()

Size btestimateparallelscan ( void  )

Definition at line 571 of file nbtree.c.

572 {
573  return sizeof(BTParallelScanDescData);
574 }

Referenced by bthandler().

◆ btgetbitmap()

int64 btgetbitmap ( IndexScanDesc  scan,
TIDBitmap *  tbm 
)

Definition at line 286 of file nbtree.c.

287 {
288  BTScanOpaque so = (BTScanOpaque) scan->opaque;
289  int64 ntids = 0;
290  ItemPointer heapTid;
291 
292  /*
293  * If we have any array keys, initialize them.
294  */
295  if (so->numArrayKeys)
296  {
297  /* punt if we have any unsatisfiable array keys */
298  if (so->numArrayKeys < 0)
299  return ntids;
300 
301  _bt_start_array_keys(scan, ForwardScanDirection);
302  }
303 
304  /* This loop handles advancing to the next array elements, if any */
305  do
306  {
307  /* Fetch the first page & tuple */
308  if (_bt_first(scan, ForwardScanDirection))
309  {
310  /* Save tuple ID, and continue scanning */
311  heapTid = &scan->xs_heaptid;
312  tbm_add_tuples(tbm, heapTid, 1, false);
313  ntids++;
314 
315  for (;;)
316  {
317  /*
318  * Advance to next tuple within page. This is the same as the
319  * easy case in _bt_next().
320  */
321  if (++so->currPos.itemIndex > so->currPos.lastItem)
322  {
323  /* let _bt_next do the heavy lifting */
324  if (!_bt_next(scan, ForwardScanDirection))
325  break;
326  }
327 
328  /* Save tuple ID, and continue scanning */
329  heapTid = &so->currPos.items[so->currPos.itemIndex].heapTid;
330  tbm_add_tuples(tbm, heapTid, 1, false);
331  ntids++;
332  }
333  }
334  /* Now see if we have more array keys to deal with */
335  } while (so->numArrayKeys && _bt_advance_array_keys(scan, ForwardScanDirection));
336 
337  return ntids;
338 }

References _bt_advance_array_keys(), _bt_first(), _bt_next(), _bt_start_array_keys(), BTScanOpaqueData::currPos, ForwardScanDirection, BTScanPosItem::heapTid, BTScanPosData::itemIndex, BTScanPosData::items, BTScanPosData::lastItem, BTScanOpaqueData::numArrayKeys, IndexScanDescData::opaque, tbm_add_tuples(), and IndexScanDescData::xs_heaptid.

Referenced by bthandler().
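
Callers reach btgetbitmap() through the index_getbitmap() wrapper in indexam.c. A minimal sketch, assuming scan is a btree IndexScanDesc that has already had its keys set with index_rescan() (bitmap sizing simplified; work_mem is the usual GUC):

#include "postgres.h"
#include "access/genam.h"
#include "miscadmin.h"
#include "nodes/tidbitmap.h"

static int64
collect_matching_tids(IndexScanDesc scan)
{
    /* Private (non-shared) bitmap, sized from work_mem */
    TIDBitmap  *tbm = tbm_create(work_mem * 1024L, NULL);
    int64       ntids = index_getbitmap(scan, tbm);

    /* ... a real caller would hand tbm to a bitmap heap scan here ... */
    tbm_free(tbm);
    return ntids;
}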

◆ btgettuple()

bool btgettuple ( IndexScanDesc  scan,
ScanDirection  dir 
)

Definition at line 212 of file nbtree.c.

213 {
214  BTScanOpaque so = (BTScanOpaque) scan->opaque;
215  bool res;
216 
217  /* btree indexes are never lossy */
218  scan->xs_recheck = false;
219 
220  /*
221  * If we have any array keys, initialize them during first call for a
222  * scan. We can't do this in btrescan because we don't know the scan
223  * direction at that time.
224  */
225  if (so->numArrayKeys && !BTScanPosIsValid(so->currPos))
226  {
227  /* punt if we have any unsatisfiable array keys */
228  if (so->numArrayKeys < 0)
229  return false;
230 
231  _bt_start_array_keys(scan, dir);
232  }
233 
234  /* This loop handles advancing to the next array elements, if any */
235  do
236  {
237  /*
238  * If we've already initialized this scan, we can just advance it in
239  * the appropriate direction. If we haven't done so yet, we call
240  * _bt_first() to get the first item in the scan.
241  */
242  if (!BTScanPosIsValid(so->currPos))
243  res = _bt_first(scan, dir);
244  else
245  {
246  /*
247  * Check to see if we should kill the previously-fetched tuple.
248  */
249  if (scan->kill_prior_tuple)
250  {
251  /*
252  * Yes, remember it for later. (We'll deal with all such
253  * tuples at once right before leaving the index page.) The
254  * test for numKilled overrun is not just paranoia: if the
255  * caller reverses direction in the indexscan then the same
256  * item might get entered multiple times. It's not worth
257  * trying to optimize that, so we don't detect it, but instead
258  * just forget any excess entries.
259  */
260  if (so->killedItems == NULL)
261  so->killedItems = (int *)
262  palloc(MaxTIDsPerBTreePage * sizeof(int));
263  if (so->numKilled < MaxTIDsPerBTreePage)
264  so->killedItems[so->numKilled++] = so->currPos.itemIndex;
265  }
266 
267  /*
268  * Now continue the scan.
269  */
270  res = _bt_next(scan, dir);
271  }
272 
273  /* If we have a tuple, return it ... */
274  if (res)
275  break;
276  /* ... otherwise see if we have more array keys to deal with */
277  } while (so->numArrayKeys && _bt_advance_array_keys(scan, dir));
278 
279  return res;
280 }

References _bt_advance_array_keys(), _bt_first(), _bt_next(), _bt_start_array_keys(), BTScanPosIsValid, BTScanOpaqueData::currPos, BTScanPosData::itemIndex, IndexScanDescData::kill_prior_tuple, BTScanOpaqueData::killedItems, MaxTIDsPerBTreePage, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), res, and IndexScanDescData::xs_recheck.

Referenced by bthandler().
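
All four scan entry points (btbeginscan, btrescan, btgettuple, btendscan) are normally reached through the AM-independent wrappers in indexam.c. A minimal sketch, assuming a btree index on a single int4 column, an active snapshot, and the wrapper signatures of this era of the tree (error handling omitted):

#include "postgres.h"
#include "access/genam.h"
#include "access/skey.h"
#include "access/stratnum.h"
#include "access/table.h"
#include "utils/fmgroids.h"
#include "utils/snapmgr.h"

static int64
count_equal(Oid heapoid, Oid indexoid, int32 key)
{
    Relation      heaprel = table_open(heapoid, AccessShareLock);
    Relation      irel = index_open(indexoid, AccessShareLock);
    ScanKeyData   skey;
    IndexScanDesc scan;
    int64         n = 0;

    /* WHERE col = key, via the btree equality strategy */
    ScanKeyInit(&skey, 1, BTEqualStrategyNumber, F_INT4EQ,
                Int32GetDatum(key));

    scan = index_beginscan(heaprel, irel, GetActiveSnapshot(), 1, 0);
    index_rescan(scan, &skey, 1, NULL, 0);      /* reaches btrescan() */

    while (index_getnext_tid(scan, ForwardScanDirection) != NULL)
        n++;                                    /* reaches btgettuple() */

    index_endscan(scan);                        /* reaches btendscan() */
    index_close(irel, AccessShareLock);
    table_close(heaprel, AccessShareLock);
    return n;
}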

◆ bthandler()

Datum bthandler ( PG_FUNCTION_ARGS  )

Definition at line 96 of file nbtree.c.

97 {
98  IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
99 
100  amroutine->amstrategies = BTMaxStrategyNumber;
101  amroutine->amsupport = BTNProcs;
102  amroutine->amoptsprocnum = BTOPTIONS_PROC;
103  amroutine->amcanorder = true;
104  amroutine->amcanorderbyop = false;
105  amroutine->amcanbackward = true;
106  amroutine->amcanunique = true;
107  amroutine->amcanmulticol = true;
108  amroutine->amoptionalkey = true;
109  amroutine->amsearcharray = true;
110  amroutine->amsearchnulls = true;
111  amroutine->amstorage = false;
112  amroutine->amclusterable = true;
113  amroutine->ampredlocks = true;
114  amroutine->amcanparallel = true;
115  amroutine->amcaninclude = true;
116  amroutine->amusemaintenanceworkmem = false;
117  amroutine->amsummarizing = false;
118  amroutine->amparallelvacuumoptions =
119  VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_COND_CLEANUP;
120  amroutine->amkeytype = InvalidOid;
121 
122  amroutine->ambuild = btbuild;
123  amroutine->ambuildempty = btbuildempty;
124  amroutine->aminsert = btinsert;
125  amroutine->ambulkdelete = btbulkdelete;
126  amroutine->amvacuumcleanup = btvacuumcleanup;
127  amroutine->amcanreturn = btcanreturn;
128  amroutine->amcostestimate = btcostestimate;
129  amroutine->amoptions = btoptions;
130  amroutine->amproperty = btproperty;
131  amroutine->ambuildphasename = btbuildphasename;
132  amroutine->amvalidate = btvalidate;
133  amroutine->amadjustmembers = btadjustmembers;
134  amroutine->ambeginscan = btbeginscan;
135  amroutine->amrescan = btrescan;
136  amroutine->amgettuple = btgettuple;
137  amroutine->amgetbitmap = btgetbitmap;
138  amroutine->amendscan = btendscan;
139  amroutine->ammarkpos = btmarkpos;
140  amroutine->amrestrpos = btrestrpos;
141  amroutine->amestimateparallelscan = btestimateparallelscan;
142  amroutine->aminitparallelscan = btinitparallelscan;
143  amroutine->amparallelrescan = btparallelrescan;
144 
145  PG_RETURN_POINTER(amroutine);
146 }

References IndexAmRoutine::amadjustmembers, IndexAmRoutine::ambeginscan, IndexAmRoutine::ambuild, IndexAmRoutine::ambuildempty, IndexAmRoutine::ambuildphasename, IndexAmRoutine::ambulkdelete, IndexAmRoutine::amcanbackward, IndexAmRoutine::amcaninclude, IndexAmRoutine::amcanmulticol, IndexAmRoutine::amcanorder, IndexAmRoutine::amcanorderbyop, IndexAmRoutine::amcanparallel, IndexAmRoutine::amcanreturn, IndexAmRoutine::amcanunique, IndexAmRoutine::amclusterable, IndexAmRoutine::amcostestimate, IndexAmRoutine::amendscan, IndexAmRoutine::amestimateparallelscan, IndexAmRoutine::amgetbitmap, IndexAmRoutine::amgettuple, IndexAmRoutine::aminitparallelscan, IndexAmRoutine::aminsert, IndexAmRoutine::amkeytype, IndexAmRoutine::ammarkpos, IndexAmRoutine::amoptionalkey, IndexAmRoutine::amoptions, IndexAmRoutine::amoptsprocnum, IndexAmRoutine::amparallelrescan, IndexAmRoutine::amparallelvacuumoptions, IndexAmRoutine::ampredlocks, IndexAmRoutine::amproperty, IndexAmRoutine::amrescan, IndexAmRoutine::amrestrpos, IndexAmRoutine::amsearcharray, IndexAmRoutine::amsearchnulls, IndexAmRoutine::amstorage, IndexAmRoutine::amstrategies, IndexAmRoutine::amsummarizing, IndexAmRoutine::amsupport, IndexAmRoutine::amusemaintenanceworkmem, IndexAmRoutine::amvacuumcleanup, IndexAmRoutine::amvalidate, btadjustmembers(), btbeginscan(), btbuild(), btbuildempty(), btbuildphasename(), btbulkdelete(), btcanreturn(), btcostestimate(), btendscan(), btestimateparallelscan(), btgetbitmap(), btgettuple(), btinitparallelscan(), btinsert(), btmarkpos(), BTMaxStrategyNumber, BTNProcs, btoptions(), BTOPTIONS_PROC, btparallelrescan(), btproperty(), btrescan(), btrestrpos(), btvacuumcleanup(), btvalidate(), InvalidOid, makeNode, PG_RETURN_POINTER, VACUUM_OPTION_PARALLEL_BULKDEL, and VACUUM_OPTION_PARALLEL_COND_CLEANUP.
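
bthandler() is not normally called directly: when an index is opened, the relcache invokes the handler once and caches the result in the Relation's rd_indam field. A minimal sketch of consulting one of the capability flags set above (irel is assumed to be an already-opened index Relation):

#include "postgres.h"
#include "access/amapi.h"
#include "utils/rel.h"

/* True if the index's AM supports backward scans; for a btree this is
 * always true, per the amcanbackward assignment in bthandler() above. */
static bool
index_supports_backward_scan(Relation irel)
{
    IndexAmRoutine *am = irel->rd_indam;

    return am->amcanbackward;
}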

◆ btinitparallelscan()

void btinitparallelscan ( void *  target)

Definition at line 580 of file nbtree.c.

Referenced by bthandler().

◆ btinsert()

bool btinsert ( Relation  rel,
Datum *  values,
bool *  isnull,
ItemPointer  ht_ctid,
Relation  heapRel,
IndexUniqueCheck  checkUnique,
bool  indexUnchanged,
IndexInfo indexInfo 
)

Definition at line 188 of file nbtree.c.

193 {
194  bool result;
195  IndexTuple itup;
196 
197  /* generate an index tuple */
198  itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
199  itup->t_tid = *ht_ctid;
200 
201  result = _bt_doinsert(rel, itup, checkUnique, indexUnchanged, heapRel);
202 
203  pfree(itup);
204 
205  return result;
206 }

References _bt_doinsert(), index_form_tuple(), pfree(), RelationGetDescr, IndexTupleData::t_tid, and values.

Referenced by bthandler().
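
btinsert() is reached through the index_insert() wrapper. A minimal sketch for a single-column int4 index, with heaprel, irel, and tid supplied by the caller and no unique checking (a real caller would typically reuse a cached IndexInfo rather than rebuild it per row):

#include "postgres.h"
#include "access/genam.h"
#include "catalog/index.h"
#include "storage/itemptr.h"

static void
insert_one_entry(Relation heaprel, Relation irel, ItemPointer tid, int32 key)
{
    Datum       values[1];
    bool        isnull[1];
    IndexInfo  *indexInfo = BuildIndexInfo(irel);

    values[0] = Int32GetDatum(key);
    isnull[0] = false;

    index_insert(irel, values, isnull, tid, heaprel,
                 UNIQUE_CHECK_NO, false /* indexUnchanged */, indexInfo);
}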

◆ btmarkpos()

void btmarkpos ( IndexScanDesc  scan)

Definition at line 485 of file nbtree.c.

486 {
487  BTScanOpaque so = (BTScanOpaque) scan->opaque;
488 
489  /* There may be an old mark with a pin (but no lock). */
490  BTScanPosUnpinIfPinned(so->markPos);
491 
492  /*
493  * Just record the current itemIndex. If we later step to next page
494  * before releasing the marked position, _bt_steppage makes a full copy of
495  * the currPos struct in markPos. If (as often happens) the mark is moved
496  * before we leave the page, we don't have to do that work.
497  */
498  if (BTScanPosIsValid(so->currPos))
499  so->markItemIndex = so->currPos.itemIndex;
500  else
501  {
502  BTScanPosInvalidate(so->markPos);
503  so->markItemIndex = -1;
504  }
505 
506  /* Also record the current positions of any array keys */
507  if (so->numArrayKeys)
508  _bt_mark_array_keys(scan);
509 }

References BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanPosData::itemIndex, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, and IndexScanDescData::opaque.

Referenced by bthandler().

◆ btparallelrescan()

void btparallelrescan ( IndexScanDesc  scan)

Definition at line 595 of file nbtree.c.

596 {
597  BTParallelScanDesc btscan;
598  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
599 
600  Assert(parallel_scan);
601 
602  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
603  parallel_scan->ps_offset);
604 
605  /*
606  * In theory, we don't need to acquire the spinlock here, because there
607  * shouldn't be any other workers running at this point, but we do so for
608  * consistency.
609  */
610  SpinLockAcquire(&btscan->btps_mutex);
611  btscan->btps_scanPage = InvalidBlockNumber;
612  btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
613  btscan->btps_arrayKeyCount = 0;
614  SpinLockRelease(&btscan->btps_mutex);
615 }

References Assert(), BTPARALLEL_NOT_INITIALIZED, BTParallelScanDescData::btps_arrayKeyCount, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, InvalidBlockNumber, OffsetToPointer, IndexScanDescData::parallel_scan, ParallelIndexScanDescData::ps_offset, SpinLockAcquire, and SpinLockRelease.

Referenced by bthandler().

◆ btreevacuumposting()

static BTVacuumPosting btreevacuumposting ( BTVacState *  vstate,
IndexTuple  posting,
OffsetNumber  updatedoffset,
int *  nremaining 
)
static

Definition at line 1370 of file nbtree.c.

1372 {
1373  int live = 0;
1374  int nitem = BTreeTupleGetNPosting(posting);
1375  ItemPointer items = BTreeTupleGetPosting(posting);
1376  BTVacuumPosting vacposting = NULL;
1377 
1378  for (int i = 0; i < nitem; i++)
1379  {
1380  if (!vstate->callback(items + i, vstate->callback_state))
1381  {
1382  /* Live table TID */
1383  live++;
1384  }
1385  else if (vacposting == NULL)
1386  {
1387  /*
1388  * First dead table TID encountered.
1389  *
1390  * It's now clear that we need to delete one or more dead table
1391  * TIDs, so start maintaining metadata describing how to update
1392  * existing posting list tuple.
1393  */
1394  vacposting = palloc(offsetof(BTVacuumPostingData, deletetids) +
1395  nitem * sizeof(uint16));
1396 
1397  vacposting->itup = posting;
1398  vacposting->updatedoffset = updatedoffset;
1399  vacposting->ndeletedtids = 0;
1400  vacposting->deletetids[vacposting->ndeletedtids++] = i;
1401  }
1402  else
1403  {
1404  /* Second or subsequent dead table TID */
1405  vacposting->deletetids[vacposting->ndeletedtids++] = i;
1406  }
1407  }
1408 
1409  *nremaining = live;
1410  return vacposting;
1411 }

References BTreeTupleGetNPosting(), BTreeTupleGetPosting(), BTVacState::callback, BTVacState::callback_state, BTVacuumPostingData::deletetids, i, BTVacuumPostingData::itup, BTVacuumPostingData::ndeletedtids, palloc(), and BTVacuumPostingData::updatedoffset.

Referenced by btvacuumpage().
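
The returned BTVacuumPosting is later consumed by _bt_delitems_vacuum(). Its deletetids[] entries are 0-based positions into the original posting list, so the updated tuple retains nitem - ndeletedtids TIDs, matching the *nremaining count reported above. A small sketch of that relationship:

#include "postgres.h"
#include "access/nbtree.h"

/* Number of heap TIDs the updated posting list tuple will keep; equals
 * the *nremaining value that btreevacuumposting() reports. */
static int
vacposting_nremaining(BTVacuumPosting vacposting)
{
    int nitem = BTreeTupleGetNPosting(vacposting->itup);

    return nitem - vacposting->ndeletedtids;
}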

◆ btrescan()

void btrescan ( IndexScanDesc  scan,
ScanKey  scankey,
int  nscankeys,
ScanKey  orderbys,
int  norderbys 
)

Definition at line 390 of file nbtree.c.

392 {
393  BTScanOpaque so = (BTScanOpaque) scan->opaque;
394 
395  /* we aren't holding any read locks, but gotta drop the pins */
396  if (BTScanPosIsValid(so->currPos))
397  {
398  /* Before leaving current page, deal with any killed items */
399  if (so->numKilled > 0)
400  _bt_killitems(scan);
401  BTScanPosUnpinIfPinned(so->currPos);
402  BTScanPosInvalidate(so->currPos);
403  }
404 
405  so->markItemIndex = -1;
406  so->arrayKeyCount = 0;
407  BTScanPosUnpinIfPinned(so->markPos);
408  BTScanPosInvalidate(so->markPos);
409 
410  /*
411  * Allocate tuple workspace arrays, if needed for an index-only scan and
412  * not already done in a previous rescan call. To save on palloc
413  * overhead, both workspaces are allocated as one palloc block; only this
414  * function and btendscan know that.
415  *
416  * NOTE: this data structure also makes it safe to return data from a
417  * "name" column, even though btree name_ops uses an underlying storage
418  * datatype of cstring. The risk there is that "name" is supposed to be
419  * padded to NAMEDATALEN, but the actual index tuple is probably shorter.
420  * However, since we only return data out of tuples sitting in the
421  * currTuples array, a fetch of NAMEDATALEN bytes can at worst pull some
422  * data out of the markTuples array --- running off the end of memory for
423  * a SIGSEGV is not possible. Yeah, this is ugly as sin, but it beats
424  * adding special-case treatment for name_ops elsewhere.
425  */
426  if (scan->xs_want_itup && so->currTuples == NULL)
427  {
428  so->currTuples = (char *) palloc(BLCKSZ * 2);
429  so->markTuples = so->currTuples + BLCKSZ;
430  }
431 
432  /*
433  * Reset the scan keys
434  */
435  if (scankey && scan->numberOfKeys > 0)
436  memmove(scan->keyData,
437  scankey,
438  scan->numberOfKeys * sizeof(ScanKeyData));
439  so->numberOfKeys = 0; /* until _bt_preprocess_keys sets it */
440 
441  /* If any keys are SK_SEARCHARRAY type, set up array-key info */
442  _bt_preprocess_array_keys(scan);
443 }

References _bt_killitems(), _bt_preprocess_array_keys(), BTScanOpaqueData::arrayKeyCount, BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, IndexScanDescData::keyData, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanOpaqueData::numberOfKeys, IndexScanDescData::numberOfKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), and IndexScanDescData::xs_want_itup.

Referenced by bthandler().

◆ btrestrpos()

void btrestrpos ( IndexScanDesc  scan)

Definition at line 515 of file nbtree.c.

516 {
517  BTScanOpaque so = (BTScanOpaque) scan->opaque;
518 
519  /* Restore the marked positions of any array keys */
520  if (so->numArrayKeys)
521  _bt_restore_array_keys(scan);
522 
523  if (so->markItemIndex >= 0)
524  {
525  /*
526  * The scan has never moved to a new page since the last mark. Just
527  * restore the itemIndex.
528  *
529  * NB: In this case we can't count on anything in so->markPos to be
530  * accurate.
531  */
532  so->currPos.itemIndex = so->markItemIndex;
533  }
534  else
535  {
536  /*
537  * The scan moved to a new page after last mark or restore, and we are
538  * now restoring to the marked page. We aren't holding any read
539  * locks, but if we're still holding the pin for the current position,
540  * we must drop it.
541  */
542  if (BTScanPosIsValid(so->currPos))
543  {
544  /* Before leaving current page, deal with any killed items */
545  if (so->numKilled > 0)
546  _bt_killitems(scan);
547  BTScanPosUnpinIfPinned(so->currPos);
548  }
549 
550  if (BTScanPosIsValid(so->markPos))
551  {
552  /* bump pin on mark buffer for assignment to current buffer */
553  if (BTScanPosIsPinned(so->markPos))
554  IncrBufferRefCount(so->markPos.buf);
555  memcpy(&so->currPos, &so->markPos,
556  offsetof(BTScanPosData, items[1]) +
557  so->markPos.lastItem * sizeof(BTScanPosItem));
558  if (so->currTuples)
559  memcpy(so->currTuples, so->markTuples,
560  so->markPos.nextTupleOffset);
561  }
562  else
563  BTScanPosInvalidate(so->currPos);
564  }
565 }

References _bt_killitems(), _bt_restore_array_keys(), BTScanPosInvalidate, BTScanPosIsPinned, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanPosData::buf, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, IncrBufferRefCount(), BTScanPosData::itemIndex, BTScanPosData::lastItem, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanPosData::nextTupleOffset, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numKilled, and IndexScanDescData::opaque.

Referenced by bthandler().
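
btmarkpos() and btrestrpos() are reached through the index_markpos()/index_restrpos() wrappers, which merge joins use to rewind the inner scan. A minimal sketch, assuming scan is an active btree IndexScanDesc (in real plans the mark is taken after fetching a tuple, not speculatively as here):

#include "postgres.h"
#include "access/genam.h"
#include "access/sdir.h"
#include "storage/itemptr.h"

/* Fetch the next TID, then rewind so the same TID is returned again. */
static bool
peek_next_tid(IndexScanDesc scan, ItemPointerData *out)
{
    ItemPointer tid;

    index_markpos(scan);                          /* reaches btmarkpos() */
    tid = index_getnext_tid(scan, ForwardScanDirection);
    index_restrpos(scan);                         /* reaches btrestrpos() */

    if (tid == NULL)
        return false;
    *out = *tid;
    return true;
}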

◆ btvacuumcleanup()

IndexBulkDeleteResult * btvacuumcleanup ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats 
)

Definition at line 816 of file nbtree.c.

817 {
818  BlockNumber num_delpages;
819 
820  /* No-op in ANALYZE ONLY mode */
821  if (info->analyze_only)
822  return stats;
823 
824  /*
825  * If btbulkdelete was called, we need not do anything (we just maintain
826  * the information used within _bt_vacuum_needs_cleanup() by calling
827  * _bt_set_cleanup_info() below).
828  *
829  * If btbulkdelete was _not_ called, then we have a choice to make: we
830  * must decide whether or not a btvacuumscan() call is needed now (i.e.
831  * whether the ongoing VACUUM operation can entirely avoid a physical scan
832  * of the index). A call to _bt_vacuum_needs_cleanup() decides it for us
833  * now.
834  */
835  if (stats == NULL)
836  {
837  /* Check if VACUUM operation can entirely avoid btvacuumscan() call */
838  if (!_bt_vacuum_needs_cleanup(info->index))
839  return NULL;
840 
841  /*
842  * Since we aren't going to actually delete any leaf items, there's no
843  * need to go through all the vacuum-cycle-ID pushups here.
844  *
845  * Posting list tuples are a source of inaccuracy for cleanup-only
846  * scans. btvacuumscan() will assume that the number of index tuples
847  * from each page can be used as num_index_tuples, even though
848  * num_index_tuples is supposed to represent the number of TIDs in the
849  * index. This naive approach can underestimate the number of tuples
850  * in the index significantly.
851  *
852  * We handle the problem by making num_index_tuples an estimate in
853  * cleanup-only case.
854  */
855  stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
856  btvacuumscan(info, stats, NULL, NULL, 0);
857  stats->estimated_count = true;
858  }
859 
860  /*
861  * Maintain num_delpages value in metapage for _bt_vacuum_needs_cleanup().
862  *
863  * num_delpages is the number of deleted pages now in the index that were
864  * not safe to place in the FSM to be recycled just yet. num_delpages is
865  * greater than 0 only when _bt_pagedel() actually deleted pages during
866  * our call to btvacuumscan(). Even then, _bt_pendingfsm_finalize() must
867  * have failed to place any newly deleted pages in the FSM just moments
868  * ago. (Actually, there are edge cases where recycling of the current
869  * VACUUM's newly deleted pages does not even become safe by the time the
870  * next VACUUM comes around. See nbtree/README.)
871  */
872  Assert(stats->pages_deleted >= stats->pages_free);
873  num_delpages = stats->pages_deleted - stats->pages_free;
874  _bt_set_cleanup_info(info->index, num_delpages);
875 
876  /*
877  * It's quite possible for us to be fooled by concurrent page splits into
878  * double-counting some index tuples, so disbelieve any total that exceeds
879  * the underlying heap's count ... if we know that accurately. Otherwise
880  * this might just make matters worse.
881  */
882  if (!info->estimated_count)
883  {
884  if (stats->num_index_tuples > info->num_heap_tuples)
885  stats->num_index_tuples = info->num_heap_tuples;
886  }
887 
888  return stats;
889 }

References _bt_set_cleanup_info(), _bt_vacuum_needs_cleanup(), IndexVacuumInfo::analyze_only, Assert(), btvacuumscan(), IndexVacuumInfo::estimated_count, IndexBulkDeleteResult::estimated_count, IndexVacuumInfo::index, IndexVacuumInfo::num_heap_tuples, IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, and palloc0().

Referenced by bthandler().

◆ btvacuumpage()

static void btvacuumpage ( BTVacState *  vstate,
BlockNumber  scanblkno 
)
static

Definition at line 1035 of file nbtree.c.

1036 {
1037  IndexVacuumInfo *info = vstate->info;
1038  IndexBulkDeleteResult *stats = vstate->stats;
1039  IndexBulkDeleteCallback callback = vstate->callback;
1040  void *callback_state = vstate->callback_state;
1041  Relation rel = info->index;
1042  bool attempt_pagedel;
1043  BlockNumber blkno,
1044  backtrack_to;
1045  Buffer buf;
1046  Page page;
1047  BTPageOpaque opaque;
1048 
1049  blkno = scanblkno;
1050 
1051 backtrack:
1052 
1053  attempt_pagedel = false;
1054  backtrack_to = P_NONE;
1055 
1056  /* call vacuum_delay_point while not holding any buffer lock */
1057  vacuum_delay_point();
1058 
1059  /*
1060  * We can't use _bt_getbuf() here because it always applies
1061  * _bt_checkpage(), which will barf on an all-zero page. We want to
1062  * recycle all-zero pages, not fail. Also, we want to use a nondefault
1063  * buffer access strategy.
1064  */
1065  buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
1066  info->strategy);
1067  _bt_lockbuf(rel, buf, BT_READ);
1068  page = BufferGetPage(buf);
1069  opaque = NULL;
1070  if (!PageIsNew(page))
1071  {
1072  _bt_checkpage(rel, buf);
1073  opaque = BTPageGetOpaque(page);
1074  }
1075 
1076  Assert(blkno <= scanblkno);
1077  if (blkno != scanblkno)
1078  {
1079  /*
1080  * We're backtracking.
1081  *
1082  * We followed a right link to a sibling leaf page (a page that
1083  * happens to be from a block located before scanblkno). The only
1084  * case we want to do anything with is a live leaf page having the
1085  * current vacuum cycle ID.
1086  *
1087  * The page had better be in a state that's consistent with what we
1088  * expect. Check for conditions that imply corruption in passing. It
1089  * can't be half-dead because only an interrupted VACUUM process can
1090  * leave pages in that state, so we'd definitely have dealt with it
1091  * back when the page was the scanblkno page (half-dead pages are
1092  * always marked fully deleted by _bt_pagedel()). This assumes that
1093  * there can be only one vacuum process running at a time.
1094  */
1095  if (!opaque || !P_ISLEAF(opaque) || P_ISHALFDEAD(opaque))
1096  {
1097  Assert(false);
1098  ereport(LOG,
1099  (errcode(ERRCODE_INDEX_CORRUPTED),
1100  errmsg_internal("right sibling %u of scanblkno %u unexpectedly in an inconsistent state in index \"%s\"",
1101  blkno, scanblkno, RelationGetRelationName(rel))));
1102  _bt_relbuf(rel, buf);
1103  return;
1104  }
1105 
1106  /*
1107  * We may have already processed the page in an earlier call, when the
1108  * page was scanblkno. This happens when the leaf page split occurred
1109  * after the scan began, but before the right sibling page became the
1110  * scanblkno.
1111  *
1112  * Page may also have been deleted by current btvacuumpage() call,
1113  * since _bt_pagedel() sometimes deletes the right sibling page of
1114  * scanblkno in passing (it does so after we decided where to
1115  * backtrack to). We don't need to process this page as a deleted
1116  * page a second time now (in fact, it would be wrong to count it as a
1117  * deleted page in the bulk delete statistics a second time).
1118  */
1119  if (opaque->btpo_cycleid != vstate->cycleid || P_ISDELETED(opaque))
1120  {
1121  /* Done with current scanblkno (and all lower split pages) */
1122  _bt_relbuf(rel, buf);
1123  return;
1124  }
1125  }
1126 
1127  if (!opaque || BTPageIsRecyclable(page))
1128  {
1129  /* Okay to recycle this page (which could be leaf or internal) */
1130  RecordFreeIndexPage(rel, blkno);
1131  stats->pages_deleted++;
1132  stats->pages_free++;
1133  }
1134  else if (P_ISDELETED(opaque))
1135  {
1136  /*
1137  * Already deleted page (which could be leaf or internal). Can't
1138  * recycle yet.
1139  */
1140  stats->pages_deleted++;
1141  }
1142  else if (P_ISHALFDEAD(opaque))
1143  {
1144  /* Half-dead leaf page (from interrupted VACUUM) -- finish deleting */
1145  attempt_pagedel = true;
1146 
1147  /*
1148  * _bt_pagedel() will increment both pages_newly_deleted and
1149  * pages_deleted stats in all cases (barring corruption)
1150  */
1151  }
1152  else if (P_ISLEAF(opaque))
1153  {
1154  OffsetNumber deletable[MaxIndexTuplesPerPage];
1155  int ndeletable;
1156  BTVacuumPosting updatable[MaxIndexTuplesPerPage];
1157  int nupdatable;
1158  OffsetNumber offnum,
1159  minoff,
1160  maxoff;
1161  int nhtidsdead,
1162  nhtidslive;
1163 
1164  /*
1165  * Trade in the initial read lock for a full cleanup lock on this
1166  * page. We must get such a lock on every leaf page over the course
1167  * of the vacuum scan, whether or not it actually contains any
1168  * deletable tuples --- see nbtree/README.
1169  */
1170  _bt_upgradelockbufcleanup(rel, buf);
1171 
1172  /*
1173  * Check whether we need to backtrack to earlier pages. What we are
1174  * concerned about is a page split that happened since we started the
1175  * vacuum scan. If the split moved tuples on the right half of the
1176  * split (i.e. the tuples that sort high) to a block that we already
1177  * passed over, then we might have missed the tuples. We need to
1178  * backtrack now. (Must do this before possibly clearing btpo_cycleid
1179  * or deleting scanblkno page below!)
1180  */
1181  if (vstate->cycleid != 0 &&
1182  opaque->btpo_cycleid == vstate->cycleid &&
1183  !(opaque->btpo_flags & BTP_SPLIT_END) &&
1184  !P_RIGHTMOST(opaque) &&
1185  opaque->btpo_next < scanblkno)
1186  backtrack_to = opaque->btpo_next;
1187 
1188  ndeletable = 0;
1189  nupdatable = 0;
1190  minoff = P_FIRSTDATAKEY(opaque);
1191  maxoff = PageGetMaxOffsetNumber(page);
1192  nhtidsdead = 0;
1193  nhtidslive = 0;
1194  if (callback)
1195  {
1196  /* btbulkdelete callback tells us what to delete (or update) */
1197  for (offnum = minoff;
1198  offnum <= maxoff;
1199  offnum = OffsetNumberNext(offnum))
1200  {
1201  IndexTuple itup;
1202 
1203  itup = (IndexTuple) PageGetItem(page,
1204  PageGetItemId(page, offnum));
1205 
1206  Assert(!BTreeTupleIsPivot(itup));
1207  if (!BTreeTupleIsPosting(itup))
1208  {
1209  /* Regular tuple, standard table TID representation */
1210  if (callback(&itup->t_tid, callback_state))
1211  {
1212  deletable[ndeletable++] = offnum;
1213  nhtidsdead++;
1214  }
1215  else
1216  nhtidslive++;
1217  }
1218  else
1219  {
1220  BTVacuumPosting vacposting;
1221  int nremaining;
1222 
1223  /* Posting list tuple */
1224  vacposting = btreevacuumposting(vstate, itup, offnum,
1225  &nremaining);
1226  if (vacposting == NULL)
1227  {
1228  /*
1229  * All table TIDs from the posting tuple remain, so no
1230  * delete or update required
1231  */
1232  Assert(nremaining == BTreeTupleGetNPosting(itup));
1233  }
1234  else if (nremaining > 0)
1235  {
1236 
1237  /*
1238  * Store metadata about posting list tuple in
1239  * updatable array for entire page. Existing tuple
1240  * will be updated during the later call to
1241  * _bt_delitems_vacuum().
1242  */
1243  Assert(nremaining < BTreeTupleGetNPosting(itup));
1244  updatable[nupdatable++] = vacposting;
1245  nhtidsdead += BTreeTupleGetNPosting(itup) - nremaining;
1246  }
1247  else
1248  {
1249  /*
1250  * All table TIDs from the posting list must be
1251  * deleted. We'll delete the index tuple completely
1252  * (no update required).
1253  */
1254  Assert(nremaining == 0);
1255  deletable[ndeletable++] = offnum;
1256  nhtidsdead += BTreeTupleGetNPosting(itup);
1257  pfree(vacposting);
1258  }
1259 
1260  nhtidslive += nremaining;
1261  }
1262  }
1263  }
1264 
1265  /*
1266  * Apply any needed deletes or updates. We issue just one
1267  * _bt_delitems_vacuum() call per page, so as to minimize WAL traffic.
1268  */
1269  if (ndeletable > 0 || nupdatable > 0)
1270  {
1271  Assert(nhtidsdead >= ndeletable + nupdatable);
1272  _bt_delitems_vacuum(rel, buf, deletable, ndeletable, updatable,
1273  nupdatable);
1274 
1275  stats->tuples_removed += nhtidsdead;
1276  /* must recompute maxoff */
1277  maxoff = PageGetMaxOffsetNumber(page);
1278 
1279  /* can't leak memory here */
1280  for (int i = 0; i < nupdatable; i++)
1281  pfree(updatable[i]);
1282  }
1283  else
1284  {
1285  /*
1286  * If the leaf page has been split during this vacuum cycle, it
1287  * seems worth expending a write to clear btpo_cycleid even if we
1288  * don't have any deletions to do. (If we do, _bt_delitems_vacuum
1289  * takes care of this.) This ensures we won't process the page
1290  * again.
1291  *
1292  * We treat this like a hint-bit update because there's no need to
1293  * WAL-log it.
1294  */
1295  Assert(nhtidsdead == 0);
1296  if (vstate->cycleid != 0 &&
1297  opaque->btpo_cycleid == vstate->cycleid)
1298  {
1299  opaque->btpo_cycleid = 0;
1300  MarkBufferDirtyHint(buf, true);
1301  }
1302  }
1303 
1304  /*
1305  * If the leaf page is now empty, try to delete it; else count the
1306  * live tuples (live table TIDs in posting lists are counted as
1307  * separate live tuples). We don't delete when backtracking, though,
1308  * since that would require teaching _bt_pagedel() about backtracking
1309  * (doesn't seem worth adding more complexity to deal with that).
1310  *
1311  * We don't count the number of live TIDs during cleanup-only calls to
1312  * btvacuumscan (i.e. when callback is not set). We count the number
1313  * of index tuples directly instead. This avoids the expense of
1314  * directly examining all of the tuples on each page. VACUUM will
1315  * treat num_index_tuples as an estimate in cleanup-only case, so it
1316  * doesn't matter that this underestimates num_index_tuples
1317  * significantly in some cases.
1318  */
1319  if (minoff > maxoff)
1320  attempt_pagedel = (blkno == scanblkno);
1321  else if (callback)
1322  stats->num_index_tuples += nhtidslive;
1323  else
1324  stats->num_index_tuples += maxoff - minoff + 1;
1325 
1326  Assert(!attempt_pagedel || nhtidslive == 0);
1327  }
1328 
1329  if (attempt_pagedel)
1330  {
1331  MemoryContext oldcontext;
1332 
1333  /* Run pagedel in a temp context to avoid memory leakage */
1334  MemoryContextReset(vstate->pagedelcontext);
1335  oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
1336 
1337  /*
1338  * _bt_pagedel maintains the bulk delete stats on our behalf;
1339  * pages_newly_deleted and pages_deleted are likely to be incremented
1340  * during call
1341  */
1342  Assert(blkno == scanblkno);
1343  _bt_pagedel(rel, buf, vstate);
1344 
1345  MemoryContextSwitchTo(oldcontext);
1346  /* pagedel released buffer, so we shouldn't */
1347  }
1348  else
1349  _bt_relbuf(rel, buf);
1350 
1351  if (backtrack_to != P_NONE)
1352  {
1353  blkno = backtrack_to;
1354  goto backtrack;
1355  }
1356 }

References _bt_checkpage(), _bt_delitems_vacuum(), _bt_lockbuf(), _bt_pagedel(), _bt_relbuf(), _bt_upgradelockbufcleanup(), Assert(), BT_READ, BTP_SPLIT_END, BTPageGetOpaque, BTPageIsRecyclable(), BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTreeTupleGetNPosting(), BTreeTupleIsPivot(), BTreeTupleIsPosting(), btreevacuumposting(), buf, BufferGetPage(), BTVacState::callback, callback(), BTVacState::callback_state, BTVacState::cycleid, ereport, errcode(), errmsg_internal(), i, IndexVacuumInfo::index, BTVacState::info, LOG, MAIN_FORKNUM, MarkBufferDirtyHint(), MaxIndexTuplesPerPage, MemoryContextReset(), MemoryContextSwitchTo(), IndexBulkDeleteResult::num_index_tuples, OffsetNumberNext, P_FIRSTDATAKEY, P_ISDELETED, P_ISHALFDEAD, P_ISLEAF, P_NONE, P_RIGHTMOST, BTVacState::pagedelcontext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageIsNew(), IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, pfree(), RBM_NORMAL, ReadBufferExtended(), RecordFreeIndexPage(), RelationGetRelationName, BTVacState::stats, IndexVacuumInfo::strategy, IndexTupleData::t_tid, IndexBulkDeleteResult::tuples_removed, and vacuum_delay_point().

Referenced by btvacuumscan().
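
The reset/switch/restore sequence around _bt_pagedel() above (source lines 1333-1345) is the standard PostgreSQL discipline for bounding memory use inside a per-page loop: reset the scratch context before each use, switch into it for the allocation-heavy call, then switch back so later allocations land in the caller's context again. The following is a minimal standalone sketch of that pattern, not code from nbtree.c; do_page_work() is a hypothetical stand-in for _bt_pagedel().

#include "postgres.h"
#include "utils/memutils.h"

/* Hypothetical allocation-heavy step, standing in for _bt_pagedel(). */
static void
do_page_work(void)
{
    (void) palloc(1024);        /* scratch data; reclaimed by the next reset */
}

/*
 * Run one page's worth of work inside a long-lived scratch context.
 * Resetting up front (rather than afterwards) means an error thrown
 * partway through the previous call cannot leave garbage behind.
 */
static void
process_one_page(MemoryContext scratch)
{
    MemoryContext oldcontext;

    MemoryContextReset(scratch);
    oldcontext = MemoryContextSwitchTo(scratch);
    do_page_work();
    MemoryContextSwitchTo(oldcontext);
}

btvacuumscan() pays for the context itself only once per scan: it creates pagedelcontext with AllocSetContextCreate() before the main loop and deletes it with MemoryContextDelete() afterwards, so only the cheap per-page reset shown here runs inside the loop.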

◆ btvacuumscan()

static void btvacuumscan ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats,
IndexBulkDeleteCallback  callback,
void *  callback_state,
BTCycleId  cycleid 
)
static

Definition at line 904 of file nbtree.c.

907 {
908  Relation rel = info->index;
909  BTVacState vstate;
910  BlockNumber num_pages;
911  BlockNumber scanblkno;
912  bool needLock;
913 
914  /*
915  * Reset fields that track information about the entire index now. This
916  * avoids double-counting in the case where a single VACUUM command
917  * requires multiple scans of the index.
918  *
919  * Avoid resetting the tuples_removed and pages_newly_deleted fields here,
920  * since they track information about the VACUUM command, and so must last
921  * across each call to btvacuumscan().
922  *
923  * (Note that pages_free is treated as state about the whole index, not
924  * the current VACUUM. This is appropriate because RecordFreeIndexPage()
925  * calls are idempotent, and get repeated for the same deleted pages in
926  * some scenarios. The point for us is to track the number of recyclable
927  * pages in the index at the end of the VACUUM command.)
928  */
929  stats->num_pages = 0;
930  stats->num_index_tuples = 0;
931  stats->pages_deleted = 0;
932  stats->pages_free = 0;
933 
934  /* Set up info to pass down to btvacuumpage */
935  vstate.info = info;
936  vstate.stats = stats;
937  vstate.callback = callback;
938  vstate.callback_state = callback_state;
939  vstate.cycleid = cycleid;
940 
941  /* Create a temporary memory context to run _bt_pagedel in */
942  vstate.pagedelcontext = AllocSetContextCreate(CurrentMemoryContext,
943  "_bt_pagedel",
944  ALLOCSET_DEFAULT_SIZES);
945 
946  /* Initialize vstate fields used by _bt_pendingfsm_finalize */
947  vstate.bufsize = 0;
948  vstate.maxbufsize = 0;
949  vstate.pendingpages = NULL;
950  vstate.npendingpages = 0;
951  /* Consider applying _bt_pendingfsm_finalize optimization */
952  _bt_pendingfsm_init(rel, &vstate, (callback == NULL));
953 
954  /*
955  * The outer loop iterates over all index pages except the metapage, in
956  * physical order (we hope the kernel will cooperate in providing
957  * read-ahead for speed). It is critical that we visit all leaf pages,
958  * including ones added after we start the scan, else we might fail to
959  * delete some deletable tuples. Hence, we must repeatedly check the
960  * relation length. We must acquire the relation-extension lock while
961  * doing so to avoid a race condition: if someone else is extending the
962  * relation, there is a window where bufmgr/smgr have created a new
963  * all-zero page but it hasn't yet been write-locked by _bt_getbuf(). If
964  * we manage to scan such a page here, we'll improperly assume it can be
965  * recycled. Taking the lock synchronizes things enough to prevent a
966  * problem: either num_pages won't include the new page, or _bt_getbuf
967  * already has write lock on the buffer and it will be fully initialized
968  * before we can examine it. Also, we need not worry if a page is added
969  * immediately after we look; the page splitting code already has
970  * write-lock on the left page before it adds a right page, so we must
971  * already have processed any tuples due to be moved into such a page.
972  *
973  * We can skip locking for new or temp relations, however, since no one
974  * else could be accessing them.
975  */
976  needLock = !RELATION_IS_LOCAL(rel);
977 
978  scanblkno = BTREE_METAPAGE + 1;
979  for (;;)
980  {
981  /* Get the current relation length */
982  if (needLock)
983  LockRelationForExtension(rel, ExclusiveLock);
984  num_pages = RelationGetNumberOfBlocks(rel);
985  if (needLock)
986  UnlockRelationForExtension(rel, ExclusiveLock);
987 
988  if (info->report_progress)
989  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
990  num_pages);
991 
992  /* Quit if we've scanned the whole relation */
993  if (scanblkno >= num_pages)
994  break;
995  /* Iterate over pages, then loop back to recheck length */
996  for (; scanblkno < num_pages; scanblkno++)
997  {
998  btvacuumpage(&vstate, scanblkno);
999  if (info->report_progress)
1000  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1001  scanblkno);
1002  }
1003  }
1004 
1005  /* Set statistics num_pages field to final size of index */
1006  stats->num_pages = num_pages;
1007 
1008  MemoryContextDelete(vstate.pagedelcontext);
1009 
1010  /*
1011  * If there were any calls to _bt_pagedel() during scan of the index then
1012  * see if any of the resulting pages can be placed in the FSM now. When
1013  * it's not safe we'll have to leave it up to a future VACUUM operation.
1014  *
1015  * Finally, if we placed any pages in the FSM (either just now or during
1016  * the scan), forcibly update the upper-level FSM pages to ensure that
1017  * searchers can find them.
1018  */
1019  _bt_pendingfsm_finalize(rel, &vstate);
1020  if (stats->pages_free > 0)
1021  IndexFreeSpaceMapVacuum(rel);
1022 }
void pgstat_progress_update_param(int index, int64 val)
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:161
void IndexFreeSpaceMapVacuum(Relation rel)
Definition: indexfsm.c:71
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:431
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:481
#define ExclusiveLock
Definition: lockdefs.h:42
MemoryContext CurrentMemoryContext
Definition: mcxt.c:135
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:153
void _bt_pendingfsm_finalize(Relation rel, BTVacState *vstate)
Definition: nbtpage.c:2956
void _bt_pendingfsm_init(Relation rel, BTVacState *vstate, bool cleanuponly)
Definition: nbtpage.c:2915
static void btvacuumpage(BTVacState *vstate, BlockNumber scanblkno)
Definition: nbtree.c:1035
#define PROGRESS_SCAN_BLOCKS_DONE
Definition: progress.h:120
#define PROGRESS_SCAN_BLOCKS_TOTAL
Definition: progress.h:119
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:648
BTPendingFSM * pendingpages
Definition: nbtree.h:345
int npendingpages
Definition: nbtree.h:346
int bufsize
Definition: nbtree.h:343
int maxbufsize
Definition: nbtree.h:344
BlockNumber num_pages
Definition: genam.h:76
bool report_progress
Definition: genam.h:48

References _bt_pendingfsm_finalize(), _bt_pendingfsm_init(), ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, BTREE_METAPAGE, btvacuumpage(), BTVacState::bufsize, BTVacState::callback, callback(), BTVacState::callback_state, CurrentMemoryContext, BTVacState::cycleid, ExclusiveLock, IndexVacuumInfo::index, IndexFreeSpaceMapVacuum(), BTVacState::info, LockRelationForExtension(), BTVacState::maxbufsize, MemoryContextDelete(), BTVacState::npendingpages, IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::num_pages, BTVacState::pagedelcontext, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, BTVacState::pendingpages, pgstat_progress_update_param(), PROGRESS_SCAN_BLOCKS_DONE, PROGRESS_SCAN_BLOCKS_TOTAL, RELATION_IS_LOCAL, RelationGetNumberOfBlocks, IndexVacuumInfo::report_progress, BTVacState::stats, and UnlockRelationForExtension().

Referenced by btbulkdelete(), and btvacuumcleanup().
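
The callback/callback_state pair that btbulkdelete() threads through btvacuumscan() down to btvacuumpage() has the IndexBulkDeleteCallback shape listed above: given a heap TID taken from an index tuple, return true if the pointed-to heap tuple is known dead and the index entry should be removed. The sketch below illustrates that contract; it is not the actual VACUUM callback, and DeadTidSet is a hypothetical state struct, though a binary search over sorted dead TIDs is essentially the strategy lazy VACUUM's callback has used.

#include "postgres.h"
#include "storage/itemptr.h"

/* Hypothetical callback state: a sorted array of known-dead heap TIDs. */
typedef struct DeadTidSet
{
    ItemPointerData *tids;      /* sorted per ItemPointerCompare() */
    int         ntids;
} DeadTidSet;

/* Matches the IndexBulkDeleteCallback signature from genam.h. */
static bool
dead_tid_callback(ItemPointer itemptr, void *state)
{
    DeadTidSet *set = (DeadTidSet *) state;
    int         lo = 0;
    int         hi = set->ntids - 1;

    while (lo <= hi)
    {
        int         mid = lo + (hi - lo) / 2;
        int32       cmp = ItemPointerCompare(itemptr, &set->tids[mid]);

        if (cmp == 0)
            return true;        /* index entry points at a dead heap row */
        if (cmp < 0)
            hi = mid - 1;
        else
            lo = mid + 1;
    }
    return false;               /* heap tuple still live; keep index entry */
}

btvacuumpage() applies the callback to every heap TID it finds, including each TID inside a posting-list tuple; that is why nhtidsdead can exceed ndeletable + nupdatable in the Assert at source line 1271, since removing or trimming a single posting-list tuple can account for several dead heap TIDs at once.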