PostgreSQL Source Code  git master
nbtree.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "access/nbtxlog.h"
#include "access/relscan.h"
#include "access/xlog.h"
#include "commands/progress.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "nodes/execnodes.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "storage/condition_variable.h"
#include "storage/indexfsm.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/smgr.h"
#include "utils/builtins.h"
#include "utils/index_selfuncs.h"
#include "utils/memutils.h"
Include dependency graph for nbtree.c:

Go to the source code of this file.

Data Structures

struct  BTVacState
 
struct  BTParallelScanDescData
 

Typedefs

typedef struct BTParallelScanDescData BTParallelScanDescData
 
typedef struct BTParallelScanDescData * BTParallelScanDesc
 

Enumerations

enum  BTPS_State { BTPARALLEL_NOT_INITIALIZED, BTPARALLEL_ADVANCING, BTPARALLEL_IDLE, BTPARALLEL_DONE }
 

Functions

static void btvacuumscan (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid)
 
static void btvacuumpage (BTVacState *vstate, BlockNumber scanblkno)
 
static BTVacuumPosting btreevacuumposting (BTVacState *vstate, IndexTuple posting, OffsetNumber updatedoffset, int *nremaining)
 
Datum bthandler (PG_FUNCTION_ARGS)
 
void btbuildempty (Relation index)
 
bool btinsert (Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo)
 
bool btgettuple (IndexScanDesc scan, ScanDirection dir)
 
int64 btgetbitmap (IndexScanDesc scan, TIDBitmap *tbm)
 
IndexScanDesc btbeginscan (Relation rel, int nkeys, int norderbys)
 
void btrescan (IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
 
void btendscan (IndexScanDesc scan)
 
void btmarkpos (IndexScanDesc scan)
 
void btrestrpos (IndexScanDesc scan)
 
Size btestimateparallelscan (void)
 
void btinitparallelscan (void *target)
 
void btparallelrescan (IndexScanDesc scan)
 
bool _bt_parallel_seize (IndexScanDesc scan, BlockNumber *pageno)
 
void _bt_parallel_release (IndexScanDesc scan, BlockNumber scan_page)
 
void _bt_parallel_done (IndexScanDesc scan)
 
void _bt_parallel_advance_array_keys (IndexScanDesc scan)
 
static bool _bt_vacuum_needs_cleanup (IndexVacuumInfo *info)
 
IndexBulkDeleteResult * btbulkdelete (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
 
IndexBulkDeleteResult * btvacuumcleanup (IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 
bool btcanreturn (Relation index, int attno)
 

Typedef Documentation

◆ BTParallelScanDesc

Definition at line 90 of file nbtree.c.

◆ BTParallelScanDescData

Enumeration Type Documentation

◆ BTPS_State

enum BTPS_State
Enumerator
BTPARALLEL_NOT_INITIALIZED 
BTPARALLEL_ADVANCING 
BTPARALLEL_IDLE 
BTPARALLEL_DONE 

Definition at line 66 of file nbtree.c.
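
For orientation, the shared-memory area whose state these enumerators describe is struct BTParallelScanDescData (listed under Data Structures above). The following is only a rough sketch, reconstructed from the members referenced elsewhere on this page (btps_scanPage, btps_pageStatus, btps_arrayKeyCount, btps_mutex, btps_cv); see nbtree.c for the authoritative definition and its comments.

 typedef struct BTParallelScanDescData
 {
     BlockNumber       btps_scanPage;      /* latest or next page to be scanned */
     BTPS_State        btps_pageStatus;    /* current state of the parallel scan */
     int               btps_arrayKeyCount; /* which set of array keys is in use */
     slock_t           btps_mutex;         /* protects the fields above */
     ConditionVariable btps_cv;            /* waited on in _bt_parallel_seize() */
 } BTParallelScanDescData;

 typedef struct BTParallelScanDescData *BTParallelScanDesc;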

Function Documentation

◆ _bt_parallel_advance_array_keys()

void _bt_parallel_advance_array_keys ( IndexScanDesc  scan)

Definition at line 769 of file nbtree.c.

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_DONE, BTPARALLEL_NOT_INITIALIZED, InvalidBlockNumber, OffsetToPointer, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_advance_array_keys().

770 {
771  BTScanOpaque so = (BTScanOpaque) scan->opaque;
772  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
773  BTParallelScanDesc btscan;
774 
775  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
776  parallel_scan->ps_offset);
777 
778  so->arrayKeyCount++;
779  SpinLockAcquire(&btscan->btps_mutex);
780  if (btscan->btps_pageStatus == BTPARALLEL_DONE)
781  {
782  btscan->btps_scanPage = InvalidBlockNumber;
783  btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
784  btscan->btps_arrayKeyCount++;
785  }
786  SpinLockRelease(&btscan->btps_mutex);
787 }

◆ _bt_parallel_done()

void _bt_parallel_done ( IndexScanDesc  scan)

Definition at line 728 of file nbtree.c.

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_DONE, ConditionVariableBroadcast(), OffsetToPointer, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_first(), and _bt_readnextpage().

729 {
730  BTScanOpaque so = (BTScanOpaque) scan->opaque;
731  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
732  BTParallelScanDesc btscan;
733  bool status_changed = false;
734 
735  /* Do nothing, for non-parallel scans */
736  if (parallel_scan == NULL)
737  return;
738 
739  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
740  parallel_scan->ps_offset);
741 
742  /*
743  * Mark the parallel scan as done for this combination of scan keys,
744  * unless some other process already did so. See also
745  * _bt_advance_array_keys.
746  */
747  SpinLockAcquire(&btscan->btps_mutex);
748  if (so->arrayKeyCount >= btscan->btps_arrayKeyCount &&
749  btscan->btps_pageStatus != BTPARALLEL_DONE)
750  {
751  btscan->btps_pageStatus = BTPARALLEL_DONE;
752  status_changed = true;
753  }
754  SpinLockRelease(&btscan->btps_mutex);
755 
756  /* wake up all the workers associated with this parallel scan */
757  if (status_changed)
758  ConditionVariableBroadcast(&btscan->btps_cv);
759 }

◆ _bt_parallel_release()

void _bt_parallel_release ( IndexScanDesc  scan,
BlockNumber  scan_page 
)

Definition at line 705 of file nbtree.c.

References BTPARALLEL_IDLE, BTParallelScanDescData::btps_cv, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, ConditionVariableSignal(), OffsetToPointer, IndexScanDescData::parallel_scan, ParallelIndexScanDescData::ps_offset, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_readnextpage(), and _bt_readpage().

706 {
707  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
708  BTParallelScanDesc btscan;
709 
710  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
711  parallel_scan->ps_offset);
712 
713  SpinLockAcquire(&btscan->btps_mutex);
714  btscan->btps_scanPage = scan_page;
 715  btscan->btps_pageStatus = BTPARALLEL_IDLE;
 716  SpinLockRelease(&btscan->btps_mutex);
 717  ConditionVariableSignal(&btscan->btps_cv);
 718 }

◆ _bt_parallel_seize()

bool _bt_parallel_seize ( IndexScanDesc  scan,
BlockNumber *  pageno 
)

Definition at line 647 of file nbtree.c.

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_ADVANCING, BTPARALLEL_DONE, ConditionVariableCancelSleep(), ConditionVariableSleep(), OffsetToPointer, IndexScanDescData::opaque, P_NONE, IndexScanDescData::parallel_scan, SpinLockAcquire, SpinLockRelease, status(), and WAIT_EVENT_BTREE_PAGE.

Referenced by _bt_first(), _bt_readnextpage(), and _bt_steppage().

648 {
649  BTScanOpaque so = (BTScanOpaque) scan->opaque;
650  BTPS_State pageStatus;
651  bool exit_loop = false;
652  bool status = true;
653  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
654  BTParallelScanDesc btscan;
655 
656  *pageno = P_NONE;
657 
658  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
659  parallel_scan->ps_offset);
660 
661  while (1)
662  {
663  SpinLockAcquire(&btscan->btps_mutex);
664  pageStatus = btscan->btps_pageStatus;
665 
666  if (so->arrayKeyCount < btscan->btps_arrayKeyCount)
667  {
668  /* Parallel scan has already advanced to a new set of scankeys. */
669  status = false;
670  }
671  else if (pageStatus == BTPARALLEL_DONE)
672  {
673  /*
674  * We're done with this set of scankeys. This may be the end, or
675  * there could be more sets to try.
676  */
677  status = false;
678  }
679  else if (pageStatus != BTPARALLEL_ADVANCING)
680  {
681  /*
682  * We have successfully seized control of the scan for the purpose
683  * of advancing it to a new page!
684  */
685  btscan->btps_pageStatus = BTPARALLEL_ADVANCING;
686  *pageno = btscan->btps_scanPage;
687  exit_loop = true;
688  }
689  SpinLockRelease(&btscan->btps_mutex);
690  if (exit_loop || !status)
691  break;
 692  ConditionVariableSleep(&btscan->btps_cv, WAIT_EVENT_BTREE_PAGE);
 693  }
 694  ConditionVariableCancelSleep();
 695 
696  return status;
697 }
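
The sketch below (not code from nbtree.c) shows roughly how the callers named above (_bt_first(), _bt_readnextpage(), _bt_steppage()) are expected to combine _bt_parallel_seize(), _bt_parallel_release() and _bt_parallel_done(); the real callers live in nbtsearch.c and do considerably more work (descending the tree, reading tuples, handling backward scans), and next_blkno here is a hypothetical stand-in for the right sibling discovered while reading the seized page.

 static void
 example_parallel_step(IndexScanDesc scan, BlockNumber next_blkno)
 {
     BlockNumber pageno;

     /* Block until no other worker is advancing the scan, then take over. */
     if (!_bt_parallel_seize(scan, &pageno))
         return;                 /* scan already BTPARALLEL_DONE for this key set */

     /*
      * The caller would now read block pageno (or, while pageno is still
      * P_NONE, descend the tree to locate the first leaf page), remembering
      * which page to hand to the next worker.
      */
     if (next_blkno == P_NONE)
         _bt_parallel_done(scan);                /* no further pages to hand out */
     else
         _bt_parallel_release(scan, next_blkno); /* let another worker advance */
 }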

◆ _bt_vacuum_needs_cleanup()

static bool _bt_vacuum_needs_cleanup ( IndexVacuumInfo *  info)
static

Definition at line 800 of file nbtree.c.

References _bt_getbuf(), _bt_relbuf(), BT_READ, BTMetaPageData::btm_last_cleanup_num_heap_tuples, BTMetaPageData::btm_oldest_btpo_xact, BTMetaPageData::btm_version, BTPageGetMeta, BTREE_METAPAGE, BTREE_NOVAC_VERSION, BufferGetPage, GlobalVisCheckRemovableXid(), IndexVacuumInfo::index, IndexVacuumInfo::num_heap_tuples, RelationData::rd_options, TransactionIdIsValid, vacuum_cleanup_index_scale_factor, and BTOptions::vacuum_cleanup_index_scale_factor.

Referenced by btvacuumcleanup().

801 {
802  Buffer metabuf;
803  Page metapg;
804  BTMetaPageData *metad;
805  bool result = false;
806 
807  metabuf = _bt_getbuf(info->index, BTREE_METAPAGE, BT_READ);
808  metapg = BufferGetPage(metabuf);
809  metad = BTPageGetMeta(metapg);
810 
811  /*
812  * XXX: If IndexVacuumInfo contained the heap relation, we could be more
813  * aggressive about vacuuming non catalog relations by passing the table
814  * to GlobalVisCheckRemovableXid().
815  */
816 
817  if (metad->btm_version < BTREE_NOVAC_VERSION)
818  {
819  /*
820  * Do cleanup if metapage needs upgrade, because we don't have
821  * cleanup-related meta-information yet.
822  */
823  result = true;
824  }
825  else if (TransactionIdIsValid(metad->btm_oldest_btpo_xact) &&
 826  GlobalVisCheckRemovableXid(NULL, metad->btm_oldest_btpo_xact))
 827  {
828  /*
829  * If any oldest btpo.xact from a previously deleted page in the index
830  * is visible to everyone, then at least one deleted page can be
831  * recycled -- don't skip cleanup.
832  */
833  result = true;
834  }
835  else
836  {
837  BTOptions *relopts;
838  float8 cleanup_scale_factor;
839  float8 prev_num_heap_tuples;
840 
841  /*
842  * If table receives enough insertions and no cleanup was performed,
843  * then index would appear have stale statistics. If scale factor is
844  * set, we avoid that by performing cleanup if the number of inserted
845  * tuples exceeds vacuum_cleanup_index_scale_factor fraction of
846  * original tuples count.
847  */
848  relopts = (BTOptions *) info->index->rd_options;
849  cleanup_scale_factor = (relopts &&
850  relopts->vacuum_cleanup_index_scale_factor >= 0)
 851  ? relopts->vacuum_cleanup_index_scale_factor
 852  : vacuum_cleanup_index_scale_factor;
 853  prev_num_heap_tuples = metad->btm_last_cleanup_num_heap_tuples;
854 
855  if (cleanup_scale_factor <= 0 ||
856  info->num_heap_tuples < 0 ||
857  prev_num_heap_tuples <= 0 ||
858  (info->num_heap_tuples - prev_num_heap_tuples) /
859  prev_num_heap_tuples >= cleanup_scale_factor)
860  result = true;
861  }
862 
863  _bt_relbuf(info->index, metabuf);
864  return result;
865 }
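
As a worked example of the final test above: suppose the previous cleanup recorded btm_last_cleanup_num_heap_tuples = 1,000,000 and the table has since grown so that info->num_heap_tuples is 1,150,000. With a (hypothetical) vacuum_cleanup_index_scale_factor setting of 0.1, the inserted fraction is (1,150,000 - 1,000,000) / 1,000,000 = 0.15, which is >= 0.1, so the function returns true and btvacuumcleanup() scans the index even though btbulkdelete() was never called.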

◆ btbeginscan()

IndexScanDesc btbeginscan ( Relation  rel,
int  nkeys,
int  norderbys 
)

Definition at line 355 of file nbtree.c.

References BTScanOpaqueData::arrayContext, BTScanOpaqueData::arrayKeyData, BTScanOpaqueData::arrayKeys, Assert, BTScanPosInvalidate, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanOpaqueData::keyData, BTScanOpaqueData::killedItems, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanOpaqueData::numArrayKeys, IndexScanDescData::numberOfKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), RelationGetDescr, RelationGetIndexScan(), and IndexScanDescData::xs_itupdesc.

Referenced by bthandler().

356 {
357  IndexScanDesc scan;
358  BTScanOpaque so;
359 
360  /* no order by operators allowed */
361  Assert(norderbys == 0);
362 
363  /* get the scan */
364  scan = RelationGetIndexScan(rel, nkeys, norderbys);
365 
366  /* allocate private workspace */
 367  so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
 368  BTScanPosInvalidate(so->currPos);
 369  BTScanPosInvalidate(so->markPos);
370  if (scan->numberOfKeys > 0)
371  so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
372  else
373  so->keyData = NULL;
374 
375  so->arrayKeyData = NULL; /* assume no array keys for now */
376  so->numArrayKeys = 0;
377  so->arrayKeys = NULL;
378  so->arrayContext = NULL;
379 
380  so->killedItems = NULL; /* until needed */
381  so->numKilled = 0;
382 
383  /*
384  * We don't know yet whether the scan will be index-only, so we do not
385  * allocate the tuple workspace arrays until btrescan. However, we set up
386  * scan->xs_itupdesc whether we'll need it or not, since that's so cheap.
387  */
388  so->currTuples = so->markTuples = NULL;
389 
390  scan->xs_itupdesc = RelationGetDescr(rel);
391 
392  scan->opaque = so;
393 
394  return scan;
395 }

◆ btbuildempty()

void btbuildempty ( Relation  index)

Definition at line 163 of file nbtree.c.

References _bt_allequalimage(), _bt_initmetapage(), BTREE_METAPAGE, INIT_FORKNUM, log_newpage(), RelFileNodeBackend::node, P_NONE, PageSetChecksumInplace(), palloc(), RelationData::rd_smgr, SMgrRelationData::smgr_rnode, smgrimmedsync(), and smgrwrite().

Referenced by bthandler().

164 {
165  Page metapage;
166 
167  /* Construct metapage. */
168  metapage = (Page) palloc(BLCKSZ);
169  _bt_initmetapage(metapage, P_NONE, 0, _bt_allequalimage(index, false));
170 
171  /*
172  * Write the page and log it. It might seem that an immediate sync would
173  * be sufficient to guarantee that the file exists on disk, but recovery
174  * itself might remove it while replaying, for example, an
175  * XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record. Therefore, we need
176  * this even when wal_level=minimal.
177  */
 178  PageSetChecksumInplace(metapage, BTREE_METAPAGE);
 179  smgrwrite(index->rd_smgr, INIT_FORKNUM, BTREE_METAPAGE,
 180  (char *) metapage, true);
 181  log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
 182  BTREE_METAPAGE, metapage, true);
183 
184  /*
185  * An immediate sync is required even if we xlog'd the page, because the
186  * write did not go through shared_buffers and therefore a concurrent
187  * checkpoint may have moved the redo pointer past our xlog record.
188  */
 189  smgrimmedsync(index->rd_smgr, INIT_FORKNUM);
 190 }

◆ btbulkdelete()

IndexBulkDeleteResult * btbulkdelete ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats,
IndexBulkDeleteCallback  callback,
void *  callback_state 
)

Definition at line 875 of file nbtree.c.

References _bt_end_vacuum(), _bt_end_vacuum_callback(), _bt_start_vacuum(), btvacuumscan(), IndexVacuumInfo::index, palloc0(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, and PointerGetDatum.

Referenced by bthandler().

877 {
878  Relation rel = info->index;
879  BTCycleId cycleid;
880 
881  /* allocate stats if first time through, else re-use existing struct */
882  if (stats == NULL)
 883  stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
 884 
885  /* Establish the vacuum cycle ID to use for this scan */
886  /* The ENSURE stuff ensures we clean up shared memory on failure */
 887  PG_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
 888  {
889  cycleid = _bt_start_vacuum(rel);
890 
891  btvacuumscan(info, stats, callback, callback_state, cycleid);
892  }
 893  PG_END_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
 894  _bt_end_vacuum(rel);
895 
896  return stats;
897 }

◆ btcanreturn()

bool btcanreturn ( Relation  index,
int  attno 
)

Definition at line 1509 of file nbtree.c.

Referenced by bthandler().

1510 {
1511  return true;
1512 }

◆ btendscan()

void btendscan ( IndexScanDesc  scan)

Definition at line 460 of file nbtree.c.

References _bt_killitems(), BTScanOpaqueData::arrayContext, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanOpaqueData::keyData, BTScanOpaqueData::killedItems, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, MemoryContextDelete(), BTScanOpaqueData::numKilled, IndexScanDescData::opaque, and pfree().

Referenced by bthandler().

461 {
462  BTScanOpaque so = (BTScanOpaque) scan->opaque;
463 
464  /* we aren't holding any read locks, but gotta drop the pins */
465  if (BTScanPosIsValid(so->currPos))
466  {
467  /* Before leaving current page, deal with any killed items */
468  if (so->numKilled > 0)
469  _bt_killitems(scan);
 470  BTScanPosUnpinIfPinned(so->currPos);
 471  }
472 
473  so->markItemIndex = -1;
 474  BTScanPosUnpinIfPinned(so->markPos);
 475 
476  /* No need to invalidate positions, the RAM is about to be freed. */
477 
478  /* Release storage */
479  if (so->keyData != NULL)
480  pfree(so->keyData);
481  /* so->arrayKeyData and so->arrayKeys are in arrayContext */
482  if (so->arrayContext != NULL)
 483  MemoryContextDelete(so->arrayContext);
 484  if (so->killedItems != NULL)
485  pfree(so->killedItems);
486  if (so->currTuples != NULL)
487  pfree(so->currTuples);
488  /* so->markTuples should not be pfree'd, see btrescan */
489  pfree(so);
490 }

◆ btestimateparallelscan()

Size btestimateparallelscan ( void  )

Definition at line 582 of file nbtree.c.

Referenced by bthandler().

583 {
584  return sizeof(BTParallelScanDescData);
585 }

◆ btgetbitmap()

int64 btgetbitmap ( IndexScanDesc  scan,
TIDBitmap *  tbm 
)

Definition at line 297 of file nbtree.c.

References _bt_advance_array_keys(), _bt_first(), _bt_next(), _bt_start_array_keys(), BTScanOpaqueData::currPos, ForwardScanDirection, BTScanPosItem::heapTid, BTScanPosData::itemIndex, BTScanPosData::items, BTScanPosData::lastItem, BTScanOpaqueData::numArrayKeys, IndexScanDescData::opaque, tbm_add_tuples(), and IndexScanDescData::xs_heaptid.

Referenced by bthandler().

298 {
299  BTScanOpaque so = (BTScanOpaque) scan->opaque;
300  int64 ntids = 0;
301  ItemPointer heapTid;
302 
303  /*
304  * If we have any array keys, initialize them.
305  */
306  if (so->numArrayKeys)
307  {
308  /* punt if we have any unsatisfiable array keys */
309  if (so->numArrayKeys < 0)
310  return ntids;
311 
 312  _bt_start_array_keys(scan, ForwardScanDirection);
 313  }
314 
315  /* This loop handles advancing to the next array elements, if any */
316  do
317  {
318  /* Fetch the first page & tuple */
319  if (_bt_first(scan, ForwardScanDirection))
320  {
321  /* Save tuple ID, and continue scanning */
322  heapTid = &scan->xs_heaptid;
323  tbm_add_tuples(tbm, heapTid, 1, false);
324  ntids++;
325 
326  for (;;)
327  {
328  /*
329  * Advance to next tuple within page. This is the same as the
330  * easy case in _bt_next().
331  */
332  if (++so->currPos.itemIndex > so->currPos.lastItem)
333  {
334  /* let _bt_next do the heavy lifting */
335  if (!_bt_next(scan, ForwardScanDirection))
336  break;
337  }
338 
339  /* Save tuple ID, and continue scanning */
340  heapTid = &so->currPos.items[so->currPos.itemIndex].heapTid;
341  tbm_add_tuples(tbm, heapTid, 1, false);
342  ntids++;
343  }
344  }
345  /* Now see if we have more array keys to deal with */
 346  } while (so->numArrayKeys && _bt_advance_array_keys(scan, ForwardScanDirection));
 347 
348  return ntids;
349 }

◆ btgettuple()

bool btgettuple ( IndexScanDesc  scan,
ScanDirection  dir 
)

Definition at line 223 of file nbtree.c.

References _bt_advance_array_keys(), _bt_first(), _bt_next(), _bt_start_array_keys(), BTScanPosIsValid, BTScanOpaqueData::currPos, BTScanPosData::itemIndex, IndexScanDescData::kill_prior_tuple, BTScanOpaqueData::killedItems, MaxTIDsPerBTreePage, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), and IndexScanDescData::xs_recheck.

Referenced by bthandler().

224 {
225  BTScanOpaque so = (BTScanOpaque) scan->opaque;
226  bool res;
227 
228  /* btree indexes are never lossy */
229  scan->xs_recheck = false;
230 
231  /*
232  * If we have any array keys, initialize them during first call for a
233  * scan. We can't do this in btrescan because we don't know the scan
234  * direction at that time.
235  */
236  if (so->numArrayKeys && !BTScanPosIsValid(so->currPos))
237  {
238  /* punt if we have any unsatisfiable array keys */
239  if (so->numArrayKeys < 0)
240  return false;
241 
242  _bt_start_array_keys(scan, dir);
243  }
244 
245  /* This loop handles advancing to the next array elements, if any */
246  do
247  {
248  /*
249  * If we've already initialized this scan, we can just advance it in
250  * the appropriate direction. If we haven't done so yet, we call
251  * _bt_first() to get the first item in the scan.
252  */
253  if (!BTScanPosIsValid(so->currPos))
254  res = _bt_first(scan, dir);
255  else
256  {
257  /*
258  * Check to see if we should kill the previously-fetched tuple.
259  */
260  if (scan->kill_prior_tuple)
261  {
262  /*
263  * Yes, remember it for later. (We'll deal with all such
264  * tuples at once right before leaving the index page.) The
265  * test for numKilled overrun is not just paranoia: if the
266  * caller reverses direction in the indexscan then the same
267  * item might get entered multiple times. It's not worth
268  * trying to optimize that, so we don't detect it, but instead
269  * just forget any excess entries.
270  */
271  if (so->killedItems == NULL)
272  so->killedItems = (int *)
273  palloc(MaxTIDsPerBTreePage * sizeof(int));
274  if (so->numKilled < MaxTIDsPerBTreePage)
275  so->killedItems[so->numKilled++] = so->currPos.itemIndex;
276  }
277 
278  /*
279  * Now continue the scan.
280  */
281  res = _bt_next(scan, dir);
282  }
283 
284  /* If we have a tuple, return it ... */
285  if (res)
286  break;
287  /* ... otherwise see if we have more array keys to deal with */
288  } while (so->numArrayKeys && _bt_advance_array_keys(scan, dir));
289 
290  return res;
291 }

◆ bthandler()

Datum bthandler ( PG_FUNCTION_ARGS  )

Definition at line 108 of file nbtree.c.

References IndexAmRoutine::amadjustmembers, IndexAmRoutine::ambeginscan, IndexAmRoutine::ambuild, IndexAmRoutine::ambuildempty, IndexAmRoutine::ambuildphasename, IndexAmRoutine::ambulkdelete, IndexAmRoutine::amcanbackward, IndexAmRoutine::amcaninclude, IndexAmRoutine::amcanmulticol, IndexAmRoutine::amcanorder, IndexAmRoutine::amcanorderbyop, IndexAmRoutine::amcanparallel, IndexAmRoutine::amcanreturn, IndexAmRoutine::amcanunique, IndexAmRoutine::amclusterable, IndexAmRoutine::amcostestimate, IndexAmRoutine::amendscan, IndexAmRoutine::amestimateparallelscan, IndexAmRoutine::amgetbitmap, IndexAmRoutine::amgettuple, IndexAmRoutine::aminitparallelscan, IndexAmRoutine::aminsert, IndexAmRoutine::amkeytype, IndexAmRoutine::ammarkpos, IndexAmRoutine::amoptionalkey, IndexAmRoutine::amoptions, IndexAmRoutine::amoptsprocnum, IndexAmRoutine::amparallelrescan, IndexAmRoutine::amparallelvacuumoptions, IndexAmRoutine::ampredlocks, IndexAmRoutine::amproperty, IndexAmRoutine::amrescan, IndexAmRoutine::amrestrpos, IndexAmRoutine::amsearcharray, IndexAmRoutine::amsearchnulls, IndexAmRoutine::amstorage, IndexAmRoutine::amstrategies, IndexAmRoutine::amsupport, IndexAmRoutine::amusemaintenanceworkmem, IndexAmRoutine::amvacuumcleanup, IndexAmRoutine::amvalidate, btadjustmembers(), btbeginscan(), btbuild(), btbuildempty(), btbuildphasename(), btbulkdelete(), btcanreturn(), btcostestimate(), btendscan(), btestimateparallelscan(), btgetbitmap(), btgettuple(), btinitparallelscan(), btinsert(), btmarkpos(), BTMaxStrategyNumber, BTNProcs, btoptions(), BTOPTIONS_PROC, btparallelrescan(), btproperty(), btrescan(), btrestrpos(), btvacuumcleanup(), btvalidate(), InvalidOid, makeNode, PG_RETURN_POINTER, VACUUM_OPTION_PARALLEL_BULKDEL, and VACUUM_OPTION_PARALLEL_COND_CLEANUP.

109 {
110  IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
111 
112  amroutine->amstrategies = BTMaxStrategyNumber;
113  amroutine->amsupport = BTNProcs;
114  amroutine->amoptsprocnum = BTOPTIONS_PROC;
115  amroutine->amcanorder = true;
116  amroutine->amcanorderbyop = false;
117  amroutine->amcanbackward = true;
118  amroutine->amcanunique = true;
119  amroutine->amcanmulticol = true;
120  amroutine->amoptionalkey = true;
121  amroutine->amsearcharray = true;
122  amroutine->amsearchnulls = true;
123  amroutine->amstorage = false;
124  amroutine->amclusterable = true;
125  amroutine->ampredlocks = true;
126  amroutine->amcanparallel = true;
127  amroutine->amcaninclude = true;
128  amroutine->amusemaintenanceworkmem = false;
129  amroutine->amparallelvacuumoptions =
 130  VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_COND_CLEANUP;
 131  amroutine->amkeytype = InvalidOid;
132 
133  amroutine->ambuild = btbuild;
134  amroutine->ambuildempty = btbuildempty;
135  amroutine->aminsert = btinsert;
136  amroutine->ambulkdelete = btbulkdelete;
137  amroutine->amvacuumcleanup = btvacuumcleanup;
138  amroutine->amcanreturn = btcanreturn;
139  amroutine->amcostestimate = btcostestimate;
140  amroutine->amoptions = btoptions;
141  amroutine->amproperty = btproperty;
142  amroutine->ambuildphasename = btbuildphasename;
143  amroutine->amvalidate = btvalidate;
144  amroutine->amadjustmembers = btadjustmembers;
145  amroutine->ambeginscan = btbeginscan;
146  amroutine->amrescan = btrescan;
147  amroutine->amgettuple = btgettuple;
148  amroutine->amgetbitmap = btgetbitmap;
149  amroutine->amendscan = btendscan;
150  amroutine->ammarkpos = btmarkpos;
151  amroutine->amrestrpos = btrestrpos;
 152  amroutine->amestimateparallelscan = btestimateparallelscan;
 153  amroutine->aminitparallelscan = btinitparallelscan;
 154  amroutine->amparallelrescan = btparallelrescan;
155 
156  PG_RETURN_POINTER(amroutine);
157 }
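
For context, a hedged sketch of how a handler built this way is consumed: GetIndexAmRoutine() in amapi.c calls the handler function through the function manager and casts the result, roughly as below (error checking omitted).

 IndexAmRoutine *
 get_am_routine_sketch(Oid amhandler)
 {
     Datum datum = OidFunctionCall0(amhandler);  /* invokes e.g. bthandler() */

     return (IndexAmRoutine *) DatumGetPointer(datum);
 }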

◆ btinitparallelscan()

void btinitparallelscan ( void *  target)

Definition at line 591 of file nbtree.c.

References BTPARALLEL_NOT_INITIALIZED, BTParallelScanDescData::btps_arrayKeyCount, BTParallelScanDescData::btps_cv, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, ConditionVariableInit(), InvalidBlockNumber, and SpinLockInit.

Referenced by bthandler().

592 {
593  BTParallelScanDesc bt_target = (BTParallelScanDesc) target;
594 
595  SpinLockInit(&bt_target->btps_mutex);
596  bt_target->btps_scanPage = InvalidBlockNumber;
 597  bt_target->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
 598  bt_target->btps_arrayKeyCount = 0;
599  ConditionVariableInit(&bt_target->btps_cv);
600 }

◆ btinsert()

bool btinsert ( Relation  rel,
Datum *  values,
bool *  isnull,
ItemPointer  ht_ctid,
Relation  heapRel,
IndexUniqueCheck  checkUnique,
bool  indexUnchanged,
IndexInfo *  indexInfo 
)

Definition at line 199 of file nbtree.c.

References _bt_doinsert(), index_form_tuple(), pfree(), RelationGetDescr, and IndexTupleData::t_tid.

Referenced by bthandler().

204 {
205  bool result;
206  IndexTuple itup;
207 
208  /* generate an index tuple */
209  itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
210  itup->t_tid = *ht_ctid;
211 
212  result = _bt_doinsert(rel, itup, checkUnique, indexUnchanged, heapRel);
213 
214  pfree(itup);
215 
216  return result;
217 }

◆ btmarkpos()

void btmarkpos ( IndexScanDesc  scan)

Definition at line 496 of file nbtree.c.

References _bt_mark_array_keys(), BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanPosData::itemIndex, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, and IndexScanDescData::opaque.

Referenced by bthandler().

497 {
498  BTScanOpaque so = (BTScanOpaque) scan->opaque;
499 
500  /* There may be an old mark with a pin (but no lock). */
 501  BTScanPosUnpinIfPinned(so->markPos);
 502 
503  /*
504  * Just record the current itemIndex. If we later step to next page
505  * before releasing the marked position, _bt_steppage makes a full copy of
506  * the currPos struct in markPos. If (as often happens) the mark is moved
507  * before we leave the page, we don't have to do that work.
508  */
509  if (BTScanPosIsValid(so->currPos))
510  so->markItemIndex = so->currPos.itemIndex;
511  else
512  {
 513  BTScanPosInvalidate(so->markPos);
 514  so->markItemIndex = -1;
515  }
516 
517  /* Also record the current positions of any array keys */
518  if (so->numArrayKeys)
519  _bt_mark_array_keys(scan);
520 }

◆ btparallelrescan()

void btparallelrescan ( IndexScanDesc  scan)

Definition at line 606 of file nbtree.c.

References Assert, BTPARALLEL_NOT_INITIALIZED, BTParallelScanDescData::btps_arrayKeyCount, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, InvalidBlockNumber, OffsetToPointer, IndexScanDescData::parallel_scan, ParallelIndexScanDescData::ps_offset, SpinLockAcquire, and SpinLockRelease.

Referenced by bthandler().

607 {
608  BTParallelScanDesc btscan;
609  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
610 
611  Assert(parallel_scan);
612 
613  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
614  parallel_scan->ps_offset);
615 
616  /*
617  * In theory, we don't need to acquire the spinlock here, because there
618  * shouldn't be any other workers running at this point, but we do so for
619  * consistency.
620  */
621  SpinLockAcquire(&btscan->btps_mutex);
 622  btscan->btps_scanPage = InvalidBlockNumber;
 623  btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
 624  btscan->btps_arrayKeyCount = 0;
625  SpinLockRelease(&btscan->btps_mutex);
626 }

◆ btreevacuumposting()

static BTVacuumPosting btreevacuumposting ( BTVacState *  vstate,
IndexTuple  posting,
OffsetNumber  updatedoffset,
int *  nremaining 
)
static

Definition at line 1460 of file nbtree.c.

References BTreeTupleGetNPosting(), BTreeTupleGetPosting(), BTVacState::callback, BTVacState::callback_state, BTVacuumPostingData::deletetids, i, BTVacuumPostingData::itup, BTVacuumPostingData::ndeletedtids, offsetof, palloc(), and BTVacuumPostingData::updatedoffset.

Referenced by btvacuumpage().

1462 {
1463  int live = 0;
1464  int nitem = BTreeTupleGetNPosting(posting);
1465  ItemPointer items = BTreeTupleGetPosting(posting);
1466  BTVacuumPosting vacposting = NULL;
1467 
1468  for (int i = 0; i < nitem; i++)
1469  {
1470  if (!vstate->callback(items + i, vstate->callback_state))
1471  {
1472  /* Live table TID */
1473  live++;
1474  }
1475  else if (vacposting == NULL)
1476  {
1477  /*
1478  * First dead table TID encountered.
1479  *
1480  * It's now clear that we need to delete one or more dead table
1481  * TIDs, so start maintaining metadata describing how to update
1482  * existing posting list tuple.
1483  */
1484  vacposting = palloc(offsetof(BTVacuumPostingData, deletetids) +
1485  nitem * sizeof(uint16));
1486 
1487  vacposting->itup = posting;
1488  vacposting->updatedoffset = updatedoffset;
1489  vacposting->ndeletedtids = 0;
1490  vacposting->deletetids[vacposting->ndeletedtids++] = i;
1491  }
1492  else
1493  {
1494  /* Second or subsequent dead table TID */
1495  vacposting->deletetids[vacposting->ndeletedtids++] = i;
1496  }
1497  }
1498 
1499  *nremaining = live;
1500  return vacposting;
1501 }

◆ btrescan()

void btrescan ( IndexScanDesc  scan,
ScanKey  scankey,
int  nscankeys,
ScanKey  orderbys,
int  norderbys 
)

Definition at line 401 of file nbtree.c.

References _bt_killitems(), _bt_preprocess_array_keys(), BTScanOpaqueData::arrayKeyCount, BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, IndexScanDescData::keyData, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, IndexScanDescData::numberOfKeys, BTScanOpaqueData::numberOfKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), and IndexScanDescData::xs_want_itup.

Referenced by bthandler().

403 {
404  BTScanOpaque so = (BTScanOpaque) scan->opaque;
405 
406  /* we aren't holding any read locks, but gotta drop the pins */
407  if (BTScanPosIsValid(so->currPos))
408  {
409  /* Before leaving current page, deal with any killed items */
410  if (so->numKilled > 0)
411  _bt_killitems(scan);
 412  BTScanPosUnpinIfPinned(so->currPos);
 413  BTScanPosInvalidate(so->currPos);
 414  }
415 
416  so->markItemIndex = -1;
417  so->arrayKeyCount = 0;
 418  BTScanPosUnpinIfPinned(so->markPos);
 419  BTScanPosInvalidate(so->markPos);
 420 
421  /*
422  * Allocate tuple workspace arrays, if needed for an index-only scan and
423  * not already done in a previous rescan call. To save on palloc
424  * overhead, both workspaces are allocated as one palloc block; only this
425  * function and btendscan know that.
426  *
427  * NOTE: this data structure also makes it safe to return data from a
428  * "name" column, even though btree name_ops uses an underlying storage
429  * datatype of cstring. The risk there is that "name" is supposed to be
430  * padded to NAMEDATALEN, but the actual index tuple is probably shorter.
431  * However, since we only return data out of tuples sitting in the
432  * currTuples array, a fetch of NAMEDATALEN bytes can at worst pull some
433  * data out of the markTuples array --- running off the end of memory for
434  * a SIGSEGV is not possible. Yeah, this is ugly as sin, but it beats
435  * adding special-case treatment for name_ops elsewhere.
436  */
437  if (scan->xs_want_itup && so->currTuples == NULL)
438  {
439  so->currTuples = (char *) palloc(BLCKSZ * 2);
440  so->markTuples = so->currTuples + BLCKSZ;
441  }
442 
443  /*
444  * Reset the scan keys
445  */
446  if (scankey && scan->numberOfKeys > 0)
447  memmove(scan->keyData,
448  scankey,
449  scan->numberOfKeys * sizeof(ScanKeyData));
450  so->numberOfKeys = 0; /* until _bt_preprocess_keys sets it */
451 
452  /* If any keys are SK_SEARCHARRAY type, set up array-key info */
 453  _bt_preprocess_array_keys(scan);
 454 }

◆ btrestrpos()

void btrestrpos ( IndexScanDesc  scan)

Definition at line 526 of file nbtree.c.

References _bt_killitems(), _bt_restore_array_keys(), BTScanPosInvalidate, BTScanPosIsPinned, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanPosData::buf, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, IncrBufferRefCount(), BTScanPosData::itemIndex, BTScanPosData::lastItem, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanPosData::nextTupleOffset, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numKilled, offsetof, and IndexScanDescData::opaque.

Referenced by bthandler().

527 {
528  BTScanOpaque so = (BTScanOpaque) scan->opaque;
529 
530  /* Restore the marked positions of any array keys */
531  if (so->numArrayKeys)
 532  _bt_restore_array_keys(scan);
 533 
534  if (so->markItemIndex >= 0)
535  {
536  /*
537  * The scan has never moved to a new page since the last mark. Just
538  * restore the itemIndex.
539  *
540  * NB: In this case we can't count on anything in so->markPos to be
541  * accurate.
542  */
543  so->currPos.itemIndex = so->markItemIndex;
544  }
545  else
546  {
547  /*
548  * The scan moved to a new page after last mark or restore, and we are
549  * now restoring to the marked page. We aren't holding any read
550  * locks, but if we're still holding the pin for the current position,
551  * we must drop it.
552  */
553  if (BTScanPosIsValid(so->currPos))
554  {
555  /* Before leaving current page, deal with any killed items */
556  if (so->numKilled > 0)
557  _bt_killitems(scan);
 558  BTScanPosUnpinIfPinned(so->currPos);
 559  }
560 
561  if (BTScanPosIsValid(so->markPos))
562  {
563  /* bump pin on mark buffer for assignment to current buffer */
564  if (BTScanPosIsPinned(so->markPos))
 565  IncrBufferRefCount(so->markPos.buf);
 566  memcpy(&so->currPos, &so->markPos,
567  offsetof(BTScanPosData, items[1]) +
568  so->markPos.lastItem * sizeof(BTScanPosItem));
569  if (so->currTuples)
570  memcpy(so->currTuples, so->markTuples,
 571  so->markPos.nextTupleOffset);
 572  }
573  else
 574  BTScanPosInvalidate(so->currPos);
 575  }
576 }

◆ btvacuumcleanup()

IndexBulkDeleteResult * btvacuumcleanup ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats 
)

Definition at line 905 of file nbtree.c.

References _bt_vacuum_needs_cleanup(), IndexVacuumInfo::analyze_only, btvacuumscan(), IndexVacuumInfo::estimated_count, IndexVacuumInfo::num_heap_tuples, IndexBulkDeleteResult::num_index_tuples, and palloc0().

Referenced by bthandler().

906 {
907  /* No-op in ANALYZE ONLY mode */
908  if (info->analyze_only)
909  return stats;
910 
911  /*
912  * If btbulkdelete was called, we need not do anything, just return the
913  * stats from the latest btbulkdelete call. If it wasn't called, we might
914  * still need to do a pass over the index, to recycle any newly-recyclable
915  * pages or to obtain index statistics. _bt_vacuum_needs_cleanup
916  * determines if either are needed.
917  *
918  * Since we aren't going to actually delete any leaf items, there's no
919  * need to go through all the vacuum-cycle-ID pushups.
920  */
921  if (stats == NULL)
922  {
923  /* Check if we need a cleanup */
924  if (!_bt_vacuum_needs_cleanup(info))
925  return NULL;
926 
 927  stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
 928  btvacuumscan(info, stats, NULL, NULL, 0);
929  }
930 
931  /*
932  * It's quite possible for us to be fooled by concurrent page splits into
933  * double-counting some index tuples, so disbelieve any total that exceeds
934  * the underlying heap's count ... if we know that accurately. Otherwise
935  * this might just make matters worse.
936  *
937  * Posting list tuples are another source of inaccuracy. Cleanup-only
938  * btvacuumscan calls assume that the number of index tuples can be used
939  * as num_index_tuples, even though num_index_tuples is supposed to
940  * represent the number of TIDs in the index. This naive approach can
941  * underestimate the number of tuples in the index.
942  */
943  if (!info->estimated_count)
944  {
945  if (stats->num_index_tuples > info->num_heap_tuples)
946  stats->num_index_tuples = info->num_heap_tuples;
947  }
948 
949  return stats;
950 }

◆ btvacuumpage()

static void btvacuumpage ( BTVacState *  vstate,
BlockNumber  scanblkno 
)
static

Definition at line 1099 of file nbtree.c.

References _bt_checkpage(), _bt_delitems_vacuum(), _bt_lockbuf(), _bt_page_recyclable(), _bt_pagedel(), _bt_relbuf(), _bt_upgradelockbufcleanup(), Assert, BT_READ, BTP_SPLIT_END, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTreeTupleGetNPosting(), BTreeTupleIsPivot(), BTreeTupleIsPosting(), btreevacuumposting(), buf, BufferGetPage, BTVacState::callback, callback(), BTVacState::callback_state, BTVacState::cycleid, ereport, errcode(), errmsg_internal(), i, IndexVacuumInfo::index, BTVacState::info, LOG, MAIN_FORKNUM, MarkBufferDirtyHint(), MaxIndexTuplesPerPage, MemoryContextReset(), MemoryContextSwitchTo(), IndexBulkDeleteResult::num_index_tuples, OffsetNumberNext, BTVacState::oldestBtpoXact, P_FIRSTDATAKEY, P_ISDELETED, P_ISHALFDEAD, P_ISLEAF, P_NONE, P_RIGHTMOST, BTVacState::pagedelcontext, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageGetSpecialPointer, PageIsNew, IndexBulkDeleteResult::pages_deleted, pfree(), RBM_NORMAL, ReadBufferExtended(), RecordFreeIndexPage(), RelationGetRelationName, BTVacState::stats, IndexVacuumInfo::strategy, IndexTupleData::t_tid, BTVacState::totFreePages, TransactionIdIsValid, TransactionIdPrecedes(), IndexBulkDeleteResult::tuples_removed, vacuum_delay_point(), and BTPageOpaqueData::xact.

Referenced by btvacuumscan().

1100 {
1101  IndexVacuumInfo *info = vstate->info;
1102  IndexBulkDeleteResult *stats = vstate->stats;
 1103  IndexBulkDeleteCallback callback = vstate->callback;
 1104  void *callback_state = vstate->callback_state;
1105  Relation rel = info->index;
1106  bool attempt_pagedel;
1107  BlockNumber blkno,
1108  backtrack_to;
1109  Buffer buf;
1110  Page page;
1111  BTPageOpaque opaque;
1112 
1113  blkno = scanblkno;
1114 
1115 backtrack:
1116 
1117  attempt_pagedel = false;
1118  backtrack_to = P_NONE;
1119 
1120  /* call vacuum_delay_point while not holding any buffer lock */
 1121  vacuum_delay_point();
 1122 
1123  /*
1124  * We can't use _bt_getbuf() here because it always applies
1125  * _bt_checkpage(), which will barf on an all-zero page. We want to
1126  * recycle all-zero pages, not fail. Also, we want to use a nondefault
1127  * buffer access strategy.
1128  */
1129  buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
1130  info->strategy);
1131  _bt_lockbuf(rel, buf, BT_READ);
1132  page = BufferGetPage(buf);
1133  opaque = NULL;
1134  if (!PageIsNew(page))
1135  {
1136  _bt_checkpage(rel, buf);
1137  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1138  }
1139 
1140  Assert(blkno <= scanblkno);
1141  if (blkno != scanblkno)
1142  {
1143  /*
1144  * We're backtracking.
1145  *
1146  * We followed a right link to a sibling leaf page (a page that
1147  * happens to be from a block located before scanblkno). The only
1148  * case we want to do anything with is a live leaf page having the
1149  * current vacuum cycle ID.
1150  *
1151  * The page had better be in a state that's consistent with what we
1152  * expect. Check for conditions that imply corruption in passing. It
1153  * can't be half-dead because only an interrupted VACUUM process can
1154  * leave pages in that state, so we'd definitely have dealt with it
1155  * back when the page was the scanblkno page (half-dead pages are
1156  * always marked fully deleted by _bt_pagedel()). This assumes that
1157  * there can be only one vacuum process running at a time.
1158  */
1159  if (!opaque || !P_ISLEAF(opaque) || P_ISHALFDEAD(opaque))
1160  {
1161  Assert(false);
1162  ereport(LOG,
1163  (errcode(ERRCODE_INDEX_CORRUPTED),
1164  errmsg_internal("right sibling %u of scanblkno %u unexpectedly in an inconsistent state in index \"%s\"",
1165  blkno, scanblkno, RelationGetRelationName(rel))));
1166  _bt_relbuf(rel, buf);
1167  return;
1168  }
1169 
1170  /*
1171  * We may have already processed the page in an earlier call, when the
1172  * page was scanblkno. This happens when the leaf page split occurred
1173  * after the scan began, but before the right sibling page became the
1174  * scanblkno.
1175  *
1176  * Page may also have been deleted by current btvacuumpage() call,
1177  * since _bt_pagedel() sometimes deletes the right sibling page of
1178  * scanblkno in passing (it does so after we decided where to
1179  * backtrack to). We don't need to process this page as a deleted
1180  * page a second time now (in fact, it would be wrong to count it as a
1181  * deleted page in the bulk delete statistics a second time).
1182  */
1183  if (opaque->btpo_cycleid != vstate->cycleid || P_ISDELETED(opaque))
1184  {
1185  /* Done with current scanblkno (and all lower split pages) */
1186  _bt_relbuf(rel, buf);
1187  return;
1188  }
1189  }
1190 
1191  /* Page is valid, see what to do with it */
1192  if (_bt_page_recyclable(page))
1193  {
1194  /* Okay to recycle this page (which could be leaf or internal) */
1195  RecordFreeIndexPage(rel, blkno);
1196  vstate->totFreePages++;
1197  stats->pages_deleted++;
1198  }
1199  else if (P_ISDELETED(opaque))
1200  {
1201  /*
1202  * Already deleted page (which could be leaf or internal). Can't
1203  * recycle yet.
1204  */
1205  stats->pages_deleted++;
1206 
1207  /* Maintain the oldest btpo.xact */
1208  if (!TransactionIdIsValid(vstate->oldestBtpoXact) ||
1209  TransactionIdPrecedes(opaque->btpo.xact, vstate->oldestBtpoXact))
1210  vstate->oldestBtpoXact = opaque->btpo.xact;
1211  }
1212  else if (P_ISHALFDEAD(opaque))
1213  {
1214  /*
1215  * Half-dead leaf page. Try to delete now. Might update
1216  * oldestBtpoXact and pages_deleted below.
1217  */
1218  attempt_pagedel = true;
1219  }
1220  else if (P_ISLEAF(opaque))
1221  {
1222  OffsetNumber deletable[MaxIndexTuplesPerPage];
1223  int ndeletable;
1224  BTVacuumPosting updatable[MaxIndexTuplesPerPage];
1225  int nupdatable;
1226  OffsetNumber offnum,
1227  minoff,
1228  maxoff;
1229  int nhtidsdead,
1230  nhtidslive;
1231 
1232  /*
1233  * Trade in the initial read lock for a super-exclusive write lock on
1234  * this page. We must get such a lock on every leaf page over the
1235  * course of the vacuum scan, whether or not it actually contains any
1236  * deletable tuples --- see nbtree/README.
1237  */
1238  _bt_upgradelockbufcleanup(rel, buf);
1239 
1240  /*
1241  * Check whether we need to backtrack to earlier pages. What we are
1242  * concerned about is a page split that happened since we started the
1243  * vacuum scan. If the split moved tuples on the right half of the
1244  * split (i.e. the tuples that sort high) to a block that we already
1245  * passed over, then we might have missed the tuples. We need to
1246  * backtrack now. (Must do this before possibly clearing btpo_cycleid
1247  * or deleting scanblkno page below!)
1248  */
1249  if (vstate->cycleid != 0 &&
1250  opaque->btpo_cycleid == vstate->cycleid &&
1251  !(opaque->btpo_flags & BTP_SPLIT_END) &&
1252  !P_RIGHTMOST(opaque) &&
1253  opaque->btpo_next < scanblkno)
1254  backtrack_to = opaque->btpo_next;
1255 
1256  /*
1257  * When each VACUUM begins, it determines an OldestXmin cutoff value.
1258  * Tuples before the cutoff are removed by VACUUM. Scan over all
1259  items to see which ones need to be deleted according to the cutoff
1260  point, using the callback.
1261  */
1262  ndeletable = 0;
1263  nupdatable = 0;
1264  minoff = P_FIRSTDATAKEY(opaque);
1265  maxoff = PageGetMaxOffsetNumber(page);
1266  nhtidsdead = 0;
1267  nhtidslive = 0;
1268  if (callback)
1269  {
1270  for (offnum = minoff;
1271  offnum <= maxoff;
1272  offnum = OffsetNumberNext(offnum))
1273  {
1274  IndexTuple itup;
1275 
1276  itup = (IndexTuple) PageGetItem(page,
1277  PageGetItemId(page, offnum));
1278 
1279  /*
1280  * Hot Standby assumes that it's okay that XLOG_BTREE_VACUUM
1281  * records do not produce their own conflicts. This is safe
1282  * as long as the callback function only considers whether the
1283  * index tuple refers to pre-cutoff heap tuples that were
1284  * certainly already pruned away during VACUUM's initial heap
1285  * scan by the time we get here. (heapam's XLOG_HEAP2_CLEAN
1286  * and XLOG_HEAP2_CLEANUP_INFO records produce conflicts using
1287  * a latestRemovedXid value for the pointed-to heap tuples, so
1288  * there is no need to produce our own conflict now.)
1289  *
1290  * Backends with snapshots acquired after a VACUUM starts but
1291  * before it finishes could have visibility cutoff with a
1292  * later xid than VACUUM's OldestXmin cutoff. These backends
1293  * might happen to opportunistically mark some index tuples
1294  * LP_DEAD before we reach them, even though they may be after
1295  * our cutoff. We don't try to kill these "extra" index
1296  tuples in _bt_delitems_vacuum(). This keeps things simple,
1297  * and allows us to always avoid generating our own conflicts.
1298  */
1299  Assert(!BTreeTupleIsPivot(itup));
1300  if (!BTreeTupleIsPosting(itup))
1301  {
1302  /* Regular tuple, standard table TID representation */
1303  if (callback(&itup->t_tid, callback_state))
1304  {
1305  deletable[ndeletable++] = offnum;
1306  nhtidsdead++;
1307  }
1308  else
1309  nhtidslive++;
1310  }
1311  else
1312  {
1313  BTVacuumPosting vacposting;
1314  int nremaining;
1315 
1316  /* Posting list tuple */
1317  vacposting = btreevacuumposting(vstate, itup, offnum,
1318  &nremaining);
1319  if (vacposting == NULL)
1320  {
1321  /*
1322  * All table TIDs from the posting tuple remain, so no
1323  * delete or update required
1324  */
1325  Assert(nremaining == BTreeTupleGetNPosting(itup));
1326  }
1327  else if (nremaining > 0)
1328  {
1329 
1330  /*
1331  * Store metadata about posting list tuple in
1332  * updatable array for entire page. Existing tuple
1333  * will be updated during the later call to
1334  * _bt_delitems_vacuum().
1335  */
1336  Assert(nremaining < BTreeTupleGetNPosting(itup));
1337  updatable[nupdatable++] = vacposting;
1338  nhtidsdead += BTreeTupleGetNPosting(itup) - nremaining;
1339  }
1340  else
1341  {
1342  /*
1343  * All table TIDs from the posting list must be
1344  * deleted. We'll delete the index tuple completely
1345  * (no update required).
1346  */
1347  Assert(nremaining == 0);
1348  deletable[ndeletable++] = offnum;
1349  nhtidsdead += BTreeTupleGetNPosting(itup);
1350  pfree(vacposting);
1351  }
1352 
1353  nhtidslive += nremaining;
1354  }
1355  }
1356  }
1357 
1358  /*
1359  * Apply any needed deletes or updates. We issue just one
1360  * _bt_delitems_vacuum() call per page, so as to minimize WAL traffic.
1361  */
1362  if (ndeletable > 0 || nupdatable > 0)
1363  {
1364  Assert(nhtidsdead >= ndeletable + nupdatable);
1365  _bt_delitems_vacuum(rel, buf, deletable, ndeletable, updatable,
1366  nupdatable);
1367 
1368  stats->tuples_removed += nhtidsdead;
1369  /* must recompute maxoff */
1370  maxoff = PageGetMaxOffsetNumber(page);
1371 
1372  /* can't leak memory here */
1373  for (int i = 0; i < nupdatable; i++)
1374  pfree(updatable[i]);
1375  }
1376  else
1377  {
1378  /*
1379  * If the leaf page has been split during this vacuum cycle, it
1380  * seems worth expending a write to clear btpo_cycleid even if we
1381  * don't have any deletions to do. (If we do, _bt_delitems_vacuum
1382  * takes care of this.) This ensures we won't process the page
1383  * again.
1384  *
1385  * We treat this like a hint-bit update because there's no need to
1386  * WAL-log it.
1387  */
1388  Assert(nhtidsdead == 0);
1389  if (vstate->cycleid != 0 &&
1390  opaque->btpo_cycleid == vstate->cycleid)
1391  {
1392  opaque->btpo_cycleid = 0;
1393  MarkBufferDirtyHint(buf, true);
1394  }
1395  }
1396 
1397  /*
1398  * If the leaf page is now empty, try to delete it; else count the
1399  * live tuples (live table TIDs in posting lists are counted as
1400  * separate live tuples). We don't delete when backtracking, though,
1401  * since that would require teaching _bt_pagedel() about backtracking
1402  * (doesn't seem worth adding more complexity to deal with that).
1403  *
1404  * We don't count the number of live TIDs during cleanup-only calls to
1405  * btvacuumscan (i.e. when callback is not set). We count the number
1406  * of index tuples directly instead. This avoids the expense of
1407  * directly examining all of the tuples on each page.
1408  */
1409  if (minoff > maxoff)
1410  attempt_pagedel = (blkno == scanblkno);
1411  else if (callback)
1412  stats->num_index_tuples += nhtidslive;
1413  else
1414  stats->num_index_tuples += maxoff - minoff + 1;
1415 
1416  Assert(!attempt_pagedel || nhtidslive == 0);
1417  }
1418 
1419  if (attempt_pagedel)
1420  {
1421  MemoryContext oldcontext;
1422 
1423  /* Run pagedel in a temp context to avoid memory leakage */
1424  MemoryContextReset(vstate->pagedelcontext);
1425  oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
1426 
1427  /*
1428  * We trust the _bt_pagedel return value because it does not include
1429  * any page that a future call here from btvacuumscan is expected to
1430  * count. There will be no double-counting.
1431  */
1432  Assert(blkno == scanblkno);
1433  stats->pages_deleted += _bt_pagedel(rel, buf, &vstate->oldestBtpoXact);
1434 
1435  MemoryContextSwitchTo(oldcontext);
1436  /* pagedel released buffer, so we shouldn't */
1437  }
1438  else
1439  _bt_relbuf(rel, buf);
1440 
1441  if (backtrack_to != P_NONE)
1442  {
1443  blkno = backtrack_to;
1444  goto backtrack;
1445  }
1446 }
#define BTP_SPLIT_END
Definition: nbtree.h:78
BlockNumber btpo_next
Definition: nbtree.h:60
MemoryContext pagedelcontext
Definition: nbtree.c:51
double tuples_removed
Definition: genam.h:78
void * callback_state
Definition: nbtree.c:47
static bool BTreeTupleIsPivot(IndexTuple itup)
Definition: nbtree.h:349
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition: bufmgr.c:3770
BlockNumber totFreePages
Definition: nbtree.c:49
void _bt_delitems_vacuum(Relation rel, Buffer buf, OffsetNumber *deletable, int ndeletable, BTVacuumPosting *updatable, int nupdatable)
Definition: nbtpage.c:1131
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:245
IndexBulkDeleteCallback callback
Definition: nbtree.c:46
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:666
ItemPointerData t_tid
Definition: itup.h:37
static BTVacuumPosting btreevacuumposting(BTVacState *vstate, IndexTuple posting, OffsetNumber updatedoffset, int *nremaining)
Definition: nbtree.c:1460
union BTPageOpaqueData::@46 btpo
BufferAccessStrategy strategy
Definition: genam.h:52
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
#define P_NONE
Definition: nbtree.h:207
int errcode(int sqlerrcode)
Definition: elog.c:704
Relation index
Definition: genam.h:46
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:137
uint32 BlockNumber
Definition: block.h:31
#define LOG
Definition: elog.h:26
bool _bt_page_recyclable(Page page)
Definition: nbtpage.c:1087
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
TransactionId xact
Definition: nbtree.h:64
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:70
uint16 OffsetNumber
Definition: off.h:24
#define BT_READ
Definition: nbtree.h:588
void pfree(void *pointer)
Definition: mcxt.c:1057
void _bt_checkpage(Relation rel, Buffer buf)
Definition: nbtpage.c:732
#define P_ISHALFDEAD(opaque)
Definition: nbtree.h:219
BTCycleId btpo_cycleid
Definition: nbtree.h:67
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
Definition: test_ifaddrs.c:48
uint32 _bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
Definition: nbtpage.c:1782
static char * buf
Definition: pg_test_fsync.c:68
IndexTupleData * IndexTuple
Definition: itup.h:53
#define RelationGetRelationName(relation)
Definition: rel.h:491
BlockNumber pages_deleted
Definition: genam.h:79
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define P_ISDELETED(opaque)
Definition: nbtree.h:217
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
TransactionId oldestBtpoXact
Definition: nbtree.c:50
BTCycleId cycleid
Definition: nbtree.c:48
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
void _bt_lockbuf(Relation rel, Buffer buf, int access)
Definition: nbtpage.c:981
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:965
#define ereport(elevel,...)
Definition: elog.h:155
static bool BTreeTupleIsPosting(IndexTuple itup)
Definition: nbtree.h:361
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1002
#define Assert(condition)
Definition: c.h:792
IndexVacuumInfo * info
Definition: nbtree.c:44
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
IndexBulkDeleteResult * stats
Definition: nbtree.c:45
#define PageIsNew(page)
Definition: bufpage.h:229
#define MaxIndexTuplesPerPage
Definition: itup.h:145
int i
static uint16 BTreeTupleGetNPosting(IndexTuple posting)
Definition: nbtree.h:387
#define TransactionIdIsValid(xid)
Definition: transam.h:41
void vacuum_delay_point(void)
Definition: vacuum.c:2047
void _bt_upgradelockbufcleanup(Relation rel, Buffer buf)
Definition: nbtpage.c:1052
uint16 btpo_flags
Definition: nbtree.h:66
void RecordFreeIndexPage(Relation rel, BlockNumber freeBlock)
Definition: indexfsm.c:52
double num_index_tuples
Definition: genam.h:77
int Buffer
Definition: buf.h:23
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:214
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
Pointer Page
Definition: bufpage.h:78
bool(* IndexBulkDeleteCallback)(ItemPointer itemptr, void *state)
Definition: genam.h:84
#define P_ISLEAF(opaque)
Definition: nbtree.h:215

◆ btvacuumscan()

static void btvacuumscan ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats,
IndexBulkDeleteCallback  callback,
void *  callback_state,
BTCycleId  cycleid 
)
static

Definition at line 967 of file nbtree.c.

References _bt_update_meta_cleanup_info(), ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, BTREE_METAPAGE, btvacuumpage(), BTVacState::callback, callback(), BTVacState::callback_state, CurrentMemoryContext, BTVacState::cycleid, IndexBulkDeleteResult::estimated_count, ExclusiveLock, IndexVacuumInfo::index, IndexFreeSpaceMapVacuum(), BTVacState::info, InvalidTransactionId, LockRelationForExtension(), MemoryContextDelete(), IndexVacuumInfo::num_heap_tuples, IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::num_pages, BTVacState::oldestBtpoXact, BTVacState::pagedelcontext, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, pgstat_progress_update_param(), PROGRESS_SCAN_BLOCKS_DONE, PROGRESS_SCAN_BLOCKS_TOTAL, RELATION_IS_LOCAL, RelationGetNumberOfBlocks, IndexVacuumInfo::report_progress, BTVacState::stats, BTVacState::totFreePages, and UnlockRelationForExtension().

Referenced by btbulkdelete(), and btvacuumcleanup().

970 {
971  Relation rel = info->index;
972  BTVacState vstate;
973  BlockNumber num_pages;
974  BlockNumber scanblkno;
975  bool needLock;
976 
977  /*
978  * Reset counts that will be incremented during the scan; needed in case
979  * of multiple scans during a single VACUUM command
980  */
981  stats->estimated_count = false;
982  stats->num_index_tuples = 0;
983  stats->pages_deleted = 0;
984 
985  /* Set up info to pass down to btvacuumpage */
986  vstate.info = info;
987  vstate.stats = stats;
988  vstate.callback = callback;
989  vstate.callback_state = callback_state;
990  vstate.cycleid = cycleid;
991  vstate.totFreePages = 0;
992  vstate.oldestBtpoXact = InvalidTransactionId;
993 
994  /* Create a temporary memory context to run _bt_pagedel in */
995  vstate.pagedelcontext = AllocSetContextCreate(CurrentMemoryContext,
996  "_bt_pagedel",
997  ALLOCSET_DEFAULT_SIZES);
998 
999  /*
1000  * The outer loop iterates over all index pages except the metapage, in
1001  * physical order (we hope the kernel will cooperate in providing
1002  * read-ahead for speed). It is critical that we visit all leaf pages,
1003  * including ones added after we start the scan, else we might fail to
1004  * delete some deletable tuples. Hence, we must repeatedly check the
1005  * relation length. We must acquire the relation-extension lock while
1006  * doing so to avoid a race condition: if someone else is extending the
1007  * relation, there is a window where bufmgr/smgr have created a new
1008  * all-zero page but it hasn't yet been write-locked by _bt_getbuf(). If
1009  * we manage to scan such a page here, we'll improperly assume it can be
1010  * recycled. Taking the lock synchronizes things enough to prevent a
1011  * problem: either num_pages won't include the new page, or _bt_getbuf
1012  * already has write lock on the buffer and it will be fully initialized
1013  * before we can examine it. (See also vacuumlazy.c, which has the same
1014  * issue.) Also, we need not worry if a page is added immediately after
1015  * we look; the page splitting code already has write-lock on the left
1016  * page before it adds a right page, so we must already have processed any
1017  * tuples due to be moved into such a page.
1018  *
1019  * We can skip locking for new or temp relations, however, since no one
1020  * else could be accessing them.
1021  */
1022  needLock = !RELATION_IS_LOCAL(rel);
1023 
1024  scanblkno = BTREE_METAPAGE + 1;
1025  for (;;)
1026  {
1027  /* Get the current relation length */
1028  if (needLock)
1029  LockRelationForExtension(rel, ExclusiveLock);
1030  num_pages = RelationGetNumberOfBlocks(rel);
1031  if (needLock)
1032  UnlockRelationForExtension(rel, ExclusiveLock);
1033 
1034  if (info->report_progress)
1035  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
1036  num_pages);
1037 
1038  /* Quit if we've scanned the whole relation */
1039  if (scanblkno >= num_pages)
1040  break;
1041  /* Iterate over pages, then loop back to recheck length */
1042  for (; scanblkno < num_pages; scanblkno++)
1043  {
1044  btvacuumpage(&vstate, scanblkno);
1045  if (info->report_progress)
1046  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1047  scanblkno);
1048  }
1049  }
1050 
1051  MemoryContextDelete(vstate.pagedelcontext);
1052 
1053  /*
1054  * If we found any recyclable pages (and recorded them in the FSM), then
1055  * forcibly update the upper-level FSM pages to ensure that searchers can
1056  * find them. It's possible that the pages were also found during
1057  * previous scans and so this is a waste of time, but it's cheap enough
1058  * relative to scanning the index that it shouldn't matter much, and
1059  * making sure that free pages are available sooner not later seems
1060  * worthwhile.
1061  *
1062  * Note that if no recyclable pages exist, we don't bother vacuuming the
1063  * FSM at all.
1064  */
1065  if (vstate.totFreePages > 0)
1066  IndexFreeSpaceMapVacuum(rel);
1067 
1068  /*
1069  * Maintain the oldest btpo.xact and a count of the current number of heap
1070  * tuples in the metapage (for the benefit of _bt_vacuum_needs_cleanup).
1071  *
1072  * The page with the oldest btpo.xact is typically a page deleted by this
1073  * VACUUM operation, since pages deleted by a previous VACUUM operation
1074  * tend to be placed in the FSM (by the current VACUUM operation) -- such
1075  * pages are not candidates to be the oldest btpo.xact. (Note that pages
1076  * placed in the FSM are reported as deleted pages in the bulk delete
1077  * statistics, despite not counting as deleted pages for the purposes of
1078  * determining the oldest btpo.xact.)
1079  */
1080  _bt_update_meta_cleanup_info(rel, vstate.oldestBtpoXact,
1081  info->num_heap_tuples);
1082 
1083  /* update statistics */
1084  stats->num_pages = num_pages;
1085  stats->pages_free = vstate.totFreePages;
1086 }
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:212
#define AllocSetContextCreate
Definition: memutils.h:170
MemoryContext pagedelcontext
Definition: nbtree.c:51
void * callback_state
Definition: nbtree.c:47
BlockNumber totFreePages
Definition: nbtree.c:49
#define ExclusiveLock
Definition: lockdefs.h:44
void pgstat_progress_update_param(int index, int64 val)
Definition: pgstat.c:3478
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:584
IndexBulkDeleteCallback callback
Definition: nbtree.c:46
bool report_progress
Definition: genam.h:48
static void btvacuumpage(BTVacState *vstate, BlockNumber scanblkno)
Definition: nbtree.c:1099
Relation index
Definition: genam.h:46
uint32 BlockNumber
Definition: block.h:31
void _bt_update_meta_cleanup_info(Relation rel, TransactionId oldestBtpoXact, float8 numHeapTuples)
Definition: nbtpage.c:180
BlockNumber num_pages
Definition: genam.h:74
BlockNumber pages_free
Definition: genam.h:80
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
Definition: test_ifaddrs.c:48
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:192
#define InvalidTransactionId
Definition: transam.h:31
MemoryContext CurrentMemoryContext
Definition: mcxt.c:38
BlockNumber pages_deleted
Definition: genam.h:79
#define BTREE_METAPAGE
Definition: nbtree.h:142
TransactionId oldestBtpoXact
Definition: nbtree.c:50
BTCycleId cycleid
Definition: nbtree.c:48
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:402
#define PROGRESS_SCAN_BLOCKS_DONE
Definition: progress.h:120
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:452
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:211
double num_heap_tuples
Definition: genam.h:51
IndexVacuumInfo * info
Definition: nbtree.c:44
void IndexFreeSpaceMapVacuum(Relation rel)
Definition: indexfsm.c:71
#define PROGRESS_SCAN_BLOCKS_TOTAL
Definition: progress.h:119
IndexBulkDeleteResult * stats
Definition: nbtree.c:45
double num_index_tuples
Definition: genam.h:77
bool estimated_count
Definition: genam.h:76