PostgreSQL Source Code  git master
nbtree.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "access/nbtxlog.h"
#include "access/relscan.h"
#include "access/xlog.h"
#include "commands/progress.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "nodes/execnodes.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "storage/condition_variable.h"
#include "storage/indexfsm.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/smgr.h"
#include "utils/builtins.h"
#include "utils/index_selfuncs.h"
#include "utils/memutils.h"
Include dependency graph for nbtree.c:

Go to the source code of this file.

Data Structures

struct  BTVacState
 
struct  BTParallelScanDescData
 

Typedefs

typedef struct BTParallelScanDescData BTParallelScanDescData
 
typedef struct BTParallelScanDescData * BTParallelScanDesc
 

Enumerations

enum  BTPS_State { BTPARALLEL_NOT_INITIALIZED, BTPARALLEL_ADVANCING, BTPARALLEL_IDLE, BTPARALLEL_DONE }
 

Functions

static void btvacuumscan (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid)
 
static void btvacuumpage (BTVacState *vstate, BlockNumber scanblkno)
 
static BTVacuumPosting btreevacuumposting (BTVacState *vstate, IndexTuple posting, OffsetNumber updatedoffset, int *nremaining)
 
Datum bthandler (PG_FUNCTION_ARGS)
 
void btbuildempty (Relation index)
 
bool btinsert (Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, IndexInfo *indexInfo)
 
bool btgettuple (IndexScanDesc scan, ScanDirection dir)
 
int64 btgetbitmap (IndexScanDesc scan, TIDBitmap *tbm)
 
IndexScanDesc btbeginscan (Relation rel, int nkeys, int norderbys)
 
void btrescan (IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
 
void btendscan (IndexScanDesc scan)
 
void btmarkpos (IndexScanDesc scan)
 
void btrestrpos (IndexScanDesc scan)
 
Size btestimateparallelscan (void)
 
void btinitparallelscan (void *target)
 
void btparallelrescan (IndexScanDesc scan)
 
bool _bt_parallel_seize (IndexScanDesc scan, BlockNumber *pageno)
 
void _bt_parallel_release (IndexScanDesc scan, BlockNumber scan_page)
 
void _bt_parallel_done (IndexScanDesc scan)
 
void _bt_parallel_advance_array_keys (IndexScanDesc scan)
 
static bool _bt_vacuum_needs_cleanup (IndexVacuumInfo *info)
 
IndexBulkDeleteResult * btbulkdelete (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
 
IndexBulkDeleteResult * btvacuumcleanup (IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 
bool btcanreturn (Relation index, int attno)
 

Typedef Documentation

◆ BTParallelScanDesc

Definition at line 90 of file nbtree.c.

◆ BTParallelScanDescData

Definition at line 88 of file nbtree.c.

Enumeration Type Documentation

◆ BTPS_State

enum BTPS_State
Enumerator
BTPARALLEL_NOT_INITIALIZED 
BTPARALLEL_ADVANCING 
BTPARALLEL_IDLE 
BTPARALLEL_DONE 

Definition at line 66 of file nbtree.c.
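
The four states implement a simple handoff protocol: at most one backend advances the scan at a time, and the others sleep on the condition variable until the scan is released or finished. A hedged sketch of one worker's page-advance loop (hypothetical caller; the real logic lives in _bt_first()/_bt_steppage()/_bt_readnextpage() in nbtsearch.c, and next_blkno/scan_done below are hypothetical stand-ins):

BlockNumber blkno;

for (;;)
{
    if (!_bt_parallel_seize(scan, &blkno))  /* sleeps while some other backend
                                             * holds BTPARALLEL_ADVANCING */
        break;                              /* BTPARALLEL_DONE for this key set */

    /* ... read page blkno, stash matching items, compute next_blkno ... */

    if (scan_done)
    {
        _bt_parallel_done(scan);            /* publish BTPARALLEL_DONE, wake waiters */
        break;
    }
    _bt_parallel_release(scan, next_blkno); /* publish next page, back to BTPARALLEL_IDLE */
}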

Function Documentation

◆ _bt_parallel_advance_array_keys()

void _bt_parallel_advance_array_keys ( IndexScanDesc  scan)

Definition at line 768 of file nbtree.c.

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_DONE, BTPARALLEL_NOT_INITIALIZED, InvalidBlockNumber, OffsetToPointer, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_advance_array_keys().

769 {
770  BTScanOpaque so = (BTScanOpaque) scan->opaque;
771  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
772  BTParallelScanDesc btscan;
773 
774  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
775  parallel_scan->ps_offset);
776 
777  so->arrayKeyCount++;
778  SpinLockAcquire(&btscan->btps_mutex);
779  if (btscan->btps_pageStatus == BTPARALLEL_DONE)
780  {
781  btscan->btps_scanPage = InvalidBlockNumber;
782  btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
783  btscan->btps_arrayKeyCount++;
784  }
785  SpinLockRelease(&btscan->btps_mutex);
786 }
#define SpinLockAcquire(lock)
Definition: spin.h:62
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:945
int arrayKeyCount
Definition: nbtree.h:914
#define OffsetToPointer(base, offset)
Definition: c.h:641
#define SpinLockRelease(lock)
Definition: spin.h:64
struct ParallelIndexScanDescData * parallel_scan
Definition: relscan.h:151
#define InvalidBlockNumber
Definition: block.h:33

◆ _bt_parallel_done()

void _bt_parallel_done ( IndexScanDesc  scan)

Definition at line 727 of file nbtree.c.

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_DONE, ConditionVariableBroadcast(), OffsetToPointer, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_first(), and _bt_readnextpage().

728 {
729  BTScanOpaque so = (BTScanOpaque) scan->opaque;
730  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
731  BTParallelScanDesc btscan;
732  bool status_changed = false;
733 
734  /* Do nothing, for non-parallel scans */
735  if (parallel_scan == NULL)
736  return;
737 
738  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
739  parallel_scan->ps_offset);
740 
741  /*
742  * Mark the parallel scan as done for this combination of scan keys,
743  * unless some other process already did so. See also
744  * _bt_advance_array_keys.
745  */
746  SpinLockAcquire(&btscan->btps_mutex);
747  if (so->arrayKeyCount >= btscan->btps_arrayKeyCount &&
748  btscan->btps_pageStatus != BTPARALLEL_DONE)
749  {
750  btscan->btps_pageStatus = BTPARALLEL_DONE;
751  status_changed = true;
752  }
753  SpinLockRelease(&btscan->btps_mutex);
754 
755  /* wake up all the workers associated with this parallel scan */
756  if (status_changed)
757  ConditionVariableBroadcast(&btscan->btps_cv);
758 }
void ConditionVariableBroadcast(ConditionVariable *cv)
struct BTParallelScanDescData * BTParallelScanDesc
Definition: nbtree.c:90
#define SpinLockAcquire(lock)
Definition: spin.h:62
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:945
int arrayKeyCount
Definition: nbtree.h:914
#define OffsetToPointer(base, offset)
Definition: c.h:641
#define SpinLockRelease(lock)
Definition: spin.h:64
struct ParallelIndexScanDescData * parallel_scan
Definition: relscan.h:151

◆ _bt_parallel_release()

void _bt_parallel_release ( IndexScanDesc  scan,
BlockNumber  scan_page 
)

Definition at line 704 of file nbtree.c.

References BTPARALLEL_IDLE, BTParallelScanDescData::btps_cv, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, ConditionVariableSignal(), OffsetToPointer, IndexScanDescData::parallel_scan, ParallelIndexScanDescData::ps_offset, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_readnextpage(), and _bt_readpage().

705 {
706  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
707  BTParallelScanDesc btscan;
708 
709  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
710  parallel_scan->ps_offset);
711 
712  SpinLockAcquire(&btscan->btps_mutex);
713  btscan->btps_scanPage = scan_page;
714  btscan->btps_pageStatus = BTPARALLEL_IDLE;
715  SpinLockRelease(&btscan->btps_mutex);
716  ConditionVariableSignal(&btscan->btps_cv);
717 }
slock_t btps_mutex
Definition: nbtree.c:86
struct BTParallelScanDescData * BTParallelScanDesc
Definition: nbtree.c:90
#define SpinLockAcquire(lock)
Definition: spin.h:62
void ConditionVariableSignal(ConditionVariable *cv)
ConditionVariable btps_cv
Definition: nbtree.c:87
#define OffsetToPointer(base, offset)
Definition: c.h:641
#define SpinLockRelease(lock)
Definition: spin.h:64
struct ParallelIndexScanDescData * parallel_scan
Definition: relscan.h:151
BlockNumber btps_scanPage
Definition: nbtree.c:80
BTPS_State btps_pageStatus
Definition: nbtree.c:81

◆ _bt_parallel_seize()

bool _bt_parallel_seize ( IndexScanDesc  scan,
BlockNumber *  pageno 
)

Definition at line 646 of file nbtree.c.

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_ADVANCING, BTPARALLEL_DONE, ConditionVariableCancelSleep(), ConditionVariableSleep(), OffsetToPointer, IndexScanDescData::opaque, P_NONE, IndexScanDescData::parallel_scan, SpinLockAcquire, SpinLockRelease, status(), and WAIT_EVENT_BTREE_PAGE.

Referenced by _bt_first(), _bt_readnextpage(), and _bt_steppage().

647 {
648  BTScanOpaque so = (BTScanOpaque) scan->opaque;
649  BTPS_State pageStatus;
650  bool exit_loop = false;
651  bool status = true;
652  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
653  BTParallelScanDesc btscan;
654 
655  *pageno = P_NONE;
656 
657  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
658  parallel_scan->ps_offset);
659 
660  while (1)
661  {
662  SpinLockAcquire(&btscan->btps_mutex);
663  pageStatus = btscan->btps_pageStatus;
664 
665  if (so->arrayKeyCount < btscan->btps_arrayKeyCount)
666  {
667  /* Parallel scan has already advanced to a new set of scankeys. */
668  status = false;
669  }
670  else if (pageStatus == BTPARALLEL_DONE)
671  {
672  /*
673  * We're done with this set of scankeys. This may be the end, or
674  * there could be more sets to try.
675  */
676  status = false;
677  }
678  else if (pageStatus != BTPARALLEL_ADVANCING)
679  {
680  /*
681  * We have successfully seized control of the scan for the purpose
682  * of advancing it to a new page!
683  */
684  btscan->btps_pageStatus = BTPARALLEL_ADVANCING;
685  *pageno = btscan->btps_scanPage;
686  exit_loop = true;
687  }
688  SpinLockRelease(&btscan->btps_mutex);
689  if (exit_loop || !status)
690  break;
691  ConditionVariableSleep(&btscan->btps_cv, WAIT_EVENT_BTREE_PAGE);
692  }
693  ConditionVariableCancelSleep();
694 
695  return status;
696 }
#define P_NONE
Definition: nbtree.h:206
#define SpinLockAcquire(lock)
Definition: spin.h:62
void ConditionVariableCancelSleep(void)
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:945
BTPS_State
Definition: nbtree.c:66
int arrayKeyCount
Definition: nbtree.h:914
#define OffsetToPointer(base, offset)
Definition: c.h:641
#define SpinLockRelease(lock)
Definition: spin.h:64
struct ParallelIndexScanDescData * parallel_scan
Definition: relscan.h:151
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
static void static void status(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:225

◆ _bt_vacuum_needs_cleanup()

static bool _bt_vacuum_needs_cleanup ( IndexVacuumInfo *  info )
static

Definition at line 799 of file nbtree.c.

References _bt_getbuf(), _bt_relbuf(), BT_READ, BTMetaPageData::btm_last_cleanup_num_heap_tuples, BTMetaPageData::btm_oldest_btpo_xact, BTMetaPageData::btm_version, BTPageGetMeta, BTREE_METAPAGE, BTREE_NOVAC_VERSION, BufferGetPage, IndexVacuumInfo::index, IndexVacuumInfo::num_heap_tuples, RelationData::rd_options, RecentGlobalXmin, TransactionIdIsValid, TransactionIdPrecedes(), vacuum_cleanup_index_scale_factor, and BTOptions::vacuum_cleanup_index_scale_factor.

Referenced by btvacuumcleanup().

800 {
801  Buffer metabuf;
802  Page metapg;
803  BTMetaPageData *metad;
804  bool result = false;
805 
806  metabuf = _bt_getbuf(info->index, BTREE_METAPAGE, BT_READ);
807  metapg = BufferGetPage(metabuf);
808  metad = BTPageGetMeta(metapg);
809 
810  if (metad->btm_version < BTREE_NOVAC_VERSION)
811  {
812  /*
813  * Do cleanup if metapage needs upgrade, because we don't have
814  * cleanup-related meta-information yet.
815  */
816  result = true;
817  }
818  else if (TransactionIdIsValid(metad->btm_oldest_btpo_xact) &&
819  TransactionIdPrecedes(metad->btm_oldest_btpo_xact,
820  RecentGlobalXmin))
821  {
822  /*
823  * If any oldest btpo.xact from a previously deleted page in the index
824  * is older than RecentGlobalXmin, then at least one deleted page can
825  * be recycled -- don't skip cleanup.
826  */
827  result = true;
828  }
829  else
830  {
831  BTOptions *relopts;
832  float8 cleanup_scale_factor;
833  float8 prev_num_heap_tuples;
834 
835  /*
836  * If table receives enough insertions and no cleanup was performed,
837  * then index would appear have stale statistics. If scale factor is
838  * set, we avoid that by performing cleanup if the number of inserted
839  * tuples exceeds vacuum_cleanup_index_scale_factor fraction of
840  * original tuples count.
841  */
842  relopts = (BTOptions *) info->index->rd_options;
843  cleanup_scale_factor = (relopts &&
844  relopts->vacuum_cleanup_index_scale_factor >= 0)
845  ? relopts->vacuum_cleanup_index_scale_factor
846  : vacuum_cleanup_index_scale_factor;
847  prev_num_heap_tuples = metad->btm_last_cleanup_num_heap_tuples;
848 
849  if (cleanup_scale_factor <= 0 ||
850  prev_num_heap_tuples <= 0 ||
851  (info->num_heap_tuples - prev_num_heap_tuples) /
852  prev_num_heap_tuples >= cleanup_scale_factor)
853  result = true;
854  }
855 
856  _bt_relbuf(info->index, metabuf);
857  return result;
858 }
uint32 btm_version
Definition: nbtree.h:101
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:792
Relation index
Definition: genam.h:46
double vacuum_cleanup_index_scale_factor
Definition: globals.c:150
#define BT_READ
Definition: nbtree.h:587
float8 vacuum_cleanup_index_scale_factor
Definition: nbtree.h:963
double float8
Definition: c.h:491
#define BTPageGetMeta(p)
Definition: nbtree.h:114
TransactionId RecentGlobalXmin
Definition: snapmgr.c:168
#define BTREE_NOVAC_VERSION
Definition: nbtree.h:145
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define BTREE_METAPAGE
Definition: nbtree.h:141
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:947
double num_heap_tuples
Definition: genam.h:51
float8 btm_last_cleanup_num_heap_tuples
Definition: nbtree.h:109
TransactionId btm_oldest_btpo_xact
Definition: nbtree.h:107
#define TransactionIdIsValid(xid)
Definition: transam.h:41
int Buffer
Definition: buf.h:23
bytea * rd_options
Definition: rel.h:157
Pointer Page
Definition: bufpage.h:78
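
A worked example of the scale-factor test above: with vacuum_cleanup_index_scale_factor = 0.1 and btm_last_cleanup_num_heap_tuples = 1,000,000, cleanup is skipped until num_heap_tuples exceeds 1,100,000, because (1,100,000 - 1,000,000) / 1,000,000 = 0.1 first reaches the threshold there.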

◆ btbeginscan()

IndexScanDesc btbeginscan ( Relation  rel,
int  nkeys,
int  norderbys 
)

Definition at line 353 of file nbtree.c.

References BTScanOpaqueData::arrayContext, BTScanOpaqueData::arrayKeyData, BTScanOpaqueData::arrayKeys, Assert, BTScanPosInvalidate, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanOpaqueData::keyData, BTScanOpaqueData::killedItems, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanOpaqueData::numArrayKeys, IndexScanDescData::numberOfKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), RelationGetDescr, RelationGetIndexScan(), and IndexScanDescData::xs_itupdesc.

Referenced by bthandler().

354 {
355  IndexScanDesc scan;
356  BTScanOpaque so;
357 
358  /* no order by operators allowed */
359  Assert(norderbys == 0);
360 
361  /* get the scan */
362  scan = RelationGetIndexScan(rel, nkeys, norderbys);
363 
364  /* allocate private workspace */
365  so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
366  BTScanPosInvalidate(so->currPos);
367  BTScanPosInvalidate(so->markPos);
368  if (scan->numberOfKeys > 0)
369  so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
370  else
371  so->keyData = NULL;
372 
373  so->arrayKeyData = NULL; /* assume no array keys for now */
374  so->numArrayKeys = 0;
375  so->arrayKeys = NULL;
376  so->arrayContext = NULL;
377 
378  so->killedItems = NULL; /* until needed */
379  so->numKilled = 0;
380 
381  /*
382  * We don't know yet whether the scan will be index-only, so we do not
383  * allocate the tuple workspace arrays until btrescan. However, we set up
384  * scan->xs_itupdesc whether we'll need it or not, since that's so cheap.
385  */
386  so->currTuples = so->markTuples = NULL;
387 
388  scan->xs_itupdesc = RelationGetDescr(rel);
389 
390  scan->opaque = so;
391 
392  return scan;
393 }
#define RelationGetDescr(relation)
Definition: rel.h:482
MemoryContext arrayContext
Definition: nbtree.h:917
char * currTuples
Definition: nbtree.h:928
struct TupleDescData * xs_itupdesc
Definition: relscan.h:128
BTScanPosData markPos
Definition: nbtree.h:942
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:945
int numArrayKeys
Definition: nbtree.h:912
ScanKeyData * ScanKey
Definition: skey.h:75
char * markTuples
Definition: nbtree.h:929
ScanKey arrayKeyData
Definition: nbtree.h:911
int * killedItems
Definition: nbtree.h:920
#define Assert(condition)
Definition: c.h:738
#define BTScanPosInvalidate(scanpos)
Definition: nbtree.h:884
BTScanPosData currPos
Definition: nbtree.h:941
void * palloc(Size size)
Definition: mcxt.c:949
ScanKey keyData
Definition: nbtree.h:908
BTArrayKeyInfo * arrayKeys
Definition: nbtree.h:916
IndexScanDesc RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
Definition: genam.c:80

◆ btbuildempty()

void btbuildempty ( Relation  index)

Definition at line 162 of file nbtree.c.

References _bt_allequalimage(), _bt_initmetapage(), BTREE_METAPAGE, INIT_FORKNUM, log_newpage(), RelFileNodeBackend::node, P_NONE, PageSetChecksumInplace(), palloc(), RelationData::rd_smgr, SMgrRelationData::smgr_rnode, smgrimmedsync(), and smgrwrite().

Referenced by bthandler().

163 {
164  Page metapage;
165 
166  /* Construct metapage. */
167  metapage = (Page) palloc(BLCKSZ);
168  _bt_initmetapage(metapage, P_NONE, 0, _bt_allequalimage(index, false));
169 
170  /*
171  * Write the page and log it. It might seem that an immediate sync would
172  * be sufficient to guarantee that the file exists on disk, but recovery
173  * itself might remove it while replaying, for example, an
174  * XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record. Therefore, we need
175  * this even when wal_level=minimal.
176  */
177  PageSetChecksumInplace(metapage, BTREE_METAPAGE);
178  smgrwrite(index->rd_smgr, INIT_FORKNUM, BTREE_METAPAGE,
179  (char *) metapage, true);
180  log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
181  BTREE_METAPAGE, metapage, true);
182 
183  /*
184  * An immediate sync is required even if we xlog'd the page, because the
185  * write did not go through shared_buffers and therefore a concurrent
186  * checkpoint may have moved the redo pointer past our xlog record.
187  */
188  smgrimmedsync(index->rd_smgr, INIT_FORKNUM);
189 }
struct SMgrRelationData * rd_smgr
Definition: rel.h:57
#define P_NONE
Definition: nbtree.h:206
void _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level, bool allequalimage)
Definition: nbtpage.c:60
RelFileNodeBackend smgr_rnode
Definition: smgr.h:42
void smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync)
Definition: smgr.c:513
#define BTREE_METAPAGE
Definition: nbtree.h:141
RelFileNode node
Definition: relfilenode.h:74
void PageSetChecksumInplace(Page page, BlockNumber blkno)
Definition: bufpage.c:1194
void * palloc(Size size)
Definition: mcxt.c:949
XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno, Page page, bool page_std)
Definition: xloginsert.c:977
bool _bt_allequalimage(Relation rel, bool debugmessage)
Definition: nbtutils.c:2691
void smgrimmedsync(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:620
Pointer Page
Definition: bufpage.h:78

◆ btbulkdelete()

IndexBulkDeleteResult* btbulkdelete ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats,
IndexBulkDeleteCallback  callback,
void *  callback_state 
)

Definition at line 868 of file nbtree.c.

References _bt_end_vacuum(), _bt_end_vacuum_callback(), _bt_start_vacuum(), btvacuumscan(), IndexVacuumInfo::index, palloc0(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, and PointerGetDatum.

Referenced by bthandler().

870 {
871  Relation rel = info->index;
872  BTCycleId cycleid;
873 
874  /* allocate stats if first time through, else re-use existing struct */
875  if (stats == NULL)
876  stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
877 
878  /* Establish the vacuum cycle ID to use for this scan */
879  /* The ENSURE stuff ensures we clean up shared memory on failure */
880  PG_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
881  {
882  cycleid = _bt_start_vacuum(rel);
883 
884  btvacuumscan(info, stats, callback, callback_state, cycleid);
885  }
886  PG_END_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
887  _bt_end_vacuum(rel);
888 
889  return stats;
890 }
#define PointerGetDatum(X)
Definition: postgres.h:556
Relation index
Definition: genam.h:46
void _bt_end_vacuum_callback(int code, Datum arg)
Definition: nbtutils.c:2053
static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid)
Definition: nbtree.c:954
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:47
uint16 BTCycleId
Definition: nbtree.h:28
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
Definition: test_ifaddrs.c:48
void _bt_end_vacuum(Relation rel)
Definition: nbtutils.c:2025
void * palloc0(Size size)
Definition: mcxt.c:980
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:52
BTCycleId _bt_start_vacuum(Relation rel)
Definition: nbtutils.c:1968

◆ btcanreturn()

bool btcanreturn ( Relation  index,
int  attno 
)

Definition at line 1491 of file nbtree.c.

Referenced by bthandler().

1492 {
1493  return true;
1494 }

◆ btendscan()

void btendscan ( IndexScanDesc  scan)

Definition at line 459 of file nbtree.c.

References _bt_killitems(), BTScanOpaqueData::arrayContext, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanOpaqueData::keyData, BTScanOpaqueData::killedItems, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, MemoryContextDelete(), BTScanOpaqueData::numKilled, IndexScanDescData::opaque, and pfree().

Referenced by bthandler().

460 {
461  BTScanOpaque so = (BTScanOpaque) scan->opaque;
462 
463  /* we aren't holding any read locks, but gotta drop the pins */
464  if (BTScanPosIsValid(so->currPos))
465  {
466  /* Before leaving current page, deal with any killed items */
467  if (so->numKilled > 0)
468  _bt_killitems(scan);
469  BTScanPosUnpinIfPinned(so->currPos);
470  }
471 
472  so->markItemIndex = -1;
473  BTScanPosUnpinIfPinned(so->markPos);
474 
475  /* No need to invalidate positions, the RAM is about to be freed. */
476 
477  /* Release storage */
478  if (so->keyData != NULL)
479  pfree(so->keyData);
480  /* so->arrayKeyData and so->arrayKeys are in arrayContext */
481  if (so->arrayContext != NULL)
482  MemoryContextDelete(so->arrayContext);
483  if (so->killedItems != NULL)
484  pfree(so->killedItems);
485  if (so->currTuples != NULL)
486  pfree(so->currTuples);
487  /* so->markTuples should not be pfree'd, see btrescan */
488  pfree(so);
489 }
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:211
MemoryContext arrayContext
Definition: nbtree.h:917
char * currTuples
Definition: nbtree.h:928
#define BTScanPosIsValid(scanpos)
Definition: nbtree.h:878
void _bt_killitems(IndexScanDesc scan)
Definition: nbtutils.c:1718
BTScanPosData markPos
Definition: nbtree.h:942
void pfree(void *pointer)
Definition: mcxt.c:1056
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:945
int markItemIndex
Definition: nbtree.h:938
int * killedItems
Definition: nbtree.h:920
BTScanPosData currPos
Definition: nbtree.h:941
ScanKey keyData
Definition: nbtree.h:908
#define BTScanPosUnpinIfPinned(scanpos)
Definition: nbtree.h:872

◆ btestimateparallelscan()

Size btestimateparallelscan ( void  )

Definition at line 581 of file nbtree.c.

Referenced by bthandler().

582 {
583  return sizeof(BTParallelScanDescData);
584 }
struct BTParallelScanDescData BTParallelScanDescData

◆ btgetbitmap()

int64 btgetbitmap ( IndexScanDesc  scan,
TIDBitmap tbm 
)

Definition at line 295 of file nbtree.c.

References _bt_advance_array_keys(), _bt_first(), _bt_next(), _bt_start_array_keys(), BTScanOpaqueData::currPos, ForwardScanDirection, BTScanPosItem::heapTid, BTScanPosData::itemIndex, BTScanPosData::items, BTScanPosData::lastItem, BTScanOpaqueData::numArrayKeys, IndexScanDescData::opaque, tbm_add_tuples(), and IndexScanDescData::xs_heaptid.

Referenced by bthandler().

296 {
297  BTScanOpaque so = (BTScanOpaque) scan->opaque;
298  int64 ntids = 0;
299  ItemPointer heapTid;
300 
301  /*
302  * If we have any array keys, initialize them.
303  */
304  if (so->numArrayKeys)
305  {
306  /* punt if we have any unsatisfiable array keys */
307  if (so->numArrayKeys < 0)
308  return ntids;
309 
310  _bt_start_array_keys(scan, ForwardScanDirection);
311  }
312 
313  /* This loop handles advancing to the next array elements, if any */
314  do
315  {
316  /* Fetch the first page & tuple */
317  if (_bt_first(scan, ForwardScanDirection))
318  {
319  /* Save tuple ID, and continue scanning */
320  heapTid = &scan->xs_heaptid;
321  tbm_add_tuples(tbm, heapTid, 1, false);
322  ntids++;
323 
324  for (;;)
325  {
326  /*
327  * Advance to next tuple within page. This is the same as the
328  * easy case in _bt_next().
329  */
330  if (++so->currPos.itemIndex > so->currPos.lastItem)
331  {
332  /* let _bt_next do the heavy lifting */
333  if (!_bt_next(scan, ForwardScanDirection))
334  break;
335  }
336 
337  /* Save tuple ID, and continue scanning */
338  heapTid = &so->currPos.items[so->currPos.itemIndex].heapTid;
339  tbm_add_tuples(tbm, heapTid, 1, false);
340  ntids++;
341  }
342  }
343  /* Now see if we have more array keys to deal with */
344  } while (so->numArrayKeys && _bt_advance_array_keys(scan, ForwardScanDirection));
345 
346  return ntids;
347 }
bool _bt_advance_array_keys(IndexScanDesc scan, ScanDirection dir)
Definition: nbtutils.c:544
void _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir)
Definition: nbtutils.c:518
int itemIndex
Definition: nbtree.h:854
void tbm_add_tuples(TIDBitmap *tbm, const ItemPointer tids, int ntids, bool recheck)
Definition: tidbitmap.c:376
bool _bt_first(IndexScanDesc scan, ScanDirection dir)
Definition: nbtsearch.c:852
int lastItem
Definition: nbtree.h:853
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:945
int numArrayKeys
Definition: nbtree.h:912
ItemPointerData xs_heaptid
Definition: relscan.h:132
ItemPointerData heapTid
Definition: nbtree.h:817
BTScanPosItem items[MaxTIDsPerBTreePage]
Definition: nbtree.h:856
BTScanPosData currPos
Definition: nbtree.h:941
bool _bt_next(IndexScanDesc scan, ScanDirection dir)
Definition: nbtsearch.c:1454

◆ btgettuple()

bool btgettuple ( IndexScanDesc  scan,
ScanDirection  dir 
)

Definition at line 221 of file nbtree.c.

References _bt_advance_array_keys(), _bt_first(), _bt_next(), _bt_start_array_keys(), BTScanPosIsValid, BTScanOpaqueData::currPos, BTScanPosData::itemIndex, IndexScanDescData::kill_prior_tuple, BTScanOpaqueData::killedItems, MaxTIDsPerBTreePage, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), and IndexScanDescData::xs_recheck.

Referenced by bthandler().

222 {
223  BTScanOpaque so = (BTScanOpaque) scan->opaque;
224  bool res;
225 
226  /* btree indexes are never lossy */
227  scan->xs_recheck = false;
228 
229  /*
230  * If we have any array keys, initialize them during first call for a
231  * scan. We can't do this in btrescan because we don't know the scan
232  * direction at that time.
233  */
234  if (so->numArrayKeys && !BTScanPosIsValid(so->currPos))
235  {
236  /* punt if we have any unsatisfiable array keys */
237  if (so->numArrayKeys < 0)
238  return false;
239 
240  _bt_start_array_keys(scan, dir);
241  }
242 
243  /* This loop handles advancing to the next array elements, if any */
244  do
245  {
246  /*
247  * If we've already initialized this scan, we can just advance it in
248  * the appropriate direction. If we haven't done so yet, we call
249  * _bt_first() to get the first item in the scan.
250  */
251  if (!BTScanPosIsValid(so->currPos))
252  res = _bt_first(scan, dir);
253  else
254  {
255  /*
256  * Check to see if we should kill the previously-fetched tuple.
257  */
258  if (scan->kill_prior_tuple)
259  {
260  /*
261  * Yes, remember it for later. (We'll deal with all such
262  * tuples at once right before leaving the index page.) The
263  * test for numKilled overrun is not just paranoia: if the
264  * caller reverses direction in the indexscan then the same
265  * item might get entered multiple times. It's not worth
266  * trying to optimize that, so we don't detect it, but instead
267  * just forget any excess entries.
268  */
269  if (so->killedItems == NULL)
270  so->killedItems = (int *)
271  palloc(MaxTIDsPerBTreePage * sizeof(int));
272  if (so->numKilled < MaxTIDsPerBTreePage)
273  so->killedItems[so->numKilled++] = so->currPos.itemIndex;
274  }
275 
276  /*
277  * Now continue the scan.
278  */
279  res = _bt_next(scan, dir);
280  }
281 
282  /* If we have a tuple, return it ... */
283  if (res)
284  break;
285  /* ... otherwise see if we have more array keys to deal with */
286  } while (so->numArrayKeys && _bt_advance_array_keys(scan, dir));
287 
288  return res;
289 }
bool _bt_advance_array_keys(IndexScanDesc scan, ScanDirection dir)
Definition: nbtutils.c:544
void _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir)
Definition: nbtutils.c:518
int itemIndex
Definition: nbtree.h:854
#define BTScanPosIsValid(scanpos)
Definition: nbtree.h:878
#define MaxTIDsPerBTreePage
Definition: nbtree.h:179
bool _bt_first(IndexScanDesc scan, ScanDirection dir)
Definition: nbtsearch.c:852
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:945
int numArrayKeys
Definition: nbtree.h:912
int * killedItems
Definition: nbtree.h:920
BTScanPosData currPos
Definition: nbtree.h:941
void * palloc(Size size)
Definition: mcxt.c:949
bool kill_prior_tuple
Definition: relscan.h:113
bool _bt_next(IndexScanDesc scan, ScanDirection dir)
Definition: nbtsearch.c:1454

◆ bthandler()

Datum bthandler ( PG_FUNCTION_ARGS  )

Definition at line 108 of file nbtree.c.

References IndexAmRoutine::ambeginscan, IndexAmRoutine::ambuild, IndexAmRoutine::ambuildempty, IndexAmRoutine::ambuildphasename, IndexAmRoutine::ambulkdelete, IndexAmRoutine::amcanbackward, IndexAmRoutine::amcaninclude, IndexAmRoutine::amcanmulticol, IndexAmRoutine::amcanorder, IndexAmRoutine::amcanorderbyop, IndexAmRoutine::amcanparallel, IndexAmRoutine::amcanreturn, IndexAmRoutine::amcanunique, IndexAmRoutine::amclusterable, IndexAmRoutine::amcostestimate, IndexAmRoutine::amendscan, IndexAmRoutine::amestimateparallelscan, IndexAmRoutine::amgetbitmap, IndexAmRoutine::amgettuple, IndexAmRoutine::aminitparallelscan, IndexAmRoutine::aminsert, IndexAmRoutine::amkeytype, IndexAmRoutine::ammarkpos, IndexAmRoutine::amoptionalkey, IndexAmRoutine::amoptions, IndexAmRoutine::amoptsprocnum, IndexAmRoutine::amparallelrescan, IndexAmRoutine::amparallelvacuumoptions, IndexAmRoutine::ampredlocks, IndexAmRoutine::amproperty, IndexAmRoutine::amrescan, IndexAmRoutine::amrestrpos, IndexAmRoutine::amsearcharray, IndexAmRoutine::amsearchnulls, IndexAmRoutine::amstorage, IndexAmRoutine::amstrategies, IndexAmRoutine::amsupport, IndexAmRoutine::amusemaintenanceworkmem, IndexAmRoutine::amvacuumcleanup, IndexAmRoutine::amvalidate, btbeginscan(), btbuild(), btbuildempty(), btbuildphasename(), btbulkdelete(), btcanreturn(), btcostestimate(), btendscan(), btestimateparallelscan(), btgetbitmap(), btgettuple(), btinitparallelscan(), btinsert(), btmarkpos(), BTMaxStrategyNumber, BTNProcs, btoptions(), BTOPTIONS_PROC, btparallelrescan(), btproperty(), btrescan(), btrestrpos(), btvacuumcleanup(), btvalidate(), InvalidOid, makeNode, PG_RETURN_POINTER, VACUUM_OPTION_PARALLEL_BULKDEL, and VACUUM_OPTION_PARALLEL_COND_CLEANUP.

109 {
110  IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
111 
112  amroutine->amstrategies = BTMaxStrategyNumber;
113  amroutine->amsupport = BTNProcs;
114  amroutine->amoptsprocnum = BTOPTIONS_PROC;
115  amroutine->amcanorder = true;
116  amroutine->amcanorderbyop = false;
117  amroutine->amcanbackward = true;
118  amroutine->amcanunique = true;
119  amroutine->amcanmulticol = true;
120  amroutine->amoptionalkey = true;
121  amroutine->amsearcharray = true;
122  amroutine->amsearchnulls = true;
123  amroutine->amstorage = false;
124  amroutine->amclusterable = true;
125  amroutine->ampredlocks = true;
126  amroutine->amcanparallel = true;
127  amroutine->amcaninclude = true;
128  amroutine->amusemaintenanceworkmem = false;
129  amroutine->amparallelvacuumoptions =
130  VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_COND_CLEANUP;
131  amroutine->amkeytype = InvalidOid;
132 
133  amroutine->ambuild = btbuild;
134  amroutine->ambuildempty = btbuildempty;
135  amroutine->aminsert = btinsert;
136  amroutine->ambulkdelete = btbulkdelete;
137  amroutine->amvacuumcleanup = btvacuumcleanup;
138  amroutine->amcanreturn = btcanreturn;
139  amroutine->amcostestimate = btcostestimate;
140  amroutine->amoptions = btoptions;
141  amroutine->amproperty = btproperty;
142  amroutine->ambuildphasename = btbuildphasename;
143  amroutine->amvalidate = btvalidate;
144  amroutine->ambeginscan = btbeginscan;
145  amroutine->amrescan = btrescan;
146  amroutine->amgettuple = btgettuple;
147  amroutine->amgetbitmap = btgetbitmap;
148  amroutine->amendscan = btendscan;
149  amroutine->ammarkpos = btmarkpos;
150  amroutine->amrestrpos = btrestrpos;
151  amroutine->amestimateparallelscan = btestimateparallelscan;
152  amroutine->aminitparallelscan = btinitparallelscan;
153  amroutine->amparallelrescan = btparallelrescan;
154 
155  PG_RETURN_POINTER(amroutine);
156 }
ambeginscan_function ambeginscan
Definition: amapi.h:227
uint8 amparallelvacuumoptions
Definition: amapi.h:205
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:360
ambulkdelete_function ambulkdelete
Definition: amapi.h:219
bool amcanmulticol
Definition: amapi.h:185
uint16 amsupport
Definition: amapi.h:173
void btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
Definition: nbtree.c:399
amgettuple_function amgettuple
Definition: amapi.h:229
bool amcanorderbyop
Definition: amapi.h:179
amproperty_function amproperty
Definition: amapi.h:224
char * btbuildphasename(int64 phasenum)
Definition: nbtutils.c:2151
bool btinsert(Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, IndexInfo *indexInfo)
Definition: nbtree.c:198
amparallelrescan_function amparallelrescan
Definition: amapi.h:238
bool amstorage
Definition: amapi.h:193
bool ampredlocks
Definition: amapi.h:197
IndexScanDesc btbeginscan(Relation rel, int nkeys, int norderbys)
Definition: nbtree.c:353
aminsert_function aminsert
Definition: amapi.h:218
Oid amkeytype
Definition: amapi.h:207
bool amoptionalkey
Definition: amapi.h:187
amvalidate_function amvalidate
Definition: amapi.h:226
Size btestimateparallelscan(void)
Definition: nbtree.c:581
#define BTOPTIONS_PROC
Definition: nbtree.h:579
bool btvalidate(Oid opclassoid)
Definition: nbtvalidate.c:38
void btbuildempty(Relation index)
Definition: nbtree.c:162
IndexBulkDeleteResult * btvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
Definition: nbtree.c:898
amgetbitmap_function amgetbitmap
Definition: amapi.h:230
ambuild_function ambuild
Definition: amapi.h:216
amoptions_function amoptions
Definition: amapi.h:223
int64 btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
Definition: nbtree.c:295
IndexBuildResult * btbuild(Relation heap, Relation index, IndexInfo *indexInfo)
Definition: nbtsort.c:299
void btendscan(IndexScanDesc scan)
Definition: nbtree.c:459
bool amcaninclude
Definition: amapi.h:201
amcostestimate_function amcostestimate
Definition: amapi.h:222
bool amcanunique
Definition: amapi.h:183
void btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
Definition: selfuncs.c:6132
amvacuumcleanup_function amvacuumcleanup
Definition: amapi.h:220
amendscan_function amendscan
Definition: amapi.h:231
bool amcanbackward
Definition: amapi.h:181
amrescan_function amrescan
Definition: amapi.h:228
bool amcanparallel
Definition: amapi.h:199
bool amsearchnulls
Definition: amapi.h:191
bool btgettuple(IndexScanDesc scan, ScanDirection dir)
Definition: nbtree.c:221
void btmarkpos(IndexScanDesc scan)
Definition: nbtree.c:495
bool amclusterable
Definition: amapi.h:195
bool amsearcharray
Definition: amapi.h:189
#define InvalidOid
Definition: postgres_ext.h:36
bool amusemaintenanceworkmem
Definition: amapi.h:203
#define makeNode(_type_)
Definition: nodes.h:577
#define VACUUM_OPTION_PARALLEL_COND_CLEANUP
Definition: vacuum.h:52
bool btproperty(Oid index_oid, int attno, IndexAMProperty prop, const char *propname, bool *res, bool *isnull)
Definition: nbtutils.c:2128
void btrestrpos(IndexScanDesc scan)
Definition: nbtree.c:525
ammarkpos_function ammarkpos
Definition: amapi.h:232
bool amcanorder
Definition: amapi.h:177
ambuildphasename_function ambuildphasename
Definition: amapi.h:225
#define VACUUM_OPTION_PARALLEL_BULKDEL
Definition: vacuum.h:45
bool btcanreturn(Relation index, int attno)
Definition: nbtree.c:1491
void btparallelrescan(IndexScanDesc scan)
Definition: nbtree.c:605
amestimateparallelscan_function amestimateparallelscan
Definition: amapi.h:236
#define BTNProcs
Definition: nbtree.h:580
uint16 amstrategies
Definition: amapi.h:171
void btinitparallelscan(void *target)
Definition: nbtree.c:590
uint16 amoptsprocnum
Definition: amapi.h:175
ambuildempty_function ambuildempty
Definition: amapi.h:217
bytea * btoptions(Datum reloptions, bool validate)
Definition: nbtutils.c:2103
#define BTMaxStrategyNumber
Definition: stratnum.h:35
amcanreturn_function amcanreturn
Definition: amapi.h:221
aminitparallelscan_function aminitparallelscan
Definition: amapi.h:237
IndexBulkDeleteResult * btbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
Definition: nbtree.c:868
amrestrpos_function amrestrpos
Definition: amapi.h:233

◆ btinitparallelscan()

void btinitparallelscan ( void *  target)

Definition at line 590 of file nbtree.c.

References BTPARALLEL_NOT_INITIALIZED, BTParallelScanDescData::btps_arrayKeyCount, BTParallelScanDescData::btps_cv, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, ConditionVariableInit(), InvalidBlockNumber, and SpinLockInit.

Referenced by bthandler().

591 {
592  BTParallelScanDesc bt_target = (BTParallelScanDesc) target;
593 
594  SpinLockInit(&bt_target->btps_mutex);
595  bt_target->btps_scanPage = InvalidBlockNumber;
596  bt_target->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
597  bt_target->btps_arrayKeyCount = 0;
598  ConditionVariableInit(&bt_target->btps_cv);
599 }
slock_t btps_mutex
Definition: nbtree.c:86
#define SpinLockInit(lock)
Definition: spin.h:60
struct BTParallelScanDescData * BTParallelScanDesc
Definition: nbtree.c:90
void ConditionVariableInit(ConditionVariable *cv)
ConditionVariable btps_cv
Definition: nbtree.c:87
BlockNumber btps_scanPage
Definition: nbtree.c:80
BTPS_State btps_pageStatus
Definition: nbtree.c:81
#define InvalidBlockNumber
Definition: block.h:33
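
Paired with btestimateparallelscan() above, this follows the two-phase amestimateparallelscan/aminitparallelscan contract: the leader sizes the AM-specific shared area first, then initializes it in place before any worker attaches. A hedged sketch (carve_from_dsm is a hypothetical helper; the real layout is built by index_parallelscan_initialize() in indexam.c):

Size  btree_shared_len = btestimateparallelscan();      /* sizeof(BTParallelScanDescData) */
void *btree_shared = carve_from_dsm(btree_shared_len);  /* hypothetical DSM carve-out */

btinitparallelscan(btree_shared);   /* btps_scanPage = InvalidBlockNumber,
                                     * btps_pageStatus = BTPARALLEL_NOT_INITIALIZED */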

◆ btinsert()

bool btinsert ( Relation  rel,
Datum values,
bool isnull,
ItemPointer  ht_ctid,
Relation  heapRel,
IndexUniqueCheck  checkUnique,
IndexInfo indexInfo 
)

Definition at line 198 of file nbtree.c.

References _bt_doinsert(), index_form_tuple(), pfree(), RelationGetDescr, and IndexTupleData::t_tid.

Referenced by bthandler().

202 {
203  bool result;
204  IndexTuple itup;
205 
206  /* generate an index tuple */
207  itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
208  itup->t_tid = *ht_ctid;
209 
210  result = _bt_doinsert(rel, itup, checkUnique, heapRel);
211 
212  pfree(itup);
213 
214  return result;
215 }
bool _bt_doinsert(Relation rel, IndexTuple itup, IndexUniqueCheck checkUnique, Relation heapRel)
Definition: nbtinsert.c:82
#define RelationGetDescr(relation)
Definition: rel.h:482
ItemPointerData t_tid
Definition: itup.h:37
IndexTuple index_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull)
Definition: indextuple.c:47
void pfree(void *pointer)
Definition: mcxt.c:1056
static Datum values[MAXATTR]
Definition: bootstrap.c:167

◆ btmarkpos()

void btmarkpos ( IndexScanDesc  scan)

Definition at line 495 of file nbtree.c.

References _bt_mark_array_keys(), BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanPosData::itemIndex, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, and IndexScanDescData::opaque.

Referenced by bthandler().

496 {
497  BTScanOpaque so = (BTScanOpaque) scan->opaque;
498 
499  /* There may be an old mark with a pin (but no lock). */
500  BTScanPosUnpinIfPinned(so->markPos);
501 
502  /*
503  * Just record the current itemIndex. If we later step to next page
504  * before releasing the marked position, _bt_steppage makes a full copy of
505  * the currPos struct in markPos. If (as often happens) the mark is moved
506  * before we leave the page, we don't have to do that work.
507  */
508  if (BTScanPosIsValid(so->currPos))
509  so->markItemIndex = so->currPos.itemIndex;
510  else
511  {
512  BTScanPosInvalidate(so->markPos);
513  so->markItemIndex = -1;
514  }
515 
516  /* Also record the current positions of any array keys */
517  if (so->numArrayKeys)
518  _bt_mark_array_keys(scan);
519 }
int itemIndex
Definition: nbtree.h:854
#define BTScanPosIsValid(scanpos)
Definition: nbtree.h:878
BTScanPosData markPos
Definition: nbtree.h:942
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:945
int numArrayKeys
Definition: nbtree.h:912
void _bt_mark_array_keys(IndexScanDesc scan)
Definition: nbtutils.c:603
int markItemIndex
Definition: nbtree.h:938
#define BTScanPosInvalidate(scanpos)
Definition: nbtree.h:884
BTScanPosData currPos
Definition: nbtree.h:941
#define BTScanPosUnpinIfPinned(scanpos)
Definition: nbtree.h:872

◆ btparallelrescan()

void btparallelrescan ( IndexScanDesc  scan)

Definition at line 605 of file nbtree.c.

References Assert, BTPARALLEL_NOT_INITIALIZED, BTParallelScanDescData::btps_arrayKeyCount, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, InvalidBlockNumber, OffsetToPointer, IndexScanDescData::parallel_scan, ParallelIndexScanDescData::ps_offset, SpinLockAcquire, and SpinLockRelease.

Referenced by bthandler().

606 {
607  BTParallelScanDesc btscan;
608  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
609 
610  Assert(parallel_scan);
611 
612  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
613  parallel_scan->ps_offset);
614 
615  /*
616  * In theory, we don't need to acquire the spinlock here, because there
617  * shouldn't be any other workers running at this point, but we do so for
618  * consistency.
619  */
620  SpinLockAcquire(&btscan->btps_mutex);
621  btscan->btps_scanPage = InvalidBlockNumber;
622  btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
623  btscan->btps_arrayKeyCount = 0;
624  SpinLockRelease(&btscan->btps_mutex);
625 }
slock_t btps_mutex
Definition: nbtree.c:86
struct BTParallelScanDescData * BTParallelScanDesc
Definition: nbtree.c:90
#define SpinLockAcquire(lock)
Definition: spin.h:62
#define OffsetToPointer(base, offset)
Definition: c.h:641
#define SpinLockRelease(lock)
Definition: spin.h:64
struct ParallelIndexScanDescData * parallel_scan
Definition: relscan.h:151
BlockNumber btps_scanPage
Definition: nbtree.c:80
#define Assert(condition)
Definition: c.h:738
BTPS_State btps_pageStatus
Definition: nbtree.c:81
#define InvalidBlockNumber
Definition: block.h:33

◆ btreevacuumposting()

static BTVacuumPosting btreevacuumposting ( BTVacState *  vstate,
IndexTuple  posting,
OffsetNumber  updatedoffset,
int *  nremaining 
)
static

Definition at line 1442 of file nbtree.c.

References BTreeTupleGetNPosting(), BTreeTupleGetPosting(), BTVacState::callback, BTVacState::callback_state, BTVacuumPostingData::deletetids, i, BTVacuumPostingData::itup, BTVacuumPostingData::ndeletedtids, offsetof, palloc(), and BTVacuumPostingData::updatedoffset.

Referenced by btvacuumpage().

1444 {
1445  int live = 0;
1446  int nitem = BTreeTupleGetNPosting(posting);
1447  ItemPointer items = BTreeTupleGetPosting(posting);
1448  BTVacuumPosting vacposting = NULL;
1449 
1450  for (int i = 0; i < nitem; i++)
1451  {
1452  if (!vstate->callback(items + i, vstate->callback_state))
1453  {
1454  /* Live table TID */
1455  live++;
1456  }
1457  else if (vacposting == NULL)
1458  {
1459  /*
1460  * First dead table TID encountered.
1461  *
1462  * It's now clear that we need to delete one or more dead table
1463  * TIDs, so start maintaining metadata describing how to update
1464  * existing posting list tuple.
1465  */
1466  vacposting = palloc(offsetof(BTVacuumPostingData, deletetids) +
1467  nitem * sizeof(uint16));
1468 
1469  vacposting->itup = posting;
1470  vacposting->updatedoffset = updatedoffset;
1471  vacposting->ndeletedtids = 0;
1472  vacposting->deletetids[vacposting->ndeletedtids++] = i;
1473  }
1474  else
1475  {
1476  /* Second or subsequent dead table TID */
1477  vacposting->deletetids[vacposting->ndeletedtids++] = i;
1478  }
1479  }
1480 
1481  *nremaining = live;
1482  return vacposting;
1483 }
uint16 ndeletedtids
Definition: nbtree.h:781
void * callback_state
Definition: nbtree.c:47
static ItemPointer BTreeTupleGetPosting(IndexTuple posting)
Definition: nbtree.h:405
OffsetNumber updatedoffset
Definition: nbtree.h:778
IndexBulkDeleteCallback callback
Definition: nbtree.c:46
IndexTuple itup
Definition: nbtree.h:777
unsigned short uint16
Definition: c.h:366
uint16 deletetids[FLEXIBLE_ARRAY_MEMBER]
Definition: nbtree.h:782
void * palloc(Size size)
Definition: mcxt.c:949
int i
static uint16 BTreeTupleGetNPosting(IndexTuple posting)
Definition: nbtree.h:386
#define offsetof(type, field)
Definition: c.h:661
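
For example, given a posting list tuple holding five heap TIDs of which the callback reports two dead, the function returns a BTVacuumPosting with ndeletedtids = 2 (the dead TIDs' positions stored in deletetids[]) and sets *nremaining to 3. If no TID is dead it returns NULL and *nremaining equals the full posting list count, so the caller leaves the tuple untouched.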

◆ btrescan()

void btrescan ( IndexScanDesc  scan,
ScanKey  scankey,
int  nscankeys,
ScanKey  orderbys,
int  norderbys 
)

Definition at line 399 of file nbtree.c.

References _bt_killitems(), _bt_preprocess_array_keys(), BTScanOpaqueData::arrayKeyCount, BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, IndexScanDescData::keyData, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, IndexScanDescData::numberOfKeys, BTScanOpaqueData::numberOfKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), and IndexScanDescData::xs_want_itup.

Referenced by bthandler().

401 {
402  BTScanOpaque so = (BTScanOpaque) scan->opaque;
403 
404  /* we aren't holding any read locks, but gotta drop the pins */
405  if (BTScanPosIsValid(so->currPos))
406  {
407  /* Before leaving current page, deal with any killed items */
408  if (so->numKilled > 0)
409  _bt_killitems(scan);
410  BTScanPosUnpinIfPinned(so->currPos);
411  BTScanPosInvalidate(so->currPos);
412  }
413 
414  so->markItemIndex = -1;
415  so->arrayKeyCount = 0;
416  BTScanPosUnpinIfPinned(so->markPos);
417  BTScanPosInvalidate(so->markPos);
418 
419  /*
420  * Allocate tuple workspace arrays, if needed for an index-only scan and
421  * not already done in a previous rescan call. To save on palloc
422  * overhead, both workspaces are allocated as one palloc block; only this
423  * function and btendscan know that.
424  *
425  * NOTE: this data structure also makes it safe to return data from a
426  * "name" column, even though btree name_ops uses an underlying storage
427  * datatype of cstring. The risk there is that "name" is supposed to be
428  * padded to NAMEDATALEN, but the actual index tuple is probably shorter.
429  * However, since we only return data out of tuples sitting in the
430  * currTuples array, a fetch of NAMEDATALEN bytes can at worst pull some
431  * data out of the markTuples array --- running off the end of memory for
432  * a SIGSEGV is not possible. Yeah, this is ugly as sin, but it beats
433  * adding special-case treatment for name_ops elsewhere.
434  */
435  if (scan->xs_want_itup && so->currTuples == NULL)
436  {
437  so->currTuples = (char *) palloc(BLCKSZ * 2);
438  so->markTuples = so->currTuples + BLCKSZ;
439  }
440 
441  /*
442  * Reset the scan keys. Note that keys ordering stuff moved to _bt_first.
443  * - vadim 05/05/97
444  */
445  if (scankey && scan->numberOfKeys > 0)
446  memmove(scan->keyData,
447  scankey,
448  scan->numberOfKeys * sizeof(ScanKeyData));
449  so->numberOfKeys = 0; /* until _bt_preprocess_keys sets it */
450 
451  /* If any keys are SK_SEARCHARRAY type, set up array-key info */
452  _bt_preprocess_array_keys(scan);
453  }
char * currTuples
Definition: nbtree.h:928
#define BTScanPosIsValid(scanpos)
Definition: nbtree.h:878
void _bt_killitems(IndexScanDesc scan)
Definition: nbtutils.c:1718
BTScanPosData markPos
Definition: nbtree.h:942
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:945
int arrayKeyCount
Definition: nbtree.h:914
char * markTuples
Definition: nbtree.h:929
int markItemIndex
Definition: nbtree.h:938
#define BTScanPosInvalidate(scanpos)
Definition: nbtree.h:884
int numberOfKeys
Definition: nbtree.h:907
struct ScanKeyData * keyData
Definition: relscan.h:107
BTScanPosData currPos
Definition: nbtree.h:941
void * palloc(Size size)
Definition: mcxt.c:949
void _bt_preprocess_array_keys(IndexScanDesc scan)
Definition: nbtutils.c:203
#define BTScanPosUnpinIfPinned(scanpos)
Definition: nbtree.h:872

◆ btrestrpos()

void btrestrpos ( IndexScanDesc  scan)

Definition at line 525 of file nbtree.c.

References _bt_killitems(), _bt_restore_array_keys(), BTScanPosInvalidate, BTScanPosIsPinned, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanPosData::buf, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, IncrBufferRefCount(), BTScanPosData::itemIndex, BTScanPosData::lastItem, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanPosData::nextTupleOffset, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numKilled, offsetof, and IndexScanDescData::opaque.

Referenced by bthandler().

526 {
527  BTScanOpaque so = (BTScanOpaque) scan->opaque;
528 
529  /* Restore the marked positions of any array keys */
530  if (so->numArrayKeys)
531  _bt_restore_array_keys(scan);
532 
533  if (so->markItemIndex >= 0)
534  {
535  /*
536  * The scan has never moved to a new page since the last mark. Just
537  * restore the itemIndex.
538  *
539  * NB: In this case we can't count on anything in so->markPos to be
540  * accurate.
541  */
542  so->currPos.itemIndex = so->markItemIndex;
543  }
544  else
545  {
546  /*
547  * The scan moved to a new page after last mark or restore, and we are
548  * now restoring to the marked page. We aren't holding any read
549  * locks, but if we're still holding the pin for the current position,
550  * we must drop it.
551  */
552  if (BTScanPosIsValid(so->currPos))
553  {
554  /* Before leaving current page, deal with any killed items */
555  if (so->numKilled > 0)
556  _bt_killitems(scan);
557  BTScanPosUnpinIfPinned(so->currPos);
558  }
559 
560  if (BTScanPosIsValid(so->markPos))
561  {
562  /* bump pin on mark buffer for assignment to current buffer */
563  if (BTScanPosIsPinned(so->markPos))
564  IncrBufferRefCount(so->markPos.buf);
565  memcpy(&so->currPos, &so->markPos,
566  offsetof(BTScanPosData, items[1]) +
567  so->markPos.lastItem * sizeof(BTScanPosItem));
568  if (so->currTuples)
569  memcpy(so->currTuples, so->markTuples,
570  so->markPos.nextTupleOffset);
571  }
572  else
573  BTScanPosInvalidate(so->currPos);
574  }
575 }
int itemIndex
Definition: nbtree.h:854
char * currTuples
Definition: nbtree.h:928
#define BTScanPosIsValid(scanpos)
Definition: nbtree.h:878
void _bt_restore_array_keys(IndexScanDesc scan)
Definition: nbtutils.c:622
int nextTupleOffset
Definition: nbtree.h:843
int lastItem
Definition: nbtree.h:853
void _bt_killitems(IndexScanDesc scan)
Definition: nbtutils.c:1718
BTScanPosData markPos
Definition: nbtree.h:942
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:945
int numArrayKeys
Definition: nbtree.h:912
#define BTScanPosIsPinned(scanpos)
Definition: nbtree.h:861
char * markTuples
Definition: nbtree.h:929
int markItemIndex
Definition: nbtree.h:938
#define BTScanPosInvalidate(scanpos)
Definition: nbtree.h:884
BTScanPosData currPos
Definition: nbtree.h:941
Buffer buf
Definition: nbtree.h:824
void IncrBufferRefCount(Buffer buffer)
Definition: bufmgr.c:3521
#define offsetof(type, field)
Definition: c.h:661
#define BTScanPosUnpinIfPinned(scanpos)
Definition: nbtree.h:872

◆ btvacuumcleanup()

IndexBulkDeleteResult* btvacuumcleanup ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats 
)

Definition at line 898 of file nbtree.c.

References _bt_vacuum_needs_cleanup(), IndexVacuumInfo::analyze_only, btvacuumscan(), IndexVacuumInfo::estimated_count, IndexVacuumInfo::num_heap_tuples, IndexBulkDeleteResult::num_index_tuples, and palloc0().

Referenced by bthandler().

899 {
900  /* No-op in ANALYZE ONLY mode */
901  if (info->analyze_only)
902  return stats;
903 
904  /*
905  * If btbulkdelete was called, we need not do anything, just return the
906  * stats from the latest btbulkdelete call. If it wasn't called, we might
907  * still need to do a pass over the index, to recycle any newly-recyclable
908  * pages or to obtain index statistics. _bt_vacuum_needs_cleanup
909  * determines if either are needed.
910  *
911  * Since we aren't going to actually delete any leaf items, there's no
912  * need to go through all the vacuum-cycle-ID pushups.
913  */
914  if (stats == NULL)
915  {
916  /* Check if we need a cleanup */
917  if (!_bt_vacuum_needs_cleanup(info))
918  return NULL;
919 
920  stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
921  btvacuumscan(info, stats, NULL, NULL, 0);
922  }
923 
924  /*
925  * It's quite possible for us to be fooled by concurrent page splits into
926  * double-counting some index tuples, so disbelieve any total that exceeds
927  * the underlying heap's count ... if we know that accurately. Otherwise
928  * this might just make matters worse.
929  */
930  if (!info->estimated_count)
931  {
932  if (stats->num_index_tuples > info->num_heap_tuples)
933  stats->num_index_tuples = info->num_heap_tuples;
934  }
935 
936  return stats;
937 }
static bool _bt_vacuum_needs_cleanup(IndexVacuumInfo *info)
Definition: nbtree.c:799
bool analyze_only
Definition: genam.h:47
static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid)
Definition: nbtree.c:954
void * palloc0(Size size)
Definition: mcxt.c:980
double num_heap_tuples
Definition: genam.h:51
double num_index_tuples
Definition: genam.h:77
bool estimated_count
Definition: genam.h:49

◆ btvacuumpage()

static void btvacuumpage ( BTVacState *  vstate,
BlockNumber  scanblkno 
)
static

Definition at line 1086 of file nbtree.c.

References _bt_checkpage(), _bt_delitems_vacuum(), _bt_page_recyclable(), _bt_pagedel(), _bt_relbuf(), Assert, BT_READ, BTP_SPLIT_END, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTreeTupleGetNPosting(), BTreeTupleIsPivot(), BTreeTupleIsPosting(), btreevacuumposting(), buf, BUFFER_LOCK_UNLOCK, BufferGetPage, BTVacState::callback, callback(), BTVacState::callback_state, BTVacState::cycleid, ereport, errcode(), errmsg_internal(), i, IndexVacuumInfo::index, BTVacState::info, LockBuffer(), LockBufferForCleanup(), LOG, MAIN_FORKNUM, MarkBufferDirtyHint(), MaxIndexTuplesPerPage, MemoryContextReset(), MemoryContextSwitchTo(), IndexBulkDeleteResult::num_index_tuples, OffsetNumberNext, BTVacState::oldestBtpoXact, P_FIRSTDATAKEY, P_ISDELETED, P_ISHALFDEAD, P_ISLEAF, P_NONE, P_RIGHTMOST, BTVacState::pagedelcontext, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageGetSpecialPointer, PageIsNew, IndexBulkDeleteResult::pages_deleted, pfree(), RBM_NORMAL, ReadBufferExtended(), RecordFreeIndexPage(), RelationGetRelationName, BTVacState::stats, IndexVacuumInfo::strategy, IndexTupleData::t_tid, BTVacState::totFreePages, TransactionIdIsValid, TransactionIdPrecedes(), IndexBulkDeleteResult::tuples_removed, vacuum_delay_point(), and BTPageOpaqueData::xact.

Referenced by btvacuumscan().

1087 {
1088  IndexVacuumInfo *info = vstate->info;
1089  IndexBulkDeleteResult *stats = vstate->stats;
1090  IndexBulkDeleteCallback callback = vstate->callback;
1091  void *callback_state = vstate->callback_state;
1092  Relation rel = info->index;
1093  bool attempt_pagedel;
1094  BlockNumber blkno,
1095  backtrack_to;
1096  Buffer buf;
1097  Page page;
1098  BTPageOpaque opaque;
1099 
1100  blkno = scanblkno;
1101 
1102 backtrack:
1103 
1104  attempt_pagedel = false;
1105  backtrack_to = P_NONE;
1106 
1107  /* call vacuum_delay_point while not holding any buffer lock */
1108  vacuum_delay_point();
1109 
1110  /*
1111  * We can't use _bt_getbuf() here because it always applies
1112  * _bt_checkpage(), which will barf on an all-zero page. We want to
1113  * recycle all-zero pages, not fail. Also, we want to use a nondefault
1114  * buffer access strategy.
1115  */
1116  buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
1117  info->strategy);
1118  LockBuffer(buf, BT_READ);
1119  page = BufferGetPage(buf);
1120  opaque = NULL;
1121  if (!PageIsNew(page))
1122  {
1123  _bt_checkpage(rel, buf);
1124  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1125  }
1126 
1127  Assert(blkno <= scanblkno);
1128  if (blkno != scanblkno)
1129  {
1130  /*
1131  * We're backtracking.
1132  *
1133  * We followed a right link to a sibling leaf page (a page that
1134  * happens to be from a block located before scanblkno). The only
1135  * case we want to do anything with is a live leaf page having the
1136  * current vacuum cycle ID.
1137  *
1138  * The page had better be in a state that's consistent with what we
1139  * expect. Check for conditions that imply corruption in passing. It
1140  * can't be half-dead because only an interrupted VACUUM process can
1141  * leave pages in that state, so we'd definitely have dealt with it
1142  * back when the page was the scanblkno page (half-dead pages are
1143  * always marked fully deleted by _bt_pagedel()). This assumes that
1144  * there can be only one vacuum process running at a time.
1145  */
1146  if (!opaque || !P_ISLEAF(opaque) || P_ISHALFDEAD(opaque))
1147  {
1148  Assert(false);
1149  ereport(LOG,
1150  (errcode(ERRCODE_INDEX_CORRUPTED),
1151  errmsg_internal("right sibling %u of scanblkno %u unexpectedly in an inconsistent state in index \"%s\"",
1152  blkno, scanblkno, RelationGetRelationName(rel))));
1153  _bt_relbuf(rel, buf);
1154  return;
1155  }
1156 
1157  /*
1158  * We may have already processed the page in an earlier call, when the
1159  * page was scanblkno. This happens when the leaf page split occurred
1160  * after the scan began, but before the right sibling page became the
1161  * scanblkno.
1162  *
1163  * The page may also have been deleted by the current btvacuumpage() call,
1164  * since _bt_pagedel() sometimes deletes the right sibling page of
1165  * scanblkno in passing (it does so after we decided where to
1166  * backtrack to). We don't need to process this page as a deleted
1167  * page a second time now (in fact, it would be wrong to count it as a
1168  * deleted page in the bulk delete statistics a second time).
1169  */
1170  if (opaque->btpo_cycleid != vstate->cycleid || P_ISDELETED(opaque))
1171  {
1172  /* Done with current scanblkno (and all lower split pages) */
1173  _bt_relbuf(rel, buf);
1174  return;
1175  }
1176  }
1177 
1178  /* Page is valid, see what to do with it */
1179  if (_bt_page_recyclable(page))
1180  {
1181  /* Okay to recycle this page (which could be leaf or internal) */
1182  RecordFreeIndexPage(rel, blkno);
1183  vstate->totFreePages++;
1184  stats->pages_deleted++;
1185  }
1186  else if (P_ISDELETED(opaque))
1187  {
1188  /*
1189  * Already deleted page (which could be leaf or internal). Can't
1190  * recycle yet.
1191  */
1192  stats->pages_deleted++;
1193 
1194  /* Maintain the oldest btpo.xact */
1195  if (!TransactionIdIsValid(vstate->oldestBtpoXact) ||
1196  TransactionIdPrecedes(opaque->btpo.xact, vstate->oldestBtpoXact))
1197  vstate->oldestBtpoXact = opaque->btpo.xact;
1198  }
1199  else if (P_ISHALFDEAD(opaque))
1200  {
1201  /*
1202  * Half-dead leaf page. Try to delete now. Might update
1203  * oldestBtpoXact and pages_deleted below.
1204  */
1205  attempt_pagedel = true;
1206  }
1207  else if (P_ISLEAF(opaque))
1208  {
1209  OffsetNumber deletable[MaxIndexTuplesPerPage];
1210  int ndeletable;
1211  BTVacuumPosting updatable[MaxIndexTuplesPerPage];
1212  int nupdatable;
1213  OffsetNumber offnum,
1214  minoff,
1215  maxoff;
1216  int nhtidsdead,
1217  nhtidslive;
1218 
1219  /*
1220  * Trade in the initial read lock for a super-exclusive write lock on
1221  * this page. We must get such a lock on every leaf page over the
1222  * course of the vacuum scan, whether or not it actually contains any
1223  * deletable tuples --- see nbtree/README.
1224  */
1225  LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1226  LockBufferForCleanup(buf);
1227 
1228  /*
1229  * Check whether we need to backtrack to earlier pages. What we are
1230  * concerned about is a page split that happened since we started the
1231  * vacuum scan. If the split moved tuples on the right half of the
1232  * split (i.e. the tuples that sort high) to a block that we already
1233  * passed over, then we might have missed the tuples. We need to
1234  * backtrack now. (Must do this before possibly clearing btpo_cycleid
1235  * or deleting scanblkno page below!)
1236  */
1237  if (vstate->cycleid != 0 &&
1238  opaque->btpo_cycleid == vstate->cycleid &&
1239  !(opaque->btpo_flags & BTP_SPLIT_END) &&
1240  !P_RIGHTMOST(opaque) &&
1241  opaque->btpo_next < scanblkno)
1242  backtrack_to = opaque->btpo_next;
1243 
1244  /*
1245  * When each VACUUM begins, it determines an OldestXmin cutoff value.
1246  * Tuples before the cutoff are removed by VACUUM. Scan over all
1247  * items to see which ones need to be deleted according to cutoff
1248  * point using callback.
1249  */
1250  ndeletable = 0;
1251  nupdatable = 0;
1252  minoff = P_FIRSTDATAKEY(opaque);
1253  maxoff = PageGetMaxOffsetNumber(page);
1254  nhtidsdead = 0;
1255  nhtidslive = 0;
1256  if (callback)
1257  {
1258  for (offnum = minoff;
1259  offnum <= maxoff;
1260  offnum = OffsetNumberNext(offnum))
1261  {
1262  IndexTuple itup;
1263 
1264  itup = (IndexTuple) PageGetItem(page,
1265  PageGetItemId(page, offnum));
1266 
1267  /*
1268  * Hot Standby assumes that it's okay that XLOG_BTREE_VACUUM
1269  * records do not produce their own conflicts. This is safe
1270  * as long as the callback function only considers whether the
1271  * index tuple refers to pre-cutoff heap tuples that were
1272  * certainly already pruned away during VACUUM's initial heap
1273  * scan by the time we get here. (XLOG_HEAP2_CLEANUP_INFO
1274  * records produce conflicts using a latestRemovedXid value
1275  * for the entire VACUUM, so there is no need to produce our
1276  * own conflict now.)
1277  *
1278  * Backends with snapshots acquired after a VACUUM starts but
1279  * before it finishes could have a RecentGlobalXmin with a
1280  * later xid than the VACUUM's OldestXmin cutoff. These
1281  * backends might happen to opportunistically mark some index
1282  * tuples LP_DEAD before we reach them, even though they may
1283  * be after our cutoff. We don't try to kill these "extra"
1284  * index tuples in _bt_delitems_vacuum(). This keeps things
1285  * simple, and allows us to always avoid generating our own
1286  * conflicts.
1287  */
1288  Assert(!BTreeTupleIsPivot(itup));
1289  if (!BTreeTupleIsPosting(itup))
1290  {
1291  /* Regular tuple, standard table TID representation */
1292  if (callback(&itup->t_tid, callback_state))
1293  {
1294  deletable[ndeletable++] = offnum;
1295  nhtidsdead++;
1296  }
1297  else
1298  nhtidslive++;
1299  }
1300  else
1301  {
1302  BTVacuumPosting vacposting;
1303  int nremaining;
1304 
1305  /* Posting list tuple */
1306  vacposting = btreevacuumposting(vstate, itup, offnum,
1307  &nremaining);
1308  if (vacposting == NULL)
1309  {
1310  /*
1311  * All table TIDs from the posting tuple remain, so no
1312  * delete or update required
1313  */
1314  Assert(nremaining == BTreeTupleGetNPosting(itup));
1315  }
1316  else if (nremaining > 0)
1317  {
1318 
1319  /*
1320  * Store metadata about posting list tuple in
1321  * updatable array for entire page. Existing tuple
1322  * will be updated during the later call to
1323  * _bt_delitems_vacuum().
1324  */
1325  Assert(nremaining < BTreeTupleGetNPosting(itup));
1326  updatable[nupdatable++] = vacposting;
1327  nhtidsdead += BTreeTupleGetNPosting(itup) - nremaining;
1328  }
1329  else
1330  {
1331  /*
1332  * All table TIDs from the posting list must be
1333  * deleted. We'll delete the index tuple completely
1334  * (no update required).
1335  */
1336  Assert(nremaining == 0);
1337  deletable[ndeletable++] = offnum;
1338  nhtidsdead += BTreeTupleGetNPosting(itup);
1339  pfree(vacposting);
1340  }
1341 
1342  nhtidslive += nremaining;
1343  }
1344  }
1345  }
1346 
1347  /*
1348  * Apply any needed deletes or updates. We issue just one
1349  * _bt_delitems_vacuum() call per page, so as to minimize WAL traffic.
1350  */
1351  if (ndeletable > 0 || nupdatable > 0)
1352  {
1353  Assert(nhtidsdead >= ndeletable + nupdatable);
1354  _bt_delitems_vacuum(rel, buf, deletable, ndeletable, updatable,
1355  nupdatable);
1356 
1357  stats->tuples_removed += nhtidsdead;
1358  /* must recompute maxoff */
1359  maxoff = PageGetMaxOffsetNumber(page);
1360 
1361  /* can't leak memory here */
1362  for (int i = 0; i < nupdatable; i++)
1363  pfree(updatable[i]);
1364  }
1365  else
1366  {
1367  /*
1368  * If the leaf page has been split during this vacuum cycle, it
1369  * seems worth expending a write to clear btpo_cycleid even if we
1370  * don't have any deletions to do. (If we do, _bt_delitems_vacuum
1371  * takes care of this.) This ensures we won't process the page
1372  * again.
1373  *
1374  * We treat this like a hint-bit update because there's no need to
1375  * WAL-log it.
1376  */
1377  Assert(nhtidsdead == 0);
1378  if (vstate->cycleid != 0 &&
1379  opaque->btpo_cycleid == vstate->cycleid)
1380  {
1381  opaque->btpo_cycleid = 0;
1382  MarkBufferDirtyHint(buf, true);
1383  }
1384  }
1385 
1386  /*
1387  * If the leaf page is now empty, try to delete it; else count the
1388  * live tuples (live table TIDs in posting lists are counted as
1389  * separate live tuples). We don't delete when backtracking, though,
1390  * since that would require teaching _bt_pagedel() about backtracking
1391  * (doesn't seem worth adding more complexity to deal with that).
1392  */
1393  if (minoff > maxoff)
1394  attempt_pagedel = (blkno == scanblkno);
1395  else
1396  stats->num_index_tuples += nhtidslive;
1397 
1398  Assert(!attempt_pagedel || nhtidslive == 0);
1399  }
1400 
1401  if (attempt_pagedel)
1402  {
1403  MemoryContext oldcontext;
1404 
1405  /* Run pagedel in a temp context to avoid memory leakage */
1406  MemoryContextReset(vstate->pagedelcontext);
1407  oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
1408 
1409  /*
1410  * We trust the _bt_pagedel return value because it does not include
1411  * any page that a future call here from btvacuumscan is expected to
1412  * count. There will be no double-counting.
1413  */
1414  Assert(blkno == scanblkno);
1415  stats->pages_deleted += _bt_pagedel(rel, buf, &vstate->oldestBtpoXact);
1416 
1417  MemoryContextSwitchTo(oldcontext);
1418  /* pagedel released buffer, so we shouldn't */
1419  }
1420  else
1421  _bt_relbuf(rel, buf);
1422 
1423  if (backtrack_to != P_NONE)
1424  {
1425  blkno = backtrack_to;
1426  goto backtrack;
1427  }
1428 }
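
The goto-based backtracking above is easier to see in isolation. What follows is a minimal, self-contained sketch (not PostgreSQL code) of the same control flow: visit blocks in physical order, and when a page that split during the current vacuum cycle has its right half at a lower-numbered block the scan already passed, jump back to that block before resuming the forward scan. The FakePage type, the pages array, and process_page() are hypothetical stand-ins for the real buffer and page machinery.

#include <stdio.h>
#include <stdbool.h>

#define P_NONE_BLK  0u          /* sentinel: no backtrack target */
#define NUM_BLOCKS  8

typedef unsigned int BlockNumber;

typedef struct
{
    BlockNumber next;               /* right sibling block, or P_NONE_BLK */
    bool        split_this_cycle;   /* "btpo_cycleid matches current cycle" */
} FakePage;

static void
process_page(const FakePage *pages, BlockNumber scanblkno)
{
    BlockNumber blkno = scanblkno;
    BlockNumber backtrack_to;

backtrack:
    backtrack_to = P_NONE_BLK;

    printf("visiting block %u (scanblkno %u)\n", blkno, scanblkno);

    /*
     * A split during this cycle whose right half landed on a block we
     * already passed means tuples may have moved behind us: revisit it.
     */
    if (pages[blkno].split_this_cycle &&
        pages[blkno].next != P_NONE_BLK &&
        pages[blkno].next < scanblkno)
        backtrack_to = pages[blkno].next;

    if (backtrack_to != P_NONE_BLK)
    {
        blkno = backtrack_to;
        goto backtrack;
    }
}

int
main(void)
{
    FakePage pages[NUM_BLOCKS] = {{0u, false}};

    /* pretend block 5 split during the scan; its right half landed at block 2 */
    pages[5].split_this_cycle = true;
    pages[5].next = 2;

    for (BlockNumber scanblkno = 1; scanblkno < NUM_BLOCKS; scanblkno++)
        process_page(pages, scanblkno);
    return 0;
}

As in btvacuumpage() itself, scanblkno stays fixed while blkno moves, preserving the blkno <= scanblkno invariant, and a revisited block triggers a further jump only if it too split during the current cycle, so the loop terminates.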
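For posting list tuples, the keep/update/delete decision above reduces to counting surviving heap TIDs. Below is a simplified, self-contained sketch (not PostgreSQL code) of that three-way triage; FakeTid, DeadCallback, and triage_posting() are hypothetical stand-ins for ItemPointerData, the IndexBulkDeleteCallback, and btreevacuumposting().

#include <stdio.h>
#include <stdbool.h>

typedef int FakeTid;                        /* stand-in for ItemPointerData */
typedef bool (*DeadCallback)(FakeTid tid);  /* stand-in for IndexBulkDeleteCallback */

typedef enum
{
    TUPLE_KEEP,     /* every TID survives: no delete or update required */
    TUPLE_UPDATE,   /* some TIDs dead: rewrite tuple with the survivors */
    TUPLE_DELETE    /* no TIDs survive: remove the index tuple entirely */
} TriageResult;

/* Count surviving TIDs, then classify, mirroring the branches above. */
static TriageResult
triage_posting(const FakeTid *tids, int ntids, DeadCallback dead,
               int *nremaining)
{
    int live = 0;

    for (int i = 0; i < ntids; i++)
        if (!dead(tids[i]))
            live++;

    *nremaining = live;
    if (live == ntids)
        return TUPLE_KEEP;
    if (live > 0)
        return TUPLE_UPDATE;
    return TUPLE_DELETE;
}

static bool
is_even_dead(FakeTid tid)
{
    return tid % 2 == 0;        /* pretend even-numbered TIDs are dead */
}

int
main(void)
{
    FakeTid tids[] = {1, 2, 3, 4};
    int nremaining;
    TriageResult r = triage_posting(tids, 4, is_even_dead, &nremaining);

    printf("result=%d, live TIDs remaining=%d\n", (int) r, nremaining);
    return 0;
}

Compiled and run, this prints result=1 (TUPLE_UPDATE) with two live TIDs remaining, corresponding to the branch above that queues the tuple in the updatable array for the later _bt_delitems_vacuum() call.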

◆ btvacuumscan()

static void btvacuumscan ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats,
IndexBulkDeleteCallback  callback,
void *  callback_state,
BTCycleId  cycleid 
)
static

Definition at line 954 of file nbtree.c.

References _bt_update_meta_cleanup_info(), ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, BTREE_METAPAGE, btvacuumpage(), BTVacState::callback, callback(), BTVacState::callback_state, CurrentMemoryContext, BTVacState::cycleid, IndexBulkDeleteResult::estimated_count, ExclusiveLock, IndexVacuumInfo::index, IndexFreeSpaceMapVacuum(), BTVacState::info, InvalidTransactionId, LockRelationForExtension(), MemoryContextDelete(), IndexVacuumInfo::num_heap_tuples, IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::num_pages, BTVacState::oldestBtpoXact, BTVacState::pagedelcontext, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, pgstat_progress_update_param(), PROGRESS_SCAN_BLOCKS_DONE, PROGRESS_SCAN_BLOCKS_TOTAL, RELATION_IS_LOCAL, RelationGetNumberOfBlocks, IndexVacuumInfo::report_progress, BTVacState::stats, BTVacState::totFreePages, and UnlockRelationForExtension().

Referenced by btbulkdelete(), and btvacuumcleanup().

957 {
958  Relation rel = info->index;
959  BTVacState vstate;
960  BlockNumber num_pages;
961  BlockNumber scanblkno;
962  bool needLock;
963 
964  /*
965  * Reset counts that will be incremented during the scan; needed in case
966  * of multiple scans during a single VACUUM command
967  */
968  stats->estimated_count = false;
969  stats->num_index_tuples = 0;
970  stats->pages_deleted = 0;
971 
972  /* Set up info to pass down to btvacuumpage */
973  vstate.info = info;
974  vstate.stats = stats;
975  vstate.callback = callback;
976  vstate.callback_state = callback_state;
977  vstate.cycleid = cycleid;
978  vstate.totFreePages = 0;
979  vstate.oldestBtpoXact = InvalidTransactionId;
980 
981  /* Create a temporary memory context to run _bt_pagedel in */
982  vstate.pagedelcontext = AllocSetContextCreate(CurrentMemoryContext,
983  "_bt_pagedel",
984  ALLOCSET_DEFAULT_SIZES);
985 
986  /*
987  * The outer loop iterates over all index pages except the metapage, in
988  * physical order (we hope the kernel will cooperate in providing
989  * read-ahead for speed). It is critical that we visit all leaf pages,
990  * including ones added after we start the scan, else we might fail to
991  * delete some deletable tuples. Hence, we must repeatedly check the
992  * relation length. We must acquire the relation-extension lock while
993  * doing so to avoid a race condition: if someone else is extending the
994  * relation, there is a window where bufmgr/smgr have created a new
995  * all-zero page but it hasn't yet been write-locked by _bt_getbuf(). If
996  * we manage to scan such a page here, we'll improperly assume it can be
997  * recycled. Taking the lock synchronizes things enough to prevent a
998  * problem: either num_pages won't include the new page, or _bt_getbuf
999  * already has write lock on the buffer and it will be fully initialized
1000  * before we can examine it. (See also vacuumlazy.c, which has the same
1001  * issue.) Also, we need not worry if a page is added immediately after
1002  * we look; the page splitting code already has write-lock on the left
1003  * page before it adds a right page, so we must already have processed any
1004  * tuples due to be moved into such a page.
1005  *
1006  * We can skip locking for new or temp relations, however, since no one
1007  * else could be accessing them.
1008  */
1009  needLock = !RELATION_IS_LOCAL(rel);
1010 
1011  scanblkno = BTREE_METAPAGE + 1;
1012  for (;;)
1013  {
1014  /* Get the current relation length */
1015  if (needLock)
1016  LockRelationForExtension(rel, ExclusiveLock);
1017  num_pages = RelationGetNumberOfBlocks(rel);
1018  if (needLock)
1019  UnlockRelationForExtension(rel, ExclusiveLock);
1020 
1021  if (info->report_progress)
1022  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
1023  num_pages);
1024 
1025  /* Quit if we've scanned the whole relation */
1026  if (scanblkno >= num_pages)
1027  break;
1028  /* Iterate over pages, then loop back to recheck length */
1029  for (; scanblkno < num_pages; scanblkno++)
1030  {
1031  btvacuumpage(&vstate, scanblkno);
1032  if (info->report_progress)
1033  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1034  scanblkno);
1035  }
1036  }
1037 
1038  MemoryContextDelete(vstate.pagedelcontext);
1039 
1040  /*
1041  * If we found any recyclable pages (and recorded them in the FSM), then
1042  * forcibly update the upper-level FSM pages to ensure that searchers can
1043  * find them. It's possible that the pages were also found during
1044  * previous scans and so this is a waste of time, but it's cheap enough
1045  * relative to scanning the index that it shouldn't matter much, and
1046  * making sure that free pages are available sooner not later seems
1047  * worthwhile.
1048  *
1049  * Note that if no recyclable pages exist, we don't bother vacuuming the
1050  * FSM at all.
1051  */
1052  if (vstate.totFreePages > 0)
1053  IndexFreeSpaceMapVacuum(rel);
1054 
1055  /*
1056  * Maintain the oldest btpo.xact and a count of the current number of heap
1057  * tuples in the metapage (for the benefit of _bt_vacuum_needs_cleanup).
1058  *
1059  * The page with the oldest btpo.xact is typically a page deleted by this
1060  * VACUUM operation, since pages deleted by a previous VACUUM operation
1061  * tend to be placed in the FSM (by the current VACUUM operation) -- such
1062  * pages are not candidates to be the oldest btpo.xact. (Note that pages
1063  * placed in the FSM are reported as deleted pages in the bulk delete
1064  * statistics, despite not counting as deleted pages for the purposes of
1065  * determining the oldest btpo.xact.)
1066  */
1067  _bt_update_meta_cleanup_info(rel, vstate.oldestBtpoXact,
1068  info->num_heap_tuples);
1069 
1070  /* update statistics */
1071  stats->num_pages = num_pages;
1072  stats->pages_free = vstate.totFreePages;
1073 }
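
The loop structure above is worth restating: the relation-extension lock is held only while reading the relation length, and the outer loop re-reads the length after each pass so that pages added mid-scan are still visited. A minimal, self-contained sketch (not PostgreSQL code) of that pattern follows; lock_extension(), unlock_extension(), current_nblocks(), and visit() are hypothetical stubs for LockRelationForExtension(), UnlockRelationForExtension(), RelationGetNumberOfBlocks(), and btvacuumpage().

#include <stdio.h>

typedef unsigned int BlockNumber;

static void lock_extension(void)   { /* LockRelationForExtension() stand-in */ }
static void unlock_extension(void) { /* UnlockRelationForExtension() stand-in */ }
static void visit(BlockNumber blkno) { printf("vacuuming block %u\n", blkno); }

/* Pretend the relation grows once, from 4 blocks to 6, during the scan. */
static BlockNumber
current_nblocks(void)
{
    static int calls = 0;
    return (++calls == 1) ? 4 : 6;
}

int
main(void)
{
    BlockNumber scanblkno = 1;      /* skip block 0, the metapage stand-in */

    for (;;)
    {
        BlockNumber num_pages;

        /* hold the "extension lock" only while reading the length */
        lock_extension();
        num_pages = current_nblocks();
        unlock_extension();

        /* quit once the re-checked length has been fully scanned */
        if (scanblkno >= num_pages)
            break;

        /* scan up to the length we saw, then loop back and re-check */
        for (; scanblkno < num_pages; scanblkno++)
            visit(scanblkno);
    }
    return 0;
}

With the relation growing from 4 to 6 blocks mid-scan, the sketch visits blocks 1 through 5: the second pass through the outer loop picks up the two newly added blocks before the scanblkno >= num_pages test terminates the scan.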