PostgreSQL Source Code  git master
nbtree.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "access/nbtxlog.h"
#include "access/relscan.h"
#include "access/xlog.h"
#include "commands/progress.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "nodes/execnodes.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "storage/condition_variable.h"
#include "storage/indexfsm.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/smgr.h"
#include "utils/builtins.h"
#include "utils/index_selfuncs.h"
#include "utils/memutils.h"
Include dependency graph for nbtree.c:

Go to the source code of this file.

Data Structures

struct  BTVacState
 
struct  BTParallelScanDescData
 

Typedefs

typedef struct BTParallelScanDescData BTParallelScanDescData
 
typedef struct BTParallelScanDescData * BTParallelScanDesc
 

Enumerations

enum  BTPS_State { BTPARALLEL_NOT_INITIALIZED, BTPARALLEL_ADVANCING, BTPARALLEL_IDLE, BTPARALLEL_DONE }
 

Functions

static void btvacuumscan (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid)
 
static void btvacuumpage (BTVacState *vstate, BlockNumber scanblkno)
 
static BTVacuumPosting btreevacuumposting (BTVacState *vstate, IndexTuple posting, OffsetNumber updatedoffset, int *nremaining)
 
Datum bthandler (PG_FUNCTION_ARGS)
 
void btbuildempty (Relation index)
 
bool btinsert (Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, IndexInfo *indexInfo)
 
bool btgettuple (IndexScanDesc scan, ScanDirection dir)
 
int64 btgetbitmap (IndexScanDesc scan, TIDBitmap *tbm)
 
IndexScanDesc btbeginscan (Relation rel, int nkeys, int norderbys)
 
void btrescan (IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
 
void btendscan (IndexScanDesc scan)
 
void btmarkpos (IndexScanDesc scan)
 
void btrestrpos (IndexScanDesc scan)
 
Size btestimateparallelscan (void)
 
void btinitparallelscan (void *target)
 
void btparallelrescan (IndexScanDesc scan)
 
bool _bt_parallel_seize (IndexScanDesc scan, BlockNumber *pageno)
 
void _bt_parallel_release (IndexScanDesc scan, BlockNumber scan_page)
 
void _bt_parallel_done (IndexScanDesc scan)
 
void _bt_parallel_advance_array_keys (IndexScanDesc scan)
 
static bool _bt_vacuum_needs_cleanup (IndexVacuumInfo *info)
 
IndexBulkDeleteResult * btbulkdelete (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
 
IndexBulkDeleteResult * btvacuumcleanup (IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 
bool btcanreturn (Relation index, int attno)
 

Typedef Documentation

◆ BTParallelScanDesc

Definition at line 90 of file nbtree.c.

◆ BTParallelScanDescData

Enumeration Type Documentation

◆ BTPS_State

enum BTPS_State
Enumerator
BTPARALLEL_NOT_INITIALIZED 
BTPARALLEL_ADVANCING 
BTPARALLEL_IDLE 
BTPARALLEL_DONE 

Definition at line 66 of file nbtree.c.

Function Documentation

◆ _bt_parallel_advance_array_keys()

void _bt_parallel_advance_array_keys ( IndexScanDesc  scan)

Definition at line 769 of file nbtree.c.

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_DONE, BTPARALLEL_NOT_INITIALIZED, InvalidBlockNumber, OffsetToPointer, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_advance_array_keys().

770 {
771  BTScanOpaque so = (BTScanOpaque) scan->opaque;
772  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
773  BTParallelScanDesc btscan;
774 
775  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
776  parallel_scan->ps_offset);
777 
778  so->arrayKeyCount++;
779  SpinLockAcquire(&btscan->btps_mutex);
780  if (btscan->btps_pageStatus == BTPARALLEL_DONE)
781  {
782  btscan->btps_scanPage = InvalidBlockNumber;
783  btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
784  btscan->btps_arrayKeyCount++;
785  }
786  SpinLockRelease(&btscan->btps_mutex);
787 }
#define SpinLockAcquire(lock)
Definition: spin.h:62
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:946
int arrayKeyCount
Definition: nbtree.h:915
#define OffsetToPointer(base, offset)
Definition: c.h:648
#define SpinLockRelease(lock)
Definition: spin.h:64
struct ParallelIndexScanDescData * parallel_scan
Definition: relscan.h:163
#define InvalidBlockNumber
Definition: block.h:33

◆ _bt_parallel_done()

void _bt_parallel_done ( IndexScanDesc  scan)

Definition at line 728 of file nbtree.c.

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_DONE, ConditionVariableBroadcast(), OffsetToPointer, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_first(), and _bt_readnextpage().

729 {
730  BTScanOpaque so = (BTScanOpaque) scan->opaque;
731  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
732  BTParallelScanDesc btscan;
733  bool status_changed = false;
734 
735  /* Do nothing, for non-parallel scans */
736  if (parallel_scan == NULL)
737  return;
738 
739  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
740  parallel_scan->ps_offset);
741 
742  /*
743  * Mark the parallel scan as done for this combination of scan keys,
744  * unless some other process already did so. See also
745  * _bt_advance_array_keys.
746  */
747  SpinLockAcquire(&btscan->btps_mutex);
748  if (so->arrayKeyCount >= btscan->btps_arrayKeyCount &&
749  btscan->btps_pageStatus != BTPARALLEL_DONE)
750  {
751  btscan->btps_pageStatus = BTPARALLEL_DONE;
752  status_changed = true;
753  }
754  SpinLockRelease(&btscan->btps_mutex);
755 
756  /* wake up all the workers associated with this parallel scan */
757  if (status_changed)
758  ConditionVariableBroadcast(&btscan->btps_cv);
759 }
void ConditionVariableBroadcast(ConditionVariable *cv)
struct BTParallelScanDescData * BTParallelScanDesc
Definition: nbtree.c:90
#define SpinLockAcquire(lock)
Definition: spin.h:62
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:946
int arrayKeyCount
Definition: nbtree.h:915
#define OffsetToPointer(base, offset)
Definition: c.h:648
#define SpinLockRelease(lock)
Definition: spin.h:64
struct ParallelIndexScanDescData * parallel_scan
Definition: relscan.h:163

◆ _bt_parallel_release()

void _bt_parallel_release ( IndexScanDesc  scan,
BlockNumber  scan_page 
)

Definition at line 705 of file nbtree.c.

References BTPARALLEL_IDLE, BTParallelScanDescData::btps_cv, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, ConditionVariableSignal(), OffsetToPointer, IndexScanDescData::parallel_scan, ParallelIndexScanDescData::ps_offset, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_readnextpage(), and _bt_readpage().

706 {
707  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
708  BTParallelScanDesc btscan;
709 
710  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
711  parallel_scan->ps_offset);
712 
713  SpinLockAcquire(&btscan->btps_mutex);
714  btscan->btps_scanPage = scan_page;
 715  btscan->btps_pageStatus = BTPARALLEL_IDLE;
 716  SpinLockRelease(&btscan->btps_mutex);
 717  ConditionVariableSignal(&btscan->btps_cv);
 718 }
slock_t btps_mutex
Definition: nbtree.c:86
struct BTParallelScanDescData * BTParallelScanDesc
Definition: nbtree.c:90
#define SpinLockAcquire(lock)
Definition: spin.h:62
void ConditionVariableSignal(ConditionVariable *cv)
ConditionVariable btps_cv
Definition: nbtree.c:87
#define OffsetToPointer(base, offset)
Definition: c.h:648
#define SpinLockRelease(lock)
Definition: spin.h:64
struct ParallelIndexScanDescData * parallel_scan
Definition: relscan.h:163
BlockNumber btps_scanPage
Definition: nbtree.c:80
BTPS_State btps_pageStatus
Definition: nbtree.c:81

◆ _bt_parallel_seize()

bool _bt_parallel_seize ( IndexScanDesc  scan,
BlockNumber pageno 
)

Definition at line 647 of file nbtree.c.

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_ADVANCING, BTPARALLEL_DONE, ConditionVariableCancelSleep(), ConditionVariableSleep(), OffsetToPointer, IndexScanDescData::opaque, P_NONE, IndexScanDescData::parallel_scan, SpinLockAcquire, SpinLockRelease, status(), and WAIT_EVENT_BTREE_PAGE.

Referenced by _bt_first(), _bt_readnextpage(), and _bt_steppage().

648 {
649  BTScanOpaque so = (BTScanOpaque) scan->opaque;
650  BTPS_State pageStatus;
651  bool exit_loop = false;
652  bool status = true;
653  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
654  BTParallelScanDesc btscan;
655 
656  *pageno = P_NONE;
657 
658  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
659  parallel_scan->ps_offset);
660 
661  while (1)
662  {
663  SpinLockAcquire(&btscan->btps_mutex);
664  pageStatus = btscan->btps_pageStatus;
665 
666  if (so->arrayKeyCount < btscan->btps_arrayKeyCount)
667  {
668  /* Parallel scan has already advanced to a new set of scankeys. */
669  status = false;
670  }
671  else if (pageStatus == BTPARALLEL_DONE)
672  {
673  /*
674  * We're done with this set of scankeys. This may be the end, or
675  * there could be more sets to try.
676  */
677  status = false;
678  }
679  else if (pageStatus != BTPARALLEL_ADVANCING)
680  {
681  /*
682  * We have successfully seized control of the scan for the purpose
683  * of advancing it to a new page!
684  */
685  btscan->btps_pageStatus = BTPARALLEL_ADVANCING;
686  *pageno = btscan->btps_scanPage;
687  exit_loop = true;
688  }
689  SpinLockRelease(&btscan->btps_mutex);
690  if (exit_loop || !status)
691  break;
 692  ConditionVariableSleep(&btscan->btps_cv, WAIT_EVENT_BTREE_PAGE);
 693  }
 694  ConditionVariableCancelSleep();
 695 
696  return status;
697 }
#define P_NONE
Definition: nbtree.h:206
#define SpinLockAcquire(lock)
Definition: spin.h:62
void ConditionVariableCancelSleep(void)
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:946
BTPS_State
Definition: nbtree.c:66
int arrayKeyCount
Definition: nbtree.h:915
#define OffsetToPointer(base, offset)
Definition: c.h:648
#define SpinLockRelease(lock)
Definition: spin.h:64
struct ParallelIndexScanDescData * parallel_scan
Definition: relscan.h:163
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
static void static void status(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:225

◆ _bt_vacuum_needs_cleanup()

static bool _bt_vacuum_needs_cleanup ( IndexVacuumInfo info)
static

Definition at line 800 of file nbtree.c.

References _bt_getbuf(), _bt_relbuf(), BT_READ, BTMetaPageData::btm_last_cleanup_num_heap_tuples, BTMetaPageData::btm_oldest_btpo_xact, BTMetaPageData::btm_version, BTPageGetMeta, BTREE_METAPAGE, BTREE_NOVAC_VERSION, BufferGetPage, IndexVacuumInfo::index, IndexVacuumInfo::num_heap_tuples, RelationData::rd_options, RecentGlobalXmin, TransactionIdIsValid, TransactionIdPrecedes(), vacuum_cleanup_index_scale_factor, and BTOptions::vacuum_cleanup_index_scale_factor.

Referenced by btvacuumcleanup().

801 {
802  Buffer metabuf;
803  Page metapg;
804  BTMetaPageData *metad;
805  bool result = false;
806 
807  metabuf = _bt_getbuf(info->index, BTREE_METAPAGE, BT_READ);
808  metapg = BufferGetPage(metabuf);
809  metad = BTPageGetMeta(metapg);
810 
811  if (metad->btm_version < BTREE_NOVAC_VERSION)
812  {
813  /*
814  * Do cleanup if metapage needs upgrade, because we don't have
815  * cleanup-related meta-information yet.
816  */
817  result = true;
818  }
819  else if (TransactionIdIsValid(metad->btm_oldest_btpo_xact) &&
 820  TransactionIdPrecedes(metad->btm_oldest_btpo_xact,
 821  RecentGlobalXmin))
 822  {
823  /*
824  * If any oldest btpo.xact from a previously deleted page in the index
825  * is older than RecentGlobalXmin, then at least one deleted page can
826  * be recycled -- don't skip cleanup.
827  */
828  result = true;
829  }
830  else
831  {
832  BTOptions *relopts;
833  float8 cleanup_scale_factor;
834  float8 prev_num_heap_tuples;
835 
836  /*
837  * If table receives enough insertions and no cleanup was performed,
838  * then index would appear have stale statistics. If scale factor is
839  * set, we avoid that by performing cleanup if the number of inserted
840  * tuples exceeds vacuum_cleanup_index_scale_factor fraction of
841  * original tuples count.
842  */
843  relopts = (BTOptions *) info->index->rd_options;
844  cleanup_scale_factor = (relopts &&
845  relopts->vacuum_cleanup_index_scale_factor >= 0)
 846  ? relopts->vacuum_cleanup_index_scale_factor
 847  : vacuum_cleanup_index_scale_factor;
 848  prev_num_heap_tuples = metad->btm_last_cleanup_num_heap_tuples;
849 
850  if (cleanup_scale_factor <= 0 ||
851  prev_num_heap_tuples <= 0 ||
852  (info->num_heap_tuples - prev_num_heap_tuples) /
853  prev_num_heap_tuples >= cleanup_scale_factor)
854  result = true;
855  }
856 
857  _bt_relbuf(info->index, metabuf);
858  return result;
859 }
uint32 btm_version
Definition: nbtree.h:101
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:803
Relation index
Definition: genam.h:46
double vacuum_cleanup_index_scale_factor
Definition: globals.c:151
#define BT_READ
Definition: nbtree.h:587
float8 vacuum_cleanup_index_scale_factor
Definition: nbtree.h:964
double float8
Definition: c.h:498
#define BTPageGetMeta(p)
Definition: nbtree.h:114
TransactionId RecentGlobalXmin
Definition: snapmgr.c:168
#define BTREE_NOVAC_VERSION
Definition: nbtree.h:145
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define BTREE_METAPAGE
Definition: nbtree.h:141
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:959
double num_heap_tuples
Definition: genam.h:51
float8 btm_last_cleanup_num_heap_tuples
Definition: nbtree.h:109
TransactionId btm_oldest_btpo_xact
Definition: nbtree.h:107
#define TransactionIdIsValid(xid)
Definition: transam.h:41
int Buffer
Definition: buf.h:23
bytea * rd_options
Definition: rel.h:157
Pointer Page
Definition: bufpage.h:78

◆ btbeginscan()

IndexScanDesc btbeginscan ( Relation  rel,
int  nkeys,
int  norderbys 
)

Definition at line 354 of file nbtree.c.

References BTScanOpaqueData::arrayContext, BTScanOpaqueData::arrayKeyData, BTScanOpaqueData::arrayKeys, Assert, BTScanPosInvalidate, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanOpaqueData::keyData, BTScanOpaqueData::killedItems, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanOpaqueData::numArrayKeys, IndexScanDescData::numberOfKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), RelationGetDescr, RelationGetIndexScan(), and IndexScanDescData::xs_itupdesc.

Referenced by bthandler().

355 {
356  IndexScanDesc scan;
357  BTScanOpaque so;
358 
359  /* no order by operators allowed */
360  Assert(norderbys == 0);
361 
362  /* get the scan */
363  scan = RelationGetIndexScan(rel, nkeys, norderbys);
364 
365  /* allocate private workspace */
366  so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
 367  BTScanPosInvalidate(so->currPos);
 368  BTScanPosInvalidate(so->markPos);
 369  if (scan->numberOfKeys > 0)
370  so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
371  else
372  so->keyData = NULL;
373 
374  so->arrayKeyData = NULL; /* assume no array keys for now */
375  so->numArrayKeys = 0;
376  so->arrayKeys = NULL;
377  so->arrayContext = NULL;
378 
379  so->killedItems = NULL; /* until needed */
380  so->numKilled = 0;
381 
382  /*
383  * We don't know yet whether the scan will be index-only, so we do not
384  * allocate the tuple workspace arrays until btrescan. However, we set up
385  * scan->xs_itupdesc whether we'll need it or not, since that's so cheap.
386  */
387  so->currTuples = so->markTuples = NULL;
388 
389  scan->xs_itupdesc = RelationGetDescr(rel);
390 
391  scan->opaque = so;
392 
393  return scan;
394 }
#define RelationGetDescr(relation)
Definition: rel.h:482
MemoryContext arrayContext
Definition: nbtree.h:918
char * currTuples
Definition: nbtree.h:929
struct TupleDescData * xs_itupdesc
Definition: relscan.h:140
BTScanPosData markPos
Definition: nbtree.h:943
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:946
int numArrayKeys
Definition: nbtree.h:913
ScanKeyData * ScanKey
Definition: skey.h:75
char * markTuples
Definition: nbtree.h:930
ScanKey arrayKeyData
Definition: nbtree.h:912
int * killedItems
Definition: nbtree.h:921
#define Assert(condition)
Definition: c.h:745
#define BTScanPosInvalidate(scanpos)
Definition: nbtree.h:885
BTScanPosData currPos
Definition: nbtree.h:942
void * palloc(Size size)
Definition: mcxt.c:949
ScanKey keyData
Definition: nbtree.h:909
BTArrayKeyInfo * arrayKeys
Definition: nbtree.h:917
IndexScanDesc RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
Definition: genam.c:81

◆ btbuildempty()

void btbuildempty ( Relation  index)

Definition at line 163 of file nbtree.c.

References _bt_allequalimage(), _bt_initmetapage(), BTREE_METAPAGE, INIT_FORKNUM, log_newpage(), RelFileNodeBackend::node, P_NONE, PageSetChecksumInplace(), palloc(), RelationData::rd_smgr, SMgrRelationData::smgr_rnode, smgrimmedsync(), and smgrwrite().

Referenced by bthandler().

164 {
165  Page metapage;
166 
167  /* Construct metapage. */
168  metapage = (Page) palloc(BLCKSZ);
169  _bt_initmetapage(metapage, P_NONE, 0, _bt_allequalimage(index, false));
170 
171  /*
172  * Write the page and log it. It might seem that an immediate sync would
173  * be sufficient to guarantee that the file exists on disk, but recovery
174  * itself might remove it while replaying, for example, an
175  * XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record. Therefore, we need
176  * this even when wal_level=minimal.
177  */
 178  PageSetChecksumInplace(metapage, BTREE_METAPAGE);
 179  smgrwrite(index->rd_smgr, INIT_FORKNUM, BTREE_METAPAGE,
 180  (char *) metapage, true);
 181  log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
 182  BTREE_METAPAGE, metapage, true);
183 
184  /*
185  * An immediate sync is required even if we xlog'd the page, because the
186  * write did not go through shared_buffers and therefore a concurrent
187  * checkpoint may have moved the redo pointer past our xlog record.
188  */
 189  smgrimmedsync(index->rd_smgr, INIT_FORKNUM);
 190 }
struct SMgrRelationData * rd_smgr
Definition: rel.h:57
#define P_NONE
Definition: nbtree.h:206
void _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level, bool allequalimage)
Definition: nbtpage.c:61
RelFileNodeBackend smgr_rnode
Definition: smgr.h:42
void smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync)
Definition: smgr.c:524
#define BTREE_METAPAGE
Definition: nbtree.h:141
RelFileNode node
Definition: relfilenode.h:74
void PageSetChecksumInplace(Page page, BlockNumber blkno)
Definition: bufpage.c:1194
void * palloc(Size size)
Definition: mcxt.c:949
XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno, Page page, bool page_std)
Definition: xloginsert.c:996
bool _bt_allequalimage(Relation rel, bool debugmessage)
Definition: nbtutils.c:2691
void smgrimmedsync(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:643
Pointer Page
Definition: bufpage.h:78

◆ btbulkdelete()

IndexBulkDeleteResult* btbulkdelete ( IndexVacuumInfo info,
IndexBulkDeleteResult stats,
IndexBulkDeleteCallback  callback,
void *  callback_state 
)

Definition at line 869 of file nbtree.c.

References _bt_end_vacuum(), _bt_end_vacuum_callback(), _bt_start_vacuum(), btvacuumscan(), IndexVacuumInfo::index, palloc0(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, and PointerGetDatum.

Referenced by bthandler().

871 {
872  Relation rel = info->index;
873  BTCycleId cycleid;
874 
875  /* allocate stats if first time through, else re-use existing struct */
876  if (stats == NULL)
 877  stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
 878 
879  /* Establish the vacuum cycle ID to use for this scan */
880  /* The ENSURE stuff ensures we clean up shared memory on failure */
 881  PG_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
 882  {
883  cycleid = _bt_start_vacuum(rel);
884 
885  btvacuumscan(info, stats, callback, callback_state, cycleid);
886  }
 887  PG_END_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
 888  _bt_end_vacuum(rel);
889 
890  return stats;
891 }
#define PointerGetDatum(X)
Definition: postgres.h:556
Relation index
Definition: genam.h:46
void _bt_end_vacuum_callback(int code, Datum arg)
Definition: nbtutils.c:2053
static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid)
Definition: nbtree.c:955
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:47
uint16 BTCycleId
Definition: nbtree.h:28
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
Definition: test_ifaddrs.c:48
void _bt_end_vacuum(Relation rel)
Definition: nbtutils.c:2025
void * palloc0(Size size)
Definition: mcxt.c:980
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:52
BTCycleId _bt_start_vacuum(Relation rel)
Definition: nbtutils.c:1968

◆ btcanreturn()

bool btcanreturn ( Relation  index,
int  attno 
)

Definition at line 1491 of file nbtree.c.

Referenced by bthandler().

1492 {
1493  return true;
1494 }

◆ btendscan()

void btendscan ( IndexScanDesc  scan)

Definition at line 460 of file nbtree.c.

References _bt_killitems(), BTScanOpaqueData::arrayContext, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanOpaqueData::keyData, BTScanOpaqueData::killedItems, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, MemoryContextDelete(), BTScanOpaqueData::numKilled, IndexScanDescData::opaque, and pfree().

Referenced by bthandler().

461 {
462  BTScanOpaque so = (BTScanOpaque) scan->opaque;
463 
464  /* we aren't holding any read locks, but gotta drop the pins */
465  if (BTScanPosIsValid(so->currPos))
466  {
467  /* Before leaving current page, deal with any killed items */
468  if (so->numKilled > 0)
469  _bt_killitems(scan);
 470  BTScanPosUnpinIfPinned(so->currPos);
 471  }
472 
473  so->markItemIndex = -1;
 474  BTScanPosUnpinIfPinned(so->markPos);
 475 
476  /* No need to invalidate positions, the RAM is about to be freed. */
477 
478  /* Release storage */
479  if (so->keyData != NULL)
480  pfree(so->keyData);
481  /* so->arrayKeyData and so->arrayKeys are in arrayContext */
482  if (so->arrayContext != NULL)
 483  MemoryContextDelete(so->arrayContext);
 484  if (so->killedItems != NULL)
485  pfree(so->killedItems);
486  if (so->currTuples != NULL)
487  pfree(so->currTuples);
488  /* so->markTuples should not be pfree'd, see btrescan */
489  pfree(so);
490 }
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:211
MemoryContext arrayContext
Definition: nbtree.h:918
char * currTuples
Definition: nbtree.h:929
#define BTScanPosIsValid(scanpos)
Definition: nbtree.h:879
void _bt_killitems(IndexScanDesc scan)
Definition: nbtutils.c:1718
BTScanPosData markPos
Definition: nbtree.h:943
void pfree(void *pointer)
Definition: mcxt.c:1056
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:946
int markItemIndex
Definition: nbtree.h:939
int * killedItems
Definition: nbtree.h:921
BTScanPosData currPos
Definition: nbtree.h:942
ScanKey keyData
Definition: nbtree.h:909
#define BTScanPosUnpinIfPinned(scanpos)
Definition: nbtree.h:873

◆ btestimateparallelscan()

Size btestimateparallelscan ( void  )

Definition at line 582 of file nbtree.c.

Referenced by bthandler().

583 {
584  return sizeof(BTParallelScanDescData);
585 }
struct BTParallelScanDescData BTParallelScanDescData

◆ btgetbitmap()

int64 btgetbitmap ( IndexScanDesc  scan,
TIDBitmap tbm 
)

Definition at line 296 of file nbtree.c.

References _bt_advance_array_keys(), _bt_first(), _bt_next(), _bt_start_array_keys(), BTScanOpaqueData::currPos, ForwardScanDirection, BTScanPosItem::heapTid, BTScanPosData::itemIndex, BTScanPosData::items, BTScanPosData::lastItem, BTScanOpaqueData::numArrayKeys, IndexScanDescData::opaque, tbm_add_tuples(), and IndexScanDescData::xs_heaptid.

Referenced by bthandler().

297 {
298  BTScanOpaque so = (BTScanOpaque) scan->opaque;
299  int64 ntids = 0;
300  ItemPointer heapTid;
301 
302  /*
303  * If we have any array keys, initialize them.
304  */
305  if (so->numArrayKeys)
306  {
307  /* punt if we have any unsatisfiable array keys */
308  if (so->numArrayKeys < 0)
309  return ntids;
310 
 311  _bt_start_array_keys(scan, ForwardScanDirection);
 312  }
313 
314  /* This loop handles advancing to the next array elements, if any */
315  do
316  {
317  /* Fetch the first page & tuple */
318  if (_bt_first(scan, ForwardScanDirection))
319  {
320  /* Save tuple ID, and continue scanning */
321  heapTid = &scan->xs_heaptid;
322  tbm_add_tuples(tbm, heapTid, 1, false);
323  ntids++;
324 
325  for (;;)
326  {
327  /*
328  * Advance to next tuple within page. This is the same as the
329  * easy case in _bt_next().
330  */
331  if (++so->currPos.itemIndex > so->currPos.lastItem)
332  {
333  /* let _bt_next do the heavy lifting */
334  if (!_bt_next(scan, ForwardScanDirection))
335  break;
336  }
337 
338  /* Save tuple ID, and continue scanning */
339  heapTid = &so->currPos.items[so->currPos.itemIndex].heapTid;
340  tbm_add_tuples(tbm, heapTid, 1, false);
341  ntids++;
342  }
343  }
344  /* Now see if we have more array keys to deal with */
 345  } while (so->numArrayKeys && _bt_advance_array_keys(scan, ForwardScanDirection));
 346 
347  return ntids;
348 }
bool _bt_advance_array_keys(IndexScanDesc scan, ScanDirection dir)
Definition: nbtutils.c:544
void _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir)
Definition: nbtutils.c:518
int itemIndex
Definition: nbtree.h:855
void tbm_add_tuples(TIDBitmap *tbm, const ItemPointer tids, int ntids, bool recheck)
Definition: tidbitmap.c:376
bool _bt_first(IndexScanDesc scan, ScanDirection dir)
Definition: nbtsearch.c:848
int lastItem
Definition: nbtree.h:854
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:946
int numArrayKeys
Definition: nbtree.h:913
ItemPointerData xs_heaptid
Definition: relscan.h:144
ItemPointerData heapTid
Definition: nbtree.h:818
BTScanPosItem items[MaxTIDsPerBTreePage]
Definition: nbtree.h:857
BTScanPosData currPos
Definition: nbtree.h:942
bool _bt_next(IndexScanDesc scan, ScanDirection dir)
Definition: nbtsearch.c:1450

◆ btgettuple()

bool btgettuple ( IndexScanDesc  scan,
ScanDirection  dir 
)

Definition at line 222 of file nbtree.c.

References _bt_advance_array_keys(), _bt_first(), _bt_next(), _bt_start_array_keys(), BTScanPosIsValid, BTScanOpaqueData::currPos, BTScanPosData::itemIndex, IndexScanDescData::kill_prior_tuple, BTScanOpaqueData::killedItems, MaxTIDsPerBTreePage, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), and IndexScanDescData::xs_recheck.

Referenced by bthandler().

223 {
224  BTScanOpaque so = (BTScanOpaque) scan->opaque;
225  bool res;
226 
227  /* btree indexes are never lossy */
228  scan->xs_recheck = false;
229 
230  /*
231  * If we have any array keys, initialize them during first call for a
232  * scan. We can't do this in btrescan because we don't know the scan
233  * direction at that time.
234  */
235  if (so->numArrayKeys && !BTScanPosIsValid(so->currPos))
236  {
237  /* punt if we have any unsatisfiable array keys */
238  if (so->numArrayKeys < 0)
239  return false;
240 
241  _bt_start_array_keys(scan, dir);
242  }
243 
244  /* This loop handles advancing to the next array elements, if any */
245  do
246  {
247  /*
248  * If we've already initialized this scan, we can just advance it in
249  * the appropriate direction. If we haven't done so yet, we call
250  * _bt_first() to get the first item in the scan.
251  */
252  if (!BTScanPosIsValid(so->currPos))
253  res = _bt_first(scan, dir);
254  else
255  {
256  /*
257  * Check to see if we should kill the previously-fetched tuple.
258  */
259  if (scan->kill_prior_tuple)
260  {
261  /*
262  * Yes, remember it for later. (We'll deal with all such
263  * tuples at once right before leaving the index page.) The
264  * test for numKilled overrun is not just paranoia: if the
265  * caller reverses direction in the indexscan then the same
266  * item might get entered multiple times. It's not worth
267  * trying to optimize that, so we don't detect it, but instead
268  * just forget any excess entries.
269  */
270  if (so->killedItems == NULL)
271  so->killedItems = (int *)
272  palloc(MaxTIDsPerBTreePage * sizeof(int));
273  if (so->numKilled < MaxTIDsPerBTreePage)
274  so->killedItems[so->numKilled++] = so->currPos.itemIndex;
275  }
276 
277  /*
278  * Now continue the scan.
279  */
280  res = _bt_next(scan, dir);
281  }
282 
283  /* If we have a tuple, return it ... */
284  if (res)
285  break;
286  /* ... otherwise see if we have more array keys to deal with */
287  } while (so->numArrayKeys && _bt_advance_array_keys(scan, dir));
288 
289  return res;
290 }
bool _bt_advance_array_keys(IndexScanDesc scan, ScanDirection dir)
Definition: nbtutils.c:544
void _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir)
Definition: nbtutils.c:518
int itemIndex
Definition: nbtree.h:855
#define BTScanPosIsValid(scanpos)
Definition: nbtree.h:879
#define MaxTIDsPerBTreePage
Definition: nbtree.h:179
bool _bt_first(IndexScanDesc scan, ScanDirection dir)
Definition: nbtsearch.c:848
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:946
int numArrayKeys
Definition: nbtree.h:913
int * killedItems
Definition: nbtree.h:921
BTScanPosData currPos
Definition: nbtree.h:942
void * palloc(Size size)
Definition: mcxt.c:949
bool kill_prior_tuple
Definition: relscan.h:125
bool _bt_next(IndexScanDesc scan, ScanDirection dir)
Definition: nbtsearch.c:1450

◆ bthandler()

Datum bthandler ( PG_FUNCTION_ARGS  )

Definition at line 108 of file nbtree.c.

References IndexAmRoutine::amadjustmembers, IndexAmRoutine::ambeginscan, IndexAmRoutine::ambuild, IndexAmRoutine::ambuildempty, IndexAmRoutine::ambuildphasename, IndexAmRoutine::ambulkdelete, IndexAmRoutine::amcanbackward, IndexAmRoutine::amcaninclude, IndexAmRoutine::amcanmulticol, IndexAmRoutine::amcanorder, IndexAmRoutine::amcanorderbyop, IndexAmRoutine::amcanparallel, IndexAmRoutine::amcanreturn, IndexAmRoutine::amcanunique, IndexAmRoutine::amclusterable, IndexAmRoutine::amcostestimate, IndexAmRoutine::amendscan, IndexAmRoutine::amestimateparallelscan, IndexAmRoutine::amgetbitmap, IndexAmRoutine::amgettuple, IndexAmRoutine::aminitparallelscan, IndexAmRoutine::aminsert, IndexAmRoutine::amkeytype, IndexAmRoutine::ammarkpos, IndexAmRoutine::amoptionalkey, IndexAmRoutine::amoptions, IndexAmRoutine::amoptsprocnum, IndexAmRoutine::amparallelrescan, IndexAmRoutine::amparallelvacuumoptions, IndexAmRoutine::ampredlocks, IndexAmRoutine::amproperty, IndexAmRoutine::amrescan, IndexAmRoutine::amrestrpos, IndexAmRoutine::amsearcharray, IndexAmRoutine::amsearchnulls, IndexAmRoutine::amstorage, IndexAmRoutine::amstrategies, IndexAmRoutine::amsupport, IndexAmRoutine::amusemaintenanceworkmem, IndexAmRoutine::amvacuumcleanup, IndexAmRoutine::amvalidate, btadjustmembers(), btbeginscan(), btbuild(), btbuildempty(), btbuildphasename(), btbulkdelete(), btcanreturn(), btcostestimate(), btendscan(), btestimateparallelscan(), btgetbitmap(), btgettuple(), btinitparallelscan(), btinsert(), btmarkpos(), BTMaxStrategyNumber, BTNProcs, btoptions(), BTOPTIONS_PROC, btparallelrescan(), btproperty(), btrescan(), btrestrpos(), btvacuumcleanup(), btvalidate(), InvalidOid, makeNode, PG_RETURN_POINTER, VACUUM_OPTION_PARALLEL_BULKDEL, and VACUUM_OPTION_PARALLEL_COND_CLEANUP.

109 {
110  IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
111 
112  amroutine->amstrategies = BTMaxStrategyNumber;
113  amroutine->amsupport = BTNProcs;
114  amroutine->amoptsprocnum = BTOPTIONS_PROC;
115  amroutine->amcanorder = true;
116  amroutine->amcanorderbyop = false;
117  amroutine->amcanbackward = true;
118  amroutine->amcanunique = true;
119  amroutine->amcanmulticol = true;
120  amroutine->amoptionalkey = true;
121  amroutine->amsearcharray = true;
122  amroutine->amsearchnulls = true;
123  amroutine->amstorage = false;
124  amroutine->amclusterable = true;
125  amroutine->ampredlocks = true;
126  amroutine->amcanparallel = true;
127  amroutine->amcaninclude = true;
128  amroutine->amusemaintenanceworkmem = false;
129  amroutine->amparallelvacuumoptions =
 130  VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_COND_CLEANUP;
 131  amroutine->amkeytype = InvalidOid;
132 
133  amroutine->ambuild = btbuild;
134  amroutine->ambuildempty = btbuildempty;
135  amroutine->aminsert = btinsert;
136  amroutine->ambulkdelete = btbulkdelete;
137  amroutine->amvacuumcleanup = btvacuumcleanup;
138  amroutine->amcanreturn = btcanreturn;
139  amroutine->amcostestimate = btcostestimate;
140  amroutine->amoptions = btoptions;
141  amroutine->amproperty = btproperty;
142  amroutine->ambuildphasename = btbuildphasename;
143  amroutine->amvalidate = btvalidate;
144  amroutine->amadjustmembers = btadjustmembers;
145  amroutine->ambeginscan = btbeginscan;
146  amroutine->amrescan = btrescan;
147  amroutine->amgettuple = btgettuple;
148  amroutine->amgetbitmap = btgetbitmap;
149  amroutine->amendscan = btendscan;
150  amroutine->ammarkpos = btmarkpos;
151  amroutine->amrestrpos = btrestrpos;
 152  amroutine->amestimateparallelscan = btestimateparallelscan;
 153  amroutine->aminitparallelscan = btinitparallelscan;
 154  amroutine->amparallelrescan = btparallelrescan;
155 
156  PG_RETURN_POINTER(amroutine);
157 }
ambeginscan_function ambeginscan
Definition: amapi.h:270
uint8 amparallelvacuumoptions
Definition: amapi.h:247
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:360
ambulkdelete_function ambulkdelete
Definition: amapi.h:261
bool amcanmulticol
Definition: amapi.h:227
uint16 amsupport
Definition: amapi.h:215
void btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
Definition: nbtree.c:400
amgettuple_function amgettuple
Definition: amapi.h:272
bool amcanorderbyop
Definition: amapi.h:221
amproperty_function amproperty
Definition: amapi.h:266
char * btbuildphasename(int64 phasenum)
Definition: nbtutils.c:2151
bool btinsert(Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, IndexInfo *indexInfo)
Definition: nbtree.c:199
amparallelrescan_function amparallelrescan
Definition: amapi.h:281
bool amstorage
Definition: amapi.h:235
bool ampredlocks
Definition: amapi.h:239
IndexScanDesc btbeginscan(Relation rel, int nkeys, int norderbys)
Definition: nbtree.c:354
aminsert_function aminsert
Definition: amapi.h:260
Oid amkeytype
Definition: amapi.h:249
bool amoptionalkey
Definition: amapi.h:229
amvalidate_function amvalidate
Definition: amapi.h:268
Size btestimateparallelscan(void)
Definition: nbtree.c:582
#define BTOPTIONS_PROC
Definition: nbtree.h:579
bool btvalidate(Oid opclassoid)
Definition: nbtvalidate.c:41
void btbuildempty(Relation index)
Definition: nbtree.c:163
IndexBulkDeleteResult * btvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
Definition: nbtree.c:899
amgetbitmap_function amgetbitmap
Definition: amapi.h:273
ambuild_function ambuild
Definition: amapi.h:258
amoptions_function amoptions
Definition: amapi.h:265
int64 btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
Definition: nbtree.c:296
IndexBuildResult * btbuild(Relation heap, Relation index, IndexInfo *indexInfo)
Definition: nbtsort.c:299
void btadjustmembers(Oid opfamilyoid, Oid opclassoid, List *operators, List *functions)
Definition: nbtvalidate.c:293
void btendscan(IndexScanDesc scan)
Definition: nbtree.c:460
bool amcaninclude
Definition: amapi.h:243
amcostestimate_function amcostestimate
Definition: amapi.h:264
bool amcanunique
Definition: amapi.h:225
void btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
Definition: selfuncs.c:6237
amvacuumcleanup_function amvacuumcleanup
Definition: amapi.h:262
amendscan_function amendscan
Definition: amapi.h:274
bool amcanbackward
Definition: amapi.h:223
amrescan_function amrescan
Definition: amapi.h:271
bool amcanparallel
Definition: amapi.h:241
bool amsearchnulls
Definition: amapi.h:233
bool btgettuple(IndexScanDesc scan, ScanDirection dir)
Definition: nbtree.c:222
void btmarkpos(IndexScanDesc scan)
Definition: nbtree.c:496
bool amclusterable
Definition: amapi.h:237
bool amsearcharray
Definition: amapi.h:231
#define InvalidOid
Definition: postgres_ext.h:36
bool amusemaintenanceworkmem
Definition: amapi.h:245
#define makeNode(_type_)
Definition: nodes.h:577
amadjustmembers_function amadjustmembers
Definition: amapi.h:269
#define VACUUM_OPTION_PARALLEL_COND_CLEANUP
Definition: vacuum.h:52
bool btproperty(Oid index_oid, int attno, IndexAMProperty prop, const char *propname, bool *res, bool *isnull)
Definition: nbtutils.c:2128
void btrestrpos(IndexScanDesc scan)
Definition: nbtree.c:526
ammarkpos_function ammarkpos
Definition: amapi.h:275
bool amcanorder
Definition: amapi.h:219
ambuildphasename_function ambuildphasename
Definition: amapi.h:267
#define VACUUM_OPTION_PARALLEL_BULKDEL
Definition: vacuum.h:45
bool btcanreturn(Relation index, int attno)
Definition: nbtree.c:1491
void btparallelrescan(IndexScanDesc scan)
Definition: nbtree.c:606
amestimateparallelscan_function amestimateparallelscan
Definition: amapi.h:279
#define BTNProcs
Definition: nbtree.h:580
uint16 amstrategies
Definition: amapi.h:213
void btinitparallelscan(void *target)
Definition: nbtree.c:591
uint16 amoptsprocnum
Definition: amapi.h:217
ambuildempty_function ambuildempty
Definition: amapi.h:259
bytea * btoptions(Datum reloptions, bool validate)
Definition: nbtutils.c:2103
#define BTMaxStrategyNumber
Definition: stratnum.h:35
amcanreturn_function amcanreturn
Definition: amapi.h:263
aminitparallelscan_function aminitparallelscan
Definition: amapi.h:280
IndexBulkDeleteResult * btbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
Definition: nbtree.c:869
amrestrpos_function amrestrpos
Definition: amapi.h:276

◆ btinitparallelscan()

void btinitparallelscan ( void *  target)

Definition at line 591 of file nbtree.c.

References BTPARALLEL_NOT_INITIALIZED, BTParallelScanDescData::btps_arrayKeyCount, BTParallelScanDescData::btps_cv, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, ConditionVariableInit(), InvalidBlockNumber, and SpinLockInit.

Referenced by bthandler().

592 {
593  BTParallelScanDesc bt_target = (BTParallelScanDesc) target;
594 
595  SpinLockInit(&bt_target->btps_mutex);
596  bt_target->btps_scanPage = InvalidBlockNumber;
598  bt_target->btps_arrayKeyCount = 0;
599  ConditionVariableInit(&bt_target->btps_cv);
600 }
slock_t btps_mutex
Definition: nbtree.c:86
#define SpinLockInit(lock)
Definition: spin.h:60
struct BTParallelScanDescData * BTParallelScanDesc
Definition: nbtree.c:90
void ConditionVariableInit(ConditionVariable *cv)
ConditionVariable btps_cv
Definition: nbtree.c:87
BlockNumber btps_scanPage
Definition: nbtree.c:80
BTPS_State btps_pageStatus
Definition: nbtree.c:81
#define InvalidBlockNumber
Definition: block.h:33

◆ btinsert()

bool btinsert ( Relation  rel,
Datum values,
bool isnull,
ItemPointer  ht_ctid,
Relation  heapRel,
IndexUniqueCheck  checkUnique,
IndexInfo indexInfo 
)

Definition at line 199 of file nbtree.c.

References _bt_doinsert(), index_form_tuple(), pfree(), RelationGetDescr, and IndexTupleData::t_tid.

Referenced by bthandler().

203 {
204  bool result;
205  IndexTuple itup;
206 
207  /* generate an index tuple */
208  itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
209  itup->t_tid = *ht_ctid;
210 
211  result = _bt_doinsert(rel, itup, checkUnique, heapRel);
212 
213  pfree(itup);
214 
215  return result;
216 }
bool _bt_doinsert(Relation rel, IndexTuple itup, IndexUniqueCheck checkUnique, Relation heapRel)
Definition: nbtinsert.c:82
#define RelationGetDescr(relation)
Definition: rel.h:482
ItemPointerData t_tid
Definition: itup.h:37
IndexTuple index_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull)
Definition: indextuple.c:47
void pfree(void *pointer)
Definition: mcxt.c:1056
static Datum values[MAXATTR]
Definition: bootstrap.c:167

◆ btmarkpos()

void btmarkpos ( IndexScanDesc  scan)

Definition at line 496 of file nbtree.c.

References _bt_mark_array_keys(), BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanPosData::itemIndex, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, and IndexScanDescData::opaque.

Referenced by bthandler().

497 {
498  BTScanOpaque so = (BTScanOpaque) scan->opaque;
499 
500  /* There may be an old mark with a pin (but no lock). */
502 
503  /*
504  * Just record the current itemIndex. If we later step to next page
505  * before releasing the marked position, _bt_steppage makes a full copy of
506  * the currPos struct in markPos. If (as often happens) the mark is moved
507  * before we leave the page, we don't have to do that work.
508  */
509  if (BTScanPosIsValid(so->currPos))
510  so->markItemIndex = so->currPos.itemIndex;
511  else
512  {
514  so->markItemIndex = -1;
515  }
516 
517  /* Also record the current positions of any array keys */
518  if (so->numArrayKeys)
519  _bt_mark_array_keys(scan);
520 }
int itemIndex
Definition: nbtree.h:855
#define BTScanPosIsValid(scanpos)
Definition: nbtree.h:879
BTScanPosData markPos
Definition: nbtree.h:943
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:946
int numArrayKeys
Definition: nbtree.h:913
void _bt_mark_array_keys(IndexScanDesc scan)
Definition: nbtutils.c:603
int markItemIndex
Definition: nbtree.h:939
#define BTScanPosInvalidate(scanpos)
Definition: nbtree.h:885
BTScanPosData currPos
Definition: nbtree.h:942
#define BTScanPosUnpinIfPinned(scanpos)
Definition: nbtree.h:873

◆ btparallelrescan()

void btparallelrescan ( IndexScanDesc  scan)

Definition at line 606 of file nbtree.c.

References Assert, BTPARALLEL_NOT_INITIALIZED, BTParallelScanDescData::btps_arrayKeyCount, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, InvalidBlockNumber, OffsetToPointer, IndexScanDescData::parallel_scan, ParallelIndexScanDescData::ps_offset, SpinLockAcquire, and SpinLockRelease.

Referenced by bthandler().

607 {
608  BTParallelScanDesc btscan;
609  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
610 
611  Assert(parallel_scan);
612 
613  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
614  parallel_scan->ps_offset);
615 
616  /*
617  * In theory, we don't need to acquire the spinlock here, because there
618  * shouldn't be any other workers running at this point, but we do so for
619  * consistency.
620  */
621  SpinLockAcquire(&btscan->btps_mutex);
624  btscan->btps_arrayKeyCount = 0;
625  SpinLockRelease(&btscan->btps_mutex);
626 }
slock_t btps_mutex
Definition: nbtree.c:86
struct BTParallelScanDescData * BTParallelScanDesc
Definition: nbtree.c:90
#define SpinLockAcquire(lock)
Definition: spin.h:62
#define OffsetToPointer(base, offset)
Definition: c.h:648
#define SpinLockRelease(lock)
Definition: spin.h:64
struct ParallelIndexScanDescData * parallel_scan
Definition: relscan.h:163
BlockNumber btps_scanPage
Definition: nbtree.c:80
#define Assert(condition)
Definition: c.h:745
BTPS_State btps_pageStatus
Definition: nbtree.c:81
#define InvalidBlockNumber
Definition: block.h:33

◆ btreevacuumposting()

static BTVacuumPosting btreevacuumposting ( BTVacState vstate,
IndexTuple  posting,
OffsetNumber  updatedoffset,
int *  nremaining 
)
static

Definition at line 1442 of file nbtree.c.

References BTreeTupleGetNPosting(), BTreeTupleGetPosting(), BTVacState::callback, BTVacState::callback_state, BTVacuumPostingData::deletetids, i, BTVacuumPostingData::itup, BTVacuumPostingData::ndeletedtids, offsetof, palloc(), and BTVacuumPostingData::updatedoffset.

Referenced by btvacuumpage().

1444 {
1445  int live = 0;
1446  int nitem = BTreeTupleGetNPosting(posting);
1447  ItemPointer items = BTreeTupleGetPosting(posting);
1448  BTVacuumPosting vacposting = NULL;
1449 
1450  for (int i = 0; i < nitem; i++)
1451  {
1452  if (!vstate->callback(items + i, vstate->callback_state))
1453  {
1454  /* Live table TID */
1455  live++;
1456  }
1457  else if (vacposting == NULL)
1458  {
1459  /*
1460  * First dead table TID encountered.
1461  *
1462  * It's now clear that we need to delete one or more dead table
1463  * TIDs, so start maintaining metadata describing how to update
1464  * existing posting list tuple.
1465  */
1466  vacposting = palloc(offsetof(BTVacuumPostingData, deletetids) +
1467  nitem * sizeof(uint16));
1468 
1469  vacposting->itup = posting;
1470  vacposting->updatedoffset = updatedoffset;
1471  vacposting->ndeletedtids = 0;
1472  vacposting->deletetids[vacposting->ndeletedtids++] = i;
1473  }
1474  else
1475  {
1476  /* Second or subsequent dead table TID */
1477  vacposting->deletetids[vacposting->ndeletedtids++] = i;
1478  }
1479  }
1480 
1481  *nremaining = live;
1482  return vacposting;
1483 }
uint16 ndeletedtids
Definition: nbtree.h:782
void * callback_state
Definition: nbtree.c:47
static ItemPointer BTreeTupleGetPosting(IndexTuple posting)
Definition: nbtree.h:405
OffsetNumber updatedoffset
Definition: nbtree.h:779
IndexBulkDeleteCallback callback
Definition: nbtree.c:46
IndexTuple itup
Definition: nbtree.h:778
unsigned short uint16
Definition: c.h:373
uint16 deletetids[FLEXIBLE_ARRAY_MEMBER]
Definition: nbtree.h:783
void * palloc(Size size)
Definition: mcxt.c:949
int i
static uint16 BTreeTupleGetNPosting(IndexTuple posting)
Definition: nbtree.h:386
#define offsetof(type, field)
Definition: c.h:668

◆ btrescan()

void btrescan ( IndexScanDesc  scan,
ScanKey  scankey,
int  nscankeys,
ScanKey  orderbys,
int  norderbys 
)

Definition at line 400 of file nbtree.c.

References _bt_killitems(), _bt_preprocess_array_keys(), BTScanOpaqueData::arrayKeyCount, BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, IndexScanDescData::keyData, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, IndexScanDescData::numberOfKeys, BTScanOpaqueData::numberOfKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), and IndexScanDescData::xs_want_itup.

Referenced by bthandler().

402 {
403  BTScanOpaque so = (BTScanOpaque) scan->opaque;
404 
405  /* we aren't holding any read locks, but gotta drop the pins */
406  if (BTScanPosIsValid(so->currPos))
407  {
408  /* Before leaving current page, deal with any killed items */
409  if (so->numKilled > 0)
410  _bt_killitems(scan);
413  }
414 
415  so->markItemIndex = -1;
416  so->arrayKeyCount = 0;
419 
420  /*
421  * Allocate tuple workspace arrays, if needed for an index-only scan and
422  * not already done in a previous rescan call. To save on palloc
423  * overhead, both workspaces are allocated as one palloc block; only this
424  * function and btendscan know that.
425  *
426  * NOTE: this data structure also makes it safe to return data from a
427  * "name" column, even though btree name_ops uses an underlying storage
428  * datatype of cstring. The risk there is that "name" is supposed to be
429  * padded to NAMEDATALEN, but the actual index tuple is probably shorter.
430  * However, since we only return data out of tuples sitting in the
431  * currTuples array, a fetch of NAMEDATALEN bytes can at worst pull some
432  * data out of the markTuples array --- running off the end of memory for
433  * a SIGSEGV is not possible. Yeah, this is ugly as sin, but it beats
434  * adding special-case treatment for name_ops elsewhere.
435  */
436  if (scan->xs_want_itup && so->currTuples == NULL)
437  {
438  so->currTuples = (char *) palloc(BLCKSZ * 2);
439  so->markTuples = so->currTuples + BLCKSZ;
440  }
441 
442  /*
443  * Reset the scan keys. Note that keys ordering stuff moved to _bt_first.
444  * - vadim 05/05/97
445  */
446  if (scankey && scan->numberOfKeys > 0)
447  memmove(scan->keyData,
448  scankey,
449  scan->numberOfKeys * sizeof(ScanKeyData));
450  so->numberOfKeys = 0; /* until _bt_preprocess_keys sets it */
451 
452  /* If any keys are SK_SEARCHARRAY type, set up array-key info */
454 }
char * currTuples
Definition: nbtree.h:929
#define BTScanPosIsValid(scanpos)
Definition: nbtree.h:879
void _bt_killitems(IndexScanDesc scan)
Definition: nbtutils.c:1718
BTScanPosData markPos
Definition: nbtree.h:943
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:946
int arrayKeyCount
Definition: nbtree.h:915
char * markTuples
Definition: nbtree.h:930
int markItemIndex
Definition: nbtree.h:939
#define BTScanPosInvalidate(scanpos)
Definition: nbtree.h:885
int numberOfKeys
Definition: nbtree.h:908
struct ScanKeyData * keyData
Definition: relscan.h:119
BTScanPosData currPos
Definition: nbtree.h:942
void * palloc(Size size)
Definition: mcxt.c:949
void _bt_preprocess_array_keys(IndexScanDesc scan)
Definition: nbtutils.c:203
#define BTScanPosUnpinIfPinned(scanpos)
Definition: nbtree.h:873

◆ btrestrpos()

void btrestrpos ( IndexScanDesc  scan)

Definition at line 526 of file nbtree.c.

References _bt_killitems(), _bt_restore_array_keys(), BTScanPosInvalidate, BTScanPosIsPinned, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanPosData::buf, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, IncrBufferRefCount(), BTScanPosData::itemIndex, BTScanPosData::lastItem, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanPosData::nextTupleOffset, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numKilled, offsetof, and IndexScanDescData::opaque.

Referenced by bthandler().

527 {
528  BTScanOpaque so = (BTScanOpaque) scan->opaque;
529 
530  /* Restore the marked positions of any array keys */
531  if (so->numArrayKeys)
533 
534  if (so->markItemIndex >= 0)
535  {
536  /*
537  * The scan has never moved to a new page since the last mark. Just
538  * restore the itemIndex.
539  *
540  * NB: In this case we can't count on anything in so->markPos to be
541  * accurate.
542  */
543  so->currPos.itemIndex = so->markItemIndex;
544  }
545  else
546  {
547  /*
548  * The scan moved to a new page after last mark or restore, and we are
549  * now restoring to the marked page. We aren't holding any read
550  * locks, but if we're still holding the pin for the current position,
551  * we must drop it.
552  */
553  if (BTScanPosIsValid(so->currPos))
554  {
555  /* Before leaving current page, deal with any killed items */
556  if (so->numKilled > 0)
557  _bt_killitems(scan);
559  }
560 
561  if (BTScanPosIsValid(so->markPos))
562  {
563  /* bump pin on mark buffer for assignment to current buffer */
564  if (BTScanPosIsPinned(so->markPos))
566  memcpy(&so->currPos, &so->markPos,
567  offsetof(BTScanPosData, items[1]) +
568  so->markPos.lastItem * sizeof(BTScanPosItem));
569  if (so->currTuples)
570  memcpy(so->currTuples, so->markTuples,
572  }
573  else
575  }
576 }
int itemIndex
Definition: nbtree.h:855
char * currTuples
Definition: nbtree.h:929
#define BTScanPosIsValid(scanpos)
Definition: nbtree.h:879
void _bt_restore_array_keys(IndexScanDesc scan)
Definition: nbtutils.c:622
int nextTupleOffset
Definition: nbtree.h:844
int lastItem
Definition: nbtree.h:854
void _bt_killitems(IndexScanDesc scan)
Definition: nbtutils.c:1718
BTScanPosData markPos
Definition: nbtree.h:943
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:946
int numArrayKeys
Definition: nbtree.h:913
#define BTScanPosIsPinned(scanpos)
Definition: nbtree.h:862
char * markTuples
Definition: nbtree.h:930
int markItemIndex
Definition: nbtree.h:939
#define BTScanPosInvalidate(scanpos)
Definition: nbtree.h:885
BTScanPosData currPos
Definition: nbtree.h:942
Buffer buf
Definition: nbtree.h:825
void IncrBufferRefCount(Buffer buffer)
Definition: bufmgr.c:3556
#define offsetof(type, field)
Definition: c.h:668
#define BTScanPosUnpinIfPinned(scanpos)
Definition: nbtree.h:873

◆ btvacuumcleanup()

IndexBulkDeleteResult* btvacuumcleanup ( IndexVacuumInfo info,
IndexBulkDeleteResult stats 
)

Definition at line 899 of file nbtree.c.

References _bt_vacuum_needs_cleanup(), IndexVacuumInfo::analyze_only, btvacuumscan(), IndexVacuumInfo::estimated_count, IndexVacuumInfo::num_heap_tuples, IndexBulkDeleteResult::num_index_tuples, and palloc0().

Referenced by bthandler().

900 {
901  /* No-op in ANALYZE ONLY mode */
902  if (info->analyze_only)
903  return stats;
904 
905  /*
906  * If btbulkdelete was called, we need not do anything, just return the
907  * stats from the latest btbulkdelete call. If it wasn't called, we might
908  * still need to do a pass over the index, to recycle any newly-recyclable
909  * pages or to obtain index statistics. _bt_vacuum_needs_cleanup
910  * determines if either are needed.
911  *
912  * Since we aren't going to actually delete any leaf items, there's no
913  * need to go through all the vacuum-cycle-ID pushups.
914  */
915  if (stats == NULL)
916  {
917  /* Check if we need a cleanup */
918  if (!_bt_vacuum_needs_cleanup(info))
919  return NULL;
920 
922  btvacuumscan(info, stats, NULL, NULL, 0);
923  }
924 
925  /*
926  * It's quite possible for us to be fooled by concurrent page splits into
927  * double-counting some index tuples, so disbelieve any total that exceeds
928  * the underlying heap's count ... if we know that accurately. Otherwise
929  * this might just make matters worse.
930  */
931  if (!info->estimated_count)
932  {
933  if (stats->num_index_tuples > info->num_heap_tuples)
934  stats->num_index_tuples = info->num_heap_tuples;
935  }
936 
937  return stats;
938 }
static bool _bt_vacuum_needs_cleanup(IndexVacuumInfo *info)
Definition: nbtree.c:800
bool analyze_only
Definition: genam.h:47
static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid)
Definition: nbtree.c:955
void * palloc0(Size size)
Definition: mcxt.c:980
double num_heap_tuples
Definition: genam.h:51
double num_index_tuples
Definition: genam.h:77
bool estimated_count
Definition: genam.h:49

◆ btvacuumpage()

static void btvacuumpage ( BTVacState vstate,
BlockNumber  scanblkno 
)
static

Definition at line 1087 of file nbtree.c.

References _bt_checkpage(), _bt_delitems_vacuum(), _bt_lockbuf(), _bt_page_recyclable(), _bt_pagedel(), _bt_relbuf(), _bt_upgradelockbufcleanup(), Assert, BT_READ, BTP_SPLIT_END, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTreeTupleGetNPosting(), BTreeTupleIsPivot(), BTreeTupleIsPosting(), btreevacuumposting(), buf, BufferGetPage, BTVacState::callback, callback(), BTVacState::callback_state, BTVacState::cycleid, ereport, errcode(), errmsg_internal(), i, IndexVacuumInfo::index, BTVacState::info, LOG, MAIN_FORKNUM, MarkBufferDirtyHint(), MaxIndexTuplesPerPage, MemoryContextReset(), MemoryContextSwitchTo(), IndexBulkDeleteResult::num_index_tuples, OffsetNumberNext, BTVacState::oldestBtpoXact, P_FIRSTDATAKEY, P_ISDELETED, P_ISHALFDEAD, P_ISLEAF, P_NONE, P_RIGHTMOST, BTVacState::pagedelcontext, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageGetSpecialPointer, PageIsNew, IndexBulkDeleteResult::pages_deleted, pfree(), RBM_NORMAL, ReadBufferExtended(), RecordFreeIndexPage(), RelationGetRelationName, BTVacState::stats, IndexVacuumInfo::strategy, IndexTupleData::t_tid, BTVacState::totFreePages, TransactionIdIsValid, TransactionIdPrecedes(), IndexBulkDeleteResult::tuples_removed, vacuum_delay_point(), and BTPageOpaqueData::xact.

Referenced by btvacuumscan().

1088 {
1089  IndexVacuumInfo *info = vstate->info;
1090  IndexBulkDeleteResult *stats = vstate->stats;
1092  void *callback_state = vstate->callback_state;
1093  Relation rel = info->index;
1094  bool attempt_pagedel;
1095  BlockNumber blkno,
1096  backtrack_to;
1097  Buffer buf;
1098  Page page;
1099  BTPageOpaque opaque;
1100 
1101  blkno = scanblkno;
1102 
1103 backtrack:
1104 
1105  attempt_pagedel = false;
1106  backtrack_to = P_NONE;
1107 
1108  /* call vacuum_delay_point while not holding any buffer lock */
1110 
1111  /*
1112  * We can't use _bt_getbuf() here because it always applies
1113  * _bt_checkpage(), which will barf on an all-zero page. We want to
1114  * recycle all-zero pages, not fail. Also, we want to use a nondefault
1115  * buffer access strategy.
1116  */
1117  buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
1118  info->strategy);
1119  _bt_lockbuf(rel, buf, BT_READ);
1120  page = BufferGetPage(buf);
1121  opaque = NULL;
1122  if (!PageIsNew(page))
1123  {
1124  _bt_checkpage(rel, buf);
1125  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1126  }
1127 
1128  Assert(blkno <= scanblkno);
1129  if (blkno != scanblkno)
1130  {
1131  /*
1132  * We're backtracking.
1133  *
1134  * We followed a right link to a sibling leaf page (a page that
1135  * happens to be from a block located before scanblkno). The only
1136  * case we want to do anything with is a live leaf page having the
1137  * current vacuum cycle ID.
1138  *
1139  * The page had better be in a state that's consistent with what we
1140  * expect. Check for conditions that imply corruption in passing. It
1141  * can't be half-dead because only an interrupted VACUUM process can
1142  * leave pages in that state, so we'd definitely have dealt with it
1143  * back when the page was the scanblkno page (half-dead pages are
1144  * always marked fully deleted by _bt_pagedel()). This assumes that
1145  * there can be only one vacuum process running at a time.
1146  */
1147  if (!opaque || !P_ISLEAF(opaque) || P_ISHALFDEAD(opaque))
1148  {
1149  Assert(false);
1150  ereport(LOG,
1151  (errcode(ERRCODE_INDEX_CORRUPTED),
1152  errmsg_internal("right sibling %u of scanblkno %u unexpectedly in an inconsistent state in index \"%s\"",
1153  blkno, scanblkno, RelationGetRelationName(rel))));
1154  _bt_relbuf(rel, buf);
1155  return;
1156  }
1157 
1158  /*
1159  * We may have already processed the page in an earlier call, when the
1160  * page was scanblkno. This happens when the leaf page split occurred
1161  * after the scan began, but before the right sibling page became the
1162  * scanblkno.
1163  *
1164  * Page may also have been deleted by current btvacuumpage() call,
1165  * since _bt_pagedel() sometimes deletes the right sibling page of
1166  * scanblkno in passing (it does so after we decided where to
1167  * backtrack to). We don't need to process this page as a deleted
1168  * page a second time now (in fact, it would be wrong to count it as a
1169  * deleted page in the bulk delete statistics a second time).
1170  */
1171  if (opaque->btpo_cycleid != vstate->cycleid || P_ISDELETED(opaque))
1172  {
1173  /* Done with current scanblkno (and all lower split pages) */
1174  _bt_relbuf(rel, buf);
1175  return;
1176  }
1177  }
1178 
1179  /* Page is valid, see what to do with it */
1180  if (_bt_page_recyclable(page))
1181  {
1182  /* Okay to recycle this page (which could be leaf or internal) */
1183  RecordFreeIndexPage(rel, blkno);
1184  vstate->totFreePages++;
1185  stats->pages_deleted++;
1186  }
1187  else if (P_ISDELETED(opaque))
1188  {
1189  /*
1190  * Already deleted page (which could be leaf or internal). Can't
1191  * recycle yet.
1192  */
1193  stats->pages_deleted++;
1194 
1195  /* Maintain the oldest btpo.xact */
1196  if (!TransactionIdIsValid(vstate->oldestBtpoXact) ||
1197  TransactionIdPrecedes(opaque->btpo.xact, vstate->oldestBtpoXact))
1198  vstate->oldestBtpoXact = opaque->btpo.xact;
1199  }
1200  else if (P_ISHALFDEAD(opaque))
1201  {
1202  /*
1203  * Half-dead leaf page. Try to delete now. Might update
1204  * oldestBtpoXact and pages_deleted below.
1205  */
1206  attempt_pagedel = true;
1207  }
1208  else if (P_ISLEAF(opaque))
1209  {
1211  int ndeletable;
1213  int nupdatable;
1214  OffsetNumber offnum,
1215  minoff,
1216  maxoff;
1217  int nhtidsdead,
1218  nhtidslive;
1219 
1220  /*
1221  * Trade in the initial read lock for a super-exclusive write lock on
1222  * this page. We must get such a lock on every leaf page over the
1223  * course of the vacuum scan, whether or not it actually contains any
1224  * deletable tuples --- see nbtree/README.
1225  */
1226  _bt_upgradelockbufcleanup(rel, buf);
1227 
1228  /*
1229  * Check whether we need to backtrack to earlier pages. What we are
1230  * concerned about is a page split that happened since we started the
1231  * vacuum scan. If the split moved tuples on the right half of the
1232  * split (i.e. the tuples that sort high) to a block that we already
1233  * passed over, then we might have missed the tuples. We need to
1234  * backtrack now. (Must do this before possibly clearing btpo_cycleid
1235  * or deleting scanblkno page below!)
1236  */
1237  if (vstate->cycleid != 0 &&
1238  opaque->btpo_cycleid == vstate->cycleid &&
1239  !(opaque->btpo_flags & BTP_SPLIT_END) &&
1240  !P_RIGHTMOST(opaque) &&
1241  opaque->btpo_next < scanblkno)
1242  backtrack_to = opaque->btpo_next;
1243 
1244  /*
1245  * When each VACUUM begins, it determines an OldestXmin cutoff value.
1246  * Tuples before the cutoff are removed by VACUUM. Scan over all
1247  * items to see which ones need to be deleted according to cutoff
1248  * point using callback.
1249  */
1250  ndeletable = 0;
1251  nupdatable = 0;
1252  minoff = P_FIRSTDATAKEY(opaque);
1253  maxoff = PageGetMaxOffsetNumber(page);
1254  nhtidsdead = 0;
1255  nhtidslive = 0;
1256  if (callback)
1257  {
1258  for (offnum = minoff;
1259  offnum <= maxoff;
1260  offnum = OffsetNumberNext(offnum))
1261  {
1262  IndexTuple itup;
1263 
1264  itup = (IndexTuple) PageGetItem(page,
1265  PageGetItemId(page, offnum));
1266 
1267  /*
1268  * Hot Standby assumes that it's okay that XLOG_BTREE_VACUUM
1269  * records do not produce their own conflicts. This is safe
1270  * as long as the callback function only considers whether the
1271  * index tuple refers to pre-cutoff heap tuples that were
1272  * certainly already pruned away during VACUUM's initial heap
1273  * scan by the time we get here. (XLOG_HEAP2_CLEANUP_INFO
1274  * records produce conflicts using a latestRemovedXid value
1275  * for the entire VACUUM, so there is no need to produce our
1276  * own conflict now.)
1277  *
1278  * Backends with snapshots acquired after a VACUUM starts but
1279  * before it finishes could have a RecentGlobalXmin with a
1280  * later xid than the VACUUM's OldestXmin cutoff. These
1281  * backends might happen to opportunistically mark some index
1282  * tuples LP_DEAD before we reach them, even though they may
1283  * be after our cutoff. We don't try to kill these "extra"
1284  * index tuples in _bt_delitems_vacuum(). This keep things
1285  * simple, and allows us to always avoid generating our own
1286  * conflicts.
1287  */
1288  Assert(!BTreeTupleIsPivot(itup));
1289  if (!BTreeTupleIsPosting(itup))
1290  {
1291  /* Regular tuple, standard table TID representation */
1292  if (callback(&itup->t_tid, callback_state))
1293  {
1294  deletable[ndeletable++] = offnum;
1295  nhtidsdead++;
1296  }
1297  else
1298  nhtidslive++;
1299  }
1300  else
1301  {
1302  BTVacuumPosting vacposting;
1303  int nremaining;
1304 
1305  /* Posting list tuple */
1306  vacposting = btreevacuumposting(vstate, itup, offnum,
1307  &nremaining);
1308  if (vacposting == NULL)
1309  {
1310  /*
1311  * All table TIDs from the posting tuple remain, so no
1312  * delete or update required
1313  */
1314  Assert(nremaining == BTreeTupleGetNPosting(itup));
1315  }
1316  else if (nremaining > 0)
1317  {
1318 
1319  /*
1320  * Store metadata about posting list tuple in
1321  * updatable array for entire page. Existing tuple
1322  * will be updated during the later call to
1323  * _bt_delitems_vacuum().
1324  */
1325  Assert(nremaining < BTreeTupleGetNPosting(itup));
1326  updatable[nupdatable++] = vacposting;
1327  nhtidsdead += BTreeTupleGetNPosting(itup) - nremaining;
1328  }
1329  else
1330  {
1331  /*
1332  * All table TIDs from the posting list must be
1333  * deleted. We'll delete the index tuple completely
1334  * (no update required).
1335  */
1336  Assert(nremaining == 0);
1337  deletable[ndeletable++] = offnum;
1338  nhtidsdead += BTreeTupleGetNPosting(itup);
1339  pfree(vacposting);
1340  }
1341 
1342  nhtidslive += nremaining;
1343  }
1344  }
1345  }
1346 
1347  /*
1348  * Apply any needed deletes or updates. We issue just one
1349  * _bt_delitems_vacuum() call per page, so as to minimize WAL traffic.
1350  */
1351  if (ndeletable > 0 || nupdatable > 0)
1352  {
1353  Assert(nhtidsdead >= ndeletable + nupdatable);
1354  _bt_delitems_vacuum(rel, buf, deletable, ndeletable, updatable,
1355  nupdatable);
1356 
1357  stats->tuples_removed += nhtidsdead;
1358  /* must recompute maxoff */
1359  maxoff = PageGetMaxOffsetNumber(page);
1360 
1361  /* can't leak memory here */
1362  for (int i = 0; i < nupdatable; i++)
1363  pfree(updatable[i]);
1364  }
1365  else
1366  {
1367  /*
1368  * If the leaf page has been split during this vacuum cycle, it
1369  * seems worth expending a write to clear btpo_cycleid even if we
1370  * don't have any deletions to do. (If we do, _bt_delitems_vacuum
1371  * takes care of this.) This ensures we won't process the page
1372  * again.
1373  *
1374  * We treat this like a hint-bit update because there's no need to
1375  * WAL-log it.
1376  */
1377  Assert(nhtidsdead == 0);
1378  if (vstate->cycleid != 0 &&
1379  opaque->btpo_cycleid == vstate->cycleid)
1380  {
1381  opaque->btpo_cycleid = 0;
1382  MarkBufferDirtyHint(buf, true);
1383  }
1384  }
1385 
1386  /*
1387  * If the leaf page is now empty, try to delete it; else count the
1388  * live tuples (live table TIDs in posting lists are counted as
1389  * separate live tuples). We don't delete when backtracking, though,
1390  * since that would require teaching _bt_pagedel() about backtracking
1391  * (doesn't seem worth adding more complexity to deal with that).
1392  */
1393  if (minoff > maxoff)
1394  attempt_pagedel = (blkno == scanblkno);
1395  else
1396  stats->num_index_tuples += nhtidslive;
1397 
1398  Assert(!attempt_pagedel || nhtidslive == 0);
1399  }
1400 
1401  if (attempt_pagedel)
1402  {
1403  MemoryContext oldcontext;
1404 
1405  /* Run pagedel in a temp context to avoid memory leakage */
1407  oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
1408 
1409  /*
1410  * We trust the _bt_pagedel return value because it does not include
1411  * any page that a future call here from btvacuumscan is expected to
1412  * count. There will be no double-counting.
1413  */
1414  Assert(blkno == scanblkno);
1415  stats->pages_deleted += _bt_pagedel(rel, buf, &vstate->oldestBtpoXact);
1416 
1417  MemoryContextSwitchTo(oldcontext);
1418  /* pagedel released buffer, so we shouldn't */
1419  }
1420  else
1421  _bt_relbuf(rel, buf);
1422 
1423  if (backtrack_to != P_NONE)
1424  {
1425  blkno = backtrack_to;
1426  goto backtrack;
1427  }
1428 }
#define BTP_SPLIT_END
Definition: nbtree.h:77
BlockNumber btpo_next
Definition: nbtree.h:59
MemoryContext pagedelcontext
Definition: nbtree.c:51
double tuples_removed
Definition: genam.h:78
void * callback_state
Definition: nbtree.c:47
static bool BTreeTupleIsPivot(IndexTuple itup)
Definition: nbtree.h:348
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition: bufmgr.c:3588
BlockNumber totFreePages
Definition: nbtree.c:49
void _bt_delitems_vacuum(Relation rel, Buffer buf, OffsetNumber *deletable, int ndeletable, BTVacuumPosting *updatable, int nupdatable)
Definition: nbtpage.c:1124
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:244
IndexBulkDeleteCallback callback
Definition: nbtree.c:46
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:653
ItemPointerData t_tid
Definition: itup.h:37
static BTVacuumPosting btreevacuumposting(BTVacState *vstate, IndexTuple posting, OffsetNumber updatedoffset, int *nremaining)
Definition: nbtree.c:1442
BufferAccessStrategy strategy
Definition: genam.h:52
union BTPageOpaqueData::@45 btpo
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
#define P_NONE
Definition: nbtree.h:206
int errcode(int sqlerrcode)
Definition: elog.c:610
Relation index
Definition: genam.h:46
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:136
uint32 BlockNumber
Definition: block.h:31
#define LOG
Definition: elog.h:26
bool _bt_page_recyclable(Page page)
Definition: nbtpage.c:1081
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
TransactionId xact
Definition: nbtree.h:63
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
uint16 OffsetNumber
Definition: off.h:24
#define BT_READ
Definition: nbtree.h:587
void pfree(void *pointer)
Definition: mcxt.c:1056
void _bt_checkpage(Relation rel, Buffer buf)
Definition: nbtpage.c:726
#define P_ISHALFDEAD(opaque)
Definition: nbtree.h:218
BTCycleId btpo_cycleid
Definition: nbtree.h:66
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
Definition: test_ifaddrs.c:48
uint32 _bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
Definition: nbtpage.c:1549
static char * buf
Definition: pg_test_fsync.c:67
IndexTupleData * IndexTuple
Definition: itup.h:53
#define RelationGetRelationName(relation)
Definition: rel.h:490
BlockNumber pages_deleted
Definition: genam.h:79
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define P_ISDELETED(opaque)
Definition: nbtree.h:216
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
TransactionId oldestBtpoXact
Definition: nbtree.c:50
BTCycleId cycleid
Definition: nbtree.c:48
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
void _bt_lockbuf(Relation rel, Buffer buf, int access)
Definition: nbtpage.c:975
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:959
#define ereport(elevel,...)
Definition: elog.h:144
static bool BTreeTupleIsPosting(IndexTuple itup)
Definition: nbtree.h:360
int errmsg_internal(const char *fmt,...)
Definition: elog.c:911
#define Assert(condition)
Definition: c.h:745
IndexVacuumInfo * info
Definition: nbtree.c:44
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
IndexBulkDeleteResult * stats
Definition: nbtree.c:45
#define PageIsNew(page)
Definition: bufpage.h:229
#define MaxIndexTuplesPerPage
Definition: itup.h:145
int i
static uint16 BTreeTupleGetNPosting(IndexTuple posting)
Definition: nbtree.h:386
#define TransactionIdIsValid(xid)
Definition: transam.h:41
void vacuum_delay_point(void)
Definition: vacuum.c:1995
void _bt_upgradelockbufcleanup(Relation rel, Buffer buf)
Definition: nbtpage.c:1046
uint16 btpo_flags
Definition: nbtree.h:65
void RecordFreeIndexPage(Relation rel, BlockNumber freeBlock)
Definition: indexfsm.c:52
double num_index_tuples
Definition: genam.h:77
int Buffer
Definition: buf.h:23
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:213
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
Pointer Page
Definition: bufpage.h:78
bool(* IndexBulkDeleteCallback)(ItemPointer itemptr, void *state)
Definition: genam.h:84
#define P_ISLEAF(opaque)
Definition: nbtree.h:214

◆ btvacuumscan()

static void btvacuumscan ( IndexVacuumInfo info,
IndexBulkDeleteResult stats,
IndexBulkDeleteCallback  callback,
void *  callback_state,
BTCycleId  cycleid 
)
static

Definition at line 955 of file nbtree.c.

References _bt_update_meta_cleanup_info(), ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, BTREE_METAPAGE, btvacuumpage(), BTVacState::callback, callback(), BTVacState::callback_state, CurrentMemoryContext, BTVacState::cycleid, IndexBulkDeleteResult::estimated_count, ExclusiveLock, IndexVacuumInfo::index, IndexFreeSpaceMapVacuum(), BTVacState::info, InvalidTransactionId, LockRelationForExtension(), MemoryContextDelete(), IndexVacuumInfo::num_heap_tuples, IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::num_pages, BTVacState::oldestBtpoXact, BTVacState::pagedelcontext, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, pgstat_progress_update_param(), PROGRESS_SCAN_BLOCKS_DONE, PROGRESS_SCAN_BLOCKS_TOTAL, RELATION_IS_LOCAL, RelationGetNumberOfBlocks, IndexVacuumInfo::report_progress, BTVacState::stats, BTVacState::totFreePages, and UnlockRelationForExtension().

Referenced by btbulkdelete(), and btvacuumcleanup().

958 {
959  Relation rel = info->index;
960  BTVacState vstate;
961  BlockNumber num_pages;
962  BlockNumber scanblkno;
963  bool needLock;
964 
965  /*
966  * Reset counts that will be incremented during the scan; needed in case
967  * of multiple scans during a single VACUUM command
968  */
969  stats->estimated_count = false;
970  stats->num_index_tuples = 0;
971  stats->pages_deleted = 0;
972 
973  /* Set up info to pass down to btvacuumpage */
974  vstate.info = info;
975  vstate.stats = stats;
976  vstate.callback = callback;
977  vstate.callback_state = callback_state;
978  vstate.cycleid = cycleid;
979  vstate.totFreePages = 0;
981 
982  /* Create a temporary memory context to run _bt_pagedel in */
984  "_bt_pagedel",
986 
987  /*
988  * The outer loop iterates over all index pages except the metapage, in
989  * physical order (we hope the kernel will cooperate in providing
990  * read-ahead for speed). It is critical that we visit all leaf pages,
991  * including ones added after we start the scan, else we might fail to
992  * delete some deletable tuples. Hence, we must repeatedly check the
993  * relation length. We must acquire the relation-extension lock while
994  * doing so to avoid a race condition: if someone else is extending the
995  * relation, there is a window where bufmgr/smgr have created a new
996  * all-zero page but it hasn't yet been write-locked by _bt_getbuf(). If
997  * we manage to scan such a page here, we'll improperly assume it can be
998  * recycled. Taking the lock synchronizes things enough to prevent a
999  * problem: either num_pages won't include the new page, or _bt_getbuf
1000  * already has write lock on the buffer and it will be fully initialized
1001  * before we can examine it. (See also vacuumlazy.c, which has the same
1002  * issue.) Also, we need not worry if a page is added immediately after
1003  * we look; the page splitting code already has write-lock on the left
1004  * page before it adds a right page, so we must already have processed any
1005  * tuples due to be moved into such a page.
1006  *
1007  * We can skip locking for new or temp relations, however, since no one
1008  * else could be accessing them.
1009  */
1010  needLock = !RELATION_IS_LOCAL(rel);
1011 
1012  scanblkno = BTREE_METAPAGE + 1;
1013  for (;;)
1014  {
1015  /* Get the current relation length */
1016  if (needLock)
1018  num_pages = RelationGetNumberOfBlocks(rel);
1019  if (needLock)
1021 
1022  if (info->report_progress)
1024  num_pages);
1025 
1026  /* Quit if we've scanned the whole relation */
1027  if (scanblkno >= num_pages)
1028  break;
1029  /* Iterate over pages, then loop back to recheck length */
1030  for (; scanblkno < num_pages; scanblkno++)
1031  {
1032  btvacuumpage(&vstate, scanblkno);
1033  if (info->report_progress)
1035  scanblkno);
1036  }
1037  }
1038 
1040 
1041  /*
1042  * If we found any recyclable pages (and recorded them in the FSM), then
1043  * forcibly update the upper-level FSM pages to ensure that searchers can
1044  * find them. It's possible that the pages were also found during
1045  * previous scans and so this is a waste of time, but it's cheap enough
1046  * relative to scanning the index that it shouldn't matter much, and
1047  * making sure that free pages are available sooner not later seems
1048  * worthwhile.
1049  *
1050  * Note that if no recyclable pages exist, we don't bother vacuuming the
1051  * FSM at all.
1052  */
1053  if (vstate.totFreePages > 0)
1055 
1056  /*
1057  * Maintain the oldest btpo.xact and a count of the current number of heap
1058  * tuples in the metapage (for the benefit of _bt_vacuum_needs_cleanup).
1059  *
1060  * The page with the oldest btpo.xact is typically a page deleted by this
1061  * VACUUM operation, since pages deleted by a previous VACUUM operation
1062  * tend to be placed in the FSM (by the current VACUUM operation) -- such
1063  * pages are not candidates to be the oldest btpo.xact. (Note that pages
1064  * placed in the FSM are reported as deleted pages in the bulk delete
1065  * statistics, despite not counting as deleted pages for the purposes of
1066  * determining the oldest btpo.xact.)
1067  */
1069  info->num_heap_tuples);
1070 
1071  /* update statistics */
1072  stats->num_pages = num_pages;
1073  stats->pages_free = vstate.totFreePages;
1074 }
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:211
#define AllocSetContextCreate
Definition: memutils.h:170
MemoryContext pagedelcontext
Definition: nbtree.c:51
void * callback_state
Definition: nbtree.c:47
BlockNumber totFreePages
Definition: nbtree.c:49
#define ExclusiveLock
Definition: lockdefs.h:44
void pgstat_progress_update_param(int index, int64 val)
Definition: pgstat.c:3231
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:583
IndexBulkDeleteCallback callback
Definition: nbtree.c:46
bool report_progress
Definition: genam.h:48
static void btvacuumpage(BTVacState *vstate, BlockNumber scanblkno)
Definition: nbtree.c:1087
Relation index
Definition: genam.h:46
uint32 BlockNumber
Definition: block.h:31
void _bt_update_meta_cleanup_info(Relation rel, TransactionId oldestBtpoXact, float8 numHeapTuples)
Definition: nbtpage.c:174
BlockNumber num_pages
Definition: genam.h:74
BlockNumber pages_free
Definition: genam.h:80
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
Definition: test_ifaddrs.c:48
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:192
#define InvalidTransactionId
Definition: transam.h:31
MemoryContext CurrentMemoryContext
Definition: mcxt.c:38
BlockNumber pages_deleted
Definition: genam.h:79
#define BTREE_METAPAGE
Definition: nbtree.h:141
TransactionId oldestBtpoXact
Definition: nbtree.c:50
BTCycleId cycleid
Definition: nbtree.c:48
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:402
#define PROGRESS_SCAN_BLOCKS_DONE
Definition: progress.h:120
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:452
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:211
double num_heap_tuples
Definition: genam.h:51
IndexVacuumInfo * info
Definition: nbtree.c:44
void IndexFreeSpaceMapVacuum(Relation rel)
Definition: indexfsm.c:71
#define PROGRESS_SCAN_BLOCKS_TOTAL
Definition: progress.h:119
IndexBulkDeleteResult * stats
Definition: nbtree.c:45
double num_index_tuples
Definition: genam.h:77
bool estimated_count
Definition: genam.h:76