nbtree.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "access/nbtxlog.h"
#include "access/relscan.h"
#include "access/xlog.h"
#include "commands/progress.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "nodes/execnodes.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "storage/condition_variable.h"
#include "storage/indexfsm.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/smgr.h"
#include "utils/builtins.h"
#include "utils/index_selfuncs.h"
#include "utils/memutils.h"

Data Structures

struct  BTVacState
 
struct  BTParallelScanDescData
 

Typedefs

typedef struct BTParallelScanDescData BTParallelScanDescData
 
typedef struct BTParallelScanDescData *BTParallelScanDesc
 

Enumerations

enum  BTPS_State { BTPARALLEL_NOT_INITIALIZED, BTPARALLEL_ADVANCING, BTPARALLEL_IDLE, BTPARALLEL_DONE }
 

Functions

static void btvacuumscan (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid)
 
static void btvacuumpage (BTVacState *vstate, BlockNumber scanblkno)
 
static BTVacuumPosting btreevacuumposting (BTVacState *vstate, IndexTuple posting, OffsetNumber updatedoffset, int *nremaining)
 
Datum bthandler (PG_FUNCTION_ARGS)
 
void btbuildempty (Relation index)
 
bool btinsert (Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, IndexInfo *indexInfo)
 
bool btgettuple (IndexScanDesc scan, ScanDirection dir)
 
int64 btgetbitmap (IndexScanDesc scan, TIDBitmap *tbm)
 
IndexScanDesc btbeginscan (Relation rel, int nkeys, int norderbys)
 
void btrescan (IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
 
void btendscan (IndexScanDesc scan)
 
void btmarkpos (IndexScanDesc scan)
 
void btrestrpos (IndexScanDesc scan)
 
Size btestimateparallelscan (void)
 
void btinitparallelscan (void *target)
 
void btparallelrescan (IndexScanDesc scan)
 
bool _bt_parallel_seize (IndexScanDesc scan, BlockNumber *pageno)
 
void _bt_parallel_release (IndexScanDesc scan, BlockNumber scan_page)
 
void _bt_parallel_done (IndexScanDesc scan)
 
void _bt_parallel_advance_array_keys (IndexScanDesc scan)
 
static bool _bt_vacuum_needs_cleanup (IndexVacuumInfo *info)
 
IndexBulkDeleteResult * btbulkdelete (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
 
IndexBulkDeleteResult * btvacuumcleanup (IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 
bool btcanreturn (Relation index, int attno)
 

Typedef Documentation

◆ BTParallelScanDesc

Definition at line 90 of file nbtree.c.

◆ BTParallelScanDescData

Definition at line 88 of file nbtree.c.

Enumeration Type Documentation

◆ BTPS_State

enum BTPS_State
Enumerator
BTPARALLEL_NOT_INITIALIZED 
BTPARALLEL_ADVANCING 
BTPARALLEL_IDLE 
BTPARALLEL_DONE 

Definition at line 66 of file nbtree.c.
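
These four states coordinate which worker may advance a parallel scan. The sketch below summarizes the transitions driven by _bt_parallel_seize(), _bt_parallel_release(), and _bt_parallel_done() (documented later on this page); it is an illustrative summary, not verbatim source.

/*
 * Illustrative BTPS_State transitions:
 *
 *   BTPARALLEL_NOT_INITIALIZED   no worker has started the scan yet
 *            |  _bt_parallel_seize() by the first worker
 *            v
 *   BTPARALLEL_ADVANCING         exactly one worker is picking the next page
 *            |  _bt_parallel_release() once the next page is known
 *            v
 *   BTPARALLEL_IDLE              any worker may seize the scan again
 *            |  _bt_parallel_done() when no pages remain
 *            v
 *   BTPARALLEL_DONE              terminal for the current set of scan keys
 *
 * (_bt_parallel_advance_array_keys() resets BTPARALLEL_DONE back to
 * BTPARALLEL_NOT_INITIALIZED when another set of array keys remains.)
 */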

Function Documentation

◆ _bt_parallel_advance_array_keys()

void _bt_parallel_advance_array_keys ( IndexScanDesc  scan)

Definition at line 769 of file nbtree.c.

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_DONE, BTPARALLEL_NOT_INITIALIZED, InvalidBlockNumber, OffsetToPointer, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_advance_array_keys().

770 {
771  BTScanOpaque so = (BTScanOpaque) scan->opaque;
772  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
773  BTParallelScanDesc btscan;
774 
775  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
776  parallel_scan->ps_offset);
777 
778  so->arrayKeyCount++;
779  SpinLockAcquire(&btscan->btps_mutex);
780  if (btscan->btps_pageStatus == BTPARALLEL_DONE)
781  {
782  btscan->btps_scanPage = InvalidBlockNumber;
783  btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
784  btscan->btps_arrayKeyCount++;
785  }
786  SpinLockRelease(&btscan->btps_mutex);
787 }

◆ _bt_parallel_done()

void _bt_parallel_done ( IndexScanDesc  scan)

Definition at line 728 of file nbtree.c.

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_DONE, ConditionVariableBroadcast(), OffsetToPointer, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_first(), and _bt_readnextpage().

729 {
730  BTScanOpaque so = (BTScanOpaque) scan->opaque;
731  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
732  BTParallelScanDesc btscan;
733  bool status_changed = false;
734 
735  /* Do nothing, for non-parallel scans */
736  if (parallel_scan == NULL)
737  return;
738 
739  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
740  parallel_scan->ps_offset);
741 
742  /*
743  * Mark the parallel scan as done for this combination of scan keys,
744  * unless some other process already did so. See also
745  * _bt_advance_array_keys.
746  */
747  SpinLockAcquire(&btscan->btps_mutex);
748  if (so->arrayKeyCount >= btscan->btps_arrayKeyCount &&
749  btscan->btps_pageStatus != BTPARALLEL_DONE)
750  {
751  btscan->btps_pageStatus = BTPARALLEL_DONE;
752  status_changed = true;
753  }
754  SpinLockRelease(&btscan->btps_mutex);
755 
756  /* wake up all the workers associated with this parallel scan */
757  if (status_changed)
758  ConditionVariableBroadcast(&btscan->btps_cv);
759 }

◆ _bt_parallel_release()

void _bt_parallel_release ( IndexScanDesc  scan,
BlockNumber  scan_page 
)

Definition at line 705 of file nbtree.c.

References BTPARALLEL_IDLE, BTParallelScanDescData::btps_cv, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, ConditionVariableSignal(), OffsetToPointer, IndexScanDescData::parallel_scan, ParallelIndexScanDescData::ps_offset, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_readnextpage(), and _bt_readpage().

706 {
707  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
708  BTParallelScanDesc btscan;
709 
710  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
711  parallel_scan->ps_offset);
712 
713  SpinLockAcquire(&btscan->btps_mutex);
714  btscan->btps_scanPage = scan_page;
715  btscan->btps_pageStatus = BTPARALLEL_IDLE;
716  SpinLockRelease(&btscan->btps_mutex);
717  ConditionVariableSignal(&btscan->btps_cv);
718 }

◆ _bt_parallel_seize()

bool _bt_parallel_seize ( IndexScanDesc  scan,
BlockNumber pageno 
)

Definition at line 647 of file nbtree.c.

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_ADVANCING, BTPARALLEL_DONE, ConditionVariableCancelSleep(), ConditionVariableSleep(), OffsetToPointer, IndexScanDescData::opaque, P_NONE, IndexScanDescData::parallel_scan, SpinLockAcquire, SpinLockRelease, status(), and WAIT_EVENT_BTREE_PAGE.

Referenced by _bt_first(), _bt_readnextpage(), and _bt_steppage().

648 {
649  BTScanOpaque so = (BTScanOpaque) scan->opaque;
650  BTPS_State pageStatus;
651  bool exit_loop = false;
652  bool status = true;
653  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
654  BTParallelScanDesc btscan;
655 
656  *pageno = P_NONE;
657 
658  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
659  parallel_scan->ps_offset);
660 
661  while (1)
662  {
663  SpinLockAcquire(&btscan->btps_mutex);
664  pageStatus = btscan->btps_pageStatus;
665 
666  if (so->arrayKeyCount < btscan->btps_arrayKeyCount)
667  {
668  /* Parallel scan has already advanced to a new set of scankeys. */
669  status = false;
670  }
671  else if (pageStatus == BTPARALLEL_DONE)
672  {
673  /*
674  * We're done with this set of scankeys. This may be the end, or
675  * there could be more sets to try.
676  */
677  status = false;
678  }
679  else if (pageStatus != BTPARALLEL_ADVANCING)
680  {
681  /*
682  * We have successfully seized control of the scan for the purpose
683  * of advancing it to a new page!
684  */
685  btscan->btps_pageStatus = BTPARALLEL_ADVANCING;
686  *pageno = btscan->btps_scanPage;
687  exit_loop = true;
688  }
689  SpinLockRelease(&btscan->btps_mutex);
690  if (exit_loop || !status)
691  break;
692  ConditionVariableSleep(&btscan->btps_cv, WAIT_EVENT_BTREE_PAGE);
693  }
694  ConditionVariableCancelSleep();
695 
696  return status;
697 }
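
Callers such as _bt_first() and _bt_steppage() pair this function with _bt_parallel_release() or _bt_parallel_done(). A minimal caller sketch follows (hypothetical helper; the real callers also handle the not-yet-initialized start of the scan and array-key advancement):

static void
process_one_parallel_page(IndexScanDesc scan)
{
    BlockNumber blkno;
    BlockNumber next_blkno;

    if (!_bt_parallel_seize(scan, &blkno))
        return;                 /* scan is done for this set of scan keys */

    /* ... read page blkno; while doing so, learn its right sibling ... */
    next_blkno = blkno;         /* placeholder: really the right sibling */

    if (next_blkno == P_NONE)
        _bt_parallel_done(scan);                /* no pages left */
    else
        _bt_parallel_release(scan, next_blkno); /* let others advance */
}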

◆ _bt_vacuum_needs_cleanup()

static bool _bt_vacuum_needs_cleanup ( IndexVacuumInfo info)
static

Definition at line 800 of file nbtree.c.

References _bt_getbuf(), _bt_relbuf(), BT_READ, BTMetaPageData::btm_last_cleanup_num_heap_tuples, BTMetaPageData::btm_oldest_btpo_xact, BTMetaPageData::btm_version, BTPageGetMeta, BTREE_METAPAGE, BTREE_NOVAC_VERSION, BufferGetPage, GlobalVisCheckRemovableXid(), IndexVacuumInfo::index, IndexVacuumInfo::num_heap_tuples, RelationData::rd_options, TransactionIdIsValid, vacuum_cleanup_index_scale_factor, and BTOptions::vacuum_cleanup_index_scale_factor.

Referenced by btvacuumcleanup().

801 {
802  Buffer metabuf;
803  Page metapg;
804  BTMetaPageData *metad;
805  bool result = false;
806 
807  metabuf = _bt_getbuf(info->index, BTREE_METAPAGE, BT_READ);
808  metapg = BufferGetPage(metabuf);
809  metad = BTPageGetMeta(metapg);
810 
811  /*
812  * XXX: If IndexVacuumInfo contained the heap relation, we could be more
813  * aggressive about vacuuming non catalog relations by passing the table
814  * to GlobalVisCheckRemovableXid().
815  */
816 
817  if (metad->btm_version < BTREE_NOVAC_VERSION)
818  {
819  /*
820  * Do cleanup if metapage needs upgrade, because we don't have
821  * cleanup-related meta-information yet.
822  */
823  result = true;
824  }
825  else if (TransactionIdIsValid(metad->btm_oldest_btpo_xact) &&
826  GlobalVisCheckRemovableXid(NULL, metad->btm_oldest_btpo_xact))
827  {
828  /*
829  * If any oldest btpo.xact from a previously deleted page in the index
830  * is visible to everyone, then at least one deleted page can be
831  * recycled -- don't skip cleanup.
832  */
833  result = true;
834  }
835  else
836  {
837  BTOptions *relopts;
838  float8 cleanup_scale_factor;
839  float8 prev_num_heap_tuples;
840 
841  /*
842  * If the table receives enough insertions and no cleanup was performed,
843  * then the index would appear to have stale statistics. If scale factor is
844  * set, we avoid that by performing cleanup if the number of inserted
845  * tuples exceeds vacuum_cleanup_index_scale_factor fraction of
846  * original tuples count.
847  */
848  relopts = (BTOptions *) info->index->rd_options;
849  cleanup_scale_factor = (relopts &&
850  relopts->vacuum_cleanup_index_scale_factor >= 0)
851  ? relopts->vacuum_cleanup_index_scale_factor
852  : vacuum_cleanup_index_scale_factor;
853  prev_num_heap_tuples = metad->btm_last_cleanup_num_heap_tuples;
854 
855  if (cleanup_scale_factor <= 0 ||
856  info->num_heap_tuples < 0 ||
857  prev_num_heap_tuples <= 0 ||
858  (info->num_heap_tuples - prev_num_heap_tuples) /
859  prev_num_heap_tuples >= cleanup_scale_factor)
860  result = true;
861  }
862 
863  _bt_relbuf(info->index, metabuf);
864  return result;
865 }
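
The final branch implements a simple growth test: cleanup runs when the table has grown by at least vacuum_cleanup_index_scale_factor relative to the heap tuple count recorded at the previous cleanup. A worked example with illustrative numbers (0.1 is the GUC's default):

/* Illustrative numbers for the scale-factor test above. */
double prev_num_heap_tuples = 1000000.0;  /* stored in the btree metapage */
double num_heap_tuples      = 1120000.0;  /* current estimate from VACUUM */
double cleanup_scale_factor = 0.1;        /* vacuum_cleanup_index_scale_factor */

/* (1120000 - 1000000) / 1000000 = 0.12 >= 0.1, so cleanup is triggered. */
bool needs_cleanup =
    (num_heap_tuples - prev_num_heap_tuples) / prev_num_heap_tuples
    >= cleanup_scale_factor;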

◆ btbeginscan()

IndexScanDesc btbeginscan ( Relation  rel,
int  nkeys,
int  norderbys 
)

Definition at line 354 of file nbtree.c.

References BTScanOpaqueData::arrayContext, BTScanOpaqueData::arrayKeyData, BTScanOpaqueData::arrayKeys, Assert, BTScanPosInvalidate, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanOpaqueData::keyData, BTScanOpaqueData::killedItems, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanOpaqueData::numArrayKeys, IndexScanDescData::numberOfKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), RelationGetDescr, RelationGetIndexScan(), and IndexScanDescData::xs_itupdesc.

Referenced by bthandler().

355 {
356  IndexScanDesc scan;
357  BTScanOpaque so;
358 
359  /* no order by operators allowed */
360  Assert(norderbys == 0);
361 
362  /* get the scan */
363  scan = RelationGetIndexScan(rel, nkeys, norderbys);
364 
365  /* allocate private workspace */
366  so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
367  BTScanPosInvalidate(so->currPos);
368  BTScanPosInvalidate(so->markPos);
369  if (scan->numberOfKeys > 0)
370  so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
371  else
372  so->keyData = NULL;
373 
374  so->arrayKeyData = NULL; /* assume no array keys for now */
375  so->numArrayKeys = 0;
376  so->arrayKeys = NULL;
377  so->arrayContext = NULL;
378 
379  so->killedItems = NULL; /* until needed */
380  so->numKilled = 0;
381 
382  /*
383  * We don't know yet whether the scan will be index-only, so we do not
384  * allocate the tuple workspace arrays until btrescan. However, we set up
385  * scan->xs_itupdesc whether we'll need it or not, since that's so cheap.
386  */
387  so->currTuples = so->markTuples = NULL;
388 
389  scan->xs_itupdesc = RelationGetDescr(rel);
390 
391  scan->opaque = so;
392 
393  return scan;
394 }

◆ btbuildempty()

void btbuildempty ( Relation  index)

Definition at line 163 of file nbtree.c.

References _bt_allequalimage(), _bt_initmetapage(), BTREE_METAPAGE, INIT_FORKNUM, log_newpage(), RelFileNodeBackend::node, P_NONE, PageSetChecksumInplace(), palloc(), RelationData::rd_smgr, SMgrRelationData::smgr_rnode, smgrimmedsync(), and smgrwrite().

Referenced by bthandler().

164 {
165  Page metapage;
166 
167  /* Construct metapage. */
168  metapage = (Page) palloc(BLCKSZ);
169  _bt_initmetapage(metapage, P_NONE, 0, _bt_allequalimage(index, false));
170 
171  /*
172  * Write the page and log it. It might seem that an immediate sync would
173  * be sufficient to guarantee that the file exists on disk, but recovery
174  * itself might remove it while replaying, for example, an
175  * XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record. Therefore, we need
176  * this even when wal_level=minimal.
177  */
178  PageSetChecksumInplace(metapage, BTREE_METAPAGE);
179  smgrwrite(index->rd_smgr, INIT_FORKNUM, BTREE_METAPAGE,
180  (char *) metapage, true);
181  log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
182  BTREE_METAPAGE, metapage, true);
183 
184  /*
185  * An immediate sync is required even if we xlog'd the page, because the
186  * write did not go through shared_buffers and therefore a concurrent
187  * checkpoint may have moved the redo pointer past our xlog record.
188  */
189  smgrimmedsync(index->rd_smgr, INIT_FORKNUM);
190 }

◆ btbulkdelete()

IndexBulkDeleteResult* btbulkdelete ( IndexVacuumInfo info,
IndexBulkDeleteResult stats,
IndexBulkDeleteCallback  callback,
void *  callback_state 
)

Definition at line 875 of file nbtree.c.

References _bt_end_vacuum(), _bt_end_vacuum_callback(), _bt_start_vacuum(), btvacuumscan(), IndexVacuumInfo::index, palloc0(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, and PointerGetDatum.

Referenced by bthandler().

877 {
878  Relation rel = info->index;
879  BTCycleId cycleid;
880 
881  /* allocate stats if first time through, else re-use existing struct */
882  if (stats == NULL)
883  stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
884 
885  /* Establish the vacuum cycle ID to use for this scan */
886  /* The ENSURE stuff ensures we clean up shared memory on failure */
887  PG_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
888  {
889  cycleid = _bt_start_vacuum(rel);
890 
891  btvacuumscan(info, stats, callback, callback_state, cycleid);
892  }
893  PG_END_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
894  _bt_end_vacuum(rel);
895 
896  return stats;
897 }

◆ btcanreturn()

bool btcanreturn ( Relation  index,
int  attno 
)

Definition at line 1496 of file nbtree.c.

Referenced by bthandler().

1497 {
1498  return true;
1499 }

◆ btendscan()

void btendscan ( IndexScanDesc  scan)

Definition at line 460 of file nbtree.c.

References _bt_killitems(), BTScanOpaqueData::arrayContext, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanOpaqueData::keyData, BTScanOpaqueData::killedItems, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, MemoryContextDelete(), BTScanOpaqueData::numKilled, IndexScanDescData::opaque, and pfree().

Referenced by bthandler().

461 {
462  BTScanOpaque so = (BTScanOpaque) scan->opaque;
463 
464  /* we aren't holding any read locks, but gotta drop the pins */
465  if (BTScanPosIsValid(so->currPos))
466  {
467  /* Before leaving current page, deal with any killed items */
468  if (so->numKilled > 0)
469  _bt_killitems(scan);
470  BTScanPosUnpinIfPinned(so->currPos);
471  }
472 
473  so->markItemIndex = -1;
474  BTScanPosUnpinIfPinned(so->markPos);
475 
476  /* No need to invalidate positions, the RAM is about to be freed. */
477 
478  /* Release storage */
479  if (so->keyData != NULL)
480  pfree(so->keyData);
481  /* so->arrayKeyData and so->arrayKeys are in arrayContext */
482  if (so->arrayContext != NULL)
483  MemoryContextDelete(so->arrayContext);
484  if (so->killedItems != NULL)
485  pfree(so->killedItems);
486  if (so->currTuples != NULL)
487  pfree(so->currTuples);
488  /* so->markTuples should not be pfree'd, see btrescan */
489  pfree(so);
490 }

◆ btestimateparallelscan()

Size btestimateparallelscan ( void  )

Definition at line 582 of file nbtree.c.

Referenced by bthandler().

583 {
584  return sizeof(BTParallelScanDescData);
585 }
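
This is the btree implementation of amestimateparallelscan: the executor asks each AM how much space its shared scan state needs, reserves that space in the parallel DSM segment, and later hands it to btinitparallelscan(). A condensed, illustrative sketch of that sequence (the real driver is index_parallelscan_estimate()/index_parallelscan_initialize() in access/index/indexam.c; the toc variable here is a hypothetical shm_toc for the parallel context):

/* Condensed, illustrative sequence; not verbatim executor code. */
Size  shared_size = btestimateparallelscan();       /* sizeof(BTParallelScanDescData) */
void *shared_area = shm_toc_allocate(toc, shared_size); /* space in the DSM segment */

btinitparallelscan(shared_area);  /* starts out as BTPARALLEL_NOT_INITIALIZED */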

◆ btgetbitmap()

int64 btgetbitmap ( IndexScanDesc  scan,
TIDBitmap tbm 
)

Definition at line 296 of file nbtree.c.

References _bt_advance_array_keys(), _bt_first(), _bt_next(), _bt_start_array_keys(), BTScanOpaqueData::currPos, ForwardScanDirection, BTScanPosItem::heapTid, BTScanPosData::itemIndex, BTScanPosData::items, BTScanPosData::lastItem, BTScanOpaqueData::numArrayKeys, IndexScanDescData::opaque, tbm_add_tuples(), and IndexScanDescData::xs_heaptid.

Referenced by bthandler().

297 {
298  BTScanOpaque so = (BTScanOpaque) scan->opaque;
299  int64 ntids = 0;
300  ItemPointer heapTid;
301 
302  /*
303  * If we have any array keys, initialize them.
304  */
305  if (so->numArrayKeys)
306  {
307  /* punt if we have any unsatisfiable array keys */
308  if (so->numArrayKeys < 0)
309  return ntids;
310 
311  _bt_start_array_keys(scan, ForwardScanDirection);
312  }
313 
314  /* This loop handles advancing to the next array elements, if any */
315  do
316  {
317  /* Fetch the first page & tuple */
318  if (_bt_first(scan, ForwardScanDirection))
319  {
320  /* Save tuple ID, and continue scanning */
321  heapTid = &scan->xs_heaptid;
322  tbm_add_tuples(tbm, heapTid, 1, false);
323  ntids++;
324 
325  for (;;)
326  {
327  /*
328  * Advance to next tuple within page. This is the same as the
329  * easy case in _bt_next().
330  */
331  if (++so->currPos.itemIndex > so->currPos.lastItem)
332  {
333  /* let _bt_next do the heavy lifting */
334  if (!_bt_next(scan, ForwardScanDirection))
335  break;
336  }
337 
338  /* Save tuple ID, and continue scanning */
339  heapTid = &so->currPos.items[so->currPos.itemIndex].heapTid;
340  tbm_add_tuples(tbm, heapTid, 1, false);
341  ntids++;
342  }
343  }
344  /* Now see if we have more array keys to deal with */
345  } while (so->numArrayKeys && _bt_advance_array_keys(scan, ForwardScanDirection));
346 
347  return ntids;
348 }

◆ btgettuple()

bool btgettuple ( IndexScanDesc  scan,
ScanDirection  dir 
)

Definition at line 222 of file nbtree.c.

References _bt_advance_array_keys(), _bt_first(), _bt_next(), _bt_start_array_keys(), BTScanPosIsValid, BTScanOpaqueData::currPos, BTScanPosData::itemIndex, IndexScanDescData::kill_prior_tuple, BTScanOpaqueData::killedItems, MaxTIDsPerBTreePage, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), and IndexScanDescData::xs_recheck.

Referenced by bthandler().

223 {
224  BTScanOpaque so = (BTScanOpaque) scan->opaque;
225  bool res;
226 
227  /* btree indexes are never lossy */
228  scan->xs_recheck = false;
229 
230  /*
231  * If we have any array keys, initialize them during first call for a
232  * scan. We can't do this in btrescan because we don't know the scan
233  * direction at that time.
234  */
235  if (so->numArrayKeys && !BTScanPosIsValid(so->currPos))
236  {
237  /* punt if we have any unsatisfiable array keys */
238  if (so->numArrayKeys < 0)
239  return false;
240 
241  _bt_start_array_keys(scan, dir);
242  }
243 
244  /* This loop handles advancing to the next array elements, if any */
245  do
246  {
247  /*
248  * If we've already initialized this scan, we can just advance it in
249  * the appropriate direction. If we haven't done so yet, we call
250  * _bt_first() to get the first item in the scan.
251  */
252  if (!BTScanPosIsValid(so->currPos))
253  res = _bt_first(scan, dir);
254  else
255  {
256  /*
257  * Check to see if we should kill the previously-fetched tuple.
258  */
259  if (scan->kill_prior_tuple)
260  {
261  /*
262  * Yes, remember it for later. (We'll deal with all such
263  * tuples at once right before leaving the index page.) The
264  * test for numKilled overrun is not just paranoia: if the
265  * caller reverses direction in the indexscan then the same
266  * item might get entered multiple times. It's not worth
267  * trying to optimize that, so we don't detect it, but instead
268  * just forget any excess entries.
269  */
270  if (so->killedItems == NULL)
271  so->killedItems = (int *)
272  palloc(MaxTIDsPerBTreePage * sizeof(int));
273  if (so->numKilled < MaxTIDsPerBTreePage)
274  so->killedItems[so->numKilled++] = so->currPos.itemIndex;
275  }
276 
277  /*
278  * Now continue the scan.
279  */
280  res = _bt_next(scan, dir);
281  }
282 
283  /* If we have a tuple, return it ... */
284  if (res)
285  break;
286  /* ... otherwise see if we have more array keys to deal with */
287  } while (so->numArrayKeys && _bt_advance_array_keys(scan, dir));
288 
289  return res;
290 }

◆ bthandler()

Datum bthandler ( PG_FUNCTION_ARGS  )

Definition at line 108 of file nbtree.c.

References IndexAmRoutine::amadjustmembers, IndexAmRoutine::ambeginscan, IndexAmRoutine::ambuild, IndexAmRoutine::ambuildempty, IndexAmRoutine::ambuildphasename, IndexAmRoutine::ambulkdelete, IndexAmRoutine::amcanbackward, IndexAmRoutine::amcaninclude, IndexAmRoutine::amcanmulticol, IndexAmRoutine::amcanorder, IndexAmRoutine::amcanorderbyop, IndexAmRoutine::amcanparallel, IndexAmRoutine::amcanreturn, IndexAmRoutine::amcanunique, IndexAmRoutine::amclusterable, IndexAmRoutine::amcostestimate, IndexAmRoutine::amendscan, IndexAmRoutine::amestimateparallelscan, IndexAmRoutine::amgetbitmap, IndexAmRoutine::amgettuple, IndexAmRoutine::aminitparallelscan, IndexAmRoutine::aminsert, IndexAmRoutine::amkeytype, IndexAmRoutine::ammarkpos, IndexAmRoutine::amoptionalkey, IndexAmRoutine::amoptions, IndexAmRoutine::amoptsprocnum, IndexAmRoutine::amparallelrescan, IndexAmRoutine::amparallelvacuumoptions, IndexAmRoutine::ampredlocks, IndexAmRoutine::amproperty, IndexAmRoutine::amrescan, IndexAmRoutine::amrestrpos, IndexAmRoutine::amsearcharray, IndexAmRoutine::amsearchnulls, IndexAmRoutine::amstorage, IndexAmRoutine::amstrategies, IndexAmRoutine::amsupport, IndexAmRoutine::amusemaintenanceworkmem, IndexAmRoutine::amvacuumcleanup, IndexAmRoutine::amvalidate, btadjustmembers(), btbeginscan(), btbuild(), btbuildempty(), btbuildphasename(), btbulkdelete(), btcanreturn(), btcostestimate(), btendscan(), btestimateparallelscan(), btgetbitmap(), btgettuple(), btinitparallelscan(), btinsert(), btmarkpos(), BTMaxStrategyNumber, BTNProcs, btoptions(), BTOPTIONS_PROC, btparallelrescan(), btproperty(), btrescan(), btrestrpos(), btvacuumcleanup(), btvalidate(), InvalidOid, makeNode, PG_RETURN_POINTER, VACUUM_OPTION_PARALLEL_BULKDEL, and VACUUM_OPTION_PARALLEL_COND_CLEANUP.

109 {
110  IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
111 
112  amroutine->amstrategies = BTMaxStrategyNumber;
113  amroutine->amsupport = BTNProcs;
114  amroutine->amoptsprocnum = BTOPTIONS_PROC;
115  amroutine->amcanorder = true;
116  amroutine->amcanorderbyop = false;
117  amroutine->amcanbackward = true;
118  amroutine->amcanunique = true;
119  amroutine->amcanmulticol = true;
120  amroutine->amoptionalkey = true;
121  amroutine->amsearcharray = true;
122  amroutine->amsearchnulls = true;
123  amroutine->amstorage = false;
124  amroutine->amclusterable = true;
125  amroutine->ampredlocks = true;
126  amroutine->amcanparallel = true;
127  amroutine->amcaninclude = true;
128  amroutine->amusemaintenanceworkmem = false;
129  amroutine->amparallelvacuumoptions =
130  VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_COND_CLEANUP;
131  amroutine->amkeytype = InvalidOid;
132 
133  amroutine->ambuild = btbuild;
134  amroutine->ambuildempty = btbuildempty;
135  amroutine->aminsert = btinsert;
136  amroutine->ambulkdelete = btbulkdelete;
137  amroutine->amvacuumcleanup = btvacuumcleanup;
138  amroutine->amcanreturn = btcanreturn;
139  amroutine->amcostestimate = btcostestimate;
140  amroutine->amoptions = btoptions;
141  amroutine->amproperty = btproperty;
142  amroutine->ambuildphasename = btbuildphasename;
143  amroutine->amvalidate = btvalidate;
144  amroutine->amadjustmembers = btadjustmembers;
145  amroutine->ambeginscan = btbeginscan;
146  amroutine->amrescan = btrescan;
147  amroutine->amgettuple = btgettuple;
148  amroutine->amgetbitmap = btgetbitmap;
149  amroutine->amendscan = btendscan;
150  amroutine->ammarkpos = btmarkpos;
151  amroutine->amrestrpos = btrestrpos;
152  amroutine->amestimateparallelscan = btestimateparallelscan;
153  amroutine->aminitparallelscan = btinitparallelscan;
154  amroutine->amparallelrescan = btparallelrescan;
155 
156  PG_RETURN_POINTER(amroutine);
157 }
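
bthandler is what the amhandler column of btree's pg_am row points at; core code never calls it directly but invokes it through the fmgr to obtain the filled-in IndexAmRoutine, as GetIndexAmRoutine() in access/index/amapi.c does. A minimal sketch, assuming the generated F_BTHANDLER pg_proc OID macro:

#include "access/amapi.h"
#include "utils/fmgroids.h"

/* Minimal sketch of fetching the btree AM routine (cf. GetIndexAmRoutine). */
static IndexAmRoutine *
get_btree_routine(void)
{
    Datum           datum = OidFunctionCall0(F_BTHANDLER);
    IndexAmRoutine *routine = (IndexAmRoutine *) DatumGetPointer(datum);

    if (routine == NULL || !IsA(routine, IndexAmRoutine))
        elog(ERROR, "bthandler did not return an IndexAmRoutine");

    return routine;
}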

◆ btinitparallelscan()

void btinitparallelscan ( void *  target)

Definition at line 591 of file nbtree.c.

References BTPARALLEL_NOT_INITIALIZED, BTParallelScanDescData::btps_arrayKeyCount, BTParallelScanDescData::btps_cv, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, ConditionVariableInit(), InvalidBlockNumber, and SpinLockInit.

Referenced by bthandler().

592 {
593  BTParallelScanDesc bt_target = (BTParallelScanDesc) target;
594 
595  SpinLockInit(&bt_target->btps_mutex);
596  bt_target->btps_scanPage = InvalidBlockNumber;
597  bt_target->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
598  bt_target->btps_arrayKeyCount = 0;
599  ConditionVariableInit(&bt_target->btps_cv);
600 }

◆ btinsert()

bool btinsert ( Relation  rel,
Datum values,
bool isnull,
ItemPointer  ht_ctid,
Relation  heapRel,
IndexUniqueCheck  checkUnique,
IndexInfo indexInfo 
)

Definition at line 199 of file nbtree.c.

References _bt_doinsert(), index_form_tuple(), pfree(), RelationGetDescr, and IndexTupleData::t_tid.

Referenced by bthandler().

203 {
204  bool result;
205  IndexTuple itup;
206 
207  /* generate an index tuple */
208  itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
209  itup->t_tid = *ht_ctid;
210 
211  result = _bt_doinsert(rel, itup, checkUnique, heapRel);
212 
213  pfree(itup);
214 
215  return result;
216 }

◆ btmarkpos()

void btmarkpos ( IndexScanDesc  scan)

Definition at line 496 of file nbtree.c.

References _bt_mark_array_keys(), BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanPosData::itemIndex, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, and IndexScanDescData::opaque.

Referenced by bthandler().

497 {
498  BTScanOpaque so = (BTScanOpaque) scan->opaque;
499 
500  /* There may be an old mark with a pin (but no lock). */
501  BTScanPosUnpinIfPinned(so->markPos);
502 
503  /*
504  * Just record the current itemIndex. If we later step to next page
505  * before releasing the marked position, _bt_steppage makes a full copy of
506  * the currPos struct in markPos. If (as often happens) the mark is moved
507  * before we leave the page, we don't have to do that work.
508  */
509  if (BTScanPosIsValid(so->currPos))
510  so->markItemIndex = so->currPos.itemIndex;
511  else
512  {
513  BTScanPosInvalidate(so->markPos);
514  so->markItemIndex = -1;
515  }
516 
517  /* Also record the current positions of any array keys */
518  if (so->numArrayKeys)
519  _bt_mark_array_keys(scan);
520 }

◆ btparallelrescan()

void btparallelrescan ( IndexScanDesc  scan)

Definition at line 606 of file nbtree.c.

References Assert, BTPARALLEL_NOT_INITIALIZED, BTParallelScanDescData::btps_arrayKeyCount, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, InvalidBlockNumber, OffsetToPointer, IndexScanDescData::parallel_scan, ParallelIndexScanDescData::ps_offset, SpinLockAcquire, and SpinLockRelease.

Referenced by bthandler().

607 {
608  BTParallelScanDesc btscan;
609  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
610 
611  Assert(parallel_scan);
612 
613  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
614  parallel_scan->ps_offset);
615 
616  /*
617  * In theory, we don't need to acquire the spinlock here, because there
618  * shouldn't be any other workers running at this point, but we do so for
619  * consistency.
620  */
621  SpinLockAcquire(&btscan->btps_mutex);
622  btscan->btps_scanPage = InvalidBlockNumber;
623  btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
624  btscan->btps_arrayKeyCount = 0;
625  SpinLockRelease(&btscan->btps_mutex);
626 }

◆ btreevacuumposting()

static BTVacuumPosting btreevacuumposting ( BTVacState vstate,
IndexTuple  posting,
OffsetNumber  updatedoffset,
int *  nremaining 
)
static

Definition at line 1447 of file nbtree.c.

References BTreeTupleGetNPosting(), BTreeTupleGetPosting(), BTVacState::callback, BTVacState::callback_state, BTVacuumPostingData::deletetids, i, BTVacuumPostingData::itup, BTVacuumPostingData::ndeletedtids, offsetof, palloc(), and BTVacuumPostingData::updatedoffset.

Referenced by btvacuumpage().

1449 {
1450  int live = 0;
1451  int nitem = BTreeTupleGetNPosting(posting);
1452  ItemPointer items = BTreeTupleGetPosting(posting);
1453  BTVacuumPosting vacposting = NULL;
1454 
1455  for (int i = 0; i < nitem; i++)
1456  {
1457  if (!vstate->callback(items + i, vstate->callback_state))
1458  {
1459  /* Live table TID */
1460  live++;
1461  }
1462  else if (vacposting == NULL)
1463  {
1464  /*
1465  * First dead table TID encountered.
1466  *
1467  * It's now clear that we need to delete one or more dead table
1468  * TIDs, so start maintaining metadata describing how to update
1469  * existing posting list tuple.
1470  */
1471  vacposting = palloc(offsetof(BTVacuumPostingData, deletetids) +
1472  nitem * sizeof(uint16));
1473 
1474  vacposting->itup = posting;
1475  vacposting->updatedoffset = updatedoffset;
1476  vacposting->ndeletedtids = 0;
1477  vacposting->deletetids[vacposting->ndeletedtids++] = i;
1478  }
1479  else
1480  {
1481  /* Second or subsequent dead table TID */
1482  vacposting->deletetids[vacposting->ndeletedtids++] = i;
1483  }
1484  }
1485 
1486  *nremaining = live;
1487  return vacposting;
1488 }
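
A small worked example of the return contract, using illustrative TIDs:

/*
 * Suppose the posting list tuple holds three heap TIDs and the VACUUM
 * callback reports only the middle one dead:
 *
 *   items[] = { t0 (live), t1 (dead), t2 (live) }
 *
 * Then btreevacuumposting() returns a BTVacuumPosting with
 *   ndeletedtids  = 1
 *   deletetids[0] = 1        (array index of t1)
 * and sets *nremaining = 2 (t0 and t2 survive).
 *
 * If no TID is dead, it returns NULL and sets *nremaining = nitem.
 */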

◆ btrescan()

void btrescan ( IndexScanDesc  scan,
ScanKey  scankey,
int  nscankeys,
ScanKey  orderbys,
int  norderbys 
)

Definition at line 400 of file nbtree.c.

References _bt_killitems(), _bt_preprocess_array_keys(), BTScanOpaqueData::arrayKeyCount, BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, IndexScanDescData::keyData, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, IndexScanDescData::numberOfKeys, BTScanOpaqueData::numberOfKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), and IndexScanDescData::xs_want_itup.

Referenced by bthandler().

402 {
403  BTScanOpaque so = (BTScanOpaque) scan->opaque;
404 
405  /* we aren't holding any read locks, but gotta drop the pins */
406  if (BTScanPosIsValid(so->currPos))
407  {
408  /* Before leaving current page, deal with any killed items */
409  if (so->numKilled > 0)
410  _bt_killitems(scan);
411  BTScanPosUnpinIfPinned(so->currPos);
412  BTScanPosInvalidate(so->currPos);
413  }
414 
415  so->markItemIndex = -1;
416  so->arrayKeyCount = 0;
417  BTScanPosUnpinIfPinned(so->markPos);
418  BTScanPosInvalidate(so->markPos);
419 
420  /*
421  * Allocate tuple workspace arrays, if needed for an index-only scan and
422  * not already done in a previous rescan call. To save on palloc
423  * overhead, both workspaces are allocated as one palloc block; only this
424  * function and btendscan know that.
425  *
426  * NOTE: this data structure also makes it safe to return data from a
427  * "name" column, even though btree name_ops uses an underlying storage
428  * datatype of cstring. The risk there is that "name" is supposed to be
429  * padded to NAMEDATALEN, but the actual index tuple is probably shorter.
430  * However, since we only return data out of tuples sitting in the
431  * currTuples array, a fetch of NAMEDATALEN bytes can at worst pull some
432  * data out of the markTuples array --- running off the end of memory for
433  * a SIGSEGV is not possible. Yeah, this is ugly as sin, but it beats
434  * adding special-case treatment for name_ops elsewhere.
435  */
436  if (scan->xs_want_itup && so->currTuples == NULL)
437  {
438  so->currTuples = (char *) palloc(BLCKSZ * 2);
439  so->markTuples = so->currTuples + BLCKSZ;
440  }
441 
442  /*
443  * Reset the scan keys. Note that keys ordering stuff moved to _bt_first.
444  * - vadim 05/05/97
445  */
446  if (scankey && scan->numberOfKeys > 0)
447  memmove(scan->keyData,
448  scankey,
449  scan->numberOfKeys * sizeof(ScanKeyData));
450  so->numberOfKeys = 0; /* until _bt_preprocess_keys sets it */
451 
452  /* If any keys are SK_SEARCHARRAY type, set up array-key info */
453  _bt_preprocess_array_keys(scan);
454 }

◆ btrestrpos()

void btrestrpos ( IndexScanDesc  scan)

Definition at line 526 of file nbtree.c.

References _bt_killitems(), _bt_restore_array_keys(), BTScanPosInvalidate, BTScanPosIsPinned, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanPosData::buf, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, IncrBufferRefCount(), BTScanPosData::itemIndex, BTScanPosData::lastItem, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanPosData::nextTupleOffset, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numKilled, offsetof, and IndexScanDescData::opaque.

Referenced by bthandler().

527 {
528  BTScanOpaque so = (BTScanOpaque) scan->opaque;
529 
530  /* Restore the marked positions of any array keys */
531  if (so->numArrayKeys)
532  _bt_restore_array_keys(scan);
533 
534  if (so->markItemIndex >= 0)
535  {
536  /*
537  * The scan has never moved to a new page since the last mark. Just
538  * restore the itemIndex.
539  *
540  * NB: In this case we can't count on anything in so->markPos to be
541  * accurate.
542  */
543  so->currPos.itemIndex = so->markItemIndex;
544  }
545  else
546  {
547  /*
548  * The scan moved to a new page after last mark or restore, and we are
549  * now restoring to the marked page. We aren't holding any read
550  * locks, but if we're still holding the pin for the current position,
551  * we must drop it.
552  */
553  if (BTScanPosIsValid(so->currPos))
554  {
555  /* Before leaving current page, deal with any killed items */
556  if (so->numKilled > 0)
557  _bt_killitems(scan);
558  BTScanPosUnpinIfPinned(so->currPos);
559  }
560 
561  if (BTScanPosIsValid(so->markPos))
562  {
563  /* bump pin on mark buffer for assignment to current buffer */
564  if (BTScanPosIsPinned(so->markPos))
565  IncrBufferRefCount(so->markPos.buf);
566  memcpy(&so->currPos, &so->markPos,
567  offsetof(BTScanPosData, items[1]) +
568  so->markPos.lastItem * sizeof(BTScanPosItem));
569  if (so->currTuples)
570  memcpy(so->currTuples, so->markTuples,
571  so->markPos.nextTupleOffset);
572  }
573  else
574  BTScanPosInvalidate(so->currPos);
575  }
576 }

◆ btvacuumcleanup()

IndexBulkDeleteResult* btvacuumcleanup ( IndexVacuumInfo info,
IndexBulkDeleteResult stats 
)

Definition at line 905 of file nbtree.c.

References _bt_vacuum_needs_cleanup(), IndexVacuumInfo::analyze_only, btvacuumscan(), IndexVacuumInfo::estimated_count, IndexVacuumInfo::num_heap_tuples, IndexBulkDeleteResult::num_index_tuples, and palloc0().

Referenced by bthandler().

906 {
907  /* No-op in ANALYZE ONLY mode */
908  if (info->analyze_only)
909  return stats;
910 
911  /*
912  * If btbulkdelete was called, we need not do anything, just return the
913  * stats from the latest btbulkdelete call. If it wasn't called, we might
914  * still need to do a pass over the index, to recycle any newly-recyclable
915  * pages or to obtain index statistics. _bt_vacuum_needs_cleanup
916  * determines if either are needed.
917  *
918  * Since we aren't going to actually delete any leaf items, there's no
919  * need to go through all the vacuum-cycle-ID pushups.
920  */
921  if (stats == NULL)
922  {
923  /* Check if we need a cleanup */
924  if (!_bt_vacuum_needs_cleanup(info))
925  return NULL;
926 
927  stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
928  btvacuumscan(info, stats, NULL, NULL, 0);
929  }
930 
931  /*
932  * It's quite possible for us to be fooled by concurrent page splits into
933  * double-counting some index tuples, so disbelieve any total that exceeds
934  * the underlying heap's count ... if we know that accurately. Otherwise
935  * this might just make matters worse.
936  */
937  if (!info->estimated_count)
938  {
939  if (stats->num_index_tuples > info->num_heap_tuples)
940  stats->num_index_tuples = info->num_heap_tuples;
941  }
942 
943  return stats;
944 }

◆ btvacuumpage()

static void btvacuumpage ( BTVacState vstate,
BlockNumber  scanblkno 
)
static

Definition at line 1093 of file nbtree.c.

References _bt_checkpage(), _bt_delitems_vacuum(), _bt_lockbuf(), _bt_page_recyclable(), _bt_pagedel(), _bt_relbuf(), _bt_upgradelockbufcleanup(), Assert, BT_READ, BTP_SPLIT_END, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTreeTupleGetNPosting(), BTreeTupleIsPivot(), BTreeTupleIsPosting(), btreevacuumposting(), buf, BufferGetPage, BTVacState::callback, callback(), BTVacState::callback_state, BTVacState::cycleid, ereport, errcode(), errmsg_internal(), i, IndexVacuumInfo::index, BTVacState::info, LOG, MAIN_FORKNUM, MarkBufferDirtyHint(), MaxIndexTuplesPerPage, MemoryContextReset(), MemoryContextSwitchTo(), IndexBulkDeleteResult::num_index_tuples, OffsetNumberNext, BTVacState::oldestBtpoXact, P_FIRSTDATAKEY, P_ISDELETED, P_ISHALFDEAD, P_ISLEAF, P_NONE, P_RIGHTMOST, BTVacState::pagedelcontext, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageGetSpecialPointer, PageIsNew, IndexBulkDeleteResult::pages_deleted, pfree(), RBM_NORMAL, ReadBufferExtended(), RecordFreeIndexPage(), RelationGetRelationName, BTVacState::stats, IndexVacuumInfo::strategy, IndexTupleData::t_tid, BTVacState::totFreePages, TransactionIdIsValid, TransactionIdPrecedes(), IndexBulkDeleteResult::tuples_removed, vacuum_delay_point(), and BTPageOpaqueData::xact.

Referenced by btvacuumscan().

1094 {
1095  IndexVacuumInfo *info = vstate->info;
1096  IndexBulkDeleteResult *stats = vstate->stats;
1097  IndexBulkDeleteCallback callback = vstate->callback;
1098  void *callback_state = vstate->callback_state;
1099  Relation rel = info->index;
1100  bool attempt_pagedel;
1101  BlockNumber blkno,
1102  backtrack_to;
1103  Buffer buf;
1104  Page page;
1105  BTPageOpaque opaque;
1106 
1107  blkno = scanblkno;
1108 
1109 backtrack:
1110 
1111  attempt_pagedel = false;
1112  backtrack_to = P_NONE;
1113 
1114  /* call vacuum_delay_point while not holding any buffer lock */
1115  vacuum_delay_point();
1116 
1117  /*
1118  * We can't use _bt_getbuf() here because it always applies
1119  * _bt_checkpage(), which will barf on an all-zero page. We want to
1120  * recycle all-zero pages, not fail. Also, we want to use a nondefault
1121  * buffer access strategy.
1122  */
1123  buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
1124  info->strategy);
1125  _bt_lockbuf(rel, buf, BT_READ);
1126  page = BufferGetPage(buf);
1127  opaque = NULL;
1128  if (!PageIsNew(page))
1129  {
1130  _bt_checkpage(rel, buf);
1131  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1132  }
1133 
1134  Assert(blkno <= scanblkno);
1135  if (blkno != scanblkno)
1136  {
1137  /*
1138  * We're backtracking.
1139  *
1140  * We followed a right link to a sibling leaf page (a page that
1141  * happens to be from a block located before scanblkno). The only
1142  * case we want to do anything with is a live leaf page having the
1143  * current vacuum cycle ID.
1144  *
1145  * The page had better be in a state that's consistent with what we
1146  * expect. Check for conditions that imply corruption in passing. It
1147  * can't be half-dead because only an interrupted VACUUM process can
1148  * leave pages in that state, so we'd definitely have dealt with it
1149  * back when the page was the scanblkno page (half-dead pages are
1150  * always marked fully deleted by _bt_pagedel()). This assumes that
1151  * there can be only one vacuum process running at a time.
1152  */
1153  if (!opaque || !P_ISLEAF(opaque) || P_ISHALFDEAD(opaque))
1154  {
1155  Assert(false);
1156  ereport(LOG,
1157  (errcode(ERRCODE_INDEX_CORRUPTED),
1158  errmsg_internal("right sibling %u of scanblkno %u unexpectedly in an inconsistent state in index \"%s\"",
1159  blkno, scanblkno, RelationGetRelationName(rel))));
1160  _bt_relbuf(rel, buf);
1161  return;
1162  }
1163 
1164  /*
1165  * We may have already processed the page in an earlier call, when the
1166  * page was scanblkno. This happens when the leaf page split occurred
1167  * after the scan began, but before the right sibling page became the
1168  * scanblkno.
1169  *
1170  * Page may also have been deleted by current btvacuumpage() call,
1171  * since _bt_pagedel() sometimes deletes the right sibling page of
1172  * scanblkno in passing (it does so after we decided where to
1173  * backtrack to). We don't need to process this page as a deleted
1174  * page a second time now (in fact, it would be wrong to count it as a
1175  * deleted page in the bulk delete statistics a second time).
1176  */
1177  if (opaque->btpo_cycleid != vstate->cycleid || P_ISDELETED(opaque))
1178  {
1179  /* Done with current scanblkno (and all lower split pages) */
1180  _bt_relbuf(rel, buf);
1181  return;
1182  }
1183  }
1184 
1185  /* Page is valid, see what to do with it */
1186  if (_bt_page_recyclable(page))
1187  {
1188  /* Okay to recycle this page (which could be leaf or internal) */
1189  RecordFreeIndexPage(rel, blkno);
1190  vstate->totFreePages++;
1191  stats->pages_deleted++;
1192  }
1193  else if (P_ISDELETED(opaque))
1194  {
1195  /*
1196  * Already deleted page (which could be leaf or internal). Can't
1197  * recycle yet.
1198  */
1199  stats->pages_deleted++;
1200 
1201  /* Maintain the oldest btpo.xact */
1202  if (!TransactionIdIsValid(vstate->oldestBtpoXact) ||
1203  TransactionIdPrecedes(opaque->btpo.xact, vstate->oldestBtpoXact))
1204  vstate->oldestBtpoXact = opaque->btpo.xact;
1205  }
1206  else if (P_ISHALFDEAD(opaque))
1207  {
1208  /*
1209  * Half-dead leaf page. Try to delete now. Might update
1210  * oldestBtpoXact and pages_deleted below.
1211  */
1212  attempt_pagedel = true;
1213  }
1214  else if (P_ISLEAF(opaque))
1215  {
1216  OffsetNumber deletable[MaxIndexTuplesPerPage];
1217  int ndeletable;
1218  BTVacuumPosting updatable[MaxIndexTuplesPerPage];
1219  int nupdatable;
1220  OffsetNumber offnum,
1221  minoff,
1222  maxoff;
1223  int nhtidsdead,
1224  nhtidslive;
1225 
1226  /*
1227  * Trade in the initial read lock for a super-exclusive write lock on
1228  * this page. We must get such a lock on every leaf page over the
1229  * course of the vacuum scan, whether or not it actually contains any
1230  * deletable tuples --- see nbtree/README.
1231  */
1232  _bt_upgradelockbufcleanup(rel, buf);
1233 
1234  /*
1235  * Check whether we need to backtrack to earlier pages. What we are
1236  * concerned about is a page split that happened since we started the
1237  * vacuum scan. If the split moved tuples on the right half of the
1238  * split (i.e. the tuples that sort high) to a block that we already
1239  * passed over, then we might have missed the tuples. We need to
1240  * backtrack now. (Must do this before possibly clearing btpo_cycleid
1241  * or deleting scanblkno page below!)
1242  */
1243  if (vstate->cycleid != 0 &&
1244  opaque->btpo_cycleid == vstate->cycleid &&
1245  !(opaque->btpo_flags & BTP_SPLIT_END) &&
1246  !P_RIGHTMOST(opaque) &&
1247  opaque->btpo_next < scanblkno)
1248  backtrack_to = opaque->btpo_next;
1249 
1250  /*
1251  * When each VACUUM begins, it determines an OldestXmin cutoff value.
1252  * Tuples before the cutoff are removed by VACUUM. Scan over all
1253  * items to see which ones need to be deleted according to the
1254  * cutoff point, using the callback.
1255  */
1256  ndeletable = 0;
1257  nupdatable = 0;
1258  minoff = P_FIRSTDATAKEY(opaque);
1259  maxoff = PageGetMaxOffsetNumber(page);
1260  nhtidsdead = 0;
1261  nhtidslive = 0;
1262  if (callback)
1263  {
1264  for (offnum = minoff;
1265  offnum <= maxoff;
1266  offnum = OffsetNumberNext(offnum))
1267  {
1268  IndexTuple itup;
1269 
1270  itup = (IndexTuple) PageGetItem(page,
1271  PageGetItemId(page, offnum));
1272 
1273  /*
1274  * Hot Standby assumes that it's okay that XLOG_BTREE_VACUUM
1275  * records do not produce their own conflicts. This is safe
1276  * as long as the callback function only considers whether the
1277  * index tuple refers to pre-cutoff heap tuples that were
1278  * certainly already pruned away during VACUUM's initial heap
1279  * scan by the time we get here. (XLOG_HEAP2_CLEANUP_INFO
1280  * records produce conflicts using a latestRemovedXid value
1281  * for the entire VACUUM, so there is no need to produce our
1282  * own conflict now.)
1283  *
1284  * Backends with snapshots acquired after a VACUUM starts but
1285  * before it finishes could have a visibility cutoff with a
1286  * later xid than VACUUM's OldestXmin cutoff. These backends
1287  * might happen to opportunistically mark some index tuples
1288  * LP_DEAD before we reach them, even though they may be after
1289  * our cutoff. We don't try to kill these "extra" index
1290  * tuples in _bt_delitems_vacuum(). This keeps things simple,
1291  * and allows us to always avoid generating our own conflicts.
1292  */
1293  Assert(!BTreeTupleIsPivot(itup));
1294  if (!BTreeTupleIsPosting(itup))
1295  {
1296  /* Regular tuple, standard table TID representation */
1297  if (callback(&itup->t_tid, callback_state))
1298  {
1299  deletable[ndeletable++] = offnum;
1300  nhtidsdead++;
1301  }
1302  else
1303  nhtidslive++;
1304  }
1305  else
1306  {
1307  BTVacuumPosting vacposting;
1308  int nremaining;
1309 
1310  /* Posting list tuple */
1311  vacposting = btreevacuumposting(vstate, itup, offnum,
1312  &nremaining);
1313  if (vacposting == NULL)
1314  {
1315  /*
1316  * All table TIDs from the posting tuple remain, so no
1317  * delete or update required
1318  */
1319  Assert(nremaining == BTreeTupleGetNPosting(itup));
1320  }
1321  else if (nremaining > 0)
1322  {
1323 
1324  /*
1325  * Store metadata about posting list tuple in
1326  * updatable array for entire page. Existing tuple
1327  * will be updated during the later call to
1328  * _bt_delitems_vacuum().
1329  */
1330  Assert(nremaining < BTreeTupleGetNPosting(itup));
1331  updatable[nupdatable++] = vacposting;
1332  nhtidsdead += BTreeTupleGetNPosting(itup) - nremaining;
1333  }
1334  else
1335  {
1336  /*
1337  * All table TIDs from the posting list must be
1338  * deleted. We'll delete the index tuple completely
1339  * (no update required).
1340  */
1341  Assert(nremaining == 0);
1342  deletable[ndeletable++] = offnum;
1343  nhtidsdead += BTreeTupleGetNPosting(itup);
1344  pfree(vacposting);
1345  }
1346 
1347  nhtidslive += nremaining;
1348  }
1349  }
1350  }
1351 
1352  /*
1353  * Apply any needed deletes or updates. We issue just one
1354  * _bt_delitems_vacuum() call per page, so as to minimize WAL traffic.
1355  */
1356  if (ndeletable > 0 || nupdatable > 0)
1357  {
1358  Assert(nhtidsdead >= ndeletable + nupdatable);
1359  _bt_delitems_vacuum(rel, buf, deletable, ndeletable, updatable,
1360  nupdatable);
1361 
1362  stats->tuples_removed += nhtidsdead;
1363  /* must recompute maxoff */
1364  maxoff = PageGetMaxOffsetNumber(page);
1365 
1366  /* can't leak memory here */
1367  for (int i = 0; i < nupdatable; i++)
1368  pfree(updatable[i]);
1369  }
1370  else
1371  {
1372  /*
1373  * If the leaf page has been split during this vacuum cycle, it
1374  * seems worth expending a write to clear btpo_cycleid even if we
1375  * don't have any deletions to do. (If we do, _bt_delitems_vacuum
1376  * takes care of this.) This ensures we won't process the page
1377  * again.
1378  *
1379  * We treat this like a hint-bit update because there's no need to
1380  * WAL-log it.
1381  */
1382  Assert(nhtidsdead == 0);
1383  if (vstate->cycleid != 0 &&
1384  opaque->btpo_cycleid == vstate->cycleid)
1385  {
1386  opaque->btpo_cycleid = 0;
1387  MarkBufferDirtyHint(buf, true);
1388  }
1389  }
1390 
1391  /*
1392  * If the leaf page is now empty, try to delete it; else count the
1393  * live tuples (live table TIDs in posting lists are counted as
1394  * separate live tuples). We don't delete when backtracking, though,
1395  * since that would require teaching _bt_pagedel() about backtracking
1396  * (doesn't seem worth adding more complexity to deal with that).
1397  */
1398  if (minoff > maxoff)
1399  attempt_pagedel = (blkno == scanblkno);
1400  else
1401  stats->num_index_tuples += nhtidslive;
1402 
1403  Assert(!attempt_pagedel || nhtidslive == 0);
1404  }
1405 
1406  if (attempt_pagedel)
1407  {
1408  MemoryContext oldcontext;
1409 
1410  /* Run pagedel in a temp context to avoid memory leakage */
1411  MemoryContextReset(vstate->pagedelcontext);
1412  oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
1413 
1414  /*
1415  * We trust the _bt_pagedel return value because it does not include
1416  * any page that a future call here from btvacuumscan is expected to
1417  * count. There will be no double-counting.
1418  */
1419  Assert(blkno == scanblkno);
1420  stats->pages_deleted += _bt_pagedel(rel, buf, &vstate->oldestBtpoXact);
1421 
1422  MemoryContextSwitchTo(oldcontext);
1423  /* pagedel released buffer, so we shouldn't */
1424  }
1425  else
1426  _bt_relbuf(rel, buf);
1427 
1428  if (backtrack_to != P_NONE)
1429  {
1430  blkno = backtrack_to;
1431  goto backtrack;
1432  }
1433 }
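
The leaf-page branch above (source lines 1256-1349) reduces each index tuple to one of three outcomes: fully live (kept as-is), fully dead (appended to deletable[]), or a posting-list tuple with a mix of live and dead heap TIDs (appended to updatable[] so it can be rewritten in place). The following is a minimal standalone sketch of that accounting only; it is not PostgreSQL code, and MockTuple, tid_is_dead(), and the array sizes are hypothetical stand-ins for IndexTuple, the IndexBulkDeleteCallback, and MaxIndexTuplesPerPage.

#include <stdbool.h>
#include <stdio.h>

typedef struct MockTuple
{
    int ntids;                  /* > 1 models a posting-list tuple */
    int tids[8];                /* heap TIDs, simplified to ints */
} MockTuple;

/* Stand-in for the bulk-delete callback: is this heap TID dead? */
static bool
tid_is_dead(int tid)
{
    return tid % 3 == 0;        /* arbitrary rule for the demo */
}

int
main(void)
{
    MockTuple page[] = {
        {1, {4}},               /* plain tuple, live   -> kept        */
        {1, {9}},               /* plain tuple, dead   -> deletable[] */
        {3, {1, 6, 7}},         /* posting, mixed      -> updatable[] */
        {2, {3, 12}},           /* posting, all dead   -> deletable[] */
    };
    int ndeletable = 0, nupdatable = 0, nhtidsdead = 0, nhtidslive = 0;

    for (int off = 0; off < (int) (sizeof(page) / sizeof(page[0])); off++)
    {
        MockTuple *itup = &page[off];
        int nremaining = 0;     /* like btreevacuumposting()'s result */

        for (int i = 0; i < itup->ntids; i++)
        {
            if (tid_is_dead(itup->tids[i]))
                nhtidsdead++;
            else
                nremaining++;
        }
        nhtidslive += nremaining;

        if (nremaining == 0)
            ndeletable++;       /* whole index tuple can be removed */
        else if (nremaining < itup->ntids)
            nupdatable++;       /* posting list shrinks in place */
        /* else: every TID lives, leave the tuple alone */
    }

    printf("deletable=%d updatable=%d dead=%d live=%d\n",
           ndeletable, nupdatable, nhtidsdead, nhtidslive);
    return 0;
}

As in the real code, the point of batching is that a single _bt_delitems_vacuum() call per page can then apply all deletes and in-place posting-list updates at once, minimizing WAL traffic.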

◆ btvacuumscan()

static void btvacuumscan ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats,
IndexBulkDeleteCallback  callback,
void *  callback_state,
BTCycleId  cycleid 
)
static

Definition at line 961 of file nbtree.c.

References _bt_update_meta_cleanup_info(), ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, BTREE_METAPAGE, btvacuumpage(), BTVacState::callback, callback(), BTVacState::callback_state, CurrentMemoryContext, BTVacState::cycleid, IndexBulkDeleteResult::estimated_count, ExclusiveLock, IndexVacuumInfo::index, IndexFreeSpaceMapVacuum(), BTVacState::info, InvalidTransactionId, LockRelationForExtension(), MemoryContextDelete(), IndexVacuumInfo::num_heap_tuples, IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::num_pages, BTVacState::oldestBtpoXact, BTVacState::pagedelcontext, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, pgstat_progress_update_param(), PROGRESS_SCAN_BLOCKS_DONE, PROGRESS_SCAN_BLOCKS_TOTAL, RELATION_IS_LOCAL, RelationGetNumberOfBlocks, IndexVacuumInfo::report_progress, BTVacState::stats, BTVacState::totFreePages, and UnlockRelationForExtension().

Referenced by btbulkdelete(), and btvacuumcleanup().

961 btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
962  IndexBulkDeleteCallback callback, void *callback_state,
963  BTCycleId cycleid)
964 {
965  Relation rel = info->index;
966  BTVacState vstate;
967  BlockNumber num_pages;
968  BlockNumber scanblkno;
969  bool needLock;
970 
971  /*
972  * Reset counts that will be incremented during the scan; needed in case
973  * of multiple scans during a single VACUUM command
974  */
975  stats->estimated_count = false;
976  stats->num_index_tuples = 0;
977  stats->pages_deleted = 0;
978 
979  /* Set up info to pass down to btvacuumpage */
980  vstate.info = info;
981  vstate.stats = stats;
982  vstate.callback = callback;
983  vstate.callback_state = callback_state;
984  vstate.cycleid = cycleid;
985  vstate.totFreePages = 0;
986  vstate.oldestBtpoXact = InvalidTransactionId;
987 
988  /* Create a temporary memory context to run _bt_pagedel in */
989  vstate.pagedelcontext = AllocSetContextCreate(CurrentMemoryContext,
990  "_bt_pagedel",
991  ALLOCSET_DEFAULT_SIZES);
992 
993  /*
994  * The outer loop iterates over all index pages except the metapage, in
995  * physical order (we hope the kernel will cooperate in providing
996  * read-ahead for speed). It is critical that we visit all leaf pages,
997  * including ones added after we start the scan, else we might fail to
998  * delete some deletable tuples. Hence, we must repeatedly check the
999  * relation length. We must acquire the relation-extension lock while
1000  * doing so to avoid a race condition: if someone else is extending the
1001  * relation, there is a window where bufmgr/smgr have created a new
1002  * all-zero page but it hasn't yet been write-locked by _bt_getbuf(). If
1003  * we manage to scan such a page here, we'll improperly assume it can be
1004  * recycled. Taking the lock synchronizes things enough to prevent a
1005  * problem: either num_pages won't include the new page, or _bt_getbuf
1006  * already has write lock on the buffer and it will be fully initialized
1007  * before we can examine it. (See also vacuumlazy.c, which has the same
1008  * issue.) Also, we need not worry if a page is added immediately after
1009  * we look; the page splitting code already has write-lock on the left
1010  * page before it adds a right page, so we must already have processed any
1011  * tuples due to be moved into such a page.
1012  *
1013  * We can skip locking for new or temp relations, however, since no one
1014  * else could be accessing them.
1015  */
1016  needLock = !RELATION_IS_LOCAL(rel);
1017 
1018  scanblkno = BTREE_METAPAGE + 1;
1019  for (;;)
1020  {
1021  /* Get the current relation length */
1022  if (needLock)
1023  LockRelationForExtension(rel, ExclusiveLock);
1024  num_pages = RelationGetNumberOfBlocks(rel);
1025  if (needLock)
1026  UnlockRelationForExtension(rel, ExclusiveLock);
1027 
1028  if (info->report_progress)
1029  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
1030  num_pages);
1031 
1032  /* Quit if we've scanned the whole relation */
1033  if (scanblkno >= num_pages)
1034  break;
1035  /* Iterate over pages, then loop back to recheck length */
1036  for (; scanblkno < num_pages; scanblkno++)
1037  {
1038  btvacuumpage(&vstate, scanblkno);
1039  if (info->report_progress)
1040  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1041  scanblkno);
1042  }
1043  }
1044 
1045  MemoryContextDelete(vstate.pagedelcontext);
1046 
1047  /*
1048  * If we found any recyclable pages (and recorded them in the FSM), then
1049  * forcibly update the upper-level FSM pages to ensure that searchers can
1050  * find them. It's possible that the pages were also found during
1051  * previous scans and so this is a waste of time, but it's cheap enough
1052  * relative to scanning the index that it shouldn't matter much, and
1053  * making sure that free pages are available sooner not later seems
1054  * worthwhile.
1055  *
1056  * Note that if no recyclable pages exist, we don't bother vacuuming the
1057  * FSM at all.
1058  */
1059  if (vstate.totFreePages > 0)
1060  IndexFreeSpaceMapVacuum(rel);
1061 
1062  /*
1063  * Maintain the oldest btpo.xact and a count of the current number of heap
1064  * tuples in the metapage (for the benefit of _bt_vacuum_needs_cleanup).
1065  *
1066  * The page with the oldest btpo.xact is typically a page deleted by this
1067  * VACUUM operation, since pages deleted by a previous VACUUM operation
1068  * tend to be placed in the FSM (by the current VACUUM operation) -- such
1069  * pages are not candidates to be the oldest btpo.xact. (Note that pages
1070  * placed in the FSM are reported as deleted pages in the bulk delete
1071  * statistics, despite not counting as deleted pages for the purposes of
1072  * determining the oldest btpo.xact.)
1073  */
1074  _bt_update_meta_cleanup_info(rel, vstate.oldestBtpoXact,
1075  info->num_heap_tuples);
1076 
1077  /* update statistics */
1078  stats->num_pages = num_pages;
1079  stats->pages_free = vstate.totFreePages;
1080 }
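
The outer loop above (source lines 1019-1043) has a shape worth isolating: scan to the end of the relation as currently measured, then re-measure and keep going, so that pages added after the scan started are still visited. Below is a minimal sketch of just that loop shape; total_blocks() and process_block() are hypothetical stand-ins for RelationGetNumberOfBlocks() and btvacuumpage(), and the relation-extension-lock handshake and progress reporting are elided.

#include <stdio.h>

static unsigned calls = 0;

/*
 * Stand-in for RelationGetNumberOfBlocks(); the relation "grows" from
 * 4 to 6 blocks while the first pass is underway.
 */
static unsigned
total_blocks(void)
{
    return calls++ == 0 ? 4 : 6;
}

static void
process_block(unsigned blkno)
{
    printf("vacuuming block %u\n", blkno);
}

int
main(void)
{
    unsigned scanblkno = 1;     /* block 0 plays the metapage's role */

    for (;;)
    {
        unsigned num_blocks = total_blocks();

        /* Quit only when a re-measure shows nothing new was added */
        if (scanblkno >= num_blocks)
            break;
        for (; scanblkno < num_blocks; scanblkno++)
            process_block(scanblkno);
    }
    return 0;
}

The program prints blocks 1 through 5: the second pass of the outer loop picks up the two blocks that appeared mid-scan, which mirrors why btvacuumscan() must repeatedly recheck the relation length instead of measuring it once up front.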