PostgreSQL Source Code (git master)
nbtree.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "access/nbtxlog.h"
#include "access/relscan.h"
#include "access/xlog.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "nodes/execnodes.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "storage/condition_variable.h"
#include "storage/indexfsm.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/smgr.h"
#include "utils/builtins.h"
#include "utils/index_selfuncs.h"
#include "utils/memutils.h"
Go to the source code of this file.

Data Structures

struct  BTVacState
 
struct  BTParallelScanDescData
 

Typedefs

typedef struct BTParallelScanDescData BTParallelScanDescData
 
typedef struct BTParallelScanDescData *BTParallelScanDesc
 

Enumerations

enum  BTPS_State { BTPARALLEL_NOT_INITIALIZED, BTPARALLEL_ADVANCING, BTPARALLEL_IDLE, BTPARALLEL_DONE }
 

Functions

static void btvacuumscan (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid, TransactionId *oldestBtpoXact)
 
static void btvacuumpage (BTVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno)
 
Datum bthandler (PG_FUNCTION_ARGS)
 
void btbuildempty (Relation index)
 
bool btinsert (Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, IndexInfo *indexInfo)
 
bool btgettuple (IndexScanDesc scan, ScanDirection dir)
 
int64 btgetbitmap (IndexScanDesc scan, TIDBitmap *tbm)
 
IndexScanDesc btbeginscan (Relation rel, int nkeys, int norderbys)
 
void btrescan (IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
 
void btendscan (IndexScanDesc scan)
 
void btmarkpos (IndexScanDesc scan)
 
void btrestrpos (IndexScanDesc scan)
 
Size btestimateparallelscan (void)
 
void btinitparallelscan (void *target)
 
void btparallelrescan (IndexScanDesc scan)
 
bool _bt_parallel_seize (IndexScanDesc scan, BlockNumber *pageno)
 
void _bt_parallel_release (IndexScanDesc scan, BlockNumber scan_page)
 
void _bt_parallel_done (IndexScanDesc scan)
 
void _bt_parallel_advance_array_keys (IndexScanDesc scan)
 
static bool _bt_vacuum_needs_cleanup (IndexVacuumInfo *info)
 
IndexBulkDeleteResultbtbulkdelete (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
 
IndexBulkDeleteResultbtvacuumcleanup (IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 
bool btcanreturn (Relation index, int attno)
 

Typedef Documentation

◆ BTParallelScanDesc

Definition at line 91 of file nbtree.c.

◆ BTParallelScanDescData

Enumeration Type Documentation

◆ BTPS_State

enum BTPS_State
Enumerator
BTPARALLEL_NOT_INITIALIZED 
BTPARALLEL_ADVANCING 
BTPARALLEL_IDLE 
BTPARALLEL_DONE 

Definition at line 67 of file nbtree.c.
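
The four states form a small hand-off protocol among the backends cooperating in a parallel scan. Below is a minimal sketch (not PostgreSQL source) of how one worker could drive the state machine through the _bt_parallel_* functions documented on this page; process_page_sketch() is a hypothetical stand-in for the page reading that _bt_readnextpage() really performs, and _bt_first() handles the initial InvalidBlockNumber case by descending from the root.

#include "postgres.h"
#include "access/nbtree.h"

/* Hypothetical page-processing helper, standing in for the real work;
 * returns the next page to hand off, or P_NONE at the end of the scan. */
static BlockNumber
process_page_sketch(IndexScanDesc scan, BlockNumber blkno)
{
	/* stand-in only: end the scan immediately */
	return P_NONE;
}

static void
parallel_worker_sketch(IndexScanDesc scan)
{
	BlockNumber pageno;

	/*
	 * _bt_parallel_seize() sleeps until the scan is seizable, moves it to
	 * BTPARALLEL_ADVANCING, and returns false once it is BTPARALLEL_DONE.
	 */
	while (_bt_parallel_seize(scan, &pageno))
	{
		BlockNumber next = process_page_sketch(scan, pageno);

		if (next == P_NONE)
		{
			_bt_parallel_done(scan);	/* -> BTPARALLEL_DONE */
			break;
		}
		_bt_parallel_release(scan, next);	/* -> BTPARALLEL_IDLE */
	}
}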

Function Documentation

◆ _bt_parallel_advance_array_keys()

void _bt_parallel_advance_array_keys ( IndexScanDesc  scan)

Definition at line 761 of file nbtree.c.

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_DONE, BTPARALLEL_NOT_INITIALIZED, InvalidBlockNumber, OffsetToPointer, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_advance_array_keys().

762 {
763  BTScanOpaque so = (BTScanOpaque) scan->opaque;
764  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
765  BTParallelScanDesc btscan;
766 
767  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
768  parallel_scan->ps_offset);
769 
770  so->arrayKeyCount++;
771  SpinLockAcquire(&btscan->btps_mutex);
772  if (btscan->btps_pageStatus == BTPARALLEL_DONE)
773  {
774  btscan->btps_scanPage = InvalidBlockNumber;
775  btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
776  btscan->btps_arrayKeyCount++;
777  }
778  SpinLockRelease(&btscan->btps_mutex);
779 }

◆ _bt_parallel_done()

void _bt_parallel_done ( IndexScanDesc  scan)

Definition at line 720 of file nbtree.c.

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_DONE, ConditionVariableBroadcast(), OffsetToPointer, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_first(), and _bt_readnextpage().

721 {
722  BTScanOpaque so = (BTScanOpaque) scan->opaque;
723  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
724  BTParallelScanDesc btscan;
725  bool status_changed = false;
726 
727  /* Do nothing, for non-parallel scans */
728  if (parallel_scan == NULL)
729  return;
730 
731  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
732  parallel_scan->ps_offset);
733 
734  /*
735  * Mark the parallel scan as done for this combination of scan keys,
736  * unless some other process already did so. See also
737  * _bt_advance_array_keys.
738  */
739  SpinLockAcquire(&btscan->btps_mutex);
740  if (so->arrayKeyCount >= btscan->btps_arrayKeyCount &&
741  btscan->btps_pageStatus != BTPARALLEL_DONE)
742  {
743  btscan->btps_pageStatus = BTPARALLEL_DONE;
744  status_changed = true;
745  }
746  SpinLockRelease(&btscan->btps_mutex);
747 
748  /* wake up all the workers associated with this parallel scan */
749  if (status_changed)
750  ConditionVariableBroadcast(&btscan->btps_cv);
751 }

◆ _bt_parallel_release()

void _bt_parallel_release ( IndexScanDesc  scan,
BlockNumber  scan_page 
)

Definition at line 697 of file nbtree.c.

References BTPARALLEL_IDLE, BTParallelScanDescData::btps_cv, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, ConditionVariableSignal(), OffsetToPointer, IndexScanDescData::parallel_scan, ParallelIndexScanDescData::ps_offset, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_readnextpage(), and _bt_readpage().

698 {
699  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
700  BTParallelScanDesc btscan;
701 
702  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
703  parallel_scan->ps_offset);
704 
705  SpinLockAcquire(&btscan->btps_mutex);
706  btscan->btps_scanPage = scan_page;
707  btscan->btps_pageStatus = BTPARALLEL_IDLE;
708  SpinLockRelease(&btscan->btps_mutex);
709  ConditionVariableSignal(&btscan->btps_cv);
710 }

◆ _bt_parallel_seize()

bool _bt_parallel_seize ( IndexScanDesc  scan,
BlockNumber pageno 
)

Definition at line 639 of file nbtree.c.

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_ADVANCING, BTPARALLEL_DONE, ConditionVariableCancelSleep(), ConditionVariableSleep(), OffsetToPointer, IndexScanDescData::opaque, P_NONE, IndexScanDescData::parallel_scan, SpinLockAcquire, SpinLockRelease, status(), and WAIT_EVENT_BTREE_PAGE.

Referenced by _bt_first(), _bt_readnextpage(), and _bt_steppage().

640 {
641  BTScanOpaque so = (BTScanOpaque) scan->opaque;
642  BTPS_State pageStatus;
643  bool exit_loop = false;
644  bool status = true;
645  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
646  BTParallelScanDesc btscan;
647 
648  *pageno = P_NONE;
649 
650  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
651  parallel_scan->ps_offset);
652 
653  while (1)
654  {
655  SpinLockAcquire(&btscan->btps_mutex);
656  pageStatus = btscan->btps_pageStatus;
657 
658  if (so->arrayKeyCount < btscan->btps_arrayKeyCount)
659  {
660  /* Parallel scan has already advanced to a new set of scankeys. */
661  status = false;
662  }
663  else if (pageStatus == BTPARALLEL_DONE)
664  {
665  /*
666  * We're done with this set of scankeys. This may be the end, or
667  * there could be more sets to try.
668  */
669  status = false;
670  }
671  else if (pageStatus != BTPARALLEL_ADVANCING)
672  {
673  /*
674  * We have successfully seized control of the scan for the purpose
675  * of advancing it to a new page!
676  */
677  btscan->btps_pageStatus = BTPARALLEL_ADVANCING;
678  *pageno = btscan->btps_scanPage;
679  exit_loop = true;
680  }
681  SpinLockRelease(&btscan->btps_mutex);
682  if (exit_loop || !status)
683  break;
684  ConditionVariableSleep(&btscan->btps_cv, WAIT_EVENT_BTREE_PAGE);
685  }
686  ConditionVariableCancelSleep();
687 
688  return status;
689 }

◆ _bt_vacuum_needs_cleanup()

static bool _bt_vacuum_needs_cleanup ( IndexVacuumInfo info)
static

Definition at line 786 of file nbtree.c.

References _bt_getbuf(), _bt_relbuf(), BT_READ, BTMetaPageData::btm_last_cleanup_num_heap_tuples, BTMetaPageData::btm_oldest_btpo_xact, BTMetaPageData::btm_version, BTPageGetMeta, BTREE_METAPAGE, BTREE_VERSION, BufferGetPage, IndexVacuumInfo::index, IndexVacuumInfo::num_heap_tuples, RelationData::rd_options, RecentGlobalXmin, TransactionIdIsValid, TransactionIdPrecedes(), vacuum_cleanup_index_scale_factor, and StdRdOptions::vacuum_cleanup_index_scale_factor.

Referenced by btvacuumcleanup().

787 {
788  Buffer metabuf;
789  Page metapg;
790  BTMetaPageData *metad;
791  bool result = false;
792 
793  metabuf = _bt_getbuf(info->index, BTREE_METAPAGE, BT_READ);
794  metapg = BufferGetPage(metabuf);
795  metad = BTPageGetMeta(metapg);
796 
797  if (metad->btm_version < BTREE_VERSION)
798  {
799  /*
800  * Do cleanup if metapage needs upgrade, because we don't have
801  * cleanup-related meta-information yet.
802  */
803  result = true;
804  }
805  else if (TransactionIdIsValid(metad->btm_oldest_btpo_xact) &&
806  TransactionIdPrecedes(metad->btm_oldest_btpo_xact,
807  RecentGlobalXmin))
808  {
809  /*
810  * If oldest btpo.xact in the deleted pages is older than
811  * RecentGlobalXmin, then at least one deleted page can be recycled.
812  */
813  result = true;
814  }
815  else
816  {
817  StdRdOptions *relopts;
818  float8 cleanup_scale_factor;
819 
820  /*
821  * If the table receives enough insertions and no cleanup was performed,
822  * then the index would appear to have stale statistics. If the scale
823  * factor is set, we avoid that by performing cleanup once the number of
824  * inserted tuples exceeds the vacuum_cleanup_index_scale_factor
825  * fraction of the original tuple count.
826  */
827  relopts = (StdRdOptions *) info->index->rd_options;
828  cleanup_scale_factor = (relopts &&
829  relopts->vacuum_cleanup_index_scale_factor >= 0)
830  ? relopts->vacuum_cleanup_index_scale_factor
831  : vacuum_cleanup_index_scale_factor;
832 
833  if (cleanup_scale_factor <= 0 ||
834  metad->btm_last_cleanup_num_heap_tuples < 0 ||
835  info->num_heap_tuples > (1.0 + cleanup_scale_factor) *
836  metad->btm_last_cleanup_num_heap_tuples)
837  result = true;
838  }
839 
840  _bt_relbuf(info->index, metabuf);
841  return result;
842 }
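To make the threshold arithmetic concrete, here is a standalone restatement of the test above with illustrative numbers. This is a sketch, not backend code; the parameter names merely mirror the fields used by _bt_vacuum_needs_cleanup.

#include <stdbool.h>

static bool
needs_cleanup_sketch(double cleanup_scale_factor,
                     double last_cleanup_num_heap_tuples,
                     double num_heap_tuples)
{
    if (cleanup_scale_factor <= 0)
        return true;            /* threshold disabled: always clean up */
    if (last_cleanup_num_heap_tuples < 0)
        return true;            /* no statistics recorded yet */

    /* e.g. factor 0.1 and a last count of 1000000: cleanup is needed
     * once the heap exceeds (1.0 + 0.1) * 1000000 = 1100000 tuples */
    return num_heap_tuples >
        (1.0 + cleanup_scale_factor) * last_cleanup_num_heap_tuples;
}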

◆ btbeginscan()

IndexScanDesc btbeginscan ( Relation  rel,
int  nkeys,
int  norderbys 
)

Definition at line 346 of file nbtree.c.

References BTScanOpaqueData::arrayContext, BTScanOpaqueData::arrayKeyData, BTScanOpaqueData::arrayKeys, Assert, BTScanPosInvalidate, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanOpaqueData::keyData, BTScanOpaqueData::killedItems, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanOpaqueData::numArrayKeys, IndexScanDescData::numberOfKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), RelationGetDescr, RelationGetIndexScan(), and IndexScanDescData::xs_itupdesc.

Referenced by bthandler().

347 {
348  IndexScanDesc scan;
349  BTScanOpaque so;
350 
351  /* no order by operators allowed */
352  Assert(norderbys == 0);
353 
354  /* get the scan */
355  scan = RelationGetIndexScan(rel, nkeys, norderbys);
356 
357  /* allocate private workspace */
358  so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
359  BTScanPosInvalidate(so->currPos);
360  BTScanPosInvalidate(so->markPos);
361  if (scan->numberOfKeys > 0)
362  so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
363  else
364  so->keyData = NULL;
365 
366  so->arrayKeyData = NULL; /* assume no array keys for now */
367  so->numArrayKeys = 0;
368  so->arrayKeys = NULL;
369  so->arrayContext = NULL;
370 
371  so->killedItems = NULL; /* until needed */
372  so->numKilled = 0;
373 
374  /*
375  * We don't know yet whether the scan will be index-only, so we do not
376  * allocate the tuple workspace arrays until btrescan. However, we set up
377  * scan->xs_itupdesc whether we'll need it or not, since that's so cheap.
378  */
379  so->currTuples = so->markTuples = NULL;
380 
381  scan->xs_itupdesc = RelationGetDescr(rel);
382 
383  scan->opaque = so;
384 
385  return scan;
386 }

◆ btbuildempty()

void btbuildempty ( Relation  index)

Definition at line 155 of file nbtree.c.

References _bt_initmetapage(), BTREE_METAPAGE, INIT_FORKNUM, log_newpage(), RelFileNodeBackend::node, P_NONE, PageSetChecksumInplace(), palloc(), RelationData::rd_smgr, SMgrRelationData::smgr_rnode, smgrimmedsync(), and smgrwrite().

Referenced by bthandler().

156 {
157  Page metapage;
158 
159  /* Construct metapage. */
160  metapage = (Page) palloc(BLCKSZ);
161  _bt_initmetapage(metapage, P_NONE, 0);
162 
163  /*
164  * Write the page and log it. It might seem that an immediate sync would
165  * be sufficient to guarantee that the file exists on disk, but recovery
166  * itself might remove it while replaying, for example, an
167  * XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record. Therefore, we need
168  * this even when wal_level=minimal.
169  */
170  PageSetChecksumInplace(metapage, BTREE_METAPAGE);
171  smgrwrite(index->rd_smgr, INIT_FORKNUM, BTREE_METAPAGE,
172  (char *) metapage, true);
173  log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
174  BTREE_METAPAGE, metapage, true);
175 
176  /*
177  * An immediate sync is required even if we xlog'd the page, because the
178  * write did not go through shared_buffers and therefore a concurrent
179  * checkpoint may have moved the redo pointer past our xlog record.
180  */
181  smgrimmedsync(index->rd_smgr, INIT_FORKNUM);
182 }

◆ btbulkdelete()

IndexBulkDeleteResult* btbulkdelete ( IndexVacuumInfo info,
IndexBulkDeleteResult stats,
IndexBulkDeleteCallback  callback,
void *  callback_state 
)

Definition at line 852 of file nbtree.c.

References _bt_end_vacuum(), _bt_end_vacuum_callback(), _bt_start_vacuum(), _bt_update_meta_cleanup_info(), btvacuumscan(), IndexVacuumInfo::index, IndexVacuumInfo::num_heap_tuples, palloc0(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, and PointerGetDatum.

Referenced by bthandler().

854 {
855  Relation rel = info->index;
856  BTCycleId cycleid;
857 
858  /* allocate stats if first time through, else re-use existing struct */
859  if (stats == NULL)
860  stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
861 
862  /* Establish the vacuum cycle ID to use for this scan */
863  /* The ENSURE stuff ensures we clean up shared memory on failure */
864  PG_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
865  {
866  TransactionId oldestBtpoXact;
867 
868  cycleid = _bt_start_vacuum(rel);
869 
870  btvacuumscan(info, stats, callback, callback_state, cycleid,
871  &oldestBtpoXact);
872 
873  /*
874  * Update cleanup-related information in metapage. This information
875  * is used only for cleanup but keeping them up to date can avoid
876  * unnecessary cleanup even after bulkdelete.
877  */
878  _bt_update_meta_cleanup_info(info->index, oldestBtpoXact,
879  info->num_heap_tuples);
880  }
881  PG_END_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
882  _bt_end_vacuum(rel);
883 
884  return stats;
885 }

◆ btcanreturn()

bool btcanreturn ( Relation  index,
int  attno 
)

Definition at line 1373 of file nbtree.c.

Referenced by bthandler().

1374 {
1375  return true;
1376 }

◆ btendscan()

void btendscan ( IndexScanDesc  scan)

Definition at line 452 of file nbtree.c.

References _bt_killitems(), BTScanOpaqueData::arrayContext, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanOpaqueData::keyData, BTScanOpaqueData::killedItems, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, MemoryContextDelete(), BTScanOpaqueData::numKilled, IndexScanDescData::opaque, and pfree().

Referenced by bthandler().

453 {
454  BTScanOpaque so = (BTScanOpaque) scan->opaque;
455 
456  /* we aren't holding any read locks, but gotta drop the pins */
457  if (BTScanPosIsValid(so->currPos))
458  {
459  /* Before leaving current page, deal with any killed items */
460  if (so->numKilled > 0)
461  _bt_killitems(scan);
462  BTScanPosUnpinIfPinned(so->currPos);
463  }
464 
465  so->markItemIndex = -1;
466  BTScanPosUnpinIfPinned(so->markPos);
467 
468  /* No need to invalidate positions, the RAM is about to be freed. */
469 
470  /* Release storage */
471  if (so->keyData != NULL)
472  pfree(so->keyData);
473  /* so->arrayKeyData and so->arrayKeys are in arrayContext */
474  if (so->arrayContext != NULL)
475  MemoryContextDelete(so->arrayContext);
476  if (so->killedItems != NULL)
477  pfree(so->killedItems);
478  if (so->currTuples != NULL)
479  pfree(so->currTuples);
480  /* so->markTuples should not be pfree'd, see btrescan */
481  pfree(so);
482 }

◆ btestimateparallelscan()

Size btestimateparallelscan ( void  )

Definition at line 574 of file nbtree.c.

Referenced by bthandler().

575 {
576  return sizeof(BTParallelScanDescData);
577 }

◆ btgetbitmap()

int64 btgetbitmap ( IndexScanDesc  scan,
TIDBitmap tbm 
)

Definition at line 288 of file nbtree.c.

References _bt_advance_array_keys(), _bt_first(), _bt_next(), _bt_start_array_keys(), BTScanOpaqueData::currPos, ForwardScanDirection, BTScanPosItem::heapTid, BTScanPosData::itemIndex, BTScanPosData::items, BTScanPosData::lastItem, BTScanOpaqueData::numArrayKeys, IndexScanDescData::opaque, HeapTupleData::t_self, tbm_add_tuples(), and IndexScanDescData::xs_ctup.

Referenced by bthandler().

289 {
290  BTScanOpaque so = (BTScanOpaque) scan->opaque;
291  int64 ntids = 0;
292  ItemPointer heapTid;
293 
294  /*
295  * If we have any array keys, initialize them.
296  */
297  if (so->numArrayKeys)
298  {
299  /* punt if we have any unsatisfiable array keys */
300  if (so->numArrayKeys < 0)
301  return ntids;
302 
303  _bt_start_array_keys(scan, ForwardScanDirection);
304  }
305 
306  /* This loop handles advancing to the next array elements, if any */
307  do
308  {
309  /* Fetch the first page & tuple */
310  if (_bt_first(scan, ForwardScanDirection))
311  {
312  /* Save tuple ID, and continue scanning */
313  heapTid = &scan->xs_ctup.t_self;
314  tbm_add_tuples(tbm, heapTid, 1, false);
315  ntids++;
316 
317  for (;;)
318  {
319  /*
320  * Advance to next tuple within page. This is the same as the
321  * easy case in _bt_next().
322  */
323  if (++so->currPos.itemIndex > so->currPos.lastItem)
324  {
325  /* let _bt_next do the heavy lifting */
326  if (!_bt_next(scan, ForwardScanDirection))
327  break;
328  }
329 
330  /* Save tuple ID, and continue scanning */
331  heapTid = &so->currPos.items[so->currPos.itemIndex].heapTid;
332  tbm_add_tuples(tbm, heapTid, 1, false);
333  ntids++;
334  }
335  }
336  /* Now see if we have more array keys to deal with */
337  } while (so->numArrayKeys && _bt_advance_array_keys(scan, ForwardScanDirection));
338 
339  return ntids;
340 }

◆ btgettuple()

bool btgettuple ( IndexScanDesc  scan,
ScanDirection  dir 
)

Definition at line 214 of file nbtree.c.

References _bt_advance_array_keys(), _bt_first(), _bt_next(), _bt_start_array_keys(), BTScanPosIsValid, BTScanOpaqueData::currPos, BTScanPosData::itemIndex, IndexScanDescData::kill_prior_tuple, BTScanOpaqueData::killedItems, MaxIndexTuplesPerPage, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), and IndexScanDescData::xs_recheck.

Referenced by bthandler().

215 {
216  BTScanOpaque so = (BTScanOpaque) scan->opaque;
217  bool res;
218 
219  /* btree indexes are never lossy */
220  scan->xs_recheck = false;
221 
222  /*
223  * If we have any array keys, initialize them during first call for a
224  * scan. We can't do this in btrescan because we don't know the scan
225  * direction at that time.
226  */
227  if (so->numArrayKeys && !BTScanPosIsValid(so->currPos))
228  {
229  /* punt if we have any unsatisfiable array keys */
230  if (so->numArrayKeys < 0)
231  return false;
232 
233  _bt_start_array_keys(scan, dir);
234  }
235 
236  /* This loop handles advancing to the next array elements, if any */
237  do
238  {
239  /*
240  * If we've already initialized this scan, we can just advance it in
241  * the appropriate direction. If we haven't done so yet, we call
242  * _bt_first() to get the first item in the scan.
243  */
244  if (!BTScanPosIsValid(so->currPos))
245  res = _bt_first(scan, dir);
246  else
247  {
248  /*
249  * Check to see if we should kill the previously-fetched tuple.
250  */
251  if (scan->kill_prior_tuple)
252  {
253  /*
254  * Yes, remember it for later. (We'll deal with all such
255  * tuples at once right before leaving the index page.) The
256  * test for numKilled overrun is not just paranoia: if the
257  * caller reverses direction in the indexscan then the same
258  * item might get entered multiple times. It's not worth
259  * trying to optimize that, so we don't detect it, but instead
260  * just forget any excess entries.
261  */
262  if (so->killedItems == NULL)
263  so->killedItems = (int *)
264  palloc(MaxIndexTuplesPerPage * sizeof(int));
265  if (so->numKilled < MaxIndexTuplesPerPage)
266  so->killedItems[so->numKilled++] = so->currPos.itemIndex;
267  }
268 
269  /*
270  * Now continue the scan.
271  */
272  res = _bt_next(scan, dir);
273  }
274 
275  /* If we have a tuple, return it ... */
276  if (res)
277  break;
278  /* ... otherwise see if we have more array keys to deal with */
279  } while (so->numArrayKeys && _bt_advance_array_keys(scan, dir));
280 
281  return res;
282 }
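The kill_prior_tuple handshake above is normally driven by the heap-fetch path inside indexam.c, not by end callers. A minimal sketch of that caller side, assuming the previous heap fetch reported the tuple as dead to all transactions:

#include "postgres.h"
#include "access/genam.h"
#include "access/relscan.h"

/*
 * Sketch only: mark the previously-returned tuple as dead, then ask the
 * access method for the next TID.  btgettuple records the dead tuple in
 * so->killedItems, and _bt_killitems() flags it before leaving the page.
 */
static ItemPointer
next_tid_sketch(IndexScanDesc scan, bool prior_tuple_all_dead)
{
	scan->kill_prior_tuple = prior_tuple_all_dead;
	return index_getnext_tid(scan, ForwardScanDirection);
}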

◆ bthandler()

Datum bthandler ( PG_FUNCTION_ARGS  )

Definition at line 106 of file nbtree.c.

References IndexAmRoutine::ambeginscan, IndexAmRoutine::ambuild, IndexAmRoutine::ambuildempty, IndexAmRoutine::ambulkdelete, IndexAmRoutine::amcanbackward, IndexAmRoutine::amcaninclude, IndexAmRoutine::amcanmulticol, IndexAmRoutine::amcanorder, IndexAmRoutine::amcanorderbyop, IndexAmRoutine::amcanparallel, IndexAmRoutine::amcanreturn, IndexAmRoutine::amcanunique, IndexAmRoutine::amclusterable, IndexAmRoutine::amcostestimate, IndexAmRoutine::amendscan, IndexAmRoutine::amestimateparallelscan, IndexAmRoutine::amgetbitmap, IndexAmRoutine::amgettuple, IndexAmRoutine::aminitparallelscan, IndexAmRoutine::aminsert, IndexAmRoutine::amkeytype, IndexAmRoutine::ammarkpos, IndexAmRoutine::amoptionalkey, IndexAmRoutine::amoptions, IndexAmRoutine::amparallelrescan, IndexAmRoutine::ampredlocks, IndexAmRoutine::amproperty, IndexAmRoutine::amrescan, IndexAmRoutine::amrestrpos, IndexAmRoutine::amsearcharray, IndexAmRoutine::amsearchnulls, IndexAmRoutine::amstorage, IndexAmRoutine::amstrategies, IndexAmRoutine::amsupport, IndexAmRoutine::amvacuumcleanup, IndexAmRoutine::amvalidate, btbeginscan(), btbuild(), btbuildempty(), btbulkdelete(), btcanreturn(), btcostestimate(), btendscan(), btestimateparallelscan(), btgetbitmap(), btgettuple(), btinitparallelscan(), btinsert(), btmarkpos(), BTMaxStrategyNumber, BTNProcs, btoptions(), btparallelrescan(), btproperty(), btrescan(), btrestrpos(), btvacuumcleanup(), btvalidate(), InvalidOid, makeNode, and PG_RETURN_POINTER.

107 {
108  IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
109 
110  amroutine->amstrategies = BTMaxStrategyNumber;
111  amroutine->amsupport = BTNProcs;
112  amroutine->amcanorder = true;
113  amroutine->amcanorderbyop = false;
114  amroutine->amcanbackward = true;
115  amroutine->amcanunique = true;
116  amroutine->amcanmulticol = true;
117  amroutine->amoptionalkey = true;
118  amroutine->amsearcharray = true;
119  amroutine->amsearchnulls = true;
120  amroutine->amstorage = false;
121  amroutine->amclusterable = true;
122  amroutine->ampredlocks = true;
123  amroutine->amcanparallel = true;
124  amroutine->amcaninclude = true;
125  amroutine->amkeytype = InvalidOid;
126 
127  amroutine->ambuild = btbuild;
128  amroutine->ambuildempty = btbuildempty;
129  amroutine->aminsert = btinsert;
130  amroutine->ambulkdelete = btbulkdelete;
131  amroutine->amvacuumcleanup = btvacuumcleanup;
132  amroutine->amcanreturn = btcanreturn;
133  amroutine->amcostestimate = btcostestimate;
134  amroutine->amoptions = btoptions;
135  amroutine->amproperty = btproperty;
136  amroutine->amvalidate = btvalidate;
137  amroutine->ambeginscan = btbeginscan;
138  amroutine->amrescan = btrescan;
139  amroutine->amgettuple = btgettuple;
140  amroutine->amgetbitmap = btgetbitmap;
141  amroutine->amendscan = btendscan;
142  amroutine->ammarkpos = btmarkpos;
143  amroutine->amrestrpos = btrestrpos;
144  amroutine->amestimateparallelscan = btestimateparallelscan;
145  amroutine->aminitparallelscan = btinitparallelscan;
146  amroutine->amparallelrescan = btparallelrescan;
147 
148  PG_RETURN_POINTER(amroutine);
149 }
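Callers never invoke the bt* functions above directly; they fetch this IndexAmRoutine through the relcache and dispatch via the generic wrappers in indexam.c. A minimal sketch of that path, assuming the caller already holds suitable locks on the index and heap relations:

#include "postgres.h"
#include "access/genam.h"
#include "access/relscan.h"
#include "utils/snapmgr.h"

static void
scan_via_am_sketch(Relation heapRel, Relation indexRel,
                   ScanKey keys, int nkeys)
{
	IndexScanDesc scan;
	HeapTuple	tuple;

	/* index_beginscan() looks up the am routine (bthandler's result for
	 * btree) and calls ambeginscan = btbeginscan underneath */
	scan = index_beginscan(heapRel, indexRel, GetActiveSnapshot(), nkeys, 0);
	index_rescan(scan, keys, nkeys, NULL, 0);	/* -> btrescan */

	while ((tuple = index_getnext(scan, ForwardScanDirection)) != NULL)
	{
		/* each iteration reaches btgettuple, then fetches the heap tuple */
	}

	index_endscan(scan);	/* -> btendscan */
}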

◆ btinitparallelscan()

void btinitparallelscan ( void *  target)

Definition at line 583 of file nbtree.c.

References BTPARALLEL_NOT_INITIALIZED, BTParallelScanDescData::btps_arrayKeyCount, BTParallelScanDescData::btps_cv, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, ConditionVariableInit(), InvalidBlockNumber, and SpinLockInit.

Referenced by bthandler().

584 {
585  BTParallelScanDesc bt_target = (BTParallelScanDesc) target;
586 
587  SpinLockInit(&bt_target->btps_mutex);
588  bt_target->btps_scanPage = InvalidBlockNumber;
589  bt_target->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
590  bt_target->btps_arrayKeyCount = 0;
591  ConditionVariableInit(&bt_target->btps_cv);
592 }
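In the backend, index_parallelscan_estimate() and index_parallelscan_initialize() call btestimateparallelscan() and btinitparallelscan() with space carved out of dynamic shared memory. A self-contained sketch of the same sequence; palloc stands in for the DSM allocation only to keep the example freestanding:

#include "postgres.h"
#include "access/nbtree.h"

static void *
parallel_target_sketch(void)
{
	/* ask the AM how much shared scan state it needs ... */
	Size		size = btestimateparallelscan();

	/* ... allocate it (really in DSM; palloc'd here for the sketch) ... */
	void	   *target = palloc(size);

	/* ... and put it in the BTPARALLEL_NOT_INITIALIZED starting state */
	btinitparallelscan(target);

	return target;
}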

◆ btinsert()

bool btinsert ( Relation  rel,
Datum values,
bool isnull,
ItemPointer  ht_ctid,
Relation  heapRel,
IndexUniqueCheck  checkUnique,
IndexInfo indexInfo 
)

Definition at line 191 of file nbtree.c.

References _bt_doinsert(), index_form_tuple(), pfree(), RelationGetDescr, and IndexTupleData::t_tid.

Referenced by bthandler().

195 {
196  bool result;
197  IndexTuple itup;
198 
199  /* generate an index tuple */
200  itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
201  itup->t_tid = *ht_ctid;
202 
203  result = _bt_doinsert(rel, itup, checkUnique, heapRel);
204 
205  pfree(itup);
206 
207  return result;
208 }
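btinsert is likewise reached through the generic wrapper index_insert(). A minimal sketch, assuming the values/isnull arrays were already filled in (e.g. with FormIndexDatum()):

#include "postgres.h"
#include "access/genam.h"
#include "nodes/execnodes.h"
#include "storage/itemptr.h"

static bool
insert_via_am_sketch(Relation indexRel, Relation heapRel,
                     Datum *values, bool *isnull,
                     ItemPointer heap_tid, IndexInfo *indexInfo)
{
	/* dispatches to btinsert(), enforcing uniqueness immediately */
	return index_insert(indexRel, values, isnull, heap_tid,
						heapRel, UNIQUE_CHECK_YES, indexInfo);
}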

◆ btmarkpos()

void btmarkpos ( IndexScanDesc  scan)

Definition at line 488 of file nbtree.c.

References _bt_mark_array_keys(), BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanPosData::itemIndex, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, and IndexScanDescData::opaque.

Referenced by bthandler().

489 {
490  BTScanOpaque so = (BTScanOpaque) scan->opaque;
491 
492  /* There may be an old mark with a pin (but no lock). */
493  BTScanPosUnpinIfPinned(so->markPos);
494 
495  /*
496  * Just record the current itemIndex. If we later step to next page
497  * before releasing the marked position, _bt_steppage makes a full copy of
498  * the currPos struct in markPos. If (as often happens) the mark is moved
499  * before we leave the page, we don't have to do that work.
500  */
501  if (BTScanPosIsValid(so->currPos))
502  so->markItemIndex = so->currPos.itemIndex;
503  else
504  {
505  BTScanPosInvalidate(so->markPos);
506  so->markItemIndex = -1;
507  }
508 
509  /* Also record the current positions of any array keys */
510  if (so->numArrayKeys)
511  _bt_mark_array_keys(scan);
512 }
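Together with btrestrpos() below, btmarkpos() implements the access method's mark/restore support, reached through the generic wrappers in indexam.c. A minimal sketch of the protocol (used, for example, by merge joins):

#include "postgres.h"
#include "access/genam.h"

static void
mark_restore_sketch(IndexScanDesc scan)
{
	index_markpos(scan);	/* -> btmarkpos: records the current itemIndex */

	/* ... caller advances the scan, e.g. with index_getnext_tid() ... */

	index_restrpos(scan);	/* -> btrestrpos: back to the marked item */
}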

◆ btparallelrescan()

void btparallelrescan ( IndexScanDesc  scan)

Definition at line 598 of file nbtree.c.

References Assert, BTPARALLEL_NOT_INITIALIZED, BTParallelScanDescData::btps_arrayKeyCount, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, InvalidBlockNumber, OffsetToPointer, IndexScanDescData::parallel_scan, ParallelIndexScanDescData::ps_offset, SpinLockAcquire, and SpinLockRelease.

Referenced by bthandler().

599 {
600  BTParallelScanDesc btscan;
601  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
602 
603  Assert(parallel_scan);
604 
605  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
606  parallel_scan->ps_offset);
607 
608  /*
609  * In theory, we don't need to acquire the spinlock here, because there
610  * shouldn't be any other workers running at this point, but we do so for
611  * consistency.
612  */
613  SpinLockAcquire(&btscan->btps_mutex);
614  btscan->btps_scanPage = InvalidBlockNumber;
615  btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
616  btscan->btps_arrayKeyCount = 0;
617  SpinLockRelease(&btscan->btps_mutex);
618 }

◆ btrescan()

void btrescan ( IndexScanDesc  scan,
ScanKey  scankey,
int  nscankeys,
ScanKey  orderbys,
int  norderbys 
)

Definition at line 392 of file nbtree.c.

References _bt_killitems(), _bt_preprocess_array_keys(), BTScanOpaqueData::arrayKeyCount, BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, IndexScanDescData::keyData, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, memmove, IndexScanDescData::numberOfKeys, BTScanOpaqueData::numberOfKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), and IndexScanDescData::xs_want_itup.

Referenced by bthandler().

394 {
395  BTScanOpaque so = (BTScanOpaque) scan->opaque;
396 
397  /* we aren't holding any read locks, but gotta drop the pins */
398  if (BTScanPosIsValid(so->currPos))
399  {
400  /* Before leaving current page, deal with any killed items */
401  if (so->numKilled > 0)
402  _bt_killitems(scan);
403  BTScanPosUnpinIfPinned(so->currPos);
404  BTScanPosInvalidate(so->currPos);
405  }
406 
407  so->markItemIndex = -1;
408  so->arrayKeyCount = 0;
409  BTScanPosUnpinIfPinned(so->markPos);
410  BTScanPosInvalidate(so->markPos);
411 
412  /*
413  * Allocate tuple workspace arrays, if needed for an index-only scan and
414  * not already done in a previous rescan call. To save on palloc
415  * overhead, both workspaces are allocated as one palloc block; only this
416  * function and btendscan know that.
417  *
418  * NOTE: this data structure also makes it safe to return data from a
419  * "name" column, even though btree name_ops uses an underlying storage
420  * datatype of cstring. The risk there is that "name" is supposed to be
421  * padded to NAMEDATALEN, but the actual index tuple is probably shorter.
422  * However, since we only return data out of tuples sitting in the
423  * currTuples array, a fetch of NAMEDATALEN bytes can at worst pull some
424  * data out of the markTuples array --- running off the end of memory for
425  * a SIGSEGV is not possible. Yeah, this is ugly as sin, but it beats
426  * adding special-case treatment for name_ops elsewhere.
427  */
428  if (scan->xs_want_itup && so->currTuples == NULL)
429  {
430  so->currTuples = (char *) palloc(BLCKSZ * 2);
431  so->markTuples = so->currTuples + BLCKSZ;
432  }
433 
434  /*
435  * Reset the scan keys. Note that keys ordering stuff moved to _bt_first.
436  * - vadim 05/05/97
437  */
438  if (scankey && scan->numberOfKeys > 0)
439  memmove(scan->keyData,
440  scankey,
441  scan->numberOfKeys * sizeof(ScanKeyData));
442  so->numberOfKeys = 0; /* until _bt_preprocess_keys sets it */
443 
444  /* If any keys are SK_SEARCHARRAY type, set up array-key info */
445  _bt_preprocess_array_keys(scan);
446 }

◆ btrestrpos()

void btrestrpos ( IndexScanDesc  scan)

Definition at line 518 of file nbtree.c.

References _bt_killitems(), _bt_restore_array_keys(), BTScanPosInvalidate, BTScanPosIsPinned, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanPosData::buf, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, IncrBufferRefCount(), BTScanPosData::itemIndex, BTScanPosData::lastItem, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanPosData::nextTupleOffset, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numKilled, offsetof, and IndexScanDescData::opaque.

Referenced by bthandler().

519 {
520  BTScanOpaque so = (BTScanOpaque) scan->opaque;
521 
522  /* Restore the marked positions of any array keys */
523  if (so->numArrayKeys)
524  _bt_restore_array_keys(scan);
525 
526  if (so->markItemIndex >= 0)
527  {
528  /*
529  * The scan has never moved to a new page since the last mark. Just
530  * restore the itemIndex.
531  *
532  * NB: In this case we can't count on anything in so->markPos to be
533  * accurate.
534  */
535  so->currPos.itemIndex = so->markItemIndex;
536  }
537  else
538  {
539  /*
540  * The scan moved to a new page after last mark or restore, and we are
541  * now restoring to the marked page. We aren't holding any read
542  * locks, but if we're still holding the pin for the current position,
543  * we must drop it.
544  */
545  if (BTScanPosIsValid(so->currPos))
546  {
547  /* Before leaving current page, deal with any killed items */
548  if (so->numKilled > 0)
549  _bt_killitems(scan);
550  BTScanPosUnpinIfPinned(so->currPos);
551  }
552 
553  if (BTScanPosIsValid(so->markPos))
554  {
555  /* bump pin on mark buffer for assignment to current buffer */
556  if (BTScanPosIsPinned(so->markPos))
557  IncrBufferRefCount(so->markPos.buf);
558  memcpy(&so->currPos, &so->markPos,
559  offsetof(BTScanPosData, items[1]) +
560  so->markPos.lastItem * sizeof(BTScanPosItem));
561  if (so->currTuples)
562  memcpy(so->currTuples, so->markTuples,
563  so->markPos.nextTupleOffset);
564  }
565  else
566  BTScanPosInvalidate(so->currPos);
567  }
568 }

◆ btvacuumcleanup()

IndexBulkDeleteResult* btvacuumcleanup ( IndexVacuumInfo info,
IndexBulkDeleteResult stats 
)

Definition at line 893 of file nbtree.c.

References _bt_update_meta_cleanup_info(), _bt_vacuum_needs_cleanup(), IndexVacuumInfo::analyze_only, btvacuumscan(), IndexVacuumInfo::estimated_count, IndexVacuumInfo::index, IndexVacuumInfo::num_heap_tuples, IndexBulkDeleteResult::num_index_tuples, and palloc0().

Referenced by bthandler().

894 {
895  /* No-op in ANALYZE ONLY mode */
896  if (info->analyze_only)
897  return stats;
898 
899  /*
900  * If btbulkdelete was called, we need not do anything, just return the
901  * stats from the latest btbulkdelete call. If it wasn't called, we might
902  * still need to do a pass over the index, to recycle any newly-recyclable
903  * pages or to obtain index statistics. _bt_vacuum_needs_cleanup
904  * determines if either are needed.
905  *
906  * Since we aren't going to actually delete any leaf items, there's no
907  * need to go through all the vacuum-cycle-ID pushups.
908  */
909  if (stats == NULL)
910  {
911  TransactionId oldestBtpoXact;
912 
913  /* Check if we need a cleanup */
914  if (!_bt_vacuum_needs_cleanup(info))
915  return NULL;
916 
917  stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
918  btvacuumscan(info, stats, NULL, NULL, 0, &oldestBtpoXact);
919 
920  /* Update cleanup-related information in the metapage */
921  _bt_update_meta_cleanup_info(info->index, oldestBtpoXact,
922  info->num_heap_tuples);
923  }
924 
925  /*
926  * It's quite possible for us to be fooled by concurrent page splits into
927  * double-counting some index tuples, so disbelieve any total that exceeds
928  * the underlying heap's count ... if we know that accurately. Otherwise
929  * this might just make matters worse.
930  */
931  if (!info->estimated_count)
932  {
933  if (stats->num_index_tuples > info->num_heap_tuples)
934  stats->num_index_tuples = info->num_heap_tuples;
935  }
936 
937  return stats;
938 }

◆ btvacuumpage()

static void btvacuumpage ( BTVacState vstate,
BlockNumber  blkno,
BlockNumber  orig_blkno 
)
static

Definition at line 1102 of file nbtree.c.

References _bt_checkpage(), _bt_delitems_vacuum(), _bt_page_recyclable(), _bt_pagedel(), _bt_relbuf(), BT_READ, BTP_SPLIT_END, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, buf, BUFFER_LOCK_UNLOCK, BufferGetPage, BTVacState::callback, callback(), BTVacState::callback_state, BTVacState::cycleid, IndexVacuumInfo::index, BTVacState::info, BTVacState::lastBlockLocked, BTVacState::lastBlockVacuumed, LockBuffer(), LockBufferForCleanup(), MAIN_FORKNUM, MarkBufferDirtyHint(), MaxOffsetNumber, MemoryContextReset(), MemoryContextSwitchTo(), IndexBulkDeleteResult::num_index_tuples, OffsetNumberNext, BTVacState::oldestBtpoXact, P_FIRSTDATAKEY, P_IGNORE, P_ISDELETED, P_ISHALFDEAD, P_ISLEAF, P_NONE, P_RIGHTMOST, BTVacState::pagedelcontext, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageGetSpecialPointer, PageIsNew, IndexBulkDeleteResult::pages_deleted, RBM_NORMAL, ReadBufferExtended(), RecordFreeIndexPage(), BTVacState::stats, IndexVacuumInfo::strategy, IndexTupleData::t_tid, BTVacState::totFreePages, TransactionIdIsValid, TransactionIdPrecedes(), IndexBulkDeleteResult::tuples_removed, vacuum_delay_point(), and BTPageOpaqueData::xact.

Referenced by btvacuumscan().

1103 {
1104  IndexVacuumInfo *info = vstate->info;
1105  IndexBulkDeleteResult *stats = vstate->stats;
1106  IndexBulkDeleteCallback callback = vstate->callback;
1107  void *callback_state = vstate->callback_state;
1108  Relation rel = info->index;
1109  bool delete_now;
1110  BlockNumber recurse_to;
1111  Buffer buf;
1112  Page page;
1113  BTPageOpaque opaque = NULL;
1114 
1115 restart:
1116  delete_now = false;
1117  recurse_to = P_NONE;
1118 
1119  /* call vacuum_delay_point while not holding any buffer lock */
1120  vacuum_delay_point();
1121 
1122  /*
1123  * We can't use _bt_getbuf() here because it always applies
1124  * _bt_checkpage(), which will barf on an all-zero page. We want to
1125  * recycle all-zero pages, not fail. Also, we want to use a nondefault
1126  * buffer access strategy.
1127  */
1128  buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
1129  info->strategy);
1130  LockBuffer(buf, BT_READ);
1131  page = BufferGetPage(buf);
1132  if (!PageIsNew(page))
1133  {
1134  _bt_checkpage(rel, buf);
1135  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1136  }
1137 
1138  /*
1139  * If we are recursing, the only case we want to do anything with is a
1140  * live leaf page having the current vacuum cycle ID. Any other state
1141  * implies we already saw the page (eg, deleted it as being empty).
1142  */
1143  if (blkno != orig_blkno)
1144  {
1145  if (_bt_page_recyclable(page) ||
1146  P_IGNORE(opaque) ||
1147  !P_ISLEAF(opaque) ||
1148  opaque->btpo_cycleid != vstate->cycleid)
1149  {
1150  _bt_relbuf(rel, buf);
1151  return;
1152  }
1153  }
1154 
1155  /* Page is valid, see what to do with it */
1156  if (_bt_page_recyclable(page))
1157  {
1158  /* Okay to recycle this page */
1159  RecordFreeIndexPage(rel, blkno);
1160  vstate->totFreePages++;
1161  stats->pages_deleted++;
1162  }
1163  else if (P_ISDELETED(opaque))
1164  {
1165  /* Already deleted, but can't recycle yet */
1166  stats->pages_deleted++;
1167 
1168  /* Update the oldest btpo.xact */
1169  if (!TransactionIdIsValid(vstate->oldestBtpoXact) ||
1170  TransactionIdPrecedes(opaque->btpo.xact, vstate->oldestBtpoXact))
1171  vstate->oldestBtpoXact = opaque->btpo.xact;
1172  }
1173  else if (P_ISHALFDEAD(opaque))
1174  {
1175  /* Half-dead, try to delete */
1176  delete_now = true;
1177  }
1178  else if (P_ISLEAF(opaque))
1179  {
1180  OffsetNumber deletable[MaxOffsetNumber];
1181  int ndeletable;
1182  OffsetNumber offnum,
1183  minoff,
1184  maxoff;
1185 
1186  /*
1187  * Trade in the initial read lock for a super-exclusive write lock on
1188  * this page. We must get such a lock on every leaf page over the
1189  * course of the vacuum scan, whether or not it actually contains any
1190  * deletable tuples --- see nbtree/README.
1191  */
1192  LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1193  LockBufferForCleanup(buf);
1194 
1195  /*
1196  * Remember highest leaf page number we've taken cleanup lock on; see
1197  * notes in btvacuumscan
1198  */
1199  if (blkno > vstate->lastBlockLocked)
1200  vstate->lastBlockLocked = blkno;
1201 
1202  /*
1203  * Check whether we need to recurse back to earlier pages. What we
1204  * are concerned about is a page split that happened since we started
1205  * the vacuum scan. If the split moved some tuples to a lower page
1206  * then we might have missed 'em. If so, set up for tail recursion.
1207  * (Must do this before possibly clearing btpo_cycleid below!)
1208  */
1209  if (vstate->cycleid != 0 &&
1210  opaque->btpo_cycleid == vstate->cycleid &&
1211  !(opaque->btpo_flags & BTP_SPLIT_END) &&
1212  !P_RIGHTMOST(opaque) &&
1213  opaque->btpo_next < orig_blkno)
1214  recurse_to = opaque->btpo_next;
1215 
1216  /*
1217  * Scan over all items to see which ones need to be deleted according
1218  * to the callback function.
1219  */
1220  ndeletable = 0;
1221  minoff = P_FIRSTDATAKEY(opaque);
1222  maxoff = PageGetMaxOffsetNumber(page);
1223  if (callback)
1224  {
1225  for (offnum = minoff;
1226  offnum <= maxoff;
1227  offnum = OffsetNumberNext(offnum))
1228  {
1229  IndexTuple itup;
1230  ItemPointer htup;
1231 
1232  itup = (IndexTuple) PageGetItem(page,
1233  PageGetItemId(page, offnum));
1234  htup = &(itup->t_tid);
1235 
1236  /*
1237  * During Hot Standby we currently assume that
1238  * XLOG_BTREE_VACUUM records do not produce conflicts. That is
1239  * only true as long as the callback function depends only
1240  * upon whether the index tuple refers to heap tuples removed
1241  * in the initial heap scan. When vacuum starts it derives a
1242  * value of OldestXmin. Backends taking later snapshots could
1243  * have a RecentGlobalXmin with a later xid than the vacuum's
1244  * OldestXmin, so it is possible that row versions deleted
1245  * after OldestXmin could be marked as killed by other
1246  * backends. The callback function *could* look at the index
1247  * tuple state in isolation and decide to delete the index
1248  * tuple, though currently it does not. If it ever did, we
1249  * would need to reconsider whether XLOG_BTREE_VACUUM records
1250  * should cause conflicts. If they did cause conflicts they
1251  * would be fairly harsh conflicts, since we haven't yet
1252  * worked out a way to pass a useful value for
1253  * latestRemovedXid on the XLOG_BTREE_VACUUM records. This
1254  * applies to *any* type of index that marks index tuples as
1255  * killed.
1256  */
1257  if (callback(htup, callback_state))
1258  deletable[ndeletable++] = offnum;
1259  }
1260  }
1261 
1262  /*
1263  * Apply any needed deletes. We issue just one _bt_delitems_vacuum()
1264  * call per page, so as to minimize WAL traffic.
1265  */
1266  if (ndeletable > 0)
1267  {
1268  /*
1269  * Notice that the issued XLOG_BTREE_VACUUM WAL record includes
1270  * all the information the replay code needs to get a cleanup
1271  * lock on all pages between the previous lastBlockVacuumed and
1272  * this page. This ensures that WAL replay locks all leaf pages at
1273  * some point, which is important should non-MVCC scans be
1274  * requested. This is currently unused on standby, but we record
1275  * it anyway, so that the WAL contains the required information.
1276  *
1277  * Since we can visit leaf pages out-of-order when recursing,
1278  * replay might end up locking such pages an extra time, but it
1279  * doesn't seem worth the amount of bookkeeping it'd take to avoid
1280  * that.
1281  */
1282  _bt_delitems_vacuum(rel, buf, deletable, ndeletable,
1283  vstate->lastBlockVacuumed);
1284 
1285  /*
1286  * Remember highest leaf page number we've issued a
1287  * XLOG_BTREE_VACUUM WAL record for.
1288  */
1289  if (blkno > vstate->lastBlockVacuumed)
1290  vstate->lastBlockVacuumed = blkno;
1291 
1292  stats->tuples_removed += ndeletable;
1293  /* must recompute maxoff */
1294  maxoff = PageGetMaxOffsetNumber(page);
1295  }
1296  else
1297  {
1298  /*
1299  * If the page has been split during this vacuum cycle, it seems
1300  * worth expending a write to clear btpo_cycleid even if we don't
1301  * have any deletions to do. (If we do, _bt_delitems_vacuum takes
1302  * care of this.) This ensures we won't process the page again.
1303  *
1304  * We treat this like a hint-bit update because there's no need to
1305  * WAL-log it.
1306  */
1307  if (vstate->cycleid != 0 &&
1308  opaque->btpo_cycleid == vstate->cycleid)
1309  {
1310  opaque->btpo_cycleid = 0;
1311  MarkBufferDirtyHint(buf, true);
1312  }
1313  }
1314 
1315  /*
1316  * If it's now empty, try to delete; else count the live tuples. We
1317  * don't delete when recursing, though, to avoid putting entries into
1318  * freePages out-of-order (doesn't seem worth any extra code to handle
1319  * the case).
1320  */
1321  if (minoff > maxoff)
1322  delete_now = (blkno == orig_blkno);
1323  else
1324  stats->num_index_tuples += maxoff - minoff + 1;
1325  }
1326 
1327  if (delete_now)
1328  {
1329  MemoryContext oldcontext;
1330  int ndel;
1331 
1332  /* Run pagedel in a temp context to avoid memory leakage */
1333  MemoryContextReset(vstate->pagedelcontext);
1334  oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
1335 
1336  ndel = _bt_pagedel(rel, buf);
1337 
1338  /* count only this page, else may double-count parent */
1339  if (ndel)
1340  {
1341  stats->pages_deleted++;
1342  if (!TransactionIdIsValid(vstate->oldestBtpoXact) ||
1343  TransactionIdPrecedes(opaque->btpo.xact, vstate->oldestBtpoXact))
1344  vstate->oldestBtpoXact = opaque->btpo.xact;
1345  }
1346 
1347  MemoryContextSwitchTo(oldcontext);
1348  /* pagedel released buffer, so we shouldn't */
1349  }
1350  else
1351  _bt_relbuf(rel, buf);
1352 
1353  /*
1354  * This is really tail recursion, but if the compiler is too stupid to
1355  * optimize it as such, we'd eat an uncomfortably large amount of stack
1356  * space per recursion level (due to the deletable[] array). A failure is
1357  * improbable since the number of levels isn't likely to be large ... but
1358  * just in case, let's hand-optimize into a loop.
1359  */
1360  if (recurse_to != P_NONE)
1361  {
1362  blkno = recurse_to;
1363  goto restart;
1364  }
1365 }

◆ btvacuumscan()

static void btvacuumscan ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats,
IndexBulkDeleteCallback  callback,
void *  callback_state,
BTCycleId  cycleid,
TransactionId *  oldestBtpoXact 
)
static

Definition at line 953 of file nbtree.c.

References _bt_checkpage(), _bt_delitems_vacuum(), _bt_relbuf(), ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, BTREE_METAPAGE, btvacuumpage(), buf, BTVacState::callback, callback(), BTVacState::callback_state, CurrentMemoryContext, BTVacState::cycleid, IndexBulkDeleteResult::estimated_count, ExclusiveLock, IndexVacuumInfo::index, IndexFreeSpaceMapVacuum(), BTVacState::info, InvalidTransactionId, BTVacState::lastBlockLocked, BTVacState::lastBlockVacuumed, LockBufferForCleanup(), LockRelationForExtension(), MAIN_FORKNUM, MemoryContextDelete(), IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::num_pages, BTVacState::oldestBtpoXact, BTVacState::pagedelcontext, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, RBM_NORMAL, ReadBufferExtended(), RELATION_IS_LOCAL, RelationGetNumberOfBlocks, BTVacState::stats, IndexVacuumInfo::strategy, BTVacState::totFreePages, UnlockRelationForExtension(), and XLogStandbyInfoActive.

Referenced by btbulkdelete(), and btvacuumcleanup().
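
As a usage sketch, a cleanup-only invocation — loosely modeled on what btvacuumcleanup() does when no btbulkdelete() call preceded it; allocation details and metapage follow-up are elided — passes no callback and no vacuum cycle ID:

IndexBulkDeleteResult *stats;
TransactionId oldestBtpoXact;

stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));

/* NULL callback/state: nothing to delete, just scan and tidy the index */
btvacuumscan(info, stats, NULL, NULL, 0, &oldestBtpoXact);

/* oldestBtpoXact now bounds when deleted pages become recyclable */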

956 {
957  Relation rel = info->index;
958  BTVacState vstate;
959  BlockNumber num_pages;
960  BlockNumber blkno;
961  bool needLock;
962 
963  /*
964  * Reset counts that will be incremented during the scan; needed in case
965  * of multiple scans during a single VACUUM command
966  */
967  stats->estimated_count = false;
968  stats->num_index_tuples = 0;
969  stats->pages_deleted = 0;
970 
971  /* Set up info to pass down to btvacuumpage */
972  vstate.info = info;
973  vstate.stats = stats;
974  vstate.callback = callback;
975  vstate.callback_state = callback_state;
976  vstate.cycleid = cycleid;
977  vstate.lastBlockVacuumed = BTREE_METAPAGE; /* Initialise at first block */
978  vstate.lastBlockLocked = BTREE_METAPAGE; /* Initialise at first block */
979  vstate.totFreePages = 0;
980  vstate.oldestBtpoXact = InvalidTransactionId;
981 
982  /* Create a temporary memory context to run _bt_pagedel in */
983  vstate.pagedelcontext = AllocSetContextCreate(CurrentMemoryContext,
984  "_bt_pagedel",
985  ALLOCSET_DEFAULT_SIZES);
986 
987  /*
988  * The outer loop iterates over all index pages except the metapage, in
989  * physical order (we hope the kernel will cooperate in providing
990  * read-ahead for speed). It is critical that we visit all leaf pages,
991  * including ones added after we start the scan, else we might fail to
992  * delete some deletable tuples. Hence, we must repeatedly check the
993  * relation length. We must acquire the relation-extension lock while
994  * doing so to avoid a race condition: if someone else is extending the
995  * relation, there is a window where bufmgr/smgr have created a new
996  * all-zero page but it hasn't yet been write-locked by _bt_getbuf(). If
997  * we manage to scan such a page here, we'll improperly assume it can be
998  * recycled. Taking the lock synchronizes things enough to prevent a
999  * problem: either num_pages won't include the new page, or _bt_getbuf
1000  * already has write lock on the buffer and it will be fully initialized
1001  * before we can examine it. (See also vacuumlazy.c, which has the same
1002  * issue.) Also, we need not worry if a page is added immediately after
1003  * we look; the page splitting code already has write-lock on the left
1004  * page before it adds a right page, so we must already have processed any
1005  * tuples due to be moved into such a page.
1006  *
1007  * We can skip locking for new or temp relations, however, since no one
1008  * else could be accessing them.
1009  */
1010  needLock = !RELATION_IS_LOCAL(rel);
1011 
1012  blkno = BTREE_METAPAGE + 1;
1013  for (;;)
1014  {
1015  /* Get the current relation length */
1016  if (needLock)
1017  LockRelationForExtension(rel, ExclusiveLock);
1018  num_pages = RelationGetNumberOfBlocks(rel);
1019  if (needLock)
1020  UnlockRelationForExtension(rel, ExclusiveLock);
1021 
1022  /* Quit if we've scanned the whole relation */
1023  if (blkno >= num_pages)
1024  break;
1025  /* Iterate over pages, then loop back to recheck length */
1026  for (; blkno < num_pages; blkno++)
1027  {
1028  btvacuumpage(&vstate, blkno, blkno);
1029  }
1030  }
1031 
1032  /*
1033  * Check to see if we need to issue one final WAL record for this index,
1034  * which may be needed for correctness on a hot standby node when non-MVCC
1035  * index scans could take place.
1036  *
1037  * If the WAL is replayed in hot standby, the replay process needs to get
1038  * cleanup locks on all index leaf pages, just as we've been doing here.
1039  * However, we won't issue any WAL records about pages that have no items
1040  * to be deleted. For pages between pages we've vacuumed, the replay code
1041  * will take locks under the direction of the lastBlockVacuumed fields in
1042  * the XLOG_BTREE_VACUUM WAL records. To cover pages after the last one
1043  * we vacuum, we need to issue a dummy XLOG_BTREE_VACUUM WAL record
1044  * against the last leaf page in the index, if that one wasn't vacuumed.
1045  */
1046  if (XLogStandbyInfoActive() &&
1047  vstate.lastBlockVacuumed < vstate.lastBlockLocked)
1048  {
1049  Buffer buf;
1050 
1051  /*
1052  * The page should be valid, but we can't use _bt_getbuf() because we
1053  * want to use a nondefault buffer access strategy. Since we aren't
1054  * going to delete any items, getting cleanup lock again is probably
1055  * overkill, but for consistency do that anyway.
1056  */
1057  buf = ReadBufferExtended(rel, MAIN_FORKNUM, vstate.lastBlockLocked,
1058  RBM_NORMAL, info->strategy);
1059  LockBufferForCleanup(buf);
1060  _bt_checkpage(rel, buf);
1061  _bt_delitems_vacuum(rel, buf, NULL, 0, vstate.lastBlockVacuumed);
1062  _bt_relbuf(rel, buf);
1063  }
1064 
1065  MemoryContextDelete(vstate.pagedelcontext);
1066 
1067  /*
1068  * If we found any recyclable pages (and recorded them in the FSM), then
1069  * forcibly update the upper-level FSM pages to ensure that searchers can
1070  * find them. It's possible that the pages were also found during
1071  * previous scans and so this is a waste of time, but it's cheap enough
1072  * relative to scanning the index that it shouldn't matter much, and
1073  * making sure that free pages are available sooner not later seems
1074  * worthwhile.
1075  *
1076  * Note that if no recyclable pages exist, we don't bother vacuuming the
1077  * FSM at all.
1078  */
1079  if (vstate.totFreePages > 0)
1080  IndexFreeSpaceMapVacuum(rel);
1081 
1082  /* update statistics */
1083  stats->num_pages = num_pages;
1084  stats->pages_free = vstate.totFreePages;
1085 
1086  if (oldestBtpoXact)
1087  *oldestBtpoXact = vstate.oldestBtpoXact;
1088 }
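
Only the lastBlockVacuumed bookkeeping happens here; the cleanup-locking it enables belongs to WAL replay. A hedged sketch of that replay-side loop follows (xlrec, thisblkno, and rnode are assumed to come from the XLOG_BTREE_VACUUM record being replayed; as the comments above note, current standbys do not actually run such a loop, so this reconstructs the intended behavior rather than current code):

BlockNumber blk;

for (blk = xlrec->lastBlockVacuumed + 1; blk < thisblkno; blk++)
{
    Buffer      buffer;

    /* Read the block only if it exists; don't emit WAL for the read */
    buffer = XLogReadBufferExtended(rnode, MAIN_FORKNUM, blk,
                                    RBM_NORMAL_NO_LOG);
    if (BufferIsValid(buffer))
    {
        /* Waits until no other backend holds a pin on the page */
        LockBufferForCleanup(buffer);
        UnlockReleaseBuffer(buffer);
    }
}

Taking and immediately releasing the cleanup lock is enough: any non-MVCC scan paused on one of these pages must drop its pin first, so it cannot later return tuples that vacuum has removed.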