PostgreSQL Source Code (git master)
nbtree.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "access/nbtxlog.h"
#include "access/relscan.h"
#include "access/xlog.h"
#include "commands/progress.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "nodes/execnodes.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "storage/condition_variable.h"
#include "storage/indexfsm.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/smgr.h"
#include "utils/builtins.h"
#include "utils/index_selfuncs.h"
#include "utils/memutils.h"
Include dependency graph for nbtree.c:


Data Structures

struct  BTVacState
 
struct  BTParallelScanDescData
 

Typedefs

typedef struct BTParallelScanDescData BTParallelScanDescData
 
typedef struct BTParallelScanDescData * BTParallelScanDesc
 

Enumerations

enum  BTPS_State { BTPARALLEL_NOT_INITIALIZED, BTPARALLEL_ADVANCING, BTPARALLEL_IDLE, BTPARALLEL_DONE }
 

Functions

static void btvacuumscan (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid, TransactionId *oldestBtpoXact)
 
static void btvacuumpage (BTVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno)
 
Datum bthandler (PG_FUNCTION_ARGS)
 
void btbuildempty (Relation index)
 
bool btinsert (Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, IndexInfo *indexInfo)
 
bool btgettuple (IndexScanDesc scan, ScanDirection dir)
 
int64 btgetbitmap (IndexScanDesc scan, TIDBitmap *tbm)
 
IndexScanDesc btbeginscan (Relation rel, int nkeys, int norderbys)
 
void btrescan (IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
 
void btendscan (IndexScanDesc scan)
 
void btmarkpos (IndexScanDesc scan)
 
void btrestrpos (IndexScanDesc scan)
 
Size btestimateparallelscan (void)
 
void btinitparallelscan (void *target)
 
void btparallelrescan (IndexScanDesc scan)
 
bool _bt_parallel_seize (IndexScanDesc scan, BlockNumber *pageno)
 
void _bt_parallel_release (IndexScanDesc scan, BlockNumber scan_page)
 
void _bt_parallel_done (IndexScanDesc scan)
 
void _bt_parallel_advance_array_keys (IndexScanDesc scan)
 
static bool _bt_vacuum_needs_cleanup (IndexVacuumInfo *info)
 
IndexBulkDeleteResult * btbulkdelete (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
 
IndexBulkDeleteResult * btvacuumcleanup (IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 
bool btcanreturn (Relation index, int attno)
 

Typedef Documentation

◆ BTParallelScanDesc

Definition at line 92 of file nbtree.c.

◆ BTParallelScanDescData

Enumeration Type Documentation

◆ BTPS_State

enum BTPS_State
Enumerator
BTPARALLEL_NOT_INITIALIZED 
BTPARALLEL_ADVANCING 
BTPARALLEL_IDLE 
BTPARALLEL_DONE 

Definition at line 68 of file nbtree.c.
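
The enumerators track which participant currently owns the right to advance the scan. A rough transition diagram, inferred from the functions documented below (not itself part of the source):

    /*
     * BTPARALLEL_NOT_INITIALIZED --_bt_parallel_seize()-------------> BTPARALLEL_ADVANCING
     * BTPARALLEL_ADVANCING -------_bt_parallel_release()------------> BTPARALLEL_IDLE
     * BTPARALLEL_IDLE ------------_bt_parallel_seize()--------------> BTPARALLEL_ADVANCING
     * (any state) ----------------_bt_parallel_done()---------------> BTPARALLEL_DONE
     * BTPARALLEL_DONE ---_bt_parallel_advance_array_keys()----------> BTPARALLEL_NOT_INITIALIZED
     */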

Function Documentation

◆ _bt_parallel_advance_array_keys()

void _bt_parallel_advance_array_keys ( IndexScanDesc  scan)

Definition at line 763 of file nbtree.c.

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_DONE, BTPARALLEL_NOT_INITIALIZED, InvalidBlockNumber, OffsetToPointer, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_advance_array_keys().

764 {
765  BTScanOpaque so = (BTScanOpaque) scan->opaque;
766  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
767  BTParallelScanDesc btscan;
768 
769  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
770  parallel_scan->ps_offset);
771 
772  so->arrayKeyCount++;
773  SpinLockAcquire(&btscan->btps_mutex);
774  if (btscan->btps_pageStatus == BTPARALLEL_DONE)
775  {
776  btscan->btps_scanPage = InvalidBlockNumber;
777  btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
778  btscan->btps_arrayKeyCount++;
779  }
780  SpinLockRelease(&btscan->btps_mutex);
781 }
#define SpinLockAcquire(lock)
Definition: spin.h:62
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:670
int arrayKeyCount
Definition: nbtree.h:639
#define OffsetToPointer(base, offset)
Definition: c.h:635
#define SpinLockRelease(lock)
Definition: spin.h:64
struct ParallelIndexScanDescData * parallel_scan
Definition: relscan.h:151
#define InvalidBlockNumber
Definition: block.h:33

◆ _bt_parallel_done()

void _bt_parallel_done ( IndexScanDesc  scan)

Definition at line 722 of file nbtree.c.

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_DONE, ConditionVariableBroadcast(), OffsetToPointer, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_first(), and _bt_readnextpage().

723 {
724  BTScanOpaque so = (BTScanOpaque) scan->opaque;
725  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
726  BTParallelScanDesc btscan;
727  bool status_changed = false;
728 
729  /* Do nothing, for non-parallel scans */
730  if (parallel_scan == NULL)
731  return;
732 
733  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
734  parallel_scan->ps_offset);
735 
736  /*
737  * Mark the parallel scan as done for this combination of scan keys,
738  * unless some other process already did so. See also
739  * _bt_advance_array_keys.
740  */
741  SpinLockAcquire(&btscan->btps_mutex);
742  if (so->arrayKeyCount >= btscan->btps_arrayKeyCount &&
743  btscan->btps_pageStatus != BTPARALLEL_DONE)
744  {
745  btscan->btps_pageStatus = BTPARALLEL_DONE;
746  status_changed = true;
747  }
748  SpinLockRelease(&btscan->btps_mutex);
749 
750  /* wake up all the workers associated with this parallel scan */
751  if (status_changed)
752  ConditionVariableBroadcast(&btscan->btps_cv);
753 }
void ConditionVariableBroadcast(ConditionVariable *cv)
struct BTParallelScanDescData * BTParallelScanDesc
Definition: nbtree.c:92
#define SpinLockAcquire(lock)
Definition: spin.h:62
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:670
int arrayKeyCount
Definition: nbtree.h:639
#define OffsetToPointer(base, offset)
Definition: c.h:635
#define SpinLockRelease(lock)
Definition: spin.h:64
struct ParallelIndexScanDescData * parallel_scan
Definition: relscan.h:151

◆ _bt_parallel_release()

void _bt_parallel_release ( IndexScanDesc  scan,
BlockNumber  scan_page 
)

Definition at line 699 of file nbtree.c.

References BTPARALLEL_IDLE, BTParallelScanDescData::btps_cv, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, ConditionVariableSignal(), OffsetToPointer, IndexScanDescData::parallel_scan, ParallelIndexScanDescData::ps_offset, SpinLockAcquire, and SpinLockRelease.

Referenced by _bt_readnextpage(), and _bt_readpage().

700 {
701  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
702  BTParallelScanDesc btscan;
703 
704  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
705  parallel_scan->ps_offset);
706 
707  SpinLockAcquire(&btscan->btps_mutex);
708  btscan->btps_scanPage = scan_page;
709  btscan->btps_pageStatus = BTPARALLEL_IDLE;
710  SpinLockRelease(&btscan->btps_mutex);
711  ConditionVariableSignal(&btscan->btps_cv);
712 }
slock_t btps_mutex
Definition: nbtree.c:88
struct BTParallelScanDescData * BTParallelScanDesc
Definition: nbtree.c:92
#define SpinLockAcquire(lock)
Definition: spin.h:62
void ConditionVariableSignal(ConditionVariable *cv)
ConditionVariable btps_cv
Definition: nbtree.c:89
#define OffsetToPointer(base, offset)
Definition: c.h:635
#define SpinLockRelease(lock)
Definition: spin.h:64
struct ParallelIndexScanDescData * parallel_scan
Definition: relscan.h:151
BlockNumber btps_scanPage
Definition: nbtree.c:82
BTPS_State btps_pageStatus
Definition: nbtree.c:83

◆ _bt_parallel_seize()

bool _bt_parallel_seize ( IndexScanDesc  scan,
BlockNumber *  pageno 
)

Definition at line 641 of file nbtree.c.

References BTScanOpaqueData::arrayKeyCount, BTPARALLEL_ADVANCING, BTPARALLEL_DONE, ConditionVariableCancelSleep(), ConditionVariableSleep(), OffsetToPointer, IndexScanDescData::opaque, P_NONE, IndexScanDescData::parallel_scan, SpinLockAcquire, SpinLockRelease, status(), and WAIT_EVENT_BTREE_PAGE.

Referenced by _bt_first(), _bt_readnextpage(), and _bt_steppage().

642 {
643  BTScanOpaque so = (BTScanOpaque) scan->opaque;
644  BTPS_State pageStatus;
645  bool exit_loop = false;
646  bool status = true;
647  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
648  BTParallelScanDesc btscan;
649 
650  *pageno = P_NONE;
651 
652  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
653  parallel_scan->ps_offset);
654 
655  while (1)
656  {
657  SpinLockAcquire(&btscan->btps_mutex);
658  pageStatus = btscan->btps_pageStatus;
659 
660  if (so->arrayKeyCount < btscan->btps_arrayKeyCount)
661  {
662  /* Parallel scan has already advanced to a new set of scankeys. */
663  status = false;
664  }
665  else if (pageStatus == BTPARALLEL_DONE)
666  {
667  /*
668  * We're done with this set of scankeys. This may be the end, or
669  * there could be more sets to try.
670  */
671  status = false;
672  }
673  else if (pageStatus != BTPARALLEL_ADVANCING)
674  {
675  /*
676  * We have successfully seized control of the scan for the purpose
677  * of advancing it to a new page!
678  */
679  btscan->btps_pageStatus = BTPARALLEL_ADVANCING;
680  *pageno = btscan->btps_scanPage;
681  exit_loop = true;
682  }
683  SpinLockRelease(&btscan->btps_mutex);
684  if (exit_loop || !status)
685  break;
686  ConditionVariableSleep(&btscan->btps_cv, WAIT_EVENT_BTREE_PAGE);
687  }
688  ConditionVariableCancelSleep();
689 
690  return status;
691 }
#define P_NONE
Definition: nbtree.h:181
#define SpinLockAcquire(lock)
Definition: spin.h:62
void ConditionVariableCancelSleep(void)
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:670
BTPS_State
Definition: nbtree.c:68
int arrayKeyCount
Definition: nbtree.h:639
#define OffsetToPointer(base, offset)
Definition: c.h:635
#define SpinLockRelease(lock)
Definition: spin.h:64
struct ParallelIndexScanDescData * parallel_scan
Definition: relscan.h:151
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
static void status(const char *fmt,...) pg_attribute_printf(1, 2)
Definition: pg_regress.c:227
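
Together with _bt_parallel_release() and _bt_parallel_done(), this gives each participating backend a loop of roughly the following shape. This is a simplified, hypothetical sketch, not the actual caller code; the real drivers are _bt_first(), _bt_steppage() and _bt_readnextpage():

    #include "postgres.h"
    #include "access/nbtree.h"

    static void
    parallel_worker_sketch(IndexScanDesc scan)
    {
        BlockNumber pageno;

        while (_bt_parallel_seize(scan, &pageno))
        {
            /*
             * Status is now BTPARALLEL_ADVANCING: no other worker can move
             * the scan until we release it or declare it done.
             */
            if (pageno == P_NONE)
            {
                /* scan not started yet: descend the tree to the first leaf */
            }

            /*
             * As soon as the next page is known, publish it so other workers
             * can proceed:            _bt_parallel_release(scan, next_page);
             * if no next page exists: _bt_parallel_done(scan);
             */

            /* ... then return tuples from the page we seized ... */
        }

        /* seize failed: scan is BTPARALLEL_DONE, or array keys moved past ours */
    }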

◆ _bt_vacuum_needs_cleanup()

static bool _bt_vacuum_needs_cleanup ( IndexVacuumInfo *  info)
static

Definition at line 788 of file nbtree.c.

References _bt_getbuf(), _bt_relbuf(), BT_READ, BTMetaPageData::btm_last_cleanup_num_heap_tuples, BTMetaPageData::btm_oldest_btpo_xact, BTMetaPageData::btm_version, BTPageGetMeta, BTREE_METAPAGE, BTREE_NOVAC_VERSION, BufferGetPage, IndexVacuumInfo::index, IndexVacuumInfo::num_heap_tuples, RelationData::rd_options, RecentGlobalXmin, TransactionIdIsValid, TransactionIdPrecedes(), vacuum_cleanup_index_scale_factor, and StdRdOptions::vacuum_cleanup_index_scale_factor.

Referenced by btvacuumcleanup().

789 {
790  Buffer metabuf;
791  Page metapg;
792  BTMetaPageData *metad;
793  bool result = false;
794 
795  metabuf = _bt_getbuf(info->index, BTREE_METAPAGE, BT_READ);
796  metapg = BufferGetPage(metabuf);
797  metad = BTPageGetMeta(metapg);
798 
799  if (metad->btm_version < BTREE_NOVAC_VERSION)
800  {
801  /*
802  * Do cleanup if metapage needs upgrade, because we don't have
803  * cleanup-related meta-information yet.
804  */
805  result = true;
806  }
807  else if (TransactionIdIsValid(metad->btm_oldest_btpo_xact) &&
808  TransactionIdPrecedes(metad->btm_oldest_btpo_xact,
809  RecentGlobalXmin))
810  {
811  /*
812  * If oldest btpo.xact in the deleted pages is older than
813  * RecentGlobalXmin, then at least one deleted page can be recycled.
814  */
815  result = true;
816  }
817  else
818  {
819  StdRdOptions *relopts;
820  float8 cleanup_scale_factor;
821  float8 prev_num_heap_tuples;
822 
823  /*
824  * If table receives enough insertions and no cleanup was performed,
825  * then index would appear have stale statistics. If scale factor is
826  * set, we avoid that by performing cleanup if the number of inserted
827  * tuples exceeds vacuum_cleanup_index_scale_factor fraction of
828  * original tuples count.
829  */
830  relopts = (StdRdOptions *) info->index->rd_options;
831  cleanup_scale_factor = (relopts &&
832  relopts->vacuum_cleanup_index_scale_factor >= 0)
833  ? relopts->vacuum_cleanup_index_scale_factor
834  : vacuum_cleanup_index_scale_factor;
835  prev_num_heap_tuples = metad->btm_last_cleanup_num_heap_tuples;
836 
837  if (cleanup_scale_factor <= 0 ||
838  prev_num_heap_tuples <= 0 ||
839  (info->num_heap_tuples - prev_num_heap_tuples) /
840  prev_num_heap_tuples >= cleanup_scale_factor)
841  result = true;
842  }
843 
844  _bt_relbuf(info->index, metabuf);
845  return result;
846 }
uint32 btm_version
Definition: nbtree.h:100
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:757
Relation index
Definition: genam.h:46
double vacuum_cleanup_index_scale_factor
Definition: globals.c:150
#define BT_READ
Definition: nbtree.h:402
double float8
Definition: c.h:491
#define BTPageGetMeta(p)
Definition: nbtree.h:112
TransactionId RecentGlobalXmin
Definition: snapmgr.c:168
#define BTREE_NOVAC_VERSION
Definition: nbtree.h:135
#define BufferGetPage(buffer)
Definition: bufmgr.h:159
#define BTREE_METAPAGE
Definition: nbtree.h:131
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
float8 vacuum_cleanup_index_scale_factor
Definition: rel.h:268
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:912
double num_heap_tuples
Definition: genam.h:51
float8 btm_last_cleanup_num_heap_tuples
Definition: nbtree.h:108
TransactionId btm_oldest_btpo_xact
Definition: nbtree.h:106
#define TransactionIdIsValid(xid)
Definition: transam.h:41
int Buffer
Definition: buf.h:23
bytea * rd_options
Definition: rel.h:126
Pointer Page
Definition: bufpage.h:78
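
As a concrete example of the scale-factor branch: suppose the previous cleanup recorded prev_num_heap_tuples = 1,000,000 and info->num_heap_tuples is now 1,150,000. The inserted fraction is (1150000 - 1000000) / 1000000 = 0.15; with vacuum_cleanup_index_scale_factor = 0.1 that exceeds the threshold, so the function returns true and a full cleanup scan is performed.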

◆ btbeginscan()

IndexScanDesc btbeginscan ( Relation  rel,
int  nkeys,
int  norderbys 
)

Definition at line 348 of file nbtree.c.

References BTScanOpaqueData::arrayContext, BTScanOpaqueData::arrayKeyData, BTScanOpaqueData::arrayKeys, Assert, BTScanPosInvalidate, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanOpaqueData::keyData, BTScanOpaqueData::killedItems, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanOpaqueData::numArrayKeys, IndexScanDescData::numberOfKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), RelationGetDescr, RelationGetIndexScan(), and IndexScanDescData::xs_itupdesc.

Referenced by bthandler().

349 {
350  IndexScanDesc scan;
351  BTScanOpaque so;
352 
353  /* no order by operators allowed */
354  Assert(norderbys == 0);
355 
356  /* get the scan */
357  scan = RelationGetIndexScan(rel, nkeys, norderbys);
358 
359  /* allocate private workspace */
360  so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
361  BTScanPosInvalidate(so->currPos);
362  BTScanPosInvalidate(so->markPos);
363  if (scan->numberOfKeys > 0)
364  so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
365  else
366  so->keyData = NULL;
367 
368  so->arrayKeyData = NULL; /* assume no array keys for now */
369  so->numArrayKeys = 0;
370  so->arrayKeys = NULL;
371  so->arrayContext = NULL;
372 
373  so->killedItems = NULL; /* until needed */
374  so->numKilled = 0;
375 
376  /*
377  * We don't know yet whether the scan will be index-only, so we do not
378  * allocate the tuple workspace arrays until btrescan. However, we set up
379  * scan->xs_itupdesc whether we'll need it or not, since that's so cheap.
380  */
381  so->currTuples = so->markTuples = NULL;
382 
383  scan->xs_itupdesc = RelationGetDescr(rel);
384 
385  scan->opaque = so;
386 
387  return scan;
388 }
#define RelationGetDescr(relation)
Definition: rel.h:445
MemoryContext arrayContext
Definition: nbtree.h:642
char * currTuples
Definition: nbtree.h:653
struct TupleDescData * xs_itupdesc
Definition: relscan.h:128
BTScanPosData markPos
Definition: nbtree.h:667
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:670
int numArrayKeys
Definition: nbtree.h:637
ScanKeyData * ScanKey
Definition: skey.h:75
char * markTuples
Definition: nbtree.h:654
ScanKey arrayKeyData
Definition: nbtree.h:636
int * killedItems
Definition: nbtree.h:645
#define Assert(condition)
Definition: c.h:732
#define BTScanPosInvalidate(scanpos)
Definition: nbtree.h:609
BTScanPosData currPos
Definition: nbtree.h:666
void * palloc(Size size)
Definition: mcxt.c:949
ScanKey keyData
Definition: nbtree.h:633
BTArrayKeyInfo * arrayKeys
Definition: nbtree.h:641
IndexScanDesc RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
Definition: genam.c:80

◆ btbuildempty()

void btbuildempty ( Relation  index)

Definition at line 157 of file nbtree.c.

References _bt_initmetapage(), BTREE_METAPAGE, INIT_FORKNUM, log_newpage(), RelFileNodeBackend::node, P_NONE, PageSetChecksumInplace(), palloc(), RelationData::rd_smgr, SMgrRelationData::smgr_rnode, smgrimmedsync(), and smgrwrite().

Referenced by bthandler().

158 {
159  Page metapage;
160 
161  /* Construct metapage. */
162  metapage = (Page) palloc(BLCKSZ);
163  _bt_initmetapage(metapage, P_NONE, 0);
164 
165  /*
166  * Write the page and log it. It might seem that an immediate sync would
167  * be sufficient to guarantee that the file exists on disk, but recovery
168  * itself might remove it while replaying, for example, an
169  * XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record. Therefore, we need
170  * this even when wal_level=minimal.
171  */
172  PageSetChecksumInplace(metapage, BTREE_METAPAGE);
173  smgrwrite(index->rd_smgr, INIT_FORKNUM, BTREE_METAPAGE,
174  (char *) metapage, true);
175  log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
176  BTREE_METAPAGE, metapage, true);
177 
178  /*
179  * An immediate sync is required even if we xlog'd the page, because the
180  * write did not go through shared_buffers and therefore a concurrent
181  * checkpoint may have moved the redo pointer past our xlog record.
182  */
183  smgrimmedsync(index->rd_smgr, INIT_FORKNUM);
184 }
struct SMgrRelationData * rd_smgr
Definition: rel.h:56
#define P_NONE
Definition: nbtree.h:181
RelFileNodeBackend smgr_rnode
Definition: smgr.h:42
void _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level)
Definition: nbtpage.c:50
void smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync)
Definition: smgr.c:530
#define BTREE_METAPAGE
Definition: nbtree.h:131
RelFileNode node
Definition: relfilenode.h:74
void PageSetChecksumInplace(Page page, BlockNumber blkno)
Definition: bufpage.c:1198
void * palloc(Size size)
Definition: mcxt.c:949
XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno, Page page, bool page_std)
Definition: xloginsert.c:972
void smgrimmedsync(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:637
Pointer Page
Definition: bufpage.h:78
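
This is the ambuildempty callback: it runs when an unlogged btree index is created, writing an empty metapage into the init fork (INIT_FORKNUM), which crash recovery copies over the main fork to reset the index.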

◆ btbulkdelete()

IndexBulkDeleteResult * btbulkdelete ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats,
IndexBulkDeleteCallback  callback,
void *  callback_state 
)

Definition at line 856 of file nbtree.c.

References _bt_end_vacuum(), _bt_end_vacuum_callback(), _bt_start_vacuum(), _bt_update_meta_cleanup_info(), btvacuumscan(), IndexVacuumInfo::index, IndexVacuumInfo::num_heap_tuples, palloc0(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, and PointerGetDatum.

Referenced by bthandler().

858 {
859  Relation rel = info->index;
860  BTCycleId cycleid;
861 
862  /* allocate stats if first time through, else re-use existing struct */
863  if (stats == NULL)
864  stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
865 
866  /* Establish the vacuum cycle ID to use for this scan */
867  /* The ENSURE stuff ensures we clean up shared memory on failure */
868  PG_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
869  {
870  TransactionId oldestBtpoXact;
871 
872  cycleid = _bt_start_vacuum(rel);
873 
874  btvacuumscan(info, stats, callback, callback_state, cycleid,
875  &oldestBtpoXact);
876 
877  /*
878  * Update cleanup-related information in metapage. This information is
879  * used only for cleanup but keeping them up to date can avoid
880  * unnecessary cleanup even after bulkdelete.
881  */
882  _bt_update_meta_cleanup_info(info->index, oldestBtpoXact,
883  info->num_heap_tuples);
884  }
885  PG_END_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
886  _bt_end_vacuum(rel);
887 
888  return stats;
889 }
uint32 TransactionId
Definition: c.h:507
#define PointerGetDatum(X)
Definition: postgres.h:556
Relation index
Definition: genam.h:46
void _bt_end_vacuum_callback(int code, Datum arg)
Definition: nbtutils.c:1978
void _bt_update_meta_cleanup_info(Relation rel, TransactionId oldestBtpoXact, float8 numHeapTuples)
Definition: nbtpage.c:158
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:47
uint16 BTCycleId
Definition: nbtree.h:27
static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid, TransactionId *oldestBtpoXact)
Definition: nbtree.c:957
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
Definition: test_ifaddrs.c:48
void _bt_end_vacuum(Relation rel)
Definition: nbtutils.c:1950
void * palloc0(Size size)
Definition: mcxt.c:980
double num_heap_tuples
Definition: genam.h:51
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:52
BTCycleId _bt_start_vacuum(Relation rel)
Definition: nbtutils.c:1893

◆ btcanreturn()

bool btcanreturn ( Relation  index,
int  attno 
)

Definition at line 1384 of file nbtree.c.

Referenced by bthandler().

1385 {
1386  return true;
1387 }

◆ btendscan()

void btendscan ( IndexScanDesc  scan)

Definition at line 454 of file nbtree.c.

References _bt_killitems(), BTScanOpaqueData::arrayContext, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanOpaqueData::keyData, BTScanOpaqueData::killedItems, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, MemoryContextDelete(), BTScanOpaqueData::numKilled, IndexScanDescData::opaque, and pfree().

Referenced by bthandler().

455 {
456  BTScanOpaque so = (BTScanOpaque) scan->opaque;
457 
458  /* we aren't holding any read locks, but gotta drop the pins */
459  if (BTScanPosIsValid(so->currPos))
460  {
461  /* Before leaving current page, deal with any killed items */
462  if (so->numKilled > 0)
463  _bt_killitems(scan);
464  BTScanPosUnpinIfPinned(so->currPos);
465  }
466 
467  so->markItemIndex = -1;
468  BTScanPosUnpinIfPinned(so->markPos);
469 
470  /* No need to invalidate positions, the RAM is about to be freed. */
471 
472  /* Release storage */
473  if (so->keyData != NULL)
474  pfree(so->keyData);
475  /* so->arrayKeyData and so->arrayKeys are in arrayContext */
476  if (so->arrayContext != NULL)
477  MemoryContextDelete(so->arrayContext);
478  if (so->killedItems != NULL)
479  pfree(so->killedItems);
480  if (so->currTuples != NULL)
481  pfree(so->currTuples);
482  /* so->markTuples should not be pfree'd, see btrescan */
483  pfree(so);
484 }
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:211
MemoryContext arrayContext
Definition: nbtree.h:642
char * currTuples
Definition: nbtree.h:653
#define BTScanPosIsValid(scanpos)
Definition: nbtree.h:603
void _bt_killitems(IndexScanDesc scan)
Definition: nbtutils.c:1718
BTScanPosData markPos
Definition: nbtree.h:667
void pfree(void *pointer)
Definition: mcxt.c:1056
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:670
int markItemIndex
Definition: nbtree.h:663
int * killedItems
Definition: nbtree.h:645
BTScanPosData currPos
Definition: nbtree.h:666
ScanKey keyData
Definition: nbtree.h:633
#define BTScanPosUnpinIfPinned(scanpos)
Definition: nbtree.h:597

◆ btestimateparallelscan()

Size btestimateparallelscan ( void  )

Definition at line 576 of file nbtree.c.

Referenced by bthandler().

577 {
578  return sizeof(BTParallelScanDescData);
579 }
struct BTParallelScanDescData BTParallelScanDescData

◆ btgetbitmap()

int64 btgetbitmap ( IndexScanDesc  scan,
TIDBitmap *  tbm 
)

Definition at line 290 of file nbtree.c.

References _bt_advance_array_keys(), _bt_first(), _bt_next(), _bt_start_array_keys(), BTScanOpaqueData::currPos, ForwardScanDirection, BTScanPosItem::heapTid, BTScanPosData::itemIndex, BTScanPosData::items, BTScanPosData::lastItem, BTScanOpaqueData::numArrayKeys, IndexScanDescData::opaque, tbm_add_tuples(), and IndexScanDescData::xs_heaptid.

Referenced by bthandler().

291 {
292  BTScanOpaque so = (BTScanOpaque) scan->opaque;
293  int64 ntids = 0;
294  ItemPointer heapTid;
295 
296  /*
297  * If we have any array keys, initialize them.
298  */
299  if (so->numArrayKeys)
300  {
301  /* punt if we have any unsatisfiable array keys */
302  if (so->numArrayKeys < 0)
303  return ntids;
304 
305  _bt_start_array_keys(scan, ForwardScanDirection);
306  }
307 
308  /* This loop handles advancing to the next array elements, if any */
309  do
310  {
311  /* Fetch the first page & tuple */
312  if (_bt_first(scan, ForwardScanDirection))
313  {
314  /* Save tuple ID, and continue scanning */
315  heapTid = &scan->xs_heaptid;
316  tbm_add_tuples(tbm, heapTid, 1, false);
317  ntids++;
318 
319  for (;;)
320  {
321  /*
322  * Advance to next tuple within page. This is the same as the
323  * easy case in _bt_next().
324  */
325  if (++so->currPos.itemIndex > so->currPos.lastItem)
326  {
327  /* let _bt_next do the heavy lifting */
328  if (!_bt_next(scan, ForwardScanDirection))
329  break;
330  }
331 
332  /* Save tuple ID, and continue scanning */
333  heapTid = &so->currPos.items[so->currPos.itemIndex].heapTid;
334  tbm_add_tuples(tbm, heapTid, 1, false);
335  ntids++;
336  }
337  }
338  /* Now see if we have more array keys to deal with */
339  } while (so->numArrayKeys && _bt_advance_array_keys(scan, ForwardScanDirection));
340 
341  return ntids;
342 }
bool _bt_advance_array_keys(IndexScanDesc scan, ScanDirection dir)
Definition: nbtutils.c:546
BTScanPosItem items[MaxIndexTuplesPerPage]
Definition: nbtree.h:581
void _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir)
Definition: nbtutils.c:520
int itemIndex
Definition: nbtree.h:579
void tbm_add_tuples(TIDBitmap *tbm, const ItemPointer tids, int ntids, bool recheck)
Definition: tidbitmap.c:376
bool _bt_first(IndexScanDesc scan, ScanDirection dir)
Definition: nbtsearch.c:741
int lastItem
Definition: nbtree.h:578
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:670
int numArrayKeys
Definition: nbtree.h:637
ItemPointerData xs_heaptid
Definition: relscan.h:132
ItemPointerData heapTid
Definition: nbtree.h:542
BTScanPosData currPos
Definition: nbtree.h:666
bool _bt_next(IndexScanDesc scan, ScanDirection dir)
Definition: nbtsearch.c:1343
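
Bitmap index scans reach btgetbitmap() through the generic AM wrapper. A one-line hedged sketch, assuming scan and tbm were set up as for a regular index scan:

    int64 ntids = index_getbitmap(scan, tbm);   /* -> btgetbitmap() */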

◆ btgettuple()

bool btgettuple ( IndexScanDesc  scan,
ScanDirection  dir 
)

Definition at line 216 of file nbtree.c.

References _bt_advance_array_keys(), _bt_first(), _bt_next(), _bt_start_array_keys(), BTScanPosIsValid, BTScanOpaqueData::currPos, BTScanPosData::itemIndex, IndexScanDescData::kill_prior_tuple, BTScanOpaqueData::killedItems, MaxIndexTuplesPerPage, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), and IndexScanDescData::xs_recheck.

Referenced by bthandler().

217 {
218  BTScanOpaque so = (BTScanOpaque) scan->opaque;
219  bool res;
220 
221  /* btree indexes are never lossy */
222  scan->xs_recheck = false;
223 
224  /*
225  * If we have any array keys, initialize them during first call for a
226  * scan. We can't do this in btrescan because we don't know the scan
227  * direction at that time.
228  */
229  if (so->numArrayKeys && !BTScanPosIsValid(so->currPos))
230  {
231  /* punt if we have any unsatisfiable array keys */
232  if (so->numArrayKeys < 0)
233  return false;
234 
235  _bt_start_array_keys(scan, dir);
236  }
237 
238  /* This loop handles advancing to the next array elements, if any */
239  do
240  {
241  /*
242  * If we've already initialized this scan, we can just advance it in
243  * the appropriate direction. If we haven't done so yet, we call
244  * _bt_first() to get the first item in the scan.
245  */
246  if (!BTScanPosIsValid(so->currPos))
247  res = _bt_first(scan, dir);
248  else
249  {
250  /*
251  * Check to see if we should kill the previously-fetched tuple.
252  */
253  if (scan->kill_prior_tuple)
254  {
255  /*
256  * Yes, remember it for later. (We'll deal with all such
257  * tuples at once right before leaving the index page.) The
258  * test for numKilled overrun is not just paranoia: if the
259  * caller reverses direction in the indexscan then the same
260  * item might get entered multiple times. It's not worth
261  * trying to optimize that, so we don't detect it, but instead
262  * just forget any excess entries.
263  */
264  if (so->killedItems == NULL)
265  so->killedItems = (int *)
266  palloc(MaxIndexTuplesPerPage * sizeof(int));
267  if (so->numKilled < MaxIndexTuplesPerPage)
268  so->killedItems[so->numKilled++] = so->currPos.itemIndex;
269  }
270 
271  /*
272  * Now continue the scan.
273  */
274  res = _bt_next(scan, dir);
275  }
276 
277  /* If we have a tuple, return it ... */
278  if (res)
279  break;
280  /* ... otherwise see if we have more array keys to deal with */
281  } while (so->numArrayKeys && _bt_advance_array_keys(scan, dir));
282 
283  return res;
284 }
bool _bt_advance_array_keys(IndexScanDesc scan, ScanDirection dir)
Definition: nbtutils.c:546
void _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir)
Definition: nbtutils.c:520
int itemIndex
Definition: nbtree.h:579
#define BTScanPosIsValid(scanpos)
Definition: nbtree.h:603
bool _bt_first(IndexScanDesc scan, ScanDirection dir)
Definition: nbtsearch.c:741
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:670
int numArrayKeys
Definition: nbtree.h:637
int * killedItems
Definition: nbtree.h:645
BTScanPosData currPos
Definition: nbtree.h:666
#define MaxIndexTuplesPerPage
Definition: itup.h:145
void * palloc(Size size)
Definition: mcxt.c:949
bool kill_prior_tuple
Definition: relscan.h:113
bool _bt_next(IndexScanDesc scan, ScanDirection dir)
Definition: nbtsearch.c:1343
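
These scan entry points are normally reached through the generic index-AM layer rather than called directly. A minimal hedged sketch of a forward scan (snapshot setup, locking and error handling omitted; scan_btree_sketch and eqOpFuncOid are names invented for this example):

    #include "postgres.h"
    #include "access/genam.h"
    #include "access/relscan.h"
    #include "access/skey.h"
    #include "access/stratnum.h"
    #include "utils/snapshot.h"

    static void
    scan_btree_sketch(Relation heapRel, Relation indexRel, Snapshot snapshot,
                      Oid eqOpFuncOid, Datum key)
    {
        ScanKeyData skey;
        IndexScanDesc scan;
        ItemPointer tid;

        /* one equality qual on the first index column */
        ScanKeyInit(&skey, 1, BTEqualStrategyNumber, eqOpFuncOid, key);

        scan = index_beginscan(heapRel, indexRel, snapshot, 1, 0); /* -> btbeginscan() */
        index_rescan(scan, &skey, 1, NULL, 0);                     /* -> btrescan() */

        while ((tid = index_getnext_tid(scan, ForwardScanDirection)) != NULL)
        {
            /* -> btgettuple(); the TID is also left in scan->xs_heaptid */
        }

        index_endscan(scan);                                       /* -> btendscan() */
    }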

◆ bthandler()

Datum bthandler ( PG_FUNCTION_ARGS  )

Definition at line 107 of file nbtree.c.

References IndexAmRoutine::ambeginscan, IndexAmRoutine::ambuild, IndexAmRoutine::ambuildempty, IndexAmRoutine::ambuildphasename, IndexAmRoutine::ambulkdelete, IndexAmRoutine::amcanbackward, IndexAmRoutine::amcaninclude, IndexAmRoutine::amcanmulticol, IndexAmRoutine::amcanorder, IndexAmRoutine::amcanorderbyop, IndexAmRoutine::amcanparallel, IndexAmRoutine::amcanreturn, IndexAmRoutine::amcanunique, IndexAmRoutine::amclusterable, IndexAmRoutine::amcostestimate, IndexAmRoutine::amendscan, IndexAmRoutine::amestimateparallelscan, IndexAmRoutine::amgetbitmap, IndexAmRoutine::amgettuple, IndexAmRoutine::aminitparallelscan, IndexAmRoutine::aminsert, IndexAmRoutine::amkeytype, IndexAmRoutine::ammarkpos, IndexAmRoutine::amoptionalkey, IndexAmRoutine::amoptions, IndexAmRoutine::amparallelrescan, IndexAmRoutine::ampredlocks, IndexAmRoutine::amproperty, IndexAmRoutine::amrescan, IndexAmRoutine::amrestrpos, IndexAmRoutine::amsearcharray, IndexAmRoutine::amsearchnulls, IndexAmRoutine::amstorage, IndexAmRoutine::amstrategies, IndexAmRoutine::amsupport, IndexAmRoutine::amvacuumcleanup, IndexAmRoutine::amvalidate, btbeginscan(), btbuild(), btbuildempty(), btbuildphasename(), btbulkdelete(), btcanreturn(), btcostestimate(), btendscan(), btestimateparallelscan(), btgetbitmap(), btgettuple(), btinitparallelscan(), btinsert(), btmarkpos(), BTMaxStrategyNumber, BTNProcs, btoptions(), btparallelrescan(), btproperty(), btrescan(), btrestrpos(), btvacuumcleanup(), btvalidate(), InvalidOid, makeNode, and PG_RETURN_POINTER.

108 {
109  IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
110 
111  amroutine->amstrategies = BTMaxStrategyNumber;
112  amroutine->amsupport = BTNProcs;
113  amroutine->amcanorder = true;
114  amroutine->amcanorderbyop = false;
115  amroutine->amcanbackward = true;
116  amroutine->amcanunique = true;
117  amroutine->amcanmulticol = true;
118  amroutine->amoptionalkey = true;
119  amroutine->amsearcharray = true;
120  amroutine->amsearchnulls = true;
121  amroutine->amstorage = false;
122  amroutine->amclusterable = true;
123  amroutine->ampredlocks = true;
124  amroutine->amcanparallel = true;
125  amroutine->amcaninclude = true;
126  amroutine->amkeytype = InvalidOid;
127 
128  amroutine->ambuild = btbuild;
129  amroutine->ambuildempty = btbuildempty;
130  amroutine->aminsert = btinsert;
131  amroutine->ambulkdelete = btbulkdelete;
132  amroutine->amvacuumcleanup = btvacuumcleanup;
133  amroutine->amcanreturn = btcanreturn;
134  amroutine->amcostestimate = btcostestimate;
135  amroutine->amoptions = btoptions;
136  amroutine->amproperty = btproperty;
137  amroutine->ambuildphasename = btbuildphasename;
138  amroutine->amvalidate = btvalidate;
139  amroutine->ambeginscan = btbeginscan;
140  amroutine->amrescan = btrescan;
141  amroutine->amgettuple = btgettuple;
142  amroutine->amgetbitmap = btgetbitmap;
143  amroutine->amendscan = btendscan;
144  amroutine->ammarkpos = btmarkpos;
145  amroutine->amrestrpos = btrestrpos;
146  amroutine->amestimateparallelscan = btestimateparallelscan;
147  amroutine->aminitparallelscan = btinitparallelscan;
148  amroutine->amparallelrescan = btparallelrescan;
149 
150  PG_RETURN_POINTER(amroutine);
151 }
ambeginscan_function ambeginscan
Definition: amapi.h:221
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:351
ambulkdelete_function ambulkdelete
Definition: amapi.h:213
bool amcanmulticol
Definition: amapi.h:183
uint16 amsupport
Definition: amapi.h:173
void btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
Definition: nbtree.c:394
amgettuple_function amgettuple
Definition: amapi.h:223
bool amcanorderbyop
Definition: amapi.h:177
amproperty_function amproperty
Definition: amapi.h:218
char * btbuildphasename(int64 phasenum)
Definition: nbtutils.c:2063
bool btinsert(Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, IndexInfo *indexInfo)
Definition: nbtree.c:193
amparallelrescan_function amparallelrescan
Definition: amapi.h:232
bool amstorage
Definition: amapi.h:191
bool ampredlocks
Definition: amapi.h:195
IndexScanDesc btbeginscan(Relation rel, int nkeys, int norderbys)
Definition: nbtree.c:348
aminsert_function aminsert
Definition: amapi.h:212
Oid amkeytype
Definition: amapi.h:201
bool amoptionalkey
Definition: amapi.h:185
amvalidate_function amvalidate
Definition: amapi.h:220
Size btestimateparallelscan(void)
Definition: nbtree.c:576
bool btvalidate(Oid opclassoid)
Definition: nbtvalidate.c:38
void btbuildempty(Relation index)
Definition: nbtree.c:157
IndexBulkDeleteResult * btvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
Definition: nbtree.c:897
amgetbitmap_function amgetbitmap
Definition: amapi.h:224
ambuild_function ambuild
Definition: amapi.h:210
amoptions_function amoptions
Definition: amapi.h:217
int64 btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
Definition: nbtree.c:290
IndexBuildResult * btbuild(Relation heap, Relation index, IndexInfo *indexInfo)
Definition: nbtsort.c:310
void btendscan(IndexScanDesc scan)
Definition: nbtree.c:454
bool amcaninclude
Definition: amapi.h:199
amcostestimate_function amcostestimate
Definition: amapi.h:216
bool amcanunique
Definition: amapi.h:181
void btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
Definition: selfuncs.c:5763
amvacuumcleanup_function amvacuumcleanup
Definition: amapi.h:214
amendscan_function amendscan
Definition: amapi.h:225
bool amcanbackward
Definition: amapi.h:179
amrescan_function amrescan
Definition: amapi.h:222
bool amcanparallel
Definition: amapi.h:197
bool amsearchnulls
Definition: amapi.h:189
bool btgettuple(IndexScanDesc scan, ScanDirection dir)
Definition: nbtree.c:216
void btmarkpos(IndexScanDesc scan)
Definition: nbtree.c:490
bool amclusterable
Definition: amapi.h:193
bool amsearcharray
Definition: amapi.h:187
#define InvalidOid
Definition: postgres_ext.h:36
#define makeNode(_type_)
Definition: nodes.h:573
bool btproperty(Oid index_oid, int attno, IndexAMProperty prop, const char *propname, bool *res, bool *isnull)
Definition: nbtutils.c:2040
void btrestrpos(IndexScanDesc scan)
Definition: nbtree.c:520
ammarkpos_function ammarkpos
Definition: amapi.h:226
bool amcanorder
Definition: amapi.h:175
ambuildphasename_function ambuildphasename
Definition: amapi.h:219
bool btcanreturn(Relation index, int attno)
Definition: nbtree.c:1384
void btparallelrescan(IndexScanDesc scan)
Definition: nbtree.c:600
amestimateparallelscan_function amestimateparallelscan
Definition: amapi.h:230
#define BTNProcs
Definition: nbtree.h:395
uint16 amstrategies
Definition: amapi.h:171
void btinitparallelscan(void *target)
Definition: nbtree.c:585
ambuildempty_function ambuildempty
Definition: amapi.h:211
bytea * btoptions(Datum reloptions, bool validate)
Definition: nbtutils.c:2028
#define BTMaxStrategyNumber
Definition: stratnum.h:35
amcanreturn_function amcanreturn
Definition: amapi.h:215
aminitparallelscan_function aminitparallelscan
Definition: amapi.h:231
IndexBulkDeleteResult * btbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
Definition: nbtree.c:856
amrestrpos_function amrestrpos
Definition: amapi.h:227
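
bthandler() is what the amhandler column of btree's pg_am row points at; core code obtains the routine through the amapi layer rather than calling it directly. A small hedged sketch:

    #include "postgres.h"
    #include "access/amapi.h"
    #include "catalog/pg_am.h"      /* BTREE_AM_OID */

    static bool
    btree_supports_backward_scan(void)
    {
        /* looks up pg_am and invokes bthandler() via its amhandler entry */
        IndexAmRoutine *amroutine = GetIndexAmRoutineByAmId(BTREE_AM_OID, false);

        return amroutine->amcanbackward;    /* true, per the assignments above */
    }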

◆ btinitparallelscan()

void btinitparallelscan ( void *  target)

Definition at line 585 of file nbtree.c.

References BTPARALLEL_NOT_INITIALIZED, BTParallelScanDescData::btps_arrayKeyCount, BTParallelScanDescData::btps_cv, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, ConditionVariableInit(), InvalidBlockNumber, and SpinLockInit.

Referenced by bthandler().

586 {
587  BTParallelScanDesc bt_target = (BTParallelScanDesc) target;
588 
589  SpinLockInit(&bt_target->btps_mutex);
590  bt_target->btps_scanPage = InvalidBlockNumber;
591  bt_target->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
592  bt_target->btps_arrayKeyCount = 0;
593  ConditionVariableInit(&bt_target->btps_cv);
594 }
slock_t btps_mutex
Definition: nbtree.c:88
#define SpinLockInit(lock)
Definition: spin.h:60
struct BTParallelScanDescData * BTParallelScanDesc
Definition: nbtree.c:92
void ConditionVariableInit(ConditionVariable *cv)
ConditionVariable btps_cv
Definition: nbtree.c:89
BlockNumber btps_scanPage
Definition: nbtree.c:82
BTPS_State btps_pageStatus
Definition: nbtree.c:83
#define InvalidBlockNumber
Definition: block.h:33
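
The estimate/init pair is how the executor provisions shared parallel-scan state: the leader sizes the shared chunk with amestimateparallelscan and initializes it in place with aminitparallelscan. A simplified sketch; the real flow runs through index_parallelscan_estimate() and index_parallelscan_initialize(), and shm_alloc here is a hypothetical stand-in for the DSM allocation:

    Size size;
    void *target;

    size = btestimateparallelscan();   /* sizeof(BTParallelScanDescData) */
    target = shm_alloc(size);          /* hypothetical shared-memory allocation */
    btinitparallelscan(target);        /* btps_scanPage = InvalidBlockNumber,
                                        * btps_pageStatus = BTPARALLEL_NOT_INITIALIZED */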

◆ btinsert()

bool btinsert ( Relation  rel,
Datum *  values,
bool *  isnull,
ItemPointer  ht_ctid,
Relation  heapRel,
IndexUniqueCheck  checkUnique,
IndexInfo indexInfo 
)

Definition at line 193 of file nbtree.c.

References _bt_doinsert(), index_form_tuple(), pfree(), RelationGetDescr, and IndexTupleData::t_tid.

Referenced by bthandler().

197 {
198  bool result;
199  IndexTuple itup;
200 
201  /* generate an index tuple */
202  itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
203  itup->t_tid = *ht_ctid;
204 
205  result = _bt_doinsert(rel, itup, checkUnique, heapRel);
206 
207  pfree(itup);
208 
209  return result;
210 }
bool _bt_doinsert(Relation rel, IndexTuple itup, IndexUniqueCheck checkUnique, Relation heapRel)
Definition: nbtinsert.c:79
#define RelationGetDescr(relation)
Definition: rel.h:445
ItemPointerData t_tid
Definition: itup.h:37
IndexTuple index_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull)
Definition: indextuple.c:47
void pfree(void *pointer)
Definition: mcxt.c:1056
static Datum values[MAXATTR]
Definition: bootstrap.c:167
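
btinsert() is reached through the generic index_insert() call made once per new heap tuple during ExecInsertIndexTuples(). A hedged sketch (insert_one_entry is a name invented for this example):

    #include "postgres.h"
    #include "access/genam.h"
    #include "nodes/execnodes.h"
    #include "utils/rel.h"

    static bool
    insert_one_entry(Relation indexRel, Relation heapRel,
                     Datum *values, bool *isnull,
                     ItemPointer heapTid, IndexInfo *indexInfo)
    {
        /* UNIQUE_CHECK_YES makes _bt_doinsert() raise an error on duplicates */
        return index_insert(indexRel, values, isnull, heapTid,
                            heapRel, UNIQUE_CHECK_YES, indexInfo); /* -> btinsert() */
    }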

◆ btmarkpos()

void btmarkpos ( IndexScanDesc  scan)

Definition at line 490 of file nbtree.c.

References _bt_mark_array_keys(), BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanPosData::itemIndex, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, and IndexScanDescData::opaque.

Referenced by bthandler().

491 {
492  BTScanOpaque so = (BTScanOpaque) scan->opaque;
493 
494  /* There may be an old mark with a pin (but no lock). */
495  BTScanPosUnpinIfPinned(so->markPos);
496 
497  /*
498  * Just record the current itemIndex. If we later step to next page
499  * before releasing the marked position, _bt_steppage makes a full copy of
500  * the currPos struct in markPos. If (as often happens) the mark is moved
501  * before we leave the page, we don't have to do that work.
502  */
503  if (BTScanPosIsValid(so->currPos))
504  so->markItemIndex = so->currPos.itemIndex;
505  else
506  {
507  BTScanPosInvalidate(so->markPos);
508  so->markItemIndex = -1;
509  }
510 
511  /* Also record the current positions of any array keys */
512  if (so->numArrayKeys)
513  _bt_mark_array_keys(scan);
514 }
int itemIndex
Definition: nbtree.h:579
#define BTScanPosIsValid(scanpos)
Definition: nbtree.h:603
BTScanPosData markPos
Definition: nbtree.h:667
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:670
int numArrayKeys
Definition: nbtree.h:637
void _bt_mark_array_keys(IndexScanDesc scan)
Definition: nbtutils.c:605
int markItemIndex
Definition: nbtree.h:663
#define BTScanPosInvalidate(scanpos)
Definition: nbtree.h:609
BTScanPosData currPos
Definition: nbtree.h:666
#define BTScanPosUnpinIfPinned(scanpos)
Definition: nbtree.h:597

◆ btparallelrescan()

void btparallelrescan ( IndexScanDesc  scan)

Definition at line 600 of file nbtree.c.

References Assert, BTPARALLEL_NOT_INITIALIZED, BTParallelScanDescData::btps_arrayKeyCount, BTParallelScanDescData::btps_mutex, BTParallelScanDescData::btps_pageStatus, BTParallelScanDescData::btps_scanPage, InvalidBlockNumber, OffsetToPointer, IndexScanDescData::parallel_scan, ParallelIndexScanDescData::ps_offset, SpinLockAcquire, and SpinLockRelease.

Referenced by bthandler().

601 {
602  BTParallelScanDesc btscan;
603  ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
604 
605  Assert(parallel_scan);
606 
607  btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
608  parallel_scan->ps_offset);
609 
610  /*
611  * In theory, we don't need to acquire the spinlock here, because there
612  * shouldn't be any other workers running at this point, but we do so for
613  * consistency.
614  */
615  SpinLockAcquire(&btscan->btps_mutex);
616  btscan->btps_scanPage = InvalidBlockNumber;
617  btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
618  btscan->btps_arrayKeyCount = 0;
619  SpinLockRelease(&btscan->btps_mutex);
620 }
slock_t btps_mutex
Definition: nbtree.c:88
struct BTParallelScanDescData * BTParallelScanDesc
Definition: nbtree.c:92
#define SpinLockAcquire(lock)
Definition: spin.h:62
#define OffsetToPointer(base, offset)
Definition: c.h:635
#define SpinLockRelease(lock)
Definition: spin.h:64
struct ParallelIndexScanDescData * parallel_scan
Definition: relscan.h:151
BlockNumber btps_scanPage
Definition: nbtree.c:82
#define Assert(condition)
Definition: c.h:732
BTPS_State btps_pageStatus
Definition: nbtree.c:83
#define InvalidBlockNumber
Definition: block.h:33

◆ btrescan()

void btrescan ( IndexScanDesc  scan,
ScanKey  scankey,
int  nscankeys,
ScanKey  orderbys,
int  norderbys 
)

Definition at line 394 of file nbtree.c.

References _bt_killitems(), _bt_preprocess_array_keys(), BTScanOpaqueData::arrayKeyCount, BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, IndexScanDescData::keyData, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, memmove, IndexScanDescData::numberOfKeys, BTScanOpaqueData::numberOfKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), and IndexScanDescData::xs_want_itup.

Referenced by bthandler().

396 {
397  BTScanOpaque so = (BTScanOpaque) scan->opaque;
398 
399  /* we aren't holding any read locks, but gotta drop the pins */
400  if (BTScanPosIsValid(so->currPos))
401  {
402  /* Before leaving current page, deal with any killed items */
403  if (so->numKilled > 0)
404  _bt_killitems(scan);
405  BTScanPosUnpinIfPinned(so->currPos);
406  BTScanPosInvalidate(so->currPos);
407  }
408 
409  so->markItemIndex = -1;
410  so->arrayKeyCount = 0;
411  BTScanPosUnpinIfPinned(so->markPos);
412  BTScanPosInvalidate(so->markPos);
413 
414  /*
415  * Allocate tuple workspace arrays, if needed for an index-only scan and
416  * not already done in a previous rescan call. To save on palloc
417  * overhead, both workspaces are allocated as one palloc block; only this
418  * function and btendscan know that.
419  *
420  * NOTE: this data structure also makes it safe to return data from a
421  * "name" column, even though btree name_ops uses an underlying storage
422  * datatype of cstring. The risk there is that "name" is supposed to be
423  * padded to NAMEDATALEN, but the actual index tuple is probably shorter.
424  * However, since we only return data out of tuples sitting in the
425  * currTuples array, a fetch of NAMEDATALEN bytes can at worst pull some
426  * data out of the markTuples array --- running off the end of memory for
427  * a SIGSEGV is not possible. Yeah, this is ugly as sin, but it beats
428  * adding special-case treatment for name_ops elsewhere.
429  */
430  if (scan->xs_want_itup && so->currTuples == NULL)
431  {
432  so->currTuples = (char *) palloc(BLCKSZ * 2);
433  so->markTuples = so->currTuples + BLCKSZ;
434  }
435 
436  /*
437  * Reset the scan keys. Note that keys ordering stuff moved to _bt_first.
438  * - vadim 05/05/97
439  */
440  if (scankey && scan->numberOfKeys > 0)
441  memmove(scan->keyData,
442  scankey,
443  scan->numberOfKeys * sizeof(ScanKeyData));
444  so->numberOfKeys = 0; /* until _bt_preprocess_keys sets it */
445 
446  /* If any keys are SK_SEARCHARRAY type, set up array-key info */
447  _bt_preprocess_array_keys(scan);
448 }
char * currTuples
Definition: nbtree.h:653
#define BTScanPosIsValid(scanpos)
Definition: nbtree.h:603
void _bt_killitems(IndexScanDesc scan)
Definition: nbtutils.c:1718
BTScanPosData markPos
Definition: nbtree.h:667
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:670
#define memmove(d, s, c)
Definition: c.h:1238
int arrayKeyCount
Definition: nbtree.h:639
char * markTuples
Definition: nbtree.h:654
int markItemIndex
Definition: nbtree.h:663
#define BTScanPosInvalidate(scanpos)
Definition: nbtree.h:609
int numberOfKeys
Definition: nbtree.h:632
struct ScanKeyData * keyData
Definition: relscan.h:107
BTScanPosData currPos
Definition: nbtree.h:666
void * palloc(Size size)
Definition: mcxt.c:949
void _bt_preprocess_array_keys(IndexScanDesc scan)
Definition: nbtutils.c:191
#define BTScanPosUnpinIfPinned(scanpos)
Definition: nbtree.h:597

◆ btrestrpos()

void btrestrpos ( IndexScanDesc  scan)

Definition at line 520 of file nbtree.c.

References _bt_killitems(), _bt_restore_array_keys(), BTScanPosInvalidate, BTScanPosIsPinned, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanPosData::buf, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, IncrBufferRefCount(), BTScanPosData::itemIndex, BTScanPosData::lastItem, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanPosData::nextTupleOffset, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numKilled, offsetof, and IndexScanDescData::opaque.

Referenced by bthandler().

521 {
522  BTScanOpaque so = (BTScanOpaque) scan->opaque;
523 
524  /* Restore the marked positions of any array keys */
525  if (so->numArrayKeys)
526  _bt_restore_array_keys(scan);
527 
528  if (so->markItemIndex >= 0)
529  {
530  /*
531  * The scan has never moved to a new page since the last mark. Just
532  * restore the itemIndex.
533  *
534  * NB: In this case we can't count on anything in so->markPos to be
535  * accurate.
536  */
537  so->currPos.itemIndex = so->markItemIndex;
538  }
539  else
540  {
541  /*
542  * The scan moved to a new page after last mark or restore, and we are
543  * now restoring to the marked page. We aren't holding any read
544  * locks, but if we're still holding the pin for the current position,
545  * we must drop it.
546  */
547  if (BTScanPosIsValid(so->currPos))
548  {
549  /* Before leaving current page, deal with any killed items */
550  if (so->numKilled > 0)
551  _bt_killitems(scan);
552  BTScanPosUnpinIfPinned(so->currPos);
553  }
554 
555  if (BTScanPosIsValid(so->markPos))
556  {
557  /* bump pin on mark buffer for assignment to current buffer */
558  if (BTScanPosIsPinned(so->markPos))
559  IncrBufferRefCount(so->markPos.buf);
560  memcpy(&so->currPos, &so->markPos,
561  offsetof(BTScanPosData, items[1]) +
562  so->markPos.lastItem * sizeof(BTScanPosItem));
563  if (so->currTuples)
564  memcpy(so->currTuples, so->markTuples,
565  so->markPos.nextTupleOffset);
566  }
567  else
568  BTScanPosInvalidate(so->currPos);
569  }
570 }
int itemIndex
Definition: nbtree.h:579
char * currTuples
Definition: nbtree.h:653
#define BTScanPosIsValid(scanpos)
Definition: nbtree.h:603
void _bt_restore_array_keys(IndexScanDesc scan)
Definition: nbtutils.c:624
int nextTupleOffset
Definition: nbtree.h:568
int lastItem
Definition: nbtree.h:578
void _bt_killitems(IndexScanDesc scan)
Definition: nbtutils.c:1718
BTScanPosData markPos
Definition: nbtree.h:667
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:670
int numArrayKeys
Definition: nbtree.h:637
#define BTScanPosIsPinned(scanpos)
Definition: nbtree.h:586
char * markTuples
Definition: nbtree.h:654
int markItemIndex
Definition: nbtree.h:663
#define BTScanPosInvalidate(scanpos)
Definition: nbtree.h:609
BTScanPosData currPos
Definition: nbtree.h:666
Buffer buf
Definition: nbtree.h:549
void IncrBufferRefCount(Buffer buffer)
Definition: bufmgr.c:3403
#define offsetof(type, field)
Definition: c.h:655
#define BTScanPosUnpinIfPinned(scanpos)
Definition: nbtree.h:597
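
Mark/restore exists mainly to support merge joins, which may need to rewind the inner index scan to a previously marked tuple. A hedged sketch through the generic AM wrappers:

    index_markpos(scan);    /* -> btmarkpos(): remember the current itemIndex */

    /* ... continue fetching tuples forward ... */

    index_restrpos(scan);   /* -> btrestrpos(): back to the marked position */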

◆ btvacuumcleanup()

IndexBulkDeleteResult * btvacuumcleanup ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats 
)

Definition at line 897 of file nbtree.c.

References _bt_update_meta_cleanup_info(), _bt_vacuum_needs_cleanup(), IndexVacuumInfo::analyze_only, btvacuumscan(), IndexVacuumInfo::estimated_count, IndexVacuumInfo::index, IndexVacuumInfo::num_heap_tuples, IndexBulkDeleteResult::num_index_tuples, and palloc0().

Referenced by bthandler().

898 {
899  /* No-op in ANALYZE ONLY mode */
900  if (info->analyze_only)
901  return stats;
902 
903  /*
904  * If btbulkdelete was called, we need not do anything, just return the
905  * stats from the latest btbulkdelete call. If it wasn't called, we might
906  * still need to do a pass over the index, to recycle any newly-recyclable
907  * pages or to obtain index statistics. _bt_vacuum_needs_cleanup
908  * determines if either are needed.
909  *
910  * Since we aren't going to actually delete any leaf items, there's no
911  * need to go through all the vacuum-cycle-ID pushups.
912  */
913  if (stats == NULL)
914  {
915  TransactionId oldestBtpoXact;
916 
917  /* Check if we need a cleanup */
918  if (!_bt_vacuum_needs_cleanup(info))
919  return NULL;
920 
921  stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
922  btvacuumscan(info, stats, NULL, NULL, 0, &oldestBtpoXact);
923 
924  /* Update cleanup-related information in the metapage */
925  _bt_update_meta_cleanup_info(info->index, oldestBtpoXact,
926  info->num_heap_tuples);
927  }
928 
929  /*
930  * It's quite possible for us to be fooled by concurrent page splits into
931  * double-counting some index tuples, so disbelieve any total that exceeds
932  * the underlying heap's count ... if we know that accurately. Otherwise
933  * this might just make matters worse.
934  */
935  if (!info->estimated_count)
936  {
937  if (stats->num_index_tuples > info->num_heap_tuples)
938  stats->num_index_tuples = info->num_heap_tuples;
939  }
940 
941  return stats;
942 }
static bool _bt_vacuum_needs_cleanup(IndexVacuumInfo *info)
Definition: nbtree.c:788
uint32 TransactionId
Definition: c.h:507
bool analyze_only
Definition: genam.h:47
Relation index
Definition: genam.h:46
void _bt_update_meta_cleanup_info(Relation rel, TransactionId oldestBtpoXact, float8 numHeapTuples)
Definition: nbtpage.c:158
static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid, TransactionId *oldestBtpoXact)
Definition: nbtree.c:957
void * palloc0(Size size)
Definition: mcxt.c:980
double num_heap_tuples
Definition: genam.h:51
double num_index_tuples
Definition: genam.h:77
bool estimated_count
Definition: genam.h:49
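
Both vacuum entry points are driven through the generic AM wrappers. A hedged sketch of the calling pattern; ivinfo, dead_tuples and the lazy_tid_reaped callback are the names used by vacuumlazy.c of this era and stand in for whatever the caller supplies:

    IndexBulkDeleteResult *stats = NULL;

    /* zero or more deletion passes, one per batch of dead heap TIDs */
    stats = index_bulk_delete(&ivinfo, stats,
                              lazy_tid_reaped, (void *) dead_tuples); /* -> btbulkdelete() */

    /* one final pass for statistics and page recycling */
    stats = index_vacuum_cleanup(&ivinfo, stats);                     /* -> btvacuumcleanup() */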

◆ btvacuumpage()

static void btvacuumpage ( BTVacState *  vstate,
BlockNumber  blkno,
BlockNumber  orig_blkno 
)
static

Definition at line 1113 of file nbtree.c.

References _bt_checkpage(), _bt_delitems_vacuum(), _bt_page_recyclable(), _bt_pagedel(), _bt_relbuf(), BT_READ, BTP_SPLIT_END, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, buf, BUFFER_LOCK_UNLOCK, BufferGetPage, BTVacState::callback, callback(), BTVacState::callback_state, BTVacState::cycleid, IndexVacuumInfo::index, BTVacState::info, BTVacState::lastBlockLocked, BTVacState::lastBlockVacuumed, LockBuffer(), LockBufferForCleanup(), MAIN_FORKNUM, MarkBufferDirtyHint(), MaxOffsetNumber, MemoryContextReset(), MemoryContextSwitchTo(), IndexBulkDeleteResult::num_index_tuples, OffsetNumberNext, BTVacState::oldestBtpoXact, P_FIRSTDATAKEY, P_IGNORE, P_ISDELETED, P_ISHALFDEAD, P_ISLEAF, P_NONE, P_RIGHTMOST, BTVacState::pagedelcontext, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageGetSpecialPointer, PageIsNew, IndexBulkDeleteResult::pages_deleted, RBM_NORMAL, ReadBufferExtended(), RecordFreeIndexPage(), BTVacState::stats, IndexVacuumInfo::strategy, IndexTupleData::t_tid, BTVacState::totFreePages, TransactionIdIsValid, TransactionIdPrecedes(), IndexBulkDeleteResult::tuples_removed, vacuum_delay_point(), and BTPageOpaqueData::xact.

Referenced by btvacuumscan().

1114 {
1115  IndexVacuumInfo *info = vstate->info;
1116  IndexBulkDeleteResult *stats = vstate->stats;
1117  IndexBulkDeleteCallback callback = vstate->callback;
1118  void *callback_state = vstate->callback_state;
1119  Relation rel = info->index;
1120  bool delete_now;
1121  BlockNumber recurse_to;
1122  Buffer buf;
1123  Page page;
1124  BTPageOpaque opaque = NULL;
1125 
1126 restart:
1127  delete_now = false;
1128  recurse_to = P_NONE;
1129 
1130  /* call vacuum_delay_point while not holding any buffer lock */
1131  vacuum_delay_point();
1132 
1133  /*
1134  * We can't use _bt_getbuf() here because it always applies
1135  * _bt_checkpage(), which will barf on an all-zero page. We want to
1136  * recycle all-zero pages, not fail. Also, we want to use a nondefault
1137  * buffer access strategy.
1138  */
1139  buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
1140  info->strategy);
1141  LockBuffer(buf, BT_READ);
1142  page = BufferGetPage(buf);
1143  if (!PageIsNew(page))
1144  {
1145  _bt_checkpage(rel, buf);
1146  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1147  }
1148 
1149  /*
1150  * If we are recursing, the only case we want to do anything with is a
1151  * live leaf page having the current vacuum cycle ID. Any other state
1152  * implies we already saw the page (eg, deleted it as being empty).
1153  */
1154  if (blkno != orig_blkno)
1155  {
1156  if (_bt_page_recyclable(page) ||
1157  P_IGNORE(opaque) ||
1158  !P_ISLEAF(opaque) ||
1159  opaque->btpo_cycleid != vstate->cycleid)
1160  {
1161  _bt_relbuf(rel, buf);
1162  return;
1163  }
1164  }
1165 
1166  /* Page is valid, see what to do with it */
1167  if (_bt_page_recyclable(page))
1168  {
1169  /* Okay to recycle this page */
1170  RecordFreeIndexPage(rel, blkno);
1171  vstate->totFreePages++;
1172  stats->pages_deleted++;
1173  }
1174  else if (P_ISDELETED(opaque))
1175  {
1176  /* Already deleted, but can't recycle yet */
1177  stats->pages_deleted++;
1178 
1179  /* Update the oldest btpo.xact */
1180  if (!TransactionIdIsValid(vstate->oldestBtpoXact) ||
1181  TransactionIdPrecedes(opaque->btpo.xact, vstate->oldestBtpoXact))
1182  vstate->oldestBtpoXact = opaque->btpo.xact;
1183  }
1184  else if (P_ISHALFDEAD(opaque))
1185  {
1186  /* Half-dead, try to delete */
1187  delete_now = true;
1188  }
1189  else if (P_ISLEAF(opaque))
1190  {
1191  OffsetNumber deletable[MaxOffsetNumber];
1192  int ndeletable;
1193  OffsetNumber offnum,
1194  minoff,
1195  maxoff;
1196 
1197  /*
1198  * Trade in the initial read lock for a super-exclusive write lock on
1199  * this page. We must get such a lock on every leaf page over the
1200  * course of the vacuum scan, whether or not it actually contains any
1201  * deletable tuples --- see nbtree/README.
1202  */
1203  LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1204  LockBufferForCleanup(buf);
1205 
1206  /*
1207  * Remember highest leaf page number we've taken cleanup lock on; see
1208  * notes in btvacuumscan
1209  */
1210  if (blkno > vstate->lastBlockLocked)
1211  vstate->lastBlockLocked = blkno;
1212 
1213  /*
1214  * Check whether we need to recurse back to earlier pages. What we
1215  * are concerned about is a page split that happened since we started
1216  * the vacuum scan. If the split moved some tuples to a lower page
1217  * then we might have missed 'em. If so, set up for tail recursion.
1218  * (Must do this before possibly clearing btpo_cycleid below!)
1219  */
1220  if (vstate->cycleid != 0 &&
1221  opaque->btpo_cycleid == vstate->cycleid &&
1222  !(opaque->btpo_flags & BTP_SPLIT_END) &&
1223  !P_RIGHTMOST(opaque) &&
1224  opaque->btpo_next < orig_blkno)
1225  recurse_to = opaque->btpo_next;
1226 
1227  /*
1228  * Scan over all items to see which ones need deleted according to the
1229  * callback function.
1230  */
1231  ndeletable = 0;
1232  minoff = P_FIRSTDATAKEY(opaque);
1233  maxoff = PageGetMaxOffsetNumber(page);
1234  if (callback)
1235  {
1236  for (offnum = minoff;
1237  offnum <= maxoff;
1238  offnum = OffsetNumberNext(offnum))
1239  {
1240  IndexTuple itup;
1241  ItemPointer htup;
1242 
1243  itup = (IndexTuple) PageGetItem(page,
1244  PageGetItemId(page, offnum));
1245  htup = &(itup->t_tid);
1246 
1247  /*
1248  * During Hot Standby we currently assume that
1249  * XLOG_BTREE_VACUUM records do not produce conflicts. That is
1250  * only true as long as the callback function depends only
1251  * upon whether the index tuple refers to heap tuples removed
1252  * in the initial heap scan. When vacuum starts it derives a
1253  * value of OldestXmin. Backends taking later snapshots could
1254  * have a RecentGlobalXmin with a later xid than the vacuum's
1255  * OldestXmin, so it is possible that row versions deleted
1256  * after OldestXmin could be marked as killed by other
1257  * backends. The callback function *could* look at the index
1258  * tuple state in isolation and decide to delete the index
1259  * tuple, though currently it does not. If it ever did, we
1260  * would need to reconsider whether XLOG_BTREE_VACUUM records
1261  * should cause conflicts. If they did cause conflicts they
1262  * would be fairly harsh conflicts, since we haven't yet
1263  * worked out a way to pass a useful value for
1264  * latestRemovedXid on the XLOG_BTREE_VACUUM records. This
1265  * applies to *any* type of index that marks index tuples as
1266  * killed.
1267  */
1268  if (callback(htup, callback_state))
1269  deletable[ndeletable++] = offnum;
1270  }
1271  }
1272 
1273  /*
1274  * Apply any needed deletes. We issue just one _bt_delitems_vacuum()
1275  * call per page, so as to minimize WAL traffic.
1276  */
1277  if (ndeletable > 0)
1278  {
1279  /*
1280  * Notice that the issued XLOG_BTREE_VACUUM WAL record includes
1281  * all the information the replay code needs to get a cleanup
1282  * lock on all pages between the previous lastBlockVacuumed and
1283  * this page. This ensures that WAL replay locks all leaf pages at
1284  * some point, which is important should non-MVCC scans be
1285  * requested. This is currently unused on standby, but we record
1286  * it anyway, so that the WAL contains the required information.
1287  *
1288  * Since we can visit leaf pages out-of-order when recursing,
1289  * replay might end up locking such pages an extra time, but it
1290  * doesn't seem worth the amount of bookkeeping it'd take to avoid
1291  * that.
1292  */
1293  _bt_delitems_vacuum(rel, buf, deletable, ndeletable,
1294  vstate->lastBlockVacuumed);
1295 
1296  /*
1297  * Remember highest leaf page number we've issued a
1298  * XLOG_BTREE_VACUUM WAL record for.
1299  */
1300  if (blkno > vstate->lastBlockVacuumed)
1301  vstate->lastBlockVacuumed = blkno;
1302 
1303  stats->tuples_removed += ndeletable;
1304  /* must recompute maxoff */
1305  maxoff = PageGetMaxOffsetNumber(page);
1306  }
1307  else
1308  {
1309  /*
1310  * If the page has been split during this vacuum cycle, it seems
1311  * worth expending a write to clear btpo_cycleid even if we don't
1312  * have any deletions to do. (If we do, _bt_delitems_vacuum takes
1313  * care of this.) This ensures we won't process the page again.
1314  *
1315  * We treat this like a hint-bit update because there's no need to
1316  * WAL-log it.
1317  */
1318  if (vstate->cycleid != 0 &&
1319  opaque->btpo_cycleid == vstate->cycleid)
1320  {
1321  opaque->btpo_cycleid = 0;
1322  MarkBufferDirtyHint(buf, true);
1323  }
1324  }
1325 
1326  /*
1327  * If it's now empty, try to delete; else count the live tuples. We
1328  * don't delete when recursing, though, to avoid putting entries into
1329  * freePages out-of-order (doesn't seem worth any extra code to handle
1330  * the case).
1331  */
1332  if (minoff > maxoff)
1333  delete_now = (blkno == orig_blkno);
1334  else
1335  stats->num_index_tuples += maxoff - minoff + 1;
1336  }
1337 
1338  if (delete_now)
1339  {
1340  MemoryContext oldcontext;
1341  int ndel;
1342 
1343  /* Run pagedel in a temp context to avoid memory leakage */
1344  MemoryContextReset(vstate->pagedelcontext);
1345  oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
1346 
1347  ndel = _bt_pagedel(rel, buf);
1348 
1349  /* count only this page, else may double-count parent */
1350  if (ndel)
1351  {
1352  stats->pages_deleted++;
1353  if (!TransactionIdIsValid(vstate->oldestBtpoXact) ||
1354  TransactionIdPrecedes(opaque->btpo.xact, vstate->oldestBtpoXact))
1355  vstate->oldestBtpoXact = opaque->btpo.xact;
1356  }
1357 
1358  MemoryContextSwitchTo(oldcontext);
1359  /* pagedel released buffer, so we shouldn't */
1360  }
1361  else
1362  _bt_relbuf(rel, buf);
1363 
1364  /*
1365  * This is really tail recursion, but if the compiler is too stupid to
1366  * optimize it as such, we'd eat an uncomfortably large amount of stack
1367  * space per recursion level (due to the deletable[] array). A failure is
1368  * improbable since the number of levels isn't likely to be large ... but
1369  * just in case, let's hand-optimize into a loop.
1370  */
1371  if (recurse_to != P_NONE)
1372  {
1373  blkno = recurse_to;
1374  goto restart;
1375  }
1376 }
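
The closing comment explains why the recursion is hand-optimized into a loop. As a stand-alone illustration of that pattern, here is a minimal sketch; scan_one_page() and NO_PAGE are hypothetical names introduced for illustration, not PostgreSQL functions:

/*
 * Minimal sketch (not PostgreSQL source) of hand-optimized tail recursion:
 * instead of a recursive call whose frame would carry a large local array
 * (like deletable[] above), reset the argument and jump back to the top,
 * so stack usage stays constant however many pages are revisited.
 */
#define NO_PAGE ((unsigned int) ~0)

extern unsigned int scan_one_page(unsigned int blkno, unsigned int orig_blkno);

void
scan_page_chain(unsigned int start)
{
	unsigned int blkno = start;

restart:
	{
		/* large locals would live here, reused on every pass */
		unsigned int recurse_to = scan_one_page(blkno, start);

		if (recurse_to != NO_PAGE)
		{
			blkno = recurse_to;
			goto restart;		/* "tail call" without a new stack frame */
		}
	}
}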

◆ btvacuumscan()

static void btvacuumscan ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats,
IndexBulkDeleteCallback  callback,
void *  callback_state,
BTCycleId  cycleid,
TransactionId *  oldestBtpoXact 
)
static

Definition at line 957 of file nbtree.c.

References _bt_checkpage(), _bt_delitems_vacuum(), _bt_relbuf(), ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, BTREE_METAPAGE, btvacuumpage(), buf, BTVacState::callback, callback(), BTVacState::callback_state, CurrentMemoryContext, BTVacState::cycleid, IndexBulkDeleteResult::estimated_count, ExclusiveLock, IndexVacuumInfo::index, IndexFreeSpaceMapVacuum(), BTVacState::info, InvalidTransactionId, BTVacState::lastBlockLocked, BTVacState::lastBlockVacuumed, LockBufferForCleanup(), LockRelationForExtension(), MAIN_FORKNUM, MemoryContextDelete(), IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::num_pages, BTVacState::oldestBtpoXact, BTVacState::pagedelcontext, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, pgstat_progress_update_param(), PROGRESS_SCAN_BLOCKS_DONE, PROGRESS_SCAN_BLOCKS_TOTAL, RBM_NORMAL, ReadBufferExtended(), RELATION_IS_LOCAL, RelationGetNumberOfBlocks, IndexVacuumInfo::report_progress, BTVacState::stats, IndexVacuumInfo::strategy, BTVacState::totFreePages, UnlockRelationForExtension(), and XLogStandbyInfoActive.

Referenced by btbulkdelete(), and btvacuumcleanup().
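
For orientation, here is a hedged sketch of how a btbulkdelete()-style caller drives this routine, written as it would appear inside nbtree.c (the real entry points additionally establish a vacuum cycle ID via _bt_start_vacuum() and arrange error cleanup, which this sketch omits):

/*
 * Sketch of a bulk-delete style caller; only btvacuumscan()'s signature is
 * taken from this page, everything else is an illustrative simplification.
 */
static IndexBulkDeleteResult *
sketch_bulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
                  IndexBulkDeleteCallback callback, void *callback_state)
{
	/* allocate stats on the first call, as the index AM API expects */
	if (stats == NULL)
		stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));

	/*
	 * cycleid 0 disables the split-recursion checks in btvacuumpage(); a
	 * real bulk delete passes a live cycle ID obtained from
	 * _bt_start_vacuum() so page splits can be chased.
	 */
	btvacuumscan(info, stats, callback, callback_state, 0, NULL);

	return stats;
}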

960 {
961  Relation rel = info->index;
962  BTVacState vstate;
963  BlockNumber num_pages;
964  BlockNumber blkno;
965  bool needLock;
966 
967  /*
968  * Reset counts that will be incremented during the scan; needed in case
969  * of multiple scans during a single VACUUM command
970  */
971  stats->estimated_count = false;
972  stats->num_index_tuples = 0;
973  stats->pages_deleted = 0;
974 
975  /* Set up info to pass down to btvacuumpage */
976  vstate.info = info;
977  vstate.stats = stats;
978  vstate.callback = callback;
979  vstate.callback_state = callback_state;
980  vstate.cycleid = cycleid;
981  vstate.lastBlockVacuumed = BTREE_METAPAGE; /* Initialise at first block */
982  vstate.lastBlockLocked = BTREE_METAPAGE;
983  vstate.totFreePages = 0;
984  vstate.oldestBtpoXact = InvalidTransactionId;
985 
986  /* Create a temporary memory context to run _bt_pagedel in */
987  vstate.pagedelcontext = AllocSetContextCreate(CurrentMemoryContext,
988  "_bt_pagedel",
989  ALLOCSET_DEFAULT_SIZES);
990 
991  /*
992  * The outer loop iterates over all index pages except the metapage, in
993  * physical order (we hope the kernel will cooperate in providing
994  * read-ahead for speed). It is critical that we visit all leaf pages,
995  * including ones added after we start the scan, else we might fail to
996  * delete some deletable tuples. Hence, we must repeatedly check the
997  * relation length. We must acquire the relation-extension lock while
998  * doing so to avoid a race condition: if someone else is extending the
999  * relation, there is a window where bufmgr/smgr have created a new
1000  * all-zero page but it hasn't yet been write-locked by _bt_getbuf(). If
1001  * we manage to scan such a page here, we'll improperly assume it can be
1002  * recycled. Taking the lock synchronizes things enough to prevent a
1003  * problem: either num_pages won't include the new page, or _bt_getbuf
1004  * already has write lock on the buffer and it will be fully initialized
1005  * before we can examine it. (See also vacuumlazy.c, which has the same
1006  * issue.) Also, we need not worry if a page is added immediately after
1007  * we look; the page splitting code already has write-lock on the left
1008  * page before it adds a right page, so we must already have processed any
1009  * tuples due to be moved into such a page.
1010  *
1011  * We can skip locking for new or temp relations, however, since no one
1012  * else could be accessing them.
1013  */
1014  needLock = !RELATION_IS_LOCAL(rel);
1015 
1016  blkno = BTREE_METAPAGE + 1;
1017  for (;;)
1018  {
1019  /* Get the current relation length */
1020  if (needLock)
1021  LockRelationForExtension(rel, ExclusiveLock);
1022  num_pages = RelationGetNumberOfBlocks(rel);
1023  if (needLock)
1024  UnlockRelationForExtension(rel, ExclusiveLock);
1025 
1026  if (info->report_progress)
1027  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
1028  num_pages);
1029 
1030  /* Quit if we've scanned the whole relation */
1031  if (blkno >= num_pages)
1032  break;
1033  /* Iterate over pages, then loop back to recheck length */
1034  for (; blkno < num_pages; blkno++)
1035  {
1036  btvacuumpage(&vstate, blkno, blkno);
1037  if (info->report_progress)
1038  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1039  blkno);
1040  }
1041  }
1042 
1043  /*
1044  * Check to see if we need to issue one final WAL record for this index,
1045  * which may be needed for correctness on a hot standby node when non-MVCC
1046  * index scans could take place.
1047  *
1048  * If the WAL is replayed in hot standby, the replay process needs to get
1049  * cleanup locks on all index leaf pages, just as we've been doing here.
1050  * However, we won't issue any WAL records about pages that have no items
1051  * to be deleted. For pages between pages we've vacuumed, the replay code
1052  * will take locks under the direction of the lastBlockVacuumed fields in
1053  * the XLOG_BTREE_VACUUM WAL records. To cover pages after the last one
1054  * we vacuum, we need to issue a dummy XLOG_BTREE_VACUUM WAL record
1055  * against the last leaf page in the index, if that one wasn't vacuumed.
1056  */
1057  if (XLogStandbyInfoActive() &&
1058  vstate.lastBlockVacuumed < vstate.lastBlockLocked)
1059  {
1060  Buffer buf;
1061 
1062  /*
1063  * The page should be valid, but we can't use _bt_getbuf() because we
1064  * want to use a nondefault buffer access strategy. Since we aren't
1065  * going to delete any items, getting cleanup lock again is probably
1066  * overkill, but for consistency do that anyway.
1067  */
1068  buf = ReadBufferExtended(rel, MAIN_FORKNUM, vstate.lastBlockLocked,
1069  RBM_NORMAL, info->strategy);
1070  LockBufferForCleanup(buf);
1071  _bt_checkpage(rel, buf);
1072  _bt_delitems_vacuum(rel, buf, NULL, 0, vstate.lastBlockVacuumed);
1073  _bt_relbuf(rel, buf);
1074  }
1075 
1076  MemoryContextDelete(vstate.pagedelcontext);
1077 
1078  /*
1079  * If we found any recyclable pages (and recorded them in the FSM), then
1080  * forcibly update the upper-level FSM pages to ensure that searchers can
1081  * find them. It's possible that the pages were also found during
1082  * previous scans and so this is a waste of time, but it's cheap enough
1083  * relative to scanning the index that it shouldn't matter much, and
1084  * making sure that free pages are available sooner not later seems
1085  * worthwhile.
1086  *
1087  * Note that if no recyclable pages exist, we don't bother vacuuming the
1088  * FSM at all.
1089  */
1090  if (vstate.totFreePages > 0)
1091  IndexFreeSpaceMapVacuum(rel);
1092 
1093  /* update statistics */
1094  stats->num_pages = num_pages;
1095  stats->pages_free = vstate.totFreePages;
1096 
1097  if (oldestBtpoXact)
1098  *oldestBtpoXact = vstate.oldestBtpoXact;
1099 }
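
Stripped of buffer-manager and locking details, the outer loop above reduces to the following stand-alone sketch; relation_length() and visit_page() are hypothetical stand-ins for the extension-lock-protected RelationGetNumberOfBlocks() call and btvacuumpage():

/*
 * Sketch (not PostgreSQL source) of the "recheck relation length" scan
 * pattern: process pages in physical order, then re-read the length and
 * loop again, so pages added after the scan started are still visited.
 */
typedef unsigned int blkno_t;

extern blkno_t relation_length(void);	/* would hold the extension lock */
extern void visit_page(blkno_t blkno);

void
scan_to_stable_end(blkno_t first)
{
	blkno_t		blkno = first;

	for (;;)
	{
		blkno_t		num_pages = relation_length();

		if (blkno >= num_pages)
			break;				/* no pages were added since the last pass */
		for (; blkno < num_pages; blkno++)
			visit_page(blkno);	/* may run concurrently with extension */
	}
}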