PostgreSQL Source Code git master
nbtree.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "access/relscan.h"
#include "access/stratnum.h"
#include "commands/progress.h"
#include "commands/vacuum.h"
#include "nodes/execnodes.h"
#include "pgstat.h"
#include "storage/bulk_write.h"
#include "storage/condition_variable.h"
#include "storage/indexfsm.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/read_stream.h"
#include "utils/fmgrprotos.h"
#include "utils/index_selfuncs.h"
#include "utils/memutils.h"
Go to the source code of this file.

Data Structures

struct  BTParallelScanDescData
 

Typedefs

typedef struct BTParallelScanDescData BTParallelScanDescData
 
typedef struct BTParallelScanDescData * BTParallelScanDesc
 

Enumerations

enum  BTPS_State {
  BTPARALLEL_NOT_INITIALIZED , BTPARALLEL_NEED_PRIMSCAN , BTPARALLEL_ADVANCING , BTPARALLEL_IDLE ,
  BTPARALLEL_DONE
}
 

Functions

static void btvacuumscan (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid)
 
static BlockNumber btvacuumpage (BTVacState *vstate, Buffer buf)
 
static BTVacuumPosting btreevacuumposting (BTVacState *vstate, IndexTuple posting, OffsetNumber updatedoffset, int *nremaining)
 
Datum bthandler (PG_FUNCTION_ARGS)
 
void btbuildempty (Relation index)
 
bool btinsert (Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo)
 
bool btgettuple (IndexScanDesc scan, ScanDirection dir)
 
int64 btgetbitmap (IndexScanDesc scan, TIDBitmap *tbm)
 
IndexScanDesc btbeginscan (Relation rel, int nkeys, int norderbys)
 
void btrescan (IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
 
void btendscan (IndexScanDesc scan)
 
void btmarkpos (IndexScanDesc scan)
 
void btrestrpos (IndexScanDesc scan)
 
Size btestimateparallelscan (int nkeys, int norderbys)
 
void btinitparallelscan (void *target)
 
void btparallelrescan (IndexScanDesc scan)
 
bool _bt_parallel_seize (IndexScanDesc scan, BlockNumber *next_scan_page, BlockNumber *last_curr_page, bool first)
 
void _bt_parallel_release (IndexScanDesc scan, BlockNumber next_scan_page, BlockNumber curr_page)
 
void _bt_parallel_done (IndexScanDesc scan)
 
void _bt_parallel_primscan_schedule (IndexScanDesc scan, BlockNumber curr_page)
 
IndexBulkDeleteResult * btbulkdelete (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
 
IndexBulkDeleteResult * btvacuumcleanup (IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 
bool btcanreturn (Relation index, int attno)
 
int btgettreeheight (Relation rel)
 
CompareType bttranslatestrategy (StrategyNumber strategy, Oid opfamily)
 
StrategyNumber bttranslatecmptype (CompareType cmptype, Oid opfamily)
 

Typedef Documentation

◆ BTParallelScanDesc

Definition at line 84 of file nbtree.c.

◆ BTParallelScanDescData

Enumeration Type Documentation

◆ BTPS_State

enum BTPS_State
Enumerator
BTPARALLEL_NOT_INITIALIZED 
BTPARALLEL_NEED_PRIMSCAN 
BTPARALLEL_ADVANCING 
BTPARALLEL_IDLE 
BTPARALLEL_DONE 

Definition at line 53 of file nbtree.c.

typedef enum
{
    BTPARALLEL_NOT_INITIALIZED,
    BTPARALLEL_NEED_PRIMSCAN,
    BTPARALLEL_ADVANCING,
    BTPARALLEL_IDLE,
    BTPARALLEL_DONE,
} BTPS_State;
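
The state machine these values implement is not spelled out on this page; the following summary is inferred from the parallel-scan functions documented below (a sketch, not an authoritative diagram):

/*
 * BTPARALLEL_NOT_INITIALIZED  initial state, set by btinitparallelscan()
 *        |
 *        v   _bt_parallel_seize()
 * BTPARALLEL_ADVANCING        one backend is choosing the next page
 *        |
 *        v   _bt_parallel_release()
 * BTPARALLEL_IDLE             next page published; any backend may seize
 *        |
 *        +-- _bt_parallel_primscan_schedule() --> BTPARALLEL_NEED_PRIMSCAN
 *        |
 *        v   _bt_parallel_done()
 * BTPARALLEL_DONE             scan finished; sleeping workers are woken
 */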

Function Documentation

◆ _bt_parallel_done()

void _bt_parallel_done ( IndexScanDesc  scan)

Definition at line 782 of file nbtree.c.

{
    BTScanOpaque so = (BTScanOpaque) scan->opaque;
    ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
    BTParallelScanDesc btscan;
    bool        status_changed = false;

    Assert(!BTScanPosIsValid(so->currPos));

    /* Do nothing, for non-parallel scans */
    if (parallel_scan == NULL)
        return;

    /*
     * Should not mark parallel scan done when there's still a pending
     * primitive index scan
     */
    if (so->needPrimScan)
        return;

    btscan = (BTParallelScanDesc) OffsetToPointer(parallel_scan,
                                                  parallel_scan->ps_offset_am);

    /*
     * Mark the parallel scan as done, unless some other process did so
     * already
     */
    LWLockAcquire(&btscan->btps_lock, LW_EXCLUSIVE);
    Assert(btscan->btps_pageStatus != BTPARALLEL_NEED_PRIMSCAN);
    if (btscan->btps_pageStatus != BTPARALLEL_DONE)
    {
        btscan->btps_pageStatus = BTPARALLEL_DONE;
        status_changed = true;
    }
    LWLockRelease(&btscan->btps_lock);

    /* wake up all the workers associated with this parallel scan */
    if (status_changed)
        ConditionVariableBroadcast(&btscan->btps_cv);
}

References Assert(), BTPARALLEL_DONE, BTPARALLEL_NEED_PRIMSCAN, BTScanPosIsValid, ConditionVariableBroadcast(), BTScanOpaqueData::currPos, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), BTScanOpaqueData::needPrimScan, OffsetToPointer, IndexScanDescData::opaque, and IndexScanDescData::parallel_scan.

Referenced by _bt_endpoint(), _bt_first(), _bt_parallel_seize(), _bt_readnextpage(), and _bt_start_prim_scan().

◆ _bt_parallel_primscan_schedule()

void _bt_parallel_primscan_schedule ( IndexScanDesc  scan,
BlockNumber  curr_page 
)

Definition at line 832 of file nbtree.c.

{
    BTScanOpaque so = (BTScanOpaque) scan->opaque;
    ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
    BTParallelScanDesc btscan;

    Assert(so->numArrayKeys);

    btscan = (BTParallelScanDesc) OffsetToPointer(parallel_scan,
                                                  parallel_scan->ps_offset_am);

    LWLockAcquire(&btscan->btps_lock, LW_EXCLUSIVE);
    if (btscan->btps_lastCurrPage == curr_page &&
        btscan->btps_pageStatus == BTPARALLEL_IDLE)
    {
        btscan->btps_nextScanPage = InvalidBlockNumber;
        btscan->btps_lastCurrPage = InvalidBlockNumber;
        btscan->btps_pageStatus = BTPARALLEL_NEED_PRIMSCAN;

        /* Serialize scan's current array keys */
        for (int i = 0; i < so->numArrayKeys; i++)
        {
            BTArrayKeyInfo *array = &so->arrayKeys[i];

            btscan->btps_arrElems[i] = array->cur_elem;
        }
    }
    LWLockRelease(&btscan->btps_lock);
}

References BTScanOpaqueData::arrayKeys, Assert(), BTPARALLEL_IDLE, BTPARALLEL_NEED_PRIMSCAN, BTArrayKeyInfo::cur_elem, i, InvalidBlockNumber, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), BTScanOpaqueData::numArrayKeys, OffsetToPointer, IndexScanDescData::opaque, and IndexScanDescData::parallel_scan.

Referenced by _bt_advance_array_keys().

◆ _bt_parallel_release()

void _bt_parallel_release ( IndexScanDesc  scan,
BlockNumber  next_scan_page,
BlockNumber  curr_page 
)

Definition at line 755 of file nbtree.c.

{
    ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
    BTParallelScanDesc btscan;

    Assert(BlockNumberIsValid(next_scan_page));

    btscan = (BTParallelScanDesc) OffsetToPointer(parallel_scan,
                                                  parallel_scan->ps_offset_am);

    LWLockAcquire(&btscan->btps_lock, LW_EXCLUSIVE);
    btscan->btps_nextScanPage = next_scan_page;
    btscan->btps_lastCurrPage = curr_page;
    btscan->btps_pageStatus = BTPARALLEL_IDLE;
    LWLockRelease(&btscan->btps_lock);
    ConditionVariableSignal(&btscan->btps_cv);
}

References Assert(), BlockNumberIsValid(), BTPARALLEL_IDLE, BTParallelScanDescData::btps_cv, BTParallelScanDescData::btps_lastCurrPage, BTParallelScanDescData::btps_lock, BTParallelScanDescData::btps_nextScanPage, BTParallelScanDescData::btps_pageStatus, ConditionVariableSignal(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), OffsetToPointer, IndexScanDescData::parallel_scan, and ParallelIndexScanDescData::ps_offset_am.

Referenced by _bt_readnextpage(), and _bt_readpage().

◆ _bt_parallel_seize()

bool _bt_parallel_seize ( IndexScanDesc  scan,
BlockNumber *  next_scan_page,
BlockNumber *  last_curr_page,
bool  first 
)

Definition at line 613 of file nbtree.c.

{
    BTScanOpaque so = (BTScanOpaque) scan->opaque;
    bool        exit_loop = false,
                status = true,
                endscan = false;
    ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
    BTParallelScanDesc btscan;

    *next_scan_page = InvalidBlockNumber;
    *last_curr_page = InvalidBlockNumber;

    /*
     * Reset so->currPos, and initialize moreLeft/moreRight such that the next
     * call to _bt_readnextpage treats this backend similarly to a serial
     * backend that steps from *last_curr_page to *next_scan_page (unless this
     * backend's so->currPos is initialized by _bt_readfirstpage before then).
     */
    BTScanPosInvalidate(so->currPos);
    so->currPos.moreLeft = so->currPos.moreRight = true;

    if (first)
    {
        /*
         * Initialize array related state when called from _bt_first, assuming
         * that this will be the first primitive index scan for the scan
         */
        so->needPrimScan = false;
        so->scanBehind = false;
        so->oppositeDirCheck = false;
    }
    else
    {
        /*
         * Don't attempt to seize the scan when it requires another primitive
         * index scan, since caller's backend cannot start it right now
         */
        if (so->needPrimScan)
            return false;
    }

    btscan = (BTParallelScanDesc) OffsetToPointer(parallel_scan,
                                                  parallel_scan->ps_offset_am);

    while (1)
    {
        LWLockAcquire(&btscan->btps_lock, LW_EXCLUSIVE);

        if (btscan->btps_pageStatus == BTPARALLEL_DONE)
        {
            /* We're done with this parallel index scan */
            status = false;
        }
        else if (btscan->btps_pageStatus == BTPARALLEL_IDLE &&
                 btscan->btps_nextScanPage == P_NONE)
        {
            /* End this parallel index scan */
            status = false;
            endscan = true;
        }
        else if (btscan->btps_pageStatus == BTPARALLEL_NEED_PRIMSCAN)
        {
            Assert(so->numArrayKeys);

            if (first)
            {
                /* Can start scheduled primitive scan right away, so do so */
                btscan->btps_pageStatus = BTPARALLEL_ADVANCING;
                for (int i = 0; i < so->numArrayKeys; i++)
                {
                    BTArrayKeyInfo *array = &so->arrayKeys[i];
                    ScanKey     skey = &so->keyData[array->scan_key];

                    array->cur_elem = btscan->btps_arrElems[i];
                    skey->sk_argument = array->elem_values[array->cur_elem];
                }
                exit_loop = true;
            }
            else
            {
                /*
                 * Don't attempt to seize the scan when it requires another
                 * primitive index scan, since caller's backend cannot start
                 * it right now
                 */
                status = false;
            }

            /*
             * Either way, update backend local state to indicate that a
             * pending primitive scan is required
             */
            so->needPrimScan = true;
            so->scanBehind = false;
            so->oppositeDirCheck = false;
        }
        else if (btscan->btps_pageStatus != BTPARALLEL_ADVANCING)
        {
            /*
             * We have successfully seized control of the scan for the purpose
             * of advancing it to a new page!
             */
            btscan->btps_pageStatus = BTPARALLEL_ADVANCING;
            Assert(btscan->btps_nextScanPage != P_NONE);
            *next_scan_page = btscan->btps_nextScanPage;
            *last_curr_page = btscan->btps_lastCurrPage;
            exit_loop = true;
        }
        LWLockRelease(&btscan->btps_lock);
        if (exit_loop || !status)
            break;
        ConditionVariableSleep(&btscan->btps_cv, WAIT_EVENT_BTREE_PAGE);
    }
    ConditionVariableCancelSleep();

    /* When the scan has reached the rightmost (or leftmost) page, end it */
    if (endscan)
        _bt_parallel_done(scan);

    return status;
}

References _bt_parallel_done(), BTScanOpaqueData::arrayKeys, Assert(), BTPARALLEL_ADVANCING, BTPARALLEL_DONE, BTPARALLEL_IDLE, BTPARALLEL_NEED_PRIMSCAN, BTScanPosInvalidate, ConditionVariableCancelSleep(), ConditionVariableSleep(), BTArrayKeyInfo::cur_elem, BTScanOpaqueData::currPos, BTArrayKeyInfo::elem_values, i, InvalidBlockNumber, BTScanOpaqueData::keyData, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), BTScanPosData::moreLeft, BTScanPosData::moreRight, BTScanOpaqueData::needPrimScan, BTScanOpaqueData::numArrayKeys, OffsetToPointer, IndexScanDescData::opaque, BTScanOpaqueData::oppositeDirCheck, P_NONE, IndexScanDescData::parallel_scan, BTArrayKeyInfo::scan_key, BTScanOpaqueData::scanBehind, and ScanKeyData::sk_argument.

Referenced by _bt_first(), and _bt_readnextpage().
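
Together with _bt_parallel_release() and _bt_parallel_done(), this implements the page-at-a-time handoff between cooperating backends. A simplified sketch of the calling pattern (hypothetical code: choose_next_page() and process_page() stand in for the real _bt_readnextpage()/_bt_readpage() logic):

static void
parallel_worker_loop(IndexScanDesc scan)
{
    BlockNumber next_scan_page;
    BlockNumber last_curr_page;

    while (_bt_parallel_seize(scan, &next_scan_page, &last_curr_page, false))
    {
        /* we hold BTPARALLEL_ADVANCING: pick a successor and publish it */
        BlockNumber new_next = choose_next_page(scan, next_scan_page);

        _bt_parallel_release(scan, new_next, next_scan_page);

        /* other backends can now advance while we process our page */
        process_page(scan, next_scan_page);
    }
    /* false return: scan is BTPARALLEL_DONE, or a primitive scan is pending */
}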

◆ btbeginscan()

IndexScanDesc btbeginscan ( Relation  rel,
int  nkeys,
int  norderbys 
)

Definition at line 319 of file nbtree.c.

{
    IndexScanDesc scan;
    BTScanOpaque so;

    /* no order by operators allowed */
    Assert(norderbys == 0);

    /* get the scan */
    scan = RelationGetIndexScan(rel, nkeys, norderbys);

    /* allocate private workspace */
    so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
    BTScanPosInvalidate(so->currPos);
    BTScanPosInvalidate(so->markPos);
    if (scan->numberOfKeys > 0)
        so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
    else
        so->keyData = NULL;

    so->needPrimScan = false;
    so->scanBehind = false;
    so->oppositeDirCheck = false;
    so->arrayKeys = NULL;
    so->orderProcs = NULL;
    so->arrayContext = NULL;

    so->killedItems = NULL;     /* until needed */
    so->numKilled = 0;

    /*
     * We don't know yet whether the scan will be index-only, so we do not
     * allocate the tuple workspace arrays until btrescan.  However, we set up
     * scan->xs_itupdesc whether we'll need it or not, since that's so cheap.
     */
    so->currTuples = so->markTuples = NULL;

    scan->xs_itupdesc = RelationGetDescr(rel);

    scan->opaque = so;

    return scan;
}

References BTScanOpaqueData::arrayContext, BTScanOpaqueData::arrayKeys, Assert(), BTScanPosInvalidate, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanOpaqueData::keyData, BTScanOpaqueData::killedItems, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanOpaqueData::needPrimScan, IndexScanDescData::numberOfKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, BTScanOpaqueData::oppositeDirCheck, BTScanOpaqueData::orderProcs, palloc(), RelationGetDescr, RelationGetIndexScan(), BTScanOpaqueData::scanBehind, and IndexScanDescData::xs_itupdesc.

Referenced by bthandler().
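
Callers normally reach this through the index AM interface (index_beginscan() and friends) rather than directly. A minimal sketch of the scan lifecycle, assuming an int4 index and hypothetical setup code:

IndexScanDesc scan = btbeginscan(indexRel, 1, 0);   /* one scankey, no orderbys */
ScanKeyData key;

/* WHERE indexed_col = 42 */
ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(42));
btrescan(scan, &key, 1, NULL, 0);

while (btgettuple(scan, ForwardScanDirection))
{
    /* scan->xs_heaptid now identifies a matching heap tuple */
}

btendscan(scan);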

◆ btbuildempty()

void btbuildempty ( Relation  index)

Definition at line 166 of file nbtree.c.

{
    bool        allequalimage = _bt_allequalimage(index, false);
    BulkWriteState *bulkstate;
    BulkWriteBuffer metabuf;

    bulkstate = smgr_bulk_start_rel(index, INIT_FORKNUM);

    /* Construct metapage. */
    metabuf = smgr_bulk_get_buf(bulkstate);
    _bt_initmetapage((Page) metabuf, P_NONE, 0, allequalimage);
    smgr_bulk_write(bulkstate, BTREE_METAPAGE, metabuf, true);

    smgr_bulk_finish(bulkstate);
}

References _bt_allequalimage(), _bt_initmetapage(), BTREE_METAPAGE, INIT_FORKNUM, P_NONE, smgr_bulk_finish(), smgr_bulk_get_buf(), smgr_bulk_start_rel(), and smgr_bulk_write().

Referenced by bthandler().

◆ btbulkdelete()

IndexBulkDeleteResult * btbulkdelete ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats,
IndexBulkDeleteCallback  callback,
void *  callback_state 
)

Definition at line 870 of file nbtree.c.

{
    Relation    rel = info->index;
    BTCycleId   cycleid;

    /* allocate stats if first time through, else re-use existing struct */
    if (stats == NULL)
        stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));

    /* Establish the vacuum cycle ID to use for this scan */
    /* The ENSURE stuff ensures we clean up shared memory on failure */
    PG_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
    {
        cycleid = _bt_start_vacuum(rel);

        btvacuumscan(info, stats, callback, callback_state, cycleid);
    }
    PG_END_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
    _bt_end_vacuum(rel);

    return stats;
}

References _bt_end_vacuum(), _bt_end_vacuum_callback(), _bt_start_vacuum(), btvacuumscan(), callback(), IndexVacuumInfo::index, palloc0(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, and PointerGetDatum().

Referenced by bthandler().
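
The callback (type IndexBulkDeleteCallback from genam.h) is consulted once per heap TID found in the index and returns true when the corresponding index entry should be removed. A minimal sketch of such a callback, assuming a hypothetical caller-maintained set of dead TIDs:

/* DeadTidSet and dead_tid_set_contains() are hypothetical;
 * the callback signature is the real one from genam.h. */
static bool
dead_tids_callback(ItemPointer itemptr, void *state)
{
    DeadTidSet *dead = (DeadTidSet *) state;

    return dead_tid_set_contains(dead, itemptr);
}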

◆ btcanreturn()

bool btcanreturn ( Relation  index,
int  attno 
)

Definition at line 1545 of file nbtree.c.

{
    return true;
}

Referenced by bthandler().

◆ btendscan()

void btendscan ( IndexScanDesc  scan)

Definition at line 424 of file nbtree.c.

{
    BTScanOpaque so = (BTScanOpaque) scan->opaque;

    /* we aren't holding any read locks, but gotta drop the pins */
    if (BTScanPosIsValid(so->currPos))
    {
        /* Before leaving current page, deal with any killed items */
        if (so->numKilled > 0)
            _bt_killitems(scan);
        BTScanPosUnpinIfPinned(so->currPos);
    }

    so->markItemIndex = -1;
    BTScanPosUnpinIfPinned(so->markPos);

    /* No need to invalidate positions, the RAM is about to be freed. */

    /* Release storage */
    if (so->keyData != NULL)
        pfree(so->keyData);
    /* so->arrayKeys and so->orderProcs are in arrayContext */
    if (so->arrayContext != NULL)
        MemoryContextDelete(so->arrayContext);
    if (so->killedItems != NULL)
        pfree(so->killedItems);
    if (so->currTuples != NULL)
        pfree(so->currTuples);
    /* so->markTuples should not be pfree'd, see btrescan */
    pfree(so);
}

References _bt_killitems(), BTScanOpaqueData::arrayContext, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, if(), BTScanOpaqueData::keyData, BTScanOpaqueData::killedItems, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, MemoryContextDelete(), BTScanOpaqueData::numKilled, IndexScanDescData::opaque, and pfree().

Referenced by bthandler().

◆ btestimateparallelscan()

Size btestimateparallelscan ( int  nkeys,
int  norderbys 
)

Definition at line 544 of file nbtree.c.

{
    /* Pessimistically assume all input scankeys will be output with arrays */
    return offsetof(BTParallelScanDescData, btps_arrElems) + sizeof(int) * nkeys;
}

References BTParallelScanDescData::btps_arrElems.

Referenced by bthandler().
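
In other words, the reserved size grows by sizeof(int) per scankey, since each scankey may carry an array whose cursor (BTArrayKeyInfo.cur_elem) must be serialized into btps_arrElems. An illustrative comparison:

/* illustrative only: the difference is exactly two array-cursor slots */
Size sz2 = btestimateparallelscan(2, 0);
Size sz4 = btestimateparallelscan(4, 0);

Assert(sz4 - sz2 == 2 * sizeof(int));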

◆ btgetbitmap()

int64 btgetbitmap ( IndexScanDesc  scan,
TIDBitmap *  tbm 
)

Definition at line 273 of file nbtree.c.

{
    BTScanOpaque so = (BTScanOpaque) scan->opaque;
    int64       ntids = 0;
    ItemPointer heapTid;

    /* Each loop iteration performs another primitive index scan */
    do
    {
        /* Fetch the first page & tuple */
        if (_bt_first(scan, ForwardScanDirection))
        {
            /* Save tuple ID, and continue scanning */
            heapTid = &scan->xs_heaptid;
            tbm_add_tuples(tbm, heapTid, 1, false);
            ntids++;

            for (;;)
            {
                /*
                 * Advance to next tuple within page.  This is the same as the
                 * easy case in _bt_next().
                 */
                if (++so->currPos.itemIndex > so->currPos.lastItem)
                {
                    /* let _bt_next do the heavy lifting */
                    if (!_bt_next(scan, ForwardScanDirection))
                        break;
                }

                /* Save tuple ID, and continue scanning */
                heapTid = &so->currPos.items[so->currPos.itemIndex].heapTid;
                tbm_add_tuples(tbm, heapTid, 1, false);
                ntids++;
            }
        }
        /* Now see if we need another primitive index scan */
    } while (so->numArrayKeys && _bt_start_prim_scan(scan, ForwardScanDirection));

    return ntids;
}

References _bt_first(), _bt_next(), BTScanOpaqueData::currPos, ForwardScanDirection, BTScanPosItem::heapTid, if(), BTScanPosData::itemIndex, BTScanPosData::items, BTScanPosData::lastItem, IndexScanDescData::opaque, tbm_add_tuples(), and IndexScanDescData::xs_heaptid.

Referenced by bthandler().

◆ btgettreeheight()

int btgettreeheight ( Relation  rel)

Definition at line 1554 of file nbtree.c.

{
    return _bt_getrootheight(rel);
}

References _bt_getrootheight().

Referenced by bthandler().

◆ btgettuple()

bool btgettuple ( IndexScanDesc  scan,
ScanDirection  dir 
)

Definition at line 213 of file nbtree.c.

{
    BTScanOpaque so = (BTScanOpaque) scan->opaque;
    bool        res;

    /* btree indexes are never lossy */
    scan->xs_recheck = false;

    /* Each loop iteration performs another primitive index scan */
    do
    {
        /*
         * If we've already initialized this scan, we can just advance it in
         * the appropriate direction.  If we haven't done so yet, we call
         * _bt_first() to get the first item in the scan.
         */
        if (!BTScanPosIsValid(so->currPos))
            res = _bt_first(scan, dir);
        else
        {
            /*
             * Check to see if we should kill the previously-fetched tuple.
             */
            if (scan->kill_prior_tuple)
            {
                /*
                 * Yes, remember it for later. (We'll deal with all such
                 * tuples at once right before leaving the index page.)  The
                 * test for numKilled overrun is not just paranoia: if the
                 * caller reverses direction in the indexscan then the same
                 * item might get entered multiple times. It's not worth
                 * trying to optimize that, so we don't detect it, but instead
                 * just forget any excess entries.
                 */
                if (so->killedItems == NULL)
                    so->killedItems = (int *)
                        palloc(MaxTIDsPerBTreePage * sizeof(int));
                if (so->numKilled < MaxTIDsPerBTreePage)
                    so->killedItems[so->numKilled++] = so->currPos.itemIndex;
            }

            /*
             * Now continue the scan.
             */
            res = _bt_next(scan, dir);
        }

        /* If we have a tuple, return it ... */
        if (res)
            break;
        /* ... otherwise see if we need another primitive index scan */
    } while (so->numArrayKeys && _bt_start_prim_scan(scan, dir));

    return res;
}

References _bt_first(), _bt_next(), BTScanPosIsValid, BTScanOpaqueData::currPos, if(), BTScanPosData::itemIndex, IndexScanDescData::kill_prior_tuple, BTScanOpaqueData::killedItems, MaxTIDsPerBTreePage, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc(), and IndexScanDescData::xs_recheck.

Referenced by bthandler().

◆ bthandler()

Datum bthandler ( PG_FUNCTION_ARGS  )

Definition at line 102 of file nbtree.c.

{
    IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);

    amroutine->amstrategies = BTMaxStrategyNumber;
    amroutine->amsupport = BTNProcs;
    amroutine->amoptsprocnum = BTOPTIONS_PROC;
    amroutine->amcanorder = true;
    amroutine->amcanorderbyop = false;
    amroutine->amcanhash = false;
    amroutine->amconsistentequality = true;
    amroutine->amconsistentordering = true;
    amroutine->amcanbackward = true;
    amroutine->amcanunique = true;
    amroutine->amcanmulticol = true;
    amroutine->amoptionalkey = true;
    amroutine->amsearcharray = true;
    amroutine->amsearchnulls = true;
    amroutine->amstorage = false;
    amroutine->amclusterable = true;
    amroutine->ampredlocks = true;
    amroutine->amcanparallel = true;
    amroutine->amcanbuildparallel = true;
    amroutine->amcaninclude = true;
    amroutine->amusemaintenanceworkmem = false;
    amroutine->amsummarizing = false;
    amroutine->amparallelvacuumoptions =
        VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_COND_CLEANUP;
    amroutine->amkeytype = InvalidOid;

    amroutine->ambuild = btbuild;
    amroutine->ambuildempty = btbuildempty;
    amroutine->aminsert = btinsert;
    amroutine->aminsertcleanup = NULL;
    amroutine->ambulkdelete = btbulkdelete;
    amroutine->amvacuumcleanup = btvacuumcleanup;
    amroutine->amcanreturn = btcanreturn;
    amroutine->amcostestimate = btcostestimate;
    amroutine->amgettreeheight = btgettreeheight;
    amroutine->amoptions = btoptions;
    amroutine->amproperty = btproperty;
    amroutine->ambuildphasename = btbuildphasename;
    amroutine->amvalidate = btvalidate;
    amroutine->amadjustmembers = btadjustmembers;
    amroutine->ambeginscan = btbeginscan;
    amroutine->amrescan = btrescan;
    amroutine->amgettuple = btgettuple;
    amroutine->amgetbitmap = btgetbitmap;
    amroutine->amendscan = btendscan;
    amroutine->ammarkpos = btmarkpos;
    amroutine->amrestrpos = btrestrpos;
    amroutine->amestimateparallelscan = btestimateparallelscan;
    amroutine->aminitparallelscan = btinitparallelscan;
    amroutine->amparallelrescan = btparallelrescan;
    amroutine->amtranslatestrategy = bttranslatestrategy;
    amroutine->amtranslatecmptype = bttranslatecmptype;

    PG_RETURN_POINTER(amroutine);
}

References IndexAmRoutine::amadjustmembers, IndexAmRoutine::ambeginscan, IndexAmRoutine::ambuild, IndexAmRoutine::ambuildempty, IndexAmRoutine::ambuildphasename, IndexAmRoutine::ambulkdelete, IndexAmRoutine::amcanbackward, IndexAmRoutine::amcanbuildparallel, IndexAmRoutine::amcanhash, IndexAmRoutine::amcaninclude, IndexAmRoutine::amcanmulticol, IndexAmRoutine::amcanorder, IndexAmRoutine::amcanorderbyop, IndexAmRoutine::amcanparallel, IndexAmRoutine::amcanreturn, IndexAmRoutine::amcanunique, IndexAmRoutine::amclusterable, IndexAmRoutine::amconsistentequality, IndexAmRoutine::amconsistentordering, IndexAmRoutine::amcostestimate, IndexAmRoutine::amendscan, IndexAmRoutine::amestimateparallelscan, IndexAmRoutine::amgetbitmap, IndexAmRoutine::amgettreeheight, IndexAmRoutine::amgettuple, IndexAmRoutine::aminitparallelscan, IndexAmRoutine::aminsert, IndexAmRoutine::aminsertcleanup, IndexAmRoutine::amkeytype, IndexAmRoutine::ammarkpos, IndexAmRoutine::amoptionalkey, IndexAmRoutine::amoptions, IndexAmRoutine::amoptsprocnum, IndexAmRoutine::amparallelrescan, IndexAmRoutine::amparallelvacuumoptions, IndexAmRoutine::ampredlocks, IndexAmRoutine::amproperty, IndexAmRoutine::amrescan, IndexAmRoutine::amrestrpos, IndexAmRoutine::amsearcharray, IndexAmRoutine::amsearchnulls, IndexAmRoutine::amstorage, IndexAmRoutine::amstrategies, IndexAmRoutine::amsummarizing, IndexAmRoutine::amsupport, IndexAmRoutine::amtranslatecmptype, IndexAmRoutine::amtranslatestrategy, IndexAmRoutine::amusemaintenanceworkmem, IndexAmRoutine::amvacuumcleanup, IndexAmRoutine::amvalidate, btadjustmembers(), btbeginscan(), btbuild(), btbuildempty(), btbuildphasename(), btbulkdelete(), btcanreturn(), btcostestimate(), btendscan(), btestimateparallelscan(), btgetbitmap(), btgettreeheight(), btgettuple(), btinitparallelscan(), btinsert(), btmarkpos(), BTMaxStrategyNumber, BTNProcs, btoptions(), BTOPTIONS_PROC, btparallelrescan(), btproperty(), btrescan(), btrestrpos(), bttranslatecmptype(), bttranslatestrategy(), btvacuumcleanup(), btvalidate(), InvalidOid, makeNode, PG_RETURN_POINTER, VACUUM_OPTION_PARALLEL_BULKDEL, and VACUUM_OPTION_PARALLEL_COND_CLEANUP.
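
bthandler is not usually called directly from C; the server looks up pg_am.amhandler and invokes it through the function manager, as GetIndexAmRoutine() in amapi.c does. A rough sketch of that pattern (amhandler_oid is assumed to hold the handler function's OID):

Datum           datum = OidFunctionCall0(amhandler_oid);
IndexAmRoutine *routine = (IndexAmRoutine *) DatumGetPointer(datum);

Assert(IsA(routine, IndexAmRoutine));
/* routine->ambeginscan, routine->aminsert, etc. are now usable */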

◆ btinitparallelscan()

void btinitparallelscan ( void *  target)

Definition at line 554 of file nbtree.c.

Referenced by bthandler().

◆ btinsert()

bool btinsert ( Relation  rel,
Datum *  values,
bool *  isnull,
ItemPointer  ht_ctid,
Relation  heapRel,
IndexUniqueCheck  checkUnique,
bool  indexUnchanged,
IndexInfo *  indexInfo 
)

Definition at line 189 of file nbtree.c.

{
    bool        result;
    IndexTuple  itup;

    /* generate an index tuple */
    itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
    itup->t_tid = *ht_ctid;

    result = _bt_doinsert(rel, itup, checkUnique, indexUnchanged, heapRel);

    pfree(itup);

    return result;
}

References _bt_doinsert(), index_form_tuple(), pfree(), RelationGetDescr, IndexTupleData::t_tid, and values.

Referenced by bthandler().

◆ btmarkpos()

void btmarkpos ( IndexScanDesc  scan)

Definition at line 460 of file nbtree.c.

{
    BTScanOpaque so = (BTScanOpaque) scan->opaque;

    /* There may be an old mark with a pin (but no lock). */
    BTScanPosUnpinIfPinned(so->markPos);

    /*
     * Just record the current itemIndex.  If we later step to next page
     * before releasing the marked position, _bt_steppage makes a full copy of
     * the currPos struct in markPos.  If (as often happens) the mark is moved
     * before we leave the page, we don't have to do that work.
     */
    if (BTScanPosIsValid(so->currPos))
        so->markItemIndex = so->currPos.itemIndex;
    else
    {
        BTScanPosInvalidate(so->markPos);
        so->markItemIndex = -1;
    }
}

References BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanPosData::itemIndex, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, and IndexScanDescData::opaque.

Referenced by bthandler().
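
btmarkpos() and btrestrpos() together provide the AM's mark/restore support (used, for example, by merge joins). A condensed sketch of the expected call pattern, with hypothetical driver predicates standing in for the executor's logic:

while (btgettuple(scan, ForwardScanDirection))
{
    if (at_group_start(scan))       /* hypothetical */
        btmarkpos(scan);            /* remember the current position */

    if (must_rescan_group(scan))    /* hypothetical */
        btrestrpos(scan);           /* jump back to the marked position */
}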

◆ btparallelrescan()

void btparallelrescan ( IndexScanDesc  scan)

Definition at line 570 of file nbtree.c.

{
    BTParallelScanDesc btscan;
    ParallelIndexScanDesc parallel_scan = scan->parallel_scan;

    Assert(parallel_scan);

    btscan = (BTParallelScanDesc) OffsetToPointer(parallel_scan,
                                                  parallel_scan->ps_offset_am);

    /*
     * In theory, we don't need to acquire the LWLock here, because there
     * shouldn't be any other workers running at this point, but we do so for
     * consistency.
     */
    LWLockAcquire(&btscan->btps_lock, LW_EXCLUSIVE);
    btscan->btps_nextScanPage = InvalidBlockNumber;
    btscan->btps_lastCurrPage = InvalidBlockNumber;
    btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
    LWLockRelease(&btscan->btps_lock);
}

References Assert(), BTPARALLEL_NOT_INITIALIZED, BTParallelScanDescData::btps_lastCurrPage, BTParallelScanDescData::btps_lock, BTParallelScanDescData::btps_nextScanPage, BTParallelScanDescData::btps_pageStatus, InvalidBlockNumber, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), OffsetToPointer, IndexScanDescData::parallel_scan, and ParallelIndexScanDescData::ps_offset_am.

Referenced by bthandler().

◆ btreevacuumposting()

static BTVacuumPosting btreevacuumposting ( BTVacState *  vstate,
IndexTuple  posting,
OffsetNumber  updatedoffset,
int *  nremaining 
)
static

Definition at line 1496 of file nbtree.c.

{
    int         live = 0;
    int         nitem = BTreeTupleGetNPosting(posting);
    ItemPointer items = BTreeTupleGetPosting(posting);
    BTVacuumPosting vacposting = NULL;

    for (int i = 0; i < nitem; i++)
    {
        if (!vstate->callback(items + i, vstate->callback_state))
        {
            /* Live table TID */
            live++;
        }
        else if (vacposting == NULL)
        {
            /*
             * First dead table TID encountered.
             *
             * It's now clear that we need to delete one or more dead table
             * TIDs, so start maintaining metadata describing how to update
             * existing posting list tuple.
             */
            vacposting = palloc(offsetof(BTVacuumPostingData, deletetids) +
                                nitem * sizeof(uint16));

            vacposting->itup = posting;
            vacposting->updatedoffset = updatedoffset;
            vacposting->ndeletedtids = 0;
            vacposting->deletetids[vacposting->ndeletedtids++] = i;
        }
        else
        {
            /* Second or subsequent dead table TID */
            vacposting->deletetids[vacposting->ndeletedtids++] = i;
        }
    }

    *nremaining = live;
    return vacposting;
}

References BTreeTupleGetNPosting(), BTreeTupleGetPosting(), BTVacState::callback, BTVacState::callback_state, BTVacuumPostingData::deletetids, i, items, BTVacuumPostingData::itup, BTVacuumPostingData::ndeletedtids, palloc(), and BTVacuumPostingData::updatedoffset.

Referenced by btvacuumpage().

◆ btrescan()

void btrescan ( IndexScanDesc  scan,
ScanKey  scankey,
int  nscankeys,
ScanKey  orderbys,
int  norderbys 
)

Definition at line 367 of file nbtree.c.

{
    BTScanOpaque so = (BTScanOpaque) scan->opaque;

    /* we aren't holding any read locks, but gotta drop the pins */
    if (BTScanPosIsValid(so->currPos))
    {
        /* Before leaving current page, deal with any killed items */
        if (so->numKilled > 0)
            _bt_killitems(scan);
        BTScanPosUnpinIfPinned(so->currPos);
        BTScanPosInvalidate(so->currPos);
    }

    so->markItemIndex = -1;
    so->needPrimScan = false;
    so->scanBehind = false;
    so->oppositeDirCheck = false;
    BTScanPosUnpinIfPinned(so->markPos);
    BTScanPosInvalidate(so->markPos);

    /*
     * Allocate tuple workspace arrays, if needed for an index-only scan and
     * not already done in a previous rescan call.  To save on palloc
     * overhead, both workspaces are allocated as one palloc block; only this
     * function and btendscan know that.
     *
     * NOTE: this data structure also makes it safe to return data from a
     * "name" column, even though btree name_ops uses an underlying storage
     * datatype of cstring.  The risk there is that "name" is supposed to be
     * padded to NAMEDATALEN, but the actual index tuple is probably shorter.
     * However, since we only return data out of tuples sitting in the
     * currTuples array, a fetch of NAMEDATALEN bytes can at worst pull some
     * data out of the markTuples array --- running off the end of memory for
     * a SIGSEGV is not possible.  Yeah, this is ugly as sin, but it beats
     * adding special-case treatment for name_ops elsewhere.
     */
    if (scan->xs_want_itup && so->currTuples == NULL)
    {
        so->currTuples = (char *) palloc(BLCKSZ * 2);
        so->markTuples = so->currTuples + BLCKSZ;
    }

    /*
     * Reset the scan keys
     */
    if (scankey && scan->numberOfKeys > 0)
        memcpy(scan->keyData, scankey, scan->numberOfKeys * sizeof(ScanKeyData));
    so->numberOfKeys = 0;       /* until _bt_preprocess_keys sets it */
    so->numArrayKeys = 0;       /* ditto */
}

References _bt_killitems(), BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, if(), IndexScanDescData::keyData, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanOpaqueData::needPrimScan, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numberOfKeys, IndexScanDescData::numberOfKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, BTScanOpaqueData::oppositeDirCheck, palloc(), BTScanOpaqueData::scanBehind, and IndexScanDescData::xs_want_itup.

Referenced by bthandler().

◆ btrestrpos()

void btrestrpos ( IndexScanDesc  scan)

Definition at line 486 of file nbtree.c.

{
    BTScanOpaque so = (BTScanOpaque) scan->opaque;

    if (so->markItemIndex >= 0)
    {
        /*
         * The scan has never moved to a new page since the last mark.  Just
         * restore the itemIndex.
         *
         * NB: In this case we can't count on anything in so->markPos to be
         * accurate.
         */
        so->currPos.itemIndex = so->markItemIndex;
    }
    else
    {
        /*
         * The scan moved to a new page after last mark or restore, and we are
         * now restoring to the marked page.  We aren't holding any read
         * locks, but if we're still holding the pin for the current position,
         * we must drop it.
         */
        if (BTScanPosIsValid(so->currPos))
        {
            /* Before leaving current page, deal with any killed items */
            if (so->numKilled > 0)
                _bt_killitems(scan);
            BTScanPosUnpinIfPinned(so->currPos);
        }

        if (BTScanPosIsValid(so->markPos))
        {
            /* bump pin on mark buffer for assignment to current buffer */
            if (BTScanPosIsPinned(so->markPos))
                IncrBufferRefCount(so->markPos.buf);
            memcpy(&so->currPos, &so->markPos,
                   offsetof(BTScanPosData, items[1]) +
                   so->markPos.lastItem * sizeof(BTScanPosItem));
            if (so->currTuples)
                memcpy(so->currTuples, so->markTuples,
                       so->markPos.nextTupleOffset);
            /* Reset the scan's array keys (see _bt_steppage for why) */
            if (so->numArrayKeys)
            {
                _bt_start_array_keys(scan, so->currPos.dir);
                so->needPrimScan = false;
            }
        }
        else
            BTScanPosInvalidate(so->currPos);
    }
}

References _bt_killitems(), _bt_start_array_keys(), BTScanPosInvalidate, BTScanPosIsPinned, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanPosData::buf, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanPosData::dir, if(), IncrBufferRefCount(), BTScanPosData::itemIndex, items, BTScanPosData::lastItem, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanOpaqueData::needPrimScan, BTScanPosData::nextTupleOffset, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numKilled, and IndexScanDescData::opaque.

Referenced by bthandler().

◆ bttranslatecmptype()

StrategyNumber bttranslatecmptype ( CompareType  cmptype,
Oid  opfamily 
)

Definition at line 1580 of file nbtree.c.

{
    switch (cmptype)
    {
        case COMPARE_LT:
            return BTLessStrategyNumber;
        case COMPARE_LE:
            return BTLessEqualStrategyNumber;
        case COMPARE_EQ:
            return BTEqualStrategyNumber;
        case COMPARE_GE:
            return BTGreaterEqualStrategyNumber;
        case COMPARE_GT:
            return BTGreaterStrategyNumber;
        default:
            return InvalidStrategy;
    }
}

References BTEqualStrategyNumber, BTGreaterEqualStrategyNumber, BTGreaterStrategyNumber, BTLessEqualStrategyNumber, BTLessStrategyNumber, COMPARE_EQ, COMPARE_GE, COMPARE_GT, COMPARE_LE, COMPARE_LT, and InvalidStrategy.

Referenced by bthandler().

◆ bttranslatestrategy()

CompareType bttranslatestrategy ( StrategyNumber  strategy,
Oid  opfamily 
)

Definition at line 1560 of file nbtree.c.

{
    switch (strategy)
    {
        case BTLessStrategyNumber:
            return COMPARE_LT;
        case BTLessEqualStrategyNumber:
            return COMPARE_LE;
        case BTEqualStrategyNumber:
            return COMPARE_EQ;
        case BTGreaterEqualStrategyNumber:
            return COMPARE_GE;
        case BTGreaterStrategyNumber:
            return COMPARE_GT;
        default:
            return COMPARE_INVALID;
    }
}

References BTEqualStrategyNumber, BTGreaterEqualStrategyNumber, BTGreaterStrategyNumber, BTLessEqualStrategyNumber, BTLessStrategyNumber, COMPARE_EQ, COMPARE_GE, COMPARE_GT, COMPARE_INVALID, COMPARE_LE, and COMPARE_LT.

Referenced by bthandler().
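
Note that btree ignores the opfamily argument in both translation functions; the mapping is fixed. An illustrative round trip:

Assert(bttranslatestrategy(BTLessStrategyNumber, InvalidOid) == COMPARE_LT);
Assert(bttranslatecmptype(COMPARE_LT, InvalidOid) == BTLessStrategyNumber);

/* out-of-range inputs fall through to the invalid sentinels */
Assert(bttranslatestrategy(InvalidStrategy, InvalidOid) == COMPARE_INVALID);
Assert(bttranslatecmptype(COMPARE_INVALID, InvalidOid) == InvalidStrategy);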

◆ btvacuumcleanup()

IndexBulkDeleteResult * btvacuumcleanup ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats 
)

Definition at line 900 of file nbtree.c.

{
    BlockNumber num_delpages;

    /* No-op in ANALYZE ONLY mode */
    if (info->analyze_only)
        return stats;

    /*
     * If btbulkdelete was called, we need not do anything (we just maintain
     * the information used within _bt_vacuum_needs_cleanup() by calling
     * _bt_set_cleanup_info() below).
     *
     * If btbulkdelete was _not_ called, then we have a choice to make: we
     * must decide whether or not a btvacuumscan() call is needed now (i.e.
     * whether the ongoing VACUUM operation can entirely avoid a physical scan
     * of the index).  A call to _bt_vacuum_needs_cleanup() decides it for us
     * now.
     */
    if (stats == NULL)
    {
        /* Check if VACUUM operation can entirely avoid btvacuumscan() call */
        if (!_bt_vacuum_needs_cleanup(info->index))
            return NULL;

        /*
         * Since we aren't going to actually delete any leaf items, there's no
         * need to go through all the vacuum-cycle-ID pushups here.
         *
         * Posting list tuples are a source of inaccuracy for cleanup-only
         * scans.  btvacuumscan() will assume that the number of index tuples
         * from each page can be used as num_index_tuples, even though
         * num_index_tuples is supposed to represent the number of TIDs in the
         * index.  This naive approach can underestimate the number of tuples
         * in the index significantly.
         *
         * We handle the problem by making num_index_tuples an estimate in
         * cleanup-only case.
         */
        stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
        btvacuumscan(info, stats, NULL, NULL, 0);
        stats->estimated_count = true;
    }

    /*
     * Maintain num_delpages value in metapage for _bt_vacuum_needs_cleanup().
     *
     * num_delpages is the number of deleted pages now in the index that were
     * not safe to place in the FSM to be recycled just yet.  num_delpages is
     * greater than 0 only when _bt_pagedel() actually deleted pages during
     * our call to btvacuumscan().  Even then, _bt_pendingfsm_finalize() must
     * have failed to place any newly deleted pages in the FSM just moments
     * ago.  (Actually, there are edge cases where recycling of the current
     * VACUUM's newly deleted pages does not even become safe by the time the
     * next VACUUM comes around.  See nbtree/README.)
     */
    Assert(stats->pages_deleted >= stats->pages_free);
    num_delpages = stats->pages_deleted - stats->pages_free;
    _bt_set_cleanup_info(info->index, num_delpages);

    /*
     * It's quite possible for us to be fooled by concurrent page splits into
     * double-counting some index tuples, so disbelieve any total that exceeds
     * the underlying heap's count ... if we know that accurately.  Otherwise
     * this might just make matters worse.
     */
    if (!info->estimated_count)
    {
        if (stats->num_index_tuples > info->num_heap_tuples)
            stats->num_index_tuples = info->num_heap_tuples;
    }

    return stats;
}

References _bt_set_cleanup_info(), _bt_vacuum_needs_cleanup(), IndexVacuumInfo::analyze_only, Assert(), btvacuumscan(), IndexVacuumInfo::estimated_count, IndexBulkDeleteResult::estimated_count, IndexVacuumInfo::index, IndexVacuumInfo::num_heap_tuples, IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, and palloc0().

Referenced by bthandler().

◆ btvacuumpage()

static BlockNumber btvacuumpage ( BTVacState *  vstate,
Buffer  buf 
)
static

Definition at line 1158 of file nbtree.c.

1159{
1160 IndexVacuumInfo *info = vstate->info;
1161 IndexBulkDeleteResult *stats = vstate->stats;
1163 void *callback_state = vstate->callback_state;
1164 Relation rel = info->index;
1165 Relation heaprel = info->heaprel;
1166 bool attempt_pagedel;
1167 BlockNumber blkno,
1168 backtrack_to;
1170 Page page;
1171 BTPageOpaque opaque;
1172
1173 blkno = scanblkno;
1174
1175backtrack:
1176
1177 attempt_pagedel = false;
1178 backtrack_to = P_NONE;
1179
1180 _bt_lockbuf(rel, buf, BT_READ);
1181 page = BufferGetPage(buf);
1182 opaque = NULL;
1183 if (!PageIsNew(page))
1184 {
1185 _bt_checkpage(rel, buf);
1186 opaque = BTPageGetOpaque(page);
1187 }
1188
1189 Assert(blkno <= scanblkno);
1190 if (blkno != scanblkno)
1191 {
1192 /*
1193 * We're backtracking.
1194 *
1195 * We followed a right link to a sibling leaf page (a page that
1196 * happens to be from a block located before scanblkno). The only
1197 * case we want to do anything with is a live leaf page having the
1198 * current vacuum cycle ID.
1199 *
1200 * The page had better be in a state that's consistent with what we
1201 * expect. Check for conditions that imply corruption in passing. It
1202 * can't be half-dead because only an interrupted VACUUM process can
1203 * leave pages in that state, so we'd definitely have dealt with it
1204 * back when the page was the scanblkno page (half-dead pages are
1205 * always marked fully deleted by _bt_pagedel(), barring corruption).
1206 */
1207 if (!opaque || !P_ISLEAF(opaque) || P_ISHALFDEAD(opaque))
1208 {
1209 Assert(false);
1210 ereport(LOG,
1211 (errcode(ERRCODE_INDEX_CORRUPTED),
1212 errmsg_internal("right sibling %u of scanblkno %u unexpectedly in an inconsistent state in index \"%s\"",
1213 blkno, scanblkno, RelationGetRelationName(rel))));
1214 _bt_relbuf(rel, buf);
1215 return scanblkno;
1216 }
1217
1218 /*
1219 * We may have already processed the page in an earlier call, when the
1220 * page was scanblkno. This happens when the leaf page split occurred
1221 * after the scan began, but before the right sibling page became the
1222 * scanblkno.
1223 *
1224 * Page may also have been deleted by current btvacuumpage() call,
1225 * since _bt_pagedel() sometimes deletes the right sibling page of
1226 * scanblkno in passing (it does so after we decided where to
1227 * backtrack to). We don't need to process this page as a deleted
1228 * page a second time now (in fact, it would be wrong to count it as a
1229 * deleted page in the bulk delete statistics a second time).
1230 */
1231 if (opaque->btpo_cycleid != vstate->cycleid || P_ISDELETED(opaque))
1232 {
1233 /* Done with current scanblkno (and all lower split pages) */
1234 _bt_relbuf(rel, buf);
1235 return scanblkno;
1236 }
1237 }
1238
1239 if (!opaque || BTPageIsRecyclable(page, heaprel))
1240 {
1241 /* Okay to recycle this page (which could be leaf or internal) */
1242 RecordFreeIndexPage(rel, blkno);
1243 stats->pages_deleted++;
1244 stats->pages_free++;
1245 }
1246 else if (P_ISDELETED(opaque))
1247 {
1248 /*
1249 * Already deleted page (which could be leaf or internal). Can't
1250 * recycle yet.
1251 */
1252 stats->pages_deleted++;
1253 }
1254 else if (P_ISHALFDEAD(opaque))
1255 {
1256 /* Half-dead leaf page (from interrupted VACUUM) -- finish deleting */
1257 attempt_pagedel = true;
1258
1259 /*
1260 * _bt_pagedel() will increment both pages_newly_deleted and
1261 * pages_deleted stats in all cases (barring corruption)
1262 */
1263 }
1264 else if (P_ISLEAF(opaque))
1265 {
1267 int ndeletable;
1269 int nupdatable;
1270 OffsetNumber offnum,
1271 minoff,
1272 maxoff;
1273 int nhtidsdead,
1274 nhtidslive;
1275
1276 /*
1277 * Trade in the initial read lock for a full cleanup lock on this
1278 * page. We must get such a lock on every leaf page over the course
1279 * of the vacuum scan, whether or not it actually contains any
1280 * deletable tuples --- see nbtree/README.
1281 */
1283
1284 /*
1285 * Check whether we need to backtrack to earlier pages. What we are
1286 * concerned about is a page split that happened since we started the
1287 * vacuum scan. If the split moved tuples on the right half of the
1288 * split (i.e. the tuples that sort high) to a block that we already
1289 * passed over, then we might have missed the tuples. We need to
1290 * backtrack now. (Must do this before possibly clearing btpo_cycleid
1291 * or deleting scanblkno page below!)
1292 */
1293 if (vstate->cycleid != 0 &&
1294 opaque->btpo_cycleid == vstate->cycleid &&
1295 !(opaque->btpo_flags & BTP_SPLIT_END) &&
1296 !P_RIGHTMOST(opaque) &&
1297 opaque->btpo_next < scanblkno)
1298 backtrack_to = opaque->btpo_next;
1299
1300 ndeletable = 0;
1301 nupdatable = 0;
1302 minoff = P_FIRSTDATAKEY(opaque);
1303 maxoff = PageGetMaxOffsetNumber(page);
1304 nhtidsdead = 0;
1305 nhtidslive = 0;
1306 if (callback)
1307 {
1308 /* btbulkdelete callback tells us what to delete (or update) */
1309 for (offnum = minoff;
1310 offnum <= maxoff;
1311 offnum = OffsetNumberNext(offnum))
1312 {
1313 IndexTuple itup;
1314
1315 itup = (IndexTuple) PageGetItem(page,
1316 PageGetItemId(page, offnum));
1317
1318 Assert(!BTreeTupleIsPivot(itup));
1319 if (!BTreeTupleIsPosting(itup))
1320 {
1321 /* Regular tuple, standard table TID representation */
1322 if (callback(&itup->t_tid, callback_state))
1323 {
1324 deletable[ndeletable++] = offnum;
1325 nhtidsdead++;
1326 }
1327 else
1328 nhtidslive++;
1329 }
1330 else
1331 {
1332 BTVacuumPosting vacposting;
1333 int nremaining;
1334
1335 /* Posting list tuple */
1336 vacposting = btreevacuumposting(vstate, itup, offnum,
1337 &nremaining);
1338 if (vacposting == NULL)
1339 {
1340 /*
1341 * All table TIDs from the posting tuple remain, so no
1342 * delete or update required
1343 */
1344 Assert(nremaining == BTreeTupleGetNPosting(itup));
1345 }
1346 else if (nremaining > 0)
1347 {
1348
1349 /*
1350 * Store metadata about posting list tuple in
1351 * updatable array for entire page. Existing tuple
1352 * will be updated during the later call to
1353 * _bt_delitems_vacuum().
1354 */
1355 Assert(nremaining < BTreeTupleGetNPosting(itup));
1356 updatable[nupdatable++] = vacposting;
1357 nhtidsdead += BTreeTupleGetNPosting(itup) - nremaining;
1358 }
1359 else
1360 {
1361 /*
1362 * All table TIDs from the posting list must be
1363 * deleted. We'll delete the index tuple completely
1364 * (no update required).
1365 */
1366 Assert(nremaining == 0);
1367 deletable[ndeletable++] = offnum;
1368 nhtidsdead += BTreeTupleGetNPosting(itup);
1369 pfree(vacposting);
1370 }
1371
1372 nhtidslive += nremaining;
1373 }
1374 }
1375 }
1376
1377 /*
1378 * Apply any needed deletes or updates. We issue just one
1379 * _bt_delitems_vacuum() call per page, so as to minimize WAL traffic.
1380 */
1381 if (ndeletable > 0 || nupdatable > 0)
1382 {
1383 Assert(nhtidsdead >= ndeletable + nupdatable);
1384 _bt_delitems_vacuum(rel, buf, deletable, ndeletable, updatable,
1385 nupdatable);
1386
1387 stats->tuples_removed += nhtidsdead;
1388 /* must recompute maxoff */
1389 maxoff = PageGetMaxOffsetNumber(page);
1390
1391 /* can't leak memory here */
1392 for (int i = 0; i < nupdatable; i++)
1393 pfree(updatable[i]);
1394 }
1395 else
1396 {
1397 /*
1398 * If the leaf page has been split during this vacuum cycle, it
1399 * seems worth expending a write to clear btpo_cycleid even if we
1400 * don't have any deletions to do. (If we do, _bt_delitems_vacuum
1401 * takes care of this.) This ensures we won't process the page
1402 * again.
1403 *
1404 * We treat this like a hint-bit update because there's no need to
1405 * WAL-log it.
1406 */
1407 Assert(nhtidsdead == 0);
1408 if (vstate->cycleid != 0 &&
1409 opaque->btpo_cycleid == vstate->cycleid)
1410 {
1411 opaque->btpo_cycleid = 0;
1412 MarkBufferDirtyHint(buf, true);
1413 }
1414 }
1415
1416 /*
1417 * If the leaf page is now empty, try to delete it; else count the
1418 * live tuples (live table TIDs in posting lists are counted as
1419 * separate live tuples). We don't delete when backtracking, though,
1420 * since that would require teaching _bt_pagedel() about backtracking
1421 * (doesn't seem worth adding more complexity to deal with that).
1422 *
1423 * We don't count the number of live TIDs during cleanup-only calls to
1424 * btvacuumscan (i.e. when callback is not set). We count the number
1425 * of index tuples directly instead. This avoids the expense of
1426 * directly examining all of the tuples on each page. VACUUM will
1427 * treat num_index_tuples as an estimate in the cleanup-only case, so it
1428 * doesn't matter that this underestimates num_index_tuples
1429 * significantly in some cases.
1430 */
1431 if (minoff > maxoff)
1432 attempt_pagedel = (blkno == scanblkno);
1433 else if (callback)
1434 stats->num_index_tuples += nhtidslive;
1435 else
1436 stats->num_index_tuples += maxoff - minoff + 1;
1437
1438 Assert(!attempt_pagedel || nhtidslive == 0);
1439 }
1440
1441 if (attempt_pagedel)
1442 {
1443 MemoryContext oldcontext;
1444
1445 /* Run pagedel in a temp context to avoid memory leakage */
1446 MemoryContextReset(vstate->pagedelcontext);
1447 oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
1448
1449 /*
1450 * _bt_pagedel maintains the bulk delete stats on our behalf;
1451 * pages_newly_deleted and pages_deleted are likely to be incremented
1452 * during call
1453 */
1454 Assert(blkno == scanblkno);
1455 _bt_pagedel(rel, buf, vstate);
1456
1457 MemoryContextSwitchTo(oldcontext);
1458 /* pagedel released buffer, so we shouldn't */
1459 }
1460 else
1461 _bt_relbuf(rel, buf);
1462
1463 if (backtrack_to != P_NONE)
1464 {
1465 blkno = backtrack_to;
1466
1467 /* check for vacuum delay while not holding any buffer lock */
1468 vacuum_delay_point(false);
1469
1470 /*
1471 * We can't use _bt_getbuf() here because it always applies
1472 * _bt_checkpage(), which will barf on an all-zero page. We want to
1473 * recycle all-zero pages, not fail. Also, we want to use a
1474 * nondefault buffer access strategy.
1475 */
1476 buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
1477 info->strategy);
1478 goto backtrack;
1479 }
1480
1481 return scanblkno;
1482}
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:3795
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition: bufmgr.c:4981
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:798
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:401
@ RBM_NORMAL
Definition: bufmgr.h:45
static Item PageGetItem(const PageData *page, const ItemIdData *itemId)
Definition: bufpage.h:354
static bool PageIsNew(const PageData *page)
Definition: bufpage.h:234
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:244
static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)
Definition: bufpage.h:372
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1157
int errcode(int sqlerrcode)
Definition: elog.c:853
#define LOG
Definition: elog.h:31
#define ereport(elevel,...)
Definition: elog.h:149
bool(* IndexBulkDeleteCallback)(ItemPointer itemptr, void *state)
Definition: genam.h:110
void RecordFreeIndexPage(Relation rel, BlockNumber freeBlock)
Definition: indexfsm.c:52
IndexTupleData * IndexTuple
Definition: itup.h:53
#define MaxIndexTuplesPerPage
Definition: itup.h:181
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:383
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:1023
void _bt_pagedel(Relation rel, Buffer leafbuf, BTVacState *vstate)
Definition: nbtpage.c:1802
void _bt_delitems_vacuum(Relation rel, Buffer buf, OffsetNumber *deletable, int ndeletable, BTVacuumPosting *updatable, int nupdatable)
Definition: nbtpage.c:1154
void _bt_checkpage(Relation rel, Buffer buf)
Definition: nbtpage.c:797
void _bt_upgradelockbufcleanup(Relation rel, Buffer buf)
Definition: nbtpage.c:1109
void _bt_lockbuf(Relation rel, Buffer buf, int access)
Definition: nbtpage.c:1039
static BTVacuumPosting btreevacuumposting(BTVacState *vstate, IndexTuple posting, OffsetNumber updatedoffset, int *nremaining)
Definition: nbtree.c:1496
#define P_ISHALFDEAD(opaque)
Definition: nbtree.h:224
static bool BTreeTupleIsPivot(IndexTuple itup)
Definition: nbtree.h:480
#define P_ISLEAF(opaque)
Definition: nbtree.h:220
#define BTPageGetOpaque(page)
Definition: nbtree.h:73
#define P_ISDELETED(opaque)
Definition: nbtree.h:222
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:369
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:219
#define BT_READ
Definition: nbtree.h:724
static bool BTPageIsRecyclable(Page page, Relation heaprel)
Definition: nbtree.h:291
static bool BTreeTupleIsPosting(IndexTuple itup)
Definition: nbtree.h:492
#define BTP_SPLIT_END
Definition: nbtree.h:81
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
#define RelationGetRelationName(relation)
Definition: rel.h:547
@ MAIN_FORKNUM
Definition: relpath.h:58
BlockNumber btpo_next
Definition: nbtree.h:65
uint16 btpo_flags
Definition: nbtree.h:67
BTCycleId btpo_cycleid
Definition: nbtree.h:68
IndexBulkDeleteResult * stats
Definition: nbtree.h:333
BTCycleId cycleid
Definition: nbtree.h:336
MemoryContext pagedelcontext
Definition: nbtree.h:337
IndexVacuumInfo * info
Definition: nbtree.h:332
double tuples_removed
Definition: genam.h:103
BufferAccessStrategy strategy
Definition: genam.h:76
Relation heaprel
Definition: genam.h:70
void vacuum_delay_point(bool is_analyze)
Definition: vacuum.c:2402

References _bt_checkpage(), _bt_delitems_vacuum(), _bt_lockbuf(), _bt_pagedel(), _bt_relbuf(), _bt_upgradelockbufcleanup(), Assert(), BT_READ, BTP_SPLIT_END, BTPageGetOpaque, BTPageIsRecyclable(), BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTreeTupleGetNPosting(), BTreeTupleIsPivot(), BTreeTupleIsPosting(), btreevacuumposting(), buf, BufferGetBlockNumber(), BufferGetPage(), BTVacState::callback, callback(), BTVacState::callback_state, BTVacState::cycleid, ereport, errcode(), errmsg_internal(), IndexVacuumInfo::heaprel, i, IndexVacuumInfo::index, BTVacState::info, LOG, MAIN_FORKNUM, MarkBufferDirtyHint(), MaxIndexTuplesPerPage, MemoryContextReset(), MemoryContextSwitchTo(), IndexBulkDeleteResult::num_index_tuples, OffsetNumberNext, P_FIRSTDATAKEY, P_ISDELETED, P_ISHALFDEAD, P_ISLEAF, P_NONE, P_RIGHTMOST, BTVacState::pagedelcontext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageIsNew(), IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, pfree(), RBM_NORMAL, ReadBufferExtended(), RecordFreeIndexPage(), RelationGetRelationName, BTVacState::stats, IndexVacuumInfo::strategy, IndexTupleData::t_tid, IndexBulkDeleteResult::tuples_removed, and vacuum_delay_point().

Referenced by btvacuumscan().
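
Note on the deletion callback: the per-TID loop in btvacuumpage() above is driven entirely by the IndexBulkDeleteCallback passed down from btbulkdelete(). In a live VACUUM that callback is supplied by the heap vacuuming code and consults its store of dead heap TIDs; the sketch below is only a minimal stand-in illustrating the contract (the DeadTidState struct and the tid_is_dead() name are hypothetical, not part of nbtree.c): return true for a heap TID whose index entry should be removed, false for one that must stay.

#include "postgres.h"

#include "access/genam.h"		/* IndexBulkDeleteCallback */
#include "storage/itemptr.h"	/* ItemPointer, ItemPointerCompare */

/* Hypothetical callback state: a sorted array of known-dead heap TIDs */
typedef struct DeadTidState
{
	ItemPointerData *dead_tids; /* sorted in ascending TID order */
	int			ndead;
} DeadTidState;

/*
 * Matches the IndexBulkDeleteCallback typedef from access/genam.h.
 * btvacuumpage() invokes this once per heap TID it finds on the leaf
 * page: plain index tuples contribute one TID, posting list tuples
 * several.
 */
static bool
tid_is_dead(ItemPointer itemptr, void *state)
{
	DeadTidState *dt = (DeadTidState *) state;
	int			lo = 0;
	int			hi = dt->ndead - 1;

	/* binary search the sorted dead-TID array */
	while (lo <= hi)
	{
		int			mid = lo + (hi - lo) / 2;
		int32		cmp = ItemPointerCompare(itemptr, &dt->dead_tids[mid]);

		if (cmp == 0)
			return true;		/* dead: delete (or trim) the index entry */
		if (cmp < 0)
			hi = mid - 1;
		else
			lo = mid + 1;
	}
	return false;				/* live: the index entry must be kept */
}

Every true return from such a callback ends up in the tuples_removed count (via nhtidsdead), while every false return feeds nhtidslive and hence num_index_tuples.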

◆ btvacuumscan()

static void btvacuumscan ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats,
IndexBulkDeleteCallback  callback,
void *  callback_state,
BTCycleId  cycleid 
)
static

Definition at line 988 of file nbtree.c.

991{
992 Relation rel = info->index;
993 BTVacState vstate;
994 BlockNumber num_pages;
995 bool needLock;
996 BlockRangeReadStreamPrivate p;
997 ReadStream *stream = NULL;
998
999 /*
1000 * Reset fields that track information about the entire index now. This
1001 * avoids double-counting in the case where a single VACUUM command
1002 * requires multiple scans of the index.
1003 *
1004 * Avoid resetting the tuples_removed and pages_newly_deleted fields here,
1005 * since they track information about the VACUUM command, and so must last
1006 * across each call to btvacuumscan().
1007 *
1008 * (Note that pages_free is treated as state about the whole index, not
1009 * the current VACUUM. This is appropriate because RecordFreeIndexPage()
1010 * calls are idempotent, and get repeated for the same deleted pages in
1011 * some scenarios. The point for us is to track the number of recyclable
1012 * pages in the index at the end of the VACUUM command.)
1013 */
1014 stats->num_pages = 0;
1015 stats->num_index_tuples = 0;
1016 stats->pages_deleted = 0;
1017 stats->pages_free = 0;
1018
1019 /* Set up info to pass down to btvacuumpage */
1020 vstate.info = info;
1021 vstate.stats = stats;
1022 vstate.callback = callback;
1023 vstate.callback_state = callback_state;
1024 vstate.cycleid = cycleid;
1025
1026 /* Create a temporary memory context to run _bt_pagedel in */
1027 vstate.pagedelcontext = AllocSetContextCreate(CurrentMemoryContext,
1028 "_bt_pagedel",
1029 ALLOCSET_DEFAULT_SIZES);
1030
1031 /* Initialize vstate fields used by _bt_pendingfsm_finalize */
1032 vstate.bufsize = 0;
1033 vstate.maxbufsize = 0;
1034 vstate.pendingpages = NULL;
1035 vstate.npendingpages = 0;
1036 /* Consider applying _bt_pendingfsm_finalize optimization */
1037 _bt_pendingfsm_init(rel, &vstate, (callback == NULL));
1038
1039 /*
1040 * The outer loop iterates over all index pages except the metapage, in
1041 * physical order (we hope the kernel will cooperate in providing
1042 * read-ahead for speed). It is critical that we visit all leaf pages,
1043 * including ones added after we start the scan, else we might fail to
1044 * delete some deletable tuples. Hence, we must repeatedly check the
1045 * relation length. We must acquire the relation-extension lock while
1046 * doing so to avoid a race condition: if someone else is extending the
1047 * relation, there is a window where bufmgr/smgr have created a new
1048 * all-zero page but it hasn't yet been write-locked by _bt_getbuf(). If
1049 * we manage to scan such a page here, we'll improperly assume it can be
1050 * recycled. Taking the lock synchronizes things enough to prevent a
1051 * problem: either num_pages won't include the new page, or _bt_getbuf
1052 * already has write lock on the buffer and it will be fully initialized
1053 * before we can examine it. Also, we need not worry if a page is added
1054 * immediately after we look; the page splitting code already has
1055 * write-lock on the left page before it adds a right page, so we must
1056 * already have processed any tuples due to be moved into such a page.
1057 *
1058 * XXX: Now that new pages are locked with RBM_ZERO_AND_LOCK, I don't
1059 * think the use of the extension lock is still required.
1060 *
1061 * We can skip locking for new or temp relations, however, since no one
1062 * else could be accessing them.
1063 */
1064 needLock = !RELATION_IS_LOCAL(rel);
1065
1066 p.current_blocknum = BTREE_METAPAGE + 1;
1067 stream = read_stream_begin_relation(READ_STREAM_FULL,
1068 info->strategy,
1069 rel,
1070 MAIN_FORKNUM,
1071 block_range_read_stream_cb,
1072 &p,
1073 0);
1074 for (;;)
1075 {
1076 /* Get the current relation length */
1077 if (needLock)
1078 LockRelationForExtension(rel, ExclusiveLock);
1079 num_pages = RelationGetNumberOfBlocks(rel);
1080 if (needLock)
1081 UnlockRelationForExtension(rel, ExclusiveLock);
1082
1083 if (info->report_progress)
1084 pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
1085 num_pages);
1086
1087 /* Quit if we've scanned the whole relation */
1088 if (p.current_blocknum >= num_pages)
1089 break;
1090
1091 p.last_exclusive = num_pages;
1092
1093 /* Iterate over pages, then loop back to recheck relation length */
1094 while (true)
1095 {
1096 BlockNumber current_block;
1097 Buffer buf;
1098
1099 /* call vacuum_delay_point while not holding any buffer lock */
1100 vacuum_delay_point(false);
1101
1102 buf = read_stream_next_buffer(stream, NULL);
1103
1104 if (!BufferIsValid(buf))
1105 break;
1106
1107 current_block = btvacuumpage(&vstate, buf);
1108
1109 if (info->report_progress)
1110 pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1111 current_block);
1112 }
1113
1114 Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer);
1115
1116 /*
1117 * We have to reset the read stream to use it again. After returning
1118 * InvalidBuffer, the read stream API won't invoke our callback again
1119 * until the stream has been reset.
1120 */
1121 read_stream_reset(stream);
1122 }
1123
1124 read_stream_end(stream);
1125
1126 /* Set statistics num_pages field to final size of index */
1127 stats->num_pages = num_pages;
1128
1129 MemoryContextDelete(vstate.pagedelcontext);
1130
1131 /*
1132 * If there were any calls to _bt_pagedel() during scan of the index then
1133 * see if any of the resulting pages can be placed in the FSM now. When
1134 * it's not safe we'll have to leave it up to a future VACUUM operation.
1135 *
1136 * Finally, if we placed any pages in the FSM (either just now or during
1137 * the scan), forcibly update the upper-level FSM pages to ensure that
1138 * searchers can find them.
1139 */
1140 _bt_pendingfsm_finalize(rel, &vstate);
1141 if (stats->pages_free > 0)
1142 IndexFreeSpaceMapVacuum(rel);
1143}
void pgstat_progress_update_param(int index, int64 val)
int Buffer
Definition: buf.h:23
#define InvalidBuffer
Definition: buf.h:25
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:274
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:352
void IndexFreeSpaceMapVacuum(Relation rel)
Definition: indexfsm.c:71
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:424
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:474
#define ExclusiveLock
Definition: lockdefs.h:42
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
void _bt_pendingfsm_finalize(Relation rel, BTVacState *vstate)
Definition: nbtpage.c:2996
void _bt_pendingfsm_init(Relation rel, BTVacState *vstate, bool cleanuponly)
Definition: nbtpage.c:2954
static BlockNumber btvacuumpage(BTVacState *vstate, Buffer buf)
Definition: nbtree.c:1158
#define PROGRESS_SCAN_BLOCKS_DONE
Definition: progress.h:125
#define PROGRESS_SCAN_BLOCKS_TOTAL
Definition: progress.h:124
void read_stream_reset(ReadStream *stream)
Definition: read_stream.c:978
Buffer read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
Definition: read_stream.c:742
ReadStream * read_stream_begin_relation(int flags, BufferAccessStrategy strategy, Relation rel, ForkNumber forknum, ReadStreamBlockNumberCB callback, void *callback_private_data, size_t per_buffer_data_size)
Definition: read_stream.c:688
void read_stream_end(ReadStream *stream)
Definition: read_stream.c:1023
BlockNumber block_range_read_stream_cb(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition: read_stream.c:158
#define READ_STREAM_FULL
Definition: read_stream.h:43
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:656
BTPendingFSM * pendingpages
Definition: nbtree.h:344
int npendingpages
Definition: nbtree.h:345
int bufsize
Definition: nbtree.h:342
int maxbufsize
Definition: nbtree.h:343
BlockNumber num_pages
Definition: genam.h:100
bool report_progress
Definition: genam.h:72

References _bt_pendingfsm_finalize(), _bt_pendingfsm_init(), ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert(), block_range_read_stream_cb(), BTREE_METAPAGE, btvacuumpage(), buf, BufferIsValid(), BTVacState::bufsize, BTVacState::callback, callback(), BTVacState::callback_state, BlockRangeReadStreamPrivate::current_blocknum, CurrentMemoryContext, BTVacState::cycleid, ExclusiveLock, IndexVacuumInfo::index, IndexFreeSpaceMapVacuum(), BTVacState::info, InvalidBuffer, BlockRangeReadStreamPrivate::last_exclusive, LockRelationForExtension(), MAIN_FORKNUM, BTVacState::maxbufsize, MemoryContextDelete(), BTVacState::npendingpages, IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::num_pages, BTVacState::pagedelcontext, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, BTVacState::pendingpages, pgstat_progress_update_param(), PROGRESS_SCAN_BLOCKS_DONE, PROGRESS_SCAN_BLOCKS_TOTAL, read_stream_begin_relation(), read_stream_end(), READ_STREAM_FULL, read_stream_next_buffer(), read_stream_reset(), RELATION_IS_LOCAL, RelationGetNumberOfBlocks, IndexVacuumInfo::report_progress, BTVacState::stats, IndexVacuumInfo::strategy, UnlockRelationForExtension(), and vacuum_delay_point().

Referenced by btbulkdelete(), and btvacuumcleanup().
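
Note on the streaming scan: the sketch below isolates the read-stream pattern that btvacuumscan() uses for its physical-order scan, under simplified assumptions (the scan_block_range() wrapper and process_block callback are hypothetical names; the relation-extension locking, progress reporting, and length-recheck loop of the real function are omitted). A BlockRangeReadStreamPrivate struct supplies the block range, block_range_read_stream_cb hands successive block numbers to the stream, and the consumer loops until read_stream_next_buffer() returns InvalidBuffer.

#include "postgres.h"

#include "storage/bufmgr.h"
#include "storage/read_stream.h"

/*
 * Hypothetical wrapper: stream every block in [start, end) of rel's main
 * fork in physical order, handing each pinned buffer to process_block.
 * As with btvacuumpage(), the per-page worker is responsible for
 * releasing the buffer it is given.
 */
static void
scan_block_range(Relation rel, BlockNumber start, BlockNumber end,
				 void (*process_block) (Buffer buf))
{
	BlockRangeReadStreamPrivate p;
	ReadStream *stream;
	Buffer		buf;

	p.current_blocknum = start;
	p.last_exclusive = end;

	stream = read_stream_begin_relation(READ_STREAM_FULL,
										NULL,	/* default buffer strategy */
										rel,
										MAIN_FORKNUM,
										block_range_read_stream_cb,
										&p,
										0); /* no per-buffer data */

	/* the stream returns InvalidBuffer once the range is exhausted */
	while (BufferIsValid(buf = read_stream_next_buffer(stream, NULL)))
		process_block(buf);

	read_stream_end(stream);
}

btvacuumscan() layers one more idea on top: because the stream is created once but the relation can grow mid-scan, it drains the stream, recomputes num_pages, advances p.last_exclusive, and calls read_stream_reset() so the same ReadStream can serve the newly added range on the next pass.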