PostgreSQL Source Code git master
Loading...
Searching...
No Matches
nbtree.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "access/relscan.h"
#include "access/stratnum.h"
#include "commands/progress.h"
#include "commands/vacuum.h"
#include "nodes/execnodes.h"
#include "pgstat.h"
#include "storage/bulk_write.h"
#include "storage/condition_variable.h"
#include "storage/indexfsm.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/read_stream.h"
#include "utils/datum.h"
#include "utils/fmgrprotos.h"
#include "utils/index_selfuncs.h"
#include "utils/memutils.h"
#include "utils/wait_event.h"
Include dependency graph for nbtree.c:

Go to the source code of this file.

Data Structures

struct  BTParallelScanDescData
 

Typedefs

typedef struct BTParallelScanDescData BTParallelScanDescData
 
typedef struct BTParallelScanDescData *BTParallelScanDesc
 

Enumerations

enum  BTPS_State {
  BTPARALLEL_NOT_INITIALIZED , BTPARALLEL_NEED_PRIMSCAN , BTPARALLEL_ADVANCING , BTPARALLEL_IDLE ,
  BTPARALLEL_DONE
}
 

Functions

static bool _bt_start_prim_scan (IndexScanDesc scan)
 
static void _bt_parallel_serialize_arrays (Relation rel, BTParallelScanDesc btscan, BTScanOpaque so)
 
static void _bt_parallel_restore_arrays (Relation rel, BTParallelScanDesc btscan, BTScanOpaque so)
 
static void btvacuumscan (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid)
 
static BlockNumber btvacuumpage (BTVacState *vstate, Buffer buf)
 
static BTVacuumPosting btreevacuumposting (BTVacState *vstate, IndexTuple posting, OffsetNumber updatedoffset, int *nremaining)
 
Datum bthandler (PG_FUNCTION_ARGS)
 
void btbuildempty (Relation index)
 
bool btinsert (Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo)
 
bool btgettuple (IndexScanDesc scan, ScanDirection dir)
 
int64 btgetbitmap (IndexScanDesc scan, TIDBitmap *tbm)
 
IndexScanDesc btbeginscan (Relation rel, int nkeys, int norderbys)
 
void btrescan (IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
 
void btendscan (IndexScanDesc scan)
 
void btmarkpos (IndexScanDesc scan)
 
void btrestrpos (IndexScanDesc scan)
 
Size btestimateparallelscan (Relation rel, int nkeys, int norderbys)
 
void btinitparallelscan (void *target)
 
void btparallelrescan (IndexScanDesc scan)
 
bool _bt_parallel_seize (IndexScanDesc scan, BlockNumber *next_scan_page, BlockNumber *last_curr_page, bool first)
 
void _bt_parallel_release (IndexScanDesc scan, BlockNumber next_scan_page, BlockNumber curr_page)
 
void _bt_parallel_done (IndexScanDesc scan)
 
void _bt_parallel_primscan_schedule (IndexScanDesc scan, BlockNumber curr_page)
 
IndexBulkDeleteResult * btbulkdelete (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
 
IndexBulkDeleteResult * btvacuumcleanup (IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 
bool btcanreturn (Relation index, int attno)
 
int btgettreeheight (Relation rel)
 
CompareType bttranslatestrategy (StrategyNumber strategy, Oid opfamily)
 
StrategyNumber bttranslatecmptype (CompareType cmptype, Oid opfamily)
 

Typedef Documentation

◆ BTParallelScanDesc

Definition at line 94 of file nbtree.c.

◆ BTParallelScanDescData

Enumeration Type Documentation

◆ BTPS_State

Enumerator
BTPARALLEL_NOT_INITIALIZED 
BTPARALLEL_NEED_PRIMSCAN 
BTPARALLEL_ADVANCING 
BTPARALLEL_IDLE 
BTPARALLEL_DONE 

Definition at line 55 of file nbtree.c.

56{
BTPS_State
Definition nbtree.c:56
@ BTPARALLEL_ADVANCING
Definition nbtree.c:59
@ BTPARALLEL_NEED_PRIMSCAN
Definition nbtree.c:58
@ BTPARALLEL_NOT_INITIALIZED
Definition nbtree.c:57
@ BTPARALLEL_IDLE
Definition nbtree.c:60
@ BTPARALLEL_DONE
Definition nbtree.c:61

Function Documentation

◆ _bt_parallel_done()

void _bt_parallel_done ( IndexScanDesc  scan)

Definition at line 1034 of file nbtree.c.

1035{
1037 ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
1039 bool status_changed = false;
1040
1041 Assert(!BTScanPosIsValid(so->currPos));
1042
1043 /* Do nothing, for non-parallel scans */
1044 if (parallel_scan == NULL)
1045 return;
1046
1047 /*
1048 * Should not mark parallel scan done when there's still a pending
1049 * primitive index scan
1050 */
1051 if (so->needPrimScan)
1052 return;
1053
1054 btscan = (BTParallelScanDesc) OffsetToPointer(parallel_scan,
1055 parallel_scan->ps_offset_am);
1056
1057 /*
1058 * Mark the parallel scan as done, unless some other process did so
1059 * already
1060 */
1061 LWLockAcquire(&btscan->btps_lock, LW_EXCLUSIVE);
1062 Assert(btscan->btps_pageStatus != BTPARALLEL_NEED_PRIMSCAN);
1063 if (btscan->btps_pageStatus != BTPARALLEL_DONE)
1064 {
1065 btscan->btps_pageStatus = BTPARALLEL_DONE;
1066 status_changed = true;
1067 }
1068 LWLockRelease(&btscan->btps_lock);
1069
1070 /* wake up all the workers associated with this parallel scan */
1071 if (status_changed)
1073}
#define OffsetToPointer(base, offset)
Definition c.h:857
#define Assert(condition)
Definition c.h:945
void ConditionVariableBroadcast(ConditionVariable *cv)
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1177
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1794
@ LW_EXCLUSIVE
Definition lwlock.h:112
struct BTParallelScanDescData * BTParallelScanDesc
Definition nbtree.c:94
#define BTScanPosIsValid(scanpos)
Definition nbtree.h:1021
BTScanOpaqueData * BTScanOpaque
Definition nbtree.h:1097
static int fb(int x)
struct ParallelIndexScanDescData * parallel_scan
Definition relscan.h:192

References Assert, BTPARALLEL_DONE, BTPARALLEL_NEED_PRIMSCAN, BTScanPosIsValid, ConditionVariableBroadcast(), fb(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), OffsetToPointer, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, and ParallelIndexScanDescData::ps_offset_am.

Referenced by _bt_endpoint(), _bt_first(), _bt_parallel_seize(), _bt_readnextpage(), and _bt_start_prim_scan().

◆ _bt_parallel_primscan_schedule()

void _bt_parallel_primscan_schedule ( IndexScanDesc  scan,
BlockNumber  curr_page 
)

Definition at line 1084 of file nbtree.c.

1085{
1086 Relation rel = scan->indexRelation;
1088 ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
1090
1091 Assert(so->numArrayKeys);
1092
1093 btscan = (BTParallelScanDesc) OffsetToPointer(parallel_scan,
1094 parallel_scan->ps_offset_am);
1095
1096 LWLockAcquire(&btscan->btps_lock, LW_EXCLUSIVE);
1097 if (btscan->btps_lastCurrPage == curr_page &&
1098 btscan->btps_pageStatus == BTPARALLEL_IDLE)
1099 {
1100 btscan->btps_nextScanPage = InvalidBlockNumber;
1101 btscan->btps_lastCurrPage = InvalidBlockNumber;
1102 btscan->btps_pageStatus = BTPARALLEL_NEED_PRIMSCAN;
1103
1104 /* Serialize scan's current array keys */
1106 }
1107 LWLockRelease(&btscan->btps_lock);
1108}
#define InvalidBlockNumber
Definition block.h:33
static void _bt_parallel_serialize_arrays(Relation rel, BTParallelScanDesc btscan, BTScanOpaque so)
Definition nbtree.c:716
Relation indexRelation
Definition relscan.h:138

References _bt_parallel_serialize_arrays(), Assert, BTPARALLEL_IDLE, BTPARALLEL_NEED_PRIMSCAN, fb(), IndexScanDescData::indexRelation, InvalidBlockNumber, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), OffsetToPointer, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, and ParallelIndexScanDescData::ps_offset_am.

Referenced by _bt_advance_array_keys(), and _bt_readpage().

◆ _bt_parallel_release()

void _bt_parallel_release ( IndexScanDesc  scan,
BlockNumber  next_scan_page,
BlockNumber  curr_page 
)

Definition at line 1007 of file nbtree.c.

1009{
1010 ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
1012
1014
1015 btscan = (BTParallelScanDesc) OffsetToPointer(parallel_scan,
1016 parallel_scan->ps_offset_am);
1017
1018 LWLockAcquire(&btscan->btps_lock, LW_EXCLUSIVE);
1019 btscan->btps_nextScanPage = next_scan_page;
1020 btscan->btps_lastCurrPage = curr_page;
1021 btscan->btps_pageStatus = BTPARALLEL_IDLE;
1022 LWLockRelease(&btscan->btps_lock);
1024}
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition block.h:71
void ConditionVariableSignal(ConditionVariable *cv)

References Assert, BlockNumberIsValid(), BTPARALLEL_IDLE, ConditionVariableSignal(), fb(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), OffsetToPointer, IndexScanDescData::parallel_scan, and ParallelIndexScanDescData::ps_offset_am.

Referenced by _bt_readnextpage(), and _bt_readpage().

◆ _bt_parallel_restore_arrays()

static void _bt_parallel_restore_arrays ( Relation  rel,
BTParallelScanDesc  btscan,
BTScanOpaque  so 
)
static

Definition at line 759 of file nbtree.c.

761{
762 char *datumshared;
763
764 /* Space for serialized datums begins immediately after btps_arrElems[] */
765 datumshared = ((char *) &btscan->btps_arrElems[so->numArrayKeys]);
766 for (int i = 0; i < so->numArrayKeys; i++)
767 {
768 BTArrayKeyInfo *array = &so->arrayKeys[i];
769 ScanKey skey = &so->keyData[array->scan_key];
770 bool isnull;
771
772 if (array->num_elems != -1)
773 {
774 /* Restore SAOP array using its saved cur_elem */
775 Assert(!(skey->sk_flags & SK_BT_SKIP));
776 array->cur_elem = btscan->btps_arrElems[i];
777 skey->sk_argument = array->elem_values[array->cur_elem];
778 continue;
779 }
780
781 /* Restore skip array by restoring its key directly */
782 if (!array->attbyval && skey->sk_argument)
783 pfree(DatumGetPointer(skey->sk_argument));
784 skey->sk_argument = (Datum) 0;
785 memcpy(&skey->sk_flags, datumshared, sizeof(int));
786 datumshared += sizeof(int);
787
788 Assert(skey->sk_flags & SK_BT_SKIP);
789
790 if (skey->sk_flags & (SK_BT_MINVAL | SK_BT_MAXVAL))
791 {
792 /* No sk_argument datum to restore */
793 continue;
794 }
795
796 skey->sk_argument = datumRestore(&datumshared, &isnull);
797 if (isnull)
798 {
799 Assert(skey->sk_argument == 0);
800 Assert(skey->sk_flags & SK_SEARCHNULL);
801 Assert(skey->sk_flags & SK_ISNULL);
802 }
803 }
804}
Datum datumRestore(char **start_address, bool *isnull)
Definition datum.c:523
int i
Definition isn.c:77
void pfree(void *pointer)
Definition mcxt.c:1616
#define SK_BT_SKIP
Definition nbtree.h:1106
#define SK_BT_MAXVAL
Definition nbtree.h:1110
#define SK_BT_MINVAL
Definition nbtree.h:1109
uint64_t Datum
Definition postgres.h:70
static Pointer DatumGetPointer(Datum X)
Definition postgres.h:332
#define SK_SEARCHNULL
Definition skey.h:121
#define SK_ISNULL
Definition skey.h:115
Datum * elem_values
Definition nbtree.h:1041

References Assert, BTArrayKeyInfo::attbyval, BTArrayKeyInfo::cur_elem, DatumGetPointer(), datumRestore(), BTArrayKeyInfo::elem_values, fb(), i, BTArrayKeyInfo::num_elems, pfree(), BTArrayKeyInfo::scan_key, SK_BT_MAXVAL, SK_BT_MINVAL, SK_BT_SKIP, SK_ISNULL, and SK_SEARCHNULL.

Referenced by _bt_parallel_seize().

◆ _bt_parallel_seize()

bool _bt_parallel_seize ( IndexScanDesc  scan,
BlockNumber *  next_scan_page,
BlockNumber *  last_curr_page,
bool  first 
)

Definition at line 869 of file nbtree.c.

871{
872 Relation rel = scan->indexRelation;
874 bool exit_loop = false,
875 status = true,
876 endscan = false;
877 ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
879
882
883 /*
884 * Reset so->currPos, and initialize moreLeft/moreRight such that the next
885 * call to _bt_readnextpage treats this backend similarly to a serial
886 * backend that steps from *last_curr_page to *next_scan_page (unless this
887 * backend's so->currPos is initialized by _bt_readfirstpage before then).
888 */
889 BTScanPosInvalidate(so->currPos);
890 so->currPos.moreLeft = so->currPos.moreRight = true;
891
892 if (first)
893 {
894 /*
895 * Initialize array related state when called from _bt_first, assuming
896 * that this will be the first primitive index scan for the scan
897 */
898 so->needPrimScan = false;
899 so->scanBehind = false;
900 so->oppositeDirCheck = false;
901 }
902 else
903 {
904 /*
905 * Don't attempt to seize the scan when it requires another primitive
906 * index scan, since caller's backend cannot start it right now
907 */
908 if (so->needPrimScan)
909 return false;
910 }
911
912 btscan = (BTParallelScanDesc) OffsetToPointer(parallel_scan,
913 parallel_scan->ps_offset_am);
914
915 while (1)
916 {
917 LWLockAcquire(&btscan->btps_lock, LW_EXCLUSIVE);
918
919 if (btscan->btps_pageStatus == BTPARALLEL_DONE)
920 {
921 /* We're done with this parallel index scan */
922 status = false;
923 }
924 else if (btscan->btps_pageStatus == BTPARALLEL_IDLE &&
925 btscan->btps_nextScanPage == P_NONE)
926 {
927 /* End this parallel index scan */
928 status = false;
929 endscan = true;
930 }
931 else if (btscan->btps_pageStatus == BTPARALLEL_NEED_PRIMSCAN)
932 {
933 Assert(so->numArrayKeys);
934
935 if (first)
936 {
937 /* Can start scheduled primitive scan right away, so do so */
938 btscan->btps_pageStatus = BTPARALLEL_ADVANCING;
939
940 /* Restore scan's array keys from serialized values */
942 exit_loop = true;
943 }
944 else
945 {
946 /*
947 * Don't attempt to seize the scan when it requires another
948 * primitive index scan, since caller's backend cannot start
949 * it right now
950 */
951 status = false;
952 }
953
954 /*
955 * Either way, update backend local state to indicate that a
956 * pending primitive scan is required
957 */
958 so->needPrimScan = true;
959 so->scanBehind = false;
960 so->oppositeDirCheck = false;
961 }
962 else if (btscan->btps_pageStatus != BTPARALLEL_ADVANCING)
963 {
964 /*
965 * We have successfully seized control of the scan for the purpose
966 * of advancing it to a new page!
967 */
968 btscan->btps_pageStatus = BTPARALLEL_ADVANCING;
969 Assert(btscan->btps_nextScanPage != P_NONE);
970 *next_scan_page = btscan->btps_nextScanPage;
971 *last_curr_page = btscan->btps_lastCurrPage;
972 exit_loop = true;
973 }
974 LWLockRelease(&btscan->btps_lock);
975 if (exit_loop || !status)
976 break;
978 }
980
981 /* When the scan has reached the rightmost (or leftmost) page, end it */
982 if (endscan)
983 _bt_parallel_done(scan);
984
985 return status;
986}
bool ConditionVariableCancelSleep(void)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
void _bt_parallel_done(IndexScanDesc scan)
Definition nbtree.c:1034
static void _bt_parallel_restore_arrays(Relation rel, BTParallelScanDesc btscan, BTScanOpaque so)
Definition nbtree.c:759
#define P_NONE
Definition nbtree.h:213
#define BTScanPosInvalidate(scanpos)
Definition nbtree.h:1027

References _bt_parallel_done(), _bt_parallel_restore_arrays(), Assert, BTPARALLEL_ADVANCING, BTPARALLEL_DONE, BTPARALLEL_IDLE, BTPARALLEL_NEED_PRIMSCAN, BTScanPosInvalidate, ConditionVariableCancelSleep(), ConditionVariableSleep(), fb(), IndexScanDescData::indexRelation, InvalidBlockNumber, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), OffsetToPointer, IndexScanDescData::opaque, P_NONE, IndexScanDescData::parallel_scan, and ParallelIndexScanDescData::ps_offset_am.

Referenced by _bt_first(), and _bt_readnextpage().

◆ _bt_parallel_serialize_arrays()

static void _bt_parallel_serialize_arrays ( Relation  rel,
BTParallelScanDesc  btscan,
BTScanOpaque  so 
)
static

Definition at line 716 of file nbtree.c.

718{
719 char *datumshared;
720
721 /* Space for serialized datums begins immediately after btps_arrElems[] */
722 datumshared = ((char *) &btscan->btps_arrElems[so->numArrayKeys]);
723 for (int i = 0; i < so->numArrayKeys; i++)
724 {
725 BTArrayKeyInfo *array = &so->arrayKeys[i];
726 ScanKey skey = &so->keyData[array->scan_key];
727
728 if (array->num_elems != -1)
729 {
730 /* Save SAOP array's cur_elem (no need to copy key/datum) */
731 Assert(!(skey->sk_flags & SK_BT_SKIP));
732 btscan->btps_arrElems[i] = array->cur_elem;
733 continue;
734 }
735
736 /* Save all mutable state associated with skip array's key */
737 Assert(skey->sk_flags & SK_BT_SKIP);
738 memcpy(datumshared, &skey->sk_flags, sizeof(int));
739 datumshared += sizeof(int);
740
741 if (skey->sk_flags & (SK_BT_MINVAL | SK_BT_MAXVAL))
742 {
743 /* No sk_argument datum to serialize */
744 Assert(skey->sk_argument == 0);
745 continue;
746 }
747
748 datumSerialize(skey->sk_argument, (skey->sk_flags & SK_ISNULL) != 0,
749 array->attbyval, array->attlen, &datumshared);
750 }
751}
void datumSerialize(Datum value, bool isnull, bool typByVal, int typLen, char **start_address)
Definition datum.c:461

References Assert, BTArrayKeyInfo::attbyval, BTArrayKeyInfo::attlen, BTArrayKeyInfo::cur_elem, datumSerialize(), fb(), i, BTArrayKeyInfo::num_elems, BTArrayKeyInfo::scan_key, SK_BT_MAXVAL, SK_BT_MINVAL, SK_BT_SKIP, and SK_ISNULL.

Referenced by _bt_parallel_primscan_schedule().

◆ _bt_start_prim_scan()

static bool _bt_start_prim_scan ( IndexScanDesc  scan)
static

Definition at line 652 of file nbtree.c.

653{
655
656 Assert(so->numArrayKeys);
657
658 so->scanBehind = so->oppositeDirCheck = false; /* reset */
659
660 /*
661 * Array keys are advanced within _bt_checkkeys when the scan reaches the
662 * leaf level (more precisely, they're advanced when the scan reaches the
663 * end of each distinct set of array elements). This process avoids
664 * repeat access to leaf pages (across multiple primitive index scans) by
665 * advancing the scan's array keys when it allows the primitive index scan
666 * to find nearby matching tuples (or when it eliminates ranges of array
667 * key space that can't possibly be satisfied by any index tuple).
668 *
669 * _bt_checkkeys sets a simple flag variable to schedule another primitive
670 * index scan. The flag tells us what to do.
671 *
672 * We cannot rely on _bt_first always reaching _bt_checkkeys. There are
673 * various cases where that won't happen. For example, if the index is
674 * completely empty, then _bt_first won't call _bt_readpage/_bt_checkkeys.
675 * We also don't expect a call to _bt_checkkeys during searches for a
676 * non-existent value that happens to be lower/higher than any existing
677 * value in the index.
678 *
679 * We don't require special handling for these cases -- we don't need to
680 * be explicitly instructed to _not_ perform another primitive index scan.
681 * It's up to code under the control of _bt_first to always set the flag
682 * when another primitive index scan will be required.
683 *
684 * This works correctly, even with the tricky cases listed above, which
685 * all involve access to leaf pages "near the boundaries of the key space"
686 * (whether it's from a leftmost/rightmost page, or an imaginary empty
687 * leaf root page). If _bt_checkkeys cannot be reached by a primitive
688 * index scan for one set of array keys, then it also won't be reached for
689 * any later set ("later" in terms of the direction that we scan the index
690 * and advance the arrays). The array keys won't have advanced in these
691 * cases, but that's the correct behavior (even _bt_advance_array_keys
692 * won't always advance the arrays at the point they become "exhausted").
693 */
694 if (so->needPrimScan)
695 {
696 /*
697 * Flag was set -- must call _bt_first again, which will reset the
698 * scan's needPrimScan flag
699 */
700 return true;
701 }
702
703 /* The top-level index scan ran out of tuples in this scan direction */
704 if (scan->parallel_scan != NULL)
705 _bt_parallel_done(scan);
706
707 return false;
708}

References _bt_parallel_done(), Assert, fb(), IndexScanDescData::opaque, and IndexScanDescData::parallel_scan.

Referenced by btgetbitmap(), and btgettuple().

◆ btbeginscan()

IndexScanDesc btbeginscan ( Relation  rel,
int  nkeys,
int  norderbys 
)

Definition at line 338 of file nbtree.c.

339{
340 IndexScanDesc scan;
342
343 /* no order by operators allowed */
344 Assert(norderbys == 0);
345
346 /* get the scan */
347 scan = RelationGetIndexScan(rel, nkeys, norderbys);
348
349 /* allocate private workspace */
351 BTScanPosInvalidate(so->currPos);
352 BTScanPosInvalidate(so->markPos);
353 if (scan->numberOfKeys > 0)
354 so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
355 else
356 so->keyData = NULL;
357
358 so->skipScan = false;
359 so->needPrimScan = false;
360 so->scanBehind = false;
361 so->oppositeDirCheck = false;
362 so->arrayKeys = NULL;
363 so->orderProcs = NULL;
364 so->arrayContext = NULL;
365
366 so->killedItems = NULL; /* until needed */
367 so->numKilled = 0;
368
369 /*
370 * We don't know yet whether the scan will be index-only, so we do not
371 * allocate the tuple workspace arrays until btrescan. However, we set up
372 * scan->xs_itupdesc whether we'll need it or not, since that's so cheap.
373 */
374 so->currTuples = so->markTuples = NULL;
375
376 scan->xs_itupdesc = RelationGetDescr(rel);
377
378 scan->opaque = so;
379
380 return scan;
381}
#define palloc_object(type)
Definition fe_memutils.h:74
IndexScanDesc RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
Definition genam.c:80
void * palloc(Size size)
Definition mcxt.c:1387
#define RelationGetDescr(relation)
Definition rel.h:540
ScanKeyData * ScanKey
Definition skey.h:75
struct TupleDescData * xs_itupdesc
Definition relscan.h:169

References Assert, BTScanPosInvalidate, fb(), IndexScanDescData::numberOfKeys, IndexScanDescData::opaque, palloc(), palloc_object, RelationGetDescr, RelationGetIndexScan(), and IndexScanDescData::xs_itupdesc.

Referenced by bthandler().

◆ btbuildempty()

void btbuildempty ( Relation  index)

Definition at line 182 of file nbtree.c.

183{
184 bool allequalimage = _bt_allequalimage(index, false);
185 BulkWriteState *bulkstate;
187
189
190 /* Construct metapage. */
191 metabuf = smgr_bulk_get_buf(bulkstate);
192 _bt_initmetapage((Page) metabuf, P_NONE, 0, allequalimage);
193 smgr_bulk_write(bulkstate, BTREE_METAPAGE, metabuf, true);
194
195 smgr_bulk_finish(bulkstate);
196}
PageData * Page
Definition bufpage.h:81
BulkWriteState * smgr_bulk_start_rel(Relation rel, ForkNumber forknum)
Definition bulk_write.c:87
void smgr_bulk_write(BulkWriteState *bulkstate, BlockNumber blocknum, BulkWriteBuffer buf, bool page_std)
Definition bulk_write.c:323
BulkWriteBuffer smgr_bulk_get_buf(BulkWriteState *bulkstate)
Definition bulk_write.c:347
void smgr_bulk_finish(BulkWriteState *bulkstate)
Definition bulk_write.c:130
void _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level, bool allequalimage)
Definition nbtpage.c:68
#define BTREE_METAPAGE
Definition nbtree.h:149
bool _bt_allequalimage(Relation rel, bool debugmessage)
Definition nbtutils.c:1176
@ INIT_FORKNUM
Definition relpath.h:61
Definition type.h:96

References _bt_allequalimage(), _bt_initmetapage(), BTREE_METAPAGE, fb(), INIT_FORKNUM, P_NONE, smgr_bulk_finish(), smgr_bulk_get_buf(), smgr_bulk_start_rel(), and smgr_bulk_write().

Referenced by bthandler().

◆ btbulkdelete()

IndexBulkDeleteResult * btbulkdelete ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats,
IndexBulkDeleteCallback  callback,
void *  callback_state 
)

Definition at line 1118 of file nbtree.c.

1120{
1121 Relation rel = info->index;
1122 BTCycleId cycleid;
1123
1124 /* allocate stats if first time through, else re-use existing struct */
1125 if (stats == NULL)
1127
1128 /* Establish the vacuum cycle ID to use for this scan */
1129 /* The ENSURE stuff ensures we clean up shared memory on failure */
1131 {
1132 cycleid = _bt_start_vacuum(rel);
1133
1134 btvacuumscan(info, stats, callback, callback_state, cycleid);
1135 }
1137 _bt_end_vacuum(rel);
1138
1139 return stats;
1140}
#define palloc0_object(type)
Definition fe_memutils.h:75
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition ipc.h:47
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition ipc.h:52
static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid)
Definition nbtree.c:1236
uint16 BTCycleId
Definition nbtree.h:30
void _bt_end_vacuum(Relation rel)
Definition nbtutils.c:521
void _bt_end_vacuum_callback(int code, Datum arg)
Definition nbtutils.c:549
BTCycleId _bt_start_vacuum(Relation rel)
Definition nbtutils.c:464
static Datum PointerGetDatum(const void *X)
Definition postgres.h:342
Relation index
Definition genam.h:54
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)

References _bt_end_vacuum(), _bt_end_vacuum_callback(), _bt_start_vacuum(), btvacuumscan(), callback(), fb(), IndexVacuumInfo::index, palloc0_object, PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, and PointerGetDatum().

Referenced by bthandler().

◆ btcanreturn()

bool btcanreturn ( Relation  index,
int  attno 
)

Definition at line 1798 of file nbtree.c.

1799{
1800 return true;
1801}

Referenced by bthandler().

◆ btendscan()

void btendscan ( IndexScanDesc  scan)

Definition at line 454 of file nbtree.c.

455{
457
458 /* we aren't holding any read locks, but gotta drop the pins */
459 if (BTScanPosIsValid(so->currPos))
460 {
461 /* Before leaving current page, deal with any killed items */
462 if (so->numKilled > 0)
463 _bt_killitems(scan);
464 BTScanPosUnpinIfPinned(so->currPos);
465 }
466
467 so->markItemIndex = -1;
468 BTScanPosUnpinIfPinned(so->markPos);
469
470 /* No need to invalidate positions, the RAM is about to be freed. */
471
472 /* Release storage */
473 if (so->keyData != NULL)
474 pfree(so->keyData);
475 /* so->arrayKeys and so->orderProcs are in arrayContext */
476 if (so->arrayContext != NULL)
477 MemoryContextDelete(so->arrayContext);
478 if (so->killedItems != NULL)
479 pfree(so->killedItems);
480 if (so->currTuples != NULL)
481 pfree(so->currTuples);
482 /* so->markTuples should not be pfree'd, see btrescan */
483 pfree(so);
484}
void MemoryContextDelete(MemoryContext context)
Definition mcxt.c:472
#define BTScanPosUnpinIfPinned(scanpos)
Definition nbtree.h:1015
void _bt_killitems(IndexScanDesc scan)
Definition nbtutils.c:189

References _bt_killitems(), BTScanPosIsValid, BTScanPosUnpinIfPinned, fb(), MemoryContextDelete(), IndexScanDescData::opaque, and pfree().

Referenced by bthandler().

◆ btestimateparallelscan()

Size btestimateparallelscan ( Relation  rel,
int  nkeys,
int  norderbys 
)

Definition at line 574 of file nbtree.c.

575{
579
580 /*
581 * Pessimistically assume that every input scan key will be output with
582 * its own SAOP array
583 */
585 sizeof(int) * nkeys;
586
587 /* Single column indexes cannot possibly use a skip array */
588 if (nkeyatts == 1)
589 return estnbtreeshared;
590
591 /*
592 * Pessimistically assume that all attributes prior to the least
593 * significant attribute require a skip array (and an associated key)
594 */
595 genericattrspace = datumEstimateSpace((Datum) 0, false, true,
596 sizeof(Datum));
597 for (int attnum = 1; attnum < nkeyatts; attnum++)
598 {
599 CompactAttribute *attr;
600
601 /*
602 * We make the conservative assumption that every index column will
603 * also require a skip array.
604 *
605 * Every skip array must have space to store its scan key's sk_flags.
606 */
608
609 /* Consider space required to store a datum of opclass input type */
610 attr = TupleDescCompactAttr(rel->rd_att, attnum - 1);
611 if (attr->attbyval)
612 {
613 /* This index attribute stores pass-by-value datums */
615 true, attr->attlen);
616
618 continue;
619 }
620
621 /*
622 * This index attribute stores pass-by-reference datums.
623 *
624 * Assume that serializing this array will use just as much space as a
625 * pass-by-value datum, in addition to space for the largest possible
626 * whole index tuple (this is not just a per-datum portion of the
627 * largest possible tuple because that'd be almost as large anyway).
628 *
629 * This is quite conservative, but it's not clear how we could do much
630 * better. The executor requires an up-front storage request size
631 * that reliably covers the scan's high watermark memory usage. We
632 * can't be sure of the real high watermark until the scan is over.
633 */
636 }
637
638 return estnbtreeshared;
639}
int16_t int16
Definition c.h:613
size_t Size
Definition c.h:691
Size datumEstimateSpace(Datum value, bool isnull, bool typByVal, int typLen)
Definition datum.c:414
#define BTMaxItemSize
Definition nbtree.h:165
int16 attnum
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition rel.h:533
Size add_size(Size s1, Size s2)
Definition shmem.c:485
TupleDesc rd_att
Definition rel.h:112
static CompactAttribute * TupleDescCompactAttr(TupleDesc tupdesc, int i)
Definition tupdesc.h:193

References add_size(), CompactAttribute::attbyval, CompactAttribute::attlen, attnum, BTMaxItemSize, BTParallelScanDescData::btps_arrElems, datumEstimateSpace(), fb(), IndexRelationGetNumberOfKeyAttributes, RelationData::rd_att, and TupleDescCompactAttr().

Referenced by bthandler().

◆ btgetbitmap()

int64 btgetbitmap ( IndexScanDesc  scan,
TIDBitmap *  tbm 
)

Definition at line 290 of file nbtree.c.

291{
293 int64 ntids = 0;
294 ItemPointer heapTid;
295
296 Assert(scan->heapRelation == NULL);
297
298 /* Each loop iteration performs another primitive index scan */
299 do
300 {
301 /* Fetch the first page & tuple */
303 {
304 /* Save tuple ID, and continue scanning */
305 heapTid = &scan->xs_heaptid;
306 tbm_add_tuples(tbm, heapTid, 1, false);
307 ntids++;
308
309 for (;;)
310 {
311 /*
312 * Advance to next tuple within page. This is the same as the
313 * easy case in _bt_next().
314 */
315 if (++so->currPos.itemIndex > so->currPos.lastItem)
316 {
317 /* let _bt_next do the heavy lifting */
318 if (!_bt_next(scan, ForwardScanDirection))
319 break;
320 }
321
322 /* Save tuple ID, and continue scanning */
323 heapTid = &so->currPos.items[so->currPos.itemIndex].heapTid;
324 tbm_add_tuples(tbm, heapTid, 1, false);
325 ntids++;
326 }
327 }
328 /* Now see if we need another primitive index scan */
329 } while (so->numArrayKeys && _bt_start_prim_scan(scan));
330
331 return ntids;
332}
int64_t int64
Definition c.h:615
static bool _bt_start_prim_scan(IndexScanDesc scan)
Definition nbtree.c:652
bool _bt_first(IndexScanDesc scan, ScanDirection dir)
Definition nbtsearch.c:883
bool _bt_next(IndexScanDesc scan, ScanDirection dir)
Definition nbtsearch.c:1586
@ ForwardScanDirection
Definition sdir.h:28
ItemPointerData xs_heaptid
Definition relscan.h:173
Relation heapRelation
Definition relscan.h:137
void tbm_add_tuples(TIDBitmap *tbm, const ItemPointerData *tids, int ntids, bool recheck)
Definition tidbitmap.c:367

References _bt_first(), _bt_next(), _bt_start_prim_scan(), Assert, fb(), ForwardScanDirection, IndexScanDescData::heapRelation, IndexScanDescData::opaque, tbm_add_tuples(), and IndexScanDescData::xs_heaptid.

Referenced by bthandler().

◆ btgettreeheight()

int btgettreeheight ( Relation  rel)

Definition at line 1807 of file nbtree.c.

1808{
1809 return _bt_getrootheight(rel);
1810}
int _bt_getrootheight(Relation rel)
Definition nbtpage.c:680

References _bt_getrootheight().

Referenced by bthandler().

◆ btgettuple()

bool btgettuple ( IndexScanDesc  scan,
ScanDirection  dir 
)

Definition at line 229 of file nbtree.c.

230{
232 bool res;
233
234 Assert(scan->heapRelation != NULL);
235
236 /* btree indexes are never lossy */
237 scan->xs_recheck = false;
238
239 /* Each loop iteration performs another primitive index scan */
240 do
241 {
242 /*
243 * If we've already initialized this scan, we can just advance it in
244 * the appropriate direction. If we haven't done so yet, we call
245 * _bt_first() to get the first item in the scan.
246 */
247 if (!BTScanPosIsValid(so->currPos))
248 res = _bt_first(scan, dir);
249 else
250 {
251 /*
252 * Check to see if we should kill the previously-fetched tuple.
253 */
254 if (scan->kill_prior_tuple)
255 {
256 /*
257 * Yes, remember it for later. (We'll deal with all such
258 * tuples at once right before leaving the index page.) The
259 * test for numKilled overrun is not just paranoia: if the
260 * caller reverses direction in the indexscan then the same
261 * item might get entered multiple times. It's not worth
262 * trying to optimize that, so we don't detect it, but instead
263 * just forget any excess entries.
264 */
265 if (so->killedItems == NULL)
266 so->killedItems = palloc_array(int, MaxTIDsPerBTreePage);
267 if (so->numKilled < MaxTIDsPerBTreePage)
268 so->killedItems[so->numKilled++] = so->currPos.itemIndex;
269 }
270
271 /*
272 * Now continue the scan.
273 */
274 res = _bt_next(scan, dir);
275 }
276
277 /* If we have a tuple, return it ... */
278 if (res)
279 break;
280 /* ... otherwise see if we need another primitive index scan */
281 } while (so->numArrayKeys && _bt_start_prim_scan(scan));
282
283 return res;
284}
#define palloc_array(type, count)
Definition fe_memutils.h:76
#define MaxTIDsPerBTreePage
Definition nbtree.h:186
bool kill_prior_tuple
Definition relscan.h:148

References _bt_first(), _bt_next(), _bt_start_prim_scan(), Assert, BTScanPosIsValid, fb(), IndexScanDescData::heapRelation, IndexScanDescData::kill_prior_tuple, MaxTIDsPerBTreePage, IndexScanDescData::opaque, palloc_array, and IndexScanDescData::xs_recheck.

Referenced by bthandler().

◆ bthandler()

Datum bthandler ( PG_FUNCTION_ARGS  )

Definition at line 117 of file nbtree.c.

118{
119 static const IndexAmRoutine amroutine = {
121 .amstrategies = BTMaxStrategyNumber,
122 .amsupport = BTNProcs,
123 .amoptsprocnum = BTOPTIONS_PROC,
124 .amcanorder = true,
125 .amcanorderbyop = false,
126 .amcanhash = false,
127 .amconsistentequality = true,
128 .amconsistentordering = true,
129 .amcanbackward = true,
130 .amcanunique = true,
131 .amcanmulticol = true,
132 .amoptionalkey = true,
133 .amsearcharray = true,
134 .amsearchnulls = true,
135 .amstorage = false,
136 .amclusterable = true,
137 .ampredlocks = true,
138 .amcanparallel = true,
139 .amcanbuildparallel = true,
140 .amcaninclude = true,
141 .amusemaintenanceworkmem = false,
142 .amsummarizing = false,
143 .amparallelvacuumoptions =
145 .amkeytype = InvalidOid,
146
147 .ambuild = btbuild,
148 .ambuildempty = btbuildempty,
149 .aminsert = btinsert,
150 .aminsertcleanup = NULL,
151 .ambulkdelete = btbulkdelete,
152 .amvacuumcleanup = btvacuumcleanup,
153 .amcanreturn = btcanreturn,
154 .amcostestimate = btcostestimate,
155 .amgettreeheight = btgettreeheight,
156 .amoptions = btoptions,
157 .amproperty = btproperty,
158 .ambuildphasename = btbuildphasename,
159 .amvalidate = btvalidate,
160 .amadjustmembers = btadjustmembers,
161 .ambeginscan = btbeginscan,
162 .amrescan = btrescan,
163 .amgettuple = btgettuple,
164 .amgetbitmap = btgetbitmap,
165 .amendscan = btendscan,
166 .ammarkpos = btmarkpos,
167 .amrestrpos = btrestrpos,
168 .amestimateparallelscan = btestimateparallelscan,
169 .aminitparallelscan = btinitparallelscan,
170 .amparallelrescan = btparallelrescan,
171 .amtranslatestrategy = bttranslatestrategy,
172 .amtranslatecmptype = bttranslatecmptype,
173 };
174
176}
#define PG_RETURN_POINTER(x)
Definition fmgr.h:363
bool btcanreturn(Relation index, int attno)
Definition nbtree.c:1798
StrategyNumber bttranslatecmptype(CompareType cmptype, Oid opfamily)
Definition nbtree.c:1833
IndexScanDesc btbeginscan(Relation rel, int nkeys, int norderbys)
Definition nbtree.c:338
Size btestimateparallelscan(Relation rel, int nkeys, int norderbys)
Definition nbtree.c:574
IndexBulkDeleteResult * btvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
Definition nbtree.c:1148
CompareType bttranslatestrategy(StrategyNumber strategy, Oid opfamily)
Definition nbtree.c:1813
bool btgettuple(IndexScanDesc scan, ScanDirection dir)
Definition nbtree.c:229
void btparallelrescan(IndexScanDesc scan)
Definition nbtree.c:826
bool btinsert(Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo)
Definition nbtree.c:205
void btbuildempty(Relation index)
Definition nbtree.c:182
int btgettreeheight(Relation rel)
Definition nbtree.c:1807
void btinitparallelscan(void *target)
Definition nbtree.c:810
IndexBulkDeleteResult * btbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
Definition nbtree.c:1118
int64 btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
Definition nbtree.c:290
void btmarkpos(IndexScanDesc scan)
Definition nbtree.c:490
void btendscan(IndexScanDesc scan)
Definition nbtree.c:454
void btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
Definition nbtree.c:387
void btrestrpos(IndexScanDesc scan)
Definition nbtree.c:516
#define BTNProcs
Definition nbtree.h:723
#define BTOPTIONS_PROC
Definition nbtree.h:721
IndexBuildResult * btbuild(Relation heap, Relation index, IndexInfo *indexInfo)
Definition nbtsort.c:299
char * btbuildphasename(int64 phasenum)
Definition nbtutils.c:645
bytea * btoptions(Datum reloptions, bool validate)
Definition nbtutils.c:599
bool btproperty(Oid index_oid, int attno, IndexAMProperty prop, const char *propname, bool *res, bool *isnull)
Definition nbtutils.c:622
bool btvalidate(Oid opclassoid)
Definition nbtvalidate.c:40
void btadjustmembers(Oid opfamilyoid, Oid opclassoid, List *operators, List *functions)
#define InvalidOid
void btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
Definition selfuncs.c:7690
#define BTMaxStrategyNumber
Definition stratnum.h:35
NodeTag type
Definition amapi.h:234
#define VACUUM_OPTION_PARALLEL_BULKDEL
Definition vacuum.h:48
#define VACUUM_OPTION_PARALLEL_COND_CLEANUP
Definition vacuum.h:55

References btadjustmembers(), btbeginscan(), btbuild(), btbuildempty(), btbuildphasename(), btbulkdelete(), btcanreturn(), btcostestimate(), btendscan(), btestimateparallelscan(), btgetbitmap(), btgettreeheight(), btgettuple(), btinitparallelscan(), btinsert(), btmarkpos(), BTMaxStrategyNumber, BTNProcs, btoptions(), BTOPTIONS_PROC, btparallelrescan(), btproperty(), btrescan(), btrestrpos(), bttranslatecmptype(), bttranslatestrategy(), btvacuumcleanup(), btvalidate(), fb(), InvalidOid, PG_RETURN_POINTER, IndexAmRoutine::type, VACUUM_OPTION_PARALLEL_BULKDEL, and VACUUM_OPTION_PARALLEL_COND_CLEANUP.

◆ btinitparallelscan()

void btinitparallelscan ( void *  target)

Definition at line 810 of file nbtree.c.

811{
813
814 LWLockInitialize(&bt_target->btps_lock,
816 bt_target->btps_nextScanPage = InvalidBlockNumber;
817 bt_target->btps_lastCurrPage = InvalidBlockNumber;
818 bt_target->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
820}
void ConditionVariableInit(ConditionVariable *cv)
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition lwlock.c:699

References BTPARALLEL_NOT_INITIALIZED, ConditionVariableInit(), fb(), InvalidBlockNumber, and LWLockInitialize().

Referenced by bthandler().

◆ btinsert()

bool btinsert ( Relation  rel,
Datum *  values,
bool *  isnull,
ItemPointer  ht_ctid,
Relation  heapRel,
IndexUniqueCheck  checkUnique,
bool  indexUnchanged,
IndexInfo indexInfo 
)

Definition at line 205 of file nbtree.c.

210{
211 bool result;
212 IndexTuple itup;
213
214 /* generate an index tuple */
215 itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
216 itup->t_tid = *ht_ctid;
217
218 result = _bt_doinsert(rel, itup, checkUnique, indexUnchanged, heapRel);
219
220 pfree(itup);
221
222 return result;
223}
static Datum values[MAXATTR]
Definition bootstrap.c:188
IndexTuple index_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition indextuple.c:44
bool _bt_doinsert(Relation rel, IndexTuple itup, IndexUniqueCheck checkUnique, bool indexUnchanged, Relation heapRel)
Definition nbtinsert.c:105
ItemPointerData t_tid
Definition itup.h:37

References _bt_doinsert(), fb(), index_form_tuple(), pfree(), RelationGetDescr, IndexTupleData::t_tid, and values.

Referenced by bthandler().

◆ btmarkpos()

void btmarkpos ( IndexScanDesc  scan)

Definition at line 490 of file nbtree.c.

491{
493
494 /* There may be an old mark with a pin (but no lock). */
495 BTScanPosUnpinIfPinned(so->markPos);
496
497 /*
498 * Just record the current itemIndex. If we later step to next page
499 * before releasing the marked position, _bt_steppage makes a full copy of
500 * the currPos struct in markPos. If (as often happens) the mark is moved
501 * before we leave the page, we don't have to do that work.
502 */
503 if (BTScanPosIsValid(so->currPos))
504 so->markItemIndex = so->currPos.itemIndex;
505 else
506 {
507 BTScanPosInvalidate(so->markPos);
508 so->markItemIndex = -1;
509 }
510}

References BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, fb(), and IndexScanDescData::opaque.

Referenced by bthandler().

◆ btparallelrescan()

void btparallelrescan ( IndexScanDesc  scan)

Definition at line 826 of file nbtree.c.

827{
829 ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
830
831 Assert(parallel_scan);
832
833 btscan = (BTParallelScanDesc) OffsetToPointer(parallel_scan,
834 parallel_scan->ps_offset_am);
835
836 /*
837 * In theory, we don't need to acquire the LWLock here, because there
838 * shouldn't be any other workers running at this point, but we do so for
839 * consistency.
840 */
841 LWLockAcquire(&btscan->btps_lock, LW_EXCLUSIVE);
842 btscan->btps_nextScanPage = InvalidBlockNumber;
843 btscan->btps_lastCurrPage = InvalidBlockNumber;
844 btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
845 LWLockRelease(&btscan->btps_lock);
846}

References Assert, BTPARALLEL_NOT_INITIALIZED, fb(), InvalidBlockNumber, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), OffsetToPointer, IndexScanDescData::parallel_scan, and ParallelIndexScanDescData::ps_offset_am.

Referenced by bthandler().

◆ btreevacuumposting()

static BTVacuumPosting btreevacuumposting ( BTVacState vstate,
IndexTuple  posting,
OffsetNumber  updatedoffset,
int nremaining 
)
static

Definition at line 1749 of file nbtree.c.

1751{
1752 int live = 0;
1753 int nitem = BTreeTupleGetNPosting(posting);
1756
1757 for (int i = 0; i < nitem; i++)
1758 {
1759 if (!vstate->callback(items + i, vstate->callback_state))
1760 {
1761 /* Live table TID */
1762 live++;
1763 }
1764 else if (vacposting == NULL)
1765 {
1766 /*
1767 * First dead table TID encountered.
1768 *
1769 * It's now clear that we need to delete one or more dead table
1770 * TIDs, so start maintaining metadata describing how to update
1771 * existing posting list tuple.
1772 */
1774 nitem * sizeof(uint16));
1775
1776 vacposting->itup = posting;
1777 vacposting->updatedoffset = updatedoffset;
1778 vacposting->ndeletedtids = 0;
1779 vacposting->deletetids[vacposting->ndeletedtids++] = i;
1780 }
1781 else
1782 {
1783 /* Second or subsequent dead table TID */
1784 vacposting->deletetids[vacposting->ndeletedtids++] = i;
1785 }
1786 }
1787
1788 *nremaining = live;
1789 return vacposting;
1790}
uint16_t uint16
Definition c.h:617
static uint16 BTreeTupleGetNPosting(IndexTuple posting)
Definition nbtree.h:519
static ItemPointer BTreeTupleGetPosting(IndexTuple posting)
Definition nbtree.h:538
static ItemArray items

References BTreeTupleGetNPosting(), BTreeTupleGetPosting(), fb(), i, items, and palloc().

Referenced by btvacuumpage().

◆ btrescan()

void btrescan ( IndexScanDesc  scan,
ScanKey  scankey,
int  nscankeys,
ScanKey  orderbys,
int  norderbys 
)

Definition at line 387 of file nbtree.c.

389{
391
392 /* we aren't holding any read locks, but gotta drop the pins */
393 if (BTScanPosIsValid(so->currPos))
394 {
395 /* Before leaving current page, deal with any killed items */
396 if (so->numKilled > 0)
397 _bt_killitems(scan);
398 BTScanPosUnpinIfPinned(so->currPos);
399 BTScanPosInvalidate(so->currPos);
400 }
401
402 /*
403 * We prefer to eagerly drop leaf page pins before btgettuple returns.
404 * This avoids making VACUUM wait to acquire a cleanup lock on the page.
405 *
406 * We cannot safely drop leaf page pins during index-only scans due to a
407 * race condition involving VACUUM setting pages all-visible in the VM.
408 * It's also unsafe for plain index scans that use a non-MVCC snapshot.
409 *
410 * Also opt out of dropping leaf page pins eagerly during bitmap scans.
411 * Pins cannot be held for more than an instant during bitmap scans either
412 * way, so we might as well avoid wasting cycles on acquiring page LSNs.
413 *
414 * See nbtree/README section on making concurrent TID recycling safe.
415 *
416 * Note: so->dropPin should never change across rescans.
417 */
418 so->dropPin = (!scan->xs_want_itup &&
420 scan->heapRelation != NULL);
421
422 so->markItemIndex = -1;
423 so->needPrimScan = false;
424 so->scanBehind = false;
425 so->oppositeDirCheck = false;
426 BTScanPosUnpinIfPinned(so->markPos);
427 BTScanPosInvalidate(so->markPos);
428
429 /*
430 * Allocate tuple workspace arrays, if needed for an index-only scan and
431 * not already done in a previous rescan call. To save on palloc
432 * overhead, both workspaces are allocated as one palloc block; only this
433 * function and btendscan know that.
434 */
435 if (scan->xs_want_itup && so->currTuples == NULL)
436 {
437 so->currTuples = (char *) palloc(BLCKSZ * 2);
438 so->markTuples = so->currTuples + BLCKSZ;
439 }
440
441 /*
442 * Reset the scan keys
443 */
444 if (scankey && scan->numberOfKeys > 0)
445 memcpy(scan->keyData, scankey, scan->numberOfKeys * sizeof(ScanKeyData));
446 so->numberOfKeys = 0; /* until _bt_preprocess_keys sets it */
447 so->numArrayKeys = 0; /* ditto */
448}
#define IsMVCCLikeSnapshot(snapshot)
Definition snapmgr.h:74
struct ScanKeyData * keyData
Definition relscan.h:142
struct SnapshotData * xs_snapshot
Definition relscan.h:139

References _bt_killitems(), BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, fb(), IndexScanDescData::heapRelation, IsMVCCLikeSnapshot, IndexScanDescData::keyData, IndexScanDescData::numberOfKeys, IndexScanDescData::opaque, palloc(), IndexScanDescData::xs_snapshot, and IndexScanDescData::xs_want_itup.

Referenced by bthandler().

◆ btrestrpos()

void btrestrpos ( IndexScanDesc  scan)

Definition at line 516 of file nbtree.c.

517{
519
520 if (so->markItemIndex >= 0)
521 {
522 /*
523 * The scan has never moved to a new page since the last mark. Just
524 * restore the itemIndex.
525 *
526 * NB: In this case we can't count on anything in so->markPos to be
527 * accurate.
528 */
529 so->currPos.itemIndex = so->markItemIndex;
530 }
531 else
532 {
533 /*
534 * The scan moved to a new page after last mark or restore, and we are
535 * now restoring to the marked page. We aren't holding any read
536 * locks, but if we're still holding the pin for the current position,
537 * we must drop it.
538 */
539 if (BTScanPosIsValid(so->currPos))
540 {
541 /* Before leaving current page, deal with any killed items */
542 if (so->numKilled > 0)
543 _bt_killitems(scan);
544 BTScanPosUnpinIfPinned(so->currPos);
545 }
546
547 if (BTScanPosIsValid(so->markPos))
548 {
549 /* bump pin on mark buffer for assignment to current buffer */
550 if (BTScanPosIsPinned(so->markPos))
551 IncrBufferRefCount(so->markPos.buf);
552 memcpy(&so->currPos, &so->markPos,
554 so->markPos.lastItem * sizeof(BTScanPosItem));
555 if (so->currTuples)
556 memcpy(so->currTuples, so->markTuples,
557 so->markPos.nextTupleOffset);
558 /* Reset the scan's array keys (see _bt_steppage for why) */
559 if (so->numArrayKeys)
560 {
561 _bt_start_array_keys(scan, so->currPos.dir);
562 so->needPrimScan = false;
563 }
564 }
565 else
566 BTScanPosInvalidate(so->currPos);
567 }
568}
void IncrBufferRefCount(Buffer buffer)
Definition bufmgr.c:5537
void _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir)
#define BTScanPosIsPinned(scanpos)
Definition nbtree.h:1004

References _bt_killitems(), _bt_start_array_keys(), BTScanPosInvalidate, BTScanPosIsPinned, BTScanPosIsValid, BTScanPosUnpinIfPinned, fb(), IncrBufferRefCount(), items, and IndexScanDescData::opaque.

Referenced by bthandler().

◆ bttranslatecmptype()

StrategyNumber bttranslatecmptype ( CompareType  cmptype,
Oid  opfamily 
)

Definition at line 1833 of file nbtree.c.

1834{
1835 switch (cmptype)
1836 {
1837 case COMPARE_LT:
1838 return BTLessStrategyNumber;
1839 case COMPARE_LE:
1841 case COMPARE_EQ:
1842 return BTEqualStrategyNumber;
1843 case COMPARE_GE:
1845 case COMPARE_GT:
1847 default:
1848 return InvalidStrategy;
1849 }
1850}
@ COMPARE_LE
Definition cmptype.h:35
@ COMPARE_GT
Definition cmptype.h:38
@ COMPARE_EQ
Definition cmptype.h:36
@ COMPARE_GE
Definition cmptype.h:37
@ COMPARE_LT
Definition cmptype.h:34
#define BTGreaterStrategyNumber
Definition stratnum.h:33
#define InvalidStrategy
Definition stratnum.h:24
#define BTLessStrategyNumber
Definition stratnum.h:29
#define BTEqualStrategyNumber
Definition stratnum.h:31
#define BTLessEqualStrategyNumber
Definition stratnum.h:30
#define BTGreaterEqualStrategyNumber
Definition stratnum.h:32

References BTEqualStrategyNumber, BTGreaterEqualStrategyNumber, BTGreaterStrategyNumber, BTLessEqualStrategyNumber, BTLessStrategyNumber, COMPARE_EQ, COMPARE_GE, COMPARE_GT, COMPARE_LE, COMPARE_LT, and InvalidStrategy.

Referenced by bthandler().

◆ bttranslatestrategy()

CompareType bttranslatestrategy ( StrategyNumber  strategy,
Oid  opfamily 
)

Definition at line 1813 of file nbtree.c.

1814{
1815 switch (strategy)
1816 {
1818 return COMPARE_LT;
1820 return COMPARE_LE;
1822 return COMPARE_EQ;
1824 return COMPARE_GE;
1826 return COMPARE_GT;
1827 default:
1828 return COMPARE_INVALID;
1829 }
1830}
@ COMPARE_INVALID
Definition cmptype.h:33

References BTEqualStrategyNumber, BTGreaterEqualStrategyNumber, BTGreaterStrategyNumber, BTLessEqualStrategyNumber, BTLessStrategyNumber, COMPARE_EQ, COMPARE_GE, COMPARE_GT, COMPARE_INVALID, COMPARE_LE, and COMPARE_LT.

Referenced by bthandler().

◆ btvacuumcleanup()

IndexBulkDeleteResult * btvacuumcleanup ( IndexVacuumInfo info,
IndexBulkDeleteResult stats 
)

Definition at line 1148 of file nbtree.c.

1149{
1151
1152 /* No-op in ANALYZE ONLY mode */
1153 if (info->analyze_only)
1154 return stats;
1155
1156 /*
1157 * If btbulkdelete was called, we need not do anything (we just maintain
1158 * the information used within _bt_vacuum_needs_cleanup() by calling
1159 * _bt_set_cleanup_info() below).
1160 *
1161 * If btbulkdelete was _not_ called, then we have a choice to make: we
1162 * must decide whether or not a btvacuumscan() call is needed now (i.e.
1163 * whether the ongoing VACUUM operation can entirely avoid a physical scan
1164 * of the index). A call to _bt_vacuum_needs_cleanup() decides it for us
1165 * now.
1166 */
1167 if (stats == NULL)
1168 {
1169 /* Check if VACUUM operation can entirely avoid btvacuumscan() call */
1170 if (!_bt_vacuum_needs_cleanup(info->index))
1171 return NULL;
1172
1173 /*
1174 * Since we aren't going to actually delete any leaf items, there's no
1175 * need to go through all the vacuum-cycle-ID pushups here.
1176 *
1177 * Posting list tuples are a source of inaccuracy for cleanup-only
1178 * scans. btvacuumscan() will assume that the number of index tuples
1179 * from each page can be used as num_index_tuples, even though
1180 * num_index_tuples is supposed to represent the number of TIDs in the
1181 * index. This naive approach can underestimate the number of tuples
1182 * in the index significantly.
1183 *
1184 * We handle the problem by making num_index_tuples an estimate in
1185 * cleanup-only case.
1186 */
1188 btvacuumscan(info, stats, NULL, NULL, 0);
1189 stats->estimated_count = true;
1190 }
1191
1192 /*
1193 * Maintain num_delpages value in metapage for _bt_vacuum_needs_cleanup().
1194 *
1195 * num_delpages is the number of deleted pages now in the index that were
1196 * not safe to place in the FSM to be recycled just yet. num_delpages is
1197 * greater than 0 only when _bt_pagedel() actually deleted pages during
1198 * our call to btvacuumscan(). Even then, _bt_pendingfsm_finalize() must
1199 * have failed to place any newly deleted pages in the FSM just moments
1200 * ago. (Actually, there are edge cases where recycling of the current
1201 * VACUUM's newly deleted pages does not even become safe by the time the
1202 * next VACUUM comes around. See nbtree/README.)
1203 */
1204 Assert(stats->pages_deleted >= stats->pages_free);
1205 num_delpages = stats->pages_deleted - stats->pages_free;
1207
1208 /*
1209 * It's quite possible for us to be fooled by concurrent page splits into
1210 * double-counting some index tuples, so disbelieve any total that exceeds
1211 * the underlying heap's count ... if we know that accurately. Otherwise
1212 * this might just make matters worse.
1213 */
1214 if (!info->estimated_count)
1215 {
1216 if (stats->num_index_tuples > info->num_heap_tuples)
1217 stats->num_index_tuples = info->num_heap_tuples;
1218 }
1219
1220 return stats;
1221}
uint32 BlockNumber
Definition block.h:31
void _bt_set_cleanup_info(Relation rel, BlockNumber num_delpages)
Definition nbtpage.c:233
bool _bt_vacuum_needs_cleanup(Relation rel)
Definition nbtpage.c:180
BlockNumber pages_deleted
Definition genam.h:90
BlockNumber pages_free
Definition genam.h:91
double num_index_tuples
Definition genam.h:87
double num_heap_tuples
Definition genam.h:60
bool analyze_only
Definition genam.h:56
bool estimated_count
Definition genam.h:58

References _bt_set_cleanup_info(), _bt_vacuum_needs_cleanup(), IndexVacuumInfo::analyze_only, Assert, btvacuumscan(), IndexVacuumInfo::estimated_count, IndexBulkDeleteResult::estimated_count, fb(), IndexVacuumInfo::index, IndexVacuumInfo::num_heap_tuples, IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, and palloc0_object.

Referenced by bthandler().

◆ btvacuumpage()

static BlockNumber btvacuumpage ( BTVacState vstate,
Buffer  buf 
)
static

Definition at line 1411 of file nbtree.c.

1412{
1413 IndexVacuumInfo *info = vstate->info;
1414 IndexBulkDeleteResult *stats = vstate->stats;
1416 void *callback_state = vstate->callback_state;
1417 Relation rel = info->index;
1418 Relation heaprel = info->heaprel;
1419 bool attempt_pagedel;
1420 BlockNumber blkno,
1423 Page page;
1424 BTPageOpaque opaque;
1425
1426 blkno = scanblkno;
1427
1428backtrack:
1429
1430 attempt_pagedel = false;
1432
1433 _bt_lockbuf(rel, buf, BT_READ);
1434 page = BufferGetPage(buf);
1435 opaque = NULL;
1436 if (!PageIsNew(page))
1437 {
1438 _bt_checkpage(rel, buf);
1439 opaque = BTPageGetOpaque(page);
1440 }
1441
1442 Assert(blkno <= scanblkno);
1443 if (blkno != scanblkno)
1444 {
1445 /*
1446 * We're backtracking.
1447 *
1448 * We followed a right link to a sibling leaf page (a page that
1449 * happens to be from a block located before scanblkno). The only
1450 * case we want to do anything with is a live leaf page having the
1451 * current vacuum cycle ID.
1452 *
1453 * The page had better be in a state that's consistent with what we
1454 * expect. Check for conditions that imply corruption in passing. It
1455 * can't be half-dead because only an interrupted VACUUM process can
1456 * leave pages in that state, so we'd definitely have dealt with it
1457 * back when the page was the scanblkno page (half-dead pages are
1458 * always marked fully deleted by _bt_pagedel(), barring corruption).
1459 */
1460 if (!opaque || !P_ISLEAF(opaque) || P_ISHALFDEAD(opaque))
1461 {
1462 Assert(false);
1463 ereport(LOG,
1465 errmsg_internal("right sibling %u of scanblkno %u unexpectedly in an inconsistent state in index \"%s\"",
1466 blkno, scanblkno, RelationGetRelationName(rel))));
1467 _bt_relbuf(rel, buf);
1468 return scanblkno;
1469 }
1470
1471 /*
1472 * We may have already processed the page in an earlier call, when the
1473 * page was scanblkno. This happens when the leaf page split occurred
1474 * after the scan began, but before the right sibling page became the
1475 * scanblkno.
1476 *
1477 * Page may also have been deleted by current btvacuumpage() call,
1478 * since _bt_pagedel() sometimes deletes the right sibling page of
1479 * scanblkno in passing (it does so after we decided where to
1480 * backtrack to). We don't need to process this page as a deleted
1481 * page a second time now (in fact, it would be wrong to count it as a
1482 * deleted page in the bulk delete statistics a second time).
1483 */
1484 if (opaque->btpo_cycleid != vstate->cycleid || P_ISDELETED(opaque))
1485 {
1486 /* Done with current scanblkno (and all lower split pages) */
1487 _bt_relbuf(rel, buf);
1488 return scanblkno;
1489 }
1490 }
1491
1492 if (!opaque || BTPageIsRecyclable(page, heaprel))
1493 {
1494 /* Okay to recycle this page (which could be leaf or internal) */
1495 RecordFreeIndexPage(rel, blkno);
1496 stats->pages_deleted++;
1497 stats->pages_free++;
1498 }
1499 else if (P_ISDELETED(opaque))
1500 {
1501 /*
1502 * Already deleted page (which could be leaf or internal). Can't
1503 * recycle yet.
1504 */
1505 stats->pages_deleted++;
1506 }
1507 else if (P_ISHALFDEAD(opaque))
1508 {
1509 /* Half-dead leaf page (from interrupted VACUUM) -- finish deleting */
1510 attempt_pagedel = true;
1511
1512 /*
1513 * _bt_pagedel() will increment both pages_newly_deleted and
1514 * pages_deleted stats in all cases (barring corruption)
1515 */
1516 }
1517 else if (P_ISLEAF(opaque))
1518 {
1520 int ndeletable;
1522 int nupdatable;
1523 OffsetNumber offnum,
1524 minoff,
1525 maxoff;
1526 int nhtidsdead,
1527 nhtidslive;
1528
1529 /*
1530 * Trade in the initial read lock for a full cleanup lock on this
1531 * page. We must get such a lock on every leaf page over the course
1532 * of the vacuum scan, whether or not it actually contains any
1533 * deletable tuples --- see nbtree/README.
1534 */
1536
1537 /*
1538 * Check whether we need to backtrack to earlier pages. What we are
1539 * concerned about is a page split that happened since we started the
1540 * vacuum scan. If the split moved tuples on the right half of the
1541 * split (i.e. the tuples that sort high) to a block that we already
1542 * passed over, then we might have missed the tuples. We need to
1543 * backtrack now. (Must do this before possibly clearing btpo_cycleid
1544 * or deleting scanblkno page below!)
1545 */
1546 if (vstate->cycleid != 0 &&
1547 opaque->btpo_cycleid == vstate->cycleid &&
1548 !(opaque->btpo_flags & BTP_SPLIT_END) &&
1549 !P_RIGHTMOST(opaque) &&
1550 opaque->btpo_next < scanblkno)
1551 backtrack_to = opaque->btpo_next;
1552
1553 ndeletable = 0;
1554 nupdatable = 0;
1555 minoff = P_FIRSTDATAKEY(opaque);
1556 maxoff = PageGetMaxOffsetNumber(page);
1557 nhtidsdead = 0;
1558 nhtidslive = 0;
1559 if (callback)
1560 {
1561 /* btbulkdelete callback tells us what to delete (or update) */
1562 for (offnum = minoff;
1563 offnum <= maxoff;
1564 offnum = OffsetNumberNext(offnum))
1565 {
1566 IndexTuple itup;
1567
1568 itup = (IndexTuple) PageGetItem(page,
1569 PageGetItemId(page, offnum));
1570
1571 Assert(!BTreeTupleIsPivot(itup));
1572 if (!BTreeTupleIsPosting(itup))
1573 {
1574 /* Regular tuple, standard table TID representation */
1575 if (callback(&itup->t_tid, callback_state))
1576 {
1577 deletable[ndeletable++] = offnum;
1578 nhtidsdead++;
1579 }
1580 else
1581 nhtidslive++;
1582 }
1583 else
1584 {
1586 int nremaining;
1587
1588 /* Posting list tuple */
1589 vacposting = btreevacuumposting(vstate, itup, offnum,
1590 &nremaining);
1591 if (vacposting == NULL)
1592 {
1593 /*
1594 * All table TIDs from the posting tuple remain, so no
1595 * delete or update required
1596 */
1598 }
1599 else if (nremaining > 0)
1600 {
1601
1602 /*
1603 * Store metadata about posting list tuple in
1604 * updatable array for entire page. Existing tuple
1605 * will be updated during the later call to
1606 * _bt_delitems_vacuum().
1607 */
1609 updatable[nupdatable++] = vacposting;
1611 }
1612 else
1613 {
1614 /*
1615 * All table TIDs from the posting list must be
1616 * deleted. We'll delete the index tuple completely
1617 * (no update required).
1618 */
1619 Assert(nremaining == 0);
1620 deletable[ndeletable++] = offnum;
1623 }
1624
1626 }
1627 }
1628 }
1629
1630 /*
1631 * Apply any needed deletes or updates. We issue just one
1632 * _bt_delitems_vacuum() call per page, so as to minimize WAL traffic.
1633 */
1634 if (ndeletable > 0 || nupdatable > 0)
1635 {
1637 _bt_delitems_vacuum(rel, buf, deletable, ndeletable, updatable,
1638 nupdatable);
1639
1640 stats->tuples_removed += nhtidsdead;
1641 /* must recompute maxoff */
1642 maxoff = PageGetMaxOffsetNumber(page);
1643
1644 /* can't leak memory here */
1645 for (int i = 0; i < nupdatable; i++)
1646 pfree(updatable[i]);
1647 }
1648 else
1649 {
1650 /*
1651 * If the leaf page has been split during this vacuum cycle, it
1652 * seems worth expending a write to clear btpo_cycleid even if we
1653 * don't have any deletions to do. (If we do, _bt_delitems_vacuum
1654 * takes care of this.) This ensures we won't process the page
1655 * again.
1656 *
1657 * We treat this like a hint-bit update because there's no need to
1658 * WAL-log it.
1659 */
1660 Assert(nhtidsdead == 0);
1661 if (vstate->cycleid != 0 &&
1662 opaque->btpo_cycleid == vstate->cycleid)
1663 {
1664 opaque->btpo_cycleid = 0;
1665 MarkBufferDirtyHint(buf, true);
1666 }
1667 }
1668
1669 /*
1670 * If the leaf page is now empty, try to delete it; else count the
1671 * live tuples (live table TIDs in posting lists are counted as
1672 * separate live tuples). We don't delete when backtracking, though,
1673 * since that would require teaching _bt_pagedel() about backtracking
1674 * (doesn't seem worth adding more complexity to deal with that).
1675 *
1676 * We don't count the number of live TIDs during cleanup-only calls to
1677 * btvacuumscan (i.e. when callback is not set). We count the number
1678 * of index tuples directly instead. This avoids the expense of
1679 * directly examining all of the tuples on each page. VACUUM will
1680 * treat num_index_tuples as an estimate in cleanup-only case, so it
1681 * doesn't matter that this underestimates num_index_tuples
1682 * significantly in some cases.
1683 */
1684 if (minoff > maxoff)
1685 attempt_pagedel = (blkno == scanblkno);
1686 else if (callback)
1687 stats->num_index_tuples += nhtidslive;
1688 else
1689 stats->num_index_tuples += maxoff - minoff + 1;
1690
1692 }
1693
1694 if (attempt_pagedel)
1695 {
1696 MemoryContext oldcontext;
1697
1698 /* Run pagedel in a temp context to avoid memory leakage */
1699 MemoryContextReset(vstate->pagedelcontext);
1700 oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
1701
1702 /*
1703 * _bt_pagedel maintains the bulk delete stats on our behalf;
1704 * pages_newly_deleted and pages_deleted are likely to be incremented
1705 * during call
1706 */
1707 Assert(blkno == scanblkno);
1708 _bt_pagedel(rel, buf, vstate);
1709
1710 MemoryContextSwitchTo(oldcontext);
1711 /* pagedel released buffer, so we shouldn't */
1712 }
1713 else
1714 _bt_relbuf(rel, buf);
1715
1716 if (backtrack_to != P_NONE)
1717 {
1718 blkno = backtrack_to;
1719
1720 /* check for vacuum delay while not holding any buffer lock */
1721 vacuum_delay_point(false);
1722
1723 /*
1724 * We can't use _bt_getbuf() here because it always applies
1725 * _bt_checkpage(), which will barf on an all-zero page. We want to
1726 * recycle all-zero pages, not fail. Also, we want to use a
1727 * nondefault buffer access strategy.
1728 */
1730 info->strategy);
1731 goto backtrack;
1732 }
1733
1734 return scanblkno;
1735}
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition bufmgr.c:4357
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition bufmgr.c:5688
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition bufmgr.c:921
static Page BufferGetPage(Buffer buffer)
Definition bufmgr.h:470
@ RBM_NORMAL
Definition bufmgr.h:46
static bool PageIsNew(const PageData *page)
Definition bufpage.h:259
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition bufpage.h:269
static void * PageGetItem(PageData *page, const ItemIdData *itemId)
Definition bufpage.h:379
static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)
Definition bufpage.h:397
int errcode(int sqlerrcode)
Definition elog.c:874
#define LOG
Definition elog.h:31
int errmsg_internal(const char *fmt,...) pg_attribute_printf(1, 2)
#define ereport(elevel,...)
Definition elog.h:150
bool(* IndexBulkDeleteCallback)(ItemPointer itemptr, void *state)
Definition genam.h:95
void RecordFreeIndexPage(Relation rel, BlockNumber freeBlock)
Definition indexfsm.c:52
IndexTupleData * IndexTuple
Definition itup.h:53
#define MaxIndexTuplesPerPage
Definition itup.h:181
void MemoryContextReset(MemoryContext context)
Definition mcxt.c:403
void _bt_relbuf(Relation rel, Buffer buf)
Definition nbtpage.c:1028
void _bt_pagedel(Relation rel, Buffer leafbuf, BTVacState *vstate)
Definition nbtpage.c:1809
void _bt_delitems_vacuum(Relation rel, Buffer buf, OffsetNumber *deletable, int ndeletable, BTVacuumPosting *updatable, int nupdatable)
Definition nbtpage.c:1159
void _bt_checkpage(Relation rel, Buffer buf)
Definition nbtpage.c:802
void _bt_upgradelockbufcleanup(Relation rel, Buffer buf)
Definition nbtpage.c:1114
void _bt_lockbuf(Relation rel, Buffer buf, int access)
Definition nbtpage.c:1044
static BTVacuumPosting btreevacuumposting(BTVacState *vstate, IndexTuple posting, OffsetNumber updatedoffset, int *nremaining)
Definition nbtree.c:1749
#define P_ISHALFDEAD(opaque)
Definition nbtree.h:225
static bool BTreeTupleIsPivot(IndexTuple itup)
Definition nbtree.h:481
#define P_ISLEAF(opaque)
Definition nbtree.h:221
#define BTPageGetOpaque(page)
Definition nbtree.h:74
#define P_ISDELETED(opaque)
Definition nbtree.h:223
#define P_FIRSTDATAKEY(opaque)
Definition nbtree.h:370
#define P_RIGHTMOST(opaque)
Definition nbtree.h:220
#define BT_READ
Definition nbtree.h:730
static bool BTPageIsRecyclable(Page page, Relation heaprel)
Definition nbtree.h:292
static bool BTreeTupleIsPosting(IndexTuple itup)
Definition nbtree.h:493
#define BTP_SPLIT_END
Definition nbtree.h:82
#define OffsetNumberNext(offsetNumber)
Definition off.h:52
uint16 OffsetNumber
Definition off.h:24
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:124
static char buf[DEFAULT_XLOG_SEG_SIZE]
#define RelationGetRelationName(relation)
Definition rel.h:548
@ MAIN_FORKNUM
Definition relpath.h:58
BlockNumber btpo_next
Definition nbtree.h:66
uint16 btpo_flags
Definition nbtree.h:68
BTCycleId btpo_cycleid
Definition nbtree.h:69
double tuples_removed
Definition genam.h:88
BufferAccessStrategy strategy
Definition genam.h:61
Relation heaprel
Definition genam.h:55
void vacuum_delay_point(bool is_analyze)
Definition vacuum.c:2431

References _bt_checkpage(), _bt_delitems_vacuum(), _bt_lockbuf(), _bt_pagedel(), _bt_relbuf(), _bt_upgradelockbufcleanup(), Assert, BT_READ, BTP_SPLIT_END, BTPageGetOpaque, BTPageIsRecyclable(), BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTreeTupleGetNPosting(), BTreeTupleIsPivot(), BTreeTupleIsPosting(), btreevacuumposting(), buf, BufferGetBlockNumber(), BufferGetPage(), callback(), ereport, errcode(), errmsg_internal(), fb(), IndexVacuumInfo::heaprel, i, IndexVacuumInfo::index, LOG, MAIN_FORKNUM, MarkBufferDirtyHint(), MaxIndexTuplesPerPage, MemoryContextReset(), MemoryContextSwitchTo(), IndexBulkDeleteResult::num_index_tuples, OffsetNumberNext, P_FIRSTDATAKEY, P_ISDELETED, P_ISHALFDEAD, P_ISLEAF, P_NONE, P_RIGHTMOST, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageIsNew(), IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, pfree(), RBM_NORMAL, ReadBufferExtended(), RecordFreeIndexPage(), RelationGetRelationName, IndexVacuumInfo::strategy, IndexTupleData::t_tid, IndexBulkDeleteResult::tuples_removed, and vacuum_delay_point().

Referenced by btvacuumscan().

◆ btvacuumscan()

static void btvacuumscan ( IndexVacuumInfo info,
IndexBulkDeleteResult stats,
IndexBulkDeleteCallback  callback,
void callback_state,
BTCycleId  cycleid 
)
static

Definition at line 1236 of file nbtree.c.

1239{
1240 Relation rel = info->index;
1242 BlockNumber num_pages;
1243 bool needLock;
1245 ReadStream *stream = NULL;
1246
1247 /*
1248 * Reset fields that track information about the entire index now. This
1249 * avoids double-counting in the case where a single VACUUM command
1250 * requires multiple scans of the index.
1251 *
1252 * Avoid resetting the tuples_removed and pages_newly_deleted fields here,
1253 * since they track information about the VACUUM command, and so must last
1254 * across each call to btvacuumscan().
1255 *
1256 * (Note that pages_free is treated as state about the whole index, not
1257 * the current VACUUM. This is appropriate because RecordFreeIndexPage()
1258 * calls are idempotent, and get repeated for the same deleted pages in
1259 * some scenarios. The point for us is to track the number of recyclable
1260 * pages in the index at the end of the VACUUM command.)
1261 */
1262 stats->num_pages = 0;
1263 stats->num_index_tuples = 0;
1264 stats->pages_deleted = 0;
1265 stats->pages_free = 0;
1266
1267 /* Set up info to pass down to btvacuumpage */
1268 vstate.info = info;
1269 vstate.stats = stats;
1270 vstate.callback = callback;
1271 vstate.callback_state = callback_state;
1272 vstate.cycleid = cycleid;
1273
1274 /* Create a temporary memory context to run _bt_pagedel in */
1276 "_bt_pagedel",
1278
1279 /* Initialize vstate fields used by _bt_pendingfsm_finalize */
1280 vstate.bufsize = 0;
1281 vstate.maxbufsize = 0;
1282 vstate.pendingpages = NULL;
1283 vstate.npendingpages = 0;
1284 /* Consider applying _bt_pendingfsm_finalize optimization */
1286
1287 /*
1288 * The outer loop iterates over all index pages except the metapage, in
1289 * physical order (we hope the kernel will cooperate in providing
1290 * read-ahead for speed). It is critical that we visit all leaf pages,
1291 * including ones added after we start the scan, else we might fail to
1292 * delete some deletable tuples. Hence, we must repeatedly check the
1293 * relation length. We must acquire the relation-extension lock while
1294 * doing so to avoid a race condition: if someone else is extending the
1295 * relation, there is a window where bufmgr/smgr have created a new
1296 * all-zero page but it hasn't yet been write-locked by _bt_getbuf(). If
1297 * we manage to scan such a page here, we'll improperly assume it can be
1298 * recycled. Taking the lock synchronizes things enough to prevent a
1299 * problem: either num_pages won't include the new page, or _bt_getbuf
1300 * already has write lock on the buffer and it will be fully initialized
1301 * before we can examine it. Also, we need not worry if a page is added
1302 * immediately after we look; the page splitting code already has
1303 * write-lock on the left page before it adds a right page, so we must
1304 * already have processed any tuples due to be moved into such a page.
1305 *
1306 * XXX: Now that new pages are locked with RBM_ZERO_AND_LOCK, I don't
1307 * think the use of the extension lock is still required.
1308 *
1309 * We can skip locking for new or temp relations, however, since no one
1310 * else could be accessing them.
1311 */
1313
1315
1316 /*
1317 * It is safe to use batchmode as block_range_read_stream_cb takes no
1318 * locks.
1319 */
1323 info->strategy,
1324 rel,
1327 &p,
1328 0);
1329 for (;;)
1330 {
1331 /* Get the current relation length */
1332 if (needLock)
1334 num_pages = RelationGetNumberOfBlocks(rel);
1335 if (needLock)
1337
1338 if (info->report_progress)
1340 num_pages);
1341
1342 /* Quit if we've scanned the whole relation */
1343 if (p.current_blocknum >= num_pages)
1344 break;
1345
1346 p.last_exclusive = num_pages;
1347
1348 /* Iterate over pages, then loop back to recheck relation length */
1349 while (true)
1350 {
1351 BlockNumber current_block;
1352 Buffer buf;
1353
1354 /* call vacuum_delay_point while not holding any buffer lock */
1355 vacuum_delay_point(false);
1356
1357 buf = read_stream_next_buffer(stream, NULL);
1358
1359 if (!BufferIsValid(buf))
1360 break;
1361
1362 current_block = btvacuumpage(&vstate, buf);
1363
1364 if (info->report_progress)
1366 current_block);
1367 }
1368
1369 /*
1370 * We have to reset the read stream to use it again. After returning
1371 * InvalidBuffer, the read stream API won't invoke our callback again
1372 * until the stream has been reset.
1373 */
1374 read_stream_reset(stream);
1375 }
1376
1377 read_stream_end(stream);
1378
1379 /* Set statistics num_pages field to final size of index */
1380 stats->num_pages = num_pages;
1381
1382 MemoryContextDelete(vstate.pagedelcontext);
1383
1384 /*
1385 * If there were any calls to _bt_pagedel() during scan of the index then
1386 * see if any of the resulting pages can be placed in the FSM now. When
1387 * it's not safe we'll have to leave it up to a future VACUUM operation.
1388 *
1389 * Finally, if we placed any pages in the FSM (either just now or during
1390 * the scan), forcibly update the upper-level FSM pages to ensure that
1391 * searchers can find them.
1392 */
1394 if (stats->pages_free > 0)
1396}
void pgstat_progress_update_param(int index, int64 val)
int Buffer
Definition buf.h:23
#define RelationGetNumberOfBlocks(reln)
Definition bufmgr.h:307
static bool BufferIsValid(Buffer bufnum)
Definition bufmgr.h:421
void IndexFreeSpaceMapVacuum(Relation rel)
Definition indexfsm.c:71
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition lmgr.c:424
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition lmgr.c:474
#define ExclusiveLock
Definition lockdefs.h:42
MemoryContext CurrentMemoryContext
Definition mcxt.c:160
#define AllocSetContextCreate
Definition memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition memutils.h:160
void _bt_pendingfsm_finalize(Relation rel, BTVacState *vstate)
Definition nbtpage.c:3010
void _bt_pendingfsm_init(Relation rel, BTVacState *vstate, bool cleanuponly)
Definition nbtpage.c:2968
static BlockNumber btvacuumpage(BTVacState *vstate, Buffer buf)
Definition nbtree.c:1411
#define PROGRESS_SCAN_BLOCKS_DONE
Definition progress.h:148
#define PROGRESS_SCAN_BLOCKS_TOTAL
Definition progress.h:147
void read_stream_reset(ReadStream *stream)
Buffer read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
ReadStream * read_stream_begin_relation(int flags, BufferAccessStrategy strategy, Relation rel, ForkNumber forknum, ReadStreamBlockNumberCB callback, void *callback_private_data, size_t per_buffer_data_size)
void read_stream_end(ReadStream *stream)
BlockNumber block_range_read_stream_cb(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
#define READ_STREAM_MAINTENANCE
Definition read_stream.h:28
#define READ_STREAM_USE_BATCHING
Definition read_stream.h:64
#define READ_STREAM_FULL
Definition read_stream.h:43
#define RELATION_IS_LOCAL(relation)
Definition rel.h:657
BlockNumber num_pages
Definition genam.h:85
bool report_progress
Definition genam.h:57

References _bt_pendingfsm_finalize(), _bt_pendingfsm_init(), ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, block_range_read_stream_cb(), BTREE_METAPAGE, btvacuumpage(), buf, BufferIsValid(), callback(), BlockRangeReadStreamPrivate::current_blocknum, CurrentMemoryContext, ExclusiveLock, fb(), IndexVacuumInfo::index, IndexFreeSpaceMapVacuum(), BlockRangeReadStreamPrivate::last_exclusive, LockRelationForExtension(), MAIN_FORKNUM, MemoryContextDelete(), IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::num_pages, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, pgstat_progress_update_param(), PROGRESS_SCAN_BLOCKS_DONE, PROGRESS_SCAN_BLOCKS_TOTAL, read_stream_begin_relation(), read_stream_end(), READ_STREAM_FULL, READ_STREAM_MAINTENANCE, read_stream_next_buffer(), read_stream_reset(), READ_STREAM_USE_BATCHING, RELATION_IS_LOCAL, RelationGetNumberOfBlocks, IndexVacuumInfo::report_progress, IndexVacuumInfo::strategy, UnlockRelationForExtension(), and vacuum_delay_point().

Referenced by btbulkdelete(), and btvacuumcleanup().