PostgreSQL Source Code git master
Loading...
Searching...
No Matches
nbtree.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "access/relscan.h"
#include "access/stratnum.h"
#include "commands/progress.h"
#include "commands/vacuum.h"
#include "nodes/execnodes.h"
#include "pgstat.h"
#include "storage/bulk_write.h"
#include "storage/condition_variable.h"
#include "storage/indexfsm.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/lwlock.h"
#include "storage/read_stream.h"
#include "utils/datum.h"
#include "utils/fmgrprotos.h"
#include "utils/index_selfuncs.h"
#include "utils/memutils.h"
#include "utils/wait_event.h"
Include dependency graph for nbtree.c:

Go to the source code of this file.

Data Structures

struct  BTParallelScanDescData
 

Typedefs

typedef struct BTParallelScanDescData BTParallelScanDescData
 
typedef struct BTParallelScanDescData * BTParallelScanDesc
 

Enumerations

enum  BTPS_State {
  BTPARALLEL_NOT_INITIALIZED , BTPARALLEL_NEED_PRIMSCAN , BTPARALLEL_ADVANCING , BTPARALLEL_IDLE ,
  BTPARALLEL_DONE
}
 

Functions

static bool _bt_start_prim_scan (IndexScanDesc scan)
 
static void _bt_parallel_serialize_arrays (Relation rel, BTParallelScanDesc btscan, BTScanOpaque so)
 
static void _bt_parallel_restore_arrays (Relation rel, BTParallelScanDesc btscan, BTScanOpaque so)
 
static void btvacuumscan (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid)
 
static BlockNumber btvacuumpage (BTVacState *vstate, Buffer buf)
 
static BTVacuumPosting btreevacuumposting (BTVacState *vstate, IndexTuple posting, OffsetNumber updatedoffset, int *nremaining)
 
Datum bthandler (PG_FUNCTION_ARGS)
 
void btbuildempty (Relation index)
 
bool btinsert (Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo)
 
bool btgettuple (IndexScanDesc scan, ScanDirection dir)
 
int64 btgetbitmap (IndexScanDesc scan, TIDBitmap *tbm)
 
IndexScanDesc btbeginscan (Relation rel, int nkeys, int norderbys)
 
void btrescan (IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
 
void btendscan (IndexScanDesc scan)
 
void btmarkpos (IndexScanDesc scan)
 
void btrestrpos (IndexScanDesc scan)
 
Size btestimateparallelscan (Relation rel, int nkeys, int norderbys)
 
void btinitparallelscan (void *target)
 
void btparallelrescan (IndexScanDesc scan)
 
bool _bt_parallel_seize (IndexScanDesc scan, BlockNumber *next_scan_page, BlockNumber *last_curr_page, bool first)
 
void _bt_parallel_release (IndexScanDesc scan, BlockNumber next_scan_page, BlockNumber curr_page)
 
void _bt_parallel_done (IndexScanDesc scan)
 
void _bt_parallel_primscan_schedule (IndexScanDesc scan, BlockNumber curr_page)
 
IndexBulkDeleteResult * btbulkdelete (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
 
IndexBulkDeleteResult * btvacuumcleanup (IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 
bool btcanreturn (Relation index, int attno)
 
int btgettreeheight (Relation rel)
 
CompareType bttranslatestrategy (StrategyNumber strategy, Oid opfamily)
 
StrategyNumber bttranslatecmptype (CompareType cmptype, Oid opfamily)
 

Typedef Documentation

◆ BTParallelScanDesc

Definition at line 95 of file nbtree.c.

◆ BTParallelScanDescData

Enumeration Type Documentation

◆ BTPS_State

Enumerator
BTPARALLEL_NOT_INITIALIZED 
BTPARALLEL_NEED_PRIMSCAN 
BTPARALLEL_ADVANCING 
BTPARALLEL_IDLE 
BTPARALLEL_DONE 

Definition at line 56 of file nbtree.c.

57{
BTPS_State
Definition nbtree.c:57
@ BTPARALLEL_ADVANCING
Definition nbtree.c:60
@ BTPARALLEL_NEED_PRIMSCAN
Definition nbtree.c:59
@ BTPARALLEL_NOT_INITIALIZED
Definition nbtree.c:58
@ BTPARALLEL_IDLE
Definition nbtree.c:61
@ BTPARALLEL_DONE
Definition nbtree.c:62

Function Documentation

◆ _bt_parallel_done()

void _bt_parallel_done ( IndexScanDesc  scan)

Definition at line 1035 of file nbtree.c.

1036{
1038 ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
1040 bool status_changed = false;
1041
1042 Assert(!BTScanPosIsValid(so->currPos));
1043
1044 /* Do nothing, for non-parallel scans */
1045 if (parallel_scan == NULL)
1046 return;
1047
1048 /*
1049 * Should not mark parallel scan done when there's still a pending
1050 * primitive index scan
1051 */
1052 if (so->needPrimScan)
1053 return;
1054
1055 btscan = (BTParallelScanDesc) OffsetToPointer(parallel_scan,
1056 parallel_scan->ps_offset_am);
1057
1058 /*
1059 * Mark the parallel scan as done, unless some other process did so
1060 * already
1061 */
1062 LWLockAcquire(&btscan->btps_lock, LW_EXCLUSIVE);
1063 Assert(btscan->btps_pageStatus != BTPARALLEL_NEED_PRIMSCAN);
1064 if (btscan->btps_pageStatus != BTPARALLEL_DONE)
1065 {
1066 btscan->btps_pageStatus = BTPARALLEL_DONE;
1067 status_changed = true;
1068 }
1069 LWLockRelease(&btscan->btps_lock);
1070
1071 /* wake up all the workers associated with this parallel scan */
1072 if (status_changed)
1074}
#define OffsetToPointer(base, offset)
Definition c.h:855
#define Assert(condition)
Definition c.h:943
void ConditionVariableBroadcast(ConditionVariable *cv)
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1150
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1767
@ LW_EXCLUSIVE
Definition lwlock.h:104
struct BTParallelScanDescData * BTParallelScanDesc
Definition nbtree.c:95
#define BTScanPosIsValid(scanpos)
Definition nbtree.h:1021
BTScanOpaqueData * BTScanOpaque
Definition nbtree.h:1097
static int fb(int x)
struct ParallelIndexScanDescData * parallel_scan
Definition relscan.h:204

References Assert, BTPARALLEL_DONE, BTPARALLEL_NEED_PRIMSCAN, BTScanPosIsValid, ConditionVariableBroadcast(), fb(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), OffsetToPointer, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, and ParallelIndexScanDescData::ps_offset_am.

Referenced by _bt_endpoint(), _bt_first(), _bt_parallel_seize(), _bt_readnextpage(), and _bt_start_prim_scan().

◆ _bt_parallel_primscan_schedule()

void _bt_parallel_primscan_schedule ( IndexScanDesc  scan,
BlockNumber  curr_page 
)

Definition at line 1085 of file nbtree.c.

1086{
1087 Relation rel = scan->indexRelation;
1089 ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
1091
1092 Assert(so->numArrayKeys);
1093
1094 btscan = (BTParallelScanDesc) OffsetToPointer(parallel_scan,
1095 parallel_scan->ps_offset_am);
1096
1097 LWLockAcquire(&btscan->btps_lock, LW_EXCLUSIVE);
1098 if (btscan->btps_lastCurrPage == curr_page &&
1099 btscan->btps_pageStatus == BTPARALLEL_IDLE)
1100 {
1101 btscan->btps_nextScanPage = InvalidBlockNumber;
1102 btscan->btps_lastCurrPage = InvalidBlockNumber;
1103 btscan->btps_pageStatus = BTPARALLEL_NEED_PRIMSCAN;
1104
1105 /* Serialize scan's current array keys */
1107 }
1108 LWLockRelease(&btscan->btps_lock);
1109}
#define InvalidBlockNumber
Definition block.h:33
static void _bt_parallel_serialize_arrays(Relation rel, BTParallelScanDesc btscan, BTScanOpaque so)
Definition nbtree.c:717
Relation indexRelation
Definition relscan.h:150

References _bt_parallel_serialize_arrays(), Assert, BTPARALLEL_IDLE, BTPARALLEL_NEED_PRIMSCAN, fb(), IndexScanDescData::indexRelation, InvalidBlockNumber, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), OffsetToPointer, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, and ParallelIndexScanDescData::ps_offset_am.

Referenced by _bt_advance_array_keys(), and _bt_readpage().

◆ _bt_parallel_release()

void _bt_parallel_release ( IndexScanDesc  scan,
BlockNumber  next_scan_page,
BlockNumber  curr_page 
)

Definition at line 1008 of file nbtree.c.

1010{
1011 ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
1013
1015
1016 btscan = (BTParallelScanDesc) OffsetToPointer(parallel_scan,
1017 parallel_scan->ps_offset_am);
1018
1019 LWLockAcquire(&btscan->btps_lock, LW_EXCLUSIVE);
1020 btscan->btps_nextScanPage = next_scan_page;
1021 btscan->btps_lastCurrPage = curr_page;
1022 btscan->btps_pageStatus = BTPARALLEL_IDLE;
1023 LWLockRelease(&btscan->btps_lock);
1025}
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition block.h:71
void ConditionVariableSignal(ConditionVariable *cv)

References Assert, BlockNumberIsValid(), BTPARALLEL_IDLE, ConditionVariableSignal(), fb(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), OffsetToPointer, IndexScanDescData::parallel_scan, and ParallelIndexScanDescData::ps_offset_am.

Referenced by _bt_readnextpage(), and _bt_readpage().

◆ _bt_parallel_restore_arrays()

static void _bt_parallel_restore_arrays ( Relation  rel,
BTParallelScanDesc  btscan,
BTScanOpaque  so 
)
static

Definition at line 760 of file nbtree.c.

762{
763 char *datumshared;
764
765 /* Space for serialized datums begins immediately after btps_arrElems[] */
766 datumshared = ((char *) &btscan->btps_arrElems[so->numArrayKeys]);
767 for (int i = 0; i < so->numArrayKeys; i++)
768 {
769 BTArrayKeyInfo *array = &so->arrayKeys[i];
770 ScanKey skey = &so->keyData[array->scan_key];
771 bool isnull;
772
773 if (array->num_elems != -1)
774 {
775 /* Restore SAOP array using its saved cur_elem */
776 Assert(!(skey->sk_flags & SK_BT_SKIP));
777 array->cur_elem = btscan->btps_arrElems[i];
778 skey->sk_argument = array->elem_values[array->cur_elem];
779 continue;
780 }
781
782 /* Restore skip array by restoring its key directly */
783 if (!array->attbyval && skey->sk_argument)
784 pfree(DatumGetPointer(skey->sk_argument));
785 skey->sk_argument = (Datum) 0;
786 memcpy(&skey->sk_flags, datumshared, sizeof(int));
787 datumshared += sizeof(int);
788
789 Assert(skey->sk_flags & SK_BT_SKIP);
790
791 if (skey->sk_flags & (SK_BT_MINVAL | SK_BT_MAXVAL))
792 {
793 /* No sk_argument datum to restore */
794 continue;
795 }
796
797 skey->sk_argument = datumRestore(&datumshared, &isnull);
798 if (isnull)
799 {
800 Assert(skey->sk_argument == 0);
801 Assert(skey->sk_flags & SK_SEARCHNULL);
802 Assert(skey->sk_flags & SK_ISNULL);
803 }
804 }
805}
memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets))
Datum datumRestore(char **start_address, bool *isnull)
Definition datum.c:559
int i
Definition isn.c:77
void pfree(void *pointer)
Definition mcxt.c:1616
#define SK_BT_SKIP
Definition nbtree.h:1106
#define SK_BT_MAXVAL
Definition nbtree.h:1110
#define SK_BT_MINVAL
Definition nbtree.h:1109
uint64_t Datum
Definition postgres.h:70
static Pointer DatumGetPointer(Datum X)
Definition postgres.h:332
#define SK_SEARCHNULL
Definition skey.h:121
#define SK_ISNULL
Definition skey.h:115
Datum * elem_values
Definition nbtree.h:1041

References Assert, BTArrayKeyInfo::attbyval, BTArrayKeyInfo::cur_elem, DatumGetPointer(), datumRestore(), BTArrayKeyInfo::elem_values, fb(), i, memcpy(), BTArrayKeyInfo::num_elems, pfree(), BTArrayKeyInfo::scan_key, SK_BT_MAXVAL, SK_BT_MINVAL, SK_BT_SKIP, SK_ISNULL, and SK_SEARCHNULL.

Referenced by _bt_parallel_seize().

◆ _bt_parallel_seize()

bool _bt_parallel_seize ( IndexScanDesc  scan,
BlockNumber *  next_scan_page,
BlockNumber *  last_curr_page,
bool  first 
)

Definition at line 870 of file nbtree.c.

872{
873 Relation rel = scan->indexRelation;
875 bool exit_loop = false,
876 status = true,
877 endscan = false;
878 ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
880
883
884 /*
885 * Reset so->currPos, and initialize moreLeft/moreRight such that the next
886 * call to _bt_readnextpage treats this backend similarly to a serial
887 * backend that steps from *last_curr_page to *next_scan_page (unless this
888 * backend's so->currPos is initialized by _bt_readfirstpage before then).
889 */
890 BTScanPosInvalidate(so->currPos);
891 so->currPos.moreLeft = so->currPos.moreRight = true;
892
893 if (first)
894 {
895 /*
896 * Initialize array related state when called from _bt_first, assuming
897 * that this will be the first primitive index scan for the scan
898 */
899 so->needPrimScan = false;
900 so->scanBehind = false;
901 so->oppositeDirCheck = false;
902 }
903 else
904 {
905 /*
906 * Don't attempt to seize the scan when it requires another primitive
907 * index scan, since caller's backend cannot start it right now
908 */
909 if (so->needPrimScan)
910 return false;
911 }
912
913 btscan = (BTParallelScanDesc) OffsetToPointer(parallel_scan,
914 parallel_scan->ps_offset_am);
915
916 while (1)
917 {
918 LWLockAcquire(&btscan->btps_lock, LW_EXCLUSIVE);
919
920 if (btscan->btps_pageStatus == BTPARALLEL_DONE)
921 {
922 /* We're done with this parallel index scan */
923 status = false;
924 }
925 else if (btscan->btps_pageStatus == BTPARALLEL_IDLE &&
926 btscan->btps_nextScanPage == P_NONE)
927 {
928 /* End this parallel index scan */
929 status = false;
930 endscan = true;
931 }
932 else if (btscan->btps_pageStatus == BTPARALLEL_NEED_PRIMSCAN)
933 {
934 Assert(so->numArrayKeys);
935
936 if (first)
937 {
938 /* Can start scheduled primitive scan right away, so do so */
939 btscan->btps_pageStatus = BTPARALLEL_ADVANCING;
940
941 /* Restore scan's array keys from serialized values */
943 exit_loop = true;
944 }
945 else
946 {
947 /*
948 * Don't attempt to seize the scan when it requires another
949 * primitive index scan, since caller's backend cannot start
950 * it right now
951 */
952 status = false;
953 }
954
955 /*
956 * Either way, update backend local state to indicate that a
957 * pending primitive scan is required
958 */
959 so->needPrimScan = true;
960 so->scanBehind = false;
961 so->oppositeDirCheck = false;
962 }
963 else if (btscan->btps_pageStatus != BTPARALLEL_ADVANCING)
964 {
965 /*
966 * We have successfully seized control of the scan for the purpose
967 * of advancing it to a new page!
968 */
969 btscan->btps_pageStatus = BTPARALLEL_ADVANCING;
970 Assert(btscan->btps_nextScanPage != P_NONE);
971 *next_scan_page = btscan->btps_nextScanPage;
972 *last_curr_page = btscan->btps_lastCurrPage;
973 exit_loop = true;
974 }
975 LWLockRelease(&btscan->btps_lock);
976 if (exit_loop || !status)
977 break;
979 }
981
982 /* When the scan has reached the rightmost (or leftmost) page, end it */
983 if (endscan)
984 _bt_parallel_done(scan);
985
986 return status;
987}
bool ConditionVariableCancelSleep(void)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
void _bt_parallel_done(IndexScanDesc scan)
Definition nbtree.c:1035
static void _bt_parallel_restore_arrays(Relation rel, BTParallelScanDesc btscan, BTScanOpaque so)
Definition nbtree.c:760
#define P_NONE
Definition nbtree.h:213
#define BTScanPosInvalidate(scanpos)
Definition nbtree.h:1027

References _bt_parallel_done(), _bt_parallel_restore_arrays(), Assert, BTPARALLEL_ADVANCING, BTPARALLEL_DONE, BTPARALLEL_IDLE, BTPARALLEL_NEED_PRIMSCAN, BTScanPosInvalidate, ConditionVariableCancelSleep(), ConditionVariableSleep(), fb(), IndexScanDescData::indexRelation, InvalidBlockNumber, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), OffsetToPointer, IndexScanDescData::opaque, P_NONE, IndexScanDescData::parallel_scan, and ParallelIndexScanDescData::ps_offset_am.

Referenced by _bt_first(), and _bt_readnextpage().

◆ _bt_parallel_serialize_arrays()

static void _bt_parallel_serialize_arrays ( Relation  rel,
BTParallelScanDesc  btscan,
BTScanOpaque  so 
)
static

Definition at line 717 of file nbtree.c.

719{
720 char *datumshared;
721
722 /* Space for serialized datums begins immediately after btps_arrElems[] */
723 datumshared = ((char *) &btscan->btps_arrElems[so->numArrayKeys]);
724 for (int i = 0; i < so->numArrayKeys; i++)
725 {
726 BTArrayKeyInfo *array = &so->arrayKeys[i];
727 ScanKey skey = &so->keyData[array->scan_key];
728
729 if (array->num_elems != -1)
730 {
731 /* Save SAOP array's cur_elem (no need to copy key/datum) */
732 Assert(!(skey->sk_flags & SK_BT_SKIP));
733 btscan->btps_arrElems[i] = array->cur_elem;
734 continue;
735 }
736
737 /* Save all mutable state associated with skip array's key */
738 Assert(skey->sk_flags & SK_BT_SKIP);
739 memcpy(datumshared, &skey->sk_flags, sizeof(int));
740 datumshared += sizeof(int);
741
742 if (skey->sk_flags & (SK_BT_MINVAL | SK_BT_MAXVAL))
743 {
744 /* No sk_argument datum to serialize */
745 Assert(skey->sk_argument == 0);
746 continue;
747 }
748
749 datumSerialize(skey->sk_argument, (skey->sk_flags & SK_ISNULL) != 0,
750 array->attbyval, array->attlen, &datumshared);
751 }
752}
void datumSerialize(Datum value, bool isnull, bool typByVal, int typLen, char **start_address)
Definition datum.c:497

References Assert, BTArrayKeyInfo::attbyval, BTArrayKeyInfo::attlen, BTArrayKeyInfo::cur_elem, datumSerialize(), fb(), i, memcpy(), BTArrayKeyInfo::num_elems, BTArrayKeyInfo::scan_key, SK_BT_MAXVAL, SK_BT_MINVAL, SK_BT_SKIP, and SK_ISNULL.

Referenced by _bt_parallel_primscan_schedule().

◆ _bt_start_prim_scan()

static bool _bt_start_prim_scan ( IndexScanDesc  scan)
static

Definition at line 653 of file nbtree.c.

654{
656
657 Assert(so->numArrayKeys);
658
659 so->scanBehind = so->oppositeDirCheck = false; /* reset */
660
661 /*
662 * Array keys are advanced within _bt_checkkeys when the scan reaches the
663 * leaf level (more precisely, they're advanced when the scan reaches the
664 * end of each distinct set of array elements). This process avoids
665 * repeat access to leaf pages (across multiple primitive index scans) by
666 * advancing the scan's array keys when it allows the primitive index scan
667 * to find nearby matching tuples (or when it eliminates ranges of array
668 * key space that can't possibly be satisfied by any index tuple).
669 *
670 * _bt_checkkeys sets a simple flag variable to schedule another primitive
671 * index scan. The flag tells us what to do.
672 *
673 * We cannot rely on _bt_first always reaching _bt_checkkeys. There are
674 * various cases where that won't happen. For example, if the index is
675 * completely empty, then _bt_first won't call _bt_readpage/_bt_checkkeys.
676 * We also don't expect a call to _bt_checkkeys during searches for a
677 * non-existent value that happens to be lower/higher than any existing
678 * value in the index.
679 *
680 * We don't require special handling for these cases -- we don't need to
681 * be explicitly instructed to _not_ perform another primitive index scan.
682 * It's up to code under the control of _bt_first to always set the flag
683 * when another primitive index scan will be required.
684 *
685 * This works correctly, even with the tricky cases listed above, which
686 * all involve access to leaf pages "near the boundaries of the key space"
687 * (whether it's from a leftmost/rightmost page, or an imaginary empty
688 * leaf root page). If _bt_checkkeys cannot be reached by a primitive
689 * index scan for one set of array keys, then it also won't be reached for
690 * any later set ("later" in terms of the direction that we scan the index
691 * and advance the arrays). The array keys won't have advanced in these
692 * cases, but that's the correct behavior (even _bt_advance_array_keys
693 * won't always advance the arrays at the point they become "exhausted").
694 */
695 if (so->needPrimScan)
696 {
697 /*
698 * Flag was set -- must call _bt_first again, which will reset the
699 * scan's needPrimScan flag
700 */
701 return true;
702 }
703
704 /* The top-level index scan ran out of tuples in this scan direction */
705 if (scan->parallel_scan != NULL)
706 _bt_parallel_done(scan);
707
708 return false;
709}

References _bt_parallel_done(), Assert, fb(), IndexScanDescData::opaque, and IndexScanDescData::parallel_scan.

Referenced by btgetbitmap(), and btgettuple().

◆ btbeginscan()

IndexScanDesc btbeginscan ( Relation  rel,
int  nkeys,
int  norderbys 
)

Definition at line 339 of file nbtree.c.

340{
341 IndexScanDesc scan;
343
344 /* no order by operators allowed */
345 Assert(norderbys == 0);
346
347 /* get the scan */
348 scan = RelationGetIndexScan(rel, nkeys, norderbys);
349
350 /* allocate private workspace */
352 BTScanPosInvalidate(so->currPos);
353 BTScanPosInvalidate(so->markPos);
354 if (scan->numberOfKeys > 0)
355 so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
356 else
357 so->keyData = NULL;
358
359 so->skipScan = false;
360 so->needPrimScan = false;
361 so->scanBehind = false;
362 so->oppositeDirCheck = false;
363 so->arrayKeys = NULL;
364 so->orderProcs = NULL;
365 so->arrayContext = NULL;
366
367 so->killedItems = NULL; /* until needed */
368 so->numKilled = 0;
369
370 /*
371 * We don't know yet whether the scan will be index-only, so we do not
372 * allocate the tuple workspace arrays until btrescan. However, we set up
373 * scan->xs_itupdesc whether we'll need it or not, since that's so cheap.
374 */
375 so->currTuples = so->markTuples = NULL;
376
377 scan->xs_itupdesc = RelationGetDescr(rel);
378
379 scan->opaque = so;
380
381 return scan;
382}
#define palloc_object(type)
Definition fe_memutils.h:74
IndexScanDesc RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
Definition genam.c:80
void * palloc(Size size)
Definition mcxt.c:1387
#define RelationGetDescr(relation)
Definition rel.h:542
ScanKeyData * ScanKey
Definition skey.h:75
struct TupleDescData * xs_itupdesc
Definition relscan.h:181

References Assert, BTScanPosInvalidate, fb(), IndexScanDescData::numberOfKeys, IndexScanDescData::opaque, palloc(), palloc_object, RelationGetDescr, RelationGetIndexScan(), and IndexScanDescData::xs_itupdesc.

Referenced by bthandler().

◆ btbuildempty()

void btbuildempty ( Relation  index)

Definition at line 183 of file nbtree.c.

184{
185 bool allequalimage = _bt_allequalimage(index, false);
186 BulkWriteState *bulkstate;
188
190
191 /* Construct metapage. */
192 metabuf = smgr_bulk_get_buf(bulkstate);
193 _bt_initmetapage((Page) metabuf, P_NONE, 0, allequalimage);
194 smgr_bulk_write(bulkstate, BTREE_METAPAGE, metabuf, true);
195
196 smgr_bulk_finish(bulkstate);
197}
PageData * Page
Definition bufpage.h:81
BulkWriteState * smgr_bulk_start_rel(Relation rel, ForkNumber forknum)
Definition bulk_write.c:87
void smgr_bulk_write(BulkWriteState *bulkstate, BlockNumber blocknum, BulkWriteBuffer buf, bool page_std)
Definition bulk_write.c:323
BulkWriteBuffer smgr_bulk_get_buf(BulkWriteState *bulkstate)
Definition bulk_write.c:347
void smgr_bulk_finish(BulkWriteState *bulkstate)
Definition bulk_write.c:130
void _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level, bool allequalimage)
Definition nbtpage.c:68
#define BTREE_METAPAGE
Definition nbtree.h:149
bool _bt_allequalimage(Relation rel, bool debugmessage)
Definition nbtutils.c:1175
@ INIT_FORKNUM
Definition relpath.h:61
Definition type.h:96

References _bt_allequalimage(), _bt_initmetapage(), BTREE_METAPAGE, fb(), INIT_FORKNUM, P_NONE, smgr_bulk_finish(), smgr_bulk_get_buf(), smgr_bulk_start_rel(), and smgr_bulk_write().

Referenced by bthandler().

◆ btbulkdelete()

IndexBulkDeleteResult * btbulkdelete ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats,
IndexBulkDeleteCallback  callback,
void *  callback_state 
)

Definition at line 1119 of file nbtree.c.

1121{
1122 Relation rel = info->index;
1123 BTCycleId cycleid;
1124
1125 /* allocate stats if first time through, else re-use existing struct */
1126 if (stats == NULL)
1128
1129 /* Establish the vacuum cycle ID to use for this scan */
1130 /* The ENSURE stuff ensures we clean up shared memory on failure */
1132 {
1133 cycleid = _bt_start_vacuum(rel);
1134
1135 btvacuumscan(info, stats, callback, callback_state, cycleid);
1136 }
1138 _bt_end_vacuum(rel);
1139
1140 return stats;
1141}
#define palloc0_object(type)
Definition fe_memutils.h:75
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition ipc.h:47
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition ipc.h:52
static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid)
Definition nbtree.c:1237
uint16 BTCycleId
Definition nbtree.h:30
void _bt_end_vacuum(Relation rel)
Definition nbtutils.c:530
void _bt_end_vacuum_callback(int code, Datum arg)
Definition nbtutils.c:558
BTCycleId _bt_start_vacuum(Relation rel)
Definition nbtutils.c:473
static Datum PointerGetDatum(const void *X)
Definition postgres.h:342
Relation index
Definition genam.h:54
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)

References _bt_end_vacuum(), _bt_end_vacuum_callback(), _bt_start_vacuum(), btvacuumscan(), callback(), fb(), IndexVacuumInfo::index, palloc0_object, PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, and PointerGetDatum().

Referenced by bthandler().

◆ btcanreturn()

bool btcanreturn ( Relation  index,
int  attno 
)

Definition at line 1799 of file nbtree.c.

1800{
1801 return true;
1802}

Referenced by bthandler().

◆ btendscan()

void btendscan ( IndexScanDesc  scan)

Definition at line 455 of file nbtree.c.

456{
458
459 /* we aren't holding any read locks, but gotta drop the pins */
460 if (BTScanPosIsValid(so->currPos))
461 {
462 /* Before leaving current page, deal with any killed items */
463 if (so->numKilled > 0)
464 _bt_killitems(scan);
465 BTScanPosUnpinIfPinned(so->currPos);
466 }
467
468 so->markItemIndex = -1;
469 BTScanPosUnpinIfPinned(so->markPos);
470
471 /* No need to invalidate positions, the RAM is about to be freed. */
472
473 /* Release storage */
474 if (so->keyData != NULL)
475 pfree(so->keyData);
476 /* so->arrayKeys and so->orderProcs are in arrayContext */
477 if (so->arrayContext != NULL)
478 MemoryContextDelete(so->arrayContext);
479 if (so->killedItems != NULL)
480 pfree(so->killedItems);
481 if (so->currTuples != NULL)
482 pfree(so->currTuples);
483 /* so->markTuples should not be pfree'd, see btrescan */
484 pfree(so);
485}
void MemoryContextDelete(MemoryContext context)
Definition mcxt.c:472
#define BTScanPosUnpinIfPinned(scanpos)
Definition nbtree.h:1015
void _bt_killitems(IndexScanDesc scan)
Definition nbtutils.c:191

References _bt_killitems(), BTScanPosIsValid, BTScanPosUnpinIfPinned, fb(), MemoryContextDelete(), IndexScanDescData::opaque, and pfree().

Referenced by bthandler().

◆ btestimateparallelscan()

Size btestimateparallelscan ( Relation  rel,
int  nkeys,
int  norderbys 
)

Definition at line 575 of file nbtree.c.

576{
580
581 /*
582 * Pessimistically assume that every input scan key will be output with
583 * its own SAOP array
584 */
586 sizeof(int) * nkeys;
587
588 /* Single column indexes cannot possibly use a skip array */
589 if (nkeyatts == 1)
590 return estnbtreeshared;
591
592 /*
593 * Pessimistically assume that all attributes prior to the least
594 * significant attribute require a skip array (and an associated key)
595 */
596 genericattrspace = datumEstimateSpace((Datum) 0, false, true,
597 sizeof(Datum));
598 for (int attnum = 1; attnum < nkeyatts; attnum++)
599 {
600 CompactAttribute *attr;
601
602 /*
603 * We make the conservative assumption that every index column will
604 * also require a skip array.
605 *
606 * Every skip array must have space to store its scan key's sk_flags.
607 */
609
610 /* Consider space required to store a datum of opclass input type */
611 attr = TupleDescCompactAttr(rel->rd_att, attnum - 1);
612 if (attr->attbyval)
613 {
614 /* This index attribute stores pass-by-value datums */
616 true, attr->attlen);
617
619 continue;
620 }
621
622 /*
623 * This index attribute stores pass-by-reference datums.
624 *
625 * Assume that serializing this array will use just as much space as a
626 * pass-by-value datum, in addition to space for the largest possible
627 * whole index tuple (this is not just a per-datum portion of the
628 * largest possible tuple because that'd be almost as large anyway).
629 *
630 * This is quite conservative, but it's not clear how we could do much
631 * better. The executor requires an up-front storage request size
632 * that reliably covers the scan's high watermark memory usage. We
633 * can't be sure of the real high watermark until the scan is over.
634 */
637 }
638
639 return estnbtreeshared;
640}
int16_t int16
Definition c.h:619
size_t Size
Definition c.h:689
Size datumEstimateSpace(Datum value, bool isnull, bool typByVal, int typLen)
Definition datum.c:450
#define BTMaxItemSize
Definition nbtree.h:165
int16 attnum
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition rel.h:535
Size add_size(Size s1, Size s2)
Definition shmem.c:1048
TupleDesc rd_att
Definition rel.h:112
static CompactAttribute * TupleDescCompactAttr(TupleDesc tupdesc, int i)
Definition tupdesc.h:195

References add_size(), CompactAttribute::attbyval, CompactAttribute::attlen, attnum, BTMaxItemSize, BTParallelScanDescData::btps_arrElems, datumEstimateSpace(), fb(), IndexRelationGetNumberOfKeyAttributes, RelationData::rd_att, and TupleDescCompactAttr().

Referenced by bthandler().

◆ btgetbitmap()

int64 btgetbitmap ( IndexScanDesc  scan,
TIDBitmap *  tbm 
)

Definition at line 291 of file nbtree.c.

292{
294 int64 ntids = 0;
295 ItemPointer heapTid;
296
297 Assert(scan->heapRelation == NULL);
298
299 /* Each loop iteration performs another primitive index scan */
300 do
301 {
302 /* Fetch the first page & tuple */
304 {
305 /* Save tuple ID, and continue scanning */
306 heapTid = &scan->xs_heaptid;
307 tbm_add_tuples(tbm, heapTid, 1, false);
308 ntids++;
309
310 for (;;)
311 {
312 /*
313 * Advance to next tuple within page. This is the same as the
314 * easy case in _bt_next().
315 */
316 if (++so->currPos.itemIndex > so->currPos.lastItem)
317 {
318 /* let _bt_next do the heavy lifting */
319 if (!_bt_next(scan, ForwardScanDirection))
320 break;
321 }
322
323 /* Save tuple ID, and continue scanning */
324 heapTid = &so->currPos.items[so->currPos.itemIndex].heapTid;
325 tbm_add_tuples(tbm, heapTid, 1, false);
326 ntids++;
327 }
328 }
329 /* Now see if we need another primitive index scan */
330 } while (so->numArrayKeys && _bt_start_prim_scan(scan));
331
332 return ntids;
333}
int64_t int64
Definition c.h:621
static bool _bt_start_prim_scan(IndexScanDesc scan)
Definition nbtree.c:653
bool _bt_first(IndexScanDesc scan, ScanDirection dir)
Definition nbtsearch.c:883
bool _bt_next(IndexScanDesc scan, ScanDirection dir)
Definition nbtsearch.c:1586
@ ForwardScanDirection
Definition sdir.h:28
ItemPointerData xs_heaptid
Definition relscan.h:185
Relation heapRelation
Definition relscan.h:149
void tbm_add_tuples(TIDBitmap *tbm, const ItemPointerData *tids, int ntids, bool recheck)
Definition tidbitmap.c:367

References _bt_first(), _bt_next(), _bt_start_prim_scan(), Assert, fb(), ForwardScanDirection, IndexScanDescData::heapRelation, IndexScanDescData::opaque, tbm_add_tuples(), and IndexScanDescData::xs_heaptid.

Referenced by bthandler().

◆ btgettreeheight()

int btgettreeheight ( Relation  rel)

Definition at line 1808 of file nbtree.c.

1809{
1810 return _bt_getrootheight(rel);
1811}
int _bt_getrootheight(Relation rel)
Definition nbtpage.c:680

References _bt_getrootheight().

Referenced by bthandler().

◆ btgettuple()

bool btgettuple ( IndexScanDesc  scan,
ScanDirection  dir 
)

Definition at line 230 of file nbtree.c.

231{
233 bool res;
234
235 Assert(scan->heapRelation != NULL);
236
237 /* btree indexes are never lossy */
238 scan->xs_recheck = false;
239
240 /* Each loop iteration performs another primitive index scan */
241 do
242 {
243 /*
244 * If we've already initialized this scan, we can just advance it in
245 * the appropriate direction. If we haven't done so yet, we call
246 * _bt_first() to get the first item in the scan.
247 */
248 if (!BTScanPosIsValid(so->currPos))
249 res = _bt_first(scan, dir);
250 else
251 {
252 /*
253 * Check to see if we should kill the previously-fetched tuple.
254 */
255 if (scan->kill_prior_tuple)
256 {
257 /*
258 * Yes, remember it for later. (We'll deal with all such
259 * tuples at once right before leaving the index page.) The
260 * test for numKilled overrun is not just paranoia: if the
261 * caller reverses direction in the indexscan then the same
262 * item might get entered multiple times. It's not worth
263 * trying to optimize that, so we don't detect it, but instead
264 * just forget any excess entries.
265 */
266 if (so->killedItems == NULL)
267 so->killedItems = palloc_array(int, MaxTIDsPerBTreePage);
268 if (so->numKilled < MaxTIDsPerBTreePage)
269 so->killedItems[so->numKilled++] = so->currPos.itemIndex;
270 }
271
272 /*
273 * Now continue the scan.
274 */
275 res = _bt_next(scan, dir);
276 }
277
278 /* If we have a tuple, return it ... */
279 if (res)
280 break;
281 /* ... otherwise see if we need another primitive index scan */
282 } while (so->numArrayKeys && _bt_start_prim_scan(scan));
283
284 return res;
285}
#define palloc_array(type, count)
Definition fe_memutils.h:76
#define MaxTIDsPerBTreePage
Definition nbtree.h:186
bool kill_prior_tuple
Definition relscan.h:160

References _bt_first(), _bt_next(), _bt_start_prim_scan(), Assert, BTScanPosIsValid, fb(), IndexScanDescData::heapRelation, IndexScanDescData::kill_prior_tuple, MaxTIDsPerBTreePage, IndexScanDescData::opaque, palloc_array, and IndexScanDescData::xs_recheck.

Referenced by bthandler().

◆ bthandler()

Datum bthandler ( PG_FUNCTION_ARGS  )

Definition at line 118 of file nbtree.c.

119{
120 static const IndexAmRoutine amroutine = {
122 .amstrategies = BTMaxStrategyNumber,
123 .amsupport = BTNProcs,
124 .amoptsprocnum = BTOPTIONS_PROC,
125 .amcanorder = true,
126 .amcanorderbyop = false,
127 .amcanhash = false,
128 .amconsistentequality = true,
129 .amconsistentordering = true,
130 .amcanbackward = true,
131 .amcanunique = true,
132 .amcanmulticol = true,
133 .amoptionalkey = true,
134 .amsearcharray = true,
135 .amsearchnulls = true,
136 .amstorage = false,
137 .amclusterable = true,
138 .ampredlocks = true,
139 .amcanparallel = true,
140 .amcanbuildparallel = true,
141 .amcaninclude = true,
142 .amusemaintenanceworkmem = false,
143 .amsummarizing = false,
144 .amparallelvacuumoptions =
146 .amkeytype = InvalidOid,
147
148 .ambuild = btbuild,
149 .ambuildempty = btbuildempty,
150 .aminsert = btinsert,
151 .aminsertcleanup = NULL,
152 .ambulkdelete = btbulkdelete,
153 .amvacuumcleanup = btvacuumcleanup,
154 .amcanreturn = btcanreturn,
155 .amcostestimate = btcostestimate,
156 .amgettreeheight = btgettreeheight,
157 .amoptions = btoptions,
158 .amproperty = btproperty,
159 .ambuildphasename = btbuildphasename,
160 .amvalidate = btvalidate,
161 .amadjustmembers = btadjustmembers,
162 .ambeginscan = btbeginscan,
163 .amrescan = btrescan,
164 .amgettuple = btgettuple,
165 .amgetbitmap = btgetbitmap,
166 .amendscan = btendscan,
167 .ammarkpos = btmarkpos,
168 .amrestrpos = btrestrpos,
169 .amestimateparallelscan = btestimateparallelscan,
170 .aminitparallelscan = btinitparallelscan,
171 .amparallelrescan = btparallelrescan,
172 .amtranslatestrategy = bttranslatestrategy,
173 .amtranslatecmptype = bttranslatecmptype,
174 };
175
177}
#define PG_RETURN_POINTER(x)
Definition fmgr.h:363
bool btcanreturn(Relation index, int attno)
Definition nbtree.c:1799
StrategyNumber bttranslatecmptype(CompareType cmptype, Oid opfamily)
Definition nbtree.c:1834
IndexScanDesc btbeginscan(Relation rel, int nkeys, int norderbys)
Definition nbtree.c:339
Size btestimateparallelscan(Relation rel, int nkeys, int norderbys)
Definition nbtree.c:575
IndexBulkDeleteResult * btvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
Definition nbtree.c:1149
CompareType bttranslatestrategy(StrategyNumber strategy, Oid opfamily)
Definition nbtree.c:1814
bool btgettuple(IndexScanDesc scan, ScanDirection dir)
Definition nbtree.c:230
void btparallelrescan(IndexScanDesc scan)
Definition nbtree.c:827
bool btinsert(Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo)
Definition nbtree.c:206
void btbuildempty(Relation index)
Definition nbtree.c:183
int btgettreeheight(Relation rel)
Definition nbtree.c:1808
void btinitparallelscan(void *target)
Definition nbtree.c:811
IndexBulkDeleteResult * btbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
Definition nbtree.c:1119
int64 btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
Definition nbtree.c:291
void btmarkpos(IndexScanDesc scan)
Definition nbtree.c:491
void btendscan(IndexScanDesc scan)
Definition nbtree.c:455
void btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
Definition nbtree.c:388
void btrestrpos(IndexScanDesc scan)
Definition nbtree.c:517
#define BTNProcs
Definition nbtree.h:723
#define BTOPTIONS_PROC
Definition nbtree.h:721
IndexBuildResult * btbuild(Relation heap, Relation index, IndexInfo *indexInfo)
Definition nbtsort.c:299
char * btbuildphasename(int64 phasenum)
Definition nbtutils.c:644
bytea * btoptions(Datum reloptions, bool validate)
Definition nbtutils.c:598
bool btproperty(Oid index_oid, int attno, IndexAMProperty prop, const char *propname, bool *res, bool *isnull)
Definition nbtutils.c:621
bool btvalidate(Oid opclassoid)
Definition nbtvalidate.c:40
void btadjustmembers(Oid opfamilyoid, Oid opclassoid, List *operators, List *functions)
#define InvalidOid
void btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
Definition selfuncs.c:7691
#define BTMaxStrategyNumber
Definition stratnum.h:35
NodeTag type
Definition amapi.h:234
#define VACUUM_OPTION_PARALLEL_BULKDEL
Definition vacuum.h:47
#define VACUUM_OPTION_PARALLEL_COND_CLEANUP
Definition vacuum.h:54

References btadjustmembers(), btbeginscan(), btbuild(), btbuildempty(), btbuildphasename(), btbulkdelete(), btcanreturn(), btcostestimate(), btendscan(), btestimateparallelscan(), btgetbitmap(), btgettreeheight(), btgettuple(), btinitparallelscan(), btinsert(), btmarkpos(), BTMaxStrategyNumber, BTNProcs, btoptions(), BTOPTIONS_PROC, btparallelrescan(), btproperty(), btrescan(), btrestrpos(), bttranslatecmptype(), bttranslatestrategy(), btvacuumcleanup(), btvalidate(), fb(), InvalidOid, PG_RETURN_POINTER, IndexAmRoutine::type, VACUUM_OPTION_PARALLEL_BULKDEL, and VACUUM_OPTION_PARALLEL_COND_CLEANUP.

◆ btinitparallelscan()

void btinitparallelscan ( void * target)

Definition at line 811 of file nbtree.c.

812{
814
815 LWLockInitialize(&bt_target->btps_lock,
817 bt_target->btps_nextScanPage = InvalidBlockNumber;
818 bt_target->btps_lastCurrPage = InvalidBlockNumber;
819 bt_target->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
821}
void ConditionVariableInit(ConditionVariable *cv)
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition lwlock.c:670

References BTPARALLEL_NOT_INITIALIZED, ConditionVariableInit(), fb(), InvalidBlockNumber, and LWLockInitialize().

Referenced by bthandler().

◆ btinsert()

bool btinsert ( Relation  rel,
Datum * values,
bool * isnull,
ItemPointer  ht_ctid,
Relation  heapRel,
IndexUniqueCheck  checkUnique,
bool  indexUnchanged,
IndexInfo indexInfo 
)

Definition at line 206 of file nbtree.c.

211{
212 bool result;
213 IndexTuple itup;
214
215 /* generate an index tuple */
216 itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
217 itup->t_tid = *ht_ctid;
218
219 result = _bt_doinsert(rel, itup, checkUnique, indexUnchanged, heapRel);
220
221 pfree(itup);
222
223 return result;
224}
static Datum values[MAXATTR]
Definition bootstrap.c:190
uint32 result
IndexTuple index_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition indextuple.c:44
bool _bt_doinsert(Relation rel, IndexTuple itup, IndexUniqueCheck checkUnique, bool indexUnchanged, Relation heapRel)
Definition nbtinsert.c:105
ItemPointerData t_tid
Definition itup.h:37

References _bt_doinsert(), fb(), index_form_tuple(), pfree(), RelationGetDescr, result, IndexTupleData::t_tid, and values.

Referenced by bthandler().

◆ btmarkpos()

void btmarkpos ( IndexScanDesc  scan)

Definition at line 491 of file nbtree.c.

492{
494
495 /* There may be an old mark with a pin (but no lock). */
496 BTScanPosUnpinIfPinned(so->markPos);
497
498 /*
499 * Just record the current itemIndex. If we later step to next page
500 * before releasing the marked position, _bt_steppage makes a full copy of
501 * the currPos struct in markPos. If (as often happens) the mark is moved
502 * before we leave the page, we don't have to do that work.
503 */
504 if (BTScanPosIsValid(so->currPos))
505 so->markItemIndex = so->currPos.itemIndex;
506 else
507 {
508 BTScanPosInvalidate(so->markPos);
509 so->markItemIndex = -1;
510 }
511}

References BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, fb(), and IndexScanDescData::opaque.

Referenced by bthandler().

◆ btparallelrescan()

void btparallelrescan ( IndexScanDesc  scan)

Definition at line 827 of file nbtree.c.

828{
830 ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
831
832 Assert(parallel_scan);
833
834 btscan = (BTParallelScanDesc) OffsetToPointer(parallel_scan,
835 parallel_scan->ps_offset_am);
836
837 /*
838 * In theory, we don't need to acquire the LWLock here, because there
839 * shouldn't be any other workers running at this point, but we do so for
840 * consistency.
841 */
842 LWLockAcquire(&btscan->btps_lock, LW_EXCLUSIVE);
843 btscan->btps_nextScanPage = InvalidBlockNumber;
844 btscan->btps_lastCurrPage = InvalidBlockNumber;
845 btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
846 LWLockRelease(&btscan->btps_lock);
847}

References Assert, BTPARALLEL_NOT_INITIALIZED, fb(), InvalidBlockNumber, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), OffsetToPointer, IndexScanDescData::parallel_scan, and ParallelIndexScanDescData::ps_offset_am.

Referenced by bthandler().

◆ btreevacuumposting()

static BTVacuumPosting btreevacuumposting ( BTVacState * vstate,
IndexTuple  posting,
OffsetNumber  updatedoffset,
int * nremaining 
)
static

Definition at line 1750 of file nbtree.c.

1752{
1753 int live = 0;
1754 int nitem = BTreeTupleGetNPosting(posting);
1757
1758 for (int i = 0; i < nitem; i++)
1759 {
1760 if (!vstate->callback(items + i, vstate->callback_state))
1761 {
1762 /* Live table TID */
1763 live++;
1764 }
1765 else if (vacposting == NULL)
1766 {
1767 /*
1768 * First dead table TID encountered.
1769 *
1770 * It's now clear that we need to delete one or more dead table
1771 * TIDs, so start maintaining metadata describing how to update
1772 * existing posting list tuple.
1773 */
1775 nitem * sizeof(uint16));
1776
1777 vacposting->itup = posting;
1778 vacposting->updatedoffset = updatedoffset;
1779 vacposting->ndeletedtids = 0;
1780 vacposting->deletetids[vacposting->ndeletedtids++] = i;
1781 }
1782 else
1783 {
1784 /* Second or subsequent dead table TID */
1785 vacposting->deletetids[vacposting->ndeletedtids++] = i;
1786 }
1787 }
1788
1789 *nremaining = live;
1790 return vacposting;
1791}
uint16_t uint16
Definition c.h:623
static uint16 BTreeTupleGetNPosting(IndexTuple posting)
Definition nbtree.h:519
static ItemPointer BTreeTupleGetPosting(IndexTuple posting)
Definition nbtree.h:538
static ItemArray items

References BTreeTupleGetNPosting(), BTreeTupleGetPosting(), fb(), i, items, and palloc().

Referenced by btvacuumpage().

◆ btrescan()

void btrescan ( IndexScanDesc  scan,
ScanKey  scankey,
int  nscankeys,
ScanKey  orderbys,
int  norderbys 
)

Definition at line 388 of file nbtree.c.

390{
392
393 /* we aren't holding any read locks, but gotta drop the pins */
394 if (BTScanPosIsValid(so->currPos))
395 {
396 /* Before leaving current page, deal with any killed items */
397 if (so->numKilled > 0)
398 _bt_killitems(scan);
399 BTScanPosUnpinIfPinned(so->currPos);
400 BTScanPosInvalidate(so->currPos);
401 }
402
403 /*
404 * We prefer to eagerly drop leaf page pins before btgettuple returns.
405 * This avoids making VACUUM wait to acquire a cleanup lock on the page.
406 *
407 * We cannot safely drop leaf page pins during index-only scans due to a
408 * race condition involving VACUUM setting pages all-visible in the VM.
409 * It's also unsafe for plain index scans that use a non-MVCC snapshot.
410 *
411 * Also opt out of dropping leaf page pins eagerly during bitmap scans.
412 * Pins cannot be held for more than an instant during bitmap scans either
413 * way, so we might as well avoid wasting cycles on acquiring page LSNs.
414 *
415 * See nbtree/README section on making concurrent TID recycling safe.
416 *
417 * Note: so->dropPin should never change across rescans.
418 */
419 so->dropPin = (!scan->xs_want_itup &&
421 scan->heapRelation != NULL);
422
423 so->markItemIndex = -1;
424 so->needPrimScan = false;
425 so->scanBehind = false;
426 so->oppositeDirCheck = false;
427 BTScanPosUnpinIfPinned(so->markPos);
428 BTScanPosInvalidate(so->markPos);
429
430 /*
431 * Allocate tuple workspace arrays, if needed for an index-only scan and
432 * not already done in a previous rescan call. To save on palloc
433 * overhead, both workspaces are allocated as one palloc block; only this
434 * function and btendscan know that.
435 */
436 if (scan->xs_want_itup && so->currTuples == NULL)
437 {
438 so->currTuples = (char *) palloc(BLCKSZ * 2);
439 so->markTuples = so->currTuples + BLCKSZ;
440 }
441
442 /*
443 * Reset the scan keys
444 */
445 if (scankey && scan->numberOfKeys > 0)
446 memcpy(scan->keyData, scankey, scan->numberOfKeys * sizeof(ScanKeyData));
447 so->numberOfKeys = 0; /* until _bt_preprocess_keys sets it */
448 so->numArrayKeys = 0; /* ditto */
449}
#define IsMVCCLikeSnapshot(snapshot)
Definition snapmgr.h:74
struct ScanKeyData * keyData
Definition relscan.h:154
struct SnapshotData * xs_snapshot
Definition relscan.h:151

References _bt_killitems(), BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, fb(), IndexScanDescData::heapRelation, IsMVCCLikeSnapshot, IndexScanDescData::keyData, memcpy(), IndexScanDescData::numberOfKeys, IndexScanDescData::opaque, palloc(), IndexScanDescData::xs_snapshot, and IndexScanDescData::xs_want_itup.

Referenced by bthandler().

◆ btrestrpos()

void btrestrpos ( IndexScanDesc  scan)

Definition at line 517 of file nbtree.c.

518{
520
521 if (so->markItemIndex >= 0)
522 {
523 /*
524 * The scan has never moved to a new page since the last mark. Just
525 * restore the itemIndex.
526 *
527 * NB: In this case we can't count on anything in so->markPos to be
528 * accurate.
529 */
530 so->currPos.itemIndex = so->markItemIndex;
531 }
532 else
533 {
534 /*
535 * The scan moved to a new page after last mark or restore, and we are
536 * now restoring to the marked page. We aren't holding any read
537 * locks, but if we're still holding the pin for the current position,
538 * we must drop it.
539 */
540 if (BTScanPosIsValid(so->currPos))
541 {
542 /* Before leaving current page, deal with any killed items */
543 if (so->numKilled > 0)
544 _bt_killitems(scan);
545 BTScanPosUnpinIfPinned(so->currPos);
546 }
547
548 if (BTScanPosIsValid(so->markPos))
549 {
550 /* bump pin on mark buffer for assignment to current buffer */
551 if (BTScanPosIsPinned(so->markPos))
552 IncrBufferRefCount(so->markPos.buf);
553 memcpy(&so->currPos, &so->markPos,
555 so->markPos.lastItem * sizeof(BTScanPosItem));
556 if (so->currTuples)
557 memcpy(so->currTuples, so->markTuples,
558 so->markPos.nextTupleOffset);
559 /* Reset the scan's array keys (see _bt_steppage for why) */
560 if (so->numArrayKeys)
561 {
562 _bt_start_array_keys(scan, so->currPos.dir);
563 so->needPrimScan = false;
564 }
565 }
566 else
567 BTScanPosInvalidate(so->currPos);
568 }
569}
void IncrBufferRefCount(Buffer buffer)
Definition bufmgr.c:5670
void _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir)
#define BTScanPosIsPinned(scanpos)
Definition nbtree.h:1004

References _bt_killitems(), _bt_start_array_keys(), BTScanPosInvalidate, BTScanPosIsPinned, BTScanPosIsValid, BTScanPosUnpinIfPinned, fb(), IncrBufferRefCount(), items, memcpy(), and IndexScanDescData::opaque.

Referenced by bthandler().

◆ bttranslatecmptype()

StrategyNumber bttranslatecmptype ( CompareType  cmptype,
Oid  opfamily 
)

Definition at line 1834 of file nbtree.c.

1835{
1836 switch (cmptype)
1837 {
1838 case COMPARE_LT:
1839 return BTLessStrategyNumber;
1840 case COMPARE_LE:
1842 case COMPARE_EQ:
1843 return BTEqualStrategyNumber;
1844 case COMPARE_GE:
1846 case COMPARE_GT:
1848 default:
1849 return InvalidStrategy;
1850 }
1851}
@ COMPARE_LE
Definition cmptype.h:35
@ COMPARE_GT
Definition cmptype.h:38
@ COMPARE_EQ
Definition cmptype.h:36
@ COMPARE_GE
Definition cmptype.h:37
@ COMPARE_LT
Definition cmptype.h:34
#define BTGreaterStrategyNumber
Definition stratnum.h:33
#define InvalidStrategy
Definition stratnum.h:24
#define BTLessStrategyNumber
Definition stratnum.h:29
#define BTEqualStrategyNumber
Definition stratnum.h:31
#define BTLessEqualStrategyNumber
Definition stratnum.h:30
#define BTGreaterEqualStrategyNumber
Definition stratnum.h:32

References BTEqualStrategyNumber, BTGreaterEqualStrategyNumber, BTGreaterStrategyNumber, BTLessEqualStrategyNumber, BTLessStrategyNumber, COMPARE_EQ, COMPARE_GE, COMPARE_GT, COMPARE_LE, COMPARE_LT, and InvalidStrategy.

Referenced by bthandler().

◆ bttranslatestrategy()

CompareType bttranslatestrategy ( StrategyNumber  strategy,
Oid  opfamily 
)

Definition at line 1814 of file nbtree.c.

1815{
1816 switch (strategy)
1817 {
1819 return COMPARE_LT;
1821 return COMPARE_LE;
1823 return COMPARE_EQ;
1825 return COMPARE_GE;
1827 return COMPARE_GT;
1828 default:
1829 return COMPARE_INVALID;
1830 }
1831}
@ COMPARE_INVALID
Definition cmptype.h:33

References BTEqualStrategyNumber, BTGreaterEqualStrategyNumber, BTGreaterStrategyNumber, BTLessEqualStrategyNumber, BTLessStrategyNumber, COMPARE_EQ, COMPARE_GE, COMPARE_GT, COMPARE_INVALID, COMPARE_LE, and COMPARE_LT.

Referenced by bthandler().

◆ btvacuumcleanup()

IndexBulkDeleteResult * btvacuumcleanup ( IndexVacuumInfo * info,
IndexBulkDeleteResult * stats 
)

Definition at line 1149 of file nbtree.c.

1150{
1152
1153 /* No-op in ANALYZE ONLY mode */
1154 if (info->analyze_only)
1155 return stats;
1156
1157 /*
1158 * If btbulkdelete was called, we need not do anything (we just maintain
1159 * the information used within _bt_vacuum_needs_cleanup() by calling
1160 * _bt_set_cleanup_info() below).
1161 *
1162 * If btbulkdelete was _not_ called, then we have a choice to make: we
1163 * must decide whether or not a btvacuumscan() call is needed now (i.e.
1164 * whether the ongoing VACUUM operation can entirely avoid a physical scan
1165 * of the index). A call to _bt_vacuum_needs_cleanup() decides it for us
1166 * now.
1167 */
1168 if (stats == NULL)
1169 {
1170 /* Check if VACUUM operation can entirely avoid btvacuumscan() call */
1171 if (!_bt_vacuum_needs_cleanup(info->index))
1172 return NULL;
1173
1174 /*
1175 * Since we aren't going to actually delete any leaf items, there's no
1176 * need to go through all the vacuum-cycle-ID pushups here.
1177 *
1178 * Posting list tuples are a source of inaccuracy for cleanup-only
1179 * scans. btvacuumscan() will assume that the number of index tuples
1180 * from each page can be used as num_index_tuples, even though
1181 * num_index_tuples is supposed to represent the number of TIDs in the
1182 * index. This naive approach can underestimate the number of tuples
1183 * in the index significantly.
1184 *
1185 * We handle the problem by making num_index_tuples an estimate in
1186 * cleanup-only case.
1187 */
1189 btvacuumscan(info, stats, NULL, NULL, 0);
1190 stats->estimated_count = true;
1191 }
1192
1193 /*
1194 * Maintain num_delpages value in metapage for _bt_vacuum_needs_cleanup().
1195 *
1196 * num_delpages is the number of deleted pages now in the index that were
1197 * not safe to place in the FSM to be recycled just yet. num_delpages is
1198 * greater than 0 only when _bt_pagedel() actually deleted pages during
1199 * our call to btvacuumscan(). Even then, _bt_pendingfsm_finalize() must
1200 * have failed to place any newly deleted pages in the FSM just moments
1201 * ago. (Actually, there are edge cases where recycling of the current
1202 * VACUUM's newly deleted pages does not even become safe by the time the
1203 * next VACUUM comes around. See nbtree/README.)
1204 */
1205 Assert(stats->pages_deleted >= stats->pages_free);
1206 num_delpages = stats->pages_deleted - stats->pages_free;
1208
1209 /*
1210 * It's quite possible for us to be fooled by concurrent page splits into
1211 * double-counting some index tuples, so disbelieve any total that exceeds
1212 * the underlying heap's count ... if we know that accurately. Otherwise
1213 * this might just make matters worse.
1214 */
1215 if (!info->estimated_count)
1216 {
1217 if (stats->num_index_tuples > info->num_heap_tuples)
1218 stats->num_index_tuples = info->num_heap_tuples;
1219 }
1220
1221 return stats;
1222}
uint32 BlockNumber
Definition block.h:31
void _bt_set_cleanup_info(Relation rel, BlockNumber num_delpages)
Definition nbtpage.c:233
bool _bt_vacuum_needs_cleanup(Relation rel)
Definition nbtpage.c:180
BlockNumber pages_deleted
Definition genam.h:90
BlockNumber pages_free
Definition genam.h:91
double num_index_tuples
Definition genam.h:87
double num_heap_tuples
Definition genam.h:60
bool analyze_only
Definition genam.h:56
bool estimated_count
Definition genam.h:58

References _bt_set_cleanup_info(), _bt_vacuum_needs_cleanup(), IndexVacuumInfo::analyze_only, Assert, btvacuumscan(), IndexVacuumInfo::estimated_count, IndexBulkDeleteResult::estimated_count, fb(), IndexVacuumInfo::index, IndexVacuumInfo::num_heap_tuples, IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, and palloc0_object.

Referenced by bthandler().

◆ btvacuumpage()

static BlockNumber btvacuumpage ( BTVacState * vstate,
Buffer  buf 
)
static

Definition at line 1412 of file nbtree.c.

1413{
1414 IndexVacuumInfo *info = vstate->info;
1415 IndexBulkDeleteResult *stats = vstate->stats;
1417 void *callback_state = vstate->callback_state;
1418 Relation rel = info->index;
1419 Relation heaprel = info->heaprel;
1420 bool attempt_pagedel;
1421 BlockNumber blkno,
1424 Page page;
1425 BTPageOpaque opaque;
1426
1427 blkno = scanblkno;
1428
1429backtrack:
1430
1431 attempt_pagedel = false;
1433
1434 _bt_lockbuf(rel, buf, BT_READ);
1435 page = BufferGetPage(buf);
1436 opaque = NULL;
1437 if (!PageIsNew(page))
1438 {
1439 _bt_checkpage(rel, buf);
1440 opaque = BTPageGetOpaque(page);
1441 }
1442
1443 Assert(blkno <= scanblkno);
1444 if (blkno != scanblkno)
1445 {
1446 /*
1447 * We're backtracking.
1448 *
1449 * We followed a right link to a sibling leaf page (a page that
1450 * happens to be from a block located before scanblkno). The only
1451 * case we want to do anything with is a live leaf page having the
1452 * current vacuum cycle ID.
1453 *
1454 * The page had better be in a state that's consistent with what we
1455 * expect. Check for conditions that imply corruption in passing. It
1456 * can't be half-dead because only an interrupted VACUUM process can
1457 * leave pages in that state, so we'd definitely have dealt with it
1458 * back when the page was the scanblkno page (half-dead pages are
1459 * always marked fully deleted by _bt_pagedel(), barring corruption).
1460 */
1461 if (!opaque || !P_ISLEAF(opaque) || P_ISHALFDEAD(opaque))
1462 {
1463 Assert(false);
1464 ereport(LOG,
1466 errmsg_internal("right sibling %u of scanblkno %u unexpectedly in an inconsistent state in index \"%s\"",
1467 blkno, scanblkno, RelationGetRelationName(rel))));
1468 _bt_relbuf(rel, buf);
1469 return scanblkno;
1470 }
1471
1472 /*
1473 * We may have already processed the page in an earlier call, when the
1474 * page was scanblkno. This happens when the leaf page split occurred
1475 * after the scan began, but before the right sibling page became the
1476 * scanblkno.
1477 *
1478 * Page may also have been deleted by current btvacuumpage() call,
1479 * since _bt_pagedel() sometimes deletes the right sibling page of
1480 * scanblkno in passing (it does so after we decided where to
1481 * backtrack to). We don't need to process this page as a deleted
1482 * page a second time now (in fact, it would be wrong to count it as a
1483 * deleted page in the bulk delete statistics a second time).
1484 */
1485 if (opaque->btpo_cycleid != vstate->cycleid || P_ISDELETED(opaque))
1486 {
1487 /* Done with current scanblkno (and all lower split pages) */
1488 _bt_relbuf(rel, buf);
1489 return scanblkno;
1490 }
1491 }
1492
1493 if (!opaque || BTPageIsRecyclable(page, heaprel))
1494 {
1495 /* Okay to recycle this page (which could be leaf or internal) */
1496 RecordFreeIndexPage(rel, blkno);
1497 stats->pages_deleted++;
1498 stats->pages_free++;
1499 }
1500 else if (P_ISDELETED(opaque))
1501 {
1502 /*
1503 * Already deleted page (which could be leaf or internal). Can't
1504 * recycle yet.
1505 */
1506 stats->pages_deleted++;
1507 }
1508 else if (P_ISHALFDEAD(opaque))
1509 {
1510 /* Half-dead leaf page (from interrupted VACUUM) -- finish deleting */
1511 attempt_pagedel = true;
1512
1513 /*
1514 * _bt_pagedel() will increment both pages_newly_deleted and
1515 * pages_deleted stats in all cases (barring corruption)
1516 */
1517 }
1518 else if (P_ISLEAF(opaque))
1519 {
1521 int ndeletable;
1523 int nupdatable;
1524 OffsetNumber offnum,
1525 minoff,
1526 maxoff;
1527 int nhtidsdead,
1528 nhtidslive;
1529
1530 /*
1531 * Trade in the initial read lock for a full cleanup lock on this
1532 * page. We must get such a lock on every leaf page over the course
1533 * of the vacuum scan, whether or not it actually contains any
1534 * deletable tuples --- see nbtree/README.
1535 */
1537
1538 /*
1539 * Check whether we need to backtrack to earlier pages. What we are
1540 * concerned about is a page split that happened since we started the
1541 * vacuum scan. If the split moved tuples on the right half of the
1542 * split (i.e. the tuples that sort high) to a block that we already
1543 * passed over, then we might have missed the tuples. We need to
1544 * backtrack now. (Must do this before possibly clearing btpo_cycleid
1545 * or deleting scanblkno page below!)
1546 */
1547 if (vstate->cycleid != 0 &&
1548 opaque->btpo_cycleid == vstate->cycleid &&
1549 !(opaque->btpo_flags & BTP_SPLIT_END) &&
1550 !P_RIGHTMOST(opaque) &&
1551 opaque->btpo_next < scanblkno)
1552 backtrack_to = opaque->btpo_next;
1553
1554 ndeletable = 0;
1555 nupdatable = 0;
1556 minoff = P_FIRSTDATAKEY(opaque);
1557 maxoff = PageGetMaxOffsetNumber(page);
1558 nhtidsdead = 0;
1559 nhtidslive = 0;
1560 if (callback)
1561 {
1562 /* btbulkdelete callback tells us what to delete (or update) */
1563 for (offnum = minoff;
1564 offnum <= maxoff;
1565 offnum = OffsetNumberNext(offnum))
1566 {
1567 IndexTuple itup;
1568
1569 itup = (IndexTuple) PageGetItem(page,
1570 PageGetItemId(page, offnum));
1571
1572 Assert(!BTreeTupleIsPivot(itup));
1573 if (!BTreeTupleIsPosting(itup))
1574 {
1575 /* Regular tuple, standard table TID representation */
1576 if (callback(&itup->t_tid, callback_state))
1577 {
1578 deletable[ndeletable++] = offnum;
1579 nhtidsdead++;
1580 }
1581 else
1582 nhtidslive++;
1583 }
1584 else
1585 {
1587 int nremaining;
1588
1589 /* Posting list tuple */
1590 vacposting = btreevacuumposting(vstate, itup, offnum,
1591 &nremaining);
1592 if (vacposting == NULL)
1593 {
1594 /*
1595 * All table TIDs from the posting tuple remain, so no
1596 * delete or update required
1597 */
1599 }
1600 else if (nremaining > 0)
1601 {
1602
1603 /*
1604 * Store metadata about posting list tuple in
1605 * updatable array for entire page. Existing tuple
1606 * will be updated during the later call to
1607 * _bt_delitems_vacuum().
1608 */
1610 updatable[nupdatable++] = vacposting;
1612 }
1613 else
1614 {
1615 /*
1616 * All table TIDs from the posting list must be
1617 * deleted. We'll delete the index tuple completely
1618 * (no update required).
1619 */
1620 Assert(nremaining == 0);
1621 deletable[ndeletable++] = offnum;
1624 }
1625
1627 }
1628 }
1629 }
1630
1631 /*
1632 * Apply any needed deletes or updates. We issue just one
1633 * _bt_delitems_vacuum() call per page, so as to minimize WAL traffic.
1634 */
1635 if (ndeletable > 0 || nupdatable > 0)
1636 {
1638 _bt_delitems_vacuum(rel, buf, deletable, ndeletable, updatable,
1639 nupdatable);
1640
1641 stats->tuples_removed += nhtidsdead;
1642 /* must recompute maxoff */
1643 maxoff = PageGetMaxOffsetNumber(page);
1644
1645 /* can't leak memory here */
1646 for (int i = 0; i < nupdatable; i++)
1647 pfree(updatable[i]);
1648 }
1649 else
1650 {
1651 /*
1652 * If the leaf page has been split during this vacuum cycle, it
1653 * seems worth expending a write to clear btpo_cycleid even if we
1654 * don't have any deletions to do. (If we do, _bt_delitems_vacuum
1655 * takes care of this.) This ensures we won't process the page
1656 * again.
1657 *
1658 * We treat this like a hint-bit update because there's no need to
1659 * WAL-log it.
1660 */
1661 Assert(nhtidsdead == 0);
1662 if (vstate->cycleid != 0 &&
1663 opaque->btpo_cycleid == vstate->cycleid)
1664 {
1665 opaque->btpo_cycleid = 0;
1666 MarkBufferDirtyHint(buf, true);
1667 }
1668 }
1669
1670 /*
1671 * If the leaf page is now empty, try to delete it; else count the
1672 * live tuples (live table TIDs in posting lists are counted as
1673 * separate live tuples). We don't delete when backtracking, though,
1674 * since that would require teaching _bt_pagedel() about backtracking
1675 * (doesn't seem worth adding more complexity to deal with that).
1676 *
1677 * We don't count the number of live TIDs during cleanup-only calls to
1678 * btvacuumscan (i.e. when callback is not set). We count the number
1679 * of index tuples directly instead. This avoids the expense of
1680 * directly examining all of the tuples on each page. VACUUM will
1681 * treat num_index_tuples as an estimate in cleanup-only case, so it
1682 * doesn't matter that this underestimates num_index_tuples
1683 * significantly in some cases.
1684 */
1685 if (minoff > maxoff)
1686 attempt_pagedel = (blkno == scanblkno);
1687 else if (callback)
1688 stats->num_index_tuples += nhtidslive;
1689 else
1690 stats->num_index_tuples += maxoff - minoff + 1;
1691
1693 }
1694
1695 if (attempt_pagedel)
1696 {
1697 MemoryContext oldcontext;
1698
1699 /* Run pagedel in a temp context to avoid memory leakage */
1700 MemoryContextReset(vstate->pagedelcontext);
1701 oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
1702
1703 /*
1704 * _bt_pagedel maintains the bulk delete stats on our behalf;
1705 * pages_newly_deleted and pages_deleted are likely to be incremented
1706 * during call
1707 */
1708 Assert(blkno == scanblkno);
1709 _bt_pagedel(rel, buf, vstate);
1710
1711 MemoryContextSwitchTo(oldcontext);
1712 /* pagedel released buffer, so we shouldn't */
1713 }
1714 else
1715 _bt_relbuf(rel, buf);
1716
1717 if (backtrack_to != P_NONE)
1718 {
1719 blkno = backtrack_to;
1720
1721 /* check for vacuum delay while not holding any buffer lock */
1722 vacuum_delay_point(false);
1723
1724 /*
1725 * We can't use _bt_getbuf() here because it always applies
1726 * _bt_checkpage(), which will barf on an all-zero page. We want to
1727 * recycle all-zero pages, not fail. Also, we want to use a
1728 * nondefault buffer access strategy.
1729 */
1731 info->strategy);
1732 goto backtrack;
1733 }
1734
1735 return scanblkno;
1736}
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition bufmgr.c:4446
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition bufmgr.c:5821
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition bufmgr.c:926
static Page BufferGetPage(Buffer buffer)
Definition bufmgr.h:468
@ RBM_NORMAL
Definition bufmgr.h:46
static bool PageIsNew(const PageData *page)
Definition bufpage.h:258
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition bufpage.h:268
static void * PageGetItem(PageData *page, const ItemIdData *itemId)
Definition bufpage.h:378
static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)
Definition bufpage.h:396
int errcode(int sqlerrcode)
Definition elog.c:874
#define LOG
Definition elog.h:32
int errmsg_internal(const char *fmt,...) pg_attribute_printf(1, 2)
#define ereport(elevel,...)
Definition elog.h:152
bool(* IndexBulkDeleteCallback)(ItemPointer itemptr, void *state)
Definition genam.h:95
void RecordFreeIndexPage(Relation rel, BlockNumber freeBlock)
Definition indexfsm.c:52
IndexTupleData * IndexTuple
Definition itup.h:53
#define MaxIndexTuplesPerPage
Definition itup.h:181
void MemoryContextReset(MemoryContext context)
Definition mcxt.c:403
void _bt_relbuf(Relation rel, Buffer buf)
Definition nbtpage.c:1044
void _bt_pagedel(Relation rel, Buffer leafbuf, BTVacState *vstate)
Definition nbtpage.c:1832
void _bt_delitems_vacuum(Relation rel, Buffer buf, OffsetNumber *deletable, int ndeletable, BTVacuumPosting *updatable, int nupdatable)
Definition nbtpage.c:1182
void _bt_checkpage(Relation rel, Buffer buf)
Definition nbtpage.c:802
void _bt_upgradelockbufcleanup(Relation rel, Buffer buf)
Definition nbtpage.c:1137
void _bt_lockbuf(Relation rel, Buffer buf, int access)
Definition nbtpage.c:1067
static BTVacuumPosting btreevacuumposting(BTVacState *vstate, IndexTuple posting, OffsetNumber updatedoffset, int *nremaining)
Definition nbtree.c:1750
#define P_ISHALFDEAD(opaque)
Definition nbtree.h:225
static bool BTreeTupleIsPivot(IndexTuple itup)
Definition nbtree.h:481
#define P_ISLEAF(opaque)
Definition nbtree.h:221
#define BTPageGetOpaque(page)
Definition nbtree.h:74
#define P_ISDELETED(opaque)
Definition nbtree.h:223
#define P_FIRSTDATAKEY(opaque)
Definition nbtree.h:370
#define P_RIGHTMOST(opaque)
Definition nbtree.h:220
#define BT_READ
Definition nbtree.h:730
static bool BTPageIsRecyclable(Page page, Relation heaprel)
Definition nbtree.h:292
static bool BTreeTupleIsPosting(IndexTuple itup)
Definition nbtree.h:493
#define BTP_SPLIT_END
Definition nbtree.h:82
#define OffsetNumberNext(offsetNumber)
Definition off.h:52
uint16 OffsetNumber
Definition off.h:24
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:124
static char buf[DEFAULT_XLOG_SEG_SIZE]
#define RelationGetRelationName(relation)
Definition rel.h:550
@ MAIN_FORKNUM
Definition relpath.h:58
BlockNumber btpo_next
Definition nbtree.h:66
uint16 btpo_flags
Definition nbtree.h:68
BTCycleId btpo_cycleid
Definition nbtree.h:69
double tuples_removed
Definition genam.h:88
BufferAccessStrategy strategy
Definition genam.h:61
Relation heaprel
Definition genam.h:55
void vacuum_delay_point(bool is_analyze)
Definition vacuum.c:2433

References _bt_checkpage(), _bt_delitems_vacuum(), _bt_lockbuf(), _bt_pagedel(), _bt_relbuf(), _bt_upgradelockbufcleanup(), Assert, BT_READ, BTP_SPLIT_END, BTPageGetOpaque, BTPageIsRecyclable(), BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTreeTupleGetNPosting(), BTreeTupleIsPivot(), BTreeTupleIsPosting(), btreevacuumposting(), buf, BufferGetBlockNumber(), BufferGetPage(), callback(), ereport, errcode(), errmsg_internal(), fb(), IndexVacuumInfo::heaprel, i, IndexVacuumInfo::index, LOG, MAIN_FORKNUM, MarkBufferDirtyHint(), MaxIndexTuplesPerPage, MemoryContextReset(), MemoryContextSwitchTo(), IndexBulkDeleteResult::num_index_tuples, OffsetNumberNext, P_FIRSTDATAKEY, P_ISDELETED, P_ISHALFDEAD, P_ISLEAF, P_NONE, P_RIGHTMOST, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageIsNew(), IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, pfree(), RBM_NORMAL, ReadBufferExtended(), RecordFreeIndexPage(), RelationGetRelationName, IndexVacuumInfo::strategy, IndexTupleData::t_tid, IndexBulkDeleteResult::tuples_removed, and vacuum_delay_point().

Referenced by btvacuumscan().

◆ btvacuumscan()

static void btvacuumscan ( IndexVacuumInfo *info,
IndexBulkDeleteResult *stats,
IndexBulkDeleteCallback callback,
void *callback_state,
BTCycleId cycleid 
)
static

Definition at line 1237 of file nbtree.c.

1240{
1241 Relation rel = info->index;
1243 BlockNumber num_pages;
1244 bool needLock;
1246 ReadStream *stream = NULL;
1247
1248 /*
1249 * Reset fields that track information about the entire index now. This
1250 * avoids double-counting in the case where a single VACUUM command
1251 * requires multiple scans of the index.
1252 *
1253 * Avoid resetting the tuples_removed and pages_newly_deleted fields here,
1254 * since they track information about the VACUUM command, and so must last
1255 * across each call to btvacuumscan().
1256 *
1257 * (Note that pages_free is treated as state about the whole index, not
1258 * the current VACUUM. This is appropriate because RecordFreeIndexPage()
1259 * calls are idempotent, and get repeated for the same deleted pages in
1260 * some scenarios. The point for us is to track the number of recyclable
1261 * pages in the index at the end of the VACUUM command.)
1262 */
1263 stats->num_pages = 0;
1264 stats->num_index_tuples = 0;
1265 stats->pages_deleted = 0;
1266 stats->pages_free = 0;
1267
1268 /* Set up info to pass down to btvacuumpage */
1269 vstate.info = info;
1270 vstate.stats = stats;
1271 vstate.callback = callback;
1272 vstate.callback_state = callback_state;
1273 vstate.cycleid = cycleid;
1274
1275 /* Create a temporary memory context to run _bt_pagedel in */
1277 "_bt_pagedel",
1279
1280 /* Initialize vstate fields used by _bt_pendingfsm_finalize */
1281 vstate.bufsize = 0;
1282 vstate.maxbufsize = 0;
1283 vstate.pendingpages = NULL;
1284 vstate.npendingpages = 0;
1285 /* Consider applying _bt_pendingfsm_finalize optimization */
1287
1288 /*
1289 * The outer loop iterates over all index pages except the metapage, in
1290 * physical order (we hope the kernel will cooperate in providing
1291 * read-ahead for speed). It is critical that we visit all leaf pages,
1292 * including ones added after we start the scan, else we might fail to
1293 * delete some deletable tuples. Hence, we must repeatedly check the
1294 * relation length. We must acquire the relation-extension lock while
1295 * doing so to avoid a race condition: if someone else is extending the
1296 * relation, there is a window where bufmgr/smgr have created a new
1297 * all-zero page but it hasn't yet been write-locked by _bt_getbuf(). If
1298 * we manage to scan such a page here, we'll improperly assume it can be
1299 * recycled. Taking the lock synchronizes things enough to prevent a
1300 * problem: either num_pages won't include the new page, or _bt_getbuf
1301 * already has write lock on the buffer and it will be fully initialized
1302 * before we can examine it. Also, we need not worry if a page is added
1303 * immediately after we look; the page splitting code already has
1304 * write-lock on the left page before it adds a right page, so we must
1305 * already have processed any tuples due to be moved into such a page.
1306 *
1307 * XXX: Now that new pages are locked with RBM_ZERO_AND_LOCK, I don't
1308 * think the use of the extension lock is still required.
1309 *
1310 * We can skip locking for new or temp relations, however, since no one
1311 * else could be accessing them.
1312 */
1314
1316
1317 /*
1318 * It is safe to use batchmode as block_range_read_stream_cb takes no
1319 * locks.
1320 */
1324 info->strategy,
1325 rel,
1328 &p,
1329 0);
1330 for (;;)
1331 {
1332 /* Get the current relation length */
1333 if (needLock)
1335 num_pages = RelationGetNumberOfBlocks(rel);
1336 if (needLock)
1338
1339 if (info->report_progress)
1341 num_pages);
1342
1343 /* Quit if we've scanned the whole relation */
1344 if (p.current_blocknum >= num_pages)
1345 break;
1346
1347 p.last_exclusive = num_pages;
1348
1349 /* Iterate over pages, then loop back to recheck relation length */
1350 while (true)
1351 {
1352 BlockNumber current_block;
1353 Buffer buf;
1354
1355 /* call vacuum_delay_point while not holding any buffer lock */
1356 vacuum_delay_point(false);
1357
1358 buf = read_stream_next_buffer(stream, NULL);
1359
1360 if (!BufferIsValid(buf))
1361 break;
1362
1363 current_block = btvacuumpage(&vstate, buf);
1364
1365 if (info->report_progress)
1367 current_block);
1368 }
1369
1370 /*
1371 * We have to reset the read stream to use it again. After returning
1372 * InvalidBuffer, the read stream API won't invoke our callback again
1373 * until the stream has been reset.
1374 */
1375 read_stream_reset(stream);
1376 }
1377
1378 read_stream_end(stream);
1379
1380 /* Set statistics num_pages field to final size of index */
1381 stats->num_pages = num_pages;
1382
1383 MemoryContextDelete(vstate.pagedelcontext);
1384
1385 /*
1386 * If there were any calls to _bt_pagedel() during scan of the index then
1387 * see if any of the resulting pages can be placed in the FSM now. When
1388 * it's not safe we'll have to leave it up to a future VACUUM operation.
1389 *
1390 * Finally, if we placed any pages in the FSM (either just now or during
1391 * the scan), forcibly update the upper-level FSM pages to ensure that
1392 * searchers can find them.
1393 */
1395 if (stats->pages_free > 0)
1397}
void pgstat_progress_update_param(int index, int64 val)
int Buffer
Definition buf.h:23
#define RelationGetNumberOfBlocks(reln)
Definition bufmgr.h:309
static bool BufferIsValid(Buffer bufnum)
Definition bufmgr.h:419
void IndexFreeSpaceMapVacuum(Relation rel)
Definition indexfsm.c:71
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition lmgr.c:424
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition lmgr.c:474
#define ExclusiveLock
Definition lockdefs.h:42
MemoryContext CurrentMemoryContext
Definition mcxt.c:160
#define AllocSetContextCreate
Definition memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition memutils.h:160
void _bt_pendingfsm_finalize(Relation rel, BTVacState *vstate)
Definition nbtpage.c:3033
void _bt_pendingfsm_init(Relation rel, BTVacState *vstate, bool cleanuponly)
Definition nbtpage.c:2991
static BlockNumber btvacuumpage(BTVacState *vstate, Buffer buf)
Definition nbtree.c:1412
#define PROGRESS_SCAN_BLOCKS_DONE
Definition progress.h:151
#define PROGRESS_SCAN_BLOCKS_TOTAL
Definition progress.h:150
void read_stream_reset(ReadStream *stream)
Buffer read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
ReadStream * read_stream_begin_relation(int flags, BufferAccessStrategy strategy, Relation rel, ForkNumber forknum, ReadStreamBlockNumberCB callback, void *callback_private_data, size_t per_buffer_data_size)
void read_stream_end(ReadStream *stream)
BlockNumber block_range_read_stream_cb(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
#define READ_STREAM_MAINTENANCE
Definition read_stream.h:28
#define READ_STREAM_USE_BATCHING
Definition read_stream.h:64
#define READ_STREAM_FULL
Definition read_stream.h:43
#define RELATION_IS_LOCAL(relation)
Definition rel.h:659
BlockNumber num_pages
Definition genam.h:85
bool report_progress
Definition genam.h:57

References _bt_pendingfsm_finalize(), _bt_pendingfsm_init(), ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, block_range_read_stream_cb(), BTREE_METAPAGE, btvacuumpage(), buf, BufferIsValid(), callback(), BlockRangeReadStreamPrivate::current_blocknum, CurrentMemoryContext, ExclusiveLock, fb(), IndexVacuumInfo::index, IndexFreeSpaceMapVacuum(), BlockRangeReadStreamPrivate::last_exclusive, LockRelationForExtension(), MAIN_FORKNUM, MemoryContextDelete(), IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::num_pages, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, pgstat_progress_update_param(), PROGRESS_SCAN_BLOCKS_DONE, PROGRESS_SCAN_BLOCKS_TOTAL, read_stream_begin_relation(), read_stream_end(), READ_STREAM_FULL, READ_STREAM_MAINTENANCE, read_stream_next_buffer(), read_stream_reset(), READ_STREAM_USE_BATCHING, RELATION_IS_LOCAL, RelationGetNumberOfBlocks, IndexVacuumInfo::report_progress, IndexVacuumInfo::strategy, UnlockRelationForExtension(), and vacuum_delay_point().

Referenced by btbulkdelete(), and btvacuumcleanup().