PostgreSQL Source Code (git master)
nbtree.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "access/relscan.h"
#include "access/stratnum.h"
#include "commands/progress.h"
#include "commands/vacuum.h"
#include "nodes/execnodes.h"
#include "pgstat.h"
#include "storage/bulk_write.h"
#include "storage/condition_variable.h"
#include "storage/indexfsm.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/read_stream.h"
#include "utils/datum.h"
#include "utils/fmgrprotos.h"
#include "utils/index_selfuncs.h"
#include "utils/memutils.h"

Data Structures

struct  BTParallelScanDescData
 

Typedefs

typedef struct BTParallelScanDescData BTParallelScanDescData
 
typedef struct BTParallelScanDescData * BTParallelScanDesc
 

Enumerations

enum  BTPS_State { BTPARALLEL_NOT_INITIALIZED, BTPARALLEL_NEED_PRIMSCAN, BTPARALLEL_ADVANCING, BTPARALLEL_IDLE, BTPARALLEL_DONE }
 

Functions

static bool _bt_start_prim_scan (IndexScanDesc scan)
 
static void _bt_parallel_serialize_arrays (Relation rel, BTParallelScanDesc btscan, BTScanOpaque so)
 
static void _bt_parallel_restore_arrays (Relation rel, BTParallelScanDesc btscan, BTScanOpaque so)
 
static void btvacuumscan (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, BTCycleId cycleid)
 
static BlockNumber btvacuumpage (BTVacState *vstate, Buffer buf)
 
static BTVacuumPosting btreevacuumposting (BTVacState *vstate, IndexTuple posting, OffsetNumber updatedoffset, int *nremaining)
 
Datum bthandler (PG_FUNCTION_ARGS)
 
void btbuildempty (Relation index)
 
bool btinsert (Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo)
 
bool btgettuple (IndexScanDesc scan, ScanDirection dir)
 
int64 btgetbitmap (IndexScanDesc scan, TIDBitmap *tbm)
 
IndexScanDesc btbeginscan (Relation rel, int nkeys, int norderbys)
 
void btrescan (IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
 
void btendscan (IndexScanDesc scan)
 
void btmarkpos (IndexScanDesc scan)
 
void btrestrpos (IndexScanDesc scan)
 
Size btestimateparallelscan (Relation rel, int nkeys, int norderbys)
 
void btinitparallelscan (void *target)
 
void btparallelrescan (IndexScanDesc scan)
 
bool _bt_parallel_seize (IndexScanDesc scan, BlockNumber *next_scan_page, BlockNumber *last_curr_page, bool first)
 
void _bt_parallel_release (IndexScanDesc scan, BlockNumber next_scan_page, BlockNumber curr_page)
 
void _bt_parallel_done (IndexScanDesc scan)
 
void _bt_parallel_primscan_schedule (IndexScanDesc scan, BlockNumber curr_page)
 
IndexBulkDeleteResult * btbulkdelete (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
 
IndexBulkDeleteResult * btvacuumcleanup (IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 
bool btcanreturn (Relation index, int attno)
 
int btgettreeheight (Relation rel)
 
CompareType bttranslatestrategy (StrategyNumber strategy, Oid opfamily)
 
StrategyNumber bttranslatecmptype (CompareType cmptype, Oid opfamily)
 

Typedef Documentation

◆ BTParallelScanDesc

Definition at line 93 of file nbtree.c.

◆ BTParallelScanDescData

Enumeration Type Documentation

◆ BTPS_State

enum BTPS_State
Enumerator
BTPARALLEL_NOT_INITIALIZED 
BTPARALLEL_NEED_PRIMSCAN 
BTPARALLEL_ADVANCING 
BTPARALLEL_IDLE 
BTPARALLEL_DONE 

Definition at line 54 of file nbtree.c.

55{
56 BTPARALLEL_NOT_INITIALIZED,
57 BTPARALLEL_NEED_PRIMSCAN,
58 BTPARALLEL_ADVANCING,
59 BTPARALLEL_IDLE,
60 BTPARALLEL_DONE,
61} BTPS_State;
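
These five values form a small state machine over the shared parallel scan descriptor. A paraphrased summary of the transitions, gathered from the functions documented below (the authoritative logic lives in those functions; every transition is made while holding btps_lock in LW_EXCLUSIVE mode):

/*
 * BTPARALLEL_NOT_INITIALIZED --_bt_parallel_seize()------------> BTPARALLEL_ADVANCING
 * BTPARALLEL_ADVANCING -------_bt_parallel_release()-----------> BTPARALLEL_IDLE
 * BTPARALLEL_IDLE ------------_bt_parallel_seize()--------------> BTPARALLEL_ADVANCING
 * BTPARALLEL_IDLE ------------_bt_parallel_primscan_schedule()--> BTPARALLEL_NEED_PRIMSCAN
 * BTPARALLEL_NEED_PRIMSCAN ---_bt_parallel_seize(first=true)----> BTPARALLEL_ADVANCING
 * any state except NEED_PRIMSCAN --_bt_parallel_done()----------> BTPARALLEL_DONE
 */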

Function Documentation

◆ _bt_parallel_done()

void _bt_parallel_done ( IndexScanDesc  scan)

Definition at line 1050 of file nbtree.c.

1051{
1052 BTScanOpaque so = (BTScanOpaque) scan->opaque;
1053 ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
1054 BTParallelScanDesc btscan;
1055 bool status_changed = false;
1056
1057 Assert(!BTScanPosIsValid(so->currPos));
1058
1059 /* Do nothing, for non-parallel scans */
1060 if (parallel_scan == NULL)
1061 return;
1062
1063 /*
1064 * Should not mark parallel scan done when there's still a pending
1065 * primitive index scan
1066 */
1067 if (so->needPrimScan)
1068 return;
1069
1070 btscan = (BTParallelScanDesc) OffsetToPointer(parallel_scan,
1071 parallel_scan->ps_offset_am);
1072
1073 /*
1074 * Mark the parallel scan as done, unless some other process did so
1075 * already
1076 */
1077 LWLockAcquire(&btscan->btps_lock, LW_EXCLUSIVE);
1078 Assert(btscan->btps_pageStatus != BTPARALLEL_NEED_PRIMSCAN);
1079 if (btscan->btps_pageStatus != BTPARALLEL_DONE)
1080 {
1081 btscan->btps_pageStatus = BTPARALLEL_DONE;
1082 status_changed = true;
1083 }
1084 LWLockRelease(&btscan->btps_lock);
1085
1086 /* wake up all the workers associated with this parallel scan */
1087 if (status_changed)
1088 ConditionVariableBroadcast(&btscan->btps_cv);
1089}

References Assert(), BTPARALLEL_DONE, BTPARALLEL_NEED_PRIMSCAN, BTScanPosIsValid, ConditionVariableBroadcast(), BTScanOpaqueData::currPos, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), BTScanOpaqueData::needPrimScan, OffsetToPointer, IndexScanDescData::opaque, and IndexScanDescData::parallel_scan.

Referenced by _bt_endpoint(), _bt_first(), _bt_parallel_seize(), _bt_readnextpage(), and _bt_start_prim_scan().

◆ _bt_parallel_primscan_schedule()

void _bt_parallel_primscan_schedule ( IndexScanDesc  scan,
BlockNumber  curr_page 
)

Definition at line 1100 of file nbtree.c.

1101{
1102 Relation rel = scan->indexRelation;
1103 BTScanOpaque so = (BTScanOpaque) scan->opaque;
1104 ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
1105 BTParallelScanDesc btscan;
1106
1107 Assert(so->numArrayKeys);
1108
1109 btscan = (BTParallelScanDesc) OffsetToPointer(parallel_scan,
1110 parallel_scan->ps_offset_am);
1111
1112 LWLockAcquire(&btscan->btps_lock, LW_EXCLUSIVE);
1113 if (btscan->btps_lastCurrPage == curr_page &&
1114 btscan->btps_pageStatus == BTPARALLEL_IDLE)
1115 {
1116 btscan->btps_nextScanPage = InvalidBlockNumber;
1117 btscan->btps_lastCurrPage = InvalidBlockNumber;
1118 btscan->btps_pageStatus = BTPARALLEL_NEED_PRIMSCAN;
1119
1120 /* Serialize scan's current array keys */
1121 _bt_parallel_serialize_arrays(rel, btscan, so);
1122 }
1123 LWLockRelease(&btscan->btps_lock);
1124}

References _bt_parallel_serialize_arrays(), Assert(), BTPARALLEL_IDLE, BTPARALLEL_NEED_PRIMSCAN, IndexScanDescData::indexRelation, InvalidBlockNumber, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), BTScanOpaqueData::numArrayKeys, OffsetToPointer, IndexScanDescData::opaque, and IndexScanDescData::parallel_scan.

Referenced by _bt_advance_array_keys(), and _bt_readpage().

◆ _bt_parallel_release()

void _bt_parallel_release ( IndexScanDesc  scan,
BlockNumber  next_scan_page,
BlockNumber  curr_page 
)

Definition at line 1023 of file nbtree.c.

1025{
1026 ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
1027 BTParallelScanDesc btscan;
1028
1029 Assert(BlockNumberIsValid(next_scan_page));
1030
1031 btscan = (BTParallelScanDesc) OffsetToPointer(parallel_scan,
1032 parallel_scan->ps_offset_am);
1033
1034 LWLockAcquire(&btscan->btps_lock, LW_EXCLUSIVE);
1035 btscan->btps_nextScanPage = next_scan_page;
1036 btscan->btps_lastCurrPage = curr_page;
1037 btscan->btps_pageStatus = BTPARALLEL_IDLE;
1038 LWLockRelease(&btscan->btps_lock);
1039 ConditionVariableSignal(&btscan->btps_cv);
1040}

References Assert(), BlockNumberIsValid(), BTPARALLEL_IDLE, BTParallelScanDescData::btps_cv, BTParallelScanDescData::btps_lastCurrPage, BTParallelScanDescData::btps_lock, BTParallelScanDescData::btps_nextScanPage, BTParallelScanDescData::btps_pageStatus, ConditionVariableSignal(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), OffsetToPointer, IndexScanDescData::parallel_scan, and ParallelIndexScanDescData::ps_offset_am.

Referenced by _bt_readnextpage(), and _bt_readpage().

◆ _bt_parallel_restore_arrays()

static void _bt_parallel_restore_arrays ( Relation  rel,
BTParallelScanDesc  btscan,
BTScanOpaque  so 
)
static

Definition at line 775 of file nbtree.c.

777{
778 char *datumshared;
779
780 /* Space for serialized datums begins immediately after btps_arrElems[] */
781 datumshared = ((char *) &btscan->btps_arrElems[so->numArrayKeys]);
782 for (int i = 0; i < so->numArrayKeys; i++)
783 {
784 BTArrayKeyInfo *array = &so->arrayKeys[i];
785 ScanKey skey = &so->keyData[array->scan_key];
786 bool isnull;
787
788 if (array->num_elems != -1)
789 {
790 /* Restore SAOP array using its saved cur_elem */
791 Assert(!(skey->sk_flags & SK_BT_SKIP));
792 array->cur_elem = btscan->btps_arrElems[i];
793 skey->sk_argument = array->elem_values[array->cur_elem];
794 continue;
795 }
796
797 /* Restore skip array by restoring its key directly */
798 if (!array->attbyval && skey->sk_argument)
799 pfree(DatumGetPointer(skey->sk_argument));
800 skey->sk_argument = (Datum) 0;
801 memcpy(&skey->sk_flags, datumshared, sizeof(int));
802 datumshared += sizeof(int);
803
804 Assert(skey->sk_flags & SK_BT_SKIP);
805
806 if (skey->sk_flags & (SK_BT_MINVAL | SK_BT_MAXVAL))
807 {
808 /* No sk_argument datum to restore */
809 continue;
810 }
811
812 skey->sk_argument = datumRestore(&datumshared, &isnull);
813 if (isnull)
814 {
815 Assert(skey->sk_argument == 0);
816 Assert(skey->sk_flags & SK_SEARCHNULL);
817 Assert(skey->sk_flags & SK_ISNULL);
818 }
819 }
820}

References BTScanOpaqueData::arrayKeys, Assert(), BTArrayKeyInfo::attbyval, BTParallelScanDescData::btps_arrElems, BTArrayKeyInfo::cur_elem, DatumGetPointer(), datumRestore(), BTArrayKeyInfo::elem_values, i, BTScanOpaqueData::keyData, BTArrayKeyInfo::num_elems, BTScanOpaqueData::numArrayKeys, pfree(), BTArrayKeyInfo::scan_key, ScanKeyData::sk_argument, SK_BT_MAXVAL, SK_BT_MINVAL, SK_BT_SKIP, ScanKeyData::sk_flags, SK_ISNULL, and SK_SEARCHNULL.

Referenced by _bt_parallel_seize().

◆ _bt_parallel_seize()

bool _bt_parallel_seize ( IndexScanDesc  scan,
BlockNumber *  next_scan_page,
BlockNumber *  last_curr_page,
bool  first 
)

Definition at line 885 of file nbtree.c.

887{
888 Relation rel = scan->indexRelation;
889 BTScanOpaque so = (BTScanOpaque) scan->opaque;
890 bool exit_loop = false,
891 status = true,
892 endscan = false;
893 ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
894 BTParallelScanDesc btscan;
895
896 *next_scan_page = InvalidBlockNumber;
897 *last_curr_page = InvalidBlockNumber;
898
899 /*
900 * Reset so->currPos, and initialize moreLeft/moreRight such that the next
901 * call to _bt_readnextpage treats this backend similarly to a serial
902 * backend that steps from *last_curr_page to *next_scan_page (unless this
903 * backend's so->currPos is initialized by _bt_readfirstpage before then).
904 */
905 BTScanPosInvalidate(so->currPos);
906 so->currPos.moreLeft = so->currPos.moreRight = true;
907
908 if (first)
909 {
910 /*
911 * Initialize array related state when called from _bt_first, assuming
912 * that this will be the first primitive index scan for the scan
913 */
914 so->needPrimScan = false;
915 so->scanBehind = false;
916 so->oppositeDirCheck = false;
917 }
918 else
919 {
920 /*
921 * Don't attempt to seize the scan when it requires another primitive
922 * index scan, since caller's backend cannot start it right now
923 */
924 if (so->needPrimScan)
925 return false;
926 }
927
928 btscan = (BTParallelScanDesc) OffsetToPointer(parallel_scan,
929 parallel_scan->ps_offset_am);
930
931 while (1)
932 {
933 LWLockAcquire(&btscan->btps_lock, LW_EXCLUSIVE);
934
935 if (btscan->btps_pageStatus == BTPARALLEL_DONE)
936 {
937 /* We're done with this parallel index scan */
938 status = false;
939 }
940 else if (btscan->btps_pageStatus == BTPARALLEL_IDLE &&
941 btscan->btps_nextScanPage == P_NONE)
942 {
943 /* End this parallel index scan */
944 status = false;
945 endscan = true;
946 }
947 else if (btscan->btps_pageStatus == BTPARALLEL_NEED_PRIMSCAN)
948 {
949 Assert(so->numArrayKeys);
950
951 if (first)
952 {
953 /* Can start scheduled primitive scan right away, so do so */
954 btscan->btps_pageStatus = BTPARALLEL_ADVANCING;
955
956 /* Restore scan's array keys from serialized values */
957 _bt_parallel_restore_arrays(rel, btscan, so);
958 exit_loop = true;
959 }
960 else
961 {
962 /*
963 * Don't attempt to seize the scan when it requires another
964 * primitive index scan, since caller's backend cannot start
965 * it right now
966 */
967 status = false;
968 }
969
970 /*
971 * Either way, update backend local state to indicate that a
972 * pending primitive scan is required
973 */
974 so->needPrimScan = true;
975 so->scanBehind = false;
976 so->oppositeDirCheck = false;
977 }
978 else if (btscan->btps_pageStatus != BTPARALLEL_ADVANCING)
979 {
980 /*
981 * We have successfully seized control of the scan for the purpose
982 * of advancing it to a new page!
983 */
984 btscan->btps_pageStatus = BTPARALLEL_ADVANCING;
985 Assert(btscan->btps_nextScanPage != P_NONE);
986 *next_scan_page = btscan->btps_nextScanPage;
987 *last_curr_page = btscan->btps_lastCurrPage;
988 exit_loop = true;
989 }
990 LWLockRelease(&btscan->btps_lock);
991 if (exit_loop || !status)
992 break;
993 ConditionVariableSleep(&btscan->btps_cv, WAIT_EVENT_BTREE_PAGE);
994 }
995 ConditionVariableCancelSleep();
996
997 /* When the scan has reached the rightmost (or leftmost) page, end it */
998 if (endscan)
999 _bt_parallel_done(scan);
1000
1001 return status;
1002}

References _bt_parallel_done(), _bt_parallel_restore_arrays(), Assert(), BTPARALLEL_ADVANCING, BTPARALLEL_DONE, BTPARALLEL_IDLE, BTPARALLEL_NEED_PRIMSCAN, BTScanPosInvalidate, ConditionVariableCancelSleep(), ConditionVariableSleep(), BTScanOpaqueData::currPos, IndexScanDescData::indexRelation, InvalidBlockNumber, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), BTScanPosData::moreLeft, BTScanPosData::moreRight, BTScanOpaqueData::needPrimScan, BTScanOpaqueData::numArrayKeys, OffsetToPointer, IndexScanDescData::opaque, BTScanOpaqueData::oppositeDirCheck, P_NONE, IndexScanDescData::parallel_scan, and BTScanOpaqueData::scanBehind.

Referenced by _bt_first(), and _bt_readnextpage().
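
Together with _bt_parallel_release() and _bt_parallel_done(), this defines the per-worker protocol. A minimal sketch of one backend driving the shared scan; read_page_and_pick_next() is a hypothetical stand-in for the real _bt_readnextpage() machinery in nbtsearch.c:

#include "postgres.h"
#include "access/nbtree.h"

/* Hypothetical: reads curr_page and returns the successor page number */
static BlockNumber read_page_and_pick_next(IndexScanDesc scan,
                                           BlockNumber curr_page);

static void
drive_parallel_scan(IndexScanDesc scan)
{
    BlockNumber next_scan_page;
    BlockNumber last_curr_page;
    bool        first = true;

    while (_bt_parallel_seize(scan, &next_scan_page, &last_curr_page, first))
    {
        BlockNumber next;

        first = false;

        /*
         * This backend now holds the BTPARALLEL_ADVANCING token.  On the
         * very first seize, next_scan_page may be InvalidBlockNumber,
         * meaning "descend from the root"; _bt_first() handles that case
         * for real scans.  Publish the successor page as soon as it is
         * known, so other workers are not blocked on the per-page work.
         */
        next = read_page_and_pick_next(scan, next_scan_page);
        _bt_parallel_release(scan, next, next_scan_page);
    }

    /* Scan exhausted, or another backend already marked it done */
    _bt_parallel_done(scan);
}

When the helper reports P_NONE as the successor, the next seize attempt takes the "End this parallel index scan" branch above and the scan finishes.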

◆ _bt_parallel_serialize_arrays()

static void _bt_parallel_serialize_arrays ( Relation  rel,
BTParallelScanDesc  btscan,
BTScanOpaque  so 
)
static

Definition at line 732 of file nbtree.c.

734{
735 char *datumshared;
736
737 /* Space for serialized datums begins immediately after btps_arrElems[] */
738 datumshared = ((char *) &btscan->btps_arrElems[so->numArrayKeys]);
739 for (int i = 0; i < so->numArrayKeys; i++)
740 {
741 BTArrayKeyInfo *array = &so->arrayKeys[i];
742 ScanKey skey = &so->keyData[array->scan_key];
743
744 if (array->num_elems != -1)
745 {
746 /* Save SAOP array's cur_elem (no need to copy key/datum) */
747 Assert(!(skey->sk_flags & SK_BT_SKIP));
748 btscan->btps_arrElems[i] = array->cur_elem;
749 continue;
750 }
751
752 /* Save all mutable state associated with skip array's key */
753 Assert(skey->sk_flags & SK_BT_SKIP);
754 memcpy(datumshared, &skey->sk_flags, sizeof(int));
755 datumshared += sizeof(int);
756
757 if (skey->sk_flags & (SK_BT_MINVAL | SK_BT_MAXVAL))
758 {
759 /* No sk_argument datum to serialize */
760 Assert(skey->sk_argument == 0);
761 continue;
762 }
763
764 datumSerialize(skey->sk_argument, (skey->sk_flags & SK_ISNULL) != 0,
765 array->attbyval, array->attlen, &datumshared);
766 }
767}

References BTScanOpaqueData::arrayKeys, Assert(), BTArrayKeyInfo::attbyval, BTArrayKeyInfo::attlen, BTParallelScanDescData::btps_arrElems, BTArrayKeyInfo::cur_elem, datumSerialize(), i, BTScanOpaqueData::keyData, BTArrayKeyInfo::num_elems, BTScanOpaqueData::numArrayKeys, BTArrayKeyInfo::scan_key, ScanKeyData::sk_argument, SK_BT_MAXVAL, SK_BT_MINVAL, SK_BT_SKIP, ScanKeyData::sk_flags, and SK_ISNULL.

Referenced by _bt_parallel_primscan_schedule().

◆ _bt_start_prim_scan()

static bool _bt_start_prim_scan ( IndexScanDesc  scan)
static

Definition at line 668 of file nbtree.c.

669{
670 BTScanOpaque so = (BTScanOpaque) scan->opaque;
671
672 Assert(so->numArrayKeys);
673
674 so->scanBehind = so->oppositeDirCheck = false; /* reset */
675
676 /*
677 * Array keys are advanced within _bt_checkkeys when the scan reaches the
678 * leaf level (more precisely, they're advanced when the scan reaches the
679 * end of each distinct set of array elements). This process avoids
680 * repeat access to leaf pages (across multiple primitive index scans) by
681 * advancing the scan's array keys when it allows the primitive index scan
682 * to find nearby matching tuples (or when it eliminates ranges of array
683 * key space that can't possibly be satisfied by any index tuple).
684 *
685 * _bt_checkkeys sets a simple flag variable to schedule another primitive
686 * index scan. The flag tells us what to do.
687 *
688 * We cannot rely on _bt_first always reaching _bt_checkkeys. There are
689 * various cases where that won't happen. For example, if the index is
690 * completely empty, then _bt_first won't call _bt_readpage/_bt_checkkeys.
691 * We also don't expect a call to _bt_checkkeys during searches for a
692 * non-existent value that happens to be lower/higher than any existing
693 * value in the index.
694 *
695 * We don't require special handling for these cases -- we don't need to
696 * be explicitly instructed to _not_ perform another primitive index scan.
697 * It's up to code under the control of _bt_first to always set the flag
698 * when another primitive index scan will be required.
699 *
700 * This works correctly, even with the tricky cases listed above, which
701 * all involve access to leaf pages "near the boundaries of the key space"
702 * (whether it's from a leftmost/rightmost page, or an imaginary empty
703 * leaf root page). If _bt_checkkeys cannot be reached by a primitive
704 * index scan for one set of array keys, then it also won't be reached for
705 * any later set ("later" in terms of the direction that we scan the index
706 * and advance the arrays). The array keys won't have advanced in these
707 * cases, but that's the correct behavior (even _bt_advance_array_keys
708 * won't always advance the arrays at the point they become "exhausted").
709 */
710 if (so->needPrimScan)
711 {
712 /*
713 * Flag was set -- must call _bt_first again, which will reset the
714 * scan's needPrimScan flag
715 */
716 return true;
717 }
718
719 /* The top-level index scan ran out of tuples in this scan direction */
720 if (scan->parallel_scan != NULL)
721 _bt_parallel_done(scan);
722
723 return false;
724}

References _bt_parallel_done(), Assert(), BTScanOpaqueData::needPrimScan, BTScanOpaqueData::numArrayKeys, IndexScanDescData::opaque, BTScanOpaqueData::oppositeDirCheck, IndexScanDescData::parallel_scan, and BTScanOpaqueData::scanBehind.

Referenced by btgetbitmap(), and btgettuple().
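
Both callers use the flag through the same loop shape; a condensed mirror of the loop in btgettuple() below (not new logic, and _bt_start_prim_scan() is static, so this only illustrates the in-file contract):

do
{
    if (!BTScanPosIsValid(so->currPos))
        res = _bt_first(scan, dir);  /* runs one primitive index scan */
    else
        res = _bt_next(scan, dir);

    if (res)
        break;                       /* caller gets a tuple */

    /* continue only while another primitive scan has been scheduled */
} while (so->numArrayKeys && _bt_start_prim_scan(scan));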

◆ btbeginscan()

IndexScanDesc btbeginscan ( Relation  rel,
int  nkeys,
int  norderbys 
)

Definition at line 336 of file nbtree.c.

337{
338 IndexScanDesc scan;
339 BTScanOpaque so;
340
341 /* no order by operators allowed */
342 Assert(norderbys == 0);
343
344 /* get the scan */
345 scan = RelationGetIndexScan(rel, nkeys, norderbys);
346
347 /* allocate private workspace */
348 so = palloc_object(BTScanOpaqueData);
349 BTScanPosInvalidate(so->currPos);
350 BTScanPosInvalidate(so->markPos);
351 if (scan->numberOfKeys > 0)
352 so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
353 else
354 so->keyData = NULL;
355
356 so->skipScan = false;
357 so->needPrimScan = false;
358 so->scanBehind = false;
359 so->oppositeDirCheck = false;
360 so->arrayKeys = NULL;
361 so->orderProcs = NULL;
362 so->arrayContext = NULL;
363
364 so->killedItems = NULL; /* until needed */
365 so->numKilled = 0;
366
367 /*
368 * We don't know yet whether the scan will be index-only, so we do not
369 * allocate the tuple workspace arrays until btrescan. However, we set up
370 * scan->xs_itupdesc whether we'll need it or not, since that's so cheap.
371 */
372 so->currTuples = so->markTuples = NULL;
373
374 scan->xs_itupdesc = RelationGetDescr(rel);
375
376 scan->opaque = so;
377
378 return scan;
379}

References BTScanOpaqueData::arrayContext, BTScanOpaqueData::arrayKeys, Assert(), BTScanPosInvalidate, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanOpaqueData::keyData, BTScanOpaqueData::killedItems, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanOpaqueData::needPrimScan, IndexScanDescData::numberOfKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, BTScanOpaqueData::oppositeDirCheck, BTScanOpaqueData::orderProcs, palloc(), palloc_object, RelationGetDescr, RelationGetIndexScan(), BTScanOpaqueData::scanBehind, BTScanOpaqueData::skipScan, and IndexScanDescData::xs_itupdesc.

Referenced by bthandler().
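
Core never calls btbeginscan() directly; it is reached through the ambeginscan pointer installed by bthandler(), normally via indexam.c. A minimal sketch of a scan driven through that layer, assuming the PostgreSQL 17 signature of index_beginscan() (current git master adds an instrumentation argument) and an index whose first key column is int4:

#include "postgres.h"
#include "access/genam.h"
#include "access/skey.h"
#include "access/stratnum.h"
#include "utils/fmgroids.h"
#include "utils/snapmgr.h"

/* Scan "index" (over "heap") for rows whose first key column equals 42 */
static void
scan_for_42(Relation heap, Relation index)
{
    ScanKeyData skey;
    IndexScanDesc scan;

    ScanKeyInit(&skey,
                1,                     /* first index attribute */
                BTEqualStrategyNumber, /* "=" */
                F_INT4EQ,              /* int4 equality support proc */
                Int32GetDatum(42));

    scan = index_beginscan(heap, index, GetActiveSnapshot(), 1, 0);
    index_rescan(scan, &skey, 1, NULL, 0); /* reaches btrescan() */

    while (index_getnext_tid(scan, ForwardScanDirection) != NULL)
    {
        /* scan->xs_heaptid holds a match; each call reaches btgettuple() */
    }

    index_endscan(scan);               /* reaches btendscan() */
}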

◆ btbuildempty()

void btbuildempty ( Relation  index)

Definition at line 180 of file nbtree.c.

181{
182 bool allequalimage = _bt_allequalimage(index, false);
183 BulkWriteState *bulkstate;
184 BulkWriteBuffer metabuf;
185
186 bulkstate = smgr_bulk_start_rel(index, INIT_FORKNUM);
187
188 /* Construct metapage. */
189 metabuf = smgr_bulk_get_buf(bulkstate);
190 _bt_initmetapage((Page) metabuf, P_NONE, 0, allequalimage);
191 smgr_bulk_write(bulkstate, BTREE_METAPAGE, metabuf, true);
192
193 smgr_bulk_finish(bulkstate);
194}

References _bt_allequalimage(), _bt_initmetapage(), BTREE_METAPAGE, INIT_FORKNUM, P_NONE, smgr_bulk_finish(), smgr_bulk_get_buf(), smgr_bulk_start_rel(), and smgr_bulk_write().

Referenced by bthandler().

◆ btbulkdelete()

IndexBulkDeleteResult * btbulkdelete ( IndexVacuumInfo *  info,
IndexBulkDeleteResult *  stats,
IndexBulkDeleteCallback  callback,
void *  callback_state 
)

Definition at line 1134 of file nbtree.c.

1136{
1137 Relation rel = info->index;
1138 BTCycleId cycleid;
1139
1140 /* allocate stats if first time through, else re-use existing struct */
1141 if (stats == NULL)
1142 stats = palloc0_object(IndexBulkDeleteResult);
1143
1144 /* Establish the vacuum cycle ID to use for this scan */
1145 /* The ENSURE stuff ensures we clean up shared memory on failure */
1146 PG_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
1147 {
1148 cycleid = _bt_start_vacuum(rel);
1149
1150 btvacuumscan(info, stats, callback, callback_state, cycleid);
1151 }
1152 PG_END_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
1153 _bt_end_vacuum(rel);
1154
1155 return stats;
1156}

References _bt_end_vacuum(), _bt_end_vacuum_callback(), _bt_start_vacuum(), btvacuumscan(), callback(), IndexVacuumInfo::index, palloc0_object, PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, and PointerGetDatum().

Referenced by bthandler().
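
The callback (IndexBulkDeleteCallback, from access/genam.h) is consulted for each heap TID found in the index; returning true marks that index entry for deletion. A minimal sketch of a conforming callback; the DeadTidArray state struct is hypothetical (vacuumlazy.c's real callback probes its dead-TID store rather than a linear array):

#include "postgres.h"
#include "access/genam.h"
#include "storage/itemptr.h"

/* Hypothetical callback state: an array of known-dead heap TIDs */
typedef struct DeadTidArray
{
    ItemPointerData *tids;
    int         ntids;
} DeadTidArray;

/* Matches IndexBulkDeleteCallback: true => delete this index entry */
static bool
dead_tid_callback(ItemPointer itemptr, void *state)
{
    DeadTidArray *dead = (DeadTidArray *) state;

    for (int i = 0; i < dead->ntids; i++)
    {
        if (ItemPointerEquals(itemptr, &dead->tids[i]))
            return true;
    }
    return false;
}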

◆ btcanreturn()

bool btcanreturn ( Relation  index,
int  attno 
)

Definition at line 1814 of file nbtree.c.

1815{
1816 return true;
1817}

Referenced by bthandler().

◆ btendscan()

void btendscan ( IndexScanDesc  scan)

Definition at line 470 of file nbtree.c.

471{
472 BTScanOpaque so = (BTScanOpaque) scan->opaque;
473
474 /* we aren't holding any read locks, but gotta drop the pins */
475 if (BTScanPosIsValid(so->currPos))
476 {
477 /* Before leaving current page, deal with any killed items */
478 if (so->numKilled > 0)
479 _bt_killitems(scan);
480 BTScanPosUnpinIfPinned(so->currPos);
481 }
482
483 so->markItemIndex = -1;
484 BTScanPosUnpinIfPinned(so->markPos);
485
486 /* No need to invalidate positions, the RAM is about to be freed. */
487
488 /* Release storage */
489 if (so->keyData != NULL)
490 pfree(so->keyData);
491 /* so->arrayKeys and so->orderProcs are in arrayContext */
492 if (so->arrayContext != NULL)
493 MemoryContextDelete(so->arrayContext);
494 if (so->killedItems != NULL)
495 pfree(so->killedItems);
496 if (so->currTuples != NULL)
497 pfree(so->currTuples);
498 /* so->markTuples should not be pfree'd, see btrescan */
499 pfree(so);
500}

References _bt_killitems(), BTScanOpaqueData::arrayContext, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, if(), BTScanOpaqueData::keyData, BTScanOpaqueData::killedItems, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, MemoryContextDelete(), BTScanOpaqueData::numKilled, IndexScanDescData::opaque, and pfree().

Referenced by bthandler().

◆ btestimateparallelscan()

Size btestimateparallelscan ( Relation  rel,
int  nkeys,
int  norderbys 
)

Definition at line 590 of file nbtree.c.

591{
592 int nkeyatts = IndexRelationGetNumberOfKeyAttributes(rel);
593 Size estnbtreeshared,
594 genericattrspace;
595
596 /*
597 * Pessimistically assume that every input scan key will be output with
598 * its own SAOP array
599 */
600 estnbtreeshared = offsetof(BTParallelScanDescData, btps_arrElems) +
601 sizeof(int) * nkeys;
602
603 /* Single column indexes cannot possibly use a skip array */
604 if (nkeyatts == 1)
605 return estnbtreeshared;
606
607 /*
608 * Pessimistically assume that all attributes prior to the least
609 * significant attribute require a skip array (and an associated key)
610 */
611 genericattrspace = datumEstimateSpace((Datum) 0, false, true,
612 sizeof(Datum));
613 for (int attnum = 1; attnum < nkeyatts; attnum++)
614 {
615 CompactAttribute *attr;
616
617 /*
618 * We make the conservative assumption that every index column will
619 * also require a skip array.
620 *
621 * Every skip array must have space to store its scan key's sk_flags.
622 */
623 estnbtreeshared = add_size(estnbtreeshared, sizeof(int));
624
625 /* Consider space required to store a datum of opclass input type */
626 attr = TupleDescCompactAttr(rel->rd_att, attnum - 1);
627 if (attr->attbyval)
628 {
629 /* This index attribute stores pass-by-value datums */
630 Size estfixed = datumEstimateSpace((Datum) 0, false,
631 true, attr->attlen);
632
633 estnbtreeshared = add_size(estnbtreeshared, estfixed);
634 continue;
635 }
636
637 /*
638 * This index attribute stores pass-by-reference datums.
639 *
640 * Assume that serializing this array will use just as much space as a
641 * pass-by-value datum, in addition to space for the largest possible
642 * whole index tuple (this is not just a per-datum portion of the
643 * largest possible tuple because that'd be almost as large anyway).
644 *
645 * This is quite conservative, but it's not clear how we could do much
646 * better. The executor requires an up-front storage request size
647 * that reliably covers the scan's high watermark memory usage. We
648 * can't be sure of the real high watermark until the scan is over.
649 */
650 estnbtreeshared = add_size(estnbtreeshared, genericattrspace);
651 estnbtreeshared = add_size(estnbtreeshared, BTMaxItemSize);
652 }
653
654 return estnbtreeshared;
655}

References add_size(), CompactAttribute::attbyval, CompactAttribute::attlen, attnum, BTMaxItemSize, BTParallelScanDescData::btps_arrElems, datumEstimateSpace(), IndexRelationGetNumberOfKeyAttributes, RelationData::rd_att, and TupleDescCompactAttr().

Referenced by bthandler().
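
The returned size only reserves worst-case room in the parallel DSM segment; nothing is written until btinitparallelscan() initializes the same area. A condensed sketch of how the executor's index_parallelscan_estimate()/index_parallelscan_initialize() machinery pairs the two (shm_ptr is a hypothetical pointer to the AM-specific part of the segment, at ps_offset_am):

Size   am_size = btestimateparallelscan(rel, nkeys, norderbys); /* worst case */

/* ... executor reserves am_size bytes for the AM in the DSM segment ... */

btinitparallelscan(shm_ptr); /* btps_pageStatus = BTPARALLEL_NOT_INITIALIZED */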

◆ btgetbitmap()

int64 btgetbitmap ( IndexScanDesc  scan,
TIDBitmap *  tbm 
)

Definition at line 288 of file nbtree.c.

289{
290 BTScanOpaque so = (BTScanOpaque) scan->opaque;
291 int64 ntids = 0;
292 ItemPointer heapTid;
293
294 Assert(scan->heapRelation == NULL);
295
296 /* Each loop iteration performs another primitive index scan */
297 do
298 {
299 /* Fetch the first page & tuple */
300 if (_bt_first(scan, ForwardScanDirection))
301 {
302 /* Save tuple ID, and continue scanning */
303 heapTid = &scan->xs_heaptid;
304 tbm_add_tuples(tbm, heapTid, 1, false);
305 ntids++;
306
307 for (;;)
308 {
309 /*
310 * Advance to next tuple within page. This is the same as the
311 * easy case in _bt_next().
312 */
313 if (++so->currPos.itemIndex > so->currPos.lastItem)
314 {
315 /* let _bt_next do the heavy lifting */
316 if (!_bt_next(scan, ForwardScanDirection))
317 break;
318 }
319
320 /* Save tuple ID, and continue scanning */
321 heapTid = &so->currPos.items[so->currPos.itemIndex].heapTid;
322 tbm_add_tuples(tbm, heapTid, 1, false);
323 ntids++;
324 }
325 }
326 /* Now see if we need another primitive index scan */
327 } while (so->numArrayKeys && _bt_start_prim_scan(scan));
328
329 return ntids;
330}

References _bt_first(), _bt_next(), _bt_start_prim_scan(), Assert(), BTScanOpaqueData::currPos, ForwardScanDirection, IndexScanDescData::heapRelation, BTScanPosItem::heapTid, BTScanPosData::itemIndex, BTScanPosData::items, BTScanPosData::lastItem, BTScanOpaqueData::numArrayKeys, IndexScanDescData::opaque, tbm_add_tuples(), and IndexScanDescData::xs_heaptid.

Referenced by bthandler().
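
btgetbitmap() is reached through the amgetbitmap pointer during bitmap index scans. A minimal sketch via the AM-agnostic wrapper, assuming the tbm_create() signature that takes a byte budget and an optional shared DSA area (NULL for a backend-local bitmap):

#include "postgres.h"
#include "access/genam.h"
#include "nodes/tidbitmap.h"

/* Collect every TID the (already rescanned) index scan would return */
static int64
collect_tids(IndexScanDesc scan, long maxbytes)
{
    TIDBitmap  *tbm = tbm_create(maxbytes, NULL);
    int64       ntids = index_getbitmap(scan, tbm); /* reaches btgetbitmap() */

    /* ... a bitmap heap scan would consume tbm here ... */
    tbm_free(tbm);
    return ntids;
}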

◆ btgettreeheight()

int btgettreeheight ( Relation  rel)

Definition at line 1823 of file nbtree.c.

1824{
1825 return _bt_getrootheight(rel);
1826}

References _bt_getrootheight().

Referenced by bthandler().

◆ btgettuple()

bool btgettuple ( IndexScanDesc  scan,
ScanDirection  dir 
)

Definition at line 227 of file nbtree.c.

228{
229 BTScanOpaque so = (BTScanOpaque) scan->opaque;
230 bool res;
231
232 Assert(scan->heapRelation != NULL);
233
234 /* btree indexes are never lossy */
235 scan->xs_recheck = false;
236
237 /* Each loop iteration performs another primitive index scan */
238 do
239 {
240 /*
241 * If we've already initialized this scan, we can just advance it in
242 * the appropriate direction. If we haven't done so yet, we call
243 * _bt_first() to get the first item in the scan.
244 */
245 if (!BTScanPosIsValid(so->currPos))
246 res = _bt_first(scan, dir);
247 else
248 {
249 /*
250 * Check to see if we should kill the previously-fetched tuple.
251 */
252 if (scan->kill_prior_tuple)
253 {
254 /*
255 * Yes, remember it for later. (We'll deal with all such
256 * tuples at once right before leaving the index page.) The
257 * test for numKilled overrun is not just paranoia: if the
258 * caller reverses direction in the indexscan then the same
259 * item might get entered multiple times. It's not worth
260 * trying to optimize that, so we don't detect it, but instead
261 * just forget any excess entries.
262 */
263 if (so->killedItems == NULL)
264 so->killedItems = palloc_array(int, MaxTIDsPerBTreePage);
265 if (so->numKilled < MaxTIDsPerBTreePage)
266 so->killedItems[so->numKilled++] = so->currPos.itemIndex;
267 }
268
269 /*
270 * Now continue the scan.
271 */
272 res = _bt_next(scan, dir);
273 }
274
275 /* If we have a tuple, return it ... */
276 if (res)
277 break;
278 /* ... otherwise see if we need another primitive index scan */
279 } while (so->numArrayKeys && _bt_start_prim_scan(scan));
280
281 return res;
282}

References _bt_first(), _bt_next(), _bt_start_prim_scan(), Assert(), BTScanPosIsValid, BTScanOpaqueData::currPos, IndexScanDescData::heapRelation, BTScanPosData::itemIndex, IndexScanDescData::kill_prior_tuple, BTScanOpaqueData::killedItems, MaxTIDsPerBTreePage, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, palloc_array, and IndexScanDescData::xs_recheck.

Referenced by bthandler().
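
The killedItems bookkeeping above depends on the caller toggling scan->kill_prior_tuple between fetches. A condensed sketch of that caller-side protocol; tuple_known_dead() is a hypothetical visibility check (indexam.c and the executor do this for real scans):

scan->kill_prior_tuple = false;

while (index_getnext_tid(scan, ForwardScanDirection) != NULL)
{
    /*
     * If the fetched heap tuple proves dead to every transaction, tell the
     * next btgettuple() call to remember the index entry in
     * so->killedItems[] so the page's items can be marked LP_DEAD later.
     */
    scan->kill_prior_tuple = tuple_known_dead(scan); /* hypothetical */
}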

◆ bthandler()

Datum bthandler ( PG_FUNCTION_ARGS  )

Definition at line 116 of file nbtree.c.

117{
118 IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
119
120 amroutine->amstrategies = BTMaxStrategyNumber;
121 amroutine->amsupport = BTNProcs;
122 amroutine->amoptsprocnum = BTOPTIONS_PROC;
123 amroutine->amcanorder = true;
124 amroutine->amcanorderbyop = false;
125 amroutine->amcanhash = false;
126 amroutine->amconsistentequality = true;
127 amroutine->amconsistentordering = true;
128 amroutine->amcanbackward = true;
129 amroutine->amcanunique = true;
130 amroutine->amcanmulticol = true;
131 amroutine->amoptionalkey = true;
132 amroutine->amsearcharray = true;
133 amroutine->amsearchnulls = true;
134 amroutine->amstorage = false;
135 amroutine->amclusterable = true;
136 amroutine->ampredlocks = true;
137 amroutine->amcanparallel = true;
138 amroutine->amcanbuildparallel = true;
139 amroutine->amcaninclude = true;
140 amroutine->amusemaintenanceworkmem = false;
141 amroutine->amsummarizing = false;
142 amroutine->amparallelvacuumoptions =
143 VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_COND_CLEANUP;
144 amroutine->amkeytype = InvalidOid;
145
146 amroutine->ambuild = btbuild;
147 amroutine->ambuildempty = btbuildempty;
148 amroutine->aminsert = btinsert;
149 amroutine->aminsertcleanup = NULL;
150 amroutine->ambulkdelete = btbulkdelete;
151 amroutine->amvacuumcleanup = btvacuumcleanup;
152 amroutine->amcanreturn = btcanreturn;
153 amroutine->amcostestimate = btcostestimate;
154 amroutine->amgettreeheight = btgettreeheight;
155 amroutine->amoptions = btoptions;
156 amroutine->amproperty = btproperty;
158 amroutine->amvalidate = btvalidate;
159 amroutine->amadjustmembers = btadjustmembers;
160 amroutine->ambeginscan = btbeginscan;
161 amroutine->amrescan = btrescan;
162 amroutine->amgettuple = btgettuple;
163 amroutine->amgetbitmap = btgetbitmap;
164 amroutine->amendscan = btendscan;
165 amroutine->ammarkpos = btmarkpos;
166 amroutine->amrestrpos = btrestrpos;
167 amroutine->amestimateparallelscan = btestimateparallelscan;
168 amroutine->aminitparallelscan = btinitparallelscan;
169 amroutine->amparallelrescan = btparallelrescan;
170 amroutine->amtranslatestrategy = bttranslatestrategy;
171 amroutine->amtranslatecmptype = bttranslatecmptype;
172
173 PG_RETURN_POINTER(amroutine);
174}

References IndexAmRoutine::amadjustmembers, IndexAmRoutine::ambeginscan, IndexAmRoutine::ambuild, IndexAmRoutine::ambuildempty, IndexAmRoutine::ambuildphasename, IndexAmRoutine::ambulkdelete, IndexAmRoutine::amcanbackward, IndexAmRoutine::amcanbuildparallel, IndexAmRoutine::amcanhash, IndexAmRoutine::amcaninclude, IndexAmRoutine::amcanmulticol, IndexAmRoutine::amcanorder, IndexAmRoutine::amcanorderbyop, IndexAmRoutine::amcanparallel, IndexAmRoutine::amcanreturn, IndexAmRoutine::amcanunique, IndexAmRoutine::amclusterable, IndexAmRoutine::amconsistentequality, IndexAmRoutine::amconsistentordering, IndexAmRoutine::amcostestimate, IndexAmRoutine::amendscan, IndexAmRoutine::amestimateparallelscan, IndexAmRoutine::amgetbitmap, IndexAmRoutine::amgettreeheight, IndexAmRoutine::amgettuple, IndexAmRoutine::aminitparallelscan, IndexAmRoutine::aminsert, IndexAmRoutine::aminsertcleanup, IndexAmRoutine::amkeytype, IndexAmRoutine::ammarkpos, IndexAmRoutine::amoptionalkey, IndexAmRoutine::amoptions, IndexAmRoutine::amoptsprocnum, IndexAmRoutine::amparallelrescan, IndexAmRoutine::amparallelvacuumoptions, IndexAmRoutine::ampredlocks, IndexAmRoutine::amproperty, IndexAmRoutine::amrescan, IndexAmRoutine::amrestrpos, IndexAmRoutine::amsearcharray, IndexAmRoutine::amsearchnulls, IndexAmRoutine::amstorage, IndexAmRoutine::amstrategies, IndexAmRoutine::amsummarizing, IndexAmRoutine::amsupport, IndexAmRoutine::amtranslatecmptype, IndexAmRoutine::amtranslatestrategy, IndexAmRoutine::amusemaintenanceworkmem, IndexAmRoutine::amvacuumcleanup, IndexAmRoutine::amvalidate, btadjustmembers(), btbeginscan(), btbuild(), btbuildempty(), btbuildphasename(), btbulkdelete(), btcanreturn(), btcostestimate(), btendscan(), btestimateparallelscan(), btgetbitmap(), btgettreeheight(), btgettuple(), btinitparallelscan(), btinsert(), btmarkpos(), BTMaxStrategyNumber, BTNProcs, btoptions(), BTOPTIONS_PROC, btparallelrescan(), btproperty(), btrescan(), btrestrpos(), bttranslatecmptype(), bttranslatestrategy(), btvacuumcleanup(), btvalidate(), InvalidOid, makeNode, PG_RETURN_POINTER, VACUUM_OPTION_PARALLEL_BULKDEL, and VACUUM_OPTION_PARALLEL_COND_CLEANUP.
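
Callers do not invoke bthandler() directly: the server looks up the handler OID in pg_am and calls it through GetIndexAmRoutine() from amapi.h. A minimal sketch:

#include "postgres.h"
#include "access/amapi.h"
#include "access/nbtree.h"

/* Obtain the btree routine table from its pg_am handler function OID */
static IndexAmRoutine *
get_btree_routine(Oid bthandler_oid)
{
    IndexAmRoutine *amroutine = GetIndexAmRoutine(bthandler_oid);

    /* every callback assigned in bthandler() is now available */
    Assert(amroutine->ambeginscan == btbeginscan);
    return amroutine;
}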

◆ btinitparallelscan()

void btinitparallelscan ( void *  target)

Definition at line 826 of file nbtree.c.

827{
828 BTParallelScanDesc bt_target = (BTParallelScanDesc) target;
829
830 LWLockInitialize(&bt_target->btps_lock,
831 LWTRANCHE_PARALLEL_BTREE_SCAN);
832 bt_target->btps_nextScanPage = InvalidBlockNumber;
833 bt_target->btps_lastCurrPage = InvalidBlockNumber;
834 bt_target->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
835 ConditionVariableInit(&bt_target->btps_cv);
836}

References BTPARALLEL_NOT_INITIALIZED, BTParallelScanDescData::btps_cv, BTParallelScanDescData::btps_lastCurrPage, BTParallelScanDescData::btps_lock, BTParallelScanDescData::btps_nextScanPage, BTParallelScanDescData::btps_pageStatus, ConditionVariableInit(), InvalidBlockNumber, and LWLockInitialize().

Referenced by bthandler().

◆ btinsert()

bool btinsert ( Relation  rel,
Datum *  values,
bool *  isnull,
ItemPointer  ht_ctid,
Relation  heapRel,
IndexUniqueCheck  checkUnique,
bool  indexUnchanged,
IndexInfo *  indexInfo 
)

Definition at line 203 of file nbtree.c.

208{
209 bool result;
210 IndexTuple itup;
211
212 /* generate an index tuple */
213 itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
214 itup->t_tid = *ht_ctid;
215
216 result = _bt_doinsert(rel, itup, checkUnique, indexUnchanged, heapRel);
217
218 pfree(itup);
219
220 return result;
221}

References _bt_doinsert(), index_form_tuple(), pfree(), RelationGetDescr, IndexTupleData::t_tid, and values.

Referenced by bthandler().
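
btinsert() is normally reached through the aminsert pointer via index_insert(). A minimal sketch for a single-column, non-unique index (values/isnull must have one entry per index column; a unique index would pass UNIQUE_CHECK_YES or UNIQUE_CHECK_PARTIAL instead):

#include "postgres.h"
#include "access/genam.h"
#include "nodes/execnodes.h"

/* Insert one (value, heap TID) pair through the AM-agnostic wrapper */
static bool
insert_index_entry(Relation heap, Relation index, Datum value,
                   ItemPointer heap_tid, IndexInfo *indexInfo)
{
    Datum       values[1] = {value};
    bool        isnull[1] = {false};

    return index_insert(index, values, isnull, heap_tid, heap,
                        UNIQUE_CHECK_NO, /* non-unique index */
                        false,           /* indexUnchanged */
                        indexInfo);
}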

◆ btmarkpos()

void btmarkpos ( IndexScanDesc  scan)

Definition at line 506 of file nbtree.c.

507{
508 BTScanOpaque so = (BTScanOpaque) scan->opaque;
509
510 /* There may be an old mark with a pin (but no lock). */
511 BTScanPosUnpinIfPinned(so->markPos);
512
513 /*
514 * Just record the current itemIndex. If we later step to next page
515 * before releasing the marked position, _bt_steppage makes a full copy of
516 * the currPos struct in markPos. If (as often happens) the mark is moved
517 * before we leave the page, we don't have to do that work.
518 */
519 if (BTScanPosIsValid(so->currPos))
520 so->markItemIndex = so->currPos.itemIndex;
521 else
522 {
523 BTScanPosInvalidate(so->markPos);
524 so->markItemIndex = -1;
525 }
526}

References BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanPosData::itemIndex, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, and IndexScanDescData::opaque.

Referenced by bthandler().

◆ btparallelrescan()

void btparallelrescan ( IndexScanDesc  scan)

Definition at line 842 of file nbtree.c.

843{
844 BTParallelScanDesc btscan;
845 ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
846
847 Assert(parallel_scan);
848
849 btscan = (BTParallelScanDesc) OffsetToPointer(parallel_scan,
850 parallel_scan->ps_offset_am);
851
852 /*
853 * In theory, we don't need to acquire the LWLock here, because there
854 * shouldn't be any other workers running at this point, but we do so for
855 * consistency.
856 */
857 LWLockAcquire(&btscan->btps_lock, LW_EXCLUSIVE);
858 btscan->btps_nextScanPage = InvalidBlockNumber;
859 btscan->btps_lastCurrPage = InvalidBlockNumber;
860 btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
861 LWLockRelease(&btscan->btps_lock);
862}

References Assert(), BTPARALLEL_NOT_INITIALIZED, BTParallelScanDescData::btps_lastCurrPage, BTParallelScanDescData::btps_lock, BTParallelScanDescData::btps_nextScanPage, BTParallelScanDescData::btps_pageStatus, InvalidBlockNumber, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), OffsetToPointer, IndexScanDescData::parallel_scan, and ParallelIndexScanDescData::ps_offset_am.

Referenced by bthandler().

◆ btreevacuumposting()

static BTVacuumPosting btreevacuumposting ( BTVacState *  vstate,
IndexTuple  posting,
OffsetNumber  updatedoffset,
int *  nremaining 
)
static

Definition at line 1765 of file nbtree.c.

1767{
1768 int live = 0;
1769 int nitem = BTreeTupleGetNPosting(posting);
1770 ItemPointer items = BTreeTupleGetPosting(posting);
1771 BTVacuumPosting vacposting = NULL;
1772
1773 for (int i = 0; i < nitem; i++)
1774 {
1775 if (!vstate->callback(items + i, vstate->callback_state))
1776 {
1777 /* Live table TID */
1778 live++;
1779 }
1780 else if (vacposting == NULL)
1781 {
1782 /*
1783 * First dead table TID encountered.
1784 *
1785 * It's now clear that we need to delete one or more dead table
1786 * TIDs, so start maintaining metadata describing how to update
1787 * existing posting list tuple.
1788 */
1789 vacposting = palloc(offsetof(BTVacuumPostingData, deletetids) +
1790 nitem * sizeof(uint16));
1791
1792 vacposting->itup = posting;
1793 vacposting->updatedoffset = updatedoffset;
1794 vacposting->ndeletedtids = 0;
1795 vacposting->deletetids[vacposting->ndeletedtids++] = i;
1796 }
1797 else
1798 {
1799 /* Second or subsequent dead table TID */
1800 vacposting->deletetids[vacposting->ndeletedtids++] = i;
1801 }
1802 }
1803
1804 *nremaining = live;
1805 return vacposting;
1806}

References BTreeTupleGetNPosting(), BTreeTupleGetPosting(), BTVacState::callback, BTVacState::callback_state, BTVacuumPostingData::deletetids, i, items, BTVacuumPostingData::itup, BTVacuumPostingData::ndeletedtids, palloc(), and BTVacuumPostingData::updatedoffset.

Referenced by btvacuumpage().

◆ btrescan()

void btrescan ( IndexScanDesc  scan,
ScanKey  scankey,
int  nscankeys,
ScanKey  orderbys,
int  norderbys 
)

Definition at line 385 of file nbtree.c.

387{
388 BTScanOpaque so = (BTScanOpaque) scan->opaque;
389
390 /* we aren't holding any read locks, but gotta drop the pins */
391 if (BTScanPosIsValid(so->currPos))
392 {
393 /* Before leaving current page, deal with any killed items */
394 if (so->numKilled > 0)
395 _bt_killitems(scan);
396 BTScanPosUnpinIfPinned(so->currPos);
397 BTScanPosInvalidate(so->currPos);
398 }
399
400 /*
401 * We prefer to eagerly drop leaf page pins before btgettuple returns.
402 * This avoids making VACUUM wait to acquire a cleanup lock on the page.
403 *
404 * We cannot safely drop leaf page pins during index-only scans due to a
405 * race condition involving VACUUM setting pages all-visible in the VM.
406 * It's also unsafe for plain index scans that use a non-MVCC snapshot.
407 *
408 * When we drop pins eagerly, the mechanism that marks so->killedItems[]
409 * index tuples LP_DEAD has to deal with concurrent TID recycling races.
410 * The scheme used to detect unsafe TID recycling won't work when scanning
411 * unlogged relations (since it involves saving an affected page's LSN).
412 * Opt out of eager pin dropping during unlogged relation scans for now
413 * (this is preferable to opting out of kill_prior_tuple LP_DEAD setting).
414 *
415 * Also opt out of dropping leaf page pins eagerly during bitmap scans.
416 * Pins cannot be held for more than an instant during bitmap scans either
417 * way, so we might as well avoid wasting cycles on acquiring page LSNs.
418 *
419 * See nbtree/README section on making concurrent TID recycling safe.
420 *
421 * Note: so->dropPin should never change across rescans.
422 */
423 so->dropPin = (!scan->xs_want_itup &&
424 IsMVCCSnapshot(scan->xs_snapshot) &&
425 RelationNeedsWAL(scan->indexRelation) &&
426 scan->heapRelation != NULL);
427
428 so->markItemIndex = -1;
429 so->needPrimScan = false;
430 so->scanBehind = false;
431 so->oppositeDirCheck = false;
432 BTScanPosUnpinIfPinned(so->markPos);
433 BTScanPosInvalidate(so->markPos);
434
435 /*
436 * Allocate tuple workspace arrays, if needed for an index-only scan and
437 * not already done in a previous rescan call. To save on palloc
438 * overhead, both workspaces are allocated as one palloc block; only this
439 * function and btendscan know that.
440 *
441 * NOTE: this data structure also makes it safe to return data from a
442 * "name" column, even though btree name_ops uses an underlying storage
443 * datatype of cstring. The risk there is that "name" is supposed to be
444 * padded to NAMEDATALEN, but the actual index tuple is probably shorter.
445 * However, since we only return data out of tuples sitting in the
446 * currTuples array, a fetch of NAMEDATALEN bytes can at worst pull some
447 * data out of the markTuples array --- running off the end of memory for
448 * a SIGSEGV is not possible. Yeah, this is ugly as sin, but it beats
449 * adding special-case treatment for name_ops elsewhere.
450 */
451 if (scan->xs_want_itup && so->currTuples == NULL)
452 {
453 so->currTuples = (char *) palloc(BLCKSZ * 2);
454 so->markTuples = so->currTuples + BLCKSZ;
455 }
456
457 /*
458 * Reset the scan keys
459 */
460 if (scankey && scan->numberOfKeys > 0)
461 memcpy(scan->keyData, scankey, scan->numberOfKeys * sizeof(ScanKeyData));
462 so->numberOfKeys = 0; /* until _bt_preprocess_keys sets it */
463 so->numArrayKeys = 0; /* ditto */
464}

References _bt_killitems(), BTScanPosInvalidate, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanOpaqueData::dropPin, IndexScanDescData::heapRelation, if(), IndexScanDescData::indexRelation, IsMVCCSnapshot, IndexScanDescData::keyData, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanOpaqueData::needPrimScan, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numberOfKeys, IndexScanDescData::numberOfKeys, BTScanOpaqueData::numKilled, IndexScanDescData::opaque, BTScanOpaqueData::oppositeDirCheck, palloc(), RelationNeedsWAL, BTScanOpaqueData::scanBehind, IndexScanDescData::xs_snapshot, and IndexScanDescData::xs_want_itup.

Referenced by bthandler().

◆ btrestrpos()

void btrestrpos ( IndexScanDesc  scan)

Definition at line 532 of file nbtree.c.

533{
534 BTScanOpaque so = (BTScanOpaque) scan->opaque;
535
536 if (so->markItemIndex >= 0)
537 {
538 /*
539 * The scan has never moved to a new page since the last mark. Just
540 * restore the itemIndex.
541 *
542 * NB: In this case we can't count on anything in so->markPos to be
543 * accurate.
544 */
545 so->currPos.itemIndex = so->markItemIndex;
546 }
547 else
548 {
549 /*
550 * The scan moved to a new page after last mark or restore, and we are
551 * now restoring to the marked page. We aren't holding any read
552 * locks, but if we're still holding the pin for the current position,
553 * we must drop it.
554 */
555 if (BTScanPosIsValid(so->currPos))
556 {
557 /* Before leaving current page, deal with any killed items */
558 if (so->numKilled > 0)
559 _bt_killitems(scan);
560 BTScanPosUnpinIfPinned(so->currPos);
561 }
562
563 if (BTScanPosIsValid(so->markPos))
564 {
565 /* bump pin on mark buffer for assignment to current buffer */
566 if (BTScanPosIsPinned(so->markPos))
567 IncrBufferRefCount(so->markPos.buf);
568 memcpy(&so->currPos, &so->markPos,
569 offsetof(BTScanPosData, items[1]) +
570 so->markPos.lastItem * sizeof(BTScanPosItem));
571 if (so->currTuples)
572 memcpy(so->currTuples, so->markTuples,
573 so->markPos.nextTupleOffset);
574 /* Reset the scan's array keys (see _bt_steppage for why) */
575 if (so->numArrayKeys)
576 {
577 _bt_start_array_keys(scan, so->currPos.dir);
578 so->needPrimScan = false;
579 }
580 }
581 else
582 BTScanPosInvalidate(so->currPos);
583 }
584}
void IncrBufferRefCount(Buffer buffer)
Definition: bufmgr.c:5398
void _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir)
Definition: nbtreadpage.c:537
#define BTScanPosIsPinned(scanpos)
Definition: nbtree.h:1004
Buffer buf
Definition: nbtree.h:964
int nextTupleOffset
Definition: nbtree.h:979
ScanDirection dir
Definition: nbtree.h:973

References _bt_killitems(), _bt_start_array_keys(), BTScanPosInvalidate, BTScanPosIsPinned, BTScanPosIsValid, BTScanPosUnpinIfPinned, BTScanPosData::buf, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanPosData::dir, if(), IncrBufferRefCount(), BTScanPosData::itemIndex, items, BTScanPosData::lastItem, BTScanOpaqueData::markItemIndex, BTScanOpaqueData::markPos, BTScanOpaqueData::markTuples, BTScanOpaqueData::needPrimScan, BTScanPosData::nextTupleOffset, BTScanOpaqueData::numArrayKeys, BTScanOpaqueData::numKilled, and IndexScanDescData::opaque.

Referenced by bthandler().
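
Example (not part of nbtree.c): the mark/restore protocol that exercises btmarkpos() and btrestrpos(), e.g. during a merge join, goes through the AM-independent wrappers. A minimal sketch; the helper name peek_next_tid is hypothetical.

#include "postgres.h"
#include "access/genam.h"
#include "access/sdir.h"

/* Hypothetical caller: look at the next TID, then back the scan up. */
static void
peek_next_tid(IndexScanDesc scan)
{
    /* btmarkpos() just records markItemIndex while we stay on the page */
    index_markpos(scan);

    /* advance one tuple */
    (void) index_getnext_tid(scan, ForwardScanDirection);

    /* btrestrpos() restores the mark, re-copying markPos into currPos
     * (and bumping the mark buffer's pin) if the scan left the page */
    index_restrpos(scan);
}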

◆ bttranslatecmptype()

StrategyNumber bttranslatecmptype ( CompareType  cmptype,
Oid  opfamily 
)

Definition at line 1849 of file nbtree.c.

1850{
1851 switch (cmptype)
1852 {
1853 case COMPARE_LT:
1854 return BTLessStrategyNumber;
1855 case COMPARE_LE:
1856 return BTLessEqualStrategyNumber;
1857 case COMPARE_EQ:
1858 return BTEqualStrategyNumber;
1859 case COMPARE_GE:
1860 return BTGreaterEqualStrategyNumber;
1861 case COMPARE_GT:
1862 return BTGreaterStrategyNumber;
1863 default:
1864 return InvalidStrategy;
1865 }
1866}
@ COMPARE_LE
Definition: cmptype.h:35
@ COMPARE_GT
Definition: cmptype.h:38
@ COMPARE_EQ
Definition: cmptype.h:36
@ COMPARE_GE
Definition: cmptype.h:37
@ COMPARE_LT
Definition: cmptype.h:34
#define BTGreaterStrategyNumber
Definition: stratnum.h:33
#define InvalidStrategy
Definition: stratnum.h:24
#define BTLessStrategyNumber
Definition: stratnum.h:29
#define BTEqualStrategyNumber
Definition: stratnum.h:31
#define BTLessEqualStrategyNumber
Definition: stratnum.h:30
#define BTGreaterEqualStrategyNumber
Definition: stratnum.h:32

References BTEqualStrategyNumber, BTGreaterEqualStrategyNumber, BTGreaterStrategyNumber, BTLessEqualStrategyNumber, BTLessStrategyNumber, COMPARE_EQ, COMPARE_GE, COMPARE_GT, COMPARE_LE, COMPARE_LT, and InvalidStrategy.

Referenced by bthandler().

◆ bttranslatestrategy()

CompareType bttranslatestrategy ( StrategyNumber  strategy,
Oid  opfamily 
)

Definition at line 1829 of file nbtree.c.

1830{
1831 switch (strategy)
1832 {
1833 case BTLessStrategyNumber:
1834 return COMPARE_LT;
1835 case BTLessEqualStrategyNumber:
1836 return COMPARE_LE;
1837 case BTEqualStrategyNumber:
1838 return COMPARE_EQ;
1839 case BTGreaterEqualStrategyNumber:
1840 return COMPARE_GE;
1841 case BTGreaterStrategyNumber:
1842 return COMPARE_GT;
1843 default:
1844 return COMPARE_INVALID;
1845 }
1846}
@ COMPARE_INVALID
Definition: cmptype.h:33

References BTEqualStrategyNumber, BTGreaterEqualStrategyNumber, BTGreaterStrategyNumber, BTLessEqualStrategyNumber, BTLessStrategyNumber, COMPARE_EQ, COMPARE_GE, COMPARE_GT, COMPARE_INVALID, COMPARE_LE, and COMPARE_LT.

Referenced by bthandler().
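
As the two listings show, bttranslatestrategy() and bttranslatecmptype() are inverses over the five btree strategies, and neither consults its opfamily argument. A small sketch of the round trip (the check function is hypothetical; it relies on the five strategy numbers being contiguous):

#include "postgres.h"
#include "access/nbtree.h"
#include "access/stratnum.h"

static void
check_translator_round_trip(Oid opfamily)
{
    for (StrategyNumber strat = BTLessStrategyNumber;
         strat <= BTGreaterStrategyNumber; strat++)
    {
        CompareType cmp = bttranslatestrategy(strat, opfamily);

        /* COMPARE_LT .. COMPARE_GT map straight back to strat */
        Assert(bttranslatecmptype(cmp, opfamily) == strat);
    }
}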

◆ btvacuumcleanup()

IndexBulkDeleteResult * btvacuumcleanup ( IndexVacuumInfo info,
IndexBulkDeleteResult stats 
)

Definition at line 1164 of file nbtree.c.

1165{
1166 BlockNumber num_delpages;
1167
1168 /* No-op in ANALYZE ONLY mode */
1169 if (info->analyze_only)
1170 return stats;
1171
1172 /*
1173 * If btbulkdelete was called, we need not do anything (we just maintain
1174 * the information used within _bt_vacuum_needs_cleanup() by calling
1175 * _bt_set_cleanup_info() below).
1176 *
1177 * If btbulkdelete was _not_ called, then we have a choice to make: we
1178 * must decide whether or not a btvacuumscan() call is needed now (i.e.
1179 * whether the ongoing VACUUM operation can entirely avoid a physical scan
1180 * of the index). A call to _bt_vacuum_needs_cleanup() decides it for us
1181 * now.
1182 */
1183 if (stats == NULL)
1184 {
1185 /* Check if VACUUM operation can entirely avoid btvacuumscan() call */
1186 if (!_bt_vacuum_needs_cleanup(info->index))
1187 return NULL;
1188
1189 /*
1190 * Since we aren't going to actually delete any leaf items, there's no
1191 * need to go through all the vacuum-cycle-ID pushups here.
1192 *
1193 * Posting list tuples are a source of inaccuracy for cleanup-only
1194 * scans. btvacuumscan() will assume that the number of index tuples
1195 * from each page can be used as num_index_tuples, even though
1196 * num_index_tuples is supposed to represent the number of TIDs in the
1197 * index. This naive approach can underestimate the number of tuples
1198 * in the index significantly.
1199 *
1200 * We handle the problem by making num_index_tuples an estimate in
1201 * cleanup-only case.
1202 */
1203 stats = palloc0_object(IndexBulkDeleteResult);
1204 btvacuumscan(info, stats, NULL, NULL, 0);
1205 stats->estimated_count = true;
1206 }
1207
1208 /*
1209 * Maintain num_delpages value in metapage for _bt_vacuum_needs_cleanup().
1210 *
1211 * num_delpages is the number of deleted pages now in the index that were
1212 * not safe to place in the FSM to be recycled just yet. num_delpages is
1213 * greater than 0 only when _bt_pagedel() actually deleted pages during
1214 * our call to btvacuumscan(). Even then, _bt_pendingfsm_finalize() must
1215 * have failed to place any newly deleted pages in the FSM just moments
1216 * ago. (Actually, there are edge cases where recycling of the current
1217 * VACUUM's newly deleted pages does not even become safe by the time the
1218 * next VACUUM comes around. See nbtree/README.)
1219 */
1220 Assert(stats->pages_deleted >= stats->pages_free);
1221 num_delpages = stats->pages_deleted - stats->pages_free;
1222 _bt_set_cleanup_info(info->index, num_delpages);
1223
1224 /*
1225 * It's quite possible for us to be fooled by concurrent page splits into
1226 * double-counting some index tuples, so disbelieve any total that exceeds
1227 * the underlying heap's count ... if we know that accurately. Otherwise
1228 * this might just make matters worse.
1229 */
1230 if (!info->estimated_count)
1231 {
1232 if (stats->num_index_tuples > info->num_heap_tuples)
1233 stats->num_index_tuples = info->num_heap_tuples;
1234 }
1235
1236 return stats;
1237}
uint32 BlockNumber
Definition: block.h:31
void _bt_set_cleanup_info(Relation rel, BlockNumber num_delpages)
Definition: nbtpage.c:233
bool _bt_vacuum_needs_cleanup(Relation rel)
Definition: nbtpage.c:180
BlockNumber pages_deleted
Definition: genam.h:109
BlockNumber pages_free
Definition: genam.h:110
double num_index_tuples
Definition: genam.h:106
double num_heap_tuples
Definition: genam.h:79
bool analyze_only
Definition: genam.h:75
bool estimated_count
Definition: genam.h:77

References _bt_set_cleanup_info(), _bt_vacuum_needs_cleanup(), IndexVacuumInfo::analyze_only, Assert(), btvacuumscan(), IndexVacuumInfo::estimated_count, IndexBulkDeleteResult::estimated_count, IndexVacuumInfo::index, IndexVacuumInfo::num_heap_tuples, IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, and palloc0_object.

Referenced by bthandler().
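
For context, a hedged sketch of the calling pattern (the driver function is hypothetical): VACUUM reaches btbulkdelete() and btvacuumcleanup() through the AM-independent wrappers, and btvacuumcleanup() sees stats == NULL exactly when the bulk-delete pass was skipped.

#include "postgres.h"
#include "access/genam.h"

static void
vacuum_one_index(IndexVacuumInfo *info,
                 IndexBulkDeleteCallback callback,
                 void *callback_state,
                 bool have_dead_tuples)
{
    IndexBulkDeleteResult *stats = NULL;

    /* pass 1: only when heap vacuuming found TIDs to remove */
    if (have_dead_tuples)
        stats = index_bulk_delete(info, stats, callback, callback_state);

    /*
     * pass 2: always runs.  With stats == NULL, btvacuumcleanup() asks
     * _bt_vacuum_needs_cleanup() whether the physical scan can be skipped.
     */
    stats = index_vacuum_cleanup(info, stats);

    if (stats)
        pfree(stats);
}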

◆ btvacuumpage()

static BlockNumber btvacuumpage ( BTVacState vstate,
Buffer  buf 
)
static

Definition at line 1427 of file nbtree.c.

1428{
1429 IndexVacuumInfo *info = vstate->info;
1430 IndexBulkDeleteResult *stats = vstate->stats;
1431 IndexBulkDeleteCallback callback = vstate->callback;
1432 void *callback_state = vstate->callback_state;
1433 Relation rel = info->index;
1434 Relation heaprel = info->heaprel;
1435 bool attempt_pagedel;
1436 BlockNumber blkno,
1437 backtrack_to;
1438 BlockNumber scanblkno = BufferGetBlockNumber(buf);
1439 Page page;
1440 BTPageOpaque opaque;
1441
1442 blkno = scanblkno;
1443
1444backtrack:
1445
1446 attempt_pagedel = false;
1447 backtrack_to = P_NONE;
1448
1449 _bt_lockbuf(rel, buf, BT_READ);
1450 page = BufferGetPage(buf);
1451 opaque = NULL;
1452 if (!PageIsNew(page))
1453 {
1454 _bt_checkpage(rel, buf);
1455 opaque = BTPageGetOpaque(page);
1456 }
1457
1458 Assert(blkno <= scanblkno);
1459 if (blkno != scanblkno)
1460 {
1461 /*
1462 * We're backtracking.
1463 *
1464 * We followed a right link to a sibling leaf page (a page that
1465 * happens to be from a block located before scanblkno). The only
1466 * case we want to do anything with is a live leaf page having the
1467 * current vacuum cycle ID.
1468 *
1469 * The page had better be in a state that's consistent with what we
1470 * expect. Check for conditions that imply corruption in passing. It
1471 * can't be half-dead because only an interrupted VACUUM process can
1472 * leave pages in that state, so we'd definitely have dealt with it
1473 * back when the page was the scanblkno page (half-dead pages are
1474 * always marked fully deleted by _bt_pagedel(), barring corruption).
1475 */
1476 if (!opaque || !P_ISLEAF(opaque) || P_ISHALFDEAD(opaque))
1477 {
1478 Assert(false);
1479 ereport(LOG,
1480 (errcode(ERRCODE_INDEX_CORRUPTED),
1481 errmsg_internal("right sibling %u of scanblkno %u unexpectedly in an inconsistent state in index \"%s\"",
1482 blkno, scanblkno, RelationGetRelationName(rel))));
1483 _bt_relbuf(rel, buf);
1484 return scanblkno;
1485 }
1486
1487 /*
1488 * We may have already processed the page in an earlier call, when the
1489 * page was scanblkno. This happens when the leaf page split occurred
1490 * after the scan began, but before the right sibling page became the
1491 * scanblkno.
1492 *
1493 * Page may also have been deleted by current btvacuumpage() call,
1494 * since _bt_pagedel() sometimes deletes the right sibling page of
1495 * scanblkno in passing (it does so after we decided where to
1496 * backtrack to). We don't need to process this page as a deleted
1497 * page a second time now (in fact, it would be wrong to count it as a
1498 * deleted page in the bulk delete statistics a second time).
1499 */
1500 if (opaque->btpo_cycleid != vstate->cycleid || P_ISDELETED(opaque))
1501 {
1502 /* Done with current scanblkno (and all lower split pages) */
1503 _bt_relbuf(rel, buf);
1504 return scanblkno;
1505 }
1506 }
1507
1508 if (!opaque || BTPageIsRecyclable(page, heaprel))
1509 {
1510 /* Okay to recycle this page (which could be leaf or internal) */
1511 RecordFreeIndexPage(rel, blkno);
1512 stats->pages_deleted++;
1513 stats->pages_free++;
1514 }
1515 else if (P_ISDELETED(opaque))
1516 {
1517 /*
1518 * Already deleted page (which could be leaf or internal). Can't
1519 * recycle yet.
1520 */
1521 stats->pages_deleted++;
1522 }
1523 else if (P_ISHALFDEAD(opaque))
1524 {
1525 /* Half-dead leaf page (from interrupted VACUUM) -- finish deleting */
1526 attempt_pagedel = true;
1527
1528 /*
1529 * _bt_pagedel() will increment both pages_newly_deleted and
1530 * pages_deleted stats in all cases (barring corruption)
1531 */
1532 }
1533 else if (P_ISLEAF(opaque))
1534 {
1535 OffsetNumber deletable[MaxIndexTuplesPerPage];
1536 int ndeletable;
1537 BTVacuumPosting updatable[MaxIndexTuplesPerPage];
1538 int nupdatable;
1539 OffsetNumber offnum,
1540 minoff,
1541 maxoff;
1542 int nhtidsdead,
1543 nhtidslive;
1544
1545 /*
1546 * Trade in the initial read lock for a full cleanup lock on this
1547 * page. We must get such a lock on every leaf page over the course
1548 * of the vacuum scan, whether or not it actually contains any
1549 * deletable tuples --- see nbtree/README.
1550 */
1551 _bt_upgradelockbufcleanup(rel, buf);
1552
1553 /*
1554 * Check whether we need to backtrack to earlier pages. What we are
1555 * concerned about is a page split that happened since we started the
1556 * vacuum scan. If the split moved tuples on the right half of the
1557 * split (i.e. the tuples that sort high) to a block that we already
1558 * passed over, then we might have missed the tuples. We need to
1559 * backtrack now. (Must do this before possibly clearing btpo_cycleid
1560 * or deleting scanblkno page below!)
1561 */
1562 if (vstate->cycleid != 0 &&
1563 opaque->btpo_cycleid == vstate->cycleid &&
1564 !(opaque->btpo_flags & BTP_SPLIT_END) &&
1565 !P_RIGHTMOST(opaque) &&
1566 opaque->btpo_next < scanblkno)
1567 backtrack_to = opaque->btpo_next;
1568
1569 ndeletable = 0;
1570 nupdatable = 0;
1571 minoff = P_FIRSTDATAKEY(opaque);
1572 maxoff = PageGetMaxOffsetNumber(page);
1573 nhtidsdead = 0;
1574 nhtidslive = 0;
1575 if (callback)
1576 {
1577 /* btbulkdelete callback tells us what to delete (or update) */
1578 for (offnum = minoff;
1579 offnum <= maxoff;
1580 offnum = OffsetNumberNext(offnum))
1581 {
1582 IndexTuple itup;
1583
1584 itup = (IndexTuple) PageGetItem(page,
1585 PageGetItemId(page, offnum));
1586
1587 Assert(!BTreeTupleIsPivot(itup));
1588 if (!BTreeTupleIsPosting(itup))
1589 {
1590 /* Regular tuple, standard table TID representation */
1591 if (callback(&itup->t_tid, callback_state))
1592 {
1593 deletable[ndeletable++] = offnum;
1594 nhtidsdead++;
1595 }
1596 else
1597 nhtidslive++;
1598 }
1599 else
1600 {
1601 BTVacuumPosting vacposting;
1602 int nremaining;
1603
1604 /* Posting list tuple */
1605 vacposting = btreevacuumposting(vstate, itup, offnum,
1606 &nremaining);
1607 if (vacposting == NULL)
1608 {
1609 /*
1610 * All table TIDs from the posting tuple remain, so no
1611 * delete or update required
1612 */
1613 Assert(nremaining == BTreeTupleGetNPosting(itup));
1614 }
1615 else if (nremaining > 0)
1616 {
1617
1618 /*
1619 * Store metadata about posting list tuple in
1620 * updatable array for entire page. Existing tuple
1621 * will be updated during the later call to
1622 * _bt_delitems_vacuum().
1623 */
1624 Assert(nremaining < BTreeTupleGetNPosting(itup));
1625 updatable[nupdatable++] = vacposting;
1626 nhtidsdead += BTreeTupleGetNPosting(itup) - nremaining;
1627 }
1628 else
1629 {
1630 /*
1631 * All table TIDs from the posting list must be
1632 * deleted. We'll delete the index tuple completely
1633 * (no update required).
1634 */
1635 Assert(nremaining == 0);
1636 deletable[ndeletable++] = offnum;
1637 nhtidsdead += BTreeTupleGetNPosting(itup);
1638 pfree(vacposting);
1639 }
1640
1641 nhtidslive += nremaining;
1642 }
1643 }
1644 }
1645
1646 /*
1647 * Apply any needed deletes or updates. We issue just one
1648 * _bt_delitems_vacuum() call per page, so as to minimize WAL traffic.
1649 */
1650 if (ndeletable > 0 || nupdatable > 0)
1651 {
1652 Assert(nhtidsdead >= ndeletable + nupdatable);
1653 _bt_delitems_vacuum(rel, buf, deletable, ndeletable, updatable,
1654 nupdatable);
1655
1656 stats->tuples_removed += nhtidsdead;
1657 /* must recompute maxoff */
1658 maxoff = PageGetMaxOffsetNumber(page);
1659
1660 /* can't leak memory here */
1661 for (int i = 0; i < nupdatable; i++)
1662 pfree(updatable[i]);
1663 }
1664 else
1665 {
1666 /*
1667 * If the leaf page has been split during this vacuum cycle, it
1668 * seems worth expending a write to clear btpo_cycleid even if we
1669 * don't have any deletions to do. (If we do, _bt_delitems_vacuum
1670 * takes care of this.) This ensures we won't process the page
1671 * again.
1672 *
1673 * We treat this like a hint-bit update because there's no need to
1674 * WAL-log it.
1675 */
1676 Assert(nhtidsdead == 0);
1677 if (vstate->cycleid != 0 &&
1678 opaque->btpo_cycleid == vstate->cycleid)
1679 {
1680 opaque->btpo_cycleid = 0;
1681 MarkBufferDirtyHint(buf, true);
1682 }
1683 }
1684
1685 /*
1686 * If the leaf page is now empty, try to delete it; else count the
1687 * live tuples (live table TIDs in posting lists are counted as
1688 * separate live tuples). We don't delete when backtracking, though,
1689 * since that would require teaching _bt_pagedel() about backtracking
1690 * (doesn't seem worth adding more complexity to deal with that).
1691 *
1692 * We don't count the number of live TIDs during cleanup-only calls to
1693 * btvacuumscan (i.e. when callback is not set). We count the number
1694 * of index tuples directly instead. This avoids the expense of
1695 * directly examining all of the tuples on each page. VACUUM will
1696 * treat num_index_tuples as an estimate in cleanup-only case, so it
1697 * doesn't matter that this underestimates num_index_tuples
1698 * significantly in some cases.
1699 */
1700 if (minoff > maxoff)
1701 attempt_pagedel = (blkno == scanblkno);
1702 else if (callback)
1703 stats->num_index_tuples += nhtidslive;
1704 else
1705 stats->num_index_tuples += maxoff - minoff + 1;
1706
1707 Assert(!attempt_pagedel || nhtidslive == 0);
1708 }
1709
1710 if (attempt_pagedel)
1711 {
1712 MemoryContext oldcontext;
1713
1714 /* Run pagedel in a temp context to avoid memory leakage */
1715 MemoryContextReset(vstate->pagedelcontext);
1716 oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
1717
1718 /*
1719 * _bt_pagedel maintains the bulk delete stats on our behalf;
1720 * pages_newly_deleted and pages_deleted are likely to be incremented
1721 * during call
1722 */
1723 Assert(blkno == scanblkno);
1724 _bt_pagedel(rel, buf, vstate);
1725
1726 MemoryContextSwitchTo(oldcontext);
1727 /* pagedel released buffer, so we shouldn't */
1728 }
1729 else
1730 _bt_relbuf(rel, buf);
1731
1732 if (backtrack_to != P_NONE)
1733 {
1734 blkno = backtrack_to;
1735
1736 /* check for vacuum delay while not holding any buffer lock */
1737 vacuum_delay_point(false);
1738
1739 /*
1740 * We can't use _bt_getbuf() here because it always applies
1741 * _bt_checkpage(), which will barf on an all-zero page. We want to
1742 * recycle all-zero pages, not fail. Also, we want to use a
1743 * nondefault buffer access strategy.
1744 */
1745 buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
1746 info->strategy);
1747 goto backtrack;
1748 }
1749
1750 return scanblkno;
1751}
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:4223
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition: bufmgr.c:5430
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:792
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:436
@ RBM_NORMAL
Definition: bufmgr.h:46
static void * PageGetItem(const PageData *page, const ItemIdData *itemId)
Definition: bufpage.h:353
static bool PageIsNew(const PageData *page)
Definition: bufpage.h:233
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:243
static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)
Definition: bufpage.h:371
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1170
int errcode(int sqlerrcode)
Definition: elog.c:863
#define LOG
Definition: elog.h:31
#define ereport(elevel,...)
Definition: elog.h:150
bool(* IndexBulkDeleteCallback)(ItemPointer itemptr, void *state)
Definition: genam.h:114
void RecordFreeIndexPage(Relation rel, BlockNumber freeBlock)
Definition: indexfsm.c:52
IndexTupleData * IndexTuple
Definition: itup.h:53
#define MaxIndexTuplesPerPage
Definition: itup.h:181
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:400
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:1024
void _bt_pagedel(Relation rel, Buffer leafbuf, BTVacState *vstate)
Definition: nbtpage.c:1801
void _bt_delitems_vacuum(Relation rel, Buffer buf, OffsetNumber *deletable, int ndeletable, BTVacuumPosting *updatable, int nupdatable)
Definition: nbtpage.c:1155
void _bt_checkpage(Relation rel, Buffer buf)
Definition: nbtpage.c:798
void _bt_upgradelockbufcleanup(Relation rel, Buffer buf)
Definition: nbtpage.c:1110
void _bt_lockbuf(Relation rel, Buffer buf, int access)
Definition: nbtpage.c:1040
static BTVacuumPosting btreevacuumposting(BTVacState *vstate, IndexTuple posting, OffsetNumber updatedoffset, int *nremaining)
Definition: nbtree.c:1765
#define P_ISHALFDEAD(opaque)
Definition: nbtree.h:225
static bool BTreeTupleIsPivot(IndexTuple itup)
Definition: nbtree.h:481
#define P_ISLEAF(opaque)
Definition: nbtree.h:221
#define BTPageGetOpaque(page)
Definition: nbtree.h:74
#define P_ISDELETED(opaque)
Definition: nbtree.h:223
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:370
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:220
#define BT_READ
Definition: nbtree.h:730
static bool BTPageIsRecyclable(Page page, Relation heaprel)
Definition: nbtree.h:292
static bool BTreeTupleIsPosting(IndexTuple itup)
Definition: nbtree.h:493
#define BTP_SPLIT_END
Definition: nbtree.h:82
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
static char buf[DEFAULT_XLOG_SEG_SIZE]
Definition: pg_test_fsync.c:71
#define RelationGetRelationName(relation)
Definition: rel.h:549
@ MAIN_FORKNUM
Definition: relpath.h:58
BlockNumber btpo_next
Definition: nbtree.h:66
uint16 btpo_flags
Definition: nbtree.h:68
BTCycleId btpo_cycleid
Definition: nbtree.h:69
IndexBulkDeleteResult * stats
Definition: nbtree.h:334
BTCycleId cycleid
Definition: nbtree.h:337
MemoryContext pagedelcontext
Definition: nbtree.h:338
IndexVacuumInfo * info
Definition: nbtree.h:333
double tuples_removed
Definition: genam.h:107
BufferAccessStrategy strategy
Definition: genam.h:80
Relation heaprel
Definition: genam.h:74
void vacuum_delay_point(bool is_analyze)
Definition: vacuum.c:2426

References _bt_checkpage(), _bt_delitems_vacuum(), _bt_lockbuf(), _bt_pagedel(), _bt_relbuf(), _bt_upgradelockbufcleanup(), Assert(), BT_READ, BTP_SPLIT_END, BTPageGetOpaque, BTPageIsRecyclable(), BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTreeTupleGetNPosting(), BTreeTupleIsPivot(), BTreeTupleIsPosting(), btreevacuumposting(), buf, BufferGetBlockNumber(), BufferGetPage(), BTVacState::callback, callback(), BTVacState::callback_state, BTVacState::cycleid, ereport, errcode(), errmsg_internal(), IndexVacuumInfo::heaprel, i, IndexVacuumInfo::index, BTVacState::info, LOG, MAIN_FORKNUM, MarkBufferDirtyHint(), MaxIndexTuplesPerPage, MemoryContextReset(), MemoryContextSwitchTo(), IndexBulkDeleteResult::num_index_tuples, OffsetNumberNext, P_FIRSTDATAKEY, P_ISDELETED, P_ISHALFDEAD, P_ISLEAF, P_NONE, P_RIGHTMOST, BTVacState::pagedelcontext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageIsNew(), IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, pfree(), RBM_NORMAL, ReadBufferExtended(), RecordFreeIndexPage(), RelationGetRelationName, BTVacState::stats, IndexVacuumInfo::strategy, IndexTupleData::t_tid, IndexBulkDeleteResult::tuples_removed, and vacuum_delay_point().

Referenced by btvacuumscan().
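
A minimal sketch of an IndexBulkDeleteCallback implementation (see the typedef above). The DeadTidSet state layout is an assumption for illustration; the real VACUUM callback answers from its dead-TID store, for which the linear probe below is a stand-in.

#include "postgres.h"
#include "storage/itemptr.h"

/* Hypothetical callback state: an unordered array of dead heap TIDs. */
typedef struct DeadTidSet
{
    ItemPointerData *tids;
    int         ntids;
} DeadTidSet;

/* Matches IndexBulkDeleteCallback: true => delete the entry for itemptr. */
static bool
dead_tid_callback(ItemPointer itemptr, void *state)
{
    DeadTidSet *set = (DeadTidSet *) state;

    for (int i = 0; i < set->ntids; i++)
    {
        if (ItemPointerEquals(itemptr, &set->tids[i]))
            return true;
    }
    return false;
}

btvacuumpage() invokes the callback once per plain tuple TID and, via btreevacuumposting(), once per TID of each posting list tuple.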

◆ btvacuumscan()

static void btvacuumscan ( IndexVacuumInfo info,
IndexBulkDeleteResult stats,
IndexBulkDeleteCallback  callback,
void *  callback_state,
BTCycleId  cycleid 
)
static

Definition at line 1252 of file nbtree.c.

1255{
1256 Relation rel = info->index;
1257 BTVacState vstate;
1258 BlockNumber num_pages;
1259 bool needLock;
1260 BlockRangeReadStreamPrivate p;
1261 ReadStream *stream = NULL;
1262
1263 /*
1264 * Reset fields that track information about the entire index now. This
1265 * avoids double-counting in the case where a single VACUUM command
1266 * requires multiple scans of the index.
1267 *
1268 * Avoid resetting the tuples_removed and pages_newly_deleted fields here,
1269 * since they track information about the VACUUM command, and so must last
1270 * across each call to btvacuumscan().
1271 *
1272 * (Note that pages_free is treated as state about the whole index, not
1273 * the current VACUUM. This is appropriate because RecordFreeIndexPage()
1274 * calls are idempotent, and get repeated for the same deleted pages in
1275 * some scenarios. The point for us is to track the number of recyclable
1276 * pages in the index at the end of the VACUUM command.)
1277 */
1278 stats->num_pages = 0;
1279 stats->num_index_tuples = 0;
1280 stats->pages_deleted = 0;
1281 stats->pages_free = 0;
1282
1283 /* Set up info to pass down to btvacuumpage */
1284 vstate.info = info;
1285 vstate.stats = stats;
1286 vstate.callback = callback;
1287 vstate.callback_state = callback_state;
1288 vstate.cycleid = cycleid;
1289
1290 /* Create a temporary memory context to run _bt_pagedel in */
1292 "_bt_pagedel",
1294
1295 /* Initialize vstate fields used by _bt_pendingfsm_finalize */
1296 vstate.bufsize = 0;
1297 vstate.maxbufsize = 0;
1298 vstate.pendingpages = NULL;
1299 vstate.npendingpages = 0;
1300 /* Consider applying _bt_pendingfsm_finalize optimization */
1301 _bt_pendingfsm_init(rel, &vstate, (callback == NULL));
1302
1303 /*
1304 * The outer loop iterates over all index pages except the metapage, in
1305 * physical order (we hope the kernel will cooperate in providing
1306 * read-ahead for speed). It is critical that we visit all leaf pages,
1307 * including ones added after we start the scan, else we might fail to
1308 * delete some deletable tuples. Hence, we must repeatedly check the
1309 * relation length. We must acquire the relation-extension lock while
1310 * doing so to avoid a race condition: if someone else is extending the
1311 * relation, there is a window where bufmgr/smgr have created a new
1312 * all-zero page but it hasn't yet been write-locked by _bt_getbuf(). If
1313 * we manage to scan such a page here, we'll improperly assume it can be
1314 * recycled. Taking the lock synchronizes things enough to prevent a
1315 * problem: either num_pages won't include the new page, or _bt_getbuf
1316 * already has write lock on the buffer and it will be fully initialized
1317 * before we can examine it. Also, we need not worry if a page is added
1318 * immediately after we look; the page splitting code already has
1319 * write-lock on the left page before it adds a right page, so we must
1320 * already have processed any tuples due to be moved into such a page.
1321 *
1322 * XXX: Now that new pages are locked with RBM_ZERO_AND_LOCK, I don't
1323 * think the use of the extension lock is still required.
1324 *
1325 * We can skip locking for new or temp relations, however, since no one
1326 * else could be accessing them.
1327 */
1328 needLock = !RELATION_IS_LOCAL(rel);
1329
1330 p.current_blocknum = BTREE_METAPAGE + 1;
1331
1332 /*
1333 * It is safe to use batchmode as block_range_read_stream_cb takes no
1334 * locks.
1335 */
1336 stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE |
1337 READ_STREAM_FULL |
1338 READ_STREAM_USE_BATCHING,
1339 info->strategy,
1340 rel,
1341 MAIN_FORKNUM,
1342 block_range_read_stream_cb,
1343 &p,
1344 0);
1345 for (;;)
1346 {
1347 /* Get the current relation length */
1348 if (needLock)
1349 LockRelationForExtension(rel, ExclusiveLock);
1350 num_pages = RelationGetNumberOfBlocks(rel);
1351 if (needLock)
1352 UnlockRelationForExtension(rel, ExclusiveLock);
1353
1354 if (info->report_progress)
1355 pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
1356 num_pages);
1357
1358 /* Quit if we've scanned the whole relation */
1359 if (p.current_blocknum >= num_pages)
1360 break;
1361
1362 p.last_exclusive = num_pages;
1363
1364 /* Iterate over pages, then loop back to recheck relation length */
1365 while (true)
1366 {
1367 BlockNumber current_block;
1368 Buffer buf;
1369
1370 /* call vacuum_delay_point while not holding any buffer lock */
1371 vacuum_delay_point(false);
1372
1373 buf = read_stream_next_buffer(stream, NULL);
1374
1375 if (!BufferIsValid(buf))
1376 break;
1377
1378 current_block = btvacuumpage(&vstate, buf);
1379
1380 if (info->report_progress)
1381 pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1382 current_block);
1383 }
1384
1385 /*
1386 * We have to reset the read stream to use it again. After returning
1387 * InvalidBuffer, the read stream API won't invoke our callback again
1388 * until the stream has been reset.
1389 */
1390 read_stream_reset(stream);
1391 }
1392
1393 read_stream_end(stream);
1394
1395 /* Set statistics num_pages field to final size of index */
1396 stats->num_pages = num_pages;
1397
1398 MemoryContextDelete(vstate.pagedelcontext);
1399
1400 /*
1401 * If there were any calls to _bt_pagedel() during scan of the index then
1402 * see if any of the resulting pages can be placed in the FSM now. When
1403 * it's not safe we'll have to leave it up to a future VACUUM operation.
1404 *
1405 * Finally, if we placed any pages in the FSM (either just now or during
1406 * the scan), forcibly update the upper-level FSM pages to ensure that
1407 * searchers can find them.
1408 */
1409 _bt_pendingfsm_finalize(rel, &vstate);
1410 if (stats->pages_free > 0)
1411 IndexFreeSpaceMapVacuum(rel);
1412}
void pgstat_progress_update_param(int index, int64 val)
int Buffer
Definition: buf.h:23
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:294
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:387
void IndexFreeSpaceMapVacuum(Relation rel)
Definition: indexfsm.c:71
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:424
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:474
#define ExclusiveLock
Definition: lockdefs.h:42
MemoryContext CurrentMemoryContext
Definition: mcxt.c:160
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
void _bt_pendingfsm_finalize(Relation rel, BTVacState *vstate)
Definition: nbtpage.c:3000
void _bt_pendingfsm_init(Relation rel, BTVacState *vstate, bool cleanuponly)
Definition: nbtpage.c:2958
static BlockNumber btvacuumpage(BTVacState *vstate, Buffer buf)
Definition: nbtree.c:1427
#define PROGRESS_SCAN_BLOCKS_DONE
Definition: progress.h:142
#define PROGRESS_SCAN_BLOCKS_TOTAL
Definition: progress.h:141
void read_stream_reset(ReadStream *stream)
Definition: read_stream.c:1044
Buffer read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
Definition: read_stream.c:791
ReadStream * read_stream_begin_relation(int flags, BufferAccessStrategy strategy, Relation rel, ForkNumber forknum, ReadStreamBlockNumberCB callback, void *callback_private_data, size_t per_buffer_data_size)
Definition: read_stream.c:737
void read_stream_end(ReadStream *stream)
Definition: read_stream.c:1089
BlockNumber block_range_read_stream_cb(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition: read_stream.c:162
#define READ_STREAM_MAINTENANCE
Definition: read_stream.h:28
#define READ_STREAM_USE_BATCHING
Definition: read_stream.h:64
#define READ_STREAM_FULL
Definition: read_stream.h:43
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:658
BTPendingFSM * pendingpages
Definition: nbtree.h:345
int npendingpages
Definition: nbtree.h:346
int bufsize
Definition: nbtree.h:343
int maxbufsize
Definition: nbtree.h:344
BlockNumber num_pages
Definition: genam.h:104
bool report_progress
Definition: genam.h:76

References _bt_pendingfsm_finalize(), _bt_pendingfsm_init(), ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, block_range_read_stream_cb(), BTREE_METAPAGE, btvacuumpage(), buf, BufferIsValid(), BTVacState::bufsize, BTVacState::callback, callback(), BTVacState::callback_state, BlockRangeReadStreamPrivate::current_blocknum, CurrentMemoryContext, BTVacState::cycleid, ExclusiveLock, IndexVacuumInfo::index, IndexFreeSpaceMapVacuum(), BTVacState::info, BlockRangeReadStreamPrivate::last_exclusive, LockRelationForExtension(), MAIN_FORKNUM, BTVacState::maxbufsize, MemoryContextDelete(), BTVacState::npendingpages, IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::num_pages, BTVacState::pagedelcontext, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, BTVacState::pendingpages, pgstat_progress_update_param(), PROGRESS_SCAN_BLOCKS_DONE, PROGRESS_SCAN_BLOCKS_TOTAL, read_stream_begin_relation(), read_stream_end(), READ_STREAM_FULL, READ_STREAM_MAINTENANCE, read_stream_next_buffer(), read_stream_reset(), READ_STREAM_USE_BATCHING, RELATION_IS_LOCAL, RelationGetNumberOfBlocks, IndexVacuumInfo::report_progress, BTVacState::stats, IndexVacuumInfo::strategy, UnlockRelationForExtension(), and vacuum_delay_point().

Referenced by btbulkdelete(), and btvacuumcleanup().
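
The read-stream pattern used above generalizes to any block range. A minimal sketch built from the same API shown in this listing (the function is hypothetical; it assumes the default buffer access strategy and no batching or maintenance flags):

#include "postgres.h"
#include "common/relpath.h"
#include "storage/bufmgr.h"
#include "storage/read_stream.h"
#include "utils/rel.h"

static void
scan_block_range(Relation rel, BlockNumber start, BlockNumber end)
{
    BlockRangeReadStreamPrivate p;
    ReadStream *stream;
    Buffer      buf;

    /* block_range_read_stream_cb hands out blocks in [start, end) */
    p.current_blocknum = start;
    p.last_exclusive = end;

    stream = read_stream_begin_relation(READ_STREAM_FULL,
                                        NULL,   /* default strategy */
                                        rel,
                                        MAIN_FORKNUM,
                                        block_range_read_stream_cb,
                                        &p,
                                        0);     /* no per-buffer data */

    while (BufferIsValid(buf = read_stream_next_buffer(stream, NULL)))
    {
        /* ... examine BufferGetPage(buf) here ... */
        ReleaseBuffer(buf);
    }

    read_stream_end(stream);
}

Unlike btvacuumscan(), which must repeatedly recheck the relation length and reset the stream, a fixed range needs only one pass.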