PostgreSQL Source Code  git master
nbtree.h File Reference
#include "access/amapi.h"
#include "access/itup.h"
#include "access/sdir.h"
#include "access/xlogreader.h"
#include "catalog/pg_index.h"
#include "lib/stringinfo.h"
#include "storage/bufmgr.h"
#include "storage/shm_toc.h"
Include dependency graph for nbtree.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  BTPageOpaqueData
 
struct  BTMetaPageData
 
struct  BTStackData
 
struct  BTScanPosItem
 
struct  BTScanPosData
 
struct  BTArrayKeyInfo
 
struct  BTScanOpaqueData
 

Macros

#define BTP_LEAF   (1 << 0) /* leaf page, i.e. not internal page */
 
#define BTP_ROOT   (1 << 1) /* root page (has no parent) */
 
#define BTP_DELETED   (1 << 2) /* page has been deleted from tree */
 
#define BTP_META   (1 << 3) /* meta-page */
 
#define BTP_HALF_DEAD   (1 << 4) /* empty, but still in tree */
 
#define BTP_SPLIT_END   (1 << 5) /* rightmost page of split group */
 
#define BTP_HAS_GARBAGE   (1 << 6) /* page has LP_DEAD tuples */
 
#define BTP_INCOMPLETE_SPLIT   (1 << 7) /* right sibling's downlink is missing */
 
#define MAX_BT_CYCLE_ID   0xFF7F
 
#define BTPageGetMeta(p)   ((BTMetaPageData *) PageGetContents(p))
 
#define BTREE_METAPAGE   0 /* first page is meta */
 
#define BTREE_MAGIC   0x053162 /* magic number of btree pages */
 
#define BTREE_VERSION   3 /* current version number */
 
#define BTREE_MIN_VERSION   2 /* minimal supported version number */
 
#define BTMaxItemSize(page)
 
#define BTREE_MIN_FILLFACTOR   10
 
#define BTREE_DEFAULT_FILLFACTOR   90
 
#define BTREE_NONLEAF_FILLFACTOR   70
 
#define P_NONE   0
 
#define P_LEFTMOST(opaque)   ((opaque)->btpo_prev == P_NONE)
 
#define P_RIGHTMOST(opaque)   ((opaque)->btpo_next == P_NONE)
 
#define P_ISLEAF(opaque)   (((opaque)->btpo_flags & BTP_LEAF) != 0)
 
#define P_ISROOT(opaque)   (((opaque)->btpo_flags & BTP_ROOT) != 0)
 
#define P_ISDELETED(opaque)   (((opaque)->btpo_flags & BTP_DELETED) != 0)
 
#define P_ISMETA(opaque)   (((opaque)->btpo_flags & BTP_META) != 0)
 
#define P_ISHALFDEAD(opaque)   (((opaque)->btpo_flags & BTP_HALF_DEAD) != 0)
 
#define P_IGNORE(opaque)   (((opaque)->btpo_flags & (BTP_DELETED|BTP_HALF_DEAD)) != 0)
 
#define P_HAS_GARBAGE(opaque)   (((opaque)->btpo_flags & BTP_HAS_GARBAGE) != 0)
 
#define P_INCOMPLETE_SPLIT(opaque)   (((opaque)->btpo_flags & BTP_INCOMPLETE_SPLIT) != 0)
 
#define P_HIKEY   ((OffsetNumber) 1)
 
#define P_FIRSTKEY   ((OffsetNumber) 2)
 
#define P_FIRSTDATAKEY(opaque)   (P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY)
 
#define INDEX_ALT_TID_MASK   INDEX_AM_RESERVED_BIT
 
#define BT_RESERVED_OFFSET_MASK   0xF000
 
#define BT_N_KEYS_OFFSET_MASK   0x0FFF
 
#define BTreeInnerTupleGetDownLink(itup)   ItemPointerGetBlockNumberNoCheck(&((itup)->t_tid))
 
#define BTreeInnerTupleSetDownLink(itup, blkno)   ItemPointerSetBlockNumber(&((itup)->t_tid), (blkno))
 
#define BTreeTupleGetTopParent(itup)   ItemPointerGetBlockNumberNoCheck(&((itup)->t_tid))
 
#define BTreeTupleSetTopParent(itup, blkno)
 
#define BTreeTupleGetNAtts(itup, rel)
 
#define BTreeTupleSetNAtts(itup, n)
 
#define BTCommuteStrategyNumber(strat)   (BTMaxStrategyNumber + 1 - (strat))
 
#define BTORDER_PROC   1
 
#define BTSORTSUPPORT_PROC   2
 
#define BTINRANGE_PROC   3
 
#define BTNProcs   3
 
#define BT_READ   BUFFER_LOCK_SHARE
 
#define BT_WRITE   BUFFER_LOCK_EXCLUSIVE
 
#define BTScanPosIsPinned(scanpos)
 
#define BTScanPosUnpin(scanpos)
 
#define BTScanPosUnpinIfPinned(scanpos)
 
#define BTScanPosIsValid(scanpos)
 
#define BTScanPosInvalidate(scanpos)
 
#define SK_BT_REQFWD   0x00010000 /* required to continue forward scan */
 
#define SK_BT_REQBKWD   0x00020000 /* required to continue backward scan */
 
#define SK_BT_INDOPTION_SHIFT   24 /* must clear the above bits */
 
#define SK_BT_DESC   (INDOPTION_DESC << SK_BT_INDOPTION_SHIFT)
 
#define SK_BT_NULLS_FIRST   (INDOPTION_NULLS_FIRST << SK_BT_INDOPTION_SHIFT)
 

Typedefs

typedef uint16 BTCycleId
 
typedef struct BTPageOpaqueData BTPageOpaqueData
 
typedef BTPageOpaqueDataBTPageOpaque
 
typedef struct BTMetaPageData BTMetaPageData
 
typedef struct BTStackData BTStackData
 
typedef BTStackDataBTStack
 
typedef struct BTScanPosItem BTScanPosItem
 
typedef struct BTScanPosData BTScanPosData
 
typedef BTScanPosDataBTScanPos
 
typedef struct BTArrayKeyInfo BTArrayKeyInfo
 
typedef struct BTScanOpaqueData BTScanOpaqueData
 
typedef BTScanOpaqueDataBTScanOpaque
 

Functions

void btbuildempty (Relation index)
 
bool btinsert (Relation rel, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, struct IndexInfo *indexInfo)
 
IndexScanDesc btbeginscan (Relation rel, int nkeys, int norderbys)
 
Size btestimateparallelscan (void)
 
void btinitparallelscan (void *target)
 
bool btgettuple (IndexScanDesc scan, ScanDirection dir)
 
int64 btgetbitmap (IndexScanDesc scan, TIDBitmap *tbm)
 
void btrescan (IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys)
 
void btparallelrescan (IndexScanDesc scan)
 
void btendscan (IndexScanDesc scan)
 
void btmarkpos (IndexScanDesc scan)
 
void btrestrpos (IndexScanDesc scan)
 
IndexBulkDeleteResultbtbulkdelete (IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
 
IndexBulkDeleteResultbtvacuumcleanup (IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 
bool btcanreturn (Relation index, int attno)
 
bool _bt_parallel_seize (IndexScanDesc scan, BlockNumber *pageno)
 
void _bt_parallel_release (IndexScanDesc scan, BlockNumber scan_page)
 
void _bt_parallel_done (IndexScanDesc scan)
 
void _bt_parallel_advance_array_keys (IndexScanDesc scan)
 
bool _bt_doinsert (Relation rel, IndexTuple itup, IndexUniqueCheck checkUnique, Relation heapRel)
 
Buffer _bt_getstackbuf (Relation rel, BTStack stack, int access)
 
void _bt_finish_split (Relation rel, Buffer bbuf, BTStack stack)
 
void _bt_initmetapage (Page page, BlockNumber rootbknum, uint32 level)
 
void _bt_update_meta_cleanup_info (Relation rel, TransactionId oldestBtpoXact, float8 numHeapTuples)
 
void _bt_upgrademetapage (Page page)
 
Buffer _bt_getroot (Relation rel, int access)
 
Buffer _bt_gettrueroot (Relation rel)
 
int _bt_getrootheight (Relation rel)
 
void _bt_checkpage (Relation rel, Buffer buf)
 
Buffer _bt_getbuf (Relation rel, BlockNumber blkno, int access)
 
Buffer _bt_relandgetbuf (Relation rel, Buffer obuf, BlockNumber blkno, int access)
 
void _bt_relbuf (Relation rel, Buffer buf)
 
void _bt_pageinit (Page page, Size size)
 
bool _bt_page_recyclable (Page page)
 
void _bt_delitems_delete (Relation rel, Buffer buf, OffsetNumber *itemnos, int nitems, Relation heapRel)
 
void _bt_delitems_vacuum (Relation rel, Buffer buf, OffsetNumber *itemnos, int nitems, BlockNumber lastBlockVacuumed)
 
int _bt_pagedel (Relation rel, Buffer buf)
 
BTStack _bt_search (Relation rel, int keysz, ScanKey scankey, bool nextkey, Buffer *bufP, int access, Snapshot snapshot)
 
Buffer _bt_moveright (Relation rel, Buffer buf, int keysz, ScanKey scankey, bool nextkey, bool forupdate, BTStack stack, int access, Snapshot snapshot)
 
OffsetNumber _bt_binsrch (Relation rel, Buffer buf, int keysz, ScanKey scankey, bool nextkey)
 
int32 _bt_compare (Relation rel, int keysz, ScanKey scankey, Page page, OffsetNumber offnum)
 
bool _bt_first (IndexScanDesc scan, ScanDirection dir)
 
bool _bt_next (IndexScanDesc scan, ScanDirection dir)
 
Buffer _bt_get_endpoint (Relation rel, uint32 level, bool rightmost, Snapshot snapshot)
 
ScanKey _bt_mkscankey (Relation rel, IndexTuple itup)
 
ScanKey _bt_mkscankey_nodata (Relation rel)
 
void _bt_freeskey (ScanKey skey)
 
void _bt_freestack (BTStack stack)
 
void _bt_preprocess_array_keys (IndexScanDesc scan)
 
void _bt_start_array_keys (IndexScanDesc scan, ScanDirection dir)
 
bool _bt_advance_array_keys (IndexScanDesc scan, ScanDirection dir)
 
void _bt_mark_array_keys (IndexScanDesc scan)
 
void _bt_restore_array_keys (IndexScanDesc scan)
 
void _bt_preprocess_keys (IndexScanDesc scan)
 
IndexTuple _bt_checkkeys (IndexScanDesc scan, Page page, OffsetNumber offnum, ScanDirection dir, bool *continuescan)
 
void _bt_killitems (IndexScanDesc scan)
 
BTCycleId _bt_vacuum_cycleid (Relation rel)
 
BTCycleId _bt_start_vacuum (Relation rel)
 
void _bt_end_vacuum (Relation rel)
 
void _bt_end_vacuum_callback (int code, Datum arg)
 
Size BTreeShmemSize (void)
 
void BTreeShmemInit (void)
 
byteabtoptions (Datum reloptions, bool validate)
 
bool btproperty (Oid index_oid, int attno, IndexAMProperty prop, const char *propname, bool *res, bool *isnull)
 
IndexTuple _bt_nonkey_truncate (Relation rel, IndexTuple itup)
 
bool _bt_check_natts (Relation rel, Page page, OffsetNumber offnum)
 
bool btvalidate (Oid opclassoid)
 
IndexBuildResultbtbuild (Relation heap, Relation index, struct IndexInfo *indexInfo)
 
void _bt_parallel_build_main (dsm_segment *seg, shm_toc *toc)
 

Macro Definition Documentation

◆ BT_N_KEYS_OFFSET_MASK

#define BT_N_KEYS_OFFSET_MASK   0x0FFF

Definition at line 221 of file nbtree.h.

Referenced by _bt_check_natts().

◆ BT_READ

◆ BT_RESERVED_OFFSET_MASK

#define BT_RESERVED_OFFSET_MASK   0xF000

Definition at line 220 of file nbtree.h.

◆ BT_WRITE

◆ BTCommuteStrategyNumber

#define BTCommuteStrategyNumber (   strat)    (BTMaxStrategyNumber + 1 - (strat))

Definition at line 271 of file nbtree.h.

Referenced by _bt_compare_scankey_args(), and _bt_fix_scankey_strategy().

◆ BTINRANGE_PROC

#define BTINRANGE_PROC   3

Definition at line 292 of file nbtree.h.

Referenced by assignProcTypes(), btvalidate(), and transformFrameOffset().

◆ BTMaxItemSize

#define BTMaxItemSize (   page)
Value:
MAXALIGN(sizeof(BTPageOpaqueData))) / 3)
#define SizeOfPageHeaderData
Definition: bufpage.h:212
#define PageGetPageSize(page)
Definition: bufpage.h:264
#define MAXALIGN(LEN)
Definition: c.h:652
#define MAXALIGN_DOWN(LEN)
Definition: c.h:664

Definition at line 126 of file nbtree.h.

Referenced by _bt_buildadd(), and _bt_findinsertloc().

◆ BTNProcs

#define BTNProcs   3

Definition at line 293 of file nbtree.h.

Referenced by bthandler().

◆ BTORDER_PROC

◆ BTP_DELETED

#define BTP_DELETED   (1 << 2) /* page has been deleted from tree */

Definition at line 73 of file nbtree.h.

Referenced by _bt_unlink_halfdead_page(), and btree_xlog_unlink_page().

◆ BTP_HALF_DEAD

#define BTP_HALF_DEAD   (1 << 4) /* empty, but still in tree */

◆ BTP_HAS_GARBAGE

#define BTP_HAS_GARBAGE   (1 << 6) /* page has LP_DEAD tuples */

◆ BTP_INCOMPLETE_SPLIT

#define BTP_INCOMPLETE_SPLIT   (1 << 7) /* right sibling's downlink is missing */

◆ BTP_LEAF

#define BTP_LEAF   (1 << 0) /* leaf page, i.e. not internal page */

◆ BTP_META

#define BTP_META   (1 << 3) /* meta-page */

Definition at line 74 of file nbtree.h.

Referenced by _bt_initmetapage(), _bt_restore_meta(), and _bt_upgrademetapage().

◆ BTP_ROOT

#define BTP_ROOT   (1 << 1) /* root page (has no parent) */

Definition at line 72 of file nbtree.h.

Referenced by _bt_getroot(), _bt_newroot(), _bt_split(), _bt_uppershutdown(), and btree_xlog_newroot().

◆ BTP_SPLIT_END

#define BTP_SPLIT_END   (1 << 5) /* rightmost page of split group */

Definition at line 76 of file nbtree.h.

Referenced by _bt_split(), btree_mask(), and btvacuumpage().

◆ BTPageGetMeta

◆ BTREE_DEFAULT_FILLFACTOR

#define BTREE_DEFAULT_FILLFACTOR   90

Definition at line 139 of file nbtree.h.

Referenced by _bt_findsplitloc(), and _bt_pagestate().

◆ BTREE_MAGIC

#define BTREE_MAGIC   0x053162 /* magic number of btree pages */

◆ BTREE_METAPAGE

◆ BTREE_MIN_FILLFACTOR

#define BTREE_MIN_FILLFACTOR   10

Definition at line 138 of file nbtree.h.

◆ BTREE_MIN_VERSION

#define BTREE_MIN_VERSION   2 /* minimal supported version number */

◆ BTREE_NONLEAF_FILLFACTOR

#define BTREE_NONLEAF_FILLFACTOR   70

Definition at line 140 of file nbtree.h.

Referenced by _bt_findsplitloc(), and _bt_pagestate().

◆ BTREE_VERSION

◆ BTreeInnerTupleGetDownLink

◆ BTreeInnerTupleSetDownLink

#define BTreeInnerTupleSetDownLink (   itup,
  blkno 
)    ItemPointerSetBlockNumber(&((itup)->t_tid), (blkno))

◆ BTreeTupleGetNAtts

#define BTreeTupleGetNAtts (   itup,
  rel 
)
Value:
( \
(itup)->t_info & INDEX_ALT_TID_MASK ? \
( \
ItemPointerGetOffsetNumberNoCheck(&(itup)->t_tid) & BT_N_KEYS_OFFSET_MASK \
) \
: \
)
#define ItemPointerGetOffsetNumberNoCheck(pointer)
Definition: itemptr.h:108
#define BT_RESERVED_OFFSET_MASK
Definition: nbtree.h:220
#define AssertMacro(condition)
Definition: c.h:700
#define INDEX_ALT_TID_MASK
Definition: nbtree.h:219
#define IndexRelationGetNumberOfAttributes(relation)
Definition: rel.h:419
#define BT_N_KEYS_OFFSET_MASK
Definition: nbtree.h:221

Definition at line 247 of file nbtree.h.

Referenced by _bt_buildadd(), _bt_check_natts(), _bt_checkkeys(), _bt_insertonpg(), _bt_mkscankey(), _bt_newroot(), _bt_nonkey_truncate(), _bt_split(), _bt_uppershutdown(), and bt_target_page_check().

◆ BTreeTupleGetTopParent

#define BTreeTupleGetTopParent (   itup)    ItemPointerGetBlockNumberNoCheck(&((itup)->t_tid))

Definition at line 235 of file nbtree.h.

Referenced by _bt_unlink_halfdead_page(), and bt_downlink_missing_check().

◆ BTreeTupleSetNAtts

#define BTreeTupleSetNAtts (   itup,
 
)
Value:
do { \
(itup)->t_info |= INDEX_ALT_TID_MASK; \
Assert(((n) & BT_RESERVED_OFFSET_MASK) == 0); \
ItemPointerSetOffsetNumber(&(itup)->t_tid, (n) & BT_N_KEYS_OFFSET_MASK); \
} while(0)
#define BT_RESERVED_OFFSET_MASK
Definition: nbtree.h:220
#define INDEX_ALT_TID_MASK
Definition: nbtree.h:219
#define BT_N_KEYS_OFFSET_MASK
Definition: nbtree.h:221

Definition at line 257 of file nbtree.h.

Referenced by _bt_buildadd(), _bt_newroot(), _bt_nonkey_truncate(), _bt_pgaddtup(), and _bt_sortaddtup().

◆ BTreeTupleSetTopParent

#define BTreeTupleSetTopParent (   itup,
  blkno 
)
Value:
do { \
ItemPointerSetBlockNumber(&((itup)->t_tid), (blkno)); \
BTreeTupleSetNAtts((itup), 0); \
} while(0)

Definition at line 237 of file nbtree.h.

Referenced by _bt_mark_page_halfdead(), _bt_unlink_halfdead_page(), btree_xlog_mark_page_halfdead(), and btree_xlog_unlink_page().

◆ BTScanPosInvalidate

#define BTScanPosInvalidate (   scanpos)
Value:
do { \
(scanpos).currPage = InvalidBlockNumber; \
(scanpos).nextPage = InvalidBlockNumber; \
(scanpos).buf = InvalidBuffer; \
(scanpos).lsn = InvalidXLogRecPtr; \
(scanpos).nextTupleOffset = 0; \
} while (0);
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
#define InvalidBuffer
Definition: buf.h:25
static char * buf
Definition: pg_test_fsync.c:67
#define InvalidBlockNumber
Definition: block.h:33

Definition at line 418 of file nbtree.h.

Referenced by _bt_endpoint(), _bt_first(), _bt_readnextpage(), _bt_steppage(), btbeginscan(), btmarkpos(), btrescan(), and btrestrpos().

◆ BTScanPosIsPinned

#define BTScanPosIsPinned (   scanpos)
Value:
( \
AssertMacro(BlockNumberIsValid((scanpos).currPage) || \
!BufferIsValid((scanpos).buf)), \
BufferIsValid((scanpos).buf) \
)
static char * buf
Definition: pg_test_fsync.c:67
#define BlockNumberIsValid(blockNumber)
Definition: block.h:70
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114

Definition at line 395 of file nbtree.h.

Referenced by _bt_killitems(), _bt_readnextpage(), _bt_readpage(), _bt_steppage(), and btrestrpos().

◆ BTScanPosIsValid

#define BTScanPosIsValid (   scanpos)
Value:
( \
AssertMacro(BlockNumberIsValid((scanpos).currPage) || \
!BufferIsValid((scanpos).buf)), \
BlockNumberIsValid((scanpos).currPage) \
)
static char * buf
Definition: pg_test_fsync.c:67
#define BlockNumberIsValid(blockNumber)
Definition: block.h:70
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114

Definition at line 412 of file nbtree.h.

Referenced by _bt_first(), _bt_killitems(), _bt_steppage(), btendscan(), btgettuple(), btmarkpos(), btrescan(), and btrestrpos().

◆ BTScanPosUnpin

#define BTScanPosUnpin (   scanpos)
Value:
do { \
ReleaseBuffer((scanpos).buf); \
(scanpos).buf = InvalidBuffer; \
} while (0)
#define InvalidBuffer
Definition: buf.h:25
static char * buf
Definition: pg_test_fsync.c:67

Definition at line 401 of file nbtree.h.

◆ BTScanPosUnpinIfPinned

#define BTScanPosUnpinIfPinned (   scanpos)
Value:
do { \
if (BTScanPosIsPinned(scanpos)) \
BTScanPosUnpin(scanpos); \
} while (0)
#define BTScanPosIsPinned(scanpos)
Definition: nbtree.h:395

Definition at line 406 of file nbtree.h.

Referenced by _bt_readnextpage(), _bt_steppage(), btendscan(), btmarkpos(), btrescan(), and btrestrpos().

◆ BTSORTSUPPORT_PROC

#define BTSORTSUPPORT_PROC   2

Definition at line 291 of file nbtree.h.

Referenced by assignProcTypes(), btvalidate(), FinishSortSupportFunction(), and MJExamineQuals().

◆ INDEX_ALT_TID_MASK

#define INDEX_ALT_TID_MASK   INDEX_AM_RESERVED_BIT

Definition at line 219 of file nbtree.h.

Referenced by _bt_check_natts().

◆ MAX_BT_CYCLE_ID

#define MAX_BT_CYCLE_ID   0xFF7F

Definition at line 87 of file nbtree.h.

Referenced by _bt_start_vacuum().

◆ P_FIRSTDATAKEY

◆ P_FIRSTKEY

#define P_FIRSTKEY   ((OffsetNumber) 2)

Definition at line 186 of file nbtree.h.

Referenced by _bt_buildadd(), _bt_newroot(), _bt_slideleft(), and _bt_sortaddtup().

◆ P_HAS_GARBAGE

#define P_HAS_GARBAGE (   opaque)    (((opaque)->btpo_flags & BTP_HAS_GARBAGE) != 0)

Definition at line 164 of file nbtree.h.

Referenced by _bt_findinsertloc(), and palloc_btree_page().

◆ P_HIKEY

◆ P_IGNORE

◆ P_INCOMPLETE_SPLIT

◆ P_ISDELETED

◆ P_ISHALFDEAD

#define P_ISHALFDEAD (   opaque)    (((opaque)->btpo_flags & BTP_HALF_DEAD) != 0)

◆ P_ISLEAF

◆ P_ISMETA

#define P_ISMETA (   opaque)    (((opaque)->btpo_flags & BTP_META) != 0)

◆ P_ISROOT

#define P_ISROOT (   opaque)    (((opaque)->btpo_flags & BTP_ROOT) != 0)

◆ P_LEFTMOST

#define P_LEFTMOST (   opaque)    ((opaque)->btpo_prev == P_NONE)

◆ P_NONE

◆ P_RIGHTMOST

◆ SK_BT_DESC

◆ SK_BT_INDOPTION_SHIFT

#define SK_BT_INDOPTION_SHIFT   24 /* must clear the above bits */

Definition at line 488 of file nbtree.h.

Referenced by _bt_fix_scankey_strategy(), _bt_mkscankey(), and _bt_mkscankey_nodata().

◆ SK_BT_NULLS_FIRST

◆ SK_BT_REQBKWD

#define SK_BT_REQBKWD   0x00020000 /* required to continue backward scan */

Definition at line 487 of file nbtree.h.

Referenced by _bt_check_rowcompare(), _bt_checkkeys(), and _bt_mark_scankey_required().

◆ SK_BT_REQFWD

#define SK_BT_REQFWD   0x00010000 /* required to continue forward scan */

Definition at line 486 of file nbtree.h.

Referenced by _bt_check_rowcompare(), _bt_checkkeys(), and _bt_mark_scankey_required().

Typedef Documentation

◆ BTArrayKeyInfo

◆ BTCycleId

Definition at line 27 of file nbtree.h.

◆ BTMetaPageData

◆ BTPageOpaque

Definition at line 68 of file nbtree.h.

◆ BTPageOpaqueData

◆ BTScanOpaque

Definition at line 479 of file nbtree.h.

◆ BTScanOpaqueData

◆ BTScanPos

Definition at line 393 of file nbtree.h.

◆ BTScanPosData

◆ BTScanPosItem

◆ BTStack

Definition at line 321 of file nbtree.h.

◆ BTStackData

Function Documentation

◆ _bt_advance_array_keys()

bool _bt_advance_array_keys ( IndexScanDesc  scan,
ScanDirection  dir 
)

Definition at line 558 of file nbtutils.c.

References _bt_parallel_advance_array_keys(), BTScanOpaqueData::arrayKeyData, BTScanOpaqueData::arrayKeys, BTArrayKeyInfo::cur_elem, BTArrayKeyInfo::elem_values, i, BTArrayKeyInfo::num_elems, BTScanOpaqueData::numArrayKeys, IndexScanDescData::opaque, IndexScanDescData::parallel_scan, BTArrayKeyInfo::scan_key, ScanDirectionIsBackward, and ScanKeyData::sk_argument.

Referenced by btgetbitmap(), and btgettuple().

559 {
560  BTScanOpaque so = (BTScanOpaque) scan->opaque;
561  bool found = false;
562  int i;
563 
564  /*
565  * We must advance the last array key most quickly, since it will
566  * correspond to the lowest-order index column among the available
567  * qualifications. This is necessary to ensure correct ordering of output
568  * when there are multiple array keys.
569  */
570  for (i = so->numArrayKeys - 1; i >= 0; i--)
571  {
572  BTArrayKeyInfo *curArrayKey = &so->arrayKeys[i];
573  ScanKey skey = &so->arrayKeyData[curArrayKey->scan_key];
574  int cur_elem = curArrayKey->cur_elem;
575  int num_elems = curArrayKey->num_elems;
576 
577  if (ScanDirectionIsBackward(dir))
578  {
579  if (--cur_elem < 0)
580  {
581  cur_elem = num_elems - 1;
582  found = false; /* need to advance next array key */
583  }
584  else
585  found = true;
586  }
587  else
588  {
589  if (++cur_elem >= num_elems)
590  {
591  cur_elem = 0;
592  found = false; /* need to advance next array key */
593  }
594  else
595  found = true;
596  }
597 
598  curArrayKey->cur_elem = cur_elem;
599  skey->sk_argument = curArrayKey->elem_values[cur_elem];
600  if (found)
601  break;
602  }
603 
604  /* advance parallel scan */
605  if (scan->parallel_scan != NULL)
607 
608  return found;
609 }
ParallelIndexScanDesc parallel_scan
Definition: relscan.h:141
Datum * elem_values
Definition: nbtree.h:434
#define ScanDirectionIsBackward(direction)
Definition: sdir.h:41
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:479
int cur_elem
Definition: nbtree.h:431
int numArrayKeys
Definition: nbtree.h:446
ScanKey arrayKeyData
Definition: nbtree.h:445
int num_elems
Definition: nbtree.h:433
void _bt_parallel_advance_array_keys(IndexScanDesc scan)
Definition: nbtree.c:761
BTArrayKeyInfo * arrayKeys
Definition: nbtree.h:450
int i
Datum sk_argument
Definition: skey.h:72
int scan_key
Definition: nbtree.h:430

◆ _bt_binsrch()

OffsetNumber _bt_binsrch ( Relation  rel,
Buffer  buf,
int  keysz,
ScanKey  scankey,
bool  nextkey 
)

Definition at line 323 of file nbtsearch.c.

References _bt_compare(), Assert, BufferGetPage, OffsetNumberPrev, P_FIRSTDATAKEY, P_ISLEAF, PageGetMaxOffsetNumber, and PageGetSpecialPointer.

Referenced by _bt_doinsert(), _bt_findinsertloc(), _bt_first(), and _bt_search().

328 {
329  Page page;
330  BTPageOpaque opaque;
331  OffsetNumber low,
332  high;
333  int32 result,
334  cmpval;
335 
336  page = BufferGetPage(buf);
337  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
338 
339  low = P_FIRSTDATAKEY(opaque);
340  high = PageGetMaxOffsetNumber(page);
341 
342  /*
343  * If there are no keys on the page, return the first available slot. Note
344  * this covers two cases: the page is really empty (no keys), or it
345  * contains only a high key. The latter case is possible after vacuuming.
346  * This can never happen on an internal page, however, since they are
347  * never empty (an internal page must have children).
348  */
349  if (high < low)
350  return low;
351 
352  /*
353  * Binary search to find the first key on the page >= scan key, or first
354  * key > scankey when nextkey is true.
355  *
356  * For nextkey=false (cmpval=1), the loop invariant is: all slots before
357  * 'low' are < scan key, all slots at or after 'high' are >= scan key.
358  *
359  * For nextkey=true (cmpval=0), the loop invariant is: all slots before
360  * 'low' are <= scan key, all slots at or after 'high' are > scan key.
361  *
362  * We can fall out when high == low.
363  */
364  high++; /* establish the loop invariant for high */
365 
366  cmpval = nextkey ? 0 : 1; /* select comparison value */
367 
368  while (high > low)
369  {
370  OffsetNumber mid = low + ((high - low) / 2);
371 
372  /* We have low <= mid < high, so mid points at a real slot */
373 
374  result = _bt_compare(rel, keysz, scankey, page, mid);
375 
376  if (result >= cmpval)
377  low = mid + 1;
378  else
379  high = mid;
380  }
381 
382  /*
383  * At this point we have high == low, but be careful: they could point
384  * past the last slot on the page.
385  *
386  * On a leaf page, we always return the first key >= scan key (resp. >
387  * scan key), which could be the last slot + 1.
388  */
389  if (P_ISLEAF(opaque))
390  return low;
391 
392  /*
393  * On a non-leaf page, return the last key < scan key (resp. <= scan key).
394  * There must be one if _bt_compare() is playing by the rules.
395  */
396  Assert(low > P_FIRSTDATAKEY(opaque));
397 
398  return OffsetNumberPrev(low);
399 }
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:187
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:353
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:68
signed int int32
Definition: c.h:313
uint16 OffsetNumber
Definition: off.h:24
static char * buf
Definition: pg_test_fsync.c:67
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
#define Assert(condition)
Definition: c.h:699
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
#define OffsetNumberPrev(offsetNumber)
Definition: off.h:55
int32 _bt_compare(Relation rel, int keysz, ScanKey scankey, Page page, OffsetNumber offnum)
Definition: nbtsearch.c:428
Pointer Page
Definition: bufpage.h:74
#define P_ISLEAF(opaque)
Definition: nbtree.h:158

◆ _bt_check_natts()

bool _bt_check_natts ( Relation  rel,
Page  page,
OffsetNumber  offnum 
)

Definition at line 2134 of file nbtutils.c.

References Assert, BT_N_KEYS_OFFSET_MASK, BTreeTupleGetNAtts, FirstOffsetNumber, INDEX_ALT_TID_MASK, INDEX_MAX_KEYS, IndexRelationGetNumberOfAttributes, IndexRelationGetNumberOfKeyAttributes, ItemPointerGetOffsetNumber, P_FIRSTDATAKEY, P_HIKEY, P_IGNORE, P_ISLEAF, P_RIGHTMOST, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageGetSpecialPointer, StaticAssertStmt, IndexTupleData::t_info, and IndexTupleData::t_tid.

Referenced by _bt_compare(), and bt_target_page_check().

2135 {
2139  IndexTuple itup;
2140 
2141  /*
2142  * We cannot reliably test a deleted or half-deleted page, since they have
2143  * dummy high keys
2144  */
2145  if (P_IGNORE(opaque))
2146  return true;
2147 
2148  Assert(offnum >= FirstOffsetNumber &&
2149  offnum <= PageGetMaxOffsetNumber(page));
2150 
2151  /*
2152  * Mask allocated for number of keys in index tuple must be able to fit
2153  * maximum possible number of index attributes
2154  */
2156  "BT_N_KEYS_OFFSET_MASK can't fit INDEX_MAX_KEYS");
2157 
2158  itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
2159 
2160  if (P_ISLEAF(opaque))
2161  {
2162  if (offnum >= P_FIRSTDATAKEY(opaque))
2163  {
2164  /*
2165  * Leaf tuples that are not the page high key (non-pivot tuples)
2166  * should never be truncated
2167  */
2168  return BTreeTupleGetNAtts(itup, rel) == natts;
2169  }
2170  else
2171  {
2172  /*
2173  * Rightmost page doesn't contain a page high key, so tuple was
2174  * checked above as ordinary leaf tuple
2175  */
2176  Assert(!P_RIGHTMOST(opaque));
2177 
2178  /* Page high key tuple contains only key attributes */
2179  return BTreeTupleGetNAtts(itup, rel) == nkeyatts;
2180  }
2181  }
2182  else /* !P_ISLEAF(opaque) */
2183  {
2184  if (offnum == P_FIRSTDATAKEY(opaque))
2185  {
2186  /*
2187  * The first tuple on any internal page (possibly the first after
2188  * its high key) is its negative infinity tuple. Negative
2189  * infinity tuples are always truncated to zero attributes. They
2190  * are a particular kind of pivot tuple.
2191  *
2192  * The number of attributes won't be explicitly represented if the
2193  * negative infinity tuple was generated during a page split that
2194  * occurred with a version of Postgres before v11. There must be
2195  * a problem when there is an explicit representation that is
2196  * non-zero, or when there is no explicit representation and the
2197  * tuple is evidently not a pre-pg_upgrade tuple.
2198  *
2199  * Prior to v11, downlinks always had P_HIKEY as their offset.
2200  * Use that to decide if the tuple is a pre-v11 tuple.
2201  */
2202  return BTreeTupleGetNAtts(itup, rel) == 0 ||
2203  ((itup->t_info & INDEX_ALT_TID_MASK) == 0 &&
2205  }
2206  else
2207  {
2208  /*
2209  * Tuple contains only key attributes despite on is it page high
2210  * key or not
2211  */
2212  return BTreeTupleGetNAtts(itup, rel) == nkeyatts;
2213  }
2214 
2215  }
2216 }
signed short int16
Definition: c.h:312
#define P_IGNORE(opaque)
Definition: nbtree.h:163
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:187
ItemPointerData t_tid
Definition: itup.h:37
#define BTreeTupleGetNAtts(itup, rel)
Definition: nbtree.h:247
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:353
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:68
#define StaticAssertStmt(condition, errmessage)
Definition: c.h:795
#define INDEX_ALT_TID_MASK
Definition: nbtree.h:219
#define FirstOffsetNumber
Definition: off.h:27
IndexTupleData * IndexTuple
Definition: itup.h:53
#define IndexRelationGetNumberOfAttributes(relation)
Definition: rel.h:419
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition: rel.h:426
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:231
#define Assert(condition)
Definition: c.h:699
#define INDEX_MAX_KEYS
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
#define BT_N_KEYS_OFFSET_MASK
Definition: nbtree.h:221
#define ItemPointerGetOffsetNumber(pointer)
Definition: itemptr.h:117
#define P_HIKEY
Definition: nbtree.h:185
unsigned short t_info
Definition: itup.h:49
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:157
#define PageGetItem(page, itemId)
Definition: bufpage.h:336
#define P_ISLEAF(opaque)
Definition: nbtree.h:158

◆ _bt_checkkeys()

IndexTuple _bt_checkkeys ( IndexScanDesc  scan,
Page  page,
OffsetNumber  offnum,
ScanDirection  dir,
bool continuescan 
)

Definition at line 1370 of file nbtutils.c.

References _bt_check_rowcompare(), Assert, BTreeTupleGetNAtts, DatumGetBool, FunctionCall2Coll(), IndexScanDescData::ignore_killed_tuples, index_getattr, IndexScanDescData::indexRelation, ItemIdIsDead, BTScanOpaqueData::keyData, BTScanOpaqueData::numberOfKeys, IndexScanDescData::opaque, P_FIRSTDATAKEY, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageGetSpecialPointer, RelationGetDescr, ScanDirectionIsBackward, ScanDirectionIsForward, ScanKeyData::sk_argument, ScanKeyData::sk_attno, SK_BT_NULLS_FIRST, SK_BT_REQBKWD, SK_BT_REQFWD, ScanKeyData::sk_collation, ScanKeyData::sk_flags, ScanKeyData::sk_func, SK_ISNULL, SK_ROW_HEADER, SK_SEARCHNOTNULL, SK_SEARCHNULL, and test().

Referenced by _bt_readpage().

1373 {
1374  ItemId iid = PageGetItemId(page, offnum);
1375  bool tuple_alive;
1376  IndexTuple tuple;
1377  TupleDesc tupdesc;
1378  BTScanOpaque so;
1379  int keysz;
1380  int ikey;
1381  ScanKey key;
1382 
1383  *continuescan = true; /* default assumption */
1384 
1385  /*
1386  * If the scan specifies not to return killed tuples, then we treat a
1387  * killed tuple as not passing the qual. Most of the time, it's a win to
1388  * not bother examining the tuple's index keys, but just return
1389  * immediately with continuescan = true to proceed to the next tuple.
1390  * However, if this is the last tuple on the page, we should check the
1391  * index keys to prevent uselessly advancing to the next page.
1392  */
1393  if (scan->ignore_killed_tuples && ItemIdIsDead(iid))
1394  {
1395  /* return immediately if there are more tuples on the page */
1396  if (ScanDirectionIsForward(dir))
1397  {
1398  if (offnum < PageGetMaxOffsetNumber(page))
1399  return NULL;
1400  }
1401  else
1402  {
1404 
1405  if (offnum > P_FIRSTDATAKEY(opaque))
1406  return NULL;
1407  }
1408 
1409  /*
1410  * OK, we want to check the keys so we can set continuescan correctly,
1411  * but we'll return NULL even if the tuple passes the key tests.
1412  */
1413  tuple_alive = false;
1414  }
1415  else
1416  tuple_alive = true;
1417 
1418  tuple = (IndexTuple) PageGetItem(page, iid);
1419 
1420  tupdesc = RelationGetDescr(scan->indexRelation);
1421  so = (BTScanOpaque) scan->opaque;
1422  keysz = so->numberOfKeys;
1423 
1424  for (key = so->keyData, ikey = 0; ikey < keysz; key++, ikey++)
1425  {
1426  Datum datum;
1427  bool isNull;
1428  Datum test;
1429 
1430  Assert(key->sk_attno <= BTreeTupleGetNAtts(tuple, scan->indexRelation));
1431  /* row-comparison keys need special processing */
1432  if (key->sk_flags & SK_ROW_HEADER)
1433  {
1434  if (_bt_check_rowcompare(key, tuple, tupdesc, dir, continuescan))
1435  continue;
1436  return NULL;
1437  }
1438 
1439  datum = index_getattr(tuple,
1440  key->sk_attno,
1441  tupdesc,
1442  &isNull);
1443 
1444  if (key->sk_flags & SK_ISNULL)
1445  {
1446  /* Handle IS NULL/NOT NULL tests */
1447  if (key->sk_flags & SK_SEARCHNULL)
1448  {
1449  if (isNull)
1450  continue; /* tuple satisfies this qual */
1451  }
1452  else
1453  {
1455  if (!isNull)
1456  continue; /* tuple satisfies this qual */
1457  }
1458 
1459  /*
1460  * Tuple fails this qual. If it's a required qual for the current
1461  * scan direction, then we can conclude no further tuples will
1462  * pass, either.
1463  */
1464  if ((key->sk_flags & SK_BT_REQFWD) &&
1466  *continuescan = false;
1467  else if ((key->sk_flags & SK_BT_REQBKWD) &&
1469  *continuescan = false;
1470 
1471  /*
1472  * In any case, this indextuple doesn't match the qual.
1473  */
1474  return NULL;
1475  }
1476 
1477  if (isNull)
1478  {
1479  if (key->sk_flags & SK_BT_NULLS_FIRST)
1480  {
1481  /*
1482  * Since NULLs are sorted before non-NULLs, we know we have
1483  * reached the lower limit of the range of values for this
1484  * index attr. On a backward scan, we can stop if this qual
1485  * is one of the "must match" subset. We can stop regardless
1486  * of whether the qual is > or <, so long as it's required,
1487  * because it's not possible for any future tuples to pass. On
1488  * a forward scan, however, we must keep going, because we may
1489  * have initially positioned to the start of the index.
1490  */
1491  if ((key->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) &&
1493  *continuescan = false;
1494  }
1495  else
1496  {
1497  /*
1498  * Since NULLs are sorted after non-NULLs, we know we have
1499  * reached the upper limit of the range of values for this
1500  * index attr. On a forward scan, we can stop if this qual is
1501  * one of the "must match" subset. We can stop regardless of
1502  * whether the qual is > or <, so long as it's required,
1503  * because it's not possible for any future tuples to pass. On
1504  * a backward scan, however, we must keep going, because we
1505  * may have initially positioned to the end of the index.
1506  */
1507  if ((key->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) &&
1509  *continuescan = false;
1510  }
1511 
1512  /*
1513  * In any case, this indextuple doesn't match the qual.
1514  */
1515  return NULL;
1516  }
1517 
1518  test = FunctionCall2Coll(&key->sk_func, key->sk_collation,
1519  datum, key->sk_argument);
1520 
1521  if (!DatumGetBool(test))
1522  {
1523  /*
1524  * Tuple fails this qual. If it's a required qual for the current
1525  * scan direction, then we can conclude no further tuples will
1526  * pass, either.
1527  *
1528  * Note: because we stop the scan as soon as any required equality
1529  * qual fails, it is critical that equality quals be used for the
1530  * initial positioning in _bt_first() when they are available. See
1531  * comments in _bt_first().
1532  */
1533  if ((key->sk_flags & SK_BT_REQFWD) &&
1535  *continuescan = false;
1536  else if ((key->sk_flags & SK_BT_REQBKWD) &&
1538  *continuescan = false;
1539 
1540  /*
1541  * In any case, this indextuple doesn't match the qual.
1542  */
1543  return NULL;
1544  }
1545  }
1546 
1547  /* Check for failure due to it being a killed tuple. */
1548  if (!tuple_alive)
1549  return NULL;
1550 
1551  /* If we get here, the tuple passes all index quals. */
1552  return tuple;
1553 }
static bool _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc, ScanDirection dir, bool *continuescan)
Definition: nbtutils.c:1565
static void test(void)
#define ScanDirectionIsForward(direction)
Definition: sdir.h:55
#define RelationGetDescr(relation)
Definition: rel.h:433
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:187
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1133
#define ItemIdIsDead(itemId)
Definition: itemid.h:112
#define BTreeTupleGetNAtts(itup, rel)
Definition: nbtree.h:247
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:353
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:68
bool ignore_killed_tuples
Definition: relscan.h:102
Relation indexRelation
Definition: relscan.h:91
#define ScanDirectionIsBackward(direction)
Definition: sdir.h:41
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:479
IndexTupleData * IndexTuple
Definition: itup.h:53
FmgrInfo sk_func
Definition: skey.h:71
#define DatumGetBool(X)
Definition: postgres.h:378
#define SK_SEARCHNOTNULL
Definition: skey.h:122
#define SK_ISNULL
Definition: skey.h:115
#define SK_ROW_HEADER
Definition: skey.h:117
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:231
#define SK_BT_NULLS_FIRST
Definition: nbtree.h:490
uintptr_t Datum
Definition: postgres.h:367
#define SK_BT_REQFWD
Definition: nbtree.h:486
int sk_flags
Definition: skey.h:66
#define Assert(condition)
Definition: c.h:699
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
int numberOfKeys
Definition: nbtree.h:441
#define index_getattr(tup, attnum, tupleDesc, isnull)
Definition: itup.h:100
ScanKey keyData
Definition: nbtree.h:442
Oid sk_collation
Definition: skey.h:70
#define SK_BT_REQBKWD
Definition: nbtree.h:487
Datum sk_argument
Definition: skey.h:72
#define SK_SEARCHNULL
Definition: skey.h:121
#define PageGetItem(page, itemId)
Definition: bufpage.h:336
AttrNumber sk_attno
Definition: skey.h:67

◆ _bt_checkpage()

void _bt_checkpage ( Relation  rel,
Buffer  buf 
)

Definition at line 662 of file nbtpage.c.

References BufferGetBlockNumber(), BufferGetPage, ereport, errcode(), errhint(), errmsg(), ERROR, MAXALIGN, PageGetSpecialSize, PageIsNew, and RelationGetRelationName.

Referenced by _bt_doinsert(), _bt_getbuf(), _bt_relandgetbuf(), btvacuumpage(), btvacuumscan(), and palloc_btree_page().

663 {
664  Page page = BufferGetPage(buf);
665 
666  /*
667  * ReadBuffer verifies that every newly-read page passes
668  * PageHeaderIsValid, which means it either contains a reasonably sane
669  * page header or is all-zero. We have to defend against the all-zero
670  * case, however.
671  */
672  if (PageIsNew(page))
673  ereport(ERROR,
674  (errcode(ERRCODE_INDEX_CORRUPTED),
675  errmsg("index \"%s\" contains unexpected zero page at block %u",
678  errhint("Please REINDEX it.")));
679 
680  /*
681  * Additionally check that the special area looks sane.
682  */
683  if (PageGetSpecialSize(page) != MAXALIGN(sizeof(BTPageOpaqueData)))
684  ereport(ERROR,
685  (errcode(ERRCODE_INDEX_CORRUPTED),
686  errmsg("index \"%s\" contains corrupted page at block %u",
689  errhint("Please REINDEX it.")));
690 }
int errhint(const char *fmt,...)
Definition: elog.c:987
int errcode(int sqlerrcode)
Definition: elog.c:575
#define ERROR
Definition: elog.h:43
static char * buf
Definition: pg_test_fsync.c:67
#define RelationGetRelationName(relation)
Definition: rel.h:441
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
#define ereport(elevel, rest)
Definition: elog.h:122
#define MAXALIGN(LEN)
Definition: c.h:652
#define PageGetSpecialSize(page)
Definition: bufpage.h:296
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2605
#define PageIsNew(page)
Definition: bufpage.h:225
int errmsg(const char *fmt,...)
Definition: elog.c:797
Pointer Page
Definition: bufpage.h:74

◆ _bt_compare()

int32 _bt_compare ( Relation  rel,
int  keysz,
ScanKey  scankey,
Page  page,
OffsetNumber  offnum 
)

Definition at line 428 of file nbtsearch.c.

References _bt_check_natts(), Assert, DatumGetInt32, FunctionCall2Coll(), i, index_getattr, P_FIRSTDATAKEY, P_ISLEAF, PageGetItem, PageGetItemId, PageGetSpecialPointer, RelationGetDescr, ScanKeyData::sk_argument, ScanKeyData::sk_attno, SK_BT_DESC, SK_BT_NULLS_FIRST, ScanKeyData::sk_collation, ScanKeyData::sk_flags, ScanKeyData::sk_func, and SK_ISNULL.

Referenced by _bt_binsrch(), _bt_doinsert(), _bt_findinsertloc(), _bt_moveright(), invariant_geq_offset(), invariant_leq_nontarget_offset(), and invariant_leq_offset().

433 {
434  TupleDesc itupdesc = RelationGetDescr(rel);
436  IndexTuple itup;
437  int i;
438 
439  Assert(_bt_check_natts(rel, page, offnum));
440 
441  /*
442  * Force result ">" if target item is first data item on an internal page
443  * --- see NOTE above.
444  */
445  if (!P_ISLEAF(opaque) && offnum == P_FIRSTDATAKEY(opaque))
446  return 1;
447 
448  itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
449 
450  /*
451  * The scan key is set up with the attribute number associated with each
452  * term in the key. It is important that, if the index is multi-key, the
453  * scan contain the first k key attributes, and that they be in order. If
454  * you think about how multi-key ordering works, you'll understand why
455  * this is.
456  *
457  * We don't test for violation of this condition here, however. The
458  * initial setup for the index scan had better have gotten it right (see
459  * _bt_first).
460  */
461 
462  for (i = 1; i <= keysz; i++)
463  {
464  Datum datum;
465  bool isNull;
466  int32 result;
467 
468  datum = index_getattr(itup, scankey->sk_attno, itupdesc, &isNull);
469 
470  /* see comments about NULLs handling in btbuild */
471  if (scankey->sk_flags & SK_ISNULL) /* key is NULL */
472  {
473  if (isNull)
474  result = 0; /* NULL "=" NULL */
475  else if (scankey->sk_flags & SK_BT_NULLS_FIRST)
476  result = -1; /* NULL "<" NOT_NULL */
477  else
478  result = 1; /* NULL ">" NOT_NULL */
479  }
480  else if (isNull) /* key is NOT_NULL and item is NULL */
481  {
482  if (scankey->sk_flags & SK_BT_NULLS_FIRST)
483  result = 1; /* NOT_NULL ">" NULL */
484  else
485  result = -1; /* NOT_NULL "<" NULL */
486  }
487  else
488  {
489  /*
490  * The sk_func needs to be passed the index value as left arg and
491  * the sk_argument as right arg (they might be of different
492  * types). Since it is convenient for callers to think of
493  * _bt_compare as comparing the scankey to the index item, we have
494  * to flip the sign of the comparison result. (Unless it's a DESC
495  * column, in which case we *don't* flip the sign.)
496  */
497  result = DatumGetInt32(FunctionCall2Coll(&scankey->sk_func,
498  scankey->sk_collation,
499  datum,
500  scankey->sk_argument));
501 
502  if (!(scankey->sk_flags & SK_BT_DESC))
503  result = -result;
504  }
505 
506  /* if the keys are unequal, return the difference */
507  if (result != 0)
508  return result;
509 
510  scankey++;
511  }
512 
513  /* if we get here, the keys are equal */
514  return 0;
515 }
#define DatumGetInt32(X)
Definition: postgres.h:457
#define RelationGetDescr(relation)
Definition: rel.h:433
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:187
bool _bt_check_natts(Relation rel, Page page, OffsetNumber offnum)
Definition: nbtutils.c:2134
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1133
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:68
signed int int32
Definition: c.h:313
IndexTupleData * IndexTuple
Definition: itup.h:53
FmgrInfo sk_func
Definition: skey.h:71
#define SK_ISNULL
Definition: skey.h:115
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:231
#define SK_BT_NULLS_FIRST
Definition: nbtree.h:490
uintptr_t Datum
Definition: postgres.h:367
int sk_flags
Definition: skey.h:66
#define Assert(condition)
Definition: c.h:699
#define SK_BT_DESC
Definition: nbtree.h:489
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
#define index_getattr(tup, attnum, tupleDesc, isnull)
Definition: itup.h:100
Oid sk_collation
Definition: skey.h:70
int i
Datum sk_argument
Definition: skey.h:72
#define PageGetItem(page, itemId)
Definition: bufpage.h:336
AttrNumber sk_attno
Definition: skey.h:67
#define P_ISLEAF(opaque)
Definition: nbtree.h:158

◆ _bt_delitems_delete()

void _bt_delitems_delete ( Relation  rel,
Buffer  buf,
OffsetNumber itemnos,
int  nitems,
Relation  heapRel 
)

Definition at line 1021 of file nbtpage.c.

References Assert, BTP_HAS_GARBAGE, BTPageOpaqueData::btpo_flags, BufferGetPage, END_CRIT_SECTION, xl_btree_delete::hnode, MarkBufferDirty(), xl_btree_delete::nitems, PageGetSpecialPointer, PageIndexMultiDelete(), PageSetLSN, RelationData::rd_node, REGBUF_STANDARD, RelationNeedsWAL, SizeOfBtreeDelete, START_CRIT_SECTION, XLOG_BTREE_DELETE, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _bt_vacuum_one_page().

1024 {
1025  Page page = BufferGetPage(buf);
1026  BTPageOpaque opaque;
1027 
1028  /* Shouldn't be called unless there's something to do */
1029  Assert(nitems > 0);
1030 
1031  /* No ereport(ERROR) until changes are logged */
1033 
1034  /* Fix the page */
1035  PageIndexMultiDelete(page, itemnos, nitems);
1036 
1037  /*
1038  * Unlike _bt_delitems_vacuum, we *must not* clear the vacuum cycle ID,
1039  * because this is not called by VACUUM.
1040  */
1041 
1042  /*
1043  * Mark the page as not containing any LP_DEAD items. This is not
1044  * certainly true (there might be some that have recently been marked, but
1045  * weren't included in our target-item list), but it will almost always be
1046  * true and it doesn't seem worth an additional page scan to check it.
1047  * Remember that BTP_HAS_GARBAGE is only a hint anyway.
1048  */
1049  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1050  opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
1051 
1053 
1054  /* XLOG stuff */
1055  if (RelationNeedsWAL(rel))
1056  {
1057  XLogRecPtr recptr;
1058  xl_btree_delete xlrec_delete;
1059 
1060  xlrec_delete.hnode = heapRel->rd_node;
1061  xlrec_delete.nitems = nitems;
1062 
1063  XLogBeginInsert();
1065  XLogRegisterData((char *) &xlrec_delete, SizeOfBtreeDelete);
1066 
1067  /*
1068  * We need the target-offsets array whether or not we store the whole
1069  * buffer, to allow us to find the latestRemovedXid on a standby
1070  * server.
1071  */
1072  XLogRegisterData((char *) itemnos, nitems * sizeof(OffsetNumber));
1073 
1074  recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE);
1075 
1076  PageSetLSN(page, recptr);
1077  }
1078 
1079  END_CRIT_SECTION();
1080 }
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1450
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:213
RelFileNode hnode
Definition: nbtxlog.h:126
#define END_CRIT_SECTION()
Definition: miscadmin.h:133
#define START_CRIT_SECTION()
Definition: miscadmin.h:131
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:68
uint16 OffsetNumber
Definition: off.h:24
static char * buf
Definition: pg_test_fsync.c:67
#define REGBUF_STANDARD
Definition: xloginsert.h:34
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
#define XLOG_BTREE_DELETE
Definition: nbtxlog.h:33
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:323
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:415
RelFileNode rd_node
Definition: rel.h:55
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:699
void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
Definition: bufpage.c:832
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
#define RelationNeedsWAL(relation)
Definition: rel.h:510
#define SizeOfBtreeDelete
Definition: nbtxlog.h:133
void XLogBeginInsert(void)
Definition: xloginsert.c:120
uint16 btpo_flags
Definition: nbtree.h:64
#define PageSetLSN(page, lsn)
Definition: bufpage.h:364
#define BTP_HAS_GARBAGE
Definition: nbtree.h:77
Pointer Page
Definition: bufpage.h:74

◆ _bt_delitems_vacuum()

void _bt_delitems_vacuum ( Relation  rel,
Buffer  buf,
OffsetNumber itemnos,
int  nitems,
BlockNumber  lastBlockVacuumed 
)

Definition at line 948 of file nbtpage.c.

References BTP_HAS_GARBAGE, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BufferGetPage, END_CRIT_SECTION, xl_btree_vacuum::lastBlockVacuumed, MarkBufferDirty(), PageGetSpecialPointer, PageIndexMultiDelete(), PageSetLSN, REGBUF_STANDARD, RelationNeedsWAL, SizeOfBtreeVacuum, START_CRIT_SECTION, XLOG_BTREE_VACUUM, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by btvacuumpage(), and btvacuumscan().

951 {
952  Page page = BufferGetPage(buf);
953  BTPageOpaque opaque;
954 
955  /* No ereport(ERROR) until changes are logged */
957 
958  /* Fix the page */
959  if (nitems > 0)
960  PageIndexMultiDelete(page, itemnos, nitems);
961 
962  /*
963  * We can clear the vacuum cycle ID since this page has certainly been
964  * processed by the current vacuum scan.
965  */
966  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
967  opaque->btpo_cycleid = 0;
968 
969  /*
970  * Mark the page as not containing any LP_DEAD items. This is not
971  * certainly true (there might be some that have recently been marked, but
972  * weren't included in our target-item list), but it will almost always be
973  * true and it doesn't seem worth an additional page scan to check it.
974  * Remember that BTP_HAS_GARBAGE is only a hint anyway.
975  */
976  opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
977 
979 
980  /* XLOG stuff */
981  if (RelationNeedsWAL(rel))
982  {
983  XLogRecPtr recptr;
984  xl_btree_vacuum xlrec_vacuum;
985 
986  xlrec_vacuum.lastBlockVacuumed = lastBlockVacuumed;
987 
988  XLogBeginInsert();
990  XLogRegisterData((char *) &xlrec_vacuum, SizeOfBtreeVacuum);
991 
992  /*
993  * The target-offsets array is not in the buffer, but pretend that it
994  * is. When XLogInsert stores the whole buffer, the offsets array
995  * need not be stored too.
996  */
997  if (nitems > 0)
998  XLogRegisterBufData(0, (char *) itemnos, nitems * sizeof(OffsetNumber));
999 
1000  recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_VACUUM);
1001 
1002  PageSetLSN(page, recptr);
1003  }
1004 
1005  END_CRIT_SECTION();
1006 }
BlockNumber lastBlockVacuumed
Definition: nbtxlog.h:172
void XLogRegisterBufData(uint8 block_id, char *data, int len)
Definition: xloginsert.c:361
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1450
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:213
#define END_CRIT_SECTION()
Definition: miscadmin.h:133
#define START_CRIT_SECTION()
Definition: miscadmin.h:131
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:68
uint16 OffsetNumber
Definition: off.h:24
#define SizeOfBtreeVacuum
Definition: nbtxlog.h:177
BTCycleId btpo_cycleid
Definition: nbtree.h:65
static char * buf
Definition: pg_test_fsync.c:67
#define REGBUF_STANDARD
Definition: xloginsert.h:34
#define XLOG_BTREE_VACUUM
Definition: nbtxlog.h:38
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:323
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:415
uint64 XLogRecPtr
Definition: xlogdefs.h:21
void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
Definition: bufpage.c:832
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
#define RelationNeedsWAL(relation)
Definition: rel.h:510
void XLogBeginInsert(void)
Definition: xloginsert.c:120
uint16 btpo_flags
Definition: nbtree.h:64
#define PageSetLSN(page, lsn)
Definition: bufpage.h:364
#define BTP_HAS_GARBAGE
Definition: nbtree.h:77
Pointer Page
Definition: bufpage.h:74

◆ _bt_doinsert()

bool _bt_doinsert ( Relation  rel,
IndexTuple  itup,
IndexUniqueCheck  checkUnique,
Relation  heapRel 
)

Definition at line 110 of file nbtinsert.c.

References _bt_binsrch(), _bt_check_unique(), _bt_checkpage(), _bt_compare(), _bt_findinsertloc(), _bt_freeskey(), _bt_freestack(), _bt_insertonpg(), _bt_mkscankey(), _bt_moveright(), _bt_relbuf(), _bt_search(), Assert, BT_WRITE, buf, BUFFER_LOCK_UNLOCK, BufferGetPage, CheckForSerializableConflictIn(), ConditionalLockBuffer(), IndexRelationGetNumberOfKeyAttributes, IndexTupleSize, InvalidBlockNumber, InvalidBuffer, InvalidOffsetNumber, LockBuffer(), MAXALIGN, P_FIRSTDATAKEY, P_IGNORE, P_INCOMPLETE_SPLIT, P_ISLEAF, P_RIGHTMOST, PageGetFreeSpace(), PageGetMaxOffsetNumber, PageGetSpecialPointer, ReadBuffer(), RelationGetTargetBlock, RelationSetTargetBlock, ReleaseBuffer(), SpeculativeInsertionWait(), IndexTupleData::t_tid, TransactionIdIsValid, UNIQUE_CHECK_EXISTING, UNIQUE_CHECK_NO, XactLockTableWait(), and XLTW_InsertIndex.

Referenced by btinsert().

112 {
113  bool is_unique = false;
114  int indnkeyatts;
115  ScanKey itup_scankey;
116  BTStack stack = NULL;
117  Buffer buf;
118  OffsetNumber offset;
119  bool fastpath;
120 
121  indnkeyatts = IndexRelationGetNumberOfKeyAttributes(rel);
122  Assert(indnkeyatts != 0);
123 
124  /* we need an insertion scan key to do our search, so build one */
125  itup_scankey = _bt_mkscankey(rel, itup);
126 
127  /*
128  * It's very common to have an index on an auto-incremented or
129  * monotonically increasing value. In such cases, every insertion happens
130  * towards the end of the index. We try to optimize that case by caching
131  * the right-most leaf of the index. If our cached block is still the
132  * rightmost leaf, has enough free space to accommodate a new entry and
133  * the insertion key is strictly greater than the first key in this page,
134  * then we can safely conclude that the new key will be inserted in the
135  * cached block. So we simply search within the cached block and insert
136  * the key at the appropriate location. We call it a fastpath.
137  *
138  * Testing has revealed, though, that the fastpath can result in increased
139  * contention on the exclusive-lock on the rightmost leaf page. So we
140  * conditionally check if the lock is available. If it's not available
141  * then we simply abandon the fastpath and take the regular path. This
142  * makes sense because unavailability of the lock also signals that some
143  * other backend might be concurrently inserting into the page, thus
144  * reducing our chances to finding an insertion place in this page.
145  */
146 top:
147  fastpath = false;
148  offset = InvalidOffsetNumber;
150  {
151  Size itemsz;
152  Page page;
153  BTPageOpaque lpageop;
154 
155  /*
156  * Conditionally acquire exclusive lock on the buffer before doing any
157  * checks. If we don't get the lock, we simply follow slowpath. If we
158  * do get the lock, this ensures that the index state cannot change,
159  * as far as the rightmost part of the index is concerned.
160  */
161  buf = ReadBuffer(rel, RelationGetTargetBlock(rel));
162 
163  if (ConditionalLockBuffer(buf))
164  {
165  _bt_checkpage(rel, buf);
166 
167  page = BufferGetPage(buf);
168 
169  lpageop = (BTPageOpaque) PageGetSpecialPointer(page);
170  itemsz = IndexTupleSize(itup);
171  itemsz = MAXALIGN(itemsz); /* be safe, PageAddItem will do this
172  * but we need to be consistent */
173 
174  /*
175  * Check if the page is still the rightmost leaf page, has enough
176  * free space to accommodate the new tuple, and the insertion scan
177  * key is strictly greater than the first key on the page.
178  */
179  if (P_ISLEAF(lpageop) && P_RIGHTMOST(lpageop) &&
180  !P_IGNORE(lpageop) &&
181  (PageGetFreeSpace(page) > itemsz) &&
182  PageGetMaxOffsetNumber(page) >= P_FIRSTDATAKEY(lpageop) &&
183  _bt_compare(rel, indnkeyatts, itup_scankey, page,
184  P_FIRSTDATAKEY(lpageop)) > 0)
185  {
186  /*
187  * The right-most block should never have an incomplete split.
188  * But be paranoid and check for it anyway.
189  */
190  Assert(!P_INCOMPLETE_SPLIT(lpageop));
191  fastpath = true;
192  }
193  else
194  {
195  _bt_relbuf(rel, buf);
196 
197  /*
198  * Something did not work out. Just forget about the cached
199  * block and follow the normal path. It might be set again if
200  * the conditions are favourable.
201  */
203  }
204  }
205  else
206  {
207  ReleaseBuffer(buf);
208 
209  /*
210  * If someone's holding a lock, it's likely to change anyway, so
211  * don't try again until we get an updated rightmost leaf.
212  */
214  }
215  }
216 
217  if (!fastpath)
218  {
219  /* find the first page containing this key */
220  stack = _bt_search(rel, indnkeyatts, itup_scankey, false, &buf, BT_WRITE,
221  NULL);
222 
223  /* trade in our read lock for a write lock */
225  LockBuffer(buf, BT_WRITE);
226 
227  /*
228  * If the page was split between the time that we surrendered our read
229  * lock and acquired our write lock, then this page may no longer be
230  * the right place for the key we want to insert. In this case, we
231  * need to move right in the tree. See Lehman and Yao for an
232  * excruciatingly precise description.
233  */
234  buf = _bt_moveright(rel, buf, indnkeyatts, itup_scankey, false,
235  true, stack, BT_WRITE, NULL);
236  }
237 
238  /*
239  * If we're not allowing duplicates, make sure the key isn't already in
240  * the index.
241  *
242  * NOTE: obviously, _bt_check_unique can only detect keys that are already
243  * in the index; so it cannot defend against concurrent insertions of the
244  * same key. We protect against that by means of holding a write lock on
245  * the target page. Any other would-be inserter of the same key must
246  * acquire a write lock on the same target page, so only one would-be
247  * inserter can be making the check at one time. Furthermore, once we are
248  * past the check we hold write locks continuously until we have performed
249  * our insertion, so no later inserter can fail to see our insertion.
250  * (This requires some care in _bt_insertonpg.)
251  *
252  * If we must wait for another xact, we release the lock while waiting,
253  * and then must start over completely.
254  *
255  * For a partial uniqueness check, we don't wait for the other xact. Just
256  * let the tuple in and return false for possibly non-unique, or true for
257  * definitely unique.
258  */
259  if (checkUnique != UNIQUE_CHECK_NO)
260  {
261  TransactionId xwait;
262  uint32 speculativeToken;
263 
264  offset = _bt_binsrch(rel, buf, indnkeyatts, itup_scankey, false);
265  xwait = _bt_check_unique(rel, itup, heapRel, buf, offset, itup_scankey,
266  checkUnique, &is_unique, &speculativeToken);
267 
268  if (TransactionIdIsValid(xwait))
269  {
270  /* Have to wait for the other guy ... */
271  _bt_relbuf(rel, buf);
272 
273  /*
274  * If it's a speculative insertion, wait for it to finish (ie. to
275  * go ahead with the insertion, or kill the tuple). Otherwise
276  * wait for the transaction to finish as usual.
277  */
278  if (speculativeToken)
279  SpeculativeInsertionWait(xwait, speculativeToken);
280  else
281  XactLockTableWait(xwait, rel, &itup->t_tid, XLTW_InsertIndex);
282 
283  /* start over... */
284  if (stack)
285  _bt_freestack(stack);
286  goto top;
287  }
288  }
289 
290  if (checkUnique != UNIQUE_CHECK_EXISTING)
291  {
292  /*
293  * The only conflict predicate locking cares about for indexes is when
294  * an index tuple insert conflicts with an existing lock. Since the
295  * actual location of the insert is hard to predict because of the
296  * random search used to prevent O(N^2) performance when there are
297  * many duplicate entries, we can just use the "first valid" page.
298  * This reasoning also applies to INCLUDE indexes, whose extra
299  * attributes are not considered part of the key space.
300  */
301  CheckForSerializableConflictIn(rel, NULL, buf);
302  /* do the insertion */
303  _bt_findinsertloc(rel, &buf, &offset, indnkeyatts, itup_scankey, itup,
304  stack, heapRel);
305  _bt_insertonpg(rel, buf, InvalidBuffer, stack, itup, offset, false);
306  }
307  else
308  {
309  /* just release the buffer */
310  _bt_relbuf(rel, buf);
311  }
312 
313  /* be tidy */
314  if (stack)
315  _bt_freestack(stack);
316  _bt_freeskey(itup_scankey);
317 
318  return is_unique;
319 }
static TransactionId _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel, Buffer buf, OffsetNumber offset, ScanKey itup_scankey, IndexUniqueCheck checkUnique, bool *is_unique, uint32 *speculativeToken)
Definition: nbtinsert.c:341
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:87
void _bt_freestack(BTStack stack)
Definition: nbtutils.c:175
#define P_IGNORE(opaque)
Definition: nbtree.h:163
uint32 TransactionId
Definition: c.h:474
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:187
void _bt_freeskey(ScanKey skey)
Definition: nbtutils.c:166
ItemPointerData t_tid
Definition: itup.h:37
#define InvalidBuffer
Definition: buf.h:25
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3309
#define P_INCOMPLETE_SPLIT(opaque)
Definition: nbtree.h:165
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:353
#define RelationGetTargetBlock(relation)
Definition: rel.h:493
void CheckForSerializableConflictIn(Relation relation, HeapTuple tuple, Buffer buffer)
Definition: predicate.c:4280
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:68
Size PageGetFreeSpace(Page page)
Definition: bufpage.c:578
uint16 OffsetNumber
Definition: off.h:24
void _bt_checkpage(Relation rel, Buffer buf)
Definition: nbtpage.c:662
Buffer _bt_moveright(Relation rel, Buffer buf, int keysz, ScanKey scankey, bool nextkey, bool forupdate, BTStack stack, int access, Snapshot snapshot)
Definition: nbtsearch.c:214
void SpeculativeInsertionWait(TransactionId xid, uint32 token)
Definition: lmgr.c:711
static char * buf
Definition: pg_test_fsync.c:67
OffsetNumber _bt_binsrch(Relation rel, Buffer buf, int keysz, ScanKey scankey, bool nextkey)
Definition: nbtsearch.c:323
unsigned int uint32
Definition: c.h:325
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition: rel.h:426
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
static void _bt_findinsertloc(Relation rel, Buffer *bufptr, OffsetNumber *offsetptr, int keysz, ScanKey scankey, IndexTuple newtup, BTStack stack, Relation heapRel)
Definition: nbtinsert.c:642
bool ConditionalLockBuffer(Buffer buffer)
Definition: bufmgr.c:3572
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3546
BTStack _bt_search(Relation rel, int keysz, ScanKey scankey, bool nextkey, Buffer *bufP, int access, Snapshot snapshot)
Definition: nbtsearch.c:97
#define InvalidOffsetNumber
Definition: off.h:26
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:879
void XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid, XLTW_Oper oper)
Definition: lmgr.c:554
#define Assert(condition)
Definition: c.h:699
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:594
#define RelationSetTargetBlock(relation, targblock)
Definition: rel.h:500
size_t Size
Definition: c.h:433
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
#define InvalidBlockNumber
Definition: block.h:33
#define MAXALIGN(LEN)
Definition: c.h:652
static void _bt_insertonpg(Relation rel, Buffer buf, Buffer cbuf, BTStack stack, IndexTuple itup, OffsetNumber newitemoff, bool split_only_page)
Definition: nbtinsert.c:835
#define TransactionIdIsValid(xid)
Definition: transam.h:41
int32 _bt_compare(Relation rel, int keysz, ScanKey scankey, Page page, OffsetNumber offnum)
Definition: nbtsearch.c:428
ScanKey _bt_mkscankey(Relation rel, IndexTuple itup)
Definition: nbtutils.c:62
#define BT_WRITE
Definition: nbtree.h:301
int Buffer
Definition: buf.h:23
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:157
Pointer Page
Definition: bufpage.h:74
#define IndexTupleSize(itup)
Definition: itup.h:71
#define P_ISLEAF(opaque)
Definition: nbtree.h:158

◆ _bt_end_vacuum()

void _bt_end_vacuum ( Relation  rel)

Definition at line 1976 of file nbtutils.c.

References LockRelId::dbId, i, LockInfoData::lockRelId, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), BTVacInfo::num_vacuums, RelationData::rd_lockInfo, LockRelId::relId, BTOneVacInfo::relid, and BTVacInfo::vacuums.

Referenced by _bt_end_vacuum_callback(), and btbulkdelete().

1977 {
1978  int i;
1979 
1980  LWLockAcquire(BtreeVacuumLock, LW_EXCLUSIVE);
1981 
1982  /* Find the array entry */
1983  for (i = 0; i < btvacinfo->num_vacuums; i++)
1984  {
1985  BTOneVacInfo *vac = &btvacinfo->vacuums[i];
1986 
1987  if (vac->relid.relId == rel->rd_lockInfo.lockRelId.relId &&
1988  vac->relid.dbId == rel->rd_lockInfo.lockRelId.dbId)
1989  {
1990  /* Remove it by shifting down the last entry */
1991  *vac = btvacinfo->vacuums[btvacinfo->num_vacuums - 1];
1993  break;
1994  }
1995  }
1996 
1997  LWLockRelease(BtreeVacuumLock);
1998 }
LockRelId lockRelId
Definition: rel.h:44
static BTVacInfo * btvacinfo
Definition: nbtutils.c:1872
LockRelId relid
Definition: nbtutils.c:1860
Oid dbId
Definition: rel.h:39
BTOneVacInfo vacuums[FLEXIBLE_ARRAY_MEMBER]
Definition: nbtutils.c:1869
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1725
LockInfoData rd_lockInfo
Definition: rel.h:87
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1121
int num_vacuums
Definition: nbtutils.c:1867
int i
Oid relId
Definition: rel.h:38

◆ _bt_end_vacuum_callback()

void _bt_end_vacuum_callback ( int  code,
Datum  arg 
)

Definition at line 2004 of file nbtutils.c.

References _bt_end_vacuum(), and DatumGetPointer.

Referenced by btbulkdelete().

2005 {
2007 }
void _bt_end_vacuum(Relation rel)
Definition: nbtutils.c:1976
#define DatumGetPointer(X)
Definition: postgres.h:534
void * arg

◆ _bt_finish_split()

void _bt_finish_split ( Relation  rel,
Buffer  bbuf,
BTStack  stack 
)

Definition at line 1938 of file nbtinsert.c.

References _bt_getbuf(), _bt_insert_parent(), _bt_relbuf(), Assert, BT_WRITE, BTMetaPageData::btm_root, BTPageGetMeta, BTPageOpaqueData::btpo_next, BTREE_METAPAGE, BufferGetBlockNumber(), BufferGetPage, DEBUG1, elog, P_INCOMPLETE_SPLIT, P_LEFTMOST, P_RIGHTMOST, and PageGetSpecialPointer.

Referenced by _bt_findinsertloc(), _bt_getstackbuf(), and _bt_moveright().

1939 {
1940  Page lpage = BufferGetPage(lbuf);
1941  BTPageOpaque lpageop = (BTPageOpaque) PageGetSpecialPointer(lpage);
1942  Buffer rbuf;
1943  Page rpage;
1944  BTPageOpaque rpageop;
1945  bool was_root;
1946  bool was_only;
1947 
1948  Assert(P_INCOMPLETE_SPLIT(lpageop));
1949 
1950  /* Lock right sibling, the one missing the downlink */
1951  rbuf = _bt_getbuf(rel, lpageop->btpo_next, BT_WRITE);
1952  rpage = BufferGetPage(rbuf);
1953  rpageop = (BTPageOpaque) PageGetSpecialPointer(rpage);
1954 
1955  /* Could this be a root split? */
1956  if (!stack)
1957  {
1958  Buffer metabuf;
1959  Page metapg;
1960  BTMetaPageData *metad;
1961 
1962  /* acquire lock on the metapage */
1963  metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE);
1964  metapg = BufferGetPage(metabuf);
1965  metad = BTPageGetMeta(metapg);
1966 
1967  was_root = (metad->btm_root == BufferGetBlockNumber(lbuf));
1968 
1969  _bt_relbuf(rel, metabuf);
1970  }
1971  else
1972  was_root = false;
1973 
1974  /* Was this the only page on the level before split? */
1975  was_only = (P_LEFTMOST(lpageop) && P_RIGHTMOST(rpageop));
1976 
1977  elog(DEBUG1, "finishing incomplete split of %u/%u",
1979 
1980  _bt_insert_parent(rel, lbuf, rbuf, stack, was_root, was_only);
1981 }
BlockNumber btpo_next
Definition: nbtree.h:58
#define DEBUG1
Definition: elog.h:25
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:729
static void _bt_insert_parent(Relation rel, Buffer buf, Buffer rbuf, BTStack stack, bool is_root, bool is_only)
Definition: nbtinsert.c:1827
#define P_INCOMPLETE_SPLIT(opaque)
Definition: nbtree.h:165
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:68
#define BTPageGetMeta(p)
Definition: nbtree.h:112
#define P_LEFTMOST(opaque)
Definition: nbtree.h:156
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
#define BTREE_METAPAGE
Definition: nbtree.h:115
BlockNumber btm_root
Definition: nbtree.h:101
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:879
#define Assert(condition)
Definition: c.h:699
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2605
#define elog
Definition: elog.h:219
#define BT_WRITE
Definition: nbtree.h:301
int Buffer
Definition: buf.h:23
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:157
Pointer Page
Definition: bufpage.h:74

◆ _bt_first()

bool _bt_first ( IndexScanDesc  scan,
ScanDirection  dir 
)

Definition at line 538 of file nbtsearch.c.

References _bt_binsrch(), _bt_drop_lock_and_maybe_pin(), _bt_endpoint(), _bt_freestack(), _bt_initialize_more_data(), _bt_parallel_done(), _bt_parallel_readpage(), _bt_parallel_seize(), _bt_preprocess_keys(), _bt_readpage(), _bt_search(), _bt_steppage(), Assert, BT_READ, BTEqualStrategyNumber, BTGreaterEqualStrategyNumber, BTGreaterStrategyNumber, BTLessEqualStrategyNumber, BTLessStrategyNumber, BTORDER_PROC, BTScanPosInvalidate, BTScanPosIsValid, buf, BTScanPosData::buf, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferIsValid, cur, BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, DatumGetPointer, elog, ERROR, get_opfamily_proc(), i, index_getprocinfo(), INDEX_MAX_KEYS, IndexScanDescData::indexRelation, InvalidBlockNumber, InvalidOid, InvalidStrategy, BTScanPosData::itemIndex, BTScanPosData::items, BTScanOpaqueData::keyData, LockBuffer(), BTScanOpaqueData::numberOfKeys, OffsetNumberPrev, IndexScanDescData::opaque, P_NONE, IndexScanDescData::parallel_scan, pgstat_count_index_scan, PredicateLockPage(), PredicateLockRelation(), BTScanOpaqueData::qual_ok, RelationData::rd_opcintype, RelationData::rd_opfamily, RegProcedureIsValid, RelationGetRelationName, ScanDirectionIsBackward, ScanDirectionIsForward, ScanKeyEntryInitialize(), ScanKeyEntryInitializeWithInfo(), ScanKeyData::sk_argument, ScanKeyData::sk_attno, SK_BT_DESC, SK_BT_NULLS_FIRST, ScanKeyData::sk_collation, ScanKeyData::sk_flags, SK_ISNULL, SK_ROW_END, SK_ROW_HEADER, SK_ROW_MEMBER, SK_SEARCHNOTNULL, ScanKeyData::sk_strategy, ScanKeyData::sk_subtype, status(), HeapTupleData::t_self, IndexScanDescData::xs_ctup, IndexScanDescData::xs_itup, IndexScanDescData::xs_snapshot, and IndexScanDescData::xs_want_itup.

Referenced by btgetbitmap(), and btgettuple().

539 {
540  Relation rel = scan->indexRelation;
541  BTScanOpaque so = (BTScanOpaque) scan->opaque;
542  Buffer buf;
543  BTStack stack;
544  OffsetNumber offnum;
545  StrategyNumber strat;
546  bool nextkey;
547  bool goback;
548  ScanKey startKeys[INDEX_MAX_KEYS];
549  ScanKeyData scankeys[INDEX_MAX_KEYS];
550  ScanKeyData notnullkeys[INDEX_MAX_KEYS];
551  int keysCount = 0;
552  int i;
553  bool status = true;
554  StrategyNumber strat_total;
555  BTScanPosItem *currItem;
556  BlockNumber blkno;
557 
559 
561 
562  /*
563  * Examine the scan keys and eliminate any redundant keys; also mark the
564  * keys that must be matched to continue the scan.
565  */
566  _bt_preprocess_keys(scan);
567 
568  /*
569  * Quit now if _bt_preprocess_keys() discovered that the scan keys can
570  * never be satisfied (eg, x == 1 AND x > 2).
571  */
572  if (!so->qual_ok)
573  return false;
574 
575  /*
576  * For parallel scans, get the starting page from shared state. If the
577  * scan has not started, proceed to find out first leaf page in the usual
578  * way while keeping other participating processes waiting. If the scan
579  * has already begun, use the page number from the shared structure.
580  */
581  if (scan->parallel_scan != NULL)
582  {
583  status = _bt_parallel_seize(scan, &blkno);
584  if (!status)
585  return false;
586  else if (blkno == P_NONE)
587  {
588  _bt_parallel_done(scan);
589  return false;
590  }
591  else if (blkno != InvalidBlockNumber)
592  {
593  if (!_bt_parallel_readpage(scan, blkno, dir))
594  return false;
595  goto readcomplete;
596  }
597  }
598 
599  /*----------
600  * Examine the scan keys to discover where we need to start the scan.
601  *
602  * We want to identify the keys that can be used as starting boundaries;
603  * these are =, >, or >= keys for a forward scan or =, <, <= keys for
604  * a backwards scan. We can use keys for multiple attributes so long as
605  * the prior attributes had only =, >= (resp. =, <=) keys. Once we accept
606  * a > or < boundary or find an attribute with no boundary (which can be
607  * thought of as the same as "> -infinity"), we can't use keys for any
608  * attributes to its right, because it would break our simplistic notion
609  * of what initial positioning strategy to use.
610  *
611  * When the scan keys include cross-type operators, _bt_preprocess_keys
612  * may not be able to eliminate redundant keys; in such cases we will
613  * arbitrarily pick a usable one for each attribute. This is correct
614  * but possibly not optimal behavior. (For example, with keys like
615  * "x >= 4 AND x >= 5" we would elect to scan starting at x=4 when
616  * x=5 would be more efficient.) Since the situation only arises given
617  * a poorly-worded query plus an incomplete opfamily, live with it.
618  *
619  * When both equality and inequality keys appear for a single attribute
620  * (again, only possible when cross-type operators appear), we *must*
621  * select one of the equality keys for the starting point, because
622  * _bt_checkkeys() will stop the scan as soon as an equality qual fails.
623  * For example, if we have keys like "x >= 4 AND x = 10" and we elect to
624  * start at x=4, we will fail and stop before reaching x=10. If multiple
625  * equality quals survive preprocessing, however, it doesn't matter which
626  * one we use --- by definition, they are either redundant or
627  * contradictory.
628  *
629  * Any regular (not SK_SEARCHNULL) key implies a NOT NULL qualifier.
630  * If the index stores nulls at the end of the index we'll be starting
631  * from, and we have no boundary key for the column (which means the key
632  * we deduced NOT NULL from is an inequality key that constrains the other
633  * end of the index), then we cons up an explicit SK_SEARCHNOTNULL key to
634  * use as a boundary key. If we didn't do this, we might find ourselves
635  * traversing a lot of null entries at the start of the scan.
636  *
637  * In this loop, row-comparison keys are treated the same as keys on their
638  * first (leftmost) columns. We'll add on lower-order columns of the row
639  * comparison below, if possible.
640  *
641  * The selected scan keys (at most one per index column) are remembered by
642  * storing their addresses into the local startKeys[] array.
643  *----------
644  */
645  strat_total = BTEqualStrategyNumber;
646  if (so->numberOfKeys > 0)
647  {
648  AttrNumber curattr;
649  ScanKey chosen;
650  ScanKey impliesNN;
651  ScanKey cur;
652 
653  /*
654  * chosen is the so-far-chosen key for the current attribute, if any.
655  * We don't cast the decision in stone until we reach keys for the
656  * next attribute.
657  */
658  curattr = 1;
659  chosen = NULL;
660  /* Also remember any scankey that implies a NOT NULL constraint */
661  impliesNN = NULL;
662 
663  /*
664  * Loop iterates from 0 to numberOfKeys inclusive; we use the last
665  * pass to handle after-last-key processing. Actual exit from the
666  * loop is at one of the "break" statements below.
667  */
668  for (cur = so->keyData, i = 0;; cur++, i++)
669  {
670  if (i >= so->numberOfKeys || cur->sk_attno != curattr)
671  {
672  /*
673  * Done looking at keys for curattr. If we didn't find a
674  * usable boundary key, see if we can deduce a NOT NULL key.
675  */
676  if (chosen == NULL && impliesNN != NULL &&
677  ((impliesNN->sk_flags & SK_BT_NULLS_FIRST) ?
680  {
681  /* Yes, so build the key in notnullkeys[keysCount] */
682  chosen = &notnullkeys[keysCount];
683  ScanKeyEntryInitialize(chosen,
685  (impliesNN->sk_flags &
687  curattr,
688  ((impliesNN->sk_flags & SK_BT_NULLS_FIRST) ?
691  InvalidOid,
692  InvalidOid,
693  InvalidOid,
694  (Datum) 0);
695  }
696 
697  /*
698  * If we still didn't find a usable boundary key, quit; else
699  * save the boundary key pointer in startKeys.
700  */
701  if (chosen == NULL)
702  break;
703  startKeys[keysCount++] = chosen;
704 
705  /*
706  * Adjust strat_total, and quit if we have stored a > or <
707  * key.
708  */
709  strat = chosen->sk_strategy;
710  if (strat != BTEqualStrategyNumber)
711  {
712  strat_total = strat;
713  if (strat == BTGreaterStrategyNumber ||
714  strat == BTLessStrategyNumber)
715  break;
716  }
717 
718  /*
719  * Done if that was the last attribute, or if next key is not
720  * in sequence (implying no boundary key is available for the
721  * next attribute).
722  */
723  if (i >= so->numberOfKeys ||
724  cur->sk_attno != curattr + 1)
725  break;
726 
727  /*
728  * Reset for next attr.
729  */
730  curattr = cur->sk_attno;
731  chosen = NULL;
732  impliesNN = NULL;
733  }
734 
735  /*
736  * Can we use this key as a starting boundary for this attr?
737  *
738  * If not, does it imply a NOT NULL constraint? (Because
739  * SK_SEARCHNULL keys are always assigned BTEqualStrategyNumber,
740  * *any* inequality key works for that; we need not test.)
741  */
742  switch (cur->sk_strategy)
743  {
746  if (chosen == NULL)
747  {
748  if (ScanDirectionIsBackward(dir))
749  chosen = cur;
750  else
751  impliesNN = cur;
752  }
753  break;
755  /* override any non-equality choice */
756  chosen = cur;
757  break;
760  if (chosen == NULL)
761  {
762  if (ScanDirectionIsForward(dir))
763  chosen = cur;
764  else
765  impliesNN = cur;
766  }
767  break;
768  }
769  }
770  }
771 
772  /*
773  * If we found no usable boundary keys, we have to start from one end of
774  * the tree. Walk down that edge to the first or last key, and scan from
775  * there.
776  */
777  if (keysCount == 0)
778  {
779  bool match;
780 
781  match = _bt_endpoint(scan, dir);
782 
783  if (!match)
784  {
785  /* No match, so mark (parallel) scan finished */
786  _bt_parallel_done(scan);
787  }
788 
789  return match;
790  }
791 
792  /*
793  * We want to start the scan somewhere within the index. Set up an
794  * insertion scankey we can use to search for the boundary point we
795  * identified above. The insertion scankey is built in the local
796  * scankeys[] array, using the keys identified by startKeys[].
797  */
798  Assert(keysCount <= INDEX_MAX_KEYS);
799  for (i = 0; i < keysCount; i++)
800  {
801  ScanKey cur = startKeys[i];
802 
803  Assert(cur->sk_attno == i + 1);
804 
805  if (cur->sk_flags & SK_ROW_HEADER)
806  {
807  /*
808  * Row comparison header: look to the first row member instead.
809  *
810  * The member scankeys are already in insertion format (ie, they
811  * have sk_func = 3-way-comparison function), but we have to watch
812  * out for nulls, which _bt_preprocess_keys didn't check. A null
813  * in the first row member makes the condition unmatchable, just
814  * like qual_ok = false.
815  */
816  ScanKey subkey = (ScanKey) DatumGetPointer(cur->sk_argument);
817 
818  Assert(subkey->sk_flags & SK_ROW_MEMBER);
819  if (subkey->sk_flags & SK_ISNULL)
820  {
821  _bt_parallel_done(scan);
822  return false;
823  }
824  memcpy(scankeys + i, subkey, sizeof(ScanKeyData));
825 
826  /*
827  * If the row comparison is the last positioning key we accepted,
828  * try to add additional keys from the lower-order row members.
829  * (If we accepted independent conditions on additional index
830  * columns, we use those instead --- doesn't seem worth trying to
831  * determine which is more restrictive.) Note that this is OK
832  * even if the row comparison is of ">" or "<" type, because the
833  * condition applied to all but the last row member is effectively
834  * ">=" or "<=", and so the extra keys don't break the positioning
835  * scheme. But, by the same token, if we aren't able to use all
836  * the row members, then the part of the row comparison that we
837  * did use has to be treated as just a ">=" or "<=" condition, and
838  * so we'd better adjust strat_total accordingly.
839  */
840  if (i == keysCount - 1)
841  {
842  bool used_all_subkeys = false;
843 
844  Assert(!(subkey->sk_flags & SK_ROW_END));
845  for (;;)
846  {
847  subkey++;
848  Assert(subkey->sk_flags & SK_ROW_MEMBER);
849  if (subkey->sk_attno != keysCount + 1)
850  break; /* out-of-sequence, can't use it */
851  if (subkey->sk_strategy != cur->sk_strategy)
852  break; /* wrong direction, can't use it */
853  if (subkey->sk_flags & SK_ISNULL)
854  break; /* can't use null keys */
855  Assert(keysCount < INDEX_MAX_KEYS);
856  memcpy(scankeys + keysCount, subkey, sizeof(ScanKeyData));
857  keysCount++;
858  if (subkey->sk_flags & SK_ROW_END)
859  {
860  used_all_subkeys = true;
861  break;
862  }
863  }
864  if (!used_all_subkeys)
865  {
866  switch (strat_total)
867  {
869  strat_total = BTLessEqualStrategyNumber;
870  break;
872  strat_total = BTGreaterEqualStrategyNumber;
873  break;
874  }
875  }
876  break; /* done with outer loop */
877  }
878  }
879  else
880  {
881  /*
882  * Ordinary comparison key. Transform the search-style scan key
883  * to an insertion scan key by replacing the sk_func with the
884  * appropriate btree comparison function.
885  *
886  * If scankey operator is not a cross-type comparison, we can use
887  * the cached comparison function; otherwise gotta look it up in
888  * the catalogs. (That can't lead to infinite recursion, since no
889  * indexscan initiated by syscache lookup will use cross-data-type
890  * operators.)
891  *
892  * We support the convention that sk_subtype == InvalidOid means
893  * the opclass input type; this is a hack to simplify life for
894  * ScanKeyInit().
895  */
896  if (cur->sk_subtype == rel->rd_opcintype[i] ||
897  cur->sk_subtype == InvalidOid)
898  {
899  FmgrInfo *procinfo;
900 
901  procinfo = index_getprocinfo(rel, cur->sk_attno, BTORDER_PROC);
903  cur->sk_flags,
904  cur->sk_attno,
906  cur->sk_subtype,
907  cur->sk_collation,
908  procinfo,
909  cur->sk_argument);
910  }
911  else
912  {
913  RegProcedure cmp_proc;
914 
915  cmp_proc = get_opfamily_proc(rel->rd_opfamily[i],
916  rel->rd_opcintype[i],
917  cur->sk_subtype,
918  BTORDER_PROC);
919  if (!RegProcedureIsValid(cmp_proc))
920  elog(ERROR, "missing support function %d(%u,%u) for attribute %d of index \"%s\"",
921  BTORDER_PROC, rel->rd_opcintype[i], cur->sk_subtype,
922  cur->sk_attno, RelationGetRelationName(rel));
923  ScanKeyEntryInitialize(scankeys + i,
924  cur->sk_flags,
925  cur->sk_attno,
927  cur->sk_subtype,
928  cur->sk_collation,
929  cmp_proc,
930  cur->sk_argument);
931  }
932  }
933  }
934 
935  /*----------
936  * Examine the selected initial-positioning strategy to determine exactly
937  * where we need to start the scan, and set flag variables to control the
938  * code below.
939  *
940  * If nextkey = false, _bt_search and _bt_binsrch will locate the first
941  * item >= scan key. If nextkey = true, they will locate the first
942  * item > scan key.
943  *
944  * If goback = true, we will then step back one item, while if
945  * goback = false, we will start the scan on the located item.
946  *----------
947  */
948  switch (strat_total)
949  {
951 
952  /*
953  * Find first item >= scankey, then back up one to arrive at last
954  * item < scankey. (Note: this positioning strategy is only used
955  * for a backward scan, so that is always the correct starting
956  * position.)
957  */
958  nextkey = false;
959  goback = true;
960  break;
961 
963 
964  /*
965  * Find first item > scankey, then back up one to arrive at last
966  * item <= scankey. (Note: this positioning strategy is only used
967  * for a backward scan, so that is always the correct starting
968  * position.)
969  */
970  nextkey = true;
971  goback = true;
972  break;
973 
975 
976  /*
977  * If a backward scan was specified, need to start with last equal
978  * item not first one.
979  */
980  if (ScanDirectionIsBackward(dir))
981  {
982  /*
983  * This is the same as the <= strategy. We will check at the
984  * end whether the found item is actually =.
985  */
986  nextkey = true;
987  goback = true;
988  }
989  else
990  {
991  /*
992  * This is the same as the >= strategy. We will check at the
993  * end whether the found item is actually =.
994  */
995  nextkey = false;
996  goback = false;
997  }
998  break;
999 
1001 
1002  /*
1003  * Find first item >= scankey. (This is only used for forward
1004  * scans.)
1005  */
1006  nextkey = false;
1007  goback = false;
1008  break;
1009 
1011 
1012  /*
1013  * Find first item > scankey. (This is only used for forward
1014  * scans.)
1015  */
1016  nextkey = true;
1017  goback = false;
1018  break;
1019 
1020  default:
1021  /* can't get here, but keep compiler quiet */
1022  elog(ERROR, "unrecognized strat_total: %d", (int) strat_total);
1023  return false;
1024  }
1025 
1026  /*
1027  * Use the manufactured insertion scan key to descend the tree and
1028  * position ourselves on the target leaf page.
1029  */
1030  stack = _bt_search(rel, keysCount, scankeys, nextkey, &buf, BT_READ,
1031  scan->xs_snapshot);
1032 
1033  /* don't need to keep the stack around... */
1034  _bt_freestack(stack);
1035 
1036  if (!BufferIsValid(buf))
1037  {
1038  /*
1039  * We only get here if the index is completely empty. Lock relation
1040  * because nothing finer to lock exists.
1041  */
1042  PredicateLockRelation(rel, scan->xs_snapshot);
1043 
1044  /*
1045  * mark parallel scan as done, so that all the workers can finish
1046  * their scan
1047  */
1048  _bt_parallel_done(scan);
1050 
1051  return false;
1052  }
1053  else
1055  scan->xs_snapshot);
1056 
1057  _bt_initialize_more_data(so, dir);
1058 
1059  /* position to the precise item on the page */
1060  offnum = _bt_binsrch(rel, buf, keysCount, scankeys, nextkey);
1061 
1062  /*
1063  * If nextkey = false, we are positioned at the first item >= scan key, or
1064  * possibly at the end of a page on which all the existing items are less
1065  * than the scan key and we know that everything on later pages is greater
1066  * than or equal to scan key.
1067  *
1068  * If nextkey = true, we are positioned at the first item > scan key, or
1069  * possibly at the end of a page on which all the existing items are less
1070  * than or equal to the scan key and we know that everything on later
1071  * pages is greater than scan key.
1072  *
1073  * The actually desired starting point is either this item or the prior
1074  * one, or in the end-of-page case it's the first item on the next page or
1075  * the last item on this page. Adjust the starting offset if needed. (If
1076  * this results in an offset before the first item or after the last one,
1077  * _bt_readpage will report no items found, and then we'll step to the
1078  * next page as needed.)
1079  */
1080  if (goback)
1081  offnum = OffsetNumberPrev(offnum);
1082 
1083  /* remember which buffer we have pinned, if any */
1085  so->currPos.buf = buf;
1086 
1087  /*
1088  * Now load data from the first page of the scan.
1089  */
1090  if (!_bt_readpage(scan, dir, offnum))
1091  {
1092  /*
1093  * There's no actually-matching data on this page. Try to advance to
1094  * the next page. Return false if there's no matching data at all.
1095  */
1097  if (!_bt_steppage(scan, dir))
1098  return false;
1099  }
1100  else
1101  {
1102  /* Drop the lock, and maybe the pin, on the current page */
1104  }
1105 
1106 readcomplete:
1107  /* OK, itemIndex says what to return */
1108  currItem = &so->currPos.items[so->currPos.itemIndex];
1109  scan->xs_ctup.t_self = currItem->heapTid;
1110  if (scan->xs_want_itup)
1111  scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset);
1112 
1113  return true;
1114 }
ParallelIndexScanDesc parallel_scan
Definition: relscan.h:141
#define InvalidStrategy
Definition: stratnum.h:24
Oid sk_subtype
Definition: skey.h:69
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:87
Definition: fmgr.h:56
#define SK_ROW_MEMBER
Definition: skey.h:118
void _bt_freestack(BTStack stack)
Definition: nbtutils.c:175
void PredicateLockPage(Relation relation, BlockNumber blkno, Snapshot snapshot)
Definition: predicate.c:2475
#define BTORDER_PROC
Definition: nbtree.h:290
#define BTGreaterStrategyNumber
Definition: stratnum.h:33
FmgrInfo * index_getprocinfo(Relation irel, AttrNumber attnum, uint16 procnum)
Definition: indexam.c:855
#define ScanDirectionIsForward(direction)
Definition: sdir.h:55
IndexTuple xs_itup
Definition: relscan.h:115
BTScanPosItem items[MaxIndexTuplesPerPage]
Definition: nbtree.h:390
void ScanKeyEntryInitializeWithInfo(ScanKey entry, int flags, AttrNumber attributeNumber, StrategyNumber strategy, Oid subtype, Oid collation, FmgrInfo *finfo, Datum argument)
Definition: scankey.c:101
void _bt_parallel_done(IndexScanDesc scan)
Definition: nbtree.c:720
static bool _bt_parallel_readpage(IndexScanDesc scan, BlockNumber blkno, ScanDirection dir)
Definition: nbtsearch.c:1627
static void _bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp)
Definition: nbtsearch.c:57
int itemIndex
Definition: nbtree.h:388
void PredicateLockRelation(Relation relation, Snapshot snapshot)
Definition: predicate.c:2452
char * currTuples
Definition: nbtree.h:462
regproc RegProcedure
Definition: c.h:472
#define P_NONE
Definition: nbtree.h:150
Snapshot xs_snapshot
Definition: relscan.h:92
struct cursor * cur
Definition: ecpg.c:28
uint16 StrategyNumber
Definition: stratnum.h:22
#define BTScanPosIsValid(scanpos)
Definition: nbtree.h:412
uint32 BlockNumber
Definition: block.h:31
#define BTLessEqualStrategyNumber
Definition: stratnum.h:30
Relation indexRelation
Definition: relscan.h:91
uint16 OffsetNumber
Definition: off.h:24
#define SK_ROW_END
Definition: skey.h:119
#define ScanDirectionIsBackward(direction)
Definition: sdir.h:41
#define BT_READ
Definition: nbtree.h:300
#define ERROR
Definition: elog.h:43
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:479
StrategyNumber sk_strategy
Definition: skey.h:68
ItemPointerData t_self
Definition: htup.h:65
static char * buf
Definition: pg_test_fsync.c:67
void ScanKeyEntryInitialize(ScanKey entry, int flags, AttrNumber attributeNumber, StrategyNumber strategy, Oid subtype, Oid collation, RegProcedure procedure, Datum argument)
Definition: scankey.c:32
ScanKeyData * ScanKey
Definition: skey.h:75
static bool _bt_endpoint(IndexScanDesc scan, ScanDirection dir)
Definition: nbtsearch.c:1858
#define RegProcedureIsValid(p)
Definition: c.h:607
IndexTupleData * IndexTuple
Definition: itup.h:53
OffsetNumber _bt_binsrch(Relation rel, Buffer buf, int keysz, ScanKey scankey, bool nextkey)
Definition: nbtsearch.c:323
#define RelationGetRelationName(relation)
Definition: rel.h:441
#define pgstat_count_index_scan(rel)
Definition: pgstat.h:1291
#define SK_SEARCHNOTNULL
Definition: skey.h:122
bool _bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno)
Definition: nbtree.c:639
Oid * rd_opfamily
Definition: rel.h:154
#define SK_ISNULL
Definition: skey.h:115
#define SK_ROW_HEADER
Definition: skey.h:117
void _bt_preprocess_keys(IndexScanDesc scan)
Definition: nbtutils.c:756
#define SK_BT_NULLS_FIRST
Definition: nbtree.h:490
static void _bt_initialize_more_data(BTScanOpaque so, ScanDirection dir)
Definition: nbtsearch.c:1948
uintptr_t Datum
Definition: postgres.h:367
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3546
BTStack _bt_search(Relation rel, int keysz, ScanKey scankey, bool nextkey, Buffer *bufP, int access, Snapshot snapshot)
Definition: nbtsearch.c:97
#define InvalidOid
Definition: postgres_ext.h:36
bool xs_want_itup
Definition: relscan.h:97
int sk_flags
Definition: skey.h:66
#define Assert(condition)
Definition: c.h:699
#define SK_BT_DESC
Definition: nbtree.h:489
Oid get_opfamily_proc(Oid opfamily, Oid lefttype, Oid righttype, int16 procnum)
Definition: lsyscache.c:744
HeapTupleData xs_ctup
Definition: relscan.h:121
#define BTScanPosInvalidate(scanpos)
Definition: nbtree.h:418
#define INDEX_MAX_KEYS
#define InvalidBlockNumber
Definition: block.h:33
#define OffsetNumberPrev(offsetNumber)
Definition: off.h:55
int numberOfKeys
Definition: nbtree.h:441
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
#define DatumGetPointer(X)
Definition: postgres.h:534
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2605
BTScanPosData currPos
Definition: nbtree.h:475
ScanKey keyData
Definition: nbtree.h:442
Oid sk_collation
Definition: skey.h:70
int i
static bool _bt_steppage(IndexScanDesc scan, ScanDirection dir)
Definition: nbtsearch.c:1344
#define elog
Definition: elog.h:219
Buffer buf
Definition: nbtree.h:358
Oid * rd_opcintype
Definition: rel.h:155
static void static void status(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:225
#define BTLessStrategyNumber
Definition: stratnum.h:29
int Buffer
Definition: buf.h:23
Datum sk_argument
Definition: skey.h:72
static bool _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
Definition: nbtsearch.c:1186
int16 AttrNumber
Definition: attnum.h:21
#define BTEqualStrategyNumber
Definition: stratnum.h:31
#define BTGreaterEqualStrategyNumber
Definition: stratnum.h:32
AttrNumber sk_attno
Definition: skey.h:67

◆ _bt_freeskey()

void _bt_freeskey ( ScanKey  skey)

Definition at line 166 of file nbtutils.c.

References pfree().

Referenced by _bt_doinsert(), _bt_load(), tuplesort_begin_cluster(), and tuplesort_begin_index_btree().

167 {
168  pfree(skey);
169 }
void pfree(void *pointer)
Definition: mcxt.c:1031

◆ _bt_freestack()

void _bt_freestack ( BTStack  stack)

Definition at line 175 of file nbtutils.c.

References BTStackData::bts_parent, and pfree().

Referenced by _bt_doinsert(), and _bt_first().

176 {
177  BTStack ostack;
178 
179  while (stack != NULL)
180  {
181  ostack = stack;
182  stack = stack->bts_parent;
183  pfree(ostack);
184  }
185 }
void pfree(void *pointer)
Definition: mcxt.c:1031
struct BTStackData * bts_parent
Definition: nbtree.h:318

◆ _bt_get_endpoint()

Buffer _bt_get_endpoint ( Relation  rel,
uint32  level,
bool  rightmost,
Snapshot  snapshot 
)

Definition at line 1776 of file nbtsearch.c.

References _bt_getroot(), _bt_gettrueroot(), _bt_relandgetbuf(), BT_READ, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_next, BTreeInnerTupleGetDownLink, buf, BufferGetPage, BufferIsValid, elog, ERROR, InvalidBuffer, BTPageOpaqueData::level, P_FIRSTDATAKEY, P_IGNORE, P_NONE, P_RIGHTMOST, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageGetSpecialPointer, RelationGetRelationName, and TestForOldSnapshot().

Referenced by _bt_endpoint(), and _bt_insert_parent().

1778 {
1779  Buffer buf;
1780  Page page;
1781  BTPageOpaque opaque;
1782  OffsetNumber offnum;
1783  BlockNumber blkno;
1784  IndexTuple itup;
1785 
1786  /*
1787  * If we are looking for a leaf page, okay to descend from fast root;
1788  * otherwise better descend from true root. (There is no point in being
1789  * smarter about intermediate levels.)
1790  */
1791  if (level == 0)
1792  buf = _bt_getroot(rel, BT_READ);
1793  else
1794  buf = _bt_gettrueroot(rel);
1795 
1796  if (!BufferIsValid(buf))
1797  return InvalidBuffer;
1798 
1799  page = BufferGetPage(buf);
1800  TestForOldSnapshot(snapshot, rel, page);
1801  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1802 
1803  for (;;)
1804  {
1805  /*
1806  * If we landed on a deleted page, step right to find a live page
1807  * (there must be one). Also, if we want the rightmost page, step
1808  * right if needed to get to it (this could happen if the page split
1809  * since we obtained a pointer to it).
1810  */
1811  while (P_IGNORE(opaque) ||
1812  (rightmost && !P_RIGHTMOST(opaque)))
1813  {
1814  blkno = opaque->btpo_next;
1815  if (blkno == P_NONE)
1816  elog(ERROR, "fell off the end of index \"%s\"",
1818  buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
1819  page = BufferGetPage(buf);
1820  TestForOldSnapshot(snapshot, rel, page);
1821  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1822  }
1823 
1824  /* Done? */
1825  if (opaque->btpo.level == level)
1826  break;
1827  if (opaque->btpo.level < level)
1828  elog(ERROR, "btree level %u not found in index \"%s\"",
1829  level, RelationGetRelationName(rel));
1830 
1831  /* Descend to leftmost or rightmost child page */
1832  if (rightmost)
1833  offnum = PageGetMaxOffsetNumber(page);
1834  else
1835  offnum = P_FIRSTDATAKEY(opaque);
1836 
1837  itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
1838  blkno = BTreeInnerTupleGetDownLink(itup);
1839 
1840  buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
1841  page = BufferGetPage(buf);
1842  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1843  }
1844 
1845  return buf;
1846 }
BlockNumber btpo_next
Definition: nbtree.h:58
static void TestForOldSnapshot(Snapshot snapshot, Relation relation, Page page)
Definition: bufmgr.h:265
#define P_IGNORE(opaque)
Definition: nbtree.h:163
#define BTreeInnerTupleGetDownLink(itup)
Definition: nbtree.h:224
Buffer _bt_relandgetbuf(Relation rel, Buffer obuf, BlockNumber blkno, int access)
Definition: nbtpage.c:860
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:187
union BTPageOpaqueData::@46 btpo
#define P_NONE
Definition: nbtree.h:150
#define InvalidBuffer
Definition: buf.h:25
uint32 BlockNumber
Definition: block.h:31
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:353
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:68
uint16 OffsetNumber
Definition: off.h:24
#define BT_READ
Definition: nbtree.h:300
#define ERROR
Definition: elog.h:43
static char * buf
Definition: pg_test_fsync.c:67
IndexTupleData * IndexTuple
Definition: itup.h:53
#define RelationGetRelationName(relation)
Definition: rel.h:441
Buffer _bt_getroot(Relation rel, int access)
Definition: nbtpage.c:252
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:231
uint32 level
Definition: nbtree.h:61
Buffer _bt_gettrueroot(Relation rel)
Definition: nbtpage.c:498
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
#define elog
Definition: elog.h:219
int Buffer
Definition: buf.h:23
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:157
#define PageGetItem(page, itemId)
Definition: bufpage.h:336
Pointer Page
Definition: bufpage.h:74

◆ _bt_getbuf()

Buffer _bt_getbuf ( Relation  rel,
BlockNumber  blkno,
int  access 
)

Definition at line 729 of file nbtpage.c.

References _bt_checkpage(), _bt_log_reuse_page(), _bt_page_recyclable(), _bt_pageinit(), _bt_relbuf(), Assert, BT_WRITE, BTPageOpaqueData::btpo, buf, BufferGetPage, BufferGetPageSize, ConditionalLockBuffer(), DEBUG2, elog, ExclusiveLock, GetFreeIndexPage(), InvalidBlockNumber, LockBuffer(), LockRelationForExtension(), P_NEW, PageGetSpecialPointer, PageIsNew, ReadBuffer(), RELATION_IS_LOCAL, RelationNeedsWAL, ReleaseBuffer(), UnlockRelationForExtension(), BTPageOpaqueData::xact, and XLogStandbyInfoActive.

Referenced by _bt_finish_split(), _bt_getroot(), _bt_getrootheight(), _bt_getstackbuf(), _bt_gettrueroot(), _bt_insertonpg(), _bt_is_page_halfdead(), _bt_killitems(), _bt_lock_branch_parent(), _bt_moveright(), _bt_newroot(), _bt_pagedel(), _bt_readnextpage(), _bt_split(), _bt_unlink_halfdead_page(), _bt_update_meta_cleanup_info(), _bt_vacuum_needs_cleanup(), and _bt_walk_left().

730 {
731  Buffer buf;
732 
733  if (blkno != P_NEW)
734  {
735  /* Read an existing block of the relation */
736  buf = ReadBuffer(rel, blkno);
737  LockBuffer(buf, access);
738  _bt_checkpage(rel, buf);
739  }
740  else
741  {
742  bool needLock;
743  Page page;
744 
745  Assert(access == BT_WRITE);
746 
747  /*
748  * First see if the FSM knows of any free pages.
749  *
750  * We can't trust the FSM's report unreservedly; we have to check that
751  * the page is still free. (For example, an already-free page could
752  * have been re-used between the time the last VACUUM scanned it and
753  * the time the VACUUM made its FSM updates.)
754  *
755  * In fact, it's worse than that: we can't even assume that it's safe
756  * to take a lock on the reported page. If somebody else has a lock
757  * on it, or even worse our own caller does, we could deadlock. (The
758  * own-caller scenario is actually not improbable. Consider an index
759  * on a serial or timestamp column. Nearly all splits will be at the
760  * rightmost page, so it's entirely likely that _bt_split will call us
761  * while holding a lock on the page most recently acquired from FSM. A
762  * VACUUM running concurrently with the previous split could well have
763  * placed that page back in FSM.)
764  *
765  * To get around that, we ask for only a conditional lock on the
766  * reported page. If we fail, then someone else is using the page,
767  * and we may reasonably assume it's not free. (If we happen to be
768  * wrong, the worst consequence is the page will be lost to use till
769  * the next VACUUM, which is no big problem.)
770  */
771  for (;;)
772  {
773  blkno = GetFreeIndexPage(rel);
774  if (blkno == InvalidBlockNumber)
775  break;
776  buf = ReadBuffer(rel, blkno);
777  if (ConditionalLockBuffer(buf))
778  {
779  page = BufferGetPage(buf);
780  if (_bt_page_recyclable(page))
781  {
782  /*
783  * If we are generating WAL for Hot Standby then create a
784  * WAL record that will allow us to conflict with queries
785  * running on standby.
786  */
788  {
790 
791  _bt_log_reuse_page(rel, blkno, opaque->btpo.xact);
792  }
793 
794  /* Okay to use page. Re-initialize and return it */
795  _bt_pageinit(page, BufferGetPageSize(buf));
796  return buf;
797  }
798  elog(DEBUG2, "FSM returned nonrecyclable page");
799  _bt_relbuf(rel, buf);
800  }
801  else
802  {
803  elog(DEBUG2, "FSM returned nonlockable page");
804  /* couldn't get lock, so just drop pin */
805  ReleaseBuffer(buf);
806  }
807  }
808 
809  /*
810  * Extend the relation by one page.
811  *
812  * We have to use a lock to ensure no one else is extending the rel at
813  * the same time, else we will both try to initialize the same new
814  * page. We can skip locking for new or temp relations, however,
815  * since no one else could be accessing them.
816  */
817  needLock = !RELATION_IS_LOCAL(rel);
818 
819  if (needLock)
821 
822  buf = ReadBuffer(rel, P_NEW);
823 
824  /* Acquire buffer lock on new page */
825  LockBuffer(buf, BT_WRITE);
826 
827  /*
828  * Release the file-extension lock; it's now OK for someone else to
829  * extend the relation some more. Note that we cannot release this
830  * lock before we have buffer lock on the new page, or we risk a race
831  * condition against btvacuumscan --- see comments therein.
832  */
833  if (needLock)
835 
836  /* Initialize the new page before returning it */
837  page = BufferGetPage(buf);
838  Assert(PageIsNew(page));
839  _bt_pageinit(page, BufferGetPageSize(buf));
840  }
841 
842  /* ref count and lock type are correct */
843  return buf;
844 }
#define ExclusiveLock
Definition: lockdefs.h:44
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:528
union BTPageOpaqueData::@46 btpo
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3309
#define P_NEW
Definition: bufmgr.h:82
bool _bt_page_recyclable(Page page)
Definition: nbtpage.c:903
TransactionId xact
Definition: nbtree.h:62
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:68
void _bt_checkpage(Relation rel, Buffer buf)
Definition: nbtpage.c:662
#define DEBUG2
Definition: elog.h:24
static char * buf
Definition: pg_test_fsync.c:67
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
bool ConditionalLockBuffer(Buffer buffer)
Definition: bufmgr.c:3572
static void _bt_log_reuse_page(Relation rel, BlockNumber blkno, TransactionId latestRemovedXid)
Definition: nbtpage.c:696
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:332
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:382
#define BufferGetPageSize(buffer)
Definition: bufmgr.h:147
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3546
#define XLogStandbyInfoActive()
Definition: xlog.h:160
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:879
BlockNumber GetFreeIndexPage(Relation rel)
Definition: indexfsm.c:38
#define Assert(condition)
Definition: c.h:699
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:594
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
#define InvalidBlockNumber
Definition: block.h:33
#define RelationNeedsWAL(relation)
Definition: rel.h:510
#define PageIsNew(page)
Definition: bufpage.h:225
void _bt_pageinit(Page page, Size size)
Definition: nbtpage.c:891
#define elog
Definition: elog.h:219
#define BT_WRITE
Definition: nbtree.h:301
int Buffer
Definition: buf.h:23
Pointer Page
Definition: bufpage.h:74

◆ _bt_getroot()

Buffer _bt_getroot ( Relation  rel,
int  access 
)

Definition at line 252 of file nbtpage.c.

References _bt_cachemetadata(), _bt_getbuf(), _bt_getroot(), _bt_relandgetbuf(), _bt_relbuf(), _bt_upgrademetapage(), Assert, BT_READ, BT_WRITE, BTMetaPageData::btm_fastlevel, BTMetaPageData::btm_fastroot, BTMetaPageData::btm_last_cleanup_num_heap_tuples, BTMetaPageData::btm_level, BTMetaPageData::btm_magic, BTMetaPageData::btm_oldest_btpo_xact, BTMetaPageData::btm_root, BTMetaPageData::btm_version, BTP_LEAF, BTP_ROOT, BTPageGetMeta, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BTREE_MAGIC, BTREE_METAPAGE, BTREE_MIN_VERSION, BTREE_VERSION, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage, elog, END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, xl_btree_metadata::fastlevel, xl_btree_metadata::fastroot, InvalidBuffer, InvalidTransactionId, xl_btree_metadata::last_cleanup_num_heap_tuples, xl_btree_metadata::level, BTPageOpaqueData::level, xl_btree_newroot::level, LockBuffer(), MarkBufferDirty(), xl_btree_metadata::oldest_btpo_xact, P_IGNORE, P_ISMETA, P_LEFTMOST, P_NEW, P_NONE, P_RIGHTMOST, PageGetSpecialPointer, PageSetLSN, pfree(), RelationData::rd_amcache, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetRelationName, RelationNeedsWAL, xl_btree_metadata::root, xl_btree_newroot::rootblk, SizeOfBtreeNewroot, START_CRIT_SECTION, XLOG_BTREE_NEWROOT, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _bt_get_endpoint(), _bt_getroot(), and _bt_search().

253 {
254  Buffer metabuf;
255  Page metapg;
256  BTPageOpaque metaopaque;
257  Buffer rootbuf;
258  Page rootpage;
259  BTPageOpaque rootopaque;
260  BlockNumber rootblkno;
261  uint32 rootlevel;
262  BTMetaPageData *metad;
263 
264  /*
265  * Try to use previously-cached metapage data to find the root. This
266  * normally saves one buffer access per index search, which is a very
267  * helpful savings in bufmgr traffic and hence contention.
268  */
269  if (rel->rd_amcache != NULL)
270  {
271  metad = (BTMetaPageData *) rel->rd_amcache;
272  /* We shouldn't have cached it if any of these fail */
273  Assert(metad->btm_magic == BTREE_MAGIC);
275  Assert(metad->btm_version <= BTREE_VERSION);
276  Assert(metad->btm_root != P_NONE);
277 
278  rootblkno = metad->btm_fastroot;
279  Assert(rootblkno != P_NONE);
280  rootlevel = metad->btm_fastlevel;
281 
282  rootbuf = _bt_getbuf(rel, rootblkno, BT_READ);
283  rootpage = BufferGetPage(rootbuf);
284  rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage);
285 
286  /*
287  * Since the cache might be stale, we check the page more carefully
288  * here than normal. We *must* check that it's not deleted. If it's
289  * not alone on its level, then we reject too --- this may be overly
290  * paranoid but better safe than sorry. Note we don't check P_ISROOT,
291  * because that's not set in a "fast root".
292  */
293  if (!P_IGNORE(rootopaque) &&
294  rootopaque->btpo.level == rootlevel &&
295  P_LEFTMOST(rootopaque) &&
296  P_RIGHTMOST(rootopaque))
297  {
298  /* OK, accept cached page as the root */
299  return rootbuf;
300  }
301  _bt_relbuf(rel, rootbuf);
302  /* Cache is stale, throw it away */
303  if (rel->rd_amcache)
304  pfree(rel->rd_amcache);
305  rel->rd_amcache = NULL;
306  }
307 
308  metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
309  metapg = BufferGetPage(metabuf);
310  metaopaque = (BTPageOpaque) PageGetSpecialPointer(metapg);
311  metad = BTPageGetMeta(metapg);
312 
313  /* sanity-check the metapage */
314  if (!P_ISMETA(metaopaque) ||
315  metad->btm_magic != BTREE_MAGIC)
316  ereport(ERROR,
317  (errcode(ERRCODE_INDEX_CORRUPTED),
318  errmsg("index \"%s\" is not a btree",
319  RelationGetRelationName(rel))));
320 
321  if (metad->btm_version < BTREE_MIN_VERSION ||
322  metad->btm_version > BTREE_VERSION)
323  ereport(ERROR,
324  (errcode(ERRCODE_INDEX_CORRUPTED),
325  errmsg("version mismatch in index \"%s\": file version %d, "
326  "current version %d, minimal supported version %d",
329 
330  /* if no root page initialized yet, do it */
331  if (metad->btm_root == P_NONE)
332  {
333  /* If access = BT_READ, caller doesn't want us to create root yet */
334  if (access == BT_READ)
335  {
336  _bt_relbuf(rel, metabuf);
337  return InvalidBuffer;
338  }
339 
340  /* trade in our read lock for a write lock */
341  LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
342  LockBuffer(metabuf, BT_WRITE);
343 
344  /*
345  * Race condition: if someone else initialized the metadata between
346  * the time we released the read lock and acquired the write lock, we
347  * must avoid doing it again.
348  */
349  if (metad->btm_root != P_NONE)
350  {
351  /*
352  * Metadata initialized by someone else. In order to guarantee no
353  * deadlocks, we have to release the metadata page and start all
354  * over again. (Is that really true? But it's hardly worth trying
355  * to optimize this case.)
356  */
357  _bt_relbuf(rel, metabuf);
358  return _bt_getroot(rel, access);
359  }
360 
361  /*
362  * Get, initialize, write, and leave a lock of the appropriate type on
363  * the new root page. Since this is the first page in the tree, it's
364  * a leaf as well as the root.
365  */
366  rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE);
367  rootblkno = BufferGetBlockNumber(rootbuf);
368  rootpage = BufferGetPage(rootbuf);
369  rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage);
370  rootopaque->btpo_prev = rootopaque->btpo_next = P_NONE;
371  rootopaque->btpo_flags = (BTP_LEAF | BTP_ROOT);
372  rootopaque->btpo.level = 0;
373  rootopaque->btpo_cycleid = 0;
374 
375  /* NO ELOG(ERROR) till meta is updated */
377 
378  /* upgrade metapage if needed */
379  if (metad->btm_version < BTREE_VERSION)
380  _bt_upgrademetapage(metapg);
381 
382  metad->btm_root = rootblkno;
383  metad->btm_level = 0;
384  metad->btm_fastroot = rootblkno;
385  metad->btm_fastlevel = 0;
387  metad->btm_last_cleanup_num_heap_tuples = -1.0;
388 
389  MarkBufferDirty(rootbuf);
390  MarkBufferDirty(metabuf);
391 
392  /* XLOG stuff */
393  if (RelationNeedsWAL(rel))
394  {
395  xl_btree_newroot xlrec;
396  XLogRecPtr recptr;
398 
399  XLogBeginInsert();
402 
403  md.root = rootblkno;
404  md.level = 0;
405  md.fastroot = rootblkno;
406  md.fastlevel = 0;
409 
410  XLogRegisterBufData(2, (char *) &md, sizeof(xl_btree_metadata));
411 
412  xlrec.rootblk = rootblkno;
413  xlrec.level = 0;
414 
415  XLogRegisterData((char *) &xlrec, SizeOfBtreeNewroot);
416 
417  recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT);
418 
419  PageSetLSN(rootpage, recptr);
420  PageSetLSN(metapg, recptr);
421  }
422 
424 
425  /*
426  * swap root write lock for read lock. There is no danger of anyone
427  * else accessing the new root page while it's unlocked, since no one
428  * else knows where it is yet.
429  */
430  LockBuffer(rootbuf, BUFFER_LOCK_UNLOCK);
431  LockBuffer(rootbuf, BT_READ);
432 
433  /* okay, metadata is correct, release lock on it */
434  _bt_relbuf(rel, metabuf);
435  }
436  else
437  {
438  rootblkno = metad->btm_fastroot;
439  Assert(rootblkno != P_NONE);
440  rootlevel = metad->btm_fastlevel;
441 
442  /*
443  * Cache the metapage data for next time
444  */
445  _bt_cachemetadata(rel, metad);
446 
447  /*
448  * We are done with the metapage; arrange to release it via first
449  * _bt_relandgetbuf call
450  */
451  rootbuf = metabuf;
452 
453  for (;;)
454  {
455  rootbuf = _bt_relandgetbuf(rel, rootbuf, rootblkno, BT_READ);
456  rootpage = BufferGetPage(rootbuf);
457  rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage);
458 
459  if (!P_IGNORE(rootopaque))
460  break;
461 
462  /* it's dead, Jim. step right one page */
463  if (P_RIGHTMOST(rootopaque))
464  elog(ERROR, "no live root page found in index \"%s\"",
466  rootblkno = rootopaque->btpo_next;
467  }
468 
469  /* Note: can't check btpo.level on deleted pages */
470  if (rootopaque->btpo.level != rootlevel)
471  elog(ERROR, "root page %u of index \"%s\" has level %u, expected %u",
472  rootblkno, RelationGetRelationName(rel),
473  rootopaque->btpo.level, rootlevel);
474  }
475 
476  /*
477  * By here, we have a pin and read lock on the root page, and no lock set
478  * on the metadata page. Return the root page's buffer.
479  */
480  return rootbuf;
481 }
void XLogRegisterBufData(uint8 block_id, char *data, int len)
Definition: xloginsert.c:361
BlockNumber rootblk
Definition: nbtxlog.h:245
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:87
#define BTP_ROOT
Definition: nbtree.h:72
BlockNumber btpo_next
Definition: nbtree.h:58
#define P_IGNORE(opaque)
Definition: nbtree.h:163
void _bt_upgrademetapage(Page page)
Definition: nbtpage.c:86
uint32 btm_version
Definition: nbtree.h:100
Buffer _bt_relandgetbuf(Relation rel, Buffer obuf, BlockNumber blkno, int access)
Definition: nbtpage.c:860
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:729
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1450
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:213
#define BTREE_VERSION
Definition: nbtree.h:117
uint32 btm_magic
Definition: nbtree.h:99
#define BTP_LEAF
Definition: nbtree.h:71
union BTPageOpaqueData::@46 btpo
#define END_CRIT_SECTION()
Definition: miscadmin.h:133
BlockNumber root
Definition: nbtxlog.h:50
#define P_NONE
Definition: nbtree.h:150
#define InvalidBuffer
Definition: buf.h:25
#define REGBUF_WILL_INIT
Definition: xloginsert.h:32
#define START_CRIT_SECTION()
Definition: miscadmin.h:131
int errcode(int sqlerrcode)
Definition: elog.c:575
uint32 level
Definition: nbtxlog.h:246
uint32 BlockNumber
Definition: block.h:31
#define P_NEW
Definition: bufmgr.h:82
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:68
#define P_ISMETA(opaque)
Definition: nbtree.h:161
#define BT_READ
Definition: nbtree.h:300
BlockNumber btm_fastroot
Definition: nbtree.h:103
#define XLOG_BTREE_NEWROOT
Definition: nbtxlog.h:36
void pfree(void *pointer)
Definition: mcxt.c:1031
#define BTREE_MAGIC
Definition: nbtree.h:116
#define ERROR
Definition: elog.h:43
float8 last_cleanup_num_heap_tuples
Definition: nbtxlog.h:55
BTCycleId btpo_cycleid
Definition: nbtree.h:65
TransactionId oldest_btpo_xact
Definition: nbtxlog.h:54
#define BTPageGetMeta(p)
Definition: nbtree.h:112
BlockNumber btpo_prev
Definition: nbtree.h:57
#define REGBUF_STANDARD
Definition: xloginsert.h:34
#define InvalidTransactionId
Definition: transam.h:31
#define RelationGetRelationName(relation)
Definition: rel.h:441
#define P_LEFTMOST(opaque)
Definition: nbtree.h:156
unsigned int uint32
Definition: c.h:325
Buffer _bt_getroot(Relation rel, int access)
Definition: nbtpage.c:252
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
#define ereport(elevel, rest)
Definition: elog.h:122
#define BTREE_METAPAGE
Definition: nbtree.h:115
#define SizeOfBtreeNewroot
Definition: nbtxlog.h:249
uint32 btm_fastlevel
Definition: nbtree.h:104
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:323
uint32 level
Definition: nbtree.h:61
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:415
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3546
BlockNumber btm_root
Definition: nbtree.h:101
#define BTREE_MIN_VERSION
Definition: nbtree.h:118
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:879
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:699
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
float8 btm_last_cleanup_num_heap_tuples
Definition: nbtree.h:108
#define RelationNeedsWAL(relation)
Definition: rel.h:510
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2605
int errmsg(const char *fmt,...)
Definition: elog.c:797
static void _bt_cachemetadata(Relation rel, BTMetaPageData *metad)
Definition: nbtpage.c:113
uint32 fastlevel
Definition: nbtxlog.h:53
uint32 btm_level
Definition: nbtree.h:102
uint32 level
Definition: nbtxlog.h:51
BlockNumber fastroot
Definition: nbtxlog.h:52
#define elog
Definition: elog.h:219
TransactionId btm_oldest_btpo_xact
Definition: nbtree.h:106
void * rd_amcache
Definition: rel.h:164
#define BT_WRITE
Definition: nbtree.h:301
void XLogBeginInsert(void)
Definition: xloginsert.c:120
uint16 btpo_flags
Definition: nbtree.h:64
#define PageSetLSN(page, lsn)
Definition: bufpage.h:364
int Buffer
Definition: buf.h:23
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:157
Pointer Page
Definition: bufpage.h:74

◆ _bt_getrootheight()

int _bt_getrootheight ( Relation  rel)

Definition at line 594 of file nbtpage.c.

References _bt_cachemetadata(), _bt_getbuf(), _bt_relbuf(), Assert, BT_READ, BTMetaPageData::btm_fastlevel, BTMetaPageData::btm_fastroot, BTMetaPageData::btm_magic, BTMetaPageData::btm_root, BTMetaPageData::btm_version, BTPageGetMeta, BTREE_MAGIC, BTREE_METAPAGE, BTREE_MIN_VERSION, BTREE_VERSION, BufferGetPage, ereport, errcode(), errmsg(), ERROR, P_ISMETA, P_NONE, PageGetSpecialPointer, RelationData::rd_amcache, and RelationGetRelationName.

Referenced by _bt_insertonpg(), and get_relation_info().

595 {
596  BTMetaPageData *metad;
597 
598  /*
599  * We can get what we need from the cached metapage data. If it's not
600  * cached yet, load it. Sanity checks here must match _bt_getroot().
601  */
602  if (rel->rd_amcache == NULL)
603  {
604  Buffer metabuf;
605  Page metapg;
606  BTPageOpaque metaopaque;
607 
608  metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
609  metapg = BufferGetPage(metabuf);
610  metaopaque = (BTPageOpaque) PageGetSpecialPointer(metapg);
611  metad = BTPageGetMeta(metapg);
612 
613  /* sanity-check the metapage */
614  if (!P_ISMETA(metaopaque) ||
615  metad->btm_magic != BTREE_MAGIC)
616  ereport(ERROR,
617  (errcode(ERRCODE_INDEX_CORRUPTED),
618  errmsg("index \"%s\" is not a btree",
619  RelationGetRelationName(rel))));
620 
621  if (metad->btm_version < BTREE_MIN_VERSION ||
622  metad->btm_version > BTREE_VERSION)
623  ereport(ERROR,
624  (errcode(ERRCODE_INDEX_CORRUPTED),
625  errmsg("version mismatch in index \"%s\": file version %d, "
626  "current version %d, minimal supported version %d",
629 
630  /*
631  * If there's no root page yet, _bt_getroot() doesn't expect a cache
632  * to be made, so just stop here and report the index height is zero.
633  * (XXX perhaps _bt_getroot() should be changed to allow this case.)
634  */
635  if (metad->btm_root == P_NONE)
636  {
637  _bt_relbuf(rel, metabuf);
638  return 0;
639  }
640 
641  /*
642  * Cache the metapage data for next time
643  */
644  _bt_cachemetadata(rel, metad);
645 
646  _bt_relbuf(rel, metabuf);
647  }
648 
649  metad = (BTMetaPageData *) rel->rd_amcache;
650  /* We shouldn't have cached it if any of these fail */
651  Assert(metad->btm_magic == BTREE_MAGIC);
652  Assert(metad->btm_version == BTREE_VERSION);
653  Assert(metad->btm_fastroot != P_NONE);
654 
655  return metad->btm_fastlevel;
656 }
uint32 btm_version
Definition: nbtree.h:100
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:729
#define BTREE_VERSION
Definition: nbtree.h:117
uint32 btm_magic
Definition: nbtree.h:99
#define P_NONE
Definition: nbtree.h:150
int errcode(int sqlerrcode)
Definition: elog.c:575
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:68
#define P_ISMETA(opaque)
Definition: nbtree.h:161
#define BT_READ
Definition: nbtree.h:300
BlockNumber btm_fastroot
Definition: nbtree.h:103
#define BTREE_MAGIC
Definition: nbtree.h:116
#define ERROR
Definition: elog.h:43
#define BTPageGetMeta(p)
Definition: nbtree.h:112
#define RelationGetRelationName(relation)
Definition: rel.h:441
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
#define ereport(elevel, rest)
Definition: elog.h:122
#define BTREE_METAPAGE
Definition: nbtree.h:115
uint32 btm_fastlevel
Definition: nbtree.h:104
BlockNumber btm_root
Definition: nbtree.h:101
#define BTREE_MIN_VERSION
Definition: nbtree.h:118
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:879
#define Assert(condition)
Definition: c.h:699
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
int errmsg(const char *fmt,...)
Definition: elog.c:797
static void _bt_cachemetadata(Relation rel, BTMetaPageData *metad)
Definition: nbtpage.c:113
void * rd_amcache
Definition: rel.h:164
int Buffer
Definition: buf.h:23
Pointer Page
Definition: bufpage.h:74

◆ _bt_getstackbuf()

Buffer _bt_getstackbuf ( Relation  rel,
BTStack  stack,
int  access 
)

Definition at line 1997 of file nbtinsert.c.

References _bt_finish_split(), _bt_getbuf(), _bt_relbuf(), BT_WRITE, BTPageOpaqueData::btpo_next, BTreeInnerTupleGetDownLink, BTStackData::bts_blkno, BTStackData::bts_btentry, BTStackData::bts_offset, BTStackData::bts_parent, buf, BufferGetPage, InvalidBuffer, InvalidOffsetNumber, OffsetNumberNext, OffsetNumberPrev, P_FIRSTDATAKEY, P_IGNORE, P_INCOMPLETE_SPLIT, P_RIGHTMOST, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, and PageGetSpecialPointer.

Referenced by _bt_insert_parent(), and _bt_lock_branch_parent().

1998 {
1999  BlockNumber blkno;
2000  OffsetNumber start;
2001 
2002  blkno = stack->bts_blkno;
2003  start = stack->bts_offset;
2004 
2005  for (;;)
2006  {
2007  Buffer buf;
2008  Page page;
2009  BTPageOpaque opaque;
2010 
2011  buf = _bt_getbuf(rel, blkno, access);
2012  page = BufferGetPage(buf);
2013  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2014 
2015  if (access == BT_WRITE && P_INCOMPLETE_SPLIT(opaque))
2016  {
2017  _bt_finish_split(rel, buf, stack->bts_parent);
2018  continue;
2019  }
2020 
2021  if (!P_IGNORE(opaque))
2022  {
2023  OffsetNumber offnum,
2024  minoff,
2025  maxoff;
2026  ItemId itemid;
2027  IndexTuple item;
2028 
2029  minoff = P_FIRSTDATAKEY(opaque);
2030  maxoff = PageGetMaxOffsetNumber(page);
2031 
2032  /*
2033  * start = InvalidOffsetNumber means "search the whole page". We
2034  * need this test anyway due to possibility that page has a high
2035  * key now when it didn't before.
2036  */
2037  if (start < minoff)
2038  start = minoff;
2039 
2040  /*
2041  * Need this check too, to guard against possibility that page
2042  * split since we visited it originally.
2043  */
2044  if (start > maxoff)
2045  start = OffsetNumberNext(maxoff);
2046 
2047  /*
2048  * These loops will check every item on the page --- but in an
2049  * order that's attuned to the probability of where it actually
2050  * is. Scan to the right first, then to the left.
2051  */
2052  for (offnum = start;
2053  offnum <= maxoff;
2054  offnum = OffsetNumberNext(offnum))
2055  {
2056  itemid = PageGetItemId(page, offnum);
2057  item = (IndexTuple) PageGetItem(page, itemid);
2058 
2059  if (BTreeInnerTupleGetDownLink(item) == stack->bts_btentry)
2060  {
2061  /* Return accurate pointer to where link is now */
2062  stack->bts_blkno = blkno;
2063  stack->bts_offset = offnum;
2064  return buf;
2065  }
2066  }
2067 
2068  for (offnum = OffsetNumberPrev(start);
2069  offnum >= minoff;
2070  offnum = OffsetNumberPrev(offnum))
2071  {
2072  itemid = PageGetItemId(page, offnum);
2073  item = (IndexTuple) PageGetItem(page, itemid);
2074 
2075  if (BTreeInnerTupleGetDownLink(item) == stack->bts_btentry)
2076  {
2077  /* Return accurate pointer to where link is now */
2078  stack->bts_blkno = blkno;
2079  stack->bts_offset = offnum;
2080  return buf;
2081  }
2082  }
2083  }
2084 
2085  /*
2086  * The item we're looking for moved right at least one page.
2087  */
2088  if (P_RIGHTMOST(opaque))
2089  {
2090  _bt_relbuf(rel, buf);
2091  return InvalidBuffer;
2092  }
2093  blkno = opaque->btpo_next;
2094  start = InvalidOffsetNumber;
2095  _bt_relbuf(rel, buf);
2096  }
2097 }
BlockNumber btpo_next
Definition: nbtree.h:58
#define P_IGNORE(opaque)
Definition: nbtree.h:163
#define BTreeInnerTupleGetDownLink(itup)
Definition: nbtree.h:224
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:729
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:187
#define InvalidBuffer
Definition: buf.h:25
uint32 BlockNumber
Definition: block.h:31
#define P_INCOMPLETE_SPLIT(opaque)
Definition: nbtree.h:165
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:353
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:68
uint16 OffsetNumber
Definition: off.h:24
void _bt_finish_split(Relation rel, Buffer lbuf, BTStack stack)
Definition: nbtinsert.c:1938
OffsetNumber bts_offset
Definition: nbtree.h:316
static char * buf
Definition: pg_test_fsync.c:67
IndexTupleData * IndexTuple
Definition: itup.h:53
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:231
#define InvalidOffsetNumber
Definition: off.h:26
BlockNumber bts_blkno
Definition: nbtree.h:315
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:879
BlockNumber bts_btentry
Definition: nbtree.h:317
#define OffsetNumberNext(offsetNumber)
Definition: off.h:53
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
#define OffsetNumberPrev(offsetNumber)
Definition: off.h:55
struct BTStackData * bts_parent
Definition: nbtree.h:318
#define BT_WRITE
Definition: nbtree.h:301
int Buffer
Definition: buf.h:23
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:157
#define PageGetItem(page, itemId)
Definition: bufpage.h:336
Pointer Page
Definition: bufpage.h:74

◆ _bt_gettrueroot()

Buffer _bt_gettrueroot ( Relation  rel)

Definition at line 498 of file nbtpage.c.

References _bt_getbuf(), _bt_relandgetbuf(), _bt_relbuf(), BT_READ, BTMetaPageData::btm_level, BTMetaPageData::btm_magic, BTMetaPageData::btm_root, BTMetaPageData::btm_version, BTPageGetMeta, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_next, BTREE_MAGIC, BTREE_METAPAGE, BTREE_MIN_VERSION, BTREE_VERSION, BufferGetPage, elog, ereport, errcode(), errmsg(), ERROR, InvalidBuffer, BTPageOpaqueData::level, P_IGNORE, P_ISMETA, P_NONE, P_RIGHTMOST, PageGetSpecialPointer, pfree(), RelationData::rd_amcache, and RelationGetRelationName.

Referenced by _bt_get_endpoint().

499 {
500  Buffer metabuf;
501  Page metapg;
502  BTPageOpaque metaopaque;
503  Buffer rootbuf;
504  Page rootpage;
505  BTPageOpaque rootopaque;
506  BlockNumber rootblkno;
507  uint32 rootlevel;
508  BTMetaPageData *metad;
509 
510  /*
511  * We don't try to use cached metapage data here, since (a) this path is
512  * not performance-critical, and (b) if we are here it suggests our cache
513  * is out-of-date anyway. In light of point (b), it's probably safest to
514  * actively flush any cached metapage info.
515  */
516  if (rel->rd_amcache)
517  pfree(rel->rd_amcache);
518  rel->rd_amcache = NULL;
519 
520  metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
521  metapg = BufferGetPage(metabuf);
522  metaopaque = (BTPageOpaque) PageGetSpecialPointer(metapg);
523  metad = BTPageGetMeta(metapg);
524 
525  if (!P_ISMETA(metaopaque) ||
526  metad->btm_magic != BTREE_MAGIC)
527  ereport(ERROR,
528  (errcode(ERRCODE_INDEX_CORRUPTED),
529  errmsg("index \"%s\" is not a btree",
530  RelationGetRelationName(rel))));
531 
532  if (metad->btm_version < BTREE_MIN_VERSION ||
533  metad->btm_version > BTREE_VERSION)
534  ereport(ERROR,
535  (errcode(ERRCODE_INDEX_CORRUPTED),
536  errmsg("version mismatch in index \"%s\": file version %d, "
537  "current version %d, minimal supported version %d",
540 
541  /* if no root page initialized yet, fail */
542  if (metad->btm_root == P_NONE)
543  {
544  _bt_relbuf(rel, metabuf);
545  return InvalidBuffer;
546  }
547 
548  rootblkno = metad->btm_root;
549  rootlevel = metad->btm_level;
550 
551  /*
552  * We are done with the metapage; arrange to release it via first
553  * _bt_relandgetbuf call
554  */
555  rootbuf = metabuf;
556 
557  for (;;)
558  {
559  rootbuf = _bt_relandgetbuf(rel, rootbuf, rootblkno, BT_READ);
560  rootpage = BufferGetPage(rootbuf);
561  rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage);
562 
563  if (!P_IGNORE(rootopaque))
564  break;
565 
566  /* it's dead, Jim. step right one page */
567  if (P_RIGHTMOST(rootopaque))
568  elog(ERROR, "no live root page found in index \"%s\"",
570  rootblkno = rootopaque->btpo_next;
571  }
572 
573  /* Note: can't check btpo.level on deleted pages */
574  if (rootopaque->btpo.level != rootlevel)
575  elog(ERROR, "root page %u of index \"%s\" has level %u, expected %u",
576  rootblkno, RelationGetRelationName(rel),
577  rootopaque->btpo.level, rootlevel);
578 
579  return rootbuf;
580 }
BlockNumber btpo_next
Definition: nbtree.h:58
#define P_IGNORE(opaque)
Definition: nbtree.h:163
uint32 btm_version
Definition: nbtree.h:100
Buffer _bt_relandgetbuf(Relation rel, Buffer obuf, BlockNumber blkno, int access)
Definition: nbtpage.c:860
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:729
#define BTREE_VERSION
Definition: nbtree.h:117
uint32 btm_magic
Definition: nbtree.h:99
union BTPageOpaqueData::@46 btpo
#define P_NONE
Definition: nbtree.h:150
#define InvalidBuffer
Definition: buf.h:25
int errcode(int sqlerrcode)
Definition: elog.c:575
uint32 BlockNumber
Definition: block.h:31
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:68
#define P_ISMETA(opaque)
Definition: nbtree.h:161
#define BT_READ
Definition: nbtree.h:300
void pfree(void *pointer)
Definition: mcxt.c:1031
#define BTREE_MAGIC
Definition: nbtree.h:116
#define ERROR
Definition: elog.h:43
#define BTPageGetMeta(p)
Definition: nbtree.h:112
#define RelationGetRelationName(relation)
Definition: rel.h:441
unsigned int uint32
Definition: c.h:325
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
#define ereport(elevel, rest)
Definition: elog.h:122
#define BTREE_METAPAGE
Definition: nbtree.h:115
uint32 level
Definition: nbtree.h:61
BlockNumber btm_root
Definition: nbtree.h:101
#define BTREE_MIN_VERSION
Definition: nbtree.h:118
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:879
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
int errmsg(const char *fmt,...)
Definition: elog.c:797
uint32 btm_level
Definition: nbtree.h:102
#define elog
Definition: elog.h:219
void * rd_amcache
Definition: rel.h:164
int Buffer
Definition: buf.h:23
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:157
Pointer Page
Definition: bufpage.h:74

◆ _bt_initmetapage()

void _bt_initmetapage ( Page  page,
BlockNumber  rootbknum,
uint32  level 
)

Definition at line 50 of file nbtpage.c.

References _bt_pageinit(), BTMetaPageData::btm_fastlevel, BTMetaPageData::btm_fastroot, BTMetaPageData::btm_last_cleanup_num_heap_tuples, BTMetaPageData::btm_level, BTMetaPageData::btm_magic, BTMetaPageData::btm_oldest_btpo_xact, BTMetaPageData::btm_root, BTMetaPageData::btm_version, BTP_META, BTPageGetMeta, BTPageOpaqueData::btpo_flags, BTREE_MAGIC, BTREE_VERSION, InvalidTransactionId, and PageGetSpecialPointer.

Referenced by _bt_uppershutdown(), and btbuildempty().

51 {
52  BTMetaPageData *metad;
53  BTPageOpaque metaopaque;
54 
55  _bt_pageinit(page, BLCKSZ);
56 
57  metad = BTPageGetMeta(page);
58  metad->btm_magic = BTREE_MAGIC;
59  metad->btm_version = BTREE_VERSION;
60  metad->btm_root = rootbknum;
61  metad->btm_level = level;
62  metad->btm_fastroot = rootbknum;
63  metad->btm_fastlevel = level;
66 
67  metaopaque = (BTPageOpaque) PageGetSpecialPointer(page);
68  metaopaque->btpo_flags = BTP_META;
69 
70  /*
71  * Set pd_lower just past the end of the metadata. This is essential,
72  * because without doing so, metadata will be lost if xlog.c compresses
73  * the page.
74  */
75  ((PageHeader) page)->pd_lower =
76  ((char *) metad + sizeof(BTMetaPageData)) - (char *) page;
77 }
uint32 btm_version
Definition: nbtree.h:100
#define BTREE_VERSION
Definition: nbtree.h:117
uint32 btm_magic
Definition: nbtree.h:99
#define BTP_META
Definition: nbtree.h:74
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:68
BlockNumber btm_fastroot
Definition: nbtree.h:103
#define BTREE_MAGIC
Definition: nbtree.h:116
#define BTPageGetMeta(p)
Definition: nbtree.h:112
#define InvalidTransactionId
Definition: transam.h:31
uint32 btm_fastlevel
Definition: nbtree.h:104
BlockNumber btm_root
Definition: nbtree.h:101
PageHeaderData * PageHeader
Definition: bufpage.h:162
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
float8 btm_last_cleanup_num_heap_tuples
Definition: nbtree.h:108
uint32 btm_level
Definition: nbtree.h:102
void _bt_pageinit(Page page, Size size)
Definition: nbtpage.c:891
TransactionId btm_oldest_btpo_xact
Definition: nbtree.h:106
uint16 btpo_flags
Definition: nbtree.h:64

◆ _bt_killitems()

void _bt_killitems ( IndexScanDesc  scan)

Definition at line 1744 of file nbtutils.c.

References _bt_getbuf(), _bt_relbuf(), Assert, BT_READ, BTP_HAS_GARBAGE, BTScanPosIsPinned, BTScanPosIsValid, buf, BTScanPosData::buf, BUFFER_LOCK_UNLOCK, BufferGetLSNAtomic(), BufferGetPage, BufferIsValid, BTScanPosData::currPage, BTScanOpaqueData::currPos, BTScanPosData::firstItem, BTScanPosItem::heapTid, i, BTScanPosItem::indexOffset, IndexScanDescData::indexRelation, ItemIdMarkDead, ItemPointerEquals(), BTScanPosData::items, BTScanOpaqueData::killedItems, LockBuffer(), BTScanPosData::lsn, MarkBufferDirtyHint(), BTScanOpaqueData::numKilled, OffsetNumberNext, IndexScanDescData::opaque, P_FIRSTDATAKEY, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageGetSpecialPointer, and IndexTupleData::t_tid.

Referenced by _bt_steppage(), btendscan(), btrescan(), and btrestrpos().

1745 {
1746  BTScanOpaque so = (BTScanOpaque) scan->opaque;
1747  Page page;
1748  BTPageOpaque opaque;
1749  OffsetNumber minoff;
1750  OffsetNumber maxoff;
1751  int i;
1752  int numKilled = so->numKilled;
1753  bool killedsomething = false;
1754 
1756 
1757  /*
1758  * Always reset the scan state, so we don't look for same items on other
1759  * pages.
1760  */
1761  so->numKilled = 0;
1762 
1763  if (BTScanPosIsPinned(so->currPos))
1764  {
1765  /*
1766  * We have held the pin on this page since we read the index tuples,
1767  * so all we need to do is lock it. The pin will have prevented
1768  * re-use of any TID on the page, so there is no need to check the
1769  * LSN.
1770  */
1771  LockBuffer(so->currPos.buf, BT_READ);
1772 
1773  page = BufferGetPage(so->currPos.buf);
1774  }
1775  else
1776  {
1777  Buffer buf;
1778 
1779  /* Attempt to re-read the buffer, getting pin and lock. */
1780  buf = _bt_getbuf(scan->indexRelation, so->currPos.currPage, BT_READ);
1781 
1782  /* It might not exist anymore; in which case we can't hint it. */
1783  if (!BufferIsValid(buf))
1784  return;
1785 
1786  page = BufferGetPage(buf);
1787  if (BufferGetLSNAtomic(buf) == so->currPos.lsn)
1788  so->currPos.buf = buf;
1789  else
1790  {
1791  /* Modified while not pinned means hinting is not safe. */
1792  _bt_relbuf(scan->indexRelation, buf);
1793  return;
1794  }
1795  }
1796 
1797  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1798  minoff = P_FIRSTDATAKEY(opaque);
1799  maxoff = PageGetMaxOffsetNumber(page);
1800 
1801  for (i = 0; i < numKilled; i++)
1802  {
1803  int itemIndex = so->killedItems[i];
1804  BTScanPosItem *kitem = &so->currPos.items[itemIndex];
1805  OffsetNumber offnum = kitem->indexOffset;
1806 
1807  Assert(itemIndex >= so->currPos.firstItem &&
1808  itemIndex <= so->currPos.lastItem);
1809  if (offnum < minoff)
1810  continue; /* pure paranoia */
1811  while (offnum <= maxoff)
1812  {
1813  ItemId iid = PageGetItemId(page, offnum);
1814  IndexTuple ituple = (IndexTuple) PageGetItem(page, iid);
1815 
1816  if (ItemPointerEquals(&ituple->t_tid, &kitem->heapTid))
1817  {
1818  /* found the item */
1819  ItemIdMarkDead(iid);
1820  killedsomething = true;
1821  break; /* out of inner search loop */
1822  }
1823  offnum = OffsetNumberNext(offnum);
1824  }
1825  }
1826 
1827  /*
1828  * Since this can be redone later if needed, mark as dirty hint.
1829  *
1830  * Whenever we mark anything LP_DEAD, we also set the page's
1831  * BTP_HAS_GARBAGE flag, which is likewise just a hint.
1832  */
1833  if (killedsomething)
1834  {
1835  opaque->btpo_flags |= BTP_HAS_GARBAGE;
1836  MarkBufferDirtyHint(so->currPos.buf, true);
1837  }
1838 
1840 }
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:87
BTScanPosItem items[MaxIndexTuplesPerPage]
Definition: nbtree.h:390
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition: bufmgr.c:3379
#define ItemIdMarkDead(itemId)
Definition: itemid.h:178
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:729
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:187
ItemPointerData t_tid
Definition: itup.h:37
BlockNumber currPage
Definition: nbtree.h:361
#define BTScanPosIsValid(scanpos)
Definition: nbtree.h:412
OffsetNumber indexOffset
Definition: nbtree.h:352
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:353
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:68
Relation indexRelation
Definition: relscan.h:91
uint16 OffsetNumber
Definition: off.h:24
#define BT_READ
Definition: nbtree.h:300
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:479
XLogRecPtr BufferGetLSNAtomic(Buffer buffer)
Definition: bufmgr.c:2832
static char * buf
Definition: pg_test_fsync.c:67
#define BTScanPosIsPinned(scanpos)
Definition: nbtree.h:395
IndexTupleData * IndexTuple
Definition: itup.h:53
int firstItem
Definition: nbtree.h:386
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:231
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3546
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:879
int * killedItems
Definition: nbtree.h:454
#define Assert(condition)
Definition: c.h:699
#define OffsetNumberNext(offsetNumber)
Definition: off.h:53
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2)
Definition: itemptr.c:29
ItemPointerData heapTid
Definition: nbtree.h:351
BTScanPosData currPos
Definition: nbtree.h:475
int i
XLogRecPtr lsn
Definition: nbtree.h:360
Buffer buf
Definition: nbtree.h:358
int Buffer
Definition: buf.h:23
#define BTP_HAS_GARBAGE
Definition: nbtree.h:77
#define PageGetItem(page, itemId)
Definition: bufpage.h:336
Pointer Page
Definition: bufpage.h:74

◆ _bt_mark_array_keys()

void _bt_mark_array_keys ( IndexScanDesc  scan)

Definition at line 617 of file nbtutils.c.

References BTScanOpaqueData::arrayKeys, BTArrayKeyInfo::cur_elem, i, BTArrayKeyInfo::mark_elem, BTScanOpaqueData::numArrayKeys, and IndexScanDescData::opaque.

Referenced by btmarkpos().

618 {
619  BTScanOpaque so = (BTScanOpaque) scan->opaque;
620  int i;
621 
622  for (i = 0; i < so->numArrayKeys; i++)
623  {
624  BTArrayKeyInfo *curArrayKey = &so->arrayKeys[i];
625 
626  curArrayKey->mark_elem = curArrayKey->cur_elem;
627  }
628 }
int mark_elem
Definition: nbtree.h:432
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:479
int cur_elem
Definition: nbtree.h:431
int numArrayKeys
Definition: nbtree.h:446
BTArrayKeyInfo * arrayKeys
Definition: nbtree.h:450
int i

◆ _bt_mkscankey()

ScanKey _bt_mkscankey ( Relation  rel,
IndexTuple  itup 
)

Definition at line 62 of file nbtutils.c.

References arg, Assert, BTORDER_PROC, BTreeTupleGetNAtts, i, index_getattr, index_getprocinfo(), IndexRelationGetNumberOfAttributes, IndexRelationGetNumberOfKeyAttributes, InvalidOid, InvalidStrategy, palloc(), PG_USED_FOR_ASSERTS_ONLY, RelationData::rd_indcollation, RelationData::rd_indoption, RelationGetDescr, ScanKeyEntryInitializeWithInfo(), SK_BT_INDOPTION_SHIFT, and SK_ISNULL.

Referenced by _bt_doinsert(), _bt_pagedel(), bt_right_page_check_scankey(), and bt_target_page_check().

63 {
64  ScanKey skey;
65  TupleDesc itupdesc;
66  int indnatts PG_USED_FOR_ASSERTS_ONLY;
67  int indnkeyatts;
68  int16 *indoption;
69  int i;
70 
71  itupdesc = RelationGetDescr(rel);
72  indnatts = IndexRelationGetNumberOfAttributes(rel);
73  indnkeyatts = IndexRelationGetNumberOfKeyAttributes(rel);
74  indoption = rel->rd_indoption;
75 
76  Assert(indnkeyatts > 0);
77  Assert(indnkeyatts <= indnatts);
78  Assert(BTreeTupleGetNAtts(itup, rel) == indnatts ||
79  BTreeTupleGetNAtts(itup, rel) == indnkeyatts);
80 
81  /*
82  * We'll execute search using scan key constructed on key columns. Non-key
83  * (INCLUDE index) columns are always omitted from scan keys.
84  */
85  skey = (ScanKey) palloc(indnkeyatts * sizeof(ScanKeyData));
86 
87  for (i = 0; i < indnkeyatts; i++)
88  {
89  FmgrInfo *procinfo;
90  Datum arg;
91  bool null;
92  int flags;
93 
94  /*
95  * We can use the cached (default) support procs since no cross-type
96  * comparison can be needed.
97  */
98  procinfo = index_getprocinfo(rel, i + 1, BTORDER_PROC);
99  arg = index_getattr(itup, i + 1, itupdesc, &null);
100  flags = (null ? SK_ISNULL : 0) | (indoption[i] << SK_BT_INDOPTION_SHIFT);
102  flags,
103  (AttrNumber) (i + 1),
105  InvalidOid,
106  rel->rd_indcollation[i],
107  procinfo,
108  arg);
109  }
110 
111  return skey;
112 }
#define SK_BT_INDOPTION_SHIFT
Definition: nbtree.h:488
signed short int16
Definition: c.h:312
#define InvalidStrategy
Definition: stratnum.h:24
Definition: fmgr.h:56
#define BTORDER_PROC
Definition: nbtree.h:290
int16 * rd_indoption
Definition: rel.h:158
FmgrInfo * index_getprocinfo(Relation irel, AttrNumber attnum, uint16 procnum)
Definition: indexam.c:855
void ScanKeyEntryInitializeWithInfo(ScanKey entry, int flags, AttrNumber attributeNumber, StrategyNumber strategy, Oid subtype, Oid collation, FmgrInfo *finfo, Datum argument)
Definition: scankey.c:101
#define RelationGetDescr(relation)
Definition: rel.h:433
#define BTreeTupleGetNAtts(itup, rel)
Definition: nbtree.h:247
Oid * rd_indcollation
Definition: rel.h:165
ScanKeyData * ScanKey
Definition: skey.h:75
#define IndexRelationGetNumberOfAttributes(relation)
Definition: rel.h:419
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition: rel.h:426
#define SK_ISNULL
Definition: skey.h:115
uintptr_t Datum
Definition: postgres.h:367
#define InvalidOid
Definition: postgres_ext.h:36
#define Assert(condition)
Definition: c.h:699
#define index_getattr(tup, attnum, tupleDesc, isnull)
Definition: itup.h:100
void * palloc(Size size)
Definition: mcxt.c:924
int i
void * arg
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:123
int16 AttrNumber
Definition: attnum.h:21

◆ _bt_mkscankey_nodata()

ScanKey _bt_mkscankey_nodata ( Relation  rel)

Definition at line 126 of file nbtutils.c.

References BTORDER_PROC, i, index_getprocinfo(), IndexRelationGetNumberOfKeyAttributes, InvalidOid, InvalidStrategy, palloc(), RelationData::rd_indcollation, RelationData::rd_indoption, ScanKeyEntryInitializeWithInfo(), SK_BT_INDOPTION_SHIFT, and SK_ISNULL.

Referenced by _bt_load(), tuplesort_begin_cluster(), and tuplesort_begin_index_btree().

127 {
128  ScanKey skey;
129  int indnkeyatts;
130  int16 *indoption;
131  int i;
132 
133  indnkeyatts = IndexRelationGetNumberOfKeyAttributes(rel);
134  indoption = rel->rd_indoption;
135 
136  skey = (ScanKey) palloc(indnkeyatts * sizeof(ScanKeyData));
137 
138  for (i = 0; i < indnkeyatts; i++)
139  {
140  FmgrInfo *procinfo;
141  int flags;
142 
143  /*
144  * We can use the cached (default) support procs since no cross-type
145  * comparison can be needed.
146  */
147  procinfo = index_getprocinfo(rel, i + 1, BTORDER_PROC);
148  flags = SK_ISNULL | (indoption[i] << SK_BT_INDOPTION_SHIFT);
150  flags,
151  (AttrNumber) (i + 1),
153  InvalidOid,
154  rel->rd_indcollation[i],
155  procinfo,
156  (Datum) 0);
157  }
158 
159  return skey;
160 }
#define SK_BT_INDOPTION_SHIFT
Definition: nbtree.h:488
signed short int16
Definition: c.h:312
#define InvalidStrategy
Definition: stratnum.h:24
Definition: fmgr.h:56
#define BTORDER_PROC
Definition: nbtree.h:290
int16 * rd_indoption
Definition: rel.h:158
FmgrInfo * index_getprocinfo(Relation irel, AttrNumber attnum, uint16 procnum)
Definition: indexam.c:855
void ScanKeyEntryInitializeWithInfo(ScanKey entry, int flags, AttrNumber attributeNumber, StrategyNumber strategy, Oid subtype, Oid collation, FmgrInfo *finfo, Datum argument)
Definition: scankey.c:101
Oid * rd_indcollation
Definition: rel.h:165
ScanKeyData * ScanKey
Definition: skey.h:75
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition: rel.h:426
#define SK_ISNULL
Definition: skey.h:115
uintptr_t Datum
Definition: postgres.h:367
#define InvalidOid
Definition: postgres_ext.h:36
void * palloc(Size size)
Definition: mcxt.c:924
int i
int16 AttrNumber
Definition: attnum.h:21

◆ _bt_moveright()

Buffer _bt_moveright ( Relation  rel,
Buffer  buf,
int  keysz,
ScanKey  scankey,
bool  nextkey,
bool  forupdate,
BTStack  stack,
int  access,
Snapshot  snapshot 
)

Definition at line 214 of file nbtsearch.c.

References _bt_compare(), _bt_finish_split(), _bt_getbuf(), _bt_relandgetbuf(), _bt_relbuf(), BT_READ, BT_WRITE, BTPageOpaqueData::btpo_next, buf, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage, elog, ERROR, LockBuffer(), P_HIKEY, P_IGNORE, P_INCOMPLETE_SPLIT, P_RIGHTMOST, PageGetSpecialPointer, RelationGetRelationName, and TestForOldSnapshot().

Referenced by _bt_doinsert(), and _bt_search().

223 {
224  Page page;
225  BTPageOpaque opaque;
226  int32 cmpval;
227 
228  /*
229  * When nextkey = false (normal case): if the scan key that brought us to
230  * this page is > the high key stored on the page, then the page has split
231  * and we need to move right. (If the scan key is equal to the high key,
232  * we might or might not need to move right; have to scan the page first
233  * anyway.)
234  *
235  * When nextkey = true: move right if the scan key is >= page's high key.
236  *
237  * The page could even have split more than once, so scan as far as
238  * needed.
239  *
240  * We also have to move right if we followed a link that brought us to a
241  * dead page.
242  */
243  cmpval = nextkey ? 0 : 1;
244 
245  for (;;)
246  {
247  page = BufferGetPage(buf);
248  TestForOldSnapshot(snapshot, rel, page);
249  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
250 
251  if (P_RIGHTMOST(opaque))
252  break;
253 
254  /*
255  * Finish any incomplete splits we encounter along the way.
256  */
257  if (forupdate && P_INCOMPLETE_SPLIT(opaque))
258  {
260 
261  /* upgrade our lock if necessary */
262  if (access == BT_READ)
263  {
266  }
267 
268  if (P_INCOMPLETE_SPLIT(opaque))
269  _bt_finish_split(rel, buf, stack);
270  else
271  _bt_relbuf(rel, buf);
272 
273  /* re-acquire the lock in the right mode, and re-check */
274  buf = _bt_getbuf(rel, blkno, access);
275  continue;
276  }
277 
278  if (P_IGNORE(opaque) || _bt_compare(rel, keysz, scankey, page, P_HIKEY) >= cmpval)
279  {
280  /* step right one page */
281  buf = _bt_relandgetbuf(rel, buf, opaque->btpo_next, access);
282  continue;
283  }
284  else
285  break;
286  }
287 
288  if (P_IGNORE(opaque))
289  elog(ERROR, "fell off the end of index \"%s\"",
291 
292  return buf;
293 }
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:87
BlockNumber btpo_next
Definition: nbtree.h:58
static void TestForOldSnapshot(Snapshot snapshot, Relation relation, Page page)
Definition: bufmgr.h:265
#define P_IGNORE(opaque)
Definition: nbtree.h:163
Buffer _bt_relandgetbuf(Relation rel, Buffer obuf, BlockNumber blkno, int access)
Definition: nbtpage.c:860
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:729
uint32 BlockNumber
Definition: block.h:31
#define P_INCOMPLETE_SPLIT(opaque)
Definition: nbtree.h:165
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:68
signed int int32
Definition: c.h:313
#define BT_READ
Definition: nbtree.h:300
void _bt_finish_split(Relation rel, Buffer lbuf, BTStack stack)
Definition: nbtinsert.c:1938
#define ERROR
Definition: elog.h:43
static char * buf
Definition: pg_test_fsync.c:67
#define RelationGetRelationName(relation)
Definition: rel.h:441
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3546
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:879
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
#define P_HIKEY
Definition: nbtree.h:185
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2605
#define elog
Definition: elog.h:219
int32 _bt_compare(Relation rel, int keysz, ScanKey scankey, Page page, OffsetNumber offnum)
Definition: nbtsearch.c:428
#define BT_WRITE
Definition: nbtree.h:301
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:157
Pointer Page
Definition: bufpage.h:74

◆ _bt_next()

bool _bt_next ( IndexScanDesc  scan,
ScanDirection  dir 
)

Definition at line 1131 of file nbtsearch.c.

References _bt_steppage(), BTScanOpaqueData::currPos, BTScanOpaqueData::currTuples, BTScanPosData::firstItem, BTScanPosData::itemIndex, BTScanPosData::items, BTScanPosData::lastItem, IndexScanDescData::opaque, ScanDirectionIsForward, HeapTupleData::t_self, IndexScanDescData::xs_ctup, IndexScanDescData::xs_itup, and IndexScanDescData::xs_want_itup.

Referenced by btgetbitmap(), and btgettuple().

1132 {
1133  BTScanOpaque so = (BTScanOpaque) scan->opaque;
1134  BTScanPosItem *currItem;
1135 
1136  /*
1137  * Advance to next tuple on current page; or if there's no more, try to
1138  * step to the next page with data.
1139  */
1140  if (ScanDirectionIsForward(dir))
1141  {
1142  if (++so->currPos.itemIndex > so->currPos.lastItem)
1143  {
1144  if (!_bt_steppage(scan, dir))
1145  return false;
1146  }
1147  }
1148  else
1149  {
1150  if (--so->currPos.itemIndex < so->currPos.firstItem)
1151  {
1152  if (!_bt_steppage(scan, dir))
1153  return false;
1154  }
1155  }
1156 
1157  /* OK, itemIndex says what to return */
1158  currItem = &so->currPos.items[so->currPos.itemIndex];
1159  scan->xs_ctup.t_self = currItem->heapTid;
1160  if (scan->xs_want_itup)
1161  scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset);
1162 
1163  return true;
1164 }
#define ScanDirectionIsForward(direction)
Definition: sdir.h:55
IndexTuple xs_itup
Definition: relscan.h:115
BTScanPosItem items[MaxIndexTuplesPerPage]
Definition: nbtree.h:390
int itemIndex
Definition: nbtree.h:388
char * currTuples
Definition: nbtree.h:462
int lastItem
Definition: nbtree.h:387
BTScanOpaqueData * BTScanOpaque
Definition: nbtree.h:479
ItemPointerData t_self
Definition: htup.h:65
IndexTupleData * IndexTuple
Definition: itup.h:53
int firstItem
Definition: nbtree.h:386
bool xs_want_itup
Definition: relscan.h:97
HeapTupleData xs_ctup
Definition: relscan.h:121
BTScanPosData currPos
Definition: nbtree.h:475
static bool _bt_steppage(IndexScanDesc scan, ScanDirection dir)
Definition: nbtsearch.c:1344

◆ _bt_nonkey_truncate()

IndexTuple _bt_nonkey_truncate ( Relation  rel,
IndexTuple  itup 
)

Definition at line 2102 of file nbtutils.c.

References Assert, BTreeTupleGetNAtts, BTreeTupleSetNAtts, index_truncate_tuple(), IndexRelationGetNumberOfAttributes, IndexRelationGetNumberOfKeyAttributes, and RelationGetDescr.

Referenced by _bt_buildadd(), and _bt_split().

2103 {
2104  int nkeyattrs = IndexRelationGetNumberOfKeyAttributes(rel);
2105  IndexTuple truncated;
2106 
2107  /*
2108  * We should only ever truncate leaf index tuples, which must have both
2109  * key and non-key attributes. It's never okay to truncate a second time.
2110  */
2111  Assert(BTreeTupleGetNAtts(itup, rel) ==
2113 
2114  truncated = index_truncate_tuple(RelationGetDescr(rel), itup, nkeyattrs);
2115  BTreeTupleSetNAtts(truncated, nkeyattrs);
2116 
2117  return truncated;
2118 }
#define RelationGetDescr(relation)
Definition: rel.h:433
#define BTreeTupleGetNAtts(itup, rel)
Definition: nbtree.h:247
#define BTreeTupleSetNAtts(itup, n)
Definition: nbtree.h:257
#define IndexRelationGetNumberOfAttributes(relation)
Definition: rel.h:419
IndexTuple index_truncate_tuple(TupleDesc sourceDescriptor, IndexTuple source, int leavenatts)
Definition: indextuple.c:470
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition: rel.h:426
#define Assert(condition)
Definition: c.h:699

◆ _bt_page_recyclable()

bool _bt_page_recyclable ( Page  page)

Definition at line 903 of file nbtpage.c.

References BTPageOpaqueData::btpo, P_ISDELETED, PageGetSpecialPointer, PageIsNew, RecentGlobalXmin, TransactionIdPrecedes(), and BTPageOpaqueData::xact.

Referenced by _bt_getbuf(), and btvacuumpage().

904 {
905  BTPageOpaque opaque;
906 
907  /*
908  * It's possible to find an all-zeroes page in an index --- for example, a
909  * backend might successfully extend the relation one page and then crash
910  * before it is able to make a WAL entry for adding the page. If we find a
911  * zeroed page then reclaim it.
912  */
913  if (PageIsNew(page))
914  return true;
915 
916  /*
917  * Otherwise, recycle if deleted and too old to have any processes
918  * interested in it.
919  */
920  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
921  if (P_ISDELETED(opaque) &&
923  return true;
924  return false;
925 }
union BTPageOpaqueData::@46 btpo
TransactionId xact
Definition: nbtree.h:62
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:68
TransactionId RecentGlobalXmin
Definition: snapmgr.c:166
#define P_ISDELETED(opaque)
Definition: nbtree.h:160
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
#define PageIsNew(page)
Definition: bufpage.h:225

◆ _bt_pagedel()

int _bt_pagedel ( Relation  rel,
Buffer  buf 
)

Definition at line 1268 of file nbtpage.c.

References _bt_getbuf(), _bt_mark_page_halfdead(), _bt_mkscankey(), _bt_relbuf(), _bt_search(), _bt_unlink_halfdead_page(), Assert, BT_READ, BT_WRITE, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage, CopyIndexTuple(), ereport, errcode(), errhint(), errmsg(), IndexRelationGetNumberOfKeyAttributes, LockBuffer(), LOG, P_FIRSTDATAKEY, P_HIKEY, P_INCOMPLETE_SPLIT, P_ISDELETED, P_ISHALFDEAD, P_ISLEAF, P_ISROOT, P_LEFTMOST, P_RIGHTMOST, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageGetSpecialPointer, RelationGetRelationName, and ReleaseBuffer().

Referenced by btvacuumpage().

1269 {
1270  int ndeleted = 0;
1271  BlockNumber rightsib;
1272  bool rightsib_empty;
1273  Page page;
1274  BTPageOpaque opaque;
1275 
1276  /*
1277  * "stack" is a search stack leading (approximately) to the target page.
1278  * It is initially NULL, but when iterating, we keep it to avoid
1279  * duplicated search effort.
1280  *
1281  * Also, when "stack" is not NULL, we have already checked that the
1282  * current page is not the right half of an incomplete split, i.e. the
1283  * left sibling does not have its INCOMPLETE_SPLIT flag set.
1284  */
1285  BTStack stack = NULL;
1286 
1287  for (;;)
1288  {
1289  page = BufferGetPage(buf);
1290  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1291 
1292  /*
1293  * Internal pages are never deleted directly, only as part of deleting
1294  * the whole branch all the way down to leaf level.
1295  */
1296  if (!P_ISLEAF(opaque))
1297  {
1298  /*
1299  * Pre-9.4 page deletion only marked internal pages as half-dead,
1300  * but now we only use that flag on leaf pages. The old algorithm
1301  * was never supposed to leave half-dead pages in the tree, it was
1302  * just a transient state, but it was nevertheless possible in
1303  * error scenarios. We don't know how to deal with them here. They
1304  * are harmless as far as searches are considered, but inserts
1305  * into the deleted keyspace could add out-of-order downlinks in
1306  * the upper levels. Log a notice, hopefully the admin will notice
1307  * and reindex.
1308  */
1309  if (P_ISHALFDEAD(opaque))
1310  ereport(LOG,
1311  (errcode(ERRCODE_INDEX_CORRUPTED),
1312  errmsg("index \"%s\" contains a half-dead internal page",
1314  errhint("This can be caused by an interrupted VACUUM in version 9.3 or older, before upgrade. Please REINDEX it.")));
1315  _bt_relbuf(rel, buf);
1316  return ndeleted;
1317  }
1318 
1319  /*
1320  * We can never delete rightmost pages nor root pages. While at it,
1321  * check that page is not already deleted and is empty.
1322  *
1323  * To keep the algorithm simple, we also never delete an incompletely
1324  * split page (they should be rare enough that this doesn't make any
1325  * meaningful difference to disk usage):
1326  *
1327  * The INCOMPLETE_SPLIT flag on the page tells us if the page is the
1328  * left half of an incomplete split, but ensuring that it's not the
1329  * right half is more complicated. For that, we have to check that
1330  * the left sibling doesn't have its INCOMPLETE_SPLIT flag set. On
1331  * the first iteration, we temporarily release the lock on the current
1332  * page, and check the left sibling and also construct a search stack
1333  * to. On subsequent iterations, we know we stepped right from a page
1334  * that passed these tests, so it's OK.
1335  */
1336  if (P_RIGHTMOST(opaque) || P_ISROOT(opaque) || P_ISDELETED(opaque) ||
1337  P_FIRSTDATAKEY(opaque) <= PageGetMaxOffsetNumber(page) ||
1338  P_INCOMPLETE_SPLIT(opaque))
1339  {
1340  /* Should never fail to delete a half-dead page */
1341  Assert(!P_ISHALFDEAD(opaque));
1342 
1343  _bt_relbuf(rel, buf);
1344  return ndeleted;
1345  }
1346 
1347  /*
1348  * First, remove downlink pointing to the page (or a parent of the
1349  * page, if we are going to delete a taller branch), and mark the page
1350  * as half-dead.
1351  */
1352  if (!P_ISHALFDEAD(opaque))
1353  {
1354  /*
1355  * We need an approximate pointer to the page's parent page. We
1356  * use the standard search mechanism to search for the page's high
1357  * key; this will give us a link to either the current parent or
1358  * someplace to its left (if there are multiple equal high keys).
1359  *
1360  * Also check if this is the right-half of an incomplete split
1361  * (see comment above).
1362  */
1363  if (!stack)
1364  {
1365  ScanKey itup_scankey;
1366  ItemId itemid;
1367  IndexTuple targetkey;
1368  Buffer lbuf;
1369  BlockNumber leftsib;
1370 
1371  itemid = PageGetItemId(page, P_HIKEY);
1372  targetkey = CopyIndexTuple((IndexTuple) PageGetItem(page, itemid));
1373 
1374  leftsib = opaque->btpo_prev;
1375 
1376  /*
1377  * To avoid deadlocks, we'd better drop the leaf page lock
1378  * before going further.
1379  */
1381 
1382  /*
1383  * Fetch the left sibling, to check that it's not marked with
1384  * INCOMPLETE_SPLIT flag. That would mean that the page
1385  * to-be-deleted doesn't have a downlink, and the page
1386  * deletion algorithm isn't prepared to handle that.
1387  */
1388  if (!P_LEFTMOST(opaque))
1389  {
1390  BTPageOpaque lopaque;
1391  Page lpage;
1392 
1393  lbuf = _bt_getbuf(rel, leftsib, BT_READ);
1394  lpage = BufferGetPage(lbuf);
1395  lopaque = (BTPageOpaque) PageGetSpecialPointer(lpage);
1396 
1397  /*
1398  * If the left sibling is split again by another backend,
1399  * after we released the lock, we know that the first
1400  * split must have finished, because we don't allow an
1401  * incompletely-split page to be split again. So we don't
1402  * need to walk right here.
1403  */
1404  if (lopaque->btpo_next == BufferGetBlockNumber(buf) &&
1405  P_INCOMPLETE_SPLIT(lopaque))
1406  {
1407  ReleaseBuffer(buf);
1408  _bt_relbuf(rel, lbuf);
1409  return ndeleted;
1410  }
1411  _bt_relbuf(rel, lbuf);
1412  }
1413 
1414  /* we need an insertion scan key for the search, so build one */
1415  itup_scankey = _bt_mkscankey(rel, targetkey);
1416  /* find the leftmost leaf page containing this key */
1417  stack = _bt_search(rel,
1419  itup_scankey, false, &lbuf, BT_READ, NULL);
1420  /* don't need a pin on the page */
1421  _bt_relbuf(rel, lbuf);
1422 
1423  /*
1424  * Re-lock the leaf page, and start over, to re-check that the
1425  * page can still be deleted.
1426  */
1428  continue;
1429  }
1430 
1431  if (!_bt_mark_page_halfdead(rel, buf, stack))
1432  {
1433  _bt_relbuf(rel, buf);
1434  return ndeleted;
1435  }
1436  }
1437 
1438  /*
1439  * Then unlink it from its siblings. Each call to
1440  * _bt_unlink_halfdead_page unlinks the topmost page from the branch,
1441  * making it shallower. Iterate until the leaf page is gone.
1442  */
1443  rightsib_empty = false;
1444  while (P_ISHALFDEAD(opaque))
1445  {
1446  if (!_bt_unlink_halfdead_page(rel, buf, &rightsib_empty))
1447  {
1448  /* _bt_unlink_halfdead_page already released buffer */
1449  return ndeleted;
1450  }
1451  ndeleted++;
1452  }
1453 
1454  rightsib = opaque->btpo_next;
1455 
1456  _bt_relbuf(rel, buf);
1457 
1458  /*
1459  * The page has now been deleted. If its right sibling is completely
1460  * empty, it's possible that the reason we haven't deleted it earlier
1461  * is that it was the rightmost child of the parent. Now that we
1462  * removed the downlink for this page, the right sibling might now be
1463  * the only child of the parent, and could be removed. It would be
1464  * picked up by the next vacuum anyway, but might as well try to
1465  * remove it now, so loop back to process the right sibling.
1466  */
1467  if (!rightsib_empty)
1468  break;
1469 
1470  buf = _bt_getbuf(rel, rightsib, BT_WRITE);
1471  }
1472 
1473  return ndeleted;
1474 }
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:87
BlockNumber btpo_next
Definition: nbtree.h:58
int errhint(const char *fmt,...)
Definition: elog.c:987
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:729
static bool _bt_mark_page_halfdead(Relation rel, Buffer buf, BTStack stack)
Definition: nbtpage.c:1481
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:187
int errcode(int sqlerrcode)
Definition: elog.c:575
uint32 BlockNumber
Definition: block.h:31
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3309
#define P_INCOMPLETE_SPLIT(opaque)
Definition: nbtree.h:165
#define LOG
Definition: elog.h:26
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:353
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:68