PostgreSQL Source Code  git master
nbtpage.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "access/nbtxlog.h"
#include "access/tableam.h"
#include "access/transam.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "miscadmin.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h"
#include "storage/predicate.h"
#include "utils/memdebug.h"
#include "utils/snapmgr.h"
Include dependency graph for nbtpage.c:

Go to the source code of this file.

Functions

static BTMetaPageData_bt_getmeta (Relation rel, Buffer metabuf)
 
static void _bt_log_reuse_page (Relation rel, BlockNumber blkno, TransactionId latestRemovedXid)
 
static TransactionId _bt_xid_horizon (Relation rel, Relation heapRel, Page page, OffsetNumber *deletable, int ndeletable)
 
static bool _bt_mark_page_halfdead (Relation rel, Buffer leafbuf, BTStack stack)
 
static bool _bt_unlink_halfdead_page (Relation rel, Buffer leafbuf, BlockNumber scanblkno, bool *rightsib_empty, TransactionId *oldestBtpoXact, uint32 *ndeleted)
 
static bool _bt_lock_subtree_parent (Relation rel, BlockNumber child, BTStack stack, Buffer *subtreeparent, OffsetNumber *poffset, BlockNumber *topparent, BlockNumber *topparentrightsib)
 
void _bt_initmetapage (Page page, BlockNumber rootbknum, uint32 level, bool allequalimage)
 
void _bt_upgrademetapage (Page page)
 
void _bt_update_meta_cleanup_info (Relation rel, TransactionId oldestBtpoXact, float8 numHeapTuples)
 
Buffer _bt_getroot (Relation rel, int access)
 
Buffer _bt_gettrueroot (Relation rel)
 
int _bt_getrootheight (Relation rel)
 
void _bt_metaversion (Relation rel, bool *heapkeyspace, bool *allequalimage)
 
void _bt_checkpage (Relation rel, Buffer buf)
 
Buffer _bt_getbuf (Relation rel, BlockNumber blkno, int access)
 
Buffer _bt_relandgetbuf (Relation rel, Buffer obuf, BlockNumber blkno, int access)
 
void _bt_relbuf (Relation rel, Buffer buf)
 
void _bt_lockbuf (Relation rel, Buffer buf, int access)
 
void _bt_unlockbuf (Relation rel, Buffer buf)
 
bool _bt_conditionallockbuf (Relation rel, Buffer buf)
 
void _bt_upgradelockbufcleanup (Relation rel, Buffer buf)
 
void _bt_pageinit (Page page, Size size)
 
bool _bt_page_recyclable (Page page)
 
void _bt_delitems_vacuum (Relation rel, Buffer buf, OffsetNumber *deletable, int ndeletable, BTVacuumPosting *updatable, int nupdatable)
 
void _bt_delitems_delete (Relation rel, Buffer buf, OffsetNumber *deletable, int ndeletable, Relation heapRel)
 
static bool _bt_leftsib_splitflag (Relation rel, BlockNumber leftsib, BlockNumber target)
 
static bool _bt_rightsib_halfdeadflag (Relation rel, BlockNumber leafrightsib)
 
uint32 _bt_pagedel (Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
 

Function Documentation

◆ _bt_checkpage()

void _bt_checkpage ( Relation  rel,
Buffer  buf 
)

Definition at line 726 of file nbtpage.c.

References BufferGetBlockNumber(), BufferGetPage, ereport, errcode(), errhint(), errmsg(), ERROR, MAXALIGN, PageGetSpecialSize, PageIsNew, and RelationGetRelationName.

Referenced by _bt_getbuf(), _bt_relandgetbuf(), _bt_search_insert(), bt_recheck_sibling_links(), btvacuumpage(), and palloc_btree_page().

727 {
728  Page page = BufferGetPage(buf);
729 
730  /*
731  * ReadBuffer verifies that every newly-read page passes
732  * PageHeaderIsValid, which means it either contains a reasonably sane
733  * page header or is all-zero. We have to defend against the all-zero
734  * case, however.
735  */
736  if (PageIsNew(page))
737  ereport(ERROR,
738  (errcode(ERRCODE_INDEX_CORRUPTED),
739  errmsg("index \"%s\" contains unexpected zero page at block %u",
742  errhint("Please REINDEX it.")));
743 
744  /*
745  * Additionally check that the special area looks sane.
746  */
747  if (PageGetSpecialSize(page) != MAXALIGN(sizeof(BTPageOpaqueData)))
748  ereport(ERROR,
749  (errcode(ERRCODE_INDEX_CORRUPTED),
750  errmsg("index \"%s\" contains corrupted page at block %u",
753  errhint("Please REINDEX it.")));
754 }
int errhint(const char *fmt,...)
Definition: elog.c:1071
int errcode(int sqlerrcode)
Definition: elog.c:610
#define ERROR
Definition: elog.h:43
static char * buf
Definition: pg_test_fsync.c:67
#define RelationGetRelationName(relation)
Definition: rel.h:490
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define ereport(elevel,...)
Definition: elog.h:144
#define MAXALIGN(LEN)
Definition: c.h:698
#define PageGetSpecialSize(page)
Definition: bufpage.h:300
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2668
#define PageIsNew(page)
Definition: bufpage.h:229
int errmsg(const char *fmt,...)
Definition: elog.c:824
Pointer Page
Definition: bufpage.h:78

◆ _bt_conditionallockbuf()

bool _bt_conditionallockbuf ( Relation  rel,
Buffer  buf 
)

Definition at line 1029 of file nbtpage.c.

References BufferGetPage, ConditionalLockBuffer(), RelationUsesLocalBuffers, and VALGRIND_MAKE_MEM_DEFINED.

Referenced by _bt_getbuf(), and _bt_search_insert().

1030 {
1031  /* ConditionalLockBuffer() asserts that pin is held by this backend */
1032  if (!ConditionalLockBuffer(buf))
1033  return false;
1034 
1035  if (!RelationUsesLocalBuffers(rel))
1037 
1038  return true;
1039 }
#define VALGRIND_MAKE_MEM_DEFINED(addr, size)
Definition: memdebug.h:26
static char * buf
Definition: pg_test_fsync.c:67
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
bool ConditionalLockBuffer(Buffer buffer)
Definition: bufmgr.c:3783
#define RelationUsesLocalBuffers(relation)
Definition: rel.h:572

◆ _bt_delitems_delete()

void _bt_delitems_delete ( Relation  rel,
Buffer  buf,
OffsetNumber deletable,
int  ndeletable,
Relation  heapRel 
)

Definition at line 1290 of file nbtpage.c.

References _bt_xid_horizon(), Assert, BTP_HAS_GARBAGE, BTPageOpaqueData::btpo_flags, BufferGetPage, END_CRIT_SECTION, InvalidTransactionId, xl_btree_delete::latestRemovedXid, MarkBufferDirty(), xl_btree_delete::ndeleted, PageGetSpecialPointer, PageIndexMultiDelete(), PageSetLSN, REGBUF_STANDARD, RelationNeedsWAL, SizeOfBtreeDelete, START_CRIT_SECTION, XLOG_BTREE_DELETE, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogStandbyInfoActive.

Referenced by _bt_dedup_one_page(), and _bt_vacuum_one_page().

1293 {
1294  Page page = BufferGetPage(buf);
1295  BTPageOpaque opaque;
1296  TransactionId latestRemovedXid = InvalidTransactionId;
1297 
1298  /* Shouldn't be called unless there's something to do */
1299  Assert(ndeletable > 0);
1300 
1302  latestRemovedXid =
1303  _bt_xid_horizon(rel, heapRel, page, deletable, ndeletable);
1304 
1305  /* No ereport(ERROR) until changes are logged */
1307 
1308  /* Fix the page */
1309  PageIndexMultiDelete(page, deletable, ndeletable);
1310 
1311  /*
1312  * Unlike _bt_delitems_vacuum, we *must not* clear the vacuum cycle ID,
1313  * because this is not called by VACUUM. Just clear the BTP_HAS_GARBAGE
1314  * page flag, since we deleted all items with their LP_DEAD bit set.
1315  */
1316  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1317  opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
1318 
1320 
1321  /* XLOG stuff */
1322  if (RelationNeedsWAL(rel))
1323  {
1324  XLogRecPtr recptr;
1325  xl_btree_delete xlrec_delete;
1326 
1327  xlrec_delete.latestRemovedXid = latestRemovedXid;
1328  xlrec_delete.ndeleted = ndeletable;
1329 
1330  XLogBeginInsert();
1332  XLogRegisterData((char *) &xlrec_delete, SizeOfBtreeDelete);
1333 
1334  /*
1335  * The deletable array is not in the buffer, but pretend that it is.
1336  * When XLogInsert stores the whole buffer, the array need not be
1337  * stored too.
1338  */
1339  XLogRegisterBufData(0, (char *) deletable,
1340  ndeletable * sizeof(OffsetNumber));
1341 
1342  recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE);
1343 
1344  PageSetLSN(page, recptr);
1345  }
1346 
1347  END_CRIT_SECTION();
1348 }
void XLogRegisterBufData(uint8 block_id, char *data, int len)
Definition: xloginsert.c:368
TransactionId latestRemovedXid
Definition: nbtxlog.h:189
uint32 TransactionId
Definition: c.h:520
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1469
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:220
#define END_CRIT_SECTION()
Definition: miscadmin.h:134
#define START_CRIT_SECTION()
Definition: miscadmin.h:132
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
uint32 ndeleted
Definition: nbtxlog.h:190
uint16 OffsetNumber
Definition: off.h:24
static char * buf
Definition: pg_test_fsync.c:67
#define REGBUF_STANDARD
Definition: xloginsert.h:35
#define InvalidTransactionId
Definition: transam.h:31
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
static TransactionId _bt_xid_horizon(Relation rel, Relation heapRel, Page page, OffsetNumber *deletable, int ndeletable)
Definition: nbtpage.c:1359
#define XLOG_BTREE_DELETE
Definition: nbtxlog.h:33
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:330
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:422
#define XLogStandbyInfoActive()
Definition: xlog.h:205
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:745
void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
Definition: bufpage.c:1036
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
#define RelationNeedsWAL(relation)
Definition: rel.h:562
#define SizeOfBtreeDelete
Definition: nbtxlog.h:195
void XLogBeginInsert(void)
Definition: xloginsert.c:123
uint16 btpo_flags
Definition: nbtree.h:65
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
#define BTP_HAS_GARBAGE
Definition: nbtree.h:78
Pointer Page
Definition: bufpage.h:78

◆ _bt_delitems_vacuum()

void _bt_delitems_vacuum ( Relation  rel,
Buffer  buf,
OffsetNumber deletable,
int  ndeletable,
BTVacuumPosting updatable,
int  nupdatable 
)

Definition at line 1124 of file nbtpage.c.

References _bt_update_posting(), Assert, BTP_HAS_GARBAGE, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BufferGetBlockNumber(), BufferGetPage, BTVacuumPostingData::deletetids, elog, END_CRIT_SECTION, i, IndexTupleSize, BTVacuumPostingData::itup, MarkBufferDirty(), MAXALIGN, MaxIndexTuplesPerPage, xl_btree_vacuum::ndeleted, xl_btree_update::ndeletedtids, BTVacuumPostingData::ndeletedtids, xl_btree_vacuum::nupdated, PageGetSpecialPointer, PageIndexMultiDelete(), PageIndexTupleOverwrite(), PageSetLSN, palloc(), PANIC, pfree(), REGBUF_STANDARD, RelationGetRelationName, RelationNeedsWAL, SizeOfBtreeUpdate, SizeOfBtreeVacuum, START_CRIT_SECTION, BTVacuumPostingData::updatedoffset, XLOG_BTREE_VACUUM, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by btvacuumpage().

1127 {
1128  Page page = BufferGetPage(buf);
1129  BTPageOpaque opaque;
1130  Size itemsz;
1131  char *updatedbuf = NULL;
1132  Size updatedbuflen = 0;
1133  OffsetNumber updatedoffsets[MaxIndexTuplesPerPage];
1134 
1135  /* Shouldn't be called unless there's something to do */
1136  Assert(ndeletable > 0 || nupdatable > 0);
1137 
1138  for (int i = 0; i < nupdatable; i++)
1139  {
1140  /* Replace work area IndexTuple with updated version */
1141  _bt_update_posting(updatable[i]);
1142 
1143  /* Maintain array of updatable page offsets for WAL record */
1144  updatedoffsets[i] = updatable[i]->updatedoffset;
1145  }
1146 
1147  /* XLOG stuff -- allocate and fill buffer before critical section */
1148  if (nupdatable > 0 && RelationNeedsWAL(rel))
1149  {
1150  Size offset = 0;
1151 
1152  for (int i = 0; i < nupdatable; i++)
1153  {
1154  BTVacuumPosting vacposting = updatable[i];
1155 
1156  itemsz = SizeOfBtreeUpdate +
1157  vacposting->ndeletedtids * sizeof(uint16);
1158  updatedbuflen += itemsz;
1159  }
1160 
1161  updatedbuf = palloc(updatedbuflen);
1162  for (int i = 0; i < nupdatable; i++)
1163  {
1164  BTVacuumPosting vacposting = updatable[i];
1165  xl_btree_update update;
1166 
1167  update.ndeletedtids = vacposting->ndeletedtids;
1168  memcpy(updatedbuf + offset, &update.ndeletedtids,
1170  offset += SizeOfBtreeUpdate;
1171 
1172  itemsz = update.ndeletedtids * sizeof(uint16);
1173  memcpy(updatedbuf + offset, vacposting->deletetids, itemsz);
1174  offset += itemsz;
1175  }
1176  }
1177 
1178  /* No ereport(ERROR) until changes are logged */
1180 
1181  /*
1182  * Handle posting tuple updates.
1183  *
1184  * Deliberately do this before handling simple deletes. If we did it the
1185  * other way around (i.e. WAL record order -- simple deletes before
1186  * updates) then we'd have to make compensating changes to the 'updatable'
1187  * array of offset numbers.
1188  *
1189  * PageIndexTupleOverwrite() won't unset each item's LP_DEAD bit when it
1190  * happens to already be set. Although we unset the BTP_HAS_GARBAGE page
1191  * level flag, unsetting individual LP_DEAD bits should still be avoided.
1192  */
1193  for (int i = 0; i < nupdatable; i++)
1194  {
1195  OffsetNumber updatedoffset = updatedoffsets[i];
1196  IndexTuple itup;
1197 
1198  itup = updatable[i]->itup;
1199  itemsz = MAXALIGN(IndexTupleSize(itup));
1200  if (!PageIndexTupleOverwrite(page, updatedoffset, (Item) itup,
1201  itemsz))
1202  elog(PANIC, "failed to update partially dead item in block %u of index \"%s\"",
1204  }
1205 
1206  /* Now handle simple deletes of entire tuples */
1207  if (ndeletable > 0)
1208  PageIndexMultiDelete(page, deletable, ndeletable);
1209 
1210  /*
1211  * We can clear the vacuum cycle ID since this page has certainly been
1212  * processed by the current vacuum scan.
1213  */
1214  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1215  opaque->btpo_cycleid = 0;
1216 
1217  /*
1218  * Mark the page as not containing any LP_DEAD items. This is not
1219  * certainly true (there might be some that have recently been marked, but
1220  * weren't targeted by VACUUM's heap scan), but it will be true often
1221  * enough. VACUUM does not delete items purely because they have their
1222  * LP_DEAD bit set, since doing so would necessitate explicitly logging a
1223  * latestRemovedXid cutoff (this is how _bt_delitems_delete works).
1224  *
1225  * The consequences of falsely unsetting BTP_HAS_GARBAGE should be fairly
1226  * limited, since we never falsely unset an LP_DEAD bit. Workloads that
1227  * are particularly dependent on LP_DEAD bits being set quickly will
1228  * usually manage to set the BTP_HAS_GARBAGE flag before the page fills up
1229  * again anyway. Furthermore, attempting a deduplication pass will remove
1230  * all LP_DEAD items, regardless of whether the BTP_HAS_GARBAGE hint bit
1231  * is set or not.
1232  */
1233  opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
1234 
1236 
1237  /* XLOG stuff */
1238  if (RelationNeedsWAL(rel))
1239  {
1240  XLogRecPtr recptr;
1241  xl_btree_vacuum xlrec_vacuum;
1242 
1243  xlrec_vacuum.ndeleted = ndeletable;
1244  xlrec_vacuum.nupdated = nupdatable;
1245 
1246  XLogBeginInsert();
1248  XLogRegisterData((char *) &xlrec_vacuum, SizeOfBtreeVacuum);
1249 
1250  if (ndeletable > 0)
1251  XLogRegisterBufData(0, (char *) deletable,
1252  ndeletable * sizeof(OffsetNumber));
1253 
1254  if (nupdatable > 0)
1255  {
1256  XLogRegisterBufData(0, (char *) updatedoffsets,
1257  nupdatable * sizeof(OffsetNumber));
1258  XLogRegisterBufData(0, updatedbuf, updatedbuflen);
1259  }
1260 
1261  recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_VACUUM);
1262 
1263  PageSetLSN(page, recptr);
1264  }
1265 
1266  END_CRIT_SECTION();
1267 
1268  /* can't leak memory here */
1269  if (updatedbuf != NULL)
1270  pfree(updatedbuf);
1271  /* free tuples generated by calling _bt_update_posting() */
1272  for (int i = 0; i < nupdatable; i++)
1273  pfree(updatable[i]->itup);
1274 }
void XLogRegisterBufData(uint8 block_id, char *data, int len)
Definition: xloginsert.c:368
uint16 ndeletedtids
Definition: nbtree.h:782
void _bt_update_posting(BTVacuumPosting vacposting)
Definition: nbtdedup.c:676
#define SizeOfBtreeUpdate
Definition: nbtxlog.h:225
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1469
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:220
OffsetNumber updatedoffset
Definition: nbtree.h:779
#define END_CRIT_SECTION()
Definition: miscadmin.h:134
IndexTuple itup
Definition: nbtree.h:778
Pointer Item
Definition: item.h:17
#define START_CRIT_SECTION()
Definition: miscadmin.h:132
uint16 nupdated
Definition: nbtxlog.h:243
#define PANIC
Definition: elog.h:53
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
uint16 OffsetNumber
Definition: off.h:24
unsigned short uint16
Definition: c.h:373
void pfree(void *pointer)
Definition: mcxt.c:1057
#define SizeOfBtreeVacuum
Definition: nbtxlog.h:250
BTCycleId btpo_cycleid
Definition: nbtree.h:66
bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum, Item newtup, Size newsize)
Definition: bufpage.c:1280
static char * buf
Definition: pg_test_fsync.c:67
#define REGBUF_STANDARD
Definition: xloginsert.h:35
#define XLOG_BTREE_VACUUM
Definition: nbtxlog.h:38
#define RelationGetRelationName(relation)
Definition: rel.h:490
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
uint16 ndeleted
Definition: nbtxlog.h:242
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:330
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:422
uint16 deletetids[FLEXIBLE_ARRAY_MEMBER]
Definition: nbtree.h:783
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:745
void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
Definition: bufpage.c:1036
size_t Size
Definition: c.h:473
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
#define MAXALIGN(LEN)
Definition: c.h:698
#define RelationNeedsWAL(relation)
Definition: rel.h:562
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2668
#define MaxIndexTuplesPerPage
Definition: itup.h:145
void * palloc(Size size)
Definition: mcxt.c:950
#define elog(elevel,...)
Definition: elog.h:214
int i
void XLogBeginInsert(void)
Definition: xloginsert.c:123
uint16 btpo_flags
Definition: nbtree.h:65
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
#define BTP_HAS_GARBAGE
Definition: nbtree.h:78
uint16 ndeletedtids
Definition: nbtxlog.h:220
Pointer Page
Definition: bufpage.h:78
#define IndexTupleSize(itup)
Definition: itup.h:71

◆ _bt_getbuf()

Buffer _bt_getbuf ( Relation  rel,
BlockNumber  blkno,
int  access 
)

Definition at line 803 of file nbtpage.c.

References _bt_checkpage(), _bt_conditionallockbuf(), _bt_lockbuf(), _bt_log_reuse_page(), _bt_page_recyclable(), _bt_pageinit(), _bt_relbuf(), Assert, BT_WRITE, BTPageOpaqueData::btpo, buf, BufferGetPage, BufferGetPageSize, DEBUG2, elog, ExclusiveLock, GetFreeIndexPage(), InvalidBlockNumber, LockRelationForExtension(), P_NEW, PageGetSpecialPointer, PageIsNew, ReadBuffer(), RELATION_IS_LOCAL, RelationNeedsWAL, ReleaseBuffer(), UnlockRelationForExtension(), BTPageOpaqueData::xact, and XLogStandbyInfoActive.

Referenced by _bt_finish_split(), _bt_getroot(), _bt_getrootheight(), _bt_getstackbuf(), _bt_gettrueroot(), _bt_insertonpg(), _bt_killitems(), _bt_leftsib_splitflag(), _bt_metaversion(), _bt_moveright(), _bt_newroot(), _bt_pagedel(), _bt_readnextpage(), _bt_rightsib_halfdeadflag(), _bt_split(), _bt_unlink_halfdead_page(), _bt_update_meta_cleanup_info(), _bt_vacuum_needs_cleanup(), and _bt_walk_left().

804 {
805  Buffer buf;
806 
807  if (blkno != P_NEW)
808  {
809  /* Read an existing block of the relation */
810  buf = ReadBuffer(rel, blkno);
811  _bt_lockbuf(rel, buf, access);
812  _bt_checkpage(rel, buf);
813  }
814  else
815  {
816  bool needLock;
817  Page page;
818 
819  Assert(access == BT_WRITE);
820 
821  /*
822  * First see if the FSM knows of any free pages.
823  *
824  * We can't trust the FSM's report unreservedly; we have to check that
825  * the page is still free. (For example, an already-free page could
826  * have been re-used between the time the last VACUUM scanned it and
827  * the time the VACUUM made its FSM updates.)
828  *
829  * In fact, it's worse than that: we can't even assume that it's safe
830  * to take a lock on the reported page. If somebody else has a lock
831  * on it, or even worse our own caller does, we could deadlock. (The
832  * own-caller scenario is actually not improbable. Consider an index
833  * on a serial or timestamp column. Nearly all splits will be at the
834  * rightmost page, so it's entirely likely that _bt_split will call us
835  * while holding a lock on the page most recently acquired from FSM. A
836  * VACUUM running concurrently with the previous split could well have
837  * placed that page back in FSM.)
838  *
839  * To get around that, we ask for only a conditional lock on the
840  * reported page. If we fail, then someone else is using the page,
841  * and we may reasonably assume it's not free. (If we happen to be
842  * wrong, the worst consequence is the page will be lost to use till
843  * the next VACUUM, which is no big problem.)
844  */
845  for (;;)
846  {
847  blkno = GetFreeIndexPage(rel);
848  if (blkno == InvalidBlockNumber)
849  break;
850  buf = ReadBuffer(rel, blkno);
851  if (_bt_conditionallockbuf(rel, buf))
852  {
853  page = BufferGetPage(buf);
854  if (_bt_page_recyclable(page))
855  {
856  /*
857  * If we are generating WAL for Hot Standby then create a
858  * WAL record that will allow us to conflict with queries
859  * running on standby, in case they have snapshots older
860  * than btpo.xact. This can only apply if the page does
861  * have a valid btpo.xact value, ie not if it's new. (We
862  * must check that because an all-zero page has no special
863  * space.)
864  */
865  if (XLogStandbyInfoActive() && RelationNeedsWAL(rel) &&
866  !PageIsNew(page))
867  {
869 
870  _bt_log_reuse_page(rel, blkno, opaque->btpo.xact);
871  }
872 
873  /* Okay to use page. Re-initialize and return it */
874  _bt_pageinit(page, BufferGetPageSize(buf));
875  return buf;
876  }
877  elog(DEBUG2, "FSM returned nonrecyclable page");
878  _bt_relbuf(rel, buf);
879  }
880  else
881  {
882  elog(DEBUG2, "FSM returned nonlockable page");
883  /* couldn't get lock, so just drop pin */
884  ReleaseBuffer(buf);
885  }
886  }
887 
888  /*
889  * Extend the relation by one page.
890  *
891  * We have to use a lock to ensure no one else is extending the rel at
892  * the same time, else we will both try to initialize the same new
893  * page. We can skip locking for new or temp relations, however,
894  * since no one else could be accessing them.
895  */
896  needLock = !RELATION_IS_LOCAL(rel);
897 
898  if (needLock)
900 
901  buf = ReadBuffer(rel, P_NEW);
902 
903  /* Acquire buffer lock on new page */
904  _bt_lockbuf(rel, buf, BT_WRITE);
905 
906  /*
907  * Release the file-extension lock; it's now OK for someone else to
908  * extend the relation some more. Note that we cannot release this
909  * lock before we have buffer lock on the new page, or we risk a race
910  * condition against btvacuumscan --- see comments therein.
911  */
912  if (needLock)
914 
915  /* Initialize the new page before returning it */
916  page = BufferGetPage(buf);
917  Assert(PageIsNew(page));
918  _bt_pageinit(page, BufferGetPageSize(buf));
919  }
920 
921  /* ref count and lock type are correct */
922  return buf;
923 }
#define ExclusiveLock
Definition: lockdefs.h:44
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:583
union BTPageOpaqueData::@45 btpo
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3518
#define P_NEW
Definition: bufmgr.h:91
bool _bt_page_recyclable(Page page)
Definition: nbtpage.c:1081
TransactionId xact
Definition: nbtree.h:63
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
void _bt_checkpage(Relation rel, Buffer buf)
Definition: nbtpage.c:726
#define DEBUG2
Definition: elog.h:24
static char * buf
Definition: pg_test_fsync.c:67
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
static void _bt_log_reuse_page(Relation rel, BlockNumber blkno, TransactionId latestRemovedXid)
Definition: nbtpage.c:760
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:402
void _bt_lockbuf(Relation rel, Buffer buf, int access)
Definition: nbtpage.c:975
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:452
#define BufferGetPageSize(buffer)
Definition: bufmgr.h:156
#define XLogStandbyInfoActive()
Definition: xlog.h:205
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:959
BlockNumber GetFreeIndexPage(Relation rel)
Definition: indexfsm.c:38
#define Assert(condition)
Definition: c.h:745
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:607
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
#define InvalidBlockNumber
Definition: block.h:33
#define RelationNeedsWAL(relation)
Definition: rel.h:562
#define PageIsNew(page)
Definition: bufpage.h:229
#define elog(elevel,...)
Definition: elog.h:214
void _bt_pageinit(Page page, Size size)
Definition: nbtpage.c:1066
bool _bt_conditionallockbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:1029
#define BT_WRITE
Definition: nbtree.h:588
int Buffer
Definition: buf.h:23
Pointer Page
Definition: bufpage.h:78

◆ _bt_getmeta()

static BTMetaPageData * _bt_getmeta ( Relation  rel,
Buffer  metabuf 
)
static

Definition at line 136 of file nbtpage.c.

References BTMetaPageData::btm_magic, BTMetaPageData::btm_version, BTPageGetMeta, BTREE_MAGIC, BTREE_MIN_VERSION, BTREE_VERSION, BufferGetPage, ereport, errcode(), errmsg(), ERROR, P_ISMETA, PageGetSpecialPointer, and RelationGetRelationName.

Referenced by _bt_getroot(), _bt_getrootheight(), and _bt_metaversion().

137 {
138  Page metapg;
139  BTPageOpaque metaopaque;
140  BTMetaPageData *metad;
141 
142  metapg = BufferGetPage(metabuf);
143  metaopaque = (BTPageOpaque) PageGetSpecialPointer(metapg);
144  metad = BTPageGetMeta(metapg);
145 
146  /* sanity-check the metapage */
147  if (!P_ISMETA(metaopaque) ||
148  metad->btm_magic != BTREE_MAGIC)
149  ereport(ERROR,
150  (errcode(ERRCODE_INDEX_CORRUPTED),
151  errmsg("index \"%s\" is not a btree",
152  RelationGetRelationName(rel))));
153 
154  if (metad->btm_version < BTREE_MIN_VERSION ||
155  metad->btm_version > BTREE_VERSION)
156  ereport(ERROR,
157  (errcode(ERRCODE_INDEX_CORRUPTED),
158  errmsg("version mismatch in index \"%s\": file version %d, "
159  "current version %d, minimal supported version %d",
162 
163  return metad;
164 }
uint32 btm_version
Definition: nbtree.h:101
#define BTREE_VERSION
Definition: nbtree.h:143
uint32 btm_magic
Definition: nbtree.h:100
int errcode(int sqlerrcode)
Definition: elog.c:610
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
#define P_ISMETA(opaque)
Definition: nbtree.h:217
#define BTREE_MAGIC
Definition: nbtree.h:142
#define ERROR
Definition: elog.h:43
#define BTPageGetMeta(p)
Definition: nbtree.h:114
#define RelationGetRelationName(relation)
Definition: rel.h:490
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define BTREE_MIN_VERSION
Definition: nbtree.h:144
#define ereport(elevel,...)
Definition: elog.h:144
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
int errmsg(const char *fmt,...)
Definition: elog.c:824
Pointer Page
Definition: bufpage.h:78

◆ _bt_getroot()

Buffer _bt_getroot ( Relation  rel,
int  access 
)

Definition at line 272 of file nbtpage.c.

References _bt_getbuf(), _bt_getmeta(), _bt_getroot(), _bt_lockbuf(), _bt_relandgetbuf(), _bt_relbuf(), _bt_unlockbuf(), _bt_upgrademetapage(), xl_btree_metadata::allequalimage, Assert, BT_READ, BT_WRITE, BTMetaPageData::btm_allequalimage, BTMetaPageData::btm_fastlevel, BTMetaPageData::btm_fastroot, BTMetaPageData::btm_last_cleanup_num_heap_tuples, BTMetaPageData::btm_level, BTMetaPageData::btm_magic, BTMetaPageData::btm_oldest_btpo_xact, BTMetaPageData::btm_root, BTMetaPageData::btm_version, BTP_LEAF, BTP_ROOT, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BTREE_MAGIC, BTREE_METAPAGE, BTREE_MIN_VERSION, BTREE_NOVAC_VERSION, BTREE_VERSION, BufferGetBlockNumber(), BufferGetPage, elog, END_CRIT_SECTION, ERROR, xl_btree_metadata::fastlevel, xl_btree_metadata::fastroot, InvalidBuffer, InvalidTransactionId, xl_btree_metadata::last_cleanup_num_heap_tuples, xl_btree_metadata::level, BTPageOpaqueData::level, xl_btree_newroot::level, MarkBufferDirty(), MemoryContextAlloc(), xl_btree_metadata::oldest_btpo_xact, P_IGNORE, P_LEFTMOST, P_NEW, P_NONE, P_RIGHTMOST, PageGetSpecialPointer, PageSetLSN, pfree(), RelationData::rd_amcache, RelationData::rd_indexcxt, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetRelationName, RelationNeedsWAL, xl_btree_metadata::root, xl_btree_newroot::rootblk, SizeOfBtreeNewroot, START_CRIT_SECTION, xl_btree_metadata::version, XLOG_BTREE_NEWROOT, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _bt_get_endpoint(), _bt_getroot(), and _bt_search().

273 {
274  Buffer metabuf;
275  Buffer rootbuf;
276  Page rootpage;
277  BTPageOpaque rootopaque;
278  BlockNumber rootblkno;
279  uint32 rootlevel;
280  BTMetaPageData *metad;
281 
282  /*
283  * Try to use previously-cached metapage data to find the root. This
284  * normally saves one buffer access per index search, which is a very
285  * helpful savings in bufmgr traffic and hence contention.
286  */
287  if (rel->rd_amcache != NULL)
288  {
289  metad = (BTMetaPageData *) rel->rd_amcache;
290  /* We shouldn't have cached it if any of these fail */
291  Assert(metad->btm_magic == BTREE_MAGIC);
293  Assert(metad->btm_version <= BTREE_VERSION);
294  Assert(!metad->btm_allequalimage ||
296  Assert(metad->btm_root != P_NONE);
297 
298  rootblkno = metad->btm_fastroot;
299  Assert(rootblkno != P_NONE);
300  rootlevel = metad->btm_fastlevel;
301 
302  rootbuf = _bt_getbuf(rel, rootblkno, BT_READ);
303  rootpage = BufferGetPage(rootbuf);
304  rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage);
305 
306  /*
307  * Since the cache might be stale, we check the page more carefully
308  * here than normal. We *must* check that it's not deleted. If it's
309  * not alone on its level, then we reject too --- this may be overly
310  * paranoid but better safe than sorry. Note we don't check P_ISROOT,
311  * because that's not set in a "fast root".
312  */
313  if (!P_IGNORE(rootopaque) &&
314  rootopaque->btpo.level == rootlevel &&
315  P_LEFTMOST(rootopaque) &&
316  P_RIGHTMOST(rootopaque))
317  {
318  /* OK, accept cached page as the root */
319  return rootbuf;
320  }
321  _bt_relbuf(rel, rootbuf);
322  /* Cache is stale, throw it away */
323  if (rel->rd_amcache)
324  pfree(rel->rd_amcache);
325  rel->rd_amcache = NULL;
326  }
327 
328  metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
329  metad = _bt_getmeta(rel, metabuf);
330 
331  /* if no root page initialized yet, do it */
332  if (metad->btm_root == P_NONE)
333  {
334  Page metapg;
335 
336  /* If access = BT_READ, caller doesn't want us to create root yet */
337  if (access == BT_READ)
338  {
339  _bt_relbuf(rel, metabuf);
340  return InvalidBuffer;
341  }
342 
343  /* trade in our read lock for a write lock */
344  _bt_unlockbuf(rel, metabuf);
345  _bt_lockbuf(rel, metabuf, BT_WRITE);
346 
347  /*
348  * Race condition: if someone else initialized the metadata between
349  * the time we released the read lock and acquired the write lock, we
350  * must avoid doing it again.
351  */
352  if (metad->btm_root != P_NONE)
353  {
354  /*
355  * Metadata initialized by someone else. In order to guarantee no
356  * deadlocks, we have to release the metadata page and start all
357  * over again. (Is that really true? But it's hardly worth trying
358  * to optimize this case.)
359  */
360  _bt_relbuf(rel, metabuf);
361  return _bt_getroot(rel, access);
362  }
363 
364  /*
365  * Get, initialize, write, and leave a lock of the appropriate type on
366  * the new root page. Since this is the first page in the tree, it's
367  * a leaf as well as the root.
368  */
369  rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE);
370  rootblkno = BufferGetBlockNumber(rootbuf);
371  rootpage = BufferGetPage(rootbuf);
372  rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage);
373  rootopaque->btpo_prev = rootopaque->btpo_next = P_NONE;
374  rootopaque->btpo_flags = (BTP_LEAF | BTP_ROOT);
375  rootopaque->btpo.level = 0;
376  rootopaque->btpo_cycleid = 0;
377  /* Get raw page pointer for metapage */
378  metapg = BufferGetPage(metabuf);
379 
380  /* NO ELOG(ERROR) till meta is updated */
382 
383  /* upgrade metapage if needed */
384  if (metad->btm_version < BTREE_NOVAC_VERSION)
385  _bt_upgrademetapage(metapg);
386 
387  metad->btm_root = rootblkno;
388  metad->btm_level = 0;
389  metad->btm_fastroot = rootblkno;
390  metad->btm_fastlevel = 0;
392  metad->btm_last_cleanup_num_heap_tuples = -1.0;
393 
394  MarkBufferDirty(rootbuf);
395  MarkBufferDirty(metabuf);
396 
397  /* XLOG stuff */
398  if (RelationNeedsWAL(rel))
399  {
400  xl_btree_newroot xlrec;
401  XLogRecPtr recptr;
403 
404  XLogBeginInsert();
407 
409  md.version = metad->btm_version;
410  md.root = rootblkno;
411  md.level = 0;
412  md.fastroot = rootblkno;
413  md.fastlevel = 0;
416  md.allequalimage = metad->btm_allequalimage;
417 
418  XLogRegisterBufData(2, (char *) &md, sizeof(xl_btree_metadata));
419 
420  xlrec.rootblk = rootblkno;
421  xlrec.level = 0;
422 
423  XLogRegisterData((char *) &xlrec, SizeOfBtreeNewroot);
424 
425  recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT);
426 
427  PageSetLSN(rootpage, recptr);
428  PageSetLSN(metapg, recptr);
429  }
430 
432 
433  /*
434  * swap root write lock for read lock. There is no danger of anyone
435  * else accessing the new root page while it's unlocked, since no one
436  * else knows where it is yet.
437  */
438  _bt_unlockbuf(rel, rootbuf);
439  _bt_lockbuf(rel, rootbuf, BT_READ);
440 
441  /* okay, metadata is correct, release lock on it without caching */
442  _bt_relbuf(rel, metabuf);
443  }
444  else
445  {
446  rootblkno = metad->btm_fastroot;
447  Assert(rootblkno != P_NONE);
448  rootlevel = metad->btm_fastlevel;
449 
450  /*
451  * Cache the metapage data for next time
452  */
454  sizeof(BTMetaPageData));
455  memcpy(rel->rd_amcache, metad, sizeof(BTMetaPageData));
456 
457  /*
458  * We are done with the metapage; arrange to release it via first
459  * _bt_relandgetbuf call
460  */
461  rootbuf = metabuf;
462 
463  for (;;)
464  {
465  rootbuf = _bt_relandgetbuf(rel, rootbuf, rootblkno, BT_READ);
466  rootpage = BufferGetPage(rootbuf);
467  rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage);
468 
469  if (!P_IGNORE(rootopaque))
470  break;
471 
472  /* it's dead, Jim. step right one page */
473  if (P_RIGHTMOST(rootopaque))
474  elog(ERROR, "no live root page found in index \"%s\"",
476  rootblkno = rootopaque->btpo_next;
477  }
478 
479  /* Note: can't check btpo.level on deleted pages */
480  if (rootopaque->btpo.level != rootlevel)
481  elog(ERROR, "root page %u of index \"%s\" has level %u, expected %u",
482  rootblkno, RelationGetRelationName(rel),
483  rootopaque->btpo.level, rootlevel);
484  }
485 
486  /*
487  * By here, we have a pin and read lock on the root page, and no lock set
488  * on the metadata page. Return the root page's buffer.
489  */
490  return rootbuf;
491 }
void XLogRegisterBufData(uint8 block_id, char *data, int len)
Definition: xloginsert.c:368
bool allequalimage
Definition: nbtxlog.h:57
BlockNumber rootblk
Definition: nbtxlog.h:318
#define BTP_ROOT
Definition: nbtree.h:73
BlockNumber btpo_next
Definition: nbtree.h:59
static BTMetaPageData * _bt_getmeta(Relation rel, Buffer metabuf)
Definition: nbtpage.c:136
#define P_IGNORE(opaque)
Definition: nbtree.h:219
void _bt_upgrademetapage(Page page)
Definition: nbtpage.c:101
uint32 btm_version
Definition: nbtree.h:101
Buffer _bt_relandgetbuf(Relation rel, Buffer obuf, BlockNumber blkno, int access)
Definition: nbtpage.c:939
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:803
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1469
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:220
#define BTREE_VERSION
Definition: nbtree.h:143
uint32 btm_magic
Definition: nbtree.h:100
#define BTP_LEAF
Definition: nbtree.h:72
#define END_CRIT_SECTION()
Definition: miscadmin.h:134
BlockNumber root
Definition: nbtxlog.h:51
union BTPageOpaqueData::@45 btpo
#define P_NONE
Definition: nbtree.h:206
#define InvalidBuffer
Definition: buf.h:25
#define REGBUF_WILL_INIT
Definition: xloginsert.h:33
#define START_CRIT_SECTION()
Definition: miscadmin.h:132
uint32 level
Definition: nbtxlog.h:319
uint32 BlockNumber
Definition: block.h:31
#define P_NEW
Definition: bufmgr.h:91
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
#define BT_READ
Definition: nbtree.h:587
BlockNumber btm_fastroot
Definition: nbtree.h:104
#define XLOG_BTREE_NEWROOT
Definition: nbtxlog.h:36
void pfree(void *pointer)
Definition: mcxt.c:1057
#define BTREE_MAGIC
Definition: nbtree.h:142
#define ERROR
Definition: elog.h:43
float8 last_cleanup_num_heap_tuples
Definition: nbtxlog.h:56
BTCycleId btpo_cycleid
Definition: nbtree.h:66
TransactionId oldest_btpo_xact
Definition: nbtxlog.h:55
bool btm_allequalimage
Definition: nbtree.h:111
BlockNumber btpo_prev
Definition: nbtree.h:58
#define REGBUF_STANDARD
Definition: xloginsert.h:35
#define InvalidTransactionId
Definition: transam.h:31
#define RelationGetRelationName(relation)
Definition: rel.h:490
#define P_LEFTMOST(opaque)
Definition: nbtree.h:212
unsigned int uint32
Definition: c.h:374
#define BTREE_NOVAC_VERSION
Definition: nbtree.h:145
Buffer _bt_getroot(Relation rel, int access)
Definition: nbtpage.c:272
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define BTREE_METAPAGE
Definition: nbtree.h:141
#define SizeOfBtreeNewroot
Definition: nbtxlog.h:322
uint32 version
Definition: nbtxlog.h:50
uint32 btm_fastlevel
Definition: nbtree.h:105
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:330
uint32 level
Definition: nbtree.h:62
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:422
void _bt_lockbuf(Relation rel, Buffer buf, int access)
Definition: nbtpage.c:975
BlockNumber btm_root
Definition: nbtree.h:102
#define BTREE_MIN_VERSION
Definition: nbtree.h:144
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:959
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:745
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
float8 btm_last_cleanup_num_heap_tuples
Definition: nbtree.h:109
#define RelationNeedsWAL(relation)
Definition: rel.h:562
void _bt_unlockbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:1006
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2668
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:797
uint32 fastlevel
Definition: nbtxlog.h:54
uint32 btm_level
Definition: nbtree.h:103
#define elog(elevel,...)
Definition: elog.h:214
MemoryContext rd_indexcxt
Definition: rel.h:186
uint32 level
Definition: nbtxlog.h:52
BlockNumber fastroot
Definition: nbtxlog.h:53
TransactionId btm_oldest_btpo_xact
Definition: nbtree.h:107
void * rd_amcache
Definition: rel.h:211
#define BT_WRITE
Definition: nbtree.h:588
void XLogBeginInsert(void)
Definition: xloginsert.c:123
uint16 btpo_flags
Definition: nbtree.h:65
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
int Buffer
Definition: buf.h:23
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:213
Pointer Page
Definition: bufpage.h:78

◆ _bt_getrootheight()

int _bt_getrootheight ( Relation  rel)

Definition at line 604 of file nbtpage.c.

References _bt_getbuf(), _bt_getmeta(), _bt_relbuf(), Assert, BT_READ, BTMetaPageData::btm_allequalimage, BTMetaPageData::btm_fastlevel, BTMetaPageData::btm_fastroot, BTMetaPageData::btm_magic, BTMetaPageData::btm_root, BTMetaPageData::btm_version, BTREE_MAGIC, BTREE_METAPAGE, BTREE_MIN_VERSION, BTREE_NOVAC_VERSION, BTREE_VERSION, MemoryContextAlloc(), P_NONE, RelationData::rd_amcache, and RelationData::rd_indexcxt.

Referenced by _bt_insertonpg(), and get_relation_info().

605 {
606  BTMetaPageData *metad;
607 
608  if (rel->rd_amcache == NULL)
609  {
610  Buffer metabuf;
611 
612  metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
613  metad = _bt_getmeta(rel, metabuf);
614 
615  /*
616  * If there's no root page yet, _bt_getroot() doesn't expect a cache
617  * to be made, so just stop here and report the index height is zero.
618  * (XXX perhaps _bt_getroot() should be changed to allow this case.)
619  */
620  if (metad->btm_root == P_NONE)
621  {
622  _bt_relbuf(rel, metabuf);
623  return 0;
624  }
625 
626  /*
627  * Cache the metapage data for next time
628  */
630  sizeof(BTMetaPageData));
631  memcpy(rel->rd_amcache, metad, sizeof(BTMetaPageData));
632  _bt_relbuf(rel, metabuf);
633  }
634 
635  /* Get cached page */
636  metad = (BTMetaPageData *) rel->rd_amcache;
637  /* We shouldn't have cached it if any of these fail */
638  Assert(metad->btm_magic == BTREE_MAGIC);
640  Assert(metad->btm_version <= BTREE_VERSION);
641  Assert(!metad->btm_allequalimage ||
643  Assert(metad->btm_fastroot != P_NONE);
644 
645  return metad->btm_fastlevel;
646 }
static BTMetaPageData * _bt_getmeta(Relation rel, Buffer metabuf)
Definition: nbtpage.c:136
uint32 btm_version
Definition: nbtree.h:101
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:803
#define BTREE_VERSION
Definition: nbtree.h:143
uint32 btm_magic
Definition: nbtree.h:100
#define P_NONE
Definition: nbtree.h:206
#define BT_READ
Definition: nbtree.h:587
BlockNumber btm_fastroot
Definition: nbtree.h:104
#define BTREE_MAGIC
Definition: nbtree.h:142
bool btm_allequalimage
Definition: nbtree.h:111
#define BTREE_NOVAC_VERSION
Definition: nbtree.h:145
#define BTREE_METAPAGE
Definition: nbtree.h:141
uint32 btm_fastlevel
Definition: nbtree.h:105
BlockNumber btm_root
Definition: nbtree.h:102
#define BTREE_MIN_VERSION
Definition: nbtree.h:144
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:959
#define Assert(condition)
Definition: c.h:745
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:797
MemoryContext rd_indexcxt
Definition: rel.h:186
void * rd_amcache
Definition: rel.h:211
int Buffer
Definition: buf.h:23

◆ _bt_gettrueroot()

Buffer _bt_gettrueroot ( Relation  rel)

Definition at line 508 of file nbtpage.c.

References _bt_getbuf(), _bt_relandgetbuf(), _bt_relbuf(), BT_READ, BTMetaPageData::btm_level, BTMetaPageData::btm_magic, BTMetaPageData::btm_root, BTMetaPageData::btm_version, BTPageGetMeta, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_next, BTREE_MAGIC, BTREE_METAPAGE, BTREE_MIN_VERSION, BTREE_VERSION, BufferGetPage, elog, ereport, errcode(), errmsg(), ERROR, InvalidBuffer, BTPageOpaqueData::level, P_IGNORE, P_ISMETA, P_NONE, P_RIGHTMOST, PageGetSpecialPointer, pfree(), RelationData::rd_amcache, and RelationGetRelationName.

Referenced by _bt_get_endpoint().

509 {
510  Buffer metabuf;
511  Page metapg;
512  BTPageOpaque metaopaque;
513  Buffer rootbuf;
514  Page rootpage;
515  BTPageOpaque rootopaque;
516  BlockNumber rootblkno;
517  uint32 rootlevel;
518  BTMetaPageData *metad;
519 
520  /*
521  * We don't try to use cached metapage data here, since (a) this path is
522  * not performance-critical, and (b) if we are here it suggests our cache
523  * is out-of-date anyway. In light of point (b), it's probably safest to
524  * actively flush any cached metapage info.
525  */
526  if (rel->rd_amcache)
527  pfree(rel->rd_amcache);
528  rel->rd_amcache = NULL;
529 
530  metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
531  metapg = BufferGetPage(metabuf);
532  metaopaque = (BTPageOpaque) PageGetSpecialPointer(metapg);
533  metad = BTPageGetMeta(metapg);
534 
535  if (!P_ISMETA(metaopaque) ||
536  metad->btm_magic != BTREE_MAGIC)
537  ereport(ERROR,
538  (errcode(ERRCODE_INDEX_CORRUPTED),
539  errmsg("index \"%s\" is not a btree",
540  RelationGetRelationName(rel))));
541 
542  if (metad->btm_version < BTREE_MIN_VERSION ||
543  metad->btm_version > BTREE_VERSION)
544  ereport(ERROR,
545  (errcode(ERRCODE_INDEX_CORRUPTED),
546  errmsg("version mismatch in index \"%s\": file version %d, "
547  "current version %d, minimal supported version %d",
550 
551  /* if no root page initialized yet, fail */
552  if (metad->btm_root == P_NONE)
553  {
554  _bt_relbuf(rel, metabuf);
555  return InvalidBuffer;
556  }
557 
558  rootblkno = metad->btm_root;
559  rootlevel = metad->btm_level;
560 
561  /*
562  * We are done with the metapage; arrange to release it via first
563  * _bt_relandgetbuf call
564  */
565  rootbuf = metabuf;
566 
567  for (;;)
568  {
569  rootbuf = _bt_relandgetbuf(rel, rootbuf, rootblkno, BT_READ);
570  rootpage = BufferGetPage(rootbuf);
571  rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage);
572 
573  if (!P_IGNORE(rootopaque))
574  break;
575 
576  /* it's dead, Jim. step right one page */
577  if (P_RIGHTMOST(rootopaque))
578  elog(ERROR, "no live root page found in index \"%s\"",
580  rootblkno = rootopaque->btpo_next;
581  }
582 
583  /* Note: can't check btpo.level on deleted pages */
584  if (rootopaque->btpo.level != rootlevel)
585  elog(ERROR, "root page %u of index \"%s\" has level %u, expected %u",
586  rootblkno, RelationGetRelationName(rel),
587  rootopaque->btpo.level, rootlevel);
588 
589  return rootbuf;
590 }
BlockNumber btpo_next
Definition: nbtree.h:59
#define P_IGNORE(opaque)
Definition: nbtree.h:219
uint32 btm_version
Definition: nbtree.h:101
Buffer _bt_relandgetbuf(Relation rel, Buffer obuf, BlockNumber blkno, int access)
Definition: nbtpage.c:939
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:803
#define BTREE_VERSION
Definition: nbtree.h:143
uint32 btm_magic
Definition: nbtree.h:100
union BTPageOpaqueData::@45 btpo
#define P_NONE
Definition: nbtree.h:206
#define InvalidBuffer
Definition: buf.h:25
int errcode(int sqlerrcode)
Definition: elog.c:610
uint32 BlockNumber
Definition: block.h:31
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
#define P_ISMETA(opaque)
Definition: nbtree.h:217
#define BT_READ
Definition: nbtree.h:587
void pfree(void *pointer)
Definition: mcxt.c:1057
#define BTREE_MAGIC
Definition: nbtree.h:142
#define ERROR
Definition: elog.h:43
#define BTPageGetMeta(p)
Definition: nbtree.h:114
#define RelationGetRelationName(relation)
Definition: rel.h:490
unsigned int uint32
Definition: c.h:374
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define BTREE_METAPAGE
Definition: nbtree.h:141
uint32 level
Definition: nbtree.h:62
BlockNumber btm_root
Definition: nbtree.h:102
#define BTREE_MIN_VERSION
Definition: nbtree.h:144
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:959
#define ereport(elevel,...)
Definition: elog.h:144
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
int errmsg(const char *fmt,...)
Definition: elog.c:824
uint32 btm_level
Definition: nbtree.h:103
#define elog(elevel,...)
Definition: elog.h:214
void * rd_amcache
Definition: rel.h:211
int Buffer
Definition: buf.h:23
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:213
Pointer Page
Definition: bufpage.h:78

◆ _bt_initmetapage()

void _bt_initmetapage ( Page  page,
BlockNumber  rootbknum,
uint32  level,
bool  allequalimage 
)

Definition at line 61 of file nbtpage.c.

References _bt_pageinit(), BTMetaPageData::btm_allequalimage, BTMetaPageData::btm_fastlevel, BTMetaPageData::btm_fastroot, BTMetaPageData::btm_last_cleanup_num_heap_tuples, BTMetaPageData::btm_level, BTMetaPageData::btm_magic, BTMetaPageData::btm_oldest_btpo_xact, BTMetaPageData::btm_root, BTMetaPageData::btm_version, BTP_META, BTPageGetMeta, BTPageOpaqueData::btpo_flags, BTREE_MAGIC, BTREE_VERSION, InvalidTransactionId, and PageGetSpecialPointer.

Referenced by _bt_uppershutdown(), and btbuildempty().

63 {
64  BTMetaPageData *metad;
65  BTPageOpaque metaopaque;
66 
67  _bt_pageinit(page, BLCKSZ);
68 
69  metad = BTPageGetMeta(page);
70  metad->btm_magic = BTREE_MAGIC;
71  metad->btm_version = BTREE_VERSION;
72  metad->btm_root = rootbknum;
73  metad->btm_level = level;
74  metad->btm_fastroot = rootbknum;
75  metad->btm_fastlevel = level;
78  metad->btm_allequalimage = allequalimage;
79 
80  metaopaque = (BTPageOpaque) PageGetSpecialPointer(page);
81  metaopaque->btpo_flags = BTP_META;
82 
83  /*
84  * Set pd_lower just past the end of the metadata. This is essential,
85  * because without doing so, metadata will be lost if xlog.c compresses
86  * the page.
87  */
88  ((PageHeader) page)->pd_lower =
89  ((char *) metad + sizeof(BTMetaPageData)) - (char *) page;
90 }
uint32 btm_version
Definition: nbtree.h:101
#define BTREE_VERSION
Definition: nbtree.h:143
uint32 btm_magic
Definition: nbtree.h:100
#define BTP_META
Definition: nbtree.h:75
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
BlockNumber btm_fastroot
Definition: nbtree.h:104
#define BTREE_MAGIC
Definition: nbtree.h:142
#define BTPageGetMeta(p)
Definition: nbtree.h:114
bool btm_allequalimage
Definition: nbtree.h:111
#define InvalidTransactionId
Definition: transam.h:31
uint32 btm_fastlevel
Definition: nbtree.h:105
BlockNumber btm_root
Definition: nbtree.h:102
PageHeaderData * PageHeader
Definition: bufpage.h:166
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
float8 btm_last_cleanup_num_heap_tuples
Definition: nbtree.h:109
uint32 btm_level
Definition: nbtree.h:103
void _bt_pageinit(Page page, Size size)
Definition: nbtpage.c:1066
TransactionId btm_oldest_btpo_xact
Definition: nbtree.h:107
uint16 btpo_flags
Definition: nbtree.h:65

◆ _bt_leftsib_splitflag()

static bool _bt_leftsib_splitflag ( Relation  rel,
BlockNumber  leftsib,
BlockNumber  target 
)
static

Definition at line 1441 of file nbtpage.c.

References _bt_getbuf(), _bt_relbuf(), BT_READ, BTPageOpaqueData::btpo_next, buf, BufferGetPage, P_INCOMPLETE_SPLIT, P_NONE, and PageGetSpecialPointer.

Referenced by _bt_lock_subtree_parent(), and _bt_pagedel().

1442 {
1443  Buffer buf;
1444  Page page;
1445  BTPageOpaque opaque;
1446  bool result;
1447 
1448  /* Easy case: No left sibling */
1449  if (leftsib == P_NONE)
1450  return false;
1451 
1452  buf = _bt_getbuf(rel, leftsib, BT_READ);
1453  page = BufferGetPage(buf);
1454  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1455 
1456  /*
1457  * If the left sibling was concurrently split, so that its next-pointer
1458  * doesn't point to the current page anymore, the split that created
1459  * target must be completed. Caller can reasonably expect that there will
1460  * be a downlink to the target page that it can relocate using its stack.
1461  * (We don't allow splitting an incompletely split page again until the
1462  * previous split has been completed.)
1463  */
1464  result = (opaque->btpo_next == target && P_INCOMPLETE_SPLIT(opaque));
1465  _bt_relbuf(rel, buf);
1466 
1467  return result;
1468 }
BlockNumber btpo_next
Definition: nbtree.h:59
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:803
#define P_NONE
Definition: nbtree.h:206
#define P_INCOMPLETE_SPLIT(opaque)
Definition: nbtree.h:221
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
#define BT_READ
Definition: nbtree.h:587
static char * buf
Definition: pg_test_fsync.c:67
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:959
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
int Buffer
Definition: buf.h:23
Pointer Page
Definition: bufpage.h:78

◆ _bt_lock_subtree_parent()

static bool _bt_lock_subtree_parent ( Relation  rel,
BlockNumber  child,
BTStack  stack,
Buffer subtreeparent,
OffsetNumber poffset,
BlockNumber topparent,
BlockNumber topparentrightsib 
)
static

Definition at line 2492 of file nbtpage.c.

References _bt_getstackbuf(), _bt_leftsib_splitflag(), _bt_relbuf(), Assert, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BTStackData::bts_blkno, BTStackData::bts_offset, BTStackData::bts_parent, BufferGetPage, ereport, errcode(), errmsg_internal(), ERROR, InvalidBuffer, P_FIRSTDATAKEY, P_INCOMPLETE_SPLIT, P_RIGHTMOST, PageGetMaxOffsetNumber, PageGetSpecialPointer, and RelationGetRelationName.

Referenced by _bt_mark_page_halfdead().

2495 {
2496  BlockNumber parent,
2497  leftsibparent;
2498  OffsetNumber parentoffset,
2499  maxoff;
2500  Buffer pbuf;
2501  Page page;
2502  BTPageOpaque opaque;
2503 
2504  /*
2505  * Locate the pivot tuple whose downlink points to "child". Write lock
2506  * the parent page itself.
2507  */
2508  pbuf = _bt_getstackbuf(rel, stack, child);
2509  if (pbuf == InvalidBuffer)
2510  ereport(ERROR,
2511  (errcode(ERRCODE_INDEX_CORRUPTED),
2512  errmsg_internal("failed to re-find parent key in index \"%s\" for deletion target page %u",
2513  RelationGetRelationName(rel), child)));
2514  parent = stack->bts_blkno;
2515  parentoffset = stack->bts_offset;
2516 
2517  page = BufferGetPage(pbuf);
2518  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2519  maxoff = PageGetMaxOffsetNumber(page);
2520  leftsibparent = opaque->btpo_prev;
2521 
2522  /*
2523  * _bt_getstackbuf() completes page splits on returned parent buffer when
2524  * required.
2525  *
2526  * In general it's a bad idea for VACUUM to use up more disk space, which
2527  * is why page deletion does not finish incomplete page splits most of the
2528  * time. We allow this limited exception because the risk is much lower,
2529  * and the potential downside of not proceeding is much higher: A single
2530  * internal page with the INCOMPLETE_SPLIT flag set might otherwise
2531  * prevent us from deleting hundreds of empty leaf pages from one level
2532  * down.
2533  */
2534  Assert(!P_INCOMPLETE_SPLIT(opaque));
2535 
2536  if (parentoffset < maxoff)
2537  {
2538  /*
2539  * Child is not the rightmost child in parent, so it's safe to delete
2540  * the subtree whose root/topparent is child page
2541  */
2542  *subtreeparent = pbuf;
2543  *poffset = parentoffset;
2544  return true;
2545  }
2546 
2547  /*
2548  * Child is the rightmost child of parent.
2549  *
2550  * Since it's the rightmost child of parent, deleting the child (or
2551  * deleting the subtree whose root/topparent is the child page) is only
2552  * safe when it's also possible to delete the parent.
2553  */
2554  Assert(parentoffset == maxoff);
2555  if (parentoffset != P_FIRSTDATAKEY(opaque) || P_RIGHTMOST(opaque))
2556  {
2557  /*
2558  * Child isn't parent's only child, or parent is rightmost on its
2559  * entire level. Definitely cannot delete any pages.
2560  */
2561  _bt_relbuf(rel, pbuf);
2562  return false;
2563  }
2564 
2565  /*
2566  * Now make sure that the parent deletion is itself safe by examining the
2567  * child's grandparent page. Recurse, passing the parent page as the
2568  * child page (child's grandparent is the parent on the next level up). If
2569  * parent deletion is unsafe, then child deletion must also be unsafe (in
2570  * which case caller cannot delete any pages at all).
2571  */
2572  *topparent = parent;
2573  *topparentrightsib = opaque->btpo_next;
2574 
2575  /*
2576  * Release lock on parent before recursing.
2577  *
2578  * It's OK to release page locks on parent before recursive call locks
2579  * grandparent. An internal page can only acquire an entry if the child
2580  * is split, but that cannot happen as long as we still hold a lock on the
2581  * leafbuf page.
2582  */
2583  _bt_relbuf(rel, pbuf);
2584 
2585  /*
2586  * Before recursing, check that the left sibling of parent (if any) is not
2587  * marked with INCOMPLETE_SPLIT flag first (must do so after we drop the
2588  * parent lock).
2589  *
2590  * Note: We deliberately avoid completing incomplete splits here.
2591  */
2592  if (_bt_leftsib_splitflag(rel, leftsibparent, parent))
2593  return false;
2594 
2595  /* Recurse to examine child page's grandparent page */
2596  return _bt_lock_subtree_parent(rel, parent, stack->bts_parent,
2597  subtreeparent, poffset,
2598  topparent, topparentrightsib);
2599 }
BlockNumber btpo_next
Definition: nbtree.h:59
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:244
#define InvalidBuffer
Definition: buf.h:25
static bool _bt_lock_subtree_parent(Relation rel, BlockNumber child, BTStack stack, Buffer *subtreeparent, OffsetNumber *poffset, BlockNumber *topparent, BlockNumber *topparentrightsib)
Definition: nbtpage.c:2492
int errcode(int sqlerrcode)
Definition: elog.c:610
uint32 BlockNumber
Definition: block.h:31
#define P_INCOMPLETE_SPLIT(opaque)
Definition: nbtree.h:221
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
uint16 OffsetNumber
Definition: off.h:24
#define ERROR
Definition: elog.h:43
BlockNumber btpo_prev
Definition: nbtree.h:58
OffsetNumber bts_offset
Definition: nbtree.h:603
#define RelationGetRelationName(relation)
Definition: rel.h:490
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
BlockNumber bts_blkno
Definition: nbtree.h:602
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:959
#define ereport(elevel,...)
Definition: elog.h:144
int errmsg_internal(const char *fmt,...)
Definition: elog.c:911
#define Assert(condition)
Definition: c.h:745
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
struct BTStackData * bts_parent
Definition: nbtree.h:604
int Buffer
Definition: buf.h:23
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:213
Buffer _bt_getstackbuf(Relation rel, BTStack stack, BlockNumber child)
Definition: nbtinsert.c:2290
Pointer Page
Definition: bufpage.h:78
static bool _bt_leftsib_splitflag(Relation rel, BlockNumber leftsib, BlockNumber target)
Definition: nbtpage.c:1441

◆ _bt_lockbuf()

void _bt_lockbuf ( Relation  rel,
Buffer  buf,
int  access 
)

Definition at line 975 of file nbtpage.c.

References BufferGetPage, LockBuffer(), RelationUsesLocalBuffers, and VALGRIND_MAKE_MEM_DEFINED.

Referenced by _bt_getbuf(), _bt_getroot(), _bt_killitems(), _bt_moveright(), _bt_pagedel(), _bt_readnextpage(), _bt_relandgetbuf(), _bt_search(), _bt_unlink_halfdead_page(), _bt_update_meta_cleanup_info(), and btvacuumpage().

976 {
977  /* LockBuffer() asserts that pin is held by this backend */
978  LockBuffer(buf, access);
979 
980  /*
981  * It doesn't matter that _bt_unlockbuf() won't get called in the
982  * event of an nbtree error (e.g. a unique violation error). That
983  * won't cause Valgrind false positives.
984  *
985  * The nbtree client requests are superimposed on top of the
986  * bufmgr.c buffer pin client requests. In the event of an nbtree
987  * error the buffer will certainly get marked as defined when the
988  * backend once again acquires its first pin on the buffer. (Of
989  * course, if the backend never touches the buffer again then it
990  * doesn't matter that it remains non-accessible to Valgrind.)
991  *
992  * Note: When an IndexTuple C pointer gets computed using an
993  * ItemId read from a page while a lock was held, the C pointer
994  * becomes unsafe to dereference forever as soon as the lock is
995  * released. Valgrind can only detect cases where the pointer
996  * gets dereferenced with no _current_ lock/pin held, though.
997  */
998  if (!RelationUsesLocalBuffers(rel))
1000 }
#define VALGRIND_MAKE_MEM_DEFINED(addr, size)
Definition: memdebug.h:26
static char * buf
Definition: pg_test_fsync.c:67
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3757
#define RelationUsesLocalBuffers(relation)
Definition: rel.h:572

◆ _bt_log_reuse_page()

static void _bt_log_reuse_page ( Relation  rel,
BlockNumber  blkno,
TransactionId  latestRemovedXid 
)
static

Definition at line 760 of file nbtpage.c.

References xl_btree_reuse_page::block, xl_btree_reuse_page::latestRemovedXid, xl_btree_reuse_page::node, RelationData::rd_node, SizeOfBtreeReusePage, XLOG_BTREE_REUSE_PAGE, XLogBeginInsert(), XLogInsert(), and XLogRegisterData().

Referenced by _bt_getbuf().

761 {
762  xl_btree_reuse_page xlrec_reuse;
763 
764  /*
765  * Note that we don't register the buffer with the record, because this
766  * operation doesn't modify the page. This record only exists to provide a
767  * conflict point for Hot Standby.
768  */
769 
770  /* XLOG stuff */
771  xlrec_reuse.node = rel->rd_node;
772  xlrec_reuse.block = blkno;
773  xlrec_reuse.latestRemovedXid = latestRemovedXid;
774 
775  XLogBeginInsert();
776  XLogRegisterData((char *) &xlrec_reuse, SizeOfBtreeReusePage);
777 
778  XLogInsert(RM_BTREE_ID, XLOG_BTREE_REUSE_PAGE);
779 }
RelFileNode node
Definition: nbtxlog.h:206
#define SizeOfBtreeReusePage
Definition: nbtxlog.h:211
BlockNumber block
Definition: nbtxlog.h:207
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:330
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:422
#define XLOG_BTREE_REUSE_PAGE
Definition: nbtxlog.h:40
RelFileNode rd_node
Definition: rel.h:55
TransactionId latestRemovedXid
Definition: nbtxlog.h:208
void XLogBeginInsert(void)
Definition: xloginsert.c:123

◆ _bt_mark_page_halfdead()

static bool _bt_mark_page_halfdead ( Relation  rel,
Buffer  leafbuf,
BTStack  stack 
)
static

Definition at line 1821 of file nbtpage.c.

References _bt_lock_subtree_parent(), _bt_relbuf(), _bt_rightsib_halfdeadflag(), Assert, BTP_HALF_DEAD, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BTreeTupleGetDownLink(), BTreeTupleSetDownLink(), BTreeTupleSetTopParent(), BufferGetBlockNumber(), BufferGetPage, DEBUG1, elog, END_CRIT_SECTION, ereport, errcode(), errmsg_internal(), ERROR, IndexTupleSize, InvalidBlockNumber, xl_btree_mark_page_halfdead::leafblk, xl_btree_mark_page_halfdead::leftblk, MarkBufferDirty(), MemSet, OffsetNumberNext, P_FIRSTDATAKEY, P_HIKEY, P_IGNORE, P_ISLEAF, P_ISROOT, P_RIGHTMOST, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageGetSpecialPointer, PageIndexTupleDelete(), PageIndexTupleOverwrite(), PageSetLSN, xl_btree_mark_page_halfdead::poffset, PredicateLockPageCombine(), REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetRelationName, RelationNeedsWAL, xl_btree_mark_page_halfdead::rightblk, SizeOfBtreeMarkPageHalfDead, START_CRIT_SECTION, IndexTupleData::t_info, xl_btree_mark_page_halfdead::topparent, XLOG_BTREE_MARK_PAGE_HALFDEAD, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _bt_pagedel().

1822 {
1823  BlockNumber leafblkno;
1824  BlockNumber leafrightsib;
1825  BlockNumber topparent;
1826  BlockNumber topparentrightsib;
1827  ItemId itemid;
1828  Page page;
1829  BTPageOpaque opaque;
1830  Buffer subtreeparent;
1831  OffsetNumber poffset;
1832  OffsetNumber nextoffset;
1833  IndexTuple itup;
1834  IndexTupleData trunctuple;
1835 
1836  page = BufferGetPage(leafbuf);
1837  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1838 
1839  Assert(!P_RIGHTMOST(opaque) && !P_ISROOT(opaque) &&
1840  P_ISLEAF(opaque) && !P_IGNORE(opaque) &&
1841  P_FIRSTDATAKEY(opaque) > PageGetMaxOffsetNumber(page));
1842 
1843  /*
1844  * Save info about the leaf page.
1845  */
1846  leafblkno = BufferGetBlockNumber(leafbuf);
1847  leafrightsib = opaque->btpo_next;
1848 
1849  /*
1850  * Before attempting to lock the parent page, check that the right sibling
1851  * is not in half-dead state. A half-dead right sibling would have no
1852  * downlink in the parent, which would be highly confusing later when we
1853  * delete the downlink. It would fail the "right sibling of target page
1854  * is also the next child in parent page" cross-check below.
1855  */
1856  if (_bt_rightsib_halfdeadflag(rel, leafrightsib))
1857  {
1858  elog(DEBUG1, "could not delete page %u because its right sibling %u is half-dead",
1859  leafblkno, leafrightsib);
1860  return false;
1861  }
1862 
1863  /*
1864  * We cannot delete a page that is the rightmost child of its immediate
1865  * parent, unless it is the only child --- in which case the parent has to
1866  * be deleted too, and the same condition applies recursively to it. We
1867  * have to check this condition all the way up before trying to delete,
1868  * and lock the parent of the root of the to-be-deleted subtree (the
1869  * "subtree parent"). _bt_lock_subtree_parent() locks the subtree parent
1870  * for us. We remove the downlink to the "top parent" page (subtree root
1871  * page) from the subtree parent page below.
1872  *
1873  * Initialize topparent to be leafbuf page now. The final to-be-deleted
1874  * subtree is often a degenerate one page subtree consisting only of the
1875  * leafbuf page. When that happens, the leafbuf page is the final subtree
1876  * root page/top parent page.
1877  */
1878  topparent = leafblkno;
1879  topparentrightsib = leafrightsib;
1880  if (!_bt_lock_subtree_parent(rel, leafblkno, stack,
1881  &subtreeparent, &poffset,
1882  &topparent, &topparentrightsib))
1883  return false;
1884 
1885  /*
1886  * Check that the parent-page index items we're about to delete/overwrite
1887  * in subtree parent page contain what we expect. This can fail if the
1888  * index has become corrupt for some reason. We want to throw any error
1889  * before entering the critical section --- otherwise it'd be a PANIC.
1890  */
1891  page = BufferGetPage(subtreeparent);
1892  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1893 
1894 #ifdef USE_ASSERT_CHECKING
1895 
1896  /*
1897  * This is just an assertion because _bt_lock_subtree_parent should have
1898  * guaranteed tuple has the expected contents
1899  */
1900  itemid = PageGetItemId(page, poffset);
1901  itup = (IndexTuple) PageGetItem(page, itemid);
1902  Assert(BTreeTupleGetDownLink(itup) == topparent);
1903 #endif
1904 
1905  nextoffset = OffsetNumberNext(poffset);
1906  itemid = PageGetItemId(page, nextoffset);
1907  itup = (IndexTuple) PageGetItem(page, itemid);
1908  if (BTreeTupleGetDownLink(itup) != topparentrightsib)
1909  ereport(ERROR,
1910  (errcode(ERRCODE_INDEX_CORRUPTED),
1911  errmsg_internal("right sibling %u of block %u is not next child %u of block %u in index \"%s\"",
1912  topparentrightsib, topparent,
1913  BTreeTupleGetDownLink(itup),
1914  BufferGetBlockNumber(subtreeparent),
1915  RelationGetRelationName(rel))));
1916 
1917  /*
1918  * Any insert which would have gone on the leaf block will now go to its
1919  * right sibling. In other words, the key space moves right.
1920  */
1921  PredicateLockPageCombine(rel, leafblkno, leafrightsib);
1922 
1923  /* No ereport(ERROR) until changes are logged */
1925 
1926  /*
1927  * Update parent of subtree. We want to delete the downlink to the top
1928  * parent page/root of the subtree, and the *following* key. Easiest way
1929  * is to copy the right sibling's downlink over the downlink that points
1930  * to top parent page, and then delete the right sibling's original pivot
1931  * tuple.
1932  *
1933  * Lanin and Shasha make the key space move left when deleting a page,
1934  * whereas the key space moves right here. That's why we cannot simply
1935  * delete the pivot tuple with the downlink to the top parent page. See
1936  * nbtree/README.
1937  */
1938  page = BufferGetPage(subtreeparent);
1939  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1940 
1941  itemid = PageGetItemId(page, poffset);
1942  itup = (IndexTuple) PageGetItem(page, itemid);
1943  BTreeTupleSetDownLink(itup, topparentrightsib);
1944 
1945  nextoffset = OffsetNumberNext(poffset);
1946  PageIndexTupleDelete(page, nextoffset);
1947 
1948  /*
1949  * Mark the leaf page as half-dead, and stamp it with a link to the top
1950  * parent page. When the leaf page is also the top parent page, the link
1951  * is set to InvalidBlockNumber.
1952  */
1953  page = BufferGetPage(leafbuf);
1954  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1955  opaque->btpo_flags |= BTP_HALF_DEAD;
1956 
1958  MemSet(&trunctuple, 0, sizeof(IndexTupleData));
1959  trunctuple.t_info = sizeof(IndexTupleData);
1960  if (topparent != leafblkno)
1961  BTreeTupleSetTopParent(&trunctuple, topparent);
1962  else
1964 
1965  if (!PageIndexTupleOverwrite(page, P_HIKEY, (Item) &trunctuple,
1966  IndexTupleSize(&trunctuple)))
1967  elog(ERROR, "could not overwrite high key in half-dead page");
1968 
1969  /* Must mark buffers dirty before XLogInsert */
1970  MarkBufferDirty(subtreeparent);
1971  MarkBufferDirty(leafbuf);
1972 
1973  /* XLOG stuff */
1974  if (RelationNeedsWAL(rel))
1975  {
1977  XLogRecPtr recptr;
1978 
1979  xlrec.poffset = poffset;
1980  xlrec.leafblk = leafblkno;
1981  if (topparent != leafblkno)
1982  xlrec.topparent = topparent;
1983  else
1984  xlrec.topparent = InvalidBlockNumber;
1985 
1986  XLogBeginInsert();
1987  XLogRegisterBuffer(0, leafbuf, REGBUF_WILL_INIT);
1988  XLogRegisterBuffer(1, subtreeparent, REGBUF_STANDARD);
1989 
1990  page = BufferGetPage(leafbuf);
1991  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1992  xlrec.leftblk = opaque->btpo_prev;
1993  xlrec.rightblk = opaque->btpo_next;
1994 
1996 
1997  recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_MARK_PAGE_HALFDEAD);
1998 
1999  page = BufferGetPage(subtreeparent);
2000  PageSetLSN(page, recptr);
2001  page = BufferGetPage(leafbuf);
2002  PageSetLSN(page, recptr);
2003  }
2004 
2005  END_CRIT_SECTION();
2006 
2007  _bt_relbuf(rel, subtreeparent);
2008  return true;
2009 }
BlockNumber btpo_next
Definition: nbtree.h:59
#define DEBUG1
Definition: elog.h:25
#define P_IGNORE(opaque)
Definition: nbtree.h:219
void PageIndexTupleDelete(Page page, OffsetNumber offnum)
Definition: bufpage.c:927
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1469
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:220
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:244
#define SizeOfBtreeMarkPageHalfDead
Definition: nbtxlog.h:273
#define BTP_HALF_DEAD
Definition: nbtree.h:76
#define END_CRIT_SECTION()
Definition: miscadmin.h:134
static bool _bt_rightsib_halfdeadflag(Relation rel, BlockNumber leafrightsib)
Definition: nbtpage.c:1498
Pointer Item
Definition: item.h:17
static BlockNumber BTreeTupleGetDownLink(IndexTuple pivot)
Definition: nbtree.h:424
#define REGBUF_WILL_INIT
Definition: xloginsert.h:33
static bool _bt_lock_subtree_parent(Relation rel, BlockNumber child, BTStack stack, Buffer *subtreeparent, OffsetNumber *poffset, BlockNumber *topparent, BlockNumber *topparentrightsib)
Definition: nbtpage.c:2492
#define START_CRIT_SECTION()
Definition: miscadmin.h:132
int errcode(int sqlerrcode)
Definition: elog.c:610
#define MemSet(start, val, len)
Definition: c.h:949
uint32 BlockNumber
Definition: block.h:31
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
uint16 OffsetNumber
Definition: off.h:24
#define ERROR
Definition: elog.h:43
bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum, Item newtup, Size newsize)
Definition: bufpage.c:1280
BlockNumber btpo_prev
Definition: nbtree.h:58
IndexTupleData * IndexTuple
Definition: itup.h:53
#define REGBUF_STANDARD
Definition: xloginsert.h:35
#define RelationGetRelationName(relation)
Definition: rel.h:490
static void BTreeTupleSetDownLink(IndexTuple pivot, BlockNumber blkno)
Definition: nbtree.h:430
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define P_ISROOT(opaque)
Definition: nbtree.h:215
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:330
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:422
#define XLOG_BTREE_MARK_PAGE_HALFDEAD
Definition: nbtxlog.h:37
struct IndexTupleData IndexTupleData
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:959
#define ereport(elevel,...)
Definition: elog.h:144
int errmsg_internal(const char *fmt,...)
Definition: elog.c:911
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:745
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
#define InvalidBlockNumber
Definition: block.h:33
#define RelationNeedsWAL(relation)
Definition: rel.h:562
#define P_HIKEY
Definition: nbtree.h:242
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2668
#define elog(elevel,...)
Definition: elog.h:214
void PredicateLockPageCombine(Relation relation, BlockNumber oldblkno, BlockNumber newblkno)
Definition: predicate.c:3183
unsigned short t_info
Definition: itup.h:49
void XLogBeginInsert(void)
Definition: xloginsert.c:123
uint16 btpo_flags
Definition: nbtree.h:65
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
int Buffer
Definition: buf.h:23
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:213
static void BTreeTupleSetTopParent(IndexTuple leafhikey, BlockNumber blkno)
Definition: nbtree.h:494
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
Pointer Page
Definition: bufpage.h:78
#define IndexTupleSize(itup)
Definition: itup.h:71
#define P_ISLEAF(opaque)
Definition: nbtree.h:214

◆ _bt_metaversion()

void _bt_metaversion ( Relation  rel,
bool heapkeyspace,
bool allequalimage 
)

Definition at line 668 of file nbtpage.c.

References _bt_getbuf(), _bt_getmeta(), _bt_relbuf(), Assert, BT_READ, BTMetaPageData::btm_allequalimage, BTMetaPageData::btm_fastroot, BTMetaPageData::btm_magic, BTMetaPageData::btm_root, BTMetaPageData::btm_version, BTREE_MAGIC, BTREE_METAPAGE, BTREE_MIN_VERSION, BTREE_NOVAC_VERSION, BTREE_VERSION, MemoryContextAlloc(), P_NONE, RelationData::rd_amcache, and RelationData::rd_indexcxt.

Referenced by _bt_first(), _bt_mkscankey(), and bt_index_check_internal().

669 {
670  BTMetaPageData *metad;
671 
672  if (rel->rd_amcache == NULL)
673  {
674  Buffer metabuf;
675 
676  metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
677  metad = _bt_getmeta(rel, metabuf);
678 
679  /*
680  * If there's no root page yet, _bt_getroot() doesn't expect a cache
681  * to be made, so just stop here. (XXX perhaps _bt_getroot() should
682  * be changed to allow this case.)
683  */
684  if (metad->btm_root == P_NONE)
685  {
686  *heapkeyspace = metad->btm_version > BTREE_NOVAC_VERSION;
687  *allequalimage = metad->btm_allequalimage;
688 
689  _bt_relbuf(rel, metabuf);
690  return;
691  }
692 
693  /*
694  * Cache the metapage data for next time
695  *
696  * An on-the-fly version upgrade performed by _bt_upgrademetapage()
697  * can change the nbtree version for an index without invalidating any
698  * local cache. This is okay because it can only happen when moving
699  * from version 2 to version 3, both of which are !heapkeyspace
700  * versions.
701  */
703  sizeof(BTMetaPageData));
704  memcpy(rel->rd_amcache, metad, sizeof(BTMetaPageData));
705  _bt_relbuf(rel, metabuf);
706  }
707 
708  /* Get cached page */
709  metad = (BTMetaPageData *) rel->rd_amcache;
710  /* We shouldn't have cached it if any of these fail */
711  Assert(metad->btm_magic == BTREE_MAGIC);
713  Assert(metad->btm_version <= BTREE_VERSION);
714  Assert(!metad->btm_allequalimage ||
716  Assert(metad->btm_fastroot != P_NONE);
717 
718  *heapkeyspace = metad->btm_version > BTREE_NOVAC_VERSION;
719  *allequalimage = metad->btm_allequalimage;
720 }
static BTMetaPageData * _bt_getmeta(Relation rel, Buffer metabuf)
Definition: nbtpage.c:136
uint32 btm_version
Definition: nbtree.h:101
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:803
#define BTREE_VERSION
Definition: nbtree.h:143
uint32 btm_magic
Definition: nbtree.h:100
#define P_NONE
Definition: nbtree.h:206
#define BT_READ
Definition: nbtree.h:587
BlockNumber btm_fastroot
Definition: nbtree.h:104
#define BTREE_MAGIC
Definition: nbtree.h:142
bool btm_allequalimage
Definition: nbtree.h:111
#define BTREE_NOVAC_VERSION
Definition: nbtree.h:145
#define BTREE_METAPAGE
Definition: nbtree.h:141
BlockNumber btm_root
Definition: nbtree.h:102
#define BTREE_MIN_VERSION
Definition: nbtree.h:144
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:959
#define Assert(condition)
Definition: c.h:745
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:797
MemoryContext rd_indexcxt
Definition: rel.h:186
void * rd_amcache
Definition: rel.h:211
int Buffer
Definition: buf.h:23

◆ _bt_page_recyclable()

bool _bt_page_recyclable ( Page  page)

Definition at line 1081 of file nbtpage.c.

References BTPageOpaqueData::btpo, GlobalVisCheckRemovableXid(), P_ISDELETED, PageGetSpecialPointer, PageIsNew, and BTPageOpaqueData::xact.

Referenced by _bt_getbuf(), and btvacuumpage().

1082 {
1083  BTPageOpaque opaque;
1084 
1085  /*
1086  * It's possible to find an all-zeroes page in an index --- for example, a
1087  * backend might successfully extend the relation one page and then crash
1088  * before it is able to make a WAL entry for adding the page. If we find a
1089  * zeroed page then reclaim it.
1090  */
1091  if (PageIsNew(page))
1092  return true;
1093 
1094  /*
1095  * Otherwise, recycle if deleted and too old to have any processes
1096  * interested in it.
1097  */
1098  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1099  if (P_ISDELETED(opaque) &&
1100  GlobalVisCheckRemovableXid(NULL, opaque->btpo.xact))
1101  return true;
1102  return false;
1103 }
union BTPageOpaqueData::@45 btpo
TransactionId xact
Definition: nbtree.h:63
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
#define P_ISDELETED(opaque)
Definition: nbtree.h:216
bool GlobalVisCheckRemovableXid(Relation rel, TransactionId xid)
Definition: procarray.c:4094
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
#define PageIsNew(page)
Definition: bufpage.h:229

◆ _bt_pagedel()

uint32 _bt_pagedel ( Relation  rel,
Buffer  leafbuf,
TransactionId oldestBtpoXact 
)

Definition at line 1549 of file nbtpage.c.

References _bt_getbuf(), _bt_leftsib_splitflag(), _bt_lockbuf(), _bt_mark_page_halfdead(), _bt_mkscankey(), _bt_relbuf(), _bt_search(), _bt_unlink_halfdead_page(), _bt_unlockbuf(), Assert, BT_READ, BT_WRITE, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BufferGetBlockNumber(), BufferGetPage, CHECK_FOR_INTERRUPTS, CopyIndexTuple(), ereport, errcode(), errhint(), errmsg(), errmsg_internal(), LOG, P_FIRSTDATAKEY, P_HIKEY, P_IGNORE, P_INCOMPLETE_SPLIT, P_ISDELETED, P_ISHALFDEAD, P_ISLEAF, P_ISROOT, P_RIGHTMOST, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageGetSpecialPointer, BTScanInsertData::pivotsearch, RelationGetRelationName, ReleaseBuffer(), TransactionIdFollowsOrEquals(), and BTPageOpaqueData::xact.

Referenced by btvacuumpage().

1550 {
1551  uint32 ndeleted = 0;
1552  BlockNumber rightsib;
1553  bool rightsib_empty;
1554  Page page;
1555  BTPageOpaque opaque;
1556 
1557  /*
1558  * Save original leafbuf block number from caller. Only deleted blocks
1559  * that are <= scanblkno get counted in ndeleted return value.
1560  */
1561  BlockNumber scanblkno = BufferGetBlockNumber(leafbuf);
1562 
1563  /*
1564  * "stack" is a search stack leading (approximately) to the target page.
1565  * It is initially NULL, but when iterating, we keep it to avoid
1566  * duplicated search effort.
1567  *
1568  * Also, when "stack" is not NULL, we have already checked that the
1569  * current page is not the right half of an incomplete split, i.e. the
1570  * left sibling does not have its INCOMPLETE_SPLIT flag set, including
1571  * when the current target page is to the right of caller's initial page
1572  * (the scanblkno page).
1573  */
1574  BTStack stack = NULL;
1575 
1576  for (;;)
1577  {
1578  page = BufferGetPage(leafbuf);
1579  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1580 
1581  /*
1582  * Internal pages are never deleted directly, only as part of deleting
1583  * the whole subtree all the way down to leaf level.
1584  *
1585  * Also check for deleted pages here. Caller never passes us a fully
1586  * deleted page. Only VACUUM can delete pages, so there can't have
1587  * been a concurrent deletion. Assume that we reached any deleted
1588  * page encountered here by following a sibling link, and that the
1589  * index is corrupt.
1590  */
1591  Assert(!P_ISDELETED(opaque));
1592  if (!P_ISLEAF(opaque) || P_ISDELETED(opaque))
1593  {
1594  /*
1595  * Pre-9.4 page deletion only marked internal pages as half-dead,
1596  * but now we only use that flag on leaf pages. The old algorithm
1597  * was never supposed to leave half-dead pages in the tree, it was
1598  * just a transient state, but it was nevertheless possible in
1599  * error scenarios. We don't know how to deal with them here. They
1600  * are harmless as far as searches are considered, but inserts
1601  * into the deleted keyspace could add out-of-order downlinks in
1602  * the upper levels. Log a notice, hopefully the admin will notice
1603  * and reindex.
1604  */
1605  if (P_ISHALFDEAD(opaque))
1606  ereport(LOG,
1607  (errcode(ERRCODE_INDEX_CORRUPTED),
1608  errmsg("index \"%s\" contains a half-dead internal page",
1610  errhint("This can be caused by an interrupted VACUUM in version 9.3 or older, before upgrade. Please REINDEX it.")));
1611 
1612  if (P_ISDELETED(opaque))
1613  ereport(LOG,
1614  (errcode(ERRCODE_INDEX_CORRUPTED),
1615  errmsg_internal("found deleted block %u while following right link from block %u in index \"%s\"",
1616  BufferGetBlockNumber(leafbuf),
1617  scanblkno,
1618  RelationGetRelationName(rel))));
1619 
1620  _bt_relbuf(rel, leafbuf);
1621  return ndeleted;
1622  }
1623 
1624  /*
1625  * We can never delete rightmost pages nor root pages. While at it,
1626  * check that page is empty, since it's possible that the leafbuf page
1627  * was empty a moment ago, but has since had some inserts.
1628  *
1629  * To keep the algorithm simple, we also never delete an incompletely
1630  * split page (they should be rare enough that this doesn't make any
1631  * meaningful difference to disk usage):
1632  *
1633  * The INCOMPLETE_SPLIT flag on the page tells us if the page is the
1634  * left half of an incomplete split, but ensuring that it's not the
1635  * right half is more complicated. For that, we have to check that
1636  * the left sibling doesn't have its INCOMPLETE_SPLIT flag set using
1637  * _bt_leftsib_splitflag(). On the first iteration, we temporarily
1638  * release the lock on scanblkno/leafbuf, check the left sibling, and
1639  * construct a search stack to scanblkno. On subsequent iterations,
1640  * we know we stepped right from a page that passed these tests, so
1641  * it's OK.
1642  */
1643  if (P_RIGHTMOST(opaque) || P_ISROOT(opaque) ||
1644  P_FIRSTDATAKEY(opaque) <= PageGetMaxOffsetNumber(page) ||
1645  P_INCOMPLETE_SPLIT(opaque))
1646  {
1647  /* Should never fail to delete a half-dead page */
1648  Assert(!P_ISHALFDEAD(opaque));
1649 
1650  _bt_relbuf(rel, leafbuf);
1651  return ndeleted;
1652  }
1653 
1654  /*
1655  * First, remove downlink pointing to the page (or a parent of the
1656  * page, if we are going to delete a taller subtree), and mark the
1657  * leafbuf page half-dead
1658  */
1659  if (!P_ISHALFDEAD(opaque))
1660  {
1661  /*
1662  * We need an approximate pointer to the page's parent page. We
1663  * use a variant of the standard search mechanism to search for
1664  * the page's high key; this will give us a link to either the
1665  * current parent or someplace to its left (if there are multiple
1666  * equal high keys, which is possible with !heapkeyspace indexes).
1667  *
1668  * Also check if this is the right-half of an incomplete split
1669  * (see comment above).
1670  */
1671  if (!stack)
1672  {
1673  BTScanInsert itup_key;
1674  ItemId itemid;
1675  IndexTuple targetkey;
1676  BlockNumber leftsib,
1677  leafblkno;
1678  Buffer sleafbuf;
1679 
1680  itemid = PageGetItemId(page, P_HIKEY);
1681  targetkey = CopyIndexTuple((IndexTuple) PageGetItem(page, itemid));
1682 
1683  leftsib = opaque->btpo_prev;
1684  leafblkno = BufferGetBlockNumber(leafbuf);
1685 
1686  /*
1687  * To avoid deadlocks, we'd better drop the leaf page lock
1688  * before going further.
1689  */
1690  _bt_unlockbuf(rel, leafbuf);
1691 
1692  /*
1693  * Check that the left sibling of leafbuf (if any) is not
1694  * marked with INCOMPLETE_SPLIT flag before proceeding
1695  */
1696  Assert(leafblkno == scanblkno);
1697  if (_bt_leftsib_splitflag(rel, leftsib, leafblkno))
1698  {
1699  ReleaseBuffer(leafbuf);
1700  Assert(ndeleted == 0);
1701  return ndeleted;
1702  }
1703 
1704  /* we need an insertion scan key for the search, so build one */
1705  itup_key = _bt_mkscankey(rel, targetkey);
1706  /* find the leftmost leaf page with matching pivot/high key */
1707  itup_key->pivotsearch = true;
1708  stack = _bt_search(rel, itup_key, &sleafbuf, BT_READ, NULL);
1709  /* won't need a second lock or pin on leafbuf */
1710  _bt_relbuf(rel, sleafbuf);
1711 
1712  /*
1713  * Re-lock the leaf page, and start over to use our stack
1714  * within _bt_mark_page_halfdead. We must do it that way
1715  * because it's possible that leafbuf can no longer be
1716  * deleted. We need to recheck.
1717  *
1718  * Note: We can't simply hold on to the sleafbuf lock instead,
1719  * because it's barely possible that sleafbuf is not the same
1720  * page as leafbuf. This happens when leafbuf split after our
1721  * original lock was dropped, but before _bt_search finished
1722  * its descent. We rely on the assumption that we'll find
1723  * leafbuf isn't safe to delete anymore in this scenario.
1724  * (Page deletion can cope with the stack being to the left of
1725  * leafbuf, but not to the right of leafbuf.)
1726  */
1727  _bt_lockbuf(rel, leafbuf, BT_WRITE);
1728  continue;
1729  }
1730 
1731  /*
1732  * See if it's safe to delete the leaf page, and determine how
1733  * many parent/internal pages above the leaf level will be
1734  * deleted. If it's safe then _bt_mark_page_halfdead will also
1735  * perform the first phase of deletion, which includes marking the
1736  * leafbuf page half-dead.
1737  */
1738  Assert(P_ISLEAF(opaque) && !P_IGNORE(opaque));
1739  if (!_bt_mark_page_halfdead(rel, leafbuf, stack))
1740  {
1741  _bt_relbuf(rel, leafbuf);
1742  return ndeleted;
1743  }
1744  }
1745 
1746  /*
1747  * Then unlink it from its siblings. Each call to
1748  * _bt_unlink_halfdead_page unlinks the topmost page from the subtree,
1749  * making it shallower. Iterate until the leafbuf page is deleted.
1750  *
1751  * _bt_unlink_halfdead_page should never fail, since we established
1752  * that deletion is generally safe in _bt_mark_page_halfdead.
1753  */
1754  rightsib_empty = false;
1755  Assert(P_ISLEAF(opaque) && P_ISHALFDEAD(opaque));
1756  while (P_ISHALFDEAD(opaque))
1757  {
1758  /* Check for interrupts in _bt_unlink_halfdead_page */
1759  if (!_bt_unlink_halfdead_page(rel, leafbuf, scanblkno,
1760  &rightsib_empty, oldestBtpoXact,
1761  &ndeleted))
1762  {
1763  /* _bt_unlink_halfdead_page failed, released buffer */
1764  return ndeleted;
1765  }
1766  }
1767 
1768  Assert(P_ISLEAF(opaque) && P_ISDELETED(opaque));
1770  *oldestBtpoXact));
1771 
1772  rightsib = opaque->btpo_next;
1773 
1774  _bt_relbuf(rel, leafbuf);
1775 
1776  /*
1777  * Check here, as calling loops will have locks held, preventing
1778  * interrupts from being processed.
1779  */
1781 
1782  /*
1783  * The page has now been deleted. If its right sibling is completely
1784  * empty, it's possible that the reason we haven't deleted it earlier
1785  * is that it was the rightmost child of the parent. Now that we
1786  * removed the downlink for this page, the right sibling might now be
1787  * the only child of the parent, and could be removed. It would be
1788  * picked up by the next vacuum anyway, but might as well try to
1789  * remove it now, so loop back to process the right sibling.
1790  *
1791  * Note: This relies on the assumption that _bt_getstackbuf() will be
1792  * able to reuse our original descent stack with a different child
1793  * block (provided that the child block is to the right of the
1794  * original leaf page reached by _bt_search()). It will even update
1795  * the descent stack each time we loop around, avoiding repeated work.
1796  */
1797  if (!rightsib_empty)
1798  break;
1799 
1800  leafbuf = _bt_getbuf(rel, rightsib, BT_WRITE);
1801  }
1802 
1803  return ndeleted;
1804 }
BlockNumber btpo_next
Definition: nbtree.h:59
int errhint(const char *fmt,...)
Definition: elog.c:1071
#define P_IGNORE(opaque)
Definition: nbtree.h:219
BTScanInsert _bt_mkscankey(Relation rel, IndexTuple itup)
Definition: nbtutils.c:90
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:803
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:244
bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:349
union BTPageOpaqueData::@45 btpo
int errcode(int sqlerrcode)
Definition: elog.c:610
uint32 BlockNumber
Definition: block.h:31
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3518
#define P_INCOMPLETE_SPLIT(opaque)
Definition: nbtree.h:221
#define LOG
Definition: elog.h:26
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
TransactionId xact
Definition: nbtree.h:63
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
#define BT_READ
Definition: nbtree.h:587
#define P_ISHALFDEAD(opaque)
Definition: nbtree.h:218
static bool _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
Definition: nbtpage.c:1821
BlockNumber btpo_prev
Definition: nbtree.h:58
IndexTuple CopyIndexTuple(IndexTuple source)
Definition: indextuple.c:510
#define RelationGetRelationName(relation)
Definition: rel.h:490
unsigned int uint32
Definition: c.h:374
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
bool pivotsearch
Definition: nbtree.h:663
#define P_ISDELETED(opaque)
Definition: nbtree.h:216
#define P_ISROOT(opaque)
Definition: nbtree.h:215
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
void _bt_lockbuf(Relation rel, Buffer buf, int access)
Definition: nbtpage.c:975
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:959
#define ereport(elevel,...)
Definition: elog.h:144
int errmsg_internal(const char *fmt,...)
Definition: elog.c:911
#define Assert(condition)
Definition: c.h:745
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
void _bt_unlockbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:1006
#define P_HIKEY
Definition: nbtree.h:242
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2668
int errmsg(const char *fmt,...)
Definition: elog.c:824
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
#define BT_WRITE
Definition: nbtree.h:588
int Buffer
Definition: buf.h:23
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:213
BTStack _bt_search(Relation rel, BTScanInsert key, Buffer *bufP, int access, Snapshot snapshot)
Definition: nbtsearch.c:101
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
Pointer Page
Definition: bufpage.h:78
static bool _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno, bool *rightsib_empty, TransactionId *oldestBtpoXact, uint32 *ndeleted)
Definition: nbtpage.c:2043
static bool _bt_leftsib_splitflag(Relation rel, BlockNumber leftsib, BlockNumber target)
Definition: nbtpage.c:1441
#define P_ISLEAF(opaque)
Definition: nbtree.h:214

◆ _bt_pageinit()

void _bt_pageinit ( Page  page,
Size  size 
)

Definition at line 1066 of file nbtpage.c.

References PageInit().

Referenced by _bt_blnewpage(), _bt_getbuf(), _bt_initmetapage(), _bt_restore_meta(), _bt_split(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_split(), and btree_xlog_unlink_page().

1067 {
1068  PageInit(page, size, sizeof(BTPageOpaqueData));
1069 }
void PageInit(Page page, Size pageSize, Size specialSize)
Definition: bufpage.c:42

◆ _bt_relandgetbuf()

Buffer _bt_relandgetbuf ( Relation  rel,
Buffer  obuf,
BlockNumber  blkno,
int  access 
)

Definition at line 939 of file nbtpage.c.

References _bt_checkpage(), _bt_lockbuf(), _bt_unlockbuf(), Assert, buf, BufferIsValid, P_NEW, and ReleaseAndReadBuffer().

Referenced by _bt_check_unique(), _bt_get_endpoint(), _bt_getroot(), _bt_gettrueroot(), _bt_moveright(), _bt_search(), _bt_stepright(), and _bt_walk_left().

940 {
941  Buffer buf;
942 
943  Assert(blkno != P_NEW);
944  if (BufferIsValid(obuf))
945  _bt_unlockbuf(rel, obuf);
946  buf = ReleaseAndReadBuffer(obuf, rel, blkno);
947  _bt_lockbuf(rel, buf, access);
948 
949  _bt_checkpage(rel, buf);
950  return buf;
951 }
#define P_NEW
Definition: bufmgr.h:91
void _bt_checkpage(Relation rel, Buffer buf)
Definition: nbtpage.c:726
static char * buf
Definition: pg_test_fsync.c:67
void _bt_lockbuf(Relation rel, Buffer buf, int access)
Definition: nbtpage.c:975
#define Assert(condition)
Definition: c.h:745
Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation, BlockNumber blockNum)
Definition: bufmgr.c:1532
#define BufferIsValid(bufnum)
Definition: bufmgr.h:123
void _bt_unlockbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:1006
int Buffer
Definition: buf.h:23

◆ _bt_relbuf()

◆ _bt_rightsib_halfdeadflag()

static bool _bt_rightsib_halfdeadflag ( Relation  rel,
BlockNumber  leafrightsib 
)
static

Definition at line 1498 of file nbtpage.c.

References _bt_getbuf(), _bt_relbuf(), Assert, BT_READ, buf, BufferGetPage, P_ISDELETED, P_ISHALFDEAD, P_ISLEAF, P_NONE, and PageGetSpecialPointer.

Referenced by _bt_mark_page_halfdead().

1499 {
1500  Buffer buf;
1501  Page page;
1502  BTPageOpaque opaque;
1503  bool result;
1504 
1505  Assert(leafrightsib != P_NONE);
1506 
1507  buf = _bt_getbuf(rel, leafrightsib, BT_READ);
1508  page = BufferGetPage(buf);
1509  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1510 
1511  Assert(P_ISLEAF(opaque) && !P_ISDELETED(opaque));
1512  result = P_ISHALFDEAD(opaque);
1513  _bt_relbuf(rel, buf);
1514 
1515  return result;
1516 }
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:803
#define P_NONE
Definition: nbtree.h:206
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
#define BT_READ
Definition: nbtree.h:587
#define P_ISHALFDEAD(opaque)
Definition: nbtree.h:218
static char * buf
Definition: pg_test_fsync.c:67
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define P_ISDELETED(opaque)
Definition: nbtree.h:216
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:959
#define Assert(condition)
Definition: c.h:745
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
int Buffer
Definition: buf.h:23
Pointer Page
Definition: bufpage.h:78
#define P_ISLEAF(opaque)
Definition: nbtree.h:214

◆ _bt_unlink_halfdead_page()

static bool _bt_unlink_halfdead_page ( Relation  rel,
Buffer  leafbuf,
BlockNumber  scanblkno,
bool rightsib_empty,
TransactionId oldestBtpoXact,
uint32 ndeleted 
)
static

Definition at line 2043 of file nbtpage.c.

References _bt_getbuf(), _bt_lockbuf(), _bt_relbuf(), _bt_unlockbuf(), _bt_upgrademetapage(), xl_btree_metadata::allequalimage, Assert, BlockNumberIsValid, BT_READ, BT_WRITE, BTMetaPageData::btm_allequalimage, BTMetaPageData::btm_fastlevel, BTMetaPageData::btm_fastroot, BTMetaPageData::btm_last_cleanup_num_heap_tuples, BTMetaPageData::btm_level, BTMetaPageData::btm_oldest_btpo_xact, BTMetaPageData::btm_root, BTMetaPageData::btm_version, BTP_DELETED, BTP_HALF_DEAD, BTPageGetMeta, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, xl_btree_unlink_page::btpo_xact, BTREE_METAPAGE, BTREE_NOVAC_VERSION, BTreeTupleGetDownLink(), BTreeTupleGetTopParent(), BTreeTupleSetTopParent(), buf, BufferGetBlockNumber(), BufferGetPage, BufferIsValid, CHECK_FOR_INTERRUPTS, elog, END_CRIT_SECTION, ereport, errcode(), errmsg_internal(), ERROR, xl_btree_metadata::fastlevel, xl_btree_metadata::fastroot, header(), InvalidBlockNumber, InvalidBuffer, xl_btree_metadata::last_cleanup_num_heap_tuples, xl_btree_unlink_page::leafleftsib, xl_btree_unlink_page::leafrightsib, xl_btree_unlink_page::leftsib, xl_btree_metadata::level, BTPageOpaqueData::level, LOG, MarkBufferDirty(), xl_btree_metadata::oldest_btpo_xact, P_FIRSTDATAKEY, P_HIKEY, P_ISDELETED, P_ISHALFDEAD, P_ISLEAF, P_ISROOT, P_NONE, P_RIGHTMOST, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageGetSpecialPointer, PageSetLSN, PageHeaderData::pd_lower, PageHeaderData::pd_special, PageHeaderData::pd_upper, ReadNewTransactionId(), REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetRelationName, RelationNeedsWAL, ReleaseBuffer(), xl_btree_unlink_page::rightsib, xl_btree_metadata::root, SizeOfBtreeUnlinkPage, SizeOfPageHeaderData, START_CRIT_SECTION, xl_btree_unlink_page::topparent, TransactionIdIsValid, TransactionIdPrecedes(), xl_btree_metadata::version, BTPageOpaqueData::xact, XLOG_BTREE_UNLINK_PAGE, XLOG_BTREE_UNLINK_PAGE_META, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _bt_pagedel().

2046 {
2047  BlockNumber leafblkno = BufferGetBlockNumber(leafbuf);
2048  BlockNumber leafleftsib;
2049  BlockNumber leafrightsib;
2050  BlockNumber target;
2051  BlockNumber leftsib;
2052  BlockNumber rightsib;
2053  Buffer lbuf = InvalidBuffer;
2054  Buffer buf;
2055  Buffer rbuf;
2056  Buffer metabuf = InvalidBuffer;
2057  Page metapg = NULL;
2058  BTMetaPageData *metad = NULL;
2059  ItemId itemid;
2060  Page page;
2062  BTPageOpaque opaque;
2063  bool rightsib_is_rightmost;
2064  int targetlevel;
2065  IndexTuple leafhikey;
2066  BlockNumber nextchild;
2067 
2068  page = BufferGetPage(leafbuf);
2069  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2070 
2071  Assert(P_ISLEAF(opaque) && !P_ISDELETED(opaque) && P_ISHALFDEAD(opaque));
2072 
2073  /*
2074  * Remember some information about the leaf page.
2075  */
2076  itemid = PageGetItemId(page, P_HIKEY);
2077  leafhikey = (IndexTuple) PageGetItem(page, itemid);
2078  target = BTreeTupleGetTopParent(leafhikey);
2079  leafleftsib = opaque->btpo_prev;
2080  leafrightsib = opaque->btpo_next;
2081 
2082  _bt_unlockbuf(rel, leafbuf);
2083 
2084  /*
2085  * Check here, as calling loops will have locks held, preventing
2086  * interrupts from being processed.
2087  */
2089 
2090  /* Unlink the current top parent of the subtree */
2091  if (!BlockNumberIsValid(target))
2092  {
2093  /* Target is leaf page (or leaf page is top parent, if you prefer) */
2094  target = leafblkno;
2095 
2096  buf = leafbuf;
2097  leftsib = leafleftsib;
2098  targetlevel = 0;
2099  }
2100  else
2101  {
2102  /* Target is the internal page taken from leaf's top parent link */
2103  Assert(target != leafblkno);
2104 
2105  /* Fetch the block number of the target's left sibling */
2106  buf = _bt_getbuf(rel, target, BT_READ);
2107  page = BufferGetPage(buf);
2108  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2109  leftsib = opaque->btpo_prev;
2110  targetlevel = opaque->btpo.level;
2111  Assert(targetlevel > 0);
2112 
2113  /*
2114  * To avoid deadlocks, we'd better drop the target page lock before
2115  * going further.
2116  */
2117  _bt_unlockbuf(rel, buf);
2118  }
2119 
2120  /*
2121  * We have to lock the pages we need to modify in the standard order:
2122  * moving right, then up. Else we will deadlock against other writers.
2123  *
2124  * So, first lock the leaf page, if it's not the target. Then find and
2125  * write-lock the current left sibling of the target page. The sibling
2126  * that was current a moment ago could have split, so we may have to move
2127  * right. This search could fail if either the sibling or the target page
2128  * was deleted by someone else meanwhile; if so, give up. (Right now,
2129  * that should never happen, since page deletion is only done in VACUUM
2130  * and there shouldn't be multiple VACUUMs concurrently on the same
2131  * table.)
2132  */
2133  if (target != leafblkno)
2134  _bt_lockbuf(rel, leafbuf, BT_WRITE);
2135  if (leftsib != P_NONE)
2136  {
2137  lbuf = _bt_getbuf(rel, leftsib, BT_WRITE);
2138  page = BufferGetPage(lbuf);
2139  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2140  while (P_ISDELETED(opaque) || opaque->btpo_next != target)
2141  {
2142  /* step right one page */
2143  leftsib = opaque->btpo_next;
2144  _bt_relbuf(rel, lbuf);
2145 
2146  /*
2147  * It'd be good to check for interrupts here, but it's not easy to
2148  * do so because a lock is always held. This block isn't
2149  * frequently reached, so hopefully the consequences of not
2150  * checking interrupts aren't too bad.
2151  */
2152 
2153  if (leftsib == P_NONE)
2154  {
2155  elog(LOG, "no left sibling (concurrent deletion?) of block %u in \"%s\"",
2156  target,
2158  if (target != leafblkno)
2159  {
2160  /* we have only a pin on target, but pin+lock on leafbuf */
2161  ReleaseBuffer(buf);
2162  _bt_relbuf(rel, leafbuf);
2163  }
2164  else
2165  {
2166  /* we have only a pin on leafbuf */
2167  ReleaseBuffer(leafbuf);
2168  }
2169  return false;
2170  }
2171  lbuf = _bt_getbuf(rel, leftsib, BT_WRITE);
2172  page = BufferGetPage(lbuf);
2173  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2174  }
2175  }
2176  else
2177  lbuf = InvalidBuffer;
2178 
2179  /*
2180  * Next write-lock the target page itself. It's okay to take a write lock
2181  * rather than a superexclusive lock, since no scan will stop on an empty
2182  * page.
2183  */
2184  _bt_lockbuf(rel, buf, BT_WRITE);
2185  page = BufferGetPage(buf);
2186  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2187 
2188  /*
2189  * Check page is still empty etc, else abandon deletion. This is just for
2190  * paranoia's sake; a half-dead page cannot resurrect because there can be
2191  * only one vacuum process running at a time.
2192  */
2193  if (P_RIGHTMOST(opaque) || P_ISROOT(opaque) || P_ISDELETED(opaque))
2194  elog(ERROR, "half-dead page changed status unexpectedly in block %u of index \"%s\"",
2195  target, RelationGetRelationName(rel));
2196 
2197  if (opaque->btpo_prev != leftsib)
2198  ereport(ERROR,
2199  (errcode(ERRCODE_INDEX_CORRUPTED),
2200  errmsg_internal("left link changed unexpectedly in block %u of index \"%s\"",
2201  target, RelationGetRelationName(rel))));
2202 
2203  if (target == leafblkno)
2204  {
2205  if (P_FIRSTDATAKEY(opaque) <= PageGetMaxOffsetNumber(page) ||
2206  !P_ISLEAF(opaque) || !P_ISHALFDEAD(opaque))
2207  elog(ERROR, "half-dead page changed status unexpectedly in block %u of index \"%s\"",
2208  target, RelationGetRelationName(rel));
2209  nextchild = InvalidBlockNumber;
2210  }
2211  else
2212  {
2213  if (P_FIRSTDATAKEY(opaque) != PageGetMaxOffsetNumber(page) ||
2214  P_ISLEAF(opaque))
2215  elog(ERROR, "half-dead page changed status unexpectedly in block %u of index \"%s\"",
2216  target, RelationGetRelationName(rel));
2217 
2218  /* Remember the next non-leaf child down in the subtree */
2219  itemid = PageGetItemId(page, P_FIRSTDATAKEY(opaque));
2220  nextchild = BTreeTupleGetDownLink((IndexTuple) PageGetItem(page, itemid));
2221  if (nextchild == leafblkno)
2222  nextchild = InvalidBlockNumber;
2223  }
2224 
2225  /*
2226  * And next write-lock the (current) right sibling.
2227  */
2228  rightsib = opaque->btpo_next;
2229  rbuf = _bt_getbuf(rel, rightsib, BT_WRITE);
2230  page = BufferGetPage(rbuf);
2231  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2232  if (opaque->btpo_prev != target)
2233  ereport(ERROR,
2234  (errcode(ERRCODE_INDEX_CORRUPTED),
2235  errmsg_internal("right sibling's left-link doesn't match: "
2236  "block %u links to %u instead of expected %u in index \"%s\"",
2237  rightsib, opaque->btpo_prev, target,
2238  RelationGetRelationName(rel))));
2239  rightsib_is_rightmost = P_RIGHTMOST(opaque);
2240  *rightsib_empty = (P_FIRSTDATAKEY(opaque) > PageGetMaxOffsetNumber(page));
2241 
2242  /*
2243  * If we are deleting the next-to-last page on the target's level, then
2244  * the rightsib is a candidate to become the new fast root. (In theory, it
2245  * might be possible to push the fast root even further down, but the odds
2246  * of doing so are slim, and the locking considerations daunting.)
2247  *
2248  * We can safely acquire a lock on the metapage here --- see comments for
2249  * _bt_newroot().
2250  */
2251  if (leftsib == P_NONE && rightsib_is_rightmost)
2252  {
2253  page = BufferGetPage(rbuf);
2254  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2255  if (P_RIGHTMOST(opaque))
2256  {
2257  /* rightsib will be the only one left on the level */
2258  metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE);
2259  metapg = BufferGetPage(metabuf);
2260  metad = BTPageGetMeta(metapg);
2261 
2262  /*
2263  * The expected case here is btm_fastlevel == targetlevel+1; if
2264  * the fastlevel is <= targetlevel, something is wrong, and we
2265  * choose to overwrite it to fix it.
2266  */
2267  if (metad->btm_fastlevel > targetlevel + 1)
2268  {
2269  /* no update wanted */
2270  _bt_relbuf(rel, metabuf);
2271  metabuf = InvalidBuffer;
2272  }
2273  }
2274  }
2275 
2276  /*
2277  * Here we begin doing the deletion.
2278  */
2279 
2280  /* No ereport(ERROR) until changes are logged */
2282 
2283  /*
2284  * Update siblings' side-links. Note the target page's side-links will
2285  * continue to point to the siblings. Asserts here are just rechecking
2286  * things we already verified above.
2287  */
2288  if (BufferIsValid(lbuf))
2289  {
2290  page = BufferGetPage(lbuf);
2291  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2292  Assert(opaque->btpo_next == target);
2293  opaque->btpo_next = rightsib;
2294  }
2295  page = BufferGetPage(rbuf);
2296  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2297  Assert(opaque->btpo_prev == target);
2298  opaque->btpo_prev = leftsib;
2299 
2300  /*
2301  * If we deleted a parent of the targeted leaf page, instead of the leaf
2302  * itself, update the leaf to point to the next remaining child in the
2303  * subtree.
2304  *
2305  * Note: We rely on the fact that a buffer pin on the leaf page has been
2306  * held since leafhikey was initialized. This is safe, though only
2307  * because the page was already half-dead at that point. The leaf page
2308  * cannot have been modified by any other backend during the period when
2309  * no lock was held.
2310  */
2311  if (target != leafblkno)
2312  BTreeTupleSetTopParent(leafhikey, nextchild);
2313 
2314  /*
2315  * Mark the page itself deleted. It can be recycled when all current
2316  * transactions are gone. Storing GetTopTransactionId() would work, but
2317  * we're in VACUUM and would not otherwise have an XID. Having already
2318  * updated links to the target, ReadNewTransactionId() suffices as an
2319  * upper bound. Any scan having retained a now-stale link is advertising
2320  * in its PGPROC an xmin less than or equal to the value we read here. It
2321  * will continue to do so, holding back the xmin horizon, for the duration
2322  * of that scan.
2323  */
2324  page = BufferGetPage(buf);
2325  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2326  Assert(P_ISHALFDEAD(opaque) || !P_ISLEAF(opaque));
2327  opaque->btpo_flags &= ~BTP_HALF_DEAD;
2328  opaque->btpo_flags |= BTP_DELETED;
2329  opaque->btpo.xact = ReadNewTransactionId();
2330 
2331  /*
2332  * Remove the remaining tuples on the page. This keeps things simple for
2333  * WAL consistency checking.
2334  */
2335  header = (PageHeader) page;
2336  header->pd_lower = SizeOfPageHeaderData;
2337  header->pd_upper = header->pd_special;
2338 
2339  /* And update the metapage, if needed */
2340  if (BufferIsValid(metabuf))
2341  {
2342  /* upgrade metapage if needed */
2343  if (metad->btm_version < BTREE_NOVAC_VERSION)
2344  _bt_upgrademetapage(metapg);
2345  metad->btm_fastroot = rightsib;
2346  metad->btm_fastlevel = targetlevel;
2347  MarkBufferDirty(metabuf);
2348  }
2349 
2350  /* Must mark buffers dirty before XLogInsert */
2351  MarkBufferDirty(rbuf);
2352  MarkBufferDirty(buf);
2353  if (BufferIsValid(lbuf))
2354  MarkBufferDirty(lbuf);
2355  if (target != leafblkno)
2356  MarkBufferDirty(leafbuf);
2357 
2358  /* XLOG stuff */
2359  if (RelationNeedsWAL(rel))
2360  {
2361  xl_btree_unlink_page xlrec;
2362  xl_btree_metadata xlmeta;
2363  uint8 xlinfo;
2364  XLogRecPtr recptr;
2365 
2366  XLogBeginInsert();
2367 
2369  if (BufferIsValid(lbuf))
2372  if (target != leafblkno)
2373  XLogRegisterBuffer(3, leafbuf, REGBUF_WILL_INIT);
2374 
2375  /* information on the unlinked block */
2376  xlrec.leftsib = leftsib;
2377  xlrec.rightsib = rightsib;
2378  xlrec.btpo_xact = opaque->btpo.xact;
2379 
2380  /* information needed to recreate the leaf block (if not the target) */
2381  xlrec.leafleftsib = leafleftsib;
2382  xlrec.leafrightsib = leafrightsib;
2383  xlrec.topparent = nextchild;
2384 
2385  XLogRegisterData((char *) &xlrec, SizeOfBtreeUnlinkPage);
2386 
2387  if (BufferIsValid(metabuf))
2388  {
2390 
2392  xlmeta.version = metad->btm_version;
2393  xlmeta.root = metad->btm_root;
2394  xlmeta.level = metad->btm_level;
2395  xlmeta.fastroot = metad->btm_fastroot;
2396  xlmeta.fastlevel = metad->btm_fastlevel;
2397  xlmeta.oldest_btpo_xact = metad->btm_oldest_btpo_xact;
2399  xlmeta.allequalimage = metad->btm_allequalimage;
2400 
2401  XLogRegisterBufData(4, (char *) &xlmeta, sizeof(xl_btree_metadata));
2402  xlinfo = XLOG_BTREE_UNLINK_PAGE_META;
2403  }
2404  else
2405  xlinfo = XLOG_BTREE_UNLINK_PAGE;
2406 
2407  recptr = XLogInsert(RM_BTREE_ID, xlinfo);
2408 
2409  if (BufferIsValid(metabuf))
2410  {
2411  PageSetLSN(metapg, recptr);
2412  }
2413  page = BufferGetPage(rbuf);
2414  PageSetLSN(page, recptr);
2415  page = BufferGetPage(buf);
2416  PageSetLSN(page, recptr);
2417  if (BufferIsValid(lbuf))
2418  {
2419  page = BufferGetPage(lbuf);
2420  PageSetLSN(page, recptr);
2421  }
2422  if (target != leafblkno)
2423  {
2424  page = BufferGetPage(leafbuf);
2425  PageSetLSN(page, recptr);
2426  }
2427  }
2428 
2429  END_CRIT_SECTION();
2430 
2431  /* release metapage */
2432  if (BufferIsValid(metabuf))
2433  _bt_relbuf(rel, metabuf);
2434 
2435  /* release siblings */
2436  if (BufferIsValid(lbuf))
2437  _bt_relbuf(rel, lbuf);
2438  _bt_relbuf(rel, rbuf);
2439 
2440  if (!TransactionIdIsValid(*oldestBtpoXact) ||
2441  TransactionIdPrecedes(opaque->btpo.xact, *oldestBtpoXact))
2442  *oldestBtpoXact = opaque->btpo.xact;
2443 
2444  /*
2445  * If btvacuumscan won't revisit this page in a future btvacuumpage call
2446  * and count it as deleted then, we count it as deleted by current
2447  * btvacuumpage call
2448  */
2449  if (target <= scanblkno)
2450  (*ndeleted)++;
2451 
2452  /* If the target is not leafbuf, we're done with it now -- release it */
2453  if (target != leafblkno)
2454  _bt_relbuf(rel, buf);
2455 
2456  return true;
2457 }
void XLogRegisterBufData(uint8 block_id, char *data, int len)
Definition: xloginsert.c:368
bool allequalimage
Definition: nbtxlog.h:57
BlockNumber btpo_next
Definition: nbtree.h:59
void _bt_upgrademetapage(Page page)
Definition: nbtpage.c:101
uint32 btm_version
Definition: nbtree.h:101
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:803
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1469
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:220
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:244
#define BTP_HALF_DEAD
Definition: nbtree.h:76
#define END_CRIT_SECTION()
Definition: miscadmin.h:134
BlockNumber root
Definition: nbtxlog.h:51
union BTPageOpaqueData::@45 btpo
unsigned char uint8
Definition: c.h:372
#define P_NONE
Definition: nbtree.h:206
static BlockNumber BTreeTupleGetDownLink(IndexTuple pivot)
Definition: nbtree.h:424
#define InvalidBuffer
Definition: buf.h:25
#define REGBUF_WILL_INIT
Definition: xloginsert.h:33
#define START_CRIT_SECTION()
Definition: miscadmin.h:132
int errcode(int sqlerrcode)
Definition: elog.c:610
uint32 BlockNumber
Definition: block.h:31
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3518
#define BTP_DELETED
Definition: nbtree.h:74
#define SizeOfPageHeaderData
Definition: bufpage.h:216
#define LOG
Definition: elog.h:26
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
TransactionId xact
Definition: nbtree.h:63
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
#define BT_READ
Definition: nbtree.h:587
BlockNumber btm_fastroot
Definition: nbtree.h:104
#define P_ISHALFDEAD(opaque)
Definition: nbtree.h:218
#define ERROR
Definition: elog.h:43
float8 last_cleanup_num_heap_tuples
Definition: nbtxlog.h:56
TransactionId oldest_btpo_xact
Definition: nbtxlog.h:55
#define BTPageGetMeta(p)
Definition: nbtree.h:114
bool btm_allequalimage
Definition: nbtree.h:111
BlockNumber btpo_prev
Definition: nbtree.h:58
static char * buf
Definition: pg_test_fsync.c:67
IndexTupleData * IndexTuple
Definition: itup.h:53
#define REGBUF_STANDARD
Definition: xloginsert.h:35
#define RelationGetRelationName(relation)
Definition: rel.h:490
#define XLOG_BTREE_UNLINK_PAGE
Definition: nbtxlog.h:34
#define BTREE_NOVAC_VERSION
Definition: nbtree.h:145
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define BTREE_METAPAGE
Definition: nbtree.h:141
#define P_ISDELETED(opaque)
Definition: nbtree.h:216
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
#define P_ISROOT(opaque)
Definition: nbtree.h:215
uint32 version
Definition: nbtxlog.h:50
LocationIndex pd_special
Definition: bufpage.h:160
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
uint32 btm_fastlevel
Definition: nbtree.h:105
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:330
uint32 level
Definition: nbtree.h:62
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:422
void _bt_lockbuf(Relation rel, Buffer buf, int access)
Definition: nbtpage.c:975
BlockNumber btm_root
Definition: nbtree.h:102
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:959
#define ereport(elevel,...)
Definition: elog.h:144
#define BlockNumberIsValid(blockNumber)
Definition: block.h:70
int errmsg_internal(const char *fmt,...)
Definition: elog.c:911
PageHeaderData * PageHeader
Definition: bufpage.h:166
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:745
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
#define InvalidBlockNumber
Definition: block.h:33
float8 btm_last_cleanup_num_heap_tuples
Definition: nbtree.h:109
#define BufferIsValid(bufnum)
Definition: bufmgr.h:123
#define RelationNeedsWAL(relation)
Definition: rel.h:562
static void header(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:210
static TransactionId ReadNewTransactionId(void)
Definition: transam.h:308
void _bt_unlockbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:1006
#define P_HIKEY
Definition: nbtree.h:242
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2668
uint32 fastlevel
Definition: nbtxlog.h:54
uint32 btm_level
Definition: nbtree.h:103
#define elog(elevel,...)
Definition: elog.h:214
uint32 level
Definition: nbtxlog.h:52
#define SizeOfBtreeUnlinkPage
Definition: nbtxlog.h:303
static BlockNumber BTreeTupleGetTopParent(IndexTuple leafhikey)
Definition: nbtree.h:488
BlockNumber fastroot
Definition: nbtxlog.h:53
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
#define XLOG_BTREE_UNLINK_PAGE_META
Definition: nbtxlog.h:35
TransactionId btm_oldest_btpo_xact
Definition: nbtree.h:107
LocationIndex pd_upper
Definition: bufpage.h:159
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define BT_WRITE
Definition: nbtree.h:588
void XLogBeginInsert(void)
Definition: xloginsert.c:123
uint16 btpo_flags
Definition: nbtree.h:65
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
int Buffer
Definition: buf.h:23
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:213
static void BTreeTupleSetTopParent(IndexTuple leafhikey, BlockNumber blkno)
Definition: nbtree.h:494
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
Pointer Page
Definition: bufpage.h:78
LocationIndex pd_lower
Definition: bufpage.h:158
#define P_ISLEAF(opaque)
Definition: nbtree.h:214

◆ _bt_unlockbuf()

void _bt_unlockbuf ( Relation  rel,
Buffer  buf 
)

Definition at line 1006 of file nbtpage.c.

References BUFFER_LOCK_UNLOCK, BufferGetPage, LockBuffer(), RelationUsesLocalBuffers, VALGRIND_CHECK_MEM_IS_DEFINED, and VALGRIND_MAKE_MEM_NOACCESS.

Referenced by _bt_drop_lock_and_maybe_pin(), _bt_endpoint(), _bt_first(), _bt_getroot(), _bt_killitems(), _bt_moveright(), _bt_pagedel(), _bt_relandgetbuf(), _bt_relbuf(), _bt_search(), _bt_unlink_halfdead_page(), and _bt_update_meta_cleanup_info().

1007 {
1008  /*
1009  * Buffer is pinned and locked, which means that it is expected to be
1010  * defined and addressable. Check that proactively.
1011  */
1013 
1014  /* LockBuffer() asserts that pin is held by this backend */
1016 
1017  if (!RelationUsesLocalBuffers(rel))
1019 }
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:96
#define VALGRIND_MAKE_MEM_NOACCESS(addr, size)
Definition: memdebug.h:27
#define VALGRIND_CHECK_MEM_IS_DEFINED(addr, size)
Definition: memdebug.h:23
static char * buf
Definition: pg_test_fsync.c:67
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3757
#define RelationUsesLocalBuffers(relation)
Definition: rel.h:572

◆ _bt_update_meta_cleanup_info()

void _bt_update_meta_cleanup_info ( Relation  rel,
TransactionId  oldestBtpoXact,
float8  numHeapTuples 
)

Definition at line 174 of file nbtpage.c.

References _bt_getbuf(), _bt_lockbuf(), _bt_relbuf(), _bt_unlockbuf(), _bt_upgrademetapage(), xl_btree_metadata::allequalimage, Assert, BT_READ, BT_WRITE, BTMetaPageData::btm_allequalimage, BTMetaPageData::btm_fastlevel, BTMetaPageData::btm_fastroot, BTMetaPageData::btm_last_cleanup_num_heap_tuples, BTMetaPageData::btm_level, BTMetaPageData::btm_oldest_btpo_xact, BTMetaPageData::btm_root, BTMetaPageData::btm_version, BTPageGetMeta, BTREE_METAPAGE, BTREE_NOVAC_VERSION, BufferGetPage, END_CRIT_SECTION, xl_btree_metadata::fastlevel, xl_btree_metadata::fastroot, xl_btree_metadata::last_cleanup_num_heap_tuples, xl_btree_metadata::level, MarkBufferDirty(), xl_btree_metadata::oldest_btpo_xact, PageSetLSN, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationNeedsWAL, xl_btree_metadata::root, START_CRIT_SECTION, xl_btree_metadata::version, XLOG_BTREE_META_CLEANUP, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), and XLogRegisterBuffer().

Referenced by btvacuumscan().

176 {
177  Buffer metabuf;
178  Page metapg;
179  BTMetaPageData *metad;
180  bool needsRewrite = false;
181  XLogRecPtr recptr;
182 
183  /* read the metapage and check if it needs rewrite */
184  metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
185  metapg = BufferGetPage(metabuf);
186  metad = BTPageGetMeta(metapg);
187 
188  /* outdated version of metapage always needs rewrite */
189  if (metad->btm_version < BTREE_NOVAC_VERSION)
190  needsRewrite = true;
191  else if (metad->btm_oldest_btpo_xact != oldestBtpoXact ||
192  metad->btm_last_cleanup_num_heap_tuples != numHeapTuples)
193  needsRewrite = true;
194 
195  if (!needsRewrite)
196  {
197  _bt_relbuf(rel, metabuf);
198  return;
199  }
200 
201  /* trade in our read lock for a write lock */
202  _bt_unlockbuf(rel, metabuf);
203  _bt_lockbuf(rel, metabuf, BT_WRITE);
204 
206 
207  /* upgrade meta-page if needed */
208  if (metad->btm_version < BTREE_NOVAC_VERSION)
209  _bt_upgrademetapage(metapg);
210 
211  /* update cleanup-related information */
212  metad->btm_oldest_btpo_xact = oldestBtpoXact;
213  metad->btm_last_cleanup_num_heap_tuples = numHeapTuples;
214  MarkBufferDirty(metabuf);
215 
216  /* write wal record if needed */
217  if (RelationNeedsWAL(rel))
218  {
220 
221  XLogBeginInsert();
223 
225  md.version = metad->btm_version;
226  md.root = metad->btm_root;
227  md.level = metad->btm_level;
228  md.fastroot = metad->btm_fastroot;
229  md.fastlevel = metad->btm_fastlevel;
230  md.oldest_btpo_xact = oldestBtpoXact;
231  md.last_cleanup_num_heap_tuples = numHeapTuples;
232  md.allequalimage = metad->btm_allequalimage;
233 
234  XLogRegisterBufData(0, (char *) &md, sizeof(xl_btree_metadata));
235 
236  recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_META_CLEANUP);
237 
238  PageSetLSN(metapg, recptr);
239  }
240 
242  _bt_relbuf(rel, metabuf);
243 }
void XLogRegisterBufData(uint8 block_id, char *data, int len)
Definition: xloginsert.c:368
bool allequalimage
Definition: nbtxlog.h:57
void _bt_upgrademetapage(Page page)
Definition: nbtpage.c:101
uint32 btm_version
Definition: nbtree.h:101
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:803
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1469
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:220
#define END_CRIT_SECTION()
Definition: miscadmin.h:134
BlockNumber root
Definition: nbtxlog.h:51
#define REGBUF_WILL_INIT
Definition: xloginsert.h:33
#define START_CRIT_SECTION()
Definition: miscadmin.h:132
#define BT_READ
Definition: nbtree.h:587
BlockNumber btm_fastroot
Definition: nbtree.h:104
float8 last_cleanup_num_heap_tuples
Definition: nbtxlog.h:56
TransactionId oldest_btpo_xact
Definition: nbtxlog.h:55
#define BTPageGetMeta(p)
Definition: nbtree.h:114
bool btm_allequalimage
Definition: nbtree.h:111
#define REGBUF_STANDARD
Definition: xloginsert.h:35
#define BTREE_NOVAC_VERSION
Definition: nbtree.h:145
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define BTREE_METAPAGE
Definition: nbtree.h:141
uint32 version
Definition: nbtxlog.h:50
uint32 btm_fastlevel
Definition: nbtree.h:105
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:422
void _bt_lockbuf(Relation rel, Buffer buf, int access)
Definition: nbtpage.c:975
BlockNumber btm_root
Definition: nbtree.h:102
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:959
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:745
float8 btm_last_cleanup_num_heap_tuples
Definition: nbtree.h:109
#define RelationNeedsWAL(relation)
Definition: rel.h:562
void _bt_unlockbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:1006
uint32 fastlevel
Definition: nbtxlog.h:54
uint32 btm_level
Definition: nbtree.h:103
uint32 level
Definition: nbtxlog.h:52
BlockNumber fastroot
Definition: nbtxlog.h:53
TransactionId btm_oldest_btpo_xact
Definition: nbtree.h:107
#define BT_WRITE
Definition: nbtree.h:588
void XLogBeginInsert(void)
Definition: xloginsert.c:123
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
int Buffer
Definition: buf.h:23
#define XLOG_BTREE_META_CLEANUP
Definition: nbtxlog.h:42
Pointer Page
Definition: bufpage.h:78

◆ _bt_upgradelockbufcleanup()

void _bt_upgradelockbufcleanup ( Relation  rel,
Buffer  buf 
)

Definition at line 1046 of file nbtpage.c.

References BUFFER_LOCK_UNLOCK, BufferGetPage, LockBuffer(), LockBufferForCleanup(), and VALGRIND_CHECK_MEM_IS_DEFINED.

Referenced by btvacuumpage().

1047 {
1048  /*
1049  * Buffer is pinned and locked, which means that it is expected to be
1050  * defined and addressable. Check that proactively.
1051  */
1053 
1054  /* LockBuffer() asserts that pin is held by this backend */
1057 }
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:96
void LockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:3814
#define VALGRIND_CHECK_MEM_IS_DEFINED(addr, size)
Definition: memdebug.h:23
static char * buf
Definition: pg_test_fsync.c:67
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3757

◆ _bt_upgrademetapage()

void _bt_upgrademetapage ( Page  page)

Definition at line 101 of file nbtpage.c.

References Assert, BTMetaPageData::btm_allequalimage, BTMetaPageData::btm_last_cleanup_num_heap_tuples, BTMetaPageData::btm_oldest_btpo_xact, BTMetaPageData::btm_version, BTP_META, BTPageGetMeta, BTREE_MIN_VERSION, BTREE_NOVAC_VERSION, InvalidTransactionId, PageGetSpecialPointer, and PG_USED_FOR_ASSERTS_ONLY.

Referenced by _bt_getroot(), _bt_insertonpg(), _bt_newroot(), _bt_unlink_halfdead_page(), and _bt_update_meta_cleanup_info().

102 {
103  BTMetaPageData *metad;
105 
106  metad = BTPageGetMeta(page);
107  metaopaque = (BTPageOpaque) PageGetSpecialPointer(page);
108 
109  /* It must be really a meta page of upgradable version */
110  Assert(metaopaque->btpo_flags & BTP_META);
113 
114  /* Set version number and fill extra fields added into version 3 */
117  metad->btm_last_cleanup_num_heap_tuples = -1.0;
118  /* Only a REINDEX can set this field */
119  Assert(!metad->btm_allequalimage);
120  metad->btm_allequalimage = false;
121 
122  /* Adjust pd_lower (see _bt_initmetapage() for details) */
123  ((PageHeader) page)->pd_lower =
124  ((char *) metad + sizeof(BTMetaPageData)) - (char *) page;
125 }
uint32 btm_version
Definition: nbtree.h:101
#define BTP_META
Definition: nbtree.h:75
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
#define BTPageGetMeta(p)
Definition: nbtree.h:114
bool btm_allequalimage
Definition: nbtree.h:111
#define InvalidTransactionId
Definition: transam.h:31
#define BTREE_NOVAC_VERSION
Definition: nbtree.h:145
#define BTREE_MIN_VERSION
Definition: nbtree.h:144
PageHeaderData * PageHeader
Definition: bufpage.h:166
#define Assert(condition)
Definition: c.h:745
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
float8 btm_last_cleanup_num_heap_tuples
Definition: nbtree.h:109
TransactionId btm_oldest_btpo_xact
Definition: nbtree.h:107
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:121

◆ _bt_xid_horizon()

static TransactionId _bt_xid_horizon ( Relation  rel,
Relation  heapRel,
Page  page,
OffsetNumber deletable,
int  ndeletable 
)
static

Definition at line 1359 of file nbtpage.c.

References Assert, BTreeTupleGetNPosting(), BTreeTupleGetPostingN(), BTreeTupleIsPivot(), BTreeTupleIsPosting(), i, InvalidTransactionId, ItemIdIsDead, ItemPointerCopy, ItemPointerIsValid, Max, PageGetItem, PageGetItemId, palloc(), pfree(), repalloc(), IndexTupleData::t_tid, and table_compute_xid_horizon_for_tuples().

Referenced by _bt_delitems_delete().

1361 {
1362  TransactionId latestRemovedXid = InvalidTransactionId;
1363  int spacenhtids;
1364  int nhtids;
1365  ItemPointer htids;
1366 
1367  /* Array will grow iff there are posting list tuples to consider */
1368  spacenhtids = ndeletable;
1369  nhtids = 0;
1370  htids = (ItemPointer) palloc(sizeof(ItemPointerData) * spacenhtids);
1371  for (int i = 0; i < ndeletable; i++)
1372  {
1373  ItemId itemid;
1374  IndexTuple itup;
1375 
1376  itemid = PageGetItemId(page, deletable[i]);
1377  itup = (IndexTuple) PageGetItem(page, itemid);
1378 
1379  Assert(ItemIdIsDead(itemid));
1380  Assert(!BTreeTupleIsPivot(itup));
1381 
1382  if (!BTreeTupleIsPosting(itup))
1383  {
1384  if (nhtids + 1 > spacenhtids)
1385  {
1386  spacenhtids *= 2;
1387  htids = (ItemPointer)
1388  repalloc(htids, sizeof(ItemPointerData) * spacenhtids);
1389  }
1390 
1391  Assert(ItemPointerIsValid(&itup->t_tid));
1392  ItemPointerCopy(&itup->t_tid, &htids[nhtids]);
1393  nhtids++;
1394  }
1395  else
1396  {
1397  int nposting = BTreeTupleGetNPosting(itup);
1398 
1399  if (nhtids + nposting > spacenhtids)
1400  {
1401  spacenhtids = Max(spacenhtids * 2, nhtids + nposting);
1402  htids = (ItemPointer)
1403  repalloc(htids, sizeof(ItemPointerData) * spacenhtids);
1404  }
1405 
1406  for (int j = 0; j < nposting; j++)
1407  {
1408  ItemPointer htid = BTreeTupleGetPostingN(itup, j);
1409 
1410  Assert(ItemPointerIsValid(htid));
1411  ItemPointerCopy(htid, &htids[nhtids]);
1412  nhtids++;
1413  }
1414  }
1415  }
1416 
1417  Assert(nhtids >= ndeletable);
1418 
1419  latestRemovedXid =
1420  table_compute_xid_horizon_for_tuples(heapRel, htids, nhtids);
1421 
1422  pfree(htids);
1423 
1424  return latestRemovedXid;
1425 }
#define ItemPointerIsValid(pointer)
Definition: itemptr.h:82
static bool BTreeTupleIsPivot(IndexTuple itup)
Definition: nbtree.h:348
uint32 TransactionId
Definition: c.h:520
ItemPointerData t_tid
Definition: itup.h:37
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
ItemPointerData * ItemPointer
Definition: itemptr.h:49
static TransactionId table_compute_xid_horizon_for_tuples(Relation rel, ItemPointerData *items, int nitems)
Definition: tableam.h:1130
void pfree(void *pointer)
Definition: mcxt.c:1057
IndexTupleData * IndexTuple
Definition: itup.h:53
#define InvalidTransactionId
Definition: transam.h:31
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
static ItemPointer BTreeTupleGetPostingN(IndexTuple posting, int n)
Definition: nbtree.h:412
static bool BTreeTupleIsPosting(IndexTuple itup)
Definition: nbtree.h:360
#define Max(x, y)
Definition: c.h:921
#define Assert(condition)
Definition: c.h:745
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1070
void * palloc(Size size)
Definition: mcxt.c:950
int i
static uint16 BTreeTupleGetNPosting(IndexTuple posting)
Definition: nbtree.h:386
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
#define ItemPointerCopy(fromPointer, toPointer)
Definition: itemptr.h:161