PostgreSQL Source Code  git master
nbtpage.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "access/nbtxlog.h"
#include "access/tableam.h"
#include "access/transam.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "miscadmin.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h"
#include "storage/predicate.h"
#include "utils/memdebug.h"
#include "utils/snapmgr.h"
Include dependency graph for nbtpage.c:

Go to the source code of this file.

Functions

static BTMetaPageData_bt_getmeta (Relation rel, Buffer metabuf)
 
static void _bt_log_reuse_page (Relation rel, BlockNumber blkno, TransactionId latestRemovedXid)
 
static TransactionId _bt_xid_horizon (Relation rel, Relation heapRel, Page page, OffsetNumber *deletable, int ndeletable)
 
static bool _bt_mark_page_halfdead (Relation rel, Buffer leafbuf, BTStack stack)
 
static bool _bt_unlink_halfdead_page (Relation rel, Buffer leafbuf, BlockNumber scanblkno, bool *rightsib_empty, TransactionId *oldestBtpoXact, uint32 *ndeleted)
 
static bool _bt_lock_subtree_parent (Relation rel, BlockNumber child, BTStack stack, Buffer *subtreeparent, OffsetNumber *poffset, BlockNumber *topparent, BlockNumber *topparentrightsib)
 
void _bt_initmetapage (Page page, BlockNumber rootbknum, uint32 level, bool allequalimage)
 
void _bt_upgrademetapage (Page page)
 
void _bt_update_meta_cleanup_info (Relation rel, TransactionId oldestBtpoXact, float8 numHeapTuples)
 
Buffer _bt_getroot (Relation rel, int access)
 
Buffer _bt_gettrueroot (Relation rel)
 
int _bt_getrootheight (Relation rel)
 
void _bt_metaversion (Relation rel, bool *heapkeyspace, bool *allequalimage)
 
void _bt_checkpage (Relation rel, Buffer buf)
 
Buffer _bt_getbuf (Relation rel, BlockNumber blkno, int access)
 
Buffer _bt_relandgetbuf (Relation rel, Buffer obuf, BlockNumber blkno, int access)
 
void _bt_relbuf (Relation rel, Buffer buf)
 
void _bt_lockbuf (Relation rel, Buffer buf, int access)
 
void _bt_unlockbuf (Relation rel, Buffer buf)
 
bool _bt_conditionallockbuf (Relation rel, Buffer buf)
 
void _bt_upgradelockbufcleanup (Relation rel, Buffer buf)
 
void _bt_pageinit (Page page, Size size)
 
bool _bt_page_recyclable (Page page)
 
void _bt_delitems_vacuum (Relation rel, Buffer buf, OffsetNumber *deletable, int ndeletable, BTVacuumPosting *updatable, int nupdatable)
 
void _bt_delitems_delete (Relation rel, Buffer buf, OffsetNumber *deletable, int ndeletable, Relation heapRel)
 
static bool _bt_leftsib_splitflag (Relation rel, BlockNumber leftsib, BlockNumber target)
 
static bool _bt_rightsib_halfdeadflag (Relation rel, BlockNumber leafrightsib)
 
uint32 _bt_pagedel (Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
 

Function Documentation

◆ _bt_checkpage()

void _bt_checkpage ( Relation  rel,
Buffer  buf 
)

Definition at line 726 of file nbtpage.c.

References BufferGetBlockNumber(), BufferGetPage, ereport, errcode(), errhint(), errmsg(), ERROR, MAXALIGN, PageGetSpecialSize, PageIsNew, and RelationGetRelationName.

Referenced by _bt_getbuf(), _bt_relandgetbuf(), _bt_search_insert(), bt_recheck_sibling_links(), btvacuumpage(), and palloc_btree_page().

727 {
728  Page page = BufferGetPage(buf);
729 
730  /*
731  * ReadBuffer verifies that every newly-read page passes
732  * PageHeaderIsValid, which means it either contains a reasonably sane
733  * page header or is all-zero. We have to defend against the all-zero
734  * case, however.
735  */
736  if (PageIsNew(page))
737  ereport(ERROR,
738  (errcode(ERRCODE_INDEX_CORRUPTED),
739  errmsg("index \"%s\" contains unexpected zero page at block %u",
742  errhint("Please REINDEX it.")));
743 
744  /*
745  * Additionally check that the special area looks sane.
746  */
747  if (PageGetSpecialSize(page) != MAXALIGN(sizeof(BTPageOpaqueData)))
748  ereport(ERROR,
749  (errcode(ERRCODE_INDEX_CORRUPTED),
750  errmsg("index \"%s\" contains corrupted page at block %u",
753  errhint("Please REINDEX it.")));
754 }
int errhint(const char *fmt,...)
Definition: elog.c:1149
int errcode(int sqlerrcode)
Definition: elog.c:691
#define ERROR
Definition: elog.h:43
static char * buf
Definition: pg_test_fsync.c:68
#define RelationGetRelationName(relation)
Definition: rel.h:491
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define ereport(elevel,...)
Definition: elog.h:155
#define MAXALIGN(LEN)
Definition: c.h:753
#define PageGetSpecialSize(page)
Definition: bufpage.h:300
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2663
#define PageIsNew(page)
Definition: bufpage.h:229
int errmsg(const char *fmt,...)
Definition: elog.c:902
Pointer Page
Definition: bufpage.h:78

◆ _bt_conditionallockbuf()

bool _bt_conditionallockbuf ( Relation  rel,
Buffer  buf 
)

Definition at line 1029 of file nbtpage.c.

References BufferGetPage, ConditionalLockBuffer(), RelationUsesLocalBuffers, and VALGRIND_MAKE_MEM_DEFINED.

Referenced by _bt_getbuf(), and _bt_search_insert().

1030 {
1031  /* ConditionalLockBuffer() asserts that pin is held by this backend */
1032  if (!ConditionalLockBuffer(buf))
1033  return false;
1034 
1035  if (!RelationUsesLocalBuffers(rel))
1037 
1038  return true;
1039 }
#define VALGRIND_MAKE_MEM_DEFINED(addr, size)
Definition: memdebug.h:26
static char * buf
Definition: pg_test_fsync.c:68
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
bool ConditionalLockBuffer(Buffer buffer)
Definition: bufmgr.c:3778
#define RelationUsesLocalBuffers(relation)
Definition: rel.h:573

◆ _bt_delitems_delete()

void _bt_delitems_delete ( Relation  rel,
Buffer  buf,
OffsetNumber deletable,
int  ndeletable,
Relation  heapRel 
)

Definition at line 1282 of file nbtpage.c.

References _bt_xid_horizon(), Assert, BTP_HAS_GARBAGE, BTPageOpaqueData::btpo_flags, BufferGetPage, END_CRIT_SECTION, InvalidTransactionId, xl_btree_delete::latestRemovedXid, MarkBufferDirty(), xl_btree_delete::ndeleted, PageGetSpecialPointer, PageIndexMultiDelete(), PageSetLSN, REGBUF_STANDARD, RelationNeedsWAL, SizeOfBtreeDelete, START_CRIT_SECTION, XLOG_BTREE_DELETE, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogStandbyInfoActive.

Referenced by _bt_delete_or_dedup_one_page().

1285 {
1286  Page page = BufferGetPage(buf);
1287  BTPageOpaque opaque;
1288  TransactionId latestRemovedXid = InvalidTransactionId;
1289 
1290  /* Shouldn't be called unless there's something to do */
1291  Assert(ndeletable > 0);
1292 
1294  latestRemovedXid =
1295  _bt_xid_horizon(rel, heapRel, page, deletable, ndeletable);
1296 
1297  /* No ereport(ERROR) until changes are logged */
1299 
1300  /* Fix the page */
1301  PageIndexMultiDelete(page, deletable, ndeletable);
1302 
1303  /*
1304  * Unlike _bt_delitems_vacuum, we *must not* clear the vacuum cycle ID,
1305  * because this is not called by VACUUM
1306  */
1307  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1308 
1309  /*
1310  * Clear the BTP_HAS_GARBAGE page flag.
1311  *
1312  * This flag indicates the presence of LP_DEAD items on the page (though
1313  * not reliably). Note that we only trust it with pg_upgrade'd
1314  * !heapkeyspace indexes.
1315  */
1316  opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
1317 
1319 
1320  /* XLOG stuff */
1321  if (RelationNeedsWAL(rel))
1322  {
1323  XLogRecPtr recptr;
1324  xl_btree_delete xlrec_delete;
1325 
1326  xlrec_delete.latestRemovedXid = latestRemovedXid;
1327  xlrec_delete.ndeleted = ndeletable;
1328 
1329  XLogBeginInsert();
1331  XLogRegisterData((char *) &xlrec_delete, SizeOfBtreeDelete);
1332 
1333  /*
1334  * The deletable array is not in the buffer, but pretend that it is.
1335  * When XLogInsert stores the whole buffer, the array need not be
1336  * stored too.
1337  */
1338  XLogRegisterBufData(0, (char *) deletable,
1339  ndeletable * sizeof(OffsetNumber));
1340 
1341  recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE);
1342 
1343  PageSetLSN(page, recptr);
1344  }
1345 
1346  END_CRIT_SECTION();
1347 }
void XLogRegisterBufData(uint8 block_id, char *data, int len)
Definition: xloginsert.c:368
TransactionId latestRemovedXid
Definition: nbtxlog.h:189
uint32 TransactionId
Definition: c.h:575
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1471
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:220
#define END_CRIT_SECTION()
Definition: miscadmin.h:134
#define START_CRIT_SECTION()
Definition: miscadmin.h:132
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
uint32 ndeleted
Definition: nbtxlog.h:190
uint16 OffsetNumber
Definition: off.h:24
static char * buf
Definition: pg_test_fsync.c:68
#define REGBUF_STANDARD
Definition: xloginsert.h:35
#define InvalidTransactionId
Definition: transam.h:31
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
static TransactionId _bt_xid_horizon(Relation rel, Relation heapRel, Page page, OffsetNumber *deletable, int ndeletable)
Definition: nbtpage.c:1358
#define XLOG_BTREE_DELETE
Definition: nbtxlog.h:33
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:330
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:422
#define XLogStandbyInfoActive()
Definition: xlog.h:205
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:800
void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
Definition: bufpage.c:1044
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
#define RelationNeedsWAL(relation)
Definition: rel.h:563
#define SizeOfBtreeDelete
Definition: nbtxlog.h:195
void XLogBeginInsert(void)
Definition: xloginsert.c:123
uint16 btpo_flags
Definition: nbtree.h:65
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
#define BTP_HAS_GARBAGE
Definition: nbtree.h:78
Pointer Page
Definition: bufpage.h:78

◆ _bt_delitems_vacuum()

void _bt_delitems_vacuum ( Relation  rel,
Buffer  buf,
OffsetNumber deletable,
int  ndeletable,
BTVacuumPosting updatable,
int  nupdatable 
)

Definition at line 1124 of file nbtpage.c.

References _bt_update_posting(), Assert, BTP_HAS_GARBAGE, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BufferGetBlockNumber(), BufferGetPage, BTVacuumPostingData::deletetids, elog, END_CRIT_SECTION, i, IndexTupleSize, BTVacuumPostingData::itup, MarkBufferDirty(), MAXALIGN, MaxIndexTuplesPerPage, xl_btree_vacuum::ndeleted, xl_btree_update::ndeletedtids, BTVacuumPostingData::ndeletedtids, xl_btree_vacuum::nupdated, PageGetSpecialPointer, PageIndexMultiDelete(), PageIndexTupleOverwrite(), PageSetLSN, palloc(), PANIC, pfree(), REGBUF_STANDARD, RelationGetRelationName, RelationNeedsWAL, SizeOfBtreeUpdate, SizeOfBtreeVacuum, START_CRIT_SECTION, BTVacuumPostingData::updatedoffset, XLOG_BTREE_VACUUM, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by btvacuumpage().

1127 {
1128  Page page = BufferGetPage(buf);
1129  BTPageOpaque opaque;
1130  Size itemsz;
1131  char *updatedbuf = NULL;
1132  Size updatedbuflen = 0;
1133  OffsetNumber updatedoffsets[MaxIndexTuplesPerPage];
1134 
1135  /* Shouldn't be called unless there's something to do */
1136  Assert(ndeletable > 0 || nupdatable > 0);
1137 
1138  for (int i = 0; i < nupdatable; i++)
1139  {
1140  /* Replace work area IndexTuple with updated version */
1141  _bt_update_posting(updatable[i]);
1142 
1143  /* Maintain array of updatable page offsets for WAL record */
1144  updatedoffsets[i] = updatable[i]->updatedoffset;
1145  }
1146 
1147  /* XLOG stuff -- allocate and fill buffer before critical section */
1148  if (nupdatable > 0 && RelationNeedsWAL(rel))
1149  {
1150  Size offset = 0;
1151 
1152  for (int i = 0; i < nupdatable; i++)
1153  {
1154  BTVacuumPosting vacposting = updatable[i];
1155 
1156  itemsz = SizeOfBtreeUpdate +
1157  vacposting->ndeletedtids * sizeof(uint16);
1158  updatedbuflen += itemsz;
1159  }
1160 
1161  updatedbuf = palloc(updatedbuflen);
1162  for (int i = 0; i < nupdatable; i++)
1163  {
1164  BTVacuumPosting vacposting = updatable[i];
1165  xl_btree_update update;
1166 
1167  update.ndeletedtids = vacposting->ndeletedtids;
1168  memcpy(updatedbuf + offset, &update.ndeletedtids,
1170  offset += SizeOfBtreeUpdate;
1171 
1172  itemsz = update.ndeletedtids * sizeof(uint16);
1173  memcpy(updatedbuf + offset, vacposting->deletetids, itemsz);
1174  offset += itemsz;
1175  }
1176  }
1177 
1178  /* No ereport(ERROR) until changes are logged */
1180 
1181  /*
1182  * Handle posting tuple updates.
1183  *
1184  * Deliberately do this before handling simple deletes. If we did it the
1185  * other way around (i.e. WAL record order -- simple deletes before
1186  * updates) then we'd have to make compensating changes to the 'updatable'
1187  * array of offset numbers.
1188  *
1189  * PageIndexTupleOverwrite() won't unset each item's LP_DEAD bit when it
1190  * happens to already be set. It's important that we not interfere with
1191  * _bt_delitems_delete().
1192  */
1193  for (int i = 0; i < nupdatable; i++)
1194  {
1195  OffsetNumber updatedoffset = updatedoffsets[i];
1196  IndexTuple itup;
1197 
1198  itup = updatable[i]->itup;
1199  itemsz = MAXALIGN(IndexTupleSize(itup));
1200  if (!PageIndexTupleOverwrite(page, updatedoffset, (Item) itup,
1201  itemsz))
1202  elog(PANIC, "failed to update partially dead item in block %u of index \"%s\"",
1204  }
1205 
1206  /* Now handle simple deletes of entire tuples */
1207  if (ndeletable > 0)
1208  PageIndexMultiDelete(page, deletable, ndeletable);
1209 
1210  /*
1211  * We can clear the vacuum cycle ID since this page has certainly been
1212  * processed by the current vacuum scan.
1213  */
1214  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1215  opaque->btpo_cycleid = 0;
1216 
1217  /*
1218  * Clear the BTP_HAS_GARBAGE page flag.
1219  *
1220  * This flag indicates the presence of LP_DEAD items on the page (though
1221  * not reliably). Note that we only trust it with pg_upgrade'd
1222  * !heapkeyspace indexes. That's why clearing it here won't usually
1223  * interfere with _bt_delitems_delete().
1224  */
1225  opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
1226 
1228 
1229  /* XLOG stuff */
1230  if (RelationNeedsWAL(rel))
1231  {
1232  XLogRecPtr recptr;
1233  xl_btree_vacuum xlrec_vacuum;
1234 
1235  xlrec_vacuum.ndeleted = ndeletable;
1236  xlrec_vacuum.nupdated = nupdatable;
1237 
1238  XLogBeginInsert();
1240  XLogRegisterData((char *) &xlrec_vacuum, SizeOfBtreeVacuum);
1241 
1242  if (ndeletable > 0)
1243  XLogRegisterBufData(0, (char *) deletable,
1244  ndeletable * sizeof(OffsetNumber));
1245 
1246  if (nupdatable > 0)
1247  {
1248  XLogRegisterBufData(0, (char *) updatedoffsets,
1249  nupdatable * sizeof(OffsetNumber));
1250  XLogRegisterBufData(0, updatedbuf, updatedbuflen);
1251  }
1252 
1253  recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_VACUUM);
1254 
1255  PageSetLSN(page, recptr);
1256  }
1257 
1258  END_CRIT_SECTION();
1259 
1260  /* can't leak memory here */
1261  if (updatedbuf != NULL)
1262  pfree(updatedbuf);
1263  /* free tuples generated by calling _bt_update_posting() */
1264  for (int i = 0; i < nupdatable; i++)
1265  pfree(updatable[i]->itup);
1266 }
void XLogRegisterBufData(uint8 block_id, char *data, int len)
Definition: xloginsert.c:368
uint16 ndeletedtids
Definition: nbtree.h:782
void _bt_update_posting(BTVacuumPosting vacposting)
Definition: nbtdedup.c:632
#define SizeOfBtreeUpdate
Definition: nbtxlog.h:225
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1471
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:220
OffsetNumber updatedoffset
Definition: nbtree.h:779
#define END_CRIT_SECTION()
Definition: miscadmin.h:134
IndexTuple itup
Definition: nbtree.h:778
Pointer Item
Definition: item.h:17
#define START_CRIT_SECTION()
Definition: miscadmin.h:132
uint16 nupdated
Definition: nbtxlog.h:243
#define PANIC
Definition: elog.h:53
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
uint16 OffsetNumber
Definition: off.h:24
unsigned short uint16
Definition: c.h:428
void pfree(void *pointer)
Definition: mcxt.c:1057
#define SizeOfBtreeVacuum
Definition: nbtxlog.h:250
BTCycleId btpo_cycleid
Definition: nbtree.h:66
bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum, Item newtup, Size newsize)
Definition: bufpage.c:1288
static char * buf
Definition: pg_test_fsync.c:68
#define REGBUF_STANDARD
Definition: xloginsert.h:35
#define XLOG_BTREE_VACUUM
Definition: nbtxlog.h:38
#define RelationGetRelationName(relation)
Definition: rel.h:491
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
uint16 ndeleted
Definition: nbtxlog.h:242
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:330
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:422
uint16 deletetids[FLEXIBLE_ARRAY_MEMBER]
Definition: nbtree.h:783
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:800
void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
Definition: bufpage.c:1044
size_t Size
Definition: c.h:528
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
#define MAXALIGN(LEN)
Definition: c.h:753
#define RelationNeedsWAL(relation)
Definition: rel.h:563
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2663
#define MaxIndexTuplesPerPage
Definition: itup.h:145
void * palloc(Size size)
Definition: mcxt.c:950
#define elog(elevel,...)
Definition: elog.h:228
int i
void XLogBeginInsert(void)
Definition: xloginsert.c:123
uint16 btpo_flags
Definition: nbtree.h:65
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
#define BTP_HAS_GARBAGE
Definition: nbtree.h:78
uint16 ndeletedtids
Definition: nbtxlog.h:220
Pointer Page
Definition: bufpage.h:78
#define IndexTupleSize(itup)
Definition: itup.h:71

◆ _bt_getbuf()

Buffer _bt_getbuf ( Relation  rel,
BlockNumber  blkno,
int  access 
)

Definition at line 803 of file nbtpage.c.

References _bt_checkpage(), _bt_conditionallockbuf(), _bt_lockbuf(), _bt_log_reuse_page(), _bt_page_recyclable(), _bt_pageinit(), _bt_relbuf(), Assert, BT_WRITE, BTPageOpaqueData::btpo, buf, BufferGetPage, BufferGetPageSize, DEBUG2, elog, ExclusiveLock, GetFreeIndexPage(), InvalidBlockNumber, LockRelationForExtension(), P_NEW, PageGetSpecialPointer, PageIsNew, ReadBuffer(), RELATION_IS_LOCAL, RelationNeedsWAL, ReleaseBuffer(), UnlockRelationForExtension(), BTPageOpaqueData::xact, and XLogStandbyInfoActive.

Referenced by _bt_finish_split(), _bt_getroot(), _bt_getrootheight(), _bt_getstackbuf(), _bt_gettrueroot(), _bt_insertonpg(), _bt_killitems(), _bt_leftsib_splitflag(), _bt_metaversion(), _bt_moveright(), _bt_newroot(), _bt_pagedel(), _bt_readnextpage(), _bt_rightsib_halfdeadflag(), _bt_split(), _bt_unlink_halfdead_page(), _bt_update_meta_cleanup_info(), _bt_vacuum_needs_cleanup(), and _bt_walk_left().

804 {
805  Buffer buf;
806 
807  if (blkno != P_NEW)
808  {
809  /* Read an existing block of the relation */
810  buf = ReadBuffer(rel, blkno);
811  _bt_lockbuf(rel, buf, access);
812  _bt_checkpage(rel, buf);
813  }
814  else
815  {
816  bool needLock;
817  Page page;
818 
819  Assert(access == BT_WRITE);
820 
821  /*
822  * First see if the FSM knows of any free pages.
823  *
824  * We can't trust the FSM's report unreservedly; we have to check that
825  * the page is still free. (For example, an already-free page could
826  * have been re-used between the time the last VACUUM scanned it and
827  * the time the VACUUM made its FSM updates.)
828  *
829  * In fact, it's worse than that: we can't even assume that it's safe
830  * to take a lock on the reported page. If somebody else has a lock
831  * on it, or even worse our own caller does, we could deadlock. (The
832  * own-caller scenario is actually not improbable. Consider an index
833  * on a serial or timestamp column. Nearly all splits will be at the
834  * rightmost page, so it's entirely likely that _bt_split will call us
835  * while holding a lock on the page most recently acquired from FSM. A
836  * VACUUM running concurrently with the previous split could well have
837  * placed that page back in FSM.)
838  *
839  * To get around that, we ask for only a conditional lock on the
840  * reported page. If we fail, then someone else is using the page,
841  * and we may reasonably assume it's not free. (If we happen to be
842  * wrong, the worst consequence is the page will be lost to use till
843  * the next VACUUM, which is no big problem.)
844  */
845  for (;;)
846  {
847  blkno = GetFreeIndexPage(rel);
848  if (blkno == InvalidBlockNumber)
849  break;
850  buf = ReadBuffer(rel, blkno);
851  if (_bt_conditionallockbuf(rel, buf))
852  {
853  page = BufferGetPage(buf);
854  if (_bt_page_recyclable(page))
855  {
856  /*
857  * If we are generating WAL for Hot Standby then create a
858  * WAL record that will allow us to conflict with queries
859  * running on standby, in case they have snapshots older
860  * than btpo.xact. This can only apply if the page does
861  * have a valid btpo.xact value, ie not if it's new. (We
862  * must check that because an all-zero page has no special
863  * space.)
864  */
865  if (XLogStandbyInfoActive() && RelationNeedsWAL(rel) &&
866  !PageIsNew(page))
867  {
869 
870  _bt_log_reuse_page(rel, blkno, opaque->btpo.xact);
871  }
872 
873  /* Okay to use page. Re-initialize and return it */
874  _bt_pageinit(page, BufferGetPageSize(buf));
875  return buf;
876  }
877  elog(DEBUG2, "FSM returned nonrecyclable page");
878  _bt_relbuf(rel, buf);
879  }
880  else
881  {
882  elog(DEBUG2, "FSM returned nonlockable page");
883  /* couldn't get lock, so just drop pin */
884  ReleaseBuffer(buf);
885  }
886  }
887 
888  /*
889  * Extend the relation by one page.
890  *
891  * We have to use a lock to ensure no one else is extending the rel at
892  * the same time, else we will both try to initialize the same new
893  * page. We can skip locking for new or temp relations, however,
894  * since no one else could be accessing them.
895  */
896  needLock = !RELATION_IS_LOCAL(rel);
897 
898  if (needLock)
900 
901  buf = ReadBuffer(rel, P_NEW);
902 
903  /* Acquire buffer lock on new page */
904  _bt_lockbuf(rel, buf, BT_WRITE);
905 
906  /*
907  * Release the file-extension lock; it's now OK for someone else to
908  * extend the relation some more. Note that we cannot release this
909  * lock before we have buffer lock on the new page, or we risk a race
910  * condition against btvacuumscan --- see comments therein.
911  */
912  if (needLock)
914 
915  /* Initialize the new page before returning it */
916  page = BufferGetPage(buf);
917  Assert(PageIsNew(page));
918  _bt_pageinit(page, BufferGetPageSize(buf));
919  }
920 
921  /* ref count and lock type are correct */
922  return buf;
923 }
#define ExclusiveLock
Definition: lockdefs.h:44
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:584
union BTPageOpaqueData::@45 btpo
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3513
#define P_NEW
Definition: bufmgr.h:91
bool _bt_page_recyclable(Page page)
Definition: nbtpage.c:1081
TransactionId xact
Definition: nbtree.h:63
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
void _bt_checkpage(Relation rel, Buffer buf)
Definition: nbtpage.c:726
#define DEBUG2
Definition: elog.h:24
static char * buf
Definition: pg_test_fsync.c:68
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
static void _bt_log_reuse_page(Relation rel, BlockNumber blkno, TransactionId latestRemovedXid)
Definition: nbtpage.c:760
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:402
void _bt_lockbuf(Relation rel, Buffer buf, int access)
Definition: nbtpage.c:975
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:452
#define BufferGetPageSize(buffer)
Definition: bufmgr.h:156
#define XLogStandbyInfoActive()
Definition: xlog.h:205
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:959
BlockNumber GetFreeIndexPage(Relation rel)
Definition: indexfsm.c:38
#define Assert(condition)
Definition: c.h:800
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:607
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
#define InvalidBlockNumber
Definition: block.h:33
#define RelationNeedsWAL(relation)
Definition: rel.h:563
#define PageIsNew(page)
Definition: bufpage.h:229
#define elog(elevel,...)
Definition: elog.h:228
void _bt_pageinit(Page page, Size size)
Definition: nbtpage.c:1066
bool _bt_conditionallockbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:1029
#define BT_WRITE
Definition: nbtree.h:588
int Buffer
Definition: buf.h:23
Pointer Page
Definition: bufpage.h:78

◆ _bt_getmeta()

static BTMetaPageData * _bt_getmeta ( Relation  rel,
Buffer  metabuf 
)
static

Definition at line 136 of file nbtpage.c.

References BTMetaPageData::btm_magic, BTMetaPageData::btm_version, BTPageGetMeta, BTREE_MAGIC, BTREE_MIN_VERSION, BTREE_VERSION, BufferGetPage, ereport, errcode(), errmsg(), ERROR, P_ISMETA, PageGetSpecialPointer, and RelationGetRelationName.

Referenced by _bt_getroot(), _bt_getrootheight(), and _bt_metaversion().

137 {
138  Page metapg;
139  BTPageOpaque metaopaque;
140  BTMetaPageData *metad;
141 
142  metapg = BufferGetPage(metabuf);
143  metaopaque = (BTPageOpaque) PageGetSpecialPointer(metapg);
144  metad = BTPageGetMeta(metapg);
145 
146  /* sanity-check the metapage */
147  if (!P_ISMETA(metaopaque) ||
148  metad->btm_magic != BTREE_MAGIC)
149  ereport(ERROR,
150  (errcode(ERRCODE_INDEX_CORRUPTED),
151  errmsg("index \"%s\" is not a btree",
152  RelationGetRelationName(rel))));
153 
154  if (metad->btm_version < BTREE_MIN_VERSION ||
155  metad->btm_version > BTREE_VERSION)
156  ereport(ERROR,
157  (errcode(ERRCODE_INDEX_CORRUPTED),
158  errmsg("version mismatch in index \"%s\": file version %d, "
159  "current version %d, minimal supported version %d",
162 
163  return metad;
164 }
uint32 btm_version
Definition: nbtree.h:101
#define BTREE_VERSION
Definition: nbtree.h:143
uint32 btm_magic
Definition: nbtree.h:100
int errcode(int sqlerrcode)
Definition: elog.c:691
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
#define P_ISMETA(opaque)
Definition: nbtree.h:217
#define BTREE_MAGIC
Definition: nbtree.h:142
#define ERROR
Definition: elog.h:43
#define BTPageGetMeta(p)
Definition: nbtree.h:114
#define RelationGetRelationName(relation)
Definition: rel.h:491
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define BTREE_MIN_VERSION
Definition: nbtree.h:144
#define ereport(elevel,...)
Definition: elog.h:155
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
int errmsg(const char *fmt,...)
Definition: elog.c:902
Pointer Page
Definition: bufpage.h:78

◆ _bt_getroot()

Buffer _bt_getroot ( Relation  rel,
int  access 
)

Definition at line 272 of file nbtpage.c.

References _bt_getbuf(), _bt_getmeta(), _bt_getroot(), _bt_lockbuf(), _bt_relandgetbuf(), _bt_relbuf(), _bt_unlockbuf(), _bt_upgrademetapage(), xl_btree_metadata::allequalimage, Assert, BT_READ, BT_WRITE, BTMetaPageData::btm_allequalimage, BTMetaPageData::btm_fastlevel, BTMetaPageData::btm_fastroot, BTMetaPageData::btm_last_cleanup_num_heap_tuples, BTMetaPageData::btm_level, BTMetaPageData::btm_magic, BTMetaPageData::btm_oldest_btpo_xact, BTMetaPageData::btm_root, BTMetaPageData::btm_version, BTP_LEAF, BTP_ROOT, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BTREE_MAGIC, BTREE_METAPAGE, BTREE_MIN_VERSION, BTREE_NOVAC_VERSION, BTREE_VERSION, BufferGetBlockNumber(), BufferGetPage, elog, END_CRIT_SECTION, ERROR, xl_btree_metadata::fastlevel, xl_btree_metadata::fastroot, InvalidBuffer, InvalidTransactionId, xl_btree_metadata::last_cleanup_num_heap_tuples, xl_btree_metadata::level, BTPageOpaqueData::level, xl_btree_newroot::level, MarkBufferDirty(), MemoryContextAlloc(), xl_btree_metadata::oldest_btpo_xact, P_IGNORE, P_LEFTMOST, P_NEW, P_NONE, P_RIGHTMOST, PageGetSpecialPointer, PageSetLSN, pfree(), RelationData::rd_amcache, RelationData::rd_indexcxt, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetRelationName, RelationNeedsWAL, xl_btree_metadata::root, xl_btree_newroot::rootblk, SizeOfBtreeNewroot, START_CRIT_SECTION, xl_btree_metadata::version, XLOG_BTREE_NEWROOT, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _bt_get_endpoint(), _bt_getroot(), and _bt_search().

273 {
274  Buffer metabuf;
275  Buffer rootbuf;
276  Page rootpage;
277  BTPageOpaque rootopaque;
278  BlockNumber rootblkno;
279  uint32 rootlevel;
280  BTMetaPageData *metad;
281 
282  /*
283  * Try to use previously-cached metapage data to find the root. This
284  * normally saves one buffer access per index search, which is a very
285  * helpful savings in bufmgr traffic and hence contention.
286  */
287  if (rel->rd_amcache != NULL)
288  {
289  metad = (BTMetaPageData *) rel->rd_amcache;
290  /* We shouldn't have cached it if any of these fail */
291  Assert(metad->btm_magic == BTREE_MAGIC);
293  Assert(metad->btm_version <= BTREE_VERSION);
294  Assert(!metad->btm_allequalimage ||
296  Assert(metad->btm_root != P_NONE);
297 
298  rootblkno = metad->btm_fastroot;
299  Assert(rootblkno != P_NONE);
300  rootlevel = metad->btm_fastlevel;
301 
302  rootbuf = _bt_getbuf(rel, rootblkno, BT_READ);
303  rootpage = BufferGetPage(rootbuf);
304  rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage);
305 
306  /*
307  * Since the cache might be stale, we check the page more carefully
308  * here than normal. We *must* check that it's not deleted. If it's
309  * not alone on its level, then we reject too --- this may be overly
310  * paranoid but better safe than sorry. Note we don't check P_ISROOT,
311  * because that's not set in a "fast root".
312  */
313  if (!P_IGNORE(rootopaque) &&
314  rootopaque->btpo.level == rootlevel &&
315  P_LEFTMOST(rootopaque) &&
316  P_RIGHTMOST(rootopaque))
317  {
318  /* OK, accept cached page as the root */
319  return rootbuf;
320  }
321  _bt_relbuf(rel, rootbuf);
322  /* Cache is stale, throw it away */
323  if (rel->rd_amcache)
324  pfree(rel->rd_amcache);
325  rel->rd_amcache = NULL;
326  }
327 
328  metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
329  metad = _bt_getmeta(rel, metabuf);
330 
331  /* if no root page initialized yet, do it */
332  if (metad->btm_root == P_NONE)
333  {
334  Page metapg;
335 
336  /* If access = BT_READ, caller doesn't want us to create root yet */
337  if (access == BT_READ)
338  {
339  _bt_relbuf(rel, metabuf);
340  return InvalidBuffer;
341  }
342 
343  /* trade in our read lock for a write lock */
344  _bt_unlockbuf(rel, metabuf);
345  _bt_lockbuf(rel, metabuf, BT_WRITE);
346 
347  /*
348  * Race condition: if someone else initialized the metadata between
349  * the time we released the read lock and acquired the write lock, we
350  * must avoid doing it again.
351  */
352  if (metad->btm_root != P_NONE)
353  {
354  /*
355  * Metadata initialized by someone else. In order to guarantee no
356  * deadlocks, we have to release the metadata page and start all
357  * over again. (Is that really true? But it's hardly worth trying
358  * to optimize this case.)
359  */
360  _bt_relbuf(rel, metabuf);
361  return _bt_getroot(rel, access);
362  }
363 
364  /*
365  * Get, initialize, write, and leave a lock of the appropriate type on
366  * the new root page. Since this is the first page in the tree, it's
367  * a leaf as well as the root.
368  */
369  rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE);
370  rootblkno = BufferGetBlockNumber(rootbuf);
371  rootpage = BufferGetPage(rootbuf);
372  rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage);
373  rootopaque->btpo_prev = rootopaque->btpo_next = P_NONE;
374  rootopaque->btpo_flags = (BTP_LEAF | BTP_ROOT);
375  rootopaque->btpo.level = 0;
376  rootopaque->btpo_cycleid = 0;
377  /* Get raw page pointer for metapage */
378  metapg = BufferGetPage(metabuf);
379 
380  /* NO ELOG(ERROR) till meta is updated */
382 
383  /* upgrade metapage if needed */
384  if (metad->btm_version < BTREE_NOVAC_VERSION)
385  _bt_upgrademetapage(metapg);
386 
387  metad->btm_root = rootblkno;
388  metad->btm_level = 0;
389  metad->btm_fastroot = rootblkno;
390  metad->btm_fastlevel = 0;
392  metad->btm_last_cleanup_num_heap_tuples = -1.0;
393 
394  MarkBufferDirty(rootbuf);
395  MarkBufferDirty(metabuf);
396 
397  /* XLOG stuff */
398  if (RelationNeedsWAL(rel))
399  {
400  xl_btree_newroot xlrec;
401  XLogRecPtr recptr;
403 
404  XLogBeginInsert();
407 
409  md.version = metad->btm_version;
410  md.root = rootblkno;
411  md.level = 0;
412  md.fastroot = rootblkno;
413  md.fastlevel = 0;
416  md.allequalimage = metad->btm_allequalimage;
417 
418  XLogRegisterBufData(2, (char *) &md, sizeof(xl_btree_metadata));
419 
420  xlrec.rootblk = rootblkno;
421  xlrec.level = 0;
422 
423  XLogRegisterData((char *) &xlrec, SizeOfBtreeNewroot);
424 
425  recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT);
426 
427  PageSetLSN(rootpage, recptr);
428  PageSetLSN(metapg, recptr);
429  }
430 
432 
433  /*
434  * swap root write lock for read lock. There is no danger of anyone
435  * else accessing the new root page while it's unlocked, since no one
436  * else knows where it is yet.
437  */
438  _bt_unlockbuf(rel, rootbuf);
439  _bt_lockbuf(rel, rootbuf, BT_READ);
440 
441  /* okay, metadata is correct, release lock on it without caching */
442  _bt_relbuf(rel, metabuf);
443  }
444  else
445  {
446  rootblkno = metad->btm_fastroot;
447  Assert(rootblkno != P_NONE);
448  rootlevel = metad->btm_fastlevel;
449 
450  /*
451  * Cache the metapage data for next time
452  */
454  sizeof(BTMetaPageData));
455  memcpy(rel->rd_amcache, metad, sizeof(BTMetaPageData));
456 
457  /*
458  * We are done with the metapage; arrange to release it via first
459  * _bt_relandgetbuf call
460  */
461  rootbuf = metabuf;
462 
463  for (;;)
464  {
465  rootbuf = _bt_relandgetbuf(rel, rootbuf, rootblkno, BT_READ);
466  rootpage = BufferGetPage(rootbuf);
467  rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage);
468 
469  if (!P_IGNORE(rootopaque))
470  break;
471 
472  /* it's dead, Jim. step right one page */
473  if (P_RIGHTMOST(rootopaque))
474  elog(ERROR, "no live root page found in index \"%s\"",
476  rootblkno = rootopaque->btpo_next;
477  }
478 
479  /* Note: can't check btpo.level on deleted pages */
480  if (rootopaque->btpo.level != rootlevel)
481  elog(ERROR, "root page %u of index \"%s\" has level %u, expected %u",
482  rootblkno, RelationGetRelationName(rel),
483  rootopaque->btpo.level, rootlevel);
484  }
485 
486  /*
487  * By here, we have a pin and read lock on the root page, and no lock set
488  * on the metadata page. Return the root page's buffer.
489  */
490  return rootbuf;
491 }
void XLogRegisterBufData(uint8 block_id, char *data, int len)
Definition: xloginsert.c:368
bool allequalimage
Definition: nbtxlog.h:57
BlockNumber rootblk
Definition: nbtxlog.h:318
#define BTP_ROOT
Definition: nbtree.h:73
BlockNumber btpo_next
Definition: nbtree.h:59
static BTMetaPageData * _bt_getmeta(Relation rel, Buffer metabuf)
Definition: nbtpage.c:136
#define P_IGNORE(opaque)
Definition: nbtree.h:219
void _bt_upgrademetapage(Page page)
Definition: nbtpage.c:101
uint32 btm_version
Definition: nbtree.h:101
Buffer _bt_relandgetbuf(Relation rel, Buffer obuf, BlockNumber blkno, int access)
Definition: nbtpage.c:939
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:803
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1471
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:220
#define BTREE_VERSION
Definition: nbtree.h:143
uint32 btm_magic
Definition: nbtree.h:100
#define BTP_LEAF
Definition: nbtree.h:72
#define END_CRIT_SECTION()
Definition: miscadmin.h:134
BlockNumber root
Definition: nbtxlog.h:51
union BTPageOpaqueData::@45 btpo
#define P_NONE
Definition: nbtree.h:206
#define InvalidBuffer
Definition: buf.h:25
#define REGBUF_WILL_INIT
Definition: xloginsert.h:33
#define START_CRIT_SECTION()
Definition: miscadmin.h:132
uint32 level
Definition: nbtxlog.h:319
uint32 BlockNumber
Definition: block.h:31
#define P_NEW
Definition: bufmgr.h:91
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
#define BT_READ
Definition: nbtree.h:587
BlockNumber btm_fastroot
Definition: nbtree.h:104
#define XLOG_BTREE_NEWROOT
Definition: nbtxlog.h:36
void pfree(void *pointer)
Definition: mcxt.c:1057
#define BTREE_MAGIC
Definition: nbtree.h:142
#define ERROR
Definition: elog.h:43
float8 last_cleanup_num_heap_tuples
Definition: nbtxlog.h:56
BTCycleId btpo_cycleid
Definition: nbtree.h:66
TransactionId oldest_btpo_xact
Definition: nbtxlog.h:55
bool btm_allequalimage
Definition: nbtree.h:111
BlockNumber btpo_prev
Definition: nbtree.h:58
#define REGBUF_STANDARD
Definition: xloginsert.h:35
#define InvalidTransactionId
Definition: transam.h:31
#define RelationGetRelationName(relation)
Definition: rel.h:491
#define P_LEFTMOST(opaque)
Definition: nbtree.h:212
unsigned int uint32
Definition: c.h:429
#define BTREE_NOVAC_VERSION
Definition: nbtree.h:145
Buffer _bt_getroot(Relation rel, int access)
Definition: nbtpage.c:272
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define BTREE_METAPAGE
Definition: nbtree.h:141
#define SizeOfBtreeNewroot
Definition: nbtxlog.h:322
uint32 version
Definition: nbtxlog.h:50
uint32 btm_fastlevel
Definition: nbtree.h:105
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:330
uint32 level
Definition: nbtree.h:62
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:422
void _bt_lockbuf(Relation rel, Buffer buf, int access)
Definition: nbtpage.c:975
BlockNumber btm_root
Definition: nbtree.h:102
#define BTREE_MIN_VERSION
Definition: nbtree.h:144
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:959
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:800
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
float8 btm_last_cleanup_num_heap_tuples
Definition: nbtree.h:109
#define RelationNeedsWAL(relation)
Definition: rel.h:563
void _bt_unlockbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:1006
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2663
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:797
uint32 fastlevel
Definition: nbtxlog.h:54
uint32 btm_level
Definition: nbtree.h:103
#define elog(elevel,...)
Definition: elog.h:228
MemoryContext rd_indexcxt
Definition: rel.h:187
uint32 level
Definition: nbtxlog.h:52
BlockNumber fastroot
Definition: nbtxlog.h:53
TransactionId btm_oldest_btpo_xact
Definition: nbtree.h:107
void * rd_amcache
Definition: rel.h:212
#define BT_WRITE
Definition: nbtree.h:588
void XLogBeginInsert(void)
Definition: xloginsert.c:123
uint16 btpo_flags
Definition: nbtree.h:65
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
int Buffer
Definition: buf.h:23
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:213
Pointer Page
Definition: bufpage.h:78

◆ _bt_getrootheight()

int _bt_getrootheight ( Relation  rel)

Definition at line 604 of file nbtpage.c.

References _bt_getbuf(), _bt_getmeta(), _bt_relbuf(), Assert, BT_READ, BTMetaPageData::btm_allequalimage, BTMetaPageData::btm_fastlevel, BTMetaPageData::btm_fastroot, BTMetaPageData::btm_magic, BTMetaPageData::btm_root, BTMetaPageData::btm_version, BTREE_MAGIC, BTREE_METAPAGE, BTREE_MIN_VERSION, BTREE_NOVAC_VERSION, BTREE_VERSION, MemoryContextAlloc(), P_NONE, RelationData::rd_amcache, and RelationData::rd_indexcxt.

Referenced by _bt_insertonpg(), and get_relation_info().

605 {
606  BTMetaPageData *metad;
607 
608  if (rel->rd_amcache == NULL)
609  {
610  Buffer metabuf;
611 
612  metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
613  metad = _bt_getmeta(rel, metabuf);
614 
615  /*
616  * If there's no root page yet, _bt_getroot() doesn't expect a cache
617  * to be made, so just stop here and report the index height is zero.
618  * (XXX perhaps _bt_getroot() should be changed to allow this case.)
619  */
620  if (metad->btm_root == P_NONE)
621  {
622  _bt_relbuf(rel, metabuf);
623  return 0;
624  }
625 
626  /*
627  * Cache the metapage data for next time
628  */
630  sizeof(BTMetaPageData));
631  memcpy(rel->rd_amcache, metad, sizeof(BTMetaPageData));
632  _bt_relbuf(rel, metabuf);
633  }
634 
635  /* Get cached page */
636  metad = (BTMetaPageData *) rel->rd_amcache;
637  /* We shouldn't have cached it if any of these fail */
638  Assert(metad->btm_magic == BTREE_MAGIC);
640  Assert(metad->btm_version <= BTREE_VERSION);
641  Assert(!metad->btm_allequalimage ||
643  Assert(metad->btm_fastroot != P_NONE);
644 
645  return metad->btm_fastlevel;
646 }
static BTMetaPageData * _bt_getmeta(Relation rel, Buffer metabuf)
Definition: nbtpage.c:136
uint32 btm_version
Definition: nbtree.h:101
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:803
#define BTREE_VERSION
Definition: nbtree.h:143
uint32 btm_magic
Definition: nbtree.h:100
#define P_NONE
Definition: nbtree.h:206
#define BT_READ
Definition: nbtree.h:587
BlockNumber btm_fastroot
Definition: nbtree.h:104
#define BTREE_MAGIC
Definition: nbtree.h:142
bool btm_allequalimage
Definition: nbtree.h:111
#define BTREE_NOVAC_VERSION
Definition: nbtree.h:145
#define BTREE_METAPAGE
Definition: nbtree.h:141
uint32 btm_fastlevel
Definition: nbtree.h:105
BlockNumber btm_root
Definition: nbtree.h:102
#define BTREE_MIN_VERSION
Definition: nbtree.h:144
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:959
#define Assert(condition)
Definition: c.h:800
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:797
MemoryContext rd_indexcxt
Definition: rel.h:187
void * rd_amcache
Definition: rel.h:212
int Buffer
Definition: buf.h:23

◆ _bt_gettrueroot()

Buffer _bt_gettrueroot ( Relation  rel)

Definition at line 508 of file nbtpage.c.

References _bt_getbuf(), _bt_relandgetbuf(), _bt_relbuf(), BT_READ, BTMetaPageData::btm_level, BTMetaPageData::btm_magic, BTMetaPageData::btm_root, BTMetaPageData::btm_version, BTPageGetMeta, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_next, BTREE_MAGIC, BTREE_METAPAGE, BTREE_MIN_VERSION, BTREE_VERSION, BufferGetPage, elog, ereport, errcode(), errmsg(), ERROR, InvalidBuffer, BTPageOpaqueData::level, P_IGNORE, P_ISMETA, P_NONE, P_RIGHTMOST, PageGetSpecialPointer, pfree(), RelationData::rd_amcache, and RelationGetRelationName.

Referenced by _bt_get_endpoint().

509 {
510  Buffer metabuf;
511  Page metapg;
512  BTPageOpaque metaopaque;
513  Buffer rootbuf;
514  Page rootpage;
515  BTPageOpaque rootopaque;
516  BlockNumber rootblkno;
517  uint32 rootlevel;
518  BTMetaPageData *metad;
519 
520  /*
521  * We don't try to use cached metapage data here, since (a) this path is
522  * not performance-critical, and (b) if we are here it suggests our cache
523  * is out-of-date anyway. In light of point (b), it's probably safest to
524  * actively flush any cached metapage info.
525  */
526  if (rel->rd_amcache)
527  pfree(rel->rd_amcache);
528  rel->rd_amcache = NULL;
529 
530  metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
531  metapg = BufferGetPage(metabuf);
532  metaopaque = (BTPageOpaque) PageGetSpecialPointer(metapg);
533  metad = BTPageGetMeta(metapg);
534 
535  if (!P_ISMETA(metaopaque) ||
536  metad->btm_magic != BTREE_MAGIC)
537  ereport(ERROR,
538  (errcode(ERRCODE_INDEX_CORRUPTED),
539  errmsg("index \"%s\" is not a btree",
540  RelationGetRelationName(rel))));
541 
542  if (metad->btm_version < BTREE_MIN_VERSION ||
543  metad->btm_version > BTREE_VERSION)
544  ereport(ERROR,
545  (errcode(ERRCODE_INDEX_CORRUPTED),
546  errmsg("version mismatch in index \"%s\": file version %d, "
547  "current version %d, minimal supported version %d",
550 
551  /* if no root page initialized yet, fail */
552  if (metad->btm_root == P_NONE)
553  {
554  _bt_relbuf(rel, metabuf);
555  return InvalidBuffer;
556  }
557 
558  rootblkno = metad->btm_root;
559  rootlevel = metad->btm_level;
560 
561  /*
562  * We are done with the metapage; arrange to release it via first
563  * _bt_relandgetbuf call
564  */
565  rootbuf = metabuf;
566 
567  for (;;)
568  {
569  rootbuf = _bt_relandgetbuf(rel, rootbuf, rootblkno, BT_READ);
570  rootpage = BufferGetPage(rootbuf);
571  rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage);
572 
573  if (!P_IGNORE(rootopaque))
574  break;
575 
576  /* it's dead, Jim. step right one page */
577  if (P_RIGHTMOST(rootopaque))
578  elog(ERROR, "no live root page found in index \"%s\"",
580  rootblkno = rootopaque->btpo_next;
581  }
582 
583  /* Note: can't check btpo.level on deleted pages */
584  if (rootopaque->btpo.level != rootlevel)
585  elog(ERROR, "root page %u of index \"%s\" has level %u, expected %u",
586  rootblkno, RelationGetRelationName(rel),
587  rootopaque->btpo.level, rootlevel);
588 
589  return rootbuf;
590 }
BlockNumber btpo_next
Definition: nbtree.h:59
#define P_IGNORE(opaque)
Definition: nbtree.h:219
uint32 btm_version
Definition: nbtree.h:101
Buffer _bt_relandgetbuf(Relation rel, Buffer obuf, BlockNumber blkno, int access)
Definition: nbtpage.c:939
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:803
#define BTREE_VERSION
Definition: nbtree.h:143
uint32 btm_magic
Definition: nbtree.h:100
union BTPageOpaqueData::@45 btpo
#define P_NONE
Definition: nbtree.h:206
#define InvalidBuffer
Definition: buf.h:25
int errcode(int sqlerrcode)
Definition: elog.c:691
uint32 BlockNumber
Definition: block.h:31
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
#define P_ISMETA(opaque)
Definition: nbtree.h:217
#define BT_READ
Definition: nbtree.h:587
void pfree(void *pointer)
Definition: mcxt.c:1057
#define BTREE_MAGIC
Definition: nbtree.h:142
#define ERROR
Definition: elog.h:43
#define BTPageGetMeta(p)
Definition: nbtree.h:114
#define RelationGetRelationName(relation)
Definition: rel.h:491
unsigned int uint32
Definition: c.h:429
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define BTREE_METAPAGE
Definition: nbtree.h:141
uint32 level
Definition: nbtree.h:62
BlockNumber btm_root
Definition: nbtree.h:102
#define BTREE_MIN_VERSION
Definition: nbtree.h:144
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:959
#define ereport(elevel,...)
Definition: elog.h:155
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
int errmsg(const char *fmt,...)
Definition: elog.c:902
uint32 btm_level
Definition: nbtree.h:103
#define elog(elevel,...)
Definition: elog.h:228
void * rd_amcache
Definition: rel.h:212
int Buffer
Definition: buf.h:23
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:213
Pointer Page
Definition: bufpage.h:78

◆ _bt_initmetapage()

void _bt_initmetapage ( Page  page,
BlockNumber  rootbknum,
uint32  level,
bool  allequalimage 
)

Definition at line 61 of file nbtpage.c.

References _bt_pageinit(), BTMetaPageData::btm_allequalimage, BTMetaPageData::btm_fastlevel, BTMetaPageData::btm_fastroot, BTMetaPageData::btm_last_cleanup_num_heap_tuples, BTMetaPageData::btm_level, BTMetaPageData::btm_magic, BTMetaPageData::btm_oldest_btpo_xact, BTMetaPageData::btm_root, BTMetaPageData::btm_version, BTP_META, BTPageGetMeta, BTPageOpaqueData::btpo_flags, BTREE_MAGIC, BTREE_VERSION, InvalidTransactionId, and PageGetSpecialPointer.

Referenced by _bt_uppershutdown(), and btbuildempty().

63 {
64  BTMetaPageData *metad;
65  BTPageOpaque metaopaque;
66 
67  _bt_pageinit(page, BLCKSZ);
68 
69  metad = BTPageGetMeta(page);
70  metad->btm_magic = BTREE_MAGIC;
71  metad->btm_version = BTREE_VERSION;
72  metad->btm_root = rootbknum;
73  metad->btm_level = level;
74  metad->btm_fastroot = rootbknum;
75  metad->btm_fastlevel = level;
78  metad->btm_allequalimage = allequalimage;
79 
80  metaopaque = (BTPageOpaque) PageGetSpecialPointer(page);
81  metaopaque->btpo_flags = BTP_META;
82 
83  /*
84  * Set pd_lower just past the end of the metadata. This is essential,
85  * because without doing so, metadata will be lost if xlog.c compresses
86  * the page.
87  */
88  ((PageHeader) page)->pd_lower =
89  ((char *) metad + sizeof(BTMetaPageData)) - (char *) page;
90 }
uint32 btm_version
Definition: nbtree.h:101
#define BTREE_VERSION
Definition: nbtree.h:143
uint32 btm_magic
Definition: nbtree.h:100
#define BTP_META
Definition: nbtree.h:75
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
BlockNumber btm_fastroot
Definition: nbtree.h:104
#define BTREE_MAGIC
Definition: nbtree.h:142
#define BTPageGetMeta(p)
Definition: nbtree.h:114
bool btm_allequalimage
Definition: nbtree.h:111
#define InvalidTransactionId
Definition: transam.h:31
uint32 btm_fastlevel
Definition: nbtree.h:105
BlockNumber btm_root
Definition: nbtree.h:102
PageHeaderData * PageHeader
Definition: bufpage.h:166
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
float8 btm_last_cleanup_num_heap_tuples
Definition: nbtree.h:109
uint32 btm_level
Definition: nbtree.h:103
void _bt_pageinit(Page page, Size size)
Definition: nbtpage.c:1066
TransactionId btm_oldest_btpo_xact
Definition: nbtree.h:107
uint16 btpo_flags
Definition: nbtree.h:65

◆ _bt_leftsib_splitflag()

static bool _bt_leftsib_splitflag ( Relation  rel,
BlockNumber  leftsib,
BlockNumber  target 
)
static

Definition at line 1440 of file nbtpage.c.

References _bt_getbuf(), _bt_relbuf(), BT_READ, BTPageOpaqueData::btpo_next, buf, BufferGetPage, P_INCOMPLETE_SPLIT, P_NONE, and PageGetSpecialPointer.

Referenced by _bt_lock_subtree_parent(), and _bt_pagedel().

1441 {
1442  Buffer buf;
1443  Page page;
1444  BTPageOpaque opaque;
1445  bool result;
1446 
1447  /* Easy case: No left sibling */
1448  if (leftsib == P_NONE)
1449  return false;
1450 
1451  buf = _bt_getbuf(rel, leftsib, BT_READ);
1452  page = BufferGetPage(buf);
1453  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1454 
1455  /*
1456  * If the left sibling was concurrently split, so that its next-pointer
1457  * doesn't point to the current page anymore, the split that created
1458  * target must be completed. Caller can reasonably expect that there will
1459  * be a downlink to the target page that it can relocate using its stack.
1460  * (We don't allow splitting an incompletely split page again until the
1461  * previous split has been completed.)
1462  */
1463  result = (opaque->btpo_next == target && P_INCOMPLETE_SPLIT(opaque));
1464  _bt_relbuf(rel, buf);
1465 
1466  return result;
1467 }
BlockNumber btpo_next
Definition: nbtree.h:59
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:803
#define P_NONE
Definition: nbtree.h:206
#define P_INCOMPLETE_SPLIT(opaque)
Definition: nbtree.h:221
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
#define BT_READ
Definition: nbtree.h:587
static char * buf
Definition: pg_test_fsync.c:68
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:959
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
int Buffer
Definition: buf.h:23
Pointer Page
Definition: bufpage.h:78

◆ _bt_lock_subtree_parent()

static bool _bt_lock_subtree_parent ( Relation  rel,
BlockNumber  child,
BTStack  stack,
Buffer subtreeparent,
OffsetNumber poffset,
BlockNumber topparent,
BlockNumber topparentrightsib 
)
static

Definition at line 2491 of file nbtpage.c.

References _bt_getstackbuf(), _bt_leftsib_splitflag(), _bt_relbuf(), Assert, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BTStackData::bts_blkno, BTStackData::bts_offset, BTStackData::bts_parent, BufferGetPage, ereport, errcode(), errmsg_internal(), ERROR, InvalidBuffer, P_FIRSTDATAKEY, P_INCOMPLETE_SPLIT, P_RIGHTMOST, PageGetMaxOffsetNumber, PageGetSpecialPointer, and RelationGetRelationName.

Referenced by _bt_mark_page_halfdead().

2494 {
2495  BlockNumber parent,
2496  leftsibparent;
2497  OffsetNumber parentoffset,
2498  maxoff;
2499  Buffer pbuf;
2500  Page page;
2501  BTPageOpaque opaque;
2502 
2503  /*
2504  * Locate the pivot tuple whose downlink points to "child". Write lock
2505  * the parent page itself.
2506  */
2507  pbuf = _bt_getstackbuf(rel, stack, child);
2508  if (pbuf == InvalidBuffer)
2509  ereport(ERROR,
2510  (errcode(ERRCODE_INDEX_CORRUPTED),
2511  errmsg_internal("failed to re-find parent key in index \"%s\" for deletion target page %u",
2512  RelationGetRelationName(rel), child)));
2513  parent = stack->bts_blkno;
2514  parentoffset = stack->bts_offset;
2515 
2516  page = BufferGetPage(pbuf);
2517  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2518  maxoff = PageGetMaxOffsetNumber(page);
2519  leftsibparent = opaque->btpo_prev;
2520 
2521  /*
2522  * _bt_getstackbuf() completes page splits on returned parent buffer when
2523  * required.
2524  *
2525  * In general it's a bad idea for VACUUM to use up more disk space, which
2526  * is why page deletion does not finish incomplete page splits most of the
2527  * time. We allow this limited exception because the risk is much lower,
2528  * and the potential downside of not proceeding is much higher: A single
2529  * internal page with the INCOMPLETE_SPLIT flag set might otherwise
2530  * prevent us from deleting hundreds of empty leaf pages from one level
2531  * down.
2532  */
2533  Assert(!P_INCOMPLETE_SPLIT(opaque));
2534 
2535  if (parentoffset < maxoff)
2536  {
2537  /*
2538  * Child is not the rightmost child in parent, so it's safe to delete
2539  * the subtree whose root/topparent is child page
2540  */
2541  *subtreeparent = pbuf;
2542  *poffset = parentoffset;
2543  return true;
2544  }
2545 
2546  /*
2547  * Child is the rightmost child of parent.
2548  *
2549  * Since it's the rightmost child of parent, deleting the child (or
2550  * deleting the subtree whose root/topparent is the child page) is only
2551  * safe when it's also possible to delete the parent.
2552  */
2553  Assert(parentoffset == maxoff);
2554  if (parentoffset != P_FIRSTDATAKEY(opaque) || P_RIGHTMOST(opaque))
2555  {
2556  /*
2557  * Child isn't parent's only child, or parent is rightmost on its
2558  * entire level. Definitely cannot delete any pages.
2559  */
2560  _bt_relbuf(rel, pbuf);
2561  return false;
2562  }
2563 
2564  /*
2565  * Now make sure that the parent deletion is itself safe by examining the
2566  * child's grandparent page. Recurse, passing the parent page as the
2567  * child page (child's grandparent is the parent on the next level up). If
2568  * parent deletion is unsafe, then child deletion must also be unsafe (in
2569  * which case caller cannot delete any pages at all).
2570  */
2571  *topparent = parent;
2572  *topparentrightsib = opaque->btpo_next;
2573 
2574  /*
2575  * Release lock on parent before recursing.
2576  *
2577  * It's OK to release page locks on parent before recursive call locks
2578  * grandparent. An internal page can only acquire an entry if the child
2579  * is split, but that cannot happen as long as we still hold a lock on the
2580  * leafbuf page.
2581  */
2582  _bt_relbuf(rel, pbuf);
2583 
2584  /*
2585  * Before recursing, check that the left sibling of parent (if any) is not
2586  * marked with INCOMPLETE_SPLIT flag first (must do so after we drop the
2587  * parent lock).
2588  *
2589  * Note: We deliberately avoid completing incomplete splits here.
2590  */
2591  if (_bt_leftsib_splitflag(rel, leftsibparent, parent))
2592  return false;
2593 
2594  /* Recurse to examine child page's grandparent page */
2595  return _bt_lock_subtree_parent(rel, parent, stack->bts_parent,
2596  subtreeparent, poffset,
2597  topparent, topparentrightsib);
2598 }
BlockNumber btpo_next
Definition: nbtree.h:59
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:244
#define InvalidBuffer
Definition: buf.h:25
static bool _bt_lock_subtree_parent(Relation rel, BlockNumber child, BTStack stack, Buffer *subtreeparent, OffsetNumber *poffset, BlockNumber *topparent, BlockNumber *topparentrightsib)
Definition: nbtpage.c:2491
int errcode(int sqlerrcode)
Definition: elog.c:691
uint32 BlockNumber
Definition: block.h:31
#define P_INCOMPLETE_SPLIT(opaque)
Definition: nbtree.h:221
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
uint16 OffsetNumber
Definition: off.h:24
#define ERROR
Definition: elog.h:43
BlockNumber btpo_prev
Definition: nbtree.h:58
OffsetNumber bts_offset
Definition: nbtree.h:603
#define RelationGetRelationName(relation)
Definition: rel.h:491
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
BlockNumber bts_blkno
Definition: nbtree.h:602
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:959
#define ereport(elevel,...)
Definition: elog.h:155
int errmsg_internal(const char *fmt,...)
Definition: elog.c:989
#define Assert(condition)
Definition: c.h:800
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
struct BTStackData * bts_parent
Definition: nbtree.h:604
int Buffer
Definition: buf.h:23
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:213
Buffer _bt_getstackbuf(Relation rel, BTStack stack, BlockNumber child)
Definition: nbtinsert.c:2273
Pointer Page
Definition: bufpage.h:78
static bool _bt_leftsib_splitflag(Relation rel, BlockNumber leftsib, BlockNumber target)
Definition: nbtpage.c:1440

◆ _bt_lockbuf()

void _bt_lockbuf ( Relation  rel,
Buffer  buf,
int  access 
)

Definition at line 975 of file nbtpage.c.

References BufferGetPage, LockBuffer(), RelationUsesLocalBuffers, and VALGRIND_MAKE_MEM_DEFINED.

Referenced by _bt_getbuf(), _bt_getroot(), _bt_killitems(), _bt_moveright(), _bt_pagedel(), _bt_readnextpage(), _bt_relandgetbuf(), _bt_search(), _bt_unlink_halfdead_page(), _bt_update_meta_cleanup_info(), and btvacuumpage().

976 {
977  /* LockBuffer() asserts that pin is held by this backend */
978  LockBuffer(buf, access);
979 
980  /*
981  * It doesn't matter that _bt_unlockbuf() won't get called in the
982  * event of an nbtree error (e.g. a unique violation error). That
983  * won't cause Valgrind false positives.
984  *
985  * The nbtree client requests are superimposed on top of the
986  * bufmgr.c buffer pin client requests. In the event of an nbtree
987  * error the buffer will certainly get marked as defined when the
988  * backend once again acquires its first pin on the buffer. (Of
989  * course, if the backend never touches the buffer again then it
990  * doesn't matter that it remains non-accessible to Valgrind.)
991  *
992  * Note: When an IndexTuple C pointer gets computed using an
993  * ItemId read from a page while a lock was held, the C pointer
994  * becomes unsafe to dereference forever as soon as the lock is
995  * released. Valgrind can only detect cases where the pointer
996  * gets dereferenced with no _current_ lock/pin held, though.
997  */
998  if (!RelationUsesLocalBuffers(rel))
1000 }
#define VALGRIND_MAKE_MEM_DEFINED(addr, size)
Definition: memdebug.h:26
static char * buf
Definition: pg_test_fsync.c:68
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3752
#define RelationUsesLocalBuffers(relation)
Definition: rel.h:573

◆ _bt_log_reuse_page()

static void _bt_log_reuse_page ( Relation  rel,
BlockNumber  blkno,
TransactionId  latestRemovedXid 
)
static

Definition at line 760 of file nbtpage.c.

References xl_btree_reuse_page::block, xl_btree_reuse_page::latestRemovedXid, xl_btree_reuse_page::node, RelationData::rd_node, SizeOfBtreeReusePage, XLOG_BTREE_REUSE_PAGE, XLogBeginInsert(), XLogInsert(), and XLogRegisterData().

Referenced by _bt_getbuf().

761 {
762  xl_btree_reuse_page xlrec_reuse;
763 
764  /*
765  * Note that we don't register the buffer with the record, because this
766  * operation doesn't modify the page. This record only exists to provide a
767  * conflict point for Hot Standby.
768  */
769 
770  /* XLOG stuff */
771  xlrec_reuse.node = rel->rd_node;
772  xlrec_reuse.block = blkno;
773  xlrec_reuse.latestRemovedXid = latestRemovedXid;
774 
775  XLogBeginInsert();
776  XLogRegisterData((char *) &xlrec_reuse, SizeOfBtreeReusePage);
777 
778  XLogInsert(RM_BTREE_ID, XLOG_BTREE_REUSE_PAGE);
779 }
RelFileNode node
Definition: nbtxlog.h:206
#define SizeOfBtreeReusePage
Definition: nbtxlog.h:211
BlockNumber block
Definition: nbtxlog.h:207
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:330
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:422
#define XLOG_BTREE_REUSE_PAGE
Definition: nbtxlog.h:40
RelFileNode rd_node
Definition: rel.h:55
TransactionId latestRemovedXid
Definition: nbtxlog.h:208
void XLogBeginInsert(void)
Definition: xloginsert.c:123

◆ _bt_mark_page_halfdead()

static bool _bt_mark_page_halfdead ( Relation  rel,
Buffer  leafbuf,
BTStack  stack 
)
static

Definition at line 1820 of file nbtpage.c.

References _bt_lock_subtree_parent(), _bt_relbuf(), _bt_rightsib_halfdeadflag(), Assert, BTP_HALF_DEAD, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BTreeTupleGetDownLink(), BTreeTupleSetDownLink(), BTreeTupleSetTopParent(), BufferGetBlockNumber(), BufferGetPage, DEBUG1, elog, END_CRIT_SECTION, ereport, errcode(), errmsg_internal(), ERROR, IndexTupleSize, InvalidBlockNumber, xl_btree_mark_page_halfdead::leafblk, xl_btree_mark_page_halfdead::leftblk, MarkBufferDirty(), MemSet, OffsetNumberNext, P_FIRSTDATAKEY, P_HIKEY, P_IGNORE, P_ISLEAF, P_ISROOT, P_RIGHTMOST, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageGetSpecialPointer, PageIndexTupleDelete(), PageIndexTupleOverwrite(), PageSetLSN, xl_btree_mark_page_halfdead::poffset, PredicateLockPageCombine(), REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetRelationName, RelationNeedsWAL, xl_btree_mark_page_halfdead::rightblk, SizeOfBtreeMarkPageHalfDead, START_CRIT_SECTION, IndexTupleData::t_info, xl_btree_mark_page_halfdead::topparent, XLOG_BTREE_MARK_PAGE_HALFDEAD, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _bt_pagedel().

1821 {
1822  BlockNumber leafblkno;
1823  BlockNumber leafrightsib;
1824  BlockNumber topparent;
1825  BlockNumber topparentrightsib;
1826  ItemId itemid;
1827  Page page;
1828  BTPageOpaque opaque;
1829  Buffer subtreeparent;
1830  OffsetNumber poffset;
1831  OffsetNumber nextoffset;
1832  IndexTuple itup;
1833  IndexTupleData trunctuple;
1834 
1835  page = BufferGetPage(leafbuf);
1836  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1837 
1838  Assert(!P_RIGHTMOST(opaque) && !P_ISROOT(opaque) &&
1839  P_ISLEAF(opaque) && !P_IGNORE(opaque) &&
1840  P_FIRSTDATAKEY(opaque) > PageGetMaxOffsetNumber(page));
1841 
1842  /*
1843  * Save info about the leaf page.
1844  */
1845  leafblkno = BufferGetBlockNumber(leafbuf);
1846  leafrightsib = opaque->btpo_next;
1847 
1848  /*
1849  * Before attempting to lock the parent page, check that the right sibling
1850  * is not in half-dead state. A half-dead right sibling would have no
1851  * downlink in the parent, which would be highly confusing later when we
1852  * delete the downlink. It would fail the "right sibling of target page
1853  * is also the next child in parent page" cross-check below.
1854  */
1855  if (_bt_rightsib_halfdeadflag(rel, leafrightsib))
1856  {
1857  elog(DEBUG1, "could not delete page %u because its right sibling %u is half-dead",
1858  leafblkno, leafrightsib);
1859  return false;
1860  }
1861 
1862  /*
1863  * We cannot delete a page that is the rightmost child of its immediate
1864  * parent, unless it is the only child --- in which case the parent has to
1865  * be deleted too, and the same condition applies recursively to it. We
1866  * have to check this condition all the way up before trying to delete,
1867  * and lock the parent of the root of the to-be-deleted subtree (the
1868  * "subtree parent"). _bt_lock_subtree_parent() locks the subtree parent
1869  * for us. We remove the downlink to the "top parent" page (subtree root
1870  * page) from the subtree parent page below.
1871  *
1872  * Initialize topparent to be leafbuf page now. The final to-be-deleted
1873  * subtree is often a degenerate one page subtree consisting only of the
1874  * leafbuf page. When that happens, the leafbuf page is the final subtree
1875  * root page/top parent page.
1876  */
1877  topparent = leafblkno;
1878  topparentrightsib = leafrightsib;
1879  if (!_bt_lock_subtree_parent(rel, leafblkno, stack,
1880  &subtreeparent, &poffset,
1881  &topparent, &topparentrightsib))
1882  return false;
1883 
1884  /*
1885  * Check that the parent-page index items we're about to delete/overwrite
1886  * in subtree parent page contain what we expect. This can fail if the
1887  * index has become corrupt for some reason. We want to throw any error
1888  * before entering the critical section --- otherwise it'd be a PANIC.
1889  */
1890  page = BufferGetPage(subtreeparent);
1891  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1892 
1893 #ifdef USE_ASSERT_CHECKING
1894 
1895  /*
1896  * This is just an assertion because _bt_lock_subtree_parent should have
1897  * guaranteed tuple has the expected contents
1898  */
1899  itemid = PageGetItemId(page, poffset);
1900  itup = (IndexTuple) PageGetItem(page, itemid);
1901  Assert(BTreeTupleGetDownLink(itup) == topparent);
1902 #endif
1903 
1904  nextoffset = OffsetNumberNext(poffset);
1905  itemid = PageGetItemId(page, nextoffset);
1906  itup = (IndexTuple) PageGetItem(page, itemid);
1907  if (BTreeTupleGetDownLink(itup) != topparentrightsib)
1908  ereport(ERROR,
1909  (errcode(ERRCODE_INDEX_CORRUPTED),
1910  errmsg_internal("right sibling %u of block %u is not next child %u of block %u in index \"%s\"",
1911  topparentrightsib, topparent,
1912  BTreeTupleGetDownLink(itup),
1913  BufferGetBlockNumber(subtreeparent),
1914  RelationGetRelationName(rel))));
1915 
1916  /*
1917  * Any insert which would have gone on the leaf block will now go to its
1918  * right sibling. In other words, the key space moves right.
1919  */
1920  PredicateLockPageCombine(rel, leafblkno, leafrightsib);
1921 
1922  /* No ereport(ERROR) until changes are logged */
1924 
1925  /*
1926  * Update parent of subtree. We want to delete the downlink to the top
1927  * parent page/root of the subtree, and the *following* key. Easiest way
1928  * is to copy the right sibling's downlink over the downlink that points
1929  * to top parent page, and then delete the right sibling's original pivot
1930  * tuple.
1931  *
1932  * Lanin and Shasha make the key space move left when deleting a page,
1933  * whereas the key space moves right here. That's why we cannot simply
1934  * delete the pivot tuple with the downlink to the top parent page. See
1935  * nbtree/README.
1936  */
1937  page = BufferGetPage(subtreeparent);
1938  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1939 
1940  itemid = PageGetItemId(page, poffset);
1941  itup = (IndexTuple) PageGetItem(page, itemid);
1942  BTreeTupleSetDownLink(itup, topparentrightsib);
1943 
1944  nextoffset = OffsetNumberNext(poffset);
1945  PageIndexTupleDelete(page, nextoffset);
1946 
1947  /*
1948  * Mark the leaf page as half-dead, and stamp it with a link to the top
1949  * parent page. When the leaf page is also the top parent page, the link
1950  * is set to InvalidBlockNumber.
1951  */
1952  page = BufferGetPage(leafbuf);
1953  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1954  opaque->btpo_flags |= BTP_HALF_DEAD;
1955 
1957  MemSet(&trunctuple, 0, sizeof(IndexTupleData));
1958  trunctuple.t_info = sizeof(IndexTupleData);
1959  if (topparent != leafblkno)
1960  BTreeTupleSetTopParent(&trunctuple, topparent);
1961  else
1963 
1964  if (!PageIndexTupleOverwrite(page, P_HIKEY, (Item) &trunctuple,
1965  IndexTupleSize(&trunctuple)))
1966  elog(ERROR, "could not overwrite high key in half-dead page");
1967 
1968  /* Must mark buffers dirty before XLogInsert */
1969  MarkBufferDirty(subtreeparent);
1970  MarkBufferDirty(leafbuf);
1971 
1972  /* XLOG stuff */
1973  if (RelationNeedsWAL(rel))
1974  {
1976  XLogRecPtr recptr;
1977 
1978  xlrec.poffset = poffset;
1979  xlrec.leafblk = leafblkno;
1980  if (topparent != leafblkno)
1981  xlrec.topparent = topparent;
1982  else
1983  xlrec.topparent = InvalidBlockNumber;
1984 
1985  XLogBeginInsert();
1986  XLogRegisterBuffer(0, leafbuf, REGBUF_WILL_INIT);
1987  XLogRegisterBuffer(1, subtreeparent, REGBUF_STANDARD);
1988 
1989  page = BufferGetPage(leafbuf);
1990  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1991  xlrec.leftblk = opaque->btpo_prev;
1992  xlrec.rightblk = opaque->btpo_next;
1993 
1995 
1996  recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_MARK_PAGE_HALFDEAD);
1997 
1998  page = BufferGetPage(subtreeparent);
1999  PageSetLSN(page, recptr);
2000  page = BufferGetPage(leafbuf);
2001  PageSetLSN(page, recptr);
2002  }
2003 
2004  END_CRIT_SECTION();
2005 
2006  _bt_relbuf(rel, subtreeparent);
2007  return true;
2008 }
BlockNumber btpo_next
Definition: nbtree.h:59
#define DEBUG1
Definition: elog.h:25
#define P_IGNORE(opaque)
Definition: nbtree.h:219
void PageIndexTupleDelete(Page page, OffsetNumber offnum)
Definition: bufpage.c:935
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1471
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:220
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:244
#define SizeOfBtreeMarkPageHalfDead
Definition: nbtxlog.h:273
#define BTP_HALF_DEAD
Definition: nbtree.h:76
#define END_CRIT_SECTION()
Definition: miscadmin.h:134
static bool _bt_rightsib_halfdeadflag(Relation rel, BlockNumber leafrightsib)
Definition: nbtpage.c:1497
Pointer Item
Definition: item.h:17
static BlockNumber BTreeTupleGetDownLink(IndexTuple pivot)
Definition: nbtree.h:424
#define REGBUF_WILL_INIT
Definition: xloginsert.h:33
static bool _bt_lock_subtree_parent(Relation rel, BlockNumber child, BTStack stack, Buffer *subtreeparent, OffsetNumber *poffset, BlockNumber *topparent, BlockNumber *topparentrightsib)
Definition: nbtpage.c:2491
#define START_CRIT_SECTION()
Definition: miscadmin.h:132
int errcode(int sqlerrcode)
Definition: elog.c:691
#define MemSet(start, val, len)
Definition: c.h:1004
uint32 BlockNumber
Definition: block.h:31
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
uint16 OffsetNumber
Definition: off.h:24
#define ERROR
Definition: elog.h:43
bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum, Item newtup, Size newsize)
Definition: bufpage.c:1288
BlockNumber btpo_prev
Definition: nbtree.h:58
IndexTupleData * IndexTuple
Definition: itup.h:53
#define REGBUF_STANDARD
Definition: xloginsert.h:35
#define RelationGetRelationName(relation)
Definition: rel.h:491
static void BTreeTupleSetDownLink(IndexTuple pivot, BlockNumber blkno)
Definition: nbtree.h:430
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define P_ISROOT(opaque)
Definition: nbtree.h:215
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:330
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:422
#define XLOG_BTREE_MARK_PAGE_HALFDEAD
Definition: nbtxlog.h:37
struct IndexTupleData IndexTupleData
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:959
#define ereport(elevel,...)
Definition: elog.h:155
int errmsg_internal(const char *fmt,...)
Definition: elog.c:989
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:800
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
#define InvalidBlockNumber
Definition: block.h:33
#define RelationNeedsWAL(relation)
Definition: rel.h:563
#define P_HIKEY
Definition: nbtree.h:242
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2663
#define elog(elevel,...)
Definition: elog.h:228
void PredicateLockPageCombine(Relation relation, BlockNumber oldblkno, BlockNumber newblkno)
Definition: predicate.c:3181
unsigned short t_info
Definition: itup.h:49
void XLogBeginInsert(void)
Definition: xloginsert.c:123
uint16 btpo_flags
Definition: nbtree.h:65
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
int Buffer
Definition: buf.h:23
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:213
static void BTreeTupleSetTopParent(IndexTuple leafhikey, BlockNumber blkno)
Definition: nbtree.h:494
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
Pointer Page
Definition: bufpage.h:78
#define IndexTupleSize(itup)
Definition: itup.h:71
#define P_ISLEAF(opaque)
Definition: nbtree.h:214

◆ _bt_metaversion()

void _bt_metaversion ( Relation  rel,
bool heapkeyspace,
bool allequalimage 
)

Definition at line 668 of file nbtpage.c.

References _bt_getbuf(), _bt_getmeta(), _bt_relbuf(), Assert, BT_READ, BTMetaPageData::btm_allequalimage, BTMetaPageData::btm_fastroot, BTMetaPageData::btm_magic, BTMetaPageData::btm_root, BTMetaPageData::btm_version, BTREE_MAGIC, BTREE_METAPAGE, BTREE_MIN_VERSION, BTREE_NOVAC_VERSION, BTREE_VERSION, MemoryContextAlloc(), P_NONE, RelationData::rd_amcache, and RelationData::rd_indexcxt.

Referenced by _bt_first(), _bt_mkscankey(), and bt_index_check_internal().

669 {
670  BTMetaPageData *metad;
671 
672  if (rel->rd_amcache == NULL)
673  {
674  Buffer metabuf;
675 
676  metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
677  metad = _bt_getmeta(rel, metabuf);
678 
679  /*
680  * If there's no root page yet, _bt_getroot() doesn't expect a cache
681  * to be made, so just stop here. (XXX perhaps _bt_getroot() should
682  * be changed to allow this case.)
683  */
684  if (metad->btm_root == P_NONE)
685  {
686  *heapkeyspace = metad->btm_version > BTREE_NOVAC_VERSION;
687  *allequalimage = metad->btm_allequalimage;
688 
689  _bt_relbuf(rel, metabuf);
690  return;
691  }
692 
693  /*
694  * Cache the metapage data for next time
695  *
696  * An on-the-fly version upgrade performed by _bt_upgrademetapage()
697  * can change the nbtree version for an index without invalidating any
698  * local cache. This is okay because it can only happen when moving
699  * from version 2 to version 3, both of which are !heapkeyspace
700  * versions.
701  */
703  sizeof(BTMetaPageData));
704  memcpy(rel->rd_amcache, metad, sizeof(BTMetaPageData));
705  _bt_relbuf(rel, metabuf);
706  }
707 
708  /* Get cached page */
709  metad = (BTMetaPageData *) rel->rd_amcache;
710  /* We shouldn't have cached it if any of these fail */
711  Assert(metad->btm_magic == BTREE_MAGIC);
713  Assert(metad->btm_version <= BTREE_VERSION);
714  Assert(!metad->btm_allequalimage ||
716  Assert(metad->btm_fastroot != P_NONE);
717 
718  *heapkeyspace = metad->btm_version > BTREE_NOVAC_VERSION;
719  *allequalimage = metad->btm_allequalimage;
720 }
static BTMetaPageData * _bt_getmeta(Relation rel, Buffer metabuf)
Definition: nbtpage.c:136
uint32 btm_version
Definition: nbtree.h:101
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:803
#define BTREE_VERSION
Definition: nbtree.h:143
uint32 btm_magic
Definition: nbtree.h:100
#define P_NONE
Definition: nbtree.h:206
#define BT_READ
Definition: nbtree.h:587
BlockNumber btm_fastroot
Definition: nbtree.h:104
#define BTREE_MAGIC
Definition: nbtree.h:142
bool btm_allequalimage
Definition: nbtree.h:111
#define BTREE_NOVAC_VERSION
Definition: nbtree.h:145
#define BTREE_METAPAGE
Definition: nbtree.h:141
BlockNumber btm_root
Definition: nbtree.h:102
#define BTREE_MIN_VERSION
Definition: nbtree.h:144
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:959
#define Assert(condition)
Definition: c.h:800
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:797
MemoryContext rd_indexcxt
Definition: rel.h:187
void * rd_amcache
Definition: rel.h:212
int Buffer
Definition: buf.h:23

◆ _bt_page_recyclable()

bool _bt_page_recyclable ( Page  page)

Definition at line 1081 of file nbtpage.c.

References BTPageOpaqueData::btpo, GlobalVisCheckRemovableXid(), P_ISDELETED, PageGetSpecialPointer, PageIsNew, and BTPageOpaqueData::xact.

Referenced by _bt_getbuf(), and btvacuumpage().

1082 {
1083  BTPageOpaque opaque;
1084 
1085  /*
1086  * It's possible to find an all-zeroes page in an index --- for example, a
1087  * backend might successfully extend the relation one page and then crash
1088  * before it is able to make a WAL entry for adding the page. If we find a
1089  * zeroed page then reclaim it.
1090  */
1091  if (PageIsNew(page))
1092  return true;
1093 
1094  /*
1095  * Otherwise, recycle if deleted and too old to have any processes
1096  * interested in it.
1097  */
1098  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1099  if (P_ISDELETED(opaque) &&
1100  GlobalVisCheckRemovableXid(NULL, opaque->btpo.xact))
1101  return true;
1102  return false;
1103 }
union BTPageOpaqueData::@45 btpo
TransactionId xact
Definition: nbtree.h:63
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
#define P_ISDELETED(opaque)
Definition: nbtree.h:216
bool GlobalVisCheckRemovableXid(Relation rel, TransactionId xid)
Definition: procarray.c:4156
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
#define PageIsNew(page)
Definition: bufpage.h:229

◆ _bt_pagedel()

uint32 _bt_pagedel ( Relation  rel,
Buffer  leafbuf,
TransactionId oldestBtpoXact 
)

Definition at line 1548 of file nbtpage.c.

References _bt_getbuf(), _bt_leftsib_splitflag(), _bt_lockbuf(), _bt_mark_page_halfdead(), _bt_mkscankey(), _bt_relbuf(), _bt_search(), _bt_unlink_halfdead_page(), _bt_unlockbuf(), Assert, BT_READ, BT_WRITE, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BufferGetBlockNumber(), BufferGetPage, CHECK_FOR_INTERRUPTS, CopyIndexTuple(), ereport, errcode(), errhint(), errmsg(), errmsg_internal(), LOG, P_FIRSTDATAKEY, P_HIKEY, P_IGNORE, P_INCOMPLETE_SPLIT, P_ISDELETED, P_ISHALFDEAD, P_ISLEAF, P_ISROOT, P_RIGHTMOST, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageGetSpecialPointer, BTScanInsertData::pivotsearch, RelationGetRelationName, ReleaseBuffer(), TransactionIdFollowsOrEquals(), and BTPageOpaqueData::xact.

Referenced by btvacuumpage().

1549 {
1550  uint32 ndeleted = 0;
1551  BlockNumber rightsib;
1552  bool rightsib_empty;
1553  Page page;
1554  BTPageOpaque opaque;
1555 
1556  /*
1557  * Save original leafbuf block number from caller. Only deleted blocks
1558  * that are <= scanblkno get counted in ndeleted return value.
1559  */
1560  BlockNumber scanblkno = BufferGetBlockNumber(leafbuf);
1561 
1562  /*
1563  * "stack" is a search stack leading (approximately) to the target page.
1564  * It is initially NULL, but when iterating, we keep it to avoid
1565  * duplicated search effort.
1566  *
1567  * Also, when "stack" is not NULL, we have already checked that the
1568  * current page is not the right half of an incomplete split, i.e. the
1569  * left sibling does not have its INCOMPLETE_SPLIT flag set, including
1570  * when the current target page is to the right of caller's initial page
1571  * (the scanblkno page).
1572  */
1573  BTStack stack = NULL;
1574 
1575  for (;;)
1576  {
1577  page = BufferGetPage(leafbuf);
1578  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1579 
1580  /*
1581  * Internal pages are never deleted directly, only as part of deleting
1582  * the whole subtree all the way down to leaf level.
1583  *
1584  * Also check for deleted pages here. Caller never passes us a fully
1585  * deleted page. Only VACUUM can delete pages, so there can't have
1586  * been a concurrent deletion. Assume that we reached any deleted
1587  * page encountered here by following a sibling link, and that the
1588  * index is corrupt.
1589  */
1590  Assert(!P_ISDELETED(opaque));
1591  if (!P_ISLEAF(opaque) || P_ISDELETED(opaque))
1592  {
1593  /*
1594  * Pre-9.4 page deletion only marked internal pages as half-dead,
1595  * but now we only use that flag on leaf pages. The old algorithm
1596  * was never supposed to leave half-dead pages in the tree, it was
1597  * just a transient state, but it was nevertheless possible in
1598  * error scenarios. We don't know how to deal with them here. They
1599  * are harmless as far as searches are considered, but inserts
1600  * into the deleted keyspace could add out-of-order downlinks in
1601  * the upper levels. Log a notice, hopefully the admin will notice
1602  * and reindex.
1603  */
1604  if (P_ISHALFDEAD(opaque))
1605  ereport(LOG,
1606  (errcode(ERRCODE_INDEX_CORRUPTED),
1607  errmsg("index \"%s\" contains a half-dead internal page",
1609  errhint("This can be caused by an interrupted VACUUM in version 9.3 or older, before upgrade. Please REINDEX it.")));
1610 
1611  if (P_ISDELETED(opaque))
1612  ereport(LOG,
1613  (errcode(ERRCODE_INDEX_CORRUPTED),
1614  errmsg_internal("found deleted block %u while following right link from block %u in index \"%s\"",
1615  BufferGetBlockNumber(leafbuf),
1616  scanblkno,
1617  RelationGetRelationName(rel))));
1618 
1619  _bt_relbuf(rel, leafbuf);
1620  return ndeleted;
1621  }
1622 
1623  /*
1624  * We can never delete rightmost pages nor root pages. While at it,
1625  * check that page is empty, since it's possible that the leafbuf page
1626  * was empty a moment ago, but has since had some inserts.
1627  *
1628  * To keep the algorithm simple, we also never delete an incompletely
1629  * split page (they should be rare enough that this doesn't make any
1630  * meaningful difference to disk usage):
1631  *
1632  * The INCOMPLETE_SPLIT flag on the page tells us if the page is the
1633  * left half of an incomplete split, but ensuring that it's not the
1634  * right half is more complicated. For that, we have to check that
1635  * the left sibling doesn't have its INCOMPLETE_SPLIT flag set using
1636  * _bt_leftsib_splitflag(). On the first iteration, we temporarily
1637  * release the lock on scanblkno/leafbuf, check the left sibling, and
1638  * construct a search stack to scanblkno. On subsequent iterations,
1639  * we know we stepped right from a page that passed these tests, so
1640  * it's OK.
1641  */
1642  if (P_RIGHTMOST(opaque) || P_ISROOT(opaque) ||
1643  P_FIRSTDATAKEY(opaque) <= PageGetMaxOffsetNumber(page) ||
1644  P_INCOMPLETE_SPLIT(opaque))
1645  {
1646  /* Should never fail to delete a half-dead page */
1647  Assert(!P_ISHALFDEAD(opaque));
1648 
1649  _bt_relbuf(rel, leafbuf);
1650  return ndeleted;
1651  }
1652 
1653  /*
1654  * First, remove downlink pointing to the page (or a parent of the
1655  * page, if we are going to delete a taller subtree), and mark the
1656  * leafbuf page half-dead
1657  */
1658  if (!P_ISHALFDEAD(opaque))
1659  {
1660  /*
1661  * We need an approximate pointer to the page's parent page. We
1662  * use a variant of the standard search mechanism to search for
1663  * the page's high key; this will give us a link to either the
1664  * current parent or someplace to its left (if there are multiple
1665  * equal high keys, which is possible with !heapkeyspace indexes).
1666  *
1667  * Also check if this is the right-half of an incomplete split
1668  * (see comment above).
1669  */
1670  if (!stack)
1671  {
1672  BTScanInsert itup_key;
1673  ItemId itemid;
1674  IndexTuple targetkey;
1675  BlockNumber leftsib,
1676  leafblkno;
1677  Buffer sleafbuf;
1678 
1679  itemid = PageGetItemId(page, P_HIKEY);
1680  targetkey = CopyIndexTuple((IndexTuple) PageGetItem(page, itemid));
1681 
1682  leftsib = opaque->btpo_prev;
1683  leafblkno = BufferGetBlockNumber(leafbuf);
1684 
1685  /*
1686  * To avoid deadlocks, we'd better drop the leaf page lock
1687  * before going further.
1688  */
1689  _bt_unlockbuf(rel, leafbuf);
1690 
1691  /*
1692  * Check that the left sibling of leafbuf (if any) is not
1693  * marked with INCOMPLETE_SPLIT flag before proceeding
1694  */
1695  Assert(leafblkno == scanblkno);
1696  if (_bt_leftsib_splitflag(rel, leftsib, leafblkno))
1697  {
1698  ReleaseBuffer(leafbuf);
1699  Assert(ndeleted == 0);
1700  return ndeleted;
1701  }
1702 
1703  /* we need an insertion scan key for the search, so build one */
1704  itup_key = _bt_mkscankey(rel, targetkey);
1705  /* find the leftmost leaf page with matching pivot/high key */
1706  itup_key->pivotsearch = true;
1707  stack = _bt_search(rel, itup_key, &sleafbuf, BT_READ, NULL);
1708  /* won't need a second lock or pin on leafbuf */
1709  _bt_relbuf(rel, sleafbuf);
1710 
1711  /*
1712  * Re-lock the leaf page, and start over to use our stack
1713  * within _bt_mark_page_halfdead. We must do it that way
1714  * because it's possible that leafbuf can no longer be
1715  * deleted. We need to recheck.
1716  *
1717  * Note: We can't simply hold on to the sleafbuf lock instead,
1718  * because it's barely possible that sleafbuf is not the same
1719  * page as leafbuf. This happens when leafbuf split after our
1720  * original lock was dropped, but before _bt_search finished
1721  * its descent. We rely on the assumption that we'll find
1722  * leafbuf isn't safe to delete anymore in this scenario.
1723  * (Page deletion can cope with the stack being to the left of
1724  * leafbuf, but not to the right of leafbuf.)
1725  */
1726  _bt_lockbuf(rel, leafbuf, BT_WRITE);
1727  continue;
1728  }
1729 
1730  /*
1731  * See if it's safe to delete the leaf page, and determine how
1732  * many parent/internal pages above the leaf level will be
1733  * deleted. If it's safe then _bt_mark_page_halfdead will also
1734  * perform the first phase of deletion, which includes marking the
1735  * leafbuf page half-dead.
1736  */
1737  Assert(P_ISLEAF(opaque) && !P_IGNORE(opaque));
1738  if (!_bt_mark_page_halfdead(rel, leafbuf, stack))
1739  {
1740  _bt_relbuf(rel, leafbuf);
1741  return ndeleted;
1742  }
1743  }
1744 
1745  /*
1746  * Then unlink it from its siblings. Each call to
1747  * _bt_unlink_halfdead_page unlinks the topmost page from the subtree,
1748  * making it shallower. Iterate until the leafbuf page is deleted.
1749  *
1750  * _bt_unlink_halfdead_page should never fail, since we established
1751  * that deletion is generally safe in _bt_mark_page_halfdead.
1752  */
1753  rightsib_empty = false;
1754  Assert(P_ISLEAF(opaque) && P_ISHALFDEAD(opaque));
1755  while (P_ISHALFDEAD(opaque))
1756  {
1757  /* Check for interrupts in _bt_unlink_halfdead_page */
1758  if (!_bt_unlink_halfdead_page(rel, leafbuf, scanblkno,
1759  &rightsib_empty, oldestBtpoXact,
1760  &ndeleted))
1761  {
1762  /* _bt_unlink_halfdead_page failed, released buffer */
1763  return ndeleted;
1764  }
1765  }
1766 
1767  Assert(P_ISLEAF(opaque) && P_ISDELETED(opaque));
1769  *oldestBtpoXact));
1770 
1771  rightsib = opaque->btpo_next;
1772 
1773  _bt_relbuf(rel, leafbuf);
1774 
1775  /*
1776  * Check here, as calling loops will have locks held, preventing
1777  * interrupts from being processed.
1778  */
1780 
1781  /*
1782  * The page has now been deleted. If its right sibling is completely
1783  * empty, it's possible that the reason we haven't deleted it earlier
1784  * is that it was the rightmost child of the parent. Now that we
1785  * removed the downlink for this page, the right sibling might now be
1786  * the only child of the parent, and could be removed. It would be
1787  * picked up by the next vacuum anyway, but might as well try to
1788  * remove it now, so loop back to process the right sibling.
1789  *
1790  * Note: This relies on the assumption that _bt_getstackbuf() will be
1791  * able to reuse our original descent stack with a different child
1792  * block (provided that the child block is to the right of the
1793  * original leaf page reached by _bt_search()). It will even update
1794  * the descent stack each time we loop around, avoiding repeated work.
1795  */
1796  if (!rightsib_empty)
1797  break;
1798 
1799  leafbuf = _bt_getbuf(rel, rightsib, BT_WRITE);
1800  }
1801 
1802  return ndeleted;
1803 }
BlockNumber btpo_next
Definition: nbtree.h:59
int errhint(const char *fmt,...)
Definition: elog.c:1149
#define P_IGNORE(opaque)
Definition: nbtree.h:219
BTScanInsert _bt_mkscankey(Relation rel, IndexTuple itup)
Definition: nbtutils.c:90
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:803
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:244
bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:349
union BTPageOpaqueData::@45 btpo
int errcode(int sqlerrcode)
Definition: elog.c:691
uint32 BlockNumber
Definition: block.h:31
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3513
#define P_INCOMPLETE_SPLIT(opaque)
Definition: nbtree.h:221
#define LOG
Definition: elog.h:26
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
TransactionId xact
Definition: nbtree.h:63
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
#define BT_READ
Definition: nbtree.h:587
#define P_ISHALFDEAD(opaque)
Definition: nbtree.h:218
static bool _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
Definition: nbtpage.c:1820
BlockNumber btpo_prev
Definition: nbtree.h:58
IndexTuple CopyIndexTuple(IndexTuple source)
Definition: indextuple.c:510
#define RelationGetRelationName(relation)
Definition: rel.h:491
unsigned int uint32
Definition: c.h:429
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
bool pivotsearch
Definition: nbtree.h:663
#define P_ISDELETED(opaque)
Definition: nbtree.h:216
#define P_ISROOT(opaque)
Definition: nbtree.h:215
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
void _bt_lockbuf(Relation rel, Buffer buf, int access)
Definition: nbtpage.c:975
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:959
#define ereport(elevel,...)
Definition: elog.h:155
int errmsg_internal(const char *fmt,...)
Definition: elog.c:989
#define Assert(condition)
Definition: c.h:800
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
void _bt_unlockbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:1006
#define P_HIKEY
Definition: nbtree.h:242
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2663
int errmsg(const char *fmt,...)
Definition: elog.c:902
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
#define BT_WRITE
Definition: nbtree.h:588
int Buffer
Definition: buf.h:23
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:213
BTStack _bt_search(Relation rel, BTScanInsert key, Buffer *bufP, int access, Snapshot snapshot)
Definition: nbtsearch.c:101
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
Pointer Page
Definition: bufpage.h:78
static bool _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno, bool *rightsib_empty, TransactionId *oldestBtpoXact, uint32 *ndeleted)
Definition: nbtpage.c:2042
static bool _bt_leftsib_splitflag(Relation rel, BlockNumber leftsib, BlockNumber target)
Definition: nbtpage.c:1440
#define P_ISLEAF(opaque)
Definition: nbtree.h:214

◆ _bt_pageinit()

void _bt_pageinit ( Page  page,
Size  size 
)

Definition at line 1066 of file nbtpage.c.

References PageInit().

Referenced by _bt_blnewpage(), _bt_getbuf(), _bt_initmetapage(), _bt_restore_meta(), _bt_split(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_split(), and btree_xlog_unlink_page().

1067 {
1068  PageInit(page, size, sizeof(BTPageOpaqueData));
1069 }
void PageInit(Page page, Size pageSize, Size specialSize)
Definition: bufpage.c:42

◆ _bt_relandgetbuf()

Buffer _bt_relandgetbuf ( Relation  rel,
Buffer  obuf,
BlockNumber  blkno,
int  access 
)

Definition at line 939 of file nbtpage.c.

References _bt_checkpage(), _bt_lockbuf(), _bt_unlockbuf(), Assert, buf, BufferIsValid, P_NEW, and ReleaseAndReadBuffer().

Referenced by _bt_check_unique(), _bt_get_endpoint(), _bt_getroot(), _bt_gettrueroot(), _bt_moveright(), _bt_search(), _bt_stepright(), and _bt_walk_left().

940 {
941  Buffer buf;
942 
943  Assert(blkno != P_NEW);
944  if (BufferIsValid(obuf))
945  _bt_unlockbuf(rel, obuf);
946  buf = ReleaseAndReadBuffer(obuf, rel, blkno);
947  _bt_lockbuf(rel, buf, access);
948 
949  _bt_checkpage(rel, buf);
950  return buf;
951 }
#define P_NEW
Definition: bufmgr.h:91
void _bt_checkpage(Relation rel, Buffer buf)
Definition: nbtpage.c:726
static char * buf
Definition: pg_test_fsync.c:68
void _bt_lockbuf(Relation rel, Buffer buf, int access)
Definition: nbtpage.c:975
#define Assert(condition)
Definition: c.h:800
Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation, BlockNumber blockNum)
Definition: bufmgr.c:1534
#define BufferIsValid(bufnum)
Definition: bufmgr.h:123
void _bt_unlockbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:1006
int Buffer
Definition: buf.h:23

◆ _bt_relbuf()

◆ _bt_rightsib_halfdeadflag()

static bool _bt_rightsib_halfdeadflag ( Relation  rel,
BlockNumber  leafrightsib 
)
static

Definition at line 1497 of file nbtpage.c.

References _bt_getbuf(), _bt_relbuf(), Assert, BT_READ, buf, BufferGetPage, P_ISDELETED, P_ISHALFDEAD, P_ISLEAF, P_NONE, and PageGetSpecialPointer.

Referenced by _bt_mark_page_halfdead().

1498 {
1499  Buffer buf;
1500  Page page;
1501  BTPageOpaque opaque;
1502  bool result;
1503 
1504  Assert(leafrightsib != P_NONE);
1505 
1506  buf = _bt_getbuf(rel, leafrightsib, BT_READ);
1507  page = BufferGetPage(buf);
1508  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1509 
1510  Assert(P_ISLEAF(opaque) && !P_ISDELETED(opaque));
1511  result = P_ISHALFDEAD(opaque);
1512  _bt_relbuf(rel, buf);
1513 
1514  return result;
1515 }
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:803
#define P_NONE
Definition: nbtree.h:206
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
#define BT_READ
Definition: nbtree.h:587
#define P_ISHALFDEAD(opaque)
Definition: nbtree.h:218
static char * buf
Definition: pg_test_fsync.c:68
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define P_ISDELETED(opaque)
Definition: nbtree.h:216
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:959
#define Assert(condition)
Definition: c.h:800
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
int Buffer
Definition: buf.h:23
Pointer Page
Definition: bufpage.h:78
#define P_ISLEAF(opaque)
Definition: nbtree.h:214

◆ _bt_unlink_halfdead_page()

static bool _bt_unlink_halfdead_page ( Relation  rel,
Buffer  leafbuf,
BlockNumber  scanblkno,
bool rightsib_empty,
TransactionId oldestBtpoXact,
uint32 ndeleted 
)
static

Definition at line 2042 of file nbtpage.c.

References _bt_getbuf(), _bt_lockbuf(), _bt_relbuf(), _bt_unlockbuf(), _bt_upgrademetapage(), xl_btree_metadata::allequalimage, Assert, BlockNumberIsValid, BT_READ, BT_WRITE, BTMetaPageData::btm_allequalimage, BTMetaPageData::btm_fastlevel, BTMetaPageData::btm_fastroot, BTMetaPageData::btm_last_cleanup_num_heap_tuples, BTMetaPageData::btm_level, BTMetaPageData::btm_oldest_btpo_xact, BTMetaPageData::btm_root, BTMetaPageData::btm_version, BTP_DELETED, BTP_HALF_DEAD, BTPageGetMeta, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, xl_btree_unlink_page::btpo_xact, BTREE_METAPAGE, BTREE_NOVAC_VERSION, BTreeTupleGetDownLink(), BTreeTupleGetTopParent(), BTreeTupleSetTopParent(), buf, BufferGetBlockNumber(), BufferGetPage, BufferIsValid, CHECK_FOR_INTERRUPTS, elog, END_CRIT_SECTION, ereport, errcode(), errmsg_internal(), ERROR, xl_btree_metadata::fastlevel, xl_btree_metadata::fastroot, header(), InvalidBlockNumber, InvalidBuffer, xl_btree_metadata::last_cleanup_num_heap_tuples, xl_btree_unlink_page::leafleftsib, xl_btree_unlink_page::leafrightsib, xl_btree_unlink_page::leftsib, xl_btree_metadata::level, BTPageOpaqueData::level, LOG, MarkBufferDirty(), xl_btree_metadata::oldest_btpo_xact, P_FIRSTDATAKEY, P_HIKEY, P_ISDELETED, P_ISHALFDEAD, P_ISLEAF, P_ISROOT, P_NONE, P_RIGHTMOST, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageGetSpecialPointer, PageSetLSN, PageHeaderData::pd_lower, PageHeaderData::pd_special, PageHeaderData::pd_upper, ReadNewTransactionId(), REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetRelationName, RelationNeedsWAL, ReleaseBuffer(), xl_btree_unlink_page::rightsib, xl_btree_metadata::root, SizeOfBtreeUnlinkPage, SizeOfPageHeaderData, START_CRIT_SECTION, xl_btree_unlink_page::topparent, TransactionIdIsValid, TransactionIdPrecedes(), xl_btree_metadata::version, BTPageOpaqueData::xact, XLOG_BTREE_UNLINK_PAGE, XLOG_BTREE_UNLINK_PAGE_META, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _bt_pagedel().

2045 {
2046  BlockNumber leafblkno = BufferGetBlockNumber(leafbuf);
2047  BlockNumber leafleftsib;
2048  BlockNumber leafrightsib;
2049  BlockNumber target;
2050  BlockNumber leftsib;
2051  BlockNumber rightsib;
2052  Buffer lbuf = InvalidBuffer;
2053  Buffer buf;
2054  Buffer rbuf;
2055  Buffer metabuf = InvalidBuffer;
2056  Page metapg = NULL;
2057  BTMetaPageData *metad = NULL;
2058  ItemId itemid;
2059  Page page;
2061  BTPageOpaque opaque;
2062  bool rightsib_is_rightmost;
2063  int targetlevel;
2064  IndexTuple leafhikey;
2065  BlockNumber nextchild;
2066 
2067  page = BufferGetPage(leafbuf);
2068  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2069 
2070  Assert(P_ISLEAF(opaque) && !P_ISDELETED(opaque) && P_ISHALFDEAD(opaque));
2071 
2072  /*
2073  * Remember some information about the leaf page.
2074  */
2075  itemid = PageGetItemId(page, P_HIKEY);
2076  leafhikey = (IndexTuple) PageGetItem(page, itemid);
2077  target = BTreeTupleGetTopParent(leafhikey);
2078  leafleftsib = opaque->btpo_prev;
2079  leafrightsib = opaque->btpo_next;
2080 
2081  _bt_unlockbuf(rel, leafbuf);
2082 
2083  /*
2084  * Check here, as calling loops will have locks held, preventing
2085  * interrupts from being processed.
2086  */
2088 
2089  /* Unlink the current top parent of the subtree */
2090  if (!BlockNumberIsValid(target))
2091  {
2092  /* Target is leaf page (or leaf page is top parent, if you prefer) */
2093  target = leafblkno;
2094 
2095  buf = leafbuf;
2096  leftsib = leafleftsib;
2097  targetlevel = 0;
2098  }
2099  else
2100  {
2101  /* Target is the internal page taken from leaf's top parent link */
2102  Assert(target != leafblkno);
2103 
2104  /* Fetch the block number of the target's left sibling */
2105  buf = _bt_getbuf(rel, target, BT_READ);
2106  page = BufferGetPage(buf);
2107  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2108  leftsib = opaque->btpo_prev;
2109  targetlevel = opaque->btpo.level;
2110  Assert(targetlevel > 0);
2111 
2112  /*
2113  * To avoid deadlocks, we'd better drop the target page lock before
2114  * going further.
2115  */
2116  _bt_unlockbuf(rel, buf);
2117  }
2118 
2119  /*
2120  * We have to lock the pages we need to modify in the standard order:
2121  * moving right, then up. Else we will deadlock against other writers.
2122  *
2123  * So, first lock the leaf page, if it's not the target. Then find and
2124  * write-lock the current left sibling of the target page. The sibling
2125  * that was current a moment ago could have split, so we may have to move
2126  * right. This search could fail if either the sibling or the target page
2127  * was deleted by someone else meanwhile; if so, give up. (Right now,
2128  * that should never happen, since page deletion is only done in VACUUM
2129  * and there shouldn't be multiple VACUUMs concurrently on the same
2130  * table.)
2131  */
2132  if (target != leafblkno)
2133  _bt_lockbuf(rel, leafbuf, BT_WRITE);
2134  if (leftsib != P_NONE)
2135  {
2136  lbuf = _bt_getbuf(rel, leftsib, BT_WRITE);
2137  page = BufferGetPage(lbuf);
2138  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2139  while (P_ISDELETED(opaque) || opaque->btpo_next != target)
2140  {
2141  /* step right one page */
2142  leftsib = opaque->btpo_next;
2143  _bt_relbuf(rel, lbuf);
2144 
2145  /*
2146  * It'd be good to check for interrupts here, but it's not easy to
2147  * do so because a lock is always held. This block isn't
2148  * frequently reached, so hopefully the consequences of not
2149  * checking interrupts aren't too bad.
2150  */
2151 
2152  if (leftsib == P_NONE)
2153  {
2154  elog(LOG, "no left sibling (concurrent deletion?) of block %u in \"%s\"",
2155  target,
2157  if (target != leafblkno)
2158  {
2159  /* we have only a pin on target, but pin+lock on leafbuf */
2160  ReleaseBuffer(buf);
2161  _bt_relbuf(rel, leafbuf);
2162  }
2163  else
2164  {
2165  /* we have only a pin on leafbuf */
2166  ReleaseBuffer(leafbuf);
2167  }
2168  return false;
2169  }
2170  lbuf = _bt_getbuf(rel, leftsib, BT_WRITE);
2171  page = BufferGetPage(lbuf);
2172  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2173  }
2174  }
2175  else
2176  lbuf = InvalidBuffer;
2177 
2178  /*
2179  * Next write-lock the target page itself. It's okay to take a write lock
2180  * rather than a superexclusive lock, since no scan will stop on an empty
2181  * page.
2182  */
2183  _bt_lockbuf(rel, buf, BT_WRITE);
2184  page = BufferGetPage(buf);
2185  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2186 
2187  /*
2188  * Check page is still empty etc, else abandon deletion. This is just for
2189  * paranoia's sake; a half-dead page cannot resurrect because there can be
2190  * only one vacuum process running at a time.
2191  */
2192  if (P_RIGHTMOST(opaque) || P_ISROOT(opaque) || P_ISDELETED(opaque))
2193  elog(ERROR, "half-dead page changed status unexpectedly in block %u of index \"%s\"",
2194  target, RelationGetRelationName(rel));
2195 
2196  if (opaque->btpo_prev != leftsib)
2197  ereport(ERROR,
2198  (errcode(ERRCODE_INDEX_CORRUPTED),
2199  errmsg_internal("left link changed unexpectedly in block %u of index \"%s\"",
2200  target, RelationGetRelationName(rel))));
2201 
2202  if (target == leafblkno)
2203  {
2204  if (P_FIRSTDATAKEY(opaque) <= PageGetMaxOffsetNumber(page) ||
2205  !P_ISLEAF(opaque) || !P_ISHALFDEAD(opaque))
2206  elog(ERROR, "half-dead page changed status unexpectedly in block %u of index \"%s\"",
2207  target, RelationGetRelationName(rel));
2208  nextchild = InvalidBlockNumber;
2209  }
2210  else
2211  {
2212  if (P_FIRSTDATAKEY(opaque) != PageGetMaxOffsetNumber(page) ||
2213  P_ISLEAF(opaque))
2214  elog(ERROR, "half-dead page changed status unexpectedly in block %u of index \"%s\"",
2215  target, RelationGetRelationName(rel));
2216 
2217  /* Remember the next non-leaf child down in the subtree */
2218  itemid = PageGetItemId(page, P_FIRSTDATAKEY(opaque));
2219  nextchild = BTreeTupleGetDownLink((IndexTuple) PageGetItem(page, itemid));
2220  if (nextchild == leafblkno)
2221  nextchild = InvalidBlockNumber;
2222  }
2223 
2224  /*
2225  * And next write-lock the (current) right sibling.
2226  */
2227  rightsib = opaque->btpo_next;
2228  rbuf = _bt_getbuf(rel, rightsib, BT_WRITE);
2229  page = BufferGetPage(rbuf);
2230  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2231  if (opaque->btpo_prev != target)
2232  ereport(ERROR,
2233  (errcode(ERRCODE_INDEX_CORRUPTED),
2234  errmsg_internal("right sibling's left-link doesn't match: "
2235  "block %u links to %u instead of expected %u in index \"%s\"",
2236  rightsib, opaque->btpo_prev, target,
2237  RelationGetRelationName(rel))));
2238  rightsib_is_rightmost = P_RIGHTMOST(opaque);
2239  *rightsib_empty = (P_FIRSTDATAKEY(opaque) > PageGetMaxOffsetNumber(page));
2240 
2241  /*
2242  * If we are deleting the next-to-last page on the target's level, then
2243  * the rightsib is a candidate to become the new fast root. (In theory, it
2244  * might be possible to push the fast root even further down, but the odds
2245  * of doing so are slim, and the locking considerations daunting.)
2246  *
2247  * We can safely acquire a lock on the metapage here --- see comments for
2248  * _bt_newroot().
2249  */
2250  if (leftsib == P_NONE && rightsib_is_rightmost)
2251  {
2252  page = BufferGetPage(rbuf);
2253  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2254  if (P_RIGHTMOST(opaque))
2255  {
2256  /* rightsib will be the only one left on the level */
2257  metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE);
2258  metapg = BufferGetPage(metabuf);
2259  metad = BTPageGetMeta(metapg);
2260 
2261  /*
2262  * The expected case here is btm_fastlevel == targetlevel+1; if
2263  * the fastlevel is <= targetlevel, something is wrong, and we
2264  * choose to overwrite it to fix it.
2265  */
2266  if (metad->btm_fastlevel > targetlevel + 1)
2267  {
2268  /* no update wanted */
2269  _bt_relbuf(rel, metabuf);
2270  metabuf = InvalidBuffer;
2271  }
2272  }
2273  }
2274 
2275  /*
2276  * Here we begin doing the deletion.
2277  */
2278 
2279  /* No ereport(ERROR) until changes are logged */
2281 
2282  /*
2283  * Update siblings' side-links. Note the target page's side-links will
2284  * continue to point to the siblings. Asserts here are just rechecking
2285  * things we already verified above.
2286  */
2287  if (BufferIsValid(lbuf))
2288  {
2289  page = BufferGetPage(lbuf);
2290  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2291  Assert(opaque->btpo_next == target);
2292  opaque->btpo_next = rightsib;
2293  }
2294  page = BufferGetPage(rbuf);
2295  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2296  Assert(opaque->btpo_prev == target);
2297  opaque->btpo_prev = leftsib;
2298 
2299  /*
2300  * If we deleted a parent of the targeted leaf page, instead of the leaf
2301  * itself, update the leaf to point to the next remaining child in the
2302  * subtree.
2303  *
2304  * Note: We rely on the fact that a buffer pin on the leaf page has been
2305  * held since leafhikey was initialized. This is safe, though only
2306  * because the page was already half-dead at that point. The leaf page
2307  * cannot have been modified by any other backend during the period when
2308  * no lock was held.
2309  */
2310  if (target != leafblkno)
2311  BTreeTupleSetTopParent(leafhikey, nextchild);
2312 
2313  /*
2314  * Mark the page itself deleted. It can be recycled when all current
2315  * transactions are gone. Storing GetTopTransactionId() would work, but
2316  * we're in VACUUM and would not otherwise have an XID. Having already
2317  * updated links to the target, ReadNewTransactionId() suffices as an
2318  * upper bound. Any scan having retained a now-stale link is advertising
2319  * in its PGPROC an xmin less than or equal to the value we read here. It
2320  * will continue to do so, holding back the xmin horizon, for the duration
2321  * of that scan.
2322  */
2323  page = BufferGetPage(buf);
2324  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2325  Assert(P_ISHALFDEAD(opaque) || !P_ISLEAF(opaque));
2326  opaque->btpo_flags &= ~BTP_HALF_DEAD;
2327  opaque->btpo_flags |= BTP_DELETED;
2328  opaque->btpo.xact = ReadNewTransactionId();
2329 
2330  /*
2331  * Remove the remaining tuples on the page. This keeps things simple for
2332  * WAL consistency checking.
2333  */
2334  header = (PageHeader) page;
2335  header->pd_lower = SizeOfPageHeaderData;
2336  header->pd_upper = header->pd_special;
2337 
2338  /* And update the metapage, if needed */
2339  if (BufferIsValid(metabuf))
2340  {
2341  /* upgrade metapage if needed */
2342  if (metad->btm_version < BTREE_NOVAC_VERSION)
2343  _bt_upgrademetapage(metapg);
2344  metad->btm_fastroot = rightsib;
2345  metad->btm_fastlevel = targetlevel;
2346  MarkBufferDirty(metabuf);
2347  }
2348 
2349  /* Must mark buffers dirty before XLogInsert */
2350  MarkBufferDirty(rbuf);
2351  MarkBufferDirty(buf);
2352  if (BufferIsValid(lbuf))
2353  MarkBufferDirty(lbuf);
2354  if (target != leafblkno)
2355  MarkBufferDirty(leafbuf);
2356 
2357  /* XLOG stuff */
2358  if (RelationNeedsWAL(rel))
2359  {
2360  xl_btree_unlink_page xlrec;
2361  xl_btree_metadata xlmeta;
2362  uint8 xlinfo;
2363  XLogRecPtr recptr;
2364 
2365  XLogBeginInsert();
2366 
2368  if (BufferIsValid(lbuf))
2371  if (target != leafblkno)
2372  XLogRegisterBuffer(3, leafbuf, REGBUF_WILL_INIT);
2373 
2374  /* information on the unlinked block */
2375  xlrec.leftsib = leftsib;
2376  xlrec.rightsib = rightsib;
2377  xlrec.btpo_xact = opaque->btpo.xact;
2378 
2379  /* information needed to recreate the leaf block (if not the target) */
2380  xlrec.leafleftsib = leafleftsib;
2381  xlrec.leafrightsib = leafrightsib;
2382  xlrec.topparent = nextchild;
2383 
2384  XLogRegisterData((char *) &xlrec, SizeOfBtreeUnlinkPage);
2385 
2386  if (BufferIsValid(metabuf))
2387  {
2389 
2391  xlmeta.version = metad->btm_version;
2392  xlmeta.root = metad->btm_root;
2393  xlmeta.level = metad->btm_level;
2394  xlmeta.fastroot = metad->btm_fastroot;
2395  xlmeta.fastlevel = metad->btm_fastlevel;
2396  xlmeta.oldest_btpo_xact = metad->btm_oldest_btpo_xact;
2398  xlmeta.allequalimage = metad->btm_allequalimage;
2399 
2400  XLogRegisterBufData(4, (char *) &xlmeta, sizeof(xl_btree_metadata));
2401  xlinfo = XLOG_BTREE_UNLINK_PAGE_META;
2402  }
2403  else
2404  xlinfo = XLOG_BTREE_UNLINK_PAGE;
2405 
2406  recptr = XLogInsert(RM_BTREE_ID, xlinfo);
2407 
2408  if (BufferIsValid(metabuf))
2409  {
2410  PageSetLSN(metapg, recptr);
2411  }
2412  page = BufferGetPage(rbuf);
2413  PageSetLSN(page, recptr);
2414  page = BufferGetPage(buf);
2415  PageSetLSN(page, recptr);
2416  if (BufferIsValid(lbuf))
2417  {
2418  page = BufferGetPage(lbuf);
2419  PageSetLSN(page, recptr);
2420  }
2421  if (target != leafblkno)
2422  {
2423  page = BufferGetPage(leafbuf);
2424  PageSetLSN(page, recptr);
2425  }
2426  }
2427 
2428  END_CRIT_SECTION();
2429 
2430  /* release metapage */
2431  if (BufferIsValid(metabuf))
2432  _bt_relbuf(rel, metabuf);
2433 
2434  /* release siblings */
2435  if (BufferIsValid(lbuf))
2436  _bt_relbuf(rel, lbuf);
2437  _bt_relbuf(rel, rbuf);
2438 
2439  if (!TransactionIdIsValid(*oldestBtpoXact) ||
2440  TransactionIdPrecedes(opaque->btpo.xact, *oldestBtpoXact))
2441  *oldestBtpoXact = opaque->btpo.xact;
2442 
2443  /*
2444  * If btvacuumscan won't revisit this page in a future btvacuumpage call
2445  * and count it as deleted then, we count it as deleted by current
2446  * btvacuumpage call
2447  */
2448  if (target <= scanblkno)
2449  (*ndeleted)++;
2450 
2451  /* If the target is not leafbuf, we're done with it now -- release it */
2452  if (target != leafblkno)
2453  _bt_relbuf(rel, buf);
2454 
2455  return true;
2456 }
void XLogRegisterBufData(uint8 block_id, char *data, int len)
Definition: xloginsert.c:368
bool allequalimage
Definition: nbtxlog.h:57
BlockNumber btpo_next
Definition: nbtree.h:59
void _bt_upgrademetapage(Page page)
Definition: nbtpage.c:101
uint32 btm_version
Definition: nbtree.h:101
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:803
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1471
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:220
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:244
#define BTP_HALF_DEAD
Definition: nbtree.h:76
#define END_CRIT_SECTION()
Definition: miscadmin.h:134
BlockNumber root
Definition: nbtxlog.h:51
union BTPageOpaqueData::@45 btpo
unsigned char uint8
Definition: c.h:427
#define P_NONE
Definition: nbtree.h:206
static BlockNumber BTreeTupleGetDownLink(IndexTuple pivot)
Definition: nbtree.h:424
#define InvalidBuffer
Definition: buf.h:25
#define REGBUF_WILL_INIT
Definition: xloginsert.h:33
#define START_CRIT_SECTION()
Definition: miscadmin.h:132
int errcode(int sqlerrcode)
Definition: elog.c:691
uint32 BlockNumber
Definition: block.h:31
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3513
#define BTP_DELETED
Definition: nbtree.h:74
#define SizeOfPageHeaderData
Definition: bufpage.h:216
#define LOG
Definition: elog.h:26
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
TransactionId xact
Definition: nbtree.h:63
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
#define BT_READ
Definition: nbtree.h:587
BlockNumber btm_fastroot
Definition: nbtree.h:104
#define P_ISHALFDEAD(opaque)
Definition: nbtree.h:218
#define ERROR
Definition: elog.h:43
float8 last_cleanup_num_heap_tuples
Definition: nbtxlog.h:56
TransactionId oldest_btpo_xact
Definition: nbtxlog.h:55
#define BTPageGetMeta(p)
Definition: nbtree.h:114
bool btm_allequalimage
Definition: nbtree.h:111
BlockNumber btpo_prev
Definition: nbtree.h:58
static char * buf
Definition: pg_test_fsync.c:68
IndexTupleData * IndexTuple
Definition: itup.h:53
#define REGBUF_STANDARD
Definition: xloginsert.h:35
#define RelationGetRelationName(relation)
Definition: rel.h:491
#define XLOG_BTREE_UNLINK_PAGE
Definition: nbtxlog.h:34
#define BTREE_NOVAC_VERSION
Definition: nbtree.h:145
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define BTREE_METAPAGE
Definition: nbtree.h:141
#define P_ISDELETED(opaque)
Definition: nbtree.h:216
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
#define P_ISROOT(opaque)
Definition: nbtree.h:215
uint32 version
Definition: nbtxlog.h:50
LocationIndex pd_special
Definition: bufpage.h:160
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
uint32 btm_fastlevel
Definition: nbtree.h:105
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:330
uint32 level
Definition: nbtree.h:62
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:422
void _bt_lockbuf(Relation rel, Buffer buf, int access)
Definition: nbtpage.c:975
BlockNumber btm_root
Definition: nbtree.h:102
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:959
#define ereport(elevel,...)
Definition: elog.h:155
#define BlockNumberIsValid(blockNumber)
Definition: block.h:70
int errmsg_internal(const char *fmt,...)
Definition: elog.c:989
PageHeaderData * PageHeader
Definition: bufpage.h:166
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:800
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
#define InvalidBlockNumber
Definition: block.h:33
float8 btm_last_cleanup_num_heap_tuples
Definition: nbtree.h:109
#define BufferIsValid(bufnum)
Definition: bufmgr.h:123
#define RelationNeedsWAL(relation)
Definition: rel.h:563
static void header(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:210
static TransactionId ReadNewTransactionId(void)
Definition: transam.h:308
void _bt_unlockbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:1006
#define P_HIKEY
Definition: nbtree.h:242
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2663
uint32 fastlevel
Definition: nbtxlog.h:54
uint32 btm_level
Definition: nbtree.h:103
#define elog(elevel,...)
Definition: elog.h:228
uint32 level
Definition: nbtxlog.h:52
#define SizeOfBtreeUnlinkPage
Definition: nbtxlog.h:303
static BlockNumber BTreeTupleGetTopParent(IndexTuple leafhikey)
Definition: nbtree.h:488
BlockNumber fastroot
Definition: nbtxlog.h:53
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
#define XLOG_BTREE_UNLINK_PAGE_META
Definition: nbtxlog.h:35
TransactionId btm_oldest_btpo_xact
Definition: nbtree.h:107
LocationIndex pd_upper
Definition: bufpage.h:159
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define BT_WRITE
Definition: nbtree.h:588
void XLogBeginInsert(void)
Definition: xloginsert.c:123
uint16 btpo_flags
Definition: nbtree.h:65
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
int Buffer
Definition: buf.h:23
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:213
static void BTreeTupleSetTopParent(IndexTuple leafhikey, BlockNumber blkno)
Definition: nbtree.h:494
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
Pointer Page
Definition: bufpage.h:78
LocationIndex pd_lower
Definition: bufpage.h:158
#define P_ISLEAF(opaque)
Definition: nbtree.h:214

◆ _bt_unlockbuf()

void _bt_unlockbuf ( Relation  rel,
Buffer  buf 
)

Definition at line 1006 of file nbtpage.c.

References BUFFER_LOCK_UNLOCK, BufferGetPage, LockBuffer(), RelationUsesLocalBuffers, VALGRIND_CHECK_MEM_IS_DEFINED, and VALGRIND_MAKE_MEM_NOACCESS.

Referenced by _bt_drop_lock_and_maybe_pin(), _bt_endpoint(), _bt_first(), _bt_getroot(), _bt_killitems(), _bt_moveright(), _bt_pagedel(), _bt_relandgetbuf(), _bt_relbuf(), _bt_search(), _bt_unlink_halfdead_page(), and _bt_update_meta_cleanup_info().

1007 {
1008  /*
1009  * Buffer is pinned and locked, which means that it is expected to be
1010  * defined and addressable. Check that proactively.
1011  */
1013 
1014  /* LockBuffer() asserts that pin is held by this backend */
1016 
1017  if (!RelationUsesLocalBuffers(rel))
1019 }
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:96
#define VALGRIND_MAKE_MEM_NOACCESS(addr, size)
Definition: memdebug.h:27
#define VALGRIND_CHECK_MEM_IS_DEFINED(addr, size)
Definition: memdebug.h:23
static char * buf
Definition: pg_test_fsync.c:68
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3752
#define RelationUsesLocalBuffers(relation)
Definition: rel.h:573

◆ _bt_update_meta_cleanup_info()

void _bt_update_meta_cleanup_info ( Relation  rel,
TransactionId  oldestBtpoXact,
float8  numHeapTuples 
)

Definition at line 174 of file nbtpage.c.

References _bt_getbuf(), _bt_lockbuf(), _bt_relbuf(), _bt_unlockbuf(), _bt_upgrademetapage(), xl_btree_metadata::allequalimage, Assert, BT_READ, BT_WRITE, BTMetaPageData::btm_allequalimage, BTMetaPageData::btm_fastlevel, BTMetaPageData::btm_fastroot, BTMetaPageData::btm_last_cleanup_num_heap_tuples, BTMetaPageData::btm_level, BTMetaPageData::btm_oldest_btpo_xact, BTMetaPageData::btm_root, BTMetaPageData::btm_version, BTPageGetMeta, BTREE_METAPAGE, BTREE_NOVAC_VERSION, BufferGetPage, END_CRIT_SECTION, xl_btree_metadata::fastlevel, xl_btree_metadata::fastroot, xl_btree_metadata::last_cleanup_num_heap_tuples, xl_btree_metadata::level, MarkBufferDirty(), xl_btree_metadata::oldest_btpo_xact, PageSetLSN, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationNeedsWAL, xl_btree_metadata::root, START_CRIT_SECTION, xl_btree_metadata::version, XLOG_BTREE_META_CLEANUP, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), and XLogRegisterBuffer().

Referenced by btvacuumscan().

176 {
177  Buffer metabuf;
178  Page metapg;
179  BTMetaPageData *metad;
180  bool needsRewrite = false;
181  XLogRecPtr recptr;
182 
183  /* read the metapage and check if it needs rewrite */
184  metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
185  metapg = BufferGetPage(metabuf);
186  metad = BTPageGetMeta(metapg);
187 
188  /* outdated version of metapage always needs rewrite */
189  if (metad->btm_version < BTREE_NOVAC_VERSION)
190  needsRewrite = true;
191  else if (metad->btm_oldest_btpo_xact != oldestBtpoXact ||
192  metad->btm_last_cleanup_num_heap_tuples != numHeapTuples)
193  needsRewrite = true;
194 
195  if (!needsRewrite)
196  {
197  _bt_relbuf(rel, metabuf);
198  return;
199  }
200 
201  /* trade in our read lock for a write lock */
202  _bt_unlockbuf(rel, metabuf);
203  _bt_lockbuf(rel, metabuf, BT_WRITE);
204 
206 
207  /* upgrade meta-page if needed */
208  if (metad->btm_version < BTREE_NOVAC_VERSION)
209  _bt_upgrademetapage(metapg);
210 
211  /* update cleanup-related information */
212  metad->btm_oldest_btpo_xact = oldestBtpoXact;
213  metad->btm_last_cleanup_num_heap_tuples = numHeapTuples;
214  MarkBufferDirty(metabuf);
215 
216  /* write wal record if needed */
217  if (RelationNeedsWAL(rel))
218  {
220 
221  XLogBeginInsert();
223 
225  md.version = metad->btm_version;
226  md.root = metad->btm_root;
227  md.level = metad->btm_level;
228  md.fastroot = metad->btm_fastroot;
229  md.fastlevel = metad->btm_fastlevel;
230  md.oldest_btpo_xact = oldestBtpoXact;
231  md.last_cleanup_num_heap_tuples = numHeapTuples;
232  md.allequalimage = metad->btm_allequalimage;
233 
234  XLogRegisterBufData(0, (char *) &md, sizeof(xl_btree_metadata));
235 
236  recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_META_CLEANUP);
237 
238  PageSetLSN(metapg, recptr);
239  }
240 
242  _bt_relbuf(rel, metabuf);
243 }
void XLogRegisterBufData(uint8 block_id, char *data, int len)
Definition: xloginsert.c:368
bool allequalimage
Definition: nbtxlog.h:57
void _bt_upgrademetapage(Page page)
Definition: nbtpage.c:101
uint32 btm_version
Definition: nbtree.h:101
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:803
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1471
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:220
#define END_CRIT_SECTION()
Definition: miscadmin.h:134
BlockNumber root
Definition: nbtxlog.h:51
#define REGBUF_WILL_INIT
Definition: xloginsert.h:33
#define START_CRIT_SECTION()
Definition: miscadmin.h:132
#define BT_READ
Definition: nbtree.h:587
BlockNumber btm_fastroot
Definition: nbtree.h:104
float8 last_cleanup_num_heap_tuples
Definition: nbtxlog.h:56
TransactionId oldest_btpo_xact
Definition: nbtxlog.h:55
#define BTPageGetMeta(p)
Definition: nbtree.h:114
bool btm_allequalimage
Definition: nbtree.h:111
#define REGBUF_STANDARD
Definition: xloginsert.h:35
#define BTREE_NOVAC_VERSION
Definition: nbtree.h:145
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define BTREE_METAPAGE
Definition: nbtree.h:141
uint32 version
Definition: nbtxlog.h:50
uint32 btm_fastlevel
Definition: nbtree.h:105
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:422
void _bt_lockbuf(Relation rel, Buffer buf, int access)
Definition: nbtpage.c:975
BlockNumber btm_root
Definition: nbtree.h:102
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:959
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:800
float8 btm_last_cleanup_num_heap_tuples
Definition: nbtree.h:109
#define RelationNeedsWAL(relation)
Definition: rel.h:563
void _bt_unlockbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:1006
uint32 fastlevel
Definition: nbtxlog.h:54
uint32 btm_level
Definition: nbtree.h:103
uint32 level
Definition: nbtxlog.h:52
BlockNumber fastroot
Definition: nbtxlog.h:53
TransactionId btm_oldest_btpo_xact
Definition: nbtree.h:107
#define BT_WRITE
Definition: nbtree.h:588
void XLogBeginInsert(void)
Definition: xloginsert.c:123
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
int Buffer
Definition: buf.h:23
#define XLOG_BTREE_META_CLEANUP
Definition: nbtxlog.h:42
Pointer Page
Definition: bufpage.h:78

◆ _bt_upgradelockbufcleanup()

void _bt_upgradelockbufcleanup ( Relation  rel,
Buffer  buf 
)

Definition at line 1046 of file nbtpage.c.

References BUFFER_LOCK_UNLOCK, BufferGetPage, LockBuffer(), LockBufferForCleanup(), and VALGRIND_CHECK_MEM_IS_DEFINED.

Referenced by btvacuumpage().

1047 {
1048  /*
1049  * Buffer is pinned and locked, which means that it is expected to be
1050  * defined and addressable. Check that proactively.
1051  */
1053 
1054  /* LockBuffer() asserts that pin is held by this backend */
1057 }
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:96
void LockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:3809
#define VALGRIND_CHECK_MEM_IS_DEFINED(addr, size)
Definition: memdebug.h:23
static char * buf
Definition: pg_test_fsync.c:68
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3752

◆ _bt_upgrademetapage()

void _bt_upgrademetapage ( Page  page)

Definition at line 101 of file nbtpage.c.

References Assert, BTMetaPageData::btm_allequalimage, BTMetaPageData::btm_last_cleanup_num_heap_tuples, BTMetaPageData::btm_oldest_btpo_xact, BTMetaPageData::btm_version, BTP_META, BTPageGetMeta, BTREE_MIN_VERSION, BTREE_NOVAC_VERSION, InvalidTransactionId, PageGetSpecialPointer, and PG_USED_FOR_ASSERTS_ONLY.

Referenced by _bt_getroot(), _bt_insertonpg(), _bt_newroot(), _bt_unlink_halfdead_page(), and _bt_update_meta_cleanup_info().

102 {
103  BTMetaPageData *metad;
105 
106  metad = BTPageGetMeta(page);
107  metaopaque = (BTPageOpaque) PageGetSpecialPointer(page);
108 
109  /* It must be really a meta page of upgradable version */
110  Assert(metaopaque->btpo_flags & BTP_META);
113 
114  /* Set version number and fill extra fields added into version 3 */
117  metad->btm_last_cleanup_num_heap_tuples = -1.0;
118  /* Only a REINDEX can set this field */
119  Assert(!metad->btm_allequalimage);
120  metad->btm_allequalimage = false;
121 
122  /* Adjust pd_lower (see _bt_initmetapage() for details) */
123  ((PageHeader) page)->pd_lower =
124  ((char *) metad + sizeof(BTMetaPageData)) - (char *) page;
125 }
uint32 btm_version
Definition: nbtree.h:101
#define BTP_META
Definition: nbtree.h:75
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
#define BTPageGetMeta(p)
Definition: nbtree.h:114
bool btm_allequalimage
Definition: nbtree.h:111
#define InvalidTransactionId
Definition: transam.h:31
#define BTREE_NOVAC_VERSION
Definition: nbtree.h:145
#define BTREE_MIN_VERSION
Definition: nbtree.h:144
PageHeaderData * PageHeader
Definition: bufpage.h:166
#define Assert(condition)
Definition: c.h:800
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
float8 btm_last_cleanup_num_heap_tuples
Definition: nbtree.h:109
TransactionId btm_oldest_btpo_xact
Definition: nbtree.h:107
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:143

◆ _bt_xid_horizon()

static TransactionId _bt_xid_horizon ( Relation  rel,
Relation  heapRel,
Page  page,
OffsetNumber deletable,
int  ndeletable 
)
static

Definition at line 1358 of file nbtpage.c.

References Assert, BTreeTupleGetNPosting(), BTreeTupleGetPostingN(), BTreeTupleIsPivot(), BTreeTupleIsPosting(), i, InvalidTransactionId, ItemIdIsDead, ItemPointerCopy, ItemPointerIsValid, Max, PageGetItem, PageGetItemId, palloc(), pfree(), repalloc(), IndexTupleData::t_tid, and table_compute_xid_horizon_for_tuples().

Referenced by _bt_delitems_delete().

1360 {
1361  TransactionId latestRemovedXid = InvalidTransactionId;
1362  int spacenhtids;
1363  int nhtids;
1364  ItemPointer htids;
1365 
1366  /* Array will grow iff there are posting list tuples to consider */
1367  spacenhtids = ndeletable;
1368  nhtids = 0;
1369  htids = (ItemPointer) palloc(sizeof(ItemPointerData) * spacenhtids);
1370  for (int i = 0; i < ndeletable; i++)
1371  {
1372  ItemId itemid;
1373  IndexTuple itup;
1374 
1375  itemid = PageGetItemId(page, deletable[i]);
1376  itup = (IndexTuple) PageGetItem(page, itemid);
1377 
1378  Assert(ItemIdIsDead(itemid));
1379  Assert(!BTreeTupleIsPivot(itup));
1380 
1381  if (!BTreeTupleIsPosting(itup))
1382  {
1383  if (nhtids + 1 > spacenhtids)
1384  {
1385  spacenhtids *= 2;
1386  htids = (ItemPointer)
1387  repalloc(htids, sizeof(ItemPointerData) * spacenhtids);
1388  }
1389 
1390  Assert(ItemPointerIsValid(&itup->t_tid));
1391  ItemPointerCopy(&itup->t_tid, &htids[nhtids]);
1392  nhtids++;
1393  }
1394  else
1395  {
1396  int nposting = BTreeTupleGetNPosting(itup);
1397 
1398  if (nhtids + nposting > spacenhtids)
1399  {
1400  spacenhtids = Max(spacenhtids * 2, nhtids + nposting);
1401  htids = (ItemPointer)
1402  repalloc(htids, sizeof(ItemPointerData) * spacenhtids);
1403  }
1404 
1405  for (int j = 0; j < nposting; j++)
1406  {
1407  ItemPointer htid = BTreeTupleGetPostingN(itup, j);
1408 
1409  Assert(ItemPointerIsValid(htid));
1410  ItemPointerCopy(htid, &htids[nhtids]);
1411  nhtids++;
1412  }
1413  }
1414  }
1415 
1416  Assert(nhtids >= ndeletable);
1417 
1418  latestRemovedXid =
1419  table_compute_xid_horizon_for_tuples(heapRel, htids, nhtids);
1420 
1421  pfree(htids);
1422 
1423  return latestRemovedXid;
1424 }
#define ItemPointerIsValid(pointer)
Definition: itemptr.h:82
static bool BTreeTupleIsPivot(IndexTuple itup)
Definition: nbtree.h:348
uint32 TransactionId
Definition: c.h:575
ItemPointerData t_tid
Definition: itup.h:37
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
ItemPointerData * ItemPointer
Definition: itemptr.h:49
static TransactionId table_compute_xid_horizon_for_tuples(Relation rel, ItemPointerData *items, int nitems)
Definition: tableam.h:1130
void pfree(void *pointer)
Definition: mcxt.c:1057
IndexTupleData * IndexTuple
Definition: itup.h:53
#define InvalidTransactionId
Definition: transam.h:31
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
static ItemPointer BTreeTupleGetPostingN(IndexTuple posting, int n)
Definition: nbtree.h:412
static bool BTreeTupleIsPosting(IndexTuple itup)
Definition: nbtree.h:360
#define Max(x, y)
Definition: c.h:976
#define Assert(condition)
Definition: c.h:800
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1070
void * palloc(Size size)
Definition: mcxt.c:950
int i
static uint16 BTreeTupleGetNPosting(IndexTuple posting)
Definition: nbtree.h:386
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
#define ItemPointerCopy(fromPointer, toPointer)
Definition: itemptr.h:161