PostgreSQL Source Code  git master
nbtpage.c File Reference
#include "postgres.h"
#include "access/nbtree.h"
#include "access/nbtxlog.h"
#include "access/tableam.h"
#include "access/transam.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "miscadmin.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h"
#include "storage/predicate.h"
#include "utils/snapmgr.h"
Include dependency graph for nbtpage.c:

Go to the source code of this file.

Functions

static BTMetaPageData_bt_getmeta (Relation rel, Buffer metabuf)
 
static void _bt_log_reuse_page (Relation rel, BlockNumber blkno, TransactionId latestRemovedXid)
 
static TransactionId _bt_xid_horizon (Relation rel, Relation heapRel, Page page, OffsetNumber *deletable, int ndeletable)
 
static bool _bt_mark_page_halfdead (Relation rel, Buffer leafbuf, BTStack stack)
 
static bool _bt_unlink_halfdead_page (Relation rel, Buffer leafbuf, BlockNumber scanblkno, bool *rightsib_empty, TransactionId *oldestBtpoXact, uint32 *ndeleted)
 
static bool _bt_lock_subtree_parent (Relation rel, BlockNumber child, BTStack stack, Buffer *subtreeparent, OffsetNumber *poffset, BlockNumber *topparent, BlockNumber *topparentrightsib)
 
void _bt_initmetapage (Page page, BlockNumber rootbknum, uint32 level, bool allequalimage)
 
void _bt_upgrademetapage (Page page)
 
void _bt_update_meta_cleanup_info (Relation rel, TransactionId oldestBtpoXact, float8 numHeapTuples)
 
Buffer _bt_getroot (Relation rel, int access)
 
Buffer _bt_gettrueroot (Relation rel)
 
int _bt_getrootheight (Relation rel)
 
void _bt_metaversion (Relation rel, bool *heapkeyspace, bool *allequalimage)
 
void _bt_checkpage (Relation rel, Buffer buf)
 
Buffer _bt_getbuf (Relation rel, BlockNumber blkno, int access)
 
Buffer _bt_relandgetbuf (Relation rel, Buffer obuf, BlockNumber blkno, int access)
 
void _bt_relbuf (Relation rel, Buffer buf)
 
void _bt_pageinit (Page page, Size size)
 
bool _bt_page_recyclable (Page page)
 
void _bt_delitems_vacuum (Relation rel, Buffer buf, OffsetNumber *deletable, int ndeletable, BTVacuumPosting *updatable, int nupdatable)
 
void _bt_delitems_delete (Relation rel, Buffer buf, OffsetNumber *deletable, int ndeletable, Relation heapRel)
 
static bool _bt_leftsib_splitflag (Relation rel, BlockNumber leftsib, BlockNumber target)
 
static bool _bt_rightsib_halfdeadflag (Relation rel, BlockNumber leafrightsib)
 
uint32 _bt_pagedel (Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
 

Function Documentation

◆ _bt_checkpage()

void _bt_checkpage ( Relation  rel,
Buffer  buf 
)

Definition at line 725 of file nbtpage.c.

References BufferGetBlockNumber(), BufferGetPage, ereport, errcode(), errhint(), errmsg(), ERROR, MAXALIGN, PageGetSpecialSize, PageIsNew, and RelationGetRelationName.

Referenced by _bt_getbuf(), _bt_relandgetbuf(), _bt_search_insert(), btvacuumpage(), and palloc_btree_page().

726 {
727  Page page = BufferGetPage(buf);
728 
729  /*
730  * ReadBuffer verifies that every newly-read page passes
731  * PageHeaderIsValid, which means it either contains a reasonably sane
732  * page header or is all-zero. We have to defend against the all-zero
733  * case, however.
734  */
735  if (PageIsNew(page))
736  ereport(ERROR,
737  (errcode(ERRCODE_INDEX_CORRUPTED),
738  errmsg("index \"%s\" contains unexpected zero page at block %u",
741  errhint("Please REINDEX it.")));
742 
743  /*
744  * Additionally check that the special area looks sane.
745  */
746  if (PageGetSpecialSize(page) != MAXALIGN(sizeof(BTPageOpaqueData)))
747  ereport(ERROR,
748  (errcode(ERRCODE_INDEX_CORRUPTED),
749  errmsg("index \"%s\" contains corrupted page at block %u",
752  errhint("Please REINDEX it.")));
753 }
int errhint(const char *fmt,...)
Definition: elog.c:1071
int errcode(int sqlerrcode)
Definition: elog.c:610
#define ERROR
Definition: elog.h:43
static char * buf
Definition: pg_test_fsync.c:67
#define RelationGetRelationName(relation)
Definition: rel.h:490
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define ereport(elevel,...)
Definition: elog.h:144
#define MAXALIGN(LEN)
Definition: c.h:691
#define PageGetSpecialSize(page)
Definition: bufpage.h:300
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2633
#define PageIsNew(page)
Definition: bufpage.h:229
int errmsg(const char *fmt,...)
Definition: elog.c:824
Pointer Page
Definition: bufpage.h:78

◆ _bt_delitems_delete()

void _bt_delitems_delete ( Relation  rel,
Buffer  buf,
OffsetNumber deletable,
int  ndeletable,
Relation  heapRel 
)

Definition at line 1183 of file nbtpage.c.

References _bt_xid_horizon(), Assert, BTP_HAS_GARBAGE, BTPageOpaqueData::btpo_flags, BufferGetPage, END_CRIT_SECTION, InvalidTransactionId, xl_btree_delete::latestRemovedXid, MarkBufferDirty(), xl_btree_delete::ndeleted, PageGetSpecialPointer, PageIndexMultiDelete(), PageSetLSN, REGBUF_STANDARD, RelationNeedsWAL, SizeOfBtreeDelete, START_CRIT_SECTION, XLOG_BTREE_DELETE, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogStandbyInfoActive.

Referenced by _bt_dedup_one_page(), and _bt_vacuum_one_page().

1186 {
1187  Page page = BufferGetPage(buf);
1188  BTPageOpaque opaque;
1189  TransactionId latestRemovedXid = InvalidTransactionId;
1190 
1191  /* Shouldn't be called unless there's something to do */
1192  Assert(ndeletable > 0);
1193 
1195  latestRemovedXid =
1196  _bt_xid_horizon(rel, heapRel, page, deletable, ndeletable);
1197 
1198  /* No ereport(ERROR) until changes are logged */
1200 
1201  /* Fix the page */
1202  PageIndexMultiDelete(page, deletable, ndeletable);
1203 
1204  /*
1205  * Unlike _bt_delitems_vacuum, we *must not* clear the vacuum cycle ID,
1206  * because this is not called by VACUUM. Just clear the BTP_HAS_GARBAGE
1207  * page flag, since we deleted all items with their LP_DEAD bit set.
1208  */
1209  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1210  opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
1211 
1213 
1214  /* XLOG stuff */
1215  if (RelationNeedsWAL(rel))
1216  {
1217  XLogRecPtr recptr;
1218  xl_btree_delete xlrec_delete;
1219 
1220  xlrec_delete.latestRemovedXid = latestRemovedXid;
1221  xlrec_delete.ndeleted = ndeletable;
1222 
1223  XLogBeginInsert();
1225  XLogRegisterData((char *) &xlrec_delete, SizeOfBtreeDelete);
1226 
1227  /*
1228  * The deletable array is not in the buffer, but pretend that it is.
1229  * When XLogInsert stores the whole buffer, the array need not be
1230  * stored too.
1231  */
1232  XLogRegisterBufData(0, (char *) deletable,
1233  ndeletable * sizeof(OffsetNumber));
1234 
1235  recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE);
1236 
1237  PageSetLSN(page, recptr);
1238  }
1239 
1240  END_CRIT_SECTION();
1241 }
void XLogRegisterBufData(uint8 block_id, char *data, int len)
Definition: xloginsert.c:362
TransactionId latestRemovedXid
Definition: nbtxlog.h:189
uint32 TransactionId
Definition: c.h:513
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1468
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:214
#define END_CRIT_SECTION()
Definition: miscadmin.h:134
#define START_CRIT_SECTION()
Definition: miscadmin.h:132
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
uint32 ndeleted
Definition: nbtxlog.h:190
uint16 OffsetNumber
Definition: off.h:24
static char * buf
Definition: pg_test_fsync.c:67
#define REGBUF_STANDARD
Definition: xloginsert.h:35
#define InvalidTransactionId
Definition: transam.h:31
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
static TransactionId _bt_xid_horizon(Relation rel, Relation heapRel, Page page, OffsetNumber *deletable, int ndeletable)
Definition: nbtpage.c:1252
#define XLOG_BTREE_DELETE
Definition: nbtxlog.h:33
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:324
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:416
#define XLogStandbyInfoActive()
Definition: xlog.h:205
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:738
void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
Definition: bufpage.c:828
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
#define RelationNeedsWAL(relation)
Definition: rel.h:562
#define SizeOfBtreeDelete
Definition: nbtxlog.h:195
void XLogBeginInsert(void)
Definition: xloginsert.c:121
uint16 btpo_flags
Definition: nbtree.h:65
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
#define BTP_HAS_GARBAGE
Definition: nbtree.h:78
Pointer Page
Definition: bufpage.h:78

◆ _bt_delitems_vacuum()

void _bt_delitems_vacuum ( Relation  rel,
Buffer  buf,
OffsetNumber deletable,
int  ndeletable,
BTVacuumPosting updatable,
int  nupdatable 
)

Definition at line 1017 of file nbtpage.c.

References _bt_update_posting(), Assert, BTP_HAS_GARBAGE, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BufferGetBlockNumber(), BufferGetPage, BTVacuumPostingData::deletetids, elog, END_CRIT_SECTION, i, IndexTupleSize, BTVacuumPostingData::itup, MarkBufferDirty(), MAXALIGN, MaxIndexTuplesPerPage, xl_btree_vacuum::ndeleted, xl_btree_update::ndeletedtids, BTVacuumPostingData::ndeletedtids, xl_btree_vacuum::nupdated, PageGetSpecialPointer, PageIndexMultiDelete(), PageIndexTupleOverwrite(), PageSetLSN, palloc(), PANIC, pfree(), REGBUF_STANDARD, RelationGetRelationName, RelationNeedsWAL, SizeOfBtreeUpdate, SizeOfBtreeVacuum, START_CRIT_SECTION, BTVacuumPostingData::updatedoffset, XLOG_BTREE_VACUUM, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by btvacuumpage().

1020 {
1021  Page page = BufferGetPage(buf);
1022  BTPageOpaque opaque;
1023  Size itemsz;
1024  char *updatedbuf = NULL;
1025  Size updatedbuflen = 0;
1026  OffsetNumber updatedoffsets[MaxIndexTuplesPerPage];
1027 
1028  /* Shouldn't be called unless there's something to do */
1029  Assert(ndeletable > 0 || nupdatable > 0);
1030 
1031  for (int i = 0; i < nupdatable; i++)
1032  {
1033  /* Replace work area IndexTuple with updated version */
1034  _bt_update_posting(updatable[i]);
1035 
1036  /* Maintain array of updatable page offsets for WAL record */
1037  updatedoffsets[i] = updatable[i]->updatedoffset;
1038  }
1039 
1040  /* XLOG stuff -- allocate and fill buffer before critical section */
1041  if (nupdatable > 0 && RelationNeedsWAL(rel))
1042  {
1043  Size offset = 0;
1044 
1045  for (int i = 0; i < nupdatable; i++)
1046  {
1047  BTVacuumPosting vacposting = updatable[i];
1048 
1049  itemsz = SizeOfBtreeUpdate +
1050  vacposting->ndeletedtids * sizeof(uint16);
1051  updatedbuflen += itemsz;
1052  }
1053 
1054  updatedbuf = palloc(updatedbuflen);
1055  for (int i = 0; i < nupdatable; i++)
1056  {
1057  BTVacuumPosting vacposting = updatable[i];
1058  xl_btree_update update;
1059 
1060  update.ndeletedtids = vacposting->ndeletedtids;
1061  memcpy(updatedbuf + offset, &update.ndeletedtids,
1063  offset += SizeOfBtreeUpdate;
1064 
1065  itemsz = update.ndeletedtids * sizeof(uint16);
1066  memcpy(updatedbuf + offset, vacposting->deletetids, itemsz);
1067  offset += itemsz;
1068  }
1069  }
1070 
1071  /* No ereport(ERROR) until changes are logged */
1073 
1074  /*
1075  * Handle posting tuple updates.
1076  *
1077  * Deliberately do this before handling simple deletes. If we did it the
1078  * other way around (i.e. WAL record order -- simple deletes before
1079  * updates) then we'd have to make compensating changes to the 'updatable'
1080  * array of offset numbers.
1081  *
1082  * PageIndexTupleOverwrite() won't unset each item's LP_DEAD bit when it
1083  * happens to already be set. Although we unset the BTP_HAS_GARBAGE page
1084  * level flag, unsetting individual LP_DEAD bits should still be avoided.
1085  */
1086  for (int i = 0; i < nupdatable; i++)
1087  {
1088  OffsetNumber updatedoffset = updatedoffsets[i];
1089  IndexTuple itup;
1090 
1091  itup = updatable[i]->itup;
1092  itemsz = MAXALIGN(IndexTupleSize(itup));
1093  if (!PageIndexTupleOverwrite(page, updatedoffset, (Item) itup,
1094  itemsz))
1095  elog(PANIC, "failed to update partially dead item in block %u of index \"%s\"",
1097  }
1098 
1099  /* Now handle simple deletes of entire tuples */
1100  if (ndeletable > 0)
1101  PageIndexMultiDelete(page, deletable, ndeletable);
1102 
1103  /*
1104  * We can clear the vacuum cycle ID since this page has certainly been
1105  * processed by the current vacuum scan.
1106  */
1107  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1108  opaque->btpo_cycleid = 0;
1109 
1110  /*
1111  * Mark the page as not containing any LP_DEAD items. This is not
1112  * certainly true (there might be some that have recently been marked, but
1113  * weren't targeted by VACUUM's heap scan), but it will be true often
1114  * enough. VACUUM does not delete items purely because they have their
1115  * LP_DEAD bit set, since doing so would necessitate explicitly logging a
1116  * latestRemovedXid cutoff (this is how _bt_delitems_delete works).
1117  *
1118  * The consequences of falsely unsetting BTP_HAS_GARBAGE should be fairly
1119  * limited, since we never falsely unset an LP_DEAD bit. Workloads that
1120  * are particularly dependent on LP_DEAD bits being set quickly will
1121  * usually manage to set the BTP_HAS_GARBAGE flag before the page fills up
1122  * again anyway. Furthermore, attempting a deduplication pass will remove
1123  * all LP_DEAD items, regardless of whether the BTP_HAS_GARBAGE hint bit
1124  * is set or not.
1125  */
1126  opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
1127 
1129 
1130  /* XLOG stuff */
1131  if (RelationNeedsWAL(rel))
1132  {
1133  XLogRecPtr recptr;
1134  xl_btree_vacuum xlrec_vacuum;
1135 
1136  xlrec_vacuum.ndeleted = ndeletable;
1137  xlrec_vacuum.nupdated = nupdatable;
1138 
1139  XLogBeginInsert();
1141  XLogRegisterData((char *) &xlrec_vacuum, SizeOfBtreeVacuum);
1142 
1143  if (ndeletable > 0)
1144  XLogRegisterBufData(0, (char *) deletable,
1145  ndeletable * sizeof(OffsetNumber));
1146 
1147  if (nupdatable > 0)
1148  {
1149  XLogRegisterBufData(0, (char *) updatedoffsets,
1150  nupdatable * sizeof(OffsetNumber));
1151  XLogRegisterBufData(0, updatedbuf, updatedbuflen);
1152  }
1153 
1154  recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_VACUUM);
1155 
1156  PageSetLSN(page, recptr);
1157  }
1158 
1159  END_CRIT_SECTION();
1160 
1161  /* can't leak memory here */
1162  if (updatedbuf != NULL)
1163  pfree(updatedbuf);
1164  /* free tuples generated by calling _bt_update_posting() */
1165  for (int i = 0; i < nupdatable; i++)
1166  pfree(updatable[i]->itup);
1167 }
void XLogRegisterBufData(uint8 block_id, char *data, int len)
Definition: xloginsert.c:362
uint16 ndeletedtids
Definition: nbtree.h:782
void _bt_update_posting(BTVacuumPosting vacposting)
Definition: nbtdedup.c:676
#define SizeOfBtreeUpdate
Definition: nbtxlog.h:225
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1468
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:214
OffsetNumber updatedoffset
Definition: nbtree.h:779
#define END_CRIT_SECTION()
Definition: miscadmin.h:134
IndexTuple itup
Definition: nbtree.h:778
Pointer Item
Definition: item.h:17
#define START_CRIT_SECTION()
Definition: miscadmin.h:132
uint16 nupdated
Definition: nbtxlog.h:243
#define PANIC
Definition: elog.h:53
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
uint16 OffsetNumber
Definition: off.h:24
unsigned short uint16
Definition: c.h:366
void pfree(void *pointer)
Definition: mcxt.c:1056
#define SizeOfBtreeVacuum
Definition: nbtxlog.h:250
BTCycleId btpo_cycleid
Definition: nbtree.h:66
bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum, Item newtup, Size newsize)
Definition: bufpage.c:1060
static char * buf
Definition: pg_test_fsync.c:67
#define REGBUF_STANDARD
Definition: xloginsert.h:35
#define XLOG_BTREE_VACUUM
Definition: nbtxlog.h:38
#define RelationGetRelationName(relation)
Definition: rel.h:490
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
uint16 ndeleted
Definition: nbtxlog.h:242
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:324
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:416
uint16 deletetids[FLEXIBLE_ARRAY_MEMBER]
Definition: nbtree.h:783
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:738
void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
Definition: bufpage.c:828
size_t Size
Definition: c.h:466
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
#define MAXALIGN(LEN)
Definition: c.h:691
#define RelationNeedsWAL(relation)
Definition: rel.h:562
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2633
#define MaxIndexTuplesPerPage
Definition: itup.h:145
void * palloc(Size size)
Definition: mcxt.c:949
#define elog(elevel,...)
Definition: elog.h:214
int i
void XLogBeginInsert(void)
Definition: xloginsert.c:121
uint16 btpo_flags
Definition: nbtree.h:65
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
#define BTP_HAS_GARBAGE
Definition: nbtree.h:78
uint16 ndeletedtids
Definition: nbtxlog.h:220
Pointer Page
Definition: bufpage.h:78
#define IndexTupleSize(itup)
Definition: itup.h:71

◆ _bt_getbuf()

Buffer _bt_getbuf ( Relation  rel,
BlockNumber  blkno,
int  access 
)

Definition at line 792 of file nbtpage.c.

References _bt_checkpage(), _bt_log_reuse_page(), _bt_page_recyclable(), _bt_pageinit(), _bt_relbuf(), Assert, BT_WRITE, BTPageOpaqueData::btpo, buf, BufferGetPage, BufferGetPageSize, ConditionalLockBuffer(), DEBUG2, elog, ExclusiveLock, GetFreeIndexPage(), InvalidBlockNumber, LockBuffer(), LockRelationForExtension(), P_NEW, PageGetSpecialPointer, PageIsNew, ReadBuffer(), RELATION_IS_LOCAL, RelationNeedsWAL, ReleaseBuffer(), UnlockRelationForExtension(), BTPageOpaqueData::xact, and XLogStandbyInfoActive.

Referenced by _bt_finish_split(), _bt_getroot(), _bt_getrootheight(), _bt_getstackbuf(), _bt_gettrueroot(), _bt_insertonpg(), _bt_killitems(), _bt_leftsib_splitflag(), _bt_metaversion(), _bt_moveright(), _bt_newroot(), _bt_pagedel(), _bt_readnextpage(), _bt_rightsib_halfdeadflag(), _bt_split(), _bt_unlink_halfdead_page(), _bt_update_meta_cleanup_info(), _bt_vacuum_needs_cleanup(), and _bt_walk_left().

793 {
794  Buffer buf;
795 
796  if (blkno != P_NEW)
797  {
798  /* Read an existing block of the relation */
799  buf = ReadBuffer(rel, blkno);
800  LockBuffer(buf, access);
801  _bt_checkpage(rel, buf);
802  }
803  else
804  {
805  bool needLock;
806  Page page;
807 
808  Assert(access == BT_WRITE);
809 
810  /*
811  * First see if the FSM knows of any free pages.
812  *
813  * We can't trust the FSM's report unreservedly; we have to check that
814  * the page is still free. (For example, an already-free page could
815  * have been re-used between the time the last VACUUM scanned it and
816  * the time the VACUUM made its FSM updates.)
817  *
818  * In fact, it's worse than that: we can't even assume that it's safe
819  * to take a lock on the reported page. If somebody else has a lock
820  * on it, or even worse our own caller does, we could deadlock. (The
821  * own-caller scenario is actually not improbable. Consider an index
822  * on a serial or timestamp column. Nearly all splits will be at the
823  * rightmost page, so it's entirely likely that _bt_split will call us
824  * while holding a lock on the page most recently acquired from FSM. A
825  * VACUUM running concurrently with the previous split could well have
826  * placed that page back in FSM.)
827  *
828  * To get around that, we ask for only a conditional lock on the
829  * reported page. If we fail, then someone else is using the page,
830  * and we may reasonably assume it's not free. (If we happen to be
831  * wrong, the worst consequence is the page will be lost to use till
832  * the next VACUUM, which is no big problem.)
833  */
834  for (;;)
835  {
836  blkno = GetFreeIndexPage(rel);
837  if (blkno == InvalidBlockNumber)
838  break;
839  buf = ReadBuffer(rel, blkno);
840  if (ConditionalLockBuffer(buf))
841  {
842  page = BufferGetPage(buf);
843  if (_bt_page_recyclable(page))
844  {
845  /*
846  * If we are generating WAL for Hot Standby then create a
847  * WAL record that will allow us to conflict with queries
848  * running on standby, in case they have snapshots older
849  * than btpo.xact. This can only apply if the page does
850  * have a valid btpo.xact value, ie not if it's new. (We
851  * must check that because an all-zero page has no special
852  * space.)
853  */
854  if (XLogStandbyInfoActive() && RelationNeedsWAL(rel) &&
855  !PageIsNew(page))
856  {
858 
859  _bt_log_reuse_page(rel, blkno, opaque->btpo.xact);
860  }
861 
862  /* Okay to use page. Re-initialize and return it */
863  _bt_pageinit(page, BufferGetPageSize(buf));
864  return buf;
865  }
866  elog(DEBUG2, "FSM returned nonrecyclable page");
867  _bt_relbuf(rel, buf);
868  }
869  else
870  {
871  elog(DEBUG2, "FSM returned nonlockable page");
872  /* couldn't get lock, so just drop pin */
873  ReleaseBuffer(buf);
874  }
875  }
876 
877  /*
878  * Extend the relation by one page.
879  *
880  * We have to use a lock to ensure no one else is extending the rel at
881  * the same time, else we will both try to initialize the same new
882  * page. We can skip locking for new or temp relations, however,
883  * since no one else could be accessing them.
884  */
885  needLock = !RELATION_IS_LOCAL(rel);
886 
887  if (needLock)
889 
890  buf = ReadBuffer(rel, P_NEW);
891 
892  /* Acquire buffer lock on new page */
893  LockBuffer(buf, BT_WRITE);
894 
895  /*
896  * Release the file-extension lock; it's now OK for someone else to
897  * extend the relation some more. Note that we cannot release this
898  * lock before we have buffer lock on the new page, or we risk a race
899  * condition against btvacuumscan --- see comments therein.
900  */
901  if (needLock)
903 
904  /* Initialize the new page before returning it */
905  page = BufferGetPage(buf);
906  Assert(PageIsNew(page));
907  _bt_pageinit(page, BufferGetPageSize(buf));
908  }
909 
910  /* ref count and lock type are correct */
911  return buf;
912 }
#define ExclusiveLock
Definition: lockdefs.h:44
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:583
union BTPageOpaqueData::@45 btpo
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3483
#define P_NEW
Definition: bufmgr.h:91
bool _bt_page_recyclable(Page page)
Definition: nbtpage.c:974
TransactionId xact
Definition: nbtree.h:63
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
void _bt_checkpage(Relation rel, Buffer buf)
Definition: nbtpage.c:725
#define DEBUG2
Definition: elog.h:24
static char * buf
Definition: pg_test_fsync.c:67
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
bool ConditionalLockBuffer(Buffer buffer)
Definition: bufmgr.c:3748
static void _bt_log_reuse_page(Relation rel, BlockNumber blkno, TransactionId latestRemovedXid)
Definition: nbtpage.c:759
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:402
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:452
#define BufferGetPageSize(buffer)
Definition: bufmgr.h:156
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3722
#define XLogStandbyInfoActive()
Definition: xlog.h:205
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:947
BlockNumber GetFreeIndexPage(Relation rel)
Definition: indexfsm.c:38
#define Assert(condition)
Definition: c.h:738
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:606
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
#define InvalidBlockNumber
Definition: block.h:33
#define RelationNeedsWAL(relation)
Definition: rel.h:562
#define PageIsNew(page)
Definition: bufpage.h:229
#define elog(elevel,...)
Definition: elog.h:214
void _bt_pageinit(Page page, Size size)
Definition: nbtpage.c:959
#define BT_WRITE
Definition: nbtree.h:588
int Buffer
Definition: buf.h:23
Pointer Page
Definition: bufpage.h:78

◆ _bt_getmeta()

static BTMetaPageData * _bt_getmeta ( Relation  rel,
Buffer  metabuf 
)
static

Definition at line 135 of file nbtpage.c.

References BTMetaPageData::btm_magic, BTMetaPageData::btm_version, BTPageGetMeta, BTREE_MAGIC, BTREE_MIN_VERSION, BTREE_VERSION, BufferGetPage, ereport, errcode(), errmsg(), ERROR, P_ISMETA, PageGetSpecialPointer, and RelationGetRelationName.

Referenced by _bt_getroot(), _bt_getrootheight(), and _bt_metaversion().

136 {
137  Page metapg;
138  BTPageOpaque metaopaque;
139  BTMetaPageData *metad;
140 
141  metapg = BufferGetPage(metabuf);
142  metaopaque = (BTPageOpaque) PageGetSpecialPointer(metapg);
143  metad = BTPageGetMeta(metapg);
144 
145  /* sanity-check the metapage */
146  if (!P_ISMETA(metaopaque) ||
147  metad->btm_magic != BTREE_MAGIC)
148  ereport(ERROR,
149  (errcode(ERRCODE_INDEX_CORRUPTED),
150  errmsg("index \"%s\" is not a btree",
151  RelationGetRelationName(rel))));
152 
153  if (metad->btm_version < BTREE_MIN_VERSION ||
154  metad->btm_version > BTREE_VERSION)
155  ereport(ERROR,
156  (errcode(ERRCODE_INDEX_CORRUPTED),
157  errmsg("version mismatch in index \"%s\": file version %d, "
158  "current version %d, minimal supported version %d",
161 
162  return metad;
163 }
uint32 btm_version
Definition: nbtree.h:101
#define BTREE_VERSION
Definition: nbtree.h:143
uint32 btm_magic
Definition: nbtree.h:100
int errcode(int sqlerrcode)
Definition: elog.c:610
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
#define P_ISMETA(opaque)
Definition: nbtree.h:217
#define BTREE_MAGIC
Definition: nbtree.h:142
#define ERROR
Definition: elog.h:43
#define BTPageGetMeta(p)
Definition: nbtree.h:114
#define RelationGetRelationName(relation)
Definition: rel.h:490
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define BTREE_MIN_VERSION
Definition: nbtree.h:144
#define ereport(elevel,...)
Definition: elog.h:144
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
int errmsg(const char *fmt,...)
Definition: elog.c:824
Pointer Page
Definition: bufpage.h:78

◆ _bt_getroot()

Buffer _bt_getroot ( Relation  rel,
int  access 
)

Definition at line 271 of file nbtpage.c.

References _bt_getbuf(), _bt_getmeta(), _bt_getroot(), _bt_relandgetbuf(), _bt_relbuf(), _bt_upgrademetapage(), xl_btree_metadata::allequalimage, Assert, BT_READ, BT_WRITE, BTMetaPageData::btm_allequalimage, BTMetaPageData::btm_fastlevel, BTMetaPageData::btm_fastroot, BTMetaPageData::btm_last_cleanup_num_heap_tuples, BTMetaPageData::btm_level, BTMetaPageData::btm_magic, BTMetaPageData::btm_oldest_btpo_xact, BTMetaPageData::btm_root, BTMetaPageData::btm_version, BTP_LEAF, BTP_ROOT, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BTREE_MAGIC, BTREE_METAPAGE, BTREE_MIN_VERSION, BTREE_NOVAC_VERSION, BTREE_VERSION, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage, elog, END_CRIT_SECTION, ERROR, xl_btree_metadata::fastlevel, xl_btree_metadata::fastroot, InvalidBuffer, InvalidTransactionId, xl_btree_metadata::last_cleanup_num_heap_tuples, xl_btree_metadata::level, BTPageOpaqueData::level, xl_btree_newroot::level, LockBuffer(), MarkBufferDirty(), MemoryContextAlloc(), xl_btree_metadata::oldest_btpo_xact, P_IGNORE, P_LEFTMOST, P_NEW, P_NONE, P_RIGHTMOST, PageGetSpecialPointer, PageSetLSN, pfree(), RelationData::rd_amcache, RelationData::rd_indexcxt, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetRelationName, RelationNeedsWAL, xl_btree_metadata::root, xl_btree_newroot::rootblk, SizeOfBtreeNewroot, START_CRIT_SECTION, xl_btree_metadata::version, XLOG_BTREE_NEWROOT, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _bt_get_endpoint(), _bt_getroot(), and _bt_search().

272 {
273  Buffer metabuf;
274  Buffer rootbuf;
275  Page rootpage;
276  BTPageOpaque rootopaque;
277  BlockNumber rootblkno;
278  uint32 rootlevel;
279  BTMetaPageData *metad;
280 
281  /*
282  * Try to use previously-cached metapage data to find the root. This
283  * normally saves one buffer access per index search, which is a very
284  * helpful savings in bufmgr traffic and hence contention.
285  */
286  if (rel->rd_amcache != NULL)
287  {
288  metad = (BTMetaPageData *) rel->rd_amcache;
289  /* We shouldn't have cached it if any of these fail */
290  Assert(metad->btm_magic == BTREE_MAGIC);
292  Assert(metad->btm_version <= BTREE_VERSION);
293  Assert(!metad->btm_allequalimage ||
295  Assert(metad->btm_root != P_NONE);
296 
297  rootblkno = metad->btm_fastroot;
298  Assert(rootblkno != P_NONE);
299  rootlevel = metad->btm_fastlevel;
300 
301  rootbuf = _bt_getbuf(rel, rootblkno, BT_READ);
302  rootpage = BufferGetPage(rootbuf);
303  rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage);
304 
305  /*
306  * Since the cache might be stale, we check the page more carefully
307  * here than normal. We *must* check that it's not deleted. If it's
308  * not alone on its level, then we reject too --- this may be overly
309  * paranoid but better safe than sorry. Note we don't check P_ISROOT,
310  * because that's not set in a "fast root".
311  */
312  if (!P_IGNORE(rootopaque) &&
313  rootopaque->btpo.level == rootlevel &&
314  P_LEFTMOST(rootopaque) &&
315  P_RIGHTMOST(rootopaque))
316  {
317  /* OK, accept cached page as the root */
318  return rootbuf;
319  }
320  _bt_relbuf(rel, rootbuf);
321  /* Cache is stale, throw it away */
322  if (rel->rd_amcache)
323  pfree(rel->rd_amcache);
324  rel->rd_amcache = NULL;
325  }
326 
327  metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
328  metad = _bt_getmeta(rel, metabuf);
329 
330  /* if no root page initialized yet, do it */
331  if (metad->btm_root == P_NONE)
332  {
333  Page metapg;
334 
335  /* If access = BT_READ, caller doesn't want us to create root yet */
336  if (access == BT_READ)
337  {
338  _bt_relbuf(rel, metabuf);
339  return InvalidBuffer;
340  }
341 
342  /* trade in our read lock for a write lock */
343  LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
344  LockBuffer(metabuf, BT_WRITE);
345 
346  /*
347  * Race condition: if someone else initialized the metadata between
348  * the time we released the read lock and acquired the write lock, we
349  * must avoid doing it again.
350  */
351  if (metad->btm_root != P_NONE)
352  {
353  /*
354  * Metadata initialized by someone else. In order to guarantee no
355  * deadlocks, we have to release the metadata page and start all
356  * over again. (Is that really true? But it's hardly worth trying
357  * to optimize this case.)
358  */
359  _bt_relbuf(rel, metabuf);
360  return _bt_getroot(rel, access);
361  }
362 
363  /*
364  * Get, initialize, write, and leave a lock of the appropriate type on
365  * the new root page. Since this is the first page in the tree, it's
366  * a leaf as well as the root.
367  */
368  rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE);
369  rootblkno = BufferGetBlockNumber(rootbuf);
370  rootpage = BufferGetPage(rootbuf);
371  rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage);
372  rootopaque->btpo_prev = rootopaque->btpo_next = P_NONE;
373  rootopaque->btpo_flags = (BTP_LEAF | BTP_ROOT);
374  rootopaque->btpo.level = 0;
375  rootopaque->btpo_cycleid = 0;
376  /* Get raw page pointer for metapage */
377  metapg = BufferGetPage(metabuf);
378 
379  /* NO ELOG(ERROR) till meta is updated */
381 
382  /* upgrade metapage if needed */
383  if (metad->btm_version < BTREE_NOVAC_VERSION)
384  _bt_upgrademetapage(metapg);
385 
386  metad->btm_root = rootblkno;
387  metad->btm_level = 0;
388  metad->btm_fastroot = rootblkno;
389  metad->btm_fastlevel = 0;
391  metad->btm_last_cleanup_num_heap_tuples = -1.0;
392 
393  MarkBufferDirty(rootbuf);
394  MarkBufferDirty(metabuf);
395 
396  /* XLOG stuff */
397  if (RelationNeedsWAL(rel))
398  {
399  xl_btree_newroot xlrec;
400  XLogRecPtr recptr;
402 
403  XLogBeginInsert();
406 
408  md.version = metad->btm_version;
409  md.root = rootblkno;
410  md.level = 0;
411  md.fastroot = rootblkno;
412  md.fastlevel = 0;
415  md.allequalimage = metad->btm_allequalimage;
416 
417  XLogRegisterBufData(2, (char *) &md, sizeof(xl_btree_metadata));
418 
419  xlrec.rootblk = rootblkno;
420  xlrec.level = 0;
421 
422  XLogRegisterData((char *) &xlrec, SizeOfBtreeNewroot);
423 
424  recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT);
425 
426  PageSetLSN(rootpage, recptr);
427  PageSetLSN(metapg, recptr);
428  }
429 
431 
432  /*
433  * swap root write lock for read lock. There is no danger of anyone
434  * else accessing the new root page while it's unlocked, since no one
435  * else knows where it is yet.
436  */
437  LockBuffer(rootbuf, BUFFER_LOCK_UNLOCK);
438  LockBuffer(rootbuf, BT_READ);
439 
440  /* okay, metadata is correct, release lock on it without caching */
441  _bt_relbuf(rel, metabuf);
442  }
443  else
444  {
445  rootblkno = metad->btm_fastroot;
446  Assert(rootblkno != P_NONE);
447  rootlevel = metad->btm_fastlevel;
448 
449  /*
450  * Cache the metapage data for next time
451  */
453  sizeof(BTMetaPageData));
454  memcpy(rel->rd_amcache, metad, sizeof(BTMetaPageData));
455 
456  /*
457  * We are done with the metapage; arrange to release it via first
458  * _bt_relandgetbuf call
459  */
460  rootbuf = metabuf;
461 
462  for (;;)
463  {
464  rootbuf = _bt_relandgetbuf(rel, rootbuf, rootblkno, BT_READ);
465  rootpage = BufferGetPage(rootbuf);
466  rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage);
467 
468  if (!P_IGNORE(rootopaque))
469  break;
470 
471  /* it's dead, Jim. step right one page */
472  if (P_RIGHTMOST(rootopaque))
473  elog(ERROR, "no live root page found in index \"%s\"",
475  rootblkno = rootopaque->btpo_next;
476  }
477 
478  /* Note: can't check btpo.level on deleted pages */
479  if (rootopaque->btpo.level != rootlevel)
480  elog(ERROR, "root page %u of index \"%s\" has level %u, expected %u",
481  rootblkno, RelationGetRelationName(rel),
482  rootopaque->btpo.level, rootlevel);
483  }
484 
485  /*
486  * By here, we have a pin and read lock on the root page, and no lock set
487  * on the metadata page. Return the root page's buffer.
488  */
489  return rootbuf;
490 }
void XLogRegisterBufData(uint8 block_id, char *data, int len)
Definition: xloginsert.c:362
bool allequalimage
Definition: nbtxlog.h:57
BlockNumber rootblk
Definition: nbtxlog.h:318
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:96
#define BTP_ROOT
Definition: nbtree.h:73
BlockNumber btpo_next
Definition: nbtree.h:59
static BTMetaPageData * _bt_getmeta(Relation rel, Buffer metabuf)
Definition: nbtpage.c:135
#define P_IGNORE(opaque)
Definition: nbtree.h:219
void _bt_upgrademetapage(Page page)
Definition: nbtpage.c:100
uint32 btm_version
Definition: nbtree.h:101
Buffer _bt_relandgetbuf(Relation rel, Buffer obuf, BlockNumber blkno, int access)
Definition: nbtpage.c:928
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:792
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1468
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:214
#define BTREE_VERSION
Definition: nbtree.h:143
uint32 btm_magic
Definition: nbtree.h:100
#define BTP_LEAF
Definition: nbtree.h:72
#define END_CRIT_SECTION()
Definition: miscadmin.h:134
BlockNumber root
Definition: nbtxlog.h:51
union BTPageOpaqueData::@45 btpo
#define P_NONE
Definition: nbtree.h:206
#define InvalidBuffer
Definition: buf.h:25
#define REGBUF_WILL_INIT
Definition: xloginsert.h:33
#define START_CRIT_SECTION()
Definition: miscadmin.h:132
uint32 level
Definition: nbtxlog.h:319
uint32 BlockNumber
Definition: block.h:31
#define P_NEW
Definition: bufmgr.h:91
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
#define BT_READ
Definition: nbtree.h:587
BlockNumber btm_fastroot
Definition: nbtree.h:104
#define XLOG_BTREE_NEWROOT
Definition: nbtxlog.h:36
void pfree(void *pointer)
Definition: mcxt.c:1056
#define BTREE_MAGIC
Definition: nbtree.h:142
#define ERROR
Definition: elog.h:43
float8 last_cleanup_num_heap_tuples
Definition: nbtxlog.h:56
BTCycleId btpo_cycleid
Definition: nbtree.h:66
TransactionId oldest_btpo_xact
Definition: nbtxlog.h:55
bool btm_allequalimage
Definition: nbtree.h:111
BlockNumber btpo_prev
Definition: nbtree.h:58
#define REGBUF_STANDARD
Definition: xloginsert.h:35
#define InvalidTransactionId
Definition: transam.h:31
#define RelationGetRelationName(relation)
Definition: rel.h:490
#define P_LEFTMOST(opaque)
Definition: nbtree.h:212
unsigned int uint32
Definition: c.h:367
#define BTREE_NOVAC_VERSION
Definition: nbtree.h:145
Buffer _bt_getroot(Relation rel, int access)
Definition: nbtpage.c:271
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define BTREE_METAPAGE
Definition: nbtree.h:141
#define SizeOfBtreeNewroot
Definition: nbtxlog.h:322
uint32 version
Definition: nbtxlog.h:50
uint32 btm_fastlevel
Definition: nbtree.h:105
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:324
uint32 level
Definition: nbtree.h:62
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:416
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3722
BlockNumber btm_root
Definition: nbtree.h:102
#define BTREE_MIN_VERSION
Definition: nbtree.h:144
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:947
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:738
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
float8 btm_last_cleanup_num_heap_tuples
Definition: nbtree.h:109
#define RelationNeedsWAL(relation)
Definition: rel.h:562
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2633
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:796
uint32 fastlevel
Definition: nbtxlog.h:54
uint32 btm_level
Definition: nbtree.h:103
#define elog(elevel,...)
Definition: elog.h:214
MemoryContext rd_indexcxt
Definition: rel.h:186
uint32 level
Definition: nbtxlog.h:52
BlockNumber fastroot
Definition: nbtxlog.h:53
TransactionId btm_oldest_btpo_xact
Definition: nbtree.h:107
void * rd_amcache
Definition: rel.h:211
#define BT_WRITE
Definition: nbtree.h:588
void XLogBeginInsert(void)
Definition: xloginsert.c:121
uint16 btpo_flags
Definition: nbtree.h:65
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
int Buffer
Definition: buf.h:23
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:213
Pointer Page
Definition: bufpage.h:78

◆ _bt_getrootheight()

int _bt_getrootheight ( Relation  rel)

Definition at line 603 of file nbtpage.c.

References _bt_getbuf(), _bt_getmeta(), _bt_relbuf(), Assert, BT_READ, BTMetaPageData::btm_allequalimage, BTMetaPageData::btm_fastlevel, BTMetaPageData::btm_fastroot, BTMetaPageData::btm_magic, BTMetaPageData::btm_root, BTMetaPageData::btm_version, BTREE_MAGIC, BTREE_METAPAGE, BTREE_MIN_VERSION, BTREE_NOVAC_VERSION, BTREE_VERSION, MemoryContextAlloc(), P_NONE, RelationData::rd_amcache, and RelationData::rd_indexcxt.

Referenced by _bt_insertonpg(), and get_relation_info().

604 {
605  BTMetaPageData *metad;
606 
607  if (rel->rd_amcache == NULL)
608  {
609  Buffer metabuf;
610 
611  metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
612  metad = _bt_getmeta(rel, metabuf);
613 
614  /*
615  * If there's no root page yet, _bt_getroot() doesn't expect a cache
616  * to be made, so just stop here and report the index height is zero.
617  * (XXX perhaps _bt_getroot() should be changed to allow this case.)
618  */
619  if (metad->btm_root == P_NONE)
620  {
621  _bt_relbuf(rel, metabuf);
622  return 0;
623  }
624 
625  /*
626  * Cache the metapage data for next time
627  */
629  sizeof(BTMetaPageData));
630  memcpy(rel->rd_amcache, metad, sizeof(BTMetaPageData));
631  _bt_relbuf(rel, metabuf);
632  }
633 
634  /* Get cached page */
635  metad = (BTMetaPageData *) rel->rd_amcache;
636  /* We shouldn't have cached it if any of these fail */
637  Assert(metad->btm_magic == BTREE_MAGIC);
639  Assert(metad->btm_version <= BTREE_VERSION);
640  Assert(!metad->btm_allequalimage ||
642  Assert(metad->btm_fastroot != P_NONE);
643 
644  return metad->btm_fastlevel;
645 }
static BTMetaPageData * _bt_getmeta(Relation rel, Buffer metabuf)
Definition: nbtpage.c:135
uint32 btm_version
Definition: nbtree.h:101
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:792
#define BTREE_VERSION
Definition: nbtree.h:143
uint32 btm_magic
Definition: nbtree.h:100
#define P_NONE
Definition: nbtree.h:206
#define BT_READ
Definition: nbtree.h:587
BlockNumber btm_fastroot
Definition: nbtree.h:104
#define BTREE_MAGIC
Definition: nbtree.h:142
bool btm_allequalimage
Definition: nbtree.h:111
#define BTREE_NOVAC_VERSION
Definition: nbtree.h:145
#define BTREE_METAPAGE
Definition: nbtree.h:141
uint32 btm_fastlevel
Definition: nbtree.h:105
BlockNumber btm_root
Definition: nbtree.h:102
#define BTREE_MIN_VERSION
Definition: nbtree.h:144
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:947
#define Assert(condition)
Definition: c.h:738
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:796
MemoryContext rd_indexcxt
Definition: rel.h:186
void * rd_amcache
Definition: rel.h:211
int Buffer
Definition: buf.h:23

◆ _bt_gettrueroot()

Buffer _bt_gettrueroot ( Relation  rel)

Definition at line 507 of file nbtpage.c.

References _bt_getbuf(), _bt_relandgetbuf(), _bt_relbuf(), BT_READ, BTMetaPageData::btm_level, BTMetaPageData::btm_magic, BTMetaPageData::btm_root, BTMetaPageData::btm_version, BTPageGetMeta, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_next, BTREE_MAGIC, BTREE_METAPAGE, BTREE_MIN_VERSION, BTREE_VERSION, BufferGetPage, elog, ereport, errcode(), errmsg(), ERROR, InvalidBuffer, BTPageOpaqueData::level, P_IGNORE, P_ISMETA, P_NONE, P_RIGHTMOST, PageGetSpecialPointer, pfree(), RelationData::rd_amcache, and RelationGetRelationName.

Referenced by _bt_get_endpoint().

508 {
509  Buffer metabuf;
510  Page metapg;
511  BTPageOpaque metaopaque;
512  Buffer rootbuf;
513  Page rootpage;
514  BTPageOpaque rootopaque;
515  BlockNumber rootblkno;
516  uint32 rootlevel;
517  BTMetaPageData *metad;
518 
519  /*
520  * We don't try to use cached metapage data here, since (a) this path is
521  * not performance-critical, and (b) if we are here it suggests our cache
522  * is out-of-date anyway. In light of point (b), it's probably safest to
523  * actively flush any cached metapage info.
524  */
525  if (rel->rd_amcache)
526  pfree(rel->rd_amcache);
527  rel->rd_amcache = NULL;
528 
529  metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
530  metapg = BufferGetPage(metabuf);
531  metaopaque = (BTPageOpaque) PageGetSpecialPointer(metapg);
532  metad = BTPageGetMeta(metapg);
533 
534  if (!P_ISMETA(metaopaque) ||
535  metad->btm_magic != BTREE_MAGIC)
536  ereport(ERROR,
537  (errcode(ERRCODE_INDEX_CORRUPTED),
538  errmsg("index \"%s\" is not a btree",
539  RelationGetRelationName(rel))));
540 
541  if (metad->btm_version < BTREE_MIN_VERSION ||
542  metad->btm_version > BTREE_VERSION)
543  ereport(ERROR,
544  (errcode(ERRCODE_INDEX_CORRUPTED),
545  errmsg("version mismatch in index \"%s\": file version %d, "
546  "current version %d, minimal supported version %d",
549 
550  /* if no root page initialized yet, fail */
551  if (metad->btm_root == P_NONE)
552  {
553  _bt_relbuf(rel, metabuf);
554  return InvalidBuffer;
555  }
556 
557  rootblkno = metad->btm_root;
558  rootlevel = metad->btm_level;
559 
560  /*
561  * We are done with the metapage; arrange to release it via first
562  * _bt_relandgetbuf call
563  */
564  rootbuf = metabuf;
565 
566  for (;;)
567  {
568  rootbuf = _bt_relandgetbuf(rel, rootbuf, rootblkno, BT_READ);
569  rootpage = BufferGetPage(rootbuf);
570  rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage);
571 
572  if (!P_IGNORE(rootopaque))
573  break;
574 
575  /* it's dead, Jim. step right one page */
576  if (P_RIGHTMOST(rootopaque))
577  elog(ERROR, "no live root page found in index \"%s\"",
579  rootblkno = rootopaque->btpo_next;
580  }
581 
582  /* Note: can't check btpo.level on deleted pages */
583  if (rootopaque->btpo.level != rootlevel)
584  elog(ERROR, "root page %u of index \"%s\" has level %u, expected %u",
585  rootblkno, RelationGetRelationName(rel),
586  rootopaque->btpo.level, rootlevel);
587 
588  return rootbuf;
589 }
BlockNumber btpo_next
Definition: nbtree.h:59
#define P_IGNORE(opaque)
Definition: nbtree.h:219
uint32 btm_version
Definition: nbtree.h:101
Buffer _bt_relandgetbuf(Relation rel, Buffer obuf, BlockNumber blkno, int access)
Definition: nbtpage.c:928
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:792
#define BTREE_VERSION
Definition: nbtree.h:143
uint32 btm_magic
Definition: nbtree.h:100
union BTPageOpaqueData::@45 btpo
#define P_NONE
Definition: nbtree.h:206
#define InvalidBuffer
Definition: buf.h:25
int errcode(int sqlerrcode)
Definition: elog.c:610
uint32 BlockNumber
Definition: block.h:31
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
#define P_ISMETA(opaque)
Definition: nbtree.h:217
#define BT_READ
Definition: nbtree.h:587
void pfree(void *pointer)
Definition: mcxt.c:1056
#define BTREE_MAGIC
Definition: nbtree.h:142
#define ERROR
Definition: elog.h:43
#define BTPageGetMeta(p)
Definition: nbtree.h:114
#define RelationGetRelationName(relation)
Definition: rel.h:490
unsigned int uint32
Definition: c.h:367
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define BTREE_METAPAGE
Definition: nbtree.h:141
uint32 level
Definition: nbtree.h:62
BlockNumber btm_root
Definition: nbtree.h:102
#define BTREE_MIN_VERSION
Definition: nbtree.h:144
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:947
#define ereport(elevel,...)
Definition: elog.h:144
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
int errmsg(const char *fmt,...)
Definition: elog.c:824
uint32 btm_level
Definition: nbtree.h:103
#define elog(elevel,...)
Definition: elog.h:214
void * rd_amcache
Definition: rel.h:211
int Buffer
Definition: buf.h:23
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:213
Pointer Page
Definition: bufpage.h:78

◆ _bt_initmetapage()

void _bt_initmetapage ( Page  page,
BlockNumber  rootbknum,
uint32  level,
bool  allequalimage 
)

Definition at line 60 of file nbtpage.c.

References _bt_pageinit(), BTMetaPageData::btm_allequalimage, BTMetaPageData::btm_fastlevel, BTMetaPageData::btm_fastroot, BTMetaPageData::btm_last_cleanup_num_heap_tuples, BTMetaPageData::btm_level, BTMetaPageData::btm_magic, BTMetaPageData::btm_oldest_btpo_xact, BTMetaPageData::btm_root, BTMetaPageData::btm_version, BTP_META, BTPageGetMeta, BTPageOpaqueData::btpo_flags, BTREE_MAGIC, BTREE_VERSION, InvalidTransactionId, and PageGetSpecialPointer.

Referenced by _bt_uppershutdown(), and btbuildempty().

62 {
63  BTMetaPageData *metad;
64  BTPageOpaque metaopaque;
65 
66  _bt_pageinit(page, BLCKSZ);
67 
68  metad = BTPageGetMeta(page);
69  metad->btm_magic = BTREE_MAGIC;
70  metad->btm_version = BTREE_VERSION;
71  metad->btm_root = rootbknum;
72  metad->btm_level = level;
73  metad->btm_fastroot = rootbknum;
74  metad->btm_fastlevel = level;
77  metad->btm_allequalimage = allequalimage;
78 
79  metaopaque = (BTPageOpaque) PageGetSpecialPointer(page);
80  metaopaque->btpo_flags = BTP_META;
81 
82  /*
83  * Set pd_lower just past the end of the metadata. This is essential,
84  * because without doing so, metadata will be lost if xlog.c compresses
85  * the page.
86  */
87  ((PageHeader) page)->pd_lower =
88  ((char *) metad + sizeof(BTMetaPageData)) - (char *) page;
89 }
uint32 btm_version
Definition: nbtree.h:101
#define BTREE_VERSION
Definition: nbtree.h:143
uint32 btm_magic
Definition: nbtree.h:100
#define BTP_META
Definition: nbtree.h:75
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
BlockNumber btm_fastroot
Definition: nbtree.h:104
#define BTREE_MAGIC
Definition: nbtree.h:142
#define BTPageGetMeta(p)
Definition: nbtree.h:114
bool btm_allequalimage
Definition: nbtree.h:111
#define InvalidTransactionId
Definition: transam.h:31
uint32 btm_fastlevel
Definition: nbtree.h:105
BlockNumber btm_root
Definition: nbtree.h:102
PageHeaderData * PageHeader
Definition: bufpage.h:166
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
float8 btm_last_cleanup_num_heap_tuples
Definition: nbtree.h:109
uint32 btm_level
Definition: nbtree.h:103
void _bt_pageinit(Page page, Size size)
Definition: nbtpage.c:959
TransactionId btm_oldest_btpo_xact
Definition: nbtree.h:107
uint16 btpo_flags
Definition: nbtree.h:65

◆ _bt_leftsib_splitflag()

static bool _bt_leftsib_splitflag ( Relation  rel,
BlockNumber  leftsib,
BlockNumber  target 
)
static

Definition at line 1334 of file nbtpage.c.

References _bt_getbuf(), _bt_relbuf(), BT_READ, BTPageOpaqueData::btpo_next, buf, BufferGetPage, P_INCOMPLETE_SPLIT, P_NONE, and PageGetSpecialPointer.

Referenced by _bt_lock_subtree_parent(), and _bt_pagedel().

1335 {
1336  Buffer buf;
1337  Page page;
1338  BTPageOpaque opaque;
1339  bool result;
1340 
1341  /* Easy case: No left sibling */
1342  if (leftsib == P_NONE)
1343  return false;
1344 
1345  buf = _bt_getbuf(rel, leftsib, BT_READ);
1346  page = BufferGetPage(buf);
1347  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1348 
1349  /*
1350  * If the left sibling was concurrently split, so that its next-pointer
1351  * doesn't point to the current page anymore, the split that created
1352  * target must be completed. Caller can reasonably expect that there will
1353  * be a downlink to the target page that it can relocate using its stack.
1354  * (We don't allow splitting an incompletely split page again until the
1355  * previous split has been completed.)
1356  */
1357  result = (opaque->btpo_next == target && P_INCOMPLETE_SPLIT(opaque));
1358  _bt_relbuf(rel, buf);
1359 
1360  return result;
1361 }
BlockNumber btpo_next
Definition: nbtree.h:59
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:792
#define P_NONE
Definition: nbtree.h:206
#define P_INCOMPLETE_SPLIT(opaque)
Definition: nbtree.h:221
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
#define BT_READ
Definition: nbtree.h:587
static char * buf
Definition: pg_test_fsync.c:67
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:947
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
int Buffer
Definition: buf.h:23
Pointer Page
Definition: bufpage.h:78

◆ _bt_lock_subtree_parent()

static bool _bt_lock_subtree_parent ( Relation  rel,
BlockNumber  child,
BTStack  stack,
Buffer subtreeparent,
OffsetNumber poffset,
BlockNumber topparent,
BlockNumber topparentrightsib 
)
static

Definition at line 2375 of file nbtpage.c.

References _bt_getstackbuf(), _bt_leftsib_splitflag(), _bt_relbuf(), Assert, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BTStackData::bts_blkno, BTStackData::bts_offset, BTStackData::bts_parent, BufferGetPage, ereport, errcode(), errmsg_internal(), ERROR, InvalidBuffer, P_FIRSTDATAKEY, P_INCOMPLETE_SPLIT, P_RIGHTMOST, PageGetMaxOffsetNumber, PageGetSpecialPointer, and RelationGetRelationName.

Referenced by _bt_mark_page_halfdead().

2378 {
2379  BlockNumber parent,
2380  leftsibparent;
2381  OffsetNumber parentoffset,
2382  maxoff;
2383  Buffer pbuf;
2384  Page page;
2385  BTPageOpaque opaque;
2386 
2387  /*
2388  * Locate the pivot tuple whose downlink points to "child". Write lock
2389  * the parent page itself.
2390  */
2391  pbuf = _bt_getstackbuf(rel, stack, child);
2392  if (pbuf == InvalidBuffer)
2393  ereport(ERROR,
2394  (errcode(ERRCODE_INDEX_CORRUPTED),
2395  errmsg_internal("failed to re-find parent key in index \"%s\" for deletion target page %u",
2396  RelationGetRelationName(rel), child)));
2397  parent = stack->bts_blkno;
2398  parentoffset = stack->bts_offset;
2399 
2400  page = BufferGetPage(pbuf);
2401  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2402  maxoff = PageGetMaxOffsetNumber(page);
2403  leftsibparent = opaque->btpo_prev;
2404 
2405  /*
2406  * _bt_getstackbuf() completes page splits on returned parent buffer when
2407  * required.
2408  *
2409  * In general it's a bad idea for VACUUM to use up more disk space, which
2410  * is why page deletion does not finish incomplete page splits most of the
2411  * time. We allow this limited exception because the risk is much lower,
2412  * and the potential downside of not proceeding is much higher: A single
2413  * internal page with the INCOMPLETE_SPLIT flag set might otherwise
2414  * prevent us from deleting hundreds of empty leaf pages from one level
2415  * down.
2416  */
2417  Assert(!P_INCOMPLETE_SPLIT(opaque));
2418 
2419  if (parentoffset < maxoff)
2420  {
2421  /*
2422  * Child is not the rightmost child in parent, so it's safe to delete
2423  * the subtree whose root/topparent is child page
2424  */
2425  *subtreeparent = pbuf;
2426  *poffset = parentoffset;
2427  return true;
2428  }
2429 
2430  /*
2431  * Child is the rightmost child of parent.
2432  *
2433  * Since it's the rightmost child of parent, deleting the child (or
2434  * deleting the subtree whose root/topparent is the child page) is only
2435  * safe when it's also possible to delete the parent.
2436  */
2437  Assert(parentoffset == maxoff);
2438  if (parentoffset != P_FIRSTDATAKEY(opaque) || P_RIGHTMOST(opaque))
2439  {
2440  /*
2441  * Child isn't parent's only child, or parent is rightmost on its
2442  * entire level. Definitely cannot delete any pages.
2443  */
2444  _bt_relbuf(rel, pbuf);
2445  return false;
2446  }
2447 
2448  /*
2449  * Now make sure that the parent deletion is itself safe by examining the
2450  * child's grandparent page. Recurse, passing the parent page as the
2451  * child page (child's grandparent is the parent on the next level up). If
2452  * parent deletion is unsafe, then child deletion must also be unsafe (in
2453  * which case caller cannot delete any pages at all).
2454  */
2455  *topparent = parent;
2456  *topparentrightsib = opaque->btpo_next;
2457 
2458  /*
2459  * Release lock on parent before recursing.
2460  *
2461  * It's OK to release page locks on parent before recursive call locks
2462  * grandparent. An internal page can only acquire an entry if the child
2463  * is split, but that cannot happen as long as we still hold a lock on the
2464  * leafbuf page.
2465  */
2466  _bt_relbuf(rel, pbuf);
2467 
2468  /*
2469  * Before recursing, check that the left sibling of parent (if any) is not
2470  * marked with INCOMPLETE_SPLIT flag first (must do so after we drop the
2471  * parent lock).
2472  *
2473  * Note: We deliberately avoid completing incomplete splits here.
2474  */
2475  if (_bt_leftsib_splitflag(rel, leftsibparent, parent))
2476  return false;
2477 
2478  /* Recurse to examine child page's grandparent page */
2479  return _bt_lock_subtree_parent(rel, parent, stack->bts_parent,
2480  subtreeparent, poffset,
2481  topparent, topparentrightsib);
2482 }
BlockNumber btpo_next
Definition: nbtree.h:59
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:244
#define InvalidBuffer
Definition: buf.h:25
static bool _bt_lock_subtree_parent(Relation rel, BlockNumber child, BTStack stack, Buffer *subtreeparent, OffsetNumber *poffset, BlockNumber *topparent, BlockNumber *topparentrightsib)
Definition: nbtpage.c:2375
int errcode(int sqlerrcode)
Definition: elog.c:610
uint32 BlockNumber
Definition: block.h:31
#define P_INCOMPLETE_SPLIT(opaque)
Definition: nbtree.h:221
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
uint16 OffsetNumber
Definition: off.h:24
#define ERROR
Definition: elog.h:43
BlockNumber btpo_prev
Definition: nbtree.h:58
OffsetNumber bts_offset
Definition: nbtree.h:603
#define RelationGetRelationName(relation)
Definition: rel.h:490
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
BlockNumber bts_blkno
Definition: nbtree.h:602
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:947
#define ereport(elevel,...)
Definition: elog.h:144
int errmsg_internal(const char *fmt,...)
Definition: elog.c:911
#define Assert(condition)
Definition: c.h:738
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
struct BTStackData * bts_parent
Definition: nbtree.h:604
int Buffer
Definition: buf.h:23
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:213
Buffer _bt_getstackbuf(Relation rel, BTStack stack, BlockNumber child)
Definition: nbtinsert.c:2292
Pointer Page
Definition: bufpage.h:78
static bool _bt_leftsib_splitflag(Relation rel, BlockNumber leftsib, BlockNumber target)
Definition: nbtpage.c:1334

◆ _bt_log_reuse_page()

static void _bt_log_reuse_page ( Relation  rel,
BlockNumber  blkno,
TransactionId  latestRemovedXid 
)
static

Definition at line 759 of file nbtpage.c.

References xl_btree_reuse_page::block, xl_btree_reuse_page::latestRemovedXid, xl_btree_reuse_page::node, RelationData::rd_node, SizeOfBtreeReusePage, XLOG_BTREE_REUSE_PAGE, XLogBeginInsert(), XLogInsert(), and XLogRegisterData().

Referenced by _bt_getbuf().

760 {
761  xl_btree_reuse_page xlrec_reuse;
762 
763  /*
764  * Note that we don't register the buffer with the record, because this
765  * operation doesn't modify the page. This record only exists to provide a
766  * conflict point for Hot Standby.
767  */
768 
769  /* XLOG stuff */
770  xlrec_reuse.node = rel->rd_node;
771  xlrec_reuse.block = blkno;
772  xlrec_reuse.latestRemovedXid = latestRemovedXid;
773 
774  XLogBeginInsert();
775  XLogRegisterData((char *) &xlrec_reuse, SizeOfBtreeReusePage);
776 
777  XLogInsert(RM_BTREE_ID, XLOG_BTREE_REUSE_PAGE);
778 }
RelFileNode node
Definition: nbtxlog.h:206
#define SizeOfBtreeReusePage
Definition: nbtxlog.h:211
BlockNumber block
Definition: nbtxlog.h:207
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:324
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:416
#define XLOG_BTREE_REUSE_PAGE
Definition: nbtxlog.h:40
RelFileNode rd_node
Definition: rel.h:55
TransactionId latestRemovedXid
Definition: nbtxlog.h:208
void XLogBeginInsert(void)
Definition: xloginsert.c:121

◆ _bt_mark_page_halfdead()

static bool _bt_mark_page_halfdead ( Relation  rel,
Buffer  leafbuf,
BTStack  stack 
)
static

Definition at line 1713 of file nbtpage.c.

References _bt_lock_subtree_parent(), _bt_relbuf(), _bt_rightsib_halfdeadflag(), Assert, BTP_HALF_DEAD, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BTreeTupleGetDownLink(), BTreeTupleSetDownLink(), BTreeTupleSetTopParent(), BufferGetBlockNumber(), BufferGetPage, DEBUG1, elog, END_CRIT_SECTION, ereport, errcode(), errmsg_internal(), ERROR, IndexTupleSize, InvalidBlockNumber, xl_btree_mark_page_halfdead::leafblk, xl_btree_mark_page_halfdead::leftblk, MarkBufferDirty(), MemSet, OffsetNumberNext, P_FIRSTDATAKEY, P_HIKEY, P_IGNORE, P_ISLEAF, P_ISROOT, P_RIGHTMOST, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageGetSpecialPointer, PageIndexTupleDelete(), PageIndexTupleOverwrite(), PageSetLSN, xl_btree_mark_page_halfdead::poffset, PredicateLockPageCombine(), REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetRelationName, RelationNeedsWAL, xl_btree_mark_page_halfdead::rightblk, SizeOfBtreeMarkPageHalfDead, START_CRIT_SECTION, IndexTupleData::t_info, xl_btree_mark_page_halfdead::topparent, XLOG_BTREE_MARK_PAGE_HALFDEAD, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _bt_pagedel().

1714 {
1715  BlockNumber leafblkno;
1716  BlockNumber leafrightsib;
1717  BlockNumber topparent;
1718  BlockNumber topparentrightsib;
1719  ItemId itemid;
1720  Page page;
1721  BTPageOpaque opaque;
1722  Buffer subtreeparent;
1723  OffsetNumber poffset;
1724  OffsetNumber nextoffset;
1725  IndexTuple itup;
1726  IndexTupleData trunctuple;
1727 
1728  page = BufferGetPage(leafbuf);
1729  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1730 
1731  Assert(!P_RIGHTMOST(opaque) && !P_ISROOT(opaque) &&
1732  P_ISLEAF(opaque) && !P_IGNORE(opaque) &&
1733  P_FIRSTDATAKEY(opaque) > PageGetMaxOffsetNumber(page));
1734 
1735  /*
1736  * Save info about the leaf page.
1737  */
1738  leafblkno = BufferGetBlockNumber(leafbuf);
1739  leafrightsib = opaque->btpo_next;
1740 
1741  /*
1742  * Before attempting to lock the parent page, check that the right sibling
1743  * is not in half-dead state. A half-dead right sibling would have no
1744  * downlink in the parent, which would be highly confusing later when we
1745  * delete the downlink. It would fail the "right sibling of target page
1746  * is also the next child in parent page" cross-check below.
1747  */
1748  if (_bt_rightsib_halfdeadflag(rel, leafrightsib))
1749  {
1750  elog(DEBUG1, "could not delete page %u because its right sibling %u is half-dead",
1751  leafblkno, leafrightsib);
1752  return false;
1753  }
1754 
1755  /*
1756  * We cannot delete a page that is the rightmost child of its immediate
1757  * parent, unless it is the only child --- in which case the parent has to
1758  * be deleted too, and the same condition applies recursively to it. We
1759  * have to check this condition all the way up before trying to delete,
1760  * and lock the parent of the root of the to-be-deleted subtree (the
1761  * "subtree parent"). _bt_lock_subtree_parent() locks the subtree parent
1762  * for us. We remove the downlink to the "top parent" page (subtree root
1763  * page) from the subtree parent page below.
1764  *
1765  * Initialize topparent to be leafbuf page now. The final to-be-deleted
1766  * subtree is often a degenerate one page subtree consisting only of the
1767  * leafbuf page. When that happens, the leafbuf page is the final subtree
1768  * root page/top parent page.
1769  */
1770  topparent = leafblkno;
1771  topparentrightsib = leafrightsib;
1772  if (!_bt_lock_subtree_parent(rel, leafblkno, stack,
1773  &subtreeparent, &poffset,
1774  &topparent, &topparentrightsib))
1775  return false;
1776 
1777  /*
1778  * Check that the parent-page index items we're about to delete/overwrite
1779  * in subtree parent page contain what we expect. This can fail if the
1780  * index has become corrupt for some reason. We want to throw any error
1781  * before entering the critical section --- otherwise it'd be a PANIC.
1782  */
1783  page = BufferGetPage(subtreeparent);
1784  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1785 
1786 #ifdef USE_ASSERT_CHECKING
1787 
1788  /*
1789  * This is just an assertion because _bt_lock_subtree_parent should have
1790  * guaranteed tuple has the expected contents
1791  */
1792  itemid = PageGetItemId(page, poffset);
1793  itup = (IndexTuple) PageGetItem(page, itemid);
1794  Assert(BTreeTupleGetDownLink(itup) == topparent);
1795 #endif
1796 
1797  nextoffset = OffsetNumberNext(poffset);
1798  itemid = PageGetItemId(page, nextoffset);
1799  itup = (IndexTuple) PageGetItem(page, itemid);
1800  if (BTreeTupleGetDownLink(itup) != topparentrightsib)
1801  ereport(ERROR,
1802  (errcode(ERRCODE_INDEX_CORRUPTED),
1803  errmsg_internal("right sibling %u of block %u is not next child %u of block %u in index \"%s\"",
1804  topparentrightsib, topparent,
1805  BTreeTupleGetDownLink(itup),
1806  BufferGetBlockNumber(subtreeparent),
1807  RelationGetRelationName(rel))));
1808 
1809  /*
1810  * Any insert which would have gone on the leaf block will now go to its
1811  * right sibling. In other words, the key space moves right.
1812  */
1813  PredicateLockPageCombine(rel, leafblkno, leafrightsib);
1814 
1815  /* No ereport(ERROR) until changes are logged */
1817 
1818  /*
1819  * Update parent of subtree. We want to delete the downlink to the top
1820  * parent page/root of the subtree, and the *following* key. Easiest way
1821  * is to copy the right sibling's downlink over the downlink that points
1822  * to top parent page, and then delete the right sibling's original pivot
1823  * tuple.
1824  *
1825  * Lanin and Shasha make the key space move left when deleting a page,
1826  * whereas the key space moves right here. That's why we cannot simply
1827  * delete the pivot tuple with the downlink to the top parent page. See
1828  * nbtree/README.
1829  */
1830  page = BufferGetPage(subtreeparent);
1831  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1832 
1833  itemid = PageGetItemId(page, poffset);
1834  itup = (IndexTuple) PageGetItem(page, itemid);
1835  BTreeTupleSetDownLink(itup, topparentrightsib);
1836 
1837  nextoffset = OffsetNumberNext(poffset);
1838  PageIndexTupleDelete(page, nextoffset);
1839 
1840  /*
1841  * Mark the leaf page as half-dead, and stamp it with a link to the top
1842  * parent page. When the leaf page is also the top parent page, the link
1843  * is set to InvalidBlockNumber.
1844  */
1845  page = BufferGetPage(leafbuf);
1846  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1847  opaque->btpo_flags |= BTP_HALF_DEAD;
1848 
1850  MemSet(&trunctuple, 0, sizeof(IndexTupleData));
1851  trunctuple.t_info = sizeof(IndexTupleData);
1852  if (topparent != leafblkno)
1853  BTreeTupleSetTopParent(&trunctuple, topparent);
1854  else
1856 
1857  if (!PageIndexTupleOverwrite(page, P_HIKEY, (Item) &trunctuple,
1858  IndexTupleSize(&trunctuple)))
1859  elog(ERROR, "could not overwrite high key in half-dead page");
1860 
1861  /* Must mark buffers dirty before XLogInsert */
1862  MarkBufferDirty(subtreeparent);
1863  MarkBufferDirty(leafbuf);
1864 
1865  /* XLOG stuff */
1866  if (RelationNeedsWAL(rel))
1867  {
1869  XLogRecPtr recptr;
1870 
1871  xlrec.poffset = poffset;
1872  xlrec.leafblk = leafblkno;
1873  if (topparent != leafblkno)
1874  xlrec.topparent = topparent;
1875  else
1876  xlrec.topparent = InvalidBlockNumber;
1877 
1878  XLogBeginInsert();
1879  XLogRegisterBuffer(0, leafbuf, REGBUF_WILL_INIT);
1880  XLogRegisterBuffer(1, subtreeparent, REGBUF_STANDARD);
1881 
1882  page = BufferGetPage(leafbuf);
1883  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1884  xlrec.leftblk = opaque->btpo_prev;
1885  xlrec.rightblk = opaque->btpo_next;
1886 
1888 
1889  recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_MARK_PAGE_HALFDEAD);
1890 
1891  page = BufferGetPage(subtreeparent);
1892  PageSetLSN(page, recptr);
1893  page = BufferGetPage(leafbuf);
1894  PageSetLSN(page, recptr);
1895  }
1896 
1897  END_CRIT_SECTION();
1898 
1899  _bt_relbuf(rel, subtreeparent);
1900  return true;
1901 }
BlockNumber btpo_next
Definition: nbtree.h:59
#define DEBUG1
Definition: elog.h:25
#define P_IGNORE(opaque)
Definition: nbtree.h:219
void PageIndexTupleDelete(Page page, OffsetNumber offnum)
Definition: bufpage.c:719
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1468
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:214
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:244
#define SizeOfBtreeMarkPageHalfDead
Definition: nbtxlog.h:273
#define BTP_HALF_DEAD
Definition: nbtree.h:76
#define END_CRIT_SECTION()
Definition: miscadmin.h:134
static bool _bt_rightsib_halfdeadflag(Relation rel, BlockNumber leafrightsib)
Definition: nbtpage.c:1391
Pointer Item
Definition: item.h:17
static BlockNumber BTreeTupleGetDownLink(IndexTuple pivot)
Definition: nbtree.h:424
#define REGBUF_WILL_INIT
Definition: xloginsert.h:33
static bool _bt_lock_subtree_parent(Relation rel, BlockNumber child, BTStack stack, Buffer *subtreeparent, OffsetNumber *poffset, BlockNumber *topparent, BlockNumber *topparentrightsib)
Definition: nbtpage.c:2375
#define START_CRIT_SECTION()
Definition: miscadmin.h:132
int errcode(int sqlerrcode)
Definition: elog.c:610
#define MemSet(start, val, len)
Definition: c.h:971
uint32 BlockNumber
Definition: block.h:31
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
uint16 OffsetNumber
Definition: off.h:24
#define ERROR
Definition: elog.h:43
bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum, Item newtup, Size newsize)
Definition: bufpage.c:1060
BlockNumber btpo_prev
Definition: nbtree.h:58
IndexTupleData * IndexTuple
Definition: itup.h:53
#define REGBUF_STANDARD
Definition: xloginsert.h:35
#define RelationGetRelationName(relation)
Definition: rel.h:490
static void BTreeTupleSetDownLink(IndexTuple pivot, BlockNumber blkno)
Definition: nbtree.h:430
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define P_ISROOT(opaque)
Definition: nbtree.h:215
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:324
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:416
#define XLOG_BTREE_MARK_PAGE_HALFDEAD
Definition: nbtxlog.h:37
struct IndexTupleData IndexTupleData
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:947
#define ereport(elevel,...)
Definition: elog.h:144
int errmsg_internal(const char *fmt,...)
Definition: elog.c:911
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:738
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
#define InvalidBlockNumber
Definition: block.h:33
#define RelationNeedsWAL(relation)
Definition: rel.h:562
#define P_HIKEY
Definition: nbtree.h:242
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2633
#define elog(elevel,...)
Definition: elog.h:214
void PredicateLockPageCombine(Relation relation, BlockNumber oldblkno, BlockNumber newblkno)
Definition: predicate.c:3183
unsigned short t_info
Definition: itup.h:49
void XLogBeginInsert(void)
Definition: xloginsert.c:121
uint16 btpo_flags
Definition: nbtree.h:65
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
int Buffer
Definition: buf.h:23
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:213
static void BTreeTupleSetTopParent(IndexTuple leafhikey, BlockNumber blkno)
Definition: nbtree.h:494
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
Pointer Page
Definition: bufpage.h:78
#define IndexTupleSize(itup)
Definition: itup.h:71
#define P_ISLEAF(opaque)
Definition: nbtree.h:214

◆ _bt_metaversion()

void _bt_metaversion ( Relation  rel,
bool heapkeyspace,
bool allequalimage 
)

Definition at line 667 of file nbtpage.c.

References _bt_getbuf(), _bt_getmeta(), _bt_relbuf(), Assert, BT_READ, BTMetaPageData::btm_allequalimage, BTMetaPageData::btm_fastroot, BTMetaPageData::btm_magic, BTMetaPageData::btm_root, BTMetaPageData::btm_version, BTREE_MAGIC, BTREE_METAPAGE, BTREE_MIN_VERSION, BTREE_NOVAC_VERSION, BTREE_VERSION, MemoryContextAlloc(), P_NONE, RelationData::rd_amcache, and RelationData::rd_indexcxt.

Referenced by _bt_first(), _bt_mkscankey(), and bt_index_check_internal().

668 {
669  BTMetaPageData *metad;
670 
671  if (rel->rd_amcache == NULL)
672  {
673  Buffer metabuf;
674 
675  metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
676  metad = _bt_getmeta(rel, metabuf);
677 
678  /*
679  * If there's no root page yet, _bt_getroot() doesn't expect a cache
680  * to be made, so just stop here. (XXX perhaps _bt_getroot() should
681  * be changed to allow this case.)
682  */
683  if (metad->btm_root == P_NONE)
684  {
685  *heapkeyspace = metad->btm_version > BTREE_NOVAC_VERSION;
686  *allequalimage = metad->btm_allequalimage;
687 
688  _bt_relbuf(rel, metabuf);
689  return;
690  }
691 
692  /*
693  * Cache the metapage data for next time
694  *
695  * An on-the-fly version upgrade performed by _bt_upgrademetapage()
696  * can change the nbtree version for an index without invalidating any
697  * local cache. This is okay because it can only happen when moving
698  * from version 2 to version 3, both of which are !heapkeyspace
699  * versions.
700  */
702  sizeof(BTMetaPageData));
703  memcpy(rel->rd_amcache, metad, sizeof(BTMetaPageData));
704  _bt_relbuf(rel, metabuf);
705  }
706 
707  /* Get cached page */
708  metad = (BTMetaPageData *) rel->rd_amcache;
709  /* We shouldn't have cached it if any of these fail */
710  Assert(metad->btm_magic == BTREE_MAGIC);
712  Assert(metad->btm_version <= BTREE_VERSION);
713  Assert(!metad->btm_allequalimage ||
715  Assert(metad->btm_fastroot != P_NONE);
716 
717  *heapkeyspace = metad->btm_version > BTREE_NOVAC_VERSION;
718  *allequalimage = metad->btm_allequalimage;
719 }
static BTMetaPageData * _bt_getmeta(Relation rel, Buffer metabuf)
Definition: nbtpage.c:135
uint32 btm_version
Definition: nbtree.h:101
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:792
#define BTREE_VERSION
Definition: nbtree.h:143
uint32 btm_magic
Definition: nbtree.h:100
#define P_NONE
Definition: nbtree.h:206
#define BT_READ
Definition: nbtree.h:587
BlockNumber btm_fastroot
Definition: nbtree.h:104
#define BTREE_MAGIC
Definition: nbtree.h:142
bool btm_allequalimage
Definition: nbtree.h:111
#define BTREE_NOVAC_VERSION
Definition: nbtree.h:145
#define BTREE_METAPAGE
Definition: nbtree.h:141
BlockNumber btm_root
Definition: nbtree.h:102
#define BTREE_MIN_VERSION
Definition: nbtree.h:144
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:947
#define Assert(condition)
Definition: c.h:738
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:796
MemoryContext rd_indexcxt
Definition: rel.h:186
void * rd_amcache
Definition: rel.h:211
int Buffer
Definition: buf.h:23

◆ _bt_page_recyclable()

bool _bt_page_recyclable ( Page  page)

Definition at line 974 of file nbtpage.c.

References BTPageOpaqueData::btpo, P_ISDELETED, PageGetSpecialPointer, PageIsNew, RecentGlobalXmin, TransactionIdPrecedes(), and BTPageOpaqueData::xact.

Referenced by _bt_getbuf(), and btvacuumpage().

975 {
976  BTPageOpaque opaque;
977 
978  /*
979  * It's possible to find an all-zeroes page in an index --- for example, a
980  * backend might successfully extend the relation one page and then crash
981  * before it is able to make a WAL entry for adding the page. If we find a
982  * zeroed page then reclaim it.
983  */
984  if (PageIsNew(page))
985  return true;
986 
987  /*
988  * Otherwise, recycle if deleted and too old to have any processes
989  * interested in it.
990  */
991  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
992  if (P_ISDELETED(opaque) &&
994  return true;
995  return false;
996 }
union BTPageOpaqueData::@45 btpo
TransactionId xact
Definition: nbtree.h:63
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
TransactionId RecentGlobalXmin
Definition: snapmgr.c:168
#define P_ISDELETED(opaque)
Definition: nbtree.h:216
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
#define PageIsNew(page)
Definition: bufpage.h:229

◆ _bt_pagedel()

uint32 _bt_pagedel ( Relation  rel,
Buffer  leafbuf,
TransactionId oldestBtpoXact 
)

Definition at line 1442 of file nbtpage.c.

References _bt_getbuf(), _bt_leftsib_splitflag(), _bt_mark_page_halfdead(), _bt_mkscankey(), _bt_relbuf(), _bt_search(), _bt_unlink_halfdead_page(), Assert, BT_READ, BT_WRITE, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage, CHECK_FOR_INTERRUPTS, CopyIndexTuple(), ereport, errcode(), errhint(), errmsg(), errmsg_internal(), LockBuffer(), LOG, P_FIRSTDATAKEY, P_HIKEY, P_IGNORE, P_INCOMPLETE_SPLIT, P_ISDELETED, P_ISHALFDEAD, P_ISLEAF, P_ISROOT, P_RIGHTMOST, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageGetSpecialPointer, BTScanInsertData::pivotsearch, RelationGetRelationName, ReleaseBuffer(), TransactionIdFollowsOrEquals(), and BTPageOpaqueData::xact.

Referenced by btvacuumpage().

1443 {
1444  uint32 ndeleted = 0;
1445  BlockNumber rightsib;
1446  bool rightsib_empty;
1447  Page page;
1448  BTPageOpaque opaque;
1449 
1450  /*
1451  * Save original leafbuf block number from caller. Only deleted blocks
1452  * that are <= scanblkno get counted in ndeleted return value.
1453  */
1454  BlockNumber scanblkno = BufferGetBlockNumber(leafbuf);
1455 
1456  /*
1457  * "stack" is a search stack leading (approximately) to the target page.
1458  * It is initially NULL, but when iterating, we keep it to avoid
1459  * duplicated search effort.
1460  *
1461  * Also, when "stack" is not NULL, we have already checked that the
1462  * current page is not the right half of an incomplete split, i.e. the
1463  * left sibling does not have its INCOMPLETE_SPLIT flag set, including
1464  * when the current target page is to the right of caller's initial page
1465  * (the scanblkno page).
1466  */
1467  BTStack stack = NULL;
1468 
1469  for (;;)
1470  {
1471  page = BufferGetPage(leafbuf);
1472  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1473 
1474  /*
1475  * Internal pages are never deleted directly, only as part of deleting
1476  * the whole subtree all the way down to leaf level.
1477  *
1478  * Also check for deleted pages here. Caller never passes us a fully
1479  * deleted page. Only VACUUM can delete pages, so there can't have
1480  * been a concurrent deletion. Assume that we reached any deleted
1481  * page encountered here by following a sibling link, and that the
1482  * index is corrupt.
1483  */
1484  Assert(!P_ISDELETED(opaque));
1485  if (!P_ISLEAF(opaque) || P_ISDELETED(opaque))
1486  {
1487  /*
1488  * Pre-9.4 page deletion only marked internal pages as half-dead,
1489  * but now we only use that flag on leaf pages. The old algorithm
1490  * was never supposed to leave half-dead pages in the tree, it was
1491  * just a transient state, but it was nevertheless possible in
1492  * error scenarios. We don't know how to deal with them here. They
1493  * are harmless as far as searches are considered, but inserts
1494  * into the deleted keyspace could add out-of-order downlinks in
1495  * the upper levels. Log a notice, hopefully the admin will notice
1496  * and reindex.
1497  */
1498  if (P_ISHALFDEAD(opaque))
1499  ereport(LOG,
1500  (errcode(ERRCODE_INDEX_CORRUPTED),
1501  errmsg("index \"%s\" contains a half-dead internal page",
1503  errhint("This can be caused by an interrupted VACUUM in version 9.3 or older, before upgrade. Please REINDEX it.")));
1504 
1505  if (P_ISDELETED(opaque))
1506  ereport(LOG,
1507  (errcode(ERRCODE_INDEX_CORRUPTED),
1508  errmsg_internal("found deleted block %u while following right link from block %u in index \"%s\"",
1509  BufferGetBlockNumber(leafbuf),
1510  scanblkno,
1511  RelationGetRelationName(rel))));
1512 
1513  _bt_relbuf(rel, leafbuf);
1514  return ndeleted;
1515  }
1516 
1517  /*
1518  * We can never delete rightmost pages nor root pages. While at it,
1519  * check that page is empty, since it's possible that the leafbuf page
1520  * was empty a moment ago, but has since had some inserts.
1521  *
1522  * To keep the algorithm simple, we also never delete an incompletely
1523  * split page (they should be rare enough that this doesn't make any
1524  * meaningful difference to disk usage):
1525  *
1526  * The INCOMPLETE_SPLIT flag on the page tells us if the page is the
1527  * left half of an incomplete split, but ensuring that it's not the
1528  * right half is more complicated. For that, we have to check that
1529  * the left sibling doesn't have its INCOMPLETE_SPLIT flag set using
1530  * _bt_leftsib_splitflag(). On the first iteration, we temporarily
1531  * release the lock on scanblkno/leafbuf, check the left sibling, and
1532  * construct a search stack to scanblkno. On subsequent iterations,
1533  * we know we stepped right from a page that passed these tests, so
1534  * it's OK.
1535  */
1536  if (P_RIGHTMOST(opaque) || P_ISROOT(opaque) ||
1537  P_FIRSTDATAKEY(opaque) <= PageGetMaxOffsetNumber(page) ||
1538  P_INCOMPLETE_SPLIT(opaque))
1539  {
1540  /* Should never fail to delete a half-dead page */
1541  Assert(!P_ISHALFDEAD(opaque));
1542 
1543  _bt_relbuf(rel, leafbuf);
1544  return ndeleted;
1545  }
1546 
1547  /*
1548  * First, remove downlink pointing to the page (or a parent of the
1549  * page, if we are going to delete a taller subtree), and mark the
1550  * leafbuf page half-dead
1551  */
1552  if (!P_ISHALFDEAD(opaque))
1553  {
1554  /*
1555  * We need an approximate pointer to the page's parent page. We
1556  * use a variant of the standard search mechanism to search for
1557  * the page's high key; this will give us a link to either the
1558  * current parent or someplace to its left (if there are multiple
1559  * equal high keys, which is possible with !heapkeyspace indexes).
1560  *
1561  * Also check if this is the right-half of an incomplete split
1562  * (see comment above).
1563  */
1564  if (!stack)
1565  {
1566  BTScanInsert itup_key;
1567  ItemId itemid;
1568  IndexTuple targetkey;
1569  BlockNumber leftsib,
1570  leafblkno;
1571  Buffer sleafbuf;
1572 
1573  itemid = PageGetItemId(page, P_HIKEY);
1574  targetkey = CopyIndexTuple((IndexTuple) PageGetItem(page, itemid));
1575 
1576  leftsib = opaque->btpo_prev;
1577  leafblkno = BufferGetBlockNumber(leafbuf);
1578 
1579  /*
1580  * To avoid deadlocks, we'd better drop the leaf page lock
1581  * before going further.
1582  */
1583  LockBuffer(leafbuf, BUFFER_LOCK_UNLOCK);
1584 
1585  /*
1586  * Check that the left sibling of leafbuf (if any) is not
1587  * marked with INCOMPLETE_SPLIT flag before proceeding
1588  */
1589  Assert(leafblkno == scanblkno);
1590  if (_bt_leftsib_splitflag(rel, leftsib, leafblkno))
1591  {
1592  ReleaseBuffer(leafbuf);
1593  return ndeleted;
1594  }
1595 
1596  /* we need an insertion scan key for the search, so build one */
1597  itup_key = _bt_mkscankey(rel, targetkey);
1598  /* find the leftmost leaf page with matching pivot/high key */
1599  itup_key->pivotsearch = true;
1600  stack = _bt_search(rel, itup_key, &sleafbuf, BT_READ, NULL);
1601  /* won't need a second lock or pin on leafbuf */
1602  _bt_relbuf(rel, sleafbuf);
1603 
1604  /*
1605  * Re-lock the leaf page, and start over to use our stack
1606  * within _bt_mark_page_halfdead. We must do it that way
1607  * because it's possible that leafbuf can no longer be
1608  * deleted. We need to recheck.
1609  *
1610  * Note: We can't simply hold on to the sleafbuf lock instead,
1611  * because it's barely possible that sleafbuf is not the same
1612  * page as leafbuf. This happens when leafbuf split after our
1613  * original lock was dropped, but before _bt_search finished
1614  * its descent. We rely on the assumption that we'll find
1615  * leafbuf isn't safe to delete anymore in this scenario.
1616  * (Page deletion can cope with the stack being to the left of
1617  * leafbuf, but not to the right of leafbuf.)
1618  */
1619  LockBuffer(leafbuf, BT_WRITE);
1620  continue;
1621  }
1622 
1623  /*
1624  * See if it's safe to delete the leaf page, and determine how
1625  * many parent/internal pages above the leaf level will be
1626  * deleted. If it's safe then _bt_mark_page_halfdead will also
1627  * perform the first phase of deletion, which includes marking the
1628  * leafbuf page half-dead.
1629  */
1630  Assert(P_ISLEAF(opaque) && !P_IGNORE(opaque));
1631  if (!_bt_mark_page_halfdead(rel, leafbuf, stack))
1632  {
1633  _bt_relbuf(rel, leafbuf);
1634  return ndeleted;
1635  }
1636  }
1637 
1638  /*
1639  * Then unlink it from its siblings. Each call to
1640  * _bt_unlink_halfdead_page unlinks the topmost page from the subtree,
1641  * making it shallower. Iterate until the leafbuf page is deleted.
1642  *
1643  * _bt_unlink_halfdead_page should never fail, since we established
1644  * that deletion is generally safe in _bt_mark_page_halfdead.
1645  */
1646  rightsib_empty = false;
1647  Assert(P_ISLEAF(opaque) && P_ISHALFDEAD(opaque));
1648  while (P_ISHALFDEAD(opaque))
1649  {
1650  /* Check for interrupts in _bt_unlink_halfdead_page */
1651  if (!_bt_unlink_halfdead_page(rel, leafbuf, scanblkno,
1652  &rightsib_empty, oldestBtpoXact,
1653  &ndeleted))
1654  {
1655  /* _bt_unlink_halfdead_page failed, released buffer */
1656  return ndeleted;
1657  }
1658  }
1659 
1660  Assert(P_ISLEAF(opaque) && P_ISDELETED(opaque));
1662  *oldestBtpoXact));
1663 
1664  rightsib = opaque->btpo_next;
1665 
1666  _bt_relbuf(rel, leafbuf);
1667 
1668  /*
1669  * Check here, as calling loops will have locks held, preventing
1670  * interrupts from being processed.
1671  */
1673 
1674  /*
1675  * The page has now been deleted. If its right sibling is completely
1676  * empty, it's possible that the reason we haven't deleted it earlier
1677  * is that it was the rightmost child of the parent. Now that we
1678  * removed the downlink for this page, the right sibling might now be
1679  * the only child of the parent, and could be removed. It would be
1680  * picked up by the next vacuum anyway, but might as well try to
1681  * remove it now, so loop back to process the right sibling.
1682  *
1683  * Note: This relies on the assumption that _bt_getstackbuf() will be
1684  * able to reuse our original descent stack with a different child
1685  * block (provided that the child block is to the right of the
1686  * original leaf page reached by _bt_search()). It will even update
1687  * the descent stack each time we loop around, avoiding repeated work.
1688  */
1689  if (!rightsib_empty)
1690  break;
1691 
1692  leafbuf = _bt_getbuf(rel, rightsib, BT_WRITE);
1693  }
1694 
1695  return ndeleted;
1696 }
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:96
BlockNumber btpo_next
Definition: nbtree.h:59
int errhint(const char *fmt,...)
Definition: elog.c:1071
#define P_IGNORE(opaque)
Definition: nbtree.h:219
BTScanInsert _bt_mkscankey(Relation rel, IndexTuple itup)
Definition: nbtutils.c:90
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:792
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:244
bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:349
union BTPageOpaqueData::@45 btpo
int errcode(int sqlerrcode)
Definition: elog.c:610
uint32 BlockNumber
Definition: block.h:31
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3483
#define P_INCOMPLETE_SPLIT(opaque)
Definition: nbtree.h:221
#define LOG
Definition: elog.h:26
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
TransactionId xact
Definition: nbtree.h:63
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
#define BT_READ
Definition: nbtree.h:587
#define P_ISHALFDEAD(opaque)
Definition: nbtree.h:218
static bool _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
Definition: nbtpage.c:1713
BlockNumber btpo_prev
Definition: nbtree.h:58
IndexTuple CopyIndexTuple(IndexTuple source)
Definition: indextuple.c:510
#define RelationGetRelationName(relation)
Definition: rel.h:490
unsigned int uint32
Definition: c.h:367
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
bool pivotsearch
Definition: nbtree.h:663
#define P_ISDELETED(opaque)
Definition: nbtree.h:216
#define P_ISROOT(opaque)
Definition: nbtree.h:215
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3722
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:947
#define ereport(elevel,...)
Definition: elog.h:144
int errmsg_internal(const char *fmt,...)
Definition: elog.c:911
#define Assert(condition)
Definition: c.h:738
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
#define P_HIKEY
Definition: nbtree.h:242
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2633
int errmsg(const char *fmt,...)
Definition: elog.c:824
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
#define BT_WRITE
Definition: nbtree.h:588
int Buffer
Definition: buf.h:23
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:213
BTStack _bt_search(Relation rel, BTScanInsert key, Buffer *bufP, int access, Snapshot snapshot)
Definition: nbtsearch.c:101
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
Pointer Page
Definition: bufpage.h:78
static bool _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno, bool *rightsib_empty, TransactionId *oldestBtpoXact, uint32 *ndeleted)
Definition: nbtpage.c:1935
static bool _bt_leftsib_splitflag(Relation rel, BlockNumber leftsib, BlockNumber target)
Definition: nbtpage.c:1334
#define P_ISLEAF(opaque)
Definition: nbtree.h:214

◆ _bt_pageinit()

void _bt_pageinit ( Page  page,
Size  size 
)

Definition at line 959 of file nbtpage.c.

References PageInit().

Referenced by _bt_blnewpage(), _bt_getbuf(), _bt_initmetapage(), _bt_restore_meta(), _bt_split(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_split(), and btree_xlog_unlink_page().

960 {
961  PageInit(page, size, sizeof(BTPageOpaqueData));
962 }
void PageInit(Page page, Size pageSize, Size specialSize)
Definition: bufpage.c:42

◆ _bt_relandgetbuf()

Buffer _bt_relandgetbuf ( Relation  rel,
Buffer  obuf,
BlockNumber  blkno,
int  access 
)

Definition at line 928 of file nbtpage.c.

References _bt_checkpage(), Assert, buf, BUFFER_LOCK_UNLOCK, BufferIsValid, LockBuffer(), P_NEW, and ReleaseAndReadBuffer().

Referenced by _bt_check_unique(), _bt_get_endpoint(), _bt_getroot(), _bt_gettrueroot(), _bt_moveright(), _bt_search(), _bt_stepright(), and _bt_walk_left().

929 {
930  Buffer buf;
931 
932  Assert(blkno != P_NEW);
933  if (BufferIsValid(obuf))
935  buf = ReleaseAndReadBuffer(obuf, rel, blkno);
936  LockBuffer(buf, access);
937  _bt_checkpage(rel, buf);
938  return buf;
939 }
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:96
#define P_NEW
Definition: bufmgr.h:91
void _bt_checkpage(Relation rel, Buffer buf)
Definition: nbtpage.c:725
static char * buf
Definition: pg_test_fsync.c:67
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3722
#define Assert(condition)
Definition: c.h:738
Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation, BlockNumber blockNum)
Definition: bufmgr.c:1531
#define BufferIsValid(bufnum)
Definition: bufmgr.h:123
int Buffer
Definition: buf.h:23

◆ _bt_relbuf()

◆ _bt_rightsib_halfdeadflag()

static bool _bt_rightsib_halfdeadflag ( Relation  rel,
BlockNumber  leafrightsib 
)
static

Definition at line 1391 of file nbtpage.c.

References _bt_getbuf(), _bt_relbuf(), Assert, BT_READ, buf, BufferGetPage, P_ISDELETED, P_ISHALFDEAD, P_ISLEAF, P_NONE, and PageGetSpecialPointer.

Referenced by _bt_mark_page_halfdead().

1392 {
1393  Buffer buf;
1394  Page page;
1395  BTPageOpaque opaque;
1396  bool result;
1397 
1398  Assert(leafrightsib != P_NONE);
1399 
1400  buf = _bt_getbuf(rel, leafrightsib, BT_READ);
1401  page = BufferGetPage(buf);
1402  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1403 
1404  Assert(P_ISLEAF(opaque) && !P_ISDELETED(opaque));
1405  result = P_ISHALFDEAD(opaque);
1406  _bt_relbuf(rel, buf);
1407 
1408  return result;
1409 }
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:792
#define P_NONE
Definition: nbtree.h:206
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
#define BT_READ
Definition: nbtree.h:587
#define P_ISHALFDEAD(opaque)
Definition: nbtree.h:218
static char * buf
Definition: pg_test_fsync.c:67
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define P_ISDELETED(opaque)
Definition: nbtree.h:216
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:947
#define Assert(condition)
Definition: c.h:738
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
int Buffer
Definition: buf.h:23
Pointer Page
Definition: bufpage.h:78
#define P_ISLEAF(opaque)
Definition: nbtree.h:214

◆ _bt_unlink_halfdead_page()

static bool _bt_unlink_halfdead_page ( Relation  rel,
Buffer  leafbuf,
BlockNumber  scanblkno,
bool rightsib_empty,
TransactionId oldestBtpoXact,
uint32 ndeleted 
)
static

Definition at line 1935 of file nbtpage.c.

References _bt_getbuf(), _bt_relbuf(), _bt_upgrademetapage(), xl_btree_metadata::allequalimage, Assert, BlockNumberIsValid, BT_READ, BT_WRITE, BTMetaPageData::btm_allequalimage, BTMetaPageData::btm_fastlevel, BTMetaPageData::btm_fastroot, BTMetaPageData::btm_last_cleanup_num_heap_tuples, BTMetaPageData::btm_level, BTMetaPageData::btm_oldest_btpo_xact, BTMetaPageData::btm_root, BTMetaPageData::btm_version, BTP_DELETED, BTP_HALF_DEAD, BTPageGetMeta, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, xl_btree_unlink_page::btpo_xact, BTREE_METAPAGE, BTREE_NOVAC_VERSION, BTreeTupleGetDownLink(), BTreeTupleGetTopParent(), BTreeTupleSetTopParent(), buf, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage, BufferIsValid, CHECK_FOR_INTERRUPTS, elog, END_CRIT_SECTION, ereport, errcode(), errmsg_internal(), ERROR, xl_btree_metadata::fastlevel, xl_btree_metadata::fastroot, InvalidBlockNumber, InvalidBuffer, xl_btree_metadata::last_cleanup_num_heap_tuples, xl_btree_unlink_page::leafleftsib, xl_btree_unlink_page::leafrightsib, xl_btree_unlink_page::leftsib, xl_btree_metadata::level, BTPageOpaqueData::level, LockBuffer(), LOG, MarkBufferDirty(), xl_btree_metadata::oldest_btpo_xact, P_FIRSTDATAKEY, P_HIKEY, P_ISDELETED, P_ISHALFDEAD, P_ISLEAF, P_ISROOT, P_NONE, P_RIGHTMOST, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PageGetSpecialPointer, PageSetLSN, ReadNewTransactionId(), REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetRelationName, RelationNeedsWAL, ReleaseBuffer(), xl_btree_unlink_page::rightsib, xl_btree_metadata::root, SizeOfBtreeUnlinkPage, START_CRIT_SECTION, xl_btree_unlink_page::topparent, TransactionIdIsValid, TransactionIdPrecedes(), xl_btree_metadata::version, BTPageOpaqueData::xact, XLOG_BTREE_UNLINK_PAGE, XLOG_BTREE_UNLINK_PAGE_META, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by _bt_pagedel().

1938 {
1939  BlockNumber leafblkno = BufferGetBlockNumber(leafbuf);
1940  BlockNumber leafleftsib;
1941  BlockNumber leafrightsib;
1942  BlockNumber target;
1943  BlockNumber leftsib;
1944  BlockNumber rightsib;
1945  Buffer lbuf = InvalidBuffer;
1946  Buffer buf;
1947  Buffer rbuf;
1948  Buffer metabuf = InvalidBuffer;
1949  Page metapg = NULL;
1950  BTMetaPageData *metad = NULL;
1951  ItemId itemid;
1952  Page page;
1953  BTPageOpaque opaque;
1954  bool rightsib_is_rightmost;
1955  int targetlevel;
1956  IndexTuple leafhikey;
1957  BlockNumber nextchild;
1958 
1959  page = BufferGetPage(leafbuf);
1960  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
1961 
1962  Assert(P_ISLEAF(opaque) && !P_ISDELETED(opaque) && P_ISHALFDEAD(opaque));
1963 
1964  /*
1965  * Remember some information about the leaf page.
1966  */
1967  itemid = PageGetItemId(page, P_HIKEY);
1968  leafhikey = (IndexTuple) PageGetItem(page, itemid);
1969  target = BTreeTupleGetTopParent(leafhikey);
1970  leafleftsib = opaque->btpo_prev;
1971  leafrightsib = opaque->btpo_next;
1972 
1973  LockBuffer(leafbuf, BUFFER_LOCK_UNLOCK);
1974 
1975  /*
1976  * Check here, as calling loops will have locks held, preventing
1977  * interrupts from being processed.
1978  */
1980 
1981  /* Unlink the current top parent of the subtree */
1982  if (!BlockNumberIsValid(target))
1983  {
1984  /* Target is leaf page (or leaf page is top parent, if you prefer) */
1985  target = leafblkno;
1986 
1987  buf = leafbuf;
1988  leftsib = leafleftsib;
1989  targetlevel = 0;
1990  }
1991  else
1992  {
1993  /* Target is the internal page taken from leaf's top parent link */
1994  Assert(target != leafblkno);
1995 
1996  /* Fetch the block number of the target's left sibling */
1997  buf = _bt_getbuf(rel, target, BT_READ);
1998  page = BufferGetPage(buf);
1999  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2000  leftsib = opaque->btpo_prev;
2001  targetlevel = opaque->btpo.level;
2002  Assert(targetlevel > 0);
2003 
2004  /*
2005  * To avoid deadlocks, we'd better drop the target page lock before
2006  * going further.
2007  */
2009  }
2010 
2011  /*
2012  * We have to lock the pages we need to modify in the standard order:
2013  * moving right, then up. Else we will deadlock against other writers.
2014  *
2015  * So, first lock the leaf page, if it's not the target. Then find and
2016  * write-lock the current left sibling of the target page. The sibling
2017  * that was current a moment ago could have split, so we may have to move
2018  * right. This search could fail if either the sibling or the target page
2019  * was deleted by someone else meanwhile; if so, give up. (Right now,
2020  * that should never happen, since page deletion is only done in VACUUM
2021  * and there shouldn't be multiple VACUUMs concurrently on the same
2022  * table.)
2023  */
2024  if (target != leafblkno)
2025  LockBuffer(leafbuf, BT_WRITE);
2026  if (leftsib != P_NONE)
2027  {
2028  lbuf = _bt_getbuf(rel, leftsib, BT_WRITE);
2029  page = BufferGetPage(lbuf);
2030  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2031  while (P_ISDELETED(opaque) || opaque->btpo_next != target)
2032  {
2033  /* step right one page */
2034  leftsib = opaque->btpo_next;
2035  _bt_relbuf(rel, lbuf);
2036 
2037  /*
2038  * It'd be good to check for interrupts here, but it's not easy to
2039  * do so because a lock is always held. This block isn't
2040  * frequently reached, so hopefully the consequences of not
2041  * checking interrupts aren't too bad.
2042  */
2043 
2044  if (leftsib == P_NONE)
2045  {
2046  elog(LOG, "no left sibling (concurrent deletion?) of block %u in \"%s\"",
2047  target,
2049  if (target != leafblkno)
2050  {
2051  /* we have only a pin on target, but pin+lock on leafbuf */
2052  ReleaseBuffer(buf);
2053  _bt_relbuf(rel, leafbuf);
2054  }
2055  else
2056  {
2057  /* we have only a pin on leafbuf */
2058  ReleaseBuffer(leafbuf);
2059  }
2060  return false;
2061  }
2062  lbuf = _bt_getbuf(rel, leftsib, BT_WRITE);
2063  page = BufferGetPage(lbuf);
2064  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2065  }
2066  }
2067  else
2068  lbuf = InvalidBuffer;
2069 
2070  /*
2071  * Next write-lock the target page itself. It's okay to take a write lock
2072  * rather than a superexclusive lock, since no scan will stop on an empty
2073  * page.
2074  */
2075  LockBuffer(buf, BT_WRITE);
2076  page = BufferGetPage(buf);
2077  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2078 
2079  /*
2080  * Check page is still empty etc, else abandon deletion. This is just for
2081  * paranoia's sake; a half-dead page cannot resurrect because there can be
2082  * only one vacuum process running at a time.
2083  */
2084  if (P_RIGHTMOST(opaque) || P_ISROOT(opaque) || P_ISDELETED(opaque))
2085  elog(ERROR, "half-dead page changed status unexpectedly in block %u of index \"%s\"",
2086  target, RelationGetRelationName(rel));
2087 
2088  if (opaque->btpo_prev != leftsib)
2089  ereport(ERROR,
2090  (errcode(ERRCODE_INDEX_CORRUPTED),
2091  errmsg_internal("left link changed unexpectedly in block %u of index \"%s\"",
2092  target, RelationGetRelationName(rel))));
2093 
2094  if (target == leafblkno)
2095  {
2096  if (P_FIRSTDATAKEY(opaque) <= PageGetMaxOffsetNumber(page) ||
2097  !P_ISLEAF(opaque) || !P_ISHALFDEAD(opaque))
2098  elog(ERROR, "half-dead page changed status unexpectedly in block %u of index \"%s\"",
2099  target, RelationGetRelationName(rel));
2100  nextchild = InvalidBlockNumber;
2101  }
2102  else
2103  {
2104  if (P_FIRSTDATAKEY(opaque) != PageGetMaxOffsetNumber(page) ||
2105  P_ISLEAF(opaque))
2106  elog(ERROR, "half-dead page changed status unexpectedly in block %u of index \"%s\"",
2107  target, RelationGetRelationName(rel));
2108 
2109  /* Remember the next non-leaf child down in the subtree */
2110  itemid = PageGetItemId(page, P_FIRSTDATAKEY(opaque));
2111  nextchild = BTreeTupleGetDownLink((IndexTuple) PageGetItem(page, itemid));
2112  if (nextchild == leafblkno)
2113  nextchild = InvalidBlockNumber;
2114  }
2115 
2116  /*
2117  * And next write-lock the (current) right sibling.
2118  */
2119  rightsib = opaque->btpo_next;
2120  rbuf = _bt_getbuf(rel, rightsib, BT_WRITE);
2121  page = BufferGetPage(rbuf);
2122  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2123  if (opaque->btpo_prev != target)
2124  ereport(ERROR,
2125  (errcode(ERRCODE_INDEX_CORRUPTED),
2126  errmsg_internal("right sibling's left-link doesn't match: "
2127  "block %u links to %u instead of expected %u in index \"%s\"",
2128  rightsib, opaque->btpo_prev, target,
2129  RelationGetRelationName(rel))));
2130  rightsib_is_rightmost = P_RIGHTMOST(opaque);
2131  *rightsib_empty = (P_FIRSTDATAKEY(opaque) > PageGetMaxOffsetNumber(page));
2132 
2133  /*
2134  * If we are deleting the next-to-last page on the target's level, then
2135  * the rightsib is a candidate to become the new fast root. (In theory, it
2136  * might be possible to push the fast root even further down, but the odds
2137  * of doing so are slim, and the locking considerations daunting.)
2138  *
2139  * We can safely acquire a lock on the metapage here --- see comments for
2140  * _bt_newroot().
2141  */
2142  if (leftsib == P_NONE && rightsib_is_rightmost)
2143  {
2144  page = BufferGetPage(rbuf);
2145  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2146  if (P_RIGHTMOST(opaque))
2147  {
2148  /* rightsib will be the only one left on the level */
2149  metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE);
2150  metapg = BufferGetPage(metabuf);
2151  metad = BTPageGetMeta(metapg);
2152 
2153  /*
2154  * The expected case here is btm_fastlevel == targetlevel+1; if
2155  * the fastlevel is <= targetlevel, something is wrong, and we
2156  * choose to overwrite it to fix it.
2157  */
2158  if (metad->btm_fastlevel > targetlevel + 1)
2159  {
2160  /* no update wanted */
2161  _bt_relbuf(rel, metabuf);
2162  metabuf = InvalidBuffer;
2163  }
2164  }
2165  }
2166 
2167  /*
2168  * Here we begin doing the deletion.
2169  */
2170 
2171  /* No ereport(ERROR) until changes are logged */
2173 
2174  /*
2175  * Update siblings' side-links. Note the target page's side-links will
2176  * continue to point to the siblings. Asserts here are just rechecking
2177  * things we already verified above.
2178  */
2179  if (BufferIsValid(lbuf))
2180  {
2181  page = BufferGetPage(lbuf);
2182  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2183  Assert(opaque->btpo_next == target);
2184  opaque->btpo_next = rightsib;
2185  }
2186  page = BufferGetPage(rbuf);
2187  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2188  Assert(opaque->btpo_prev == target);
2189  opaque->btpo_prev = leftsib;
2190 
2191  /*
2192  * If we deleted a parent of the targeted leaf page, instead of the leaf
2193  * itself, update the leaf to point to the next remaining child in the
2194  * subtree.
2195  *
2196  * Note: We rely on the fact that a buffer pin on the leaf page has been
2197  * held since leafhikey was initialized. This is safe, though only
2198  * because the page was already half-dead at that point. The leaf page
2199  * cannot have been modified by any other backend during the period when
2200  * no lock was held.
2201  */
2202  if (target != leafblkno)
2203  BTreeTupleSetTopParent(leafhikey, nextchild);
2204 
2205  /*
2206  * Mark the page itself deleted. It can be recycled when all current
2207  * transactions are gone. Storing GetTopTransactionId() would work, but
2208  * we're in VACUUM and would not otherwise have an XID. Having already
2209  * updated links to the target, ReadNewTransactionId() suffices as an
2210  * upper bound. Any scan having retained a now-stale link is advertising
2211  * in its PGXACT an xmin less than or equal to the value we read here. It
2212  * will continue to do so, holding back RecentGlobalXmin, for the duration
2213  * of that scan.
2214  */
2215  page = BufferGetPage(buf);
2216  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
2217  Assert(P_ISHALFDEAD(opaque) || !P_ISLEAF(opaque));
2218  opaque->btpo_flags &= ~BTP_HALF_DEAD;
2219  opaque->btpo_flags |= BTP_DELETED;
2220  opaque->btpo.xact = ReadNewTransactionId();
2221 
2222  /* And update the metapage, if needed */
2223  if (BufferIsValid(metabuf))
2224  {
2225  /* upgrade metapage if needed */
2226  if (metad->btm_version < BTREE_NOVAC_VERSION)
2227  _bt_upgrademetapage(metapg);
2228  metad->btm_fastroot = rightsib;
2229  metad->btm_fastlevel = targetlevel;
2230  MarkBufferDirty(metabuf);
2231  }
2232 
2233  /* Must mark buffers dirty before XLogInsert */
2234  MarkBufferDirty(rbuf);
2235  MarkBufferDirty(buf);
2236  if (BufferIsValid(lbuf))
2237  MarkBufferDirty(lbuf);
2238  if (target != leafblkno)
2239  MarkBufferDirty(leafbuf);
2240 
2241  /* XLOG stuff */
2242  if (RelationNeedsWAL(rel))
2243  {
2244  xl_btree_unlink_page xlrec;
2245  xl_btree_metadata xlmeta;
2246  uint8 xlinfo;
2247  XLogRecPtr recptr;
2248 
2249  XLogBeginInsert();
2250 
2252  if (BufferIsValid(lbuf))
2255  if (target != leafblkno)
2256  XLogRegisterBuffer(3, leafbuf, REGBUF_WILL_INIT);
2257 
2258  /* information on the unlinked block */
2259  xlrec.leftsib = leftsib;
2260  xlrec.rightsib = rightsib;
2261  xlrec.btpo_xact = opaque->btpo.xact;
2262 
2263  /* information needed to recreate the leaf block (if not the target) */
2264  xlrec.leafleftsib = leafleftsib;
2265  xlrec.leafrightsib = leafrightsib;
2266  xlrec.topparent = nextchild;
2267 
2268  XLogRegisterData((char *) &xlrec, SizeOfBtreeUnlinkPage);
2269 
2270  if (BufferIsValid(metabuf))
2271  {
2273 
2275  xlmeta.version = metad->btm_version;
2276  xlmeta.root = metad->btm_root;
2277  xlmeta.level = metad->btm_level;
2278  xlmeta.fastroot = metad->btm_fastroot;
2279  xlmeta.fastlevel = metad->btm_fastlevel;
2280  xlmeta.oldest_btpo_xact = metad->btm_oldest_btpo_xact;
2282  xlmeta.allequalimage = metad->btm_allequalimage;
2283 
2284  XLogRegisterBufData(4, (char *) &xlmeta, sizeof(xl_btree_metadata));
2285  xlinfo = XLOG_BTREE_UNLINK_PAGE_META;
2286  }
2287  else
2288  xlinfo = XLOG_BTREE_UNLINK_PAGE;
2289 
2290  recptr = XLogInsert(RM_BTREE_ID, xlinfo);
2291 
2292  if (BufferIsValid(metabuf))
2293  {
2294  PageSetLSN(metapg, recptr);
2295  }
2296  page = BufferGetPage(rbuf);
2297  PageSetLSN(page, recptr);
2298  page = BufferGetPage(buf);
2299  PageSetLSN(page, recptr);
2300  if (BufferIsValid(lbuf))
2301  {
2302  page = BufferGetPage(lbuf);
2303  PageSetLSN(page, recptr);
2304  }
2305  if (target != leafblkno)
2306  {
2307  page = BufferGetPage(leafbuf);
2308  PageSetLSN(page, recptr);
2309  }
2310  }
2311 
2312  END_CRIT_SECTION();
2313 
2314  /* release metapage */
2315  if (BufferIsValid(metabuf))
2316  _bt_relbuf(rel, metabuf);
2317 
2318  /* release siblings */
2319  if (BufferIsValid(lbuf))
2320  _bt_relbuf(rel, lbuf);
2321  _bt_relbuf(rel, rbuf);
2322 
2323  if (!TransactionIdIsValid(*oldestBtpoXact) ||
2324  TransactionIdPrecedes(opaque->btpo.xact, *oldestBtpoXact))
2325  *oldestBtpoXact = opaque->btpo.xact;
2326 
2327  /*
2328  * If btvacuumscan won't revisit this page in a future btvacuumpage call
2329  * and count it as deleted then, we count it as deleted by current
2330  * btvacuumpage call
2331  */
2332  if (target <= scanblkno)
2333  (*ndeleted)++;
2334 
2335  /* If the target is not leafbuf, we're done with it now -- release it */
2336  if (target != leafblkno)
2337  _bt_relbuf(rel, buf);
2338 
2339  return true;
2340 }
void XLogRegisterBufData(uint8 block_id, char *data, int len)
Definition: xloginsert.c:362
bool allequalimage
Definition: nbtxlog.h:57
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:96
BlockNumber btpo_next
Definition: nbtree.h:59
void _bt_upgrademetapage(Page page)
Definition: nbtpage.c:100
uint32 btm_version
Definition: nbtree.h:101
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:792
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1468
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:214
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:244
#define BTP_HALF_DEAD
Definition: nbtree.h:76
#define END_CRIT_SECTION()
Definition: miscadmin.h:134
BlockNumber root
Definition: nbtxlog.h:51
union BTPageOpaqueData::@45 btpo
unsigned char uint8
Definition: c.h:365
#define P_NONE
Definition: nbtree.h:206
static BlockNumber BTreeTupleGetDownLink(IndexTuple pivot)
Definition: nbtree.h:424
#define InvalidBuffer
Definition: buf.h:25
#define REGBUF_WILL_INIT
Definition: xloginsert.h:33
#define START_CRIT_SECTION()
Definition: miscadmin.h:132
int errcode(int sqlerrcode)
Definition: elog.c:610
uint32 BlockNumber
Definition: block.h:31
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3483
#define BTP_DELETED
Definition: nbtree.h:74
#define LOG
Definition: elog.h:26
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
TransactionId xact
Definition: nbtree.h:63
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
#define BT_READ
Definition: nbtree.h:587
BlockNumber btm_fastroot
Definition: nbtree.h:104
#define P_ISHALFDEAD(opaque)
Definition: nbtree.h:218
#define ERROR
Definition: elog.h:43
float8 last_cleanup_num_heap_tuples
Definition: nbtxlog.h:56
TransactionId oldest_btpo_xact
Definition: nbtxlog.h:55
#define BTPageGetMeta(p)
Definition: nbtree.h:114
bool btm_allequalimage
Definition: nbtree.h:111
BlockNumber btpo_prev
Definition: nbtree.h:58
static char * buf
Definition: pg_test_fsync.c:67
IndexTupleData * IndexTuple
Definition: itup.h:53
#define REGBUF_STANDARD
Definition: xloginsert.h:35
#define RelationGetRelationName(relation)
Definition: rel.h:490
#define XLOG_BTREE_UNLINK_PAGE
Definition: nbtxlog.h:34
#define BTREE_NOVAC_VERSION
Definition: nbtree.h:145
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define BTREE_METAPAGE
Definition: nbtree.h:141
#define P_ISDELETED(opaque)
Definition: nbtree.h:216
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
#define P_ISROOT(opaque)
Definition: nbtree.h:215
uint32 version
Definition: nbtxlog.h:50
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
uint32 btm_fastlevel
Definition: nbtree.h:105
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:324
uint32 level
Definition: nbtree.h:62
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:416
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3722
BlockNumber btm_root
Definition: nbtree.h:102
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:947
#define ereport(elevel,...)
Definition: elog.h:144
#define BlockNumberIsValid(blockNumber)
Definition: block.h:70
int errmsg_internal(const char *fmt,...)
Definition: elog.c:911
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:738
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
#define InvalidBlockNumber
Definition: block.h:33
float8 btm_last_cleanup_num_heap_tuples
Definition: nbtree.h:109
#define BufferIsValid(bufnum)
Definition: bufmgr.h:123
#define RelationNeedsWAL(relation)
Definition: rel.h:562
static TransactionId ReadNewTransactionId(void)
Definition: transam.h:258
#define P_HIKEY
Definition: nbtree.h:242
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2633
uint32 fastlevel
Definition: nbtxlog.h:54
uint32 btm_level
Definition: nbtree.h:103
#define elog(elevel,...)
Definition: elog.h:214
uint32 level
Definition: nbtxlog.h:52
#define SizeOfBtreeUnlinkPage
Definition: nbtxlog.h:303
static BlockNumber BTreeTupleGetTopParent(IndexTuple leafhikey)
Definition: nbtree.h:488
BlockNumber fastroot
Definition: nbtxlog.h:53
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
#define XLOG_BTREE_UNLINK_PAGE_META
Definition: nbtxlog.h:35
TransactionId btm_oldest_btpo_xact
Definition: nbtree.h:107
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define BT_WRITE
Definition: nbtree.h:588
void XLogBeginInsert(void)
Definition: xloginsert.c:121
uint16 btpo_flags
Definition: nbtree.h:65
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
int Buffer
Definition: buf.h:23
#define P_RIGHTMOST(opaque)
Definition: nbtree.h:213
static void BTreeTupleSetTopParent(IndexTuple leafhikey, BlockNumber blkno)
Definition: nbtree.h:494
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
Pointer Page
Definition: bufpage.h:78
#define P_ISLEAF(opaque)
Definition: nbtree.h:214

◆ _bt_update_meta_cleanup_info()

void _bt_update_meta_cleanup_info ( Relation  rel,
TransactionId  oldestBtpoXact,
float8  numHeapTuples 
)

Definition at line 173 of file nbtpage.c.

References _bt_getbuf(), _bt_relbuf(), _bt_upgrademetapage(), xl_btree_metadata::allequalimage, Assert, BT_READ, BT_WRITE, BTMetaPageData::btm_allequalimage, BTMetaPageData::btm_fastlevel, BTMetaPageData::btm_fastroot, BTMetaPageData::btm_last_cleanup_num_heap_tuples, BTMetaPageData::btm_level, BTMetaPageData::btm_oldest_btpo_xact, BTMetaPageData::btm_root, BTMetaPageData::btm_version, BTPageGetMeta, BTREE_METAPAGE, BTREE_NOVAC_VERSION, BUFFER_LOCK_UNLOCK, BufferGetPage, END_CRIT_SECTION, xl_btree_metadata::fastlevel, xl_btree_metadata::fastroot, xl_btree_metadata::last_cleanup_num_heap_tuples, xl_btree_metadata::level, LockBuffer(), MarkBufferDirty(), xl_btree_metadata::oldest_btpo_xact, PageSetLSN, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationNeedsWAL, xl_btree_metadata::root, START_CRIT_SECTION, xl_btree_metadata::version, XLOG_BTREE_META_CLEANUP, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), and XLogRegisterBuffer().

Referenced by btvacuumscan().

175 {
176  Buffer metabuf;
177  Page metapg;
178  BTMetaPageData *metad;
179  bool needsRewrite = false;
180  XLogRecPtr recptr;
181 
182  /* read the metapage and check if it needs rewrite */
183  metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
184  metapg = BufferGetPage(metabuf);
185  metad = BTPageGetMeta(metapg);
186 
187  /* outdated version of metapage always needs rewrite */
188  if (metad->btm_version < BTREE_NOVAC_VERSION)
189  needsRewrite = true;
190  else if (metad->btm_oldest_btpo_xact != oldestBtpoXact ||
191  metad->btm_last_cleanup_num_heap_tuples != numHeapTuples)
192  needsRewrite = true;
193 
194  if (!needsRewrite)
195  {
196  _bt_relbuf(rel, metabuf);
197  return;
198  }
199 
200  /* trade in our read lock for a write lock */
201  LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
202  LockBuffer(metabuf, BT_WRITE);
203 
205 
206  /* upgrade meta-page if needed */
207  if (metad->btm_version < BTREE_NOVAC_VERSION)
208  _bt_upgrademetapage(metapg);
209 
210  /* update cleanup-related information */
211  metad->btm_oldest_btpo_xact = oldestBtpoXact;
212  metad->btm_last_cleanup_num_heap_tuples = numHeapTuples;
213  MarkBufferDirty(metabuf);
214 
215  /* write wal record if needed */
216  if (RelationNeedsWAL(rel))
217  {
219 
220  XLogBeginInsert();
222 
224  md.version = metad->btm_version;
225  md.root = metad->btm_root;
226  md.level = metad->btm_level;
227  md.fastroot = metad->btm_fastroot;
228  md.fastlevel = metad->btm_fastlevel;
229  md.oldest_btpo_xact = oldestBtpoXact;
230  md.last_cleanup_num_heap_tuples = numHeapTuples;
231  md.allequalimage = metad->btm_allequalimage;
232 
233  XLogRegisterBufData(0, (char *) &md, sizeof(xl_btree_metadata));
234 
235  recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_META_CLEANUP);
236 
237  PageSetLSN(metapg, recptr);
238  }
239 
241  _bt_relbuf(rel, metabuf);
242 }
void XLogRegisterBufData(uint8 block_id, char *data, int len)
Definition: xloginsert.c:362
bool allequalimage
Definition: nbtxlog.h:57
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:96
void _bt_upgrademetapage(Page page)
Definition: nbtpage.c:100
uint32 btm_version
Definition: nbtree.h:101
Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Definition: nbtpage.c:792
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1468
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:214
#define END_CRIT_SECTION()
Definition: miscadmin.h:134
BlockNumber root
Definition: nbtxlog.h:51
#define REGBUF_WILL_INIT
Definition: xloginsert.h:33
#define START_CRIT_SECTION()
Definition: miscadmin.h:132
#define BT_READ
Definition: nbtree.h:587
BlockNumber btm_fastroot
Definition: nbtree.h:104
float8 last_cleanup_num_heap_tuples
Definition: nbtxlog.h:56
TransactionId oldest_btpo_xact
Definition: nbtxlog.h:55
#define BTPageGetMeta(p)
Definition: nbtree.h:114
bool btm_allequalimage
Definition: nbtree.h:111
#define REGBUF_STANDARD
Definition: xloginsert.h:35
#define BTREE_NOVAC_VERSION
Definition: nbtree.h:145
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define BTREE_METAPAGE
Definition: nbtree.h:141
uint32 version
Definition: nbtxlog.h:50
uint32 btm_fastlevel
Definition: nbtree.h:105
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:416
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3722
BlockNumber btm_root
Definition: nbtree.h:102
void _bt_relbuf(Relation rel, Buffer buf)
Definition: nbtpage.c:947
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:738
float8 btm_last_cleanup_num_heap_tuples
Definition: nbtree.h:109
#define RelationNeedsWAL(relation)
Definition: rel.h:562
uint32 fastlevel
Definition: nbtxlog.h:54
uint32 btm_level
Definition: nbtree.h:103
uint32 level
Definition: nbtxlog.h:52
BlockNumber fastroot
Definition: nbtxlog.h:53
TransactionId btm_oldest_btpo_xact
Definition: nbtree.h:107
#define BT_WRITE
Definition: nbtree.h:588
void XLogBeginInsert(void)
Definition: xloginsert.c:121
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
int Buffer
Definition: buf.h:23
#define XLOG_BTREE_META_CLEANUP
Definition: nbtxlog.h:42
Pointer Page
Definition: bufpage.h:78

◆ _bt_upgrademetapage()

void _bt_upgrademetapage ( Page  page)

Definition at line 100 of file nbtpage.c.

References Assert, BTMetaPageData::btm_allequalimage, BTMetaPageData::btm_last_cleanup_num_heap_tuples, BTMetaPageData::btm_oldest_btpo_xact, BTMetaPageData::btm_version, BTP_META, BTPageGetMeta, BTREE_MIN_VERSION, BTREE_NOVAC_VERSION, InvalidTransactionId, PageGetSpecialPointer, and PG_USED_FOR_ASSERTS_ONLY.

Referenced by _bt_getroot(), _bt_insertonpg(), _bt_newroot(), _bt_unlink_halfdead_page(), and _bt_update_meta_cleanup_info().

101 {
102  BTMetaPageData *metad;
104 
105  metad = BTPageGetMeta(page);
106  metaopaque = (BTPageOpaque) PageGetSpecialPointer(page);
107 
108  /* It must be really a meta page of upgradable version */
109  Assert(metaopaque->btpo_flags & BTP_META);
112 
113  /* Set version number and fill extra fields added into version 3 */
116  metad->btm_last_cleanup_num_heap_tuples = -1.0;
117  /* Only a REINDEX can set this field */
118  Assert(!metad->btm_allequalimage);
119  metad->btm_allequalimage = false;
120 
121  /* Adjust pd_lower (see _bt_initmetapage() for details) */
122  ((PageHeader) page)->pd_lower =
123  ((char *) metad + sizeof(BTMetaPageData)) - (char *) page;
124 }
uint32 btm_version
Definition: nbtree.h:101
#define BTP_META
Definition: nbtree.h:75
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
#define BTPageGetMeta(p)
Definition: nbtree.h:114
bool btm_allequalimage
Definition: nbtree.h:111
#define InvalidTransactionId
Definition: transam.h:31
#define BTREE_NOVAC_VERSION
Definition: nbtree.h:145
#define BTREE_MIN_VERSION
Definition: nbtree.h:144
PageHeaderData * PageHeader
Definition: bufpage.h:166
#define Assert(condition)
Definition: c.h:738
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
float8 btm_last_cleanup_num_heap_tuples
Definition: nbtree.h:109
TransactionId btm_oldest_btpo_xact
Definition: nbtree.h:107
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:121

◆ _bt_xid_horizon()

static TransactionId _bt_xid_horizon ( Relation  rel,
Relation  heapRel,
Page  page,
OffsetNumber deletable,
int  ndeletable 
)
static

Definition at line 1252 of file nbtpage.c.

References Assert, BTreeTupleGetNPosting(), BTreeTupleGetPostingN(), BTreeTupleIsPivot(), BTreeTupleIsPosting(), i, InvalidTransactionId, ItemIdIsDead, ItemPointerCopy, ItemPointerIsValid, Max, PageGetItem, PageGetItemId, palloc(), pfree(), repalloc(), IndexTupleData::t_tid, and table_compute_xid_horizon_for_tuples().

Referenced by _bt_delitems_delete().

1254 {
1255  TransactionId latestRemovedXid = InvalidTransactionId;
1256  int spacenhtids;
1257  int nhtids;
1258  ItemPointer htids;
1259 
1260  /* Array will grow iff there are posting list tuples to consider */
1261  spacenhtids = ndeletable;
1262  nhtids = 0;
1263  htids = (ItemPointer) palloc(sizeof(ItemPointerData) * spacenhtids);
1264  for (int i = 0; i < ndeletable; i++)
1265  {
1266  ItemId itemid;
1267  IndexTuple itup;
1268 
1269  itemid = PageGetItemId(page, deletable[i]);
1270  itup = (IndexTuple) PageGetItem(page, itemid);
1271 
1272  Assert(ItemIdIsDead(itemid));
1273  Assert(!BTreeTupleIsPivot(itup));
1274 
1275  if (!BTreeTupleIsPosting(itup))
1276  {
1277  if (nhtids + 1 > spacenhtids)
1278  {
1279  spacenhtids *= 2;
1280  htids = (ItemPointer)
1281  repalloc(htids, sizeof(ItemPointerData) * spacenhtids);
1282  }
1283 
1284  Assert(ItemPointerIsValid(&itup->t_tid));
1285  ItemPointerCopy(&itup->t_tid, &htids[nhtids]);
1286  nhtids++;
1287  }
1288  else
1289  {
1290  int nposting = BTreeTupleGetNPosting(itup);
1291 
1292  if (nhtids + nposting > spacenhtids)
1293  {
1294  spacenhtids = Max(spacenhtids * 2, nhtids + nposting);
1295  htids = (ItemPointer)
1296  repalloc(htids, sizeof(ItemPointerData) * spacenhtids);
1297  }
1298 
1299  for (int j = 0; j < nposting; j++)
1300  {
1301  ItemPointer htid = BTreeTupleGetPostingN(itup, j);
1302 
1303  Assert(ItemPointerIsValid(htid));
1304  ItemPointerCopy(htid, &htids[nhtids]);
1305  nhtids++;
1306  }
1307  }
1308  }
1309 
1310  Assert(nhtids >= ndeletable);
1311 
1312  latestRemovedXid =
1313  table_compute_xid_horizon_for_tuples(heapRel, htids, nhtids);
1314 
1315  pfree(htids);
1316 
1317  return latestRemovedXid;
1318 }
#define ItemPointerIsValid(pointer)
Definition: itemptr.h:82
static bool BTreeTupleIsPivot(IndexTuple itup)
Definition: nbtree.h:348
uint32 TransactionId
Definition: c.h:513
ItemPointerData t_tid
Definition: itup.h:37
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
ItemPointerData * ItemPointer
Definition: itemptr.h:49
static TransactionId table_compute_xid_horizon_for_tuples(Relation rel, ItemPointerData *items, int nitems)
Definition: tableam.h:1104
void pfree(void *pointer)
Definition: mcxt.c:1056
IndexTupleData * IndexTuple
Definition: itup.h:53
#define InvalidTransactionId
Definition: transam.h:31
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
static ItemPointer BTreeTupleGetPostingN(IndexTuple posting, int n)
Definition: nbtree.h:412
static bool BTreeTupleIsPosting(IndexTuple itup)
Definition: nbtree.h:360
#define Max(x, y)
Definition: c.h:914
#define Assert(condition)
Definition: c.h:738
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1069
void * palloc(Size size)
Definition: mcxt.c:949
int i
static uint16 BTreeTupleGetNPosting(IndexTuple posting)
Definition: nbtree.h:386
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
#define ItemPointerCopy(fromPointer, toPointer)
Definition: itemptr.h:161