PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
nbtxlog.c File Reference
#include "postgres.h"
#include "access/bufmask.h"
#include "access/heapam_xlog.h"
#include "access/nbtree.h"
#include "access/nbtxlog.h"
#include "access/transam.h"
#include "access/xlog.h"
#include "access/xlogutils.h"
#include "storage/procarray.h"
#include "miscadmin.h"
Include dependency graph for nbtxlog.c:

Go to the source code of this file.

Functions

static void _bt_restore_page (Page page, char *from, int len)
 
static void _bt_restore_meta (XLogReaderState *record, uint8 block_id)
 
static void _bt_clear_incomplete_split (XLogReaderState *record, uint8 block_id)
 
static void btree_xlog_insert (bool isleaf, bool ismeta, XLogReaderState *record)
 
static void btree_xlog_split (bool onleft, bool isroot, XLogReaderState *record)
 
static void btree_xlog_vacuum (XLogReaderState *record)
 
static TransactionId btree_xlog_delete_get_latestRemovedXid (XLogReaderState *record)
 
static void btree_xlog_delete (XLogReaderState *record)
 
static void btree_xlog_mark_page_halfdead (uint8 info, XLogReaderState *record)
 
static void btree_xlog_unlink_page (uint8 info, XLogReaderState *record)
 
static void btree_xlog_newroot (XLogReaderState *record)
 
static void btree_xlog_reuse_page (XLogReaderState *record)
 
void btree_redo (XLogReaderState *record)
 
void btree_mask (char *pagedata, BlockNumber blkno)
 

Function Documentation

static void _bt_clear_incomplete_split ( XLogReaderState *  record,
uint8  block_id 
)
static

Definition at line 128 of file nbtxlog.c.

References Assert, BLK_NEEDS_REDO, BTP_INCOMPLETE_SPLIT, BTPageOpaqueData::btpo_flags, buf, BufferGetPage, BufferIsValid, XLogReaderState::EndRecPtr, MarkBufferDirty(), PageGetSpecialPointer, PageSetLSN, UnlockReleaseBuffer(), and XLogReadBufferForRedo().

Referenced by btree_xlog_insert(), btree_xlog_newroot(), and btree_xlog_split().

129 {
130  XLogRecPtr lsn = record->EndRecPtr;
131  Buffer buf;
132 
133  if (XLogReadBufferForRedo(record, block_id, &buf) == BLK_NEEDS_REDO)
134  {
135  Page page = (Page) BufferGetPage(buf);
136  BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page);
137 
138  Assert((pageop->btpo_flags & BTP_INCOMPLETE_SPLIT) != 0);
139  pageop->btpo_flags &= ~BTP_INCOMPLETE_SPLIT;
140 
141  PageSetLSN(page, lsn);
142  MarkBufferDirty(buf);
143  }
144  if (BufferIsValid(buf))
145  UnlockReleaseBuffer(buf);
146 }
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1450
#define BTP_INCOMPLETE_SPLIT
Definition: nbtree.h:77
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:67
XLogRecPtr EndRecPtr
Definition: xlogreader.h:115
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3332
static char * buf
Definition: pg_test_fsync.c:66
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id, Buffer *buf)
Definition: xlogutils.c:290
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:675
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
uint16 btpo_flags
Definition: nbtree.h:63
#define PageSetLSN(page, lsn)
Definition: bufpage.h:364
int Buffer
Definition: buf.h:23
Pointer Page
Definition: bufpage.h:74
static void _bt_restore_meta ( XLogReaderState *  record,
uint8  block_id 
)
static

Definition at line 77 of file nbtxlog.c.

References _bt_pageinit(), Assert, BTMetaPageData::btm_fastlevel, BTMetaPageData::btm_fastroot, BTMetaPageData::btm_level, BTMetaPageData::btm_magic, BTMetaPageData::btm_root, BTMetaPageData::btm_version, BTP_META, BTPageGetMeta, BTPageOpaqueData::btpo_flags, BTREE_MAGIC, BTREE_METAPAGE, BTREE_VERSION, BufferGetBlockNumber(), BufferGetPage, BufferGetPageSize, XLogReaderState::EndRecPtr, xl_btree_metadata::fastlevel, xl_btree_metadata::fastroot, xl_btree_metadata::level, MarkBufferDirty(), PageGetSpecialPointer, PageSetLSN, xl_btree_metadata::root, UnlockReleaseBuffer(), XLogInitBufferForRedo(), and XLogRecGetBlockData().

Referenced by btree_xlog_insert(), btree_xlog_newroot(), and btree_xlog_unlink_page().

78 {
79  XLogRecPtr lsn = record->EndRecPtr;
80  Buffer metabuf;
81  Page metapg;
82  BTMetaPageData *md;
83  BTPageOpaque pageop;
84  xl_btree_metadata *xlrec;
85  char *ptr;
86  Size len;
87 
88  metabuf = XLogInitBufferForRedo(record, block_id);
89  ptr = XLogRecGetBlockData(record, block_id, &len);
90 
91  Assert(len == sizeof(xl_btree_metadata));
92  Assert(BufferGetBlockNumber(metabuf) == BTREE_METAPAGE);
93  xlrec = (xl_btree_metadata *) ptr;
94  metapg = BufferGetPage(metabuf);
95 
96  _bt_pageinit(metapg, BufferGetPageSize(metabuf));
97 
98  md = BTPageGetMeta(metapg);
99  md->btm_magic = BTREE_MAGIC;
100  md->btm_version = BTREE_VERSION;
101  md->btm_root = xlrec->root;
102  md->btm_level = xlrec->level;
103  md->btm_fastroot = xlrec->fastroot;
104  md->btm_fastlevel = xlrec->fastlevel;
105 
106  pageop = (BTPageOpaque) PageGetSpecialPointer(metapg);
107  pageop->btpo_flags = BTP_META;
108 
109  /*
110  * Set pd_lower just past the end of the metadata. This is not essential
111  * but it makes the page look compressible to xlog.c.
112  */
113  ((PageHeader) metapg)->pd_lower =
114  ((char *) md + sizeof(BTMetaPageData)) - (char *) metapg;
115 
116  PageSetLSN(metapg, lsn);
117  MarkBufferDirty(metabuf);
118  UnlockReleaseBuffer(metabuf);
119 }
uint32 btm_version
Definition: nbtree.h:99
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1450
#define BTREE_VERSION
Definition: nbtree.h:111
uint32 btm_magic
Definition: nbtree.h:98
BlockNumber root
Definition: nbtxlog.h:48
#define BTP_META
Definition: nbtree.h:73
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:67
XLogRecPtr EndRecPtr
Definition: xlogreader.h:115
BlockNumber btm_fastroot
Definition: nbtree.h:102
#define BTREE_MAGIC
Definition: nbtree.h:110
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3332
Buffer XLogInitBufferForRedo(XLogReaderState *record, uint8 block_id)
Definition: xlogutils.c:302
#define BTPageGetMeta(p)
Definition: nbtree.h:106
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
#define BTREE_METAPAGE
Definition: nbtree.h:109
uint32 btm_fastlevel
Definition: nbtree.h:103
char * XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len)
Definition: xlogreader.c:1331
#define BufferGetPageSize(buffer)
Definition: bufmgr.h:147
BlockNumber btm_root
Definition: nbtree.h:100
PageHeaderData * PageHeader
Definition: bufpage.h:162
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:675
size_t Size
Definition: c.h:356
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2605
uint32 fastlevel
Definition: nbtxlog.h:51
uint32 btm_level
Definition: nbtree.h:101
uint32 level
Definition: nbtxlog.h:49
void _bt_pageinit(Page page, Size size)
Definition: nbtpage.c:732
BlockNumber fastroot
Definition: nbtxlog.h:50
uint16 btpo_flags
Definition: nbtree.h:63
#define PageSetLSN(page, lsn)
Definition: bufpage.h:364
int Buffer
Definition: buf.h:23
Pointer Page
Definition: bufpage.h:74
static void _bt_restore_page ( Page  page,
char *  from,
int  len 
)
static

Definition at line 36 of file nbtxlog.c.

References elog, i, IndexTupleDSize, InvalidOffsetNumber, MAXALIGN, MaxIndexTuplesPerPage, PageAddItem, and PANIC.

Referenced by btree_xlog_newroot(), and btree_xlog_split().

37 {
38  IndexTupleData itupdata;
39  Size itemsz;
40  char *end = from + len;
41  Item items[MaxIndexTuplesPerPage];
42  uint16 itemsizes[MaxIndexTuplesPerPage];
43  int i;
44  int nitems;
45 
46  /*
47  * To get the items back in the original order, we add them to the page in
48  * reverse. To figure out where one tuple ends and another begins, we
49  * have to scan them in forward order first.
50  */
51  i = 0;
52  while (from < end)
53  {
54  /* Need to copy tuple header due to alignment considerations */
55  memcpy(&itupdata, from, sizeof(IndexTupleData));
56  itemsz = IndexTupleDSize(itupdata);
57  itemsz = MAXALIGN(itemsz);
58 
59  items[i] = (Item) from;
60  itemsizes[i] = itemsz;
61  i++;
62 
63  from += itemsz;
64  }
65  nitems = i;
66 
67  for (i = nitems - 1; i >= 0; i--)
68  {
69  if (PageAddItem(page, items[i], itemsizes[i], nitems - i,
70  false, false) == InvalidOffsetNumber)
71  elog(PANIC, "_bt_restore_page: cannot add item to page");
72  from += itemsz;
73  }
74 }
Pointer Item
Definition: item.h:17
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:412
#define PANIC
Definition: elog.h:53
unsigned short uint16
Definition: c.h:267
#define IndexTupleDSize(itup)
Definition: itup.h:71
#define InvalidOffsetNumber
Definition: off.h:26
size_t Size
Definition: c.h:356
#define MAXALIGN(LEN)
Definition: c.h:588
#define MaxIndexTuplesPerPage
Definition: itup.h:137
int i
#define elog
Definition: elog.h:219
void btree_mask ( char *  pagedata,
BlockNumber  blkno 
)

Definition at line 1038 of file nbtxlog.c.

References BTP_HAS_GARBAGE, BTP_SPLIT_END, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, mask_lp_flags(), mask_page_content(), mask_page_hint_bits(), mask_page_lsn(), mask_unused_space(), P_ISDELETED, P_ISLEAF, and PageGetSpecialPointer.

1039 {
1040  Page page = (Page) pagedata;
1041  BTPageOpaque maskopaq;
1042 
1043  mask_page_lsn(page);
1044 
1045  mask_page_hint_bits(page);
1046  mask_unused_space(page);
1047 
1048  maskopaq = (BTPageOpaque) PageGetSpecialPointer(page);
1049 
1050  if (P_ISDELETED(maskopaq))
1051  {
1052  /*
1053  * Mask page content on a DELETED page since it will be re-initialized
1054  * during replay. See btree_xlog_unlink_page() for details.
1055  */
1056  mask_page_content(page);
1057  }
1058  else if (P_ISLEAF(maskopaq))
1059  {
1060  /*
1061  * In btree leaf pages, it is possible to modify the LP_FLAGS without
1062  * emitting any WAL record. Hence, mask the line pointer flags. See
1063  * _bt_killitems(), _bt_check_unique() for details.
1064  */
1065  mask_lp_flags(page);
1066  }
1067 
1068  /*
1069  * BTP_HAS_GARBAGE is just an un-logged hint bit. So, mask it. See
1070  * _bt_killitems(), _bt_check_unique() for details.
1071  */
1072  maskopaq->btpo_flags &= ~BTP_HAS_GARBAGE;
1073 
1074  /*
1075  * During replay of a btree page split, we don't set the BTP_SPLIT_END
1076  * flag of the right sibling and initialize the cycle_id to 0 for the same
1077  * page. See btree_xlog_split() for details.
1078  */
1079  maskopaq->btpo_flags &= ~BTP_SPLIT_END;
1080  maskopaq->btpo_cycleid = 0;
1081 }
#define BTP_SPLIT_END
Definition: nbtree.h:75
void mask_page_hint_bits(Page page)
Definition: bufmask.c:44
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:67
void mask_unused_space(Page page)
Definition: bufmask.c:69
void mask_page_content(Page page)
Definition: bufmask.c:117
void mask_page_lsn(Page page)
Definition: bufmask.c:30
BTCycleId btpo_cycleid
Definition: nbtree.h:64
#define P_ISDELETED(opaque)
Definition: nbtree.h:178
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
uint16 btpo_flags
Definition: nbtree.h:63
void mask_lp_flags(Page page)
Definition: bufmask.c:93
#define BTP_HAS_GARBAGE
Definition: nbtree.h:76
Pointer Page
Definition: bufpage.h:74
#define P_ISLEAF(opaque)
Definition: nbtree.h:176
void btree_redo ( XLogReaderState *  record)

Definition at line 983 of file nbtxlog.c.

References btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_reuse_page(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), elog, PANIC, XLOG_BTREE_DELETE, XLOG_BTREE_INSERT_LEAF, XLOG_BTREE_INSERT_META, XLOG_BTREE_INSERT_UPPER, XLOG_BTREE_MARK_PAGE_HALFDEAD, XLOG_BTREE_NEWROOT, XLOG_BTREE_REUSE_PAGE, XLOG_BTREE_SPLIT_L, XLOG_BTREE_SPLIT_L_ROOT, XLOG_BTREE_SPLIT_R, XLOG_BTREE_SPLIT_R_ROOT, XLOG_BTREE_UNLINK_PAGE, XLOG_BTREE_UNLINK_PAGE_META, XLOG_BTREE_VACUUM, XLogRecGetInfo, and XLR_INFO_MASK.

984 {
985  uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
986 
987  switch (info)
988  {
989  case XLOG_BTREE_INSERT_LEAF:
990  btree_xlog_insert(true, false, record);
991  break;
992  case XLOG_BTREE_INSERT_UPPER:
993  btree_xlog_insert(false, false, record);
994  break;
995  case XLOG_BTREE_INSERT_META:
996  btree_xlog_insert(false, true, record);
997  break;
998  case XLOG_BTREE_SPLIT_L:
999  btree_xlog_split(true, false, record);
1000  break;
1001  case XLOG_BTREE_SPLIT_R:
1002  btree_xlog_split(false, false, record);
1003  break;
1004  case XLOG_BTREE_SPLIT_L_ROOT:
1005  btree_xlog_split(true, true, record);
1006  break;
1007  case XLOG_BTREE_SPLIT_R_ROOT:
1008  btree_xlog_split(false, true, record);
1009  break;
1010  case XLOG_BTREE_VACUUM:
1011  btree_xlog_vacuum(record);
1012  break;
1013  case XLOG_BTREE_DELETE:
1014  btree_xlog_delete(record);
1015  break;
1016  case XLOG_BTREE_MARK_PAGE_HALFDEAD:
1017  btree_xlog_mark_page_halfdead(info, record);
1018  break;
1019  case XLOG_BTREE_UNLINK_PAGE:
1020  case XLOG_BTREE_UNLINK_PAGE_META:
1021  btree_xlog_unlink_page(info, record);
1022  break;
1023  case XLOG_BTREE_NEWROOT:
1024  btree_xlog_newroot(record);
1025  break;
1026  case XLOG_BTREE_REUSE_PAGE:
1027  btree_xlog_reuse_page(record);
1028  break;
1029  default:
1030  elog(PANIC, "btree_redo: unknown op code %u", info);
1031  }
1032 }
static void btree_xlog_split(bool onleft, bool isroot, XLogReaderState *record)
Definition: nbtxlog.c:196
static void btree_xlog_vacuum(XLogReaderState *record)
Definition: nbtxlog.c:387
#define XLOG_BTREE_SPLIT_L_ROOT
Definition: nbtxlog.h:31
unsigned char uint8
Definition: c.h:266
#define XLOG_BTREE_INSERT_META
Definition: nbtxlog.h:28
static void btree_xlog_delete(XLogReaderState *record)
Definition: nbtxlog.c:663
#define PANIC
Definition: elog.h:53
#define XLOG_BTREE_NEWROOT
Definition: nbtxlog.h:36
static void btree_xlog_newroot(XLogReaderState *record)
Definition: nbtxlog.c:920
static void btree_xlog_reuse_page(XLogReaderState *record)
Definition: nbtxlog.c:960
#define XLOG_BTREE_INSERT_LEAF
Definition: nbtxlog.h:26
#define XLOG_BTREE_VACUUM
Definition: nbtxlog.h:38
#define XLOG_BTREE_UNLINK_PAGE
Definition: nbtxlog.h:34
#define XLogRecGetInfo(decoder)
Definition: xlogreader.h:216
#define XLOG_BTREE_DELETE
Definition: nbtxlog.h:33
#define XLOG_BTREE_REUSE_PAGE
Definition: nbtxlog.h:40
#define XLOG_BTREE_MARK_PAGE_HALFDEAD
Definition: nbtxlog.h:37
#define XLOG_BTREE_SPLIT_R
Definition: nbtxlog.h:30
#define XLR_INFO_MASK
Definition: xlogrecord.h:62
static void btree_xlog_unlink_page(uint8 info, XLogReaderState *record)
Definition: nbtxlog.c:806
#define XLOG_BTREE_INSERT_UPPER
Definition: nbtxlog.h:27
static void btree_xlog_mark_page_halfdead(uint8 info, XLogReaderState *record)
Definition: nbtxlog.c:724
#define XLOG_BTREE_SPLIT_L
Definition: nbtxlog.h:29
#define XLOG_BTREE_UNLINK_PAGE_META
Definition: nbtxlog.h:35
#define elog
Definition: elog.h:219
#define XLOG_BTREE_SPLIT_R_ROOT
Definition: nbtxlog.h:32
static void btree_xlog_insert(bool isleaf, bool ismeta, XLogReaderState *record)
Definition: nbtxlog.c:149
static void btree_xlog_delete ( XLogReaderState *  record)
static

Definition at line 663 of file nbtxlog.c.

References BLK_NEEDS_REDO, BTP_HAS_GARBAGE, BTPageOpaqueData::btpo_flags, btree_xlog_delete_get_latestRemovedXid(), buffer, BufferGetPage, BufferIsValid, XLogReaderState::EndRecPtr, InHotStandby, MarkBufferDirty(), xl_btree_delete::nitems, NULL, PageGetSpecialPointer, PageIndexMultiDelete(), PageSetLSN, ResolveRecoveryConflictWithSnapshot(), SizeOfBtreeDelete, UnlockReleaseBuffer(), XLogReadBufferForRedo(), XLogRecGetBlockTag(), XLogRecGetData, and XLogRecGetDataLen.

Referenced by btree_redo().

664 {
665  XLogRecPtr lsn = record->EndRecPtr;
666  xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
667  Buffer buffer;
668  Page page;
669  BTPageOpaque opaque;
670 
671  /*
672  * If we have any conflict processing to do, it must happen before we
673  * update the page.
674  *
675  * Btree delete records can conflict with standby queries. You might
676  * think that vacuum records would conflict as well, but we've handled
677  * that already. XLOG_HEAP2_CLEANUP_INFO records provide the highest xid
678  * cleaned by the vacuum of the heap and so we can resolve any conflicts
679  * just once when that arrives. After that we know that no conflicts
680  * exist from individual btree vacuum records on that index.
681  */
682  if (InHotStandby)
683  {
684  TransactionId latestRemovedXid = btree_xlog_delete_get_latestRemovedXid(record);
685  RelFileNode rnode;
686 
687  XLogRecGetBlockTag(record, 0, &rnode, NULL, NULL);
688 
689  ResolveRecoveryConflictWithSnapshot(latestRemovedXid, rnode);
690  }
691 
692  /*
693  * We don't need to take a cleanup lock to apply these changes. See
694  * nbtree/README for details.
695  */
696  if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
697  {
698  page = (Page) BufferGetPage(buffer);
699 
700  if (XLogRecGetDataLen(record) > SizeOfBtreeDelete)
701  {
702  OffsetNumber *unused;
703 
704  unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeDelete);
705 
706  PageIndexMultiDelete(page, unused, xlrec->nitems);
707  }
708 
709  /*
710  * Mark the page as not containing any LP_DEAD items --- see comments
711  * in _bt_delitems_delete().
712  */
713  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
714  opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
715 
716  PageSetLSN(page, lsn);
717  MarkBufferDirty(buffer);
718  }
719  if (BufferIsValid(buffer))
720  UnlockReleaseBuffer(buffer);
721 }
uint32 TransactionId
Definition: c.h:397
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1450
#define InHotStandby
Definition: xlog.h:74
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:67
XLogRecPtr EndRecPtr
Definition: xlogreader.h:115
uint16 OffsetNumber
Definition: off.h:24
#define XLogRecGetData(decoder)
Definition: xlogreader.h:220
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3332
#define XLogRecGetDataLen(decoder)
Definition: xlogreader.h:221
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
bool XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id, RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum)
Definition: xlogreader.c:1307
XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id, Buffer *buf)
Definition: xlogutils.c:290
#define NULL
Definition: c.h:229
uint64 XLogRecPtr
Definition: xlogdefs.h:21
WalTimeSample buffer[LAG_TRACKER_BUFFER_SIZE]
Definition: walsender.c:214
void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
Definition: bufpage.c:836
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
static TransactionId btree_xlog_delete_get_latestRemovedXid(XLogReaderState *record)
Definition: nbtxlog.c:521
#define SizeOfBtreeDelete
Definition: nbtxlog.h:128
void ResolveRecoveryConflictWithSnapshot(TransactionId latestRemovedXid, RelFileNode node)
Definition: standby.c:267
uint16 btpo_flags
Definition: nbtree.h:63
#define PageSetLSN(page, lsn)
Definition: bufpage.h:364
int Buffer
Definition: buf.h:23
#define BTP_HAS_GARBAGE
Definition: nbtree.h:76
Pointer Page
Definition: bufpage.h:74
static TransactionId btree_xlog_delete_get_latestRemovedXid ( XLogReaderState *  record)
static

Definition at line 521 of file nbtxlog.c.

References Assert, BT_READ, BUFFER_LOCK_SHARE, BufferGetPage, BufferIsValid, CHECK_FOR_INTERRUPTS, CountDBBackends(), elog, HeapTupleHeaderAdvanceLatestRemovedXid(), xl_btree_delete::hnode, i, InvalidOid, InvalidTransactionId, ItemIdGetRedirect, ItemIdHasStorage, ItemIdIsDead, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerGetBlockNumber, ItemPointerGetOffsetNumber, LockBuffer(), MAIN_FORKNUM, xl_btree_delete::nitems, NULL, PageGetItem, PageGetItemId, PANIC, RBM_NORMAL, reachedConsistency, SizeOfBtreeDelete, IndexTupleData::t_tid, UnlockReleaseBuffer(), XLogReadBufferExtended(), XLogRecGetBlockTag(), and XLogRecGetData.

Referenced by btree_xlog_delete().

522 {
523  xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
524  OffsetNumber *unused;
525  Buffer ibuffer,
526  hbuffer;
527  Page ipage,
528  hpage;
529  RelFileNode rnode;
530  BlockNumber blkno;
531  ItemId iitemid,
532  hitemid;
533  IndexTuple itup;
534  HeapTupleHeader htuphdr;
535  BlockNumber hblkno;
536  OffsetNumber hoffnum;
537  TransactionId latestRemovedXid = InvalidTransactionId;
538  int i;
539 
540  /*
541  * If there's nothing running on the standby we don't need to derive a
542  * full latestRemovedXid value, so use a fast path out of here. This
543  * returns InvalidTransactionId, and so will conflict with all HS
544  * transactions; but since we just worked out that that's zero people,
545  * it's OK.
546  *
547  * XXX There is a race condition here, which is that a new backend might
548  * start just after we look. If so, it cannot need to conflict, but this
549  * coding will result in throwing a conflict anyway.
550  */
551  if (CountDBBackends(InvalidOid) == 0)
552  return latestRemovedXid;
553 
554  /*
555  * In what follows, we have to examine the previous state of the index
556  * page, as well as the heap page(s) it points to. This is only valid if
557  * WAL replay has reached a consistent database state; which means that
558  * the preceding check is not just an optimization, but is *necessary*. We
559  * won't have let in any user sessions before we reach consistency.
560  */
561  if (!reachedConsistency)
562  elog(PANIC, "btree_xlog_delete_get_latestRemovedXid: cannot operate with inconsistent data");
563 
564  /*
565  * Get index page. If the DB is consistent, this should not fail, nor
566  * should any of the heap page fetches below. If one does, we return
567  * InvalidTransactionId to cancel all HS transactions. That's probably
568  * overkill, but it's safe, and certainly better than panicking here.
569  */
570  XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
571  ibuffer = XLogReadBufferExtended(rnode, MAIN_FORKNUM, blkno, RBM_NORMAL);
572  if (!BufferIsValid(ibuffer))
573  return InvalidTransactionId;
574  LockBuffer(ibuffer, BT_READ);
575  ipage = (Page) BufferGetPage(ibuffer);
576 
577  /*
578  * Loop through the deleted index items to obtain the TransactionId from
579  * the heap items they point to.
580  */
581  unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeDelete);
582 
583  for (i = 0; i < xlrec->nitems; i++)
584  {
585  /*
586  * Identify the index tuple about to be deleted
587  */
588  iitemid = PageGetItemId(ipage, unused[i]);
589  itup = (IndexTuple) PageGetItem(ipage, iitemid);
590 
591  /*
592  * Locate the heap page that the index tuple points at
593  */
594  hblkno = ItemPointerGetBlockNumber(&(itup->t_tid));
595  hbuffer = XLogReadBufferExtended(xlrec->hnode, MAIN_FORKNUM, hblkno, RBM_NORMAL);
596  if (!BufferIsValid(hbuffer))
597  {
598  UnlockReleaseBuffer(ibuffer);
599  return InvalidTransactionId;
600  }
601  LockBuffer(hbuffer, BUFFER_LOCK_SHARE);
602  hpage = (Page) BufferGetPage(hbuffer);
603 
604  /*
605  * Look up the heap tuple header that the index tuple points at by
606  * using the heap node supplied with the xlrec. We can't use
607  * heap_fetch, since it uses ReadBuffer rather than XLogReadBuffer.
608  * Note that we are not looking at tuple data here, just headers.
609  */
610  hoffnum = ItemPointerGetOffsetNumber(&(itup->t_tid));
611  hitemid = PageGetItemId(hpage, hoffnum);
612 
613  /*
614  * Follow any redirections until we find something useful.
615  */
616  while (ItemIdIsRedirected(hitemid))
617  {
618  hoffnum = ItemIdGetRedirect(hitemid);
619  hitemid = PageGetItemId(hpage, hoffnum);
620  CHECK_FOR_INTERRUPTS();
621  }
622 
623  /*
624  * If the heap item has storage, then read the header and use that to
625  * set latestRemovedXid.
626  *
627  * Some LP_DEAD items may not be accessible, so we ignore them.
628  */
629  if (ItemIdHasStorage(hitemid))
630  {
631  htuphdr = (HeapTupleHeader) PageGetItem(hpage, hitemid);
632 
633  HeapTupleHeaderAdvanceLatestRemovedXid(htuphdr, &latestRemovedXid);
634  }
635  else if (ItemIdIsDead(hitemid))
636  {
637  /*
638  * Conjecture: if hitemid is dead then it had xids before the xids
639  * marked on LP_NORMAL items. So we just ignore this item and move
640  * onto the next, for the purposes of calculating
641  * latestRemovedxids.
642  */
643  }
644  else
645  Assert(!ItemIdIsUsed(hitemid));
646 
647  UnlockReleaseBuffer(hbuffer);
648  }
649 
650  UnlockReleaseBuffer(ibuffer);
651 
652  /*
653  * If all heap tuples were LP_DEAD then we will be returning
654  * InvalidTransactionId here, which avoids conflicts. This matches
655  * existing logic which assumes that LP_DEAD tuples must already be older
656  * than the latestRemovedXid on the cleanup record that set them as
657  * LP_DEAD, hence must already have generated a conflict.
658  */
659  return latestRemovedXid;
660 }
void HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple, TransactionId *latestRemovedXid)
Definition: heapam.c:7280
int CountDBBackends(Oid databaseid)
Definition: procarray.c:2738
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:105
uint32 TransactionId
Definition: c.h:397
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
#define ItemIdGetRedirect(itemId)
Definition: itemid.h:77
RelFileNode hnode
Definition: nbtxlog.h:121
ItemPointerData t_tid
Definition: itup.h:37
#define ItemIdIsUsed(itemId)
Definition: itemid.h:91
Buffer XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, BlockNumber blkno, ReadBufferMode mode)
Definition: xlogutils.c:438
uint32 BlockNumber
Definition: block.h:31
#define ItemIdIsDead(itemId)
Definition: itemid.h:112
#define PANIC
Definition: elog.h:53
uint16 OffsetNumber
Definition: off.h:24
#define BT_READ
Definition: nbtree.h:238
#define XLogRecGetData(decoder)
Definition: xlogreader.h:220
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3332
IndexTupleData * IndexTuple
Definition: itup.h:53
#define InvalidTransactionId
Definition: transam.h:31
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:231
bool XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id, RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum)
Definition: xlogreader.c:1307
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3546
#define InvalidOid
Definition: postgres_ext.h:36
#define ItemIdHasStorage(itemId)
Definition: itemid.h:119
bool reachedConsistency
Definition: xlog.c:830
#define NULL
Definition: c.h:229
#define Assert(condition)
Definition: c.h:675
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
#define ItemPointerGetOffsetNumber(pointer)
Definition: itemptr.h:95
int i
#define SizeOfBtreeDelete
Definition: nbtxlog.h:128
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:88
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:100
#define elog
Definition: elog.h:219
#define ItemPointerGetBlockNumber(pointer)
Definition: itemptr.h:76
int Buffer
Definition: buf.h:23
#define PageGetItem(page, itemId)
Definition: bufpage.h:336
Pointer Page
Definition: bufpage.h:74
static void btree_xlog_insert ( bool  isleaf,
bool  ismeta,
XLogReaderState *  record 
)
static

Definition at line 149 of file nbtxlog.c.

References _bt_clear_incomplete_split(), _bt_restore_meta(), BLK_NEEDS_REDO, buffer, BufferGetPage, BufferIsValid, elog, XLogReaderState::EndRecPtr, InvalidOffsetNumber, MarkBufferDirty(), xl_btree_insert::offnum, PageAddItem, PageSetLSN, PANIC, UnlockReleaseBuffer(), XLogReadBufferForRedo(), XLogRecGetBlockData(), and XLogRecGetData.

Referenced by btree_redo().

150 {
151  XLogRecPtr lsn = record->EndRecPtr;
152  xl_btree_insert *xlrec = (xl_btree_insert *) XLogRecGetData(record);
153  Buffer buffer;
154  Page page;
155 
156  /*
157  * Insertion to an internal page finishes an incomplete split at the child
158  * level. Clear the incomplete-split flag in the child. Note: during
159  * normal operation, the child and parent pages are locked at the same
160  * time, so that clearing the flag and inserting the downlink appear
161  * atomic to other backends. We don't bother with that during replay,
162  * because readers don't care about the incomplete-split flag and there
163  * cannot be updates happening.
164  */
165  if (!isleaf)
166  _bt_clear_incomplete_split(record, 1);
167  if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
168  {
169  Size datalen;
170  char *datapos = XLogRecGetBlockData(record, 0, &datalen);
171 
172  page = BufferGetPage(buffer);
173 
174  if (PageAddItem(page, (Item) datapos, datalen, xlrec->offnum,
175  false, false) == InvalidOffsetNumber)
176  elog(PANIC, "btree_insert_redo: failed to add item");
177 
178  PageSetLSN(page, lsn);
179  MarkBufferDirty(buffer);
180  }
181  if (BufferIsValid(buffer))
182  UnlockReleaseBuffer(buffer);
183 
184  /*
185  * Note: in normal operation, we'd update the metapage while still holding
186  * lock on the page we inserted into. But during replay it's not
187  * necessary to hold that lock, since no other index updates can be
188  * happening concurrently, and readers will cope fine with following an
189  * obsolete link from the metapage.
190  */
191  if (ismeta)
192  _bt_restore_meta(record, 2);
193 }
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1450
Pointer Item
Definition: item.h:17
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:412
#define PANIC
Definition: elog.h:53
XLogRecPtr EndRecPtr
Definition: xlogreader.h:115
static void _bt_restore_meta(XLogReaderState *record, uint8 block_id)
Definition: nbtxlog.c:77
#define XLogRecGetData(decoder)
Definition: xlogreader.h:220
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3332
static void _bt_clear_incomplete_split(XLogReaderState *record, uint8 block_id)
Definition: nbtxlog.c:128
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
char * XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len)
Definition: xlogreader.c:1331
OffsetNumber offnum
Definition: nbtxlog.h:66
#define InvalidOffsetNumber
Definition: off.h:26
XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id, Buffer *buf)
Definition: xlogutils.c:290
uint64 XLogRecPtr
Definition: xlogdefs.h:21
WalTimeSample buffer[LAG_TRACKER_BUFFER_SIZE]
Definition: walsender.c:214
size_t Size
Definition: c.h:356
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
#define elog
Definition: elog.h:219
#define PageSetLSN(page, lsn)
Definition: bufpage.h:364
int Buffer
Definition: buf.h:23
Pointer Page
Definition: bufpage.h:74
static void btree_xlog_mark_page_halfdead ( uint8  info,
XLogReaderState * record 
)
static

Definition at line 724 of file nbtxlog.c.

References _bt_pageinit(), BLK_NEEDS_REDO, BTP_HALF_DEAD, BTP_LEAF, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, buffer, BufferGetPage, BufferGetPageSize, BufferIsValid, elog, XLogReaderState::EndRecPtr, ERROR, InvalidBlockNumber, InvalidOffsetNumber, ItemPointerGetBlockNumber, ItemPointerSet, ItemPointerSetInvalid, xl_btree_mark_page_halfdead::leftblk, BTPageOpaqueData::level, MarkBufferDirty(), MemSet, OffsetNumberNext, P_HIKEY, PageAddItem, PageGetItem, PageGetItemId, PageGetSpecialPointer, PageIndexTupleDelete(), PageSetLSN, xl_btree_mark_page_halfdead::poffset, xl_btree_mark_page_halfdead::rightblk, IndexTupleData::t_info, IndexTupleData::t_tid, xl_btree_mark_page_halfdead::topparent, UnlockReleaseBuffer(), XLogInitBufferForRedo(), XLogReadBufferForRedo(), and XLogRecGetData.

Referenced by btree_redo().

725 {
726  XLogRecPtr lsn = record->EndRecPtr;
727  xl_btree_mark_page_halfdead *xlrec = (xl_btree_mark_page_halfdead *) XLogRecGetData(record);
728  Buffer buffer;
729  Page page;
730  BTPageOpaque pageop;
731  IndexTupleData trunctuple;
732 
733  /*
734  * In normal operation, we would lock all the pages this WAL record
735  * touches before changing any of them. In WAL replay, it should be okay
736  * to lock just one page at a time, since no concurrent index updates can
737  * be happening, and readers should not care whether they arrive at the
738  * target page or not (since it's surely empty).
739  */
740 
741  /* parent page */
742  if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
743  {
744  OffsetNumber poffset;
745  ItemId itemid;
746  IndexTuple itup;
747  OffsetNumber nextoffset;
748  BlockNumber rightsib;
749 
750  page = (Page) BufferGetPage(buffer);
751  pageop = (BTPageOpaque) PageGetSpecialPointer(page);
752 
753  poffset = xlrec->poffset;
754 
755  nextoffset = OffsetNumberNext(poffset);
756  itemid = PageGetItemId(page, nextoffset);
757  itup = (IndexTuple) PageGetItem(page, itemid);
758  rightsib = ItemPointerGetBlockNumber(&itup->t_tid);
759 
760  itemid = PageGetItemId(page, poffset);
761  itup = (IndexTuple) PageGetItem(page, itemid);
762  ItemPointerSet(&(itup->t_tid), rightsib, P_HIKEY);
763  nextoffset = OffsetNumberNext(poffset);
764  PageIndexTupleDelete(page, nextoffset);
765 
766  PageSetLSN(page, lsn);
767  MarkBufferDirty(buffer);
768  }
769  if (BufferIsValid(buffer))
770  UnlockReleaseBuffer(buffer);
771 
772  /* Rewrite the leaf page as a halfdead page */
773  buffer = XLogInitBufferForRedo(record, 0);
774  page = (Page) BufferGetPage(buffer);
775 
776  _bt_pageinit(page, BufferGetPageSize(buffer));
777  pageop = (BTPageOpaque) PageGetSpecialPointer(page);
778 
779  pageop->btpo_prev = xlrec->leftblk;
780  pageop->btpo_next = xlrec->rightblk;
781  pageop->btpo.level = 0;
782  pageop->btpo_flags = BTP_HALF_DEAD | BTP_LEAF;
783  pageop->btpo_cycleid = 0;
784 
785  /*
786  * Construct a dummy hikey item that points to the next parent to be
787  * deleted (if any).
788  */
789  MemSet(&trunctuple, 0, sizeof(IndexTupleData));
790  trunctuple.t_info = sizeof(IndexTupleData);
791  if (xlrec->topparent != InvalidBlockNumber)
792  ItemPointerSet(&trunctuple.t_tid, xlrec->topparent, P_HIKEY);
793  else
794  ItemPointerSetInvalid(&trunctuple.t_tid);
795  if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY,
796  false, false) == InvalidOffsetNumber)
797  elog(ERROR, "could not add dummy high key to half-dead page");
798 
799  PageSetLSN(page, lsn);
800  MarkBufferDirty(buffer);
801  UnlockReleaseBuffer(buffer);
802 }
BlockNumber btpo_next
Definition: nbtree.h:57
void PageIndexTupleDelete(Page page, OffsetNumber offnum)
Definition: bufpage.c:727
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1450
#define BTP_LEAF
Definition: nbtree.h:70
ItemPointerData t_tid
Definition: itup.h:37
#define BTP_HALF_DEAD
Definition: nbtree.h:74
union BTPageOpaqueData::@46 btpo
Pointer Item
Definition: item.h:17
#define MemSet(start, val, len)
Definition: c.h:857
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:412
uint32 BlockNumber
Definition: block.h:31
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:67
XLogRecPtr EndRecPtr
Definition: xlogreader.h:115
uint16 OffsetNumber
Definition: off.h:24
#define XLogRecGetData(decoder)
Definition: xlogreader.h:220
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3332
#define ERROR
Definition: elog.h:43
Buffer XLogInitBufferForRedo(XLogReaderState *record, uint8 block_id)
Definition: xlogutils.c:302
BTCycleId btpo_cycleid
Definition: nbtree.h:64
BlockNumber btpo_prev
Definition: nbtree.h:56
IndexTupleData * IndexTuple
Definition: itup.h:53
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:231
uint32 level
Definition: nbtree.h:60
struct IndexTupleData IndexTupleData
#define BufferGetPageSize(buffer)
Definition: bufmgr.h:147
#define InvalidOffsetNumber
Definition: off.h:26
XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id, Buffer *buf)
Definition: xlogutils.c:290
uint64 XLogRecPtr
Definition: xlogdefs.h:21
WalTimeSample buffer[LAG_TRACKER_BUFFER_SIZE]
Definition: walsender.c:214
#define OffsetNumberNext(offsetNumber)
Definition: off.h:53
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
#define InvalidBlockNumber
Definition: block.h:33
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
#define P_HIKEY
Definition: nbtree.h:203
#define ItemPointerSetInvalid(pointer)
Definition: itemptr.h:150
void _bt_pageinit(Page page, Size size)
Definition: nbtpage.c:732
#define elog
Definition: elog.h:219
#define ItemPointerGetBlockNumber(pointer)
Definition: itemptr.h:76
unsigned short t_info
Definition: itup.h:49
uint16 btpo_flags
Definition: nbtree.h:63
#define PageSetLSN(page, lsn)
Definition: bufpage.h:364
int Buffer
Definition: buf.h:23
#define PageGetItem(page, itemId)
Definition: bufpage.h:336
Pointer Page
Definition: bufpage.h:74
#define ItemPointerSet(pointer, blockNumber, offNum)
Definition: itemptr.h:105
static void btree_xlog_newroot ( XLogReaderState * record)
static

Definition at line 920 of file nbtxlog.c.

References _bt_clear_incomplete_split(), _bt_pageinit(), _bt_restore_meta(), _bt_restore_page(), BTP_LEAF, BTP_ROOT, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, buffer, BufferGetPage, BufferGetPageSize, XLogReaderState::EndRecPtr, BTPageOpaqueData::level, xl_btree_newroot::level, MarkBufferDirty(), P_NONE, PageGetSpecialPointer, PageSetLSN, UnlockReleaseBuffer(), XLogInitBufferForRedo(), XLogRecGetBlockData(), and XLogRecGetData.

Referenced by btree_redo().

921 {
922  XLogRecPtr lsn = record->EndRecPtr;
923  xl_btree_newroot *xlrec = (xl_btree_newroot *) XLogRecGetData(record);
924  Buffer buffer;
925  Page page;
926  BTPageOpaque pageop;
927  char *ptr;
928  Size len;
929 
930  buffer = XLogInitBufferForRedo(record, 0);
931  page = (Page) BufferGetPage(buffer);
932 
933  _bt_pageinit(page, BufferGetPageSize(buffer));
934  pageop = (BTPageOpaque) PageGetSpecialPointer(page);
935 
936  pageop->btpo_flags = BTP_ROOT;
937  pageop->btpo_prev = pageop->btpo_next = P_NONE;
938  pageop->btpo.level = xlrec->level;
939  if (xlrec->level == 0)
940  pageop->btpo_flags |= BTP_LEAF;
941  pageop->btpo_cycleid = 0;
942 
943  if (xlrec->level > 0)
944  {
945  ptr = XLogRecGetBlockData(record, 0, &len);
946  _bt_restore_page(page, ptr, len);
947 
948  /* Clear the incomplete-split flag in left child */
949  _bt_clear_incomplete_split(record, 1);
950  }
951 
952  PageSetLSN(page, lsn);
953  MarkBufferDirty(buffer);
954  UnlockReleaseBuffer(buffer);
955 
956  _bt_restore_meta(record, 2);
957 }
#define BTP_ROOT
Definition: nbtree.h:71
BlockNumber btpo_next
Definition: nbtree.h:57
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1450
#define BTP_LEAF
Definition: nbtree.h:70
union BTPageOpaqueData::@46 btpo
#define P_NONE
Definition: nbtree.h:168
uint32 level
Definition: nbtxlog.h:241
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:67
XLogRecPtr EndRecPtr
Definition: xlogreader.h:115
static void _bt_restore_meta(XLogReaderState *record, uint8 block_id)
Definition: nbtxlog.c:77
#define XLogRecGetData(decoder)
Definition: xlogreader.h:220
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3332
Buffer XLogInitBufferForRedo(XLogReaderState *record, uint8 block_id)
Definition: xlogutils.c:302
BTCycleId btpo_cycleid
Definition: nbtree.h:64
BlockNumber btpo_prev
Definition: nbtree.h:56
static void _bt_clear_incomplete_split(XLogReaderState *record, uint8 block_id)
Definition: nbtxlog.c:128
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
uint32 level
Definition: nbtree.h:60
char * XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len)
Definition: xlogreader.c:1331
static void _bt_restore_page(Page page, char *from, int len)
Definition: nbtxlog.c:36
#define BufferGetPageSize(buffer)
Definition: bufmgr.h:147
uint64 XLogRecPtr
Definition: xlogdefs.h:21
WalTimeSample buffer[LAG_TRACKER_BUFFER_SIZE]
Definition: walsender.c:214
size_t Size
Definition: c.h:356
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
void _bt_pageinit(Page page, Size size)
Definition: nbtpage.c:732
uint16 btpo_flags
Definition: nbtree.h:63
#define PageSetLSN(page, lsn)
Definition: bufpage.h:364
int Buffer
Definition: buf.h:23
Pointer Page
Definition: bufpage.h:74
static void btree_xlog_reuse_page ( XLogReaderState * record)
static

Definition at line 960 of file nbtxlog.c.

References InHotStandby, xl_btree_reuse_page::latestRemovedXid, xl_btree_reuse_page::node, ResolveRecoveryConflictWithSnapshot(), and XLogRecGetData.

Referenced by btree_redo().

961 {
962  xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) XLogRecGetData(record);
963 
964  /*
965  * Btree reuse_page records exist to provide a conflict point when we
966  * reuse pages in the index via the FSM. That's all they do though.
967  *
968  * latestRemovedXid was the page's btpo.xact. The btpo.xact <
969  * RecentGlobalXmin test in _bt_page_recyclable() conceptually mirrors the
970  * pgxact->xmin > limitXmin test in GetConflictingVirtualXIDs().
971  * Consequently, one XID value achieves the same exclusion effect on
972  * master and standby.
973  */
974  if (InHotStandby)
975  {
976  ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid,
977  xlrec->node);
978  }
979 }
RelFileNode node
Definition: nbtxlog.h:135
#define InHotStandby
Definition: xlog.h:74
#define XLogRecGetData(decoder)
Definition: xlogreader.h:220
void ResolveRecoveryConflictWithSnapshot(TransactionId latestRemovedXid, RelFileNode node)
Definition: standby.c:267
TransactionId latestRemovedXid
Definition: nbtxlog.h:137
static void btree_xlog_split ( bool  onleft,
bool  isroot,
XLogReaderState * record 
)
static

Definition at line 196 of file nbtxlog.c.

References _bt_clear_incomplete_split(), _bt_pageinit(), _bt_restore_page(), Assert, BLK_NEEDS_REDO, BTP_INCOMPLETE_SPLIT, BTP_LEAF, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, buffer, BufferGetPage, BufferGetPageSize, BufferIsValid, elog, XLogReaderState::EndRecPtr, ERROR, xl_btree_split::firstright, IndexTupleSize, InvalidOffsetNumber, ItemIdGetLength, BTPageOpaqueData::level, xl_btree_split::level, MarkBufferDirty(), MAXALIGN, xl_btree_split::newitemoff, NULL, OffsetNumberNext, P_FIRSTDATAKEY, P_HIKEY, P_NONE, PageAddItem, PageGetItem, PageGetItemId, PageGetSpecialPointer, PageGetTempPageCopySpecial(), PageRestoreTempPage(), PageSetLSN, PANIC, UnlockReleaseBuffer(), XLogInitBufferForRedo(), XLogReadBufferForRedo(), XLogRecGetBlockData(), XLogRecGetBlockTag(), and XLogRecGetData.

Referenced by btree_redo().

197 {
198  XLogRecPtr lsn = record->EndRecPtr;
199  xl_btree_split *xlrec = (xl_btree_split *) XLogRecGetData(record);
200  bool isleaf = (xlrec->level == 0);
201  Buffer lbuf;
202  Buffer rbuf;
203  Page rpage;
204  BTPageOpaque ropaque;
205  char *datapos;
206  Size datalen;
207  Item left_hikey = NULL;
208  Size left_hikeysz = 0;
209  BlockNumber leftsib;
210  BlockNumber rightsib;
211  BlockNumber rnext;
212 
213  XLogRecGetBlockTag(record, 0, NULL, NULL, &leftsib);
214  XLogRecGetBlockTag(record, 1, NULL, NULL, &rightsib);
215  if (!XLogRecGetBlockTag(record, 2, NULL, NULL, &rnext))
216  rnext = P_NONE;
217 
218  /*
219  * Clear the incomplete split flag on the left sibling of the child page
220  * this is a downlink for. (Like in btree_xlog_insert, this can be done
221  * before locking the other pages)
222  */
223  if (!isleaf)
224  _bt_clear_incomplete_split(record, 3);
225 
226  /* Reconstruct right (new) sibling page from scratch */
227  rbuf = XLogInitBufferForRedo(record, 1);
228  datapos = XLogRecGetBlockData(record, 1, &datalen);
229  rpage = (Page) BufferGetPage(rbuf);
230 
231  _bt_pageinit(rpage, BufferGetPageSize(rbuf));
232  ropaque = (BTPageOpaque) PageGetSpecialPointer(rpage);
233 
234  ropaque->btpo_prev = leftsib;
235  ropaque->btpo_next = rnext;
236  ropaque->btpo.level = xlrec->level;
237  ropaque->btpo_flags = isleaf ? BTP_LEAF : 0;
238  ropaque->btpo_cycleid = 0;
239 
240  _bt_restore_page(rpage, datapos, datalen);
241 
242  /*
243  * On leaf level, the high key of the left page is equal to the first key
244  * on the right page.
245  */
246  if (isleaf)
247  {
248  ItemId hiItemId = PageGetItemId(rpage, P_FIRSTDATAKEY(ropaque));
249 
250  left_hikey = PageGetItem(rpage, hiItemId);
251  left_hikeysz = ItemIdGetLength(hiItemId);
252  }
253 
254  PageSetLSN(rpage, lsn);
255  MarkBufferDirty(rbuf);
256 
257  /* don't release the buffer yet; we touch right page's first item below */
258 
259  /* Now reconstruct left (original) sibling page */
260  if (XLogReadBufferForRedo(record, 0, &lbuf) == BLK_NEEDS_REDO)
261  {
262  /*
263  * To retain the same physical order of the tuples that they had, we
264  * initialize a temporary empty page for the left page and add all the
265  * items to that in item number order. This mirrors how _bt_split()
266  * works. It's not strictly required to retain the same physical
267  * order, as long as the items are in the correct item number order,
268  * but it helps debugging. See also _bt_restore_page(), which does
269  * the same for the right page.
270  */
271  Page lpage = (Page) BufferGetPage(lbuf);
272  BTPageOpaque lopaque = (BTPageOpaque) PageGetSpecialPointer(lpage);
273  OffsetNumber off;
274  Item newitem = NULL;
275  Size newitemsz = 0;
276  Page newlpage;
277  OffsetNumber leftoff;
278 
279  datapos = XLogRecGetBlockData(record, 0, &datalen);
280 
281  if (onleft)
282  {
283  newitem = (Item) datapos;
284  newitemsz = MAXALIGN(IndexTupleSize(newitem));
285  datapos += newitemsz;
286  datalen -= newitemsz;
287  }
288 
289  /* Extract left hikey and its size (assuming 16-bit alignment) */
290  if (!isleaf)
291  {
292  left_hikey = (Item) datapos;
293  left_hikeysz = MAXALIGN(IndexTupleSize(left_hikey));
294  datapos += left_hikeysz;
295  datalen -= left_hikeysz;
296  }
297  Assert(datalen == 0);
298 
299  newlpage = PageGetTempPageCopySpecial(lpage);
300 
301  /* Set high key */
302  leftoff = P_HIKEY;
303  if (PageAddItem(newlpage, left_hikey, left_hikeysz,
304  P_HIKEY, false, false) == InvalidOffsetNumber)
305  elog(PANIC, "failed to add high key to left page after split");
306  leftoff = OffsetNumberNext(leftoff);
307 
308  for (off = P_FIRSTDATAKEY(lopaque); off < xlrec->firstright; off++)
309  {
310  ItemId itemid;
311  Size itemsz;
312  Item item;
313 
314  /* add the new item if it was inserted on left page */
315  if (onleft && off == xlrec->newitemoff)
316  {
317  if (PageAddItem(newlpage, newitem, newitemsz, leftoff,
318  false, false) == InvalidOffsetNumber)
319  elog(ERROR, "failed to add new item to left page after split");
320  leftoff = OffsetNumberNext(leftoff);
321  }
322 
323  itemid = PageGetItemId(lpage, off);
324  itemsz = ItemIdGetLength(itemid);
325  item = PageGetItem(lpage, itemid);
326  if (PageAddItem(newlpage, item, itemsz, leftoff,
327  false, false) == InvalidOffsetNumber)
328  elog(ERROR, "failed to add old item to left page after split");
329  leftoff = OffsetNumberNext(leftoff);
330  }
331 
332  /* cope with possibility that newitem goes at the end */
333  if (onleft && off == xlrec->newitemoff)
334  {
335  if (PageAddItem(newlpage, newitem, newitemsz, leftoff,
336  false, false) == InvalidOffsetNumber)
337  elog(ERROR, "failed to add new item to left page after split");
338  leftoff = OffsetNumberNext(leftoff);
339  }
340 
341  PageRestoreTempPage(newlpage, lpage);
342 
343  /* Fix opaque fields */
344  lopaque->btpo_flags = BTP_INCOMPLETE_SPLIT;
345  if (isleaf)
346  lopaque->btpo_flags |= BTP_LEAF;
347  lopaque->btpo_next = rightsib;
348  lopaque->btpo_cycleid = 0;
349 
350  PageSetLSN(lpage, lsn);
351  MarkBufferDirty(lbuf);
352  }
353 
354  /* We no longer need the buffers */
355  if (BufferIsValid(lbuf))
356  UnlockReleaseBuffer(lbuf);
357  UnlockReleaseBuffer(rbuf);
358 
359  /*
360  * Fix left-link of the page to the right of the new right sibling.
361  *
362  * Note: in normal operation, we do this while still holding lock on the
363  * two split pages. However, that's not necessary for correctness in WAL
364  * replay, because no other index update can be in progress, and readers
365  * will cope properly when following an obsolete left-link.
366  */
367  if (rnext != P_NONE)
368  {
369  Buffer buffer;
370 
371  if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO)
372  {
373  Page page = (Page) BufferGetPage(buffer);
374  BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page);
375 
376  pageop->btpo_prev = rightsib;
377 
378  PageSetLSN(page, lsn);
379  MarkBufferDirty(buffer);
380  }
381  if (BufferIsValid(buffer))
382  UnlockReleaseBuffer(buffer);
383  }
384 }
BlockNumber btpo_next
Definition: nbtree.h:57
void PageRestoreTempPage(Page tempPage, Page oldPage)
Definition: bufpage.c:407
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1450
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:205
#define BTP_LEAF
Definition: nbtree.h:70
union BTPageOpaqueData::@46 btpo
Pointer Item
Definition: item.h:17
#define P_NONE
Definition: nbtree.h:168
#define BTP_INCOMPLETE_SPLIT
Definition: nbtree.h:77
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:412
uint32 BlockNumber
Definition: block.h:31
#define PANIC
Definition: elog.h:53
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:67
XLogRecPtr EndRecPtr
Definition: xlogreader.h:115
uint16 OffsetNumber
Definition: off.h:24
Page PageGetTempPageCopySpecial(Page page)
Definition: bufpage.c:385
#define ItemIdGetLength(itemId)
Definition: itemid.h:58
#define XLogRecGetData(decoder)
Definition: xlogreader.h:220
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3332
#define ERROR
Definition: elog.h:43
Buffer XLogInitBufferForRedo(XLogReaderState *record, uint8 block_id)
Definition: xlogutils.c:302
OffsetNumber newitemoff
Definition: nbtxlog.h:107
BTCycleId btpo_cycleid
Definition: nbtree.h:64
BlockNumber btpo_prev
Definition: nbtree.h:56
static void _bt_clear_incomplete_split(XLogReaderState *record, uint8 block_id)
Definition: nbtxlog.c:128
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:231
uint32 level
Definition: nbtree.h:60
bool XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id, RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum)
Definition: xlogreader.c:1307
char * XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len)
Definition: xlogreader.c:1331
uint32 level
Definition: nbtxlog.h:105
static void _bt_restore_page(Page page, char *from, int len)
Definition: nbtxlog.c:36
#define BufferGetPageSize(buffer)
Definition: bufmgr.h:147
#define InvalidOffsetNumber
Definition: off.h:26
XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id, Buffer *buf)
Definition: xlogutils.c:290
#define NULL
Definition: c.h:229
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:675
OffsetNumber firstright
Definition: nbtxlog.h:106
WalTimeSample buffer[LAG_TRACKER_BUFFER_SIZE]
Definition: walsender.c:214
#define OffsetNumberNext(offsetNumber)
Definition: off.h:53
size_t Size
Definition: c.h:356
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
#define MAXALIGN(LEN)
Definition: c.h:588
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
#define P_HIKEY
Definition: nbtree.h:203
void _bt_pageinit(Page page, Size size)
Definition: nbtpage.c:732
#define elog
Definition: elog.h:219
uint16 btpo_flags
Definition: nbtree.h:63
#define PageSetLSN(page, lsn)
Definition: bufpage.h:364
int Buffer
Definition: buf.h:23
#define PageGetItem(page, itemId)
Definition: bufpage.h:336
Pointer Page
Definition: bufpage.h:74
#define IndexTupleSize(itup)
Definition: itup.h:70
static void btree_xlog_unlink_page ( uint8  info,
XLogReaderState * record 
)
static

Definition at line 806 of file nbtxlog.c.

References _bt_pageinit(), _bt_restore_meta(), BLK_NEEDS_REDO, BTP_DELETED, BTP_HALF_DEAD, BTP_LEAF, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, xl_btree_unlink_page::btpo_xact, buffer, BufferGetPage, BufferGetPageSize, BufferIsValid, elog, XLogReaderState::EndRecPtr, ERROR, InvalidBlockNumber, InvalidOffsetNumber, ItemPointerSet, ItemPointerSetInvalid, xl_btree_unlink_page::leafleftsib, xl_btree_unlink_page::leafrightsib, xl_btree_unlink_page::leftsib, BTPageOpaqueData::level, MarkBufferDirty(), MemSet, P_HIKEY, P_NONE, PageAddItem, PageGetSpecialPointer, PageSetLSN, xl_btree_unlink_page::rightsib, IndexTupleData::t_info, IndexTupleData::t_tid, xl_btree_unlink_page::topparent, UnlockReleaseBuffer(), BTPageOpaqueData::xact, XLOG_BTREE_UNLINK_PAGE_META, XLogInitBufferForRedo(), XLogReadBufferForRedo(), XLogRecGetData, and XLogRecHasBlockRef.

Referenced by btree_redo().

807 {
808  XLogRecPtr lsn = record->EndRecPtr;
809  xl_btree_unlink_page *xlrec = (xl_btree_unlink_page *) XLogRecGetData(record);
810  BlockNumber leftsib;
811  BlockNumber rightsib;
812  Buffer buffer;
813  Page page;
814  BTPageOpaque pageop;
815 
816  leftsib = xlrec->leftsib;
817  rightsib = xlrec->rightsib;
818 
819  /*
820  * In normal operation, we would lock all the pages this WAL record
821  * touches before changing any of them. In WAL replay, it should be okay
822  * to lock just one page at a time, since no concurrent index updates can
823  * be happening, and readers should not care whether they arrive at the
824  * target page or not (since it's surely empty).
825  */
826 
827  /* Fix left-link of right sibling */
828  if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO)
829  {
830  page = (Page) BufferGetPage(buffer);
831  pageop = (BTPageOpaque) PageGetSpecialPointer(page);
832  pageop->btpo_prev = leftsib;
833 
834  PageSetLSN(page, lsn);
835  MarkBufferDirty(buffer);
836  }
837  if (BufferIsValid(buffer))
838  UnlockReleaseBuffer(buffer);
839 
840  /* Fix right-link of left sibling, if any */
841  if (leftsib != P_NONE)
842  {
843  if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
844  {
845  page = (Page) BufferGetPage(buffer);
846  pageop = (BTPageOpaque) PageGetSpecialPointer(page);
847  pageop->btpo_next = rightsib;
848 
849  PageSetLSN(page, lsn);
850  MarkBufferDirty(buffer);
851  }
852  if (BufferIsValid(buffer))
853  UnlockReleaseBuffer(buffer);
854  }
855 
856  /* Rewrite target page as empty deleted page */
857  buffer = XLogInitBufferForRedo(record, 0);
858  page = (Page) BufferGetPage(buffer);
859 
860  _bt_pageinit(page, BufferGetPageSize(buffer));
861  pageop = (BTPageOpaque) PageGetSpecialPointer(page);
862 
863  pageop->btpo_prev = leftsib;
864  pageop->btpo_next = rightsib;
865  pageop->btpo.xact = xlrec->btpo_xact;
866  pageop->btpo_flags = BTP_DELETED;
867  pageop->btpo_cycleid = 0;
868 
869  PageSetLSN(page, lsn);
870  MarkBufferDirty(buffer);
871  UnlockReleaseBuffer(buffer);
872 
873  /*
874  * If we deleted a parent of the targeted leaf page, instead of the leaf
875  * itself, update the leaf to point to the next remaining child in the
876  * branch.
877  */
878  if (XLogRecHasBlockRef(record, 3))
879  {
880  /*
881  * There is no real data on the page, so we just re-create it from
882  * scratch using the information from the WAL record.
883  */
884  IndexTupleData trunctuple;
885 
886  buffer = XLogInitBufferForRedo(record, 3);
887  page = (Page) BufferGetPage(buffer);
888 
889  _bt_pageinit(page, BufferGetPageSize(buffer));
890  pageop = (BTPageOpaque) PageGetSpecialPointer(page);
891 
892  pageop->btpo_flags = BTP_HALF_DEAD | BTP_LEAF;
893  pageop->btpo_prev = xlrec->leafleftsib;
894  pageop->btpo_next = xlrec->leafrightsib;
895  pageop->btpo.level = 0;
896  pageop->btpo_cycleid = 0;
897 
898  /* Add a dummy hikey item */
899  MemSet(&trunctuple, 0, sizeof(IndexTupleData));
900  trunctuple.t_info = sizeof(IndexTupleData);
901  if (xlrec->topparent != InvalidBlockNumber)
902  ItemPointerSet(&trunctuple.t_tid, xlrec->topparent, P_HIKEY);
903  else
904  ItemPointerSetInvalid(&trunctuple.t_tid);
905  if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY,
906  false, false) == InvalidOffsetNumber)
907  elog(ERROR, "could not add dummy high key to half-dead page");
908 
909  PageSetLSN(page, lsn);
910  MarkBufferDirty(buffer);
911  UnlockReleaseBuffer(buffer);
912  }
913 
914  /* Update metapage if needed */
915  if (info == XLOG_BTREE_UNLINK_PAGE_META)
916  _bt_restore_meta(record, 4);
917 }
BlockNumber btpo_next
Definition: nbtree.h:57
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1450
#define BTP_LEAF
Definition: nbtree.h:70
ItemPointerData t_tid
Definition: itup.h:37
#define BTP_HALF_DEAD
Definition: nbtree.h:74
union BTPageOpaqueData::@46 btpo
Pointer Item
Definition: item.h:17
#define P_NONE
Definition: nbtree.h:168
#define XLogRecHasBlockRef(decoder, block_id)
Definition: xlogreader.h:223
#define MemSet(start, val, len)
Definition: c.h:857
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:412
uint32 BlockNumber
Definition: block.h:31
#define BTP_DELETED
Definition: nbtree.h:72
TransactionId xact
Definition: nbtree.h:61
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:67
XLogRecPtr EndRecPtr
Definition: xlogreader.h:115
static void _bt_restore_meta(XLogReaderState *record, uint8 block_id)
Definition: nbtxlog.c:77
#define XLogRecGetData(decoder)
Definition: xlogreader.h:220
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3332
#define ERROR
Definition: elog.h:43
Buffer XLogInitBufferForRedo(XLogReaderState *record, uint8 block_id)
Definition: xlogutils.c:302
BTCycleId btpo_cycleid
Definition: nbtree.h:64
BlockNumber btpo_prev
Definition: nbtree.h:56
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
uint32 level
Definition: nbtree.h:60
struct IndexTupleData IndexTupleData
#define BufferGetPageSize(buffer)
Definition: bufmgr.h:147
#define InvalidOffsetNumber
Definition: off.h:26
XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id, Buffer *buf)
Definition: xlogutils.c:290
uint64 XLogRecPtr
Definition: xlogdefs.h:21
WalTimeSample buffer[LAG_TRACKER_BUFFER_SIZE]
Definition: walsender.c:214
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
#define InvalidBlockNumber
Definition: block.h:33
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
#define P_HIKEY
Definition: nbtree.h:203
#define ItemPointerSetInvalid(pointer)
Definition: itemptr.h:150
void _bt_pageinit(Page page, Size size)
Definition: nbtpage.c:732
#define XLOG_BTREE_UNLINK_PAGE_META
Definition: nbtxlog.h:35
#define elog
Definition: elog.h:219
unsigned short t_info
Definition: itup.h:49
uint16 btpo_flags
Definition: nbtree.h:63
#define PageSetLSN(page, lsn)
Definition: bufpage.h:364
int Buffer
Definition: buf.h:23
Pointer Page
Definition: bufpage.h:74
#define ItemPointerSet(pointer, blockNumber, offNum)
Definition: itemptr.h:105
static void btree_xlog_vacuum ( XLogReaderState * record)
static

Definition at line 387 of file nbtxlog.c.

References BLK_NEEDS_REDO, BlockNumberIsValid, BTP_HAS_GARBAGE, BTPageOpaqueData::btpo_flags, buffer, BufferGetPage, BufferIsValid, XLogReaderState::EndRecPtr, HotStandbyActiveInReplay(), xl_btree_vacuum::lastBlockVacuumed, LockBufferForCleanup(), MAIN_FORKNUM, MarkBufferDirty(), NULL, PageGetSpecialPointer, PageIndexMultiDelete(), PageSetLSN, RBM_NORMAL, RBM_NORMAL_NO_LOG, UnlockReleaseBuffer(), XLogReadBufferExtended(), XLogReadBufferForRedoExtended(), XLogRecGetBlockData(), XLogRecGetBlockTag(), and XLogRecGetData.

Referenced by btree_redo().

388 {
389  XLogRecPtr lsn = record->EndRecPtr;
390  Buffer buffer;
391  Page page;
392  BTPageOpaque opaque;
393 #ifdef UNUSED
394  xl_btree_vacuum *xlrec = (xl_btree_vacuum *) XLogRecGetData(record);
395 
396  /*
397  * This section of code is thought to be no longer needed, after analysis
398  * of the calling paths. It is retained to allow the code to be reinstated
399  * if a flaw is revealed in that thinking.
400  *
401  * If we are running non-MVCC scans using this index we need to do some
402  * additional work to ensure correctness, which is known as a "pin scan"
403  * described in more detail in next paragraphs. We used to do the extra
404  * work in all cases, whereas we now avoid that work in most cases. If
405  * lastBlockVacuumed is set to InvalidBlockNumber then we skip the
406  * additional work required for the pin scan.
407  *
408  * Avoiding this extra work is important since it requires us to touch
409  * every page in the index, so is an O(N) operation. Worse, it is an
410  * operation performed in the foreground during redo, so it delays
411  * replication directly.
412  *
413  * If queries might be active then we need to ensure every leaf page is
414  * unpinned between the lastBlockVacuumed and the current block, if there
415  * are any. This prevents replay of the VACUUM from reaching the stage of
416  * removing heap tuples while there could still be indexscans "in flight"
417  * to those particular tuples for those scans which could be confused by
418  * finding new tuples at the old TID locations (see nbtree/README).
419  *
420  * It might be worth checking if there are actually any backends running;
421  * if not, we could just skip this.
422  *
423  * Since VACUUM can visit leaf pages out-of-order, it might issue records
424  * with lastBlockVacuumed >= block; that's not an error, it just means
425  * nothing to do now.
426  *
427  * Note: since we touch all pages in the range, we will lock non-leaf
428  * pages, and also any empty (all-zero) pages that may be in the index. It
429  * doesn't seem worth the complexity to avoid that. But it's important
430  * that HotStandbyActiveInReplay() will not return true if the database
431  * isn't yet consistent; so we need not fear reading still-corrupt blocks
432  * here during crash recovery.
433  */
434  if (HotStandbyActiveInReplay() && BlockNumberIsValid(xlrec->lastBlockVacuumed))
435  {
436  RelFileNode thisrnode;
437  BlockNumber thisblkno;
438  BlockNumber blkno;
439 
440  XLogRecGetBlockTag(record, 0, &thisrnode, NULL, &thisblkno);
441 
442  for (blkno = xlrec->lastBlockVacuumed + 1; blkno < thisblkno; blkno++)
443  {
444  /*
445  * We use RBM_NORMAL_NO_LOG mode because it's not an error
446  * condition to see all-zero pages. The original btvacuumpage
447  * scan would have skipped over all-zero pages, noting them in FSM
448  * but not bothering to initialize them just yet; so we mustn't
449  * throw an error here. (We could skip acquiring the cleanup lock
450  * if PageIsNew, but it's probably not worth the cycles to test.)
451  *
452  * XXX we don't actually need to read the block, we just need to
453  * confirm it is unpinned. If we had a special call into the
454  * buffer manager we could optimise this so that if the block is
455  * not in shared_buffers we confirm it as unpinned. Optimizing
456  * this is now moot, since in most cases we avoid the scan.
457  */
458  buffer = XLogReadBufferExtended(thisrnode, MAIN_FORKNUM, blkno,
459  RBM_NORMAL_NO_LOG);
460  if (BufferIsValid(buffer))
461  {
462  LockBufferForCleanup(buffer);
463  UnlockReleaseBuffer(buffer);
464  }
465  }
466  }
467 #endif
468 
469  /*
470  * Like in btvacuumpage(), we need to take a cleanup lock on every leaf
471  * page. See nbtree/README for details.
472  */
473  if (XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &buffer)
474  == BLK_NEEDS_REDO)
475  {
476  char *ptr;
477  Size len;
478 
479  ptr = XLogRecGetBlockData(record, 0, &len);
480 
481  page = (Page) BufferGetPage(buffer);
482 
483  if (len > 0)
484  {
485  OffsetNumber *unused;
486  OffsetNumber *unend;
487 
488  unused = (OffsetNumber *) ptr;
489  unend = (OffsetNumber *) ((char *) ptr + len);
490 
491  if ((unend - unused) > 0)
492  PageIndexMultiDelete(page, unused, unend - unused);
493  }
494 
495  /*
496  * Mark the page as not containing any LP_DEAD items --- see comments
497  * in _bt_delitems_vacuum().
498  */
499  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
500  opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
501 
502  PageSetLSN(page, lsn);
503  MarkBufferDirty(buffer);
504  }
505  if (BufferIsValid(buffer))
506  UnlockReleaseBuffer(buffer);
507 }
BlockNumber lastBlockVacuumed
Definition: nbtxlog.h:167
void LockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:3603
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1450
Buffer XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, BlockNumber blkno, ReadBufferMode mode)
Definition: xlogutils.c:438
uint32 BlockNumber
Definition: block.h:31
bool HotStandbyActiveInReplay(void)
Definition: xlog.c:7953
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:67
XLogRecPtr EndRecPtr
Definition: xlogreader.h:115
uint16 OffsetNumber
Definition: off.h:24
#define XLogRecGetData(decoder)
Definition: xlogreader.h:220
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3332
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
bool XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id, RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum)
Definition: xlogreader.c:1307
char * XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len)
Definition: xlogreader.c:1331
#define BlockNumberIsValid(blockNumber)
Definition: block.h:70
#define NULL
Definition: c.h:229
uint64 XLogRecPtr
Definition: xlogdefs.h:21
WalTimeSample buffer[LAG_TRACKER_BUFFER_SIZE]
Definition: walsender.c:214
void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
Definition: bufpage.c:836
size_t Size
Definition: c.h:356
#define PageGetSpecialPointer(page)
Definition: bufpage.h:322
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
XLogRedoAction XLogReadBufferForRedoExtended(XLogReaderState *record, uint8 block_id, ReadBufferMode mode, bool get_cleanup_lock, Buffer *buf)
Definition: xlogutils.c:327
uint16 btpo_flags
Definition: nbtree.h:63
#define PageSetLSN(page, lsn)
Definition: bufpage.h:364
int Buffer
Definition: buf.h:23
#define BTP_HAS_GARBAGE
Definition: nbtree.h:76
Pointer Page
Definition: bufpage.h:74