PostgreSQL Source Code  git master
nbtxlog.c File Reference
#include "postgres.h"
#include "access/bufmask.h"
#include "access/nbtree.h"
#include "access/nbtxlog.h"
#include "access/transam.h"
#include "access/xlog.h"
#include "access/xlogutils.h"
#include "miscadmin.h"
#include "storage/procarray.h"
Include dependency graph for nbtxlog.c:

Go to the source code of this file.

Functions

static void _bt_restore_page (Page page, char *from, int len)
 
static void _bt_restore_meta (XLogReaderState *record, uint8 block_id)
 
static void _bt_clear_incomplete_split (XLogReaderState *record, uint8 block_id)
 
static void btree_xlog_insert (bool isleaf, bool ismeta, XLogReaderState *record)
 
static void btree_xlog_split (bool onleft, XLogReaderState *record)
 
static void btree_xlog_vacuum (XLogReaderState *record)
 
static void btree_xlog_delete (XLogReaderState *record)
 
static void btree_xlog_mark_page_halfdead (uint8 info, XLogReaderState *record)
 
static void btree_xlog_unlink_page (uint8 info, XLogReaderState *record)
 
static void btree_xlog_newroot (XLogReaderState *record)
 
static void btree_xlog_reuse_page (XLogReaderState *record)
 
void btree_redo (XLogReaderState *record)
 
void btree_mask (char *pagedata, BlockNumber blkno)
 

Function Documentation

◆ _bt_clear_incomplete_split()

static void _bt_clear_incomplete_split ( XLogReaderState *  record,
uint8  block_id 
)
static

Definition at line 138 of file nbtxlog.c.

References Assert, BLK_NEEDS_REDO, BTP_INCOMPLETE_SPLIT, BTPageOpaqueData::btpo_flags, buf, BufferGetPage, BufferIsValid, XLogReaderState::EndRecPtr, MarkBufferDirty(), P_INCOMPLETE_SPLIT, PageGetSpecialPointer, PageSetLSN, UnlockReleaseBuffer(), and XLogReadBufferForRedo().

Referenced by btree_xlog_insert(), btree_xlog_newroot(), and btree_xlog_split().

139 {
140  XLogRecPtr lsn = record->EndRecPtr;
141  Buffer buf;
142 
143  if (XLogReadBufferForRedo(record, block_id, &buf) == BLK_NEEDS_REDO)
144  {
145  Page page = (Page) BufferGetPage(buf);
146  BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page);
147 
148  Assert(P_INCOMPLETE_SPLIT(pageop));
149  pageop->btpo_flags &= ~BTP_INCOMPLETE_SPLIT;
150 
151  PageSetLSN(page, lsn);
152  MarkBufferDirty(buf);
153  }
154  if (BufferIsValid(buf))
155  UnlockReleaseBuffer(buf);
156 }
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1458
#define BTP_INCOMPLETE_SPLIT
Definition: nbtree.h:79
#define P_INCOMPLETE_SPLIT(opaque)
Definition: nbtree.h:197
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
XLogRecPtr EndRecPtr
Definition: xlogreader.h:132
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3388
static char * buf
Definition: pg_test_fsync.c:67
#define BufferGetPage(buffer)
Definition: bufmgr.h:159
XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id, Buffer *buf)
Definition: xlogutils.c:289
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:739
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
#define BufferIsValid(bufnum)
Definition: bufmgr.h:113
uint16 btpo_flags
Definition: nbtree.h:65
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
int Buffer
Definition: buf.h:23
Pointer Page
Definition: bufpage.h:78

◆ _bt_restore_meta()

static void _bt_restore_meta ( XLogReaderState *  record,
uint8  block_id 
)
static

Definition at line 82 of file nbtxlog.c.

References _bt_pageinit(), Assert, BTMetaPageData::btm_fastlevel, BTMetaPageData::btm_fastroot, BTMetaPageData::btm_last_cleanup_num_heap_tuples, BTMetaPageData::btm_level, BTMetaPageData::btm_magic, BTMetaPageData::btm_oldest_btpo_xact, BTMetaPageData::btm_root, BTMetaPageData::btm_version, BTP_META, BTPageGetMeta, BTPageOpaqueData::btpo_flags, BTREE_MAGIC, BTREE_METAPAGE, BTREE_NOVAC_VERSION, BufferGetBlockNumber(), BufferGetPage, BufferGetPageSize, XLogReaderState::EndRecPtr, xl_btree_metadata::fastlevel, xl_btree_metadata::fastroot, xl_btree_metadata::last_cleanup_num_heap_tuples, xl_btree_metadata::level, MarkBufferDirty(), xl_btree_metadata::oldest_btpo_xact, PageGetSpecialPointer, PageSetLSN, xl_btree_metadata::root, UnlockReleaseBuffer(), xl_btree_metadata::version, XLogInitBufferForRedo(), and XLogRecGetBlockData().

Referenced by btree_redo(), btree_xlog_insert(), btree_xlog_newroot(), and btree_xlog_unlink_page().

83 {
84  XLogRecPtr lsn = record->EndRecPtr;
85  Buffer metabuf;
86  Page metapg;
87  BTMetaPageData *md;
88  BTPageOpaque pageop;
89  xl_btree_metadata *xlrec;
90  char *ptr;
91  Size len;
92 
93  metabuf = XLogInitBufferForRedo(record, block_id);
94  ptr = XLogRecGetBlockData(record, block_id, &len);
95 
96  Assert(len == sizeof(xl_btree_metadata));
97  Assert(BufferGetBlockNumber(metabuf) == BTREE_METAPAGE);
98  xlrec = (xl_btree_metadata *) ptr;
99  metapg = BufferGetPage(metabuf);
100 
101  _bt_pageinit(metapg, BufferGetPageSize(metabuf));
102 
103  md = BTPageGetMeta(metapg);
104  md->btm_magic = BTREE_MAGIC;
105  md->btm_version = xlrec->version;
106  md->btm_root = xlrec->root;
107  md->btm_level = xlrec->level;
108  md->btm_fastroot = xlrec->fastroot;
109  md->btm_fastlevel = xlrec->fastlevel;
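110  /* Cannot log BTREE_MIN_VERSION index metapage without upgrade */
111  Assert(md->btm_version >= BTREE_NOVAC_VERSION);
112  md->btm_oldest_btpo_xact = xlrec->oldest_btpo_xact;
113  md->btm_last_cleanup_num_heap_tuples = xlrec->last_cleanup_num_heap_tuples;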
114 
115  pageop = (BTPageOpaque) PageGetSpecialPointer(metapg);
116  pageop->btpo_flags = BTP_META;
117 
118  /*
119  * Set pd_lower just past the end of the metadata. This is essential,
120  * because without doing so, metadata will be lost if xlog.c compresses
121  * the page.
122  */
123  ((PageHeader) metapg)->pd_lower =
124  ((char *) md + sizeof(BTMetaPageData)) - (char *) metapg;
125 
126  PageSetLSN(metapg, lsn);
127  MarkBufferDirty(metabuf);
128  UnlockReleaseBuffer(metabuf);
129 }
uint32 btm_version
Definition: nbtree.h:101
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1458
uint32 btm_magic
Definition: nbtree.h:100
BlockNumber root
Definition: nbtxlog.h:50
#define BTP_META
Definition: nbtree.h:75
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
XLogRecPtr EndRecPtr
Definition: xlogreader.h:132
BlockNumber btm_fastroot
Definition: nbtree.h:104
#define BTREE_MAGIC
Definition: nbtree.h:133
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3388
float8 last_cleanup_num_heap_tuples
Definition: nbtxlog.h:55
Buffer XLogInitBufferForRedo(XLogReaderState *record, uint8 block_id)
Definition: xlogutils.c:301
TransactionId oldest_btpo_xact
Definition: nbtxlog.h:54
#define BTPageGetMeta(p)
Definition: nbtree.h:113
#define BTREE_NOVAC_VERSION
Definition: nbtree.h:136
#define BufferGetPage(buffer)
Definition: bufmgr.h:159
#define BTREE_METAPAGE
Definition: nbtree.h:132
uint32 version
Definition: nbtxlog.h:49
uint32 btm_fastlevel
Definition: nbtree.h:105
char * XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len)
Definition: xlogreader.c:1484
#define BufferGetPageSize(buffer)
Definition: bufmgr.h:146
BlockNumber btm_root
Definition: nbtree.h:102
PageHeaderData * PageHeader
Definition: bufpage.h:166
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:739
size_t Size
Definition: c.h:467
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
float8 btm_last_cleanup_num_heap_tuples
Definition: nbtree.h:109
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2613
uint32 fastlevel
Definition: nbtxlog.h:53
uint32 btm_level
Definition: nbtree.h:103
uint32 level
Definition: nbtxlog.h:51
void _bt_pageinit(Page page, Size size)
Definition: nbtpage.c:924
BlockNumber fastroot
Definition: nbtxlog.h:52
TransactionId btm_oldest_btpo_xact
Definition: nbtree.h:107
uint16 btpo_flags
Definition: nbtree.h:65
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
int Buffer
Definition: buf.h:23
Pointer Page
Definition: bufpage.h:78

◆ _bt_restore_page()

static void _bt_restore_page ( Page  page,
char *  from,
int  len 
)
static

Definition at line 35 of file nbtxlog.c.

References elog, i, IndexTupleSize, InvalidOffsetNumber, MAXALIGN, MaxIndexTuplesPerPage, PageAddItem, and PANIC.

Referenced by btree_xlog_newroot(), and btree_xlog_split().

36 {
37  IndexTupleData itupdata;
38  Size itemsz;
39  char *end = from + len;
40  Item items[MaxIndexTuplesPerPage];
41  uint16 itemsizes[MaxIndexTuplesPerPage];
42  int i;
43  int nitems;
44 
45  /*
46  * To get the items back in the original order, we add them to the page in
47  * reverse. To figure out where one tuple ends and another begins, we
48  * have to scan them in forward order first.
49  */
50  i = 0;
51  while (from < end)
52  {
53  /*
54  * As we step through the items, 'from' won't always be properly
55  * aligned, so we need to use memcpy(). Further, we use Item (which
56  * is just a char*) here for our items array for the same reason;
57  * wouldn't want the compiler or anyone thinking that an item is
58  * aligned when it isn't.
59  */
60  memcpy(&itupdata, from, sizeof(IndexTupleData));
61  itemsz = IndexTupleSize(&itupdata);
62  itemsz = MAXALIGN(itemsz);
63 
64  items[i] = (Item) from;
65  itemsizes[i] = itemsz;
66  i++;
67 
68  from += itemsz;
69  }
70  nitems = i;
71 
72  for (i = nitems - 1; i >= 0; i--)
73  {
74  if (PageAddItem(page, items[i], itemsizes[i], nitems - i,
75  false, false) == InvalidOffsetNumber)
76  elog(PANIC, "_bt_restore_page: cannot add item to page");
77  from += itemsz;
78  }
79 }
Pointer Item
Definition: item.h:17
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:416
#define PANIC
Definition: elog.h:53
unsigned short uint16
Definition: c.h:358
#define InvalidOffsetNumber
Definition: off.h:26
size_t Size
Definition: c.h:467
#define MAXALIGN(LEN)
Definition: c.h:692
#define MaxIndexTuplesPerPage
Definition: itup.h:145
#define elog(elevel,...)
Definition: elog.h:228
int i
#define IndexTupleSize(itup)
Definition: itup.h:71

◆ btree_mask()

void btree_mask ( char *  pagedata,
BlockNumber  blkno 
)

Definition at line 872 of file nbtxlog.c.

References BTP_HAS_GARBAGE, BTP_SPLIT_END, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, mask_lp_flags(), mask_page_content(), mask_page_hint_bits(), mask_page_lsn_and_checksum(), mask_unused_space(), P_ISDELETED, P_ISLEAF, and PageGetSpecialPointer.

873 {
874  Page page = (Page) pagedata;
875  BTPageOpaque maskopaq;
876 
877  mask_page_lsn_and_checksum(page);
878 
879  mask_page_hint_bits(page);
880  mask_unused_space(page);
881 
882  maskopaq = (BTPageOpaque) PageGetSpecialPointer(page);
883 
884  if (P_ISDELETED(maskopaq))
885  {
886  /*
887  * Mask page content on a DELETED page since it will be re-initialized
888  * during replay. See btree_xlog_unlink_page() for details.
889  */
890  mask_page_content(page);
891  }
892  else if (P_ISLEAF(maskopaq))
893  {
894  /*
895  * In btree leaf pages, it is possible to modify the LP_FLAGS without
896  * emitting any WAL record. Hence, mask the line pointer flags. See
897  * _bt_killitems(), _bt_check_unique() for details.
898  */
899  mask_lp_flags(page);
900  }
901 
902  /*
903  * BTP_HAS_GARBAGE is just an un-logged hint bit. So, mask it. See
904  * _bt_killitems(), _bt_check_unique() for details.
905  */
906  maskopaq->btpo_flags &= ~BTP_HAS_GARBAGE;
907 
908  /*
909  * During replay of a btree page split, we don't set the BTP_SPLIT_END
910  * flag of the right sibling and initialize the cycle_id to 0 for the same
911  * page. See btree_xlog_split() for details.
912  */
913  maskopaq->btpo_flags &= ~BTP_SPLIT_END;
914  maskopaq->btpo_cycleid = 0;
915 }
#define BTP_SPLIT_END
Definition: nbtree.h:77
void mask_page_hint_bits(Page page)
Definition: bufmask.c:46
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
void mask_unused_space(Page page)
Definition: bufmask.c:71
void mask_page_content(Page page)
Definition: bufmask.c:119
BTCycleId btpo_cycleid
Definition: nbtree.h:66
#define P_ISDELETED(opaque)
Definition: nbtree.h:192
void mask_page_lsn_and_checksum(Page page)
Definition: bufmask.c:31
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
uint16 btpo_flags
Definition: nbtree.h:65
void mask_lp_flags(Page page)
Definition: bufmask.c:95
#define BTP_HAS_GARBAGE
Definition: nbtree.h:78
Pointer Page
Definition: bufpage.h:78
#define P_ISLEAF(opaque)
Definition: nbtree.h:190

◆ btree_redo()

void btree_redo ( XLogReaderState *  record)

Definition at line 820 of file nbtxlog.c.

References _bt_restore_meta(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_reuse_page(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), elog, PANIC, XLOG_BTREE_DELETE, XLOG_BTREE_INSERT_LEAF, XLOG_BTREE_INSERT_META, XLOG_BTREE_INSERT_UPPER, XLOG_BTREE_MARK_PAGE_HALFDEAD, XLOG_BTREE_META_CLEANUP, XLOG_BTREE_NEWROOT, XLOG_BTREE_REUSE_PAGE, XLOG_BTREE_SPLIT_L, XLOG_BTREE_SPLIT_R, XLOG_BTREE_UNLINK_PAGE, XLOG_BTREE_UNLINK_PAGE_META, XLOG_BTREE_VACUUM, XLogRecGetInfo, and XLR_INFO_MASK.

821 {
822  uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
823 
824  switch (info)
825  {
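826  case XLOG_BTREE_INSERT_LEAF:
827  btree_xlog_insert(true, false, record);
828  break;
829  case XLOG_BTREE_INSERT_UPPER:
830  btree_xlog_insert(false, false, record);
831  break;
832  case XLOG_BTREE_INSERT_META:
833  btree_xlog_insert(false, true, record);
834  break;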
835  case XLOG_BTREE_SPLIT_L:
836  btree_xlog_split(true, record);
837  break;
838  case XLOG_BTREE_SPLIT_R:
839  btree_xlog_split(false, record);
840  break;
841  case XLOG_BTREE_VACUUM:
842  btree_xlog_vacuum(record);
843  break;
844  case XLOG_BTREE_DELETE:
845  btree_xlog_delete(record);
846  break;
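847  case XLOG_BTREE_MARK_PAGE_HALFDEAD:
848  btree_xlog_mark_page_halfdead(info, record);
849  break;
850  case XLOG_BTREE_UNLINK_PAGE:
851  case XLOG_BTREE_UNLINK_PAGE_META:
852  btree_xlog_unlink_page(info, record);
853  break;
854  case XLOG_BTREE_NEWROOT:
855  btree_xlog_newroot(record);
856  break;
857  case XLOG_BTREE_REUSE_PAGE:
858  btree_xlog_reuse_page(record);
859  break;
860  case XLOG_BTREE_META_CLEANUP:
861  _bt_restore_meta(record, 0);
862  break;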
863  default:
864  elog(PANIC, "btree_redo: unknown op code %u", info);
865  }
866 }
static void btree_xlog_vacuum(XLogReaderState *record)
Definition: nbtxlog.c:383
unsigned char uint8
Definition: c.h:357
#define XLOG_BTREE_INSERT_META
Definition: nbtxlog.h:28
static void btree_xlog_delete(XLogReaderState *record)
Definition: nbtxlog.c:506
#define PANIC
Definition: elog.h:53
#define XLOG_BTREE_NEWROOT
Definition: nbtxlog.h:35
static void _bt_restore_meta(XLogReaderState *record, uint8 block_id)
Definition: nbtxlog.c:82
static void btree_xlog_newroot(XLogReaderState *record)
Definition: nbtxlog.c:758
static void btree_xlog_reuse_page(XLogReaderState *record)
Definition: nbtxlog.c:798
#define XLOG_BTREE_INSERT_LEAF
Definition: nbtxlog.h:26
#define XLOG_BTREE_VACUUM
Definition: nbtxlog.h:37
static void btree_xlog_split(bool onleft, XLogReaderState *record)
Definition: nbtxlog.c:206
#define XLOG_BTREE_UNLINK_PAGE
Definition: nbtxlog.h:33
#define XLogRecGetInfo(decoder)
Definition: xlogreader.h:279
#define XLOG_BTREE_DELETE
Definition: nbtxlog.h:32
#define XLOG_BTREE_REUSE_PAGE
Definition: nbtxlog.h:39
#define XLOG_BTREE_MARK_PAGE_HALFDEAD
Definition: nbtxlog.h:36
#define XLOG_BTREE_SPLIT_R
Definition: nbtxlog.h:30
#define XLR_INFO_MASK
Definition: xlogrecord.h:62
static void btree_xlog_unlink_page(uint8 info, XLogReaderState *record)
Definition: nbtxlog.c:646
#define XLOG_BTREE_INSERT_UPPER
Definition: nbtxlog.h:27
static void btree_xlog_mark_page_halfdead(uint8 info, XLogReaderState *record)
Definition: nbtxlog.c:566
#define elog(elevel,...)
Definition: elog.h:228
#define XLOG_BTREE_SPLIT_L
Definition: nbtxlog.h:29
#define XLOG_BTREE_UNLINK_PAGE_META
Definition: nbtxlog.h:34
#define XLOG_BTREE_META_CLEANUP
Definition: nbtxlog.h:41
static void btree_xlog_insert(bool isleaf, bool ismeta, XLogReaderState *record)
Definition: nbtxlog.c:159

◆ btree_xlog_delete()

static void btree_xlog_delete ( XLogReaderState *  record)
static

Definition at line 506 of file nbtxlog.c.

References BLK_NEEDS_REDO, BTP_HAS_GARBAGE, BTPageOpaqueData::btpo_flags, BufferGetPage, BufferIsValid, XLogReaderState::EndRecPtr, InHotStandby, xl_btree_delete::latestRemovedXid, MarkBufferDirty(), xl_btree_delete::nitems, PageGetSpecialPointer, PageIndexMultiDelete(), PageSetLSN, ResolveRecoveryConflictWithSnapshot(), SizeOfBtreeDelete, UnlockReleaseBuffer(), XLogReadBufferForRedo(), XLogRecGetBlockTag(), XLogRecGetData, and XLogRecGetDataLen.

Referenced by btree_redo().

507 {
508  XLogRecPtr lsn = record->EndRecPtr;
509  xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
510  Buffer buffer;
511  Page page;
512  BTPageOpaque opaque;
513 
514  /*
515  * If we have any conflict processing to do, it must happen before we
516  * update the page.
517  *
518  * Btree delete records can conflict with standby queries. You might
519  * think that vacuum records would conflict as well, but we've handled
520  * that already. XLOG_HEAP2_CLEANUP_INFO records provide the highest xid
521  * cleaned by the vacuum of the heap and so we can resolve any conflicts
522  * just once when that arrives. After that we know that no conflicts
523  * exist from individual btree vacuum records on that index.
524  */
525  if (InHotStandby)
526  {
527  RelFileNode rnode;
528 
529  XLogRecGetBlockTag(record, 0, &rnode, NULL, NULL);
530 
531  ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, rnode);
532  }
533 
534  /*
535  * We don't need to take a cleanup lock to apply these changes. See
536  * nbtree/README for details.
537  */
538  if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
539  {
540  page = (Page) BufferGetPage(buffer);
541 
542  if (XLogRecGetDataLen(record) > SizeOfBtreeDelete)
543  {
544  OffsetNumber *unused;
545 
546  unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeDelete);
547 
548  PageIndexMultiDelete(page, unused, xlrec->nitems);
549  }
550 
551  /*
552  * Mark the page as not containing any LP_DEAD items --- see comments
553  * in _bt_delitems_delete().
554  */
555  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
556  opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
557 
558  PageSetLSN(page, lsn);
559  MarkBufferDirty(buffer);
560  }
561  if (BufferIsValid(buffer))
562  UnlockReleaseBuffer(buffer);
563 }
TransactionId latestRemovedXid
Definition: nbtxlog.h:128
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1458
#define InHotStandby
Definition: xlog.h:74
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
XLogRecPtr EndRecPtr
Definition: xlogreader.h:132
uint16 OffsetNumber
Definition: off.h:24
#define XLogRecGetData(decoder)
Definition: xlogreader.h:283
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3388
#define XLogRecGetDataLen(decoder)
Definition: xlogreader.h:284
#define BufferGetPage(buffer)
Definition: bufmgr.h:159
bool XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id, RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum)
Definition: xlogreader.c:1460
XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id, Buffer *buf)
Definition: xlogutils.c:289
uint64 XLogRecPtr
Definition: xlogdefs.h:21
void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
Definition: bufpage.c:835
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
#define BufferIsValid(bufnum)
Definition: bufmgr.h:113
#define SizeOfBtreeDelete
Definition: nbtxlog.h:134
void ResolveRecoveryConflictWithSnapshot(TransactionId latestRemovedXid, RelFileNode node)
Definition: standby.c:294
uint16 btpo_flags
Definition: nbtree.h:65
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
int Buffer
Definition: buf.h:23
#define BTP_HAS_GARBAGE
Definition: nbtree.h:78
Pointer Page
Definition: bufpage.h:78

◆ btree_xlog_insert()

static void btree_xlog_insert ( bool  isleaf,
bool  ismeta,
XLogReaderState *  record 
)
static

Definition at line 159 of file nbtxlog.c.

References _bt_clear_incomplete_split(), _bt_restore_meta(), BLK_NEEDS_REDO, BufferGetPage, BufferIsValid, elog, XLogReaderState::EndRecPtr, InvalidOffsetNumber, MarkBufferDirty(), xl_btree_insert::offnum, PageAddItem, PageSetLSN, PANIC, UnlockReleaseBuffer(), XLogReadBufferForRedo(), XLogRecGetBlockData(), and XLogRecGetData.

Referenced by btree_redo().

160 {
161  XLogRecPtr lsn = record->EndRecPtr;
162  xl_btree_insert *xlrec = (xl_btree_insert *) XLogRecGetData(record);
163  Buffer buffer;
164  Page page;
165 
166  /*
167  * Insertion to an internal page finishes an incomplete split at the child
168  * level. Clear the incomplete-split flag in the child. Note: during
169  * normal operation, the child and parent pages are locked at the same
170  * time, so that clearing the flag and inserting the downlink appear
171  * atomic to other backends. We don't bother with that during replay,
172  * because readers don't care about the incomplete-split flag and there
173  * cannot be updates happening.
174  */
175  if (!isleaf)
176  _bt_clear_incomplete_split(record, 1);
177  if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
178  {
179  Size datalen;
180  char *datapos = XLogRecGetBlockData(record, 0, &datalen);
181 
182  page = BufferGetPage(buffer);
183 
184  if (PageAddItem(page, (Item) datapos, datalen, xlrec->offnum,
185  false, false) == InvalidOffsetNumber)
186  elog(PANIC, "btree_xlog_insert: failed to add item");
187 
188  PageSetLSN(page, lsn);
189  MarkBufferDirty(buffer);
190  }
191  if (BufferIsValid(buffer))
192  UnlockReleaseBuffer(buffer);
193 
194  /*
195  * Note: in normal operation, we'd update the metapage while still holding
196  * lock on the page we inserted into. But during replay it's not
197  * necessary to hold that lock, since no other index updates can be
198  * happening concurrently, and readers will cope fine with following an
199  * obsolete link from the metapage.
200  */
201  if (ismeta)
202  _bt_restore_meta(record, 2);
203 }
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1458
Pointer Item
Definition: item.h:17
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:416
#define PANIC
Definition: elog.h:53
XLogRecPtr EndRecPtr
Definition: xlogreader.h:132
static void _bt_restore_meta(XLogReaderState *record, uint8 block_id)
Definition: nbtxlog.c:82
#define XLogRecGetData(decoder)
Definition: xlogreader.h:283
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3388
static void _bt_clear_incomplete_split(XLogReaderState *record, uint8 block_id)
Definition: nbtxlog.c:138
#define BufferGetPage(buffer)
Definition: bufmgr.h:159
char * XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len)
Definition: xlogreader.c:1484
OffsetNumber offnum
Definition: nbtxlog.h:70
#define InvalidOffsetNumber
Definition: off.h:26
XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id, Buffer *buf)
Definition: xlogutils.c:289
uint64 XLogRecPtr
Definition: xlogdefs.h:21
size_t Size
Definition: c.h:467
#define BufferIsValid(bufnum)
Definition: bufmgr.h:113
#define elog(elevel,...)
Definition: elog.h:228
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
int Buffer
Definition: buf.h:23
Pointer Page
Definition: bufpage.h:78

◆ btree_xlog_mark_page_halfdead()

static void btree_xlog_mark_page_halfdead ( uint8  info,
XLogReaderState *  record 
)
static

Definition at line 566 of file nbtxlog.c.

References _bt_pageinit(), BLK_NEEDS_REDO, BTP_HALF_DEAD, BTP_LEAF, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BTreeInnerTupleGetDownLink, BTreeInnerTupleSetDownLink, BTreeTupleSetTopParent, BufferGetPage, BufferGetPageSize, BufferIsValid, elog, XLogReaderState::EndRecPtr, ERROR, InvalidOffsetNumber, xl_btree_mark_page_halfdead::leftblk, BTPageOpaqueData::level, MarkBufferDirty(), MemSet, OffsetNumberNext, P_HIKEY, PageAddItem, PageGetItem, PageGetItemId, PageGetSpecialPointer, PageIndexTupleDelete(), PageSetLSN, xl_btree_mark_page_halfdead::poffset, xl_btree_mark_page_halfdead::rightblk, IndexTupleData::t_info, xl_btree_mark_page_halfdead::topparent, UnlockReleaseBuffer(), XLogInitBufferForRedo(), XLogReadBufferForRedo(), and XLogRecGetData.

Referenced by btree_redo().

567 {
568  XLogRecPtr lsn = record->EndRecPtr;
569  xl_btree_mark_page_halfdead *xlrec = (xl_btree_mark_page_halfdead *) XLogRecGetData(record);
570  Buffer buffer;
571  Page page;
572  BTPageOpaque pageop;
573  IndexTupleData trunctuple;
574 
575  /*
576  * In normal operation, we would lock all the pages this WAL record
577  * touches before changing any of them. In WAL replay, it should be okay
578  * to lock just one page at a time, since no concurrent index updates can
579  * be happening, and readers should not care whether they arrive at the
580  * target page or not (since it's surely empty).
581  */
582 
583  /* parent page */
584  if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
585  {
586  OffsetNumber poffset;
587  ItemId itemid;
588  IndexTuple itup;
589  OffsetNumber nextoffset;
590  BlockNumber rightsib;
591 
592  page = (Page) BufferGetPage(buffer);
593  pageop = (BTPageOpaque) PageGetSpecialPointer(page);
594 
595  poffset = xlrec->poffset;
596 
597  nextoffset = OffsetNumberNext(poffset);
598  itemid = PageGetItemId(page, nextoffset);
599  itup = (IndexTuple) PageGetItem(page, itemid);
600  rightsib = BTreeInnerTupleGetDownLink(itup);
601 
602  itemid = PageGetItemId(page, poffset);
603  itup = (IndexTuple) PageGetItem(page, itemid);
604  BTreeInnerTupleSetDownLink(itup, rightsib);
605  nextoffset = OffsetNumberNext(poffset);
606  PageIndexTupleDelete(page, nextoffset);
607 
608  PageSetLSN(page, lsn);
609  MarkBufferDirty(buffer);
610  }
611  if (BufferIsValid(buffer))
612  UnlockReleaseBuffer(buffer);
613 
614  /* Rewrite the leaf page as a halfdead page */
615  buffer = XLogInitBufferForRedo(record, 0);
616  page = (Page) BufferGetPage(buffer);
617 
618  _bt_pageinit(page, BufferGetPageSize(buffer));
619  pageop = (BTPageOpaque) PageGetSpecialPointer(page);
620 
621  pageop->btpo_prev = xlrec->leftblk;
622  pageop->btpo_next = xlrec->rightblk;
623  pageop->btpo.level = 0;
624  pageop->btpo_flags = BTP_HALF_DEAD | BTP_LEAF;
625  pageop->btpo_cycleid = 0;
626 
627  /*
628  * Construct a dummy hikey item that points to the next parent to be
629  * deleted (if any).
630  */
631  MemSet(&trunctuple, 0, sizeof(IndexTupleData));
632  trunctuple.t_info = sizeof(IndexTupleData);
633  BTreeTupleSetTopParent(&trunctuple, xlrec->topparent);
634 
635  if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY,
636  false, false) == InvalidOffsetNumber)
637  elog(ERROR, "could not add dummy high key to half-dead page");
638 
639  PageSetLSN(page, lsn);
640  MarkBufferDirty(buffer);
641  UnlockReleaseBuffer(buffer);
642 }
BlockNumber btpo_next
Definition: nbtree.h:59
#define BTreeInnerTupleGetDownLink(itup)
Definition: nbtree.h:302
void PageIndexTupleDelete(Page page, OffsetNumber offnum)
Definition: bufpage.c:726
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1458
#define BTP_LEAF
Definition: nbtree.h:72
#define BTP_HALF_DEAD
Definition: nbtree.h:76
union BTPageOpaqueData::@46 btpo
Pointer Item
Definition: item.h:17
#define MemSet(start, val, len)
Definition: c.h:962
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:416
uint32 BlockNumber
Definition: block.h:31
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
XLogRecPtr EndRecPtr
Definition: xlogreader.h:132
uint16 OffsetNumber
Definition: off.h:24
#define XLogRecGetData(decoder)
Definition: xlogreader.h:283
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3388
#define ERROR
Definition: elog.h:43
Buffer XLogInitBufferForRedo(XLogReaderState *record, uint8 block_id)
Definition: xlogutils.c:301
#define BTreeTupleSetTopParent(itup, blkno)
Definition: nbtree.h:315
BTCycleId btpo_cycleid
Definition: nbtree.h:66
BlockNumber btpo_prev
Definition: nbtree.h:58
IndexTupleData * IndexTuple
Definition: itup.h:53
#define BufferGetPage(buffer)
Definition: bufmgr.h:159
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
uint32 level
Definition: nbtree.h:62
struct IndexTupleData IndexTupleData
#define BufferGetPageSize(buffer)
Definition: bufmgr.h:146
#define InvalidOffsetNumber
Definition: off.h:26
XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id, Buffer *buf)
Definition: xlogutils.c:289
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
#define BufferIsValid(bufnum)
Definition: bufmgr.h:113
#define BTreeInnerTupleSetDownLink(itup, blkno)
Definition: nbtree.h:304
#define P_HIKEY
Definition: nbtree.h:218
#define elog(elevel,...)
Definition: elog.h:228
void _bt_pageinit(Page page, Size size)
Definition: nbtpage.c:924
unsigned short t_info
Definition: itup.h:49
uint16 btpo_flags
Definition: nbtree.h:65
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
int Buffer
Definition: buf.h:23
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
Pointer Page
Definition: bufpage.h:78

◆ btree_xlog_newroot()

static void btree_xlog_newroot ( XLogReaderState *  record)
static

Definition at line 758 of file nbtxlog.c.

References _bt_clear_incomplete_split(), _bt_pageinit(), _bt_restore_meta(), _bt_restore_page(), BTP_LEAF, BTP_ROOT, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BufferGetPage, BufferGetPageSize, XLogReaderState::EndRecPtr, BTPageOpaqueData::level, xl_btree_newroot::level, MarkBufferDirty(), P_NONE, PageGetSpecialPointer, PageSetLSN, UnlockReleaseBuffer(), XLogInitBufferForRedo(), XLogRecGetBlockData(), and XLogRecGetData.

Referenced by btree_redo().

759 {
760  XLogRecPtr lsn = record->EndRecPtr;
761  xl_btree_newroot *xlrec = (xl_btree_newroot *) XLogRecGetData(record);
762  Buffer buffer;
763  Page page;
764  BTPageOpaque pageop;
765  char *ptr;
766  Size len;
767 
768  buffer = XLogInitBufferForRedo(record, 0);
769  page = (Page) BufferGetPage(buffer);
770 
771  _bt_pageinit(page, BufferGetPageSize(buffer));
772  pageop = (BTPageOpaque) PageGetSpecialPointer(page);
773 
774  pageop->btpo_flags = BTP_ROOT;
775  pageop->btpo_prev = pageop->btpo_next = P_NONE;
776  pageop->btpo.level = xlrec->level;
777  if (xlrec->level == 0)
778  pageop->btpo_flags |= BTP_LEAF;
779  pageop->btpo_cycleid = 0;
780 
781  if (xlrec->level > 0)
782  {
783  ptr = XLogRecGetBlockData(record, 0, &len);
784  _bt_restore_page(page, ptr, len);
785 
786  /* Clear the incomplete-split flag in left child */
787  _bt_clear_incomplete_split(record, 1);
788  }
789 
790  PageSetLSN(page, lsn);
791  MarkBufferDirty(buffer);
792  UnlockReleaseBuffer(buffer);
793 
794  _bt_restore_meta(record, 2);
795 }
#define BTP_ROOT
Definition: nbtree.h:73
BlockNumber btpo_next
Definition: nbtree.h:59
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1458
#define BTP_LEAF
Definition: nbtree.h:72
union BTPageOpaqueData::@46 btpo
#define P_NONE
Definition: nbtree.h:182
uint32 level
Definition: nbtxlog.h:247
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
XLogRecPtr EndRecPtr
Definition: xlogreader.h:132
static void _bt_restore_meta(XLogReaderState *record, uint8 block_id)
Definition: nbtxlog.c:82
#define XLogRecGetData(decoder)
Definition: xlogreader.h:283
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3388
Buffer XLogInitBufferForRedo(XLogReaderState *record, uint8 block_id)
Definition: xlogutils.c:301
BTCycleId btpo_cycleid
Definition: nbtree.h:66
BlockNumber btpo_prev
Definition: nbtree.h:58
static void _bt_clear_incomplete_split(XLogReaderState *record, uint8 block_id)
Definition: nbtxlog.c:138
#define BufferGetPage(buffer)
Definition: bufmgr.h:159
uint32 level
Definition: nbtree.h:62
char * XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len)
Definition: xlogreader.c:1484
static void _bt_restore_page(Page page, char *from, int len)
Definition: nbtxlog.c:35
#define BufferGetPageSize(buffer)
Definition: bufmgr.h:146
uint64 XLogRecPtr
Definition: xlogdefs.h:21
size_t Size
Definition: c.h:467
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
void _bt_pageinit(Page page, Size size)
Definition: nbtpage.c:924
uint16 btpo_flags
Definition: nbtree.h:65
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
int Buffer
Definition: buf.h:23
Pointer Page
Definition: bufpage.h:78

◆ btree_xlog_reuse_page()

static void btree_xlog_reuse_page ( XLogReaderState * record)
static

Definition at line 798 of file nbtxlog.c.

References InHotStandby, xl_btree_reuse_page::latestRemovedXid, xl_btree_reuse_page::node, ResolveRecoveryConflictWithSnapshot(), and XLogRecGetData.

Referenced by btree_redo().

799 {
800  xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) XLogRecGetData(record);
801 
802  /*
803  * Btree reuse_page records exist to provide a conflict point when we
804  * reuse pages in the index via the FSM. That's all they do though.
805  *
806  * latestRemovedXid was the page's btpo.xact. The btpo.xact <
807  * RecentGlobalXmin test in _bt_page_recyclable() conceptually mirrors the
808  * pgxact->xmin > limitXmin test in GetConflictingVirtualXIDs().
809  * Consequently, one XID value achieves the same exclusion effect on
810  * master and standby.
811  */
812  if (InHotStandby)
813  {
814  ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid,
815  xlrec->node);
816  }
817 }
RelFileNode node
Definition: nbtxlog.h:141
#define InHotStandby
Definition: xlog.h:74
#define XLogRecGetData(decoder)
Definition: xlogreader.h:283
void ResolveRecoveryConflictWithSnapshot(TransactionId latestRemovedXid, RelFileNode node)
Definition: standby.c:294
TransactionId latestRemovedXid
Definition: nbtxlog.h:143

◆ btree_xlog_split()

static void btree_xlog_split ( bool  onleft,
XLogReaderState * record
)
static

Definition at line 206 of file nbtxlog.c.

References _bt_clear_incomplete_split(), _bt_pageinit(), _bt_restore_page(), Assert, BLK_NEEDS_REDO, BTP_INCOMPLETE_SPLIT, BTP_LEAF, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BufferGetPage, BufferGetPageSize, BufferIsValid, elog, XLogReaderState::EndRecPtr, ERROR, xl_btree_split::firstright, IndexTupleSize, InvalidOffsetNumber, ItemIdGetLength, BTPageOpaqueData::level, xl_btree_split::level, MarkBufferDirty(), MAXALIGN, xl_btree_split::newitemoff, OffsetNumberNext, P_FIRSTDATAKEY, P_HIKEY, P_NONE, PageAddItem, PageGetItem, PageGetItemId, PageGetSpecialPointer, PageGetTempPageCopySpecial(), PageRestoreTempPage(), PageSetLSN, PANIC, UnlockReleaseBuffer(), XLogInitBufferForRedo(), XLogReadBufferForRedo(), XLogRecGetBlockData(), XLogRecGetBlockTag(), and XLogRecGetData.

Referenced by btree_redo().

207 {
208  XLogRecPtr lsn = record->EndRecPtr;
209  xl_btree_split *xlrec = (xl_btree_split *) XLogRecGetData(record);
210  bool isleaf = (xlrec->level == 0);
211  Buffer lbuf;
212  Buffer rbuf;
213  Page rpage;
214  BTPageOpaque ropaque;
215  char *datapos;
216  Size datalen;
217  BlockNumber leftsib;
218  BlockNumber rightsib;
219  BlockNumber rnext;
220 
221  XLogRecGetBlockTag(record, 0, NULL, NULL, &leftsib);
222  XLogRecGetBlockTag(record, 1, NULL, NULL, &rightsib);
223  if (!XLogRecGetBlockTag(record, 2, NULL, NULL, &rnext))
224  rnext = P_NONE;
225 
226  /*
227  * Clear the incomplete split flag on the left sibling of the child page
228  * this is a downlink for. (Like in btree_xlog_insert, this can be done
229  * before locking the other pages)
230  */
231  if (!isleaf)
232  _bt_clear_incomplete_split(record, 3);
233 
234  /* Reconstruct right (new) sibling page from scratch */
235  rbuf = XLogInitBufferForRedo(record, 1);
236  datapos = XLogRecGetBlockData(record, 1, &datalen);
237  rpage = (Page) BufferGetPage(rbuf);
238 
239  _bt_pageinit(rpage, BufferGetPageSize(rbuf));
240  ropaque = (BTPageOpaque) PageGetSpecialPointer(rpage);
241 
242  ropaque->btpo_prev = leftsib;
243  ropaque->btpo_next = rnext;
244  ropaque->btpo.level = xlrec->level;
245  ropaque->btpo_flags = isleaf ? BTP_LEAF : 0;
246  ropaque->btpo_cycleid = 0;
247 
248  _bt_restore_page(rpage, datapos, datalen);
249 
250  PageSetLSN(rpage, lsn);
251  MarkBufferDirty(rbuf);
252 
253  /* Now reconstruct left (original) sibling page */
254  if (XLogReadBufferForRedo(record, 0, &lbuf) == BLK_NEEDS_REDO)
255  {
256  /*
257  * To retain the same physical order of the tuples that they had, we
258  * initialize a temporary empty page for the left page and add all the
259  * items to that in item number order. This mirrors how _bt_split()
260  * works. Retaining the same physical order makes WAL consistency
261  * checking possible. See also _bt_restore_page(), which does the
262  * same for the right page.
263  */
264  Page lpage = (Page) BufferGetPage(lbuf);
265  BTPageOpaque lopaque = (BTPageOpaque) PageGetSpecialPointer(lpage);
266  OffsetNumber off;
267  IndexTuple newitem = NULL,
268  left_hikey = NULL;
269  Size newitemsz = 0,
270  left_hikeysz = 0;
271  Page newlpage;
272  OffsetNumber leftoff;
273 
274  datapos = XLogRecGetBlockData(record, 0, &datalen);
275 
276  if (onleft)
277  {
278  newitem = (IndexTuple) datapos;
279  newitemsz = MAXALIGN(IndexTupleSize(newitem));
280  datapos += newitemsz;
281  datalen -= newitemsz;
282  }
283 
284  /* Extract left hikey and its size (assuming 16-bit alignment) */
285  left_hikey = (IndexTuple) datapos;
286  left_hikeysz = MAXALIGN(IndexTupleSize(left_hikey));
287  datapos += left_hikeysz;
288  datalen -= left_hikeysz;
289 
290  Assert(datalen == 0);
291 
292  newlpage = PageGetTempPageCopySpecial(lpage);
293 
294  /* Set high key */
295  leftoff = P_HIKEY;
296  if (PageAddItem(newlpage, (Item) left_hikey, left_hikeysz,
297  P_HIKEY, false, false) == InvalidOffsetNumber)
298  elog(PANIC, "failed to add high key to left page after split");
299  leftoff = OffsetNumberNext(leftoff);
300 
301  for (off = P_FIRSTDATAKEY(lopaque); off < xlrec->firstright; off++)
302  {
303  ItemId itemid;
304  Size itemsz;
305  IndexTuple item;
306 
307  /* add the new item if it was inserted on left page */
308  if (onleft && off == xlrec->newitemoff)
309  {
310  if (PageAddItem(newlpage, (Item) newitem, newitemsz, leftoff,
311  false, false) == InvalidOffsetNumber)
312  elog(ERROR, "failed to add new item to left page after split");
313  leftoff = OffsetNumberNext(leftoff);
314  }
315 
316  itemid = PageGetItemId(lpage, off);
317  itemsz = ItemIdGetLength(itemid);
318  item = (IndexTuple) PageGetItem(lpage, itemid);
319  if (PageAddItem(newlpage, (Item) item, itemsz, leftoff,
320  false, false) == InvalidOffsetNumber)
321  elog(ERROR, "failed to add old item to left page after split");
322  leftoff = OffsetNumberNext(leftoff);
323  }
324 
325  /* cope with possibility that newitem goes at the end */
326  if (onleft && off == xlrec->newitemoff)
327  {
328  if (PageAddItem(newlpage, (Item) newitem, newitemsz, leftoff,
329  false, false) == InvalidOffsetNumber)
330  elog(ERROR, "failed to add new item to left page after split");
331  leftoff = OffsetNumberNext(leftoff);
332  }
333 
334  PageRestoreTempPage(newlpage, lpage);
335 
336  /* Fix opaque fields */
337  lopaque->btpo_flags = BTP_INCOMPLETE_SPLIT;
338  if (isleaf)
339  lopaque->btpo_flags |= BTP_LEAF;
340  lopaque->btpo_next = rightsib;
341  lopaque->btpo_cycleid = 0;
342 
343  PageSetLSN(lpage, lsn);
344  MarkBufferDirty(lbuf);
345  }
346 
347  /*
348  * We no longer need the buffers. They must be released together, so that
349  * readers cannot observe two inconsistent halves.
350  */
351  if (BufferIsValid(lbuf))
352  UnlockReleaseBuffer(lbuf);
353  UnlockReleaseBuffer(rbuf);
354 
355  /*
356  * Fix left-link of the page to the right of the new right sibling.
357  *
358  * Note: in normal operation, we do this while still holding lock on the
359  * two split pages. However, that's not necessary for correctness in WAL
360  * replay, because no other index update can be in progress, and readers
361  * will cope properly when following an obsolete left-link.
362  */
363  if (rnext != P_NONE)
364  {
365  Buffer buffer;
366 
367  if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO)
368  {
369  Page page = (Page) BufferGetPage(buffer);
370  BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page);
371 
372  pageop->btpo_prev = rightsib;
373 
374  PageSetLSN(page, lsn);
375  MarkBufferDirty(buffer);
376  }
377  if (BufferIsValid(buffer))
378  UnlockReleaseBuffer(buffer);
379  }
380 }
BlockNumber btpo_next
Definition: nbtree.h:59
void PageRestoreTempPage(Page tempPage, Page oldPage)
Definition: bufpage.c:410
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1458
#define P_FIRSTDATAKEY(opaque)
Definition: nbtree.h:220
#define BTP_LEAF
Definition: nbtree.h:72
union BTPageOpaqueData::@46 btpo
Pointer Item
Definition: item.h:17
#define P_NONE
Definition: nbtree.h:182
#define BTP_INCOMPLETE_SPLIT
Definition: nbtree.h:79
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:416
uint32 BlockNumber
Definition: block.h:31
#define PANIC
Definition: elog.h:53
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
XLogRecPtr EndRecPtr
Definition: xlogreader.h:132
uint16 OffsetNumber
Definition: off.h:24
Page PageGetTempPageCopySpecial(Page page)
Definition: bufpage.c:388
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
#define XLogRecGetData(decoder)
Definition: xlogreader.h:283
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3388
#define ERROR
Definition: elog.h:43
Buffer XLogInitBufferForRedo(XLogReaderState *record, uint8 block_id)
Definition: xlogutils.c:301
OffsetNumber newitemoff
Definition: nbtxlog.h:114
BTCycleId btpo_cycleid
Definition: nbtree.h:66
BlockNumber btpo_prev
Definition: nbtree.h:58
static void _bt_clear_incomplete_split(XLogReaderState *record, uint8 block_id)
Definition: nbtxlog.c:138
IndexTupleData * IndexTuple
Definition: itup.h:53
#define BufferGetPage(buffer)
Definition: bufmgr.h:159
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
uint32 level
Definition: nbtree.h:62
bool XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id, RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum)
Definition: xlogreader.c:1460
char * XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len)
Definition: xlogreader.c:1484
uint32 level
Definition: nbtxlog.h:112
static void _bt_restore_page(Page page, char *from, int len)
Definition: nbtxlog.c:35
#define BufferGetPageSize(buffer)
Definition: bufmgr.h:146
#define InvalidOffsetNumber
Definition: off.h:26
XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id, Buffer *buf)
Definition: xlogutils.c:289
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:739
OffsetNumber firstright
Definition: nbtxlog.h:113
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
size_t Size
Definition: c.h:467
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
#define MAXALIGN(LEN)
Definition: c.h:692
#define BufferIsValid(bufnum)
Definition: bufmgr.h:113
#define P_HIKEY
Definition: nbtree.h:218
#define elog(elevel,...)
Definition: elog.h:228
void _bt_pageinit(Page page, Size size)
Definition: nbtpage.c:924
uint16 btpo_flags
Definition: nbtree.h:65
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
int Buffer
Definition: buf.h:23
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
Pointer Page
Definition: bufpage.h:78
#define IndexTupleSize(itup)
Definition: itup.h:71

◆ btree_xlog_unlink_page()

static void btree_xlog_unlink_page ( uint8  info,
XLogReaderState * record
)
static

Definition at line 646 of file nbtxlog.c.

References _bt_pageinit(), _bt_restore_meta(), BLK_NEEDS_REDO, BTP_DELETED, BTP_HALF_DEAD, BTP_LEAF, BTPageOpaqueData::btpo, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, xl_btree_unlink_page::btpo_xact, BTreeTupleSetTopParent, BufferGetPage, BufferGetPageSize, BufferIsValid, elog, XLogReaderState::EndRecPtr, ERROR, InvalidOffsetNumber, xl_btree_unlink_page::leafleftsib, xl_btree_unlink_page::leafrightsib, xl_btree_unlink_page::leftsib, BTPageOpaqueData::level, MarkBufferDirty(), MemSet, P_HIKEY, P_NONE, PageAddItem, PageGetSpecialPointer, PageSetLSN, xl_btree_unlink_page::rightsib, IndexTupleData::t_info, xl_btree_unlink_page::topparent, UnlockReleaseBuffer(), BTPageOpaqueData::xact, XLOG_BTREE_UNLINK_PAGE_META, XLogInitBufferForRedo(), XLogReadBufferForRedo(), XLogRecGetData, and XLogRecHasBlockRef.

Referenced by btree_redo().

647 {
648  XLogRecPtr lsn = record->EndRecPtr;
649  xl_btree_unlink_page *xlrec = (xl_btree_unlink_page *) XLogRecGetData(record);
650  BlockNumber leftsib;
651  BlockNumber rightsib;
652  Buffer buffer;
653  Page page;
654  BTPageOpaque pageop;
655 
656  leftsib = xlrec->leftsib;
657  rightsib = xlrec->rightsib;
658 
659  /*
660  * In normal operation, we would lock all the pages this WAL record
661  * touches before changing any of them. In WAL replay, it should be okay
662  * to lock just one page at a time, since no concurrent index updates can
663  * be happening, and readers should not care whether they arrive at the
664  * target page or not (since it's surely empty).
665  */
666 
667  /* Fix left-link of right sibling */
668  if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO)
669  {
670  page = (Page) BufferGetPage(buffer);
671  pageop = (BTPageOpaque) PageGetSpecialPointer(page);
672  pageop->btpo_prev = leftsib;
673 
674  PageSetLSN(page, lsn);
675  MarkBufferDirty(buffer);
676  }
677  if (BufferIsValid(buffer))
678  UnlockReleaseBuffer(buffer);
679 
680  /* Fix right-link of left sibling, if any */
681  if (leftsib != P_NONE)
682  {
683  if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
684  {
685  page = (Page) BufferGetPage(buffer);
686  pageop = (BTPageOpaque) PageGetSpecialPointer(page);
687  pageop->btpo_next = rightsib;
688 
689  PageSetLSN(page, lsn);
690  MarkBufferDirty(buffer);
691  }
692  if (BufferIsValid(buffer))
693  UnlockReleaseBuffer(buffer);
694  }
695 
696  /* Rewrite target page as empty deleted page */
697  buffer = XLogInitBufferForRedo(record, 0);
698  page = (Page) BufferGetPage(buffer);
699 
700  _bt_pageinit(page, BufferGetPageSize(buffer));
701  pageop = (BTPageOpaque) PageGetSpecialPointer(page);
702 
703  pageop->btpo_prev = leftsib;
704  pageop->btpo_next = rightsib;
705  pageop->btpo.xact = xlrec->btpo_xact;
706  pageop->btpo_flags = BTP_DELETED;
707  pageop->btpo_cycleid = 0;
708 
709  PageSetLSN(page, lsn);
710  MarkBufferDirty(buffer);
711  UnlockReleaseBuffer(buffer);
712 
713  /*
714  * If we deleted a parent of the targeted leaf page, instead of the leaf
715  * itself, update the leaf to point to the next remaining child in the
716  * branch.
717  */
718  if (XLogRecHasBlockRef(record, 3))
719  {
720  /*
721  * There is no real data on the page, so we just re-create it from
722  * scratch using the information from the WAL record.
723  */
724  IndexTupleData trunctuple;
725 
726  buffer = XLogInitBufferForRedo(record, 3);
727  page = (Page) BufferGetPage(buffer);
728 
729  _bt_pageinit(page, BufferGetPageSize(buffer));
730  pageop = (BTPageOpaque) PageGetSpecialPointer(page);
731 
732  pageop->btpo_flags = BTP_HALF_DEAD | BTP_LEAF;
733  pageop->btpo_prev = xlrec->leafleftsib;
734  pageop->btpo_next = xlrec->leafrightsib;
735  pageop->btpo.level = 0;
736  pageop->btpo_cycleid = 0;
737 
738  /* Add a dummy hikey item */
739  MemSet(&trunctuple, 0, sizeof(IndexTupleData));
740  trunctuple.t_info = sizeof(IndexTupleData);
741  BTreeTupleSetTopParent(&trunctuple, xlrec->topparent);
742 
743  if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY,
744  false, false) == InvalidOffsetNumber)
745  elog(ERROR, "could not add dummy high key to half-dead page");
746 
747  PageSetLSN(page, lsn);
748  MarkBufferDirty(buffer);
749  UnlockReleaseBuffer(buffer);
750  }
751 
752  /* Update metapage if needed */
753  if (info == XLOG_BTREE_UNLINK_PAGE_META)
754  _bt_restore_meta(record, 4);
755 }
BlockNumber btpo_next
Definition: nbtree.h:59
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1458
#define BTP_LEAF
Definition: nbtree.h:72
#define BTP_HALF_DEAD
Definition: nbtree.h:76
union BTPageOpaqueData::@46 btpo
Pointer Item
Definition: item.h:17
#define P_NONE
Definition: nbtree.h:182
#define XLogRecHasBlockRef(decoder, block_id)
Definition: xlogreader.h:286
#define MemSet(start, val, len)
Definition: c.h:962
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:416
uint32 BlockNumber
Definition: block.h:31
#define BTP_DELETED
Definition: nbtree.h:74
TransactionId xact
Definition: nbtree.h:63
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
XLogRecPtr EndRecPtr
Definition: xlogreader.h:132
static void _bt_restore_meta(XLogReaderState *record, uint8 block_id)
Definition: nbtxlog.c:82
#define XLogRecGetData(decoder)
Definition: xlogreader.h:283
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3388
#define ERROR
Definition: elog.h:43
Buffer XLogInitBufferForRedo(XLogReaderState *record, uint8 block_id)
Definition: xlogutils.c:301
#define BTreeTupleSetTopParent(itup, blkno)
Definition: nbtree.h:315
BTCycleId btpo_cycleid
Definition: nbtree.h:66
BlockNumber btpo_prev
Definition: nbtree.h:58
#define BufferGetPage(buffer)
Definition: bufmgr.h:159
uint32 level
Definition: nbtree.h:62
struct IndexTupleData IndexTupleData
#define BufferGetPageSize(buffer)
Definition: bufmgr.h:146
#define InvalidOffsetNumber
Definition: off.h:26
XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id, Buffer *buf)
Definition: xlogutils.c:289
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
#define BufferIsValid(bufnum)
Definition: bufmgr.h:113
#define P_HIKEY
Definition: nbtree.h:218
#define elog(elevel,...)
Definition: elog.h:228
void _bt_pageinit(Page page, Size size)
Definition: nbtpage.c:924
#define XLOG_BTREE_UNLINK_PAGE_META
Definition: nbtxlog.h:34
unsigned short t_info
Definition: itup.h:49
uint16 btpo_flags
Definition: nbtree.h:65
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
int Buffer
Definition: buf.h:23
Pointer Page
Definition: bufpage.h:78

◆ btree_xlog_vacuum()

static void btree_xlog_vacuum ( XLogReaderState * record)
static

Definition at line 383 of file nbtxlog.c.

References BLK_NEEDS_REDO, BlockNumberIsValid, BTP_HAS_GARBAGE, BTPageOpaqueData::btpo_flags, BufferGetPage, BufferIsValid, XLogReaderState::EndRecPtr, HotStandbyActiveInReplay(), xl_btree_vacuum::lastBlockVacuumed, LockBufferForCleanup(), MAIN_FORKNUM, MarkBufferDirty(), PageGetSpecialPointer, PageIndexMultiDelete(), PageSetLSN, RBM_NORMAL, RBM_NORMAL_NO_LOG, UnlockReleaseBuffer(), XLogReadBufferExtended(), XLogReadBufferForRedoExtended(), XLogRecGetBlockData(), XLogRecGetBlockTag(), and XLogRecGetData.

Referenced by btree_redo().

384 {
385  XLogRecPtr lsn = record->EndRecPtr;
386  Buffer buffer;
387  Page page;
388  BTPageOpaque opaque;
389 #ifdef UNUSED
390  xl_btree_vacuum *xlrec = (xl_btree_vacuum *) XLogRecGetData(record);
391 
392  /*
393  * This section of code is thought to be no longer needed, after analysis
394  * of the calling paths. It is retained to allow the code to be reinstated
395  * if a flaw is revealed in that thinking.
396  *
397  * If we are running non-MVCC scans using this index we need to do some
398  * additional work to ensure correctness, which is known as a "pin scan"
399  * described in more detail in next paragraphs. We used to do the extra
400  * work in all cases, whereas we now avoid that work in most cases. If
401  * lastBlockVacuumed is set to InvalidBlockNumber then we skip the
402  * additional work required for the pin scan.
403  *
404  * Avoiding this extra work is important since it requires us to touch
405  * every page in the index, so is an O(N) operation. Worse, it is an
406  * operation performed in the foreground during redo, so it delays
407  * replication directly.
408  *
409  * If queries might be active then we need to ensure every leaf page is
410  * unpinned between the lastBlockVacuumed and the current block, if there
411  * are any. This prevents replay of the VACUUM from reaching the stage of
412  * removing heap tuples while there could still be indexscans "in flight"
413  * to those particular tuples for those scans which could be confused by
414  * finding new tuples at the old TID locations (see nbtree/README).
415  *
416  * It might be worth checking if there are actually any backends running;
417  * if not, we could just skip this.
418  *
419  * Since VACUUM can visit leaf pages out-of-order, it might issue records
420  * with lastBlockVacuumed >= block; that's not an error, it just means
421  * nothing to do now.
422  *
423  * Note: since we touch all pages in the range, we will lock non-leaf
424  * pages, and also any empty (all-zero) pages that may be in the index. It
425  * doesn't seem worth the complexity to avoid that. But it's important
426  * that HotStandbyActiveInReplay() will not return true if the database
427  * isn't yet consistent; so we need not fear reading still-corrupt blocks
428  * here during crash recovery.
429  */
430  if (HotStandbyActiveInReplay() && BlockNumberIsValid(xlrec->lastBlockVacuumed))
431  {
432  RelFileNode thisrnode;
433  BlockNumber thisblkno;
434  BlockNumber blkno;
435 
436  XLogRecGetBlockTag(record, 0, &thisrnode, NULL, &thisblkno);
437 
438  for (blkno = xlrec->lastBlockVacuumed + 1; blkno < thisblkno; blkno++)
439  {
440  /*
441  * We use RBM_NORMAL_NO_LOG mode because it's not an error
442  * condition to see all-zero pages. The original btvacuumpage
443  * scan would have skipped over all-zero pages, noting them in FSM
444  * but not bothering to initialize them just yet; so we mustn't
445  * throw an error here. (We could skip acquiring the cleanup lock
446  * if PageIsNew, but it's probably not worth the cycles to test.)
447  *
448  * XXX we don't actually need to read the block, we just need to
449  * confirm it is unpinned. If we had a special call into the
450  * buffer manager we could optimise this so that if the block is
451  * not in shared_buffers we confirm it as unpinned. Optimizing
452  * this is now moot, since in most cases we avoid the scan.
453  */
454  buffer = XLogReadBufferExtended(thisrnode, MAIN_FORKNUM, blkno,
455  RBM_NORMAL_NO_LOG);
456  if (BufferIsValid(buffer))
457  {
458  LockBufferForCleanup(buffer);
459  UnlockReleaseBuffer(buffer);
460  }
461  }
462  }
463 #endif
464 
465  /*
466  * Like in btvacuumpage(), we need to take a cleanup lock on every leaf
467  * page. See nbtree/README for details.
468  */
469  if (XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &buffer)
470  == BLK_NEEDS_REDO)
471  {
472  char *ptr;
473  Size len;
474 
475  ptr = XLogRecGetBlockData(record, 0, &len);
476 
477  page = (Page) BufferGetPage(buffer);
478 
479  if (len > 0)
480  {
481  OffsetNumber *unused;
482  OffsetNumber *unend;
483 
484  unused = (OffsetNumber *) ptr;
485  unend = (OffsetNumber *) ((char *) ptr + len);
486 
487  if ((unend - unused) > 0)
488  PageIndexMultiDelete(page, unused, unend - unused);
489  }
490 
491  /*
492  * Mark the page as not containing any LP_DEAD items --- see comments
493  * in _bt_delitems_vacuum().
494  */
495  opaque = (BTPageOpaque) PageGetSpecialPointer(page);
496  opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
497 
498  PageSetLSN(page, lsn);
499  MarkBufferDirty(buffer);
500  }
501  if (BufferIsValid(buffer))
502  UnlockReleaseBuffer(buffer);
503 }
BlockNumber lastBlockVacuumed
Definition: nbtxlog.h:173
void LockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:3659
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1458
Buffer XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, BlockNumber blkno, ReadBufferMode mode)
Definition: xlogutils.c:437
uint32 BlockNumber
Definition: block.h:31
bool HotStandbyActiveInReplay(void)
Definition: xlog.c:8016
BTPageOpaqueData * BTPageOpaque
Definition: nbtree.h:69
XLogRecPtr EndRecPtr
Definition: xlogreader.h:132
uint16 OffsetNumber
Definition: off.h:24
#define XLogRecGetData(decoder)
Definition: xlogreader.h:283
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3388
#define BufferGetPage(buffer)
Definition: bufmgr.h:159
bool XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id, RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum)
Definition: xlogreader.c:1460
char * XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len)
Definition: xlogreader.c:1484
#define BlockNumberIsValid(blockNumber)
Definition: block.h:70
uint64 XLogRecPtr
Definition: xlogdefs.h:21
void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
Definition: bufpage.c:835
size_t Size
Definition: c.h:467
#define PageGetSpecialPointer(page)
Definition: bufpage.h:326
#define BufferIsValid(bufnum)
Definition: bufmgr.h:113
XLogRedoAction XLogReadBufferForRedoExtended(XLogReaderState *record, uint8 block_id, ReadBufferMode mode, bool get_cleanup_lock, Buffer *buf)
Definition: xlogutils.c:326
uint16 btpo_flags
Definition: nbtree.h:65
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
int Buffer
Definition: buf.h:23
#define BTP_HAS_GARBAGE
Definition: nbtree.h:78
Pointer Page
Definition: bufpage.h:78