PostgreSQL Source Code  git master
nbtxlog.c File Reference
#include "postgres.h"
#include "access/bufmask.h"
#include "access/nbtree.h"
#include "access/nbtxlog.h"
#include "access/transam.h"
#include "access/xlog.h"
#include "access/xlogutils.h"
#include "miscadmin.h"
#include "storage/procarray.h"
#include "utils/memutils.h"
Include dependency graph for nbtxlog.c:

Go to the source code of this file.

Functions

static void _bt_restore_page (Page page, char *from, int len)
 
static void _bt_restore_meta (XLogReaderState *record, uint8 block_id)
 
static void _bt_clear_incomplete_split (XLogReaderState *record, uint8 block_id)
 
static void btree_xlog_insert (bool isleaf, bool ismeta, bool posting, XLogReaderState *record)
 
static void btree_xlog_split (bool newitemonleft, XLogReaderState *record)
 
static void btree_xlog_dedup (XLogReaderState *record)
 
static void btree_xlog_updates (Page page, OffsetNumber *updatedoffsets, xl_btree_update *updates, int nupdated)
 
static void btree_xlog_vacuum (XLogReaderState *record)
 
static void btree_xlog_delete (XLogReaderState *record)
 
static void btree_xlog_mark_page_halfdead (uint8 info, XLogReaderState *record)
 
static void btree_xlog_unlink_page (uint8 info, XLogReaderState *record)
 
static void btree_xlog_newroot (XLogReaderState *record)
 
static void btree_xlog_reuse_page (XLogReaderState *record)
 
void btree_redo (XLogReaderState *record)
 
void btree_xlog_startup (void)
 
void btree_xlog_cleanup (void)
 
void btree_mask (char *pagedata, BlockNumber blkno)
 

Variables

static MemoryContext opCtx
 

Function Documentation

◆ _bt_clear_incomplete_split()

static void _bt_clear_incomplete_split ( XLogReaderState * record,
uint8  block_id 
)
static

Definition at line 141 of file nbtxlog.c.

142 {
143  XLogRecPtr lsn = record->EndRecPtr;
144  Buffer buf;
145 
146  if (XLogReadBufferForRedo(record, block_id, &buf) == BLK_NEEDS_REDO)
147  {
148  Page page = (Page) BufferGetPage(buf);
149  BTPageOpaque pageop = BTPageGetOpaque(page);
150 
151  Assert(P_INCOMPLETE_SPLIT(pageop));
152  pageop->btpo_flags &= ~BTP_INCOMPLETE_SPLIT;
153 
154  PageSetLSN(page, lsn);
155  MarkBufferDirty(buf);
156  }
157  if (BufferIsValid(buf))
158  UnlockReleaseBuffer(buf);
159 }

References Assert(), BLK_NEEDS_REDO, BTP_INCOMPLETE_SPLIT, BTPageGetOpaque, BTPageOpaqueData::btpo_flags, buf, BufferGetPage(), BufferIsValid(), XLogReaderState::EndRecPtr, MarkBufferDirty(), P_INCOMPLETE_SPLIT, PageSetLSN(), UnlockReleaseBuffer(), and XLogReadBufferForRedo().

Referenced by btree_xlog_insert(), btree_xlog_newroot(), and btree_xlog_split().

◆ _bt_restore_meta()

static void _bt_restore_meta ( XLogReaderState * record,
uint8  block_id 
)
static

Definition at line 84 of file nbtxlog.c.

85 {
86  XLogRecPtr lsn = record->EndRecPtr;
87  Buffer metabuf;
88  Page metapg;
89  BTMetaPageData *md;
90  BTPageOpaque pageop;
91  xl_btree_metadata *xlrec;
92  char *ptr;
93  Size len;
94 
95  metabuf = XLogInitBufferForRedo(record, block_id);
96  ptr = XLogRecGetBlockData(record, block_id, &len);
97 
98  Assert(len == sizeof(xl_btree_metadata));
99  Assert(BufferGetBlockNumber(metabuf) == BTREE_METAPAGE);
100  xlrec = (xl_btree_metadata *) ptr;
101  metapg = BufferGetPage(metabuf);
102 
103  _bt_pageinit(metapg, BufferGetPageSize(metabuf));
104 
105  md = BTPageGetMeta(metapg);
106  md->btm_magic = BTREE_MAGIC;
107  md->btm_version = xlrec->version;
108  md->btm_root = xlrec->root;
109  md->btm_level = xlrec->level;
110  md->btm_fastroot = xlrec->fastroot;
111  md->btm_fastlevel = xlrec->fastlevel;
112  /* Cannot log BTREE_MIN_VERSION index metapage without upgrade */
113  Assert(md->btm_version >= BTREE_NOVAC_VERSION);
114  md->btm_last_cleanup_num_delpages = xlrec->last_cleanup_num_delpages;
115  md->btm_last_cleanup_num_heap_tuples = -1.0;
116  md->btm_allequalimage = xlrec->allequalimage;
117 
118  pageop = BTPageGetOpaque(metapg);
119  pageop->btpo_flags = BTP_META;
120 
121  /*
122  * Set pd_lower just past the end of the metadata. This is essential,
123  * because without doing so, metadata will be lost if xlog.c compresses
124  * the page.
125  */
126  ((PageHeader) metapg)->pd_lower =
127  ((char *) md + sizeof(BTMetaPageData)) - (char *) metapg;
128 
129  PageSetLSN(metapg, lsn);
130  MarkBufferDirty(metabuf);
131  UnlockReleaseBuffer(metabuf);
132 }

References _bt_pageinit(), xl_btree_metadata::allequalimage, Assert(), BTMetaPageData::btm_allequalimage, BTMetaPageData::btm_fastlevel, BTMetaPageData::btm_fastroot, BTMetaPageData::btm_last_cleanup_num_delpages, BTMetaPageData::btm_last_cleanup_num_heap_tuples, BTMetaPageData::btm_level, BTMetaPageData::btm_magic, BTMetaPageData::btm_root, BTMetaPageData::btm_version, BTP_META, BTPageGetMeta, BTPageGetOpaque, BTPageOpaqueData::btpo_flags, BTREE_MAGIC, BTREE_METAPAGE, BTREE_NOVAC_VERSION, BufferGetBlockNumber(), BufferGetPage(), BufferGetPageSize(), XLogReaderState::EndRecPtr, xl_btree_metadata::fastlevel, xl_btree_metadata::fastroot, xl_btree_metadata::last_cleanup_num_delpages, len, xl_btree_metadata::level, MarkBufferDirty(), PageSetLSN(), xl_btree_metadata::root, UnlockReleaseBuffer(), xl_btree_metadata::version, XLogInitBufferForRedo(), and XLogRecGetBlockData().

Referenced by btree_redo(), btree_xlog_insert(), btree_xlog_newroot(), and btree_xlog_unlink_page().

◆ _bt_restore_page()

static void _bt_restore_page ( Page  page,
char *  from,
int  len 
)
static

Definition at line 38 of file nbtxlog.c.

39 {
40  IndexTupleData itupdata;
41  Size itemsz;
42  char *end = from + len;
43  Item items[MaxIndexTuplesPerPage];
44  uint16 itemsizes[MaxIndexTuplesPerPage];
45  int i;
46  int nitems;
47 
48  /*
49  * To get the items back in the original order, we add them to the page in
50  * reverse. To figure out where one tuple ends and another begins, we
51  * have to scan them in forward order first.
52  */
53  i = 0;
54  while (from < end)
55  {
56  /*
57  * As we step through the items, 'from' won't always be properly
58  * aligned, so we need to use memcpy(). Further, we use Item (which
59  * is just a char*) here for our items array for the same reason;
60  * wouldn't want the compiler or anyone thinking that an item is
61  * aligned when it isn't.
62  */
63  memcpy(&itupdata, from, sizeof(IndexTupleData));
64  itemsz = IndexTupleSize(&itupdata);
65  itemsz = MAXALIGN(itemsz);
66 
67  items[i] = (Item) from;
68  itemsizes[i] = itemsz;
69  i++;
70 
71  from += itemsz;
72  }
73  nitems = i;
74 
75  for (i = nitems - 1; i >= 0; i--)
76  {
77  if (PageAddItem(page, items[i], itemsizes[i], nitems - i,
78  false, false) == InvalidOffsetNumber)
79  elog(PANIC, "_bt_restore_page: cannot add item to page");
80  }
81 }

References elog(), i, IndexTupleSize, InvalidOffsetNumber, len, MAXALIGN, MaxIndexTuplesPerPage, nitems, PageAddItem, and PANIC.

Referenced by btree_xlog_newroot(), and btree_xlog_split().

◆ btree_mask()

void btree_mask ( char *  pagedata,
BlockNumber  blkno 
)

Definition at line 1093 of file nbtxlog.c.

1094 {
1095  Page page = (Page) pagedata;
1096  BTPageOpaque maskopaq;
1097 
1098  mask_page_lsn_and_checksum(page);
1099 
1100  mask_page_hint_bits(page);
1101  mask_unused_space(page);
1102 
1103  maskopaq = BTPageGetOpaque(page);
1104 
1105  if (P_ISLEAF(maskopaq))
1106  {
1107  /*
1108  * In btree leaf pages, it is possible to modify the LP_FLAGS without
1109  * emitting any WAL record. Hence, mask the line pointer flags. See
1110  * _bt_killitems(), _bt_check_unique() for details.
1111  */
1112  mask_lp_flags(page);
1113  }
1114 
1115  /*
1116  * BTP_HAS_GARBAGE is just an un-logged hint bit. So, mask it. See
1117  * _bt_delete_or_dedup_one_page(), _bt_killitems(), and _bt_check_unique()
1118  * for details.
1119  */
1120  maskopaq->btpo_flags &= ~BTP_HAS_GARBAGE;
1121 
1122  /*
1123  * During replay of a btree page split, we don't set the BTP_SPLIT_END
1124  * flag of the right sibling and initialize the cycle_id to 0 for the same
1125  * page. See btree_xlog_split() for details.
1126  */
1127  maskopaq->btpo_flags &= ~BTP_SPLIT_END;
1128  maskopaq->btpo_cycleid = 0;
1129 }

References BTP_HAS_GARBAGE, BTP_SPLIT_END, BTPageGetOpaque, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, mask_lp_flags(), mask_page_hint_bits(), mask_page_lsn_and_checksum(), mask_unused_space(), and P_ISLEAF.
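
btree_mask() is the B-Tree hook for WAL consistency checking: the page produced by redo and the full-page image carried in the WAL record are both masked and then compared byte for byte. A minimal sketch of that comparison, assuming backend context (postgres.h and the bufmask/bufmgr headers) and hypothetical local variables replay_page, wal_image and blkno:

  char replay_copy[BLCKSZ];
  char image_copy[BLCKSZ];

  memcpy(replay_copy, replay_page, BLCKSZ);   /* page reconstructed by redo */
  memcpy(image_copy, wal_image, BLCKSZ);      /* full-page image from the WAL record */

  /* hide hint bits, unused space, etc. that may legitimately differ */
  btree_mask(replay_copy, blkno);
  btree_mask(image_copy, blkno);

  if (memcmp(replay_copy, image_copy, BLCKSZ) != 0)
      elog(FATAL, "inconsistent page found");

In practice this path is exercised by setting the standard GUC wal_consistency_checking to include 'btree'.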

◆ btree_redo()

void btree_redo ( XLogReaderState * record)

Definition at line 1016 of file nbtxlog.c.

1017 {
1018  uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
1019  MemoryContext oldCtx;
1020 
1021  oldCtx = MemoryContextSwitchTo(opCtx);
1022  switch (info)
1023  {
1024  case XLOG_BTREE_INSERT_LEAF:
1025  btree_xlog_insert(true, false, false, record);
1026  break;
1027  case XLOG_BTREE_INSERT_UPPER:
1028  btree_xlog_insert(false, false, false, record);
1029  break;
1030  case XLOG_BTREE_INSERT_META:
1031  btree_xlog_insert(false, true, false, record);
1032  break;
1033  case XLOG_BTREE_SPLIT_L:
1034  btree_xlog_split(true, record);
1035  break;
1036  case XLOG_BTREE_SPLIT_R:
1037  btree_xlog_split(false, record);
1038  break;
1039  case XLOG_BTREE_INSERT_POST:
1040  btree_xlog_insert(true, false, true, record);
1041  break;
1042  case XLOG_BTREE_DEDUP:
1043  btree_xlog_dedup(record);
1044  break;
1045  case XLOG_BTREE_VACUUM:
1046  btree_xlog_vacuum(record);
1047  break;
1048  case XLOG_BTREE_DELETE:
1049  btree_xlog_delete(record);
1050  break;
1051  case XLOG_BTREE_MARK_PAGE_HALFDEAD:
1052  btree_xlog_mark_page_halfdead(info, record);
1053  break;
1054  case XLOG_BTREE_UNLINK_PAGE:
1055  case XLOG_BTREE_UNLINK_PAGE_META:
1056  btree_xlog_unlink_page(info, record);
1057  break;
1058  case XLOG_BTREE_NEWROOT:
1059  btree_xlog_newroot(record);
1060  break;
1061  case XLOG_BTREE_REUSE_PAGE:
1062  btree_xlog_reuse_page(record);
1063  break;
1064  case XLOG_BTREE_META_CLEANUP:
1065  _bt_restore_meta(record, 0);
1066  break;
1067  default:
1068  elog(PANIC, "btree_redo: unknown op code %u", info);
1069  }
1070  MemoryContextSwitchTo(oldCtx);
1071  MemoryContextReset(opCtx);
1072 }

References _bt_restore_meta(), btree_xlog_dedup(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_reuse_page(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), elog(), MemoryContextReset(), MemoryContextSwitchTo(), opCtx, PANIC, XLOG_BTREE_DEDUP, XLOG_BTREE_DELETE, XLOG_BTREE_INSERT_LEAF, XLOG_BTREE_INSERT_META, XLOG_BTREE_INSERT_POST, XLOG_BTREE_INSERT_UPPER, XLOG_BTREE_MARK_PAGE_HALFDEAD, XLOG_BTREE_META_CLEANUP, XLOG_BTREE_NEWROOT, XLOG_BTREE_REUSE_PAGE, XLOG_BTREE_SPLIT_L, XLOG_BTREE_SPLIT_R, XLOG_BTREE_UNLINK_PAGE, XLOG_BTREE_UNLINK_PAGE_META, XLOG_BTREE_VACUUM, XLogRecGetInfo, and XLR_INFO_MASK.
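
btree_redo() and the other entry points in this file are not called directly by recovery; they are registered as the B-Tree resource manager's callbacks. A hedged sketch of that wiring, roughly as it appears in src/include/access/rmgrlist.h (the exact field list may differ across versions; btree_desc, btree_identify and btree_decode live in other files):

  /* rmgr entry: id, name, redo, desc, identify, startup, cleanup, mask, decode */
  PG_RMGR(RM_BTREE_ID, "Btree", btree_redo, btree_desc, btree_identify,
          btree_xlog_startup, btree_xlog_cleanup, btree_mask, btree_decode)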

◆ btree_xlog_cleanup()

void btree_xlog_cleanup ( void  )

Definition at line 1083 of file nbtxlog.c.

1084 {
1085  MemoryContextDelete(opCtx);
1086  opCtx = NULL;
1087 }

References MemoryContextDelete(), and opCtx.

◆ btree_xlog_dedup()

static void btree_xlog_dedup ( XLogReaderState * record)
static

Definition at line 466 of file nbtxlog.c.

467 {
468  XLogRecPtr lsn = record->EndRecPtr;
469  xl_btree_dedup *xlrec = (xl_btree_dedup *) XLogRecGetData(record);
470  Buffer buf;
471 
472  if (XLogReadBufferForRedo(record, 0, &buf) == BLK_NEEDS_REDO)
473  {
474  char *ptr = XLogRecGetBlockData(record, 0, NULL);
475  Page page = (Page) BufferGetPage(buf);
476  BTPageOpaque opaque = BTPageGetOpaque(page);
477  OffsetNumber offnum,
478  minoff,
479  maxoff;
480  BTDedupState state;
481  BTDedupInterval *intervals;
482  Page newpage;
483 
484  state = (BTDedupState) palloc(sizeof(BTDedupStateData));
485  state->deduplicate = true; /* unused */
486  state->nmaxitems = 0; /* unused */
487  /* Conservatively use larger maxpostingsize than primary */
488  state->maxpostingsize = BTMaxItemSize(page);
489  state->base = NULL;
490  state->baseoff = InvalidOffsetNumber;
491  state->basetupsize = 0;
492  state->htids = palloc(state->maxpostingsize);
493  state->nhtids = 0;
494  state->nitems = 0;
495  state->phystupsize = 0;
496  state->nintervals = 0;
497 
498  minoff = P_FIRSTDATAKEY(opaque);
499  maxoff = PageGetMaxOffsetNumber(page);
500  newpage = PageGetTempPageCopySpecial(page);
501 
502  if (!P_RIGHTMOST(opaque))
503  {
504  ItemId itemid = PageGetItemId(page, P_HIKEY);
505  Size itemsz = ItemIdGetLength(itemid);
506  IndexTuple item = (IndexTuple) PageGetItem(page, itemid);
507 
508  if (PageAddItem(newpage, (Item) item, itemsz, P_HIKEY,
509  false, false) == InvalidOffsetNumber)
510  elog(ERROR, "deduplication failed to add highkey");
511  }
512 
513  intervals = (BTDedupInterval *) ptr;
514  for (offnum = minoff;
515  offnum <= maxoff;
516  offnum = OffsetNumberNext(offnum))
517  {
518  ItemId itemid = PageGetItemId(page, offnum);
519  IndexTuple itup = (IndexTuple) PageGetItem(page, itemid);
520 
521  if (offnum == minoff)
522  _bt_dedup_start_pending(state, itup, offnum);
523  else if (state->nintervals < xlrec->nintervals &&
524  state->baseoff == intervals[state->nintervals].baseoff &&
525  state->nitems < intervals[state->nintervals].nitems)
526  {
527  if (!_bt_dedup_save_htid(state, itup))
528  elog(ERROR, "deduplication failed to add heap tid to pending posting list");
529  }
530  else
531  {
532  _bt_dedup_finish_pending(newpage, state);
533  _bt_dedup_start_pending(state, itup, offnum);
534  }
535  }
536 
537  _bt_dedup_finish_pending(newpage, state);
538  Assert(state->nintervals == xlrec->nintervals);
539  Assert(memcmp(state->intervals, intervals,
540  state->nintervals * sizeof(BTDedupInterval)) == 0);
541 
542  if (P_HAS_GARBAGE(opaque))
543  {
544  BTPageOpaque nopaque = BTPageGetOpaque(newpage);
545 
546  nopaque->btpo_flags &= ~BTP_HAS_GARBAGE;
547  }
548 
549  PageRestoreTempPage(newpage, page);
550  PageSetLSN(page, lsn);
551  MarkBufferDirty(buf);
552  }
553 
554  if (BufferIsValid(buf))
555  UnlockReleaseBuffer(buf);
556 }

References _bt_dedup_finish_pending(), _bt_dedup_save_htid(), _bt_dedup_start_pending(), Assert(), BLK_NEEDS_REDO, BTMaxItemSize, BTP_HAS_GARBAGE, BTPageGetOpaque, BTPageOpaqueData::btpo_flags, buf, BufferGetPage(), BufferIsValid(), elog(), XLogReaderState::EndRecPtr, ERROR, intervals, InvalidOffsetNumber, ItemIdGetLength, MarkBufferDirty(), xl_btree_dedup::nintervals, OffsetNumberNext, P_FIRSTDATAKEY, P_HAS_GARBAGE, P_HIKEY, P_RIGHTMOST, PageAddItem, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageGetTempPageCopySpecial(), PageRestoreTempPage(), PageSetLSN(), palloc(), UnlockReleaseBuffer(), XLogReadBufferForRedo(), XLogRecGetBlockData(), and XLogRecGetData.

Referenced by btree_redo().
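
The intervals array replayed above is the same data the primary logged from its own deduplication pass: each interval names the first item of a group of equal-keyed tuples and how many items were merged into one posting list. For reference, a sketch of the structure as declared in access/nbtree.h (shown only as a reminder; the header is authoritative):

  typedef struct BTDedupInterval
  {
      OffsetNumber baseoff;   /* offset of the group's base tuple on the page */
      uint16       nitems;    /* number of existing items merged into one posting list */
  } BTDedupInterval;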

◆ btree_xlog_delete()

static void btree_xlog_delete ( XLogReaderState * record)
static

Definition at line 653 of file nbtxlog.c.

654 {
655  XLogRecPtr lsn = record->EndRecPtr;
656  xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
657  Buffer buffer;
658  Page page;
659  BTPageOpaque opaque;
660 
661  /*
662  * If we have any conflict processing to do, it must happen before we
663  * update the page
664  */
665  if (InHotStandby)
666  {
667  RelFileLocator rlocator;
668 
669  XLogRecGetBlockTag(record, 0, &rlocator, NULL, NULL);
670 
671  ResolveRecoveryConflictWithSnapshot(xlrec->snapshotConflictHorizon,
672  xlrec->isCatalogRel,
673  rlocator);
674  }
675 
676  /*
677  * We don't need to take a cleanup lock to apply these changes. See
678  * nbtree/README for details.
679  */
680  if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
681  {
682  char *ptr = XLogRecGetBlockData(record, 0, NULL);
683 
684  page = (Page) BufferGetPage(buffer);
685 
686  if (xlrec->nupdated > 0)
687  {
688  OffsetNumber *updatedoffsets;
689  xl_btree_update *updates;
690 
691  updatedoffsets = (OffsetNumber *)
692  (ptr + xlrec->ndeleted * sizeof(OffsetNumber));
693  updates = (xl_btree_update *) ((char *) updatedoffsets +
694  xlrec->nupdated *
695  sizeof(OffsetNumber));
696 
697  btree_xlog_updates(page, updatedoffsets, updates, xlrec->nupdated);
698  }
699 
700  if (xlrec->ndeleted > 0)
701  PageIndexMultiDelete(page, (OffsetNumber *) ptr, xlrec->ndeleted);
702 
703  /* Mark the page as not containing any LP_DEAD items */
704  opaque = BTPageGetOpaque(page);
705  opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
706 
707  PageSetLSN(page, lsn);
708  MarkBufferDirty(buffer);
709  }
710  if (BufferIsValid(buffer))
711  UnlockReleaseBuffer(buffer);
712 }

References BLK_NEEDS_REDO, BTP_HAS_GARBAGE, BTPageGetOpaque, BTPageOpaqueData::btpo_flags, btree_xlog_updates(), BufferGetPage(), BufferIsValid(), XLogReaderState::EndRecPtr, InHotStandby, xl_btree_delete::isCatalogRel, MarkBufferDirty(), xl_btree_delete::ndeleted, xl_btree_delete::nupdated, PageIndexMultiDelete(), PageSetLSN(), ResolveRecoveryConflictWithSnapshot(), xl_btree_delete::snapshotConflictHorizon, UnlockReleaseBuffer(), XLogReadBufferForRedo(), XLogRecGetBlockData(), XLogRecGetBlockTag(), and XLogRecGetData.

Referenced by btree_redo().
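
btree_xlog_delete() and btree_xlog_vacuum() decode the same block-0 payload: the deletable offsets come first, then the updated offsets, then one variable-size xl_btree_update per updated offset. A hedged sketch of walking that layout, mirroring the pointer arithmetic above (ptr is the registered block data; access/nbtxlog.h remains authoritative):

  OffsetNumber *deletable = (OffsetNumber *) ptr;
  OffsetNumber *updatedoffsets = (OffsetNumber *)
      (ptr + xlrec->ndeleted * sizeof(OffsetNumber));
  xl_btree_update *updates = (xl_btree_update *)
      ((char *) updatedoffsets + xlrec->nupdated * sizeof(OffsetNumber));

  /* each xl_btree_update is followed by its ndeletedtids uint16 entries */
  for (int i = 0; i < xlrec->nupdated; i++)
      updates = (xl_btree_update *)
          ((char *) updates + SizeOfBtreeUpdate +
           updates->ndeletedtids * sizeof(uint16));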

◆ btree_xlog_insert()

static void btree_xlog_insert ( bool  isleaf,
bool  ismeta,
bool  posting,
XLogReaderState * record 
)
static

Definition at line 162 of file nbtxlog.c.

164 {
165  XLogRecPtr lsn = record->EndRecPtr;
166  xl_btree_insert *xlrec = (xl_btree_insert *) XLogRecGetData(record);
167  Buffer buffer;
168  Page page;
169 
170  /*
171  * Insertion to an internal page finishes an incomplete split at the child
172  * level. Clear the incomplete-split flag in the child. Note: during
173  * normal operation, the child and parent pages are locked at the same
174  * time (the locks are coupled), so that clearing the flag and inserting
175  * the downlink appear atomic to other backends. We don't bother with
176  * that during replay, because readers don't care about the
177  * incomplete-split flag and there cannot be updates happening.
178  */
179  if (!isleaf)
180  _bt_clear_incomplete_split(record, 1);
181  if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
182  {
183  Size datalen;
184  char *datapos = XLogRecGetBlockData(record, 0, &datalen);
185 
186  page = BufferGetPage(buffer);
187 
188  if (!posting)
189  {
190  /* Simple retail insertion */
191  if (PageAddItem(page, (Item) datapos, datalen, xlrec->offnum,
192  false, false) == InvalidOffsetNumber)
193  elog(PANIC, "failed to add new item");
194  }
195  else
196  {
197  ItemId itemid;
198  IndexTuple oposting,
199  newitem,
200  nposting;
201  uint16 postingoff;
202 
203  /*
204  * A posting list split occurred during leaf page insertion. WAL
205  * record data will start with an offset number representing the
206  * point in an existing posting list that a split occurs at.
207  *
208  * Use _bt_swap_posting() to repeat posting list split steps from
209  * primary. Note that newitem from WAL record is 'orignewitem',
210  * not the final version of newitem that is actually inserted on
211  * page.
212  */
213  postingoff = *((uint16 *) datapos);
214  datapos += sizeof(uint16);
215  datalen -= sizeof(uint16);
216 
217  itemid = PageGetItemId(page, OffsetNumberPrev(xlrec->offnum));
218  oposting = (IndexTuple) PageGetItem(page, itemid);
219 
220  /* Use mutable, aligned newitem copy in _bt_swap_posting() */
221  Assert(isleaf && postingoff > 0);
222  newitem = CopyIndexTuple((IndexTuple) datapos);
223  nposting = _bt_swap_posting(newitem, oposting, postingoff);
224 
225  /* Replace existing posting list with post-split version */
226  memcpy(oposting, nposting, MAXALIGN(IndexTupleSize(nposting)));
227 
228  /* Insert "final" new item (not orignewitem from WAL stream) */
229  Assert(IndexTupleSize(newitem) == datalen);
230  if (PageAddItem(page, (Item) newitem, datalen, xlrec->offnum,
231  false, false) == InvalidOffsetNumber)
232  elog(PANIC, "failed to add posting split new item");
233  }
234 
235  PageSetLSN(page, lsn);
236  MarkBufferDirty(buffer);
237  }
238  if (BufferIsValid(buffer))
239  UnlockReleaseBuffer(buffer);
240 
241  /*
242  * Note: in normal operation, we'd update the metapage while still holding
243  * lock on the page we inserted into. But during replay it's not
244  * necessary to hold that lock, since no other index updates can be
245  * happening concurrently, and readers will cope fine with following an
246  * obsolete link from the metapage.
247  */
248  if (ismeta)
249  _bt_restore_meta(record, 2);
250 }

References _bt_clear_incomplete_split(), _bt_restore_meta(), _bt_swap_posting(), Assert(), BLK_NEEDS_REDO, BufferGetPage(), BufferIsValid(), CopyIndexTuple(), elog(), XLogReaderState::EndRecPtr, IndexTupleSize, InvalidOffsetNumber, MarkBufferDirty(), MAXALIGN, xl_btree_insert::offnum, OffsetNumberPrev, PageAddItem, PageGetItem(), PageGetItemId(), PageSetLSN(), PANIC, UnlockReleaseBuffer(), XLogReadBufferForRedo(), XLogRecGetBlockData(), and XLogRecGetData.

Referenced by btree_redo().

◆ btree_xlog_mark_page_halfdead()

static void btree_xlog_mark_page_halfdead ( uint8  info,
XLogReaderState * record 
)
static

Definition at line 715 of file nbtxlog.c.

716 {
717  XLogRecPtr lsn = record->EndRecPtr;
718  xl_btree_mark_page_halfdead *xlrec = (xl_btree_mark_page_halfdead *) XLogRecGetData(record);
719  Buffer buffer;
720  Page page;
721  BTPageOpaque pageop;
722  IndexTupleData trunctuple;
723 
724  /*
725  * In normal operation, we would lock all the pages this WAL record
726  * touches before changing any of them. In WAL replay, it should be okay
727  * to lock just one page at a time, since no concurrent index updates can
728  * be happening, and readers should not care whether they arrive at the
729  * target page or not (since it's surely empty).
730  */
731 
732  /* to-be-deleted subtree's parent page */
733  if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
734  {
735  OffsetNumber poffset;
736  ItemId itemid;
737  IndexTuple itup;
738  OffsetNumber nextoffset;
739  BlockNumber rightsib;
740 
741  page = (Page) BufferGetPage(buffer);
742  pageop = BTPageGetOpaque(page);
743 
744  poffset = xlrec->poffset;
745 
746  nextoffset = OffsetNumberNext(poffset);
747  itemid = PageGetItemId(page, nextoffset);
748  itup = (IndexTuple) PageGetItem(page, itemid);
749  rightsib = BTreeTupleGetDownLink(itup);
750 
751  itemid = PageGetItemId(page, poffset);
752  itup = (IndexTuple) PageGetItem(page, itemid);
753  BTreeTupleSetDownLink(itup, rightsib);
754  nextoffset = OffsetNumberNext(poffset);
755  PageIndexTupleDelete(page, nextoffset);
756 
757  PageSetLSN(page, lsn);
758  MarkBufferDirty(buffer);
759  }
760 
761  /*
762  * Don't need to couple cross-level locks in REDO routines, so release
763  * lock on internal page immediately
764  */
765  if (BufferIsValid(buffer))
766  UnlockReleaseBuffer(buffer);
767 
768  /* Rewrite the leaf page as a halfdead page */
769  buffer = XLogInitBufferForRedo(record, 0);
770  page = (Page) BufferGetPage(buffer);
771 
772  _bt_pageinit(page, BufferGetPageSize(buffer));
773  pageop = BTPageGetOpaque(page);
774 
775  pageop->btpo_prev = xlrec->leftblk;
776  pageop->btpo_next = xlrec->rightblk;
777  pageop->btpo_level = 0;
778  pageop->btpo_flags = BTP_HALF_DEAD | BTP_LEAF;
779  pageop->btpo_cycleid = 0;
780 
781  /*
782  * Construct a dummy high key item that points to top parent page (value
783  * is InvalidBlockNumber when the top parent page is the leaf page itself)
784  */
785  MemSet(&trunctuple, 0, sizeof(IndexTupleData));
786  trunctuple.t_info = sizeof(IndexTupleData);
787  BTreeTupleSetTopParent(&trunctuple, xlrec->topparent);
788 
789  if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY,
790  false, false) == InvalidOffsetNumber)
791  elog(ERROR, "could not add dummy high key to half-dead page");
792 
793  PageSetLSN(page, lsn);
794  MarkBufferDirty(buffer);
795  UnlockReleaseBuffer(buffer);
796 }

References _bt_pageinit(), BLK_NEEDS_REDO, BTP_HALF_DEAD, BTP_LEAF, BTPageGetOpaque, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_level, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BTreeTupleGetDownLink(), BTreeTupleSetDownLink(), BTreeTupleSetTopParent(), BufferGetPage(), BufferGetPageSize(), BufferIsValid(), elog(), XLogReaderState::EndRecPtr, ERROR, InvalidOffsetNumber, xl_btree_mark_page_halfdead::leftblk, MarkBufferDirty(), MemSet, OffsetNumberNext, P_HIKEY, PageAddItem, PageGetItem(), PageGetItemId(), PageIndexTupleDelete(), PageSetLSN(), xl_btree_mark_page_halfdead::poffset, xl_btree_mark_page_halfdead::rightblk, IndexTupleData::t_info, xl_btree_mark_page_halfdead::topparent, UnlockReleaseBuffer(), XLogInitBufferForRedo(), XLogReadBufferForRedo(), and XLogRecGetData.

Referenced by btree_redo().

◆ btree_xlog_newroot()

static void btree_xlog_newroot ( XLogReaderState * record)
static

Definition at line 939 of file nbtxlog.c.

940 {
941  XLogRecPtr lsn = record->EndRecPtr;
942  xl_btree_newroot *xlrec = (xl_btree_newroot *) XLogRecGetData(record);
943  Buffer buffer;
944  Page page;
945  BTPageOpaque pageop;
946  char *ptr;
947  Size len;
948 
949  buffer = XLogInitBufferForRedo(record, 0);
950  page = (Page) BufferGetPage(buffer);
951 
952  _bt_pageinit(page, BufferGetPageSize(buffer));
953  pageop = BTPageGetOpaque(page);
954 
955  pageop->btpo_flags = BTP_ROOT;
956  pageop->btpo_prev = pageop->btpo_next = P_NONE;
957  pageop->btpo_level = xlrec->level;
958  if (xlrec->level == 0)
959  pageop->btpo_flags |= BTP_LEAF;
960  pageop->btpo_cycleid = 0;
961 
962  if (xlrec->level > 0)
963  {
964  ptr = XLogRecGetBlockData(record, 0, &len);
965  _bt_restore_page(page, ptr, len);
966 
967  /* Clear the incomplete-split flag in left child */
968  _bt_clear_incomplete_split(record, 1);
969  }
970 
971  PageSetLSN(page, lsn);
972  MarkBufferDirty(buffer);
973  UnlockReleaseBuffer(buffer);
974 
975  _bt_restore_meta(record, 2);
976 }

References _bt_clear_incomplete_split(), _bt_pageinit(), _bt_restore_meta(), _bt_restore_page(), BTP_LEAF, BTP_ROOT, BTPageGetOpaque, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_level, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BufferGetPage(), BufferGetPageSize(), XLogReaderState::EndRecPtr, len, xl_btree_newroot::level, MarkBufferDirty(), P_NONE, PageSetLSN(), UnlockReleaseBuffer(), XLogInitBufferForRedo(), XLogRecGetBlockData(), and XLogRecGetData.

Referenced by btree_redo().

◆ btree_xlog_reuse_page()

static void btree_xlog_reuse_page ( XLogReaderState * record)
static

Definition at line 1005 of file nbtxlog.c.

1006 {
1007  xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) XLogRecGetData(record);
1008 
1009  if (InHotStandby)
1010  ResolveRecoveryConflictWithSnapshotFullXid(xlrec->snapshotConflictHorizon,
1011  xlrec->isCatalogRel,
1012  xlrec->locator);
1013 }

References InHotStandby, xl_btree_reuse_page::isCatalogRel, xl_btree_reuse_page::locator, ResolveRecoveryConflictWithSnapshotFullXid(), xl_btree_reuse_page::snapshotConflictHorizon, and XLogRecGetData.

Referenced by btree_redo().

◆ btree_xlog_split()

static void btree_xlog_split ( bool  newitemonleft,
XLogReaderState * record 
)
static

Definition at line 253 of file nbtxlog.c.

254 {
255  XLogRecPtr lsn = record->EndRecPtr;
256  xl_btree_split *xlrec = (xl_btree_split *) XLogRecGetData(record);
257  bool isleaf = (xlrec->level == 0);
258  Buffer buf;
259  Buffer rbuf;
260  Page rpage;
261  BTPageOpaque ropaque;
262  char *datapos;
263  Size datalen;
264  BlockNumber origpagenumber;
265  BlockNumber rightpagenumber;
266  BlockNumber spagenumber;
267 
268  XLogRecGetBlockTag(record, 0, NULL, NULL, &origpagenumber);
269  XLogRecGetBlockTag(record, 1, NULL, NULL, &rightpagenumber);
270  if (!XLogRecGetBlockTagExtended(record, 2, NULL, NULL, &spagenumber, NULL))
271  spagenumber = P_NONE;
272 
273  /*
274  * Clear the incomplete split flag on the appropriate child page one level
275  * down when origpage/buf is an internal page (there must have been
276  * cascading page splits during original execution in the event of an
277  * internal page split). This is like the corresponding btree_xlog_insert
278  * call for internal pages. We're not clearing the incomplete split flag
279  * for the current page split here (you can think of this as part of the
280  * insert of newitem that the page split action needs to perform in
281  * passing).
282  *
283  * Like in btree_xlog_insert, this can be done before locking other pages.
284  * We never need to couple cross-level locks in REDO routines.
285  */
286  if (!isleaf)
287  _bt_clear_incomplete_split(record, 3);
288 
289  /* Reconstruct right (new) sibling page from scratch */
290  rbuf = XLogInitBufferForRedo(record, 1);
291  datapos = XLogRecGetBlockData(record, 1, &datalen);
292  rpage = (Page) BufferGetPage(rbuf);
293 
294  _bt_pageinit(rpage, BufferGetPageSize(rbuf));
295  ropaque = BTPageGetOpaque(rpage);
296 
297  ropaque->btpo_prev = origpagenumber;
298  ropaque->btpo_next = spagenumber;
299  ropaque->btpo_level = xlrec->level;
300  ropaque->btpo_flags = isleaf ? BTP_LEAF : 0;
301  ropaque->btpo_cycleid = 0;
302 
303  _bt_restore_page(rpage, datapos, datalen);
304 
305  PageSetLSN(rpage, lsn);
306  MarkBufferDirty(rbuf);
307 
308  /* Now reconstruct original page (left half of split) */
309  if (XLogReadBufferForRedo(record, 0, &buf) == BLK_NEEDS_REDO)
310  {
311  /*
312  * To retain the same physical order of the tuples that they had, we
313  * initialize a temporary empty page for the left page and add all the
314  * items to that in item number order. This mirrors how _bt_split()
315  * works. Retaining the same physical order makes WAL consistency
316  * checking possible. See also _bt_restore_page(), which does the
317  * same for the right page.
318  */
319  Page origpage = (Page) BufferGetPage(buf);
320  BTPageOpaque oopaque = BTPageGetOpaque(origpage);
321  OffsetNumber off;
322  IndexTuple newitem = NULL,
323  left_hikey = NULL,
324  nposting = NULL;
325  Size newitemsz = 0,
326  left_hikeysz = 0;
327  Page leftpage;
328  OffsetNumber leftoff,
329  replacepostingoff = InvalidOffsetNumber;
330 
331  datapos = XLogRecGetBlockData(record, 0, &datalen);
332 
333  if (newitemonleft || xlrec->postingoff != 0)
334  {
335  newitem = (IndexTuple) datapos;
336  newitemsz = MAXALIGN(IndexTupleSize(newitem));
337  datapos += newitemsz;
338  datalen -= newitemsz;
339 
340  if (xlrec->postingoff != 0)
341  {
342  ItemId itemid;
343  IndexTuple oposting;
344 
345  /* Posting list must be at offset number before new item's */
346  replacepostingoff = OffsetNumberPrev(xlrec->newitemoff);
347 
348  /* Use mutable, aligned newitem copy in _bt_swap_posting() */
349  newitem = CopyIndexTuple(newitem);
350  itemid = PageGetItemId(origpage, replacepostingoff);
351  oposting = (IndexTuple) PageGetItem(origpage, itemid);
352  nposting = _bt_swap_posting(newitem, oposting,
353  xlrec->postingoff);
354  }
355  }
356 
357  /*
358  * Extract left hikey and its size. We assume that 16-bit alignment
359  * is enough to apply IndexTupleSize (since it's fetching from a
360  * uint16 field).
361  */
362  left_hikey = (IndexTuple) datapos;
363  left_hikeysz = MAXALIGN(IndexTupleSize(left_hikey));
364  datapos += left_hikeysz;
365  datalen -= left_hikeysz;
366 
367  Assert(datalen == 0);
368 
369  leftpage = PageGetTempPageCopySpecial(origpage);
370 
371  /* Add high key tuple from WAL record to temp page */
372  leftoff = P_HIKEY;
373  if (PageAddItem(leftpage, (Item) left_hikey, left_hikeysz, P_HIKEY,
374  false, false) == InvalidOffsetNumber)
375  elog(ERROR, "failed to add high key to left page after split");
376  leftoff = OffsetNumberNext(leftoff);
377 
378  for (off = P_FIRSTDATAKEY(oopaque); off < xlrec->firstrightoff; off++)
379  {
380  ItemId itemid;
381  Size itemsz;
382  IndexTuple item;
383 
384  /* Add replacement posting list when required */
385  if (off == replacepostingoff)
386  {
387  Assert(newitemonleft ||
388  xlrec->firstrightoff == xlrec->newitemoff);
389  if (PageAddItem(leftpage, (Item) nposting,
390  MAXALIGN(IndexTupleSize(nposting)), leftoff,
391  false, false) == InvalidOffsetNumber)
392  elog(ERROR, "failed to add new posting list item to left page after split");
393  leftoff = OffsetNumberNext(leftoff);
394  continue; /* don't insert oposting */
395  }
396 
397  /* add the new item if it was inserted on left page */
398  else if (newitemonleft && off == xlrec->newitemoff)
399  {
400  if (PageAddItem(leftpage, (Item) newitem, newitemsz, leftoff,
401  false, false) == InvalidOffsetNumber)
402  elog(ERROR, "failed to add new item to left page after split");
403  leftoff = OffsetNumberNext(leftoff);
404  }
405 
406  itemid = PageGetItemId(origpage, off);
407  itemsz = ItemIdGetLength(itemid);
408  item = (IndexTuple) PageGetItem(origpage, itemid);
409  if (PageAddItem(leftpage, (Item) item, itemsz, leftoff,
410  false, false) == InvalidOffsetNumber)
411  elog(ERROR, "failed to add old item to left page after split");
412  leftoff = OffsetNumberNext(leftoff);
413  }
414 
415  /* cope with possibility that newitem goes at the end */
416  if (newitemonleft && off == xlrec->newitemoff)
417  {
418  if (PageAddItem(leftpage, (Item) newitem, newitemsz, leftoff,
419  false, false) == InvalidOffsetNumber)
420  elog(ERROR, "failed to add new item to left page after split");
421  leftoff = OffsetNumberNext(leftoff);
422  }
423 
424  PageRestoreTempPage(leftpage, origpage);
425 
426  /* Fix opaque fields */
427  oopaque->btpo_flags = BTP_INCOMPLETE_SPLIT;
428  if (isleaf)
429  oopaque->btpo_flags |= BTP_LEAF;
430  oopaque->btpo_next = rightpagenumber;
431  oopaque->btpo_cycleid = 0;
432 
433  PageSetLSN(origpage, lsn);
434  MarkBufferDirty(buf);
435  }
436 
437  /* Fix left-link of the page to the right of the new right sibling */
438  if (spagenumber != P_NONE)
439  {
440  Buffer sbuf;
441 
442  if (XLogReadBufferForRedo(record, 2, &sbuf) == BLK_NEEDS_REDO)
443  {
444  Page spage = (Page) BufferGetPage(sbuf);
445  BTPageOpaque spageop = BTPageGetOpaque(spage);
446 
447  spageop->btpo_prev = rightpagenumber;
448 
449  PageSetLSN(spage, lsn);
450  MarkBufferDirty(sbuf);
451  }
452  if (BufferIsValid(sbuf))
453  UnlockReleaseBuffer(sbuf);
454  }
455 
456  /*
457  * Finally, release the remaining buffers. sbuf, rbuf, and buf must be
458  * released together, so that readers cannot observe inconsistencies.
459  */
460  UnlockReleaseBuffer(rbuf);
461  if (BufferIsValid(buf))
462  UnlockReleaseBuffer(buf);
463 }

References _bt_clear_incomplete_split(), _bt_pageinit(), _bt_restore_page(), _bt_swap_posting(), Assert(), BLK_NEEDS_REDO, BTP_INCOMPLETE_SPLIT, BTP_LEAF, BTPageGetOpaque, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_level, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, buf, BufferGetPage(), BufferGetPageSize(), BufferIsValid(), CopyIndexTuple(), elog(), XLogReaderState::EndRecPtr, ERROR, xl_btree_split::firstrightoff, IndexTupleSize, InvalidOffsetNumber, ItemIdGetLength, xl_btree_split::level, MarkBufferDirty(), MAXALIGN, xl_btree_split::newitemoff, OffsetNumberNext, OffsetNumberPrev, P_FIRSTDATAKEY, P_HIKEY, P_NONE, PageAddItem, PageGetItem(), PageGetItemId(), PageGetTempPageCopySpecial(), PageRestoreTempPage(), PageSetLSN(), xl_btree_split::postingoff, UnlockReleaseBuffer(), XLogInitBufferForRedo(), XLogReadBufferForRedo(), XLogRecGetBlockData(), XLogRecGetBlockTag(), XLogRecGetBlockTagExtended(), and XLogRecGetData.

Referenced by btree_redo().

◆ btree_xlog_startup()

void btree_xlog_startup ( void  )

Definition at line 1075 of file nbtxlog.c.

1076 {
1077  opCtx = AllocSetContextCreate(CurrentMemoryContext,
1078  "Btree recovery temporary context",
1079  ALLOCSET_DEFAULT_SIZES);
1080 }

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, CurrentMemoryContext, and opCtx.

◆ btree_xlog_unlink_page()

static void btree_xlog_unlink_page ( uint8  info,
XLogReaderState * record 
)
static

Definition at line 800 of file nbtxlog.c.

801 {
802  XLogRecPtr lsn = record->EndRecPtr;
803  xl_btree_unlink_page *xlrec = (xl_btree_unlink_page *) XLogRecGetData(record);
804  BlockNumber leftsib;
805  BlockNumber rightsib;
806  uint32 level;
807  bool isleaf;
808  FullTransactionId safexid;
809  Buffer leftbuf;
810  Buffer target;
811  Buffer rightbuf;
812  Page page;
813  BTPageOpaque pageop;
814 
815  leftsib = xlrec->leftsib;
816  rightsib = xlrec->rightsib;
817  level = xlrec->level;
818  isleaf = (level == 0);
819  safexid = xlrec->safexid;
820 
821  /* No leaftopparent for level 0 (leaf page) or level 1 target */
822  Assert(!BlockNumberIsValid(xlrec->leaftopparent) || level > 1);
823 
824  /*
825  * In normal operation, we would lock all the pages this WAL record
826  * touches before changing any of them. In WAL replay, we at least lock
827  * the pages in the same standard left-to-right order (leftsib, target,
828  * rightsib), and don't release the sibling locks until the target is
829  * marked deleted.
830  */
831 
832  /* Fix right-link of left sibling, if any */
833  if (leftsib != P_NONE)
834  {
835  if (XLogReadBufferForRedo(record, 1, &leftbuf) == BLK_NEEDS_REDO)
836  {
837  page = (Page) BufferGetPage(leftbuf);
838  pageop = BTPageGetOpaque(page);
839  pageop->btpo_next = rightsib;
840 
841  PageSetLSN(page, lsn);
842  MarkBufferDirty(leftbuf);
843  }
844  }
845  else
846  leftbuf = InvalidBuffer;
847 
848  /* Rewrite target page as empty deleted page */
849  target = XLogInitBufferForRedo(record, 0);
850  page = (Page) BufferGetPage(target);
851 
852  _bt_pageinit(page, BufferGetPageSize(target));
853  pageop = BTPageGetOpaque(page);
854 
855  pageop->btpo_prev = leftsib;
856  pageop->btpo_next = rightsib;
857  pageop->btpo_level = level;
858  BTPageSetDeleted(page, safexid);
859  if (isleaf)
860  pageop->btpo_flags |= BTP_LEAF;
861  pageop->btpo_cycleid = 0;
862 
863  PageSetLSN(page, lsn);
864  MarkBufferDirty(target);
865 
866  /* Fix left-link of right sibling */
867  if (XLogReadBufferForRedo(record, 2, &rightbuf) == BLK_NEEDS_REDO)
868  {
869  page = (Page) BufferGetPage(rightbuf);
870  pageop = BTPageGetOpaque(page);
871  pageop->btpo_prev = leftsib;
872 
873  PageSetLSN(page, lsn);
874  MarkBufferDirty(rightbuf);
875  }
876 
877  /* Release siblings */
878  if (BufferIsValid(leftbuf))
879  UnlockReleaseBuffer(leftbuf);
880  if (BufferIsValid(rightbuf))
881  UnlockReleaseBuffer(rightbuf);
882 
883  /* Release target */
884  UnlockReleaseBuffer(target);
885 
886  /*
887  * If we deleted a parent of the targeted leaf page, instead of the leaf
888  * itself, update the leaf to point to the next remaining child in the
889  * to-be-deleted subtree
890  */
891  if (XLogRecHasBlockRef(record, 3))
892  {
893  /*
894  * There is no real data on the page, so we just re-create it from
895  * scratch using the information from the WAL record.
896  *
897  * Note that we don't end up here when the target page is also the
898  * leafbuf page. There is no need to add a dummy hikey item with a
899  * top parent link when deleting leafbuf because it's the last page
900  * we'll delete in the subtree undergoing deletion.
901  */
902  Buffer leafbuf;
903  IndexTupleData trunctuple;
904 
905  Assert(!isleaf);
906 
907  leafbuf = XLogInitBufferForRedo(record, 3);
908  page = (Page) BufferGetPage(leafbuf);
909 
910  _bt_pageinit(page, BufferGetPageSize(leafbuf));
911  pageop = BTPageGetOpaque(page);
912 
913  pageop->btpo_flags = BTP_HALF_DEAD | BTP_LEAF;
914  pageop->btpo_prev = xlrec->leafleftsib;
915  pageop->btpo_next = xlrec->leafrightsib;
916  pageop->btpo_level = 0;
917  pageop->btpo_cycleid = 0;
918 
919  /* Add a dummy hikey item */
920  MemSet(&trunctuple, 0, sizeof(IndexTupleData));
921  trunctuple.t_info = sizeof(IndexTupleData);
922  BTreeTupleSetTopParent(&trunctuple, xlrec->leaftopparent);
923 
924  if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY,
925  false, false) == InvalidOffsetNumber)
926  elog(ERROR, "could not add dummy high key to half-dead page");
927 
928  PageSetLSN(page, lsn);
929  MarkBufferDirty(leafbuf);
930  UnlockReleaseBuffer(leafbuf);
931  }
932 
933  /* Update metapage if needed */
934  if (info == XLOG_BTREE_UNLINK_PAGE_META)
935  _bt_restore_meta(record, 4);
936 }

References _bt_pageinit(), _bt_restore_meta(), Assert(), BLK_NEEDS_REDO, BlockNumberIsValid(), BTP_HALF_DEAD, BTP_LEAF, BTPageGetOpaque, BTPageSetDeleted(), BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_level, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BTreeTupleSetTopParent(), BufferGetPage(), BufferGetPageSize(), BufferIsValid(), elog(), XLogReaderState::EndRecPtr, ERROR, InvalidBuffer, InvalidOffsetNumber, xl_btree_unlink_page::leafleftsib, xl_btree_unlink_page::leafrightsib, xl_btree_unlink_page::leaftopparent, xl_btree_unlink_page::leftsib, xl_btree_unlink_page::level, MarkBufferDirty(), MemSet, P_HIKEY, P_NONE, PageAddItem, PageSetLSN(), xl_btree_unlink_page::rightsib, xl_btree_unlink_page::safexid, IndexTupleData::t_info, UnlockReleaseBuffer(), XLOG_BTREE_UNLINK_PAGE_META, XLogInitBufferForRedo(), XLogReadBufferForRedo(), XLogRecGetData, and XLogRecHasBlockRef.

Referenced by btree_redo().

◆ btree_xlog_updates()

static void btree_xlog_updates ( Page  page,
OffsetNumber * updatedoffsets,
xl_btree_update * updates,
int  nupdated 
)
static

Definition at line 559 of file nbtxlog.c.

561 {
562  BTVacuumPosting vacposting;
563  IndexTuple origtuple;
564  ItemId itemid;
565  Size itemsz;
566 
567  for (int i = 0; i < nupdated; i++)
568  {
569  itemid = PageGetItemId(page, updatedoffsets[i]);
570  origtuple = (IndexTuple) PageGetItem(page, itemid);
571 
572  vacposting = palloc(offsetof(BTVacuumPostingData, deletetids) +
573  updates->ndeletedtids * sizeof(uint16));
574  vacposting->updatedoffset = updatedoffsets[i];
575  vacposting->itup = origtuple;
576  vacposting->ndeletedtids = updates->ndeletedtids;
577  memcpy(vacposting->deletetids,
578  (char *) updates + SizeOfBtreeUpdate,
579  updates->ndeletedtids * sizeof(uint16));
580 
581  _bt_update_posting(vacposting);
582 
583  /* Overwrite updated version of tuple */
584  itemsz = MAXALIGN(IndexTupleSize(vacposting->itup));
585  if (!PageIndexTupleOverwrite(page, updatedoffsets[i],
586  (Item) vacposting->itup, itemsz))
587  elog(PANIC, "failed to update partially dead item");
588 
589  pfree(vacposting->itup);
590  pfree(vacposting);
591 
592  /* advance to next xl_btree_update from array */
593  updates = (xl_btree_update *)
594  ((char *) updates + SizeOfBtreeUpdate +
595  updates->ndeletedtids * sizeof(uint16));
596  }
597 }

References _bt_update_posting(), BTVacuumPostingData::deletetids, elog(), i, IndexTupleSize, BTVacuumPostingData::itup, MAXALIGN, BTVacuumPostingData::ndeletedtids, xl_btree_update::ndeletedtids, PageGetItem(), PageGetItemId(), PageIndexTupleOverwrite(), palloc(), PANIC, pfree(), SizeOfBtreeUpdate, and BTVacuumPostingData::updatedoffset.

Referenced by btree_xlog_delete(), and btree_xlog_vacuum().

◆ btree_xlog_vacuum()

static void btree_xlog_vacuum ( XLogReaderState * record)
static

Definition at line 600 of file nbtxlog.c.

601 {
602  XLogRecPtr lsn = record->EndRecPtr;
603  xl_btree_vacuum *xlrec = (xl_btree_vacuum *) XLogRecGetData(record);
604  Buffer buffer;
605  Page page;
606  BTPageOpaque opaque;
607 
608  /*
609  * We need to take a cleanup lock here, just like btvacuumpage(). However,
610  * it isn't necessary to exhaustively get a cleanup lock on every block in
611  * the index during recovery (just getting a cleanup lock on pages with
612  * items to kill suffices). See nbtree/README for details.
613  */
614  if (XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &buffer)
615  == BLK_NEEDS_REDO)
616  {
617  char *ptr = XLogRecGetBlockData(record, 0, NULL);
618 
619  page = (Page) BufferGetPage(buffer);
620 
621  if (xlrec->nupdated > 0)
622  {
623  OffsetNumber *updatedoffsets;
624  xl_btree_update *updates;
625 
626  updatedoffsets = (OffsetNumber *)
627  (ptr + xlrec->ndeleted * sizeof(OffsetNumber));
628  updates = (xl_btree_update *) ((char *) updatedoffsets +
629  xlrec->nupdated *
630  sizeof(OffsetNumber));
631 
632  btree_xlog_updates(page, updatedoffsets, updates, xlrec->nupdated);
633  }
634 
635  if (xlrec->ndeleted > 0)
636  PageIndexMultiDelete(page, (OffsetNumber *) ptr, xlrec->ndeleted);
637 
638  /*
639  * Mark the page as not containing any LP_DEAD items --- see comments
640  * in _bt_delitems_vacuum().
641  */
642  opaque = BTPageGetOpaque(page);
643  opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
644 
645  PageSetLSN(page, lsn);
646  MarkBufferDirty(buffer);
647  }
648  if (BufferIsValid(buffer))
649  UnlockReleaseBuffer(buffer);
650 }

References BLK_NEEDS_REDO, BTP_HAS_GARBAGE, BTPageGetOpaque, BTPageOpaqueData::btpo_flags, btree_xlog_updates(), BufferGetPage(), BufferIsValid(), XLogReaderState::EndRecPtr, MarkBufferDirty(), xl_btree_vacuum::ndeleted, xl_btree_vacuum::nupdated, PageIndexMultiDelete(), PageSetLSN(), RBM_NORMAL, UnlockReleaseBuffer(), XLogReadBufferForRedoExtended(), XLogRecGetBlockData(), and XLogRecGetData.

Referenced by btree_redo().

Variable Documentation

◆ opCtx

MemoryContext opCtx
static

Definition at line 27 of file nbtxlog.c.

Referenced by btree_redo(), btree_xlog_cleanup(), and btree_xlog_startup().
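
Taken together, these references describe opCtx's lifetime during recovery; a paraphrased sketch (not literal code from this file):

  btree_xlog_startup();                 /* opCtx = AllocSetContextCreate(...) */

  /* for each B-Tree WAL record during replay: */
  oldCtx = MemoryContextSwitchTo(opCtx);
  /* ... btree_redo() dispatches to the per-record handler ... */
  MemoryContextSwitchTo(oldCtx);
  MemoryContextReset(opCtx);            /* drop the record's temporary allocations */

  btree_xlog_cleanup();                 /* MemoryContextDelete(opCtx); opCtx = NULL */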