PostgreSQL Source Code git master
nbtxlog.c File Reference
#include "postgres.h"
#include "access/bufmask.h"
#include "access/nbtree.h"
#include "access/nbtxlog.h"
#include "access/transam.h"
#include "access/xlogutils.h"
#include "storage/standby.h"
#include "utils/memutils.h"
Include dependency graph for nbtxlog.c:

Go to the source code of this file.

Functions

static void _bt_restore_page (Page page, char *from, int len)
 
static void _bt_restore_meta (XLogReaderState *record, uint8 block_id)
 
static void _bt_clear_incomplete_split (XLogReaderState *record, uint8 block_id)
 
static void btree_xlog_insert (bool isleaf, bool ismeta, bool posting, XLogReaderState *record)
 
static void btree_xlog_split (bool newitemonleft, XLogReaderState *record)
 
static void btree_xlog_dedup (XLogReaderState *record)
 
static void btree_xlog_updates (Page page, OffsetNumber *updatedoffsets, xl_btree_update *updates, int nupdated)
 
static void btree_xlog_vacuum (XLogReaderState *record)
 
static void btree_xlog_delete (XLogReaderState *record)
 
static void btree_xlog_mark_page_halfdead (uint8 info, XLogReaderState *record)
 
static void btree_xlog_unlink_page (uint8 info, XLogReaderState *record)
 
static void btree_xlog_newroot (XLogReaderState *record)
 
static void btree_xlog_reuse_page (XLogReaderState *record)
 
void btree_redo (XLogReaderState *record)
 
void btree_xlog_startup (void)
 
void btree_xlog_cleanup (void)
 
void btree_mask (char *pagedata, BlockNumber blkno)
 

Variables

static MemoryContext opCtx
 

Function Documentation

◆ _bt_clear_incomplete_split()

static void _bt_clear_incomplete_split ( XLogReaderState *record,
uint8  block_id 
)
static

Definition at line 139 of file nbtxlog.c.

140{
141 XLogRecPtr lsn = record->EndRecPtr;
142 Buffer buf;
143
144 if (XLogReadBufferForRedo(record, block_id, &buf) == BLK_NEEDS_REDO)
145 {
146 Page page = (Page) BufferGetPage(buf);
147 BTPageOpaque pageop = BTPageGetOpaque(page);
148
149 Assert(P_INCOMPLETE_SPLIT(pageop));
150 pageop->btpo_flags &= ~BTP_INCOMPLETE_SPLIT;
151
152 PageSetLSN(page, lsn);
153 MarkBufferDirty(buf);
154 }
155 if (BufferIsValid(buf))
156 UnlockReleaseBuffer(buf);
157}

References Assert, BLK_NEEDS_REDO, BTPageGetOpaque, BTPageOpaqueData::btpo_flags, buf, BufferGetPage(), BufferIsValid(), XLogReaderState::EndRecPtr, MarkBufferDirty(), P_INCOMPLETE_SPLIT, PageSetLSN(), UnlockReleaseBuffer(), and XLogReadBufferForRedo().

Referenced by btree_xlog_insert(), btree_xlog_newroot(), and btree_xlog_split().

◆ _bt_restore_meta()

static void _bt_restore_meta ( XLogReaderState *record,
uint8  block_id 
)
static

Definition at line 82 of file nbtxlog.c.

83{
84 XLogRecPtr lsn = record->EndRecPtr;
85 Buffer metabuf;
86 Page metapg;
87 BTMetaPageData *md;
88 BTPageOpaque pageop;
89 xl_btree_metadata *xlrec;
90 char *ptr;
91 Size len;
92
93 metabuf = XLogInitBufferForRedo(record, block_id);
94 ptr = XLogRecGetBlockData(record, block_id, &len);
95
96 Assert(len == sizeof(xl_btree_metadata));
97 Assert(BufferGetBlockNumber(metabuf) == BTREE_METAPAGE);
98 xlrec = (xl_btree_metadata *) ptr;
99 metapg = BufferGetPage(metabuf);
100
101 _bt_pageinit(metapg, BufferGetPageSize(metabuf));
102
103 md = BTPageGetMeta(metapg);
104 md->btm_magic = BTREE_MAGIC;
105 md->btm_version = xlrec->version;
106 md->btm_root = xlrec->root;
107 md->btm_level = xlrec->level;
108 md->btm_fastroot = xlrec->fastroot;
109 md->btm_fastlevel = xlrec->fastlevel;
110 /* Cannot log BTREE_MIN_VERSION index metapage without upgrade */
111 Assert(md->btm_version >= BTREE_NOVAC_VERSION);
112 md->btm_last_cleanup_num_delpages = xlrec->last_cleanup_num_delpages;
113 md->btm_last_cleanup_num_heap_tuples = -1.0;
114 md->btm_allequalimage = xlrec->allequalimage;
115
116 pageop = BTPageGetOpaque(metapg);
117 pageop->btpo_flags = BTP_META;
118
119 /*
120 * Set pd_lower just past the end of the metadata. This is essential,
121 * because without doing so, metadata will be lost if xlog.c compresses
122 * the page.
123 */
124 ((PageHeader) metapg)->pd_lower =
125 ((char *) md + sizeof(BTMetaPageData)) - (char *) metapg;
126
127 PageSetLSN(metapg, lsn);
128 MarkBufferDirty(metabuf);
129 UnlockReleaseBuffer(metabuf);
130}

References _bt_pageinit(), xl_btree_metadata::allequalimage, Assert, BTMetaPageData::btm_allequalimage, BTMetaPageData::btm_fastlevel, BTMetaPageData::btm_fastroot, BTMetaPageData::btm_last_cleanup_num_delpages, BTMetaPageData::btm_last_cleanup_num_heap_tuples, BTMetaPageData::btm_level, BTMetaPageData::btm_magic, BTMetaPageData::btm_root, BTMetaPageData::btm_version, BTP_META, BTPageGetMeta, BTPageGetOpaque, BTPageOpaqueData::btpo_flags, BTREE_MAGIC, BTREE_METAPAGE, BTREE_NOVAC_VERSION, BufferGetBlockNumber(), BufferGetPage(), BufferGetPageSize(), XLogReaderState::EndRecPtr, xl_btree_metadata::fastlevel, xl_btree_metadata::fastroot, xl_btree_metadata::last_cleanup_num_delpages, len, xl_btree_metadata::level, MarkBufferDirty(), PageSetLSN(), xl_btree_metadata::root, UnlockReleaseBuffer(), xl_btree_metadata::version, XLogInitBufferForRedo(), and XLogRecGetBlockData().

Referenced by btree_redo(), btree_xlog_insert(), btree_xlog_newroot(), and btree_xlog_unlink_page().

◆ _bt_restore_page()

static void _bt_restore_page ( Page  page,
char *  from,
int  len 
)
static

Definition at line 36 of file nbtxlog.c.

37{
38 IndexTupleData itupdata;
39 Size itemsz;
40 char *end = from + len;
41 Item items[MaxIndexTuplesPerPage];
42 uint16 itemsizes[MaxIndexTuplesPerPage];
43 int i;
44 int nitems;
45
46 /*
47 * To get the items back in the original order, we add them to the page in
48 * reverse. To figure out where one tuple ends and another begins, we
49 * have to scan them in forward order first.
50 */
51 i = 0;
52 while (from < end)
53 {
54 /*
55 * As we step through the items, 'from' won't always be properly
56 * aligned, so we need to use memcpy(). Further, we use Item (which
57 * is just a char*) here for our items array for the same reason;
58 * wouldn't want the compiler or anyone thinking that an item is
59 * aligned when it isn't.
60 */
61 memcpy(&itupdata, from, sizeof(IndexTupleData));
62 itemsz = IndexTupleSize(&itupdata);
63 itemsz = MAXALIGN(itemsz);
64
65 items[i] = (Item) from;
66 itemsizes[i] = itemsz;
67 i++;
68
69 from += itemsz;
70 }
71 nitems = i;
72
73 for (i = nitems - 1; i >= 0; i--)
74 {
75 if (PageAddItem(page, items[i], itemsizes[i], nitems - i,
76 false, false) == InvalidOffsetNumber)
77 elog(PANIC, "_bt_restore_page: cannot add item to page");
78 }
79}

References elog, i, IndexTupleSize, InvalidOffsetNumber, items, len, MAXALIGN, MaxIndexTuplesPerPage, nitems, PageAddItem, and PANIC.

Referenced by btree_xlog_newroot(), and btree_xlog_split().

◆ btree_mask()

void btree_mask ( char *  pagedata,
BlockNumber  blkno 
)

Definition at line 1095 of file nbtxlog.c.

1096{
1097 Page page = (Page) pagedata;
1098 BTPageOpaque maskopaq;
1099
1100 mask_page_lsn_and_checksum(page);
1101
1102 mask_page_hint_bits(page);
1103 mask_unused_space(page);
1104
1105 maskopaq = BTPageGetOpaque(page);
1106
1107 if (P_ISLEAF(maskopaq))
1108 {
1109 /*
1110 * In btree leaf pages, it is possible to modify the LP_FLAGS without
1111 * emitting any WAL record. Hence, mask the line pointer flags. See
1112 * _bt_killitems(), _bt_check_unique() for details.
1113 */
1114 mask_lp_flags(page);
1115 }
1116
1117 /*
1118 * BTP_HAS_GARBAGE is just an un-logged hint bit. So, mask it. See
1119 * _bt_delete_or_dedup_one_page(), _bt_killitems(), and _bt_check_unique()
1120 * for details.
1121 */
1122 maskopaq->btpo_flags &= ~BTP_HAS_GARBAGE;
1123
1124 /*
1125 * During replay of a btree page split, we don't set the BTP_SPLIT_END
1126 * flag of the right sibling and initialize the cycle_id to 0 for the same
1127 * page. See btree_xlog_split() for details.
1128 */
1129 maskopaq->btpo_flags &= ~BTP_SPLIT_END;
1130 maskopaq->btpo_cycleid = 0;
1131}

References BTPageGetOpaque, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, mask_lp_flags(), mask_page_hint_bits(), mask_page_lsn_and_checksum(), mask_unused_space(), and P_ISLEAF.
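
btree_mask() supports wal_consistency_checking: recovery masks both the page it has just replayed and the full-page image carried in the WAL record, and only then compares them byte for byte. The helper below is a minimal sketch of that comparison, not code from nbtxlog.c; the function name and the two page buffers are illustrative only.

#include "postgres.h"
#include "access/nbtree.h"		/* declares btree_mask() */

/*
 * Illustrative helper: do two copies of a btree page match once every field
 * that may legitimately differ after replay (page LSN/checksum, hint bits,
 * unused space, leaf LP flags, btpo_cycleid) has been masked away?
 */
static bool
btree_pages_equivalent(const char *replayed, const char *primary_fpi,
                       BlockNumber blkno)
{
    char        masked_replay[BLCKSZ];
    char        masked_primary[BLCKSZ];

    memcpy(masked_replay, replayed, BLCKSZ);
    memcpy(masked_primary, primary_fpi, BLCKSZ);

    /* blank out everything the WAL record is not required to reproduce */
    btree_mask(masked_replay, blkno);
    btree_mask(masked_primary, blkno);

    return memcmp(masked_replay, masked_primary, BLCKSZ) == 0;
}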

◆ btree_redo()

void btree_redo ( XLogReaderState *record )

Definition at line 1018 of file nbtxlog.c.

1019{
1020 uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
1021 MemoryContext oldCtx;
1022
1023 oldCtx = MemoryContextSwitchTo(opCtx);
1024 switch (info)
1025 {
1026 case XLOG_BTREE_INSERT_LEAF:
1027 btree_xlog_insert(true, false, false, record);
1028 break;
1029 case XLOG_BTREE_INSERT_UPPER:
1030 btree_xlog_insert(false, false, false, record);
1031 break;
1032 case XLOG_BTREE_INSERT_META:
1033 btree_xlog_insert(false, true, false, record);
1034 break;
1035 case XLOG_BTREE_SPLIT_L:
1036 btree_xlog_split(true, record);
1037 break;
1038 case XLOG_BTREE_SPLIT_R:
1039 btree_xlog_split(false, record);
1040 break;
1041 case XLOG_BTREE_INSERT_POST:
1042 btree_xlog_insert(true, false, true, record);
1043 break;
1044 case XLOG_BTREE_DEDUP:
1045 btree_xlog_dedup(record);
1046 break;
1047 case XLOG_BTREE_VACUUM:
1048 btree_xlog_vacuum(record);
1049 break;
1050 case XLOG_BTREE_DELETE:
1051 btree_xlog_delete(record);
1052 break;
1053 case XLOG_BTREE_MARK_PAGE_HALFDEAD:
1054 btree_xlog_mark_page_halfdead(info, record);
1055 break;
1056 case XLOG_BTREE_UNLINK_PAGE:
1057 case XLOG_BTREE_UNLINK_PAGE_META:
1058 btree_xlog_unlink_page(info, record);
1059 break;
1060 case XLOG_BTREE_NEWROOT:
1061 btree_xlog_newroot(record);
1062 break;
1063 case XLOG_BTREE_REUSE_PAGE:
1064 btree_xlog_reuse_page(record);
1065 break;
1066 case XLOG_BTREE_META_CLEANUP:
1067 _bt_restore_meta(record, 0);
1068 break;
1069 default:
1070 elog(PANIC, "btree_redo: unknown op code %u", info);
1071 }
1072 MemoryContextSwitchTo(oldCtx);
1073 MemoryContextReset(opCtx);
1074}

References _bt_restore_meta(), btree_xlog_dedup(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_reuse_page(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), elog, MemoryContextReset(), MemoryContextSwitchTo(), opCtx, PANIC, XLOG_BTREE_DEDUP, XLOG_BTREE_DELETE, XLOG_BTREE_INSERT_LEAF, XLOG_BTREE_INSERT_META, XLOG_BTREE_INSERT_POST, XLOG_BTREE_INSERT_UPPER, XLOG_BTREE_MARK_PAGE_HALFDEAD, XLOG_BTREE_META_CLEANUP, XLOG_BTREE_NEWROOT, XLOG_BTREE_REUSE_PAGE, XLOG_BTREE_SPLIT_L, XLOG_BTREE_SPLIT_R, XLOG_BTREE_UNLINK_PAGE, XLOG_BTREE_UNLINK_PAGE_META, XLOG_BTREE_VACUUM, and XLogRecGetInfo.
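
btree_redo(), btree_xlog_startup(), btree_xlog_cleanup(), and btree_mask() are the callbacks the B-tree resource manager registers with the generic WAL machinery (see access/rmgrlist.h). The sketch below only illustrates the expected call order; the driver function and next_btree_record() helper are hypothetical, not part of PostgreSQL.

#include "postgres.h"
#include "access/xlogreader.h"
#include "access/nbtxlog.h"		/* btree_redo(), btree_xlog_startup(), ... */

/* Hypothetical record source, standing in for the generic replay loop. */
extern XLogReaderState *next_btree_record(void);

/*
 * Sketch of the order in which recovery drives this file's entry points:
 * startup once, redo for every B-tree record (btree_redo() resets opCtx
 * after each one), cleanup when replay ends.  The real loop lives in the
 * generic recovery code, not in nbtxlog.c.
 */
static void
replay_btree_stream(void)
{
    XLogReaderState *record;

    btree_xlog_startup();		/* creates opCtx */

    while ((record = next_btree_record()) != NULL)
        btree_redo(record);		/* dispatches on XLogRecGetInfo(record) */

    btree_xlog_cleanup();		/* deletes opCtx */
}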

◆ btree_xlog_cleanup()

void btree_xlog_cleanup ( void  )

Definition at line 1085 of file nbtxlog.c.

1086{
1087 MemoryContextDelete(opCtx);
1088 opCtx = NULL;
1089}

References MemoryContextDelete(), and opCtx.

◆ btree_xlog_dedup()

static void btree_xlog_dedup ( XLogReaderState *record )
static

Definition at line 464 of file nbtxlog.c.

465{
466 XLogRecPtr lsn = record->EndRecPtr;
467 xl_btree_dedup *xlrec = (xl_btree_dedup *) XLogRecGetData(record);
468 Buffer buf;
469
470 if (XLogReadBufferForRedo(record, 0, &buf) == BLK_NEEDS_REDO)
471 {
472 char *ptr = XLogRecGetBlockData(record, 0, NULL);
473 Page page = (Page) BufferGetPage(buf);
474 BTPageOpaque opaque = BTPageGetOpaque(page);
475 OffsetNumber offnum,
476 minoff,
477 maxoff;
478 BTDedupState state;
479 BTDedupInterval *intervals;
480 Page newpage;
481
482 state = (BTDedupState) palloc(sizeof(BTDedupStateData));
483 state->deduplicate = true; /* unused */
484 state->nmaxitems = 0; /* unused */
485 /* Conservatively use larger maxpostingsize than primary */
486 state->maxpostingsize = BTMaxItemSize(page);
487 state->base = NULL;
488 state->baseoff = InvalidOffsetNumber;
489 state->basetupsize = 0;
490 state->htids = palloc(state->maxpostingsize);
491 state->nhtids = 0;
492 state->nitems = 0;
493 state->phystupsize = 0;
494 state->nintervals = 0;
495
496 minoff = P_FIRSTDATAKEY(opaque);
497 maxoff = PageGetMaxOffsetNumber(page);
498 newpage = PageGetTempPageCopySpecial(page);
499
500 if (!P_RIGHTMOST(opaque))
501 {
502 ItemId itemid = PageGetItemId(page, P_HIKEY);
503 Size itemsz = ItemIdGetLength(itemid);
504 IndexTuple item = (IndexTuple) PageGetItem(page, itemid);
505
506 if (PageAddItem(newpage, (Item) item, itemsz, P_HIKEY,
507 false, false) == InvalidOffsetNumber)
508 elog(ERROR, "deduplication failed to add highkey");
509 }
510
511 intervals = (BTDedupInterval *) ptr;
512 for (offnum = minoff;
513 offnum <= maxoff;
514 offnum = OffsetNumberNext(offnum))
515 {
516 ItemId itemid = PageGetItemId(page, offnum);
517 IndexTuple itup = (IndexTuple) PageGetItem(page, itemid);
518
519 if (offnum == minoff)
520 _bt_dedup_start_pending(state, itup, offnum);
521 else if (state->nintervals < xlrec->nintervals &&
522 state->baseoff == intervals[state->nintervals].baseoff &&
523 state->nitems < intervals[state->nintervals].nitems)
524 {
525 if (!_bt_dedup_save_htid(state, itup))
526 elog(ERROR, "deduplication failed to add heap tid to pending posting list");
527 }
528 else
529 {
530 _bt_dedup_finish_pending(newpage, state);
531 _bt_dedup_start_pending(state, itup, offnum);
532 }
533 }
534
535 _bt_dedup_finish_pending(newpage, state);
536 Assert(state->nintervals == xlrec->nintervals);
537 Assert(memcmp(state->intervals, intervals,
538 state->nintervals * sizeof(BTDedupInterval)) == 0);
539
540 if (P_HAS_GARBAGE(opaque))
541 {
542 BTPageOpaque nopaque = BTPageGetOpaque(newpage);
543
544 nopaque->btpo_flags &= ~BTP_HAS_GARBAGE;
545 }
546
547 PageRestoreTempPage(newpage, page);
548 PageSetLSN(page, lsn);
549 MarkBufferDirty(buf);
550 }
551
552 if (BufferIsValid(buf))
553 UnlockReleaseBuffer(buf);
554}

References _bt_dedup_finish_pending(), _bt_dedup_save_htid(), _bt_dedup_start_pending(), Assert, BLK_NEEDS_REDO, BTMaxItemSize, BTPageGetOpaque, BTPageOpaqueData::btpo_flags, buf, BufferGetPage(), BufferIsValid(), elog, XLogReaderState::EndRecPtr, ERROR, intervals, InvalidOffsetNumber, ItemIdGetLength, MarkBufferDirty(), xl_btree_dedup::nintervals, OffsetNumberNext, P_FIRSTDATAKEY, P_HAS_GARBAGE, P_HIKEY, P_RIGHTMOST, PageAddItem, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageGetTempPageCopySpecial(), PageRestoreTempPage(), PageSetLSN(), palloc(), UnlockReleaseBuffer(), XLogReadBufferForRedo(), XLogRecGetBlockData(), and XLogRecGetData.

Referenced by btree_redo().

◆ btree_xlog_delete()

static void btree_xlog_delete ( XLogReaderState *record )
static

Definition at line 652 of file nbtxlog.c.

653{
654 XLogRecPtr lsn = record->EndRecPtr;
655 xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
656 Buffer buffer;
657 Page page;
658 BTPageOpaque opaque;
659
660 /*
661 * If we have any conflict processing to do, it must happen before we
662 * update the page
663 */
664 if (InHotStandby)
665 {
666 RelFileLocator rlocator;
667
668 XLogRecGetBlockTag(record, 0, &rlocator, NULL, NULL);
669
670 ResolveRecoveryConflictWithSnapshot(xlrec->snapshotConflictHorizon,
671 xlrec->isCatalogRel,
672 rlocator);
673 }
674
675 /*
676 * We don't need to take a cleanup lock to apply these changes. See
677 * nbtree/README for details.
678 */
679 if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
680 {
681 char *ptr = XLogRecGetBlockData(record, 0, NULL);
682
683 page = (Page) BufferGetPage(buffer);
684
685 if (xlrec->nupdated > 0)
686 {
687 OffsetNumber *updatedoffsets;
688 xl_btree_update *updates;
689
690 updatedoffsets = (OffsetNumber *)
691 (ptr + xlrec->ndeleted * sizeof(OffsetNumber));
692 updates = (xl_btree_update *) ((char *) updatedoffsets +
693 xlrec->nupdated *
694 sizeof(OffsetNumber));
695
696 btree_xlog_updates(page, updatedoffsets, updates, xlrec->nupdated);
697 }
698
699 if (xlrec->ndeleted > 0)
700 PageIndexMultiDelete(page, (OffsetNumber *) ptr, xlrec->ndeleted);
701
702 /*
703 * Do *not* clear the vacuum cycle ID, but do mark the page as not
704 * containing any LP_DEAD items
705 */
706 opaque = BTPageGetOpaque(page);
707 opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
708
709 PageSetLSN(page, lsn);
710 MarkBufferDirty(buffer);
711 }
712 if (BufferIsValid(buffer))
713 UnlockReleaseBuffer(buffer);
714}

References BLK_NEEDS_REDO, BTPageGetOpaque, BTPageOpaqueData::btpo_flags, btree_xlog_updates(), BufferGetPage(), BufferIsValid(), XLogReaderState::EndRecPtr, InHotStandby, xl_btree_delete::isCatalogRel, MarkBufferDirty(), xl_btree_delete::ndeleted, xl_btree_delete::nupdated, PageIndexMultiDelete(), PageSetLSN(), ResolveRecoveryConflictWithSnapshot(), xl_btree_delete::snapshotConflictHorizon, UnlockReleaseBuffer(), XLogReadBufferForRedo(), XLogRecGetBlockData(), XLogRecGetBlockTag(), and XLogRecGetData.

Referenced by btree_redo().

◆ btree_xlog_insert()

static void btree_xlog_insert ( bool  isleaf,
bool  ismeta,
bool  posting,
XLogReaderState *record 
)
static

Definition at line 160 of file nbtxlog.c.

162{
163 XLogRecPtr lsn = record->EndRecPtr;
164 xl_btree_insert *xlrec = (xl_btree_insert *) XLogRecGetData(record);
165 Buffer buffer;
166 Page page;
167
168 /*
169 * Insertion to an internal page finishes an incomplete split at the child
170 * level. Clear the incomplete-split flag in the child. Note: during
171 * normal operation, the child and parent pages are locked at the same
172 * time (the locks are coupled), so that clearing the flag and inserting
173 * the downlink appear atomic to other backends. We don't bother with
174 * that during replay, because readers don't care about the
175 * incomplete-split flag and there cannot be updates happening.
176 */
177 if (!isleaf)
178 _bt_clear_incomplete_split(record, 1);
179 if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
180 {
181 Size datalen;
182 char *datapos = XLogRecGetBlockData(record, 0, &datalen);
183
184 page = BufferGetPage(buffer);
185
186 if (!posting)
187 {
188 /* Simple retail insertion */
189 if (PageAddItem(page, (Item) datapos, datalen, xlrec->offnum,
190 false, false) == InvalidOffsetNumber)
191 elog(PANIC, "failed to add new item");
192 }
193 else
194 {
195 ItemId itemid;
196 IndexTuple oposting,
197 newitem,
198 nposting;
199 uint16 postingoff;
200
201 /*
202 * A posting list split occurred during leaf page insertion. WAL
203 * record data will start with an offset number representing the
204 * point in an existing posting list that a split occurs at.
205 *
206 * Use _bt_swap_posting() to repeat posting list split steps from
207 * primary. Note that newitem from WAL record is 'orignewitem',
208 * not the final version of newitem that is actually inserted on
209 * page.
210 */
211 postingoff = *((uint16 *) datapos);
212 datapos += sizeof(uint16);
213 datalen -= sizeof(uint16);
214
215 itemid = PageGetItemId(page, OffsetNumberPrev(xlrec->offnum));
216 oposting = (IndexTuple) PageGetItem(page, itemid);
217
218 /* Use mutable, aligned newitem copy in _bt_swap_posting() */
219 Assert(isleaf && postingoff > 0);
220 newitem = CopyIndexTuple((IndexTuple) datapos);
221 nposting = _bt_swap_posting(newitem, oposting, postingoff);
222
223 /* Replace existing posting list with post-split version */
224 memcpy(oposting, nposting, MAXALIGN(IndexTupleSize(nposting)));
225
226 /* Insert "final" new item (not orignewitem from WAL stream) */
227 Assert(IndexTupleSize(newitem) == datalen);
228 if (PageAddItem(page, (Item) newitem, datalen, xlrec->offnum,
229 false, false) == InvalidOffsetNumber)
230 elog(PANIC, "failed to add posting split new item");
231 }
232
233 PageSetLSN(page, lsn);
234 MarkBufferDirty(buffer);
235 }
236 if (BufferIsValid(buffer))
237 UnlockReleaseBuffer(buffer);
238
239 /*
240 * Note: in normal operation, we'd update the metapage while still holding
241 * lock on the page we inserted into. But during replay it's not
242 * necessary to hold that lock, since no other index updates can be
243 * happening concurrently, and readers will cope fine with following an
244 * obsolete link from the metapage.
245 */
246 if (ismeta)
247 _bt_restore_meta(record, 2);
248}

References _bt_clear_incomplete_split(), _bt_restore_meta(), _bt_swap_posting(), Assert, BLK_NEEDS_REDO, BufferGetPage(), BufferIsValid(), CopyIndexTuple(), elog, XLogReaderState::EndRecPtr, IndexTupleSize, InvalidOffsetNumber, MarkBufferDirty(), MAXALIGN, xl_btree_insert::offnum, OffsetNumberPrev, PageAddItem, PageGetItem(), PageGetItemId(), PageSetLSN(), PANIC, UnlockReleaseBuffer(), XLogReadBufferForRedo(), XLogRecGetBlockData(), and XLogRecGetData.

Referenced by btree_redo().

◆ btree_xlog_mark_page_halfdead()

static void btree_xlog_mark_page_halfdead ( uint8  info,
XLogReaderState *record 
)
static

Definition at line 717 of file nbtxlog.c.

718{
719 XLogRecPtr lsn = record->EndRecPtr;
720 xl_btree_mark_page_halfdead *xlrec = (xl_btree_mark_page_halfdead *) XLogRecGetData(record);
721 Buffer buffer;
722 Page page;
723 BTPageOpaque pageop;
724 IndexTupleData trunctuple;
725
726 /*
727 * In normal operation, we would lock all the pages this WAL record
728 * touches before changing any of them. In WAL replay, it should be okay
729 * to lock just one page at a time, since no concurrent index updates can
730 * be happening, and readers should not care whether they arrive at the
731 * target page or not (since it's surely empty).
732 */
733
734 /* to-be-deleted subtree's parent page */
735 if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
736 {
737 OffsetNumber poffset;
738 ItemId itemid;
739 IndexTuple itup;
740 OffsetNumber nextoffset;
741 BlockNumber rightsib;
742
743 page = (Page) BufferGetPage(buffer);
744 pageop = BTPageGetOpaque(page);
745
746 poffset = xlrec->poffset;
747
748 nextoffset = OffsetNumberNext(poffset);
749 itemid = PageGetItemId(page, nextoffset);
750 itup = (IndexTuple) PageGetItem(page, itemid);
751 rightsib = BTreeTupleGetDownLink(itup);
752
753 itemid = PageGetItemId(page, poffset);
754 itup = (IndexTuple) PageGetItem(page, itemid);
755 BTreeTupleSetDownLink(itup, rightsib);
756 nextoffset = OffsetNumberNext(poffset);
757 PageIndexTupleDelete(page, nextoffset);
758
759 PageSetLSN(page, lsn);
760 MarkBufferDirty(buffer);
761 }
762
763 /*
764 * Don't need to couple cross-level locks in REDO routines, so release
765 * lock on internal page immediately
766 */
767 if (BufferIsValid(buffer))
768 UnlockReleaseBuffer(buffer);
769
770 /* Rewrite the leaf page as a halfdead page */
771 buffer = XLogInitBufferForRedo(record, 0);
772 page = (Page) BufferGetPage(buffer);
773
774 _bt_pageinit(page, BufferGetPageSize(buffer));
775 pageop = BTPageGetOpaque(page);
776
777 pageop->btpo_prev = xlrec->leftblk;
778 pageop->btpo_next = xlrec->rightblk;
779 pageop->btpo_level = 0;
780 pageop->btpo_flags = BTP_HALF_DEAD | BTP_LEAF;
781 pageop->btpo_cycleid = 0;
782
783 /*
784 * Construct a dummy high key item that points to top parent page (value
785 * is InvalidBlockNumber when the top parent page is the leaf page itself)
786 */
787 MemSet(&trunctuple, 0, sizeof(IndexTupleData));
788 trunctuple.t_info = sizeof(IndexTupleData);
789 BTreeTupleSetTopParent(&trunctuple, xlrec->topparent);
790
791 if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY,
792 false, false) == InvalidOffsetNumber)
793 elog(ERROR, "could not add dummy high key to half-dead page");
794
795 PageSetLSN(page, lsn);
796 MarkBufferDirty(buffer);
797 UnlockReleaseBuffer(buffer);
798}

References _bt_pageinit(), BLK_NEEDS_REDO, BTP_HALF_DEAD, BTP_LEAF, BTPageGetOpaque, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_level, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BTreeTupleGetDownLink(), BTreeTupleSetDownLink(), BTreeTupleSetTopParent(), BufferGetPage(), BufferGetPageSize(), BufferIsValid(), elog, XLogReaderState::EndRecPtr, ERROR, InvalidOffsetNumber, xl_btree_mark_page_halfdead::leftblk, MarkBufferDirty(), MemSet, OffsetNumberNext, P_HIKEY, PageAddItem, PageGetItem(), PageGetItemId(), PageIndexTupleDelete(), PageSetLSN(), xl_btree_mark_page_halfdead::poffset, xl_btree_mark_page_halfdead::rightblk, IndexTupleData::t_info, xl_btree_mark_page_halfdead::topparent, UnlockReleaseBuffer(), XLogInitBufferForRedo(), XLogReadBufferForRedo(), and XLogRecGetData.

Referenced by btree_redo().

◆ btree_xlog_newroot()

static void btree_xlog_newroot ( XLogReaderState *record )
static

Definition at line 941 of file nbtxlog.c.

942{
943 XLogRecPtr lsn = record->EndRecPtr;
944 xl_btree_newroot *xlrec = (xl_btree_newroot *) XLogRecGetData(record);
945 Buffer buffer;
946 Page page;
947 BTPageOpaque pageop;
948 char *ptr;
949 Size len;
950
951 buffer = XLogInitBufferForRedo(record, 0);
952 page = (Page) BufferGetPage(buffer);
953
954 _bt_pageinit(page, BufferGetPageSize(buffer));
955 pageop = BTPageGetOpaque(page);
956
957 pageop->btpo_flags = BTP_ROOT;
958 pageop->btpo_prev = pageop->btpo_next = P_NONE;
959 pageop->btpo_level = xlrec->level;
960 if (xlrec->level == 0)
961 pageop->btpo_flags |= BTP_LEAF;
962 pageop->btpo_cycleid = 0;
963
964 if (xlrec->level > 0)
965 {
966 ptr = XLogRecGetBlockData(record, 0, &len);
967 _bt_restore_page(page, ptr, len);
968
969 /* Clear the incomplete-split flag in left child */
970 _bt_clear_incomplete_split(record, 1);
971 }
972
973 PageSetLSN(page, lsn);
974 MarkBufferDirty(buffer);
975 UnlockReleaseBuffer(buffer);
976
977 _bt_restore_meta(record, 2);
978}

References _bt_clear_incomplete_split(), _bt_pageinit(), _bt_restore_meta(), _bt_restore_page(), BTP_LEAF, BTP_ROOT, BTPageGetOpaque, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_level, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BufferGetPage(), BufferGetPageSize(), XLogReaderState::EndRecPtr, len, xl_btree_newroot::level, MarkBufferDirty(), P_NONE, PageSetLSN(), UnlockReleaseBuffer(), XLogInitBufferForRedo(), XLogRecGetBlockData(), and XLogRecGetData.

Referenced by btree_redo().

◆ btree_xlog_reuse_page()

static void btree_xlog_reuse_page ( XLogReaderState *record )
static

Definition at line 1007 of file nbtxlog.c.

1008{
1009 xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) XLogRecGetData(record);
1010
1011 if (InHotStandby)
1012 ResolveRecoveryConflictWithSnapshotFullXid(xlrec->snapshotConflictHorizon,
1013 xlrec->isCatalogRel,
1014 xlrec->locator);
1015}

References InHotStandby, xl_btree_reuse_page::isCatalogRel, xl_btree_reuse_page::locator, ResolveRecoveryConflictWithSnapshotFullXid(), xl_btree_reuse_page::snapshotConflictHorizon, and XLogRecGetData.

Referenced by btree_redo().

◆ btree_xlog_split()

static void btree_xlog_split ( bool  newitemonleft,
XLogReaderState *record 
)
static

Definition at line 251 of file nbtxlog.c.

252{
253 XLogRecPtr lsn = record->EndRecPtr;
254 xl_btree_split *xlrec = (xl_btree_split *) XLogRecGetData(record);
255 bool isleaf = (xlrec->level == 0);
256 Buffer buf;
257 Buffer rbuf;
258 Page rpage;
259 BTPageOpaque ropaque;
260 char *datapos;
261 Size datalen;
262 BlockNumber origpagenumber;
263 BlockNumber rightpagenumber;
264 BlockNumber spagenumber;
265
266 XLogRecGetBlockTag(record, 0, NULL, NULL, &origpagenumber);
267 XLogRecGetBlockTag(record, 1, NULL, NULL, &rightpagenumber);
268 if (!XLogRecGetBlockTagExtended(record, 2, NULL, NULL, &spagenumber, NULL))
269 spagenumber = P_NONE;
270
271 /*
272 * Clear the incomplete split flag on the appropriate child page one level
273 * down when origpage/buf is an internal page (there must have been
274 * cascading page splits during original execution in the event of an
275 * internal page split). This is like the corresponding btree_xlog_insert
276 * call for internal pages. We're not clearing the incomplete split flag
277 * for the current page split here (you can think of this as part of the
278 * insert of newitem that the page split action needs to perform in
279 * passing).
280 *
281 * Like in btree_xlog_insert, this can be done before locking other pages.
282 * We never need to couple cross-level locks in REDO routines.
283 */
284 if (!isleaf)
285 _bt_clear_incomplete_split(record, 3);
286
287 /* Reconstruct right (new) sibling page from scratch */
288 rbuf = XLogInitBufferForRedo(record, 1);
289 datapos = XLogRecGetBlockData(record, 1, &datalen);
290 rpage = (Page) BufferGetPage(rbuf);
291
292 _bt_pageinit(rpage, BufferGetPageSize(rbuf));
293 ropaque = BTPageGetOpaque(rpage);
294
295 ropaque->btpo_prev = origpagenumber;
296 ropaque->btpo_next = spagenumber;
297 ropaque->btpo_level = xlrec->level;
298 ropaque->btpo_flags = isleaf ? BTP_LEAF : 0;
299 ropaque->btpo_cycleid = 0;
300
301 _bt_restore_page(rpage, datapos, datalen);
302
303 PageSetLSN(rpage, lsn);
304 MarkBufferDirty(rbuf);
305
306 /* Now reconstruct original page (left half of split) */
307 if (XLogReadBufferForRedo(record, 0, &buf) == BLK_NEEDS_REDO)
308 {
309 /*
310 * To retain the same physical order of the tuples that they had, we
311 * initialize a temporary empty page for the left page and add all the
312 * items to that in item number order. This mirrors how _bt_split()
313 * works. Retaining the same physical order makes WAL consistency
314 * checking possible. See also _bt_restore_page(), which does the
315 * same for the right page.
316 */
317 Page origpage = (Page) BufferGetPage(buf);
318 BTPageOpaque oopaque = BTPageGetOpaque(origpage);
319 OffsetNumber off;
320 IndexTuple newitem = NULL,
321 left_hikey = NULL,
322 nposting = NULL;
323 Size newitemsz = 0,
324 left_hikeysz = 0;
325 Page leftpage;
326 OffsetNumber leftoff,
327 replacepostingoff = InvalidOffsetNumber;
328
329 datapos = XLogRecGetBlockData(record, 0, &datalen);
330
331 if (newitemonleft || xlrec->postingoff != 0)
332 {
333 newitem = (IndexTuple) datapos;
334 newitemsz = MAXALIGN(IndexTupleSize(newitem));
335 datapos += newitemsz;
336 datalen -= newitemsz;
337
338 if (xlrec->postingoff != 0)
339 {
340 ItemId itemid;
341 IndexTuple oposting;
342
343 /* Posting list must be at offset number before new item's */
344 replacepostingoff = OffsetNumberPrev(xlrec->newitemoff);
345
346 /* Use mutable, aligned newitem copy in _bt_swap_posting() */
347 newitem = CopyIndexTuple(newitem);
348 itemid = PageGetItemId(origpage, replacepostingoff);
349 oposting = (IndexTuple) PageGetItem(origpage, itemid);
350 nposting = _bt_swap_posting(newitem, oposting,
351 xlrec->postingoff);
352 }
353 }
354
355 /*
356 * Extract left hikey and its size. We assume that 16-bit alignment
357 * is enough to apply IndexTupleSize (since it's fetching from a
358 * uint16 field).
359 */
360 left_hikey = (IndexTuple) datapos;
361 left_hikeysz = MAXALIGN(IndexTupleSize(left_hikey));
362 datapos += left_hikeysz;
363 datalen -= left_hikeysz;
364
365 Assert(datalen == 0);
366
367 leftpage = PageGetTempPageCopySpecial(origpage);
368
369 /* Add high key tuple from WAL record to temp page */
370 leftoff = P_HIKEY;
371 if (PageAddItem(leftpage, (Item) left_hikey, left_hikeysz, P_HIKEY,
372 false, false) == InvalidOffsetNumber)
373 elog(ERROR, "failed to add high key to left page after split");
374 leftoff = OffsetNumberNext(leftoff);
375
376 for (off = P_FIRSTDATAKEY(oopaque); off < xlrec->firstrightoff; off++)
377 {
378 ItemId itemid;
379 Size itemsz;
380 IndexTuple item;
381
382 /* Add replacement posting list when required */
383 if (off == replacepostingoff)
384 {
385 Assert(newitemonleft ||
386 xlrec->firstrightoff == xlrec->newitemoff);
387 if (PageAddItem(leftpage, (Item) nposting,
388 MAXALIGN(IndexTupleSize(nposting)), leftoff,
389 false, false) == InvalidOffsetNumber)
390 elog(ERROR, "failed to add new posting list item to left page after split");
391 leftoff = OffsetNumberNext(leftoff);
392 continue; /* don't insert oposting */
393 }
394
395 /* add the new item if it was inserted on left page */
396 else if (newitemonleft && off == xlrec->newitemoff)
397 {
398 if (PageAddItem(leftpage, (Item) newitem, newitemsz, leftoff,
399 false, false) == InvalidOffsetNumber)
400 elog(ERROR, "failed to add new item to left page after split");
401 leftoff = OffsetNumberNext(leftoff);
402 }
403
404 itemid = PageGetItemId(origpage, off);
405 itemsz = ItemIdGetLength(itemid);
406 item = (IndexTuple) PageGetItem(origpage, itemid);
407 if (PageAddItem(leftpage, (Item) item, itemsz, leftoff,
408 false, false) == InvalidOffsetNumber)
409 elog(ERROR, "failed to add old item to left page after split");
410 leftoff = OffsetNumberNext(leftoff);
411 }
412
413 /* cope with possibility that newitem goes at the end */
414 if (newitemonleft && off == xlrec->newitemoff)
415 {
416 if (PageAddItem(leftpage, (Item) newitem, newitemsz, leftoff,
417 false, false) == InvalidOffsetNumber)
418 elog(ERROR, "failed to add new item to left page after split");
419 leftoff = OffsetNumberNext(leftoff);
420 }
421
422 PageRestoreTempPage(leftpage, origpage);
423
424 /* Fix opaque fields */
425 oopaque->btpo_flags = BTP_INCOMPLETE_SPLIT;
426 if (isleaf)
427 oopaque->btpo_flags |= BTP_LEAF;
428 oopaque->btpo_next = rightpagenumber;
429 oopaque->btpo_cycleid = 0;
430
431 PageSetLSN(origpage, lsn);
432 MarkBufferDirty(buf);
433 }
434
435 /* Fix left-link of the page to the right of the new right sibling */
436 if (spagenumber != P_NONE)
437 {
438 Buffer sbuf;
439
440 if (XLogReadBufferForRedo(record, 2, &sbuf) == BLK_NEEDS_REDO)
441 {
442 Page spage = (Page) BufferGetPage(sbuf);
443 BTPageOpaque spageop = BTPageGetOpaque(spage);
444
445 spageop->btpo_prev = rightpagenumber;
446
447 PageSetLSN(spage, lsn);
448 MarkBufferDirty(sbuf);
449 }
450 if (BufferIsValid(sbuf))
451 UnlockReleaseBuffer(sbuf);
452 }
453
454 /*
455 * Finally, release the remaining buffers. sbuf, rbuf, and buf must be
456 * released together, so that readers cannot observe inconsistencies.
457 */
458 UnlockReleaseBuffer(rbuf);
459 if (BufferIsValid(buf))
460 UnlockReleaseBuffer(buf);
461}

References _bt_clear_incomplete_split(), _bt_pageinit(), _bt_restore_page(), _bt_swap_posting(), Assert, BLK_NEEDS_REDO, BTP_INCOMPLETE_SPLIT, BTP_LEAF, BTPageGetOpaque, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_level, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, buf, BufferGetPage(), BufferGetPageSize(), BufferIsValid(), CopyIndexTuple(), elog, XLogReaderState::EndRecPtr, ERROR, xl_btree_split::firstrightoff, IndexTupleSize, InvalidOffsetNumber, ItemIdGetLength, xl_btree_split::level, MarkBufferDirty(), MAXALIGN, xl_btree_split::newitemoff, OffsetNumberNext, OffsetNumberPrev, P_FIRSTDATAKEY, P_HIKEY, P_NONE, PageAddItem, PageGetItem(), PageGetItemId(), PageGetTempPageCopySpecial(), PageRestoreTempPage(), PageSetLSN(), xl_btree_split::postingoff, UnlockReleaseBuffer(), XLogInitBufferForRedo(), XLogReadBufferForRedo(), XLogRecGetBlockData(), XLogRecGetBlockTag(), XLogRecGetBlockTagExtended(), and XLogRecGetData.

Referenced by btree_redo().

◆ btree_xlog_startup()

void btree_xlog_startup ( void  )

Definition at line 1077 of file nbtxlog.c.

1078{
1080 "Btree recovery temporary context",
1082}

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, CurrentMemoryContext, and opCtx.

◆ btree_xlog_unlink_page()

static void btree_xlog_unlink_page ( uint8  info,
XLogReaderState *record 
)
static

Definition at line 802 of file nbtxlog.c.

803{
804 XLogRecPtr lsn = record->EndRecPtr;
805 xl_btree_unlink_page *xlrec = (xl_btree_unlink_page *) XLogRecGetData(record);
806 BlockNumber leftsib;
807 BlockNumber rightsib;
808 uint32 level;
809 bool isleaf;
810 FullTransactionId safexid;
811 Buffer leftbuf;
812 Buffer target;
813 Buffer rightbuf;
814 Page page;
815 BTPageOpaque pageop;
816
817 leftsib = xlrec->leftsib;
818 rightsib = xlrec->rightsib;
819 level = xlrec->level;
820 isleaf = (level == 0);
821 safexid = xlrec->safexid;
822
823 /* No leaftopparent for level 0 (leaf page) or level 1 target */
824 Assert(!BlockNumberIsValid(xlrec->leaftopparent) || level > 1);
825
826 /*
827 * In normal operation, we would lock all the pages this WAL record
828 * touches before changing any of them. In WAL replay, we at least lock
829 * the pages in the same standard left-to-right order (leftsib, target,
830 * rightsib), and don't release the sibling locks until the target is
831 * marked deleted.
832 */
833
834 /* Fix right-link of left sibling, if any */
835 if (leftsib != P_NONE)
836 {
837 if (XLogReadBufferForRedo(record, 1, &leftbuf) == BLK_NEEDS_REDO)
838 {
839 page = (Page) BufferGetPage(leftbuf);
840 pageop = BTPageGetOpaque(page);
841 pageop->btpo_next = rightsib;
842
843 PageSetLSN(page, lsn);
844 MarkBufferDirty(leftbuf);
845 }
846 }
847 else
848 leftbuf = InvalidBuffer;
849
850 /* Rewrite target page as empty deleted page */
851 target = XLogInitBufferForRedo(record, 0);
852 page = (Page) BufferGetPage(target);
853
854 _bt_pageinit(page, BufferGetPageSize(target));
855 pageop = BTPageGetOpaque(page);
856
857 pageop->btpo_prev = leftsib;
858 pageop->btpo_next = rightsib;
859 pageop->btpo_level = level;
860 BTPageSetDeleted(page, safexid);
861 if (isleaf)
862 pageop->btpo_flags |= BTP_LEAF;
863 pageop->btpo_cycleid = 0;
864
865 PageSetLSN(page, lsn);
866 MarkBufferDirty(target);
867
868 /* Fix left-link of right sibling */
869 if (XLogReadBufferForRedo(record, 2, &rightbuf) == BLK_NEEDS_REDO)
870 {
871 page = (Page) BufferGetPage(rightbuf);
872 pageop = BTPageGetOpaque(page);
873 pageop->btpo_prev = leftsib;
874
875 PageSetLSN(page, lsn);
876 MarkBufferDirty(rightbuf);
877 }
878
879 /* Release siblings */
880 if (BufferIsValid(leftbuf))
881 UnlockReleaseBuffer(leftbuf);
882 if (BufferIsValid(rightbuf))
883 UnlockReleaseBuffer(rightbuf);
884
885 /* Release target */
886 UnlockReleaseBuffer(target);
887
888 /*
889 * If we deleted a parent of the targeted leaf page, instead of the leaf
890 * itself, update the leaf to point to the next remaining child in the
891 * to-be-deleted subtree
892 */
893 if (XLogRecHasBlockRef(record, 3))
894 {
895 /*
896 * There is no real data on the page, so we just re-create it from
897 * scratch using the information from the WAL record.
898 *
899 * Note that we don't end up here when the target page is also the
900 * leafbuf page. There is no need to add a dummy hikey item with a
901 * top parent link when deleting leafbuf because it's the last page
902 * we'll delete in the subtree undergoing deletion.
903 */
904 Buffer leafbuf;
905 IndexTupleData trunctuple;
906
907 Assert(!isleaf);
908
909 leafbuf = XLogInitBufferForRedo(record, 3);
910 page = (Page) BufferGetPage(leafbuf);
911
912 _bt_pageinit(page, BufferGetPageSize(leafbuf));
913 pageop = BTPageGetOpaque(page);
914
915 pageop->btpo_flags = BTP_HALF_DEAD | BTP_LEAF;
916 pageop->btpo_prev = xlrec->leafleftsib;
917 pageop->btpo_next = xlrec->leafrightsib;
918 pageop->btpo_level = 0;
919 pageop->btpo_cycleid = 0;
920
921 /* Add a dummy hikey item */
922 MemSet(&trunctuple, 0, sizeof(IndexTupleData));
923 trunctuple.t_info = sizeof(IndexTupleData);
924 BTreeTupleSetTopParent(&trunctuple, xlrec->leaftopparent);
925
926 if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY,
927 false, false) == InvalidOffsetNumber)
928 elog(ERROR, "could not add dummy high key to half-dead page");
929
930 PageSetLSN(page, lsn);
931 MarkBufferDirty(leafbuf);
932 UnlockReleaseBuffer(leafbuf);
933 }
934
935 /* Update metapage if needed */
936 if (info == XLOG_BTREE_UNLINK_PAGE_META)
937 _bt_restore_meta(record, 4);
938}

References _bt_pageinit(), _bt_restore_meta(), Assert, BLK_NEEDS_REDO, BlockNumberIsValid(), BTP_HALF_DEAD, BTP_LEAF, BTPageGetOpaque, BTPageSetDeleted(), BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, BTPageOpaqueData::btpo_level, BTPageOpaqueData::btpo_next, BTPageOpaqueData::btpo_prev, BTreeTupleSetTopParent(), BufferGetPage(), BufferGetPageSize(), BufferIsValid(), elog, XLogReaderState::EndRecPtr, ERROR, InvalidBuffer, InvalidOffsetNumber, xl_btree_unlink_page::leafleftsib, xl_btree_unlink_page::leafrightsib, xl_btree_unlink_page::leaftopparent, xl_btree_unlink_page::leftsib, xl_btree_unlink_page::level, MarkBufferDirty(), MemSet, P_HIKEY, P_NONE, PageAddItem, PageSetLSN(), xl_btree_unlink_page::rightsib, xl_btree_unlink_page::safexid, IndexTupleData::t_info, UnlockReleaseBuffer(), XLOG_BTREE_UNLINK_PAGE_META, XLogInitBufferForRedo(), XLogReadBufferForRedo(), XLogRecGetData, and XLogRecHasBlockRef.

Referenced by btree_redo().

◆ btree_xlog_updates()

static void btree_xlog_updates ( Page  page,
OffsetNumber *updatedoffsets,
xl_btree_update *updates,
int  nupdated 
)
static

Definition at line 557 of file nbtxlog.c.

559{
560 BTVacuumPosting vacposting;
561 IndexTuple origtuple;
562 ItemId itemid;
563 Size itemsz;
564
565 for (int i = 0; i < nupdated; i++)
566 {
567 itemid = PageGetItemId(page, updatedoffsets[i]);
568 origtuple = (IndexTuple) PageGetItem(page, itemid);
569
570 vacposting = palloc(offsetof(BTVacuumPostingData, deletetids) +
571 updates->ndeletedtids * sizeof(uint16));
572 vacposting->updatedoffset = updatedoffsets[i];
573 vacposting->itup = origtuple;
574 vacposting->ndeletedtids = updates->ndeletedtids;
575 memcpy(vacposting->deletetids,
576 (char *) updates + SizeOfBtreeUpdate,
577 updates->ndeletedtids * sizeof(uint16));
578
579 _bt_update_posting(vacposting);
580
581 /* Overwrite updated version of tuple */
582 itemsz = MAXALIGN(IndexTupleSize(vacposting->itup));
583 if (!PageIndexTupleOverwrite(page, updatedoffsets[i],
584 (Item) vacposting->itup, itemsz))
585 elog(PANIC, "failed to update partially dead item");
586
587 pfree(vacposting->itup);
588 pfree(vacposting);
589
590 /* advance to next xl_btree_update from array */
591 updates = (xl_btree_update *)
592 ((char *) updates + SizeOfBtreeUpdate +
593 updates->ndeletedtids * sizeof(uint16));
594 }
595}

References _bt_update_posting(), BTVacuumPostingData::deletetids, elog, i, IndexTupleSize, BTVacuumPostingData::itup, MAXALIGN, BTVacuumPostingData::ndeletedtids, xl_btree_update::ndeletedtids, PageGetItem(), PageGetItemId(), PageIndexTupleOverwrite(), palloc(), PANIC, pfree(), SizeOfBtreeUpdate, and BTVacuumPostingData::updatedoffset.

Referenced by btree_xlog_delete(), and btree_xlog_vacuum().

◆ btree_xlog_vacuum()

static void btree_xlog_vacuum ( XLogReaderState *record )
static

Definition at line 598 of file nbtxlog.c.

599{
600 XLogRecPtr lsn = record->EndRecPtr;
601 xl_btree_vacuum *xlrec = (xl_btree_vacuum *) XLogRecGetData(record);
602 Buffer buffer;
603 Page page;
604 BTPageOpaque opaque;
605
606 /*
607 * We need to take a cleanup lock here, just like btvacuumpage(). However,
608 * it isn't necessary to exhaustively get a cleanup lock on every block in
609 * the index during recovery (just getting a cleanup lock on pages with
610 * items to kill suffices). See nbtree/README for details.
611 */
612 if (XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &buffer)
613 == BLK_NEEDS_REDO)
614 {
615 char *ptr = XLogRecGetBlockData(record, 0, NULL);
616
617 page = (Page) BufferGetPage(buffer);
618
619 if (xlrec->nupdated > 0)
620 {
621 OffsetNumber *updatedoffsets;
622 xl_btree_update *updates;
623
624 updatedoffsets = (OffsetNumber *)
625 (ptr + xlrec->ndeleted * sizeof(OffsetNumber));
626 updates = (xl_btree_update *) ((char *) updatedoffsets +
627 xlrec->nupdated *
628 sizeof(OffsetNumber));
629
630 btree_xlog_updates(page, updatedoffsets, updates, xlrec->nupdated);
631 }
632
633 if (xlrec->ndeleted > 0)
634 PageIndexMultiDelete(page, (OffsetNumber *) ptr, xlrec->ndeleted);
635
636 /*
637 * Clear the vacuum cycle ID, and mark the page as not containing any
638 * LP_DEAD items
639 */
640 opaque = BTPageGetOpaque(page);
641 opaque->btpo_cycleid = 0;
642 opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
643
644 PageSetLSN(page, lsn);
645 MarkBufferDirty(buffer);
646 }
647 if (BufferIsValid(buffer))
648 UnlockReleaseBuffer(buffer);
649}

References BLK_NEEDS_REDO, BTPageGetOpaque, BTPageOpaqueData::btpo_cycleid, BTPageOpaqueData::btpo_flags, btree_xlog_updates(), BufferGetPage(), BufferIsValid(), XLogReaderState::EndRecPtr, MarkBufferDirty(), xl_btree_vacuum::ndeleted, xl_btree_vacuum::nupdated, PageIndexMultiDelete(), PageSetLSN(), RBM_NORMAL, UnlockReleaseBuffer(), XLogReadBufferForRedoExtended(), XLogRecGetBlockData(), and XLogRecGetData.

Referenced by btree_redo().

Variable Documentation

◆ opCtx

MemoryContext opCtx
static

Definition at line 25 of file nbtxlog.c.

Referenced by btree_redo(), btree_xlog_cleanup(), and btree_xlog_startup().
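
opCtx exists so that per-record scratch allocations made by the redo routines never need an explicit pfree(): btree_redo() switches into this context before dispatching and resets it once the record has been replayed. The fragment below is a hypothetical illustration of that pattern, not code from nbtxlog.c.

#include "postgres.h"
#include "access/nbtree.h"		/* BTDedupState, BTDedupStateData */

/*
 * Illustrative only: scratch state such as the deduplication state built in
 * btree_xlog_dedup() is palloc'd while opCtx is current and simply abandoned.
 */
static void
allocate_replay_scratch(void)
{
    BTDedupState state;

    /* lands in whatever context is current, i.e. opCtx during replay */
    state = (BTDedupState) palloc(sizeof(BTDedupStateData));
    state->nhtids = 0;
    state->nitems = 0;

    /*
     * Intentionally no pfree(): btree_redo() calls MemoryContextReset(opCtx)
     * after each record, releasing this memory wholesale.
     */
}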