PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
nbtxlog.h
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * nbtxlog.h
4  * header file for postgres btree xlog routines
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * src/include/access/nbtxlog.h
10  *
11  *-------------------------------------------------------------------------
12  */
13 #ifndef NBTXLOG_H
14 #define NBTXLOG_H
15 
16 #include "access/xlogreader.h"
17 #include "lib/stringinfo.h"
18 #include "storage/off.h"
19 
20 /*
21  * XLOG records for btree operations
22  *
23  * XLOG allows us to store some information in the high 4 bits of a log
24  * record's xl_info field
25  */
26 #define XLOG_BTREE_INSERT_LEAF 0x00 /* add index tuple without split */
27 #define XLOG_BTREE_INSERT_UPPER 0x10 /* same, on a non-leaf page */
28 #define XLOG_BTREE_INSERT_META 0x20 /* same, plus update metapage */
29 #define XLOG_BTREE_SPLIT_L 0x30 /* add index tuple with split */
30 #define XLOG_BTREE_SPLIT_R 0x40 /* as above, new item on right */
31 #define XLOG_BTREE_SPLIT_L_ROOT 0x50 /* add tuple with split of root */
32 #define XLOG_BTREE_SPLIT_R_ROOT 0x60 /* as above, new item on right */
33 #define XLOG_BTREE_DELETE 0x70 /* delete leaf index tuples for a page */
34 #define XLOG_BTREE_UNLINK_PAGE 0x80 /* delete a half-dead page */
35 #define XLOG_BTREE_UNLINK_PAGE_META 0x90 /* same, and update metapage */
36 #define XLOG_BTREE_NEWROOT 0xA0 /* new root page */
37 #define XLOG_BTREE_MARK_PAGE_HALFDEAD 0xB0 /* mark a leaf as half-dead */
38 #define XLOG_BTREE_VACUUM 0xC0 /* delete entries on a page during
39  * vacuum */
40 #define XLOG_BTREE_REUSE_PAGE 0xD0 /* old page is about to be reused from
41  * FSM */
42 
43 /*
44  * All that we need to regenerate the meta-data page
45  */
46 typedef struct xl_btree_metadata
47 {
53 
54 /*
55  * This is what we need to know about simple (without split) insert.
56  *
57  * This data record is used for INSERT_LEAF, INSERT_UPPER, INSERT_META.
58  * Note that INSERT_META implies it's not a leaf page.
59  *
60  * Backup Blk 0: original page (data contains the inserted tuple)
61  * Backup Blk 1: child's left sibling, if INSERT_UPPER or INSERT_META
62  * Backup Blk 2: xl_btree_metadata, if INSERT_META
63  */
64 typedef struct xl_btree_insert
65 {
68 
69 #define SizeOfBtreeInsert (offsetof(xl_btree_insert, offnum) + sizeof(OffsetNumber))
70 
71 /*
72  * On insert with split, we save all the items going into the right sibling
73  * so that we can restore it completely from the log record. This way takes
74  * less xlog space than the normal approach, because if we did it standardly,
75  * XLogInsert would almost always think the right page is new and store its
76  * whole page image. The left page, however, is handled in the normal
77  * incremental-update fashion.
78  *
79  * Note: the four XLOG_BTREE_SPLIT xl_info codes all use this data record.
80  * The _L and _R variants indicate whether the inserted tuple went into the
81  * left or right split page (and thus, whether newitemoff and the new item
82  * are stored or not). The _ROOT variants indicate that we are splitting
83  * the root page, and thus that a newroot record rather than an insert or
84  * split record should follow. Note that a split record never carries a
85  * metapage update --- we'll do that in the parent-level update.
86  *
87  * Backup Blk 0: original page / new left page
88  *
89  * The left page's data portion contains the new item, if it's the _L variant.
90  * (In the _R variants, the new item is one of the right page's tuples.)
91  * If level > 0, an IndexTuple representing the HIKEY of the left page
92  * follows. We don't need this on leaf pages, because it's the same as the
93  * leftmost key in the new right page.
94  *
95  * Backup Blk 1: new right page
96  *
97  * The right page's data portion contains the right page's tuples in the
98  * form used by _bt_restore_page.
99  *
100  * Backup Blk 2: next block (orig page's rightlink), if any
101  * Backup Blk 3: child's left sibling, if non-leaf split
102  */
103 typedef struct xl_btree_split
104 {
105  uint32 level; /* tree level of page being split */
106  OffsetNumber firstright; /* first item moved to right page */
107  OffsetNumber newitemoff; /* new item's offset (if placed on left page) */
109 
110 #define SizeOfBtreeSplit (offsetof(xl_btree_split, newitemoff) + sizeof(OffsetNumber))
111 
112 /*
113  * This is what we need to know about delete of individual leaf index tuples.
114  * The WAL record can represent deletion of any number of index tuples on a
115  * single index page when *not* executed by VACUUM.
116  *
117  * Backup Blk 0: index page
118  */
119 typedef struct xl_btree_delete
120 {
121  RelFileNode hnode; /* RelFileNode of the heap the index currently
122  * points at */
123  int nitems;
124 
125  /* TARGET OFFSET NUMBERS FOLLOW AT THE END */
127 
128 #define SizeOfBtreeDelete (offsetof(xl_btree_delete, nitems) + sizeof(int))
129 
130 /*
131  * This is what we need to know about page reuse within btree.
132  */
133 typedef struct xl_btree_reuse_page
134 {
139 
140 #define SizeOfBtreeReusePage (sizeof(xl_btree_reuse_page))
141 
142 /*
143  * This is what we need to know about vacuum of individual leaf index tuples.
144  * The WAL record can represent deletion of any number of index tuples on a
145  * single index page when executed by VACUUM.
146  *
147  * For MVCC scans, lastBlockVacuumed will be set to InvalidBlockNumber.
148  * For non-MVCC index scans there is an additional correctness requirement
149  * for applying these changes during recovery, which is that we must do one
150  * of these two things for every block in the index:
151  * * lock the block for cleanup and apply any required changes
152  * * EnsureBlockUnpinned()
153  * The purpose of this is to ensure that no index scans started before we
154  * finish scanning the index are still running by the time we begin to remove
155  * heap tuples.
156  *
157  * Any changes to any one block are registered on just one WAL record. All
158  * blocks on which we need to run EnsureBlockUnpinned() are listed as a block
159  * range starting from the last block vacuumed through until this one.
160  * Individual block numbers aren't given.
161  *
162  * Note that the *last* WAL record in any vacuum of an index is allowed to
163  * have a zero length array of offsets. Earlier records must have at least one.
164  */
165 typedef struct xl_btree_vacuum
166 {
168 
169  /* TARGET OFFSET NUMBERS FOLLOW */
171 
172 #define SizeOfBtreeVacuum (offsetof(xl_btree_vacuum, lastBlockVacuumed) + sizeof(BlockNumber))
173 
174 /*
175  * This is what we need to know about marking an empty branch for deletion.
176  * The target identifies the tuple removed from the parent page (note that we
177  * remove this tuple's downlink and the *following* tuple's key). Note that
178  * the leaf page is empty, so we don't need to store its content --- it is
179  * just reinitialized during recovery using the rest of the fields.
180  *
181  * Backup Blk 0: leaf block
182  * Backup Blk 1: top parent
183  */
185 {
186  OffsetNumber poffset; /* deleted tuple id in parent page */
187 
188  /* information needed to recreate the leaf page: */
189  BlockNumber leafblk; /* leaf block ultimately being deleted */
190  BlockNumber leftblk; /* leaf block's left sibling, if any */
191  BlockNumber rightblk; /* leaf block's right sibling */
192  BlockNumber topparent; /* topmost internal page in the branch */
194 
195 #define SizeOfBtreeMarkPageHalfDead (offsetof(xl_btree_mark_page_halfdead, topparent) + sizeof(BlockNumber))
196 
197 /*
198  * This is what we need to know about deletion of a btree page. Note we do
199  * not store any content for the deleted page --- it is just rewritten as empty
200  * during recovery, apart from resetting the btpo.xact.
201  *
202  * Backup Blk 0: target block being deleted
203  * Backup Blk 1: target block's left sibling, if any
204  * Backup Blk 2: target block's right sibling
205  * Backup Blk 3: leaf block (if different from target)
206  * Backup Blk 4: metapage (if rightsib becomes new fast root)
207  */
208 typedef struct xl_btree_unlink_page
209 {
210  BlockNumber leftsib; /* target block's left sibling, if any */
211  BlockNumber rightsib; /* target block's right sibling */
212 
213  /*
214  * Information needed to recreate the leaf page, when target is an
215  * internal page.
216  */
219  BlockNumber topparent; /* next child down in the branch */
220 
221  TransactionId btpo_xact; /* value of btpo.xact for use in recovery */
222  /* xl_btree_metadata FOLLOWS IF XLOG_BTREE_UNLINK_PAGE_META */
224 
225 #define SizeOfBtreeUnlinkPage (offsetof(xl_btree_unlink_page, btpo_xact) + sizeof(TransactionId))
226 
227 /*
228  * New root log record. There are zero tuples if this is to establish an
229  * empty root, or two if it is the result of splitting an old root.
230  *
231  * Note that although this implies rewriting the metadata page, we don't need
232  * an xl_btree_metadata record --- the rootblk and level are sufficient.
233  *
234  * Backup Blk 0: new root page (2 tuples as payload, if splitting old root)
235  * Backup Blk 1: left child (if splitting an old root)
236  * Backup Blk 2: metapage
237  */
238 typedef struct xl_btree_newroot
239 {
240  BlockNumber rootblk; /* location of new root (redundant with blk 0) */
241  uint32 level; /* its tree level */
243 
244 #define SizeOfBtreeNewroot (offsetof(xl_btree_newroot, level) + sizeof(uint32))
245 
246 
247 /*
248  * prototypes for functions in nbtxlog.c and nbtdesc.c
249  */
250 extern void btree_redo(XLogReaderState *record);
251 extern void btree_desc(StringInfo buf, XLogReaderState *record);
252 extern const char *btree_identify(uint8 info);
253 extern void btree_mask(char *pagedata, BlockNumber blkno);
254 
255 #endif /* NBTXLOG_H */
BlockNumber lastBlockVacuumed
Definition: nbtxlog.h:167
void btree_mask(char *pagedata, BlockNumber blkno)
Definition: nbtxlog.c:1038
BlockNumber rootblk
Definition: nbtxlog.h:240
void btree_desc(StringInfo buf, XLogReaderState *record)
Definition: nbtdesc.c:20
uint32 TransactionId
Definition: c.h:397
struct xl_btree_split xl_btree_split
struct xl_btree_reuse_page xl_btree_reuse_page
RelFileNode hnode
Definition: nbtxlog.h:121
struct xl_btree_metadata xl_btree_metadata
BlockNumber root
Definition: nbtxlog.h:48
unsigned char uint8
Definition: c.h:266
RelFileNode node
Definition: nbtxlog.h:135
uint32 level
Definition: nbtxlog.h:241
uint32 BlockNumber
Definition: block.h:31
struct xl_btree_newroot xl_btree_newroot
struct xl_btree_insert xl_btree_insert
uint16 OffsetNumber
Definition: off.h:24
struct xl_btree_delete xl_btree_delete
BlockNumber block
Definition: nbtxlog.h:136
const char * btree_identify(uint8 info)
Definition: nbtdesc.c:103
OffsetNumber newitemoff
Definition: nbtxlog.h:107
struct xl_btree_mark_page_halfdead xl_btree_mark_page_halfdead
static char * buf
Definition: pg_test_fsync.c:66
unsigned int uint32
Definition: c.h:268
struct xl_btree_vacuum xl_btree_vacuum
uint32 level
Definition: nbtxlog.h:105
OffsetNumber offnum
Definition: nbtxlog.h:66
OffsetNumber firstright
Definition: nbtxlog.h:106
uint32 fastlevel
Definition: nbtxlog.h:51
struct xl_btree_unlink_page xl_btree_unlink_page
uint32 level
Definition: nbtxlog.h:49
BlockNumber fastroot
Definition: nbtxlog.h:50
TransactionId latestRemovedXid
Definition: nbtxlog.h:137
void btree_redo(XLogReaderState *record)
Definition: nbtxlog.c:983