PostgreSQL Source Code  git master
bufpage.h
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * bufpage.h
4  * Standard POSTGRES buffer page definitions.
5  *
6  *
7  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * src/include/storage/bufpage.h
11  *
12  *-------------------------------------------------------------------------
13  */
14 #ifndef BUFPAGE_H
15 #define BUFPAGE_H
16 
17 #include "access/xlogdefs.h"
18 #include "storage/block.h"
19 #include "storage/item.h"
20 #include "storage/off.h"
21 
22 /*
23  * A postgres disk page is an abstraction layered on top of a postgres
24  * disk block (which is simply a unit of i/o, see block.h).
25  *
26  * specifically, while a disk block can be unformatted, a postgres
27  * disk page is always a slotted page of the form:
28  *
29  * +----------------+---------------------------------+
30  * | PageHeaderData | linp1 linp2 linp3 ...           |
31  * +-----------+----+---------------------------------+
32  * | ... linpN |                                      |
33  * +-----------+--------------------------------------+
34  * |           ^ pd_lower                             |
35  * |                                                  |
36  * |             v pd_upper                           |
37  * +-------------+------------------------------------+
38  * |             | tupleN ...                         |
39  * +-------------+------------------+-----------------+
40  * |       ... tuple3 tuple2 tuple1 | "special space" |
41  * +--------------------------------+-----------------+
42  *                                  ^ pd_special
43  *
44  * a page is full when nothing can be added between pd_lower and
45  * pd_upper.
46  *
47  * all blocks written out by an access method must be disk pages.
48  *
49  * EXCEPTIONS:
50  *
51  * obviously, a page is not formatted before it is initialized by
52  * a call to PageInit.
53  *
54  * NOTES:
55  *
56  * linp1..N form an ItemId (line pointer) array. ItemPointers point
57  * to a physical block number and a logical offset (line pointer
58  * number) within that block/page. Note that OffsetNumbers
59  * conventionally start at 1, not 0.
60  *
61  * tuple1..N are added "backwards" on the page. Since an ItemPointer
62  * offset is used to access an ItemId entry rather than an actual
63  * byte-offset position, tuples can be physically shuffled on a page
64  * whenever the need arises. This indirection also keeps crash recovery
65  * relatively simple, because the low-level details of page space
66  * management can be controlled by standard buffer page code during
67  * logging, and during recovery.
68  *
69  * AM-generic per-page information is kept in PageHeaderData.
70  *
71  * AM-specific per-page data (if any) is kept in the area marked "special
72  * space"; each AM has an "opaque" structure defined somewhere that is
73  * stored as the page trailer. an access method should always
74  * initialize its pages with PageInit and then set its own opaque
75  * fields.
76  */
77 
/* Page: handle for a disk page, declared as a Pointer (char * per c.h). */
78 typedef Pointer Page;
79 
80 
81 /*
82  * location (byte offset) within a page.
83  *
84  * note that this is actually limited to 2^15 because we have limited
85  * ItemIdData.lp_off and ItemIdData.lp_len to 15 bits (see itemid.h).
86  */
88 
89 
90 /*
91  * For historical reasons, the 64-bit LSN value is stored as two 32-bit
92  * values.
93  */
94 typedef struct
95 {
96  uint32 xlogid; /* high bits */
97  uint32 xrecoff; /* low bits */
99 
100 static inline XLogRecPtr
102 {
103  return (uint64) val.xlogid << 32 | val.xrecoff;
104 }
105 
/*
 * PageXLogRecPtrSet
 *		Store the 64-bit value lsn into ptr as two 32-bit halves: the upper
 *		32 bits go to xlogid, the lower 32 bits to xrecoff.
 *
 * Both ptr and lsn appear twice in the expansion, so arguments with side
 * effects would be evaluated more than once.
 */
106 #define PageXLogRecPtrSet(ptr, lsn) \
107  ((ptr).xlogid = (uint32) ((lsn) >> 32), (ptr).xrecoff = (uint32) (lsn))
108 
109 /*
110  * disk page organization
111  *
112  * space management information generic to any page
113  *
114  * pd_lsn - identifies xlog record for last change to this page.
115  * pd_checksum - page checksum, if set.
116  * pd_flags - flag bits.
117  * pd_lower - offset to start of free space.
118  * pd_upper - offset to end of free space.
119  * pd_special - offset to start of special space.
120  * pd_pagesize_version - size in bytes and page layout version number.
121  * pd_prune_xid - oldest XID among potentially prunable tuples on page.
122  *
123  * The LSN is used by the buffer manager to enforce the basic rule of WAL:
124  * "thou shalt write xlog before data". A dirty buffer cannot be dumped
125  * to disk until xlog has been flushed at least as far as the page's LSN.
126  *
127  * pd_checksum stores the page checksum, if it has been set for this page;
128  * zero is a valid value for a checksum. If a checksum is not in use then
129  * we leave the field unset. This will typically mean the field is zero
130  * though non-zero values may also be present if databases have been
131  * pg_upgraded from releases prior to 9.3, when the same byte offset was
132  * used to store the current timelineid when the page was last updated.
133  * Note that there is no indication on a page as to whether the checksum
134  * is valid or not, a deliberate design choice which avoids the problem
135  * of relying on the page contents to decide whether to verify it. Hence
136  * there are no flag bits relating to checksums.
137  *
138  * pd_prune_xid is a hint field that helps determine whether pruning will be
139  * useful. It is currently unused in index pages.
140  *
141  * The page version number and page size are packed together into a single
142  * uint16 field. This is for historical reasons: before PostgreSQL 7.3,
143  * there was no concept of a page version number, and doing it this way
144  * lets us pretend that pre-7.3 databases have page version number zero.
145  * We constrain page sizes to be multiples of 256, leaving the low eight
146  * bits available for a version number.
147  *
148  * Minimum possible page size is perhaps 64B to fit page header, opaque space
149  * and a minimal tuple; of course, in reality you want it much bigger, so
150  * the constraint on pagesize mod 256 is not an important restriction.
151  * On the high end, we can only support pages up to 32KB because lp_off/lp_len
152  * are 15 bits.
153  */
154 
155 typedef struct PageHeaderData
156 {
157  /* XXX LSN is member of *any* block, not only page-organized ones */
158  PageXLogRecPtr pd_lsn; /* LSN: next byte after last byte of xlog
159  * record for last change to this page */
160  uint16 pd_checksum; /* checksum */
161  uint16 pd_flags; /* flag bits, see below */
162  LocationIndex pd_lower; /* offset to start of free space */
163  LocationIndex pd_upper; /* offset to end of free space */
164  LocationIndex pd_special; /* offset to start of special space */
166  TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */
167  ItemIdData pd_linp[FLEXIBLE_ARRAY_MEMBER]; /* line pointer array */
169 
171 
/*
 * Flag bits stored in pd_flags.  Bits not defined here are initialized to
 * zero and may be used in the future.
 *
 * PD_HAS_FREE_LINES: there are LP_UNUSED line pointers before pd_lower.
 * Treat this as a hint rather than the truth, since changes to it are not
 * WAL-logged.
 *
 * PD_PAGE_FULL: an UPDATE did not find enough free space in the page for
 * its new tuple version, which suggests that a prune is needed.  This is
 * also just a hint.
 *
 * PD_ALL_VISIBLE: all tuples on the page are visible to everyone.
 */
#define PD_HAS_FREE_LINES	0x0001
#define PD_PAGE_FULL		0x0002
#define PD_ALL_VISIBLE		0x0004

/* OR of all valid pd_flags bits */
#define PD_VALID_FLAG_BITS \
	(PD_HAS_FREE_LINES | PD_PAGE_FULL | PD_ALL_VISIBLE)
190 
191 /*
192  * Page layout version number 0 is for pre-7.3 Postgres releases.
193  * Releases 7.3 and 7.4 use 1, denoting a new HeapTupleHeader layout.
194  * Release 8.0 uses 2; it changed the HeapTupleHeader layout again.
195  * Release 8.1 uses 3; it redefined HeapTupleHeader infomask bits.
196  * Release 8.3 uses 4; it changed the HeapTupleHeader layout again, and
197  * added the pd_flags field (by stealing some bits from pd_tli),
198  * as well as adding the pd_prune_xid field (which enlarges the header).
199  *
200  * As of Release 9.3, the checksum version must also be considered when
201  * handling pages.
202  */
203 #define PG_PAGE_LAYOUT_VERSION 4	/* page layout version number; see history above */
204 #define PG_DATA_CHECKSUM_VERSION 1	/* data checksum version number */
205 
206 /* ----------------------------------------------------------------
207  * page support functions
208  * ----------------------------------------------------------------
209  */
210 
211 /*
212  * line pointer(s) do not count as part of header
213  */
/* Byte offset of pd_linp within PageHeaderData, i.e. the size of the fixed header fields. */
214 #define SizeOfPageHeaderData (offsetof(PageHeaderData, pd_linp))
215 
216 /*
217  * PageIsEmpty
218  * returns true iff no itemid has been allocated on the page
219  */
220 static inline bool
221 PageIsEmpty(Page page)
222 {
223  return ((PageHeader) page)->pd_lower <= SizeOfPageHeaderData;
224 }
225 
226 /*
227  * PageIsNew
228  * returns true iff page has not been initialized (by PageInit)
229  */
230 static inline bool
231 PageIsNew(Page page)
232 {
233  return ((PageHeader) page)->pd_upper == 0;
234 }
235 
236 /*
237  * PageGetItemId
238  * Returns an item identifier of a page.
239  */
240 static inline ItemId
241 PageGetItemId(Page page, OffsetNumber offsetNumber)
242 {
243  return &((PageHeader) page)->pd_linp[offsetNumber - 1];
244 }
245 
246 /*
247  * PageGetContents
248  * To be used in cases where the page does not contain line pointers.
249  *
250  * Note: prior to 8.3 this was not guaranteed to yield a MAXALIGN'd result.
251  * Now it is. Beware of old code that might think the offset to the contents
252  * is just SizeOfPageHeaderData rather than MAXALIGN(SizeOfPageHeaderData).
253  */
254 static inline char *
255 PageGetContents(Page page)
256 {
257  return (char *) page + MAXALIGN(SizeOfPageHeaderData);
258 }
259 
260 /* ----------------
261  * functions to access page size info
262  * ----------------
263  */
264 
265 /*
266  * PageGetPageSize
267  * Returns the page size of a page.
268  *
269  * this can only be called on a formatted page (unlike
270  * BufferGetPageSize, which can be called on an unformatted page).
271  * however, it can be called on a page that is not stored in a buffer.
272  */
273 static inline Size
274 PageGetPageSize(Page page)
275 {
276  return (Size) (((PageHeader) page)->pd_pagesize_version & (uint16) 0xFF00);
277 }
278 
279 /*
280  * PageGetPageLayoutVersion
281  * Returns the page layout version of a page.
282  */
283 static inline uint8
285 {
286  return (((PageHeader) page)->pd_pagesize_version & 0x00FF);
287 }
288 
289 /*
290  * PageSetPageSizeAndVersion
291  * Sets the page size and page layout version number of a page.
292  *
293  * We could support setting these two values separately, but there's
294  * no real need for it at the moment.
295  */
296 static inline void
298 {
299  Assert((size & 0xFF00) == size);
300  Assert((version & 0x00FF) == version);
301 
302  ((PageHeader) page)->pd_pagesize_version = size | version;
303 }
304 
305 /* ----------------
306  * page special data functions
307  * ----------------
308  */
309 /*
310  * PageGetSpecialSize
311  * Returns size of special space on a page.
312  */
313 static inline uint16
315 {
316  return (PageGetPageSize(page) - ((PageHeader) page)->pd_special);
317 }
318 
319 /*
320  * Using assertions, validate that the page special pointer is OK.
321  *
322  * This is intended to catch use of the pointer before page initialization.
323  */
324 static inline void
326 {
327  Assert(page);
328  Assert(((PageHeader) page)->pd_special <= BLCKSZ);
329  Assert(((PageHeader) page)->pd_special >= SizeOfPageHeaderData);
330 }
331 
332 /*
333  * PageGetSpecialPointer
334  * Returns pointer to special space on a page.
335  */
336 static inline char *
338 {
340  return (char *) page + ((PageHeader) page)->pd_special;
341 }
342 
343 /*
344  * PageGetItem
345  * Retrieves an item on the given page.
346  *
347  * Note:
348  * This does not change the status of any of the resources passed.
349  * The semantics may change in the future.
350  */
351 static inline Item
352 PageGetItem(Page page, ItemId itemId)
353 {
354  Assert(page);
355  Assert(ItemIdHasStorage(itemId));
356 
357  return (Item) (((char *) page) + ItemIdGetOffset(itemId));
358 }
359 
360 /*
361  * PageGetMaxOffsetNumber
362  * Returns the maximum offset number used by the given page.
363  * Since offset numbers are 1-based, this is also the number
364  * of items on the page.
365  *
366  * NOTE: if the page is not initialized (pd_lower == 0), we must
367  * return zero to ensure sane behavior.
368  */
369 static inline OffsetNumber
371 {
372  PageHeader pageheader = (PageHeader) page;
373 
374  if (pageheader->pd_lower <= SizeOfPageHeaderData)
375  return 0;
376  else
377  return (pageheader->pd_lower - SizeOfPageHeaderData) / sizeof(ItemIdData);
378 }
379 
380 /*
381  * Additional functions for access to page headers.
382  */
383 static inline XLogRecPtr
384 PageGetLSN(Page page)
385 {
386  return PageXLogRecPtrGet(((PageHeader) page)->pd_lsn);
387 }
388 static inline void
389 PageSetLSN(Page page, XLogRecPtr lsn)
390 {
391  PageXLogRecPtrSet(((PageHeader) page)->pd_lsn, lsn);
392 }
393 
394 static inline bool
396 {
397  return ((PageHeader) page)->pd_flags & PD_HAS_FREE_LINES;
398 }
399 static inline void
401 {
402  ((PageHeader) page)->pd_flags |= PD_HAS_FREE_LINES;
403 }
404 static inline void
406 {
407  ((PageHeader) page)->pd_flags &= ~PD_HAS_FREE_LINES;
408 }
409 
410 static inline bool
411 PageIsFull(Page page)
412 {
413  return ((PageHeader) page)->pd_flags & PD_PAGE_FULL;
414 }
415 static inline void
416 PageSetFull(Page page)
417 {
418  ((PageHeader) page)->pd_flags |= PD_PAGE_FULL;
419 }
420 static inline void
421 PageClearFull(Page page)
422 {
423  ((PageHeader) page)->pd_flags &= ~PD_PAGE_FULL;
424 }
425 
426 static inline bool
428 {
429  return ((PageHeader) page)->pd_flags & PD_ALL_VISIBLE;
430 }
431 static inline void
433 {
434  ((PageHeader) page)->pd_flags |= PD_ALL_VISIBLE;
435 }
436 static inline void
438 {
439  ((PageHeader) page)->pd_flags &= ~PD_ALL_VISIBLE;
440 }
441 
442 /*
443  * These two require "access/transam.h", so left as macros.
444  */
/*
 * PageSetPrunable
 *		Stores xid in pd_prune_xid when the current pd_prune_xid is not
 *		valid or when xid precedes it (per TransactionIdPrecedes).
 *		Asserts that xid is a normal transaction ID.
 *
 * Both page and xid appear several times in the expansion, so arguments
 * with side effects would be evaluated more than once.
 */
445 #define PageSetPrunable(page, xid) \
446 do { \
447  Assert(TransactionIdIsNormal(xid)); \
448  if (!TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) || \
449  TransactionIdPrecedes(xid, ((PageHeader) (page))->pd_prune_xid)) \
450  ((PageHeader) (page))->pd_prune_xid = (xid); \
451 } while (0)
/*
 * PageClearPrunable
 *		Resets pd_prune_xid to InvalidTransactionId.
 */
452 #define PageClearPrunable(page) \
453  (((PageHeader) (page))->pd_prune_xid = InvalidTransactionId)
454 
455 
456 /* ----------------------------------------------------------------
457  * extern declarations
458  * ----------------------------------------------------------------
459  */
460 
461 /* flags for PageAddItemExtended() */
462 #define PAI_OVERWRITE (1 << 0)
463 #define PAI_IS_HEAP (1 << 1)
464 
465 /* flags for PageIsVerifiedExtended() */
466 #define PIV_LOG_WARNING (1 << 0)
467 #define PIV_REPORT_STAT (1 << 1)
468 
/*
 * PageAddItem
 *		Wrapper around PageAddItemExtended() that converts the two boolean
 *		arguments overwrite and is_heap into the PAI_OVERWRITE and
 *		PAI_IS_HEAP flag bits.
 */
469 #define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap) \
470  PageAddItemExtended(page, item, size, offsetNumber, \
471  ((overwrite) ? PAI_OVERWRITE : 0) | \
472  ((is_heap) ? PAI_IS_HEAP : 0))
473 
/*
 * PageIsVerified
 *		Wrapper around PageIsVerifiedExtended() that passes both
 *		PIV_LOG_WARNING and PIV_REPORT_STAT.
 */
474 #define PageIsVerified(page, blkno) \
475  PageIsVerifiedExtended(page, blkno, \
476  PIV_LOG_WARNING | PIV_REPORT_STAT)
477 
478 /*
479  * Check that BLCKSZ is a multiple of sizeof(size_t). In
480  * PageIsVerifiedExtended(), it is much faster to check if a page is
481  * full of zeroes using the native word size. Note that this assertion
482  * is kept within a header to make sure that StaticAssertDecl() works
483  * across various combinations of platforms and compilers.
484  */
/* Dividing and re-multiplying only reproduces BLCKSZ when it is an exact multiple of sizeof(size_t). */
485 StaticAssertDecl(BLCKSZ == ((BLCKSZ / sizeof(size_t)) * sizeof(size_t)),
486  "BLCKSZ has to be a multiple of sizeof(size_t)");
487 
488 extern void PageInit(Page page, Size pageSize, Size specialSize);
489 extern bool PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags);
491  OffsetNumber offsetNumber, int flags);
492 extern Page PageGetTempPage(Page page);
493 extern Page PageGetTempPageCopy(Page page);
495 extern void PageRestoreTempPage(Page tempPage, Page oldPage);
496 extern void PageRepairFragmentation(Page page);
497 extern void PageTruncateLinePointerArray(Page page);
498 extern Size PageGetFreeSpace(Page page);
499 extern Size PageGetFreeSpaceForMultipleTuples(Page page, int ntups);
500 extern Size PageGetExactFreeSpace(Page page);
501 extern Size PageGetHeapFreeSpace(Page page);
502 extern void PageIndexTupleDelete(Page page, OffsetNumber offnum);
503 extern void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems);
504 extern void PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum);
505 extern bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum,
506  Item newtup, Size newsize);
507 extern char *PageSetChecksumCopy(Page page, BlockNumber blkno);
508 extern void PageSetChecksumInplace(Page page, BlockNumber blkno);
509 
510 #endif /* BUFPAGE_H */
uint32 BlockNumber
Definition: block.h:31
struct PageHeaderData PageHeaderData
#define PD_PAGE_FULL
Definition: bufpage.h:185
PageHeaderData * PageHeader
Definition: bufpage.h:170
static bool PageIsEmpty(Page page)
Definition: bufpage.h:220
static char * PageGetContents(Page page)
Definition: bufpage.h:254
Pointer Page
Definition: bufpage.h:78
void PageRestoreTempPage(Page tempPage, Page oldPage)
Definition: bufpage.c:424
Size PageGetFreeSpaceForMultipleTuples(Page page, int ntups)
Definition: bufpage.c:934
static void PageSetHasFreeLinePointers(Page page)
Definition: bufpage.h:399
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:351
static void PageClearAllVisible(Page page)
Definition: bufpage.h:436
void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
Definition: bufpage.c:1161
OffsetNumber PageAddItemExtended(Page page, Item item, Size size, OffsetNumber offsetNumber, int flags)
Definition: bufpage.c:194
static Size PageGetPageSize(Page page)
Definition: bufpage.h:273
Size PageGetHeapFreeSpace(Page page)
Definition: bufpage.c:991
static void PageClearFull(Page page)
Definition: bufpage.h:420
static XLogRecPtr PageXLogRecPtrGet(PageXLogRecPtr val)
Definition: bufpage.h:101
#define SizeOfPageHeaderData
Definition: bufpage.h:213
static void PageSetAllVisible(Page page)
Definition: bufpage.h:431
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:240
Page PageGetTempPageCopySpecial(Page page)
Definition: bufpage.c:402
static bool PageIsNew(Page page)
Definition: bufpage.h:230
#define PD_ALL_VISIBLE
Definition: bufpage.h:186
static bool PageIsAllVisible(Page page)
Definition: bufpage.h:426
bool PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags)
Definition: bufpage.c:88
bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum, Item newtup, Size newsize)
Definition: bufpage.c:1405
void PageSetChecksumInplace(Page page, BlockNumber blkno)
Definition: bufpage.c:1542
static void PageSetFull(Page page)
Definition: bufpage.h:415
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:388
static void PageValidateSpecialPointer(Page page)
Definition: bufpage.h:324
static XLogRecPtr PageGetLSN(Page page)
Definition: bufpage.h:383
char * PageSetChecksumCopy(Page page, BlockNumber blkno)
Definition: bufpage.c:1510
void PageIndexTupleDelete(Page page, OffsetNumber offnum)
Definition: bufpage.c:1052
void PageRepairFragmentation(Page page)
Definition: bufpage.c:699
static char * PageGetSpecialPointer(Page page)
Definition: bufpage.h:336
void PageTruncateLinePointerArray(Page page)
Definition: bufpage.c:835
void PageInit(Page page, Size pageSize, Size specialSize)
Definition: bufpage.c:42
static void PageClearHasFreeLinePointers(Page page)
Definition: bufpage.h:404
Page PageGetTempPageCopy(Page page)
Definition: bufpage.c:382
StaticAssertDecl(BLCKSZ==((BLCKSZ/sizeof(size_t)) *sizeof(size_t)), "BLCKSZ has to be a multiple of sizeof(size_t)")
static bool PageHasFreeLinePointers(Page page)
Definition: bufpage.h:394
Page PageGetTempPage(Page page)
Definition: bufpage.c:365
#define PD_HAS_FREE_LINES
Definition: bufpage.h:184
uint16 LocationIndex
Definition: bufpage.h:87
void PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum)
Definition: bufpage.c:1295
static bool PageIsFull(Page page)
Definition: bufpage.h:410
static uint8 PageGetPageLayoutVersion(Page page)
Definition: bufpage.h:283
static void PageSetPageSizeAndVersion(Page page, Size size, uint8 version)
Definition: bufpage.h:296
#define PageXLogRecPtrSet(ptr, lsn)
Definition: bufpage.h:106
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:369
Size PageGetExactFreeSpace(Page page)
Definition: bufpage.c:958
static uint16 PageGetSpecialSize(Page page)
Definition: bufpage.h:313
Size PageGetFreeSpace(Page page)
Definition: bufpage.c:907
unsigned short uint16
Definition: c.h:492
unsigned int uint32
Definition: c.h:493
#define MAXALIGN(LEN)
Definition: c.h:798
char * Pointer
Definition: c.h:470
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:385
unsigned char uint8
Definition: c.h:491
uint32 TransactionId
Definition: c.h:639
size_t Size
Definition: c.h:592
#define nitems(x)
Definition: indent.h:31
long val
Definition: informix.c:664
Pointer Item
Definition: item.h:17
#define ItemIdGetOffset(itemId)
Definition: itemid.h:65
struct ItemIdData ItemIdData
#define ItemIdHasStorage(itemId)
Definition: itemid.h:120
Assert(fmt[strlen(fmt) - 1] !='\n')
uint16 OffsetNumber
Definition: off.h:24
static pg_noinline void Size size
Definition: slab.c:607
PageXLogRecPtr pd_lsn
Definition: bufpage.h:158
LocationIndex pd_special
Definition: bufpage.h:164
LocationIndex pd_upper
Definition: bufpage.h:163
uint16 pd_flags
Definition: bufpage.h:161
uint16 pd_checksum
Definition: bufpage.h:160
LocationIndex pd_lower
Definition: bufpage.h:162
uint16 pd_pagesize_version
Definition: bufpage.h:165
TransactionId pd_prune_xid
Definition: bufpage.h:166
ItemIdData pd_linp[FLEXIBLE_ARRAY_MEMBER]
Definition: bufpage.h:167
uint32 xrecoff
Definition: bufpage.h:97
uint32 xlogid
Definition: bufpage.h:96
uint64 XLogRecPtr
Definition: xlogdefs.h:21