PostgreSQL Source Code  git master
bufpage.h
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * bufpage.h
4  * Standard POSTGRES buffer page definitions.
5  *
6  *
7  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * src/include/storage/bufpage.h
11  *
12  *-------------------------------------------------------------------------
13  */
14 #ifndef BUFPAGE_H
15 #define BUFPAGE_H
16 
17 #include "access/xlogdefs.h"
18 #include "storage/block.h"
19 #include "storage/item.h"
20 #include "storage/off.h"
21 
22 /* GUC variable */
24 
25 /*
26  * A postgres disk page is an abstraction layered on top of a postgres
27  * disk block (which is simply a unit of i/o, see block.h).
28  *
29  * specifically, while a disk block can be unformatted, a postgres
30  * disk page is always a slotted page of the form:
31  *
32  * +----------------+---------------------------------+
33  * | PageHeaderData | linp1 linp2 linp3 ...           |
34  * +-----------+----+---------------------------------+
35  * | ... linpN |                                      |
36  * +-----------+--------------------------------------+
37  * |           ^ pd_lower                             |
38  * |                                                  |
39  * |             v pd_upper                           |
40  * +-------------+------------------------------------+
41  * |             | tupleN ...                         |
42  * +-------------+------------------+-----------------+
43  * |       ... tuple3 tuple2 tuple1 | "special space" |
44  * +--------------------------------+-----------------+
45  *                                  ^ pd_special
46  *
47  * a page is full when nothing can be added between pd_lower and
48  * pd_upper.
49  *
50  * all blocks written out by an access method must be disk pages.
51  *
52  * EXCEPTIONS:
53  *
54  * obviously, a page is not formatted before it is initialized by
55  * a call to PageInit.
56  *
57  * NOTES:
58  *
59  * linp1..N form an ItemId (line pointer) array. ItemPointers point
60  * to a physical block number and a logical offset (line pointer
61  * number) within that block/page. Note that OffsetNumbers
62  * conventionally start at 1, not 0.
63  *
64  * tuple1..N are added "backwards" on the page. Since an ItemPointer
65  * offset is used to access an ItemId entry rather than an actual
66  * byte-offset position, tuples can be physically shuffled on a page
67  * whenever the need arises. This indirection also keeps crash recovery
68  * relatively simple, because the low-level details of page space
69  * management can be controlled by standard buffer page code during
70  * logging, and during recovery.
71  *
72  * AM-generic per-page information is kept in PageHeaderData.
73  *
74  * AM-specific per-page data (if any) is kept in the area marked "special
75  * space"; each AM has an "opaque" structure defined somewhere that is
76  * stored as the page trailer. an access method should always
77  * initialize its pages with PageInit and then set its own opaque
78  * fields.
79  */
80 
/* A Page is referenced through a plain Pointer to the start of the page. */
81 typedef Pointer Page;
82 
83 
84 /*
85  * location (byte offset) within a page.
86  *
87  * note that this is actually limited to 2^15 because we have limited
88  * ItemIdData.lp_off and ItemIdData.lp_len to 15 bits (see itemid.h).
89  */
91 
92 
93 /*
94  * For historical reasons, the 64-bit LSN value is stored as two 32-bit
95  * values.
96  */
97 typedef struct
98 {
99  uint32 xlogid; /* high bits */
100  uint32 xrecoff; /* low bits */
102 
103 static inline XLogRecPtr
105 {
106  return (uint64) val.xlogid << 32 | val.xrecoff;
107 }
108 
/*
 * PageXLogRecPtrSet
 *		Split lsn into its high and low 32-bit halves and store them in
 *		ptr.xlogid and ptr.xrecoff respectively.
 *
 * Both arguments are evaluated twice, so they must be free of side effects.
 */
109 #define PageXLogRecPtrSet(ptr, lsn) \
110  ((ptr).xlogid = (uint32) ((lsn) >> 32), (ptr).xrecoff = (uint32) (lsn))
111 
112 /*
113  * disk page organization
114  *
115  * space management information generic to any page
116  *
117  * pd_lsn - identifies xlog record for last change to this page.
118  * pd_checksum - page checksum, if set.
119  * pd_flags - flag bits.
120  * pd_lower - offset to start of free space.
121  * pd_upper - offset to end of free space.
122  * pd_special - offset to start of special space.
123  * pd_pagesize_version - size in bytes and page layout version number.
124  * pd_prune_xid - oldest XID among potentially prunable tuples on page.
125  *
126  * The LSN is used by the buffer manager to enforce the basic rule of WAL:
127  * "thou shalt write xlog before data". A dirty buffer cannot be dumped
128  * to disk until xlog has been flushed at least as far as the page's LSN.
129  *
130  * pd_checksum stores the page checksum, if it has been set for this page;
131  * zero is a valid value for a checksum. If a checksum is not in use then
132  * we leave the field unset. This will typically mean the field is zero
133  * though non-zero values may also be present if databases have been
134  * pg_upgraded from releases prior to 9.3, when the same byte offset was
135  * used to store the current timelineid when the page was last updated.
136  * Note that there is no indication on a page as to whether the checksum
137  * is valid or not, a deliberate design choice which avoids the problem
138  * of relying on the page contents to decide whether to verify it. Hence
139  * there are no flag bits relating to checksums.
140  *
141  * pd_prune_xid is a hint field that helps determine whether pruning will be
142  * useful. It is currently unused in index pages.
143  *
144  * The page version number and page size are packed together into a single
145  * uint16 field. This is for historical reasons: before PostgreSQL 7.3,
146  * there was no concept of a page version number, and doing it this way
147  * lets us pretend that pre-7.3 databases have page version number zero.
148  * We constrain page sizes to be multiples of 256, leaving the low eight
149  * bits available for a version number.
150  *
151  * Minimum possible page size is perhaps 64B to fit page header, opaque space
152  * and a minimal tuple; of course, in reality you want it much bigger, so
153  * the constraint on pagesize mod 256 is not an important restriction.
154  * On the high end, we can only support pages up to 32KB because lp_off/lp_len
155  * are 15 bits.
156  */
157 
158 typedef struct PageHeaderData
159 {
160  /* XXX LSN is member of *any* block, not only page-organized ones */
161  PageXLogRecPtr pd_lsn; /* LSN: next byte after last byte of xlog
162  * record for last change to this page */
163  uint16 pd_checksum; /* checksum */
164  uint16 pd_flags; /* flag bits, see below */
165  LocationIndex pd_lower; /* offset to start of free space */
166  LocationIndex pd_upper; /* offset to end of free space */
167  LocationIndex pd_special; /* offset to start of special space */
169  TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */
170  ItemIdData pd_linp[FLEXIBLE_ARRAY_MEMBER]; /* line pointer array */
172 
174 
175 /*
176  * pd_flags contains the following flag bits. Undefined bits are initialized
177  * to zero and may be used in the future.
178  *
179  * PD_HAS_FREE_LINES is set if there are any LP_UNUSED line pointers before
180  * pd_lower. This should be considered a hint rather than the truth, since
181  * changes to it are not WAL-logged.
182  *
183  * PD_PAGE_FULL is set if an UPDATE doesn't find enough free space in the
184  * page for its new tuple version; this suggests that a prune is needed.
185  * Again, this is just a hint.
186  */
187 #define PD_HAS_FREE_LINES 0x0001 /* are there any unused line pointers? */
188 #define PD_PAGE_FULL 0x0002 /* not enough free space for new tuple? */
189 #define PD_ALL_VISIBLE 0x0004 /* all tuples on page are visible to
190  * everyone */
191 
/*
 * Mask of every defined pd_flags bit (0x0001 | 0x0002 | 0x0004).  Keep it in
 * step with the flag definitions above when a new bit is introduced.
 */
192 #define PD_VALID_FLAG_BITS 0x0007 /* OR of all valid pd_flags bits */
193 
194 /*
195  * Page layout version number 0 is for pre-7.3 Postgres releases.
196  * Releases 7.3 and 7.4 use 1, denoting a new HeapTupleHeader layout.
197  * Release 8.0 uses 2; it changed the HeapTupleHeader layout again.
198  * Release 8.1 uses 3; it redefined HeapTupleHeader infomask bits.
199  * Release 8.3 uses 4; it changed the HeapTupleHeader layout again, and
200  * added the pd_flags field (by stealing some bits from pd_tli),
201  * as well as adding the pd_prune_xid field (which enlarges the header).
202  *
203  * As of Release 9.3, the checksum version must also be considered when
204  * handling pages.
205  */
206 #define PG_PAGE_LAYOUT_VERSION 4
207 #define PG_DATA_CHECKSUM_VERSION 1
208 
209 /* ----------------------------------------------------------------
210  * page support functions
211  * ----------------------------------------------------------------
212  */
213 
214 /*
215  * line pointer(s) do not count as part of header
216  */
217 #define SizeOfPageHeaderData (offsetof(PageHeaderData, pd_linp))
218 
219 /*
220  * PageIsEmpty
221  * returns true iff no itemid has been allocated on the page
222  */
223 static inline bool
224 PageIsEmpty(Page page)
225 {
226  return ((PageHeader) page)->pd_lower <= SizeOfPageHeaderData;
227 }
228 
229 /*
230  * PageIsNew
231  * returns true iff page has not been initialized (by PageInit)
232  */
233 static inline bool
234 PageIsNew(Page page)
235 {
236  return ((PageHeader) page)->pd_upper == 0;
237 }
238 
239 /*
240  * PageGetItemId
241  * Returns an item identifier of a page.
242  */
243 static inline ItemId
244 PageGetItemId(Page page, OffsetNumber offsetNumber)
245 {
246  return &((PageHeader) page)->pd_linp[offsetNumber - 1];
247 }
248 
249 /*
250  * PageGetContents
251  * To be used in cases where the page does not contain line pointers.
252  *
253  * Note: prior to 8.3 this was not guaranteed to yield a MAXALIGN'd result.
254  * Now it is. Beware of old code that might think the offset to the contents
255  * is just SizeOfPageHeaderData rather than MAXALIGN(SizeOfPageHeaderData).
256  */
257 static inline char *
258 PageGetContents(Page page)
259 {
260  return (char *) page + MAXALIGN(SizeOfPageHeaderData);
261 }
262 
263 /* ----------------
264  * functions to access page size info
265  * ----------------
266  */
267 
268 /*
269  * PageGetPageSize
270  * Returns the page size of a page.
271  *
272  * this can only be called on a formatted page (unlike
273  * BufferGetPageSize, which can be called on an unformatted page).
274  * however, it can be called on a page that is not stored in a buffer.
275  */
276 static inline Size
277 PageGetPageSize(Page page)
278 {
279  return (Size) (((PageHeader) page)->pd_pagesize_version & (uint16) 0xFF00);
280 }
281 
282 /*
283  * PageGetPageLayoutVersion
284  * Returns the page layout version of a page.
285  */
286 static inline uint8
288 {
289  return (((PageHeader) page)->pd_pagesize_version & 0x00FF);
290 }
291 
292 /*
293  * PageSetPageSizeAndVersion
294  * Sets the page size and page layout version number of a page.
295  *
296  * We could support setting these two values separately, but there's
297  * no real need for it at the moment.
298  */
299 static inline void
301 {
302  Assert((size & 0xFF00) == size);
303  Assert((version & 0x00FF) == version);
304 
305  ((PageHeader) page)->pd_pagesize_version = size | version;
306 }
307 
308 /* ----------------
309  * page special data functions
310  * ----------------
311  */
312 /*
313  * PageGetSpecialSize
314  * Returns size of special space on a page.
315  */
316 static inline uint16
318 {
319  return (PageGetPageSize(page) - ((PageHeader) page)->pd_special);
320 }
321 
322 /*
323  * Using assertions, validate that the page special pointer is OK.
324  *
325  * This is intended to catch use of the pointer before page initialization.
326  */
327 static inline void
329 {
330  Assert(page);
331  Assert(((PageHeader) page)->pd_special <= BLCKSZ);
332  Assert(((PageHeader) page)->pd_special >= SizeOfPageHeaderData);
333 }
334 
335 /*
336  * PageGetSpecialPointer
337  * Returns pointer to special space on a page.
338  */
339 static inline char *
341 {
343  return (char *) page + ((PageHeader) page)->pd_special;
344 }
345 
346 /*
347  * PageGetItem
348  * Retrieves an item on the given page.
349  *
350  * Note:
351  * This does not change the status of any of the resources passed.
352  * The semantics may change in the future.
353  */
354 static inline Item
355 PageGetItem(Page page, ItemId itemId)
356 {
357  Assert(page);
358  Assert(ItemIdHasStorage(itemId));
359 
360  return (Item) (((char *) page) + ItemIdGetOffset(itemId));
361 }
362 
363 /*
364  * PageGetMaxOffsetNumber
365  * Returns the maximum offset number used by the given page.
366  * Since offset numbers are 1-based, this is also the number
367  * of items on the page.
368  *
369  * NOTE: if the page is not initialized (pd_lower == 0), we must
370  * return zero to ensure sane behavior.
371  */
372 static inline OffsetNumber
374 {
375  PageHeader pageheader = (PageHeader) page;
376 
377  if (pageheader->pd_lower <= SizeOfPageHeaderData)
378  return 0;
379  else
380  return (pageheader->pd_lower - SizeOfPageHeaderData) / sizeof(ItemIdData);
381 }
382 
383 /*
384  * Additional functions for access to page headers.
385  */
386 static inline XLogRecPtr
387 PageGetLSN(Page page)
388 {
389  return PageXLogRecPtrGet(((PageHeader) page)->pd_lsn);
390 }
391 static inline void
392 PageSetLSN(Page page, XLogRecPtr lsn)
393 {
394  PageXLogRecPtrSet(((PageHeader) page)->pd_lsn, lsn);
395 }
396 
397 static inline bool
399 {
400  return ((PageHeader) page)->pd_flags & PD_HAS_FREE_LINES;
401 }
402 static inline void
404 {
405  ((PageHeader) page)->pd_flags |= PD_HAS_FREE_LINES;
406 }
407 static inline void
409 {
410  ((PageHeader) page)->pd_flags &= ~PD_HAS_FREE_LINES;
411 }
412 
413 static inline bool
414 PageIsFull(Page page)
415 {
416  return ((PageHeader) page)->pd_flags & PD_PAGE_FULL;
417 }
418 static inline void
419 PageSetFull(Page page)
420 {
421  ((PageHeader) page)->pd_flags |= PD_PAGE_FULL;
422 }
423 static inline void
424 PageClearFull(Page page)
425 {
426  ((PageHeader) page)->pd_flags &= ~PD_PAGE_FULL;
427 }
428 
429 static inline bool
431 {
432  return ((PageHeader) page)->pd_flags & PD_ALL_VISIBLE;
433 }
434 static inline void
436 {
437  ((PageHeader) page)->pd_flags |= PD_ALL_VISIBLE;
438 }
439 static inline void
441 {
442  ((PageHeader) page)->pd_flags &= ~PD_ALL_VISIBLE;
443 }
444 
445 /*
446  * These two require "access/transam.h", so left as macros.
447  */
/*
 * PageSetPrunable
 *		Record xid in pd_prune_xid when the page has no valid value stored
 *		yet, or when xid precedes the value already stored.  xid must be a
 *		normal transaction ID (asserted).
 *
 * Both arguments are evaluated more than once, so they must be free of
 * side effects.
 */
448 #define PageSetPrunable(page, xid) \
449 do { \
450  Assert(TransactionIdIsNormal(xid)); \
451  if (!TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) || \
452  TransactionIdPrecedes(xid, ((PageHeader) (page))->pd_prune_xid)) \
453  ((PageHeader) (page))->pd_prune_xid = (xid); \
454 } while (0)
/*
 * PageClearPrunable
 *		Reset pd_prune_xid to InvalidTransactionId.
 */
455 #define PageClearPrunable(page) \
456  (((PageHeader) (page))->pd_prune_xid = InvalidTransactionId)
457 
458 
459 /* ----------------------------------------------------------------
460  * extern declarations
461  * ----------------------------------------------------------------
462  */
463 
464 /* flags for PageAddItemExtended() */
465 #define PAI_OVERWRITE (1 << 0)
466 #define PAI_IS_HEAP (1 << 1)
467 
468 /* flags for PageIsVerifiedExtended() */
469 #define PIV_LOG_WARNING (1 << 0)
470 #define PIV_REPORT_STAT (1 << 1)
471 
/*
 * PageAddItem
 *		Convenience wrapper around PageAddItemExtended() that translates the
 *		boolean overwrite and is_heap arguments into the PAI_OVERWRITE and
 *		PAI_IS_HEAP flag bits.
 */
472 #define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap) \
473  PageAddItemExtended(page, item, size, offsetNumber, \
474  ((overwrite) ? PAI_OVERWRITE : 0) | \
475  ((is_heap) ? PAI_IS_HEAP : 0))
476 
/*
 * PageIsVerified
 *		Convenience wrapper around PageIsVerifiedExtended() that passes both
 *		PIV_LOG_WARNING and PIV_REPORT_STAT.
 */
477 #define PageIsVerified(page, blkno) \
478  PageIsVerifiedExtended(page, blkno, \
479  PIV_LOG_WARNING | PIV_REPORT_STAT)
480 
481 /*
482  * Check that BLCKSZ is a multiple of sizeof(size_t). In
483  * PageIsVerifiedExtended(), it is much faster to check if a page is
484  * full of zeroes using the native word size. Note that this assertion
485  * is kept within a header to make sure that StaticAssertDecl() works
486  * across various combinations of platforms and compilers.
487  */
488 StaticAssertDecl(BLCKSZ == ((BLCKSZ / sizeof(size_t)) * sizeof(size_t)),
489  "BLCKSZ has to be a multiple of sizeof(size_t)");
490 
491 extern void PageInit(Page page, Size pageSize, Size specialSize);
492 extern bool PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags);
494  OffsetNumber offsetNumber, int flags);
495 extern Page PageGetTempPage(Page page);
496 extern Page PageGetTempPageCopy(Page page);
498 extern void PageRestoreTempPage(Page tempPage, Page oldPage);
499 extern void PageRepairFragmentation(Page page);
500 extern void PageTruncateLinePointerArray(Page page);
501 extern Size PageGetFreeSpace(Page page);
502 extern Size PageGetFreeSpaceForMultipleTuples(Page page, int ntups);
503 extern Size PageGetExactFreeSpace(Page page);
504 extern Size PageGetHeapFreeSpace(Page page);
505 extern void PageIndexTupleDelete(Page page, OffsetNumber offnum);
506 extern void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems);
507 extern void PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum);
508 extern bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum,
509  Item newtup, Size newsize);
510 extern char *PageSetChecksumCopy(Page page, BlockNumber blkno);
511 extern void PageSetChecksumInplace(Page page, BlockNumber blkno);
512 
513 #endif /* BUFPAGE_H */
uint32 BlockNumber
Definition: block.h:31
struct PageHeaderData PageHeaderData
#define PD_PAGE_FULL
Definition: bufpage.h:188
PageHeaderData * PageHeader
Definition: bufpage.h:173
static bool PageIsEmpty(Page page)
Definition: bufpage.h:223
static char * PageGetContents(Page page)
Definition: bufpage.h:257
Pointer Page
Definition: bufpage.h:81
PGDLLIMPORT bool ignore_checksum_failure
Definition: bufpage.c:27
void PageRestoreTempPage(Page tempPage, Page oldPage)
Definition: bufpage.c:424
Size PageGetFreeSpaceForMultipleTuples(Page page, int ntups)
Definition: bufpage.c:934
static void PageSetHasFreeLinePointers(Page page)
Definition: bufpage.h:402
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:354
static void PageClearAllVisible(Page page)
Definition: bufpage.h:439
void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
Definition: bufpage.c:1161
OffsetNumber PageAddItemExtended(Page page, Item item, Size size, OffsetNumber offsetNumber, int flags)
Definition: bufpage.c:194
static Size PageGetPageSize(Page page)
Definition: bufpage.h:276
Size PageGetHeapFreeSpace(Page page)
Definition: bufpage.c:991
static void PageClearFull(Page page)
Definition: bufpage.h:423
static XLogRecPtr PageXLogRecPtrGet(PageXLogRecPtr val)
Definition: bufpage.h:104
#define SizeOfPageHeaderData
Definition: bufpage.h:216
static void PageSetAllVisible(Page page)
Definition: bufpage.h:434
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:243
Page PageGetTempPageCopySpecial(Page page)
Definition: bufpage.c:402
static bool PageIsNew(Page page)
Definition: bufpage.h:233
#define PD_ALL_VISIBLE
Definition: bufpage.h:189
static bool PageIsAllVisible(Page page)
Definition: bufpage.h:429
bool PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags)
Definition: bufpage.c:88
bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum, Item newtup, Size newsize)
Definition: bufpage.c:1405
void PageSetChecksumInplace(Page page, BlockNumber blkno)
Definition: bufpage.c:1542
static void PageSetFull(Page page)
Definition: bufpage.h:418
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:391
static void PageValidateSpecialPointer(Page page)
Definition: bufpage.h:327
static XLogRecPtr PageGetLSN(Page page)
Definition: bufpage.h:386
char * PageSetChecksumCopy(Page page, BlockNumber blkno)
Definition: bufpage.c:1510
void PageIndexTupleDelete(Page page, OffsetNumber offnum)
Definition: bufpage.c:1052
void PageRepairFragmentation(Page page)
Definition: bufpage.c:699
static char * PageGetSpecialPointer(Page page)
Definition: bufpage.h:339
void PageTruncateLinePointerArray(Page page)
Definition: bufpage.c:835
void PageInit(Page page, Size pageSize, Size specialSize)
Definition: bufpage.c:42
static void PageClearHasFreeLinePointers(Page page)
Definition: bufpage.h:407
Page PageGetTempPageCopy(Page page)
Definition: bufpage.c:382
StaticAssertDecl(BLCKSZ==((BLCKSZ/sizeof(size_t)) *sizeof(size_t)), "BLCKSZ has to be a multiple of sizeof(size_t)")
static bool PageHasFreeLinePointers(Page page)
Definition: bufpage.h:397
Page PageGetTempPage(Page page)
Definition: bufpage.c:365
#define PD_HAS_FREE_LINES
Definition: bufpage.h:187
uint16 LocationIndex
Definition: bufpage.h:90
void PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum)
Definition: bufpage.c:1295
static bool PageIsFull(Page page)
Definition: bufpage.h:413
static uint8 PageGetPageLayoutVersion(Page page)
Definition: bufpage.h:286
static void PageSetPageSizeAndVersion(Page page, Size size, uint8 version)
Definition: bufpage.h:299
#define PageXLogRecPtrSet(ptr, lsn)
Definition: bufpage.h:109
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:372
Size PageGetExactFreeSpace(Page page)
Definition: bufpage.c:958
static uint16 PageGetSpecialSize(Page page)
Definition: bufpage.h:316
Size PageGetFreeSpace(Page page)
Definition: bufpage.c:907
unsigned short uint16
Definition: c.h:505
unsigned int uint32
Definition: c.h:506
#define PGDLLIMPORT
Definition: c.h:1316
#define MAXALIGN(LEN)
Definition: c.h:811
char * Pointer
Definition: c.h:483
#define Assert(condition)
Definition: c.h:858
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:398
unsigned char uint8
Definition: c.h:504
uint32 TransactionId
Definition: c.h:652
size_t Size
Definition: c.h:605
#define nitems(x)
Definition: indent.h:31
long val
Definition: informix.c:670
Pointer Item
Definition: item.h:17
#define ItemIdGetOffset(itemId)
Definition: itemid.h:65
struct ItemIdData ItemIdData
#define ItemIdHasStorage(itemId)
Definition: itemid.h:120
uint16 OffsetNumber
Definition: off.h:24
static pg_noinline void Size size
Definition: slab.c:607
PageXLogRecPtr pd_lsn
Definition: bufpage.h:161
LocationIndex pd_special
Definition: bufpage.h:167
LocationIndex pd_upper
Definition: bufpage.h:166
uint16 pd_flags
Definition: bufpage.h:164
uint16 pd_checksum
Definition: bufpage.h:163
LocationIndex pd_lower
Definition: bufpage.h:165
uint16 pd_pagesize_version
Definition: bufpage.h:168
TransactionId pd_prune_xid
Definition: bufpage.h:169
ItemIdData pd_linp[FLEXIBLE_ARRAY_MEMBER]
Definition: bufpage.h:170
uint32 xrecoff
Definition: bufpage.h:100
uint32 xlogid
Definition: bufpage.h:99
uint64 XLogRecPtr
Definition: xlogdefs.h:21