PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
bufpage.h
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * bufpage.h
4 * Standard POSTGRES buffer page definitions.
5 *
6 *
7 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
9 *
10 * src/include/storage/bufpage.h
11 *
12 *-------------------------------------------------------------------------
13 */
14#ifndef BUFPAGE_H
15#define BUFPAGE_H
16
17#include "access/xlogdefs.h"
18#include "storage/block.h"
19#include "storage/item.h"
20#include "storage/off.h"
21
22/* GUC variable */
24
25/*
26 * A postgres disk page is an abstraction layered on top of a postgres
27 * disk block (which is simply a unit of i/o, see block.h).
28 *
29 * specifically, while a disk block can be unformatted, a postgres
30 * disk page is always a slotted page of the form:
31 *
 * +----------------+---------------------------------+
 * | PageHeaderData | linp1 linp2 linp3 ...           |
 * +-----------+----+---------------------------------+
 * | ... linpN |                                      |
 * +-----------+--------------------------------------+
 * |           ^ pd_lower                             |
 * |                                                  |
 * |             v pd_upper                           |
 * +-------------+------------------------------------+
 * |             | tupleN ...                         |
 * +-------------+------------------+-----------------+
 * |       ... tuple3 tuple2 tuple1 | "special space" |
 * +--------------------------------+-----------------+
 *                                  ^ pd_special
46 *
47 * a page is full when nothing can be added between pd_lower and
48 * pd_upper.
49 *
50 * all blocks written out by an access method must be disk pages.
51 *
52 * EXCEPTIONS:
53 *
54 * obviously, a page is not formatted before it is initialized by
55 * a call to PageInit.
56 *
57 * NOTES:
58 *
59 * linp1..N form an ItemId (line pointer) array. ItemPointers point
60 * to a physical block number and a logical offset (line pointer
61 * number) within that block/page. Note that OffsetNumbers
62 * conventionally start at 1, not 0.
63 *
64 * tuple1..N are added "backwards" on the page. Since an ItemPointer
65 * offset is used to access an ItemId entry rather than an actual
66 * byte-offset position, tuples can be physically shuffled on a page
67 * whenever the need arises. This indirection also keeps crash recovery
68 * relatively simple, because the low-level details of page space
69 * management can be controlled by standard buffer page code during
70 * logging, and during recovery.
71 *
72 * AM-generic per-page information is kept in PageHeaderData.
73 *
74 * AM-specific per-page data (if any) is kept in the area marked "special
75 * space"; each AM has an "opaque" structure defined somewhere that is
76 * stored as the page trailer. An access method should always
77 * initialize its pages with PageInit and then set its own opaque
78 * fields.
79 */
80
/*
 * A page is handled as raw bytes: PageData is the byte type, and Page is a
 * pointer to the first byte of a page.
 */
typedef char PageData;
typedef PageData *Page;
83
84
85/*
86 * location (byte offset) within a page.
87 *
88 * note that this is actually limited to 2^15 because we have limited
89 * ItemIdData.lp_off and ItemIdData.lp_len to 15 bits (see itemid.h).
90 */
92
93
94/*
95 * For historical reasons, the 64-bit LSN value is stored as two 32-bit
96 * values.
97 */
98typedef struct
99{
100 uint32 xlogid; /* high bits */
101 uint32 xrecoff; /* low bits */
103
104static inline XLogRecPtr
106{
107 return (uint64) val.xlogid << 32 | val.xrecoff;
108}
109
/*
 * PageXLogRecPtrSet
 *		Split a 64-bit LSN into the two 32-bit fields of "ptr": the high
 *		half goes to xlogid, the low half to xrecoff.
 *
 * Both arguments are evaluated twice, so they must be free of side effects.
 */
#define PageXLogRecPtrSet(ptr, lsn) \
	((ptr).xlogid = (uint32) ((lsn) >> 32), (ptr).xrecoff = (uint32) (lsn))
112
113/*
114 * disk page organization
115 *
116 * space management information generic to any page
117 *
118 * pd_lsn - identifies xlog record for last change to this page.
119 * pd_checksum - page checksum, if set.
120 * pd_flags - flag bits.
121 * pd_lower - offset to start of free space.
122 * pd_upper - offset to end of free space.
123 * pd_special - offset to start of special space.
124 * pd_pagesize_version - size in bytes and page layout version number.
125 * pd_prune_xid - oldest XID among potentially prunable tuples on page.
126 *
127 * The LSN is used by the buffer manager to enforce the basic rule of WAL:
128 * "thou shalt write xlog before data". A dirty buffer cannot be dumped
129 * to disk until xlog has been flushed at least as far as the page's LSN.
130 *
131 * pd_checksum stores the page checksum, if it has been set for this page;
132 * zero is a valid value for a checksum. If a checksum is not in use then
133 * we leave the field unset. This will typically mean the field is zero
134 * though non-zero values may also be present if databases have been
135 * pg_upgraded from releases prior to 9.3, when the same byte offset was
136 * used to store the current timelineid when the page was last updated.
137 * Note that there is no indication on a page as to whether the checksum
138 * is valid or not, a deliberate design choice which avoids the problem
139 * of relying on the page contents to decide whether to verify it. Hence
140 * there are no flag bits relating to checksums.
141 *
142 * pd_prune_xid is a hint field that helps determine whether pruning will be
143 * useful. It is currently unused in index pages.
144 *
145 * The page version number and page size are packed together into a single
146 * uint16 field. This is for historical reasons: before PostgreSQL 7.3,
147 * there was no concept of a page version number, and doing it this way
148 * lets us pretend that pre-7.3 databases have page version number zero.
149 * We constrain page sizes to be multiples of 256, leaving the low eight
150 * bits available for a version number.
151 *
152 * Minimum possible page size is perhaps 64B to fit page header, opaque space
153 * and a minimal tuple; of course, in reality you want it much bigger, so
154 * the constraint on pagesize mod 256 is not an important restriction.
155 * On the high end, we can only support pages up to 32KB because lp_off/lp_len
156 * are 15 bits.
157 */
158
159typedef struct PageHeaderData
160{
161 /* XXX LSN is member of *any* block, not only page-organized ones */
162 PageXLogRecPtr pd_lsn; /* LSN: next byte after last byte of xlog
163 * record for last change to this page */
164 uint16 pd_checksum; /* checksum */
165 uint16 pd_flags; /* flag bits, see below */
166 LocationIndex pd_lower; /* offset to start of free space */
167 LocationIndex pd_upper; /* offset to end of free space */
168 LocationIndex pd_special; /* offset to start of special space */
170 TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */
171 ItemIdData pd_linp[FLEXIBLE_ARRAY_MEMBER]; /* line pointer array */
173
175
/*
 * pd_flags contains the following flag bits.  Undefined bits are initialized
 * to zero and may be used in the future.
 *
 * PD_HAS_FREE_LINES is set if there are any LP_UNUSED line pointers before
 * pd_lower.  This should be considered a hint rather than the truth, since
 * changes to it are not WAL-logged.
 *
 * PD_PAGE_FULL is set if an UPDATE doesn't find enough free space in the
 * page for its new tuple version; this suggests that a prune is needed.
 * Again, this is just a hint.
 */
#define PD_HAS_FREE_LINES	0x0001	/* are there any unused line pointers? */
#define PD_PAGE_FULL		0x0002	/* not enough free space for new tuple? */
#define PD_ALL_VISIBLE		0x0004	/* all tuples on page are visible to
									 * everyone */

#define PD_VALID_FLAG_BITS	0x0007	/* OR of all valid pd_flags bits */
194
/*
 * Page layout version number 0 is for pre-7.3 Postgres releases.
 * Releases 7.3 and 7.4 use 1, denoting a new HeapTupleHeader layout.
 * Release 8.0 uses 2; it changed the HeapTupleHeader layout again.
 * Release 8.1 uses 3; it redefined HeapTupleHeader infomask bits.
 * Release 8.3 uses 4; it changed the HeapTupleHeader layout again, and
 *		added the pd_flags field (by stealing some bits from pd_tli),
 *		as well as adding the pd_prune_xid field (which enlarges the header).
 *
 * As of Release 9.3, the checksum version must also be considered when
 * handling pages.
 */
#define PG_PAGE_LAYOUT_VERSION		4
#define PG_DATA_CHECKSUM_VERSION	1
209
210/* ----------------------------------------------------------------
211 * page support functions
212 * ----------------------------------------------------------------
213 */
214
/*
 * Size of the fixed part of the page header: the byte offset of pd_linp.
 * Line pointer(s) do not count as part of the header.
 */
#define SizeOfPageHeaderData (offsetof(PageHeaderData, pd_linp))
219
220/*
221 * PageIsEmpty
222 * returns true iff no itemid has been allocated on the page
223 */
224static inline bool
225PageIsEmpty(const PageData *page)
226{
227 return ((const PageHeaderData *) page)->pd_lower <= SizeOfPageHeaderData;
228}
229
230/*
231 * PageIsNew
232 * returns true iff page has not been initialized (by PageInit)
233 */
234static inline bool
235PageIsNew(const PageData *page)
236{
237 return ((const PageHeaderData *) page)->pd_upper == 0;
238}
239
240/*
241 * PageGetItemId
242 * Returns an item identifier of a page.
243 */
244static inline ItemId
245PageGetItemId(Page page, OffsetNumber offsetNumber)
246{
247 return &((PageHeader) page)->pd_linp[offsetNumber - 1];
248}
249
250/*
251 * PageGetContents
252 * To be used in cases where the page does not contain line pointers.
253 *
254 * Note: prior to 8.3 this was not guaranteed to yield a MAXALIGN'd result.
255 * Now it is. Beware of old code that might think the offset to the contents
256 * is just SizeOfPageHeaderData rather than MAXALIGN(SizeOfPageHeaderData).
257 */
258static inline char *
260{
261 return (char *) page + MAXALIGN(SizeOfPageHeaderData);
262}
263
264/* ----------------
265 * functions to access page size info
266 * ----------------
267 */
268
269/*
270 * PageGetPageSize
271 * Returns the page size of a page.
272 *
273 * this can only be called on a formatted page (unlike
274 * BufferGetPageSize, which can be called on an unformatted page).
275 * however, it can be called on a page that is not stored in a buffer.
276 */
277static inline Size
278PageGetPageSize(const PageData *page)
279{
280 return (Size) (((const PageHeaderData *) page)->pd_pagesize_version & (uint16) 0xFF00);
281}
282
283/*
284 * PageGetPageLayoutVersion
285 * Returns the page layout version of a page.
286 */
287static inline uint8
289{
290 return (((const PageHeaderData *) page)->pd_pagesize_version & 0x00FF);
291}
292
293/*
294 * PageSetPageSizeAndVersion
295 * Sets the page size and page layout version number of a page.
296 *
297 * We could support setting these two values separately, but there's
298 * no real need for it at the moment.
299 */
300static inline void
301PageSetPageSizeAndVersion(Page page, Size size, uint8 version)
302{
303 Assert((size & 0xFF00) == size);
304 Assert((version & 0x00FF) == version);
305
306 ((PageHeader) page)->pd_pagesize_version = size | version;
307}
308
309/* ----------------
310 * page special data functions
311 * ----------------
312 */
313/*
314 * PageGetSpecialSize
315 * Returns size of special space on a page.
316 */
317static inline uint16
318PageGetSpecialSize(const PageData *page)
319{
320 return (PageGetPageSize(page) - ((const PageHeaderData *) page)->pd_special);
321}
322
323/*
324 * Using assertions, validate that the page special pointer is OK.
325 *
326 * This is intended to catch use of the pointer before page initialization.
327 */
328static inline void
330{
331 Assert(page);
332 Assert(((const PageHeaderData *) page)->pd_special <= BLCKSZ);
333 Assert(((const PageHeaderData *) page)->pd_special >= SizeOfPageHeaderData);
334}
335
/*
 * PageGetSpecialPointer
 *		Returns pointer to special space on a page.
 *
 * The page is first checked with PageValidateSpecialPointer().  The result
 * is "page" advanced by pd_special, so it has the same pointer type as the
 * argument.  Note that "page" is evaluated more than once.
 */
#define PageGetSpecialPointer(page) \
( \
	PageValidateSpecialPointer(page), \
	((page) + ((PageHeader) (page))->pd_special) \
)
345
346/*
347 * PageGetItem
348 * Retrieves an item on the given page.
349 *
350 * Note:
351 * This does not change the status of any of the resources passed.
352 * The semantics may change in the future.
353 */
354static inline Item
355PageGetItem(const PageData *page, const ItemIdData *itemId)
356{
357 Assert(page);
358 Assert(ItemIdHasStorage(itemId));
359
360 return (Item) (((const char *) page) + ItemIdGetOffset(itemId));
361}
362
363/*
364 * PageGetMaxOffsetNumber
365 * Returns the maximum offset number used by the given page.
366 * Since offset numbers are 1-based, this is also the number
367 * of items on the page.
368 *
369 * NOTE: if the page is not initialized (pd_lower == 0), we must
370 * return zero to ensure sane behavior.
371 */
372static inline OffsetNumber
374{
375 const PageHeaderData *pageheader = (const PageHeaderData *) page;
376
377 if (pageheader->pd_lower <= SizeOfPageHeaderData)
378 return 0;
379 else
380 return (pageheader->pd_lower - SizeOfPageHeaderData) / sizeof(ItemIdData);
381}
382
383/*
384 * Additional functions for access to page headers.
385 */
386static inline XLogRecPtr
387PageGetLSN(const PageData *page)
388{
389 return PageXLogRecPtrGet(((const PageHeaderData *) page)->pd_lsn);
390}
391static inline void
392PageSetLSN(Page page, XLogRecPtr lsn)
393{
394 PageXLogRecPtrSet(((PageHeader) page)->pd_lsn, lsn);
395}
396
397static inline bool
399{
400 return ((const PageHeaderData *) page)->pd_flags & PD_HAS_FREE_LINES;
401}
402static inline void
404{
405 ((PageHeader) page)->pd_flags |= PD_HAS_FREE_LINES;
406}
407static inline void
409{
410 ((PageHeader) page)->pd_flags &= ~PD_HAS_FREE_LINES;
411}
412
413static inline bool
414PageIsFull(const PageData *page)
415{
416 return ((const PageHeaderData *) page)->pd_flags & PD_PAGE_FULL;
417}
418static inline void
419PageSetFull(Page page)
420{
421 ((PageHeader) page)->pd_flags |= PD_PAGE_FULL;
422}
423static inline void
425{
426 ((PageHeader) page)->pd_flags &= ~PD_PAGE_FULL;
427}
428
429static inline bool
430PageIsAllVisible(const PageData *page)
431{
432 return ((const PageHeaderData *) page)->pd_flags & PD_ALL_VISIBLE;
433}
434static inline void
436{
437 ((PageHeader) page)->pd_flags |= PD_ALL_VISIBLE;
438}
439static inline void
441{
442 ((PageHeader) page)->pd_flags &= ~PD_ALL_VISIBLE;
443}
444
/*
 * These two require "access/transam.h", so left as macros.
 *
 * PageSetPrunable stores xid in pd_prune_xid when the field is currently
 * invalid or when xid precedes the stored value; otherwise it leaves the
 * field alone.  xid must be a normal transaction ID (asserted).  Both
 * arguments are evaluated more than once.
 *
 * PageClearPrunable resets pd_prune_xid to InvalidTransactionId.
 */
#define PageSetPrunable(page, xid) \
do { \
	Assert(TransactionIdIsNormal(xid)); \
	if (!TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) || \
		TransactionIdPrecedes(xid, ((PageHeader) (page))->pd_prune_xid)) \
		((PageHeader) (page))->pd_prune_xid = (xid); \
} while (0)
#define PageClearPrunable(page) \
	(((PageHeader) (page))->pd_prune_xid = InvalidTransactionId)
457
458
459/* ----------------------------------------------------------------
460 * extern declarations
461 * ----------------------------------------------------------------
462 */
463
/* flags for PageAddItemExtended() */
#define PAI_OVERWRITE			(1 << 0)
#define PAI_IS_HEAP				(1 << 1)

/* flags for PageIsVerified() */
#define PIV_LOG_WARNING			(1 << 0)
#define PIV_LOG_LOG				(1 << 1)
#define PIV_IGNORE_CHECKSUM_FAILURE (1 << 2)

/*
 * PageAddItem
 *		Convenience wrapper around PageAddItemExtended() that turns the two
 *		boolean arguments into the corresponding PAI_* flag bits.
 */
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap) \
	PageAddItemExtended(page, item, size, offsetNumber, \
						((overwrite) ? PAI_OVERWRITE : 0) | \
						((is_heap) ? PAI_IS_HEAP : 0))
477
478/*
479 * Check that BLCKSZ is a multiple of sizeof(size_t). In PageIsVerified(), it
480 * is much faster to check if a page is full of zeroes using the native word
481 * size. Note that this assertion is kept within a header to make sure that
482 * StaticAssertDecl() works across various combinations of platforms and
483 * compilers.
484 */
485StaticAssertDecl(BLCKSZ == ((BLCKSZ / sizeof(size_t)) * sizeof(size_t)),
486 "BLCKSZ has to be a multiple of sizeof(size_t)");
487
488extern void PageInit(Page page, Size pageSize, Size specialSize);
489extern bool PageIsVerified(PageData *page, BlockNumber blkno, int flags,
490 bool *checksum_failure_p);
491extern OffsetNumber PageAddItemExtended(Page page, Item item, Size size,
492 OffsetNumber offsetNumber, int flags);
493extern Page PageGetTempPage(const PageData *page);
494extern Page PageGetTempPageCopy(const PageData *page);
495extern Page PageGetTempPageCopySpecial(const PageData *page);
496extern void PageRestoreTempPage(Page tempPage, Page oldPage);
497extern void PageRepairFragmentation(Page page);
498extern void PageTruncateLinePointerArray(Page page);
499extern Size PageGetFreeSpace(const PageData *page);
500extern Size PageGetFreeSpaceForMultipleTuples(const PageData *page, int ntups);
501extern Size PageGetExactFreeSpace(const PageData *page);
502extern Size PageGetHeapFreeSpace(const PageData *page);
503extern void PageIndexTupleDelete(Page page, OffsetNumber offnum);
504extern void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems);
505extern void PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum);
506extern bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum,
507 Item newtup, Size newsize);
508extern char *PageSetChecksumCopy(Page page, BlockNumber blkno);
509extern void PageSetChecksumInplace(Page page, BlockNumber blkno);
510
511#endif /* BUFPAGE_H */
uint32 BlockNumber
Definition: block.h:31
struct PageHeaderData PageHeaderData
Size PageGetFreeSpace(const PageData *page)
Definition: bufpage.c:906
#define PD_PAGE_FULL
Definition: bufpage.h:189
static bool PageIsEmpty(const PageData *page)
Definition: bufpage.h:224
PageHeaderData * PageHeader
Definition: bufpage.h:174
PGDLLIMPORT bool ignore_checksum_failure
Definition: bufpage.c:27
void PageRestoreTempPage(Page tempPage, Page oldPage)
Definition: bufpage.c:423
static void PageSetHasFreeLinePointers(Page page)
Definition: bufpage.h:402
Size PageGetFreeSpaceForMultipleTuples(const PageData *page, int ntups)
Definition: bufpage.c:933
Size PageGetHeapFreeSpace(const PageData *page)
Definition: bufpage.c:990
static bool PageIsAllVisible(const PageData *page)
Definition: bufpage.h:429
static uint16 PageGetSpecialSize(const PageData *page)
Definition: bufpage.h:317
char PageData
Definition: bufpage.h:81
static void PageClearAllVisible(Page page)
Definition: bufpage.h:439
Page PageGetTempPage(const PageData *page)
Definition: bufpage.c:364
static Size PageGetPageSize(const PageData *page)
Definition: bufpage.h:277
Page PageGetTempPageCopy(const PageData *page)
Definition: bufpage.c:381
void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
Definition: bufpage.c:1160
OffsetNumber PageAddItemExtended(Page page, Item item, Size size, OffsetNumber offsetNumber, int flags)
Definition: bufpage.c:193
static void PageClearFull(Page page)
Definition: bufpage.h:423
static Item PageGetItem(const PageData *page, const ItemIdData *itemId)
Definition: bufpage.h:354
static XLogRecPtr PageXLogRecPtrGet(PageXLogRecPtr val)
Definition: bufpage.h:105
static bool PageIsNew(const PageData *page)
Definition: bufpage.h:234
#define SizeOfPageHeaderData
Definition: bufpage.h:217
static char * PageGetContents(Page page)
Definition: bufpage.h:258
static void PageSetAllVisible(Page page)
Definition: bufpage.h:434
static uint8 PageGetPageLayoutVersion(const PageData *page)
Definition: bufpage.h:287
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:244
#define PD_ALL_VISIBLE
Definition: bufpage.h:190
bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum, Item newtup, Size newsize)
Definition: bufpage.c:1404
void PageSetChecksumInplace(Page page, BlockNumber blkno)
Definition: bufpage.c:1541
static void PageSetFull(Page page)
Definition: bufpage.h:418
static bool PageHasFreeLinePointers(const PageData *page)
Definition: bufpage.h:397
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:391
void PageIndexTupleDelete(Page page, OffsetNumber offnum)
Definition: bufpage.c:1051
char * PageSetChecksumCopy(Page page, BlockNumber blkno)
Definition: bufpage.c:1509
void PageRepairFragmentation(Page page)
Definition: bufpage.c:698
Size PageGetExactFreeSpace(const PageData *page)
Definition: bufpage.c:957
void PageTruncateLinePointerArray(Page page)
Definition: bufpage.c:834
void PageInit(Page page, Size pageSize, Size specialSize)
Definition: bufpage.c:42
static void PageClearHasFreeLinePointers(Page page)
Definition: bufpage.h:407
bool PageIsVerified(PageData *page, BlockNumber blkno, int flags, bool *checksum_failure_p)
Definition: bufpage.c:94
StaticAssertDecl(BLCKSZ==((BLCKSZ/sizeof(size_t)) *sizeof(size_t)), "BLCKSZ has to be a multiple of sizeof(size_t)")
PageData * Page
Definition: bufpage.h:82
#define PD_HAS_FREE_LINES
Definition: bufpage.h:188
Page PageGetTempPageCopySpecial(const PageData *page)
Definition: bufpage.c:401
uint16 LocationIndex
Definition: bufpage.h:91
static void PageValidateSpecialPointer(const PageData *page)
Definition: bufpage.h:328
void PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum)
Definition: bufpage.c:1294
static XLogRecPtr PageGetLSN(const PageData *page)
Definition: bufpage.h:386
static void PageSetPageSizeAndVersion(Page page, Size size, uint8 version)
Definition: bufpage.h:300
#define PageXLogRecPtrSet(ptr, lsn)
Definition: bufpage.h:110
static bool PageIsFull(const PageData *page)
Definition: bufpage.h:413
static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)
Definition: bufpage.h:372
#define PGDLLIMPORT
Definition: c.h:1291
#define MAXALIGN(LEN)
Definition: c.h:782
uint8_t uint8
Definition: c.h:500
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:434
uint64_t uint64
Definition: c.h:503
uint16_t uint16
Definition: c.h:501
uint32_t uint32
Definition: c.h:502
uint32 TransactionId
Definition: c.h:623
size_t Size
Definition: c.h:576
Assert(PointerIsAligned(start, uint64))
#define nitems(x)
Definition: indent.h:31
long val
Definition: informix.c:689
Pointer Item
Definition: item.h:17
#define ItemIdGetOffset(itemId)
Definition: itemid.h:65
struct ItemIdData ItemIdData
#define ItemIdHasStorage(itemId)
Definition: itemid.h:120
uint16 OffsetNumber
Definition: off.h:24
PageXLogRecPtr pd_lsn
Definition: bufpage.h:162
LocationIndex pd_special
Definition: bufpage.h:168
LocationIndex pd_upper
Definition: bufpage.h:167
uint16 pd_flags
Definition: bufpage.h:165
uint16 pd_checksum
Definition: bufpage.h:164
LocationIndex pd_lower
Definition: bufpage.h:166
uint16 pd_pagesize_version
Definition: bufpage.h:169
TransactionId pd_prune_xid
Definition: bufpage.h:170
ItemIdData pd_linp[FLEXIBLE_ARRAY_MEMBER]
Definition: bufpage.h:171
uint32 xrecoff
Definition: bufpage.h:101
uint32 xlogid
Definition: bufpage.h:100
uint64 XLogRecPtr
Definition: xlogdefs.h:21