PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
generic_xlog.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * generic_xlog.c
4 * Implementation of generic xlog records.
5 *
6 *
7 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
9 *
10 * src/backend/access/transam/generic_xlog.c
11 *
12 *-------------------------------------------------------------------------
13 */
14#include "postgres.h"
15
16#include "access/bufmask.h"
17#include "access/generic_xlog.h"
18#include "access/xlogutils.h"
19#include "miscadmin.h"
20
/*-------------------------------------------------------------------------
 * Internally, a delta between pages consists of a set of fragments.  Each
 * fragment represents changes made in a given region of a page.  A fragment
 * is made up as follows:
 *
 * - offset of page region (OffsetNumber)
 * - length of page region (OffsetNumber)
 * - data - the data to place into the region ('length' number of bytes)
 *
 * Unchanged regions of a page are not represented in its delta.  As a result,
 * a delta can be more compact than the full page image.  But having an
 * unchanged region between two fragments that is smaller than the fragment
 * header (offset+length) does not pay off in terms of the overall size of
 * the delta.  For this reason, we merge adjacent fragments if the unchanged
 * region between them is <= MATCH_THRESHOLD bytes.
 *
 * We do not bother to merge fragments across the "lower" and "upper" parts
 * of a page; it's very seldom the case that pd_lower and pd_upper are within
 * MATCH_THRESHOLD bytes of each other, and handling that infrequent case
 * would complicate and slow down the delta-computation code unduly.
 * Therefore, the worst-case delta size includes two fragment headers plus
 * a full page's worth of data.
 *-------------------------------------------------------------------------
 */
/* Size of one fragment header: an offset plus a length */
#define FRAGMENT_HEADER_SIZE	(2 * sizeof(OffsetNumber))
/* Matching runs no longer than this are folded into the adjacent fragment */
#define MATCH_THRESHOLD			FRAGMENT_HEADER_SIZE
/* Worst case: one fragment each for the lower and upper part of the page */
#define MAX_DELTA_SIZE			(BLCKSZ + 2 * FRAGMENT_HEADER_SIZE)
48
49/* Struct of generic xlog data for single page */
50typedef struct
51{
52 Buffer buffer; /* registered buffer */
53 int flags; /* flags for this buffer */
54 int deltaLen; /* space consumed in delta field */
55 char *image; /* copy of page image for modification, do not
56 * do it in-place to have aligned memory chunk */
57 char delta[MAX_DELTA_SIZE]; /* delta between page images */
59
60/*
61 * State of generic xlog record construction. Must be allocated at an I/O
62 * aligned address.
63 */
65{
66 /* Page images (properly aligned, must be first) */
68 /* Info about each page, see above */
71};
72
73static void writeFragment(GenericXLogPageData *pageData, OffsetNumber offset,
74 OffsetNumber length, const char *data);
75static void computeRegionDelta(GenericXLogPageData *pageData,
76 const char *curpage, const char *targetpage,
77 int targetStart, int targetEnd,
78 int validStart, int validEnd);
79static void computeDelta(GenericXLogPageData *pageData, Page curpage, Page targetpage);
80static void applyPageRedo(Page page, const char *delta, Size deltaSize);
81
82
83/*
84 * Write next fragment into pageData's delta.
85 *
86 * The fragment has the given offset and length, and data points to the
87 * actual data (of length length).
88 */
89static void
91 const char *data)
92{
93 char *ptr = pageData->delta + pageData->deltaLen;
94
95 /* Verify we have enough space */
96 Assert(pageData->deltaLen + sizeof(offset) +
97 sizeof(length) + length <= sizeof(pageData->delta));
98
99 /* Write fragment data */
100 memcpy(ptr, &offset, sizeof(offset));
101 ptr += sizeof(offset);
102 memcpy(ptr, &length, sizeof(length));
103 ptr += sizeof(length);
104 memcpy(ptr, data, length);
105 ptr += length;
106
107 pageData->deltaLen = ptr - pageData->delta;
108}
109
110/*
111 * Compute the XLOG fragments needed to transform a region of curpage into the
112 * corresponding region of targetpage, and append them to pageData's delta
113 * field. The region to transform runs from targetStart to targetEnd-1.
114 * Bytes in curpage outside the range validStart to validEnd-1 should be
115 * considered invalid, and always overwritten with target data.
116 *
117 * This function is a hot spot, so it's worth being as tense as possible
118 * about the data-matching loops.
119 */
120static void
122 const char *curpage, const char *targetpage,
123 int targetStart, int targetEnd,
124 int validStart, int validEnd)
125{
126 int i,
127 loopEnd,
128 fragmentBegin = -1,
129 fragmentEnd = -1;
130
131 /* Deal with any invalid start region by including it in first fragment */
132 if (validStart > targetStart)
133 {
134 fragmentBegin = targetStart;
135 targetStart = validStart;
136 }
137
138 /* We'll deal with any invalid end region after the main loop */
139 loopEnd = Min(targetEnd, validEnd);
140
141 /* Examine all the potentially matchable bytes */
142 i = targetStart;
143 while (i < loopEnd)
144 {
145 if (curpage[i] != targetpage[i])
146 {
147 /* On unmatched byte, start new fragment if not already in one */
148 if (fragmentBegin < 0)
149 fragmentBegin = i;
150 /* Mark unmatched-data endpoint as uncertain */
151 fragmentEnd = -1;
152 /* Extend the fragment as far as possible in a tight loop */
153 i++;
154 while (i < loopEnd && curpage[i] != targetpage[i])
155 i++;
156 if (i >= loopEnd)
157 break;
158 }
159
160 /* Found a matched byte, so remember end of unmatched fragment */
161 fragmentEnd = i;
162
163 /*
164 * Extend the match as far as possible in a tight loop. (On typical
165 * workloads, this inner loop is the bulk of this function's runtime.)
166 */
167 i++;
168 while (i < loopEnd && curpage[i] == targetpage[i])
169 i++;
170
171 /*
172 * There are several possible cases at this point:
173 *
174 * 1. We have no unwritten fragment (fragmentBegin < 0). There's
175 * nothing to write; and it doesn't matter what fragmentEnd is.
176 *
177 * 2. We found more than MATCH_THRESHOLD consecutive matching bytes.
178 * Dump out the unwritten fragment, stopping at fragmentEnd.
179 *
180 * 3. The match extends to loopEnd. We'll do nothing here, exit the
181 * loop, and then dump the unwritten fragment, after merging it with
182 * the invalid end region if any. If we don't so merge, fragmentEnd
183 * establishes how much the final writeFragment call needs to write.
184 *
185 * 4. We found an unmatched byte before loopEnd. The loop will repeat
186 * and will enter the unmatched-byte stanza above. So in this case
187 * also, it doesn't matter what fragmentEnd is. The matched bytes
188 * will get merged into the continuing unmatched fragment.
189 *
190 * Only in case 3 do we reach the bottom of the loop with a meaningful
191 * fragmentEnd value, which is why it's OK that we unconditionally
192 * assign "fragmentEnd = i" above.
193 */
194 if (fragmentBegin >= 0 && i - fragmentEnd > MATCH_THRESHOLD)
195 {
196 writeFragment(pageData, fragmentBegin,
197 fragmentEnd - fragmentBegin,
198 targetpage + fragmentBegin);
199 fragmentBegin = -1;
200 fragmentEnd = -1; /* not really necessary */
201 }
202 }
203
204 /* Deal with any invalid end region by including it in final fragment */
205 if (loopEnd < targetEnd)
206 {
207 if (fragmentBegin < 0)
208 fragmentBegin = loopEnd;
209 fragmentEnd = targetEnd;
210 }
211
212 /* Write final fragment if any */
213 if (fragmentBegin >= 0)
214 {
215 if (fragmentEnd < 0)
216 fragmentEnd = targetEnd;
217 writeFragment(pageData, fragmentBegin,
218 fragmentEnd - fragmentBegin,
219 targetpage + fragmentBegin);
220 }
221}
222
223/*
224 * Compute the XLOG delta record needed to transform curpage into targetpage,
225 * and store it in pageData's delta field.
226 */
227static void
228computeDelta(GenericXLogPageData *pageData, Page curpage, Page targetpage)
229{
230 int targetLower = ((PageHeader) targetpage)->pd_lower,
231 targetUpper = ((PageHeader) targetpage)->pd_upper,
232 curLower = ((PageHeader) curpage)->pd_lower,
233 curUpper = ((PageHeader) curpage)->pd_upper;
234
235 pageData->deltaLen = 0;
236
237 /* Compute delta records for lower part of page ... */
238 computeRegionDelta(pageData, curpage, targetpage,
239 0, targetLower,
240 0, curLower);
241 /* ... and for upper part, ignoring what's between */
242 computeRegionDelta(pageData, curpage, targetpage,
243 targetUpper, BLCKSZ,
244 curUpper, BLCKSZ);
245
246 /*
247 * If xlog debug is enabled, then check produced delta. Result of delta
248 * application to curpage should be equivalent to targetpage.
249 */
250#ifdef WAL_DEBUG
251 if (XLOG_DEBUG)
252 {
253 PGAlignedBlock tmp;
254
255 memcpy(tmp.data, curpage, BLCKSZ);
256 applyPageRedo(tmp.data, pageData->delta, pageData->deltaLen);
257 if (memcmp(tmp.data, targetpage, targetLower) != 0 ||
258 memcmp(tmp.data + targetUpper, targetpage + targetUpper,
259 BLCKSZ - targetUpper) != 0)
260 elog(ERROR, "result of generic xlog apply does not match");
261 }
262#endif
263}
264
265/*
266 * Start new generic xlog record for modifications to specified relation.
267 */
270{
272 int i;
273
276 0);
277 state->isLogged = RelationNeedsWAL(relation);
278
279 for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++)
280 {
281 state->pages[i].image = state->images[i].data;
282 state->pages[i].buffer = InvalidBuffer;
283 }
284
285 return state;
286}
287
288/*
289 * Register new buffer for generic xlog record.
290 *
291 * Returns pointer to the page's image in the GenericXLogState, which
292 * is what the caller should modify.
293 *
294 * If the buffer is already registered, just return its existing entry.
295 * (It's not very clear what to do with the flags in such a case, but
296 * for now we stay with the original flags.)
297 */
298Page
300{
301 int block_id;
302
303 /* Search array for existing entry or first unused slot */
304 for (block_id = 0; block_id < MAX_GENERIC_XLOG_PAGES; block_id++)
305 {
306 GenericXLogPageData *page = &state->pages[block_id];
307
308 if (BufferIsInvalid(page->buffer))
309 {
310 /* Empty slot, so use it (there cannot be a match later) */
311 page->buffer = buffer;
312 page->flags = flags;
313 memcpy(page->image, BufferGetPage(buffer), BLCKSZ);
314 return (Page) page->image;
315 }
316 else if (page->buffer == buffer)
317 {
318 /*
319 * Buffer is already registered. Just return the image, which is
320 * already prepared.
321 */
322 return (Page) page->image;
323 }
324 }
325
326 elog(ERROR, "maximum number %d of generic xlog buffers is exceeded",
328 /* keep compiler quiet */
329 return NULL;
330}
331
332/*
333 * Apply changes represented by GenericXLogState to the actual buffers,
334 * and emit a generic xlog record.
335 */
338{
339 XLogRecPtr lsn;
340 int i;
341
342 if (state->isLogged)
343 {
344 /* Logged relation: make xlog record in critical section. */
346
348
349 /*
350 * Compute deltas if necessary, write changes to buffers, mark buffers
351 * dirty, and register changes.
352 */
353 for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++)
354 {
355 GenericXLogPageData *pageData = &state->pages[i];
356 Page page;
357 PageHeader pageHeader;
358
359 if (BufferIsInvalid(pageData->buffer))
360 continue;
361
362 page = BufferGetPage(pageData->buffer);
363 pageHeader = (PageHeader) pageData->image;
364
365 /*
366 * Compute delta while we still have both the unmodified page and
367 * the new image. Not needed if we are logging the full image.
368 */
369 if (!(pageData->flags & GENERIC_XLOG_FULL_IMAGE))
370 computeDelta(pageData, page, (Page) pageData->image);
371
372 /*
373 * Apply the image, being careful to zero the "hole" between
374 * pd_lower and pd_upper in order to avoid divergence between
375 * actual page state and what replay would produce.
376 */
377 memcpy(page, pageData->image, pageHeader->pd_lower);
378 memset(page + pageHeader->pd_lower, 0,
379 pageHeader->pd_upper - pageHeader->pd_lower);
380 memcpy(page + pageHeader->pd_upper,
381 pageData->image + pageHeader->pd_upper,
382 BLCKSZ - pageHeader->pd_upper);
383
384 MarkBufferDirty(pageData->buffer);
385
386 if (pageData->flags & GENERIC_XLOG_FULL_IMAGE)
387 {
388 XLogRegisterBuffer(i, pageData->buffer,
390 }
391 else
392 {
394 XLogRegisterBufData(i, pageData->delta, pageData->deltaLen);
395 }
396 }
397
398 /* Insert xlog record */
399 lsn = XLogInsert(RM_GENERIC_ID, 0);
400
401 /* Set LSN */
402 for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++)
403 {
404 GenericXLogPageData *pageData = &state->pages[i];
405
406 if (BufferIsInvalid(pageData->buffer))
407 continue;
408 PageSetLSN(BufferGetPage(pageData->buffer), lsn);
409 }
411 }
412 else
413 {
414 /* Unlogged relation: skip xlog-related stuff */
416 for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++)
417 {
418 GenericXLogPageData *pageData = &state->pages[i];
419
420 if (BufferIsInvalid(pageData->buffer))
421 continue;
422 memcpy(BufferGetPage(pageData->buffer),
423 pageData->image,
424 BLCKSZ);
425 /* We don't worry about zeroing the "hole" in this case */
426 MarkBufferDirty(pageData->buffer);
427 }
429 /* We don't have a LSN to return, in this case */
430 lsn = InvalidXLogRecPtr;
431 }
432
433 pfree(state);
434
435 return lsn;
436}
437
438/*
439 * Abort generic xlog record construction. No changes are applied to buffers.
440 *
441 * Note: caller is responsible for releasing locks/pins on buffers, if needed.
442 */
443void
445{
446 pfree(state);
447}
448
449/*
450 * Apply delta to given page image.
451 */
452static void
453applyPageRedo(Page page, const char *delta, Size deltaSize)
454{
455 const char *ptr = delta;
456 const char *end = delta + deltaSize;
457
458 while (ptr < end)
459 {
460 OffsetNumber offset,
461 length;
462
463 memcpy(&offset, ptr, sizeof(offset));
464 ptr += sizeof(offset);
465 memcpy(&length, ptr, sizeof(length));
466 ptr += sizeof(length);
467
468 memcpy(page + offset, ptr, length);
469
470 ptr += length;
471 }
472}
473
474/*
475 * Redo function for generic xlog record.
476 */
477void
479{
480 XLogRecPtr lsn = record->EndRecPtr;
482 uint8 block_id;
483
484 /* Protect limited size of buffers[] array */
486
487 /* Iterate over blocks */
488 for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
489 {
491
492 if (!XLogRecHasBlockRef(record, block_id))
493 {
494 buffers[block_id] = InvalidBuffer;
495 continue;
496 }
497
498 action = XLogReadBufferForRedo(record, block_id, &buffers[block_id]);
499
500 /* Apply redo to given block if needed */
501 if (action == BLK_NEEDS_REDO)
502 {
503 Page page;
504 PageHeader pageHeader;
505 char *blockDelta;
506 Size blockDeltaSize;
507
508 page = BufferGetPage(buffers[block_id]);
509 blockDelta = XLogRecGetBlockData(record, block_id, &blockDeltaSize);
510 applyPageRedo(page, blockDelta, blockDeltaSize);
511
512 /*
513 * Since the delta contains no information about what's in the
514 * "hole" between pd_lower and pd_upper, set that to zero to
515 * ensure we produce the same page state that application of the
516 * logged action by GenericXLogFinish did.
517 */
518 pageHeader = (PageHeader) page;
519 memset(page + pageHeader->pd_lower, 0,
520 pageHeader->pd_upper - pageHeader->pd_lower);
521
522 PageSetLSN(page, lsn);
523 MarkBufferDirty(buffers[block_id]);
524 }
525 }
526
527 /* Changes are done: unlock and release all buffers */
528 for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
529 {
530 if (BufferIsValid(buffers[block_id]))
531 UnlockReleaseBuffer(buffers[block_id]);
532 }
533}
534
535/*
536 * Mask a generic page before performing consistency checks on it.
537 */
538void
539generic_mask(char *page, BlockNumber blkno)
540{
542
543 mask_unused_space(page);
544}
uint32 BlockNumber
Definition: block.h:31
int Buffer
Definition: buf.h:23
#define BufferIsInvalid(buffer)
Definition: buf.h:31
#define InvalidBuffer
Definition: buf.h:25
void mask_page_lsn_and_checksum(Page page)
Definition: bufmask.c:31
void mask_unused_space(Page page)
Definition: bufmask.c:71
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4941
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2532
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:400
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:351
PageHeaderData * PageHeader
Definition: bufpage.h:173
Pointer Page
Definition: bufpage.h:81
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:391
#define Min(x, y)
Definition: c.h:958
uint8_t uint8
Definition: c.h:483
#define Assert(condition)
Definition: c.h:812
size_t Size
Definition: c.h:559
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
Page GenericXLogRegisterBuffer(GenericXLogState *state, Buffer buffer, int flags)
Definition: generic_xlog.c:299
static void applyPageRedo(Page page, const char *delta, Size deltaSize)
Definition: generic_xlog.c:453
void generic_redo(XLogReaderState *record)
Definition: generic_xlog.c:478
static void computeRegionDelta(GenericXLogPageData *pageData, const char *curpage, const char *targetpage, int targetStart, int targetEnd, int validStart, int validEnd)
Definition: generic_xlog.c:121
void generic_mask(char *page, BlockNumber blkno)
Definition: generic_xlog.c:539
#define MAX_DELTA_SIZE
Definition: generic_xlog.c:47
static void computeDelta(GenericXLogPageData *pageData, Page curpage, Page targetpage)
Definition: generic_xlog.c:228
static void writeFragment(GenericXLogPageData *pageData, OffsetNumber offset, OffsetNumber length, const char *data)
Definition: generic_xlog.c:90
GenericXLogState * GenericXLogStart(Relation relation)
Definition: generic_xlog.c:269
XLogRecPtr GenericXLogFinish(GenericXLogState *state)
Definition: generic_xlog.c:337
void GenericXLogAbort(GenericXLogState *state)
Definition: generic_xlog.c:444
#define MATCH_THRESHOLD
Definition: generic_xlog.c:46
#define GENERIC_XLOG_FULL_IMAGE
Definition: generic_xlog.h:26
#define MAX_GENERIC_XLOG_PAGES
Definition: generic_xlog.h:23
int i
Definition: isn.c:72
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:76
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc_aligned(Size size, Size alignto, int flags)
Definition: mcxt.c:1511
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
uint16 OffsetNumber
Definition: off.h:24
#define PG_IO_ALIGN_SIZE
const void * data
#define RelationNeedsWAL(relation)
Definition: rel.h:628
char delta[MAX_DELTA_SIZE]
Definition: generic_xlog.c:57
PGIOAlignedBlock images[MAX_GENERIC_XLOG_PAGES]
Definition: generic_xlog.c:67
GenericXLogPageData pages[MAX_GENERIC_XLOG_PAGES]
Definition: generic_xlog.c:69
LocationIndex pd_upper
Definition: bufpage.h:166
LocationIndex pd_lower
Definition: bufpage.h:165
XLogRecPtr EndRecPtr
Definition: xlogreader.h:207
Definition: regguts.h:323
char data[BLCKSZ]
Definition: c.h:1073
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
void XLogRegisterBufData(uint8 block_id, const char *data, uint32 len)
Definition: xloginsert.c:405
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:242
void XLogBeginInsert(void)
Definition: xloginsert.c:149
#define REGBUF_STANDARD
Definition: xloginsert.h:34
#define REGBUF_FORCE_IMAGE
Definition: xloginsert.h:31
char * XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len)
Definition: xlogreader.c:2025
#define XLogRecMaxBlockId(decoder)
Definition: xlogreader.h:418
#define XLogRecHasBlockRef(decoder, block_id)
Definition: xlogreader.h:420
XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id, Buffer *buf)
Definition: xlogutils.c:314
XLogRedoAction
Definition: xlogutils.h:73
@ BLK_NEEDS_REDO
Definition: xlogutils.h:74