PostgreSQL Source Code  git master
generic_xlog.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * generic_xlog.c
4  * Implementation of generic xlog records.
5  *
6  *
7  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * src/backend/access/transam/generic_xlog.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include "access/bufmask.h"
17 #include "access/generic_xlog.h"
18 #include "access/xlogutils.h"
19 #include "miscadmin.h"
20 
/*-------------------------------------------------------------------------
 * Internally, a delta between pages consists of a set of fragments.  Each
 * fragment represents changes made in a given region of a page.  A fragment
 * is made up as follows:
 *
 * - offset of page region (OffsetNumber)
 * - length of page region (OffsetNumber)
 * - data - the data to place into the region ('length' number of bytes)
 *
 * Unchanged regions of a page are not represented in its delta.  As a result,
 * a delta can be more compact than the full page image.  But having an
 * unchanged region between two fragments that is smaller than the fragment
 * header (offset+length) does not pay off in terms of the overall size of
 * the delta.  For this reason, we merge adjacent fragments if the unchanged
 * region between them is <= MATCH_THRESHOLD bytes.
 *
 * We do not bother to merge fragments across the "lower" and "upper" parts
 * of a page; it's very seldom the case that pd_lower and pd_upper are within
 * MATCH_THRESHOLD bytes of each other, and handling that infrequent case
 * would complicate and slow down the delta-computation code unduly.
 * Therefore, the worst-case delta size includes two fragment headers plus
 * a full page's worth of data.
 *-------------------------------------------------------------------------
 */

/* Bytes of per-fragment overhead: one offset plus one length field */
#define FRAGMENT_HEADER_SIZE	(2 * sizeof(OffsetNumber))
/* Longest run of unchanged bytes that is still merged into a fragment */
#define MATCH_THRESHOLD			FRAGMENT_HEADER_SIZE
/* Worst case: whole page of data split into a "lower" and "upper" fragment */
#define MAX_DELTA_SIZE			(BLCKSZ + 2 * FRAGMENT_HEADER_SIZE)
48 
49 /* Struct of generic xlog data for single page */
50 typedef struct
51 {
52  Buffer buffer; /* registered buffer */
53  int flags; /* flags for this buffer */
54  int deltaLen; /* space consumed in delta field */
55  char *image; /* copy of page image for modification, do not
56  * do it in-place to have aligned memory chunk */
57  char delta[MAX_DELTA_SIZE]; /* delta between page images */
58 } PageData;
59 
60 /*
61  * State of generic xlog record construction. Must be allocated at an I/O
62  * aligned address.
63  */
65 {
66  /* Page images (properly aligned, must be first) */
68  /* Info about each page, see above */
70  bool isLogged;
71 };
72 
73 static void writeFragment(PageData *pageData, OffsetNumber offset,
74  OffsetNumber length, const char *data);
75 static void computeRegionDelta(PageData *pageData,
76  const char *curpage, const char *targetpage,
77  int targetStart, int targetEnd,
78  int validStart, int validEnd);
79 static void computeDelta(PageData *pageData, Page curpage, Page targetpage);
80 static void applyPageRedo(Page page, const char *delta, Size deltaSize);
81 
82 
83 /*
84  * Write next fragment into pageData's delta.
85  *
86  * The fragment has the given offset and length, and data points to the
87  * actual data (of length length).
88  */
89 static void
90 writeFragment(PageData *pageData, OffsetNumber offset, OffsetNumber length,
91  const char *data)
92 {
93  char *ptr = pageData->delta + pageData->deltaLen;
94 
95  /* Verify we have enough space */
96  Assert(pageData->deltaLen + sizeof(offset) +
97  sizeof(length) + length <= sizeof(pageData->delta));
98 
99  /* Write fragment data */
100  memcpy(ptr, &offset, sizeof(offset));
101  ptr += sizeof(offset);
102  memcpy(ptr, &length, sizeof(length));
103  ptr += sizeof(length);
104  memcpy(ptr, data, length);
105  ptr += length;
106 
107  pageData->deltaLen = ptr - pageData->delta;
108 }
109 
110 /*
111  * Compute the XLOG fragments needed to transform a region of curpage into the
112  * corresponding region of targetpage, and append them to pageData's delta
113  * field. The region to transform runs from targetStart to targetEnd-1.
114  * Bytes in curpage outside the range validStart to validEnd-1 should be
115  * considered invalid, and always overwritten with target data.
116  *
117  * This function is a hot spot, so it's worth being as tense as possible
118  * about the data-matching loops.
119  */
120 static void
122  const char *curpage, const char *targetpage,
123  int targetStart, int targetEnd,
124  int validStart, int validEnd)
125 {
126  int i,
127  loopEnd,
128  fragmentBegin = -1,
129  fragmentEnd = -1;
130 
131  /* Deal with any invalid start region by including it in first fragment */
132  if (validStart > targetStart)
133  {
134  fragmentBegin = targetStart;
135  targetStart = validStart;
136  }
137 
138  /* We'll deal with any invalid end region after the main loop */
139  loopEnd = Min(targetEnd, validEnd);
140 
141  /* Examine all the potentially matchable bytes */
142  i = targetStart;
143  while (i < loopEnd)
144  {
145  if (curpage[i] != targetpage[i])
146  {
147  /* On unmatched byte, start new fragment if not already in one */
148  if (fragmentBegin < 0)
149  fragmentBegin = i;
150  /* Mark unmatched-data endpoint as uncertain */
151  fragmentEnd = -1;
152  /* Extend the fragment as far as possible in a tight loop */
153  i++;
154  while (i < loopEnd && curpage[i] != targetpage[i])
155  i++;
156  if (i >= loopEnd)
157  break;
158  }
159 
160  /* Found a matched byte, so remember end of unmatched fragment */
161  fragmentEnd = i;
162 
163  /*
164  * Extend the match as far as possible in a tight loop. (On typical
165  * workloads, this inner loop is the bulk of this function's runtime.)
166  */
167  i++;
168  while (i < loopEnd && curpage[i] == targetpage[i])
169  i++;
170 
171  /*
172  * There are several possible cases at this point:
173  *
174  * 1. We have no unwritten fragment (fragmentBegin < 0). There's
175  * nothing to write; and it doesn't matter what fragmentEnd is.
176  *
177  * 2. We found more than MATCH_THRESHOLD consecutive matching bytes.
178  * Dump out the unwritten fragment, stopping at fragmentEnd.
179  *
180  * 3. The match extends to loopEnd. We'll do nothing here, exit the
181  * loop, and then dump the unwritten fragment, after merging it with
182  * the invalid end region if any. If we don't so merge, fragmentEnd
183  * establishes how much the final writeFragment call needs to write.
184  *
185  * 4. We found an unmatched byte before loopEnd. The loop will repeat
186  * and will enter the unmatched-byte stanza above. So in this case
187  * also, it doesn't matter what fragmentEnd is. The matched bytes
188  * will get merged into the continuing unmatched fragment.
189  *
190  * Only in case 3 do we reach the bottom of the loop with a meaningful
191  * fragmentEnd value, which is why it's OK that we unconditionally
192  * assign "fragmentEnd = i" above.
193  */
194  if (fragmentBegin >= 0 && i - fragmentEnd > MATCH_THRESHOLD)
195  {
196  writeFragment(pageData, fragmentBegin,
197  fragmentEnd - fragmentBegin,
198  targetpage + fragmentBegin);
199  fragmentBegin = -1;
200  fragmentEnd = -1; /* not really necessary */
201  }
202  }
203 
204  /* Deal with any invalid end region by including it in final fragment */
205  if (loopEnd < targetEnd)
206  {
207  if (fragmentBegin < 0)
208  fragmentBegin = loopEnd;
209  fragmentEnd = targetEnd;
210  }
211 
212  /* Write final fragment if any */
213  if (fragmentBegin >= 0)
214  {
215  if (fragmentEnd < 0)
216  fragmentEnd = targetEnd;
217  writeFragment(pageData, fragmentBegin,
218  fragmentEnd - fragmentBegin,
219  targetpage + fragmentBegin);
220  }
221 }
222 
223 /*
224  * Compute the XLOG delta record needed to transform curpage into targetpage,
225  * and store it in pageData's delta field.
226  */
227 static void
228 computeDelta(PageData *pageData, Page curpage, Page targetpage)
229 {
230  int targetLower = ((PageHeader) targetpage)->pd_lower,
231  targetUpper = ((PageHeader) targetpage)->pd_upper,
232  curLower = ((PageHeader) curpage)->pd_lower,
233  curUpper = ((PageHeader) curpage)->pd_upper;
234 
235  pageData->deltaLen = 0;
236 
237  /* Compute delta records for lower part of page ... */
238  computeRegionDelta(pageData, curpage, targetpage,
239  0, targetLower,
240  0, curLower);
241  /* ... and for upper part, ignoring what's between */
242  computeRegionDelta(pageData, curpage, targetpage,
243  targetUpper, BLCKSZ,
244  curUpper, BLCKSZ);
245 
246  /*
247  * If xlog debug is enabled, then check produced delta. Result of delta
248  * application to curpage should be equivalent to targetpage.
249  */
250 #ifdef WAL_DEBUG
251  if (XLOG_DEBUG)
252  {
253  PGAlignedBlock tmp;
254 
255  memcpy(tmp.data, curpage, BLCKSZ);
256  applyPageRedo(tmp.data, pageData->delta, pageData->deltaLen);
257  if (memcmp(tmp.data, targetpage, targetLower) != 0 ||
258  memcmp(tmp.data + targetUpper, targetpage + targetUpper,
259  BLCKSZ - targetUpper) != 0)
260  elog(ERROR, "result of generic xlog apply does not match");
261  }
262 #endif
263 }
264 
265 /*
266  * Start new generic xlog record for modifications to specified relation.
267  */
270 {
272  int i;
273 
276  0);
277  state->isLogged = RelationNeedsWAL(relation);
278 
279  for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++)
280  {
281  state->pages[i].image = state->images[i].data;
282  state->pages[i].buffer = InvalidBuffer;
283  }
284 
285  return state;
286 }
287 
288 /*
289  * Register new buffer for generic xlog record.
290  *
291  * Returns pointer to the page's image in the GenericXLogState, which
292  * is what the caller should modify.
293  *
294  * If the buffer is already registered, just return its existing entry.
295  * (It's not very clear what to do with the flags in such a case, but
296  * for now we stay with the original flags.)
297  */
298 Page
300 {
301  int block_id;
302 
303  /* Search array for existing entry or first unused slot */
304  for (block_id = 0; block_id < MAX_GENERIC_XLOG_PAGES; block_id++)
305  {
306  PageData *page = &state->pages[block_id];
307 
308  if (BufferIsInvalid(page->buffer))
309  {
310  /* Empty slot, so use it (there cannot be a match later) */
311  page->buffer = buffer;
312  page->flags = flags;
313  memcpy(page->image, BufferGetPage(buffer), BLCKSZ);
314  return (Page) page->image;
315  }
316  else if (page->buffer == buffer)
317  {
318  /*
319  * Buffer is already registered. Just return the image, which is
320  * already prepared.
321  */
322  return (Page) page->image;
323  }
324  }
325 
326  elog(ERROR, "maximum number %d of generic xlog buffers is exceeded",
328  /* keep compiler quiet */
329  return NULL;
330 }
331 
332 /*
333  * Apply changes represented by GenericXLogState to the actual buffers,
334  * and emit a generic xlog record.
335  */
338 {
339  XLogRecPtr lsn;
340  int i;
341 
342  if (state->isLogged)
343  {
344  /* Logged relation: make xlog record in critical section. */
345  XLogBeginInsert();
346 
348 
349  /*
350  * Compute deltas if necessary, write changes to buffers, mark buffers
351  * dirty, and register changes.
352  */
353  for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++)
354  {
355  PageData *pageData = &state->pages[i];
356  Page page;
357  PageHeader pageHeader;
358 
359  if (BufferIsInvalid(pageData->buffer))
360  continue;
361 
362  page = BufferGetPage(pageData->buffer);
363  pageHeader = (PageHeader) pageData->image;
364 
365  /*
366  * Compute delta while we still have both the unmodified page and
367  * the new image. Not needed if we are logging the full image.
368  */
369  if (!(pageData->flags & GENERIC_XLOG_FULL_IMAGE))
370  computeDelta(pageData, page, (Page) pageData->image);
371 
372  /*
373  * Apply the image, being careful to zero the "hole" between
374  * pd_lower and pd_upper in order to avoid divergence between
375  * actual page state and what replay would produce.
376  */
377  memcpy(page, pageData->image, pageHeader->pd_lower);
378  memset(page + pageHeader->pd_lower, 0,
379  pageHeader->pd_upper - pageHeader->pd_lower);
380  memcpy(page + pageHeader->pd_upper,
381  pageData->image + pageHeader->pd_upper,
382  BLCKSZ - pageHeader->pd_upper);
383 
384  MarkBufferDirty(pageData->buffer);
385 
386  if (pageData->flags & GENERIC_XLOG_FULL_IMAGE)
387  {
388  XLogRegisterBuffer(i, pageData->buffer,
390  }
391  else
392  {
394  XLogRegisterBufData(i, pageData->delta, pageData->deltaLen);
395  }
396  }
397 
398  /* Insert xlog record */
399  lsn = XLogInsert(RM_GENERIC_ID, 0);
400 
401  /* Set LSN */
402  for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++)
403  {
404  PageData *pageData = &state->pages[i];
405 
406  if (BufferIsInvalid(pageData->buffer))
407  continue;
408  PageSetLSN(BufferGetPage(pageData->buffer), lsn);
409  }
411  }
412  else
413  {
414  /* Unlogged relation: skip xlog-related stuff */
416  for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++)
417  {
418  PageData *pageData = &state->pages[i];
419 
420  if (BufferIsInvalid(pageData->buffer))
421  continue;
422  memcpy(BufferGetPage(pageData->buffer),
423  pageData->image,
424  BLCKSZ);
425  /* We don't worry about zeroing the "hole" in this case */
426  MarkBufferDirty(pageData->buffer);
427  }
429  /* We don't have a LSN to return, in this case */
430  lsn = InvalidXLogRecPtr;
431  }
432 
433  pfree(state);
434 
435  return lsn;
436 }
437 
438 /*
439  * Abort generic xlog record construction. No changes are applied to buffers.
440  *
441  * Note: caller is responsible for releasing locks/pins on buffers, if needed.
442  */
443 void
445 {
446  pfree(state);
447 }
448 
449 /*
450  * Apply delta to given page image.
451  */
452 static void
453 applyPageRedo(Page page, const char *delta, Size deltaSize)
454 {
455  const char *ptr = delta;
456  const char *end = delta + deltaSize;
457 
458  while (ptr < end)
459  {
460  OffsetNumber offset,
461  length;
462 
463  memcpy(&offset, ptr, sizeof(offset));
464  ptr += sizeof(offset);
465  memcpy(&length, ptr, sizeof(length));
466  ptr += sizeof(length);
467 
468  memcpy(page + offset, ptr, length);
469 
470  ptr += length;
471  }
472 }
473 
474 /*
475  * Redo function for generic xlog record.
476  */
477 void
479 {
480  XLogRecPtr lsn = record->EndRecPtr;
482  uint8 block_id;
483 
484  /* Protect limited size of buffers[] array */
486 
487  /* Iterate over blocks */
488  for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
489  {
491 
492  if (!XLogRecHasBlockRef(record, block_id))
493  {
494  buffers[block_id] = InvalidBuffer;
495  continue;
496  }
497 
498  action = XLogReadBufferForRedo(record, block_id, &buffers[block_id]);
499 
500  /* Apply redo to given block if needed */
501  if (action == BLK_NEEDS_REDO)
502  {
503  Page page;
504  PageHeader pageHeader;
505  char *blockDelta;
506  Size blockDeltaSize;
507 
508  page = BufferGetPage(buffers[block_id]);
509  blockDelta = XLogRecGetBlockData(record, block_id, &blockDeltaSize);
510  applyPageRedo(page, blockDelta, blockDeltaSize);
511 
512  /*
513  * Since the delta contains no information about what's in the
514  * "hole" between pd_lower and pd_upper, set that to zero to
515  * ensure we produce the same page state that application of the
516  * logged action by GenericXLogFinish did.
517  */
518  pageHeader = (PageHeader) page;
519  memset(page + pageHeader->pd_lower, 0,
520  pageHeader->pd_upper - pageHeader->pd_lower);
521 
522  PageSetLSN(page, lsn);
523  MarkBufferDirty(buffers[block_id]);
524  }
525  }
526 
527  /* Changes are done: unlock and release all buffers */
528  for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
529  {
530  if (BufferIsValid(buffers[block_id]))
531  UnlockReleaseBuffer(buffers[block_id]);
532  }
533 }
534 
535 /*
536  * Mask a generic page before performing consistency checks on it.
537  */
538 void
539 generic_mask(char *page, BlockNumber blkno)
540 {
542 
543  mask_unused_space(page);
544 }
uint32 BlockNumber
Definition: block.h:31
int Buffer
Definition: buf.h:23
#define BufferIsInvalid(buffer)
Definition: buf.h:31
#define InvalidBuffer
Definition: buf.h:25
void mask_page_lsn_and_checksum(Page page)
Definition: bufmask.c:31
void mask_unused_space(Page page)
Definition: bufmask.c:71
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4577
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2189
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:350
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:301
PageHeaderData * PageHeader
Definition: bufpage.h:170
Pointer Page
Definition: bufpage.h:78
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:388
#define Min(x, y)
Definition: c.h:991
unsigned char uint8
Definition: c.h:491
size_t Size
Definition: c.h:592
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
Page GenericXLogRegisterBuffer(GenericXLogState *state, Buffer buffer, int flags)
Definition: generic_xlog.c:299
static void computeRegionDelta(PageData *pageData, const char *curpage, const char *targetpage, int targetStart, int targetEnd, int validStart, int validEnd)
Definition: generic_xlog.c:121
static void writeFragment(PageData *pageData, OffsetNumber offset, OffsetNumber length, const char *data)
Definition: generic_xlog.c:90
static void applyPageRedo(Page page, const char *delta, Size deltaSize)
Definition: generic_xlog.c:453
void generic_redo(XLogReaderState *record)
Definition: generic_xlog.c:478
static void computeDelta(PageData *pageData, Page curpage, Page targetpage)
Definition: generic_xlog.c:228
GenericXLogState * GenericXLogStart(Relation relation)
Definition: generic_xlog.c:269
void generic_mask(char *page, BlockNumber blkno)
Definition: generic_xlog.c:539
#define MAX_DELTA_SIZE
Definition: generic_xlog.c:47
XLogRecPtr GenericXLogFinish(GenericXLogState *state)
Definition: generic_xlog.c:337
void GenericXLogAbort(GenericXLogState *state)
Definition: generic_xlog.c:444
#define MATCH_THRESHOLD
Definition: generic_xlog.c:46
#define GENERIC_XLOG_FULL_IMAGE
Definition: generic_xlog.h:26
#define MAX_GENERIC_XLOG_PAGES
Definition: generic_xlog.h:23
int i
Definition: isn.c:73
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
Assert(fmt[strlen(fmt) - 1] !='\n')
void pfree(void *pointer)
Definition: mcxt.c:1508
void * palloc_aligned(Size size, Size alignto, int flags)
Definition: mcxt.c:1498
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
uint16 OffsetNumber
Definition: off.h:24
#define PG_IO_ALIGN_SIZE
const void * data
#define RelationNeedsWAL(relation)
Definition: rel.h:628
PGIOAlignedBlock images[MAX_GENERIC_XLOG_PAGES]
Definition: generic_xlog.c:67
PageData pages[MAX_GENERIC_XLOG_PAGES]
Definition: generic_xlog.c:69
char * image
Definition: generic_xlog.c:55
char delta[MAX_DELTA_SIZE]
Definition: generic_xlog.c:57
int deltaLen
Definition: generic_xlog.c:54
Buffer buffer
Definition: generic_xlog.c:52
LocationIndex pd_upper
Definition: bufpage.h:163
LocationIndex pd_lower
Definition: bufpage.h:162
XLogRecPtr EndRecPtr
Definition: xlogreader.h:207
Definition: regguts.h:323
char data[BLCKSZ]
Definition: c.h:1106
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474
void XLogRegisterBufData(uint8 block_id, char *data, uint32 len)
Definition: xloginsert.c:405
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:242
void XLogBeginInsert(void)
Definition: xloginsert.c:149
#define REGBUF_STANDARD
Definition: xloginsert.h:34
#define REGBUF_FORCE_IMAGE
Definition: xloginsert.h:31
char * XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len)
Definition: xlogreader.c:2025
#define XLogRecMaxBlockId(decoder)
Definition: xlogreader.h:418
#define XLogRecHasBlockRef(decoder, block_id)
Definition: xlogreader.h:420
XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id, Buffer *buf)
Definition: xlogutils.c:314
XLogRedoAction
Definition: xlogutils.h:70
@ BLK_NEEDS_REDO
Definition: xlogutils.h:71