PostgreSQL Source Code  git master
generic_xlog.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * generic_xlog.c
4  * Implementation of generic xlog records.
5  *
6  *
7  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * src/backend/access/transam/generic_xlog.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include "access/bufmask.h"
17 #include "access/generic_xlog.h"
18 #include "access/xlogutils.h"
19 #include "miscadmin.h"
20 #include "utils/memutils.h"
21 
22 /*-------------------------------------------------------------------------
23  * Internally, a delta between pages consists of a set of fragments. Each
24  * fragment represents changes made in a given region of a page. A fragment
25  * is made up as follows:
26  *
27  * - offset of page region (OffsetNumber)
28  * - length of page region (OffsetNumber)
29  * - data - the data to place into the region ('length' number of bytes)
30  *
31  * Unchanged regions of a page are not represented in its delta. As a result,
32  * a delta can be more compact than the full page image. But having an
33  * unchanged region between two fragments that is smaller than the fragment
34  * header (offset+length) does not pay off in terms of the overall size of
35  * the delta. For this reason, we merge adjacent fragments if the unchanged
36  * region between them is <= MATCH_THRESHOLD bytes.
37  *
38  * We do not bother to merge fragments across the "lower" and "upper" parts
39  * of a page; it's very seldom the case that pd_lower and pd_upper are within
40  * MATCH_THRESHOLD bytes of each other, and handling that infrequent case
41  * would complicate and slow down the delta-computation code unduly.
42  * Therefore, the worst-case delta size includes two fragment headers plus
43  * a full page's worth of data.
44  *-------------------------------------------------------------------------
45  */
/* Bytes taken by one fragment header: an offset followed by a length */
#define FRAGMENT_HEADER_SIZE	(sizeof(OffsetNumber) + sizeof(OffsetNumber))
/* Unchanged gaps of at most this many bytes are merged into a fragment */
#define MATCH_THRESHOLD			FRAGMENT_HEADER_SIZE
/* Worst case: two fragment headers plus a full page's worth of data */
#define MAX_DELTA_SIZE			(FRAGMENT_HEADER_SIZE + FRAGMENT_HEADER_SIZE + BLCKSZ)
49 
50 /* Struct of generic xlog data for single page */
51 typedef struct
52 {
53  Buffer buffer; /* registered buffer */
54  int flags; /* flags for this buffer */
55  int deltaLen; /* space consumed in delta field */
56  char *image; /* copy of page image for modification, do not
57  * do it in-place to have aligned memory chunk */
58  char delta[MAX_DELTA_SIZE]; /* delta between page images */
59 } PageData;
60 
61 /*
62  * State of generic xlog record construction. Must be allocated at an I/O
63  * aligned address.
64  */
66 {
67  /* Page images (properly aligned, must be first) */
69  /* Info about each page, see above */
71  bool isLogged;
72 };
73 
74 static void writeFragment(PageData *pageData, OffsetNumber offset,
75  OffsetNumber length, const char *data);
76 static void computeRegionDelta(PageData *pageData,
77  const char *curpage, const char *targetpage,
78  int targetStart, int targetEnd,
79  int validStart, int validEnd);
80 static void computeDelta(PageData *pageData, Page curpage, Page targetpage);
81 static void applyPageRedo(Page page, const char *delta, Size deltaSize);
82 
83 
84 /*
85  * Write next fragment into pageData's delta.
86  *
87  * The fragment has the given offset and length, and data points to the
88  * actual data (of length length).
89  */
90 static void
91 writeFragment(PageData *pageData, OffsetNumber offset, OffsetNumber length,
92  const char *data)
93 {
94  char *ptr = pageData->delta + pageData->deltaLen;
95 
96  /* Verify we have enough space */
97  Assert(pageData->deltaLen + sizeof(offset) +
98  sizeof(length) + length <= sizeof(pageData->delta));
99 
100  /* Write fragment data */
101  memcpy(ptr, &offset, sizeof(offset));
102  ptr += sizeof(offset);
103  memcpy(ptr, &length, sizeof(length));
104  ptr += sizeof(length);
105  memcpy(ptr, data, length);
106  ptr += length;
107 
108  pageData->deltaLen = ptr - pageData->delta;
109 }
110 
111 /*
112  * Compute the XLOG fragments needed to transform a region of curpage into the
113  * corresponding region of targetpage, and append them to pageData's delta
114  * field. The region to transform runs from targetStart to targetEnd-1.
115  * Bytes in curpage outside the range validStart to validEnd-1 should be
116  * considered invalid, and always overwritten with target data.
117  *
118  * This function is a hot spot, so it's worth being as tense as possible
119  * about the data-matching loops.
120  */
121 static void
123  const char *curpage, const char *targetpage,
124  int targetStart, int targetEnd,
125  int validStart, int validEnd)
126 {
127  int i,
128  loopEnd,
129  fragmentBegin = -1,
130  fragmentEnd = -1;
131 
132  /* Deal with any invalid start region by including it in first fragment */
133  if (validStart > targetStart)
134  {
135  fragmentBegin = targetStart;
136  targetStart = validStart;
137  }
138 
139  /* We'll deal with any invalid end region after the main loop */
140  loopEnd = Min(targetEnd, validEnd);
141 
142  /* Examine all the potentially matchable bytes */
143  i = targetStart;
144  while (i < loopEnd)
145  {
146  if (curpage[i] != targetpage[i])
147  {
148  /* On unmatched byte, start new fragment if not already in one */
149  if (fragmentBegin < 0)
150  fragmentBegin = i;
151  /* Mark unmatched-data endpoint as uncertain */
152  fragmentEnd = -1;
153  /* Extend the fragment as far as possible in a tight loop */
154  i++;
155  while (i < loopEnd && curpage[i] != targetpage[i])
156  i++;
157  if (i >= loopEnd)
158  break;
159  }
160 
161  /* Found a matched byte, so remember end of unmatched fragment */
162  fragmentEnd = i;
163 
164  /*
165  * Extend the match as far as possible in a tight loop. (On typical
166  * workloads, this inner loop is the bulk of this function's runtime.)
167  */
168  i++;
169  while (i < loopEnd && curpage[i] == targetpage[i])
170  i++;
171 
172  /*
173  * There are several possible cases at this point:
174  *
175  * 1. We have no unwritten fragment (fragmentBegin < 0). There's
176  * nothing to write; and it doesn't matter what fragmentEnd is.
177  *
178  * 2. We found more than MATCH_THRESHOLD consecutive matching bytes.
179  * Dump out the unwritten fragment, stopping at fragmentEnd.
180  *
181  * 3. The match extends to loopEnd. We'll do nothing here, exit the
182  * loop, and then dump the unwritten fragment, after merging it with
183  * the invalid end region if any. If we don't so merge, fragmentEnd
184  * establishes how much the final writeFragment call needs to write.
185  *
186  * 4. We found an unmatched byte before loopEnd. The loop will repeat
187  * and will enter the unmatched-byte stanza above. So in this case
188  * also, it doesn't matter what fragmentEnd is. The matched bytes
189  * will get merged into the continuing unmatched fragment.
190  *
191  * Only in case 3 do we reach the bottom of the loop with a meaningful
192  * fragmentEnd value, which is why it's OK that we unconditionally
193  * assign "fragmentEnd = i" above.
194  */
195  if (fragmentBegin >= 0 && i - fragmentEnd > MATCH_THRESHOLD)
196  {
197  writeFragment(pageData, fragmentBegin,
198  fragmentEnd - fragmentBegin,
199  targetpage + fragmentBegin);
200  fragmentBegin = -1;
201  fragmentEnd = -1; /* not really necessary */
202  }
203  }
204 
205  /* Deal with any invalid end region by including it in final fragment */
206  if (loopEnd < targetEnd)
207  {
208  if (fragmentBegin < 0)
209  fragmentBegin = loopEnd;
210  fragmentEnd = targetEnd;
211  }
212 
213  /* Write final fragment if any */
214  if (fragmentBegin >= 0)
215  {
216  if (fragmentEnd < 0)
217  fragmentEnd = targetEnd;
218  writeFragment(pageData, fragmentBegin,
219  fragmentEnd - fragmentBegin,
220  targetpage + fragmentBegin);
221  }
222 }
223 
224 /*
225  * Compute the XLOG delta record needed to transform curpage into targetpage,
226  * and store it in pageData's delta field.
227  */
228 static void
229 computeDelta(PageData *pageData, Page curpage, Page targetpage)
230 {
231  int targetLower = ((PageHeader) targetpage)->pd_lower,
232  targetUpper = ((PageHeader) targetpage)->pd_upper,
233  curLower = ((PageHeader) curpage)->pd_lower,
234  curUpper = ((PageHeader) curpage)->pd_upper;
235 
236  pageData->deltaLen = 0;
237 
238  /* Compute delta records for lower part of page ... */
239  computeRegionDelta(pageData, curpage, targetpage,
240  0, targetLower,
241  0, curLower);
242  /* ... and for upper part, ignoring what's between */
243  computeRegionDelta(pageData, curpage, targetpage,
244  targetUpper, BLCKSZ,
245  curUpper, BLCKSZ);
246 
247  /*
248  * If xlog debug is enabled, then check produced delta. Result of delta
249  * application to curpage should be equivalent to targetpage.
250  */
251 #ifdef WAL_DEBUG
252  if (XLOG_DEBUG)
253  {
254  PGAlignedBlock tmp;
255 
256  memcpy(tmp.data, curpage, BLCKSZ);
257  applyPageRedo(tmp.data, pageData->delta, pageData->deltaLen);
258  if (memcmp(tmp.data, targetpage, targetLower) != 0 ||
259  memcmp(tmp.data + targetUpper, targetpage + targetUpper,
260  BLCKSZ - targetUpper) != 0)
261  elog(ERROR, "result of generic xlog apply does not match");
262  }
263 #endif
264 }
265 
266 /*
267  * Start new generic xlog record for modifications to specified relation.
268  */
271 {
273  int i;
274 
277  0);
278  state->isLogged = RelationNeedsWAL(relation);
279 
280  for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++)
281  {
282  state->pages[i].image = state->images[i].data;
283  state->pages[i].buffer = InvalidBuffer;
284  }
285 
286  return state;
287 }
288 
289 /*
290  * Register new buffer for generic xlog record.
291  *
292  * Returns pointer to the page's image in the GenericXLogState, which
293  * is what the caller should modify.
294  *
295  * If the buffer is already registered, just return its existing entry.
296  * (It's not very clear what to do with the flags in such a case, but
297  * for now we stay with the original flags.)
298  */
299 Page
301 {
302  int block_id;
303 
304  /* Search array for existing entry or first unused slot */
305  for (block_id = 0; block_id < MAX_GENERIC_XLOG_PAGES; block_id++)
306  {
307  PageData *page = &state->pages[block_id];
308 
309  if (BufferIsInvalid(page->buffer))
310  {
311  /* Empty slot, so use it (there cannot be a match later) */
312  page->buffer = buffer;
313  page->flags = flags;
314  memcpy(page->image, BufferGetPage(buffer), BLCKSZ);
315  return (Page) page->image;
316  }
317  else if (page->buffer == buffer)
318  {
319  /*
320  * Buffer is already registered. Just return the image, which is
321  * already prepared.
322  */
323  return (Page) page->image;
324  }
325  }
326 
327  elog(ERROR, "maximum number %d of generic xlog buffers is exceeded",
329  /* keep compiler quiet */
330  return NULL;
331 }
332 
333 /*
334  * Apply changes represented by GenericXLogState to the actual buffers,
335  * and emit a generic xlog record.
336  */
339 {
340  XLogRecPtr lsn;
341  int i;
342 
343  if (state->isLogged)
344  {
345  /* Logged relation: make xlog record in critical section. */
346  XLogBeginInsert();
347 
349 
350  /*
351  * Compute deltas if necessary, write changes to buffers, mark buffers
352  * dirty, and register changes.
353  */
354  for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++)
355  {
356  PageData *pageData = &state->pages[i];
357  Page page;
358  PageHeader pageHeader;
359 
360  if (BufferIsInvalid(pageData->buffer))
361  continue;
362 
363  page = BufferGetPage(pageData->buffer);
364  pageHeader = (PageHeader) pageData->image;
365 
366  /*
367  * Compute delta while we still have both the unmodified page and
368  * the new image. Not needed if we are logging the full image.
369  */
370  if (!(pageData->flags & GENERIC_XLOG_FULL_IMAGE))
371  computeDelta(pageData, page, (Page) pageData->image);
372 
373  /*
374  * Apply the image, being careful to zero the "hole" between
375  * pd_lower and pd_upper in order to avoid divergence between
376  * actual page state and what replay would produce.
377  */
378  memcpy(page, pageData->image, pageHeader->pd_lower);
379  memset(page + pageHeader->pd_lower, 0,
380  pageHeader->pd_upper - pageHeader->pd_lower);
381  memcpy(page + pageHeader->pd_upper,
382  pageData->image + pageHeader->pd_upper,
383  BLCKSZ - pageHeader->pd_upper);
384 
385  MarkBufferDirty(pageData->buffer);
386 
387  if (pageData->flags & GENERIC_XLOG_FULL_IMAGE)
388  {
389  XLogRegisterBuffer(i, pageData->buffer,
391  }
392  else
393  {
395  XLogRegisterBufData(i, pageData->delta, pageData->deltaLen);
396  }
397  }
398 
399  /* Insert xlog record */
400  lsn = XLogInsert(RM_GENERIC_ID, 0);
401 
402  /* Set LSN */
403  for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++)
404  {
405  PageData *pageData = &state->pages[i];
406 
407  if (BufferIsInvalid(pageData->buffer))
408  continue;
409  PageSetLSN(BufferGetPage(pageData->buffer), lsn);
410  }
412  }
413  else
414  {
415  /* Unlogged relation: skip xlog-related stuff */
417  for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++)
418  {
419  PageData *pageData = &state->pages[i];
420 
421  if (BufferIsInvalid(pageData->buffer))
422  continue;
423  memcpy(BufferGetPage(pageData->buffer),
424  pageData->image,
425  BLCKSZ);
426  /* We don't worry about zeroing the "hole" in this case */
427  MarkBufferDirty(pageData->buffer);
428  }
430  /* We don't have a LSN to return, in this case */
431  lsn = InvalidXLogRecPtr;
432  }
433 
434  pfree(state);
435 
436  return lsn;
437 }
438 
439 /*
440  * Abort generic xlog record construction. No changes are applied to buffers.
441  *
442  * Note: caller is responsible for releasing locks/pins on buffers, if needed.
443  */
444 void
446 {
447  pfree(state);
448 }
449 
450 /*
451  * Apply delta to given page image.
452  */
453 static void
454 applyPageRedo(Page page, const char *delta, Size deltaSize)
455 {
456  const char *ptr = delta;
457  const char *end = delta + deltaSize;
458 
459  while (ptr < end)
460  {
461  OffsetNumber offset,
462  length;
463 
464  memcpy(&offset, ptr, sizeof(offset));
465  ptr += sizeof(offset);
466  memcpy(&length, ptr, sizeof(length));
467  ptr += sizeof(length);
468 
469  memcpy(page + offset, ptr, length);
470 
471  ptr += length;
472  }
473 }
474 
475 /*
476  * Redo function for generic xlog record.
477  */
478 void
480 {
481  XLogRecPtr lsn = record->EndRecPtr;
483  uint8 block_id;
484 
485  /* Protect limited size of buffers[] array */
487 
488  /* Iterate over blocks */
489  for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
490  {
492 
493  if (!XLogRecHasBlockRef(record, block_id))
494  {
495  buffers[block_id] = InvalidBuffer;
496  continue;
497  }
498 
499  action = XLogReadBufferForRedo(record, block_id, &buffers[block_id]);
500 
501  /* Apply redo to given block if needed */
502  if (action == BLK_NEEDS_REDO)
503  {
504  Page page;
505  PageHeader pageHeader;
506  char *blockDelta;
507  Size blockDeltaSize;
508 
509  page = BufferGetPage(buffers[block_id]);
510  blockDelta = XLogRecGetBlockData(record, block_id, &blockDeltaSize);
511  applyPageRedo(page, blockDelta, blockDeltaSize);
512 
513  /*
514  * Since the delta contains no information about what's in the
515  * "hole" between pd_lower and pd_upper, set that to zero to
516  * ensure we produce the same page state that application of the
517  * logged action by GenericXLogFinish did.
518  */
519  pageHeader = (PageHeader) page;
520  memset(page + pageHeader->pd_lower, 0,
521  pageHeader->pd_upper - pageHeader->pd_lower);
522 
523  PageSetLSN(page, lsn);
524  MarkBufferDirty(buffers[block_id]);
525  }
526  }
527 
528  /* Changes are done: unlock and release all buffers */
529  for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
530  {
531  if (BufferIsValid(buffers[block_id]))
532  UnlockReleaseBuffer(buffers[block_id]);
533  }
534 }
535 
536 /*
537  * Mask a generic page before performing consistency checks on it.
538  */
539 void
540 generic_mask(char *page, BlockNumber blkno)
541 {
543 
544  mask_unused_space(page);
545 }
uint32 BlockNumber
Definition: block.h:31
int Buffer
Definition: buf.h:23
#define BufferIsInvalid(buffer)
Definition: buf.h:31
#define InvalidBuffer
Definition: buf.h:25
void mask_page_lsn_and_checksum(Page page)
Definition: bufmask.c:31
void mask_unused_space(Page page)
Definition: bufmask.c:71
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4590
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2198
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:350
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:301
PageHeaderData * PageHeader
Definition: bufpage.h:170
Pointer Page
Definition: bufpage.h:78
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:388
#define Min(x, y)
Definition: c.h:993
unsigned char uint8
Definition: c.h:493
size_t Size
Definition: c.h:594
#define ERROR
Definition: elog.h:39
Page GenericXLogRegisterBuffer(GenericXLogState *state, Buffer buffer, int flags)
Definition: generic_xlog.c:300
static void computeRegionDelta(PageData *pageData, const char *curpage, const char *targetpage, int targetStart, int targetEnd, int validStart, int validEnd)
Definition: generic_xlog.c:122
static void writeFragment(PageData *pageData, OffsetNumber offset, OffsetNumber length, const char *data)
Definition: generic_xlog.c:91
static void applyPageRedo(Page page, const char *delta, Size deltaSize)
Definition: generic_xlog.c:454
void generic_redo(XLogReaderState *record)
Definition: generic_xlog.c:479
static void computeDelta(PageData *pageData, Page curpage, Page targetpage)
Definition: generic_xlog.c:229
GenericXLogState * GenericXLogStart(Relation relation)
Definition: generic_xlog.c:270
void generic_mask(char *page, BlockNumber blkno)
Definition: generic_xlog.c:540
#define MAX_DELTA_SIZE
Definition: generic_xlog.c:48
XLogRecPtr GenericXLogFinish(GenericXLogState *state)
Definition: generic_xlog.c:338
void GenericXLogAbort(GenericXLogState *state)
Definition: generic_xlog.c:445
#define MATCH_THRESHOLD
Definition: generic_xlog.c:47
#define GENERIC_XLOG_FULL_IMAGE
Definition: generic_xlog.h:26
#define MAX_GENERIC_XLOG_PAGES
Definition: generic_xlog.h:23
int i
Definition: isn.c:73
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
Assert(fmt[strlen(fmt) - 1] !='\n')
void pfree(void *pointer)
Definition: mcxt.c:1456
void * palloc_aligned(Size size, Size alignto, int flags)
Definition: mcxt.c:1446
#define START_CRIT_SECTION()
Definition: miscadmin.h:148
#define END_CRIT_SECTION()
Definition: miscadmin.h:150
uint16 OffsetNumber
Definition: off.h:24
#define PG_IO_ALIGN_SIZE
const void * data
#define RelationNeedsWAL(relation)
Definition: rel.h:629
PGIOAlignedBlock images[MAX_GENERIC_XLOG_PAGES]
Definition: generic_xlog.c:68
PageData pages[MAX_GENERIC_XLOG_PAGES]
Definition: generic_xlog.c:70
char * image
Definition: generic_xlog.c:56
char delta[MAX_DELTA_SIZE]
Definition: generic_xlog.c:58
int deltaLen
Definition: generic_xlog.c:55
Buffer buffer
Definition: generic_xlog.c:53
LocationIndex pd_upper
Definition: bufpage.h:163
LocationIndex pd_lower
Definition: bufpage.h:162
XLogRecPtr EndRecPtr
Definition: xlogreader.h:207
Definition: regguts.h:323
char data[BLCKSZ]
Definition: c.h:1132
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:475
void XLogRegisterBufData(uint8 block_id, char *data, uint32 len)
Definition: xloginsert.c:406
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:243
void XLogBeginInsert(void)
Definition: xloginsert.c:150
#define REGBUF_STANDARD
Definition: xloginsert.h:34
#define REGBUF_FORCE_IMAGE
Definition: xloginsert.h:31
char * XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len)
Definition: xlogreader.c:2002
#define XLogRecMaxBlockId(decoder)
Definition: xlogreader.h:418
#define XLogRecHasBlockRef(decoder, block_id)
Definition: xlogreader.h:420
XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id, Buffer *buf)
Definition: xlogutils.c:317
XLogRedoAction
Definition: xlogutils.h:70
@ BLK_NEEDS_REDO
Definition: xlogutils.h:71