PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
generic_xlog.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * generic_xlog.c
4  * Implementation of generic xlog records.
5  *
6  *
7  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * src/backend/access/transam/generic_xlog.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include "access/bufmask.h"
17 #include "access/generic_xlog.h"
18 #include "access/xlogutils.h"
19 #include "miscadmin.h"
20 #include "utils/memutils.h"
21 
/*-------------------------------------------------------------------------
 * Internally, a delta between pages consists of a set of fragments.  Each
 * fragment represents the changes made in one region of a page, and is laid
 * out as follows:
 *
 * - offset of page region (OffsetNumber)
 * - length of page region (OffsetNumber)
 * - data - the data to place into the region ('length' number of bytes)
 *
 * Unchanged regions of a page are not represented in its delta.  As a result,
 * a delta can be more compact than the full page image.  However, leaving an
 * unchanged region between two fragments that is smaller than the fragment
 * header (offset+length) does not pay off in terms of the overall size of
 * the delta.  For this reason, we merge adjacent fragments if the unchanged
 * region between them is <= MATCH_THRESHOLD bytes.
 *
 * We do not bother to merge fragments across the "lower" and "upper" parts
 * of a page; it's very seldom the case that pd_lower and pd_upper are within
 * MATCH_THRESHOLD bytes of each other, and handling that infrequent case
 * would complicate and slow down the delta-computation code unduly.
 * Therefore, the worst-case delta size includes two fragment headers plus
 * a full page's worth of data.
 *-------------------------------------------------------------------------
 */
#define FRAGMENT_HEADER_SIZE    (2 * sizeof(OffsetNumber))
#define MATCH_THRESHOLD         FRAGMENT_HEADER_SIZE
#define MAX_DELTA_SIZE          (BLCKSZ + 2 * FRAGMENT_HEADER_SIZE)
49 
50 /* Struct of generic xlog data for single page */
51 typedef struct
52 {
53  Buffer buffer; /* registered buffer */
54  int flags; /* flags for this buffer */
55  int deltaLen; /* space consumed in delta field */
56  char *image; /* copy of page image for modification, do not
57  * do it in-place to have aligned memory chunk */
58  char delta[MAX_DELTA_SIZE]; /* delta between page images */
59 } PageData;
60 
61 /* State of generic xlog record construction */
63 {
64  /*
65  * page's images. Should be first in this struct to have MAXALIGN'ed
66  * images addresses, because some code working with pages directly aligns
67  * addresses, not offsets from beginning of page
68  */
71  bool isLogged;
72 };
73 
74 static void writeFragment(PageData *pageData, OffsetNumber offset,
75  OffsetNumber len, const char *data);
76 static void computeRegionDelta(PageData *pageData,
77  const char *curpage, const char *targetpage,
78  int targetStart, int targetEnd,
79  int validStart, int validEnd);
80 static void computeDelta(PageData *pageData, Page curpage, Page targetpage);
81 static void applyPageRedo(Page page, const char *delta, Size deltaSize);
82 
83 
84 /*
85  * Write next fragment into pageData's delta.
86  *
87  * The fragment has the given offset and length, and data points to the
88  * actual data (of length length).
89  */
90 static void
92  const char *data)
93 {
94  char *ptr = pageData->delta + pageData->deltaLen;
95 
96  /* Verify we have enough space */
97  Assert(pageData->deltaLen + sizeof(offset) +
98  sizeof(length) + length <= sizeof(pageData->delta));
99 
100  /* Write fragment data */
101  memcpy(ptr, &offset, sizeof(offset));
102  ptr += sizeof(offset);
103  memcpy(ptr, &length, sizeof(length));
104  ptr += sizeof(length);
105  memcpy(ptr, data, length);
106  ptr += length;
107 
108  pageData->deltaLen = ptr - pageData->delta;
109 }
110 
111 /*
112  * Compute the XLOG fragments needed to transform a region of curpage into the
113  * corresponding region of targetpage, and append them to pageData's delta
114  * field. The region to transform runs from targetStart to targetEnd-1.
115  * Bytes in curpage outside the range validStart to validEnd-1 should be
116  * considered invalid, and always overwritten with target data.
117  *
118  * This function is a hot spot, so it's worth being as tense as possible
119  * about the data-matching loops.
120  */
121 static void
123  const char *curpage, const char *targetpage,
124  int targetStart, int targetEnd,
125  int validStart, int validEnd)
126 {
127  int i,
128  loopEnd,
129  fragmentBegin = -1,
130  fragmentEnd = -1;
131 
132  /* Deal with any invalid start region by including it in first fragment */
133  if (validStart > targetStart)
134  {
135  fragmentBegin = targetStart;
136  targetStart = validStart;
137  }
138 
139  /* We'll deal with any invalid end region after the main loop */
140  loopEnd = Min(targetEnd, validEnd);
141 
142  /* Examine all the potentially matchable bytes */
143  i = targetStart;
144  while (i < loopEnd)
145  {
146  if (curpage[i] != targetpage[i])
147  {
148  /* On unmatched byte, start new fragment if not already in one */
149  if (fragmentBegin < 0)
150  fragmentBegin = i;
151  /* Mark unmatched-data endpoint as uncertain */
152  fragmentEnd = -1;
153  /* Extend the fragment as far as possible in a tight loop */
154  i++;
155  while (i < loopEnd && curpage[i] != targetpage[i])
156  i++;
157  if (i >= loopEnd)
158  break;
159  }
160 
161  /* Found a matched byte, so remember end of unmatched fragment */
162  fragmentEnd = i;
163 
164  /*
165  * Extend the match as far as possible in a tight loop. (On typical
166  * workloads, this inner loop is the bulk of this function's runtime.)
167  */
168  i++;
169  while (i < loopEnd && curpage[i] == targetpage[i])
170  i++;
171 
172  /*
173  * There are several possible cases at this point:
174  *
175  * 1. We have no unwritten fragment (fragmentBegin < 0). There's
176  * nothing to write; and it doesn't matter what fragmentEnd is.
177  *
178  * 2. We found more than MATCH_THRESHOLD consecutive matching bytes.
179  * Dump out the unwritten fragment, stopping at fragmentEnd.
180  *
181  * 3. The match extends to loopEnd. We'll do nothing here, exit the
182  * loop, and then dump the unwritten fragment, after merging it with
183  * the invalid end region if any. If we don't so merge, fragmentEnd
184  * establishes how much the final writeFragment call needs to write.
185  *
186  * 4. We found an unmatched byte before loopEnd. The loop will repeat
187  * and will enter the unmatched-byte stanza above. So in this case
188  * also, it doesn't matter what fragmentEnd is. The matched bytes
189  * will get merged into the continuing unmatched fragment.
190  *
191  * Only in case 3 do we reach the bottom of the loop with a meaningful
192  * fragmentEnd value, which is why it's OK that we unconditionally
193  * assign "fragmentEnd = i" above.
194  */
195  if (fragmentBegin >= 0 && i - fragmentEnd > MATCH_THRESHOLD)
196  {
197  writeFragment(pageData, fragmentBegin,
198  fragmentEnd - fragmentBegin,
199  targetpage + fragmentBegin);
200  fragmentBegin = -1;
201  fragmentEnd = -1; /* not really necessary */
202  }
203  }
204 
205  /* Deal with any invalid end region by including it in final fragment */
206  if (loopEnd < targetEnd)
207  {
208  if (fragmentBegin < 0)
209  fragmentBegin = loopEnd;
210  fragmentEnd = targetEnd;
211  }
212 
213  /* Write final fragment if any */
214  if (fragmentBegin >= 0)
215  {
216  if (fragmentEnd < 0)
217  fragmentEnd = targetEnd;
218  writeFragment(pageData, fragmentBegin,
219  fragmentEnd - fragmentBegin,
220  targetpage + fragmentBegin);
221  }
222 }
223 
224 /*
225  * Compute the XLOG delta record needed to transform curpage into targetpage,
226  * and store it in pageData's delta field.
227  */
228 static void
229 computeDelta(PageData *pageData, Page curpage, Page targetpage)
230 {
231  int targetLower = ((PageHeader) targetpage)->pd_lower,
232  targetUpper = ((PageHeader) targetpage)->pd_upper,
233  curLower = ((PageHeader) curpage)->pd_lower,
234  curUpper = ((PageHeader) curpage)->pd_upper;
235 
236  pageData->deltaLen = 0;
237 
238  /* Compute delta records for lower part of page ... */
239  computeRegionDelta(pageData, curpage, targetpage,
240  0, targetLower,
241  0, curLower);
242  /* ... and for upper part, ignoring what's between */
243  computeRegionDelta(pageData, curpage, targetpage,
244  targetUpper, BLCKSZ,
245  curUpper, BLCKSZ);
246 
247  /*
248  * If xlog debug is enabled, then check produced delta. Result of delta
249  * application to curpage should be equivalent to targetpage.
250  */
251 #ifdef WAL_DEBUG
252  if (XLOG_DEBUG)
253  {
254  char tmp[BLCKSZ];
255 
256  memcpy(tmp, curpage, BLCKSZ);
257  applyPageRedo(tmp, pageData->delta, pageData->deltaLen);
258  if (memcmp(tmp, targetpage, targetLower) != 0 ||
259  memcmp(tmp + targetUpper, targetpage + targetUpper,
260  BLCKSZ - targetUpper) != 0)
261  elog(ERROR, "result of generic xlog apply does not match");
262  }
263 #endif
264 }
265 
266 /*
267  * Start new generic xlog record for modifications to specified relation.
268  */
271 {
273  int i;
274 
275  state = (GenericXLogState *) palloc(sizeof(GenericXLogState));
276  state->isLogged = RelationNeedsWAL(relation);
277 
278  for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++)
279  {
280  state->pages[i].image = state->images + BLCKSZ * i;
281  state->pages[i].buffer = InvalidBuffer;
282  }
283 
284  return state;
285 }
286 
287 /*
288  * Register new buffer for generic xlog record.
289  *
290  * Returns pointer to the page's image in the GenericXLogState, which
291  * is what the caller should modify.
292  *
293  * If the buffer is already registered, just return its existing entry.
294  * (It's not very clear what to do with the flags in such a case, but
295  * for now we stay with the original flags.)
296  */
297 Page
299 {
300  int block_id;
301 
302  /* Search array for existing entry or first unused slot */
303  for (block_id = 0; block_id < MAX_GENERIC_XLOG_PAGES; block_id++)
304  {
305  PageData *page = &state->pages[block_id];
306 
307  if (BufferIsInvalid(page->buffer))
308  {
309  /* Empty slot, so use it (there cannot be a match later) */
310  page->buffer = buffer;
311  page->flags = flags;
312  memcpy(page->image, BufferGetPage(buffer), BLCKSZ);
313  return (Page) page->image;
314  }
315  else if (page->buffer == buffer)
316  {
317  /*
318  * Buffer is already registered. Just return the image, which is
319  * already prepared.
320  */
321  return (Page) page->image;
322  }
323  }
324 
325  elog(ERROR, "maximum number %d of generic xlog buffers is exceeded",
326  MAX_GENERIC_XLOG_PAGES);
327  /* keep compiler quiet */
328  return NULL;
329 }
330 
331 /*
332  * Apply changes represented by GenericXLogState to the actual buffers,
333  * and emit a generic xlog record.
334  */
337 {
338  XLogRecPtr lsn;
339  int i;
340 
341  if (state->isLogged)
342  {
343  /* Logged relation: make xlog record in critical section. */
344  XLogBeginInsert();
345 
347 
348  for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++)
349  {
350  PageData *pageData = &state->pages[i];
351  Page page;
352  PageHeader pageHeader;
353 
354  if (BufferIsInvalid(pageData->buffer))
355  continue;
356 
357  page = BufferGetPage(pageData->buffer);
358  pageHeader = (PageHeader) pageData->image;
359 
360  if (pageData->flags & GENERIC_XLOG_FULL_IMAGE)
361  {
362  /*
363  * A full-page image does not require us to supply any xlog
364  * data. Just apply the image, being careful to zero the
365  * "hole" between pd_lower and pd_upper in order to avoid
366  * divergence between actual page state and what replay would
367  * produce.
368  */
369  memcpy(page, pageData->image, pageHeader->pd_lower);
370  memset(page + pageHeader->pd_lower, 0,
371  pageHeader->pd_upper - pageHeader->pd_lower);
372  memcpy(page + pageHeader->pd_upper,
373  pageData->image + pageHeader->pd_upper,
374  BLCKSZ - pageHeader->pd_upper);
375 
376  XLogRegisterBuffer(i, pageData->buffer,
378  }
379  else
380  {
381  /*
382  * In normal mode, calculate delta and write it as xlog data
383  * associated with this page.
384  */
385  computeDelta(pageData, page, (Page) pageData->image);
386 
387  /* Apply the image, with zeroed "hole" as above */
388  memcpy(page, pageData->image, pageHeader->pd_lower);
389  memset(page + pageHeader->pd_lower, 0,
390  pageHeader->pd_upper - pageHeader->pd_lower);
391  memcpy(page + pageHeader->pd_upper,
392  pageData->image + pageHeader->pd_upper,
393  BLCKSZ - pageHeader->pd_upper);
394 
396  XLogRegisterBufData(i, pageData->delta, pageData->deltaLen);
397  }
398  }
399 
400  /* Insert xlog record */
401  lsn = XLogInsert(RM_GENERIC_ID, 0);
402 
403  /* Set LSN and mark buffers dirty */
404  for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++)
405  {
406  PageData *pageData = &state->pages[i];
407 
408  if (BufferIsInvalid(pageData->buffer))
409  continue;
410  PageSetLSN(BufferGetPage(pageData->buffer), lsn);
411  MarkBufferDirty(pageData->buffer);
412  }
414  }
415  else
416  {
417  /* Unlogged relation: skip xlog-related stuff */
419  for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++)
420  {
421  PageData *pageData = &state->pages[i];
422 
423  if (BufferIsInvalid(pageData->buffer))
424  continue;
425  memcpy(BufferGetPage(pageData->buffer),
426  pageData->image,
427  BLCKSZ);
428  /* We don't worry about zeroing the "hole" in this case */
429  MarkBufferDirty(pageData->buffer);
430  }
432  /* We don't have a LSN to return, in this case */
433  lsn = InvalidXLogRecPtr;
434  }
435 
436  pfree(state);
437 
438  return lsn;
439 }
440 
441 /*
442  * Abort generic xlog record construction. No changes are applied to buffers.
443  *
444  * Note: caller is responsible for releasing locks/pins on buffers, if needed.
445  */
446 void
448 {
449  pfree(state);
450 }
451 
452 /*
453  * Apply delta to given page image.
454  */
455 static void
456 applyPageRedo(Page page, const char *delta, Size deltaSize)
457 {
458  const char *ptr = delta;
459  const char *end = delta + deltaSize;
460 
461  while (ptr < end)
462  {
463  OffsetNumber offset,
464  length;
465 
466  memcpy(&offset, ptr, sizeof(offset));
467  ptr += sizeof(offset);
468  memcpy(&length, ptr, sizeof(length));
469  ptr += sizeof(length);
470 
471  memcpy(page + offset, ptr, length);
472 
473  ptr += length;
474  }
475 }
476 
477 /*
478  * Redo function for generic xlog record.
479  */
480 void
482 {
483  XLogRecPtr lsn = record->EndRecPtr;
485  uint8 block_id;
486 
487  /* Protect limited size of buffers[] array */
489 
490  /* Iterate over blocks */
491  for (block_id = 0; block_id <= record->max_block_id; block_id++)
492  {
493  XLogRedoAction action;
494 
495  if (!XLogRecHasBlockRef(record, block_id))
496  {
497  buffers[block_id] = InvalidBuffer;
498  continue;
499  }
500 
501  action = XLogReadBufferForRedo(record, block_id, &buffers[block_id]);
502 
503  /* Apply redo to given block if needed */
504  if (action == BLK_NEEDS_REDO)
505  {
506  Page page;
507  PageHeader pageHeader;
508  char *blockDelta;
509  Size blockDeltaSize;
510 
511  page = BufferGetPage(buffers[block_id]);
512  blockDelta = XLogRecGetBlockData(record, block_id, &blockDeltaSize);
513  applyPageRedo(page, blockDelta, blockDeltaSize);
514 
515  /*
516  * Since the delta contains no information about what's in the
517  * "hole" between pd_lower and pd_upper, set that to zero to
518  * ensure we produce the same page state that application of the
519  * logged action by GenericXLogFinish did.
520  */
521  pageHeader = (PageHeader) page;
522  memset(page + pageHeader->pd_lower, 0,
523  pageHeader->pd_upper - pageHeader->pd_lower);
524 
525  PageSetLSN(page, lsn);
526  MarkBufferDirty(buffers[block_id]);
527  }
528  }
529 
530  /* Changes are done: unlock and release all buffers */
531  for (block_id = 0; block_id <= record->max_block_id; block_id++)
532  {
533  if (BufferIsValid(buffers[block_id]))
534  UnlockReleaseBuffer(buffers[block_id]);
535  }
536 }
537 
538 /*
539  * Mask a generic page before performing consistency checks on it.
540  */
541 void
542 generic_mask(char *page, BlockNumber blkno)
543 {
544  mask_page_lsn(page);
545 
546  mask_unused_space(page);
547 }
void XLogRegisterBufData(uint8 block_id, char *data, int len)
Definition: xloginsert.c:361
int length(const List *list)
Definition: list.c:1271
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
void generic_redo(XLogReaderState *record)
Definition: generic_xlog.c:481
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1450
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:213
#define Min(x, y)
Definition: c.h:806
#define END_CRIT_SECTION()
Definition: miscadmin.h:132
unsigned char uint8
Definition: c.h:266
#define InvalidBuffer
Definition: buf.h:25
#define XLogRecHasBlockRef(decoder, block_id)
Definition: xlogreader.h:221
#define START_CRIT_SECTION()
Definition: miscadmin.h:130
uint32 BlockNumber
Definition: block.h:31
#define MAX_DELTA_SIZE
Definition: generic_xlog.c:48
Page GenericXLogRegisterBuffer(GenericXLogState *state, Buffer buffer, int flags)
Definition: generic_xlog.c:298
void mask_unused_space(Page page)
Definition: bufmask.c:69
XLogRecPtr EndRecPtr
Definition: xlogreader.h:115
uint16 OffsetNumber
Definition: off.h:24
void generic_mask(char *page, BlockNumber blkno)
Definition: generic_xlog.c:542
void pfree(void *pointer)
Definition: mcxt.c:950
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3332
#define ERROR
Definition: elog.h:43
void mask_page_lsn(Page page)
Definition: bufmask.c:30
char delta[MAX_DELTA_SIZE]
Definition: generic_xlog.c:58
#define BufferIsInvalid(buffer)
Definition: buf.h:31
#define REGBUF_STANDARD
Definition: xloginsert.h:35
PageData pages[MAX_GENERIC_XLOG_PAGES]
Definition: generic_xlog.c:70
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
Buffer buffer
Definition: generic_xlog.c:53
#define MATCH_THRESHOLD
Definition: generic_xlog.c:47
static void computeDelta(PageData *pageData, Page curpage, Page targetpage)
Definition: generic_xlog.c:229
#define REGBUF_FORCE_IMAGE
Definition: xloginsert.h:30
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:415
char * XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len)
Definition: xlogreader.c:1331
char * image
Definition: generic_xlog.c:56
static void applyPageRedo(Page page, const char *delta, Size deltaSize)
Definition: generic_xlog.c:456
char images[MAX_GENERIC_XLOG_PAGES *BLCKSZ]
Definition: generic_xlog.c:69
static void computeRegionDelta(PageData *pageData, const char *curpage, const char *targetpage, int targetStart, int targetEnd, int validStart, int validEnd)
Definition: generic_xlog.c:122
XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id, Buffer *buf)
Definition: xlogutils.c:290
PageHeaderData * PageHeader
Definition: bufpage.h:162
#define NULL
Definition: c.h:229
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:675
Definition: regguts.h:298
XLogRecPtr GenericXLogFinish(GenericXLogState *state)
Definition: generic_xlog.c:336
WalTimeSample buffer[LAG_TRACKER_BUFFER_SIZE]
Definition: walsender.c:207
XLogRedoAction
Definition: xlogutils.h:27
size_t Size
Definition: c.h:356
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
#define RelationNeedsWAL(relation)
Definition: rel.h:506
static void writeFragment(PageData *pageData, OffsetNumber offset, OffsetNumber len, const char *data)
Definition: generic_xlog.c:91
void * palloc(Size size)
Definition: mcxt.c:849
int i
void GenericXLogAbort(GenericXLogState *state)
Definition: generic_xlog.c:447
#define elog
Definition: elog.h:219
LocationIndex pd_upper
Definition: bufpage.h:155
#define MAX_GENERIC_XLOG_PAGES
Definition: generic_xlog.h:23
void XLogBeginInsert(void)
Definition: xloginsert.c:120
#define PageSetLSN(page, lsn)
Definition: bufpage.h:365
int Buffer
Definition: buf.h:23
#define GENERIC_XLOG_FULL_IMAGE
Definition: generic_xlog.h:26
GenericXLogState * GenericXLogStart(Relation relation)
Definition: generic_xlog.c:270
Pointer Page
Definition: bufpage.h:74
LocationIndex pd_lower
Definition: bufpage.h:154
int deltaLen
Definition: generic_xlog.c:55