PostgreSQL Source Code  git master
basebackup_incremental.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * basebackup_incremental.c
4  * code for incremental backup support
5  *
6  * This code isn't actually in charge of taking an incremental backup;
7  * the actual construction of the incremental backup happens in
8  * basebackup.c. Here, we're concerned with providing the necessary
9  * supports for that operation. In particular, we need to parse the
10  * backup manifest supplied by the user taking the incremental backup
11  * and extract the required information from it.
12  *
13  * Portions Copyright (c) 2010-2024, PostgreSQL Global Development Group
14  *
15  * IDENTIFICATION
16  * src/backend/backup/basebackup_incremental.c
17  *
18  *-------------------------------------------------------------------------
19  */
20 #include "postgres.h"
21 
22 #include "access/timeline.h"
23 #include "access/xlog.h"
25 #include "backup/walsummary.h"
26 #include "common/blkreftable.h"
27 #include "common/hashfn.h"
28 #include "common/int.h"
29 #include "common/parse_manifest.h"
31 
32 #define BLOCKS_PER_READ 512
33 
34 /*
35  * We expect to find the last lines of the manifest, including the checksum,
36  * in the last MIN_CHUNK bytes of the manifest. We trigger an incremental
37  * parse step if we are about to overflow MAX_CHUNK bytes.
38  */
39 #define MIN_CHUNK 1024
40 #define MAX_CHUNK (128 * 1024)
41 
42 /*
43  * Details extracted from the WAL ranges present in the supplied backup manifest.
44  */
45 typedef struct
46 {
51 
52 /*
53  * Details extracted from the file list present in the supplied backup manifest.
54  */
55 typedef struct
56 {
58  const char *path;
61 
62 static uint32 hash_string_pointer(const char *s);
63 #define SH_PREFIX backup_file
64 #define SH_ELEMENT_TYPE backup_file_entry
65 #define SH_KEY_TYPE const char *
66 #define SH_KEY path
67 #define SH_HASH_KEY(tb, key) hash_string_pointer(key)
68 #define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0)
69 #define SH_SCOPE static inline
70 #define SH_DECLARE
71 #define SH_DEFINE
72 #include "lib/simplehash.h"
73 
75 {
76  /* Memory context for this object and its subsidiary objects. */
78 
79  /* Temporary buffer for storing the manifest while parsing it. */
81 
82  /* WAL ranges extracted from the backup manifest. */
84 
85  /*
86  * Files extracted from the backup manifest.
87  *
88  * We don't really need this information, because we use WAL summaries to
89  * figure out what's changed. It would be unsafe to just rely on the list
90  * of files that existed before, because it's possible for a file to be
91  * removed and a new one created with the same name and different
92  * contents. In such cases, the whole file must still be sent. We can tell
93  * from the WAL summaries whether that happened, but not from the file
94  * list.
95  *
96  * Nonetheless, this data is useful for sanity checking. If a file that we
97  * think we shouldn't need to send is not present in the manifest for the
98  * prior backup, something has gone terribly wrong. We retain the file
99  * names and sizes, but not the checksums or last modified times, for
100  * which we have no use.
101  *
102  * One significant downside of storing this data is that it consumes
103  * memory. If that turns out to be a problem, we might have to decide not
104  * to retain this information, or to make it optional.
105  */
106  backup_file_hash *manifest_files;
107 
108  /*
109  * Block-reference table for the incremental backup.
110  *
111  * It's possible that storing the entire block-reference table in memory
112  * will be a problem for some users. The in-memory format that we're using
113  * here is pretty efficient, converging to little more than 1 bit per
114  * block for relation forks with large numbers of modified blocks. It's
115  * possible, however, that if you try to perform an incremental backup of
116  * a database with a sufficiently large number of relations on a
117  * sufficiently small machine, you could run out of memory here. If that
118  * turns out to be a problem in practice, we'll need to be more clever.
119  */
121 
122  /*
123  * State object for incremental JSON parsing
124  */
126 };
127 
129  int manifest_version);
131  uint64 manifest_system_identifier);
133  const char *pathname,
134  uint64 size,
135  pg_checksum_type checksum_type,
136  int checksum_length,
137  uint8 *checksum_payload);
139  TimeLineID tli,
140  XLogRecPtr start_lsn,
141  XLogRecPtr end_lsn);
143  const char *fmt,...)
145 static int compare_block_numbers(const void *a, const void *b);
146 
147 /*
148  * Create a new object for storing information extracted from the manifest
149  * supplied when creating an incremental backup.
150  */
153 {
155  MemoryContext oldcontext;
157 
158  oldcontext = MemoryContextSwitchTo(mcxt);
159 
160  ib = palloc0(sizeof(IncrementalBackupInfo));
161  ib->mcxt = mcxt;
162  initStringInfo(&ib->buf);
163 
164  /*
165  * It's hard to guess how many files a "typical" installation will have in
166  * the data directory, but a fresh initdb creates almost 1000 files as of
167  * this writing, so it seems to make sense for our estimate to
168  * substantially higher.
169  */
170  ib->manifest_files = backup_file_create(mcxt, 10000, NULL);
171 
173  /* Parse the manifest. */
174  context->private_data = ib;
175  context->version_cb = manifest_process_version;
176  context->system_identifier_cb = manifest_process_system_identifier;
177  context->per_file_cb = manifest_process_file;
178  context->per_wal_range_cb = manifest_process_wal_range;
179  context->error_cb = manifest_report_error;
180 
182 
183  MemoryContextSwitchTo(oldcontext);
184 
185  return ib;
186 }
187 
188 /*
189  * Before taking an incremental backup, the caller must supply the backup
190  * manifest from a prior backup. Each chunk of manifest data received
191  * from the client should be passed to this function.
192  */
193 void
195  int len)
196 {
197  MemoryContext oldcontext;
198 
199  /* Switch to our memory context. */
200  oldcontext = MemoryContextSwitchTo(ib->mcxt);
201 
202  if (ib->buf.len > MIN_CHUNK && ib->buf.len + len > MAX_CHUNK)
203  {
204  /*
205  * time for an incremental parse. We'll do all but the last MIN_CHUNK
206  * so that we have enough left for the final piece.
207  */
209  ib->buf.len - MIN_CHUNK, false);
210  /* now remove what we just parsed */
211  memmove(ib->buf.data, ib->buf.data + (ib->buf.len - MIN_CHUNK),
212  MIN_CHUNK + 1);
213  ib->buf.len = MIN_CHUNK;
214  }
215 
217 
218  /* Switch back to previous memory context. */
219  MemoryContextSwitchTo(oldcontext);
220 }
221 
222 /*
223  * Finalize an IncrementalBackupInfo object after all manifest data has
224  * been supplied via calls to AppendIncrementalManifestData.
225  */
226 void
228 {
229  MemoryContext oldcontext;
230 
231  /* Switch to our memory context. */
232  oldcontext = MemoryContextSwitchTo(ib->mcxt);
233 
234  /* Parse the last chunk of the manifest */
236  ib->buf.len, true);
237 
238  /* Done with the buffer, so release memory. */
239  pfree(ib->buf.data);
240  ib->buf.data = NULL;
241 
242  /* Done with inc_state, so release that memory too */
244 
245  /* Switch back to previous memory context. */
246  MemoryContextSwitchTo(oldcontext);
247 }
248 
249 /*
250  * Prepare to take an incremental backup.
251  *
252  * Before this function is called, AppendIncrementalManifestData and
253  * FinalizeIncrementalManifest should have already been called to pass all
254  * the manifest data to this object.
255  *
256  * This function performs sanity checks on the data extracted from the
257  * manifest and figures out for which WAL ranges we need summaries, and
258  * whether those summaries are available. Then, it reads and combines the
259  * data from those summary files. It also updates the backup_state with the
260  * reference TLI and LSN for the prior backup.
261  */
262 void
265 {
266  MemoryContext oldcontext;
268  List *all_wslist,
269  *required_wslist = NIL;
270  ListCell *lc;
271  TimeLineHistoryEntry **tlep;
272  int num_wal_ranges;
273  int i;
274  bool found_backup_start_tli = false;
275  TimeLineID earliest_wal_range_tli = 0;
276  XLogRecPtr earliest_wal_range_start_lsn = InvalidXLogRecPtr;
277  TimeLineID latest_wal_range_tli = 0;
278 
279  Assert(ib->buf.data == NULL);
280 
281  /* Switch to our memory context. */
282  oldcontext = MemoryContextSwitchTo(ib->mcxt);
283 
284  /*
285  * A valid backup manifest must always contain at least one WAL range
286  * (usually exactly one, unless the backup spanned a timeline switch).
287  */
288  num_wal_ranges = list_length(ib->manifest_wal_ranges);
289  if (num_wal_ranges == 0)
290  ereport(ERROR,
291  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
292  errmsg("manifest contains no required WAL ranges")));
293 
294  /*
295  * Match up the TLIs that appear in the WAL ranges of the backup manifest
296  * with those that appear in this server's timeline history. We expect
297  * every backup_wal_range to match to a TimeLineHistoryEntry; if it does
298  * not, that's an error.
299  *
300  * This loop also decides which of the WAL ranges is the manifest is most
301  * ancient and which one is the newest, according to the timeline history
302  * of this server, and stores TLIs of those WAL ranges into
303  * earliest_wal_range_tli and latest_wal_range_tli. It also updates
304  * earliest_wal_range_start_lsn to the start LSN of the WAL range for
305  * earliest_wal_range_tli.
306  *
307  * Note that the return value of readTimeLineHistory puts the latest
308  * timeline at the beginning of the list, not the end. Hence, the earliest
309  * TLI is the one that occurs nearest the end of the list returned by
310  * readTimeLineHistory, and the latest TLI is the one that occurs closest
311  * to the beginning.
312  */
314  tlep = palloc0(num_wal_ranges * sizeof(TimeLineHistoryEntry *));
315  for (i = 0; i < num_wal_ranges; ++i)
316  {
318  bool saw_earliest_wal_range_tli = false;
319  bool saw_latest_wal_range_tli = false;
320 
321  /* Search this server's history for this WAL range's TLI. */
322  foreach(lc, expectedTLEs)
323  {
324  TimeLineHistoryEntry *tle = lfirst(lc);
325 
326  if (tle->tli == range->tli)
327  {
328  tlep[i] = tle;
329  break;
330  }
331 
332  if (tle->tli == earliest_wal_range_tli)
333  saw_earliest_wal_range_tli = true;
334  if (tle->tli == latest_wal_range_tli)
335  saw_latest_wal_range_tli = true;
336  }
337 
338  /*
339  * An incremental backup can only be taken relative to a backup that
340  * represents a previous state of this server. If the backup requires
341  * WAL from a timeline that's not in our history, that definitely
342  * isn't the case.
343  */
344  if (tlep[i] == NULL)
345  ereport(ERROR,
346  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
347  errmsg("timeline %u found in manifest, but not in this server's history",
348  range->tli)));
349 
350  /*
351  * If we found this TLI in the server's history before encountering
352  * the latest TLI seen so far in the server's history, then this TLI
353  * is the latest one seen so far.
354  *
355  * If on the other hand we saw the earliest TLI seen so far before
356  * finding this TLI, this TLI is earlier than the earliest one seen so
357  * far. And if this is the first TLI for which we've searched, it's
358  * also the earliest one seen so far.
359  *
360  * On the first loop iteration, both things should necessarily be
361  * true.
362  */
363  if (!saw_latest_wal_range_tli)
364  latest_wal_range_tli = range->tli;
365  if (earliest_wal_range_tli == 0 || saw_earliest_wal_range_tli)
366  {
367  earliest_wal_range_tli = range->tli;
368  earliest_wal_range_start_lsn = range->start_lsn;
369  }
370  }
371 
372  /*
373  * Propagate information about the prior backup into the backup_label that
374  * will be generated for this backup.
375  */
376  backup_state->istartpoint = earliest_wal_range_start_lsn;
377  backup_state->istarttli = earliest_wal_range_tli;
378 
379  /*
380  * Sanity check start and end LSNs for the WAL ranges in the manifest.
381  *
382  * Commonly, there won't be any timeline switches during the prior backup
383  * at all, but if there are, they should happen at the same LSNs that this
384  * server switched timelines.
385  *
386  * Whether there are any timeline switches during the prior backup or not,
387  * the prior backup shouldn't require any WAL from a timeline prior to the
388  * start of that timeline. It also shouldn't require any WAL from later
389  * than the start of this backup.
390  *
391  * If any of these sanity checks fail, one possible explanation is that
392  * the user has generated WAL on the same timeline with the same LSNs more
393  * than once. For instance, if two standbys running on timeline 1 were
394  * both promoted and (due to a broken archiving setup) both selected new
395  * timeline ID 2, then it's possible that one of these checks might trip.
396  *
397  * Note that there are lots of ways for the user to do something very bad
398  * without tripping any of these checks, and they are not intended to be
399  * comprehensive. It's pretty hard to see how we could be certain of
400  * anything here. However, if there's a problem staring us right in the
401  * face, it's best to report it, so we do.
402  */
403  for (i = 0; i < num_wal_ranges; ++i)
404  {
406 
407  if (range->tli == earliest_wal_range_tli)
408  {
409  if (range->start_lsn < tlep[i]->begin)
410  ereport(ERROR,
411  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
412  errmsg("manifest requires WAL from initial timeline %u starting at %X/%X, but that timeline begins at %X/%X",
413  range->tli,
414  LSN_FORMAT_ARGS(range->start_lsn),
415  LSN_FORMAT_ARGS(tlep[i]->begin))));
416  }
417  else
418  {
419  if (range->start_lsn != tlep[i]->begin)
420  ereport(ERROR,
421  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
422  errmsg("manifest requires WAL from continuation timeline %u starting at %X/%X, but that timeline begins at %X/%X",
423  range->tli,
424  LSN_FORMAT_ARGS(range->start_lsn),
425  LSN_FORMAT_ARGS(tlep[i]->begin))));
426  }
427 
428  if (range->tli == latest_wal_range_tli)
429  {
430  if (range->end_lsn > backup_state->startpoint)
431  ereport(ERROR,
432  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
433  errmsg("manifest requires WAL from final timeline %u ending at %X/%X, but this backup starts at %X/%X",
434  range->tli,
435  LSN_FORMAT_ARGS(range->end_lsn),
437  errhint("This can happen for incremental backups on a standby if there was little activity since the previous backup.")));
438  }
439  else
440  {
441  if (range->end_lsn != tlep[i]->end)
442  ereport(ERROR,
443  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
444  errmsg("manifest requires WAL from non-final timeline %u ending at %X/%X, but this server switched timelines at %X/%X",
445  range->tli,
446  LSN_FORMAT_ARGS(range->end_lsn),
447  LSN_FORMAT_ARGS(tlep[i]->end))));
448  }
449 
450  }
451 
452  /*
453  * Wait for WAL summarization to catch up to the backup start LSN. This
454  * will throw an error if the WAL summarizer appears to be stuck. If WAL
455  * summarization gets disabled while we're waiting, this will return
456  * immediately, and we'll error out further down if the WAL summaries are
457  * incomplete.
458  */
460 
461  /*
462  * Retrieve a list of all WAL summaries on any timeline that overlap with
463  * the LSN range of interest. We could instead call GetWalSummaries() once
464  * per timeline in the loop that follows, but that would involve reading
465  * the directory multiple times. It should be mildly faster - and perhaps
466  * a bit safer - to do it just once.
467  */
468  all_wslist = GetWalSummaries(0, earliest_wal_range_start_lsn,
470 
471  /*
472  * We need WAL summaries for everything that happened during the prior
473  * backup and everything that happened afterward up until the point where
474  * the current backup started.
475  */
476  foreach(lc, expectedTLEs)
477  {
478  TimeLineHistoryEntry *tle = lfirst(lc);
479  XLogRecPtr tli_start_lsn = tle->begin;
480  XLogRecPtr tli_end_lsn = tle->end;
481  XLogRecPtr tli_missing_lsn = InvalidXLogRecPtr;
482  List *tli_wslist;
483 
484  /*
485  * Working through the history of this server from the current
486  * timeline backwards, we skip everything until we find the timeline
487  * where this backup started. Most of the time, this means we won't
488  * skip anything at all, as it's unlikely that the timeline has
489  * changed since the beginning of the backup moments ago.
490  */
491  if (tle->tli == backup_state->starttli)
492  {
493  found_backup_start_tli = true;
494  tli_end_lsn = backup_state->startpoint;
495  }
496  else if (!found_backup_start_tli)
497  continue;
498 
499  /*
500  * Find the summaries that overlap the LSN range of interest for this
501  * timeline. If this is the earliest timeline involved, the range of
502  * interest begins with the start LSN of the prior backup; otherwise,
503  * it begins at the LSN at which this timeline came into existence. If
504  * this is the latest TLI involved, the range of interest ends at the
505  * start LSN of the current backup; otherwise, it ends at the point
506  * where we switched from this timeline to the next one.
507  */
508  if (tle->tli == earliest_wal_range_tli)
509  tli_start_lsn = earliest_wal_range_start_lsn;
510  tli_wslist = FilterWalSummaries(all_wslist, tle->tli,
511  tli_start_lsn, tli_end_lsn);
512 
513  /*
514  * There is no guarantee that the WAL summaries we found cover the
515  * entire range of LSNs for which summaries are required, or indeed
516  * that we found any WAL summaries at all. Check whether we have a
517  * problem of that sort.
518  */
519  if (!WalSummariesAreComplete(tli_wslist, tli_start_lsn, tli_end_lsn,
520  &tli_missing_lsn))
521  {
522  if (XLogRecPtrIsInvalid(tli_missing_lsn))
523  ereport(ERROR,
524  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
525  errmsg("WAL summaries are required on timeline %u from %X/%X to %X/%X, but no summaries for that timeline and LSN range exist",
526  tle->tli,
527  LSN_FORMAT_ARGS(tli_start_lsn),
528  LSN_FORMAT_ARGS(tli_end_lsn))));
529  else
530  ereport(ERROR,
531  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
532  errmsg("WAL summaries are required on timeline %u from %X/%X to %X/%X, but the summaries for that timeline and LSN range are incomplete",
533  tle->tli,
534  LSN_FORMAT_ARGS(tli_start_lsn),
535  LSN_FORMAT_ARGS(tli_end_lsn)),
536  errdetail("The first unsummarized LSN in this range is %X/%X.",
537  LSN_FORMAT_ARGS(tli_missing_lsn))));
538  }
539 
540  /*
541  * Remember that we need to read these summaries.
542  *
543  * Technically, it's possible that this could read more files than
544  * required, since tli_wslist in theory could contain redundant
545  * summaries. For instance, if we have a summary from 0/10000000 to
546  * 0/20000000 and also one from 0/00000000 to 0/30000000, then the
547  * latter subsumes the former and the former could be ignored.
548  *
549  * We ignore this possibility because the WAL summarizer only tries to
550  * generate summaries that do not overlap. If somehow they exist,
551  * we'll do a bit of extra work but the results should still be
552  * correct.
553  */
554  required_wslist = list_concat(required_wslist, tli_wslist);
555 
556  /*
557  * Timelines earlier than the one in which the prior backup began are
558  * not relevant.
559  */
560  if (tle->tli == earliest_wal_range_tli)
561  break;
562  }
563 
564  /*
565  * Read all of the required block reference table files and merge all of
566  * the data into a single in-memory block reference table.
567  *
568  * See the comments for struct IncrementalBackupInfo for some thoughts on
569  * memory usage.
570  */
572  foreach(lc, required_wslist)
573  {
574  WalSummaryFile *ws = lfirst(lc);
575  WalSummaryIO wsio;
576  BlockRefTableReader *reader;
577  RelFileLocator rlocator;
578  ForkNumber forknum;
579  BlockNumber limit_block;
581 
582  wsio.file = OpenWalSummaryFile(ws, false);
583  wsio.filepos = 0;
584  ereport(DEBUG1,
585  (errmsg_internal("reading WAL summary file \"%s\"",
586  FilePathName(wsio.file))));
588  FilePathName(wsio.file),
589  ReportWalSummaryError, NULL);
590  while (BlockRefTableReaderNextRelation(reader, &rlocator, &forknum,
591  &limit_block))
592  {
593  BlockRefTableSetLimitBlock(ib->brtab, &rlocator,
594  forknum, limit_block);
595 
596  while (1)
597  {
598  unsigned nblocks;
599  unsigned i;
600 
601  nblocks = BlockRefTableReaderGetBlocks(reader, blocks,
603  if (nblocks == 0)
604  break;
605 
606  for (i = 0; i < nblocks; ++i)
607  BlockRefTableMarkBlockModified(ib->brtab, &rlocator,
608  forknum, blocks[i]);
609  }
610  }
612  FileClose(wsio.file);
613  }
614 
615  /* Switch back to previous memory context. */
616  MemoryContextSwitchTo(oldcontext);
617 }
618 
619 /*
620  * Get the pathname that should be used when a file is sent incrementally.
621  *
622  * The result is a palloc'd string.
623  */
624 char *
625 GetIncrementalFilePath(Oid dboid, Oid spcoid, RelFileNumber relfilenumber,
626  ForkNumber forknum, unsigned segno)
627 {
628  char *path;
629  char *lastslash;
630  char *ipath;
631 
632  path = GetRelationPath(dboid, spcoid, relfilenumber, INVALID_PROC_NUMBER,
633  forknum);
634 
635  lastslash = strrchr(path, '/');
636  Assert(lastslash != NULL);
637  *lastslash = '\0';
638 
639  if (segno > 0)
640  ipath = psprintf("%s/INCREMENTAL.%s.%u", path, lastslash + 1, segno);
641  else
642  ipath = psprintf("%s/INCREMENTAL.%s", path, lastslash + 1);
643 
644  pfree(path);
645 
646  return ipath;
647 }
648 
649 /*
650  * How should we back up a particular file as part of an incremental backup?
651  *
652  * If the return value is BACK_UP_FILE_FULLY, caller should back up the whole
653  * file just as if this were not an incremental backup. The contents of the
654  * relative_block_numbers array are unspecified in this case.
655  *
656  * If the return value is BACK_UP_FILE_INCREMENTALLY, caller should include
657  * an incremental file in the backup instead of the entire file. On return,
658  * *num_blocks_required will be set to the number of blocks that need to be
659  * sent, and the actual block numbers will have been stored in
660  * relative_block_numbers, which should be an array of at least RELSEG_SIZE.
661  * In addition, *truncation_block_length will be set to the value that should
662  * be included in the incremental file.
663  */
666  Oid dboid, Oid spcoid,
667  RelFileNumber relfilenumber, ForkNumber forknum,
668  unsigned segno, size_t size,
669  unsigned *num_blocks_required,
670  BlockNumber *relative_block_numbers,
671  unsigned *truncation_block_length)
672 {
673  BlockNumber limit_block;
674  BlockNumber start_blkno;
675  BlockNumber stop_blkno;
676  RelFileLocator rlocator;
677  BlockRefTableEntry *brtentry;
678  unsigned i;
679  unsigned nblocks;
680 
681  /* Should only be called after PrepareForIncrementalBackup. */
682  Assert(ib->buf.data == NULL);
683 
684  /*
685  * dboid could be InvalidOid if shared rel, but spcoid and relfilenumber
686  * should have legal values.
687  */
688  Assert(OidIsValid(spcoid));
689  Assert(RelFileNumberIsValid(relfilenumber));
690 
691  /*
692  * If the file size is too large or not a multiple of BLCKSZ, then
693  * something weird is happening, so give up and send the whole file.
694  */
695  if ((size % BLCKSZ) != 0 || size / BLCKSZ > RELSEG_SIZE)
696  return BACK_UP_FILE_FULLY;
697 
698  /*
699  * The free-space map fork is not properly WAL-logged, so we need to
700  * backup the entire file every time.
701  */
702  if (forknum == FSM_FORKNUM)
703  return BACK_UP_FILE_FULLY;
704 
705  /*
706  * If this file was not part of the prior backup, back it up fully.
707  *
708  * If this file was created after the prior backup and before the start of
709  * the current backup, then the WAL summary information will tell us to
710  * back up the whole file. However, if this file was created after the
711  * start of the current backup, then the WAL summary won't know anything
712  * about it. Without this logic, we would erroneously conclude that it was
713  * OK to send it incrementally.
714  *
715  * Note that the file could have existed at the time of the prior backup,
716  * gotten deleted, and then a new file with the same name could have been
717  * created. In that case, this logic won't prevent the file from being
718  * backed up incrementally. But, if the deletion happened before the start
719  * of the current backup, the limit block will be 0, inducing a full
720  * backup. If the deletion happened after the start of the current backup,
721  * reconstruction will erroneously combine blocks from the current
722  * lifespan of the file with blocks from the previous lifespan -- but in
723  * this type of case, WAL replay to reach backup consistency should remove
724  * and recreate the file anyway, so the initial bogus contents should not
725  * matter.
726  */
727  if (backup_file_lookup(ib->manifest_files, path) == NULL)
728  {
729  char *ipath;
730 
731  ipath = GetIncrementalFilePath(dboid, spcoid, relfilenumber,
732  forknum, segno);
733  if (backup_file_lookup(ib->manifest_files, ipath) == NULL)
734  return BACK_UP_FILE_FULLY;
735  }
736 
737  /*
738  * Look up the special block reference table entry for the database as a
739  * whole.
740  */
741  rlocator.spcOid = spcoid;
742  rlocator.dbOid = dboid;
743  rlocator.relNumber = 0;
744  if (BlockRefTableGetEntry(ib->brtab, &rlocator, MAIN_FORKNUM,
745  &limit_block) != NULL)
746  {
747  /*
748  * According to the WAL summary, this database OID/tablespace OID
749  * pairing has been created since the previous backup. So, everything
750  * in it must be backed up fully.
751  */
752  return BACK_UP_FILE_FULLY;
753  }
754 
755  /* Look up the block reference table entry for this relfilenode. */
756  rlocator.relNumber = relfilenumber;
757  brtentry = BlockRefTableGetEntry(ib->brtab, &rlocator, forknum,
758  &limit_block);
759 
760  /*
761  * If there is no entry, then there have been no WAL-logged changes to the
762  * relation since the predecessor backup was taken, so we can back it up
763  * incrementally and need not include any modified blocks.
764  *
765  * However, if the file is zero-length, we should do a full backup,
766  * because an incremental file is always more than zero length, and it's
767  * silly to take an incremental backup when a full backup would be
768  * smaller.
769  */
770  if (brtentry == NULL)
771  {
772  if (size == 0)
773  return BACK_UP_FILE_FULLY;
774  *num_blocks_required = 0;
775  *truncation_block_length = size / BLCKSZ;
777  }
778 
779  /*
780  * If the limit_block is less than or equal to the point where this
781  * segment starts, send the whole file.
782  */
783  if (limit_block <= segno * RELSEG_SIZE)
784  return BACK_UP_FILE_FULLY;
785 
786  /*
787  * Get relevant entries from the block reference table entry.
788  *
789  * We shouldn't overflow computing the start or stop block numbers, but if
790  * it manages to happen somehow, detect it and throw an error.
791  */
792  start_blkno = segno * RELSEG_SIZE;
793  stop_blkno = start_blkno + (size / BLCKSZ);
794  if (start_blkno / RELSEG_SIZE != segno || stop_blkno < start_blkno)
795  ereport(ERROR,
796  errcode(ERRCODE_INTERNAL_ERROR),
797  errmsg_internal("overflow computing block number bounds for segment %u with size %zu",
798  segno, size));
799 
800  /*
801  * This will write *absolute* block numbers into the output array, but
802  * we'll transpose them below.
803  */
804  nblocks = BlockRefTableEntryGetBlocks(brtentry, start_blkno, stop_blkno,
805  relative_block_numbers, RELSEG_SIZE);
806  Assert(nblocks <= RELSEG_SIZE);
807 
808  /*
809  * If we're going to have to send nearly all of the blocks, then just send
810  * the whole file, because that won't require much extra storage or
811  * transfer and will speed up and simplify backup restoration. It's not
812  * clear what threshold is most appropriate here and perhaps it ought to
813  * be configurable, but for now we're just going to say that if we'd need
814  * to send 90% of the blocks anyway, give up and send the whole file.
815  *
816  * NB: If you change the threshold here, at least make sure to back up the
817  * file fully when every single block must be sent, because there's
818  * nothing good about sending an incremental file in that case.
819  */
820  if (nblocks * BLCKSZ > size * 0.9)
821  return BACK_UP_FILE_FULLY;
822 
823  /*
824  * Looks like we can send an incremental file, so sort the block numbers
825  * and then transpose them from absolute block numbers to relative block
826  * numbers if necessary.
827  *
828  * NB: If the block reference table was using the bitmap representation
829  * for a given chunk, the block numbers in that chunk will already be
830  * sorted, but when the array-of-offsets representation is used, we can
831  * receive block numbers here out of order.
832  */
833  qsort(relative_block_numbers, nblocks, sizeof(BlockNumber),
835  if (start_blkno != 0)
836  {
837  for (i = 0; i < nblocks; ++i)
838  relative_block_numbers[i] -= start_blkno;
839  }
840  *num_blocks_required = nblocks;
841 
842  /*
843  * The truncation block length is the minimum length of the reconstructed
844  * file. Any block numbers below this threshold that are not present in
845  * the backup need to be fetched from the prior backup. At or above this
846  * threshold, blocks should only be included in the result if they are
847  * present in the backup. (This may require inserting zero blocks if the
848  * blocks included in the backup are non-consecutive.)
849  */
850  *truncation_block_length = size / BLCKSZ;
851  if (BlockNumberIsValid(limit_block))
852  {
853  unsigned relative_limit = limit_block - segno * RELSEG_SIZE;
854 
855  if (*truncation_block_length < relative_limit)
856  *truncation_block_length = relative_limit;
857  }
858 
859  /* Send it incrementally. */
861 }
862 
863 /*
864  * Compute the size for a header of an incremental file containing a given
865  * number of blocks. The header is rounded to a multiple of BLCKSZ, but
866  * only if the file will store some block data.
867  */
868 size_t
869 GetIncrementalHeaderSize(unsigned num_blocks_required)
870 {
871  size_t result;
872 
873  /* Make sure we're not going to overflow. */
874  Assert(num_blocks_required <= RELSEG_SIZE);
875 
876  /*
877  * Three four byte quantities (magic number, truncation block length,
878  * block count) followed by block numbers.
879  */
880  result = 3 * sizeof(uint32) + (sizeof(BlockNumber) * num_blocks_required);
881 
882  /*
883  * Round the header size to a multiple of BLCKSZ - when not a multiple of
884  * BLCKSZ, add the missing fraction of a block. But do this only if the
885  * file will store data for some blocks, otherwise keep it small.
886  */
887  if ((num_blocks_required > 0) && (result % BLCKSZ != 0))
888  result += BLCKSZ - (result % BLCKSZ);
889 
890  return result;
891 }
892 
893 /*
894  * Compute the size for an incremental file containing a given number of blocks.
895  */
896 size_t
897 GetIncrementalFileSize(unsigned num_blocks_required)
898 {
899  size_t result;
900 
901  /* Make sure we're not going to overflow. */
902  Assert(num_blocks_required <= RELSEG_SIZE);
903 
904  /*
905  * Header with three four byte quantities (magic number, truncation block
906  * length, block count) followed by block numbers, rounded to a multiple
907  * of BLCKSZ (for files with block data), followed by block contents.
908  */
909  result = GetIncrementalHeaderSize(num_blocks_required);
910  result += BLCKSZ * num_blocks_required;
911 
912  return result;
913 }
914 
915 /*
916  * Helper function for filemap hash table.
917  */
918 static uint32
919 hash_string_pointer(const char *s)
920 {
921  unsigned char *ss = (unsigned char *) s;
922 
923  return hash_bytes(ss, strlen(s));
924 }
925 
926 /*
927  * This callback to validate the manifest version for incremental backup.
928  */
929 static void
931  int manifest_version)
932 {
933  /* Incremental backups don't work with manifest version 1 */
934  if (manifest_version == 1)
935  context->error_cb(context,
936  "backup manifest version 1 does not support incremental backup");
937 }
938 
939 /*
940  * This callback to validate the manifest system identifier against the current
941  * database server.
942  */
943 static void
945  uint64 manifest_system_identifier)
946 {
947  uint64 system_identifier;
948 
949  /* Get system identifier of current system */
950  system_identifier = GetSystemIdentifier();
951 
952  if (manifest_system_identifier != system_identifier)
953  context->error_cb(context,
954  "system identifier in backup manifest is %llu, but database system identifier is %llu",
955  (unsigned long long) manifest_system_identifier,
956  (unsigned long long) system_identifier);
957 }
958 
959 /*
960  * This callback is invoked for each file mentioned in the backup manifest.
961  *
962  * We store the path to each file and the size of each file for sanity-checking
963  * purposes. For further details, see comments for IncrementalBackupInfo.
964  */
965 static void
967  const char *pathname, uint64 size,
968  pg_checksum_type checksum_type,
969  int checksum_length,
970  uint8 *checksum_payload)
971 {
972  IncrementalBackupInfo *ib = context->private_data;
973  backup_file_entry *entry;
974  bool found;
975 
976  entry = backup_file_insert(ib->manifest_files, pathname, &found);
977  if (!found)
978  {
979  entry->path = MemoryContextStrdup(ib->manifest_files->ctx,
980  pathname);
981  entry->size = size;
982  }
983 }
984 
985 /*
986  * This callback is invoked for each WAL range mentioned in the backup
987  * manifest.
988  *
989  * We're just interested in learning the oldest LSN and the corresponding TLI
990  * that appear in any WAL range.
991  */
992 static void
994  TimeLineID tli, XLogRecPtr start_lsn,
995  XLogRecPtr end_lsn)
996 {
997  IncrementalBackupInfo *ib = context->private_data;
999 
1000  range->tli = tli;
1001  range->start_lsn = start_lsn;
1002  range->end_lsn = end_lsn;
1004 }
1005 
1006 /*
1007  * This callback is invoked if an error occurs while parsing the backup
1008  * manifest.
1009  */
1010 static void
1012 {
1013  StringInfoData errbuf;
1014 
1015  initStringInfo(&errbuf);
1016 
1017  for (;;)
1018  {
1019  va_list ap;
1020  int needed;
1021 
1022  va_start(ap, fmt);
1023  needed = appendStringInfoVA(&errbuf, fmt, ap);
1024  va_end(ap);
1025  if (needed == 0)
1026  break;
1027  enlargeStringInfo(&errbuf, needed);
1028  }
1029 
1030  ereport(ERROR,
1031  errmsg_internal("%s", errbuf.data));
1032 }
1033 
1034 /*
1035  * Quicksort comparator for block numbers.
1036  */
1037 static int
1038 compare_block_numbers(const void *a, const void *b)
1039 {
1040  BlockNumber aa = *(BlockNumber *) a;
1041  BlockNumber bb = *(BlockNumber *) b;
1042 
1043  return pg_cmp_u32(aa, bb);
1044 }
List * readTimeLineHistory(TimeLineID targetTLI)
Definition: timeline.c:76
#define BLOCKS_PER_READ
void AppendIncrementalManifestData(IncrementalBackupInfo *ib, const char *data, int len)
static void manifest_process_version(JsonManifestParseContext *context, int manifest_version)
static int compare_block_numbers(const void *a, const void *b)
static uint32 hash_string_pointer(const char *s)
size_t GetIncrementalHeaderSize(unsigned num_blocks_required)
char * GetIncrementalFilePath(Oid dboid, Oid spcoid, RelFileNumber relfilenumber, ForkNumber forknum, unsigned segno)
#define MAX_CHUNK
static void manifest_process_system_identifier(JsonManifestParseContext *context, uint64 manifest_system_identifier)
#define MIN_CHUNK
size_t GetIncrementalFileSize(unsigned num_blocks_required)
static void manifest_process_file(JsonManifestParseContext *context, const char *pathname, uint64 size, pg_checksum_type checksum_type, int checksum_length, uint8 *checksum_payload)
FileBackupMethod GetFileBackupMethod(IncrementalBackupInfo *ib, const char *path, Oid dboid, Oid spcoid, RelFileNumber relfilenumber, ForkNumber forknum, unsigned segno, size_t size, unsigned *num_blocks_required, BlockNumber *relative_block_numbers, unsigned *truncation_block_length)
static void pg_attribute_noreturn()
static void manifest_report_error(JsonManifestParseContext *context, const char *fmt,...) pg_attribute_printf(2
static void manifest_process_wal_range(JsonManifestParseContext *context, TimeLineID tli, XLogRecPtr start_lsn, XLogRecPtr end_lsn)
IncrementalBackupInfo * CreateIncrementalBackupInfo(MemoryContext mcxt)
void PrepareForIncrementalBackup(IncrementalBackupInfo *ib, BackupState *backup_state)
void FinalizeIncrementalManifest(IncrementalBackupInfo *ib)
@ BACK_UP_FILE_INCREMENTALLY
@ BACK_UP_FILE_FULLY
BlockRefTableReader * CreateBlockRefTableReader(io_callback_fn read_callback, void *read_callback_arg, char *error_filename, report_error_fn error_callback, void *error_callback_arg)
Definition: blkreftable.c:577
bool BlockRefTableReaderNextRelation(BlockRefTableReader *reader, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *limit_block)
Definition: blkreftable.c:613
int BlockRefTableEntryGetBlocks(BlockRefTableEntry *entry, BlockNumber start_blkno, BlockNumber stop_blkno, BlockNumber *blocks, int nblocks)
Definition: blkreftable.c:369
void BlockRefTableMarkBlockModified(BlockRefTable *brtab, const RelFileLocator *rlocator, ForkNumber forknum, BlockNumber blknum)
Definition: blkreftable.c:297
BlockRefTableEntry * BlockRefTableGetEntry(BlockRefTable *brtab, const RelFileLocator *rlocator, ForkNumber forknum, BlockNumber *limit_block)
Definition: blkreftable.c:340
unsigned BlockRefTableReaderGetBlocks(BlockRefTableReader *reader, BlockNumber *blocks, int nblocks)
Definition: blkreftable.c:689
void BlockRefTableSetLimitBlock(BlockRefTable *brtab, const RelFileLocator *rlocator, ForkNumber forknum, BlockNumber limit_block)
Definition: blkreftable.c:262
void DestroyBlockRefTableReader(BlockRefTableReader *reader)
Definition: blkreftable.c:773
void(*) BlockRefTable CreateEmptyBlockRefTable)(void)
uint32 BlockNumber
Definition: block.h:31
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition: block.h:71
uint8_t uint8
Definition: c.h:483
#define Assert(condition)
Definition: c.h:812
#define pg_attribute_printf(f, a)
Definition: c.h:210
uint64_t uint64
Definition: c.h:486
uint32_t uint32
Definition: c.h:485
#define OidIsValid(objectId)
Definition: c.h:729
pg_checksum_type
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1157
int errdetail(const char *fmt,...)
Definition: elog.c:1203
int errhint(const char *fmt,...)
Definition: elog.c:1317
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define DEBUG1
Definition: elog.h:30
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
void FileClose(File file)
Definition: fd.c:1977
char * FilePathName(File file)
Definition: fd.c:2483
uint32 hash_bytes(const unsigned char *k, int keylen)
Definition: hashfn.c:146
static int pg_cmp_u32(uint32 a, uint32 b)
Definition: int.h:652
int b
Definition: isn.c:69
int a
Definition: isn.c:68
int i
Definition: isn.c:72
static void const char * fmt
va_end(args)
va_start(args, fmt)
List * lappend(List *list, void *datum)
Definition: list.c:339
List * list_concat(List *list1, const List *list2)
Definition: list.c:561
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc0(Size size)
Definition: mcxt.c:1347
char * MemoryContextStrdup(MemoryContext context, const char *string)
Definition: mcxt.c:1683
void * palloc(Size size)
Definition: mcxt.c:1317
JsonManifestParseIncrementalState * json_parse_manifest_incremental_init(JsonManifestParseContext *context)
void json_parse_manifest_incremental_shutdown(JsonManifestParseIncrementalState *incstate)
void json_parse_manifest_incremental_chunk(JsonManifestParseIncrementalState *incstate, const char *chunk, size_t size, bool is_last)
const void size_t len
const void * data
#define lfirst(lc)
Definition: pg_list.h:172
static int list_length(const List *l)
Definition: pg_list.h:152
#define NIL
Definition: pg_list.h:68
static void * list_nth(const List *list, int n)
Definition: pg_list.h:299
#define qsort(a, b, c, d)
Definition: port.h:447
unsigned int Oid
Definition: postgres_ext.h:31
#define INVALID_PROC_NUMBER
Definition: procnumber.h:26
char * psprintf(const char *fmt,...)
Definition: psprintf.c:43
tree context
Definition: radixtree.h:1837
MemoryContextSwitchTo(old_ctx)
static struct cvec * range(struct vars *v, chr a, chr b, int cases)
Definition: regc_locale.c:412
char * GetRelationPath(Oid dbOid, Oid spcOid, RelFileNumber relNumber, int procNumber, ForkNumber forkNumber)
Definition: relpath.c:142
Oid RelFileNumber
Definition: relpath.h:25
ForkNumber
Definition: relpath.h:56
@ FSM_FORKNUM
Definition: relpath.h:59
@ MAIN_FORKNUM
Definition: relpath.h:58
#define RelFileNumberIsValid(relnumber)
Definition: relpath.h:27
static pg_noinline void Size size
Definition: slab.c:607
int appendStringInfoVA(StringInfo str, const char *fmt, va_list args)
Definition: stringinfo.c:136
void enlargeStringInfo(StringInfo str, int needed)
Definition: stringinfo.c:286
void appendBinaryStringInfo(StringInfo str, const void *data, int datalen)
Definition: stringinfo.c:230
void initStringInfo(StringInfo str)
Definition: stringinfo.c:56
TimeLineID istarttli
Definition: xlogbackup.h:32
TimeLineID starttli
Definition: xlogbackup.h:27
XLogRecPtr startpoint
Definition: xlogbackup.h:26
XLogRecPtr istartpoint
Definition: xlogbackup.h:31
backup_file_hash * manifest_files
JsonManifestParseIncrementalState * inc_state
Definition: pg_list.h:54
RelFileNumber relNumber
XLogRecPtr begin
Definition: timeline.h:28
TimeLineID tli
Definition: timeline.h:27
XLogRecPtr end
Definition: timeline.h:29
off_t filepos
Definition: walsummary.h:24
uint32 status
const char * path
uint64 size
void WaitForWalSummarization(XLogRecPtr lsn)
File OpenWalSummaryFile(WalSummaryFile *ws, bool missing_ok)
Definition: walsummary.c:205
List * GetWalSummaries(TimeLineID tli, XLogRecPtr start_lsn, XLogRecPtr end_lsn)
Definition: walsummary.c:43
bool WalSummariesAreComplete(List *wslist, XLogRecPtr start_lsn, XLogRecPtr end_lsn, XLogRecPtr *missing_lsn)
Definition: walsummary.c:138
int ReadWalSummary(void *wal_summary_io, void *data, int length)
Definition: walsummary.c:273
List * FilterWalSummaries(List *wslist, TimeLineID tli, XLogRecPtr start_lsn, XLogRecPtr end_lsn)
Definition: walsummary.c:100
void ReportWalSummaryError(void *callback_arg, char *fmt,...)
Definition: walsummary.c:322
uint64 GetSystemIdentifier(void)
Definition: xlog.c:4568
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
uint32 TimeLineID
Definition: xlogdefs.h:59
static BackupState * backup_state
Definition: xlogfuncs.c:41
static List * expectedTLEs
Definition: xlogrecovery.c:123