PostgreSQL Source Code  git master
basebackup.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * basebackup.c
4  * code for taking a base backup and streaming it to a standby
5  *
6  * Portions Copyright (c) 2010-2020, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * src/backend/replication/basebackup.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14 
15 #include <sys/stat.h>
16 #include <unistd.h>
17 #include <time.h>
18 
19 #include "access/xlog_internal.h" /* for pg_start/stop_backup */
20 #include "catalog/pg_type.h"
21 #include "common/file_perm.h"
22 #include "lib/stringinfo.h"
23 #include "libpq/libpq.h"
24 #include "libpq/pqformat.h"
25 #include "miscadmin.h"
26 #include "nodes/pg_list.h"
27 #include "pgstat.h"
28 #include "pgtar.h"
29 #include "port.h"
30 #include "postmaster/syslogger.h"
31 #include "replication/basebackup.h"
32 #include "replication/walsender.h"
34 #include "storage/bufpage.h"
35 #include "storage/checksum.h"
36 #include "storage/dsm_impl.h"
37 #include "storage/fd.h"
38 #include "storage/ipc.h"
39 #include "storage/reinit.h"
40 #include "utils/builtins.h"
41 #include "utils/ps_status.h"
42 #include "utils/relcache.h"
43 #include "utils/timestamp.h"
44 
45 typedef struct
46 {
47  const char *label;
48  bool progress;
50  bool nowait;
51  bool includewal;
55 
56 
57 static int64 sendDir(const char *path, int basepathlen, bool sizeonly,
58  List *tablespaces, bool sendtblspclinks);
59 static bool sendFile(const char *readfilename, const char *tarfilename,
60  struct stat *statbuf, bool missing_ok, Oid dboid);
61 static void sendFileWithContent(const char *filename, const char *content);
62 static int64 _tarWriteHeader(const char *filename, const char *linktarget,
63  struct stat *statbuf, bool sizeonly);
64 static int64 _tarWriteDir(const char *pathbuf, int basepathlen, struct stat *statbuf,
65  bool sizeonly);
66 static void send_int8_string(StringInfoData *buf, int64 intval);
67 static void SendBackupHeader(List *tablespaces);
68 static void perform_base_backup(basebackup_options *opt);
70 static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli);
71 static int compareWalFileNames(const ListCell *a, const ListCell *b);
72 static void throttle(size_t increment);
73 static bool is_checksummed_file(const char *fullpath, const char *filename);
74 
75 /* Was the backup currently in-progress initiated in recovery mode? */
76 static bool backup_started_in_recovery = false;
77 
78 /* Relative path of temporary statistics directory */
79 static char *statrelpath = NULL;
80 
81 /*
82  * Size of each block sent into the tar stream for larger files.
83  */
84 #define TAR_SEND_SIZE 32768
85 
86 /*
87  * How frequently to throttle, as a fraction of the specified rate-second.
88  */
89 #define THROTTLING_FREQUENCY 8
90 
/*
 * Raise an ERROR if the stdio stream 'fp' has its error indicator set.
 *
 * fread() itself gives no clue about what went wrong, so ferror() is
 * consulted instead.  Neither fread() nor ferror() sets errno, which is why
 * the report is a generic one that only names 'filename'.
 */
#define CHECK_FREAD_ERROR(fp, filename) \
	do \
	{ \
		if (ferror(fp) != 0) \
			ereport(ERROR, \
					(errmsg("could not read from file \"%s\"", filename))); \
	} while (0)
102 
103 /* The actual number of bytes, transfer of which may cause sleep. */
104 static uint64 throttling_sample;
105 
106 /* Amount of data already transferred but not yet throttled. */
107 static int64 throttling_counter;
108 
109 /* The minimum time required to transfer throttling_sample bytes. */
111 
112 /* The last check of the transfer rate. */
114 
115 /* The starting XLOG position of the base backup. */
117 
118 /* Total number of checksum failures during base backup. */
119 static long long int total_checksum_failures;
120 
121 /* Do not verify checksums. */
122 static bool noverify_checksums = false;
123 
124 /*
125  * Definition of one element part of an exclusion list, used for paths part
126  * of checksum validation or base backups. "name" is the name of the file
127  * or path to check for exclusion. If "match_prefix" is true, any items
128  * matching the name as prefix are excluded.
129  */
131 {
132  const char *name;
134 };
135 
136 /*
137  * The contents of these directories are removed or recreated during server
138  * start so they are not included in backups. The directories themselves are
139  * kept and included as empty to preserve access permissions.
140  *
141  * Note: this list should be kept in sync with the filter lists in pg_rewind's
142  * filemap.c.
143  */
144 static const char *const excludeDirContents[] =
145 {
146  /*
147  * Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped even
148  * when stats_temp_directory is set because PGSS_TEXT_FILE is always
149  * created there.
150  */
152 
153  /*
154  * It is generally not useful to backup the contents of this directory
155  * even if the intention is to restore to another master. See backup.sgml
156  * for a more detailed description.
157  */
158  "pg_replslot",
159 
160  /* Contents removed on startup, see dsm_cleanup_for_mmap(). */
162 
163  /* Contents removed on startup, see AsyncShmemInit(). */
164  "pg_notify",
165 
166  /*
167  * Old contents are loaded for possible debugging but are not required for
168  * normal operation, see OldSerXidInit().
169  */
170  "pg_serial",
171 
172  /* Contents removed on startup, see DeleteAllExportedSnapshotFiles(). */
173  "pg_snapshots",
174 
175  /* Contents zeroed on startup, see StartupSUBTRANS(). */
176  "pg_subtrans",
177 
178  /* end of list */
179  NULL
180 };
181 
182 /*
183  * List of files excluded from backups.
184  */
185 static const struct exclude_list_item excludeFiles[] =
186 {
187  /* Skip auto conf temporary file. */
188  {PG_AUTOCONF_FILENAME ".tmp", false},
189 
190  /* Skip current log file temporary file */
191  {LOG_METAINFO_DATAFILE_TMP, false},
192 
193  /*
194  * Skip relation cache because it is rebuilt on startup. This includes
195  * temporary files.
196  */
197  {RELCACHE_INIT_FILENAME, true},
198 
199  /*
200  * If there's a backup_label or tablespace_map file, it belongs to a
201  * backup started by the user with pg_start_backup(). It is *not* correct
202  * for this backup. Our backup_label/tablespace_map is injected into the
203  * tar separately.
204  */
205  {BACKUP_LABEL_FILE, false},
206  {TABLESPACE_MAP, false},
207 
208  {"postmaster.pid", false},
209  {"postmaster.opts", false},
210 
211  /* end of list */
212  {NULL, false}
213 };
214 
215 /*
216  * List of files excluded from checksum validation.
217  *
218  * Note: this list should be kept in sync with what pg_checksums.c
219  * includes.
220  */
221 static const struct exclude_list_item noChecksumFiles[] = {
222  {"pg_control", false},
223  {"pg_filenode.map", false},
224  {"pg_internal.init", true},
225  {"PG_VERSION", false},
226 #ifdef EXEC_BACKEND
227  {"config_exec_params", true},
228 #endif
229  {NULL, false}
230 };
231 
232 /*
233  * Actually do a base backup for the specified tablespaces.
234  *
235  * This is split out mainly to avoid complaints about "variable might be
236  * clobbered by longjmp" from stupider versions of gcc.
237  */
238 static void
240 {
241  TimeLineID starttli;
242  XLogRecPtr endptr;
243  TimeLineID endtli;
244  StringInfo labelfile;
246  int datadirpathlen;
247  List *tablespaces = NIL;
248 
249  datadirpathlen = strlen(DataDir);
250 
252 
253  labelfile = makeStringInfo();
254  tblspc_map_file = makeStringInfo();
255 
257 
258  startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &starttli,
259  labelfile, &tablespaces,
260  tblspc_map_file,
261  opt->progress, opt->sendtblspcmapfile);
262 
263  /*
264  * Once do_pg_start_backup has been called, ensure that any failure causes
265  * us to abort the backup so we don't "leak" a backup counter. For this
266  * reason, *all* functionality between do_pg_start_backup() and the end of
267  * do_pg_stop_backup() should be inside the error cleanup block!
268  */
269 
271  {
272  ListCell *lc;
273  tablespaceinfo *ti;
274 
275  SendXlogRecPtrResult(startptr, starttli);
276 
277  /*
278  * Calculate the relative path of temporary statistics directory in
279  * order to skip the files which are located in that directory later.
280  */
282  strncmp(pgstat_stat_directory, DataDir, datadirpathlen) == 0)
283  statrelpath = psprintf("./%s", pgstat_stat_directory + datadirpathlen + 1);
284  else if (strncmp(pgstat_stat_directory, "./", 2) != 0)
286  else
288 
289  /* Add a node for the base directory at the end */
290  ti = palloc0(sizeof(tablespaceinfo));
291  ti->size = opt->progress ? sendDir(".", 1, true, tablespaces, true) : -1;
292  tablespaces = lappend(tablespaces, ti);
293 
294  /* Send tablespace header */
295  SendBackupHeader(tablespaces);
296 
297  /* Setup and activate network throttling, if client requested it */
298  if (opt->maxrate > 0)
299  {
301  (int64) opt->maxrate * (int64) 1024 / THROTTLING_FREQUENCY;
302 
303  /*
304  * The minimum amount of time for throttling_sample bytes to be
305  * transferred.
306  */
308 
309  /* Enable throttling. */
310  throttling_counter = 0;
311 
312  /* The 'real data' starts now (header was ignored). */
314  }
315  else
316  {
317  /* Disable throttling. */
318  throttling_counter = -1;
319  }
320 
321  /* Send off our tablespaces one by one */
322  foreach(lc, tablespaces)
323  {
324  tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc);
326 
327  /* Send CopyOutResponse message */
328  pq_beginmessage(&buf, 'H');
329  pq_sendbyte(&buf, 0); /* overall format */
330  pq_sendint16(&buf, 0); /* natts */
331  pq_endmessage(&buf);
332 
333  if (ti->path == NULL)
334  {
335  struct stat statbuf;
336 
337  /* In the main tar, include the backup_label first... */
339 
340  /*
341  * Send tablespace_map file if required and then the bulk of
342  * the files.
343  */
344  if (tblspc_map_file && opt->sendtblspcmapfile)
345  {
346  sendFileWithContent(TABLESPACE_MAP, tblspc_map_file->data);
347  sendDir(".", 1, false, tablespaces, false);
348  }
349  else
350  sendDir(".", 1, false, tablespaces, true);
351 
352  /* ... and pg_control after everything else. */
353  if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
354  ereport(ERROR,
356  errmsg("could not stat file \"%s\": %m",
359  }
360  else
361  sendTablespace(ti->path, false);
362 
363  /*
364  * If we're including WAL, and this is the main data directory we
365  * don't terminate the tar stream here. Instead, we will append
366  * the xlog files below and terminate it then. This is safe since
367  * the main data directory is always sent *last*.
368  */
369  if (opt->includewal && ti->path == NULL)
370  {
371  Assert(lnext(tablespaces, lc) == NULL);
372  }
373  else
374  pq_putemptymessage('c'); /* CopyDone */
375  }
376 
377  endptr = do_pg_stop_backup(labelfile->data, !opt->nowait, &endtli);
378  }
380 
381 
382  if (opt->includewal)
383  {
384  /*
385  * We've left the last tar file "open", so we can now append the
386  * required WAL files to it.
387  */
388  char pathbuf[MAXPGPATH];
389  XLogSegNo segno;
390  XLogSegNo startsegno;
391  XLogSegNo endsegno;
392  struct stat statbuf;
393  List *historyFileList = NIL;
394  List *walFileList = NIL;
395  char firstoff[MAXFNAMELEN];
396  char lastoff[MAXFNAMELEN];
397  DIR *dir;
398  struct dirent *de;
399  ListCell *lc;
400  TimeLineID tli;
401 
402  /*
403  * I'd rather not worry about timelines here, so scan pg_wal and
404  * include all WAL files in the range between 'startptr' and 'endptr',
405  * regardless of the timeline the file is stamped with. If there are
406  * some spurious WAL files belonging to timelines that don't belong in
407  * this server's history, they will be included too. Normally there
408  * shouldn't be such files, but if there are, there's little harm in
409  * including them.
410  */
411  XLByteToSeg(startptr, startsegno, wal_segment_size);
412  XLogFileName(firstoff, ThisTimeLineID, startsegno, wal_segment_size);
413  XLByteToPrevSeg(endptr, endsegno, wal_segment_size);
414  XLogFileName(lastoff, ThisTimeLineID, endsegno, wal_segment_size);
415 
416  dir = AllocateDir("pg_wal");
417  while ((de = ReadDir(dir, "pg_wal")) != NULL)
418  {
419  /* Does it look like a WAL segment, and is it in the range? */
420  if (IsXLogFileName(de->d_name) &&
421  strcmp(de->d_name + 8, firstoff + 8) >= 0 &&
422  strcmp(de->d_name + 8, lastoff + 8) <= 0)
423  {
424  walFileList = lappend(walFileList, pstrdup(de->d_name));
425  }
426  /* Does it look like a timeline history file? */
427  else if (IsTLHistoryFileName(de->d_name))
428  {
429  historyFileList = lappend(historyFileList, pstrdup(de->d_name));
430  }
431  }
432  FreeDir(dir);
433 
434  /*
435  * Before we go any further, check that none of the WAL segments we
436  * need were removed.
437  */
438  CheckXLogRemoved(startsegno, ThisTimeLineID);
439 
440  /*
441  * Sort the WAL filenames. We want to send the files in order from
442  * oldest to newest, to reduce the chance that a file is recycled
443  * before we get a chance to send it over.
444  */
445  list_sort(walFileList, compareWalFileNames);
446 
447  /*
448  * There must be at least one xlog file in the pg_wal directory, since
449  * we are doing backup-including-xlog.
450  */
451  if (walFileList == NIL)
452  ereport(ERROR,
453  (errmsg("could not find any WAL files")));
454 
455  /*
456  * Sanity check: the first and last segment should cover startptr and
457  * endptr, with no gaps in between.
458  */
459  XLogFromFileName((char *) linitial(walFileList),
460  &tli, &segno, wal_segment_size);
461  if (segno != startsegno)
462  {
463  char startfname[MAXFNAMELEN];
464 
465  XLogFileName(startfname, ThisTimeLineID, startsegno,
467  ereport(ERROR,
468  (errmsg("could not find WAL file \"%s\"", startfname)));
469  }
470  foreach(lc, walFileList)
471  {
472  char *walFileName = (char *) lfirst(lc);
473  XLogSegNo currsegno = segno;
474  XLogSegNo nextsegno = segno + 1;
475 
476  XLogFromFileName(walFileName, &tli, &segno, wal_segment_size);
477  if (!(nextsegno == segno || currsegno == segno))
478  {
479  char nextfname[MAXFNAMELEN];
480 
481  XLogFileName(nextfname, ThisTimeLineID, nextsegno,
483  ereport(ERROR,
484  (errmsg("could not find WAL file \"%s\"", nextfname)));
485  }
486  }
487  if (segno != endsegno)
488  {
489  char endfname[MAXFNAMELEN];
490 
491  XLogFileName(endfname, ThisTimeLineID, endsegno, wal_segment_size);
492  ereport(ERROR,
493  (errmsg("could not find WAL file \"%s\"", endfname)));
494  }
495 
496  /* Ok, we have everything we need. Send the WAL files. */
497  foreach(lc, walFileList)
498  {
499  char *walFileName = (char *) lfirst(lc);
500  FILE *fp;
501  char buf[TAR_SEND_SIZE];
502  size_t cnt;
503  pgoff_t len = 0;
504 
505  snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", walFileName);
506  XLogFromFileName(walFileName, &tli, &segno, wal_segment_size);
507 
508  fp = AllocateFile(pathbuf, "rb");
509  if (fp == NULL)
510  {
511  int save_errno = errno;
512 
513  /*
514  * Most likely reason for this is that the file was already
515  * removed by a checkpoint, so check for that to get a better
516  * error message.
517  */
518  CheckXLogRemoved(segno, tli);
519 
520  errno = save_errno;
521  ereport(ERROR,
523  errmsg("could not open file \"%s\": %m", pathbuf)));
524  }
525 
526  if (fstat(fileno(fp), &statbuf) != 0)
527  ereport(ERROR,
529  errmsg("could not stat file \"%s\": %m",
530  pathbuf)));
531  if (statbuf.st_size != wal_segment_size)
532  {
533  CheckXLogRemoved(segno, tli);
534  ereport(ERROR,
536  errmsg("unexpected WAL file size \"%s\"", walFileName)));
537  }
538 
539  /* send the WAL file itself */
540  _tarWriteHeader(pathbuf, NULL, &statbuf, false);
541 
542  while ((cnt = fread(buf, 1,
543  Min(sizeof(buf), wal_segment_size - len),
544  fp)) > 0)
545  {
546  CheckXLogRemoved(segno, tli);
547  /* Send the chunk as a CopyData message */
548  if (pq_putmessage('d', buf, cnt))
549  ereport(ERROR,
550  (errmsg("base backup could not send data, aborting backup")));
551 
552  len += cnt;
553  throttle(cnt);
554 
555  if (len == wal_segment_size)
556  break;
557  }
558 
559  CHECK_FREAD_ERROR(fp, pathbuf);
560 
561  if (len != wal_segment_size)
562  {
563  CheckXLogRemoved(segno, tli);
564  ereport(ERROR,
566  errmsg("unexpected WAL file size \"%s\"", walFileName)));
567  }
568 
569  /* wal_segment_size is a multiple of 512, so no need for padding */
570 
571  FreeFile(fp);
572 
573  /*
574  * Mark file as archived, otherwise files can get archived again
575  * after promotion of a new node. This is in line with
576  * walreceiver.c always doing an XLogArchiveForceDone() after a
577  * complete segment.
578  */
579  StatusFilePath(pathbuf, walFileName, ".done");
580  sendFileWithContent(pathbuf, "");
581  }
582 
583  /*
584  * Send timeline history files too. Only the latest timeline history
585  * file is required for recovery, and even that only if there happens
586  * to be a timeline switch in the first WAL segment that contains the
587  * checkpoint record, or if we're taking a base backup from a standby
588  * server and the target timeline changes while the backup is taken.
589  * But they are small and highly useful for debugging purposes, so
590  * better include them all, always.
591  */
592  foreach(lc, historyFileList)
593  {
594  char *fname = lfirst(lc);
595 
596  snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", fname);
597 
598  if (lstat(pathbuf, &statbuf) != 0)
599  ereport(ERROR,
601  errmsg("could not stat file \"%s\": %m", pathbuf)));
602 
603  sendFile(pathbuf, pathbuf, &statbuf, false, InvalidOid);
604 
605  /* unconditionally mark file as archived */
606  StatusFilePath(pathbuf, fname, ".done");
607  sendFileWithContent(pathbuf, "");
608  }
609 
610  /* Send CopyDone message for the last tar file */
611  pq_putemptymessage('c');
612  }
613  SendXlogRecPtrResult(endptr, endtli);
614 
616  {
617  if (total_checksum_failures > 1)
619  (errmsg("%lld total checksum verification failures", total_checksum_failures)));
620 
621  ereport(ERROR,
623  errmsg("checksum verification failure during base backup")));
624  }
625 
626 }
627 
628 /*
629  * list_sort comparison function, to compare log/seg portion of WAL segment
630  * filenames, ignoring the timeline portion.
631  */
632 static int
634 {
635  char *fna = (char *) lfirst(a);
636  char *fnb = (char *) lfirst(b);
637 
638  return strcmp(fna + 8, fnb + 8);
639 }
640 
641 /*
642  * Parse the base backup options passed down by the parser
643  */
644 static void
646 {
647  ListCell *lopt;
648  bool o_label = false;
649  bool o_progress = false;
650  bool o_fast = false;
651  bool o_nowait = false;
652  bool o_wal = false;
653  bool o_maxrate = false;
654  bool o_tablespace_map = false;
655  bool o_noverify_checksums = false;
656 
657  MemSet(opt, 0, sizeof(*opt));
658  foreach(lopt, options)
659  {
660  DefElem *defel = (DefElem *) lfirst(lopt);
661 
662  if (strcmp(defel->defname, "label") == 0)
663  {
664  if (o_label)
665  ereport(ERROR,
666  (errcode(ERRCODE_SYNTAX_ERROR),
667  errmsg("duplicate option \"%s\"", defel->defname)));
668  opt->label = strVal(defel->arg);
669  o_label = true;
670  }
671  else if (strcmp(defel->defname, "progress") == 0)
672  {
673  if (o_progress)
674  ereport(ERROR,
675  (errcode(ERRCODE_SYNTAX_ERROR),
676  errmsg("duplicate option \"%s\"", defel->defname)));
677  opt->progress = true;
678  o_progress = true;
679  }
680  else if (strcmp(defel->defname, "fast") == 0)
681  {
682  if (o_fast)
683  ereport(ERROR,
684  (errcode(ERRCODE_SYNTAX_ERROR),
685  errmsg("duplicate option \"%s\"", defel->defname)));
686  opt->fastcheckpoint = true;
687  o_fast = true;
688  }
689  else if (strcmp(defel->defname, "nowait") == 0)
690  {
691  if (o_nowait)
692  ereport(ERROR,
693  (errcode(ERRCODE_SYNTAX_ERROR),
694  errmsg("duplicate option \"%s\"", defel->defname)));
695  opt->nowait = true;
696  o_nowait = true;
697  }
698  else if (strcmp(defel->defname, "wal") == 0)
699  {
700  if (o_wal)
701  ereport(ERROR,
702  (errcode(ERRCODE_SYNTAX_ERROR),
703  errmsg("duplicate option \"%s\"", defel->defname)));
704  opt->includewal = true;
705  o_wal = true;
706  }
707  else if (strcmp(defel->defname, "max_rate") == 0)
708  {
709  long maxrate;
710 
711  if (o_maxrate)
712  ereport(ERROR,
713  (errcode(ERRCODE_SYNTAX_ERROR),
714  errmsg("duplicate option \"%s\"", defel->defname)));
715 
716  maxrate = intVal(defel->arg);
717  if (maxrate < MAX_RATE_LOWER || maxrate > MAX_RATE_UPPER)
718  ereport(ERROR,
719  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
720  errmsg("%d is outside the valid range for parameter \"%s\" (%d .. %d)",
721  (int) maxrate, "MAX_RATE", MAX_RATE_LOWER, MAX_RATE_UPPER)));
722 
723  opt->maxrate = (uint32) maxrate;
724  o_maxrate = true;
725  }
726  else if (strcmp(defel->defname, "tablespace_map") == 0)
727  {
728  if (o_tablespace_map)
729  ereport(ERROR,
730  (errcode(ERRCODE_SYNTAX_ERROR),
731  errmsg("duplicate option \"%s\"", defel->defname)));
732  opt->sendtblspcmapfile = true;
733  o_tablespace_map = true;
734  }
735  else if (strcmp(defel->defname, "noverify_checksums") == 0)
736  {
737  if (o_noverify_checksums)
738  ereport(ERROR,
739  (errcode(ERRCODE_SYNTAX_ERROR),
740  errmsg("duplicate option \"%s\"", defel->defname)));
741  noverify_checksums = true;
742  o_noverify_checksums = true;
743  }
744  else
745  elog(ERROR, "option \"%s\" not recognized",
746  defel->defname);
747  }
748  if (opt->label == NULL)
749  opt->label = "base backup";
750 }
751 
752 
753 /*
754  * SendBaseBackup() - send a complete base backup.
755  *
756  * The function will put the system into backup mode like pg_start_backup()
757  * does, so that the backup is consistent even though we read directly from
758  * the filesystem, bypassing the buffer cache.
759  */
760 void
762 {
763  basebackup_options opt;
764 
765  parse_basebackup_options(cmd->options, &opt);
766 
768 
770  {
771  char activitymsg[50];
772 
773  snprintf(activitymsg, sizeof(activitymsg), "sending backup \"%s\"",
774  opt.label);
775  set_ps_display(activitymsg, false);
776  }
777 
778  perform_base_backup(&opt);
779 }
780 
781 static void
783 {
784  char is[32];
785 
786  sprintf(is, INT64_FORMAT, intval);
787  pq_sendint32(buf, strlen(is));
788  pq_sendbytes(buf, is, strlen(is));
789 }
790 
791 static void
792 SendBackupHeader(List *tablespaces)
793 {
795  ListCell *lc;
796 
797  /* Construct and send the directory information */
798  pq_beginmessage(&buf, 'T'); /* RowDescription */
799  pq_sendint16(&buf, 3); /* 3 fields */
800 
801  /* First field - spcoid */
802  pq_sendstring(&buf, "spcoid");
803  pq_sendint32(&buf, 0); /* table oid */
804  pq_sendint16(&buf, 0); /* attnum */
805  pq_sendint32(&buf, OIDOID); /* type oid */
806  pq_sendint16(&buf, 4); /* typlen */
807  pq_sendint32(&buf, 0); /* typmod */
808  pq_sendint16(&buf, 0); /* format code */
809 
810  /* Second field - spclocation */
811  pq_sendstring(&buf, "spclocation");
812  pq_sendint32(&buf, 0);
813  pq_sendint16(&buf, 0);
814  pq_sendint32(&buf, TEXTOID);
815  pq_sendint16(&buf, -1);
816  pq_sendint32(&buf, 0);
817  pq_sendint16(&buf, 0);
818 
819  /* Third field - size */
820  pq_sendstring(&buf, "size");
821  pq_sendint32(&buf, 0);
822  pq_sendint16(&buf, 0);
823  pq_sendint32(&buf, INT8OID);
824  pq_sendint16(&buf, 8);
825  pq_sendint32(&buf, 0);
826  pq_sendint16(&buf, 0);
827  pq_endmessage(&buf);
828 
829  foreach(lc, tablespaces)
830  {
831  tablespaceinfo *ti = lfirst(lc);
832 
833  /* Send one datarow message */
834  pq_beginmessage(&buf, 'D');
835  pq_sendint16(&buf, 3); /* number of columns */
836  if (ti->path == NULL)
837  {
838  pq_sendint32(&buf, -1); /* Length = -1 ==> NULL */
839  pq_sendint32(&buf, -1);
840  }
841  else
842  {
843  Size len;
844 
845  len = strlen(ti->oid);
846  pq_sendint32(&buf, len);
847  pq_sendbytes(&buf, ti->oid, len);
848 
849  len = strlen(ti->path);
850  pq_sendint32(&buf, len);
851  pq_sendbytes(&buf, ti->path, len);
852  }
853  if (ti->size >= 0)
854  send_int8_string(&buf, ti->size / 1024);
855  else
856  pq_sendint32(&buf, -1); /* NULL */
857 
858  pq_endmessage(&buf);
859  }
860 
861  /* Send a CommandComplete message */
862  pq_puttextmessage('C', "SELECT");
863 }
864 
865 /*
866  * Send a single resultset containing just a single
867  * XLogRecPtr record (in text format)
868  */
869 static void
871 {
873  char str[MAXFNAMELEN];
874  Size len;
875 
876  pq_beginmessage(&buf, 'T'); /* RowDescription */
877  pq_sendint16(&buf, 2); /* 2 fields */
878 
879  /* Field headers */
880  pq_sendstring(&buf, "recptr");
881  pq_sendint32(&buf, 0); /* table oid */
882  pq_sendint16(&buf, 0); /* attnum */
883  pq_sendint32(&buf, TEXTOID); /* type oid */
884  pq_sendint16(&buf, -1);
885  pq_sendint32(&buf, 0);
886  pq_sendint16(&buf, 0);
887 
888  pq_sendstring(&buf, "tli");
889  pq_sendint32(&buf, 0); /* table oid */
890  pq_sendint16(&buf, 0); /* attnum */
891 
892  /*
893  * int8 may seem like a surprising data type for this, but in theory int4
894  * would not be wide enough for this, as TimeLineID is unsigned.
895  */
896  pq_sendint32(&buf, INT8OID); /* type oid */
897  pq_sendint16(&buf, -1);
898  pq_sendint32(&buf, 0);
899  pq_sendint16(&buf, 0);
900  pq_endmessage(&buf);
901 
902  /* Data row */
903  pq_beginmessage(&buf, 'D');
904  pq_sendint16(&buf, 2); /* number of columns */
905 
906  len = snprintf(str, sizeof(str),
907  "%X/%X", (uint32) (ptr >> 32), (uint32) ptr);
908  pq_sendint32(&buf, len);
909  pq_sendbytes(&buf, str, len);
910 
911  len = snprintf(str, sizeof(str), "%u", tli);
912  pq_sendint32(&buf, len);
913  pq_sendbytes(&buf, str, len);
914 
915  pq_endmessage(&buf);
916 
917  /* Send a CommandComplete message */
918  pq_puttextmessage('C', "SELECT");
919 }
920 
921 /*
922  * Inject a file with given name and content in the output tar stream.
923  */
924 static void
925 sendFileWithContent(const char *filename, const char *content)
926 {
927  struct stat statbuf;
928  int pad,
929  len;
930 
931  len = strlen(content);
932 
933  /*
934  * Construct a stat struct for the backup_label file we're injecting in
935  * the tar.
936  */
937  /* Windows doesn't have the concept of uid and gid */
938 #ifdef WIN32
939  statbuf.st_uid = 0;
940  statbuf.st_gid = 0;
941 #else
942  statbuf.st_uid = geteuid();
943  statbuf.st_gid = getegid();
944 #endif
945  statbuf.st_mtime = time(NULL);
946  statbuf.st_mode = pg_file_create_mode;
947  statbuf.st_size = len;
948 
949  _tarWriteHeader(filename, NULL, &statbuf, false);
950  /* Send the contents as a CopyData message */
951  pq_putmessage('d', content, len);
952 
953  /* Pad to 512 byte boundary, per tar format requirements */
954  pad = ((len + 511) & ~511) - len;
955  if (pad > 0)
956  {
957  char buf[512];
958 
959  MemSet(buf, 0, pad);
960  pq_putmessage('d', buf, pad);
961  }
962 }
963 
964 /*
965  * Include the tablespace directory pointed to by 'path' in the output tar
966  * stream. If 'sizeonly' is true, we just calculate a total length and return
967  * it, without actually sending anything.
968  *
969  * Only used to send auxiliary tablespaces, not PGDATA.
970  */
971 int64
972 sendTablespace(char *path, bool sizeonly)
973 {
974  int64 size;
975  char pathbuf[MAXPGPATH];
976  struct stat statbuf;
977 
978  /*
979  * 'path' points to the tablespace location, but we only want to include
980  * the version directory in it that belongs to us.
981  */
982  snprintf(pathbuf, sizeof(pathbuf), "%s/%s", path,
984 
985  /*
986  * Store a directory entry in the tar file so we get the permissions
987  * right.
988  */
989  if (lstat(pathbuf, &statbuf) != 0)
990  {
991  if (errno != ENOENT)
992  ereport(ERROR,
994  errmsg("could not stat file or directory \"%s\": %m",
995  pathbuf)));
996 
997  /* If the tablespace went away while scanning, it's no error. */
998  return 0;
999  }
1000 
1001  size = _tarWriteHeader(TABLESPACE_VERSION_DIRECTORY, NULL, &statbuf,
1002  sizeonly);
1003 
1004  /* Send all the files in the tablespace version directory */
1005  size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true);
1006 
1007  return size;
1008 }
1009 
1010 /*
1011  * Include all files from the given directory in the output tar stream. If
1012  * 'sizeonly' is true, we just calculate a total length and return it, without
1013  * actually sending anything.
1014  *
1015  * Omit any directory in the tablespaces list, to avoid backing up
1016  * tablespaces twice when they were created inside PGDATA.
1017  *
1018  * If sendtblspclinks is true, we need to include symlink
1019  * information in the tar file. If not, we can skip that
1020  * as it will be sent separately in the tablespace_map file.
1021  */
1022 static int64
1023 sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces,
1024  bool sendtblspclinks)
1025 {
1026  DIR *dir;
1027  struct dirent *de;
1028  char pathbuf[MAXPGPATH * 2];
1029  struct stat statbuf;
1030  int64 size = 0;
1031  const char *lastDir; /* Split last dir from parent path. */
1032  bool isDbDir = false; /* Does this directory contain relations? */
1033 
1034  /*
1035  * Determine if the current path is a database directory that can contain
1036  * relations.
1037  *
1038  * Start by finding the location of the delimiter between the parent path
1039  * and the current path.
1040  */
1041  lastDir = last_dir_separator(path);
1042 
1043  /* Does this path look like a database path (i.e. all digits)? */
1044  if (lastDir != NULL &&
1045  strspn(lastDir + 1, "0123456789") == strlen(lastDir + 1))
1046  {
1047  /* Part of path that contains the parent directory. */
1048  int parentPathLen = lastDir - path;
1049 
1050  /*
1051  * Mark path as a database directory if the parent path is either
1052  * $PGDATA/base or a tablespace version path.
1053  */
1054  if (strncmp(path, "./base", parentPathLen) == 0 ||
1055  (parentPathLen >= (sizeof(TABLESPACE_VERSION_DIRECTORY) - 1) &&
1056  strncmp(lastDir - (sizeof(TABLESPACE_VERSION_DIRECTORY) - 1),
1058  sizeof(TABLESPACE_VERSION_DIRECTORY) - 1) == 0))
1059  isDbDir = true;
1060  }
1061 
1062  dir = AllocateDir(path);
1063  while ((de = ReadDir(dir, path)) != NULL)
1064  {
1065  int excludeIdx;
1066  bool excludeFound;
1067  ForkNumber relForkNum; /* Type of fork if file is a relation */
1068  int relOidChars; /* Chars in filename that are the rel oid */
1069 
1070  /* Skip special stuff */
1071  if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
1072  continue;
1073 
1074  /* Skip temporary files */
1075  if (strncmp(de->d_name,
1077  strlen(PG_TEMP_FILE_PREFIX)) == 0)
1078  continue;
1079 
1080  /*
1081  * Check if the postmaster has signaled us to exit, and abort with an
1082  * error in that case. The error handler further up will call
1083  * do_pg_abort_backup() for us. Also check that if the backup was
1084  * started while still in recovery, the server wasn't promoted.
1085  * do_pg_stop_backup() will check that too, but it's better to stop
1086  * the backup early than continue to the end and fail there.
1087  */
1090  ereport(ERROR,
1091  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1092  errmsg("the standby was promoted during online backup"),
1093  errhint("This means that the backup being taken is corrupt "
1094  "and should not be used. "
1095  "Try taking another online backup.")));
1096 
1097  /* Scan for files that should be excluded */
1098  excludeFound = false;
1099  for (excludeIdx = 0; excludeFiles[excludeIdx].name != NULL; excludeIdx++)
1100  {
1101  int cmplen = strlen(excludeFiles[excludeIdx].name);
1102 
1103  if (!excludeFiles[excludeIdx].match_prefix)
1104  cmplen++;
1105  if (strncmp(de->d_name, excludeFiles[excludeIdx].name, cmplen) == 0)
1106  {
1107  elog(DEBUG1, "file \"%s\" excluded from backup", de->d_name);
1108  excludeFound = true;
1109  break;
1110  }
1111  }
1112 
1113  if (excludeFound)
1114  continue;
1115 
1116  /* Exclude all forks for unlogged tables except the init fork */
1117  if (isDbDir &&
1118  parse_filename_for_nontemp_relation(de->d_name, &relOidChars,
1119  &relForkNum))
1120  {
1121  /* Never exclude init forks */
1122  if (relForkNum != INIT_FORKNUM)
1123  {
1124  char initForkFile[MAXPGPATH];
1125  char relOid[OIDCHARS + 1];
1126 
1127  /*
1128  * If any other type of fork, check if there is an init fork
1129  * with the same OID. If so, the file can be excluded.
1130  */
1131  memcpy(relOid, de->d_name, relOidChars);
1132  relOid[relOidChars] = '\0';
1133  snprintf(initForkFile, sizeof(initForkFile), "%s/%s_init",
1134  path, relOid);
1135 
1136  if (lstat(initForkFile, &statbuf) == 0)
1137  {
1138  elog(DEBUG2,
1139  "unlogged relation file \"%s\" excluded from backup",
1140  de->d_name);
1141 
1142  continue;
1143  }
1144  }
1145  }
1146 
1147  /* Exclude temporary relations */
1148  if (isDbDir && looks_like_temp_rel_name(de->d_name))
1149  {
1150  elog(DEBUG2,
1151  "temporary relation file \"%s\" excluded from backup",
1152  de->d_name);
1153 
1154  continue;
1155  }
1156 
1157  snprintf(pathbuf, sizeof(pathbuf), "%s/%s", path, de->d_name);
1158 
1159  /* Skip pg_control here to back up it last */
1160  if (strcmp(pathbuf, "./global/pg_control") == 0)
1161  continue;
1162 
1163  if (lstat(pathbuf, &statbuf) != 0)
1164  {
1165  if (errno != ENOENT)
1166  ereport(ERROR,
1168  errmsg("could not stat file or directory \"%s\": %m",
1169  pathbuf)));
1170 
1171  /* If the file went away while scanning, it's not an error. */
1172  continue;
1173  }
1174 
1175  /* Scan for directories whose contents should be excluded */
1176  excludeFound = false;
1177  for (excludeIdx = 0; excludeDirContents[excludeIdx] != NULL; excludeIdx++)
1178  {
1179  if (strcmp(de->d_name, excludeDirContents[excludeIdx]) == 0)
1180  {
1181  elog(DEBUG1, "contents of directory \"%s\" excluded from backup", de->d_name);
1182  size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly);
1183  excludeFound = true;
1184  break;
1185  }
1186  }
1187 
1188  if (excludeFound)
1189  continue;
1190 
1191  /*
1192  * Exclude contents of directory specified by statrelpath if not set
1193  * to the default (pg_stat_tmp) which is caught in the loop above.
1194  */
1195  if (statrelpath != NULL && strcmp(pathbuf, statrelpath) == 0)
1196  {
1197  elog(DEBUG1, "contents of directory \"%s\" excluded from backup", statrelpath);
1198  size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly);
1199  continue;
1200  }
1201 
1202  /*
1203  * We can skip pg_wal, the WAL segments need to be fetched from the
1204  * WAL archive anyway. But include it as an empty directory anyway, so
1205  * we get permissions right.
1206  */
1207  if (strcmp(pathbuf, "./pg_wal") == 0)
1208  {
1209  /* If pg_wal is a symlink, write it as a directory anyway */
1210  size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly);
1211 
1212  /*
1213  * Also send archive_status directory (by hackishly reusing
1214  * statbuf from above ...).
1215  */
1216  size += _tarWriteHeader("./pg_wal/archive_status", NULL, &statbuf,
1217  sizeonly);
1218 
1219  continue; /* don't recurse into pg_wal */
1220  }
1221 
1222  /* Allow symbolic links in pg_tblspc only */
1223  if (strcmp(path, "./pg_tblspc") == 0 &&
1224 #ifndef WIN32
1225  S_ISLNK(statbuf.st_mode)
1226 #else
1227  pgwin32_is_junction(pathbuf)
1228 #endif
1229  )
1230  {
1231 #if defined(HAVE_READLINK) || defined(WIN32)
1232  char linkpath[MAXPGPATH];
1233  int rllen;
1234 
1235  rllen = readlink(pathbuf, linkpath, sizeof(linkpath));
1236  if (rllen < 0)
1237  ereport(ERROR,
1239  errmsg("could not read symbolic link \"%s\": %m",
1240  pathbuf)));
1241  if (rllen >= sizeof(linkpath))
1242  ereport(ERROR,
1243  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1244  errmsg("symbolic link \"%s\" target is too long",
1245  pathbuf)));
1246  linkpath[rllen] = '\0';
1247 
1248  size += _tarWriteHeader(pathbuf + basepathlen + 1, linkpath,
1249  &statbuf, sizeonly);
1250 #else
1251 
1252  /*
1253  * If the platform does not have symbolic links, it should not be
1254  * possible to have tablespaces - clearly somebody else created
1255  * them. Warn about it and ignore.
1256  */
1257  ereport(WARNING,
1258  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1259  errmsg("tablespaces are not supported on this platform")));
1260  continue;
1261 #endif /* HAVE_READLINK */
1262  }
1263  else if (S_ISDIR(statbuf.st_mode))
1264  {
1265  bool skip_this_dir = false;
1266  ListCell *lc;
1267 
1268  /*
1269  * Store a directory entry in the tar file so we can get the
1270  * permissions right.
1271  */
1272  size += _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf,
1273  sizeonly);
1274 
1275  /*
1276  * Call ourselves recursively for a directory, unless it happens
1277  * to be a separate tablespace located within PGDATA.
1278  */
1279  foreach(lc, tablespaces)
1280  {
1281  tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc);
1282 
1283  /*
1284  * ti->rpath is the tablespace relative path within PGDATA, or
1285  * NULL if the tablespace has been properly located somewhere
1286  * else.
1287  *
1288  * Skip past the leading "./" in pathbuf when comparing.
1289  */
1290  if (ti->rpath && strcmp(ti->rpath, pathbuf + 2) == 0)
1291  {
1292  skip_this_dir = true;
1293  break;
1294  }
1295  }
1296 
1297  /*
1298  * skip sending directories inside pg_tblspc, if not required.
1299  */
1300  if (strcmp(pathbuf, "./pg_tblspc") == 0 && !sendtblspclinks)
1301  skip_this_dir = true;
1302 
1303  if (!skip_this_dir)
1304  size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces, sendtblspclinks);
1305  }
1306  else if (S_ISREG(statbuf.st_mode))
1307  {
1308  bool sent = false;
1309 
1310  if (!sizeonly)
1311  sent = sendFile(pathbuf, pathbuf + basepathlen + 1, &statbuf,
1312  true, isDbDir ? atooid(lastDir + 1) : InvalidOid);
1313 
1314  if (sent || sizeonly)
1315  {
1316  /* Add size, rounded up to 512byte block */
1317  size += ((statbuf.st_size + 511) & ~511);
1318  size += 512; /* Size of the header of the file */
1319  }
1320  }
1321  else
1322  ereport(WARNING,
1323  (errmsg("skipping special file \"%s\"", pathbuf)));
1324  }
1325  FreeDir(dir);
1326  return size;
1327 }
1328 
1329 /*
1330  * Check if a file should have its checksum validated.
1331  * We validate checksums on files in regular tablespaces
1332  * (including global and default) only, and in those there
1333  * are some files that are explicitly excluded.
1334  */
1335 static bool
1336 is_checksummed_file(const char *fullpath, const char *filename)
1337 {
1338  /* Check that the file is in a tablespace */
1339  if (strncmp(fullpath, "./global/", 9) == 0 ||
1340  strncmp(fullpath, "./base/", 7) == 0 ||
1341  strncmp(fullpath, "/", 1) == 0)
1342  {
1343  int excludeIdx;
1344 
1345  /* Compare file against noChecksumFiles skip list */
1346  for (excludeIdx = 0; noChecksumFiles[excludeIdx].name != NULL; excludeIdx++)
1347  {
1348  int cmplen = strlen(noChecksumFiles[excludeIdx].name);
1349 
1350  if (!noChecksumFiles[excludeIdx].match_prefix)
1351  cmplen++;
1352  if (strncmp(filename, noChecksumFiles[excludeIdx].name,
1353  cmplen) == 0)
1354  return false;
1355  }
1356 
1357  return true;
1358  }
1359  else
1360  return false;
1361 }
1362 
1363 /*****
1364  * Functions for handling tar file format
1365  *
1366  * Copied from pg_dump, but modified to work with libpq for sending
1367  */
1368 
1369 
1370 /*
1371  * Given the member, write the TAR header & send the file.
1372  *
1373  * If 'missing_ok' is true, will not throw an error if the file is not found.
1374  *
1375  * If dboid is anything other than InvalidOid then any checksum failures detected
1376  * will get reported to the stats collector.
1377  *
1378  * Returns true if the file was successfully sent, false if 'missing_ok',
1379  * and the file did not exist.
1380  */
1381 static bool
1382 sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf,
1383  bool missing_ok, Oid dboid)
1384 {
1385  FILE *fp;
1386  BlockNumber blkno = 0;
1387  bool block_retry = false;
1388  char buf[TAR_SEND_SIZE];
1389  uint16 checksum;
1390  int checksum_failures = 0;
1391  off_t cnt;
1392  int i;
1393  pgoff_t len = 0;
1394  char *page;
1395  size_t pad;
1396  PageHeader phdr;
1397  int segmentno = 0;
1398  char *segmentpath;
1399  bool verify_checksum = false;
1400 
1401  fp = AllocateFile(readfilename, "rb");
1402  if (fp == NULL)
1403  {
1404  if (errno == ENOENT && missing_ok)
1405  return false;
1406  ereport(ERROR,
1408  errmsg("could not open file \"%s\": %m", readfilename)));
1409  }
1410 
1411  _tarWriteHeader(tarfilename, NULL, statbuf, false);
1412 
1414  {
1415  char *filename;
1416 
1417  /*
1418  * Get the filename (excluding path). As last_dir_separator()
1419  * includes the last directory separator, we chop that off by
1420  * incrementing the pointer.
1421  */
1422  filename = last_dir_separator(readfilename) + 1;
1423 
1424  if (is_checksummed_file(readfilename, filename))
1425  {
1426  verify_checksum = true;
1427 
1428  /*
1429  * Cut off at the segment boundary (".") to get the segment number
1430  * in order to mix it into the checksum.
1431  */
1432  segmentpath = strstr(filename, ".");
1433  if (segmentpath != NULL)
1434  {
1435  segmentno = atoi(segmentpath + 1);
1436  if (segmentno == 0)
1437  ereport(ERROR,
1438  (errmsg("invalid segment number %d in file \"%s\"",
1439  segmentno, filename)));
1440  }
1441  }
1442  }
1443 
1444  while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0)
1445  {
1446  /*
1447  * The checksums are verified at block level, so we iterate over the
1448  * buffer in chunks of BLCKSZ, after making sure that
1449  * TAR_SEND_SIZE/buf is divisible by BLCKSZ and we read a multiple of
1450  * BLCKSZ bytes.
1451  */
1452  Assert(TAR_SEND_SIZE % BLCKSZ == 0);
1453 
1454  if (verify_checksum && (cnt % BLCKSZ != 0))
1455  {
1456  ereport(WARNING,
1457  (errmsg("could not verify checksum in file \"%s\", block "
1458  "%d: read buffer size %d and page size %d "
1459  "differ",
1460  readfilename, blkno, (int) cnt, BLCKSZ)));
1461  verify_checksum = false;
1462  }
1463 
1464  if (verify_checksum)
1465  {
1466  for (i = 0; i < cnt / BLCKSZ; i++)
1467  {
1468  page = buf + BLCKSZ * i;
1469 
1470  /*
1471  * Only check pages which have not been modified since the
1472  * start of the base backup. Otherwise, they might have been
1473  * written only halfway and the checksum would not be valid.
1474  * However, replaying WAL would reinstate the correct page in
1475  * this case. We also skip completely new pages, since they
1476  * don't have a checksum yet.
1477  */
1478  if (!PageIsNew(page) && PageGetLSN(page) < startptr)
1479  {
1480  checksum = pg_checksum_page((char *) page, blkno + segmentno * RELSEG_SIZE);
1481  phdr = (PageHeader) page;
1482  if (phdr->pd_checksum != checksum)
1483  {
1484  /*
1485  * Retry the block on the first failure. It's
1486  * possible that we read the first 4K page of the
1487  * block just before postgres updated the entire block
1488  * so it ends up looking torn to us. We only need to
1489  * retry once because the LSN should be updated to
1490  * something we can ignore on the next pass. If the
1491  * error happens again then it is a true validation
1492  * failure.
1493  */
1494  if (block_retry == false)
1495  {
1496  /* Reread the failed block */
1497  if (fseek(fp, -(cnt - BLCKSZ * i), SEEK_CUR) == -1)
1498  {
1499  ereport(ERROR,
1501  errmsg("could not fseek in file \"%s\": %m",
1502  readfilename)));
1503  }
1504 
1505  if (fread(buf + BLCKSZ * i, 1, BLCKSZ, fp) != BLCKSZ)
1506  {
1507  /*
1508  * If we hit end-of-file, a concurrent
1509  * truncation must have occurred, so break out
1510  * of this loop just as if the initial fread()
1511  * returned 0. We'll drop through to the same
1512  * code that handles that case. (We must fix
1513  * up cnt first, though.)
1514  */
1515  if (feof(fp))
1516  {
1517  cnt = BLCKSZ * i;
1518  break;
1519  }
1520 
1521  ereport(ERROR,
1523  errmsg("could not reread block %d of file \"%s\": %m",
1524  blkno, readfilename)));
1525  }
1526 
1527  if (fseek(fp, cnt - BLCKSZ * i - BLCKSZ, SEEK_CUR) == -1)
1528  {
1529  ereport(ERROR,
1531  errmsg("could not fseek in file \"%s\": %m",
1532  readfilename)));
1533  }
1534 
1535  /* Set flag so we know a retry was attempted */
1536  block_retry = true;
1537 
1538  /* Reset loop to validate the block again */
1539  i--;
1540  continue;
1541  }
1542 
1543  checksum_failures++;
1544 
1545  if (checksum_failures <= 5)
1546  ereport(WARNING,
1547  (errmsg("checksum verification failed in "
1548  "file \"%s\", block %d: calculated "
1549  "%X but expected %X",
1550  readfilename, blkno, checksum,
1551  phdr->pd_checksum)));
1552  if (checksum_failures == 5)
1553  ereport(WARNING,
1554  (errmsg("further checksum verification "
1555  "failures in file \"%s\" will not "
1556  "be reported", readfilename)));
1557  }
1558  }
1559  block_retry = false;
1560  blkno++;
1561  }
1562  }
1563 
1564  /* Send the chunk as a CopyData message */
1565  if (pq_putmessage('d', buf, cnt))
1566  ereport(ERROR,
1567  (errmsg("base backup could not send data, aborting backup")));
1568 
1569  len += cnt;
1570  throttle(cnt);
1571 
1572  if (feof(fp) || len >= statbuf->st_size)
1573  {
1574  /*
1575  * Reached end of file. The file could be longer, if it was
1576  * extended while we were sending it, but for a base backup we can
1577  * ignore such extended data. It will be restored from WAL.
1578  */
1579  break;
1580  }
1581  }
1582 
1583  CHECK_FREAD_ERROR(fp, readfilename);
1584 
1585  /* If the file was truncated while we were sending it, pad it with zeros */
1586  if (len < statbuf->st_size)
1587  {
1588  MemSet(buf, 0, sizeof(buf));
1589  while (len < statbuf->st_size)
1590  {
1591  cnt = Min(sizeof(buf), statbuf->st_size - len);
1592  pq_putmessage('d', buf, cnt);
1593  len += cnt;
1594  throttle(cnt);
1595  }
1596  }
1597 
1598  /*
1599  * Pad to 512 byte boundary, per tar format requirements. (This small
1600  * piece of data is probably not worth throttling.)
1601  */
1602  pad = ((len + 511) & ~511) - len;
1603  if (pad > 0)
1604  {
1605  MemSet(buf, 0, pad);
1606  pq_putmessage('d', buf, pad);
1607  }
1608 
1609  FreeFile(fp);
1610 
1611  if (checksum_failures > 1)
1612  {
1613  ereport(WARNING,
1614  (errmsg_plural("file \"%s\" has a total of %d checksum verification failure",
1615  "file \"%s\" has a total of %d checksum verification failures",
1616  checksum_failures,
1617  readfilename, checksum_failures)));
1618 
1619  pgstat_report_checksum_failures_in_db(dboid, checksum_failures);
1620  }
1621 
1622  total_checksum_failures += checksum_failures;
1623 
1624  return true;
1625 }
1626 
1627 
1628 static int64
1629 _tarWriteHeader(const char *filename, const char *linktarget,
1630  struct stat *statbuf, bool sizeonly)
1631 {
1632  char h[512];
1633  enum tarError rc;
1634 
1635  if (!sizeonly)
1636  {
1637  rc = tarCreateHeader(h, filename, linktarget, statbuf->st_size,
1638  statbuf->st_mode, statbuf->st_uid, statbuf->st_gid,
1639  statbuf->st_mtime);
1640 
1641  switch (rc)
1642  {
1643  case TAR_OK:
1644  break;
1645  case TAR_NAME_TOO_LONG:
1646  ereport(ERROR,
1647  (errmsg("file name too long for tar format: \"%s\"",
1648  filename)));
1649  break;
1650  case TAR_SYMLINK_TOO_LONG:
1651  ereport(ERROR,
1652  (errmsg("symbolic link target too long for tar format: "
1653  "file name \"%s\", target \"%s\"",
1654  filename, linktarget)));
1655  break;
1656  default:
1657  elog(ERROR, "unrecognized tar error: %d", rc);
1658  }
1659 
1660  pq_putmessage('d', h, sizeof(h));
1661  }
1662 
1663  return sizeof(h);
1664 }
1665 
1666 /*
1667  * Write tar header for a directory. If the entry in statbuf is a link then
1668  * write it as a directory anyway.
1669  */
1670 static int64
1671 _tarWriteDir(const char *pathbuf, int basepathlen, struct stat *statbuf,
1672  bool sizeonly)
1673 {
1674  /* If symlink, write it as a directory anyway */
1675 #ifndef WIN32
1676  if (S_ISLNK(statbuf->st_mode))
1677 #else
1678  if (pgwin32_is_junction(pathbuf))
1679 #endif
1680  statbuf->st_mode = S_IFDIR | pg_dir_create_mode;
1681 
1682  return _tarWriteHeader(pathbuf + basepathlen + 1, NULL, statbuf, sizeonly);
1683 }
1684 
1685 /*
1686  * Increment the network transfer counter by the given number of bytes,
1687  * and sleep if necessary to comply with the requested network transfer
1688  * rate.
1689  */
1690 static void
1691 throttle(size_t increment)
1692 {
1693  TimeOffset elapsed_min;
1694 
1695  if (throttling_counter < 0)
1696  return;
1697 
1698  throttling_counter += increment;
1700  return;
1701 
1702  /* How much time should have elapsed at minimum? */
1703  elapsed_min = elapsed_min_unit *
1705 
1706  /*
1707  * Since the latch could be set repeatedly because of concurrently WAL
1708  * activity, sleep in a loop to ensure enough time has passed.
1709  */
1710  for (;;)
1711  {
1712  TimeOffset elapsed,
1713  sleep;
1714  int wait_result;
1715 
1716  /* Time elapsed since the last measurement (and possible wake up). */
1717  elapsed = GetCurrentTimestamp() - throttled_last;
1718 
1719  /* sleep if the transfer is faster than it should be */
1720  sleep = elapsed_min - elapsed;
1721  if (sleep <= 0)
1722  break;
1723 
1725 
1726  /* We're eating a potentially set latch, so check for interrupts */
1728 
1729  /*
1730  * (TAR_SEND_SIZE / throttling_sample * elapsed_min_unit) should be
1731  * the maximum time to sleep. Thus the cast to long is safe.
1732  */
1733  wait_result = WaitLatch(MyLatch,
1735  (long) (sleep / 1000),
1737 
1738  if (wait_result & WL_LATCH_SET)
1740 
1741  /* Done waiting? */
1742  if (wait_result & WL_TIMEOUT)
1743  break;
1744  }
1745 
1746  /*
1747  * As we work with integers, only whole multiple of throttling_sample was
1748  * processed. The rest will be done during the next call of this function.
1749  */
1751 
1752  /*
1753  * Time interval for the remaining amount and possible next increments
1754  * starts now.
1755  */
1757 }
#define StatusFilePath(path, xlog, suffix)
List * options
Definition: replnodes.h:44
#define NIL
Definition: pg_list.h:65
static void pq_sendint16(StringInfo buf, uint16 i)
Definition: pqformat.h:137
int pg_file_create_mode
Definition: file_perm.c:19
#define MAX_RATE_LOWER
Definition: basebackup.h:20
#define DEBUG1
Definition: elog.h:25
int errhint(const char *fmt,...)
Definition: elog.c:1069
static void throttle(size_t increment)
Definition: basebackup.c:1691
uint32 TimeLineID
Definition: xlogdefs.h:52
void do_pg_abort_backup(int code, Datum arg)
Definition: xlog.c:11180
#define WL_TIMEOUT
Definition: latch.h:127
int wal_segment_size
Definition: xlog.c:112
#define PG_DYNSHMEM_DIR
Definition: dsm_impl.h:50
static ListCell * lnext(const List *l, const ListCell *c)
Definition: pg_list.h:321
#define USECS_PER_SEC
Definition: timestamp.h:94
bool update_process_title
Definition: ps_status.c:35
int errmsg_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition: elog.c:932
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1583
bool DataChecksumsEnabled(void)
Definition: xlog.c:4862
int64 TimestampTz
Definition: timestamp.h:39
#define XLByteToPrevSeg(xlrp, logSegNo, wal_segsz_bytes)
static const struct exclude_list_item noChecksumFiles[]
Definition: basebackup.c:221
char * pstrdup(const char *in)
Definition: mcxt.c:1186
Definition: pgtar.h:17
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
char * rpath
Definition: basebackup.h:28
StringInfo makeStringInfo(void)
Definition: stringinfo.c:41
#define Min(x, y)
Definition: c.h:920
static void SendBackupHeader(List *tablespaces)
Definition: basebackup.c:792
void set_ps_display(const char *activity, bool force)
Definition: ps_status.c:335
static void parse_basebackup_options(List *options, basebackup_options *opt)
Definition: basebackup.c:645
#define strVal(v)
Definition: value.h:54
int errcode(int sqlerrcode)
Definition: elog.c:608
#define MemSet(start, val, len)
Definition: c.h:971
void pq_putemptymessage(char msgtype)
Definition: pqformat.c:390
uint32 BlockNumber
Definition: block.h:31
void pq_sendstring(StringInfo buf, const char *str)
Definition: pqformat.c:197
unsigned int Oid
Definition: postgres_ext.h:31
bool RecoveryInProgress(void)
Definition: xlog.c:7964
static bool backup_started_in_recovery
Definition: basebackup.c:76
static void send_int8_string(StringInfoData *buf, int64 intval)
Definition: basebackup.c:782
Definition: dirent.h:9
void ResetLatch(Latch *latch)
Definition: latch.c:540
void pq_beginmessage(StringInfo buf, char msgtype)
Definition: pqformat.c:87
void pgstat_report_checksum_failures_in_db(Oid dboid, int failurecount)
Definition: pgstat.c:1528
XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, StringInfo labelfile, List **tablespaces, StringInfo tblspcmapfile, bool infotbssize, bool needtblspcmapfile)
Definition: xlog.c:10250
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:365
uint16 pd_checksum
Definition: bufpage.h:156
static void pq_sendbyte(StringInfo buf, uint8 byt)
Definition: pqformat.h:161
#define pgoff_t
Definition: win32_port.h:194
static void pq_sendint32(StringInfo buf, uint32 i)
Definition: pqformat.h:145
#define sprintf
Definition: port.h:194
#define TABLESPACE_MAP
Definition: xlog.h:362
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:47
unsigned short uint16
Definition: c.h:366
#define linitial(l)
Definition: pg_list.h:195
static const char *const excludeDirContents[]
Definition: basebackup.c:144
#define MAX_RATE_UPPER
Definition: basebackup.h:21
Definition: dirent.c:25
tarError
Definition: pgtar.h:15
#define ERROR
Definition: elog.h:43
#define PG_TEMP_FILE_PREFIX
Definition: pg_checksums.c:59
#define IsXLogFileName(fname)
XLogRecPtr do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
Definition: xlog.c:10769
static bool noverify_checksums
Definition: basebackup.c:122
void CheckXLogRemoved(XLogSegNo segno, TimeLineID tli)
Definition: xlog.c:3868
void SendBaseBackup(BaseBackupCmd *cmd)
Definition: basebackup.c:761
#define MAXPGPATH
#define DEBUG2
Definition: elog.h:24
#define XLogFromFileName(fname, tli, logSegNo, wal_segsz_bytes)
#define TABLESPACE_VERSION_DIRECTORY
Definition: relpath.h:26
static int32 maxrate
static char * buf
Definition: pg_test_fsync.c:67
uint64 XLogSegNo
Definition: xlogdefs.h:41
#define readlink(path, buf, size)
Definition: win32_port.h:222
int errcode_for_file_access(void)
Definition: elog.c:631
#define is_absolute_path(filename)
Definition: port.h:86
const char * label
Definition: basebackup.c:47
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2331
static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli)
Definition: basebackup.c:870
unsigned int uint32
Definition: c.h:367
int64 sendTablespace(char *path, bool sizeonly)
Definition: basebackup.c:972
static int64 _tarWriteHeader(const char *filename, const char *linktarget, struct stat *statbuf, bool sizeonly)
Definition: basebackup.c:1629
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2592
char * pgstat_stat_directory
Definition: pgstat.c:133
#define atooid(x)
Definition: postgres_ext.h:42
#define ereport(elevel, rest)
Definition: elog.h:141
#define CHECK_FREAD_ERROR(fp, filename)
Definition: basebackup.c:96
static char * statrelpath
Definition: basebackup.c:79
ForkNumber
Definition: relpath.h:40
Node * arg
Definition: parsenodes.h:731
#define S_ISREG(m)
Definition: win32_port.h:299
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:45
List * lappend(List *list, void *datum)
Definition: list.c:322
#define WARNING
Definition: elog.h:40
#define stat(a, b)
Definition: win32_port.h:255
#define MAXFNAMELEN
int pg_dir_create_mode
Definition: file_perm.c:18
enum tarError tarCreateHeader(char *h, const char *filename, const char *linktarget, pgoff_t size, mode_t mode, uid_t uid, gid_t gid, time_t mtime)
Definition: tar.c:114
int64 TimeOffset
Definition: timestamp.h:40
static void perform_base_backup(basebackup_options *opt)
Definition: basebackup.c:239
void * palloc0(Size size)
Definition: mcxt.c:980
#define XLOGDIR
char * last_dir_separator(const char *filename)
Definition: path.c:138
#define BoolGetDatum(X)
Definition: postgres.h:402
#define XLOG_CONTROL_FILE
#define InvalidOid
Definition: postgres_ext.h:36
TimeLineID ThisTimeLineID
Definition: xlog.c:187
static TimestampTz throttled_last
Definition: basebackup.c:113
#define RELCACHE_INIT_FILENAME
Definition: relcache.h:24
static StringInfo tblspc_map_file
Definition: xlogfuncs.c:45
static bool is_checksummed_file(const char *fullpath, const char *filename)
Definition: basebackup.c:1336
PageHeaderData * PageHeader
Definition: bufpage.h:166
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:738
#define lfirst(lc)
Definition: pg_list.h:190
void WalSndSetState(WalSndState state)
Definition: walsender.c:3135
#define PG_STAT_TMP_DIR
Definition: pgstat.h:33
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2658
#define THROTTLING_FREQUENCY
Definition: basebackup.c:89
size_t Size
Definition: c.h:466
#define XLogFileName(fname, tli, logSegNo, wal_segsz_bytes)
#define PG_AUTOCONF_FILENAME
Definition: guc.h:34
#define LOG_METAINFO_DATAFILE_TMP
Definition: syslogger.h:98
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:52
bool looks_like_temp_rel_name(const char *name)
Definition: fd.c:3190
bool parse_filename_for_nontemp_relation(const char *name, int *oidchars, ForkNumber *fork)
Definition: reinit.c:374
#define INT64_FORMAT
Definition: c.h:409
#define S_ISDIR(m)
Definition: win32_port.h:296
#define PageGetLSN(page)
Definition: bufpage.h:366
#define lstat(path, sb)
Definition: win32_port.h:244
const char * name
Definition: basebackup.c:132
int FreeFile(FILE *file)
Definition: fd.c:2530
#define IsTLHistoryFileName(fname)
static int compareWalFileNames(const ListCell *a, const ListCell *b)
Definition: basebackup.c:633
static char * filename
Definition: pg_dumpall.c:90
static int64 sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces, bool sendtblspclinks)
Definition: basebackup.c:1023
#define intVal(v)
Definition: value.h:52
#define PageIsNew(page)
Definition: bufpage.h:229
void pq_sendbytes(StringInfo buf, const char *data, int datalen)
Definition: pqformat.c:125
void list_sort(List *list, list_sort_comparator cmp)
Definition: list.c:1482
int errmsg(const char *fmt,...)
Definition: elog.c:822
void pq_endmessage(StringInfo buf)
Definition: pqformat.c:298
#define TAR_SEND_SIZE
Definition: basebackup.c:84
#define elog(elevel,...)
Definition: elog.h:228
int i
char * DataDir
Definition: globals.c:62
struct Latch * MyLatch
Definition: globals.c:54
#define BACKUP_LABEL_FILE
Definition: xlog.h:359
char * defname
Definition: parsenodes.h:730
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
char d_name[MAX_PATH]
Definition: dirent.h:14
#define pq_putmessage(msgtype, s, len)
Definition: libpq.h:42
static int64 throttling_counter
Definition: basebackup.c:107
static const struct exclude_list_item excludeFiles[]
Definition: basebackup.c:185
void pq_puttextmessage(char msgtype, const char *str)
Definition: pqformat.c:369
Definition: pg_list.h:50
#define snprintf
Definition: port.h:192
static bool sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf, bool missing_ok, Oid dboid)
Definition: basebackup.c:1382
#define WL_LATCH_SET
Definition: latch.h:124
#define OIDCHARS
Definition: relpath.h:30
static uint64 throttling_sample
Definition: basebackup.c:104
static void sendFileWithContent(const char *filename, const char *content)
Definition: basebackup.c:925
uint16 pg_checksum_page(char *page, BlockNumber blkno)
int FreeDir(DIR *dir)
Definition: fd.c:2710
static TimeOffset elapsed_min_unit
Definition: basebackup.c:110
#define WL_EXIT_ON_PM_DEATH
Definition: latch.h:129
static XLogRecPtr startptr
Definition: basebackup.c:116
bool pgwin32_is_junction(const char *path)
static int64 _tarWriteDir(const char *pathbuf, int basepathlen, struct stat *statbuf, bool sizeonly)
Definition: basebackup.c:1671
static long long int total_checksum_failures
Definition: basebackup.c:119
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)