PostgreSQL Source Code  git master
basebackup.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * basebackup.c
4  * code for taking a base backup and streaming it to a standby
5  *
6  * Portions Copyright (c) 2010-2019, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * src/backend/replication/basebackup.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14 
15 #include <sys/stat.h>
16 #include <unistd.h>
17 #include <time.h>
18 
19 #include "access/xlog_internal.h" /* for pg_start/stop_backup */
20 #include "catalog/pg_type.h"
21 #include "common/file_perm.h"
22 #include "lib/stringinfo.h"
23 #include "libpq/libpq.h"
24 #include "libpq/pqformat.h"
25 #include "miscadmin.h"
26 #include "nodes/pg_list.h"
27 #include "pgtar.h"
28 #include "pgstat.h"
29 #include "port.h"
30 #include "postmaster/syslogger.h"
31 #include "replication/basebackup.h"
32 #include "replication/walsender.h"
34 #include "storage/bufpage.h"
35 #include "storage/checksum.h"
36 #include "storage/dsm_impl.h"
37 #include "storage/fd.h"
38 #include "storage/ipc.h"
39 #include "storage/reinit.h"
40 #include "utils/builtins.h"
41 #include "utils/ps_status.h"
42 #include "utils/relcache.h"
43 #include "utils/timestamp.h"
44 
45 
46 typedef struct
47 {
48  const char *label;
49  bool progress;
51  bool nowait;
52  bool includewal;
56 
57 
58 static int64 sendDir(const char *path, int basepathlen, bool sizeonly,
59  List *tablespaces, bool sendtblspclinks);
60 static bool sendFile(const char *readfilename, const char *tarfilename,
61  struct stat *statbuf, bool missing_ok, Oid dboid);
62 static void sendFileWithContent(const char *filename, const char *content);
63 static int64 _tarWriteHeader(const char *filename, const char *linktarget,
64  struct stat *statbuf, bool sizeonly);
65 static int64 _tarWriteDir(const char *pathbuf, int basepathlen, struct stat *statbuf,
66  bool sizeonly);
67 static void send_int8_string(StringInfoData *buf, int64 intval);
68 static void SendBackupHeader(List *tablespaces);
69 static void base_backup_cleanup(int code, Datum arg);
70 static void perform_base_backup(basebackup_options *opt);
72 static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli);
73 static int compareWalFileNames(const ListCell *a, const ListCell *b);
74 static void throttle(size_t increment);
75 static bool is_checksummed_file(const char *fullpath, const char *filename);
76 
77 /* Was the backup currently in-progress initiated in recovery mode? */
78 static bool backup_started_in_recovery = false;
79 
80 /* Relative path of temporary statistics directory */
81 static char *statrelpath = NULL;
82 
83 /*
84  * Size of each block sent into the tar stream for larger files.
85  */
86 #define TAR_SEND_SIZE 32768
87 
88 /*
89  * How frequently to throttle, as a fraction of the specified rate-second.
90  */
91 #define THROTTLING_FREQUENCY 8
92 
/*
 * Raise an ERROR if the stream's error indicator is set after an fread().
 * fread() by itself gives no detail about what went wrong, hence the
 * ferror() probe; and since neither fread() nor ferror() set errno, the
 * reported message is deliberately generic.
 */
#define CHECK_FREAD_ERROR(fp, filename) \
	do \
	{ \
		if (ferror(fp) != 0) \
			ereport(ERROR, \
					(errmsg("could not read from file \"%s\"", filename))); \
	} while (0)
104 
105 /* Number of bytes whose transfer may trigger a throttling sleep. */
106 static uint64 throttling_sample;
107 
108 /* Amount of data already transferred but not yet throttled. */
109 static int64 throttling_counter;
110 
111 /* The minimum time required to transfer throttling_sample bytes. */
113 
114 /* The last check of the transfer rate. */
116 
117 /* The starting XLOG position of the base backup. */
119 
120 /* Total number of checksum failures during base backup. */
121 static long long int total_checksum_failures;
122 
123 /* Do not verify checksums. */
124 static bool noverify_checksums = false;
125 
126 /*
127  * The contents of these directories are removed or recreated during server
128  * start so they are not included in backups. The directories themselves are
129  * kept and included as empty to preserve access permissions.
130  *
131  * Note: this list should be kept in sync with the filter lists in pg_rewind's
132  * filemap.c.
133  */
134 static const char *const excludeDirContents[] =
135 {
136  /*
137  * Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped even
138  * when stats_temp_directory is set because PGSS_TEXT_FILE is always
139  * created there.
140  */
142 
143  /*
144  * It is generally not useful to backup the contents of this directory
145  * even if the intention is to restore to another master. See backup.sgml
146  * for a more detailed description.
147  */
148  "pg_replslot",
149 
150  /* Contents removed on startup, see dsm_cleanup_for_mmap(). */
152 
153  /* Contents removed on startup, see AsyncShmemInit(). */
154  "pg_notify",
155 
156  /*
157  * Old contents are loaded for possible debugging but are not required for
158  * normal operation, see OldSerXidInit().
159  */
160  "pg_serial",
161 
162  /* Contents removed on startup, see DeleteAllExportedSnapshotFiles(). */
163  "pg_snapshots",
164 
165  /* Contents zeroed on startup, see StartupSUBTRANS(). */
166  "pg_subtrans",
167 
168  /* end of list */
169  NULL
170 };
171 
172 /*
173  * List of files excluded from backups.
174  */
175 static const char *const excludeFiles[] =
176 {
177  /* Skip auto conf temporary file. */
178  PG_AUTOCONF_FILENAME ".tmp",
179 
180  /* Skip current log file temporary file */
182 
183  /* Skip relation cache because it is rebuilt on startup */
185 
186  /*
187  * If there's a backup_label or tablespace_map file, it belongs to a
188  * backup started by the user with pg_start_backup(). It is *not* correct
189  * for this backup. Our backup_label/tablespace_map is injected into the
190  * tar separately.
191  */
194 
195  "postmaster.pid",
196  "postmaster.opts",
197 
198  /* end of list */
199  NULL
200 };
201 
/*
 * Names of files that are excluded from checksum validation.
 *
 * The table is NULL-terminated.
 *
 * Note: this list should be kept in sync with what pg_checksums.c
 * includes.
 */
static const char *const noChecksumFiles[] =
{
	"pg_control",
	"pg_filenode.map",
	"pg_internal.init",
	"PG_VERSION",

#ifdef EXEC_BACKEND
	/* Only listed in EXEC_BACKEND builds */
	"config_exec_params",
	"config_exec_params.new",
#endif

	/* end of list */
	NULL
};
219 
220 
221 /*
222  * Called when ERROR or FATAL happens in perform_base_backup() after
223  * we have started the backup - make sure we end it!
224  */
225 static void
227 {
229 }
230 
231 /*
232  * Actually do a base backup for the specified tablespaces.
233  *
234  * This is split out mainly to avoid complaints about "variable might be
235  * clobbered by longjmp" from stupider versions of gcc.
236  */
237 static void
239 {
240  TimeLineID starttli;
241  XLogRecPtr endptr;
242  TimeLineID endtli;
243  StringInfo labelfile;
245  int datadirpathlen;
246  List *tablespaces = NIL;
247 
248  datadirpathlen = strlen(DataDir);
249 
251 
252  labelfile = makeStringInfo();
253  tblspc_map_file = makeStringInfo();
254 
256 
257  startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &starttli,
258  labelfile, &tablespaces,
259  tblspc_map_file,
260  opt->progress, opt->sendtblspcmapfile);
261 
262  /*
263  * Once do_pg_start_backup has been called, ensure that any failure causes
264  * us to abort the backup so we don't "leak" a backup counter. For this
265  * reason, *all* functionality between do_pg_start_backup() and the end of
266  * do_pg_stop_backup() should be inside the error cleanup block!
267  */
268 
270  {
271  ListCell *lc;
272  tablespaceinfo *ti;
273 
274  SendXlogRecPtrResult(startptr, starttli);
275 
276  /*
277  * Calculate the relative path of temporary statistics directory in
278  * order to skip the files which are located in that directory later.
279  */
281  strncmp(pgstat_stat_directory, DataDir, datadirpathlen) == 0)
282  statrelpath = psprintf("./%s", pgstat_stat_directory + datadirpathlen + 1);
283  else if (strncmp(pgstat_stat_directory, "./", 2) != 0)
285  else
287 
288  /* Add a node for the base directory at the end */
289  ti = palloc0(sizeof(tablespaceinfo));
290  ti->size = opt->progress ? sendDir(".", 1, true, tablespaces, true) : -1;
291  tablespaces = lappend(tablespaces, ti);
292 
293  /* Send tablespace header */
294  SendBackupHeader(tablespaces);
295 
296  /* Setup and activate network throttling, if client requested it */
297  if (opt->maxrate > 0)
298  {
300  (int64) opt->maxrate * (int64) 1024 / THROTTLING_FREQUENCY;
301 
302  /*
303  * The minimum amount of time for throttling_sample bytes to be
304  * transferred.
305  */
307 
308  /* Enable throttling. */
309  throttling_counter = 0;
310 
311  /* The 'real data' starts now (header was ignored). */
313  }
314  else
315  {
316  /* Disable throttling. */
317  throttling_counter = -1;
318  }
319 
320  /* Send off our tablespaces one by one */
321  foreach(lc, tablespaces)
322  {
323  tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc);
325 
326  /* Send CopyOutResponse message */
327  pq_beginmessage(&buf, 'H');
328  pq_sendbyte(&buf, 0); /* overall format */
329  pq_sendint16(&buf, 0); /* natts */
330  pq_endmessage(&buf);
331 
332  if (ti->path == NULL)
333  {
334  struct stat statbuf;
335 
336  /* In the main tar, include the backup_label first... */
338 
339  /*
340  * Send tablespace_map file if required and then the bulk of
341  * the files.
342  */
343  if (tblspc_map_file && opt->sendtblspcmapfile)
344  {
345  sendFileWithContent(TABLESPACE_MAP, tblspc_map_file->data);
346  sendDir(".", 1, false, tablespaces, false);
347  }
348  else
349  sendDir(".", 1, false, tablespaces, true);
350 
351  /* ... and pg_control after everything else. */
352  if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
353  ereport(ERROR,
355  errmsg("could not stat file \"%s\": %m",
358  }
359  else
360  sendTablespace(ti->path, false);
361 
362  /*
363  * If we're including WAL, and this is the main data directory we
364  * don't terminate the tar stream here. Instead, we will append
365  * the xlog files below and terminate it then. This is safe since
366  * the main data directory is always sent *last*.
367  */
368  if (opt->includewal && ti->path == NULL)
369  {
370  Assert(lnext(tablespaces, lc) == NULL);
371  }
372  else
373  pq_putemptymessage('c'); /* CopyDone */
374  }
375 
376  endptr = do_pg_stop_backup(labelfile->data, !opt->nowait, &endtli);
377  }
379 
380 
381  if (opt->includewal)
382  {
383  /*
384  * We've left the last tar file "open", so we can now append the
385  * required WAL files to it.
386  */
387  char pathbuf[MAXPGPATH];
388  XLogSegNo segno;
389  XLogSegNo startsegno;
390  XLogSegNo endsegno;
391  struct stat statbuf;
392  List *historyFileList = NIL;
393  List *walFileList = NIL;
394  char firstoff[MAXFNAMELEN];
395  char lastoff[MAXFNAMELEN];
396  DIR *dir;
397  struct dirent *de;
398  ListCell *lc;
399  TimeLineID tli;
400 
401  /*
402  * I'd rather not worry about timelines here, so scan pg_wal and
403  * include all WAL files in the range between 'startptr' and 'endptr',
404  * regardless of the timeline the file is stamped with. If there are
405  * some spurious WAL files belonging to timelines that don't belong in
406  * this server's history, they will be included too. Normally there
407  * shouldn't be such files, but if there are, there's little harm in
408  * including them.
409  */
410  XLByteToSeg(startptr, startsegno, wal_segment_size);
411  XLogFileName(firstoff, ThisTimeLineID, startsegno, wal_segment_size);
412  XLByteToPrevSeg(endptr, endsegno, wal_segment_size);
413  XLogFileName(lastoff, ThisTimeLineID, endsegno, wal_segment_size);
414 
415  dir = AllocateDir("pg_wal");
416  while ((de = ReadDir(dir, "pg_wal")) != NULL)
417  {
418  /* Does it look like a WAL segment, and is it in the range? */
419  if (IsXLogFileName(de->d_name) &&
420  strcmp(de->d_name + 8, firstoff + 8) >= 0 &&
421  strcmp(de->d_name + 8, lastoff + 8) <= 0)
422  {
423  walFileList = lappend(walFileList, pstrdup(de->d_name));
424  }
425  /* Does it look like a timeline history file? */
426  else if (IsTLHistoryFileName(de->d_name))
427  {
428  historyFileList = lappend(historyFileList, pstrdup(de->d_name));
429  }
430  }
431  FreeDir(dir);
432 
433  /*
434  * Before we go any further, check that none of the WAL segments we
435  * need were removed.
436  */
437  CheckXLogRemoved(startsegno, ThisTimeLineID);
438 
439  /*
440  * Sort the WAL filenames. We want to send the files in order from
441  * oldest to newest, to reduce the chance that a file is recycled
442  * before we get a chance to send it over.
443  */
444  list_sort(walFileList, compareWalFileNames);
445 
446  /*
447  * There must be at least one xlog file in the pg_wal directory, since
448  * we are doing backup-including-xlog.
449  */
450  if (walFileList == NIL)
451  ereport(ERROR,
452  (errmsg("could not find any WAL files")));
453 
454  /*
455  * Sanity check: the first and last segment should cover startptr and
456  * endptr, with no gaps in between.
457  */
458  XLogFromFileName((char *) linitial(walFileList),
459  &tli, &segno, wal_segment_size);
460  if (segno != startsegno)
461  {
462  char startfname[MAXFNAMELEN];
463 
464  XLogFileName(startfname, ThisTimeLineID, startsegno,
466  ereport(ERROR,
467  (errmsg("could not find WAL file \"%s\"", startfname)));
468  }
469  foreach(lc, walFileList)
470  {
471  char *walFileName = (char *) lfirst(lc);
472  XLogSegNo currsegno = segno;
473  XLogSegNo nextsegno = segno + 1;
474 
475  XLogFromFileName(walFileName, &tli, &segno, wal_segment_size);
476  if (!(nextsegno == segno || currsegno == segno))
477  {
478  char nextfname[MAXFNAMELEN];
479 
480  XLogFileName(nextfname, ThisTimeLineID, nextsegno,
482  ereport(ERROR,
483  (errmsg("could not find WAL file \"%s\"", nextfname)));
484  }
485  }
486  if (segno != endsegno)
487  {
488  char endfname[MAXFNAMELEN];
489 
490  XLogFileName(endfname, ThisTimeLineID, endsegno, wal_segment_size);
491  ereport(ERROR,
492  (errmsg("could not find WAL file \"%s\"", endfname)));
493  }
494 
495  /* Ok, we have everything we need. Send the WAL files. */
496  foreach(lc, walFileList)
497  {
498  char *walFileName = (char *) lfirst(lc);
499  FILE *fp;
500  char buf[TAR_SEND_SIZE];
501  size_t cnt;
502  pgoff_t len = 0;
503 
504  snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", walFileName);
505  XLogFromFileName(walFileName, &tli, &segno, wal_segment_size);
506 
507  fp = AllocateFile(pathbuf, "rb");
508  if (fp == NULL)
509  {
510  int save_errno = errno;
511 
512  /*
513  * Most likely reason for this is that the file was already
514  * removed by a checkpoint, so check for that to get a better
515  * error message.
516  */
517  CheckXLogRemoved(segno, tli);
518 
519  errno = save_errno;
520  ereport(ERROR,
522  errmsg("could not open file \"%s\": %m", pathbuf)));
523  }
524 
525  if (fstat(fileno(fp), &statbuf) != 0)
526  ereport(ERROR,
528  errmsg("could not stat file \"%s\": %m",
529  pathbuf)));
530  if (statbuf.st_size != wal_segment_size)
531  {
532  CheckXLogRemoved(segno, tli);
533  ereport(ERROR,
535  errmsg("unexpected WAL file size \"%s\"", walFileName)));
536  }
537 
538  /* send the WAL file itself */
539  _tarWriteHeader(pathbuf, NULL, &statbuf, false);
540 
541  while ((cnt = fread(buf, 1,
542  Min(sizeof(buf), wal_segment_size - len),
543  fp)) > 0)
544  {
545  CheckXLogRemoved(segno, tli);
546  /* Send the chunk as a CopyData message */
547  if (pq_putmessage('d', buf, cnt))
548  ereport(ERROR,
549  (errmsg("base backup could not send data, aborting backup")));
550 
551  len += cnt;
552  throttle(cnt);
553 
554  if (len == wal_segment_size)
555  break;
556  }
557 
558  CHECK_FREAD_ERROR(fp, pathbuf);
559 
560  if (len != wal_segment_size)
561  {
562  CheckXLogRemoved(segno, tli);
563  ereport(ERROR,
565  errmsg("unexpected WAL file size \"%s\"", walFileName)));
566  }
567 
568  /* wal_segment_size is a multiple of 512, so no need for padding */
569 
570  FreeFile(fp);
571 
572  /*
573  * Mark file as archived, otherwise files can get archived again
574  * after promotion of a new node. This is in line with
575  * walreceiver.c always doing an XLogArchiveForceDone() after a
576  * complete segment.
577  */
578  StatusFilePath(pathbuf, walFileName, ".done");
579  sendFileWithContent(pathbuf, "");
580  }
581 
582  /*
583  * Send timeline history files too. Only the latest timeline history
584  * file is required for recovery, and even that only if there happens
585  * to be a timeline switch in the first WAL segment that contains the
586  * checkpoint record, or if we're taking a base backup from a standby
587  * server and the target timeline changes while the backup is taken.
588  * But they are small and highly useful for debugging purposes, so
589  * better include them all, always.
590  */
591  foreach(lc, historyFileList)
592  {
593  char *fname = lfirst(lc);
594 
595  snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", fname);
596 
597  if (lstat(pathbuf, &statbuf) != 0)
598  ereport(ERROR,
600  errmsg("could not stat file \"%s\": %m", pathbuf)));
601 
602  sendFile(pathbuf, pathbuf, &statbuf, false, InvalidOid);
603 
604  /* unconditionally mark file as archived */
605  StatusFilePath(pathbuf, fname, ".done");
606  sendFileWithContent(pathbuf, "");
607  }
608 
609  /* Send CopyDone message for the last tar file */
610  pq_putemptymessage('c');
611  }
612  SendXlogRecPtrResult(endptr, endtli);
613 
615  {
616  if (total_checksum_failures > 1)
618  (errmsg("%lld total checksum verification failures", total_checksum_failures)));
619 
620  ereport(ERROR,
622  errmsg("checksum verification failure during base backup")));
623  }
624 
625 }
626 
627 /*
628  * list_sort comparison function, to compare log/seg portion of WAL segment
629  * filenames, ignoring the timeline portion.
630  */
631 static int
633 {
634  char *fna = (char *) lfirst(a);
635  char *fnb = (char *) lfirst(b);
636 
637  return strcmp(fna + 8, fnb + 8);
638 }
639 
640 /*
641  * Parse the base backup options passed down by the parser
642  */
643 static void
645 {
646  ListCell *lopt;
647  bool o_label = false;
648  bool o_progress = false;
649  bool o_fast = false;
650  bool o_nowait = false;
651  bool o_wal = false;
652  bool o_maxrate = false;
653  bool o_tablespace_map = false;
654  bool o_noverify_checksums = false;
655 
656  MemSet(opt, 0, sizeof(*opt));
657  foreach(lopt, options)
658  {
659  DefElem *defel = (DefElem *) lfirst(lopt);
660 
661  if (strcmp(defel->defname, "label") == 0)
662  {
663  if (o_label)
664  ereport(ERROR,
665  (errcode(ERRCODE_SYNTAX_ERROR),
666  errmsg("duplicate option \"%s\"", defel->defname)));
667  opt->label = strVal(defel->arg);
668  o_label = true;
669  }
670  else if (strcmp(defel->defname, "progress") == 0)
671  {
672  if (o_progress)
673  ereport(ERROR,
674  (errcode(ERRCODE_SYNTAX_ERROR),
675  errmsg("duplicate option \"%s\"", defel->defname)));
676  opt->progress = true;
677  o_progress = true;
678  }
679  else if (strcmp(defel->defname, "fast") == 0)
680  {
681  if (o_fast)
682  ereport(ERROR,
683  (errcode(ERRCODE_SYNTAX_ERROR),
684  errmsg("duplicate option \"%s\"", defel->defname)));
685  opt->fastcheckpoint = true;
686  o_fast = true;
687  }
688  else if (strcmp(defel->defname, "nowait") == 0)
689  {
690  if (o_nowait)
691  ereport(ERROR,
692  (errcode(ERRCODE_SYNTAX_ERROR),
693  errmsg("duplicate option \"%s\"", defel->defname)));
694  opt->nowait = true;
695  o_nowait = true;
696  }
697  else if (strcmp(defel->defname, "wal") == 0)
698  {
699  if (o_wal)
700  ereport(ERROR,
701  (errcode(ERRCODE_SYNTAX_ERROR),
702  errmsg("duplicate option \"%s\"", defel->defname)));
703  opt->includewal = true;
704  o_wal = true;
705  }
706  else if (strcmp(defel->defname, "max_rate") == 0)
707  {
708  long maxrate;
709 
710  if (o_maxrate)
711  ereport(ERROR,
712  (errcode(ERRCODE_SYNTAX_ERROR),
713  errmsg("duplicate option \"%s\"", defel->defname)));
714 
715  maxrate = intVal(defel->arg);
716  if (maxrate < MAX_RATE_LOWER || maxrate > MAX_RATE_UPPER)
717  ereport(ERROR,
718  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
719  errmsg("%d is outside the valid range for parameter \"%s\" (%d .. %d)",
720  (int) maxrate, "MAX_RATE", MAX_RATE_LOWER, MAX_RATE_UPPER)));
721 
722  opt->maxrate = (uint32) maxrate;
723  o_maxrate = true;
724  }
725  else if (strcmp(defel->defname, "tablespace_map") == 0)
726  {
727  if (o_tablespace_map)
728  ereport(ERROR,
729  (errcode(ERRCODE_SYNTAX_ERROR),
730  errmsg("duplicate option \"%s\"", defel->defname)));
731  opt->sendtblspcmapfile = true;
732  o_tablespace_map = true;
733  }
734  else if (strcmp(defel->defname, "noverify_checksums") == 0)
735  {
736  if (o_noverify_checksums)
737  ereport(ERROR,
738  (errcode(ERRCODE_SYNTAX_ERROR),
739  errmsg("duplicate option \"%s\"", defel->defname)));
740  noverify_checksums = true;
741  o_noverify_checksums = true;
742  }
743  else
744  elog(ERROR, "option \"%s\" not recognized",
745  defel->defname);
746  }
747  if (opt->label == NULL)
748  opt->label = "base backup";
749 }
750 
751 
752 /*
753  * SendBaseBackup() - send a complete base backup.
754  *
755  * The function will put the system into backup mode like pg_start_backup()
756  * does, so that the backup is consistent even though we read directly from
757  * the filesystem, bypassing the buffer cache.
758  */
759 void
761 {
762  basebackup_options opt;
763 
764  parse_basebackup_options(cmd->options, &opt);
765 
767 
769  {
770  char activitymsg[50];
771 
772  snprintf(activitymsg, sizeof(activitymsg), "sending backup \"%s\"",
773  opt.label);
774  set_ps_display(activitymsg, false);
775  }
776 
777  perform_base_backup(&opt);
778 }
779 
780 static void
782 {
783  char is[32];
784 
785  sprintf(is, INT64_FORMAT, intval);
786  pq_sendint32(buf, strlen(is));
787  pq_sendbytes(buf, is, strlen(is));
788 }
789 
790 static void
791 SendBackupHeader(List *tablespaces)
792 {
794  ListCell *lc;
795 
796  /* Construct and send the directory information */
797  pq_beginmessage(&buf, 'T'); /* RowDescription */
798  pq_sendint16(&buf, 3); /* 3 fields */
799 
800  /* First field - spcoid */
801  pq_sendstring(&buf, "spcoid");
802  pq_sendint32(&buf, 0); /* table oid */
803  pq_sendint16(&buf, 0); /* attnum */
804  pq_sendint32(&buf, OIDOID); /* type oid */
805  pq_sendint16(&buf, 4); /* typlen */
806  pq_sendint32(&buf, 0); /* typmod */
807  pq_sendint16(&buf, 0); /* format code */
808 
809  /* Second field - spclocation */
810  pq_sendstring(&buf, "spclocation");
811  pq_sendint32(&buf, 0);
812  pq_sendint16(&buf, 0);
813  pq_sendint32(&buf, TEXTOID);
814  pq_sendint16(&buf, -1);
815  pq_sendint32(&buf, 0);
816  pq_sendint16(&buf, 0);
817 
818  /* Third field - size */
819  pq_sendstring(&buf, "size");
820  pq_sendint32(&buf, 0);
821  pq_sendint16(&buf, 0);
822  pq_sendint32(&buf, INT8OID);
823  pq_sendint16(&buf, 8);
824  pq_sendint32(&buf, 0);
825  pq_sendint16(&buf, 0);
826  pq_endmessage(&buf);
827 
828  foreach(lc, tablespaces)
829  {
830  tablespaceinfo *ti = lfirst(lc);
831 
832  /* Send one datarow message */
833  pq_beginmessage(&buf, 'D');
834  pq_sendint16(&buf, 3); /* number of columns */
835  if (ti->path == NULL)
836  {
837  pq_sendint32(&buf, -1); /* Length = -1 ==> NULL */
838  pq_sendint32(&buf, -1);
839  }
840  else
841  {
842  Size len;
843 
844  len = strlen(ti->oid);
845  pq_sendint32(&buf, len);
846  pq_sendbytes(&buf, ti->oid, len);
847 
848  len = strlen(ti->path);
849  pq_sendint32(&buf, len);
850  pq_sendbytes(&buf, ti->path, len);
851  }
852  if (ti->size >= 0)
853  send_int8_string(&buf, ti->size / 1024);
854  else
855  pq_sendint32(&buf, -1); /* NULL */
856 
857  pq_endmessage(&buf);
858  }
859 
860  /* Send a CommandComplete message */
861  pq_puttextmessage('C', "SELECT");
862 }
863 
864 /*
865  * Send a single resultset containing just a single
866  * XLogRecPtr record (in text format)
867  */
868 static void
870 {
872  char str[MAXFNAMELEN];
873  Size len;
874 
875  pq_beginmessage(&buf, 'T'); /* RowDescription */
876  pq_sendint16(&buf, 2); /* 2 fields */
877 
878  /* Field headers */
879  pq_sendstring(&buf, "recptr");
880  pq_sendint32(&buf, 0); /* table oid */
881  pq_sendint16(&buf, 0); /* attnum */
882  pq_sendint32(&buf, TEXTOID); /* type oid */
883  pq_sendint16(&buf, -1);
884  pq_sendint32(&buf, 0);
885  pq_sendint16(&buf, 0);
886 
887  pq_sendstring(&buf, "tli");
888  pq_sendint32(&buf, 0); /* table oid */
889  pq_sendint16(&buf, 0); /* attnum */
890 
891  /*
892  * int8 may seem like a surprising data type for this, but in theory int4
893  * would not be wide enough for this, as TimeLineID is unsigned.
894  */
895  pq_sendint32(&buf, INT8OID); /* type oid */
896  pq_sendint16(&buf, -1);
897  pq_sendint32(&buf, 0);
898  pq_sendint16(&buf, 0);
899  pq_endmessage(&buf);
900 
901  /* Data row */
902  pq_beginmessage(&buf, 'D');
903  pq_sendint16(&buf, 2); /* number of columns */
904 
905  len = snprintf(str, sizeof(str),
906  "%X/%X", (uint32) (ptr >> 32), (uint32) ptr);
907  pq_sendint32(&buf, len);
908  pq_sendbytes(&buf, str, len);
909 
910  len = snprintf(str, sizeof(str), "%u", tli);
911  pq_sendint32(&buf, len);
912  pq_sendbytes(&buf, str, len);
913 
914  pq_endmessage(&buf);
915 
916  /* Send a CommandComplete message */
917  pq_puttextmessage('C', "SELECT");
918 }
919 
920 /*
921  * Inject a file with given name and content in the output tar stream.
922  */
923 static void
924 sendFileWithContent(const char *filename, const char *content)
925 {
926  struct stat statbuf;
927  int pad,
928  len;
929 
930  len = strlen(content);
931 
932  /*
933  * Construct a stat struct for the backup_label file we're injecting in
934  * the tar.
935  */
936  /* Windows doesn't have the concept of uid and gid */
937 #ifdef WIN32
938  statbuf.st_uid = 0;
939  statbuf.st_gid = 0;
940 #else
941  statbuf.st_uid = geteuid();
942  statbuf.st_gid = getegid();
943 #endif
944  statbuf.st_mtime = time(NULL);
945  statbuf.st_mode = pg_file_create_mode;
946  statbuf.st_size = len;
947 
948  _tarWriteHeader(filename, NULL, &statbuf, false);
949  /* Send the contents as a CopyData message */
950  pq_putmessage('d', content, len);
951 
952  /* Pad to 512 byte boundary, per tar format requirements */
953  pad = ((len + 511) & ~511) - len;
954  if (pad > 0)
955  {
956  char buf[512];
957 
958  MemSet(buf, 0, pad);
959  pq_putmessage('d', buf, pad);
960  }
961 }
962 
963 /*
964  * Include the tablespace directory pointed to by 'path' in the output tar
965  * stream. If 'sizeonly' is true, we just calculate a total length and return
966  * it, without actually sending anything.
967  *
968  * Only used to send auxiliary tablespaces, not PGDATA.
969  */
970 int64
971 sendTablespace(char *path, bool sizeonly)
972 {
973  int64 size;
974  char pathbuf[MAXPGPATH];
975  struct stat statbuf;
976 
977  /*
978  * 'path' points to the tablespace location, but we only want to include
979  * the version directory in it that belongs to us.
980  */
981  snprintf(pathbuf, sizeof(pathbuf), "%s/%s", path,
983 
984  /*
985  * Store a directory entry in the tar file so we get the permissions
986  * right.
987  */
988  if (lstat(pathbuf, &statbuf) != 0)
989  {
990  if (errno != ENOENT)
991  ereport(ERROR,
993  errmsg("could not stat file or directory \"%s\": %m",
994  pathbuf)));
995 
996  /* If the tablespace went away while scanning, it's no error. */
997  return 0;
998  }
999 
1000  size = _tarWriteHeader(TABLESPACE_VERSION_DIRECTORY, NULL, &statbuf,
1001  sizeonly);
1002 
1003  /* Send all the files in the tablespace version directory */
1004  size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true);
1005 
1006  return size;
1007 }
1008 
1009 /*
1010  * Include all files from the given directory in the output tar stream. If
1011  * 'sizeonly' is true, we just calculate a total length and return it, without
1012  * actually sending anything.
1013  *
1014  * Omit any directory in the tablespaces list, to avoid backing up
1015  * tablespaces twice when they were created inside PGDATA.
1016  *
1017  * If sendtblspclinks is true, we need to include symlink
1018  * information in the tar file. If not, we can skip that
1019  * as it will be sent separately in the tablespace_map file.
1020  */
1021 static int64
1022 sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces,
1023  bool sendtblspclinks)
1024 {
1025  DIR *dir;
1026  struct dirent *de;
1027  char pathbuf[MAXPGPATH * 2];
1028  struct stat statbuf;
1029  int64 size = 0;
1030  const char *lastDir; /* Split last dir from parent path. */
1031  bool isDbDir = false; /* Does this directory contain relations? */
1032 
1033  /*
1034  * Determine if the current path is a database directory that can contain
1035  * relations.
1036  *
1037  * Start by finding the location of the delimiter between the parent path
1038  * and the current path.
1039  */
1040  lastDir = last_dir_separator(path);
1041 
1042  /* Does this path look like a database path (i.e. all digits)? */
1043  if (lastDir != NULL &&
1044  strspn(lastDir + 1, "0123456789") == strlen(lastDir + 1))
1045  {
1046  /* Part of path that contains the parent directory. */
1047  int parentPathLen = lastDir - path;
1048 
1049  /*
1050  * Mark path as a database directory if the parent path is either
1051  * $PGDATA/base or a tablespace version path.
1052  */
1053  if (strncmp(path, "./base", parentPathLen) == 0 ||
1054  (parentPathLen >= (sizeof(TABLESPACE_VERSION_DIRECTORY) - 1) &&
1055  strncmp(lastDir - (sizeof(TABLESPACE_VERSION_DIRECTORY) - 1),
1057  sizeof(TABLESPACE_VERSION_DIRECTORY) - 1) == 0))
1058  isDbDir = true;
1059  }
1060 
1061  dir = AllocateDir(path);
1062  while ((de = ReadDir(dir, path)) != NULL)
1063  {
1064  int excludeIdx;
1065  bool excludeFound;
1066  ForkNumber relForkNum; /* Type of fork if file is a relation */
1067  int relOidChars; /* Chars in filename that are the rel oid */
1068 
1069  /* Skip special stuff */
1070  if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
1071  continue;
1072 
1073  /* Skip temporary files */
1074  if (strncmp(de->d_name,
1076  strlen(PG_TEMP_FILE_PREFIX)) == 0)
1077  continue;
1078 
1079  /*
1080  * Check if the postmaster has signaled us to exit, and abort with an
1081  * error in that case. The error handler further up will call
1082  * do_pg_abort_backup() for us. Also check that if the backup was
1083  * started while still in recovery, the server wasn't promoted.
1084  * do_pg_stop_backup() will check that too, but it's better to stop
1085  * the backup early than continue to the end and fail there.
1086  */
1089  ereport(ERROR,
1090  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1091  errmsg("the standby was promoted during online backup"),
1092  errhint("This means that the backup being taken is corrupt "
1093  "and should not be used. "
1094  "Try taking another online backup.")));
1095 
1096  /* Scan for files that should be excluded */
1097  excludeFound = false;
1098  for (excludeIdx = 0; excludeFiles[excludeIdx] != NULL; excludeIdx++)
1099  {
1100  if (strcmp(de->d_name, excludeFiles[excludeIdx]) == 0)
1101  {
1102  elog(DEBUG1, "file \"%s\" excluded from backup", de->d_name);
1103  excludeFound = true;
1104  break;
1105  }
1106  }
1107 
1108  if (excludeFound)
1109  continue;
1110 
1111  /* Exclude all forks for unlogged tables except the init fork */
1112  if (isDbDir &&
1113  parse_filename_for_nontemp_relation(de->d_name, &relOidChars,
1114  &relForkNum))
1115  {
1116  /* Never exclude init forks */
1117  if (relForkNum != INIT_FORKNUM)
1118  {
1119  char initForkFile[MAXPGPATH];
1120  char relOid[OIDCHARS + 1];
1121 
1122  /*
1123  * If any other type of fork, check if there is an init fork
1124  * with the same OID. If so, the file can be excluded.
1125  */
1126  memcpy(relOid, de->d_name, relOidChars);
1127  relOid[relOidChars] = '\0';
1128  snprintf(initForkFile, sizeof(initForkFile), "%s/%s_init",
1129  path, relOid);
1130 
1131  if (lstat(initForkFile, &statbuf) == 0)
1132  {
1133  elog(DEBUG2,
1134  "unlogged relation file \"%s\" excluded from backup",
1135  de->d_name);
1136 
1137  continue;
1138  }
1139  }
1140  }
1141 
1142  /* Exclude temporary relations */
1143  if (isDbDir && looks_like_temp_rel_name(de->d_name))
1144  {
1145  elog(DEBUG2,
1146  "temporary relation file \"%s\" excluded from backup",
1147  de->d_name);
1148 
1149  continue;
1150  }
1151 
1152  snprintf(pathbuf, sizeof(pathbuf), "%s/%s", path, de->d_name);
1153 
1154  /* Skip pg_control here to back up it last */
1155  if (strcmp(pathbuf, "./global/pg_control") == 0)
1156  continue;
1157 
1158  if (lstat(pathbuf, &statbuf) != 0)
1159  {
1160  if (errno != ENOENT)
1161  ereport(ERROR,
1163  errmsg("could not stat file or directory \"%s\": %m",
1164  pathbuf)));
1165 
1166  /* If the file went away while scanning, it's not an error. */
1167  continue;
1168  }
1169 
1170  /* Scan for directories whose contents should be excluded */
1171  excludeFound = false;
1172  for (excludeIdx = 0; excludeDirContents[excludeIdx] != NULL; excludeIdx++)
1173  {
1174  if (strcmp(de->d_name, excludeDirContents[excludeIdx]) == 0)
1175  {
1176  elog(DEBUG1, "contents of directory \"%s\" excluded from backup", de->d_name);
1177  size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly);
1178  excludeFound = true;
1179  break;
1180  }
1181  }
1182 
1183  if (excludeFound)
1184  continue;
1185 
1186  /*
1187  * Exclude contents of directory specified by statrelpath if not set
1188  * to the default (pg_stat_tmp) which is caught in the loop above.
1189  */
1190  if (statrelpath != NULL && strcmp(pathbuf, statrelpath) == 0)
1191  {
1192  elog(DEBUG1, "contents of directory \"%s\" excluded from backup", statrelpath);
1193  size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly);
1194  continue;
1195  }
1196 
1197  /*
1198  * We can skip pg_wal, the WAL segments need to be fetched from the
1199  * WAL archive anyway. But include it as an empty directory anyway, so
1200  * we get permissions right.
1201  */
1202  if (strcmp(pathbuf, "./pg_wal") == 0)
1203  {
1204  /* If pg_wal is a symlink, write it as a directory anyway */
1205  size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly);
1206 
1207  /*
1208  * Also send archive_status directory (by hackishly reusing
1209  * statbuf from above ...).
1210  */
1211  size += _tarWriteHeader("./pg_wal/archive_status", NULL, &statbuf,
1212  sizeonly);
1213 
1214  continue; /* don't recurse into pg_wal */
1215  }
1216 
1217  /* Allow symbolic links in pg_tblspc only */
1218  if (strcmp(path, "./pg_tblspc") == 0 &&
1219 #ifndef WIN32
1220  S_ISLNK(statbuf.st_mode)
1221 #else
1222  pgwin32_is_junction(pathbuf)
1223 #endif
1224  )
1225  {
1226 #if defined(HAVE_READLINK) || defined(WIN32)
1227  char linkpath[MAXPGPATH];
1228  int rllen;
1229 
1230  rllen = readlink(pathbuf, linkpath, sizeof(linkpath));
1231  if (rllen < 0)
1232  ereport(ERROR,
1234  errmsg("could not read symbolic link \"%s\": %m",
1235  pathbuf)));
1236  if (rllen >= sizeof(linkpath))
1237  ereport(ERROR,
1238  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1239  errmsg("symbolic link \"%s\" target is too long",
1240  pathbuf)));
1241  linkpath[rllen] = '\0';
1242 
1243  size += _tarWriteHeader(pathbuf + basepathlen + 1, linkpath,
1244  &statbuf, sizeonly);
1245 #else
1246 
1247  /*
1248  * If the platform does not have symbolic links, it should not be
1249  * possible to have tablespaces - clearly somebody else created
1250  * them. Warn about it and ignore.
1251  */
1252  ereport(WARNING,
1253  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1254  errmsg("tablespaces are not supported on this platform")));
1255  continue;
1256 #endif /* HAVE_READLINK */
1257  }
1258  else if (S_ISDIR(statbuf.st_mode))
1259  {
1260  bool skip_this_dir = false;
1261  ListCell *lc;
1262 
1263  /*
1264  * Store a directory entry in the tar file so we can get the
1265  * permissions right.
1266  */
1267  size += _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf,
1268  sizeonly);
1269 
1270  /*
1271  * Call ourselves recursively for a directory, unless it happens
1272  * to be a separate tablespace located within PGDATA.
1273  */
1274  foreach(lc, tablespaces)
1275  {
1276  tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc);
1277 
1278  /*
1279  * ti->rpath is the tablespace relative path within PGDATA, or
1280  * NULL if the tablespace has been properly located somewhere
1281  * else.
1282  *
1283  * Skip past the leading "./" in pathbuf when comparing.
1284  */
1285  if (ti->rpath && strcmp(ti->rpath, pathbuf + 2) == 0)
1286  {
1287  skip_this_dir = true;
1288  break;
1289  }
1290  }
1291 
1292  /*
1293  * skip sending directories inside pg_tblspc, if not required.
1294  */
1295  if (strcmp(pathbuf, "./pg_tblspc") == 0 && !sendtblspclinks)
1296  skip_this_dir = true;
1297 
1298  if (!skip_this_dir)
1299  size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces, sendtblspclinks);
1300  }
1301  else if (S_ISREG(statbuf.st_mode))
1302  {
1303  bool sent = false;
1304 
1305  if (!sizeonly)
1306  sent = sendFile(pathbuf, pathbuf + basepathlen + 1, &statbuf,
1307  true, isDbDir ? pg_atoi(lastDir + 1, sizeof(Oid), 0) : InvalidOid);
1308 
1309  if (sent || sizeonly)
1310  {
1311  /* Add size, rounded up to 512byte block */
1312  size += ((statbuf.st_size + 511) & ~511);
1313  size += 512; /* Size of the header of the file */
1314  }
1315  }
1316  else
1317  ereport(WARNING,
1318  (errmsg("skipping special file \"%s\"", pathbuf)));
1319  }
1320  FreeDir(dir);
1321  return size;
1322 }
1323 
1324 /*
1325  * Check if a file should have its checksum validated.
1326  * We validate checksums on files in regular tablespaces
1327  * (including global and default) only, and in those there
1328  * are some files that are explicitly excluded.
1329  */
1330 static bool
1331 is_checksummed_file(const char *fullpath, const char *filename)
1332 {
1333  const char *const *f;
1334 
1335  /* Check that the file is in a tablespace */
1336  if (strncmp(fullpath, "./global/", 9) == 0 ||
1337  strncmp(fullpath, "./base/", 7) == 0 ||
1338  strncmp(fullpath, "/", 1) == 0)
1339  {
1340  /* Compare file against noChecksumFiles skiplist */
1341  for (f = noChecksumFiles; *f; f++)
1342  if (strcmp(*f, filename) == 0)
1343  return false;
1344 
1345  return true;
1346  }
1347  else
1348  return false;
1349 }
1350 
1351 /*****
1352  * Functions for handling tar file format
1353  *
1354  * Copied from pg_dump, but modified to work with libpq for sending
1355  */
1356 
1357 
1358 /*
1359  * Given the member, write the TAR header & send the file.
1360  *
1361  * If 'missing_ok' is true, will not throw an error if the file is not found.
1362  *
1363  * If dboid is anything other than InvalidOid then any checksum failures detected
1364  * will get reported to the stats collector.
1365  *
1366  * Returns true if the file was successfully sent, false if 'missing_ok',
1367  * and the file did not exist.
1368  */
1369 static bool
1370 sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf,
1371  bool missing_ok, Oid dboid)
1372 {
1373  FILE *fp;
1374  BlockNumber blkno = 0;
1375  bool block_retry = false;
1376  char buf[TAR_SEND_SIZE];
1377  uint16 checksum;
1378  int checksum_failures = 0;
1379  off_t cnt;
1380  int i;
1381  pgoff_t len = 0;
1382  char *page;
1383  size_t pad;
1384  PageHeader phdr;
1385  int segmentno = 0;
1386  char *segmentpath;
1387  bool verify_checksum = false;
1388 
1389  fp = AllocateFile(readfilename, "rb");
1390  if (fp == NULL)
1391  {
1392  if (errno == ENOENT && missing_ok)
1393  return false;
1394  ereport(ERROR,
1396  errmsg("could not open file \"%s\": %m", readfilename)));
1397  }
1398 
1399  _tarWriteHeader(tarfilename, NULL, statbuf, false);
1400 
1402  {
1403  char *filename;
1404 
1405  /*
1406  * Get the filename (excluding path). As last_dir_separator()
1407  * includes the last directory separator, we chop that off by
1408  * incrementing the pointer.
1409  */
1410  filename = last_dir_separator(readfilename) + 1;
1411 
1412  if (is_checksummed_file(readfilename, filename))
1413  {
1414  verify_checksum = true;
1415 
1416  /*
1417  * Cut off at the segment boundary (".") to get the segment number
1418  * in order to mix it into the checksum.
1419  */
1420  segmentpath = strstr(filename, ".");
1421  if (segmentpath != NULL)
1422  {
1423  segmentno = atoi(segmentpath + 1);
1424  if (segmentno == 0)
1425  ereport(ERROR,
1426  (errmsg("invalid segment number %d in file \"%s\"",
1427  segmentno, filename)));
1428  }
1429  }
1430  }
1431 
1432  while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0)
1433  {
1434  /*
1435  * The checksums are verified at block level, so we iterate over the
1436  * buffer in chunks of BLCKSZ, after making sure that
1437  * TAR_SEND_SIZE/buf is divisible by BLCKSZ and we read a multiple of
1438  * BLCKSZ bytes.
1439  */
1440  Assert(TAR_SEND_SIZE % BLCKSZ == 0);
1441 
1442  if (verify_checksum && (cnt % BLCKSZ != 0))
1443  {
1444  ereport(WARNING,
1445  (errmsg("could not verify checksum in file \"%s\", block "
1446  "%d: read buffer size %d and page size %d "
1447  "differ",
1448  readfilename, blkno, (int) cnt, BLCKSZ)));
1449  verify_checksum = false;
1450  }
1451 
1452  if (verify_checksum)
1453  {
1454  for (i = 0; i < cnt / BLCKSZ; i++)
1455  {
1456  page = buf + BLCKSZ * i;
1457 
1458  /*
1459  * Only check pages which have not been modified since the
1460  * start of the base backup. Otherwise, they might have been
1461  * written only halfway and the checksum would not be valid.
1462  * However, replaying WAL would reinstate the correct page in
1463  * this case. We also skip completely new pages, since they
1464  * don't have a checksum yet.
1465  */
1466  if (!PageIsNew(page) && PageGetLSN(page) < startptr)
1467  {
1468  checksum = pg_checksum_page((char *) page, blkno + segmentno * RELSEG_SIZE);
1469  phdr = (PageHeader) page;
1470  if (phdr->pd_checksum != checksum)
1471  {
1472  /*
1473  * Retry the block on the first failure. It's
1474  * possible that we read the first 4K page of the
1475  * block just before postgres updated the entire block
1476  * so it ends up looking torn to us. We only need to
1477  * retry once because the LSN should be updated to
1478  * something we can ignore on the next pass. If the
1479  * error happens again then it is a true validation
1480  * failure.
1481  */
1482  if (block_retry == false)
1483  {
1484  /* Reread the failed block */
1485  if (fseek(fp, -(cnt - BLCKSZ * i), SEEK_CUR) == -1)
1486  {
1487  ereport(ERROR,
1489  errmsg("could not fseek in file \"%s\": %m",
1490  readfilename)));
1491  }
1492 
1493  if (fread(buf + BLCKSZ * i, 1, BLCKSZ, fp) != BLCKSZ)
1494  {
1495  /*
1496  * If we hit end-of-file, a concurrent
1497  * truncation must have occurred, so break out
1498  * of this loop just as if the initial fread()
1499  * returned 0. We'll drop through to the same
1500  * code that handles that case. (We must fix
1501  * up cnt first, though.)
1502  */
1503  if (feof(fp))
1504  {
1505  cnt = BLCKSZ * i;
1506  break;
1507  }
1508 
1509  ereport(ERROR,
1511  errmsg("could not reread block %d of file \"%s\": %m",
1512  blkno, readfilename)));
1513  }
1514 
1515  if (fseek(fp, cnt - BLCKSZ * i - BLCKSZ, SEEK_CUR) == -1)
1516  {
1517  ereport(ERROR,
1519  errmsg("could not fseek in file \"%s\": %m",
1520  readfilename)));
1521  }
1522 
1523  /* Set flag so we know a retry was attempted */
1524  block_retry = true;
1525 
1526  /* Reset loop to validate the block again */
1527  i--;
1528  continue;
1529  }
1530 
1531  checksum_failures++;
1532 
1533  if (checksum_failures <= 5)
1534  ereport(WARNING,
1535  (errmsg("checksum verification failed in "
1536  "file \"%s\", block %d: calculated "
1537  "%X but expected %X",
1538  readfilename, blkno, checksum,
1539  phdr->pd_checksum)));
1540  if (checksum_failures == 5)
1541  ereport(WARNING,
1542  (errmsg("further checksum verification "
1543  "failures in file \"%s\" will not "
1544  "be reported", readfilename)));
1545  }
1546  }
1547  block_retry = false;
1548  blkno++;
1549  }
1550  }
1551 
1552  /* Send the chunk as a CopyData message */
1553  if (pq_putmessage('d', buf, cnt))
1554  ereport(ERROR,
1555  (errmsg("base backup could not send data, aborting backup")));
1556 
1557  len += cnt;
1558  throttle(cnt);
1559 
1560  if (feof(fp) || len >= statbuf->st_size)
1561  {
1562  /*
1563  * Reached end of file. The file could be longer, if it was
1564  * extended while we were sending it, but for a base backup we can
1565  * ignore such extended data. It will be restored from WAL.
1566  */
1567  break;
1568  }
1569  }
1570 
1571  CHECK_FREAD_ERROR(fp, readfilename);
1572 
1573  /* If the file was truncated while we were sending it, pad it with zeros */
1574  if (len < statbuf->st_size)
1575  {
1576  MemSet(buf, 0, sizeof(buf));
1577  while (len < statbuf->st_size)
1578  {
1579  cnt = Min(sizeof(buf), statbuf->st_size - len);
1580  pq_putmessage('d', buf, cnt);
1581  len += cnt;
1582  throttle(cnt);
1583  }
1584  }
1585 
1586  /*
1587  * Pad to 512 byte boundary, per tar format requirements. (This small
1588  * piece of data is probably not worth throttling.)
1589  */
1590  pad = ((len + 511) & ~511) - len;
1591  if (pad > 0)
1592  {
1593  MemSet(buf, 0, pad);
1594  pq_putmessage('d', buf, pad);
1595  }
1596 
1597  FreeFile(fp);
1598 
1599  if (checksum_failures > 1)
1600  {
1601  ereport(WARNING,
1602  (errmsg_plural("file \"%s\" has a total of %d checksum verification failure",
1603  "file \"%s\" has a total of %d checksum verification failures",
1604  checksum_failures,
1605  readfilename, checksum_failures)));
1606 
1607  pgstat_report_checksum_failures_in_db(dboid, checksum_failures);
1608  }
1609 
1610  total_checksum_failures += checksum_failures;
1611 
1612  return true;
1613 }
1614 
1615 
1616 static int64
1617 _tarWriteHeader(const char *filename, const char *linktarget,
1618  struct stat *statbuf, bool sizeonly)
1619 {
1620  char h[512];
1621  enum tarError rc;
1622 
1623  if (!sizeonly)
1624  {
1625  rc = tarCreateHeader(h, filename, linktarget, statbuf->st_size,
1626  statbuf->st_mode, statbuf->st_uid, statbuf->st_gid,
1627  statbuf->st_mtime);
1628 
1629  switch (rc)
1630  {
1631  case TAR_OK:
1632  break;
1633  case TAR_NAME_TOO_LONG:
1634  ereport(ERROR,
1635  (errmsg("file name too long for tar format: \"%s\"",
1636  filename)));
1637  break;
1638  case TAR_SYMLINK_TOO_LONG:
1639  ereport(ERROR,
1640  (errmsg("symbolic link target too long for tar format: "
1641  "file name \"%s\", target \"%s\"",
1642  filename, linktarget)));
1643  break;
1644  default:
1645  elog(ERROR, "unrecognized tar error: %d", rc);
1646  }
1647 
1648  pq_putmessage('d', h, sizeof(h));
1649  }
1650 
1651  return sizeof(h);
1652 }
1653 
1654 /*
1655  * Write tar header for a directory. If the entry in statbuf is a link then
1656  * write it as a directory anyway.
1657  */
1658 static int64
1659 _tarWriteDir(const char *pathbuf, int basepathlen, struct stat *statbuf,
1660  bool sizeonly)
1661 {
1662  /* If symlink, write it as a directory anyway */
1663 #ifndef WIN32
1664  if (S_ISLNK(statbuf->st_mode))
1665 #else
1666  if (pgwin32_is_junction(pathbuf))
1667 #endif
1668  statbuf->st_mode = S_IFDIR | pg_dir_create_mode;
1669 
1670  return _tarWriteHeader(pathbuf + basepathlen + 1, NULL, statbuf, sizeonly);
1671 }
1672 
1673 /*
1674  * Increment the network transfer counter by the given number of bytes,
1675  * and sleep if necessary to comply with the requested network transfer
1676  * rate.
1677  */
1678 static void
1679 throttle(size_t increment)
1680 {
1681  TimeOffset elapsed_min;
1682 
1683  if (throttling_counter < 0)
1684  return;
1685 
1686  throttling_counter += increment;
1688  return;
1689 
1690  /* How much time should have elapsed at minimum? */
1691  elapsed_min = elapsed_min_unit *
1693 
1694  /*
1695  * Since the latch could be set repeatedly because of concurrently WAL
1696  * activity, sleep in a loop to ensure enough time has passed.
1697  */
1698  for (;;)
1699  {
1700  TimeOffset elapsed,
1701  sleep;
1702  int wait_result;
1703 
1704  /* Time elapsed since the last measurement (and possible wake up). */
1705  elapsed = GetCurrentTimestamp() - throttled_last;
1706 
1707  /* sleep if the transfer is faster than it should be */
1708  sleep = elapsed_min - elapsed;
1709  if (sleep <= 0)
1710  break;
1711 
1713 
1714  /* We're eating a potentially set latch, so check for interrupts */
1716 
1717  /*
1718  * (TAR_SEND_SIZE / throttling_sample * elapsed_min_unit) should be
1719  * the maximum time to sleep. Thus the cast to long is safe.
1720  */
1721  wait_result = WaitLatch(MyLatch,
1723  (long) (sleep / 1000),
1725 
1726  if (wait_result & WL_LATCH_SET)
1728 
1729  /* Done waiting? */
1730  if (wait_result & WL_TIMEOUT)
1731  break;
1732  }
1733 
1734  /*
1735  * As we work with integers, only whole multiple of throttling_sample was
1736  * processed. The rest will be done during the next call of this function.
1737  */
1739 
1740  /*
1741  * Time interval for the remaining amount and possible next increments
1742  * starts now.
1743  */
1745 }
#define StatusFilePath(path, xlog, suffix)
List * options
Definition: replnodes.h:44
#define NIL
Definition: pg_list.h:65
static void pq_sendint16(StringInfo buf, uint16 i)
Definition: pqformat.h:137
int pg_file_create_mode
Definition: file_perm.c:19
#define MAX_RATE_LOWER
Definition: basebackup.h:20
#define DEBUG1
Definition: elog.h:25
int errhint(const char *fmt,...)
Definition: elog.c:974
static void throttle(size_t increment)
Definition: basebackup.c:1679
uint32 TimeLineID
Definition: xlogdefs.h:52
#define WL_TIMEOUT
Definition: latch.h:127
int wal_segment_size
Definition: xlog.c:112
#define PG_DYNSHMEM_DIR
Definition: dsm_impl.h:50
static ListCell * lnext(const List *l, const ListCell *c)
Definition: pg_list.h:321
#define USECS_PER_SEC
Definition: timestamp.h:94
bool update_process_title
Definition: ps_status.c:35
int errmsg_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition: elog.c:837
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1583
bool DataChecksumsEnabled(void)
Definition: xlog.c:4822
int64 TimestampTz
Definition: timestamp.h:39
#define XLByteToPrevSeg(xlrp, logSegNo, wal_segsz_bytes)
char * pstrdup(const char *in)
Definition: mcxt.c:1186
Definition: pgtar.h:17
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
char * rpath
Definition: basebackup.h:28
StringInfo makeStringInfo(void)
Definition: stringinfo.c:28
#define Min(x, y)
Definition: c.h:904
static void SendBackupHeader(List *tablespaces)
Definition: basebackup.c:791
void set_ps_display(const char *activity, bool force)
Definition: ps_status.c:331
static void parse_basebackup_options(List *options, basebackup_options *opt)
Definition: basebackup.c:644
#define strVal(v)
Definition: value.h:54
int errcode(int sqlerrcode)
Definition: elog.c:570
#define MemSet(start, val, len)
Definition: c.h:955
void pq_putemptymessage(char msgtype)
Definition: pqformat.c:390
uint32 BlockNumber
Definition: block.h:31
void pq_sendstring(StringInfo buf, const char *str)
Definition: pqformat.c:197
unsigned int Oid
Definition: postgres_ext.h:31
bool RecoveryInProgress(void)
Definition: xlog.c:7913
static bool backup_started_in_recovery
Definition: basebackup.c:78
static void send_int8_string(StringInfoData *buf, int64 intval)
Definition: basebackup.c:781
Definition: dirent.h:9
void ResetLatch(Latch *latch)
Definition: latch.c:519
void pq_beginmessage(StringInfo buf, char msgtype)
Definition: pqformat.c:87
void pgstat_report_checksum_failures_in_db(Oid dboid, int failurecount)
Definition: pgstat.c:1532
XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, StringInfo labelfile, List **tablespaces, StringInfo tblspcmapfile, bool infotbssize, bool needtblspcmapfile)
Definition: xlog.c:10193
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:344
uint16 pd_checksum
Definition: bufpage.h:156
static void pq_sendbyte(StringInfo buf, uint8 byt)
Definition: pqformat.h:161
#define pgoff_t
Definition: win32_port.h:195
static void pq_sendint32(StringInfo buf, uint32 i)
Definition: pqformat.h:145
#define sprintf
Definition: port.h:194
#define TABLESPACE_MAP
Definition: xlog.h:362
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:47
unsigned short uint16
Definition: c.h:357
#define linitial(l)
Definition: pg_list.h:195
static const char *const excludeDirContents[]
Definition: basebackup.c:134
#define MAX_RATE_UPPER
Definition: basebackup.h:21
Definition: dirent.c:25
tarError
Definition: pgtar.h:15
#define ERROR
Definition: elog.h:43
#define PG_TEMP_FILE_PREFIX
Definition: pg_checksums.c:59
#define IsXLogFileName(fname)
XLogRecPtr do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
Definition: xlog.c:10712
static bool noverify_checksums
Definition: basebackup.c:124
void CheckXLogRemoved(XLogSegNo segno, TimeLineID tli)
Definition: xlog.c:3846
void SendBaseBackup(BaseBackupCmd *cmd)
Definition: basebackup.c:760
#define MAXPGPATH
#define DEBUG2
Definition: elog.h:24
#define XLogFromFileName(fname, tli, logSegNo, wal_segsz_bytes)
#define TABLESPACE_VERSION_DIRECTORY
Definition: relpath.h:26
static int32 maxrate
Definition: pg_basebackup.c:98
static char * buf
Definition: pg_test_fsync.c:68
uint64 XLogSegNo
Definition: xlogdefs.h:41
#define readlink(path, buf, size)
Definition: win32_port.h:222
int errcode_for_file_access(void)
Definition: elog.c:593
#define is_absolute_path(filename)
Definition: port.h:86
const char * label
Definition: basebackup.c:48
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2205
static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli)
Definition: basebackup.c:869
unsigned int uint32
Definition: c.h:358
int64 sendTablespace(char *path, bool sizeonly)
Definition: basebackup.c:971
static int64 _tarWriteHeader(const char *filename, const char *linktarget, struct stat *statbuf, bool sizeonly)
Definition: basebackup.c:1617
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2466
char * pgstat_stat_directory
Definition: pgstat.c:134
#define ereport(elevel, rest)
Definition: elog.h:141
#define CHECK_FREAD_ERROR(fp, filename)
Definition: basebackup.c:98
static char * statrelpath
Definition: basebackup.c:81
ForkNumber
Definition: relpath.h:40
Node * arg
Definition: parsenodes.h:731
static const char *const excludeFiles[]
Definition: basebackup.c:175
#define S_ISREG(m)
Definition: win32_port.h:299
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:45
List * lappend(List *list, void *datum)
Definition: list.c:322
#define WARNING
Definition: elog.h:40
#define stat(a, b)
Definition: win32_port.h:255
#define MAXFNAMELEN
int pg_dir_create_mode
Definition: file_perm.c:18
enum tarError tarCreateHeader(char *h, const char *filename, const char *linktarget, pgoff_t size, mode_t mode, uid_t uid, gid_t gid, time_t mtime)
Definition: tar.c:112
int64 TimeOffset
Definition: timestamp.h:40
static void perform_base_backup(basebackup_options *opt)
Definition: basebackup.c:238
void * palloc0(Size size)
Definition: mcxt.c:980
#define XLOGDIR
uintptr_t Datum
Definition: postgres.h:367
char * last_dir_separator(const char *filename)
Definition: path.c:138
#define XLOG_CONTROL_FILE
#define InvalidOid
Definition: postgres_ext.h:36
TimeLineID ThisTimeLineID
Definition: xlog.c:187
static TimestampTz throttled_last
Definition: basebackup.c:115
#define RELCACHE_INIT_FILENAME
Definition: relcache.h:24
static StringInfo tblspc_map_file
Definition: xlogfuncs.c:46
static bool is_checksummed_file(const char *fullpath, const char *filename)
Definition: basebackup.c:1331
PageHeaderData * PageHeader
Definition: bufpage.h:166
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:732
#define lfirst(lc)
Definition: pg_list.h:190
void WalSndSetState(WalSndState state)
Definition: walsender.c:3166
#define PG_STAT_TMP_DIR
Definition: pgstat.h:33
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2532
#define THROTTLING_FREQUENCY
Definition: basebackup.c:91
size_t Size
Definition: c.h:466
#define XLogFileName(fname, tli, logSegNo, wal_segsz_bytes)
#define PG_AUTOCONF_FILENAME
Definition: guc.h:34
#define LOG_METAINFO_DATAFILE_TMP
Definition: syslogger.h:98
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:52
bool looks_like_temp_rel_name(const char *name)
Definition: fd.c:3064
bool parse_filename_for_nontemp_relation(const char *name, int *oidchars, ForkNumber *fork)
Definition: reinit.c:374
#define INT64_FORMAT
Definition: c.h:400
#define S_ISDIR(m)
Definition: win32_port.h:296
#define PageGetLSN(page)
Definition: bufpage.h:366
#define lstat(path, sb)
Definition: win32_port.h:244
int FreeFile(FILE *file)
Definition: fd.c:2404
#define IsTLHistoryFileName(fname)
static int compareWalFileNames(const ListCell *a, const ListCell *b)
Definition: basebackup.c:632
static char * filename
Definition: pg_dumpall.c:91
static int64 sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces, bool sendtblspclinks)
Definition: basebackup.c:1022
#define intVal(v)
Definition: value.h:52
#define PageIsNew(page)
Definition: bufpage.h:229
void pq_sendbytes(StringInfo buf, const char *data, int datalen)
Definition: pqformat.c:125
void list_sort(List *list, list_sort_comparator cmp)
Definition: list.c:1482
int errmsg(const char *fmt,...)
Definition: elog.c:784
void pq_endmessage(StringInfo buf)
Definition: pqformat.c:298
#define TAR_SEND_SIZE
Definition: basebackup.c:86
#define elog(elevel,...)
Definition: elog.h:226
int i
void do_pg_abort_backup(void)
Definition: xlog.c:11117
void * arg
char * DataDir
Definition: globals.c:62
struct Latch * MyLatch
Definition: globals.c:54
#define BACKUP_LABEL_FILE
Definition: xlog.h:359
char * defname
Definition: parsenodes.h:730
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
char d_name[MAX_PATH]
Definition: dirent.h:14
#define pq_putmessage(msgtype, s, len)
Definition: libpq.h:42
static int64 throttling_counter
Definition: basebackup.c:109
void pq_puttextmessage(char msgtype, const char *str)
Definition: pqformat.c:369
static void base_backup_cleanup(int code, Datum arg)
Definition: basebackup.c:226
Definition: pg_list.h:50
#define snprintf
Definition: port.h:192
static bool sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf, bool missing_ok, Oid dboid)
Definition: basebackup.c:1370
#define WL_LATCH_SET
Definition: latch.h:124
#define OIDCHARS
Definition: relpath.h:30
static uint64 throttling_sample
Definition: basebackup.c:106
static void sendFileWithContent(const char *filename, const char *content)
Definition: basebackup.c:924
uint16 pg_checksum_page(char *page, BlockNumber blkno)
int FreeDir(DIR *dir)
Definition: fd.c:2584
static TimeOffset elapsed_min_unit
Definition: basebackup.c:112
int32 pg_atoi(const char *s, int size, int c)
Definition: numutils.c:38
#define WL_EXIT_ON_PM_DEATH
Definition: latch.h:129
static XLogRecPtr startptr
Definition: basebackup.c:118
bool pgwin32_is_junction(const char *path)
static const char *const noChecksumFiles[]
Definition: basebackup.c:208
static int64 _tarWriteDir(const char *pathbuf, int basepathlen, struct stat *statbuf, bool sizeonly)
Definition: basebackup.c:1659
static long long int total_checksum_failures
Definition: basebackup.c:121
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)