PostgreSQL Source Code  git master
basebackup.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * basebackup.c
4  * code for taking a base backup and streaming it to a standby
5  *
6  * Portions Copyright (c) 2010-2019, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * src/backend/replication/basebackup.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14 
15 #include <sys/stat.h>
16 #include <unistd.h>
17 #include <time.h>
18 
19 #include "access/xlog_internal.h" /* for pg_start/stop_backup */
20 #include "catalog/pg_type.h"
21 #include "common/file_perm.h"
22 #include "lib/stringinfo.h"
23 #include "libpq/libpq.h"
24 #include "libpq/pqformat.h"
25 #include "miscadmin.h"
26 #include "nodes/pg_list.h"
27 #include "pgstat.h"
28 #include "pgtar.h"
29 #include "port.h"
30 #include "postmaster/syslogger.h"
31 #include "replication/basebackup.h"
32 #include "replication/walsender.h"
34 #include "storage/bufpage.h"
35 #include "storage/checksum.h"
36 #include "storage/dsm_impl.h"
37 #include "storage/fd.h"
38 #include "storage/ipc.h"
39 #include "storage/reinit.h"
40 #include "utils/builtins.h"
41 #include "utils/ps_status.h"
42 #include "utils/relcache.h"
43 #include "utils/timestamp.h"
44 
45 typedef struct
46 {
47  const char *label;
48  bool progress;
50  bool nowait;
51  bool includewal;
55 
56 
57 static int64 sendDir(const char *path, int basepathlen, bool sizeonly,
58  List *tablespaces, bool sendtblspclinks);
59 static bool sendFile(const char *readfilename, const char *tarfilename,
60  struct stat *statbuf, bool missing_ok, Oid dboid);
61 static void sendFileWithContent(const char *filename, const char *content);
62 static int64 _tarWriteHeader(const char *filename, const char *linktarget,
63  struct stat *statbuf, bool sizeonly);
64 static int64 _tarWriteDir(const char *pathbuf, int basepathlen, struct stat *statbuf,
65  bool sizeonly);
66 static void send_int8_string(StringInfoData *buf, int64 intval);
67 static void SendBackupHeader(List *tablespaces);
68 static void base_backup_cleanup(int code, Datum arg);
69 static void perform_base_backup(basebackup_options *opt);
71 static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli);
72 static int compareWalFileNames(const ListCell *a, const ListCell *b);
73 static void throttle(size_t increment);
74 static bool is_checksummed_file(const char *fullpath, const char *filename);
75 
76 /* Was the backup currently in-progress initiated in recovery mode? */
77 static bool backup_started_in_recovery = false;
78 
79 /* Relative path of temporary statistics directory */
80 static char *statrelpath = NULL;
81 
82 /*
83  * Size of each block sent into the tar stream for larger files.
84  */
85 #define TAR_SEND_SIZE 32768
86 
87 /*
88  * How frequently to throttle, as a fraction of the specified rate-second.
89  */
90 #define THROTTLING_FREQUENCY 8
91 
/*
 * Raise an error if the stdio stream 'fp' has its error indicator set.
 *
 * fread() by itself does not say what went wrong, so ferror() is consulted
 * afterwards.  Neither of the two sets errno, which is why only a generic
 * message naming 'filename' can be reported.
 */
#define CHECK_FREAD_ERROR(fp, filename) \
do { \
	if (!ferror(fp)) \
		break; \
	ereport(ERROR, \
			(errmsg("could not read from file \"%s\"", filename))); \
} while (0)
103 
104 /* The actual number of bytes, transfer of which may cause sleep. */
105 static uint64 throttling_sample;
106 
107 /* Amount of data already transferred but not yet throttled. */
108 static int64 throttling_counter;
109 
110 /* The minimum time required to transfer throttling_sample bytes. */
112 
113 /* The last check of the transfer rate. */
115 
116 /* The starting XLOG position of the base backup. */
118 
119 /* Total number of checksum failures during base backup. */
120 static long long int total_checksum_failures;
121 
122 /* Do not verify checksums. */
123 static bool noverify_checksums = false;
124 
125 /*
126  * The contents of these directories are removed or recreated during server
127  * start so they are not included in backups. The directories themselves are
128  * kept and included as empty to preserve access permissions.
129  *
130  * Note: this list should be kept in sync with the filter lists in pg_rewind's
131  * filemap.c.
132  */
133 static const char *const excludeDirContents[] =
134 {
135  /*
136  * Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped even
137  * when stats_temp_directory is set because PGSS_TEXT_FILE is always
138  * created there.
139  */
141 
142  /*
143  * It is generally not useful to backup the contents of this directory
144  * even if the intention is to restore to another master. See backup.sgml
145  * for a more detailed description.
146  */
147  "pg_replslot",
148 
149  /* Contents removed on startup, see dsm_cleanup_for_mmap(). */
151 
152  /* Contents removed on startup, see AsyncShmemInit(). */
153  "pg_notify",
154 
155  /*
156  * Old contents are loaded for possible debugging but are not required for
157  * normal operation, see OldSerXidInit().
158  */
159  "pg_serial",
160 
161  /* Contents removed on startup, see DeleteAllExportedSnapshotFiles(). */
162  "pg_snapshots",
163 
164  /* Contents zeroed on startup, see StartupSUBTRANS(). */
165  "pg_subtrans",
166 
167  /* end of list */
168  NULL
169 };
170 
171 /*
172  * List of files excluded from backups.
173  */
174 static const char *const excludeFiles[] =
175 {
176  /* Skip auto conf temporary file. */
177  PG_AUTOCONF_FILENAME ".tmp",
178 
179  /* Skip current log file temporary file */
181 
182  /* Skip relation cache because it is rebuilt on startup */
184 
185  /*
186  * If there's a backup_label or tablespace_map file, it belongs to a
187  * backup started by the user with pg_start_backup(). It is *not* correct
188  * for this backup. Our backup_label/tablespace_map is injected into the
189  * tar separately.
190  */
193 
194  "postmaster.pid",
195  "postmaster.opts",
196 
197  /* end of list */
198  NULL
199 };
200 
/*
 * Names of files that are never subjected to checksum validation.  The
 * array is terminated by a NULL entry.
 *
 * Note: keep this list in sync with what pg_checksums.c includes.
 */
static const char *const noChecksumFiles[] =
{
	"pg_control",
	"pg_filenode.map",
	"pg_internal.init",
	"PG_VERSION",
#ifdef EXEC_BACKEND
	/* only present in EXEC_BACKEND builds */
	"config_exec_params",
	"config_exec_params.new",
#endif
	/* end of list */
	NULL
};
218 
219 
220 /*
221  * Called when ERROR or FATAL happens in perform_base_backup() after
222  * we have started the backup - make sure we end it!
223  */
224 static void
226 {
228 }
229 
230 /*
231  * Actually do a base backup for the specified tablespaces.
232  *
233  * This is split out mainly to avoid complaints about "variable might be
234  * clobbered by longjmp" from stupider versions of gcc.
235  */
236 static void
238 {
239  TimeLineID starttli;
240  XLogRecPtr endptr;
241  TimeLineID endtli;
242  StringInfo labelfile;
244  int datadirpathlen;
245  List *tablespaces = NIL;
246 
247  datadirpathlen = strlen(DataDir);
248 
250 
251  labelfile = makeStringInfo();
252  tblspc_map_file = makeStringInfo();
253 
255 
256  startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &starttli,
257  labelfile, &tablespaces,
258  tblspc_map_file,
259  opt->progress, opt->sendtblspcmapfile);
260 
261  /*
262  * Once do_pg_start_backup has been called, ensure that any failure causes
263  * us to abort the backup so we don't "leak" a backup counter. For this
264  * reason, *all* functionality between do_pg_start_backup() and the end of
265  * do_pg_stop_backup() should be inside the error cleanup block!
266  */
267 
269  {
270  ListCell *lc;
271  tablespaceinfo *ti;
272 
273  SendXlogRecPtrResult(startptr, starttli);
274 
275  /*
276  * Calculate the relative path of temporary statistics directory in
277  * order to skip the files which are located in that directory later.
278  */
280  strncmp(pgstat_stat_directory, DataDir, datadirpathlen) == 0)
281  statrelpath = psprintf("./%s", pgstat_stat_directory + datadirpathlen + 1);
282  else if (strncmp(pgstat_stat_directory, "./", 2) != 0)
284  else
286 
287  /* Add a node for the base directory at the end */
288  ti = palloc0(sizeof(tablespaceinfo));
289  ti->size = opt->progress ? sendDir(".", 1, true, tablespaces, true) : -1;
290  tablespaces = lappend(tablespaces, ti);
291 
292  /* Send tablespace header */
293  SendBackupHeader(tablespaces);
294 
295  /* Setup and activate network throttling, if client requested it */
296  if (opt->maxrate > 0)
297  {
299  (int64) opt->maxrate * (int64) 1024 / THROTTLING_FREQUENCY;
300 
301  /*
302  * The minimum amount of time for throttling_sample bytes to be
303  * transferred.
304  */
306 
307  /* Enable throttling. */
308  throttling_counter = 0;
309 
310  /* The 'real data' starts now (header was ignored). */
312  }
313  else
314  {
315  /* Disable throttling. */
316  throttling_counter = -1;
317  }
318 
319  /* Send off our tablespaces one by one */
320  foreach(lc, tablespaces)
321  {
322  tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc);
324 
325  /* Send CopyOutResponse message */
326  pq_beginmessage(&buf, 'H');
327  pq_sendbyte(&buf, 0); /* overall format */
328  pq_sendint16(&buf, 0); /* natts */
329  pq_endmessage(&buf);
330 
331  if (ti->path == NULL)
332  {
333  struct stat statbuf;
334 
335  /* In the main tar, include the backup_label first... */
337 
338  /*
339  * Send tablespace_map file if required and then the bulk of
340  * the files.
341  */
342  if (tblspc_map_file && opt->sendtblspcmapfile)
343  {
344  sendFileWithContent(TABLESPACE_MAP, tblspc_map_file->data);
345  sendDir(".", 1, false, tablespaces, false);
346  }
347  else
348  sendDir(".", 1, false, tablespaces, true);
349 
350  /* ... and pg_control after everything else. */
351  if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
352  ereport(ERROR,
354  errmsg("could not stat file \"%s\": %m",
357  }
358  else
359  sendTablespace(ti->path, false);
360 
361  /*
362  * If we're including WAL, and this is the main data directory we
363  * don't terminate the tar stream here. Instead, we will append
364  * the xlog files below and terminate it then. This is safe since
365  * the main data directory is always sent *last*.
366  */
367  if (opt->includewal && ti->path == NULL)
368  {
369  Assert(lnext(tablespaces, lc) == NULL);
370  }
371  else
372  pq_putemptymessage('c'); /* CopyDone */
373  }
374 
375  endptr = do_pg_stop_backup(labelfile->data, !opt->nowait, &endtli);
376  }
378 
379 
380  if (opt->includewal)
381  {
382  /*
383  * We've left the last tar file "open", so we can now append the
384  * required WAL files to it.
385  */
386  char pathbuf[MAXPGPATH];
387  XLogSegNo segno;
388  XLogSegNo startsegno;
389  XLogSegNo endsegno;
390  struct stat statbuf;
391  List *historyFileList = NIL;
392  List *walFileList = NIL;
393  char firstoff[MAXFNAMELEN];
394  char lastoff[MAXFNAMELEN];
395  DIR *dir;
396  struct dirent *de;
397  ListCell *lc;
398  TimeLineID tli;
399 
400  /*
401  * I'd rather not worry about timelines here, so scan pg_wal and
402  * include all WAL files in the range between 'startptr' and 'endptr',
403  * regardless of the timeline the file is stamped with. If there are
404  * some spurious WAL files belonging to timelines that don't belong in
405  * this server's history, they will be included too. Normally there
406  * shouldn't be such files, but if there are, there's little harm in
407  * including them.
408  */
409  XLByteToSeg(startptr, startsegno, wal_segment_size);
410  XLogFileName(firstoff, ThisTimeLineID, startsegno, wal_segment_size);
411  XLByteToPrevSeg(endptr, endsegno, wal_segment_size);
412  XLogFileName(lastoff, ThisTimeLineID, endsegno, wal_segment_size);
413 
414  dir = AllocateDir("pg_wal");
415  while ((de = ReadDir(dir, "pg_wal")) != NULL)
416  {
417  /* Does it look like a WAL segment, and is it in the range? */
418  if (IsXLogFileName(de->d_name) &&
419  strcmp(de->d_name + 8, firstoff + 8) >= 0 &&
420  strcmp(de->d_name + 8, lastoff + 8) <= 0)
421  {
422  walFileList = lappend(walFileList, pstrdup(de->d_name));
423  }
424  /* Does it look like a timeline history file? */
425  else if (IsTLHistoryFileName(de->d_name))
426  {
427  historyFileList = lappend(historyFileList, pstrdup(de->d_name));
428  }
429  }
430  FreeDir(dir);
431 
432  /*
433  * Before we go any further, check that none of the WAL segments we
434  * need were removed.
435  */
436  CheckXLogRemoved(startsegno, ThisTimeLineID);
437 
438  /*
439  * Sort the WAL filenames. We want to send the files in order from
440  * oldest to newest, to reduce the chance that a file is recycled
441  * before we get a chance to send it over.
442  */
443  list_sort(walFileList, compareWalFileNames);
444 
445  /*
446  * There must be at least one xlog file in the pg_wal directory, since
447  * we are doing backup-including-xlog.
448  */
449  if (walFileList == NIL)
450  ereport(ERROR,
451  (errmsg("could not find any WAL files")));
452 
453  /*
454  * Sanity check: the first and last segment should cover startptr and
455  * endptr, with no gaps in between.
456  */
457  XLogFromFileName((char *) linitial(walFileList),
458  &tli, &segno, wal_segment_size);
459  if (segno != startsegno)
460  {
461  char startfname[MAXFNAMELEN];
462 
463  XLogFileName(startfname, ThisTimeLineID, startsegno,
465  ereport(ERROR,
466  (errmsg("could not find WAL file \"%s\"", startfname)));
467  }
468  foreach(lc, walFileList)
469  {
470  char *walFileName = (char *) lfirst(lc);
471  XLogSegNo currsegno = segno;
472  XLogSegNo nextsegno = segno + 1;
473 
474  XLogFromFileName(walFileName, &tli, &segno, wal_segment_size);
475  if (!(nextsegno == segno || currsegno == segno))
476  {
477  char nextfname[MAXFNAMELEN];
478 
479  XLogFileName(nextfname, ThisTimeLineID, nextsegno,
481  ereport(ERROR,
482  (errmsg("could not find WAL file \"%s\"", nextfname)));
483  }
484  }
485  if (segno != endsegno)
486  {
487  char endfname[MAXFNAMELEN];
488 
489  XLogFileName(endfname, ThisTimeLineID, endsegno, wal_segment_size);
490  ereport(ERROR,
491  (errmsg("could not find WAL file \"%s\"", endfname)));
492  }
493 
494  /* Ok, we have everything we need. Send the WAL files. */
495  foreach(lc, walFileList)
496  {
497  char *walFileName = (char *) lfirst(lc);
498  FILE *fp;
499  char buf[TAR_SEND_SIZE];
500  size_t cnt;
501  pgoff_t len = 0;
502 
503  snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", walFileName);
504  XLogFromFileName(walFileName, &tli, &segno, wal_segment_size);
505 
506  fp = AllocateFile(pathbuf, "rb");
507  if (fp == NULL)
508  {
509  int save_errno = errno;
510 
511  /*
512  * Most likely reason for this is that the file was already
513  * removed by a checkpoint, so check for that to get a better
514  * error message.
515  */
516  CheckXLogRemoved(segno, tli);
517 
518  errno = save_errno;
519  ereport(ERROR,
521  errmsg("could not open file \"%s\": %m", pathbuf)));
522  }
523 
524  if (fstat(fileno(fp), &statbuf) != 0)
525  ereport(ERROR,
527  errmsg("could not stat file \"%s\": %m",
528  pathbuf)));
529  if (statbuf.st_size != wal_segment_size)
530  {
531  CheckXLogRemoved(segno, tli);
532  ereport(ERROR,
534  errmsg("unexpected WAL file size \"%s\"", walFileName)));
535  }
536 
537  /* send the WAL file itself */
538  _tarWriteHeader(pathbuf, NULL, &statbuf, false);
539 
540  while ((cnt = fread(buf, 1,
541  Min(sizeof(buf), wal_segment_size - len),
542  fp)) > 0)
543  {
544  CheckXLogRemoved(segno, tli);
545  /* Send the chunk as a CopyData message */
546  if (pq_putmessage('d', buf, cnt))
547  ereport(ERROR,
548  (errmsg("base backup could not send data, aborting backup")));
549 
550  len += cnt;
551  throttle(cnt);
552 
553  if (len == wal_segment_size)
554  break;
555  }
556 
557  CHECK_FREAD_ERROR(fp, pathbuf);
558 
559  if (len != wal_segment_size)
560  {
561  CheckXLogRemoved(segno, tli);
562  ereport(ERROR,
564  errmsg("unexpected WAL file size \"%s\"", walFileName)));
565  }
566 
567  /* wal_segment_size is a multiple of 512, so no need for padding */
568 
569  FreeFile(fp);
570 
571  /*
572  * Mark file as archived, otherwise files can get archived again
573  * after promotion of a new node. This is in line with
574  * walreceiver.c always doing an XLogArchiveForceDone() after a
575  * complete segment.
576  */
577  StatusFilePath(pathbuf, walFileName, ".done");
578  sendFileWithContent(pathbuf, "");
579  }
580 
581  /*
582  * Send timeline history files too. Only the latest timeline history
583  * file is required for recovery, and even that only if there happens
584  * to be a timeline switch in the first WAL segment that contains the
585  * checkpoint record, or if we're taking a base backup from a standby
586  * server and the target timeline changes while the backup is taken.
587  * But they are small and highly useful for debugging purposes, so
588  * better include them all, always.
589  */
590  foreach(lc, historyFileList)
591  {
592  char *fname = lfirst(lc);
593 
594  snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", fname);
595 
596  if (lstat(pathbuf, &statbuf) != 0)
597  ereport(ERROR,
599  errmsg("could not stat file \"%s\": %m", pathbuf)));
600 
601  sendFile(pathbuf, pathbuf, &statbuf, false, InvalidOid);
602 
603  /* unconditionally mark file as archived */
604  StatusFilePath(pathbuf, fname, ".done");
605  sendFileWithContent(pathbuf, "");
606  }
607 
608  /* Send CopyDone message for the last tar file */
609  pq_putemptymessage('c');
610  }
611  SendXlogRecPtrResult(endptr, endtli);
612 
614  {
615  if (total_checksum_failures > 1)
617  (errmsg("%lld total checksum verification failures", total_checksum_failures)));
618 
619  ereport(ERROR,
621  errmsg("checksum verification failure during base backup")));
622  }
623 
624 }
625 
626 /*
627  * list_sort comparison function, to compare log/seg portion of WAL segment
628  * filenames, ignoring the timeline portion.
629  */
630 static int
632 {
633  char *fna = (char *) lfirst(a);
634  char *fnb = (char *) lfirst(b);
635 
636  return strcmp(fna + 8, fnb + 8);
637 }
638 
639 /*
640  * Parse the base backup options passed down by the parser
641  */
642 static void
644 {
645  ListCell *lopt;
646  bool o_label = false;
647  bool o_progress = false;
648  bool o_fast = false;
649  bool o_nowait = false;
650  bool o_wal = false;
651  bool o_maxrate = false;
652  bool o_tablespace_map = false;
653  bool o_noverify_checksums = false;
654 
655  MemSet(opt, 0, sizeof(*opt));
656  foreach(lopt, options)
657  {
658  DefElem *defel = (DefElem *) lfirst(lopt);
659 
660  if (strcmp(defel->defname, "label") == 0)
661  {
662  if (o_label)
663  ereport(ERROR,
664  (errcode(ERRCODE_SYNTAX_ERROR),
665  errmsg("duplicate option \"%s\"", defel->defname)));
666  opt->label = strVal(defel->arg);
667  o_label = true;
668  }
669  else if (strcmp(defel->defname, "progress") == 0)
670  {
671  if (o_progress)
672  ereport(ERROR,
673  (errcode(ERRCODE_SYNTAX_ERROR),
674  errmsg("duplicate option \"%s\"", defel->defname)));
675  opt->progress = true;
676  o_progress = true;
677  }
678  else if (strcmp(defel->defname, "fast") == 0)
679  {
680  if (o_fast)
681  ereport(ERROR,
682  (errcode(ERRCODE_SYNTAX_ERROR),
683  errmsg("duplicate option \"%s\"", defel->defname)));
684  opt->fastcheckpoint = true;
685  o_fast = true;
686  }
687  else if (strcmp(defel->defname, "nowait") == 0)
688  {
689  if (o_nowait)
690  ereport(ERROR,
691  (errcode(ERRCODE_SYNTAX_ERROR),
692  errmsg("duplicate option \"%s\"", defel->defname)));
693  opt->nowait = true;
694  o_nowait = true;
695  }
696  else if (strcmp(defel->defname, "wal") == 0)
697  {
698  if (o_wal)
699  ereport(ERROR,
700  (errcode(ERRCODE_SYNTAX_ERROR),
701  errmsg("duplicate option \"%s\"", defel->defname)));
702  opt->includewal = true;
703  o_wal = true;
704  }
705  else if (strcmp(defel->defname, "max_rate") == 0)
706  {
707  long maxrate;
708 
709  if (o_maxrate)
710  ereport(ERROR,
711  (errcode(ERRCODE_SYNTAX_ERROR),
712  errmsg("duplicate option \"%s\"", defel->defname)));
713 
714  maxrate = intVal(defel->arg);
715  if (maxrate < MAX_RATE_LOWER || maxrate > MAX_RATE_UPPER)
716  ereport(ERROR,
717  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
718  errmsg("%d is outside the valid range for parameter \"%s\" (%d .. %d)",
719  (int) maxrate, "MAX_RATE", MAX_RATE_LOWER, MAX_RATE_UPPER)));
720 
721  opt->maxrate = (uint32) maxrate;
722  o_maxrate = true;
723  }
724  else if (strcmp(defel->defname, "tablespace_map") == 0)
725  {
726  if (o_tablespace_map)
727  ereport(ERROR,
728  (errcode(ERRCODE_SYNTAX_ERROR),
729  errmsg("duplicate option \"%s\"", defel->defname)));
730  opt->sendtblspcmapfile = true;
731  o_tablespace_map = true;
732  }
733  else if (strcmp(defel->defname, "noverify_checksums") == 0)
734  {
735  if (o_noverify_checksums)
736  ereport(ERROR,
737  (errcode(ERRCODE_SYNTAX_ERROR),
738  errmsg("duplicate option \"%s\"", defel->defname)));
739  noverify_checksums = true;
740  o_noverify_checksums = true;
741  }
742  else
743  elog(ERROR, "option \"%s\" not recognized",
744  defel->defname);
745  }
746  if (opt->label == NULL)
747  opt->label = "base backup";
748 }
749 
750 
751 /*
752  * SendBaseBackup() - send a complete base backup.
753  *
754  * The function will put the system into backup mode like pg_start_backup()
755  * does, so that the backup is consistent even though we read directly from
756  * the filesystem, bypassing the buffer cache.
757  */
758 void
760 {
761  basebackup_options opt;
762 
763  parse_basebackup_options(cmd->options, &opt);
764 
766 
768  {
769  char activitymsg[50];
770 
771  snprintf(activitymsg, sizeof(activitymsg), "sending backup \"%s\"",
772  opt.label);
773  set_ps_display(activitymsg, false);
774  }
775 
776  perform_base_backup(&opt);
777 }
778 
779 static void
781 {
782  char is[32];
783 
784  sprintf(is, INT64_FORMAT, intval);
785  pq_sendint32(buf, strlen(is));
786  pq_sendbytes(buf, is, strlen(is));
787 }
788 
789 static void
790 SendBackupHeader(List *tablespaces)
791 {
793  ListCell *lc;
794 
795  /* Construct and send the directory information */
796  pq_beginmessage(&buf, 'T'); /* RowDescription */
797  pq_sendint16(&buf, 3); /* 3 fields */
798 
799  /* First field - spcoid */
800  pq_sendstring(&buf, "spcoid");
801  pq_sendint32(&buf, 0); /* table oid */
802  pq_sendint16(&buf, 0); /* attnum */
803  pq_sendint32(&buf, OIDOID); /* type oid */
804  pq_sendint16(&buf, 4); /* typlen */
805  pq_sendint32(&buf, 0); /* typmod */
806  pq_sendint16(&buf, 0); /* format code */
807 
808  /* Second field - spclocation */
809  pq_sendstring(&buf, "spclocation");
810  pq_sendint32(&buf, 0);
811  pq_sendint16(&buf, 0);
812  pq_sendint32(&buf, TEXTOID);
813  pq_sendint16(&buf, -1);
814  pq_sendint32(&buf, 0);
815  pq_sendint16(&buf, 0);
816 
817  /* Third field - size */
818  pq_sendstring(&buf, "size");
819  pq_sendint32(&buf, 0);
820  pq_sendint16(&buf, 0);
821  pq_sendint32(&buf, INT8OID);
822  pq_sendint16(&buf, 8);
823  pq_sendint32(&buf, 0);
824  pq_sendint16(&buf, 0);
825  pq_endmessage(&buf);
826 
827  foreach(lc, tablespaces)
828  {
829  tablespaceinfo *ti = lfirst(lc);
830 
831  /* Send one datarow message */
832  pq_beginmessage(&buf, 'D');
833  pq_sendint16(&buf, 3); /* number of columns */
834  if (ti->path == NULL)
835  {
836  pq_sendint32(&buf, -1); /* Length = -1 ==> NULL */
837  pq_sendint32(&buf, -1);
838  }
839  else
840  {
841  Size len;
842 
843  len = strlen(ti->oid);
844  pq_sendint32(&buf, len);
845  pq_sendbytes(&buf, ti->oid, len);
846 
847  len = strlen(ti->path);
848  pq_sendint32(&buf, len);
849  pq_sendbytes(&buf, ti->path, len);
850  }
851  if (ti->size >= 0)
852  send_int8_string(&buf, ti->size / 1024);
853  else
854  pq_sendint32(&buf, -1); /* NULL */
855 
856  pq_endmessage(&buf);
857  }
858 
859  /* Send a CommandComplete message */
860  pq_puttextmessage('C', "SELECT");
861 }
862 
863 /*
864  * Send a single resultset containing just a single
865  * XLogRecPtr record (in text format)
866  */
867 static void
869 {
871  char str[MAXFNAMELEN];
872  Size len;
873 
874  pq_beginmessage(&buf, 'T'); /* RowDescription */
875  pq_sendint16(&buf, 2); /* 2 fields */
876 
877  /* Field headers */
878  pq_sendstring(&buf, "recptr");
879  pq_sendint32(&buf, 0); /* table oid */
880  pq_sendint16(&buf, 0); /* attnum */
881  pq_sendint32(&buf, TEXTOID); /* type oid */
882  pq_sendint16(&buf, -1);
883  pq_sendint32(&buf, 0);
884  pq_sendint16(&buf, 0);
885 
886  pq_sendstring(&buf, "tli");
887  pq_sendint32(&buf, 0); /* table oid */
888  pq_sendint16(&buf, 0); /* attnum */
889 
890  /*
891  * int8 may seem like a surprising data type for this, but in theory int4
892  * would not be wide enough for this, as TimeLineID is unsigned.
893  */
894  pq_sendint32(&buf, INT8OID); /* type oid */
895  pq_sendint16(&buf, -1);
896  pq_sendint32(&buf, 0);
897  pq_sendint16(&buf, 0);
898  pq_endmessage(&buf);
899 
900  /* Data row */
901  pq_beginmessage(&buf, 'D');
902  pq_sendint16(&buf, 2); /* number of columns */
903 
904  len = snprintf(str, sizeof(str),
905  "%X/%X", (uint32) (ptr >> 32), (uint32) ptr);
906  pq_sendint32(&buf, len);
907  pq_sendbytes(&buf, str, len);
908 
909  len = snprintf(str, sizeof(str), "%u", tli);
910  pq_sendint32(&buf, len);
911  pq_sendbytes(&buf, str, len);
912 
913  pq_endmessage(&buf);
914 
915  /* Send a CommandComplete message */
916  pq_puttextmessage('C', "SELECT");
917 }
918 
919 /*
920  * Inject a file with given name and content in the output tar stream.
921  */
922 static void
923 sendFileWithContent(const char *filename, const char *content)
924 {
925  struct stat statbuf;
926  int pad,
927  len;
928 
929  len = strlen(content);
930 
931  /*
932  * Construct a stat struct for the backup_label file we're injecting in
933  * the tar.
934  */
935  /* Windows doesn't have the concept of uid and gid */
936 #ifdef WIN32
937  statbuf.st_uid = 0;
938  statbuf.st_gid = 0;
939 #else
940  statbuf.st_uid = geteuid();
941  statbuf.st_gid = getegid();
942 #endif
943  statbuf.st_mtime = time(NULL);
944  statbuf.st_mode = pg_file_create_mode;
945  statbuf.st_size = len;
946 
947  _tarWriteHeader(filename, NULL, &statbuf, false);
948  /* Send the contents as a CopyData message */
949  pq_putmessage('d', content, len);
950 
951  /* Pad to 512 byte boundary, per tar format requirements */
952  pad = ((len + 511) & ~511) - len;
953  if (pad > 0)
954  {
955  char buf[512];
956 
957  MemSet(buf, 0, pad);
958  pq_putmessage('d', buf, pad);
959  }
960 }
961 
962 /*
963  * Include the tablespace directory pointed to by 'path' in the output tar
964  * stream. If 'sizeonly' is true, we just calculate a total length and return
965  * it, without actually sending anything.
966  *
967  * Only used to send auxiliary tablespaces, not PGDATA.
968  */
969 int64
970 sendTablespace(char *path, bool sizeonly)
971 {
972  int64 size;
973  char pathbuf[MAXPGPATH];
974  struct stat statbuf;
975 
976  /*
977  * 'path' points to the tablespace location, but we only want to include
978  * the version directory in it that belongs to us.
979  */
980  snprintf(pathbuf, sizeof(pathbuf), "%s/%s", path,
982 
983  /*
984  * Store a directory entry in the tar file so we get the permissions
985  * right.
986  */
987  if (lstat(pathbuf, &statbuf) != 0)
988  {
989  if (errno != ENOENT)
990  ereport(ERROR,
992  errmsg("could not stat file or directory \"%s\": %m",
993  pathbuf)));
994 
995  /* If the tablespace went away while scanning, it's no error. */
996  return 0;
997  }
998 
999  size = _tarWriteHeader(TABLESPACE_VERSION_DIRECTORY, NULL, &statbuf,
1000  sizeonly);
1001 
1002  /* Send all the files in the tablespace version directory */
1003  size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true);
1004 
1005  return size;
1006 }
1007 
1008 /*
1009  * Include all files from the given directory in the output tar stream. If
1010  * 'sizeonly' is true, we just calculate a total length and return it, without
1011  * actually sending anything.
1012  *
1013  * Omit any directory in the tablespaces list, to avoid backing up
1014  * tablespaces twice when they were created inside PGDATA.
1015  *
1016  * If sendtblspclinks is true, we need to include symlink
1017  * information in the tar file. If not, we can skip that
1018  * as it will be sent separately in the tablespace_map file.
1019  */
1020 static int64
1021 sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces,
1022  bool sendtblspclinks)
1023 {
1024  DIR *dir;
1025  struct dirent *de;
1026  char pathbuf[MAXPGPATH * 2];
1027  struct stat statbuf;
1028  int64 size = 0;
1029  const char *lastDir; /* Split last dir from parent path. */
1030  bool isDbDir = false; /* Does this directory contain relations? */
1031 
1032  /*
1033  * Determine if the current path is a database directory that can contain
1034  * relations.
1035  *
1036  * Start by finding the location of the delimiter between the parent path
1037  * and the current path.
1038  */
1039  lastDir = last_dir_separator(path);
1040 
1041  /* Does this path look like a database path (i.e. all digits)? */
1042  if (lastDir != NULL &&
1043  strspn(lastDir + 1, "0123456789") == strlen(lastDir + 1))
1044  {
1045  /* Part of path that contains the parent directory. */
1046  int parentPathLen = lastDir - path;
1047 
1048  /*
1049  * Mark path as a database directory if the parent path is either
1050  * $PGDATA/base or a tablespace version path.
1051  */
1052  if (strncmp(path, "./base", parentPathLen) == 0 ||
1053  (parentPathLen >= (sizeof(TABLESPACE_VERSION_DIRECTORY) - 1) &&
1054  strncmp(lastDir - (sizeof(TABLESPACE_VERSION_DIRECTORY) - 1),
1056  sizeof(TABLESPACE_VERSION_DIRECTORY) - 1) == 0))
1057  isDbDir = true;
1058  }
1059 
1060  dir = AllocateDir(path);
1061  while ((de = ReadDir(dir, path)) != NULL)
1062  {
1063  int excludeIdx;
1064  bool excludeFound;
1065  ForkNumber relForkNum; /* Type of fork if file is a relation */
1066  int relOidChars; /* Chars in filename that are the rel oid */
1067 
1068  /* Skip special stuff */
1069  if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
1070  continue;
1071 
1072  /* Skip temporary files */
1073  if (strncmp(de->d_name,
1075  strlen(PG_TEMP_FILE_PREFIX)) == 0)
1076  continue;
1077 
1078  /*
1079  * Check if the postmaster has signaled us to exit, and abort with an
1080  * error in that case. The error handler further up will call
1081  * do_pg_abort_backup() for us. Also check that if the backup was
1082  * started while still in recovery, the server wasn't promoted.
1083  * do_pg_stop_backup() will check that too, but it's better to stop
1084  * the backup early than continue to the end and fail there.
1085  */
1088  ereport(ERROR,
1089  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1090  errmsg("the standby was promoted during online backup"),
1091  errhint("This means that the backup being taken is corrupt "
1092  "and should not be used. "
1093  "Try taking another online backup.")));
1094 
1095  /* Scan for files that should be excluded */
1096  excludeFound = false;
1097  for (excludeIdx = 0; excludeFiles[excludeIdx] != NULL; excludeIdx++)
1098  {
1099  if (strcmp(de->d_name, excludeFiles[excludeIdx]) == 0)
1100  {
1101  elog(DEBUG1, "file \"%s\" excluded from backup", de->d_name);
1102  excludeFound = true;
1103  break;
1104  }
1105  }
1106 
1107  if (excludeFound)
1108  continue;
1109 
1110  /* Exclude all forks for unlogged tables except the init fork */
1111  if (isDbDir &&
1112  parse_filename_for_nontemp_relation(de->d_name, &relOidChars,
1113  &relForkNum))
1114  {
1115  /* Never exclude init forks */
1116  if (relForkNum != INIT_FORKNUM)
1117  {
1118  char initForkFile[MAXPGPATH];
1119  char relOid[OIDCHARS + 1];
1120 
1121  /*
1122  * If any other type of fork, check if there is an init fork
1123  * with the same OID. If so, the file can be excluded.
1124  */
1125  memcpy(relOid, de->d_name, relOidChars);
1126  relOid[relOidChars] = '\0';
1127  snprintf(initForkFile, sizeof(initForkFile), "%s/%s_init",
1128  path, relOid);
1129 
1130  if (lstat(initForkFile, &statbuf) == 0)
1131  {
1132  elog(DEBUG2,
1133  "unlogged relation file \"%s\" excluded from backup",
1134  de->d_name);
1135 
1136  continue;
1137  }
1138  }
1139  }
1140 
1141  /* Exclude temporary relations */
1142  if (isDbDir && looks_like_temp_rel_name(de->d_name))
1143  {
1144  elog(DEBUG2,
1145  "temporary relation file \"%s\" excluded from backup",
1146  de->d_name);
1147 
1148  continue;
1149  }
1150 
1151  snprintf(pathbuf, sizeof(pathbuf), "%s/%s", path, de->d_name);
1152 
1153  /* Skip pg_control here to back up it last */
1154  if (strcmp(pathbuf, "./global/pg_control") == 0)
1155  continue;
1156 
1157  if (lstat(pathbuf, &statbuf) != 0)
1158  {
1159  if (errno != ENOENT)
1160  ereport(ERROR,
1162  errmsg("could not stat file or directory \"%s\": %m",
1163  pathbuf)));
1164 
1165  /* If the file went away while scanning, it's not an error. */
1166  continue;
1167  }
1168 
1169  /* Scan for directories whose contents should be excluded */
1170  excludeFound = false;
1171  for (excludeIdx = 0; excludeDirContents[excludeIdx] != NULL; excludeIdx++)
1172  {
1173  if (strcmp(de->d_name, excludeDirContents[excludeIdx]) == 0)
1174  {
1175  elog(DEBUG1, "contents of directory \"%s\" excluded from backup", de->d_name);
1176  size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly);
1177  excludeFound = true;
1178  break;
1179  }
1180  }
1181 
1182  if (excludeFound)
1183  continue;
1184 
1185  /*
1186  * Exclude contents of directory specified by statrelpath if not set
1187  * to the default (pg_stat_tmp) which is caught in the loop above.
1188  */
1189  if (statrelpath != NULL && strcmp(pathbuf, statrelpath) == 0)
1190  {
1191  elog(DEBUG1, "contents of directory \"%s\" excluded from backup", statrelpath);
1192  size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly);
1193  continue;
1194  }
1195 
1196  /*
1197  * We can skip pg_wal, the WAL segments need to be fetched from the
1198  * WAL archive anyway. But include it as an empty directory anyway, so
1199  * we get permissions right.
1200  */
1201  if (strcmp(pathbuf, "./pg_wal") == 0)
1202  {
1203  /* If pg_wal is a symlink, write it as a directory anyway */
1204  size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly);
1205 
1206  /*
1207  * Also send archive_status directory (by hackishly reusing
1208  * statbuf from above ...).
1209  */
1210  size += _tarWriteHeader("./pg_wal/archive_status", NULL, &statbuf,
1211  sizeonly);
1212 
1213  continue; /* don't recurse into pg_wal */
1214  }
1215 
1216  /* Allow symbolic links in pg_tblspc only */
1217  if (strcmp(path, "./pg_tblspc") == 0 &&
1218 #ifndef WIN32
1219  S_ISLNK(statbuf.st_mode)
1220 #else
1221  pgwin32_is_junction(pathbuf)
1222 #endif
1223  )
1224  {
1225 #if defined(HAVE_READLINK) || defined(WIN32)
1226  char linkpath[MAXPGPATH];
1227  int rllen;
1228 
1229  rllen = readlink(pathbuf, linkpath, sizeof(linkpath));
1230  if (rllen < 0)
1231  ereport(ERROR,
1233  errmsg("could not read symbolic link \"%s\": %m",
1234  pathbuf)));
1235  if (rllen >= sizeof(linkpath))
1236  ereport(ERROR,
1237  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1238  errmsg("symbolic link \"%s\" target is too long",
1239  pathbuf)));
1240  linkpath[rllen] = '\0';
1241 
1242  size += _tarWriteHeader(pathbuf + basepathlen + 1, linkpath,
1243  &statbuf, sizeonly);
1244 #else
1245 
1246  /*
1247  * If the platform does not have symbolic links, it should not be
1248  * possible to have tablespaces - clearly somebody else created
1249  * them. Warn about it and ignore.
1250  */
1251  ereport(WARNING,
1252  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1253  errmsg("tablespaces are not supported on this platform")));
1254  continue;
1255 #endif /* HAVE_READLINK */
1256  }
1257  else if (S_ISDIR(statbuf.st_mode))
1258  {
1259  bool skip_this_dir = false;
1260  ListCell *lc;
1261 
1262  /*
1263  * Store a directory entry in the tar file so we can get the
1264  * permissions right.
1265  */
1266  size += _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf,
1267  sizeonly);
1268 
1269  /*
1270  * Call ourselves recursively for a directory, unless it happens
1271  * to be a separate tablespace located within PGDATA.
1272  */
1273  foreach(lc, tablespaces)
1274  {
1275  tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc);
1276 
1277  /*
1278  * ti->rpath is the tablespace relative path within PGDATA, or
1279  * NULL if the tablespace has been properly located somewhere
1280  * else.
1281  *
1282  * Skip past the leading "./" in pathbuf when comparing.
1283  */
1284  if (ti->rpath && strcmp(ti->rpath, pathbuf + 2) == 0)
1285  {
1286  skip_this_dir = true;
1287  break;
1288  }
1289  }
1290 
1291  /*
1292  * skip sending directories inside pg_tblspc, if not required.
1293  */
1294  if (strcmp(pathbuf, "./pg_tblspc") == 0 && !sendtblspclinks)
1295  skip_this_dir = true;
1296 
1297  if (!skip_this_dir)
1298  size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces, sendtblspclinks);
1299  }
1300  else if (S_ISREG(statbuf.st_mode))
1301  {
1302  bool sent = false;
1303 
1304  if (!sizeonly)
1305  sent = sendFile(pathbuf, pathbuf + basepathlen + 1, &statbuf,
1306  true, isDbDir ? pg_atoi(lastDir + 1, sizeof(Oid), 0) : InvalidOid);
1307 
1308  if (sent || sizeonly)
1309  {
1310  /* Add size, rounded up to 512byte block */
1311  size += ((statbuf.st_size + 511) & ~511);
1312  size += 512; /* Size of the header of the file */
1313  }
1314  }
1315  else
1316  ereport(WARNING,
1317  (errmsg("skipping special file \"%s\"", pathbuf)));
1318  }
1319  FreeDir(dir);
1320  return size;
1321 }
1322 
1323 /*
1324  * Check if a file should have its checksum validated.
1325  * We validate checksums on files in regular tablespaces
1326  * (including global and default) only, and in those there
1327  * are some files that are explicitly excluded.
1328  */
1329 static bool
1330 is_checksummed_file(const char *fullpath, const char *filename)
1331 {
1332  const char *const *f;
1333 
1334  /* Check that the file is in a tablespace */
1335  if (strncmp(fullpath, "./global/", 9) == 0 ||
1336  strncmp(fullpath, "./base/", 7) == 0 ||
1337  strncmp(fullpath, "/", 1) == 0)
1338  {
1339  /* Compare file against noChecksumFiles skiplist */
1340  for (f = noChecksumFiles; *f; f++)
1341  if (strcmp(*f, filename) == 0)
1342  return false;
1343 
1344  return true;
1345  }
1346  else
1347  return false;
1348 }
1349 
1350 /*****
1351  * Functions for handling tar file format
1352  *
1353  * Copied from pg_dump, but modified to work with libpq for sending
1354  */
1355 
1356 
1357 /*
1358  * Given the member, write the TAR header & send the file.
1359  *
1360  * If 'missing_ok' is true, will not throw an error if the file is not found.
1361  *
1362  * If dboid is anything other than InvalidOid then any checksum failures detected
1363  * will get reported to the stats collector.
1364  *
1365  * Returns true if the file was successfully sent, false if 'missing_ok',
1366  * and the file did not exist.
1367  */
1368 static bool
1369 sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf,
1370  bool missing_ok, Oid dboid)
1371 {
1372  FILE *fp;
1373  BlockNumber blkno = 0;
1374  bool block_retry = false;
1375  char buf[TAR_SEND_SIZE];
1376  uint16 checksum;
1377  int checksum_failures = 0;
1378  off_t cnt;
1379  int i;
1380  pgoff_t len = 0;
1381  char *page;
1382  size_t pad;
1383  PageHeader phdr;
1384  int segmentno = 0;
1385  char *segmentpath;
1386  bool verify_checksum = false;
1387 
1388  fp = AllocateFile(readfilename, "rb");
1389  if (fp == NULL)
1390  {
1391  if (errno == ENOENT && missing_ok)
1392  return false;
1393  ereport(ERROR,
1395  errmsg("could not open file \"%s\": %m", readfilename)));
1396  }
1397 
1398  _tarWriteHeader(tarfilename, NULL, statbuf, false);
1399 
1401  {
1402  char *filename;
1403 
1404  /*
1405  * Get the filename (excluding path). As last_dir_separator()
1406  * includes the last directory separator, we chop that off by
1407  * incrementing the pointer.
1408  */
1409  filename = last_dir_separator(readfilename) + 1;
1410 
1411  if (is_checksummed_file(readfilename, filename))
1412  {
1413  verify_checksum = true;
1414 
1415  /*
1416  * Cut off at the segment boundary (".") to get the segment number
1417  * in order to mix it into the checksum.
1418  */
1419  segmentpath = strstr(filename, ".");
1420  if (segmentpath != NULL)
1421  {
1422  segmentno = atoi(segmentpath + 1);
1423  if (segmentno == 0)
1424  ereport(ERROR,
1425  (errmsg("invalid segment number %d in file \"%s\"",
1426  segmentno, filename)));
1427  }
1428  }
1429  }
1430 
1431  while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0)
1432  {
1433  /*
1434  * The checksums are verified at block level, so we iterate over the
1435  * buffer in chunks of BLCKSZ, after making sure that
1436  * TAR_SEND_SIZE/buf is divisible by BLCKSZ and we read a multiple of
1437  * BLCKSZ bytes.
1438  */
1439  Assert(TAR_SEND_SIZE % BLCKSZ == 0);
1440 
1441  if (verify_checksum && (cnt % BLCKSZ != 0))
1442  {
1443  ereport(WARNING,
1444  (errmsg("could not verify checksum in file \"%s\", block "
1445  "%d: read buffer size %d and page size %d "
1446  "differ",
1447  readfilename, blkno, (int) cnt, BLCKSZ)));
1448  verify_checksum = false;
1449  }
1450 
1451  if (verify_checksum)
1452  {
1453  for (i = 0; i < cnt / BLCKSZ; i++)
1454  {
1455  page = buf + BLCKSZ * i;
1456 
1457  /*
1458  * Only check pages which have not been modified since the
1459  * start of the base backup. Otherwise, they might have been
1460  * written only halfway and the checksum would not be valid.
1461  * However, replaying WAL would reinstate the correct page in
1462  * this case. We also skip completely new pages, since they
1463  * don't have a checksum yet.
1464  */
1465  if (!PageIsNew(page) && PageGetLSN(page) < startptr)
1466  {
1467  checksum = pg_checksum_page((char *) page, blkno + segmentno * RELSEG_SIZE);
1468  phdr = (PageHeader) page;
1469  if (phdr->pd_checksum != checksum)
1470  {
1471  /*
1472  * Retry the block on the first failure. It's
1473  * possible that we read the first 4K page of the
1474  * block just before postgres updated the entire block
1475  * so it ends up looking torn to us. We only need to
1476  * retry once because the LSN should be updated to
1477  * something we can ignore on the next pass. If the
1478  * error happens again then it is a true validation
1479  * failure.
1480  */
1481  if (block_retry == false)
1482  {
1483  /* Reread the failed block */
1484  if (fseek(fp, -(cnt - BLCKSZ * i), SEEK_CUR) == -1)
1485  {
1486  ereport(ERROR,
1488  errmsg("could not fseek in file \"%s\": %m",
1489  readfilename)));
1490  }
1491 
1492  if (fread(buf + BLCKSZ * i, 1, BLCKSZ, fp) != BLCKSZ)
1493  {
1494  /*
1495  * If we hit end-of-file, a concurrent
1496  * truncation must have occurred, so break out
1497  * of this loop just as if the initial fread()
1498  * returned 0. We'll drop through to the same
1499  * code that handles that case. (We must fix
1500  * up cnt first, though.)
1501  */
1502  if (feof(fp))
1503  {
1504  cnt = BLCKSZ * i;
1505  break;
1506  }
1507 
1508  ereport(ERROR,
1510  errmsg("could not reread block %d of file \"%s\": %m",
1511  blkno, readfilename)));
1512  }
1513 
1514  if (fseek(fp, cnt - BLCKSZ * i - BLCKSZ, SEEK_CUR) == -1)
1515  {
1516  ereport(ERROR,
1518  errmsg("could not fseek in file \"%s\": %m",
1519  readfilename)));
1520  }
1521 
1522  /* Set flag so we know a retry was attempted */
1523  block_retry = true;
1524 
1525  /* Reset loop to validate the block again */
1526  i--;
1527  continue;
1528  }
1529 
1530  checksum_failures++;
1531 
1532  if (checksum_failures <= 5)
1533  ereport(WARNING,
1534  (errmsg("checksum verification failed in "
1535  "file \"%s\", block %d: calculated "
1536  "%X but expected %X",
1537  readfilename, blkno, checksum,
1538  phdr->pd_checksum)));
1539  if (checksum_failures == 5)
1540  ereport(WARNING,
1541  (errmsg("further checksum verification "
1542  "failures in file \"%s\" will not "
1543  "be reported", readfilename)));
1544  }
1545  }
1546  block_retry = false;
1547  blkno++;
1548  }
1549  }
1550 
1551  /* Send the chunk as a CopyData message */
1552  if (pq_putmessage('d', buf, cnt))
1553  ereport(ERROR,
1554  (errmsg("base backup could not send data, aborting backup")));
1555 
1556  len += cnt;
1557  throttle(cnt);
1558 
1559  if (feof(fp) || len >= statbuf->st_size)
1560  {
1561  /*
1562  * Reached end of file. The file could be longer, if it was
1563  * extended while we were sending it, but for a base backup we can
1564  * ignore such extended data. It will be restored from WAL.
1565  */
1566  break;
1567  }
1568  }
1569 
1570  CHECK_FREAD_ERROR(fp, readfilename);
1571 
1572  /* If the file was truncated while we were sending it, pad it with zeros */
1573  if (len < statbuf->st_size)
1574  {
1575  MemSet(buf, 0, sizeof(buf));
1576  while (len < statbuf->st_size)
1577  {
1578  cnt = Min(sizeof(buf), statbuf->st_size - len);
1579  pq_putmessage('d', buf, cnt);
1580  len += cnt;
1581  throttle(cnt);
1582  }
1583  }
1584 
1585  /*
1586  * Pad to 512 byte boundary, per tar format requirements. (This small
1587  * piece of data is probably not worth throttling.)
1588  */
1589  pad = ((len + 511) & ~511) - len;
1590  if (pad > 0)
1591  {
1592  MemSet(buf, 0, pad);
1593  pq_putmessage('d', buf, pad);
1594  }
1595 
1596  FreeFile(fp);
1597 
1598  if (checksum_failures > 1)
1599  {
1600  ereport(WARNING,
1601  (errmsg_plural("file \"%s\" has a total of %d checksum verification failure",
1602  "file \"%s\" has a total of %d checksum verification failures",
1603  checksum_failures,
1604  readfilename, checksum_failures)));
1605 
1606  pgstat_report_checksum_failures_in_db(dboid, checksum_failures);
1607  }
1608 
1609  total_checksum_failures += checksum_failures;
1610 
1611  return true;
1612 }
1613 
1614 
1615 static int64
1616 _tarWriteHeader(const char *filename, const char *linktarget,
1617  struct stat *statbuf, bool sizeonly)
1618 {
1619  char h[512];
1620  enum tarError rc;
1621 
1622  if (!sizeonly)
1623  {
1624  rc = tarCreateHeader(h, filename, linktarget, statbuf->st_size,
1625  statbuf->st_mode, statbuf->st_uid, statbuf->st_gid,
1626  statbuf->st_mtime);
1627 
1628  switch (rc)
1629  {
1630  case TAR_OK:
1631  break;
1632  case TAR_NAME_TOO_LONG:
1633  ereport(ERROR,
1634  (errmsg("file name too long for tar format: \"%s\"",
1635  filename)));
1636  break;
1637  case TAR_SYMLINK_TOO_LONG:
1638  ereport(ERROR,
1639  (errmsg("symbolic link target too long for tar format: "
1640  "file name \"%s\", target \"%s\"",
1641  filename, linktarget)));
1642  break;
1643  default:
1644  elog(ERROR, "unrecognized tar error: %d", rc);
1645  }
1646 
1647  pq_putmessage('d', h, sizeof(h));
1648  }
1649 
1650  return sizeof(h);
1651 }
1652 
1653 /*
1654  * Write tar header for a directory. If the entry in statbuf is a link then
1655  * write it as a directory anyway.
1656  */
1657 static int64
1658 _tarWriteDir(const char *pathbuf, int basepathlen, struct stat *statbuf,
1659  bool sizeonly)
1660 {
1661  /* If symlink, write it as a directory anyway */
1662 #ifndef WIN32
1663  if (S_ISLNK(statbuf->st_mode))
1664 #else
1665  if (pgwin32_is_junction(pathbuf))
1666 #endif
1667  statbuf->st_mode = S_IFDIR | pg_dir_create_mode;
1668 
1669  return _tarWriteHeader(pathbuf + basepathlen + 1, NULL, statbuf, sizeonly);
1670 }
1671 
1672 /*
1673  * Increment the network transfer counter by the given number of bytes,
1674  * and sleep if necessary to comply with the requested network transfer
1675  * rate.
1676  */
1677 static void
1678 throttle(size_t increment)
1679 {
1680  TimeOffset elapsed_min;
1681 
1682  if (throttling_counter < 0)
1683  return;
1684 
1685  throttling_counter += increment;
1687  return;
1688 
1689  /* How much time should have elapsed at minimum? */
1690  elapsed_min = elapsed_min_unit *
1692 
1693  /*
1694  * Since the latch could be set repeatedly because of concurrently WAL
1695  * activity, sleep in a loop to ensure enough time has passed.
1696  */
1697  for (;;)
1698  {
1699  TimeOffset elapsed,
1700  sleep;
1701  int wait_result;
1702 
1703  /* Time elapsed since the last measurement (and possible wake up). */
1704  elapsed = GetCurrentTimestamp() - throttled_last;
1705 
1706  /* sleep if the transfer is faster than it should be */
1707  sleep = elapsed_min - elapsed;
1708  if (sleep <= 0)
1709  break;
1710 
1712 
1713  /* We're eating a potentially set latch, so check for interrupts */
1715 
1716  /*
1717  * (TAR_SEND_SIZE / throttling_sample * elapsed_min_unit) should be
1718  * the maximum time to sleep. Thus the cast to long is safe.
1719  */
1720  wait_result = WaitLatch(MyLatch,
1722  (long) (sleep / 1000),
1724 
1725  if (wait_result & WL_LATCH_SET)
1727 
1728  /* Done waiting? */
1729  if (wait_result & WL_TIMEOUT)
1730  break;
1731  }
1732 
1733  /*
1734  * As we work with integers, only whole multiple of throttling_sample was
1735  * processed. The rest will be done during the next call of this function.
1736  */
1738 
1739  /*
1740  * Time interval for the remaining amount and possible next increments
1741  * starts now.
1742  */
1744 }
#define StatusFilePath(path, xlog, suffix)
List * options
Definition: replnodes.h:44
#define NIL
Definition: pg_list.h:65
static void pq_sendint16(StringInfo buf, uint16 i)
Definition: pqformat.h:137
int pg_file_create_mode
Definition: file_perm.c:19
#define MAX_RATE_LOWER
Definition: basebackup.h:20
#define DEBUG1
Definition: elog.h:25
int errhint(const char *fmt,...)
Definition: elog.c:1069
static void throttle(size_t increment)
Definition: basebackup.c:1678
uint32 TimeLineID
Definition: xlogdefs.h:52
#define WL_TIMEOUT
Definition: latch.h:127
int wal_segment_size
Definition: xlog.c:112
#define PG_DYNSHMEM_DIR
Definition: dsm_impl.h:50
static ListCell * lnext(const List *l, const ListCell *c)
Definition: pg_list.h:321
#define USECS_PER_SEC
Definition: timestamp.h:94
bool update_process_title
Definition: ps_status.c:35
int errmsg_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition: elog.c:932
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1583
bool DataChecksumsEnabled(void)
Definition: xlog.c:4819
int64 TimestampTz
Definition: timestamp.h:39
#define XLByteToPrevSeg(xlrp, logSegNo, wal_segsz_bytes)
char * pstrdup(const char *in)
Definition: mcxt.c:1186
Definition: pgtar.h:17
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
char * rpath
Definition: basebackup.h:28
StringInfo makeStringInfo(void)
Definition: stringinfo.c:41
#define Min(x, y)
Definition: c.h:911
static void SendBackupHeader(List *tablespaces)
Definition: basebackup.c:790
void set_ps_display(const char *activity, bool force)
Definition: ps_status.c:331
static void parse_basebackup_options(List *options, basebackup_options *opt)
Definition: basebackup.c:643
#define strVal(v)
Definition: value.h:54
int errcode(int sqlerrcode)
Definition: elog.c:608
#define MemSet(start, val, len)
Definition: c.h:962
void pq_putemptymessage(char msgtype)
Definition: pqformat.c:390
uint32 BlockNumber
Definition: block.h:31
void pq_sendstring(StringInfo buf, const char *str)
Definition: pqformat.c:197
unsigned int Oid
Definition: postgres_ext.h:31
bool RecoveryInProgress(void)
Definition: xlog.c:7935
static bool backup_started_in_recovery
Definition: basebackup.c:77
static void send_int8_string(StringInfoData *buf, int64 intval)
Definition: basebackup.c:780
Definition: dirent.h:9
void ResetLatch(Latch *latch)
Definition: latch.c:519
void pq_beginmessage(StringInfo buf, char msgtype)
Definition: pqformat.c:87
void pgstat_report_checksum_failures_in_db(Oid dboid, int failurecount)
Definition: pgstat.c:1530
XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, StringInfo labelfile, List **tablespaces, StringInfo tblspcmapfile, bool infotbssize, bool needtblspcmapfile)
Definition: xlog.c:10220
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:344
uint16 pd_checksum
Definition: bufpage.h:156
static void pq_sendbyte(StringInfo buf, uint8 byt)
Definition: pqformat.h:161
#define pgoff_t
Definition: win32_port.h:195
static void pq_sendint32(StringInfo buf, uint32 i)
Definition: pqformat.h:145
#define sprintf
Definition: port.h:194
#define TABLESPACE_MAP
Definition: xlog.h:361
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:47
unsigned short uint16
Definition: c.h:358
#define linitial(l)
Definition: pg_list.h:195
static const char *const excludeDirContents[]
Definition: basebackup.c:133
#define MAX_RATE_UPPER
Definition: basebackup.h:21
Definition: dirent.c:25
tarError
Definition: pgtar.h:15
#define ERROR
Definition: elog.h:43
#define PG_TEMP_FILE_PREFIX
Definition: pg_checksums.c:59
#define IsXLogFileName(fname)
XLogRecPtr do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
Definition: xlog.c:10739
static bool noverify_checksums
Definition: basebackup.c:123
void CheckXLogRemoved(XLogSegNo segno, TimeLineID tli)
Definition: xlog.c:3860
void SendBaseBackup(BaseBackupCmd *cmd)
Definition: basebackup.c:759
#define MAXPGPATH
#define DEBUG2
Definition: elog.h:24
#define XLogFromFileName(fname, tli, logSegNo, wal_segsz_bytes)
#define TABLESPACE_VERSION_DIRECTORY
Definition: relpath.h:26
static int32 maxrate
static char * buf
Definition: pg_test_fsync.c:67
uint64 XLogSegNo
Definition: xlogdefs.h:41
#define readlink(path, buf, size)
Definition: win32_port.h:222
int errcode_for_file_access(void)
Definition: elog.c:631
#define is_absolute_path(filename)
Definition: port.h:86
const char * label
Definition: basebackup.c:47
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2242
static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli)
Definition: basebackup.c:868
unsigned int uint32
Definition: c.h:359
int64 sendTablespace(char *path, bool sizeonly)
Definition: basebackup.c:970
static int64 _tarWriteHeader(const char *filename, const char *linktarget, struct stat *statbuf, bool sizeonly)
Definition: basebackup.c:1616
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2503
char * pgstat_stat_directory
Definition: pgstat.c:132
#define ereport(elevel, rest)
Definition: elog.h:141
#define CHECK_FREAD_ERROR(fp, filename)
Definition: basebackup.c:97
static char * statrelpath
Definition: basebackup.c:80
ForkNumber
Definition: relpath.h:40
Node * arg
Definition: parsenodes.h:731
static const char *const excludeFiles[]
Definition: basebackup.c:174
#define S_ISREG(m)
Definition: win32_port.h:299
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:45
List * lappend(List *list, void *datum)
Definition: list.c:322
#define WARNING
Definition: elog.h:40
#define stat(a, b)
Definition: win32_port.h:255
#define MAXFNAMELEN
int pg_dir_create_mode
Definition: file_perm.c:18
enum tarError tarCreateHeader(char *h, const char *filename, const char *linktarget, pgoff_t size, mode_t mode, uid_t uid, gid_t gid, time_t mtime)
Definition: tar.c:114
int64 TimeOffset
Definition: timestamp.h:40
static void perform_base_backup(basebackup_options *opt)
Definition: basebackup.c:237
void * palloc0(Size size)
Definition: mcxt.c:980
#define XLOGDIR
uintptr_t Datum
Definition: postgres.h:367
char * last_dir_separator(const char *filename)
Definition: path.c:138
#define XLOG_CONTROL_FILE
#define InvalidOid
Definition: postgres_ext.h:36
TimeLineID ThisTimeLineID
Definition: xlog.c:187
static TimestampTz throttled_last
Definition: basebackup.c:114
#define RELCACHE_INIT_FILENAME
Definition: relcache.h:24
static StringInfo tblspc_map_file
Definition: xlogfuncs.c:45
static bool is_checksummed_file(const char *fullpath, const char *filename)
Definition: basebackup.c:1330
PageHeaderData * PageHeader
Definition: bufpage.h:166
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:739
#define lfirst(lc)
Definition: pg_list.h:190
void WalSndSetState(WalSndState state)
Definition: walsender.c:3126
#define PG_STAT_TMP_DIR
Definition: pgstat.h:33
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2569
#define THROTTLING_FREQUENCY
Definition: basebackup.c:90
size_t Size
Definition: c.h:467
#define XLogFileName(fname, tli, logSegNo, wal_segsz_bytes)
#define PG_AUTOCONF_FILENAME
Definition: guc.h:34
#define LOG_METAINFO_DATAFILE_TMP
Definition: syslogger.h:98
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:52
bool looks_like_temp_rel_name(const char *name)
Definition: fd.c:3101
bool parse_filename_for_nontemp_relation(const char *name, int *oidchars, ForkNumber *fork)
Definition: reinit.c:374
#define INT64_FORMAT
Definition: c.h:401
#define S_ISDIR(m)
Definition: win32_port.h:296
#define PageGetLSN(page)
Definition: bufpage.h:366
#define lstat(path, sb)
Definition: win32_port.h:244
int FreeFile(FILE *file)
Definition: fd.c:2441
#define IsTLHistoryFileName(fname)
static int compareWalFileNames(const ListCell *a, const ListCell *b)
Definition: basebackup.c:631
static char * filename
Definition: pg_dumpall.c:90
static int64 sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces, bool sendtblspclinks)
Definition: basebackup.c:1021
#define intVal(v)
Definition: value.h:52
#define PageIsNew(page)
Definition: bufpage.h:229
void pq_sendbytes(StringInfo buf, const char *data, int datalen)
Definition: pqformat.c:125
void list_sort(List *list, list_sort_comparator cmp)
Definition: list.c:1482
int errmsg(const char *fmt,...)
Definition: elog.c:822
void pq_endmessage(StringInfo buf)
Definition: pqformat.c:298
#define TAR_SEND_SIZE
Definition: basebackup.c:85
#define elog(elevel,...)
Definition: elog.h:228
int i
void do_pg_abort_backup(void)
Definition: xlog.c:11144
void * arg
char * DataDir
Definition: globals.c:62
struct Latch * MyLatch
Definition: globals.c:54
#define BACKUP_LABEL_FILE
Definition: xlog.h:358
char * defname
Definition: parsenodes.h:730
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
char d_name[MAX_PATH]
Definition: dirent.h:14
#define pq_putmessage(msgtype, s, len)
Definition: libpq.h:42
static int64 throttling_counter
Definition: basebackup.c:108
void pq_puttextmessage(char msgtype, const char *str)
Definition: pqformat.c:369
static void base_backup_cleanup(int code, Datum arg)
Definition: basebackup.c:225
Definition: pg_list.h:50
#define snprintf
Definition: port.h:192
static bool sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf, bool missing_ok, Oid dboid)
Definition: basebackup.c:1369
#define WL_LATCH_SET
Definition: latch.h:124
#define OIDCHARS
Definition: relpath.h:30
static uint64 throttling_sample
Definition: basebackup.c:105
static void sendFileWithContent(const char *filename, const char *content)
Definition: basebackup.c:923
uint16 pg_checksum_page(char *page, BlockNumber blkno)
int FreeDir(DIR *dir)
Definition: fd.c:2621
static TimeOffset elapsed_min_unit
Definition: basebackup.c:111
int32 pg_atoi(const char *s, int size, int c)
Definition: numutils.c:38
#define WL_EXIT_ON_PM_DEATH
Definition: latch.h:129
static XLogRecPtr startptr
Definition: basebackup.c:117
bool pgwin32_is_junction(const char *path)
static const char *const noChecksumFiles[]
Definition: basebackup.c:207
static int64 _tarWriteDir(const char *pathbuf, int basepathlen, struct stat *statbuf, bool sizeonly)
Definition: basebackup.c:1658
static long long int total_checksum_failures
Definition: basebackup.c:120
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)