PostgreSQL Source Code  git master
receivelog.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * receivelog.c - receive WAL files using the streaming
4  * replication protocol.
5  *
6  * Author: Magnus Hagander <magnus@hagander.net>
7  *
8  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
9  *
10  * IDENTIFICATION
11  * src/bin/pg_basebackup/receivelog.c
12  *-------------------------------------------------------------------------
13  */
14 
15 #include "postgres_fe.h"
16 
17 #include <sys/stat.h>
18 #include <unistd.h>
19 #ifdef HAVE_SYS_SELECT_H
20 #include <sys/select.h>
21 #endif
22 
23 #include "access/xlog_internal.h"
24 #include "common/file_utils.h"
25 #include "common/logging.h"
26 #include "libpq-fe.h"
27 #include "receivelog.h"
28 #include "streamutil.h"
29 
30 /* fd and filename for currently open WAL file */
31 static Walfile *walfile = NULL;
32 static char current_walfile_name[MAXPGPATH] = "";
33 static bool reportFlushPosition = false;
35 
36 static bool still_sending = true; /* feedback still needs to be sent? */
37 
39  XLogRecPtr *stoppos);
40 static int CopyStreamPoll(PGconn *conn, long timeout_ms, pgsocket stop_socket);
41 static int CopyStreamReceive(PGconn *conn, long timeout, pgsocket stop_socket,
42  char **buffer);
43 static bool ProcessKeepaliveMsg(PGconn *conn, StreamCtl *stream, char *copybuf,
44  int len, XLogRecPtr blockpos, TimestampTz *last_status);
45 static bool ProcessXLogDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len,
46  XLogRecPtr *blockpos);
48  XLogRecPtr blockpos, XLogRecPtr *stoppos);
49 static bool CheckCopyStreamStop(PGconn *conn, StreamCtl *stream, XLogRecPtr blockpos);
51  TimestampTz last_status);
52 
54  uint32 *timeline);
55 
56 static bool
57 mark_file_as_archived(StreamCtl *stream, const char *fname)
58 {
59  Walfile *f;
60  static char tmppath[MAXPGPATH];
61 
62  snprintf(tmppath, sizeof(tmppath), "archive_status/%s.done",
63  fname);
64 
65  f = stream->walmethod->open_for_write(tmppath, NULL, 0);
66  if (f == NULL)
67  {
68  pg_log_error("could not create archive status file \"%s\": %s",
69  tmppath, stream->walmethod->getlasterror());
70  return false;
71  }
72 
73  stream->walmethod->close(f, CLOSE_NORMAL);
74 
75  return true;
76 }
77 
78 /*
79  * Open a new WAL file in the specified directory.
80  *
81  * Returns true if OK; on failure, returns false after printing an error msg.
82  * On success, 'walfile' is set to the FD for the file, and the base filename
83  * (without partial_suffix) is stored in 'current_walfile_name'.
84  *
85  * The file will be padded to 16Mb with zeroes.
86  */
87 static bool
88 open_walfile(StreamCtl *stream, XLogRecPtr startpoint)
89 {
90  Walfile *f;
91  char *fn;
92  ssize_t size;
93  XLogSegNo segno;
94 
95  XLByteToSeg(startpoint, segno, WalSegSz);
97 
98  /* Note that this considers the compression used if necessary */
100  stream->partial_suffix);
101 
102  /*
103  * When streaming to files, if an existing file exists we verify that it's
104  * either empty (just created), or a complete WalSegSz segment (in which
105  * case it has been created and padded). Anything else indicates a corrupt
106  * file. Compressed files have no need for padding, so just ignore this
107  * case.
108  *
109  * When streaming to tar, no file with this name will exist before, so we
110  * never have to verify a size.
111  */
112  if (stream->walmethod->compression() == 0 &&
113  stream->walmethod->existsfile(fn))
114  {
115  size = stream->walmethod->get_file_size(fn);
116  if (size < 0)
117  {
118  pg_log_error("could not get size of write-ahead log file \"%s\": %s",
119  fn, stream->walmethod->getlasterror());
120  pg_free(fn);
121  return false;
122  }
123  if (size == WalSegSz)
124  {
125  /* Already padded file. Open it for use */
127  if (f == NULL)
128  {
129  pg_log_error("could not open existing write-ahead log file \"%s\": %s",
130  fn, stream->walmethod->getlasterror());
131  pg_free(fn);
132  return false;
133  }
134 
135  /* fsync file in case of a previous crash */
136  if (stream->walmethod->sync(f) != 0)
137  {
138  pg_log_fatal("could not fsync existing write-ahead log file \"%s\": %s",
139  fn, stream->walmethod->getlasterror());
140  stream->walmethod->close(f, CLOSE_UNLINK);
141  exit(1);
142  }
143 
144  walfile = f;
145  pg_free(fn);
146  return true;
147  }
148  if (size != 0)
149  {
150  /* if write didn't set errno, assume problem is no disk space */
151  if (errno == 0)
152  errno = ENOSPC;
153  pg_log_error(ngettext("write-ahead log file \"%s\" has %zd byte, should be 0 or %d",
154  "write-ahead log file \"%s\" has %zd bytes, should be 0 or %d",
155  size),
156  fn, size, WalSegSz);
157  pg_free(fn);
158  return false;
159  }
160  /* File existed and was empty, so fall through and open */
161  }
162 
163  /* No file existed, so create one */
164 
166  stream->partial_suffix, WalSegSz);
167  if (f == NULL)
168  {
169  pg_log_error("could not open write-ahead log file \"%s\": %s",
170  fn, stream->walmethod->getlasterror());
171  pg_free(fn);
172  return false;
173  }
174 
175  pg_free(fn);
176  walfile = f;
177  return true;
178 }
179 
180 /*
181  * Close the current WAL file (if open), and rename it to the correct
182  * filename if it's complete. On failure, prints an error message to stderr
183  * and returns false, otherwise returns true.
184  */
185 static bool
187 {
188  char *fn;
189  off_t currpos;
190  int r;
191 
192  if (walfile == NULL)
193  return true;
194 
195  /* Note that this considers the compression used if necessary */
197  stream->partial_suffix);
198 
199  currpos = stream->walmethod->get_current_pos(walfile);
200 
201  if (currpos == -1)
202  {
203  pg_log_error("could not determine seek position in file \"%s\": %s",
204  fn, stream->walmethod->getlasterror());
205  stream->walmethod->close(walfile, CLOSE_UNLINK);
206  walfile = NULL;
207 
208  pg_free(fn);
209  return false;
210  }
211 
212  if (stream->partial_suffix)
213  {
214  if (currpos == WalSegSz)
215  r = stream->walmethod->close(walfile, CLOSE_NORMAL);
216  else
217  {
218  pg_log_info("not renaming \"%s\", segment is not complete", fn);
219  r = stream->walmethod->close(walfile, CLOSE_NO_RENAME);
220  }
221  }
222  else
223  r = stream->walmethod->close(walfile, CLOSE_NORMAL);
224 
225  walfile = NULL;
226 
227  if (r != 0)
228  {
229  pg_log_error("could not close file \"%s\": %s",
230  fn, stream->walmethod->getlasterror());
231 
232  pg_free(fn);
233  return false;
234  }
235 
236  pg_free(fn);
237 
238  /*
239  * Mark file as archived if requested by the caller - pg_basebackup needs
240  * to do so as files can otherwise get archived again after promotion of a
241  * new node. This is in line with walreceiver.c always doing a
242  * XLogArchiveForceDone() after a complete segment.
243  */
244  if (currpos == WalSegSz && stream->mark_done)
245  {
246  /* writes error message if failed */
248  return false;
249  }
250 
251  lastFlushPosition = pos;
252  return true;
253 }
254 
255 
256 /*
257  * Check if a timeline history file exists.
258  */
259 static bool
261 {
262  char histfname[MAXFNAMELEN];
263 
264  /*
265  * Timeline 1 never has a history file. We treat that as if it existed,
266  * since we never need to stream it.
267  */
268  if (stream->timeline == 1)
269  return true;
270 
271  TLHistoryFileName(histfname, stream->timeline);
272 
273  return stream->walmethod->existsfile(histfname);
274 }
275 
276 static bool
277 writeTimeLineHistoryFile(StreamCtl *stream, char *filename, char *content)
278 {
279  int size = strlen(content);
280  char histfname[MAXFNAMELEN];
281  Walfile *f;
282 
283  /*
284  * Check that the server's idea of how timeline history files should be
285  * named matches ours.
286  */
287  TLHistoryFileName(histfname, stream->timeline);
288  if (strcmp(histfname, filename) != 0)
289  {
290  pg_log_error("server reported unexpected history file name for timeline %u: %s",
291  stream->timeline, filename);
292  return false;
293  }
294 
295  f = stream->walmethod->open_for_write(histfname, ".tmp", 0);
296  if (f == NULL)
297  {
298  pg_log_error("could not create timeline history file \"%s\": %s",
299  histfname, stream->walmethod->getlasterror());
300  return false;
301  }
302 
303  if ((int) stream->walmethod->write(f, content, size) != size)
304  {
305  pg_log_error("could not write timeline history file \"%s\": %s",
306  histfname, stream->walmethod->getlasterror());
307 
308  /*
309  * If we fail to make the file, delete it to release disk space
310  */
311  stream->walmethod->close(f, CLOSE_UNLINK);
312 
313  return false;
314  }
315 
316  if (stream->walmethod->close(f, CLOSE_NORMAL) != 0)
317  {
318  pg_log_error("could not close file \"%s\": %s",
319  histfname, stream->walmethod->getlasterror());
320  return false;
321  }
322 
323  /* Maintain archive_status, check close_walfile() for details. */
324  if (stream->mark_done)
325  {
326  /* writes error message if failed */
327  if (!mark_file_as_archived(stream, histfname))
328  return false;
329  }
330 
331  return true;
332 }
333 
334 /*
335  * Send a Standby Status Update message to server.
336  */
337 static bool
338 sendFeedback(PGconn *conn, XLogRecPtr blockpos, TimestampTz now, bool replyRequested)
339 {
340  char replybuf[1 + 8 + 8 + 8 + 8 + 1];
341  int len = 0;
342 
343  replybuf[len] = 'r';
344  len += 1;
345  fe_sendint64(blockpos, &replybuf[len]); /* write */
346  len += 8;
348  fe_sendint64(lastFlushPosition, &replybuf[len]); /* flush */
349  else
350  fe_sendint64(InvalidXLogRecPtr, &replybuf[len]); /* flush */
351  len += 8;
352  fe_sendint64(InvalidXLogRecPtr, &replybuf[len]); /* apply */
353  len += 8;
354  fe_sendint64(now, &replybuf[len]); /* sendTime */
355  len += 8;
356  replybuf[len] = replyRequested ? 1 : 0; /* replyRequested */
357  len += 1;
358 
359  if (PQputCopyData(conn, replybuf, len) <= 0 || PQflush(conn))
360  {
361  pg_log_error("could not send feedback packet: %s",
362  PQerrorMessage(conn));
363  return false;
364  }
365 
366  return true;
367 }
368 
369 /*
370  * Check that the server version we're connected to is supported by
371  * ReceiveXlogStream().
372  *
373  * If it's not, an error message is printed to stderr, and false is returned.
374  */
375 bool
377 {
378  int minServerMajor,
379  maxServerMajor;
380  int serverMajor;
381 
382  /*
383  * The message format used in streaming replication changed in 9.3, so we
384  * cannot stream from older servers. And we don't support servers newer
385  * than the client; it might work, but we don't know, so err on the safe
386  * side.
387  */
388  minServerMajor = 903;
389  maxServerMajor = PG_VERSION_NUM / 100;
390  serverMajor = PQserverVersion(conn) / 100;
391  if (serverMajor < minServerMajor)
392  {
393  const char *serverver = PQparameterStatus(conn, "server_version");
394 
395  pg_log_error("incompatible server version %s; client does not support streaming from server versions older than %s",
396  serverver ? serverver : "'unknown'",
397  "9.3");
398  return false;
399  }
400  else if (serverMajor > maxServerMajor)
401  {
402  const char *serverver = PQparameterStatus(conn, "server_version");
403 
404  pg_log_error("incompatible server version %s; client does not support streaming from server versions newer than %s",
405  serverver ? serverver : "'unknown'",
406  PG_VERSION);
407  return false;
408  }
409  return true;
410 }
411 
412 /*
413  * Receive a log stream starting at the specified position.
414  *
415  * Individual parameters are passed through the StreamCtl structure.
416  *
417  * If sysidentifier is specified, validate that both the system
418  * identifier and the timeline matches the specified ones
419  * (by sending an extra IDENTIFY_SYSTEM command)
420  *
421  * All received segments will be written to the directory
422  * specified by basedir. This will also fetch any missing timeline history
423  * files.
424  *
425  * The stream_stop callback will be called every time data
426  * is received, and whenever a segment is completed. If it returns
427  * true, the streaming will stop and the function
428  * return. As long as it returns false, streaming will continue
429  * indefinitely.
430  *
431  * If stream_stop() checks for external input, stop_socket should be set to
432  * the FD it checks. This will allow such input to be detected promptly
433  * rather than after standby_message_timeout (which might be indefinite).
434  * Note that signals will interrupt waits for input as well, but that is
435  * race-y since a signal received while busy won't interrupt the wait.
436  *
437  * standby_message_timeout controls how often we send a message
438  * back to the primary letting it know our progress, in milliseconds.
439  * Zero means no messages are sent.
440  * This message will only contain the write location, and never
441  * flush or replay.
442  *
443  * If 'partial_suffix' is not NULL, files are initially created with the
444  * given suffix, and the suffix is removed once the file is finished. That
445  * allows you to tell the difference between partial and completed files,
446  * so that you can continue later where you left.
447  *
448  * If 'synchronous' is true, the received WAL is flushed as soon as written,
449  * otherwise only when the WAL file is closed.
450  *
451  * Note: The WAL location *must* be at a log segment start!
452  */
453 bool
455 {
456  char query[128];
457  char slotcmd[128];
458  PGresult *res;
459  XLogRecPtr stoppos;
460 
461  /*
462  * The caller should've checked the server version already, but doesn't do
463  * any harm to check it here too.
464  */
466  return false;
467 
468  /*
469  * Decide whether we want to report the flush position. If we report the
470  * flush position, the primary will know what WAL we'll possibly
471  * re-request, and it can then remove older WAL safely. We must always do
472  * that when we are using slots.
473  *
474  * Reporting the flush position makes one eligible as a synchronous
475  * replica. People shouldn't include generic names in
476  * synchronous_standby_names, but we've protected them against it so far,
477  * so let's continue to do so unless specifically requested.
478  */
479  if (stream->replication_slot != NULL)
480  {
481  reportFlushPosition = true;
482  sprintf(slotcmd, "SLOT \"%s\" ", stream->replication_slot);
483  }
484  else
485  {
486  if (stream->synchronous)
487  reportFlushPosition = true;
488  else
489  reportFlushPosition = false;
490  slotcmd[0] = 0;
491  }
492 
493  if (stream->sysidentifier != NULL)
494  {
495  char *sysidentifier = NULL;
496  TimeLineID servertli;
497 
498  /*
499  * Get the server system identifier and timeline, and validate them.
500  */
501  if (!RunIdentifySystem(conn, &sysidentifier, &servertli, NULL, NULL))
502  {
503  pg_free(sysidentifier);
504  return false;
505  }
506 
507  if (strcmp(stream->sysidentifier, sysidentifier) != 0)
508  {
509  pg_log_error("system identifier does not match between base backup and streaming connection");
510  pg_free(sysidentifier);
511  return false;
512  }
513  pg_free(sysidentifier);
514 
515  if (stream->timeline > servertli)
516  {
517  pg_log_error("starting timeline %u is not present in the server",
518  stream->timeline);
519  return false;
520  }
521  }
522 
523  /*
524  * initialize flush position to starting point, it's the caller's
525  * responsibility that that's sane.
526  */
527  lastFlushPosition = stream->startpos;
528 
529  while (1)
530  {
531  /*
532  * Fetch the timeline history file for this timeline, if we don't have
533  * it already. When streaming log to tar, this will always return
534  * false, as we are never streaming into an existing file and
535  * therefore there can be no pre-existing timeline history file.
536  */
537  if (!existsTimeLineHistoryFile(stream))
538  {
539  snprintf(query, sizeof(query), "TIMELINE_HISTORY %u", stream->timeline);
540  res = PQexec(conn, query);
541  if (PQresultStatus(res) != PGRES_TUPLES_OK)
542  {
543  /* FIXME: we might send it ok, but get an error */
544  pg_log_error("could not send replication command \"%s\": %s",
545  "TIMELINE_HISTORY", PQresultErrorMessage(res));
546  PQclear(res);
547  return false;
548  }
549 
550  /*
551  * The response to TIMELINE_HISTORY is a single row result set
552  * with two fields: filename and content
553  */
554  if (PQnfields(res) != 2 || PQntuples(res) != 1)
555  {
556  pg_log_warning("unexpected response to TIMELINE_HISTORY command: got %d rows and %d fields, expected %d rows and %d fields",
557  PQntuples(res), PQnfields(res), 1, 2);
558  }
559 
560  /* Write the history file to disk */
562  PQgetvalue(res, 0, 0),
563  PQgetvalue(res, 0, 1));
564 
565  PQclear(res);
566  }
567 
568  /*
569  * Before we start streaming from the requested location, check if the
570  * callback tells us to stop here.
571  */
572  if (stream->stream_stop(stream->startpos, stream->timeline, false))
573  return true;
574 
575  /* Initiate the replication stream at specified location */
576  snprintf(query, sizeof(query), "START_REPLICATION %s%X/%X TIMELINE %u",
577  slotcmd,
578  LSN_FORMAT_ARGS(stream->startpos),
579  stream->timeline);
580  res = PQexec(conn, query);
581  if (PQresultStatus(res) != PGRES_COPY_BOTH)
582  {
583  pg_log_error("could not send replication command \"%s\": %s",
584  "START_REPLICATION", PQresultErrorMessage(res));
585  PQclear(res);
586  return false;
587  }
588  PQclear(res);
589 
590  /* Stream the WAL */
591  res = HandleCopyStream(conn, stream, &stoppos);
592  if (res == NULL)
593  goto error;
594 
595  /*
596  * Streaming finished.
597  *
598  * There are two possible reasons for that: a controlled shutdown, or
599  * we reached the end of the current timeline. In case of
600  * end-of-timeline, the server sends a result set after Copy has
601  * finished, containing information about the next timeline. Read
602  * that, and restart streaming from the next timeline. In case of
603  * controlled shutdown, stop here.
604  */
605  if (PQresultStatus(res) == PGRES_TUPLES_OK)
606  {
607  /*
608  * End-of-timeline. Read the next timeline's ID and starting
609  * position. Usually, the starting position will match the end of
610  * the previous timeline, but there are corner cases like if the
611  * server had sent us half of a WAL record, when it was promoted.
612  * The new timeline will begin at the end of the last complete
613  * record in that case, overlapping the partial WAL record on the
614  * old timeline.
615  */
616  uint32 newtimeline;
617  bool parsed;
618 
619  parsed = ReadEndOfStreamingResult(res, &stream->startpos, &newtimeline);
620  PQclear(res);
621  if (!parsed)
622  goto error;
623 
624  /* Sanity check the values the server gave us */
625  if (newtimeline <= stream->timeline)
626  {
627  pg_log_error("server reported unexpected next timeline %u, following timeline %u",
628  newtimeline, stream->timeline);
629  goto error;
630  }
631  if (stream->startpos > stoppos)
632  {
633  pg_log_error("server stopped streaming timeline %u at %X/%X, but reported next timeline %u to begin at %X/%X",
634  stream->timeline, LSN_FORMAT_ARGS(stoppos),
635  newtimeline, LSN_FORMAT_ARGS(stream->startpos));
636  goto error;
637  }
638 
639  /* Read the final result, which should be CommandComplete. */
640  res = PQgetResult(conn);
641  if (PQresultStatus(res) != PGRES_COMMAND_OK)
642  {
643  pg_log_error("unexpected termination of replication stream: %s",
644  PQresultErrorMessage(res));
645  PQclear(res);
646  goto error;
647  }
648  PQclear(res);
649 
650  /*
651  * Loop back to start streaming from the new timeline. Always
652  * start streaming at the beginning of a segment.
653  */
654  stream->timeline = newtimeline;
655  stream->startpos = stream->startpos -
657  continue;
658  }
659  else if (PQresultStatus(res) == PGRES_COMMAND_OK)
660  {
661  PQclear(res);
662 
663  /*
664  * End of replication (ie. controlled shut down of the server).
665  *
666  * Check if the callback thinks it's OK to stop here. If not,
667  * complain.
668  */
669  if (stream->stream_stop(stoppos, stream->timeline, false))
670  return true;
671  else
672  {
673  pg_log_error("replication stream was terminated before stop point");
674  goto error;
675  }
676  }
677  else
678  {
679  /* Server returned an error. */
680  pg_log_error("unexpected termination of replication stream: %s",
681  PQresultErrorMessage(res));
682  PQclear(res);
683  goto error;
684  }
685  }
686 
687 error:
688  if (walfile != NULL && stream->walmethod->close(walfile, CLOSE_NO_RENAME) != 0)
689  pg_log_error("could not close file \"%s\": %s",
691  walfile = NULL;
692  return false;
693 }
694 
695 /*
696  * Helper function to parse the result set returned by server after streaming
697  * has finished. On failure, prints an error to stderr and returns false.
698  */
699 static bool
701 {
702  uint32 startpos_xlogid,
703  startpos_xrecoff;
704 
705  /*----------
706  * The result set consists of one row and two columns, e.g:
707  *
708  * next_tli | next_tli_startpos
709  * ----------+-------------------
710  * 4 | 0/9949AE0
711  *
712  * next_tli is the timeline ID of the next timeline after the one that
713  * just finished streaming. next_tli_startpos is the WAL location where
714  * the server switched to it.
715  *----------
716  */
717  if (PQnfields(res) < 2 || PQntuples(res) != 1)
718  {
719  pg_log_error("unexpected result set after end-of-timeline: got %d rows and %d fields, expected %d rows and %d fields",
720  PQntuples(res), PQnfields(res), 1, 2);
721  return false;
722  }
723 
724  *timeline = atoi(PQgetvalue(res, 0, 0));
725  if (sscanf(PQgetvalue(res, 0, 1), "%X/%X", &startpos_xlogid,
726  &startpos_xrecoff) != 2)
727  {
728  pg_log_error("could not parse next timeline's starting point \"%s\"",
729  PQgetvalue(res, 0, 1));
730  return false;
731  }
732  *startpos = ((uint64) startpos_xlogid << 32) | startpos_xrecoff;
733 
734  return true;
735 }
736 
737 /*
738  * The main loop of ReceiveXlogStream. Handles the COPY stream after
739  * initiating streaming with the START_REPLICATION command.
740  *
741  * If the COPY ends (not necessarily successfully) due a message from the
742  * server, returns a PGresult and sets *stoppos to the last byte written.
743  * On any other sort of error, returns NULL.
744  */
745 static PGresult *
747  XLogRecPtr *stoppos)
748 {
749  char *copybuf = NULL;
750  TimestampTz last_status = -1;
751  XLogRecPtr blockpos = stream->startpos;
752 
753  still_sending = true;
754 
755  while (1)
756  {
757  int r;
759  long sleeptime;
760 
761  /*
762  * Check if we should continue streaming, or abort at this point.
763  */
764  if (!CheckCopyStreamStop(conn, stream, blockpos))
765  goto error;
766 
767  now = feGetCurrentTimestamp();
768 
769  /*
770  * If synchronous option is true, issue sync command as soon as there
771  * are WAL data which has not been flushed yet.
772  */
773  if (stream->synchronous && lastFlushPosition < blockpos && walfile != NULL)
774  {
775  if (stream->walmethod->sync(walfile) != 0)
776  {
777  pg_log_fatal("could not fsync file \"%s\": %s",
779  exit(1);
780  }
781  lastFlushPosition = blockpos;
782 
783  /*
784  * Send feedback so that the server sees the latest WAL locations
785  * immediately.
786  */
787  if (!sendFeedback(conn, blockpos, now, false))
788  goto error;
789  last_status = now;
790  }
791 
792  /*
793  * Potentially send a status message to the primary
794  */
795  if (still_sending && stream->standby_message_timeout > 0 &&
796  feTimestampDifferenceExceeds(last_status, now,
797  stream->standby_message_timeout))
798  {
799  /* Time to send feedback! */
800  if (!sendFeedback(conn, blockpos, now, false))
801  goto error;
802  last_status = now;
803  }
804 
805  /*
806  * Calculate how long send/receive loops should sleep
807  */
808  sleeptime = CalculateCopyStreamSleeptime(now, stream->standby_message_timeout,
809  last_status);
810 
811  r = CopyStreamReceive(conn, sleeptime, stream->stop_socket, &copybuf);
812  while (r != 0)
813  {
814  if (r == -1)
815  goto error;
816  if (r == -2)
817  {
818  PGresult *res = HandleEndOfCopyStream(conn, stream, copybuf, blockpos, stoppos);
819 
820  if (res == NULL)
821  goto error;
822  else
823  return res;
824  }
825 
826  /* Check the message type. */
827  if (copybuf[0] == 'k')
828  {
829  if (!ProcessKeepaliveMsg(conn, stream, copybuf, r, blockpos,
830  &last_status))
831  goto error;
832  }
833  else if (copybuf[0] == 'w')
834  {
835  if (!ProcessXLogDataMsg(conn, stream, copybuf, r, &blockpos))
836  goto error;
837 
838  /*
839  * Check if we should continue streaming, or abort at this
840  * point.
841  */
842  if (!CheckCopyStreamStop(conn, stream, blockpos))
843  goto error;
844  }
845  else
846  {
847  pg_log_error("unrecognized streaming header: \"%c\"",
848  copybuf[0]);
849  goto error;
850  }
851 
852  /*
853  * Process the received data, and any subsequent data we can read
854  * without blocking.
855  */
856  r = CopyStreamReceive(conn, 0, stream->stop_socket, &copybuf);
857  }
858  }
859 
860 error:
861  if (copybuf != NULL)
862  PQfreemem(copybuf);
863  return NULL;
864 }
865 
866 /*
867  * Wait until we can read a CopyData message,
868  * or timeout, or occurrence of a signal or input on the stop_socket.
869  * (timeout_ms < 0 means wait indefinitely; 0 means don't wait.)
870  *
871  * Returns 1 if data has become available for reading, 0 if timed out
872  * or interrupted by signal or stop_socket input, and -1 on an error.
873  */
874 static int
875 CopyStreamPoll(PGconn *conn, long timeout_ms, pgsocket stop_socket)
876 {
877  int ret;
878  fd_set input_mask;
879  int connsocket;
880  int maxfd;
881  struct timeval timeout;
882  struct timeval *timeoutptr;
883 
884  connsocket = PQsocket(conn);
885  if (connsocket < 0)
886  {
887  pg_log_error("invalid socket: %s", PQerrorMessage(conn));
888  return -1;
889  }
890 
891  FD_ZERO(&input_mask);
892  FD_SET(connsocket, &input_mask);
893  maxfd = connsocket;
894  if (stop_socket != PGINVALID_SOCKET)
895  {
896  FD_SET(stop_socket, &input_mask);
897  maxfd = Max(maxfd, stop_socket);
898  }
899 
900  if (timeout_ms < 0)
901  timeoutptr = NULL;
902  else
903  {
904  timeout.tv_sec = timeout_ms / 1000L;
905  timeout.tv_usec = (timeout_ms % 1000L) * 1000L;
906  timeoutptr = &timeout;
907  }
908 
909  ret = select(maxfd + 1, &input_mask, NULL, NULL, timeoutptr);
910 
911  if (ret < 0)
912  {
913  if (errno == EINTR)
914  return 0; /* Got a signal, so not an error */
915  pg_log_error("%s() failed: %m", "select");
916  return -1;
917  }
918  if (ret > 0 && FD_ISSET(connsocket, &input_mask))
919  return 1; /* Got input on connection socket */
920 
921  return 0; /* Got timeout or input on stop_socket */
922 }
923 
924 /*
925  * Receive CopyData message available from XLOG stream, blocking for
926  * maximum of 'timeout' ms.
927  *
928  * If data was received, returns the length of the data. *buffer is set to
929  * point to a buffer holding the received message. The buffer is only valid
930  * until the next CopyStreamReceive call.
931  *
932  * Returns 0 if no data was available within timeout, or if wait was
933  * interrupted by signal or stop_socket input.
934  * -1 on error. -2 if the server ended the COPY.
935  */
936 static int
937 CopyStreamReceive(PGconn *conn, long timeout, pgsocket stop_socket,
938  char **buffer)
939 {
940  char *copybuf = NULL;
941  int rawlen;
942 
943  if (*buffer != NULL)
944  PQfreemem(*buffer);
945  *buffer = NULL;
946 
947  /* Try to receive a CopyData message */
948  rawlen = PQgetCopyData(conn, &copybuf, 1);
949  if (rawlen == 0)
950  {
951  int ret;
952 
953  /*
954  * No data available. Wait for some to appear, but not longer than
955  * the specified timeout, so that we can ping the server. Also stop
956  * waiting if input appears on stop_socket.
957  */
958  ret = CopyStreamPoll(conn, timeout, stop_socket);
959  if (ret <= 0)
960  return ret;
961 
962  /* Now there is actually data on the socket */
963  if (PQconsumeInput(conn) == 0)
964  {
965  pg_log_error("could not receive data from WAL stream: %s",
966  PQerrorMessage(conn));
967  return -1;
968  }
969 
970  /* Now that we've consumed some input, try again */
971  rawlen = PQgetCopyData(conn, &copybuf, 1);
972  if (rawlen == 0)
973  return 0;
974  }
975  if (rawlen == -1) /* end-of-streaming or error */
976  return -2;
977  if (rawlen == -2)
978  {
979  pg_log_error("could not read COPY data: %s", PQerrorMessage(conn));
980  return -1;
981  }
982 
983  /* Return received messages to caller */
984  *buffer = copybuf;
985  return rawlen;
986 }
987 
988 /*
989  * Process the keepalive message.
990  */
991 static bool
992 ProcessKeepaliveMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len,
993  XLogRecPtr blockpos, TimestampTz *last_status)
994 {
995  int pos;
996  bool replyRequested;
998 
999  /*
1000  * Parse the keepalive message, enclosed in the CopyData message. We just
1001  * check if the server requested a reply, and ignore the rest.
1002  */
1003  pos = 1; /* skip msgtype 'k' */
1004  pos += 8; /* skip walEnd */
1005  pos += 8; /* skip sendTime */
1006 
1007  if (len < pos + 1)
1008  {
1009  pg_log_error("streaming header too small: %d", len);
1010  return false;
1011  }
1012  replyRequested = copybuf[pos];
1013 
1014  /* If the server requested an immediate reply, send one. */
1015  if (replyRequested && still_sending)
1016  {
1017  if (reportFlushPosition && lastFlushPosition < blockpos &&
1018  walfile != NULL)
1019  {
1020  /*
1021  * If a valid flush location needs to be reported, flush the
1022  * current WAL file so that the latest flush location is sent back
1023  * to the server. This is necessary to see whether the last WAL
1024  * data has been successfully replicated or not, at the normal
1025  * shutdown of the server.
1026  */
1027  if (stream->walmethod->sync(walfile) != 0)
1028  {
1029  pg_log_fatal("could not fsync file \"%s\": %s",
1031  exit(1);
1032  }
1033  lastFlushPosition = blockpos;
1034  }
1035 
1036  now = feGetCurrentTimestamp();
1037  if (!sendFeedback(conn, blockpos, now, false))
1038  return false;
1039  *last_status = now;
1040  }
1041 
1042  return true;
1043 }
1044 
1045 /*
1046  * Process XLogData message.
1047  */
1048 static bool
1049 ProcessXLogDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len,
1050  XLogRecPtr *blockpos)
1051 {
1052  int xlogoff;
1053  int bytes_left;
1054  int bytes_written;
1055  int hdr_len;
1056 
1057  /*
1058  * Once we've decided we don't want to receive any more, just ignore any
1059  * subsequent XLogData messages.
1060  */
1061  if (!(still_sending))
1062  return true;
1063 
1064  /*
1065  * Read the header of the XLogData message, enclosed in the CopyData
1066  * message. We only need the WAL location field (dataStart), the rest of
1067  * the header is ignored.
1068  */
1069  hdr_len = 1; /* msgtype 'w' */
1070  hdr_len += 8; /* dataStart */
1071  hdr_len += 8; /* walEnd */
1072  hdr_len += 8; /* sendTime */
1073  if (len < hdr_len)
1074  {
1075  pg_log_error("streaming header too small: %d", len);
1076  return false;
1077  }
1078  *blockpos = fe_recvint64(&copybuf[1]);
1079 
1080  /* Extract WAL location for this block */
1081  xlogoff = XLogSegmentOffset(*blockpos, WalSegSz);
1082 
1083  /*
1084  * Verify that the initial location in the stream matches where we think
1085  * we are.
1086  */
1087  if (walfile == NULL)
1088  {
1089  /* No file open yet */
1090  if (xlogoff != 0)
1091  {
1092  pg_log_error("received write-ahead log record for offset %u with no file open",
1093  xlogoff);
1094  return false;
1095  }
1096  }
1097  else
1098  {
1099  /* More data in existing segment */
1100  if (stream->walmethod->get_current_pos(walfile) != xlogoff)
1101  {
1102  pg_log_error("got WAL data offset %08x, expected %08x",
1103  xlogoff, (int) stream->walmethod->get_current_pos(walfile));
1104  return false;
1105  }
1106  }
1107 
1108  bytes_left = len - hdr_len;
1109  bytes_written = 0;
1110 
1111  while (bytes_left)
1112  {
1113  int bytes_to_write;
1114 
1115  /*
1116  * If crossing a WAL boundary, only write up until we reach wal
1117  * segment size.
1118  */
1119  if (xlogoff + bytes_left > WalSegSz)
1120  bytes_to_write = WalSegSz - xlogoff;
1121  else
1122  bytes_to_write = bytes_left;
1123 
1124  if (walfile == NULL)
1125  {
1126  if (!open_walfile(stream, *blockpos))
1127  {
1128  /* Error logged by open_walfile */
1129  return false;
1130  }
1131  }
1132 
1133  if (stream->walmethod->write(walfile, copybuf + hdr_len + bytes_written,
1134  bytes_to_write) != bytes_to_write)
1135  {
1136  pg_log_error("could not write %u bytes to WAL file \"%s\": %s",
1137  bytes_to_write, current_walfile_name,
1138  stream->walmethod->getlasterror());
1139  return false;
1140  }
1141 
1142  /* Write was successful, advance our position */
1143  bytes_written += bytes_to_write;
1144  bytes_left -= bytes_to_write;
1145  *blockpos += bytes_to_write;
1146  xlogoff += bytes_to_write;
1147 
1148  /* Did we reach the end of a WAL segment? */
1149  if (XLogSegmentOffset(*blockpos, WalSegSz) == 0)
1150  {
1151  if (!close_walfile(stream, *blockpos))
1152  /* Error message written in close_walfile() */
1153  return false;
1154 
1155  xlogoff = 0;
1156 
1157  if (still_sending && stream->stream_stop(*blockpos, stream->timeline, true))
1158  {
1159  if (PQputCopyEnd(conn, NULL) <= 0 || PQflush(conn))
1160  {
1161  pg_log_error("could not send copy-end packet: %s",
1162  PQerrorMessage(conn));
1163  return false;
1164  }
1165  still_sending = false;
1166  return true; /* ignore the rest of this XLogData packet */
1167  }
1168  }
1169  }
1170  /* No more data left to write, receive next copy packet */
1171 
1172  return true;
1173 }
1174 
1175 /*
1176  * Handle end of the copy stream.
1177  */
1178 static PGresult *
1180  XLogRecPtr blockpos, XLogRecPtr *stoppos)
1181 {
1182  PGresult *res = PQgetResult(conn);
1183 
1184  /*
1185  * The server closed its end of the copy stream. If we haven't closed
1186  * ours already, we need to do so now, unless the server threw an error,
1187  * in which case we don't.
1188  */
1189  if (still_sending)
1190  {
1191  if (!close_walfile(stream, blockpos))
1192  {
1193  /* Error message written in close_walfile() */
1194  PQclear(res);
1195  return NULL;
1196  }
1197  if (PQresultStatus(res) == PGRES_COPY_IN)
1198  {
1199  if (PQputCopyEnd(conn, NULL) <= 0 || PQflush(conn))
1200  {
1201  pg_log_error("could not send copy-end packet: %s",
1202  PQerrorMessage(conn));
1203  PQclear(res);
1204  return NULL;
1205  }
1206  res = PQgetResult(conn);
1207  }
1208  still_sending = false;
1209  }
1210  if (copybuf != NULL)
1211  PQfreemem(copybuf);
1212  *stoppos = blockpos;
1213  return res;
1214 }
1215 
1216 /*
1217  * Check if we should continue streaming, or abort at this point.
1218  */
1219 static bool
1221 {
1222  if (still_sending && stream->stream_stop(blockpos, stream->timeline, false))
1223  {
1224  if (!close_walfile(stream, blockpos))
1225  {
1226  /* Potential error message is written by close_walfile */
1227  return false;
1228  }
1229  if (PQputCopyEnd(conn, NULL) <= 0 || PQflush(conn))
1230  {
1231  pg_log_error("could not send copy-end packet: %s",
1232  PQerrorMessage(conn));
1233  return false;
1234  }
1235  still_sending = false;
1236  }
1237 
1238  return true;
1239 }
1240 
1241 /*
1242  * Calculate how long send/receive loops should sleep
1243  */
1244 static long
1246  TimestampTz last_status)
1247 {
1248  TimestampTz status_targettime = 0;
1249  long sleeptime;
1250 
1251  if (standby_message_timeout && still_sending)
1252  status_targettime = last_status +
1253  (standby_message_timeout - 1) * ((int64) 1000);
1254 
1255  if (status_targettime > 0)
1256  {
1257  long secs;
1258  int usecs;
1259 
1261  status_targettime,
1262  &secs,
1263  &usecs);
1264  /* Always sleep at least 1 sec */
1265  if (secs <= 0)
1266  {
1267  secs = 1;
1268  usecs = 0;
1269  }
1270 
1271  sleeptime = secs * 1000 + usecs / 1000;
1272  }
1273  else
1274  sleeptime = -1;
1275 
1276  return sleeptime;
1277 }
int PQputCopyData(PGconn *conn, const char *buffer, int nbytes)
Definition: fe-exec.c:2544
int PQnfields(const PGresult *res)
Definition: fe-exec.c:3256
static bool open_walfile(StreamCtl *stream, XLogRecPtr startpoint)
Definition: receivelog.c:88
char * PQerrorMessage(const PGconn *conn)
Definition: fe-connect.c:6744
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
char *(* get_file_name)(const char *pathname, const char *temp_suffix)
Definition: walmethods.h:59
uint32 TimeLineID
Definition: xlogdefs.h:59
static int CopyStreamPoll(PGconn *conn, long timeout_ms, pgsocket stop_socket)
Definition: receivelog.c:875
TimestampTz feGetCurrentTimestamp(void)
Definition: streamutil.c:612
char * PQgetvalue(const PGresult *res, int tup_num, int field_num)
Definition: fe-exec.c:3642
void * Walfile
Definition: walmethods.h:13
int(* close)(Walfile f, WalCloseMethod method)
Definition: walmethods.h:47
static void error(void)
Definition: sql-dyntest.c:147
const char * PQparameterStatus(const PGconn *conn, const char *paramName)
Definition: fe-connect.c:6709
static bool still_sending
Definition: receivelog.c:36
int64 TimestampTz
Definition: timestamp.h:39
#define pg_log_error(...)
Definition: logging.h:80
bool RunIdentifySystem(PGconn *conn, char **sysid, TimeLineID *starttli, XLogRecPtr *startpos, char **db_name)
Definition: streamutil.c:409
char * sysidentifier
Definition: receivelog.h:33
int PQputCopyEnd(PGconn *conn, const char *errormsg)
Definition: fe-exec.c:2600
XLogRecPtr startpos
Definition: receivelog.h:31
static long CalculateCopyStreamSleeptime(TimestampTz now, int standby_message_timeout, TimestampTz last_status)
Definition: receivelog.c:1245
char * partial_suffix
Definition: receivelog.h:47
int PQserverVersion(const PGconn *conn)
Definition: fe-connect.c:6734
int PQntuples(const PGresult *res)
Definition: fe-exec.c:3248
bool feTimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: streamutil.c:653
int(* sync)(Walfile f)
Definition: walmethods.h:76
TimeLineID timeline
Definition: receivelog.h:32
ExecStatusType PQresultStatus(const PGresult *res)
Definition: fe-exec.c:3178
int PQgetCopyData(PGconn *conn, char **buffer, int async)
Definition: fe-exec.c:2668
static bool ProcessKeepaliveMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len, XLogRecPtr blockpos, TimestampTz *last_status)
Definition: receivelog.c:992
const char *(* getlasterror)(void)
Definition: walmethods.h:87
off_t(* get_current_pos)(Walfile f)
Definition: walmethods.h:71
#define sprintf
Definition: port.h:218
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
static int CopyStreamReceive(PGconn *conn, long timeout, pgsocket stop_socket, char **buffer)
Definition: receivelog.c:937
static bool ReadEndOfStreamingResult(PGresult *res, XLogRecPtr *startpos, uint32 *timeline)
Definition: receivelog.c:700
PGconn * conn
Definition: streamutil.c:54
#define MAXPGPATH
static Walfile * walfile
Definition: receivelog.c:31
int PQflush(PGconn *conn)
Definition: fe-exec.c:3766
char * replication_slot
Definition: receivelog.h:48
bool mark_done
Definition: receivelog.h:37
void feTimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition: streamutil.c:631
#define TLHistoryFileName(fname, tli)
static bool reportFlushPosition
Definition: receivelog.c:33
uint64 XLogSegNo
Definition: xlogdefs.h:48
ssize_t(* write)(Walfile f, const void *buf, size_t count)
Definition: walmethods.h:68
#define select(n, r, w, e, timeout)
Definition: win32_port.h:464
unsigned int uint32
Definition: c.h:441
int pgsocket
Definition: port.h:31
static bool close_walfile(StreamCtl *stream, XLogRecPtr pos)
Definition: receivelog.c:186
stream_stop_callback stream_stop
Definition: receivelog.h:41
WalWriteMethod * walmethod
Definition: receivelog.h:46
#define MAXFNAMELEN
static int standby_message_timeout
#define ngettext(s, p, n)
Definition: c.h:1182
static bool sendFeedback(PGconn *conn, XLogRecPtr blockpos, TimestampTz now, bool replyRequested)
Definition: receivelog.c:338
static XLogRecPtr lastFlushPosition
Definition: receivelog.c:34
static PGresult * HandleEndOfCopyStream(PGconn *conn, StreamCtl *stream, char *copybuf, XLogRecPtr blockpos, XLogRecPtr *stoppos)
Definition: receivelog.c:1179
#define PGINVALID_SOCKET
Definition: port.h:33
#define XLogSegmentOffset(xlogptr, wal_segsz_bytes)
int PQconsumeInput(PGconn *conn)
Definition: fe-exec.c:1904
static bool ProcessXLogDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len, XLogRecPtr *blockpos)
Definition: receivelog.c:1049
StringInfo copybuf
Definition: tablesync.c:124
void PQclear(PGresult *res)
Definition: fe-exec.c:694
static void * fn(void *arg)
#define Max(x, y)
Definition: c.h:980
static PGresult * HandleCopyStream(PGconn *conn, StreamCtl *stream, XLogRecPtr *stoppos)
Definition: receivelog.c:746
uint64 XLogRecPtr
Definition: xlogdefs.h:21
static bool existsTimeLineHistoryFile(StreamCtl *stream)
Definition: receivelog.c:260
bool ReceiveXlogStream(PGconn *conn, StreamCtl *stream)
Definition: receivelog.c:454
#define XLogFileName(fname, tli, logSegNo, wal_segsz_bytes)
static XLogRecPtr startpos
bool synchronous
Definition: receivelog.h:36
void pg_free(void *ptr)
Definition: fe_memutils.c:105
pgsocket stop_socket
Definition: receivelog.h:43
uint32 WalSegSz
Definition: streamutil.c:34
static char * filename
Definition: pg_dumpall.c:92
bool(* existsfile)(const char *pathname)
Definition: walmethods.h:50
char * PQresultErrorMessage(const PGresult *res)
Definition: fe-exec.c:3194
int(* compression)(void)
Definition: walmethods.h:62
Walfile(* open_for_write)(const char *pathname, const char *temp_suffix, size_t pad_to_size)
Definition: walmethods.h:41
int64 fe_recvint64(char *buf)
Definition: streamutil.c:677
static bool mark_file_as_archived(StreamCtl *stream, const char *fname)
Definition: receivelog.c:57
static bool CheckCopyStreamStop(PGconn *conn, StreamCtl *stream, XLogRecPtr blockpos)
Definition: receivelog.c:1220
int standby_message_timeout
Definition: receivelog.h:35
PGresult * PQexec(PGconn *conn, const char *query)
Definition: fe-exec.c:2193
ssize_t(* get_file_size)(const char *pathname)
Definition: walmethods.h:53
void fe_sendint64(int64 i, char *buf)
Definition: streamutil.c:666
#define EINTR
Definition: win32_port.h:343
#define pg_log_warning(...)
Definition: pgfnames.c:24
static char current_walfile_name[MAXPGPATH]
Definition: receivelog.c:32
#define snprintf
Definition: port.h:216
void PQfreemem(void *ptr)
Definition: fe-exec.c:3796
int PQsocket(const PGconn *conn)
Definition: fe-connect.c:6770
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1544
PGresult * PQgetResult(PGconn *conn)
Definition: fe-exec.c:1978
#define pg_log_info(...)
Definition: logging.h:88
static bool writeTimeLineHistoryFile(StreamCtl *stream, char *filename, char *content)
Definition: receivelog.c:277
bool CheckServerVersionForStreaming(PGconn *conn)
Definition: receivelog.c:376
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
#define pg_log_fatal(...)
Definition: logging.h:76