PostgreSQL Source Code  git master
receivelog.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * receivelog.c - receive WAL files using the streaming
4  * replication protocol.
5  *
6  * Author: Magnus Hagander <magnus@hagander.net>
7  *
8  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
9  *
10  * IDENTIFICATION
11  * src/bin/pg_basebackup/receivelog.c
12  *-------------------------------------------------------------------------
13  */
14 
15 #include "postgres_fe.h"
16 
17 #include <sys/stat.h>
18 #include <unistd.h>
19 #ifdef HAVE_SYS_SELECT_H
20 #include <sys/select.h>
21 #endif
22 
23 /* local includes */
24 #include "receivelog.h"
25 #include "streamutil.h"
26 
27 #include "libpq-fe.h"
28 #include "access/xlog_internal.h"
29 #include "common/file_utils.h"
30 #include "common/logging.h"
31 
32 
33 /* fd and filename for currently open WAL file */
34 static Walfile *walfile = NULL;
35 static char current_walfile_name[MAXPGPATH] = "";
36 static bool reportFlushPosition = false;
38 
39 static bool still_sending = true; /* feedback still needs to be sent? */
40 
42  XLogRecPtr *stoppos);
43 static int CopyStreamPoll(PGconn *conn, long timeout_ms, pgsocket stop_socket);
44 static int CopyStreamReceive(PGconn *conn, long timeout, pgsocket stop_socket,
45  char **buffer);
46 static bool ProcessKeepaliveMsg(PGconn *conn, StreamCtl *stream, char *copybuf,
47  int len, XLogRecPtr blockpos, TimestampTz *last_status);
48 static bool ProcessXLogDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len,
49  XLogRecPtr *blockpos);
51  XLogRecPtr blockpos, XLogRecPtr *stoppos);
52 static bool CheckCopyStreamStop(PGconn *conn, StreamCtl *stream, XLogRecPtr blockpos,
53  XLogRecPtr *stoppos);
55  TimestampTz last_status);
56 
58  uint32 *timeline);
59 
60 static bool
61 mark_file_as_archived(StreamCtl *stream, const char *fname)
62 {
63  Walfile *f;
64  static char tmppath[MAXPGPATH];
65 
66  snprintf(tmppath, sizeof(tmppath), "archive_status/%s.done",
67  fname);
68 
69  f = stream->walmethod->open_for_write(tmppath, NULL, 0);
70  if (f == NULL)
71  {
72  pg_log_error("could not create archive status file \"%s\": %s",
73  tmppath, stream->walmethod->getlasterror());
74  return false;
75  }
76 
77  stream->walmethod->close(f, CLOSE_NORMAL);
78 
79  return true;
80 }
81 
82 /*
83  * Open a new WAL file in the specified directory.
84  *
85  * Returns true if OK; on failure, returns false after printing an error msg.
86  * On success, 'walfile' is set to the FD for the file, and the base filename
87  * (without partial_suffix) is stored in 'current_walfile_name'.
88  *
89  * The file will be padded to 16Mb with zeroes.
90  */
91 static bool
92 open_walfile(StreamCtl *stream, XLogRecPtr startpoint)
93 {
94  Walfile *f;
95  char fn[MAXPGPATH];
96  ssize_t size;
97  XLogSegNo segno;
98 
99  XLByteToSeg(startpoint, segno, WalSegSz);
101 
102  snprintf(fn, sizeof(fn), "%s%s", current_walfile_name,
103  stream->partial_suffix ? stream->partial_suffix : "");
104 
105  /*
106  * When streaming to files, if an existing file exists we verify that it's
107  * either empty (just created), or a complete WalSegSz segment (in which
108  * case it has been created and padded). Anything else indicates a corrupt
109  * file.
110  *
111  * When streaming to tar, no file with this name will exist before, so we
112  * never have to verify a size.
113  */
114  if (stream->walmethod->existsfile(fn))
115  {
116  size = stream->walmethod->get_file_size(fn);
117  if (size < 0)
118  {
119  pg_log_error("could not get size of write-ahead log file \"%s\": %s",
120  fn, stream->walmethod->getlasterror());
121  return false;
122  }
123  if (size == WalSegSz)
124  {
125  /* Already padded file. Open it for use */
127  if (f == NULL)
128  {
129  pg_log_error("could not open existing write-ahead log file \"%s\": %s",
130  fn, stream->walmethod->getlasterror());
131  return false;
132  }
133 
134  /* fsync file in case of a previous crash */
135  if (stream->walmethod->sync(f) != 0)
136  {
137  pg_log_fatal("could not fsync existing write-ahead log file \"%s\": %s",
138  fn, stream->walmethod->getlasterror());
139  stream->walmethod->close(f, CLOSE_UNLINK);
140  exit(1);
141  }
142 
143  walfile = f;
144  return true;
145  }
146  if (size != 0)
147  {
148  /* if write didn't set errno, assume problem is no disk space */
149  if (errno == 0)
150  errno = ENOSPC;
151  pg_log_error(ngettext("write-ahead log file \"%s\" has %d byte, should be 0 or %d",
152  "write-ahead log file \"%s\" has %d bytes, should be 0 or %d",
153  size),
154  fn, (int) size, WalSegSz);
155  return false;
156  }
157  /* File existed and was empty, so fall through and open */
158  }
159 
160  /* No file existed, so create one */
161 
163  stream->partial_suffix, WalSegSz);
164  if (f == NULL)
165  {
166  pg_log_error("could not open write-ahead log file \"%s\": %s",
167  fn, stream->walmethod->getlasterror());
168  return false;
169  }
170 
171  walfile = f;
172  return true;
173 }
174 
175 /*
176  * Close the current WAL file (if open), and rename it to the correct
177  * filename if it's complete. On failure, prints an error message to stderr
178  * and returns false, otherwise returns true.
179  */
180 static bool
182 {
183  off_t currpos;
184  int r;
185 
186  if (walfile == NULL)
187  return true;
188 
189  currpos = stream->walmethod->get_current_pos(walfile);
190  if (currpos == -1)
191  {
192  pg_log_error("could not determine seek position in file \"%s\": %s",
194  stream->walmethod->close(walfile, CLOSE_UNLINK);
195  walfile = NULL;
196 
197  return false;
198  }
199 
200  if (stream->partial_suffix)
201  {
202  if (currpos == WalSegSz)
203  r = stream->walmethod->close(walfile, CLOSE_NORMAL);
204  else
205  {
206  pg_log_info("not renaming \"%s%s\", segment is not complete",
208  r = stream->walmethod->close(walfile, CLOSE_NO_RENAME);
209  }
210  }
211  else
212  r = stream->walmethod->close(walfile, CLOSE_NORMAL);
213 
214  walfile = NULL;
215 
216  if (r != 0)
217  {
218  pg_log_error("could not close file \"%s\": %s",
220  return false;
221  }
222 
223  /*
224  * Mark file as archived if requested by the caller - pg_basebackup needs
225  * to do so as files can otherwise get archived again after promotion of a
226  * new node. This is in line with walreceiver.c always doing a
227  * XLogArchiveForceDone() after a complete segment.
228  */
229  if (currpos == WalSegSz && stream->mark_done)
230  {
231  /* writes error message if failed */
233  return false;
234  }
235 
236  lastFlushPosition = pos;
237  return true;
238 }
239 
240 
241 /*
242  * Check if a timeline history file exists.
243  */
244 static bool
246 {
247  char histfname[MAXFNAMELEN];
248 
249  /*
250  * Timeline 1 never has a history file. We treat that as if it existed,
251  * since we never need to stream it.
252  */
253  if (stream->timeline == 1)
254  return true;
255 
256  TLHistoryFileName(histfname, stream->timeline);
257 
258  return stream->walmethod->existsfile(histfname);
259 }
260 
261 static bool
262 writeTimeLineHistoryFile(StreamCtl *stream, char *filename, char *content)
263 {
264  int size = strlen(content);
265  char histfname[MAXFNAMELEN];
266  Walfile *f;
267 
268  /*
269  * Check that the server's idea of how timeline history files should be
270  * named matches ours.
271  */
272  TLHistoryFileName(histfname, stream->timeline);
273  if (strcmp(histfname, filename) != 0)
274  {
275  pg_log_error("server reported unexpected history file name for timeline %u: %s",
276  stream->timeline, filename);
277  return false;
278  }
279 
280  f = stream->walmethod->open_for_write(histfname, ".tmp", 0);
281  if (f == NULL)
282  {
283  pg_log_error("could not create timeline history file \"%s\": %s",
284  histfname, stream->walmethod->getlasterror());
285  return false;
286  }
287 
288  if ((int) stream->walmethod->write(f, content, size) != size)
289  {
290  pg_log_error("could not write timeline history file \"%s\": %s",
291  histfname, stream->walmethod->getlasterror());
292 
293  /*
294  * If we fail to make the file, delete it to release disk space
295  */
296  stream->walmethod->close(f, CLOSE_UNLINK);
297 
298  return false;
299  }
300 
301  if (stream->walmethod->close(f, CLOSE_NORMAL) != 0)
302  {
303  pg_log_error("could not close file \"%s\": %s",
304  histfname, stream->walmethod->getlasterror());
305  return false;
306  }
307 
308  /* Maintain archive_status, check close_walfile() for details. */
309  if (stream->mark_done)
310  {
311  /* writes error message if failed */
312  if (!mark_file_as_archived(stream, histfname))
313  return false;
314  }
315 
316  return true;
317 }
318 
319 /*
320  * Send a Standby Status Update message to server.
321  */
322 static bool
323 sendFeedback(PGconn *conn, XLogRecPtr blockpos, TimestampTz now, bool replyRequested)
324 {
325  char replybuf[1 + 8 + 8 + 8 + 8 + 1];
326  int len = 0;
327 
328  replybuf[len] = 'r';
329  len += 1;
330  fe_sendint64(blockpos, &replybuf[len]); /* write */
331  len += 8;
333  fe_sendint64(lastFlushPosition, &replybuf[len]); /* flush */
334  else
335  fe_sendint64(InvalidXLogRecPtr, &replybuf[len]); /* flush */
336  len += 8;
337  fe_sendint64(InvalidXLogRecPtr, &replybuf[len]); /* apply */
338  len += 8;
339  fe_sendint64(now, &replybuf[len]); /* sendTime */
340  len += 8;
341  replybuf[len] = replyRequested ? 1 : 0; /* replyRequested */
342  len += 1;
343 
344  if (PQputCopyData(conn, replybuf, len) <= 0 || PQflush(conn))
345  {
346  pg_log_error("could not send feedback packet: %s",
347  PQerrorMessage(conn));
348  return false;
349  }
350 
351  return true;
352 }
353 
354 /*
355  * Check that the server version we're connected to is supported by
356  * ReceiveXlogStream().
357  *
358  * If it's not, an error message is printed to stderr, and false is returned.
359  */
360 bool
362 {
363  int minServerMajor,
364  maxServerMajor;
365  int serverMajor;
366 
367  /*
368  * The message format used in streaming replication changed in 9.3, so we
369  * cannot stream from older servers. And we don't support servers newer
370  * than the client; it might work, but we don't know, so err on the safe
371  * side.
372  */
373  minServerMajor = 903;
374  maxServerMajor = PG_VERSION_NUM / 100;
375  serverMajor = PQserverVersion(conn) / 100;
376  if (serverMajor < minServerMajor)
377  {
378  const char *serverver = PQparameterStatus(conn, "server_version");
379 
380  pg_log_error("incompatible server version %s; client does not support streaming from server versions older than %s",
381  serverver ? serverver : "'unknown'",
382  "9.3");
383  return false;
384  }
385  else if (serverMajor > maxServerMajor)
386  {
387  const char *serverver = PQparameterStatus(conn, "server_version");
388 
389  pg_log_error("incompatible server version %s; client does not support streaming from server versions newer than %s",
390  serverver ? serverver : "'unknown'",
391  PG_VERSION);
392  return false;
393  }
394  return true;
395 }
396 
397 /*
398  * Receive a log stream starting at the specified position.
399  *
400  * Individual parameters are passed through the StreamCtl structure.
401  *
402  * If sysidentifier is specified, validate that both the system
403  * identifier and the timeline matches the specified ones
404  * (by sending an extra IDENTIFY_SYSTEM command)
405  *
406  * All received segments will be written to the directory
407  * specified by basedir. This will also fetch any missing timeline history
408  * files.
409  *
410  * The stream_stop callback will be called every time data
411  * is received, and whenever a segment is completed. If it returns
412  * true, the streaming will stop and the function
413  * return. As long as it returns false, streaming will continue
414  * indefinitely.
415  *
416  * If stream_stop() checks for external input, stop_socket should be set to
417  * the FD it checks. This will allow such input to be detected promptly
418  * rather than after standby_message_timeout (which might be indefinite).
419  * Note that signals will interrupt waits for input as well, but that is
420  * race-y since a signal received while busy won't interrupt the wait.
421  *
422  * standby_message_timeout controls how often we send a message
423  * back to the master letting it know our progress, in milliseconds.
424  * Zero means no messages are sent.
425  * This message will only contain the write location, and never
426  * flush or replay.
427  *
428  * If 'partial_suffix' is not NULL, files are initially created with the
429  * given suffix, and the suffix is removed once the file is finished. That
430  * allows you to tell the difference between partial and completed files,
431  * so that you can continue later where you left.
432  *
433  * If 'synchronous' is true, the received WAL is flushed as soon as written,
434  * otherwise only when the WAL file is closed.
435  *
436  * Note: The WAL location *must* be at a log segment start!
437  */
438 bool
440 {
441  char query[128];
442  char slotcmd[128];
443  PGresult *res;
444  XLogRecPtr stoppos;
445 
446  /*
447  * The caller should've checked the server version already, but doesn't do
448  * any harm to check it here too.
449  */
451  return false;
452 
453  /*
454  * Decide whether we want to report the flush position. If we report the
455  * flush position, the primary will know what WAL we'll possibly
456  * re-request, and it can then remove older WAL safely. We must always do
457  * that when we are using slots.
458  *
459  * Reporting the flush position makes one eligible as a synchronous
460  * replica. People shouldn't include generic names in
461  * synchronous_standby_names, but we've protected them against it so far,
462  * so let's continue to do so unless specifically requested.
463  */
464  if (stream->replication_slot != NULL)
465  {
466  reportFlushPosition = true;
467  sprintf(slotcmd, "SLOT \"%s\" ", stream->replication_slot);
468  }
469  else
470  {
471  if (stream->synchronous)
472  reportFlushPosition = true;
473  else
474  reportFlushPosition = false;
475  slotcmd[0] = 0;
476  }
477 
478  if (stream->sysidentifier != NULL)
479  {
480  /* Validate system identifier hasn't changed */
481  res = PQexec(conn, "IDENTIFY_SYSTEM");
482  if (PQresultStatus(res) != PGRES_TUPLES_OK)
483  {
484  pg_log_error("could not send replication command \"%s\": %s",
485  "IDENTIFY_SYSTEM", PQerrorMessage(conn));
486  PQclear(res);
487  return false;
488  }
489  if (PQntuples(res) != 1 || PQnfields(res) < 3)
490  {
491  pg_log_error("could not identify system: got %d rows and %d fields, expected %d rows and %d or more fields",
492  PQntuples(res), PQnfields(res), 1, 3);
493  PQclear(res);
494  return false;
495  }
496  if (strcmp(stream->sysidentifier, PQgetvalue(res, 0, 0)) != 0)
497  {
498  pg_log_error("system identifier does not match between base backup and streaming connection");
499  PQclear(res);
500  return false;
501  }
502  if (stream->timeline > atoi(PQgetvalue(res, 0, 1)))
503  {
504  pg_log_error("starting timeline %u is not present in the server",
505  stream->timeline);
506  PQclear(res);
507  return false;
508  }
509  PQclear(res);
510  }
511 
512  /*
513  * initialize flush position to starting point, it's the caller's
514  * responsibility that that's sane.
515  */
516  lastFlushPosition = stream->startpos;
517 
518  while (1)
519  {
520  /*
521  * Fetch the timeline history file for this timeline, if we don't have
522  * it already. When streaming log to tar, this will always return
523  * false, as we are never streaming into an existing file and
524  * therefore there can be no pre-existing timeline history file.
525  */
526  if (!existsTimeLineHistoryFile(stream))
527  {
528  snprintf(query, sizeof(query), "TIMELINE_HISTORY %u", stream->timeline);
529  res = PQexec(conn, query);
530  if (PQresultStatus(res) != PGRES_TUPLES_OK)
531  {
532  /* FIXME: we might send it ok, but get an error */
533  pg_log_error("could not send replication command \"%s\": %s",
534  "TIMELINE_HISTORY", PQresultErrorMessage(res));
535  PQclear(res);
536  return false;
537  }
538 
539  /*
540  * The response to TIMELINE_HISTORY is a single row result set
541  * with two fields: filename and content
542  */
543  if (PQnfields(res) != 2 || PQntuples(res) != 1)
544  {
545  pg_log_warning("unexpected response to TIMELINE_HISTORY command: got %d rows and %d fields, expected %d rows and %d fields",
546  PQntuples(res), PQnfields(res), 1, 2);
547  }
548 
549  /* Write the history file to disk */
551  PQgetvalue(res, 0, 0),
552  PQgetvalue(res, 0, 1));
553 
554  PQclear(res);
555  }
556 
557  /*
558  * Before we start streaming from the requested location, check if the
559  * callback tells us to stop here.
560  */
561  if (stream->stream_stop(stream->startpos, stream->timeline, false))
562  return true;
563 
564  /* Initiate the replication stream at specified location */
565  snprintf(query, sizeof(query), "START_REPLICATION %s%X/%X TIMELINE %u",
566  slotcmd,
567  (uint32) (stream->startpos >> 32), (uint32) stream->startpos,
568  stream->timeline);
569  res = PQexec(conn, query);
570  if (PQresultStatus(res) != PGRES_COPY_BOTH)
571  {
572  pg_log_error("could not send replication command \"%s\": %s",
573  "START_REPLICATION", PQresultErrorMessage(res));
574  PQclear(res);
575  return false;
576  }
577  PQclear(res);
578 
579  /* Stream the WAL */
580  res = HandleCopyStream(conn, stream, &stoppos);
581  if (res == NULL)
582  goto error;
583 
584  /*
585  * Streaming finished.
586  *
587  * There are two possible reasons for that: a controlled shutdown, or
588  * we reached the end of the current timeline. In case of
589  * end-of-timeline, the server sends a result set after Copy has
590  * finished, containing information about the next timeline. Read
591  * that, and restart streaming from the next timeline. In case of
592  * controlled shutdown, stop here.
593  */
594  if (PQresultStatus(res) == PGRES_TUPLES_OK)
595  {
596  /*
597  * End-of-timeline. Read the next timeline's ID and starting
598  * position. Usually, the starting position will match the end of
599  * the previous timeline, but there are corner cases like if the
600  * server had sent us half of a WAL record, when it was promoted.
601  * The new timeline will begin at the end of the last complete
602  * record in that case, overlapping the partial WAL record on the
603  * old timeline.
604  */
605  uint32 newtimeline;
606  bool parsed;
607 
608  parsed = ReadEndOfStreamingResult(res, &stream->startpos, &newtimeline);
609  PQclear(res);
610  if (!parsed)
611  goto error;
612 
613  /* Sanity check the values the server gave us */
614  if (newtimeline <= stream->timeline)
615  {
616  pg_log_error("server reported unexpected next timeline %u, following timeline %u",
617  newtimeline, stream->timeline);
618  goto error;
619  }
620  if (stream->startpos > stoppos)
621  {
622  pg_log_error("server stopped streaming timeline %u at %X/%X, but reported next timeline %u to begin at %X/%X",
623  stream->timeline, (uint32) (stoppos >> 32), (uint32) stoppos,
624  newtimeline, (uint32) (stream->startpos >> 32), (uint32) stream->startpos);
625  goto error;
626  }
627 
628  /* Read the final result, which should be CommandComplete. */
629  res = PQgetResult(conn);
630  if (PQresultStatus(res) != PGRES_COMMAND_OK)
631  {
632  pg_log_error("unexpected termination of replication stream: %s",
633  PQresultErrorMessage(res));
634  PQclear(res);
635  goto error;
636  }
637  PQclear(res);
638 
639  /*
640  * Loop back to start streaming from the new timeline. Always
641  * start streaming at the beginning of a segment.
642  */
643  stream->timeline = newtimeline;
644  stream->startpos = stream->startpos -
646  continue;
647  }
648  else if (PQresultStatus(res) == PGRES_COMMAND_OK)
649  {
650  PQclear(res);
651 
652  /*
653  * End of replication (ie. controlled shut down of the server).
654  *
655  * Check if the callback thinks it's OK to stop here. If not,
656  * complain.
657  */
658  if (stream->stream_stop(stoppos, stream->timeline, false))
659  return true;
660  else
661  {
662  pg_log_error("replication stream was terminated before stop point");
663  goto error;
664  }
665  }
666  else
667  {
668  /* Server returned an error. */
669  pg_log_error("unexpected termination of replication stream: %s",
670  PQresultErrorMessage(res));
671  PQclear(res);
672  goto error;
673  }
674  }
675 
676 error:
677  if (walfile != NULL && stream->walmethod->close(walfile, CLOSE_NO_RENAME) != 0)
678  pg_log_error("could not close file \"%s\": %s",
680  walfile = NULL;
681  return false;
682 }
683 
684 /*
685  * Helper function to parse the result set returned by server after streaming
686  * has finished. On failure, prints an error to stderr and returns false.
687  */
688 static bool
690 {
691  uint32 startpos_xlogid,
692  startpos_xrecoff;
693 
694  /*----------
695  * The result set consists of one row and two columns, e.g:
696  *
697  * next_tli | next_tli_startpos
698  * ----------+-------------------
699  * 4 | 0/9949AE0
700  *
701  * next_tli is the timeline ID of the next timeline after the one that
702  * just finished streaming. next_tli_startpos is the WAL location where
703  * the server switched to it.
704  *----------
705  */
706  if (PQnfields(res) < 2 || PQntuples(res) != 1)
707  {
708  pg_log_error("unexpected result set after end-of-timeline: got %d rows and %d fields, expected %d rows and %d fields",
709  PQntuples(res), PQnfields(res), 1, 2);
710  return false;
711  }
712 
713  *timeline = atoi(PQgetvalue(res, 0, 0));
714  if (sscanf(PQgetvalue(res, 0, 1), "%X/%X", &startpos_xlogid,
715  &startpos_xrecoff) != 2)
716  {
717  pg_log_error("could not parse next timeline's starting point \"%s\"",
718  PQgetvalue(res, 0, 1));
719  return false;
720  }
721  *startpos = ((uint64) startpos_xlogid << 32) | startpos_xrecoff;
722 
723  return true;
724 }
725 
726 /*
727  * The main loop of ReceiveXlogStream. Handles the COPY stream after
728  * initiating streaming with the START_REPLICATION command.
729  *
730  * If the COPY ends (not necessarily successfully) due a message from the
731  * server, returns a PGresult and sets *stoppos to the last byte written.
732  * On any other sort of error, returns NULL.
733  */
734 static PGresult *
736  XLogRecPtr *stoppos)
737 {
738  char *copybuf = NULL;
739  TimestampTz last_status = -1;
740  XLogRecPtr blockpos = stream->startpos;
741 
742  still_sending = true;
743 
744  while (1)
745  {
746  int r;
748  long sleeptime;
749 
750  /*
751  * Check if we should continue streaming, or abort at this point.
752  */
753  if (!CheckCopyStreamStop(conn, stream, blockpos, stoppos))
754  goto error;
755 
756  now = feGetCurrentTimestamp();
757 
758  /*
759  * If synchronous option is true, issue sync command as soon as there
760  * are WAL data which has not been flushed yet.
761  */
762  if (stream->synchronous && lastFlushPosition < blockpos && walfile != NULL)
763  {
764  if (stream->walmethod->sync(walfile) != 0)
765  {
766  pg_log_fatal("could not fsync file \"%s\": %s",
768  exit(1);
769  }
770  lastFlushPosition = blockpos;
771 
772  /*
773  * Send feedback so that the server sees the latest WAL locations
774  * immediately.
775  */
776  if (!sendFeedback(conn, blockpos, now, false))
777  goto error;
778  last_status = now;
779  }
780 
781  /*
782  * Potentially send a status message to the master
783  */
784  if (still_sending && stream->standby_message_timeout > 0 &&
785  feTimestampDifferenceExceeds(last_status, now,
786  stream->standby_message_timeout))
787  {
788  /* Time to send feedback! */
789  if (!sendFeedback(conn, blockpos, now, false))
790  goto error;
791  last_status = now;
792  }
793 
794  /*
795  * Calculate how long send/receive loops should sleep
796  */
797  sleeptime = CalculateCopyStreamSleeptime(now, stream->standby_message_timeout,
798  last_status);
799 
800  r = CopyStreamReceive(conn, sleeptime, stream->stop_socket, &copybuf);
801  while (r != 0)
802  {
803  if (r == -1)
804  goto error;
805  if (r == -2)
806  {
807  PGresult *res = HandleEndOfCopyStream(conn, stream, copybuf, blockpos, stoppos);
808 
809  if (res == NULL)
810  goto error;
811  else
812  return res;
813  }
814 
815  /* Check the message type. */
816  if (copybuf[0] == 'k')
817  {
818  if (!ProcessKeepaliveMsg(conn, stream, copybuf, r, blockpos,
819  &last_status))
820  goto error;
821  }
822  else if (copybuf[0] == 'w')
823  {
824  if (!ProcessXLogDataMsg(conn, stream, copybuf, r, &blockpos))
825  goto error;
826 
827  /*
828  * Check if we should continue streaming, or abort at this
829  * point.
830  */
831  if (!CheckCopyStreamStop(conn, stream, blockpos, stoppos))
832  goto error;
833  }
834  else
835  {
836  pg_log_error("unrecognized streaming header: \"%c\"",
837  copybuf[0]);
838  goto error;
839  }
840 
841  /*
842  * Process the received data, and any subsequent data we can read
843  * without blocking.
844  */
845  r = CopyStreamReceive(conn, 0, stream->stop_socket, &copybuf);
846  }
847  }
848 
849 error:
850  if (copybuf != NULL)
851  PQfreemem(copybuf);
852  return NULL;
853 }
854 
855 /*
856  * Wait until we can read a CopyData message,
857  * or timeout, or occurrence of a signal or input on the stop_socket.
858  * (timeout_ms < 0 means wait indefinitely; 0 means don't wait.)
859  *
860  * Returns 1 if data has become available for reading, 0 if timed out
861  * or interrupted by signal or stop_socket input, and -1 on an error.
862  */
863 static int
864 CopyStreamPoll(PGconn *conn, long timeout_ms, pgsocket stop_socket)
865 {
866  int ret;
867  fd_set input_mask;
868  int connsocket;
869  int maxfd;
870  struct timeval timeout;
871  struct timeval *timeoutptr;
872 
873  connsocket = PQsocket(conn);
874  if (connsocket < 0)
875  {
876  pg_log_error("invalid socket: %s", PQerrorMessage(conn));
877  return -1;
878  }
879 
880  FD_ZERO(&input_mask);
881  FD_SET(connsocket, &input_mask);
882  maxfd = connsocket;
883  if (stop_socket != PGINVALID_SOCKET)
884  {
885  FD_SET(stop_socket, &input_mask);
886  maxfd = Max(maxfd, stop_socket);
887  }
888 
889  if (timeout_ms < 0)
890  timeoutptr = NULL;
891  else
892  {
893  timeout.tv_sec = timeout_ms / 1000L;
894  timeout.tv_usec = (timeout_ms % 1000L) * 1000L;
895  timeoutptr = &timeout;
896  }
897 
898  ret = select(maxfd + 1, &input_mask, NULL, NULL, timeoutptr);
899 
900  if (ret < 0)
901  {
902  if (errno == EINTR)
903  return 0; /* Got a signal, so not an error */
904  pg_log_error("select() failed: %m");
905  return -1;
906  }
907  if (ret > 0 && FD_ISSET(connsocket, &input_mask))
908  return 1; /* Got input on connection socket */
909 
910  return 0; /* Got timeout or input on stop_socket */
911 }
912 
913 /*
914  * Receive CopyData message available from XLOG stream, blocking for
915  * maximum of 'timeout' ms.
916  *
917  * If data was received, returns the length of the data. *buffer is set to
918  * point to a buffer holding the received message. The buffer is only valid
919  * until the next CopyStreamReceive call.
920  *
921  * Returns 0 if no data was available within timeout, or if wait was
922  * interrupted by signal or stop_socket input.
923  * -1 on error. -2 if the server ended the COPY.
924  */
925 static int
926 CopyStreamReceive(PGconn *conn, long timeout, pgsocket stop_socket,
927  char **buffer)
928 {
929  char *copybuf = NULL;
930  int rawlen;
931 
932  if (*buffer != NULL)
933  PQfreemem(*buffer);
934  *buffer = NULL;
935 
936  /* Try to receive a CopyData message */
937  rawlen = PQgetCopyData(conn, &copybuf, 1);
938  if (rawlen == 0)
939  {
940  int ret;
941 
942  /*
943  * No data available. Wait for some to appear, but not longer than
944  * the specified timeout, so that we can ping the server. Also stop
945  * waiting if input appears on stop_socket.
946  */
947  ret = CopyStreamPoll(conn, timeout, stop_socket);
948  if (ret <= 0)
949  return ret;
950 
951  /* Now there is actually data on the socket */
952  if (PQconsumeInput(conn) == 0)
953  {
954  pg_log_error("could not receive data from WAL stream: %s",
955  PQerrorMessage(conn));
956  return -1;
957  }
958 
959  /* Now that we've consumed some input, try again */
960  rawlen = PQgetCopyData(conn, &copybuf, 1);
961  if (rawlen == 0)
962  return 0;
963  }
964  if (rawlen == -1) /* end-of-streaming or error */
965  return -2;
966  if (rawlen == -2)
967  {
968  pg_log_error("could not read COPY data: %s", PQerrorMessage(conn));
969  return -1;
970  }
971 
972  /* Return received messages to caller */
973  *buffer = copybuf;
974  return rawlen;
975 }
976 
977 /*
978  * Process the keepalive message.
979  */
980 static bool
981 ProcessKeepaliveMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len,
982  XLogRecPtr blockpos, TimestampTz *last_status)
983 {
984  int pos;
985  bool replyRequested;
987 
988  /*
989  * Parse the keepalive message, enclosed in the CopyData message. We just
990  * check if the server requested a reply, and ignore the rest.
991  */
992  pos = 1; /* skip msgtype 'k' */
993  pos += 8; /* skip walEnd */
994  pos += 8; /* skip sendTime */
995 
996  if (len < pos + 1)
997  {
998  pg_log_error("streaming header too small: %d", len);
999  return false;
1000  }
1001  replyRequested = copybuf[pos];
1002 
1003  /* If the server requested an immediate reply, send one. */
1004  if (replyRequested && still_sending)
1005  {
1006  if (reportFlushPosition && lastFlushPosition < blockpos &&
1007  walfile != NULL)
1008  {
1009  /*
1010  * If a valid flush location needs to be reported, flush the
1011  * current WAL file so that the latest flush location is sent back
1012  * to the server. This is necessary to see whether the last WAL
1013  * data has been successfully replicated or not, at the normal
1014  * shutdown of the server.
1015  */
1016  if (stream->walmethod->sync(walfile) != 0)
1017  {
1018  pg_log_fatal("could not fsync file \"%s\": %s",
1020  exit(1);
1021  }
1022  lastFlushPosition = blockpos;
1023  }
1024 
1025  now = feGetCurrentTimestamp();
1026  if (!sendFeedback(conn, blockpos, now, false))
1027  return false;
1028  *last_status = now;
1029  }
1030 
1031  return true;
1032 }
1033 
1034 /*
1035  * Process XLogData message.
1036  */
1037 static bool
1038 ProcessXLogDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len,
1039  XLogRecPtr *blockpos)
1040 {
1041  int xlogoff;
1042  int bytes_left;
1043  int bytes_written;
1044  int hdr_len;
1045 
1046  /*
1047  * Once we've decided we don't want to receive any more, just ignore any
1048  * subsequent XLogData messages.
1049  */
1050  if (!(still_sending))
1051  return true;
1052 
1053  /*
1054  * Read the header of the XLogData message, enclosed in the CopyData
1055  * message. We only need the WAL location field (dataStart), the rest of
1056  * the header is ignored.
1057  */
1058  hdr_len = 1; /* msgtype 'w' */
1059  hdr_len += 8; /* dataStart */
1060  hdr_len += 8; /* walEnd */
1061  hdr_len += 8; /* sendTime */
1062  if (len < hdr_len)
1063  {
1064  pg_log_error("streaming header too small: %d", len);
1065  return false;
1066  }
1067  *blockpos = fe_recvint64(&copybuf[1]);
1068 
1069  /* Extract WAL location for this block */
1070  xlogoff = XLogSegmentOffset(*blockpos, WalSegSz);
1071 
1072  /*
1073  * Verify that the initial location in the stream matches where we think
1074  * we are.
1075  */
1076  if (walfile == NULL)
1077  {
1078  /* No file open yet */
1079  if (xlogoff != 0)
1080  {
1081  pg_log_error("received write-ahead log record for offset %u with no file open",
1082  xlogoff);
1083  return false;
1084  }
1085  }
1086  else
1087  {
1088  /* More data in existing segment */
1089  if (stream->walmethod->get_current_pos(walfile) != xlogoff)
1090  {
1091  pg_log_error("got WAL data offset %08x, expected %08x",
1092  xlogoff, (int) stream->walmethod->get_current_pos(walfile));
1093  return false;
1094  }
1095  }
1096 
1097  bytes_left = len - hdr_len;
1098  bytes_written = 0;
1099 
1100  while (bytes_left)
1101  {
1102  int bytes_to_write;
1103 
1104  /*
1105  * If crossing a WAL boundary, only write up until we reach wal
1106  * segment size.
1107  */
1108  if (xlogoff + bytes_left > WalSegSz)
1109  bytes_to_write = WalSegSz - xlogoff;
1110  else
1111  bytes_to_write = bytes_left;
1112 
1113  if (walfile == NULL)
1114  {
1115  if (!open_walfile(stream, *blockpos))
1116  {
1117  /* Error logged by open_walfile */
1118  return false;
1119  }
1120  }
1121 
1122  if (stream->walmethod->write(walfile, copybuf + hdr_len + bytes_written,
1123  bytes_to_write) != bytes_to_write)
1124  {
1125  pg_log_error("could not write %u bytes to WAL file \"%s\": %s",
1126  bytes_to_write, current_walfile_name,
1127  stream->walmethod->getlasterror());
1128  return false;
1129  }
1130 
1131  /* Write was successful, advance our position */
1132  bytes_written += bytes_to_write;
1133  bytes_left -= bytes_to_write;
1134  *blockpos += bytes_to_write;
1135  xlogoff += bytes_to_write;
1136 
1137  /* Did we reach the end of a WAL segment? */
1138  if (XLogSegmentOffset(*blockpos, WalSegSz) == 0)
1139  {
1140  if (!close_walfile(stream, *blockpos))
1141  /* Error message written in close_walfile() */
1142  return false;
1143 
1144  xlogoff = 0;
1145 
1146  if (still_sending && stream->stream_stop(*blockpos, stream->timeline, true))
1147  {
1148  if (PQputCopyEnd(conn, NULL) <= 0 || PQflush(conn))
1149  {
1150  pg_log_error("could not send copy-end packet: %s",
1151  PQerrorMessage(conn));
1152  return false;
1153  }
1154  still_sending = false;
1155  return true; /* ignore the rest of this XLogData packet */
1156  }
1157  }
1158  }
1159  /* No more data left to write, receive next copy packet */
1160 
1161  return true;
1162 }
1163 
1164 /*
1165  * Handle end of the copy stream.
1166  */
1167 static PGresult *
1169  XLogRecPtr blockpos, XLogRecPtr *stoppos)
1170 {
1171  PGresult *res = PQgetResult(conn);
1172 
1173  /*
1174  * The server closed its end of the copy stream. If we haven't closed
1175  * ours already, we need to do so now, unless the server threw an error,
1176  * in which case we don't.
1177  */
1178  if (still_sending)
1179  {
1180  if (!close_walfile(stream, blockpos))
1181  {
1182  /* Error message written in close_walfile() */
1183  PQclear(res);
1184  return NULL;
1185  }
1186  if (PQresultStatus(res) == PGRES_COPY_IN)
1187  {
1188  if (PQputCopyEnd(conn, NULL) <= 0 || PQflush(conn))
1189  {
1190  pg_log_error("could not send copy-end packet: %s",
1191  PQerrorMessage(conn));
1192  PQclear(res);
1193  return NULL;
1194  }
1195  res = PQgetResult(conn);
1196  }
1197  still_sending = false;
1198  }
1199  if (copybuf != NULL)
1200  PQfreemem(copybuf);
1201  *stoppos = blockpos;
1202  return res;
1203 }
1204 
1205 /*
1206  * Check if we should continue streaming, or abort at this point.
1207  */
1208 static bool
1210  XLogRecPtr *stoppos)
1211 {
1212  if (still_sending && stream->stream_stop(blockpos, stream->timeline, false))
1213  {
1214  if (!close_walfile(stream, blockpos))
1215  {
1216  /* Potential error message is written by close_walfile */
1217  return false;
1218  }
1219  if (PQputCopyEnd(conn, NULL) <= 0 || PQflush(conn))
1220  {
1221  pg_log_error("could not send copy-end packet: %s",
1222  PQerrorMessage(conn));
1223  return false;
1224  }
1225  still_sending = false;
1226  }
1227 
1228  return true;
1229 }
1230 
1231 /*
1232  * Calculate how long send/receive loops should sleep
1233  */
1234 static long
1236  TimestampTz last_status)
1237 {
1238  TimestampTz status_targettime = 0;
1239  long sleeptime;
1240 
1241  if (standby_message_timeout && still_sending)
1242  status_targettime = last_status +
1243  (standby_message_timeout - 1) * ((int64) 1000);
1244 
1245  if (status_targettime > 0)
1246  {
1247  long secs;
1248  int usecs;
1249 
1251  status_targettime,
1252  &secs,
1253  &usecs);
1254  /* Always sleep at least 1 sec */
1255  if (secs <= 0)
1256  {
1257  secs = 1;
1258  usecs = 0;
1259  }
1260 
1261  sleeptime = secs * 1000 + usecs / 1000;
1262  }
1263  else
1264  sleeptime = -1;
1265 
1266  return sleeptime;
1267 }
int PQputCopyData(PGconn *conn, const char *buffer, int nbytes)
Definition: fe-exec.c:2318
int PQnfields(const PGresult *res)
Definition: fe-exec.c:2778
static bool open_walfile(StreamCtl *stream, XLogRecPtr startpoint)
Definition: receivelog.c:92
char * PQerrorMessage(const PGconn *conn)
Definition: fe-connect.c:6561
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
static int CopyStreamPoll(PGconn *conn, long timeout_ms, pgsocket stop_socket)
Definition: receivelog.c:864
TimestampTz feGetCurrentTimestamp(void)
Definition: streamutil.c:608
char * PQgetvalue(const PGresult *res, int tup_num, int field_num)
Definition: fe-exec.c:3164
void * Walfile
Definition: walmethods.h:13
int(* close)(Walfile f, WalCloseMethod method)
Definition: walmethods.h:47
static void error(void)
Definition: sql-dyntest.c:147
const char * PQparameterStatus(const PGconn *conn, const char *paramName)
Definition: fe-connect.c:6526
static bool still_sending
Definition: receivelog.c:39
int64 TimestampTz
Definition: timestamp.h:39
#define pg_log_error(...)
Definition: logging.h:79
static bool CheckCopyStreamStop(PGconn *conn, StreamCtl *stream, XLogRecPtr blockpos, XLogRecPtr *stoppos)
Definition: receivelog.c:1209
char * sysidentifier
Definition: receivelog.h:34
int PQputCopyEnd(PGconn *conn, const char *errormsg)
Definition: fe-exec.c:2385
XLogRecPtr startpos
Definition: receivelog.h:32
static long CalculateCopyStreamSleeptime(TimestampTz now, int standby_message_timeout, TimestampTz last_status)
Definition: receivelog.c:1235
int sleeptime
Definition: pg_standby.c:42
char * partial_suffix
Definition: receivelog.h:48
int PQserverVersion(const PGconn *conn)
Definition: fe-connect.c:6551
int PQntuples(const PGresult *res)
Definition: fe-exec.c:2770
bool feTimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: streamutil.c:649
int(* sync)(Walfile f)
Definition: walmethods.h:67
TimeLineID timeline
Definition: receivelog.h:33
ExecStatusType PQresultStatus(const PGresult *res)
Definition: fe-exec.c:2693
int PQgetCopyData(PGconn *conn, char **buffer, int async)
Definition: fe-exec.c:2474
static bool ProcessKeepaliveMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len, XLogRecPtr blockpos, TimestampTz *last_status)
Definition: receivelog.c:981
const char *(* getlasterror)(void)
Definition: walmethods.h:78
off_t(* get_current_pos)(Walfile f)
Definition: walmethods.h:62
#define sprintf
Definition: port.h:194
static int CopyStreamReceive(PGconn *conn, long timeout, pgsocket stop_socket, char **buffer)
Definition: receivelog.c:926
static bool ReadEndOfStreamingResult(PGresult *res, XLogRecPtr *startpos, uint32 *timeline)
Definition: receivelog.c:689
PGconn * conn
Definition: streamutil.c:56
#define MAXPGPATH
static Walfile * walfile
Definition: receivelog.c:34
int PQflush(PGconn *conn)
Definition: fe-exec.c:3284
char * replication_slot
Definition: receivelog.h:49
bool mark_done
Definition: receivelog.h:38
void feTimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition: streamutil.c:627
#define TLHistoryFileName(fname, tli)
static bool reportFlushPosition
Definition: receivelog.c:36
uint64 XLogSegNo
Definition: xlogdefs.h:41
ssize_t(* write)(Walfile f, const void *buf, size_t count)
Definition: walmethods.h:59
#define select(n, r, w, e, timeout)
Definition: win32_port.h:445
unsigned int uint32
Definition: c.h:358
int pgsocket
Definition: port.h:31
static bool close_walfile(StreamCtl *stream, XLogRecPtr pos)
Definition: receivelog.c:181
stream_stop_callback stream_stop
Definition: receivelog.h:42
WalWriteMethod * walmethod
Definition: receivelog.h:47
#define MAXFNAMELEN
static int standby_message_timeout
#define ngettext(s, p, n)
Definition: c.h:1103
static bool sendFeedback(PGconn *conn, XLogRecPtr blockpos, TimestampTz now, bool replyRequested)
Definition: receivelog.c:323
static XLogRecPtr lastFlushPosition
Definition: receivelog.c:37
static PGresult * HandleEndOfCopyStream(PGconn *conn, StreamCtl *stream, char *copybuf, XLogRecPtr blockpos, XLogRecPtr *stoppos)
Definition: receivelog.c:1168
#define PGINVALID_SOCKET
Definition: port.h:33
#define XLogSegmentOffset(xlogptr, wal_segsz_bytes)
int PQconsumeInput(PGconn *conn)
Definition: fe-exec.c:1705
static bool ProcessXLogDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len, XLogRecPtr *blockpos)
Definition: receivelog.c:1038
StringInfo copybuf
Definition: tablesync.c:115
void PQclear(PGresult *res)
Definition: fe-exec.c:695
static void * fn(void *arg)
#define Max(x, y)
Definition: c.h:898
static PGresult * HandleCopyStream(PGconn *conn, StreamCtl *stream, XLogRecPtr *stoppos)
Definition: receivelog.c:735
uint64 XLogRecPtr
Definition: xlogdefs.h:21
static bool existsTimeLineHistoryFile(StreamCtl *stream)
Definition: receivelog.c:245
bool ReceiveXlogStream(PGconn *conn, StreamCtl *stream)
Definition: receivelog.c:439
#define XLogFileName(fname, tli, logSegNo, wal_segsz_bytes)
static XLogRecPtr startpos
bool synchronous
Definition: receivelog.h:37
pgsocket stop_socket
Definition: receivelog.h:44
int WalSegSz
Definition: pg_standby.c:39
static char * filename
Definition: pg_dumpall.c:91
bool(* existsfile)(const char *pathname)
Definition: walmethods.h:50
char * PQresultErrorMessage(const PGresult *res)
Definition: fe-exec.c:2709
Walfile(* open_for_write)(const char *pathname, const char *temp_suffix, size_t pad_to_size)
Definition: walmethods.h:41
int64 fe_recvint64(char *buf)
Definition: streamutil.c:673
static bool mark_file_as_archived(StreamCtl *stream, const char *fname)
Definition: receivelog.c:61
int standby_message_timeout
Definition: receivelog.h:36
PGresult * PQexec(PGconn *conn, const char *query)
Definition: fe-exec.c:1940
ssize_t(* get_file_size)(const char *pathname)
Definition: walmethods.h:53
void fe_sendint64(int64 i, char *buf)
Definition: streamutil.c:662
#define EINTR
Definition: win32_port.h:332
#define pg_log_warning(...)
Definition: pgfnames.c:24
static char current_walfile_name[MAXPGPATH]
Definition: receivelog.c:35
#define snprintf
Definition: port.h:192
void PQfreemem(void *ptr)
Definition: fe-exec.c:3297
int PQsocket(const PGconn *conn)
Definition: fe-connect.c:6579
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1533
PGresult * PQgetResult(PGconn *conn)
Definition: fe-exec.c:1779
#define pg_log_info(...)
Definition: logging.h:87
static bool writeTimeLineHistoryFile(StreamCtl *stream, char *filename, char *content)
Definition: receivelog.c:262
bool CheckServerVersionForStreaming(PGconn *conn)
Definition: receivelog.c:361
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
#define pg_log_fatal(...)
Definition: logging.h:75