PostgreSQL Source Code  git master
receivelog.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * receivelog.c - receive WAL files using the streaming
4  * replication protocol.
5  *
6  * Author: Magnus Hagander <magnus@hagander.net>
7  *
8  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
9  *
10  * IDENTIFICATION
11  * src/bin/pg_basebackup/receivelog.c
12  *-------------------------------------------------------------------------
13  */
14 
15 #include "postgres_fe.h"
16 
17 #include <sys/stat.h>
18 #include <unistd.h>
19 #ifdef HAVE_SYS_SELECT_H
20 #include <sys/select.h>
21 #endif
22 
23 #include "access/xlog_internal.h"
24 #include "common/file_utils.h"
25 #include "common/logging.h"
26 #include "libpq-fe.h"
27 #include "receivelog.h"
28 #include "streamutil.h"
29 
30 /* fd and filename for currently open WAL file */
31 static Walfile *walfile = NULL;
32 static char current_walfile_name[MAXPGPATH] = "";
33 static bool reportFlushPosition = false;
35 
36 static bool still_sending = true; /* feedback still needs to be sent? */
37 
39  XLogRecPtr *stoppos);
40 static int CopyStreamPoll(PGconn *conn, long timeout_ms, pgsocket stop_socket);
41 static int CopyStreamReceive(PGconn *conn, long timeout, pgsocket stop_socket,
42  char **buffer);
43 static bool ProcessKeepaliveMsg(PGconn *conn, StreamCtl *stream, char *copybuf,
44  int len, XLogRecPtr blockpos, TimestampTz *last_status);
45 static bool ProcessXLogDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len,
46  XLogRecPtr *blockpos);
48  XLogRecPtr blockpos, XLogRecPtr *stoppos);
49 static bool CheckCopyStreamStop(PGconn *conn, StreamCtl *stream, XLogRecPtr blockpos);
51  TimestampTz last_status);
52 
54  uint32 *timeline);
55 
56 static bool
57 mark_file_as_archived(StreamCtl *stream, const char *fname)
58 {
59  Walfile *f;
60  static char tmppath[MAXPGPATH];
61 
62  snprintf(tmppath, sizeof(tmppath), "archive_status/%s.done",
63  fname);
64 
65  f = stream->walmethod->open_for_write(tmppath, NULL, 0);
66  if (f == NULL)
67  {
68  pg_log_error("could not create archive status file \"%s\": %s",
69  tmppath, stream->walmethod->getlasterror());
70  return false;
71  }
72 
73  if (stream->walmethod->close(f, CLOSE_NORMAL) != 0)
74  {
75  pg_log_error("could not close archive status file \"%s\": %s",
76  tmppath, stream->walmethod->getlasterror());
77  return false;
78  }
79 
80  return true;
81 }
82 
83 /*
84  * Open a new WAL file in the specified directory.
85  *
86  * Returns true if OK; on failure, returns false after printing an error msg.
87  * On success, 'walfile' is set to the FD for the file, and the base filename
88  * (without partial_suffix) is stored in 'current_walfile_name'.
89  *
90  * The file will be padded to 16Mb with zeroes.
91  */
92 static bool
93 open_walfile(StreamCtl *stream, XLogRecPtr startpoint)
94 {
95  Walfile *f;
96  char *fn;
97  ssize_t size;
98  XLogSegNo segno;
99 
100  XLByteToSeg(startpoint, segno, WalSegSz);
102 
103  /* Note that this considers the compression used if necessary */
105  stream->partial_suffix);
106 
107  /*
108  * When streaming to files, if an existing file exists we verify that it's
109  * either empty (just created), or a complete WalSegSz segment (in which
110  * case it has been created and padded). Anything else indicates a corrupt
111  * file. Compressed files have no need for padding, so just ignore this
112  * case.
113  *
114  * When streaming to tar, no file with this name will exist before, so we
115  * never have to verify a size.
116  */
118  stream->walmethod->existsfile(fn))
119  {
120  size = stream->walmethod->get_file_size(fn);
121  if (size < 0)
122  {
123  pg_log_error("could not get size of write-ahead log file \"%s\": %s",
124  fn, stream->walmethod->getlasterror());
125  pg_free(fn);
126  return false;
127  }
128  if (size == WalSegSz)
129  {
130  /* Already padded file. Open it for use */
132  if (f == NULL)
133  {
134  pg_log_error("could not open existing write-ahead log file \"%s\": %s",
135  fn, stream->walmethod->getlasterror());
136  pg_free(fn);
137  return false;
138  }
139 
140  /* fsync file in case of a previous crash */
141  if (stream->walmethod->sync(f) != 0)
142  {
143  pg_log_error("could not fsync existing write-ahead log file \"%s\": %s",
144  fn, stream->walmethod->getlasterror());
145  stream->walmethod->close(f, CLOSE_UNLINK);
146  exit(1);
147  }
148 
149  walfile = f;
150  pg_free(fn);
151  return true;
152  }
153  if (size != 0)
154  {
155  /* if write didn't set errno, assume problem is no disk space */
156  if (errno == 0)
157  errno = ENOSPC;
158  pg_log_error(ngettext("write-ahead log file \"%s\" has %zd byte, should be 0 or %d",
159  "write-ahead log file \"%s\" has %zd bytes, should be 0 or %d",
160  size),
161  fn, size, WalSegSz);
162  pg_free(fn);
163  return false;
164  }
165  /* File existed and was empty, so fall through and open */
166  }
167 
168  /* No file existed, so create one */
169 
171  stream->partial_suffix, WalSegSz);
172  if (f == NULL)
173  {
174  pg_log_error("could not open write-ahead log file \"%s\": %s",
175  fn, stream->walmethod->getlasterror());
176  pg_free(fn);
177  return false;
178  }
179 
180  pg_free(fn);
181  walfile = f;
182  return true;
183 }
184 
185 /*
186  * Close the current WAL file (if open), and rename it to the correct
187  * filename if it's complete. On failure, prints an error message to stderr
188  * and returns false, otherwise returns true.
189  */
190 static bool
192 {
193  char *fn;
194  off_t currpos;
195  int r;
196 
197  if (walfile == NULL)
198  return true;
199 
200  /* Note that this considers the compression used if necessary */
202  stream->partial_suffix);
203 
204  currpos = stream->walmethod->get_current_pos(walfile);
205 
206  if (currpos == -1)
207  {
208  pg_log_error("could not determine seek position in file \"%s\": %s",
209  fn, stream->walmethod->getlasterror());
210  stream->walmethod->close(walfile, CLOSE_UNLINK);
211  walfile = NULL;
212 
213  pg_free(fn);
214  return false;
215  }
216 
217  if (stream->partial_suffix)
218  {
219  if (currpos == WalSegSz)
220  r = stream->walmethod->close(walfile, CLOSE_NORMAL);
221  else
222  {
223  pg_log_info("not renaming \"%s\", segment is not complete", fn);
224  r = stream->walmethod->close(walfile, CLOSE_NO_RENAME);
225  }
226  }
227  else
228  r = stream->walmethod->close(walfile, CLOSE_NORMAL);
229 
230  walfile = NULL;
231 
232  if (r != 0)
233  {
234  pg_log_error("could not close file \"%s\": %s",
235  fn, stream->walmethod->getlasterror());
236 
237  pg_free(fn);
238  return false;
239  }
240 
241  pg_free(fn);
242 
243  /*
244  * Mark file as archived if requested by the caller - pg_basebackup needs
245  * to do so as files can otherwise get archived again after promotion of a
246  * new node. This is in line with walreceiver.c always doing a
247  * XLogArchiveForceDone() after a complete segment.
248  */
249  if (currpos == WalSegSz && stream->mark_done)
250  {
251  /* writes error message if failed */
253  return false;
254  }
255 
256  lastFlushPosition = pos;
257  return true;
258 }
259 
260 
261 /*
262  * Check if a timeline history file exists.
263  */
264 static bool
266 {
267  char histfname[MAXFNAMELEN];
268 
269  /*
270  * Timeline 1 never has a history file. We treat that as if it existed,
271  * since we never need to stream it.
272  */
273  if (stream->timeline == 1)
274  return true;
275 
276  TLHistoryFileName(histfname, stream->timeline);
277 
278  return stream->walmethod->existsfile(histfname);
279 }
280 
281 static bool
282 writeTimeLineHistoryFile(StreamCtl *stream, char *filename, char *content)
283 {
284  int size = strlen(content);
285  char histfname[MAXFNAMELEN];
286  Walfile *f;
287 
288  /*
289  * Check that the server's idea of how timeline history files should be
290  * named matches ours.
291  */
292  TLHistoryFileName(histfname, stream->timeline);
293  if (strcmp(histfname, filename) != 0)
294  {
295  pg_log_error("server reported unexpected history file name for timeline %u: %s",
296  stream->timeline, filename);
297  return false;
298  }
299 
300  f = stream->walmethod->open_for_write(histfname, ".tmp", 0);
301  if (f == NULL)
302  {
303  pg_log_error("could not create timeline history file \"%s\": %s",
304  histfname, stream->walmethod->getlasterror());
305  return false;
306  }
307 
308  if ((int) stream->walmethod->write(f, content, size) != size)
309  {
310  pg_log_error("could not write timeline history file \"%s\": %s",
311  histfname, stream->walmethod->getlasterror());
312 
313  /*
314  * If we fail to make the file, delete it to release disk space
315  */
316  stream->walmethod->close(f, CLOSE_UNLINK);
317 
318  return false;
319  }
320 
321  if (stream->walmethod->close(f, CLOSE_NORMAL) != 0)
322  {
323  pg_log_error("could not close file \"%s\": %s",
324  histfname, stream->walmethod->getlasterror());
325  return false;
326  }
327 
328  /* Maintain archive_status, check close_walfile() for details. */
329  if (stream->mark_done)
330  {
331  /* writes error message if failed */
332  if (!mark_file_as_archived(stream, histfname))
333  return false;
334  }
335 
336  return true;
337 }
338 
339 /*
340  * Send a Standby Status Update message to server.
341  */
342 static bool
343 sendFeedback(PGconn *conn, XLogRecPtr blockpos, TimestampTz now, bool replyRequested)
344 {
345  char replybuf[1 + 8 + 8 + 8 + 8 + 1];
346  int len = 0;
347 
348  replybuf[len] = 'r';
349  len += 1;
350  fe_sendint64(blockpos, &replybuf[len]); /* write */
351  len += 8;
353  fe_sendint64(lastFlushPosition, &replybuf[len]); /* flush */
354  else
355  fe_sendint64(InvalidXLogRecPtr, &replybuf[len]); /* flush */
356  len += 8;
357  fe_sendint64(InvalidXLogRecPtr, &replybuf[len]); /* apply */
358  len += 8;
359  fe_sendint64(now, &replybuf[len]); /* sendTime */
360  len += 8;
361  replybuf[len] = replyRequested ? 1 : 0; /* replyRequested */
362  len += 1;
363 
364  if (PQputCopyData(conn, replybuf, len) <= 0 || PQflush(conn))
365  {
366  pg_log_error("could not send feedback packet: %s",
368  return false;
369  }
370 
371  return true;
372 }
373 
374 /*
375  * Check that the server version we're connected to is supported by
376  * ReceiveXlogStream().
377  *
378  * If it's not, an error message is printed to stderr, and false is returned.
379  */
380 bool
382 {
383  int minServerMajor,
384  maxServerMajor;
385  int serverMajor;
386 
387  /*
388  * The message format used in streaming replication changed in 9.3, so we
389  * cannot stream from older servers. And we don't support servers newer
390  * than the client; it might work, but we don't know, so err on the safe
391  * side.
392  */
393  minServerMajor = 903;
394  maxServerMajor = PG_VERSION_NUM / 100;
395  serverMajor = PQserverVersion(conn) / 100;
396  if (serverMajor < minServerMajor)
397  {
398  const char *serverver = PQparameterStatus(conn, "server_version");
399 
400  pg_log_error("incompatible server version %s; client does not support streaming from server versions older than %s",
401  serverver ? serverver : "'unknown'",
402  "9.3");
403  return false;
404  }
405  else if (serverMajor > maxServerMajor)
406  {
407  const char *serverver = PQparameterStatus(conn, "server_version");
408 
409  pg_log_error("incompatible server version %s; client does not support streaming from server versions newer than %s",
410  serverver ? serverver : "'unknown'",
411  PG_VERSION);
412  return false;
413  }
414  return true;
415 }
416 
417 /*
418  * Receive a log stream starting at the specified position.
419  *
420  * Individual parameters are passed through the StreamCtl structure.
421  *
422  * If sysidentifier is specified, validate that both the system
423  * identifier and the timeline matches the specified ones
424  * (by sending an extra IDENTIFY_SYSTEM command)
425  *
426  * All received segments will be written to the directory
427  * specified by basedir. This will also fetch any missing timeline history
428  * files.
429  *
430  * The stream_stop callback will be called every time data
431  * is received, and whenever a segment is completed. If it returns
432  * true, the streaming will stop and the function
433  * return. As long as it returns false, streaming will continue
434  * indefinitely.
435  *
436  * If stream_stop() checks for external input, stop_socket should be set to
437  * the FD it checks. This will allow such input to be detected promptly
438  * rather than after standby_message_timeout (which might be indefinite).
439  * Note that signals will interrupt waits for input as well, but that is
440  * race-y since a signal received while busy won't interrupt the wait.
441  *
442  * standby_message_timeout controls how often we send a message
443  * back to the primary letting it know our progress, in milliseconds.
444  * Zero means no messages are sent.
445  * This message will only contain the write location, and never
446  * flush or replay.
447  *
448  * If 'partial_suffix' is not NULL, files are initially created with the
449  * given suffix, and the suffix is removed once the file is finished. That
450  * allows you to tell the difference between partial and completed files,
451  * so that you can continue later where you left.
452  *
453  * If 'synchronous' is true, the received WAL is flushed as soon as written,
454  * otherwise only when the WAL file is closed.
455  *
456  * Note: The WAL location *must* be at a log segment start!
457  */
458 bool
460 {
461  char query[128];
462  char slotcmd[128];
463  PGresult *res;
464  XLogRecPtr stoppos;
465 
466  /*
467  * The caller should've checked the server version already, but doesn't do
468  * any harm to check it here too.
469  */
471  return false;
472 
473  /*
474  * Decide whether we want to report the flush position. If we report the
475  * flush position, the primary will know what WAL we'll possibly
476  * re-request, and it can then remove older WAL safely. We must always do
477  * that when we are using slots.
478  *
479  * Reporting the flush position makes one eligible as a synchronous
480  * replica. People shouldn't include generic names in
481  * synchronous_standby_names, but we've protected them against it so far,
482  * so let's continue to do so unless specifically requested.
483  */
484  if (stream->replication_slot != NULL)
485  {
486  reportFlushPosition = true;
487  sprintf(slotcmd, "SLOT \"%s\" ", stream->replication_slot);
488  }
489  else
490  {
491  if (stream->synchronous)
492  reportFlushPosition = true;
493  else
494  reportFlushPosition = false;
495  slotcmd[0] = 0;
496  }
497 
498  if (stream->sysidentifier != NULL)
499  {
500  char *sysidentifier = NULL;
501  TimeLineID servertli;
502 
503  /*
504  * Get the server system identifier and timeline, and validate them.
505  */
506  if (!RunIdentifySystem(conn, &sysidentifier, &servertli, NULL, NULL))
507  {
508  pg_free(sysidentifier);
509  return false;
510  }
511 
512  if (strcmp(stream->sysidentifier, sysidentifier) != 0)
513  {
514  pg_log_error("system identifier does not match between base backup and streaming connection");
515  pg_free(sysidentifier);
516  return false;
517  }
518  pg_free(sysidentifier);
519 
520  if (stream->timeline > servertli)
521  {
522  pg_log_error("starting timeline %u is not present in the server",
523  stream->timeline);
524  return false;
525  }
526  }
527 
528  /*
529  * initialize flush position to starting point, it's the caller's
530  * responsibility that that's sane.
531  */
532  lastFlushPosition = stream->startpos;
533 
534  while (1)
535  {
536  /*
537  * Fetch the timeline history file for this timeline, if we don't have
538  * it already. When streaming log to tar, this will always return
539  * false, as we are never streaming into an existing file and
540  * therefore there can be no pre-existing timeline history file.
541  */
542  if (!existsTimeLineHistoryFile(stream))
543  {
544  snprintf(query, sizeof(query), "TIMELINE_HISTORY %u", stream->timeline);
545  res = PQexec(conn, query);
547  {
548  /* FIXME: we might send it ok, but get an error */
549  pg_log_error("could not send replication command \"%s\": %s",
550  "TIMELINE_HISTORY", PQresultErrorMessage(res));
551  PQclear(res);
552  return false;
553  }
554 
555  /*
556  * The response to TIMELINE_HISTORY is a single row result set
557  * with two fields: filename and content
558  */
559  if (PQnfields(res) != 2 || PQntuples(res) != 1)
560  {
561  pg_log_warning("unexpected response to TIMELINE_HISTORY command: got %d rows and %d fields, expected %d rows and %d fields",
562  PQntuples(res), PQnfields(res), 1, 2);
563  }
564 
565  /* Write the history file to disk */
567  PQgetvalue(res, 0, 0),
568  PQgetvalue(res, 0, 1));
569 
570  PQclear(res);
571  }
572 
573  /*
574  * Before we start streaming from the requested location, check if the
575  * callback tells us to stop here.
576  */
577  if (stream->stream_stop(stream->startpos, stream->timeline, false))
578  return true;
579 
580  /* Initiate the replication stream at specified location */
581  snprintf(query, sizeof(query), "START_REPLICATION %s%X/%X TIMELINE %u",
582  slotcmd,
583  LSN_FORMAT_ARGS(stream->startpos),
584  stream->timeline);
585  res = PQexec(conn, query);
587  {
588  pg_log_error("could not send replication command \"%s\": %s",
589  "START_REPLICATION", PQresultErrorMessage(res));
590  PQclear(res);
591  return false;
592  }
593  PQclear(res);
594 
595  /* Stream the WAL */
596  res = HandleCopyStream(conn, stream, &stoppos);
597  if (res == NULL)
598  goto error;
599 
600  /*
601  * Streaming finished.
602  *
603  * There are two possible reasons for that: a controlled shutdown, or
604  * we reached the end of the current timeline. In case of
605  * end-of-timeline, the server sends a result set after Copy has
606  * finished, containing information about the next timeline. Read
607  * that, and restart streaming from the next timeline. In case of
608  * controlled shutdown, stop here.
609  */
611  {
612  /*
613  * End-of-timeline. Read the next timeline's ID and starting
614  * position. Usually, the starting position will match the end of
615  * the previous timeline, but there are corner cases like if the
616  * server had sent us half of a WAL record, when it was promoted.
617  * The new timeline will begin at the end of the last complete
618  * record in that case, overlapping the partial WAL record on the
619  * old timeline.
620  */
621  uint32 newtimeline;
622  bool parsed;
623 
624  parsed = ReadEndOfStreamingResult(res, &stream->startpos, &newtimeline);
625  PQclear(res);
626  if (!parsed)
627  goto error;
628 
629  /* Sanity check the values the server gave us */
630  if (newtimeline <= stream->timeline)
631  {
632  pg_log_error("server reported unexpected next timeline %u, following timeline %u",
633  newtimeline, stream->timeline);
634  goto error;
635  }
636  if (stream->startpos > stoppos)
637  {
638  pg_log_error("server stopped streaming timeline %u at %X/%X, but reported next timeline %u to begin at %X/%X",
639  stream->timeline, LSN_FORMAT_ARGS(stoppos),
640  newtimeline, LSN_FORMAT_ARGS(stream->startpos));
641  goto error;
642  }
643 
644  /* Read the final result, which should be CommandComplete. */
645  res = PQgetResult(conn);
647  {
648  pg_log_error("unexpected termination of replication stream: %s",
650  PQclear(res);
651  goto error;
652  }
653  PQclear(res);
654 
655  /*
656  * Loop back to start streaming from the new timeline. Always
657  * start streaming at the beginning of a segment.
658  */
659  stream->timeline = newtimeline;
660  stream->startpos = stream->startpos -
662  continue;
663  }
664  else if (PQresultStatus(res) == PGRES_COMMAND_OK)
665  {
666  PQclear(res);
667 
668  /*
669  * End of replication (ie. controlled shut down of the server).
670  *
671  * Check if the callback thinks it's OK to stop here. If not,
672  * complain.
673  */
674  if (stream->stream_stop(stoppos, stream->timeline, false))
675  return true;
676  else
677  {
678  pg_log_error("replication stream was terminated before stop point");
679  goto error;
680  }
681  }
682  else
683  {
684  /* Server returned an error. */
685  pg_log_error("unexpected termination of replication stream: %s",
687  PQclear(res);
688  goto error;
689  }
690  }
691 
692 error:
693  if (walfile != NULL && stream->walmethod->close(walfile, CLOSE_NO_RENAME) != 0)
694  pg_log_error("could not close file \"%s\": %s",
696  walfile = NULL;
697  return false;
698 }
699 
700 /*
701  * Helper function to parse the result set returned by server after streaming
702  * has finished. On failure, prints an error to stderr and returns false.
703  */
704 static bool
706 {
707  uint32 startpos_xlogid,
708  startpos_xrecoff;
709 
710  /*----------
711  * The result set consists of one row and two columns, e.g:
712  *
713  * next_tli | next_tli_startpos
714  * ----------+-------------------
715  * 4 | 0/9949AE0
716  *
717  * next_tli is the timeline ID of the next timeline after the one that
718  * just finished streaming. next_tli_startpos is the WAL location where
719  * the server switched to it.
720  *----------
721  */
722  if (PQnfields(res) < 2 || PQntuples(res) != 1)
723  {
724  pg_log_error("unexpected result set after end-of-timeline: got %d rows and %d fields, expected %d rows and %d fields",
725  PQntuples(res), PQnfields(res), 1, 2);
726  return false;
727  }
728 
729  *timeline = atoi(PQgetvalue(res, 0, 0));
730  if (sscanf(PQgetvalue(res, 0, 1), "%X/%X", &startpos_xlogid,
731  &startpos_xrecoff) != 2)
732  {
733  pg_log_error("could not parse next timeline's starting point \"%s\"",
734  PQgetvalue(res, 0, 1));
735  return false;
736  }
737  *startpos = ((uint64) startpos_xlogid << 32) | startpos_xrecoff;
738 
739  return true;
740 }
741 
742 /*
743  * The main loop of ReceiveXlogStream. Handles the COPY stream after
744  * initiating streaming with the START_REPLICATION command.
745  *
746  * If the COPY ends (not necessarily successfully) due a message from the
747  * server, returns a PGresult and sets *stoppos to the last byte written.
748  * On any other sort of error, returns NULL.
749  */
750 static PGresult *
752  XLogRecPtr *stoppos)
753 {
754  char *copybuf = NULL;
755  TimestampTz last_status = -1;
756  XLogRecPtr blockpos = stream->startpos;
757 
758  still_sending = true;
759 
760  while (1)
761  {
762  int r;
764  long sleeptime;
765 
766  /*
767  * Check if we should continue streaming, or abort at this point.
768  */
769  if (!CheckCopyStreamStop(conn, stream, blockpos))
770  goto error;
771 
773 
774  /*
775  * If synchronous option is true, issue sync command as soon as there
776  * are WAL data which has not been flushed yet.
777  */
778  if (stream->synchronous && lastFlushPosition < blockpos && walfile != NULL)
779  {
780  if (stream->walmethod->sync(walfile) != 0)
781  pg_fatal("could not fsync file \"%s\": %s",
783  lastFlushPosition = blockpos;
784 
785  /*
786  * Send feedback so that the server sees the latest WAL locations
787  * immediately.
788  */
789  if (!sendFeedback(conn, blockpos, now, false))
790  goto error;
791  last_status = now;
792  }
793 
794  /*
795  * Potentially send a status message to the primary
796  */
797  if (still_sending && stream->standby_message_timeout > 0 &&
798  feTimestampDifferenceExceeds(last_status, now,
799  stream->standby_message_timeout))
800  {
801  /* Time to send feedback! */
802  if (!sendFeedback(conn, blockpos, now, false))
803  goto error;
804  last_status = now;
805  }
806 
807  /*
808  * Calculate how long send/receive loops should sleep
809  */
811  last_status);
812 
813  r = CopyStreamReceive(conn, sleeptime, stream->stop_socket, &copybuf);
814  while (r != 0)
815  {
816  if (r == -1)
817  goto error;
818  if (r == -2)
819  {
820  PGresult *res = HandleEndOfCopyStream(conn, stream, copybuf, blockpos, stoppos);
821 
822  if (res == NULL)
823  goto error;
824  else
825  return res;
826  }
827 
828  /* Check the message type. */
829  if (copybuf[0] == 'k')
830  {
831  if (!ProcessKeepaliveMsg(conn, stream, copybuf, r, blockpos,
832  &last_status))
833  goto error;
834  }
835  else if (copybuf[0] == 'w')
836  {
837  if (!ProcessXLogDataMsg(conn, stream, copybuf, r, &blockpos))
838  goto error;
839 
840  /*
841  * Check if we should continue streaming, or abort at this
842  * point.
843  */
844  if (!CheckCopyStreamStop(conn, stream, blockpos))
845  goto error;
846  }
847  else
848  {
849  pg_log_error("unrecognized streaming header: \"%c\"",
850  copybuf[0]);
851  goto error;
852  }
853 
854  /*
855  * Process the received data, and any subsequent data we can read
856  * without blocking.
857  */
858  r = CopyStreamReceive(conn, 0, stream->stop_socket, &copybuf);
859  }
860  }
861 
862 error:
863  if (copybuf != NULL)
865  return NULL;
866 }
867 
868 /*
869  * Wait until we can read a CopyData message,
870  * or timeout, or occurrence of a signal or input on the stop_socket.
871  * (timeout_ms < 0 means wait indefinitely; 0 means don't wait.)
872  *
873  * Returns 1 if data has become available for reading, 0 if timed out
874  * or interrupted by signal or stop_socket input, and -1 on an error.
875  */
876 static int
877 CopyStreamPoll(PGconn *conn, long timeout_ms, pgsocket stop_socket)
878 {
879  int ret;
880  fd_set input_mask;
881  int connsocket;
882  int maxfd;
883  struct timeval timeout;
884  struct timeval *timeoutptr;
885 
886  connsocket = PQsocket(conn);
887  if (connsocket < 0)
888  {
889  pg_log_error("invalid socket: %s", PQerrorMessage(conn));
890  return -1;
891  }
892 
893  FD_ZERO(&input_mask);
894  FD_SET(connsocket, &input_mask);
895  maxfd = connsocket;
896  if (stop_socket != PGINVALID_SOCKET)
897  {
898  FD_SET(stop_socket, &input_mask);
899  maxfd = Max(maxfd, stop_socket);
900  }
901 
902  if (timeout_ms < 0)
903  timeoutptr = NULL;
904  else
905  {
906  timeout.tv_sec = timeout_ms / 1000L;
907  timeout.tv_usec = (timeout_ms % 1000L) * 1000L;
908  timeoutptr = &timeout;
909  }
910 
911  ret = select(maxfd + 1, &input_mask, NULL, NULL, timeoutptr);
912 
913  if (ret < 0)
914  {
915  if (errno == EINTR)
916  return 0; /* Got a signal, so not an error */
917  pg_log_error("%s() failed: %m", "select");
918  return -1;
919  }
920  if (ret > 0 && FD_ISSET(connsocket, &input_mask))
921  return 1; /* Got input on connection socket */
922 
923  return 0; /* Got timeout or input on stop_socket */
924 }
925 
926 /*
927  * Receive CopyData message available from XLOG stream, blocking for
928  * maximum of 'timeout' ms.
929  *
930  * If data was received, returns the length of the data. *buffer is set to
931  * point to a buffer holding the received message. The buffer is only valid
932  * until the next CopyStreamReceive call.
933  *
934  * Returns 0 if no data was available within timeout, or if wait was
935  * interrupted by signal or stop_socket input.
936  * -1 on error. -2 if the server ended the COPY.
937  */
938 static int
939 CopyStreamReceive(PGconn *conn, long timeout, pgsocket stop_socket,
940  char **buffer)
941 {
942  char *copybuf = NULL;
943  int rawlen;
944 
945  if (*buffer != NULL)
946  PQfreemem(*buffer);
947  *buffer = NULL;
948 
949  /* Try to receive a CopyData message */
950  rawlen = PQgetCopyData(conn, &copybuf, 1);
951  if (rawlen == 0)
952  {
953  int ret;
954 
955  /*
956  * No data available. Wait for some to appear, but not longer than
957  * the specified timeout, so that we can ping the server. Also stop
958  * waiting if input appears on stop_socket.
959  */
960  ret = CopyStreamPoll(conn, timeout, stop_socket);
961  if (ret <= 0)
962  return ret;
963 
964  /* Now there is actually data on the socket */
965  if (PQconsumeInput(conn) == 0)
966  {
967  pg_log_error("could not receive data from WAL stream: %s",
969  return -1;
970  }
971 
972  /* Now that we've consumed some input, try again */
973  rawlen = PQgetCopyData(conn, &copybuf, 1);
974  if (rawlen == 0)
975  return 0;
976  }
977  if (rawlen == -1) /* end-of-streaming or error */
978  return -2;
979  if (rawlen == -2)
980  {
981  pg_log_error("could not read COPY data: %s", PQerrorMessage(conn));
982  return -1;
983  }
984 
985  /* Return received messages to caller */
986  *buffer = copybuf;
987  return rawlen;
988 }
989 
990 /*
991  * Process the keepalive message.
992  */
993 static bool
995  XLogRecPtr blockpos, TimestampTz *last_status)
996 {
997  int pos;
998  bool replyRequested;
1000 
1001  /*
1002  * Parse the keepalive message, enclosed in the CopyData message. We just
1003  * check if the server requested a reply, and ignore the rest.
1004  */
1005  pos = 1; /* skip msgtype 'k' */
1006  pos += 8; /* skip walEnd */
1007  pos += 8; /* skip sendTime */
1008 
1009  if (len < pos + 1)
1010  {
1011  pg_log_error("streaming header too small: %d", len);
1012  return false;
1013  }
1014  replyRequested = copybuf[pos];
1015 
1016  /* If the server requested an immediate reply, send one. */
1017  if (replyRequested && still_sending)
1018  {
1019  if (reportFlushPosition && lastFlushPosition < blockpos &&
1020  walfile != NULL)
1021  {
1022  /*
1023  * If a valid flush location needs to be reported, flush the
1024  * current WAL file so that the latest flush location is sent back
1025  * to the server. This is necessary to see whether the last WAL
1026  * data has been successfully replicated or not, at the normal
1027  * shutdown of the server.
1028  */
1029  if (stream->walmethod->sync(walfile) != 0)
1030  pg_fatal("could not fsync file \"%s\": %s",
1032  lastFlushPosition = blockpos;
1033  }
1034 
1036  if (!sendFeedback(conn, blockpos, now, false))
1037  return false;
1038  *last_status = now;
1039  }
1040 
1041  return true;
1042 }
1043 
1044 /*
1045  * Process XLogData message.
1046  */
1047 static bool
1049  XLogRecPtr *blockpos)
1050 {
1051  int xlogoff;
1052  int bytes_left;
1053  int bytes_written;
1054  int hdr_len;
1055 
1056  /*
1057  * Once we've decided we don't want to receive any more, just ignore any
1058  * subsequent XLogData messages.
1059  */
1060  if (!(still_sending))
1061  return true;
1062 
1063  /*
1064  * Read the header of the XLogData message, enclosed in the CopyData
1065  * message. We only need the WAL location field (dataStart), the rest of
1066  * the header is ignored.
1067  */
1068  hdr_len = 1; /* msgtype 'w' */
1069  hdr_len += 8; /* dataStart */
1070  hdr_len += 8; /* walEnd */
1071  hdr_len += 8; /* sendTime */
1072  if (len < hdr_len)
1073  {
1074  pg_log_error("streaming header too small: %d", len);
1075  return false;
1076  }
1077  *blockpos = fe_recvint64(&copybuf[1]);
1078 
1079  /* Extract WAL location for this block */
1080  xlogoff = XLogSegmentOffset(*blockpos, WalSegSz);
1081 
1082  /*
1083  * Verify that the initial location in the stream matches where we think
1084  * we are.
1085  */
1086  if (walfile == NULL)
1087  {
1088  /* No file open yet */
1089  if (xlogoff != 0)
1090  {
1091  pg_log_error("received write-ahead log record for offset %u with no file open",
1092  xlogoff);
1093  return false;
1094  }
1095  }
1096  else
1097  {
1098  /* More data in existing segment */
1099  if (stream->walmethod->get_current_pos(walfile) != xlogoff)
1100  {
1101  pg_log_error("got WAL data offset %08x, expected %08x",
1102  xlogoff, (int) stream->walmethod->get_current_pos(walfile));
1103  return false;
1104  }
1105  }
1106 
1107  bytes_left = len - hdr_len;
1108  bytes_written = 0;
1109 
1110  while (bytes_left)
1111  {
1112  int bytes_to_write;
1113 
1114  /*
1115  * If crossing a WAL boundary, only write up until we reach wal
1116  * segment size.
1117  */
1118  if (xlogoff + bytes_left > WalSegSz)
1119  bytes_to_write = WalSegSz - xlogoff;
1120  else
1121  bytes_to_write = bytes_left;
1122 
1123  if (walfile == NULL)
1124  {
1125  if (!open_walfile(stream, *blockpos))
1126  {
1127  /* Error logged by open_walfile */
1128  return false;
1129  }
1130  }
1131 
1132  if (stream->walmethod->write(walfile, copybuf + hdr_len + bytes_written,
1133  bytes_to_write) != bytes_to_write)
1134  {
1135  pg_log_error("could not write %d bytes to WAL file \"%s\": %s",
1136  bytes_to_write, current_walfile_name,
1137  stream->walmethod->getlasterror());
1138  return false;
1139  }
1140 
1141  /* Write was successful, advance our position */
1142  bytes_written += bytes_to_write;
1143  bytes_left -= bytes_to_write;
1144  *blockpos += bytes_to_write;
1145  xlogoff += bytes_to_write;
1146 
1147  /* Did we reach the end of a WAL segment? */
1148  if (XLogSegmentOffset(*blockpos, WalSegSz) == 0)
1149  {
1150  if (!close_walfile(stream, *blockpos))
1151  /* Error message written in close_walfile() */
1152  return false;
1153 
1154  xlogoff = 0;
1155 
1156  if (still_sending && stream->stream_stop(*blockpos, stream->timeline, true))
1157  {
1158  if (PQputCopyEnd(conn, NULL) <= 0 || PQflush(conn))
1159  {
1160  pg_log_error("could not send copy-end packet: %s",
1161  PQerrorMessage(conn));
1162  return false;
1163  }
1164  still_sending = false;
1165  return true; /* ignore the rest of this XLogData packet */
1166  }
1167  }
1168  }
1169  /* No more data left to write, receive next copy packet */
1170 
1171  return true;
1172 }
1173 
1174 /*
1175  * Handle end of the copy stream.
1176  */
1177 static PGresult *
1179  XLogRecPtr blockpos, XLogRecPtr *stoppos)
1180 {
1182 
1183  /*
1184  * The server closed its end of the copy stream. If we haven't closed
1185  * ours already, we need to do so now, unless the server threw an error,
1186  * in which case we don't.
1187  */
1188  if (still_sending)
1189  {
1190  if (!close_walfile(stream, blockpos))
1191  {
1192  /* Error message written in close_walfile() */
1193  PQclear(res);
1194  return NULL;
1195  }
1197  {
1198  if (PQputCopyEnd(conn, NULL) <= 0 || PQflush(conn))
1199  {
1200  pg_log_error("could not send copy-end packet: %s",
1201  PQerrorMessage(conn));
1202  PQclear(res);
1203  return NULL;
1204  }
1205  res = PQgetResult(conn);
1206  }
1207  still_sending = false;
1208  }
1209  if (copybuf != NULL)
1210  PQfreemem(copybuf);
1211  *stoppos = blockpos;
1212  return res;
1213 }
1214 
1215 /*
1216  * Check if we should continue streaming, or abort at this point.
1217  */
1218 static bool
1220 {
1221  if (still_sending && stream->stream_stop(blockpos, stream->timeline, false))
1222  {
1223  if (!close_walfile(stream, blockpos))
1224  {
1225  /* Potential error message is written by close_walfile */
1226  return false;
1227  }
1228  if (PQputCopyEnd(conn, NULL) <= 0 || PQflush(conn))
1229  {
1230  pg_log_error("could not send copy-end packet: %s",
1231  PQerrorMessage(conn));
1232  return false;
1233  }
1234  still_sending = false;
1235  }
1236 
1237  return true;
1238 }
1239 
1240 /*
1241  * Calculate how long send/receive loops should sleep
1242  */
1243 static long
1245  TimestampTz last_status)
1246 {
1247  TimestampTz status_targettime = 0;
1248  long sleeptime;
1249 
1251  status_targettime = last_status +
1252  (standby_message_timeout - 1) * ((int64) 1000);
1253 
1254  if (status_targettime > 0)
1255  {
1256  long secs;
1257  int usecs;
1258 
1260  status_targettime,
1261  &secs,
1262  &usecs);
1263  /* Always sleep at least 1 sec */
1264  if (secs <= 0)
1265  {
1266  secs = 1;
1267  usecs = 0;
1268  }
1269 
1270  sleeptime = secs * 1000 + usecs / 1000;
1271  }
1272  else
1273  sleeptime = -1;
1274 
1275  return sleeptime;
1276 }
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1538
unsigned int uint32
Definition: c.h:441
#define ngettext(s, p, n)
Definition: c.h:1179
#define Max(x, y)
Definition: c.h:980
@ PG_COMPRESSION_NONE
Definition: compression.h:19
int64 TimestampTz
Definition: timestamp.h:39
const char * PQparameterStatus(const PGconn *conn, const char *paramName)
Definition: fe-connect.c:6873
int PQserverVersion(const PGconn *conn)
Definition: fe-connect.c:6898
char * PQerrorMessage(const PGconn *conn)
Definition: fe-connect.c:6908
int PQsocket(const PGconn *conn)
Definition: fe-connect.c:6934
int PQflush(PGconn *conn)
Definition: fe-exec.c:3861
void PQfreemem(void *ptr)
Definition: fe-exec.c:3891
ExecStatusType PQresultStatus(const PGresult *res)
Definition: fe-exec.c:3270
void PQclear(PGresult *res)
Definition: fe-exec.c:718
char * PQresultErrorMessage(const PGresult *res)
Definition: fe-exec.c:3286
int PQputCopyEnd(PGconn *conn, const char *errormsg)
Definition: fe-exec.c:2683
int PQntuples(const PGresult *res)
Definition: fe-exec.c:3340
int PQputCopyData(PGconn *conn, const char *buffer, int nbytes)
Definition: fe-exec.c:2627
PGresult * PQexec(PGconn *conn, const char *query)
Definition: fe-exec.c:2273
int PQconsumeInput(PGconn *conn)
Definition: fe-exec.c:2004
char * PQgetvalue(const PGresult *res, int tup_num, int field_num)
Definition: fe-exec.c:3735
int PQnfields(const PGresult *res)
Definition: fe-exec.c:3348
PGresult * PQgetResult(PGconn *conn)
Definition: fe-exec.c:2082
int PQgetCopyData(PGconn *conn, char **buffer, int async)
Definition: fe-exec.c:2751
void pg_free(void *ptr)
Definition: fe_memutils.c:105
@ PGRES_COPY_IN
Definition: libpq-fe.h:104
@ PGRES_COPY_BOTH
Definition: libpq-fe.h:109
@ PGRES_COMMAND_OK
Definition: libpq-fe.h:97
@ PGRES_TUPLES_OK
Definition: libpq-fe.h:100
exit(1)
#define pg_log_error(...)
Definition: logging.h:106
#define pg_log_info(...)
Definition: logging.h:124
#define pg_fatal(...)
static int standby_message_timeout
#define MAXPGPATH
const void size_t len
static char * filename
Definition: pg_dumpall.c:94
static XLogRecPtr startpos
#define pg_log_warning(...)
Definition: pgfnames.c:24
#define sprintf
Definition: port.h:227
int pgsocket
Definition: port.h:29
#define snprintf
Definition: port.h:225
#define PGINVALID_SOCKET
Definition: port.h:31
static PGresult * HandleCopyStream(PGconn *conn, StreamCtl *stream, XLogRecPtr *stoppos)
Definition: receivelog.c:751
static bool open_walfile(StreamCtl *stream, XLogRecPtr startpoint)
Definition: receivelog.c:93
static bool mark_file_as_archived(StreamCtl *stream, const char *fname)
Definition: receivelog.c:57
static int CopyStreamReceive(PGconn *conn, long timeout, pgsocket stop_socket, char **buffer)
Definition: receivelog.c:939
static bool CheckCopyStreamStop(PGconn *conn, StreamCtl *stream, XLogRecPtr blockpos)
Definition: receivelog.c:1219
static bool ProcessKeepaliveMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len, XLogRecPtr blockpos, TimestampTz *last_status)
Definition: receivelog.c:994
static char current_walfile_name[MAXPGPATH]
Definition: receivelog.c:32
static bool reportFlushPosition
Definition: receivelog.c:33
static bool ProcessXLogDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len, XLogRecPtr *blockpos)
Definition: receivelog.c:1048
static bool close_walfile(StreamCtl *stream, XLogRecPtr pos)
Definition: receivelog.c:191
static bool existsTimeLineHistoryFile(StreamCtl *stream)
Definition: receivelog.c:265
static bool still_sending
Definition: receivelog.c:36
static bool writeTimeLineHistoryFile(StreamCtl *stream, char *filename, char *content)
Definition: receivelog.c:282
bool ReceiveXlogStream(PGconn *conn, StreamCtl *stream)
Definition: receivelog.c:459
static PGresult * HandleEndOfCopyStream(PGconn *conn, StreamCtl *stream, char *copybuf, XLogRecPtr blockpos, XLogRecPtr *stoppos)
Definition: receivelog.c:1178
static Walfile * walfile
Definition: receivelog.c:31
static long CalculateCopyStreamSleeptime(TimestampTz now, int standby_message_timeout, TimestampTz last_status)
Definition: receivelog.c:1244
bool CheckServerVersionForStreaming(PGconn *conn)
Definition: receivelog.c:381
static bool sendFeedback(PGconn *conn, XLogRecPtr blockpos, TimestampTz now, bool replyRequested)
Definition: receivelog.c:343
static XLogRecPtr lastFlushPosition
Definition: receivelog.c:34
static int CopyStreamPoll(PGconn *conn, long timeout_ms, pgsocket stop_socket)
Definition: receivelog.c:877
static bool ReadEndOfStreamingResult(PGresult *res, XLogRecPtr *startpos, uint32 *timeline)
Definition: receivelog.c:705
static void error(void)
Definition: sql-dyntest.c:147
int64 fe_recvint64(char *buf)
Definition: streamutil.c:857
TimestampTz feGetCurrentTimestamp(void)
Definition: streamutil.c:792
void feTimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition: streamutil.c:811
void fe_sendint64(int64 i, char *buf)
Definition: streamutil.c:846
bool feTimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: streamutil.c:833
PGconn * conn
Definition: streamutil.c:54
bool RunIdentifySystem(PGconn *conn, char **sysid, TimeLineID *starttli, XLogRecPtr *startpos, char **db_name)
Definition: streamutil.c:403
uint32 WalSegSz
Definition: streamutil.c:34
char * sysidentifier
Definition: receivelog.h:33
TimeLineID timeline
Definition: receivelog.h:32
stream_stop_callback stream_stop
Definition: receivelog.h:41
char * replication_slot
Definition: receivelog.h:48
XLogRecPtr startpos
Definition: receivelog.h:31
pgsocket stop_socket
Definition: receivelog.h:43
int standby_message_timeout
Definition: receivelog.h:35
WalWriteMethod * walmethod
Definition: receivelog.h:46
bool mark_done
Definition: receivelog.h:37
char * partial_suffix
Definition: receivelog.h:47
bool synchronous
Definition: receivelog.h:36
const char *(* getlasterror)(void)
Definition: walmethods.h:88
off_t(* get_current_pos)(Walfile f)
Definition: walmethods.h:72
ssize_t(* get_file_size)(const char *pathname)
Definition: walmethods.h:54
Walfile(* open_for_write)(const char *pathname, const char *temp_suffix, size_t pad_to_size)
Definition: walmethods.h:42
bool(* existsfile)(const char *pathname)
Definition: walmethods.h:51
int(* close)(Walfile f, WalCloseMethod method)
Definition: walmethods.h:48
char *(* get_file_name)(const char *pathname, const char *temp_suffix)
Definition: walmethods.h:60
int(* sync)(Walfile f)
Definition: walmethods.h:77
ssize_t(* write)(Walfile f, const void *buf, size_t count)
Definition: walmethods.h:69
pg_compress_algorithm(* compression_algorithm)(void)
Definition: walmethods.h:63
static StringInfo copybuf
Definition: tablesync.c:127
static void * fn(void *arg)
@ CLOSE_UNLINK
Definition: walmethods.h:19
@ CLOSE_NO_RENAME
Definition: walmethods.h:20
@ CLOSE_NORMAL
Definition: walmethods.h:18
void * Walfile
Definition: walmethods.h:14
#define EINTR
Definition: win32_port.h:351
#define select(n, r, w, e, timeout)
Definition: win32_port.h:474
#define TLHistoryFileName(fname, tli)
#define XLogSegmentOffset(xlogptr, wal_segsz_bytes)
#define XLogFileName(fname, tli, logSegNo, wal_segsz_bytes)
#define MAXFNAMELEN
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
uint32 TimeLineID
Definition: xlogdefs.h:59
uint64 XLogSegNo
Definition: xlogdefs.h:48