PostgreSQL Source Code  git master
walreceiverfuncs.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * walreceiverfuncs.c
4  *
5  * This file contains functions used by the startup process to communicate
6  * with the walreceiver process. Functions implementing walreceiver itself
7  * are in walreceiver.c.
8  *
9  * Portions Copyright (c) 2010-2023, PostgreSQL Global Development Group
10  *
11  *
12  * IDENTIFICATION
13  * src/backend/replication/walreceiverfuncs.c
14  *
15  *-------------------------------------------------------------------------
16  */
#include "postgres.h"

#include <sys/stat.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>
#include <signal.h>

#include "access/xlog_internal.h"
#include "access/xlogrecovery.h"
#include "pgstat.h"
#include "postmaster/startup.h"
#include "replication/walreceiver.h"
#include "storage/pmsignal.h"
#include "storage/shmem.h"
#include "utils/timestamp.h"
33 
35 
36 /*
37  * How long to wait for walreceiver to start up after requesting
38  * postmaster to launch it. In seconds.
39  */
40 #define WALRCV_STARTUP_TIMEOUT 10
41 
42 /* Report shared memory space needed by WalRcvShmemInit */
43 Size
45 {
46  Size size = 0;
47 
48  size = add_size(size, sizeof(WalRcvData));
49 
50  return size;
51 }
52 
53 /* Allocate and initialize walreceiver-related shared memory */
54 void
56 {
57  bool found;
58 
59  WalRcv = (WalRcvData *)
60  ShmemInitStruct("Wal Receiver Ctl", WalRcvShmemSize(), &found);
61 
62  if (!found)
63  {
64  /* First time through, so initialize */
70  WalRcv->latch = NULL;
71  }
72 }
73 
74 /* Is walreceiver running (or starting up)? */
75 bool
77 {
78  WalRcvData *walrcv = WalRcv;
80  pg_time_t startTime;
81 
82  SpinLockAcquire(&walrcv->mutex);
83 
84  state = walrcv->walRcvState;
85  startTime = walrcv->startTime;
86 
87  SpinLockRelease(&walrcv->mutex);
88 
89  /*
90  * If it has taken too long for walreceiver to start up, give up. Setting
91  * the state to STOPPED ensures that if walreceiver later does start up
92  * after all, it will see that it's not supposed to be running and die
93  * without doing anything.
94  */
95  if (state == WALRCV_STARTING)
96  {
97  pg_time_t now = (pg_time_t) time(NULL);
98 
99  if ((now - startTime) > WALRCV_STARTUP_TIMEOUT)
100  {
101  bool stopped = false;
102 
103  SpinLockAcquire(&walrcv->mutex);
104  if (walrcv->walRcvState == WALRCV_STARTING)
105  {
106  state = walrcv->walRcvState = WALRCV_STOPPED;
107  stopped = true;
108  }
109  SpinLockRelease(&walrcv->mutex);
110 
111  if (stopped)
113  }
114  }
115 
116  if (state != WALRCV_STOPPED)
117  return true;
118  else
119  return false;
120 }
121 
122 /*
123  * Is walreceiver running and streaming (or at least attempting to connect,
124  * or starting up)?
125  */
126 bool
128 {
129  WalRcvData *walrcv = WalRcv;
131  pg_time_t startTime;
132 
133  SpinLockAcquire(&walrcv->mutex);
134 
135  state = walrcv->walRcvState;
136  startTime = walrcv->startTime;
137 
138  SpinLockRelease(&walrcv->mutex);
139 
140  /*
141  * If it has taken too long for walreceiver to start up, give up. Setting
142  * the state to STOPPED ensures that if walreceiver later does start up
143  * after all, it will see that it's not supposed to be running and die
144  * without doing anything.
145  */
146  if (state == WALRCV_STARTING)
147  {
148  pg_time_t now = (pg_time_t) time(NULL);
149 
150  if ((now - startTime) > WALRCV_STARTUP_TIMEOUT)
151  {
152  bool stopped = false;
153 
154  SpinLockAcquire(&walrcv->mutex);
155  if (walrcv->walRcvState == WALRCV_STARTING)
156  {
157  state = walrcv->walRcvState = WALRCV_STOPPED;
158  stopped = true;
159  }
160  SpinLockRelease(&walrcv->mutex);
161 
162  if (stopped)
164  }
165  }
166 
169  return true;
170  else
171  return false;
172 }
173 
174 /*
175  * Stop walreceiver (if running) and wait for it to die.
176  * Executed by the Startup process.
177  */
178 void
180 {
181  WalRcvData *walrcv = WalRcv;
182  pid_t walrcvpid = 0;
183  bool stopped = false;
184 
185  /*
186  * Request walreceiver to stop. Walreceiver will switch to WALRCV_STOPPED
187  * mode once it's finished, and will also request postmaster to not
188  * restart itself.
189  */
190  SpinLockAcquire(&walrcv->mutex);
191  switch (walrcv->walRcvState)
192  {
193  case WALRCV_STOPPED:
194  break;
195  case WALRCV_STARTING:
196  walrcv->walRcvState = WALRCV_STOPPED;
197  stopped = true;
198  break;
199 
200  case WALRCV_STREAMING:
201  case WALRCV_WAITING:
202  case WALRCV_RESTARTING:
203  walrcv->walRcvState = WALRCV_STOPPING;
204  /* fall through */
205  case WALRCV_STOPPING:
206  walrcvpid = walrcv->pid;
207  break;
208  }
209  SpinLockRelease(&walrcv->mutex);
210 
211  /* Unnecessary but consistent. */
212  if (stopped)
214 
215  /*
216  * Signal walreceiver process if it was still running.
217  */
218  if (walrcvpid != 0)
219  kill(walrcvpid, SIGTERM);
220 
221  /*
222  * Wait for walreceiver to acknowledge its death by setting state to
223  * WALRCV_STOPPED.
224  */
226  while (WalRcvRunning())
228  WAIT_EVENT_WAL_RECEIVER_EXIT);
230 }
231 
232 /*
233  * Request postmaster to start walreceiver.
234  *
235  * "recptr" indicates the position where streaming should begin. "conninfo"
236  * is a libpq connection string to use. "slotname" is, optionally, the name
237  * of a replication slot to acquire. "create_temp_slot" indicates to create
238  * a temporary slot when no "slotname" is given.
239  *
240  * WAL receivers do not directly load GUC parameters used for the connection
241  * to the primary, and rely on the values passed down by the caller of this
242  * routine instead. Hence, the addition of any new parameters should happen
243  * through this code path.
244  */
245 void
246 RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, const char *conninfo,
247  const char *slotname, bool create_temp_slot)
248 {
249  WalRcvData *walrcv = WalRcv;
250  bool launch = false;
251  pg_time_t now = (pg_time_t) time(NULL);
252  Latch *latch;
253 
254  /*
255  * We always start at the beginning of the segment. That prevents a broken
256  * segment (i.e., with no records in the first half of a segment) from
257  * being created by XLOG streaming, which might cause trouble later on if
258  * the segment is e.g archived.
259  */
260  if (XLogSegmentOffset(recptr, wal_segment_size) != 0)
261  recptr -= XLogSegmentOffset(recptr, wal_segment_size);
262 
263  SpinLockAcquire(&walrcv->mutex);
264 
265  /* It better be stopped if we try to restart it */
266  Assert(walrcv->walRcvState == WALRCV_STOPPED ||
267  walrcv->walRcvState == WALRCV_WAITING);
268 
269  if (conninfo != NULL)
270  strlcpy((char *) walrcv->conninfo, conninfo, MAXCONNINFO);
271  else
272  walrcv->conninfo[0] = '\0';
273 
274  /*
275  * Use configured replication slot if present, and ignore the value of
276  * create_temp_slot as the slot name should be persistent. Otherwise, use
277  * create_temp_slot to determine whether this WAL receiver should create a
278  * temporary slot by itself and use it, or not.
279  */
280  if (slotname != NULL && slotname[0] != '\0')
281  {
282  strlcpy((char *) walrcv->slotname, slotname, NAMEDATALEN);
283  walrcv->is_temp_slot = false;
284  }
285  else
286  {
287  walrcv->slotname[0] = '\0';
288  walrcv->is_temp_slot = create_temp_slot;
289  }
290 
291  if (walrcv->walRcvState == WALRCV_STOPPED)
292  {
293  launch = true;
294  walrcv->walRcvState = WALRCV_STARTING;
295  }
296  else
297  walrcv->walRcvState = WALRCV_RESTARTING;
298  walrcv->startTime = now;
299 
300  /*
301  * If this is the first startup of walreceiver (on this timeline),
302  * initialize flushedUpto and latestChunkStart to the starting point.
303  */
304  if (walrcv->receiveStart == 0 || walrcv->receivedTLI != tli)
305  {
306  walrcv->flushedUpto = recptr;
307  walrcv->receivedTLI = tli;
308  walrcv->latestChunkStart = recptr;
309  }
310  walrcv->receiveStart = recptr;
311  walrcv->receiveStartTLI = tli;
312 
313  latch = walrcv->latch;
314 
315  SpinLockRelease(&walrcv->mutex);
316 
317  if (launch)
319  else if (latch)
320  SetLatch(latch);
321 }
322 
323 /*
324  * Returns the last+1 byte position that walreceiver has flushed.
325  *
326  * Optionally, returns the previous chunk start, that is the first byte
327  * written in the most recent walreceiver flush cycle. Callers not
328  * interested in that value may pass NULL for latestChunkStart. Same for
329  * receiveTLI.
330  */
333 {
334  WalRcvData *walrcv = WalRcv;
335  XLogRecPtr recptr;
336 
337  SpinLockAcquire(&walrcv->mutex);
338  recptr = walrcv->flushedUpto;
339  if (latestChunkStart)
340  *latestChunkStart = walrcv->latestChunkStart;
341  if (receiveTLI)
342  *receiveTLI = walrcv->receivedTLI;
343  SpinLockRelease(&walrcv->mutex);
344 
345  return recptr;
346 }
347 
348 /*
349  * Returns the last+1 byte position that walreceiver has written.
350  * This returns a recently written value without taking a lock.
351  */
354 {
355  WalRcvData *walrcv = WalRcv;
356 
357  return pg_atomic_read_u64(&walrcv->writtenUpto);
358 }
359 
360 /*
361  * Returns the replication apply delay in ms or -1
362  * if the apply delay info is not available
363  */
364 int
366 {
367  WalRcvData *walrcv = WalRcv;
368  XLogRecPtr receivePtr;
369  XLogRecPtr replayPtr;
370  TimestampTz chunkReplayStartTime;
371 
372  SpinLockAcquire(&walrcv->mutex);
373  receivePtr = walrcv->flushedUpto;
374  SpinLockRelease(&walrcv->mutex);
375 
376  replayPtr = GetXLogReplayRecPtr(NULL);
377 
378  if (receivePtr == replayPtr)
379  return 0;
380 
381  chunkReplayStartTime = GetCurrentChunkReplayStartTime();
382 
383  if (chunkReplayStartTime == 0)
384  return -1;
385 
386  return TimestampDifferenceMilliseconds(chunkReplayStartTime,
388 }
389 
390 /*
391  * Returns the network latency in ms, note that this includes any
392  * difference in clock settings between the servers, as well as timezone.
393  */
394 int
396 {
397  WalRcvData *walrcv = WalRcv;
398  TimestampTz lastMsgSendTime;
399  TimestampTz lastMsgReceiptTime;
400 
401  SpinLockAcquire(&walrcv->mutex);
402  lastMsgSendTime = walrcv->lastMsgSendTime;
403  lastMsgReceiptTime = walrcv->lastMsgReceiptTime;
404  SpinLockRelease(&walrcv->mutex);
405 
406  return TimestampDifferenceMilliseconds(lastMsgSendTime,
407  lastMsgReceiptTime);
408 }
static void pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:410
static uint64 pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
Definition: atomics.h:424
long TimestampDifferenceMilliseconds(TimestampTz start_time, TimestampTz stop_time)
Definition: timestamp.c:1695
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1583
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1547
#define MemSet(start, val, len)
Definition: c.h:1009
size_t Size
Definition: c.h:594
bool ConditionVariableCancelSleep(void)
void ConditionVariableBroadcast(ConditionVariable *cv)
void ConditionVariablePrepareToSleep(ConditionVariable *cv)
void ConditionVariableInit(ConditionVariable *cv)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
int64 TimestampTz
Definition: timestamp.h:39
void SetLatch(Latch *latch)
Definition: latch.c:605
Assert(fmt[strlen(fmt) - 1] !='\n')
#define NAMEDATALEN
int64 pg_time_t
Definition: pgtime.h:23
void SendPostmasterSignal(PMSignalReason reason)
Definition: pmsignal.c:181
@ PMSIGNAL_START_WALRECEIVER
Definition: pmsignal.h:41
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
Size add_size(Size s1, Size s2)
Definition: shmem.c:502
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:396
#define SpinLockInit(lock)
Definition: spin.h:60
#define SpinLockRelease(lock)
Definition: spin.h:64
#define SpinLockAcquire(lock)
Definition: spin.h:62
Definition: latch.h:111
TimestampTz lastMsgReceiptTime
Definition: walreceiver.h:102
TimeLineID receiveStartTLI
Definition: walreceiver.h:78
TimeLineID receivedTLI
Definition: walreceiver.h:88
char slotname[NAMEDATALEN]
Definition: walreceiver.h:127
Latch * latch
Definition: walreceiver.h:145
pid_t pid
Definition: walreceiver.h:66
XLogRecPtr latestChunkStart
Definition: walreceiver.h:96
XLogRecPtr receiveStart
Definition: walreceiver.h:77
XLogRecPtr flushedUpto
Definition: walreceiver.h:87
ConditionVariable walRcvStoppedCV
Definition: walreceiver.h:68
bool is_temp_slot
Definition: walreceiver.h:133
pg_atomic_uint64 writtenUpto
Definition: walreceiver.h:155
pg_time_t startTime
Definition: walreceiver.h:69
TimestampTz lastMsgSendTime
Definition: walreceiver.h:101
WalRcvState walRcvState
Definition: walreceiver.h:67
slock_t mutex
Definition: walreceiver.h:147
char conninfo[MAXCONNINFO]
Definition: walreceiver.h:114
Definition: regguts.h:323
#define MAXCONNINFO
Definition: walreceiver.h:39
WalRcvState
Definition: walreceiver.h:48
@ WALRCV_STARTING
Definition: walreceiver.h:50
@ WALRCV_STOPPED
Definition: walreceiver.h:49
@ WALRCV_RESTARTING
Definition: walreceiver.h:54
@ WALRCV_STREAMING
Definition: walreceiver.h:52
@ WALRCV_WAITING
Definition: walreceiver.h:53
@ WALRCV_STOPPING
Definition: walreceiver.h:55
XLogRecPtr GetWalRcvFlushRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI)
bool WalRcvStreaming(void)
void RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, const char *conninfo, const char *slotname, bool create_temp_slot)
WalRcvData * WalRcv
XLogRecPtr GetWalRcvWriteRecPtr(void)
void ShutdownWalRcv(void)
#define WALRCV_STARTUP_TIMEOUT
bool WalRcvRunning(void)
int GetReplicationApplyDelay(void)
void WalRcvShmemInit(void)
Size WalRcvShmemSize(void)
int GetReplicationTransferLatency(void)
#define kill(pid, sig)
Definition: win32_port.h:485
int wal_segment_size
Definition: xlog.c:146
#define XLogSegmentOffset(xlogptr, wal_segsz_bytes)
uint64 XLogRecPtr
Definition: xlogdefs.h:21
uint32 TimeLineID
Definition: xlogdefs.h:59
static TimeLineID receiveTLI
Definition: xlogrecovery.c:263
TimestampTz GetCurrentChunkReplayStartTime(void)
XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI)