PostgreSQL Source Code git master
Loading...
Searching...
No Matches
syncrep.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * syncrep.c
4 *
5 * Synchronous replication is new as of PostgreSQL 9.1.
6 *
7 * If requested, transaction commits wait until their commit LSNs are
8 * acknowledged by the synchronous standbys.
9 *
10 * This module contains the code for waiting and release of backends.
11 * All code in this module executes on the primary. The core streaming
12 * replication transport remains within WALreceiver/WALsender modules.
13 *
14 * The essence of this design is that it isolates all logic about
15 * waiting/releasing onto the primary. The primary defines which standbys
16 * it wishes to wait for. The standbys are completely unaware of the
17 * durability requirements of transactions on the primary, reducing the
18 * complexity of the code and streamlining both standby operations and
19 * network bandwidth because there is no requirement to ship
20 * per-transaction state information.
21 *
22 * Replication is either synchronous or not synchronous (async). If it is
23 * async, we just fastpath out of here. If it is sync, then we wait for
24 * the write, flush or apply location on the standby before releasing
25 * the waiting backend. Further complexity in that interaction is
26 * expected in later releases.
27 *
28 * The best performing way to manage the waiting backends is to have a
29 * single ordered queue of waiting backends, so that we can avoid
30 * searching through all waiters each time we receive a reply.
31 *
32 * In 9.5 or before only a single standby could be considered as
33 * synchronous. In 9.6 we support multiple priority-based synchronous
34 * standbys. In 10.0 multiple quorum-based synchronous standbys are also
35 * supported. The number of synchronous standbys that transactions
36 * must wait for replies from is specified in synchronous_standby_names.
37 * This parameter also specifies a list of standby names and the method
38 * (FIRST and ANY) to choose synchronous standbys from the listed ones.
39 *
40 * The method FIRST specifies a priority-based synchronous replication
41 * and makes transaction commits wait until their WAL records are
42 * replicated to the requested number of synchronous standbys chosen based
43 * on their priorities. The standbys whose names appear earlier in the list
44 * are given higher priority and will be considered as synchronous.
45 * Other standby servers appearing later in this list represent potential
46 * synchronous standbys. If any of the current synchronous standbys
47 * disconnects for whatever reason, it will be replaced immediately with
48 * the next-highest-priority standby.
49 *
50 * The method ANY specifies a quorum-based synchronous replication
51 * and makes transaction commits wait until their WAL records are
52 * replicated to at least the requested number of synchronous standbys
53 * in the list. All the standbys appearing in the list are considered as
54 * candidates for quorum synchronous standbys.
55 *
56 * If neither FIRST nor ANY is specified, FIRST is used as the method.
57 * This is for backward compatibility with 9.6 or before where only a
58 * priority-based sync replication was supported.
59 *
60 * Before the standbys chosen from synchronous_standby_names can
61 * become the synchronous standbys they must have caught up with
62 * the primary; that may take some time. Once caught up,
63 * the standbys which are considered as synchronous at that moment
64 * will release waiters from the queue.
65 *
66 * Portions Copyright (c) 2010-2026, PostgreSQL Global Development Group
67 *
68 * IDENTIFICATION
69 * src/backend/replication/syncrep.c
70 *
71 *-------------------------------------------------------------------------
72 */
73#include "postgres.h"
74
75#include <unistd.h>
76
77#include "access/xact.h"
78#include "common/int.h"
79#include "miscadmin.h"
80#include "pgstat.h"
81#include "replication/syncrep.h"
84#include "storage/proc.h"
85#include "tcop/tcopprot.h"
86#include "utils/guc_hooks.h"
87#include "utils/ps_status.h"
88#include "utils/wait_event.h"
89
90/* User-settable parameters for sync rep */
92
/*
 * Evaluates to true when the user-settable SyncRepStandbyNames string is
 * non-NULL and not the empty string.
 */
93#define SyncStandbysDefined() \
94 (SyncRepStandbyNames != NULL && SyncRepStandbyNames[0] != '\0')
95
96static bool announce_next_takeover = true;
97
100
101static void SyncRepQueueInsert(int mode);
102static void SyncRepCancelWait(void);
103static int SyncRepWakeQueue(bool all, int mode);
104
108 bool *am_sync);
113 int num_standbys);
118 int num_standbys,
119 uint8 nth);
120static int SyncRepGetStandbyPriority(void);
121static int standby_priority_comparator(const void *a, const void *b);
122static int cmp_lsn(const void *a, const void *b);
123
124#ifdef USE_ASSERT_CHECKING
125static bool SyncRepQueueIsOrderedByLSN(int mode);
126#endif
127
128/*
129 * ===========================================================
130 * Synchronous Replication functions for normal user backends
131 * ===========================================================
132 */
133
134/*
135 * Wait for synchronous replication, if requested by user.
136 *
137 * Initially backends start in state SYNC_REP_NOT_WAITING and then
138 * change that state to SYNC_REP_WAITING before adding ourselves
139 * to the wait queue. During SyncRepWakeQueue() a WALSender changes
140 * the state to SYNC_REP_WAIT_COMPLETE once replication is confirmed.
141 * This backend then resets its state to SYNC_REP_NOT_WAITING.
142 *
143 * 'lsn' represents the LSN to wait for. 'commit' indicates whether this LSN
144 * represents a commit record. If it doesn't, then we wait only for the WAL
145 * to be flushed if synchronous_commit is set to the higher level of
146 * remote_apply, because only commit records provide apply feedback.
147 */
148void
150{
151 int mode;
152
153 /*
154 * This should be called while holding interrupts during a transaction
155 * commit to prevent the follow-up shared memory queue cleanups from being
156 * influenced by external interruptions.
157 */
159
160 /*
161 * Fast exit if user has not requested sync replication, or there are no
162 * sync replication standby names defined.
163 *
164 * Since this routine gets called every commit time, it's important to
165 * exit quickly if sync replication is not requested.
166 *
167 * We check WalSndCtl->sync_standbys_status flag without the lock and exit
168 * immediately if SYNC_STANDBY_INIT is set (the checkpointer has
169 * initialized this data) but SYNC_STANDBY_DEFINED is missing (no sync
170 * replication requested).
171 *
172 * If SYNC_STANDBY_DEFINED is set, we need to check the status again later
173 * while holding the lock, to check the flag and operate the sync rep
174 * queue atomically. This is necessary to avoid the race condition
175 * described in SyncRepUpdateSyncStandbysDefined(). On the other hand, if
176 * SYNC_STANDBY_DEFINED is not set, the lock is not necessary because we
177 * don't touch the queue.
178 */
179 if (!SyncRepRequested() ||
180 ((((volatile WalSndCtlData *) WalSndCtl)->sync_standbys_status) &
182 return;
183
184 /* Cap the level for anything other than commit to remote flush only. */
185 if (commit)
187 else
189
192
195
196 /*
197 * We don't wait for sync rep if SYNC_STANDBY_DEFINED is not set. See
198 * SyncRepUpdateSyncStandbysDefined().
199 *
200 * Also check that the standby hasn't already replied. Unlikely race
201 * condition but we'll be fetching that cache line anyway so it's likely
202 * to be a low cost check.
203 *
204 * If the sync standby data has not been initialized yet
205 * (SYNC_STANDBY_INIT is not set), fall back to a check based on the LSN,
206 * then do a direct GUC check.
207 */
209 {
212 {
214 return;
215 }
216 }
217 else if (lsn <= WalSndCtl->lsn[mode])
218 {
219 /*
220 * The LSN is older than what we need to wait for. The sync standby
221 * data has not been initialized yet, but we are OK to not wait
222 * because we know that there is no point in doing so based on the
223 * LSN.
224 */
226 return;
227 }
228 else if (!SyncStandbysDefined())
229 {
230 /*
231 * If we are here, the sync standby data has not been initialized yet,
232 * and the LSN is newer than what we need to wait for, so we have fallen
233 * back to the best thing we could do in this case: a check on
234 * SyncStandbysDefined() to see if the GUC is set or not.
235 *
236 * When the GUC has a value, we wait until the checkpointer updates
237 * the status data because we cannot be sure yet if we should wait or
238 * not. Here, the GUC has *no* value, we are sure that there is no
239 * point to wait; this matters for example when initializing a
240 * cluster, where we should never wait, and no sync standbys is the
241 * default behavior.
242 */
244 return;
245 }
246
247 /*
248 * Set our waitLSN so WALSender will know when to wake us, and add
249 * ourselves to the queue.
250 */
251 MyProc->waitLSN = lsn;
256
257 /* Alter ps display to show waiting for sync rep. */
259 {
260 char buffer[32];
261
262 sprintf(buffer, "waiting for %X/%08X", LSN_FORMAT_ARGS(lsn));
263 set_ps_display_suffix(buffer);
264 }
265
266 /*
267 * Wait for specified LSN to be confirmed.
268 *
269 * Each proc has its own wait latch, so we perform a normal latch
270 * check/wait loop here.
271 */
272 for (;;)
273 {
274 int rc;
275
276 /* Must reset the latch before testing state. */
278
279 /*
280 * Acquiring the lock is not needed, the latch ensures proper
281 * barriers. If it looks like we're done, we must really be done,
282 * because once walsender changes the state to SYNC_REP_WAIT_COMPLETE,
283 * it will never update it again, so we can't be seeing a stale value
284 * in that case.
285 */
287 break;
288
289 /*
290 * If a wait for synchronous replication is pending, we can neither
291 * acknowledge the commit nor raise ERROR or FATAL. The latter would
292 * lead the client to believe that the transaction aborted, which is
293 * not true: it's already committed locally. The former is no good
294 * either: the client has requested synchronous replication, and is
295 * entitled to assume that an acknowledged commit is also replicated,
296 * which might not be true. So in this case we issue a WARNING (which
297 * some clients may be able to interpret) and shut off further output.
298 * We do NOT reset ProcDiePending, so that the process will die after
299 * the commit is cleaned up.
300 */
301 if (ProcDiePending)
302 {
305 errmsg("canceling the wait for synchronous replication and terminating connection due to administrator command"),
306 errdetail("The transaction has already committed locally, but might not have been replicated to the standby.")));
309 break;
310 }
311
312 /*
313 * It's unclear what to do if a query cancel interrupt arrives. We
314 * can't actually abort at this point, but ignoring the interrupt
315 * altogether is not helpful, so we just terminate the wait with a
316 * suitable warning.
317 */
319 {
320 QueryCancelPending = false;
322 (errmsg("canceling wait for synchronous replication due to user request"),
323 errdetail("The transaction has already committed locally, but might not have been replicated to the standby.")));
325 break;
326 }
327
328 /*
329 * Wait on latch. Any condition that should wake us up will set the
330 * latch, so no need for timeout.
331 */
334
335 /*
336 * If the postmaster dies, we'll probably never get an acknowledgment,
337 * because all the wal sender processes will exit. So just bail out.
338 */
339 if (rc & WL_POSTMASTER_DEATH)
340 {
341 ProcDiePending = true;
344 break;
345 }
346 }
347
348 /*
349 * WalSender has checked our LSN and has removed us from queue. Clean up
350 * state and leave. It's OK to reset these shared memory fields without
351 * holding SyncRepLock, because any walsenders will ignore us anyway when
352 * we're not on the queue. We need a read barrier to make sure we see the
353 * changes to the queue link (this might be unnecessary without
354 * assertions, but better safe than sorry).
355 */
360
361 /* reset ps display to remove the suffix */
364}
365
366/*
367 * Insert MyProc into the specified SyncRepQueue, maintaining sorted invariant.
368 *
369 * Usually we will go at tail of queue, though it's possible that we arrive
370 * here out of order, so start at tail and work back to insertion point.
371 */
372static void
374{
375 dlist_head *queue;
376 dlist_iter iter;
377
379 queue = &WalSndCtl->SyncRepQueue[mode];
380
381 dlist_reverse_foreach(iter, queue)
382 {
383 PGPROC *proc = dlist_container(PGPROC, syncRepLinks, iter.cur);
384
385 /*
386 * Stop at the queue element that we should insert after to ensure the
387 * queue is ordered by LSN.
388 */
389 if (proc->waitLSN < MyProc->waitLSN)
390 {
392 return;
393 }
394 }
395
396 /*
397 * If we get here, the list was either empty, or this process needs to be
398 * at the head.
399 */
401}
402
403/*
404 * Acquire SyncRepLock and cancel any wait currently in progress.
405 */
406static void
415
416void
418{
419 /*
420 * First check if we are removed from the queue without the lock to not
421 * slow down backend exit.
422 */
424 {
426
427 /* maybe we have just been removed, so recheck */
430
432 }
433}
434
435/*
436 * ===========================================================
437 * Synchronous Replication functions for wal sender processes
438 * ===========================================================
439 */
440
441/*
442 * Take any action required to initialise sync rep state from config
443 * data. Called at WALSender startup and after each SIGHUP.
444 */
445void
447{
448 int priority;
449
450 /*
451 * Determine if we are a potential sync standby and remember the result
452 * for handling replies from standby.
453 */
456 {
460
462 (errmsg_internal("standby \"%s\" now has synchronous standby priority %d",
464 }
465}
466
467/*
468 * Update the LSNs on each queue based upon our latest state. This
469 * implements a simple policy of first-valid-sync-standby-releases-waiter.
470 *
471 * Other policies are possible, which would change what we do here and
472 * perhaps also which information we store as well.
473 */
474void
476{
481 bool got_recptr;
482 bool am_sync;
483 int numwrite = 0;
484 int numflush = 0;
485 int numapply = 0;
486
487 /*
488 * If this WALSender is serving a standby that is not on the list of
489 * potential sync standbys then we have nothing to do. If we are still
490 * starting up, still running base backup or the current flush position is
491 * still invalid, then leave quickly also. Streaming or stopping WAL
492 * senders are allowed to release waiters.
493 */
498 {
500 return;
501 }
502
503 /*
504 * We're a potential sync standby. Release waiters if there are enough
505 * sync standbys and we are considered as sync.
506 */
508
509 /*
510 * Check whether we are a sync standby or not, and calculate the synced
511 * positions among all sync standbys. (Note: although this step does not
512 * of itself require holding SyncRepLock, it seems like a good idea to do
513 * it after acquiring the lock. This ensures that the WAL pointers we use
514 * to release waiters are newer than any previous execution of this
515 * routine used.)
516 */
518
519 /*
520 * If we are managing a sync standby, though we weren't prior to this,
521 * then announce we are now a sync standby.
522 */
524 {
526
528 ereport(LOG,
529 (errmsg("standby \"%s\" is now a synchronous standby with priority %d",
531 else
532 ereport(LOG,
533 (errmsg("standby \"%s\" is now a candidate for quorum synchronous standby",
535 }
536
537 /*
538 * If the number of sync standbys is less than requested or we aren't
539 * managing a sync standby then just leave.
540 */
541 if (!got_recptr || !am_sync)
542 {
545 return;
546 }
547
548 /*
549 * Set the lsn first so that when we wake backends they will release up to
550 * this location.
551 */
553 {
556 }
558 {
561 }
563 {
566 }
567
569
570 elog(DEBUG3, "released %d procs up to write %X/%08X, %d procs up to flush %X/%08X, %d procs up to apply %X/%08X",
574}
575
576/*
577 * Calculate the synced Write, Flush and Apply positions among sync standbys.
578 *
579 * Return false if the number of sync standbys is less than
580 * synchronous_standby_names specifies. Otherwise return true and
581 * store the positions into *writePtr, *flushPtr and *applyPtr.
582 *
583 * On return, *am_sync is set to true if this walsender is connected to
584 * a sync standby. Otherwise it's set to false.
585 */
586static bool
589{
591 int num_standbys;
592 int i;
593
594 /* Initialize default results */
598 *am_sync = false;
599
600 /* Quick out if not even configured to be synchronous */
601 if (SyncRepConfig == NULL)
602 return false;
603
604 /* Get standbys that are considered as synchronous at this moment */
606
607 /* Am I among the candidate sync standbys? */
608 for (i = 0; i < num_standbys; i++)
609 {
610 if (sync_standbys[i].is_me)
611 {
612 *am_sync = true;
613 break;
614 }
615 }
616
617 /*
618 * Nothing more to do if we are not managing a sync standby or there are
619 * not enough synchronous standbys.
620 */
621 if (!(*am_sync) ||
623 {
625 return false;
626 }
627
628 /*
629 * In a priority-based sync replication, the synced positions are the
630 * oldest ones among sync standbys. In a quorum-based, they are the Nth
631 * latest ones.
632 *
633 * SyncRepGetNthLatestSyncRecPtr() also can calculate the oldest
634 * positions. But we use SyncRepGetOldestSyncRecPtr() for that calculation
635 * because it's a bit more efficient.
636 *
637 * XXX If the numbers of current and requested sync standbys are the same,
638 * we can use SyncRepGetOldestSyncRecPtr() to calculate the synced
639 * positions even in a quorum-based sync replication.
640 */
642 {
645 }
646 else
647 {
651 }
652
654 return true;
655}
656
657/*
658 * Calculate the oldest Write, Flush and Apply positions among sync standbys.
659 */
660static void
665 int num_standbys)
666{
667 int i;
668
669 /*
670 * Scan through all sync standbys and calculate the oldest Write, Flush
671 * and Apply positions. We assume *writePtr et al were initialized to
672 * InvalidXLogRecPtr.
673 */
674 for (i = 0; i < num_standbys; i++)
675 {
677 XLogRecPtr flush = sync_standbys[i].flush;
678 XLogRecPtr apply = sync_standbys[i].apply;
679
681 *writePtr = write;
682 if (!XLogRecPtrIsValid(*flushPtr) || *flushPtr > flush)
683 *flushPtr = flush;
684 if (!XLogRecPtrIsValid(*applyPtr) || *applyPtr > apply)
685 *applyPtr = apply;
686 }
687}
688
689/*
690 * Calculate the Nth latest Write, Flush and Apply positions among sync
691 * standbys.
692 */
693static void
698 int num_standbys,
699 uint8 nth)
700{
704 int i;
705
706 /* Should have enough candidates, or somebody messed up */
707 Assert(nth > 0 && nth <= num_standbys);
708
712
713 for (i = 0; i < num_standbys; i++)
714 {
715 write_array[i] = sync_standbys[i].write;
716 flush_array[i] = sync_standbys[i].flush;
717 apply_array[i] = sync_standbys[i].apply;
718 }
719
720 /* Sort each array in descending order */
724
725 /* Get Nth latest Write, Flush, Apply positions */
726 *writePtr = write_array[nth - 1];
727 *flushPtr = flush_array[nth - 1];
728 *applyPtr = apply_array[nth - 1];
729
733}
734
735/*
736 * Compare lsn in order to sort array in descending order.
737 */
738static int
739cmp_lsn(const void *a, const void *b)
740{
 /* a and b each point at one XLogRecPtr element of the array being sorted */
741 XLogRecPtr lsn1 = *((const XLogRecPtr *) a);
742 XLogRecPtr lsn2 = *((const XLogRecPtr *) b);
743
 /*
  * Note the swapped operands (b's value first, then a's).  Assuming
  * pg_cmp_u64() is an ordinary ascending three-way compare, the swap
  * reverses it, which gives the descending order this comparator is
  * documented to produce.
  */
744 return pg_cmp_u64(lsn2, lsn1);
745}
746
747/*
748 * Return data about walsenders that are candidates to be sync standbys.
749 *
750 * *standbys is set to a palloc'd array of structs of per-walsender data,
751 * and the number of valid entries (candidate sync senders) is returned.
752 * (This might be more or fewer than num_sync; caller must check.)
753 */
754int
756{
757 int i;
758 int n;
759
760 /* Create result array */
762
763 /* Quick exit if sync replication is not requested */
764 if (SyncRepConfig == NULL)
765 return 0;
766
767 /* Collect raw data from shared memory */
768 n = 0;
769 for (i = 0; i < max_wal_senders; i++)
770 {
771 volatile WalSnd *walsnd; /* Use volatile pointer to prevent code
772 * rearrangement */
774 WalSndState state; /* not included in SyncRepStandbyData */
775
777 stby = *standbys + n;
778
779 SpinLockAcquire(&walsnd->mutex);
780 stby->pid = walsnd->pid;
781 state = walsnd->state;
782 stby->write = walsnd->write;
783 stby->flush = walsnd->flush;
784 stby->apply = walsnd->apply;
785 stby->sync_standby_priority = walsnd->sync_standby_priority;
786 SpinLockRelease(&walsnd->mutex);
787
788 /* Must be active */
789 if (stby->pid == 0)
790 continue;
791
792 /* Must be streaming or stopping */
795 continue;
796
797 /* Must be synchronous */
798 if (stby->sync_standby_priority == 0)
799 continue;
800
801 /* Must have a valid flush position */
802 if (!XLogRecPtrIsValid(stby->flush))
803 continue;
804
805 /* OK, it's a candidate */
806 stby->walsnd_index = i;
807 stby->is_me = (walsnd == MyWalSnd);
808 n++;
809 }
810
811 /*
812 * In quorum mode, we return all the candidates. In priority mode, if we
813 * have too many candidates then return only the num_sync ones of highest
814 * priority.
815 */
818 {
819 /* Sort by priority ... */
820 qsort(*standbys, n, sizeof(SyncRepStandbyData),
822 /* ... then report just the first num_sync ones */
824 }
825
826 return n;
827}
828
829/*
830 * qsort comparator to sort SyncRepStandbyData entries by priority
831 */
832static int
833standby_priority_comparator(const void *a, const void *b)
834{
 /* a and b each point at one SyncRepStandbyData entry of the sorted array */
835 const SyncRepStandbyData *sa = (const SyncRepStandbyData *) a;
836 const SyncRepStandbyData *sb = (const SyncRepStandbyData *) b;
837
838 /* First, sort by increasing priority value */
 /*
  * The result is the plain int difference: negative when sa has the
  * smaller priority value, positive when it has the larger one.
  * NOTE(review): this relies on both values being small enough that the
  * subtraction cannot overflow -- confirm the value range.
  */
839 if (sa->sync_standby_priority != sb->sync_standby_priority)
840 return sa->sync_standby_priority - sb->sync_standby_priority;
841
842 /*
843 * We might have equal priority values; arbitrarily break ties by position
844 * in the WalSnd array. (This is utterly bogus, since that is arrival
845 * order dependent, but there are regression tests that rely on it.)
846 */
 /* Zero is returned only when priority and walsnd_index are both equal. */
847 return sa->walsnd_index - sb->walsnd_index;
848}
849
850
851/*
852 * Check if we are in the list of sync standbys, and if so, determine
853 * priority sequence. Return priority if set, or zero to indicate that
854 * we are not a potential sync standby.
855 *
856 * Compare the parameter SyncRepStandbyNames against the application_name
857 * for this WALSender, or allow any name if we find a wildcard "*".
858 */
859static int
861{
862 const char *standby_name;
863 int priority;
864 bool found = false;
865
866 /*
867 * Since synchronous cascade replication is not allowed, we always set the
868 * priority of cascading walsender to zero.
869 */
871 return 0;
872
874 return 0;
875
878 {
880 strcmp(standby_name, "*") == 0)
881 {
882 found = true;
883 break;
884 }
886 }
887
888 if (!found)
889 return 0;
890
891 /*
892 * In quorum-based sync replication, all the standbys in the list have the
893 * same priority, one.
894 */
896}
897
898/*
899 * Walk the specified queue from head. Set the state of any backends that
900 * need to be woken, remove them from the queue, and then wake them.
901 * Pass all = true to wake whole queue; otherwise, just wake up to
902 * the walsender's LSN.
903 *
904 * The caller must hold SyncRepLock in exclusive mode.
905 */
906static int
907SyncRepWakeQueue(bool all, int mode)
908{
910 int numprocs = 0;
912
916
918 {
919 PGPROC *proc = dlist_container(PGPROC, syncRepLinks, iter.cur);
920
921 /*
922 * Assume the queue is ordered by LSN
923 */
924 if (!all && walsndctl->lsn[mode] < proc->waitLSN)
925 return numprocs;
926
927 /*
928 * Remove from queue.
929 */
931
932 /*
933 * SyncRepWaitForLSN() reads syncRepState without holding the lock, so
934 * make sure that it sees the queue link being removed before the
935 * syncRepState change.
936 */
938
939 /*
940 * Set state to complete; see SyncRepWaitForLSN() for discussion of
941 * the various states.
942 */
944
945 /*
946 * Wake only when we have set state and removed from queue.
947 */
948 SetLatch(&(proc->procLatch));
949
950 numprocs++;
951 }
952
953 return numprocs;
954}
955
956/*
957 * The checkpointer calls this as needed to update the shared
958 * sync_standbys_status flag, so that backends don't remain permanently wedged
959 * if synchronous_standby_names is unset. It's safe to check the current value
960 * without the lock, because it's only ever updated by one process. But we
961 * must take the lock to change it.
962 */
963void
965{
967
970 {
972
973 /*
974 * If synchronous_standby_names has been reset to empty, it's futile
975 * for backends to continue waiting. Since the user no longer wants
976 * synchronous replication, we'd better wake them up.
977 */
979 {
980 int i;
981
982 for (i = 0; i < NUM_SYNC_REP_WAIT_MODE; i++)
983 SyncRepWakeQueue(true, i);
984 }
985
986 /*
987 * Only allow people to join the queue when there are synchronous
988 * standbys defined. Without this interlock, there's a race
989 * condition: we might wake up all the current waiters; then, some
990 * backend that hasn't yet reloaded its config might go to sleep on
991 * the queue (and never wake up). This prevents that.
992 */
995
997 }
999 {
1001
1002 /*
1003 * Note that there is no need to wake up the queues here. We would
1004 * reach this path only if SyncStandbysDefined() returns false, or it
1005 * would mean that some backends are waiting with the GUC set. See
1006 * SyncRepWaitForLSN().
1007 */
1009
1010 /*
1011 * Even if there is no sync standby defined, let the readers of this
1012 * information know that the sync standby data has been initialized.
1013 * This can just be done once, hence the previous check on
1014 * SYNC_STANDBY_INIT to avoid useless work.
1015 */
1017
1019 }
1020}
1021
1022#ifdef USE_ASSERT_CHECKING
1023static bool
1025{
1027 dlist_iter iter;
1028
1030
1032
1034 {
1035 PGPROC *proc = dlist_container(PGPROC, syncRepLinks, iter.cur);
1036
1037 /*
1038 * Check the queue is ordered by LSN and that multiple procs don't
1039 * have matching LSNs
1040 */
1041 if (proc->waitLSN <= lastLSN)
1042 return false;
1043
1044 lastLSN = proc->waitLSN;
1045 }
1046
1047 return true;
1048}
1049#endif
1050
1051/*
1052 * ===========================================================
1053 * Synchronous Replication functions executed by any process
1054 * ===========================================================
1055 */
1056
1057bool
1059{
1060 if (*newval != NULL && (*newval)[0] != '\0')
1061 {
1062 yyscan_t scanner;
1063 int parse_rc;
1065
1066 /* Result of parsing is returned in one of these two variables */
1069
1070 /* Parse the synchronous_standby_names string */
1071 syncrep_scanner_init(*newval, &scanner);
1073 syncrep_scanner_finish(scanner);
1074
1075 if (parse_rc != 0 || syncrep_parse_result == NULL)
1076 {
1080 else
1081 /* translator: %s is a GUC name */
1082 GUC_check_errdetail("\"%s\" parser failed.",
1083 "synchronous_standby_names");
1084 return false;
1085 }
1086
1087 if (syncrep_parse_result->num_sync <= 0)
1088 {
1089 GUC_check_errmsg("number of synchronous standbys (%d) must be greater than zero",
1090 syncrep_parse_result->num_sync);
1091 return false;
1092 }
1093
1094 /* GUC extra value must be guc_malloc'd, not palloc'd */
1096 guc_malloc(LOG, syncrep_parse_result->config_size);
1097 if (pconf == NULL)
1098 return false;
1100
1101 *extra = pconf;
1102
1103 /*
1104 * We need not explicitly clean up syncrep_parse_result. It, and any
1105 * other cruft generated during parsing, will be freed when the
1106 * current memory context is deleted. (This code is generally run in
1107 * a short-lived context used for config file processing, so that will
1108 * not be very long.)
1109 */
1110 }
1111 else
1112 *extra = NULL;
1113
1114 return true;
1115}
1116
1117void
1119{
1120 SyncRepConfig = (SyncRepConfigData *) extra;
1121}
1122
1123void
1125{
1126 switch (newval)
1127 {
1130 break;
1133 break;
1136 break;
1137 default:
1139 break;
1140 }
1141}
#define pg_read_barrier()
Definition atomics.h:154
#define pg_write_barrier()
Definition atomics.h:155
#define Min(x, y)
Definition c.h:1054
uint8_t uint8
Definition c.h:577
#define Assert(condition)
Definition c.h:906
void * yyscan_t
Definition cubedata.h:65
@ DestNone
Definition dest.h:87
int errcode(int sqlerrcode)
Definition elog.c:874
#define LOG
Definition elog.h:31
#define DEBUG3
Definition elog.h:28
int errdetail(const char *fmt,...) pg_attribute_printf(1
int int errmsg_internal(const char *fmt,...) pg_attribute_printf(1
#define WARNING
Definition elog.h:36
#define DEBUG1
Definition elog.h:30
#define elog(elevel,...)
Definition elog.h:226
#define ereport(elevel,...)
Definition elog.h:150
#define palloc_array(type, count)
Definition fe_memutils.h:76
volatile uint32 InterruptHoldoffCount
Definition globals.c:43
volatile sig_atomic_t QueryCancelPending
Definition globals.c:33
struct Latch * MyLatch
Definition globals.c:63
volatile sig_atomic_t ProcDiePending
Definition globals.c:34
void GUC_check_errcode(int sqlerrcode)
Definition guc.c:6628
void * guc_malloc(int elevel, size_t size)
Definition guc.c:636
#define newval
#define GUC_check_errmsg
Definition guc.h:502
#define GUC_check_errdetail
Definition guc.h:506
GucSource
Definition guc.h:112
char * application_name
Definition guc_tables.c:570
static void dlist_insert_after(dlist_node *after, dlist_node *node)
Definition ilist.h:381
#define dlist_foreach(iter, lhead)
Definition ilist.h:623
static void dlist_delete_thoroughly(dlist_node *node)
Definition ilist.h:416
static bool dlist_node_is_detached(const dlist_node *node)
Definition ilist.h:525
#define dlist_reverse_foreach(iter, lhead)
Definition ilist.h:654
static void dlist_push_head(dlist_head *head, dlist_node *node)
Definition ilist.h:347
#define dlist_foreach_modify(iter, lhead)
Definition ilist.h:640
#define dlist_container(type, membername, ptr)
Definition ilist.h:593
static int pg_cmp_u64(uint64 a, uint64 b)
Definition int.h:731
#define write(a, b, c)
Definition win32.h:14
int b
Definition isn.c:74
int a
Definition isn.c:73
int i
Definition isn.c:77
void SetLatch(Latch *latch)
Definition latch.c:290
void ResetLatch(Latch *latch)
Definition latch.c:374
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition latch.c:172
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1177
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1956
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1794
@ LW_EXCLUSIVE
Definition lwlock.h:112
void pfree(void *pointer)
Definition mcxt.c:1616
static char * errmsg
static PgChecksumMode mode
static rewind_source * source
Definition pg_rewind.c:89
int pg_strcasecmp(const char *s1, const char *s2)
#define sprintf
Definition port.h:262
#define qsort(a, b, c, d)
Definition port.h:495
CommandDest whereToSendOutput
Definition postgres.c:93
static int fb(int x)
void set_ps_display_remove_suffix(void)
Definition ps_status.c:439
void set_ps_display_suffix(const char *suffix)
Definition ps_status.c:387
bool update_process_title
Definition ps_status.c:31
static void SpinLockRelease(volatile slock_t *lock)
Definition spin.h:62
static void SpinLockAcquire(volatile slock_t *lock)
Definition spin.h:56
PGPROC * MyProc
Definition proc.c:68
Definition proc.h:176
XLogRecPtr waitLSN
Definition proc.h:333
dlist_node syncRepLinks
Definition proc.h:335
int syncRepState
Definition proc.h:334
Latch procLatch
Definition proc.h:248
uint8 syncrep_method
Definition syncrep.h:68
char member_names[FLEXIBLE_ARRAY_MEMBER]
Definition syncrep.h:71
WalSnd walsnds[FLEXIBLE_ARRAY_MEMBER]
dlist_head SyncRepQueue[NUM_SYNC_REP_WAIT_MODE]
slock_t mutex
XLogRecPtr flush
WalSndState state
int sync_standby_priority
dlist_node * cur
Definition ilist.h:179
dlist_node * cur
Definition ilist.h:200
static int SyncRepWaitMode
Definition syncrep.c:99
void SyncRepInitConfig(void)
Definition syncrep.c:446
void SyncRepWaitForLSN(XLogRecPtr lsn, bool commit)
Definition syncrep.c:149
static bool SyncRepGetSyncRecPtr(XLogRecPtr *writePtr, XLogRecPtr *flushPtr, XLogRecPtr *applyPtr, bool *am_sync)
Definition syncrep.c:587
static void SyncRepGetNthLatestSyncRecPtr(XLogRecPtr *writePtr, XLogRecPtr *flushPtr, XLogRecPtr *applyPtr, SyncRepStandbyData *sync_standbys, int num_standbys, uint8 nth)
Definition syncrep.c:694
void assign_synchronous_commit(int newval, void *extra)
Definition syncrep.c:1124
void assign_synchronous_standby_names(const char *newval, void *extra)
Definition syncrep.c:1118
static int standby_priority_comparator(const void *a, const void *b)
Definition syncrep.c:833
static int SyncRepWakeQueue(bool all, int mode)
Definition syncrep.c:907
SyncRepConfigData * SyncRepConfig
Definition syncrep.c:98
int SyncRepGetCandidateStandbys(SyncRepStandbyData **standbys)
Definition syncrep.c:755
void SyncRepReleaseWaiters(void)
Definition syncrep.c:475
void SyncRepUpdateSyncStandbysDefined(void)
Definition syncrep.c:964
static bool announce_next_takeover
Definition syncrep.c:96
static int SyncRepGetStandbyPriority(void)
Definition syncrep.c:860
char * SyncRepStandbyNames
Definition syncrep.c:91
static void SyncRepQueueInsert(int mode)
Definition syncrep.c:373
static void SyncRepCancelWait(void)
Definition syncrep.c:407
bool check_synchronous_standby_names(char **newval, void **extra, GucSource source)
Definition syncrep.c:1058
static void SyncRepGetOldestSyncRecPtr(XLogRecPtr *writePtr, XLogRecPtr *flushPtr, XLogRecPtr *applyPtr, SyncRepStandbyData *sync_standbys, int num_standbys)
Definition syncrep.c:661
void SyncRepCleanupAtProcExit(void)
Definition syncrep.c:417
static int cmp_lsn(const void *a, const void *b)
Definition syncrep.c:739
#define SyncStandbysDefined()
Definition syncrep.c:93
#define SYNC_REP_PRIORITY
Definition syncrep.h:35
#define NUM_SYNC_REP_WAIT_MODE
Definition syncrep.h:27
#define SyncRepRequested()
Definition syncrep.h:18
#define SYNC_REP_NO_WAIT
Definition syncrep.h:22
#define SYNC_REP_WAIT_WRITE
Definition syncrep.h:23
#define SYNC_REP_WAITING
Definition syncrep.h:31
#define SYNC_REP_WAIT_COMPLETE
Definition syncrep.h:32
#define SYNC_REP_WAIT_FLUSH
Definition syncrep.h:24
#define SYNC_REP_NOT_WAITING
Definition syncrep.h:30
int syncrep_yyparse(SyncRepConfigData **syncrep_parse_result_p, char **syncrep_parse_error_msg_p, yyscan_t yyscanner)
#define SYNC_REP_WAIT_APPLY
Definition syncrep.h:25
void syncrep_scanner_finish(yyscan_t yyscanner)
void syncrep_scanner_init(const char *str, yyscan_t *yyscannerp)
#define WL_LATCH_SET
#define WL_POSTMASTER_DEATH
WalSnd * MyWalSnd
Definition walsender.c:121
int max_wal_senders
Definition walsender.c:130
bool am_cascading_walsender
Definition walsender.c:125
WalSndCtlData * WalSndCtl
Definition walsender.c:118
#define SYNC_STANDBY_DEFINED
WalSndState
@ WALSNDSTATE_STREAMING
@ WALSNDSTATE_STOPPING
#define SYNC_STANDBY_INIT
@ SYNCHRONOUS_COMMIT_REMOTE_WRITE
Definition xact.h:73
@ SYNCHRONOUS_COMMIT_REMOTE_APPLY
Definition xact.h:76
@ SYNCHRONOUS_COMMIT_REMOTE_FLUSH
Definition xact.h:75
#define XLogRecPtrIsValid(r)
Definition xlogdefs.h:29
#define LSN_FORMAT_ARGS(lsn)
Definition xlogdefs.h:47
uint64 XLogRecPtr
Definition xlogdefs.h:21
#define InvalidXLogRecPtr
Definition xlogdefs.h:28