PostgreSQL Source Code git master
Loading...
Searching...
No Matches
syncrep.c File Reference
#include "postgres.h"
#include <unistd.h>
#include "access/xact.h"
#include "common/int.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "replication/syncrep.h"
#include "replication/walsender.h"
#include "replication/walsender_private.h"
#include "storage/proc.h"
#include "tcop/tcopprot.h"
#include "utils/guc_hooks.h"
#include "utils/ps_status.h"
#include "utils/wait_event.h"
Include dependency graph for syncrep.c:

Go to the source code of this file.

Macros

#define SyncStandbysDefined()    (SyncRepStandbyNames != NULL && SyncRepStandbyNames[0] != '\0')
 

Functions

static void SyncRepQueueInsert (int mode)
 
static void SyncRepCancelWait (void)
 
static int SyncRepWakeQueue (bool all, int mode)
 
static bool SyncRepGetSyncRecPtr (XLogRecPtr *writePtr, XLogRecPtr *flushPtr, XLogRecPtr *applyPtr, bool *am_sync)
 
static void SyncRepGetOldestSyncRecPtr (XLogRecPtr *writePtr, XLogRecPtr *flushPtr, XLogRecPtr *applyPtr, SyncRepStandbyData *sync_standbys, int num_standbys)
 
static void SyncRepGetNthLatestSyncRecPtr (XLogRecPtr *writePtr, XLogRecPtr *flushPtr, XLogRecPtr *applyPtr, SyncRepStandbyData *sync_standbys, int num_standbys, uint8 nth)
 
static int SyncRepGetStandbyPriority (void)
 
static int standby_priority_comparator (const void *a, const void *b)
 
static int cmp_lsn (const void *a, const void *b)
 
void SyncRepWaitForLSN (XLogRecPtr lsn, bool commit)
 
void SyncRepCleanupAtProcExit (void)
 
void SyncRepInitConfig (void)
 
void SyncRepReleaseWaiters (void)
 
int SyncRepGetCandidateStandbys (SyncRepStandbyData **standbys)
 
void SyncRepUpdateSyncStandbysDefined (void)
 
bool check_synchronous_standby_names (char **newval, void **extra, GucSource source)
 
void assign_synchronous_standby_names (const char *newval, void *extra)
 
void assign_synchronous_commit (int newval, void *extra)
 

Variables

char * SyncRepStandbyNames
 
static bool announce_next_takeover = true
 
SyncRepConfigData * SyncRepConfig = NULL
 
static int SyncRepWaitMode = SYNC_REP_NO_WAIT
 

Macro Definition Documentation

◆ SyncStandbysDefined

#define SyncStandbysDefined ( )     (SyncRepStandbyNames != NULL && SyncRepStandbyNames[0] != '\0')

Definition at line 93 of file syncrep.c.

149{
150 int mode;
151
152 /*
153 * This should be called while holding interrupts during a transaction
154 * commit to prevent the follow-up shared memory queue cleanups from
155 * being influenced by external interruptions.
156 */
158
159 /*
160 * Fast exit if user has not requested sync replication, or there are no
161 * sync replication standby names defined.
162 *
163 * Since this routine gets called every commit time, it's important to
164 * exit quickly if sync replication is not requested.
165 *
166 * We check WalSndCtl->sync_standbys_status flag without the lock and exit
167 * immediately if SYNC_STANDBY_INIT is set (the checkpointer has
168 * initialized this data) but SYNC_STANDBY_DEFINED is missing (no sync
169 * replication requested).
170 *
171 * If SYNC_STANDBY_DEFINED is set, we need to check the status again later
172 * while holding the lock, to check the flag and operate the sync rep
173 * queue atomically. This is necessary to avoid the race condition
174 * described in SyncRepUpdateSyncStandbysDefined(). On the other hand, if
175 * SYNC_STANDBY_DEFINED is not set, the lock is not necessary because we
176 * don't touch the queue.
177 */
178 if (!SyncRepRequested() ||
179 ((((volatile WalSndCtlData *) WalSndCtl)->sync_standbys_status) &
181 return;
182
183 /* Cap the level for anything other than commit to remote flush only. */
184 if (commit)
186 else
188
191
194
195 /*
196 * We don't wait for sync rep if SYNC_STANDBY_DEFINED is not set. See
197 * SyncRepUpdateSyncStandbysDefined().
198 *
199 * Also check that the standby hasn't already replied. Unlikely race
200 * condition but we'll be fetching that cache line anyway so it's likely
201 * to be a low cost check.
202 *
203 * If the sync standby data has not been initialized yet
204 * (SYNC_STANDBY_INIT is not set), fall back to a check based on the LSN,
205 * then do a direct GUC check.
206 */
208 {
211 {
213 return;
214 }
215 }
216 else if (lsn <= WalSndCtl->lsn[mode])
217 {
218 /*
219 * The LSN is older than what we need to wait for. The sync standby
220 * data has not been initialized yet, but we are OK to not wait
221 * because we know that there is no point in doing so based on the
222 * LSN.
223 */
225 return;
226 }
227 else if (!SyncStandbysDefined())
228 {
229 /*
230 * If we are here, the sync standby data has not been initialized yet,
231 * and the LSN is newer than what we need to wait for, so we have fallen
232 * back to the best thing we could do in this case: a check on
233 * SyncStandbysDefined() to see if the GUC is set or not.
234 *
235 * When the GUC has a value, we wait until the checkpointer updates
236 * the status data because we cannot be sure yet if we should wait or
237 * not. Here, the GUC has *no* value, we are sure that there is no
238 * point to wait; this matters for example when initializing a
239 * cluster, where we should never wait, and no sync standbys is the
240 * default behavior.
241 */
243 return;
244 }
245
246 /*
247 * Set our waitLSN so WALSender will know when to wake us, and add
248 * ourselves to the queue.
249 */
250 MyProc->waitLSN = lsn;
255
256 /* Alter ps display to show waiting for sync rep. */
258 {
259 char buffer[32];
260
261 sprintf(buffer, "waiting for %X/%08X", LSN_FORMAT_ARGS(lsn));
262 set_ps_display_suffix(buffer);
263 }
264
265 /*
266 * Wait for specified LSN to be confirmed.
267 *
268 * Each proc has its own wait latch, so we perform a normal latch
269 * check/wait loop here.
270 */
271 for (;;)
272 {
273 int rc;
274
275 /* Must reset the latch before testing state. */
277
278 /*
279 * Acquiring the lock is not needed, the latch ensures proper
280 * barriers. If it looks like we're done, we must really be done,
281 * because once walsender changes the state to SYNC_REP_WAIT_COMPLETE,
282 * it will never update it again, so we can't be seeing a stale value
283 * in that case.
284 */
286 break;
287
288 /*
289 * If a wait for synchronous replication is pending, we can neither
290 * acknowledge the commit nor raise ERROR or FATAL. The latter would
291 * lead the client to believe that the transaction aborted, which is
292 * not true: it's already committed locally. The former is no good
293 * either: the client has requested synchronous replication, and is
294 * entitled to assume that an acknowledged commit is also replicated,
295 * which might not be true. So in this case we issue a WARNING (which
296 * some clients may be able to interpret) and shut off further output.
297 * We do NOT reset ProcDiePending, so that the process will die after
298 * the commit is cleaned up.
299 */
300 if (ProcDiePending)
301 {
302 /*
303 * ProcDieSenderPid/Uid are read directly from the globals here
304 * rather than copied to locals first; a second SIGTERM could
305 * change them between reads, but that is harmless because the
306 * process is about to die anyway. The signal sender detail is
307 * inlined rather than using a separate errdetail() call because
308 * it must be appended to the existing detail message.
309 */
312 errmsg("canceling the wait for synchronous replication and terminating connection due to administrator command"),
313 errdetail("The transaction has already committed locally, but might not have been replicated to the standby.%s",
314 ProcDieSenderPid == 0 ? "" :
315 psprintf("\nSignal sent by PID %d, UID %d.",
317 (int) ProcDieSenderUid))));
320 break;
321 }
322
323 /*
324 * It's unclear what to do if a query cancel interrupt arrives. We
325 * can't actually abort at this point, but ignoring the interrupt
326 * altogether is not helpful, so we just terminate the wait with a
327 * suitable warning.
328 */
330 {
331 QueryCancelPending = false;
333 (errmsg("canceling wait for synchronous replication due to user request"),
334 errdetail("The transaction has already committed locally, but might not have been replicated to the standby.")));
336 break;
337 }
338
339 /*
340 * Wait on latch. Any condition that should wake us up will set the
341 * latch, so no need for timeout.
342 */
345
346 /*
347 * If the postmaster dies, we'll probably never get an acknowledgment,
348 * because all the wal sender processes will exit. So just bail out.
349 */
350 if (rc & WL_POSTMASTER_DEATH)
351 {
352 ProcDiePending = true;
355 break;
356 }
357 }
358
359 /*
360 * WalSender has checked our LSN and has removed us from queue. Clean up
361 * state and leave. It's OK to reset these shared memory fields without
362 * holding SyncRepLock, because any walsenders will ignore us anyway when
363 * we're not on the queue. We need a read barrier to make sure we see the
364 * changes to the queue link (this might be unnecessary without
365 * assertions, but better safe than sorry).
366 */
371
372 /* reset ps display to remove the suffix */
375}
376
377/*
378 * Insert MyProc into the specified SyncRepQueue, maintaining sorted invariant.
379 *
380 * Usually we will go at tail of queue, though it's possible that we arrive
381 * here out of order, so start at tail and work back to insertion point.
382 */
383static void
385{
386 dlist_head *queue;
387 dlist_iter iter;
388
390 queue = &WalSndCtl->SyncRepQueue[mode];
391
392 dlist_reverse_foreach(iter, queue)
393 {
394 PGPROC *proc = dlist_container(PGPROC, syncRepLinks, iter.cur);
395
396 /*
397 * Stop at the queue element that we should insert after to ensure the
398 * queue is ordered by LSN.
399 */
400 if (proc->waitLSN < MyProc->waitLSN)
401 {
403 return;
404 }
405 }
406
407 /*
408 * If we get here, the list was either empty, or this process needs to be
409 * at the head.
410 */
412}
413
414/*
415 * Acquire SyncRepLock and cancel any wait currently in progress.
416 */
417static void
419{
425}
426
427void
429{
430 /*
431 * First check if we are removed from the queue without the lock to not
432 * slow down backend exit.
433 */
435 {
437
438 /* maybe we have just been removed, so recheck */
441
443 }
444}
445
446/*
447 * ===========================================================
448 * Synchronous Replication functions for wal sender processes
449 * ===========================================================
450 */
451
452/*
453 * Take any action required to initialise sync rep state from config
454 * data. Called at WALSender startup and after each SIGHUP.
455 */
456void
458{
459 int priority;
460
461 /*
462 * Determine if we are a potential sync standby and remember the result
463 * for handling replies from standby.
464 */
467 {
471
473 (errmsg_internal("standby \"%s\" now has synchronous standby priority %d",
475 }
476}
477
478/*
479 * Update the LSNs on each queue based upon our latest state. This
480 * implements a simple policy of first-valid-sync-standby-releases-waiter.
481 *
482 * Other policies are possible, which would change what we do here and
483 * perhaps also which information we store as well.
484 */
485void
487{
492 bool got_recptr;
493 bool am_sync;
494 int numwrite = 0;
495 int numflush = 0;
496 int numapply = 0;
497
498 /*
499 * If this WALSender is serving a standby that is not on the list of
500 * potential sync standbys then we have nothing to do. If we are still
501 * starting up, still running base backup or the current flush position is
502 * still invalid, then leave quickly also. Streaming or stopping WAL
503 * senders are allowed to release waiters.
504 */
509 {
511 return;
512 }
513
514 /*
515 * We're a potential sync standby. Release waiters if there are enough
516 * sync standbys and we are considered as sync.
517 */
519
520 /*
521 * Check whether we are a sync standby or not, and calculate the synced
522 * positions among all sync standbys. (Note: although this step does not
523 * of itself require holding SyncRepLock, it seems like a good idea to do
524 * it after acquiring the lock. This ensures that the WAL pointers we use
525 * to release waiters are newer than any previous execution of this
526 * routine used.)
527 */
529
530 /*
531 * If we are managing a sync standby, though we weren't prior to this,
532 * then announce we are now a sync standby.
533 */
535 {
537
539 ereport(LOG,
540 (errmsg("standby \"%s\" is now a synchronous standby with priority %d",
542 else
543 ereport(LOG,
544 (errmsg("standby \"%s\" is now a candidate for quorum synchronous standby",
546 }
547
548 /*
549 * If the number of sync standbys is less than requested or we aren't
550 * managing a sync standby then just leave.
551 */
552 if (!got_recptr || !am_sync)
553 {
556 return;
557 }
558
559 /*
560 * Set the lsn first so that when we wake backends they will release up to
561 * this location.
562 */
564 {
567 }
569 {
572 }
574 {
577 }
578
580
581 elog(DEBUG3, "released %d procs up to write %X/%08X, %d procs up to flush %X/%08X, %d procs up to apply %X/%08X",
585}
586
587/*
588 * Calculate the synced Write, Flush and Apply positions among sync standbys.
589 *
590 * Return false if the number of sync standbys is less than
591 * synchronous_standby_names specifies. Otherwise return true and
592 * store the positions into *writePtr, *flushPtr and *applyPtr.
593 *
594 * On return, *am_sync is set to true if this walsender is connecting to a
595 * sync standby. Otherwise it's set to false.
596 */
597static bool
600{
602 int num_standbys;
603 int i;
604
605 /* Initialize default results */
609 *am_sync = false;
610
611 /* Quick out if not even configured to be synchronous */
612 if (SyncRepConfig == NULL)
613 return false;
614
615 /* Get standbys that are considered as synchronous at this moment */
617
618 /* Am I among the candidate sync standbys? */
619 for (i = 0; i < num_standbys; i++)
620 {
621 if (sync_standbys[i].is_me)
622 {
623 *am_sync = true;
624 break;
625 }
626 }
627
628 /*
629 * Nothing more to do if we are not managing a sync standby or there are
630 * not enough synchronous standbys.
631 */
632 if (!(*am_sync) ||
634 {
636 return false;
637 }
638
639 /*
640 * In a priority-based sync replication, the synced positions are the
641 * oldest ones among sync standbys. In a quorum-based, they are the Nth
642 * latest ones.
643 *
644 * SyncRepGetNthLatestSyncRecPtr() also can calculate the oldest
645 * positions. But we use SyncRepGetOldestSyncRecPtr() for that calculation
646 * because it's a bit more efficient.
647 *
648 * XXX If the numbers of current and requested sync standbys are the same,
649 * we can use SyncRepGetOldestSyncRecPtr() to calculate the synced
650 * positions even in a quorum-based sync replication.
651 */
653 {
656 }
657 else
658 {
662 }
663
665 return true;
666}
667
668/*
669 * Calculate the oldest Write, Flush and Apply positions among sync standbys.
670 */
671static void
676 int num_standbys)
677{
678 int i;
679
680 /*
681 * Scan through all sync standbys and calculate the oldest Write, Flush
682 * and Apply positions. We assume *writePtr et al were initialized to
683 * InvalidXLogRecPtr.
684 */
685 for (i = 0; i < num_standbys; i++)
686 {
688 XLogRecPtr flush = sync_standbys[i].flush;
689 XLogRecPtr apply = sync_standbys[i].apply;
690
692 *writePtr = write;
693 if (!XLogRecPtrIsValid(*flushPtr) || *flushPtr > flush)
694 *flushPtr = flush;
695 if (!XLogRecPtrIsValid(*applyPtr) || *applyPtr > apply)
696 *applyPtr = apply;
697 }
698}
699
700/*
701 * Calculate the Nth latest Write, Flush and Apply positions among sync
702 * standbys.
703 */
704static void
709 int num_standbys,
710 uint8 nth)
711{
715 int i;
716
717 /* Should have enough candidates, or somebody messed up */
718 Assert(nth > 0 && nth <= num_standbys);
719
723
724 for (i = 0; i < num_standbys; i++)
725 {
726 write_array[i] = sync_standbys[i].write;
727 flush_array[i] = sync_standbys[i].flush;
728 apply_array[i] = sync_standbys[i].apply;
729 }
730
731 /* Sort each array in descending order */
735
736 /* Get Nth latest Write, Flush, Apply positions */
737 *writePtr = write_array[nth - 1];
738 *flushPtr = flush_array[nth - 1];
739 *applyPtr = apply_array[nth - 1];
740
744}
745
746/*
747 * Compare lsn in order to sort array in descending order.
748 */
749static int
750cmp_lsn(const void *a, const void *b)
751{
752 XLogRecPtr lsn1 = *((const XLogRecPtr *) a);
753 XLogRecPtr lsn2 = *((const XLogRecPtr *) b);
754
755 return pg_cmp_u64(lsn2, lsn1);
756}
757
758/*
759 * Return data about walsenders that are candidates to be sync standbys.
760 *
761 * *standbys is set to a palloc'd array of structs of per-walsender data,
762 * and the number of valid entries (candidate sync senders) is returned.
763 * (This might be more or fewer than num_sync; caller must check.)
764 */
765int
767{
768 int i;
769 int n;
770
771 /* Create result array */
773
774 /* Quick exit if sync replication is not requested */
775 if (SyncRepConfig == NULL)
776 return 0;
777
778 /* Collect raw data from shared memory */
779 n = 0;
780 for (i = 0; i < max_wal_senders; i++)
781 {
782 volatile WalSnd *walsnd; /* Use volatile pointer to prevent code
783 * rearrangement */
785 WalSndState state; /* not included in SyncRepStandbyData */
786
788 stby = *standbys + n;
789
790 SpinLockAcquire(&walsnd->mutex);
791 stby->pid = walsnd->pid;
792 state = walsnd->state;
793 stby->write = walsnd->write;
794 stby->flush = walsnd->flush;
795 stby->apply = walsnd->apply;
796 stby->sync_standby_priority = walsnd->sync_standby_priority;
797 SpinLockRelease(&walsnd->mutex);
798
799 /* Must be active */
800 if (stby->pid == 0)
801 continue;
802
803 /* Must be streaming or stopping */
806 continue;
807
808 /* Must be synchronous */
809 if (stby->sync_standby_priority == 0)
810 continue;
811
812 /* Must have a valid flush position */
813 if (!XLogRecPtrIsValid(stby->flush))
814 continue;
815
816 /* OK, it's a candidate */
817 stby->walsnd_index = i;
818 stby->is_me = (walsnd == MyWalSnd);
819 n++;
820 }
821
822 /*
823 * In quorum mode, we return all the candidates. In priority mode, if we
824 * have too many candidates then return only the num_sync ones of highest
825 * priority.
826 */
829 {
830 /* Sort by priority ... */
831 qsort(*standbys, n, sizeof(SyncRepStandbyData),
833 /* ... then report just the first num_sync ones */
835 }
836
837 return n;
838}
839
840/*
841 * qsort comparator to sort SyncRepStandbyData entries by priority
842 */
843static int
844standby_priority_comparator(const void *a, const void *b)
845{
846 const SyncRepStandbyData *sa = (const SyncRepStandbyData *) a;
847 const SyncRepStandbyData *sb = (const SyncRepStandbyData *) b;
848
849 /* First, sort by increasing priority value */
850 if (sa->sync_standby_priority != sb->sync_standby_priority)
851 return sa->sync_standby_priority - sb->sync_standby_priority;
852
853 /*
854 * We might have equal priority values; arbitrarily break ties by position
855 * in the WalSnd array. (This is utterly bogus, since that is arrival
856 * order dependent, but there are regression tests that rely on it.)
857 */
858 return sa->walsnd_index - sb->walsnd_index;
859}
860
861
862/*
863 * Check if we are in the list of sync standbys, and if so, determine
864 * priority sequence. Return priority if set, or zero to indicate that
865 * we are not a potential sync standby.
866 *
867 * Compare the parameter SyncRepStandbyNames against the application_name
868 * for this WALSender, or allow any name if we find a wildcard "*".
869 */
870static int
872{
873 const char *standby_name;
874 int priority;
875 bool found = false;
876
877 /*
878 * Since synchronous cascade replication is not allowed, we always set the
879 * priority of cascading walsender to zero.
880 */
882 return 0;
883
885 return 0;
886
889 {
891 strcmp(standby_name, "*") == 0)
892 {
893 found = true;
894 break;
895 }
897 }
898
899 if (!found)
900 return 0;
901
902 /*
903 * In quorum-based sync replication, all the standbys in the list have the
904 * same priority, one.
905 */
907}
908
909/*
910 * Walk the specified queue from head. Set the state of any backends that
911 * need to be woken, remove them from the queue, and then wake them.
912 * Pass all = true to wake whole queue; otherwise, just wake up to
913 * the walsender's LSN.
914 *
915 * The caller must hold SyncRepLock in exclusive mode.
916 */
917static int
918SyncRepWakeQueue(bool all, int mode)
919{
921 int numprocs = 0;
923
927
929 {
930 PGPROC *proc = dlist_container(PGPROC, syncRepLinks, iter.cur);
931
932 /*
933 * Assume the queue is ordered by LSN
934 */
935 if (!all && walsndctl->lsn[mode] < proc->waitLSN)
936 return numprocs;
937
938 /*
939 * Remove from queue.
940 */
942
943 /*
944 * SyncRepWaitForLSN() reads syncRepState without holding the lock, so
945 * make sure that it sees the queue link being removed before the
946 * syncRepState change.
947 */
949
950 /*
951 * Set state to complete; see SyncRepWaitForLSN() for discussion of
952 * the various states.
953 */
955
956 /*
957 * Wake only when we have set state and removed from queue.
958 */
959 SetLatch(&(proc->procLatch));
960
961 numprocs++;
962 }
963
964 return numprocs;
965}
966
967/*
968 * The checkpointer calls this as needed to update the shared
969 * sync_standbys_status flag, so that backends don't remain permanently wedged
970 * if synchronous_standby_names is unset. It's safe to check the current value
971 * without the lock, because it's only ever updated by one process. But we
972 * must take the lock to change it.
973 */
974void
976{
978
981 {
983
984 /*
985 * If synchronous_standby_names has been reset to empty, it's futile
986 * for backends to continue waiting. Since the user no longer wants
987 * synchronous replication, we'd better wake them up.
988 */
990 {
991 int i;
992
993 for (i = 0; i < NUM_SYNC_REP_WAIT_MODE; i++)
994 SyncRepWakeQueue(true, i);
995 }
996
997 /*
998 * Only allow people to join the queue when there are synchronous
999 * standbys defined. Without this interlock, there's a race
1000 * condition: we might wake up all the current waiters; then, some
1001 * backend that hasn't yet reloaded its config might go to sleep on
1002 * the queue (and never wake up). This prevents that.
1003 */
1006
1008 }
1010 {
1012
1013 /*
1014 * Note that there is no need to wake up the queues here. We would
1015 * reach this path only if SyncStandbysDefined() returns false, or it
1016 * would mean that some backends are waiting with the GUC set. See
1017 * SyncRepWaitForLSN().
1018 */
1020
1021 /*
1022 * Even if there is no sync standby defined, let the readers of this
1023 * information know that the sync standby data has been initialized.
1024 * This can just be done once, hence the previous check on
1025 * SYNC_STANDBY_INIT to avoid useless work.
1026 */
1028
1030 }
1031}
1032
1033#ifdef USE_ASSERT_CHECKING
1034static bool
1036{
1038 dlist_iter iter;
1039
1041
1043
1045 {
1046 PGPROC *proc = dlist_container(PGPROC, syncRepLinks, iter.cur);
1047
1048 /*
1049 * Check the queue is ordered by LSN and that multiple procs don't
1050 * have matching LSNs
1051 */
1052 if (proc->waitLSN <= lastLSN)
1053 return false;
1054
1055 lastLSN = proc->waitLSN;
1056 }
1057
1058 return true;
1059}
1060#endif
1061
1062/*
1063 * ===========================================================
1064 * Synchronous Replication functions executed by any process
1065 * ===========================================================
1066 */
1067
1068bool
1070{
1071 if (*newval != NULL && (*newval)[0] != '\0')
1072 {
1073 yyscan_t scanner;
1074 int parse_rc;
1076
1077 /* Result of parsing is returned in one of these two variables */
1080
1081 /* Parse the synchronous_standby_names string */
1082 syncrep_scanner_init(*newval, &scanner);
1084 syncrep_scanner_finish(scanner);
1085
1086 if (parse_rc != 0 || syncrep_parse_result == NULL)
1087 {
1091 else
1092 /* translator: %s is a GUC name */
1093 GUC_check_errdetail("\"%s\" parser failed.",
1094 "synchronous_standby_names");
1095 return false;
1096 }
1097
1098 if (syncrep_parse_result->num_sync <= 0)
1099 {
1100 GUC_check_errmsg("number of synchronous standbys (%d) must be greater than zero",
1101 syncrep_parse_result->num_sync);
1102 return false;
1103 }
1104
1105 /* GUC extra value must be guc_malloc'd, not palloc'd */
1107 guc_malloc(LOG, syncrep_parse_result->config_size);
1108 if (pconf == NULL)
1109 return false;
1111
1112 *extra = pconf;
1113
1114 /*
1115 * We need not explicitly clean up syncrep_parse_result. It, and any
1116 * other cruft generated during parsing, will be freed when the
1117 * current memory context is deleted. (This code is generally run in
1118 * a short-lived context used for config file processing, so that will
1119 * not be very long.)
1120 */
1121 }
1122 else
1123 *extra = NULL;
1124
1125 return true;
1126}
1127
1128void
1129assign_synchronous_standby_names(const char *newval, void *extra)
1130{
1131 SyncRepConfig = (SyncRepConfigData *) extra;
1132}
1133
1134void
1135assign_synchronous_commit(int newval, void *extra)
1136{
1137 switch (newval)
1138 {
1141 break;
1144 break;
1147 break;
1148 default:
1150 break;
1151 }
1152}
#define pg_read_barrier()
Definition atomics.h:154
#define pg_write_barrier()
Definition atomics.h:155
#define Min(x, y)
Definition c.h:1091
uint8_t uint8
Definition c.h:622
#define Assert(condition)
Definition c.h:943
memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets))
void * yyscan_t
Definition cubedata.h:65
@ DestNone
Definition dest.h:87
int errcode(int sqlerrcode)
Definition elog.c:874
#define LOG
Definition elog.h:32
#define DEBUG3
Definition elog.h:29
int errdetail(const char *fmt,...) pg_attribute_printf(1
int int errmsg_internal(const char *fmt,...) pg_attribute_printf(1
#define WARNING
Definition elog.h:37
#define DEBUG1
Definition elog.h:31
#define elog(elevel,...)
Definition elog.h:228
#define ereport(elevel,...)
Definition elog.h:152
#define palloc_array(type, count)
Definition fe_memutils.h:76
volatile int ProcDieSenderPid
Definition globals.c:46
volatile uint32 InterruptHoldoffCount
Definition globals.c:43
volatile int ProcDieSenderUid
Definition globals.c:47
volatile sig_atomic_t QueryCancelPending
Definition globals.c:33
struct Latch * MyLatch
Definition globals.c:65
volatile sig_atomic_t ProcDiePending
Definition globals.c:34
void GUC_check_errcode(int sqlerrcode)
Definition guc.c:6666
void * guc_malloc(int elevel, size_t size)
Definition guc.c:637
#define newval
#define GUC_check_errmsg
Definition guc.h:503
#define GUC_check_errdetail
Definition guc.h:507
GucSource
Definition guc.h:112
char * application_name
Definition guc_tables.c:589
static void dlist_insert_after(dlist_node *after, dlist_node *node)
Definition ilist.h:381
#define dlist_foreach(iter, lhead)
Definition ilist.h:623
static void dlist_delete_thoroughly(dlist_node *node)
Definition ilist.h:416
static bool dlist_node_is_detached(const dlist_node *node)
Definition ilist.h:525
#define dlist_reverse_foreach(iter, lhead)
Definition ilist.h:654
static void dlist_push_head(dlist_head *head, dlist_node *node)
Definition ilist.h:347
#define dlist_foreach_modify(iter, lhead)
Definition ilist.h:640
#define dlist_container(type, membername, ptr)
Definition ilist.h:593
static int pg_cmp_u64(uint64 a, uint64 b)
Definition int.h:731
#define write(a, b, c)
Definition win32.h:14
int b
Definition isn.c:74
int a
Definition isn.c:73
int i
Definition isn.c:77
void SetLatch(Latch *latch)
Definition latch.c:290
void ResetLatch(Latch *latch)
Definition latch.c:374
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition latch.c:172
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1150
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1929
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1767
@ LW_EXCLUSIVE
Definition lwlock.h:104
void pfree(void *pointer)
Definition mcxt.c:1616
static char * errmsg
static PgChecksumMode mode
static rewind_source * source
Definition pg_rewind.c:89
int pg_strcasecmp(const char *s1, const char *s2)
#define sprintf
Definition port.h:262
#define qsort(a, b, c, d)
Definition port.h:495
CommandDest whereToSendOutput
Definition postgres.c:97
static int fb(int x)
void set_ps_display_remove_suffix(void)
Definition ps_status.c:440
void set_ps_display_suffix(const char *suffix)
Definition ps_status.c:388
bool update_process_title
Definition ps_status.c:31
char * psprintf(const char *fmt,...)
Definition psprintf.c:43
static void SpinLockRelease(volatile slock_t *lock)
Definition spin.h:62
static void SpinLockAcquire(volatile slock_t *lock)
Definition spin.h:56
PGPROC * MyProc
Definition proc.c:71
Definition proc.h:179
XLogRecPtr waitLSN
Definition proc.h:341
dlist_node syncRepLinks
Definition proc.h:343
int syncRepState
Definition proc.h:342
Latch procLatch
Definition proc.h:256
uint8 syncrep_method
Definition syncrep.h:68
char member_names[FLEXIBLE_ARRAY_MEMBER]
Definition syncrep.h:71
WalSnd walsnds[FLEXIBLE_ARRAY_MEMBER]
dlist_head SyncRepQueue[NUM_SYNC_REP_WAIT_MODE]
slock_t mutex
XLogRecPtr flush
WalSndState state
int sync_standby_priority
dlist_node * cur
Definition ilist.h:179
dlist_node * cur
Definition ilist.h:200
static int SyncRepWaitMode
Definition syncrep.c:99
void SyncRepInitConfig(void)
Definition syncrep.c:458
static bool SyncRepGetSyncRecPtr(XLogRecPtr *writePtr, XLogRecPtr *flushPtr, XLogRecPtr *applyPtr, bool *am_sync)
Definition syncrep.c:599
static void SyncRepGetNthLatestSyncRecPtr(XLogRecPtr *writePtr, XLogRecPtr *flushPtr, XLogRecPtr *applyPtr, SyncRepStandbyData *sync_standbys, int num_standbys, uint8 nth)
Definition syncrep.c:706
void assign_synchronous_commit(int newval, void *extra)
Definition syncrep.c:1136
void assign_synchronous_standby_names(const char *newval, void *extra)
Definition syncrep.c:1130
static int standby_priority_comparator(const void *a, const void *b)
Definition syncrep.c:845
static int SyncRepWakeQueue(bool all, int mode)
Definition syncrep.c:919
SyncRepConfigData * SyncRepConfig
Definition syncrep.c:98
int SyncRepGetCandidateStandbys(SyncRepStandbyData **standbys)
Definition syncrep.c:767
void SyncRepReleaseWaiters(void)
Definition syncrep.c:487
void SyncRepUpdateSyncStandbysDefined(void)
Definition syncrep.c:976
static bool announce_next_takeover
Definition syncrep.c:96
static int SyncRepGetStandbyPriority(void)
Definition syncrep.c:872
static void SyncRepQueueInsert(int mode)
Definition syncrep.c:385
static void SyncRepCancelWait(void)
Definition syncrep.c:419
bool check_synchronous_standby_names(char **newval, void **extra, GucSource source)
Definition syncrep.c:1070
static void SyncRepGetOldestSyncRecPtr(XLogRecPtr *writePtr, XLogRecPtr *flushPtr, XLogRecPtr *applyPtr, SyncRepStandbyData *sync_standbys, int num_standbys)
Definition syncrep.c:673
void SyncRepCleanupAtProcExit(void)
Definition syncrep.c:429
static int cmp_lsn(const void *a, const void *b)
Definition syncrep.c:751
#define SyncStandbysDefined()
Definition syncrep.c:93
#define SYNC_REP_PRIORITY
Definition syncrep.h:35
#define NUM_SYNC_REP_WAIT_MODE
Definition syncrep.h:27
#define SyncRepRequested()
Definition syncrep.h:18
#define SYNC_REP_NO_WAIT
Definition syncrep.h:22
#define SYNC_REP_WAIT_WRITE
Definition syncrep.h:23
#define SYNC_REP_WAITING
Definition syncrep.h:31
#define SYNC_REP_WAIT_COMPLETE
Definition syncrep.h:32
#define SYNC_REP_WAIT_FLUSH
Definition syncrep.h:24
#define SYNC_REP_NOT_WAITING
Definition syncrep.h:30
int syncrep_yyparse(SyncRepConfigData **syncrep_parse_result_p, char **syncrep_parse_error_msg_p, yyscan_t yyscanner)
#define SYNC_REP_WAIT_APPLY
Definition syncrep.h:25
void syncrep_scanner_finish(yyscan_t yyscanner)
void syncrep_scanner_init(const char *str, yyscan_t *yyscannerp)
#define WL_LATCH_SET
#define WL_POSTMASTER_DEATH
WalSnd * MyWalSnd
Definition walsender.c:132
int max_wal_senders
Definition walsender.c:141
bool am_cascading_walsender
Definition walsender.c:136
WalSndCtlData * WalSndCtl
Definition walsender.c:121
#define SYNC_STANDBY_DEFINED
WalSndState
@ WALSNDSTATE_STREAMING
@ WALSNDSTATE_STOPPING
#define SYNC_STANDBY_INIT
@ SYNCHRONOUS_COMMIT_REMOTE_WRITE
Definition xact.h:73
@ SYNCHRONOUS_COMMIT_REMOTE_APPLY
Definition xact.h:76
@ SYNCHRONOUS_COMMIT_REMOTE_FLUSH
Definition xact.h:75
#define XLogRecPtrIsValid(r)
Definition xlogdefs.h:29
#define LSN_FORMAT_ARGS(lsn)
Definition xlogdefs.h:47
uint64 XLogRecPtr
Definition xlogdefs.h:21
#define InvalidXLogRecPtr
Definition xlogdefs.h:28

Function Documentation

◆ assign_synchronous_commit()

void assign_synchronous_commit ( int  newval,
void *  extra 
)

◆ assign_synchronous_standby_names()

void assign_synchronous_standby_names ( const char *  newval,
void *  extra 
)

Definition at line 1130 of file syncrep.c.

1131{
1132 SyncRepConfig = (SyncRepConfigData *) extra;
1133}

References SyncRepConfig.

◆ check_synchronous_standby_names()

bool check_synchronous_standby_names ( char **  newval,
void **  extra,
GucSource  source 
)

Definition at line 1070 of file syncrep.c.

1071{
1072 if (*newval != NULL && (*newval)[0] != '\0')
1073 {
1074 yyscan_t scanner;
1075 int parse_rc;
1077
1078 /* Result of parsing is returned in one of these two variables */
1081
1082 /* Parse the synchronous_standby_names string */
1083 syncrep_scanner_init(*newval, &scanner);
1085 syncrep_scanner_finish(scanner);
1086
1087 if (parse_rc != 0 || syncrep_parse_result == NULL)
1088 {
1092 else
1093 /* translator: %s is a GUC name */
1094 GUC_check_errdetail("\"%s\" parser failed.",
1095 "synchronous_standby_names");
1096 return false;
1097 }
1098
1099 if (syncrep_parse_result->num_sync <= 0)
1100 {
1101 GUC_check_errmsg("number of synchronous standbys (%d) must be greater than zero",
1102 syncrep_parse_result->num_sync);
1103 return false;
1104 }
1105
1106 /* GUC extra value must be guc_malloc'd, not palloc'd */
1108 guc_malloc(LOG, syncrep_parse_result->config_size);
1109 if (pconf == NULL)
1110 return false;
1112
1113 *extra = pconf;
1114
1115 /*
1116 * We need not explicitly clean up syncrep_parse_result. It, and any
1117 * other cruft generated during parsing, will be freed when the
1118 * current memory context is deleted. (This code is generally run in
1119 * a short-lived context used for config file processing, so that will
1120 * not be very long.)
1121 */
1122 }
1123 else
1124 *extra = NULL;
1125
1126 return true;
1127}

References fb(), GUC_check_errcode(), GUC_check_errdetail, GUC_check_errmsg, guc_malloc(), LOG, memcpy(), newval, syncrep_scanner_finish(), syncrep_scanner_init(), and syncrep_yyparse().

◆ cmp_lsn()

static int cmp_lsn ( const void *  a,
const void *  b 
)
static

Definition at line 751 of file syncrep.c.

752{
753 XLogRecPtr lsn1 = *((const XLogRecPtr *) a);
754 XLogRecPtr lsn2 = *((const XLogRecPtr *) b);
755
756 return pg_cmp_u64(lsn2, lsn1);
757}

References a, b, fb(), and pg_cmp_u64().

Referenced by SyncRepGetNthLatestSyncRecPtr().

◆ standby_priority_comparator()

static int standby_priority_comparator ( const void *  a,
const void *  b 
)
static

Definition at line 845 of file syncrep.c.

846{
847 const SyncRepStandbyData *sa = (const SyncRepStandbyData *) a;
848 const SyncRepStandbyData *sb = (const SyncRepStandbyData *) b;
849
850 /* First, sort by increasing priority value */
851 if (sa->sync_standby_priority != sb->sync_standby_priority)
852 return sa->sync_standby_priority - sb->sync_standby_priority;
853
854 /*
855 * We might have equal priority values; arbitrarily break ties by position
856 * in the WalSnd array. (This is utterly bogus, since that is arrival
857 * order dependent, but there are regression tests that rely on it.)
858 */
859 return sa->walsnd_index - sb->walsnd_index;
860}

References a, b, and fb().

Referenced by SyncRepGetCandidateStandbys().

◆ SyncRepCancelWait()

◆ SyncRepCleanupAtProcExit()

void SyncRepCleanupAtProcExit ( void  )

Definition at line 429 of file syncrep.c.

430{
431 /*
432 * First check if we are removed from the queue without the lock to not
433 * slow down backend exit.
434 */
436 {
438
439 /* maybe we have just been removed, so recheck */
442
444 }
445}

References dlist_delete_thoroughly(), dlist_node_is_detached(), fb(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MyProc, and PGPROC::syncRepLinks.

Referenced by ProcKill().

◆ SyncRepGetCandidateStandbys()

int SyncRepGetCandidateStandbys ( SyncRepStandbyData **  standbys)

Definition at line 767 of file syncrep.c.

768{
769 int i;
770 int n;
771
772 /* Create result array */
774
775 /* Quick exit if sync replication is not requested */
776 if (SyncRepConfig == NULL)
777 return 0;
778
779 /* Collect raw data from shared memory */
780 n = 0;
781 for (i = 0; i < max_wal_senders; i++)
782 {
783 volatile WalSnd *walsnd; /* Use volatile pointer to prevent code
784 * rearrangement */
786 WalSndState state; /* not included in SyncRepStandbyData */
787
789 stby = *standbys + n;
790
791 SpinLockAcquire(&walsnd->mutex);
792 stby->pid = walsnd->pid;
793 state = walsnd->state;
794 stby->write = walsnd->write;
795 stby->flush = walsnd->flush;
796 stby->apply = walsnd->apply;
797 stby->sync_standby_priority = walsnd->sync_standby_priority;
798 SpinLockRelease(&walsnd->mutex);
799
800 /* Must be active */
801 if (stby->pid == 0)
802 continue;
803
804 /* Must be streaming or stopping */
807 continue;
808
809 /* Must be synchronous */
810 if (stby->sync_standby_priority == 0)
811 continue;
812
813 /* Must have a valid flush position */
814 if (!XLogRecPtrIsValid(stby->flush))
815 continue;
816
817 /* OK, it's a candidate */
818 stby->walsnd_index = i;
819 stby->is_me = (walsnd == MyWalSnd);
820 n++;
821 }
822
823 /*
824 * In quorum mode, we return all the candidates. In priority mode, if we
825 * have too many candidates then return only the num_sync ones of highest
826 * priority.
827 */
830 {
831 /* Sort by priority ... */
832 qsort(*standbys, n, sizeof(SyncRepStandbyData),
834 /* ... then report just the first num_sync ones */
836 }
837
838 return n;
839}

References fb(), i, max_wal_senders, MyWalSnd, SyncRepConfigData::num_sync, palloc_array, qsort, SpinLockAcquire(), SpinLockRelease(), standby_priority_comparator(), SYNC_REP_PRIORITY, SyncRepConfigData::syncrep_method, SyncRepConfig, WalSndCtl, WalSndCtlData::walsnds, WALSNDSTATE_STOPPING, WALSNDSTATE_STREAMING, and XLogRecPtrIsValid.

Referenced by pg_stat_get_wal_senders(), and SyncRepGetSyncRecPtr().

◆ SyncRepGetNthLatestSyncRecPtr()

static void SyncRepGetNthLatestSyncRecPtr ( XLogRecPtr *  writePtr,
XLogRecPtr *  flushPtr,
XLogRecPtr *  applyPtr,
SyncRepStandbyData *  sync_standbys,
int  num_standbys,
uint8  nth 
)
static

Definition at line 706 of file syncrep.c.

712{
716 int i;
717
718 /* Should have enough candidates, or somebody messed up */
719 Assert(nth > 0 && nth <= num_standbys);
720
724
725 for (i = 0; i < num_standbys; i++)
726 {
727 write_array[i] = sync_standbys[i].write;
728 flush_array[i] = sync_standbys[i].flush;
729 apply_array[i] = sync_standbys[i].apply;
730 }
731
732 /* Sort each array in descending order */
736
737 /* Get Nth latest Write, Flush, Apply positions */
738 *writePtr = write_array[nth - 1];
739 *flushPtr = flush_array[nth - 1];
740 *applyPtr = apply_array[nth - 1];
741
745}

References Assert, cmp_lsn(), fb(), i, palloc_array, pfree(), and qsort.

Referenced by SyncRepGetSyncRecPtr().

◆ SyncRepGetOldestSyncRecPtr()

static void SyncRepGetOldestSyncRecPtr ( XLogRecPtr *  writePtr,
XLogRecPtr *  flushPtr,
XLogRecPtr *  applyPtr,
SyncRepStandbyData *  sync_standbys,
int  num_standbys 
)
static

Definition at line 673 of file syncrep.c.

678{
679 int i;
680
681 /*
682 * Scan through all sync standbys and calculate the oldest Write, Flush
683 * and Apply positions. We assume *writePtr et al were initialized to
684 * InvalidXLogRecPtr.
685 */
686 for (i = 0; i < num_standbys; i++)
687 {
689 XLogRecPtr flush = sync_standbys[i].flush;
690 XLogRecPtr apply = sync_standbys[i].apply;
691
693 *writePtr = write;
694 if (!XLogRecPtrIsValid(*flushPtr) || *flushPtr > flush)
695 *flushPtr = flush;
696 if (!XLogRecPtrIsValid(*applyPtr) || *applyPtr > apply)
697 *applyPtr = apply;
698 }
699}

References fb(), i, write, and XLogRecPtrIsValid.

Referenced by SyncRepGetSyncRecPtr().

◆ SyncRepGetStandbyPriority()

static int SyncRepGetStandbyPriority ( void  )
static

Definition at line 872 of file syncrep.c.

873{
874 const char *standby_name;
875 int priority;
876 bool found = false;
877
878 /*
879 * Since synchronous cascade replication is not allowed, we always set the
880 * priority of cascading walsender to zero.
881 */
883 return 0;
884
886 return 0;
887
890 {
892 strcmp(standby_name, "*") == 0)
893 {
894 found = true;
895 break;
896 }
898 }
899
900 if (!found)
901 return 0;
902
903 /*
904 * In quorum-based sync replication, all the standbys in the list have the
905 * same priority, one.
906 */
908}

References am_cascading_walsender, application_name, fb(), SyncRepConfigData::member_names, SyncRepConfigData::nmembers, pg_strcasecmp(), SYNC_REP_PRIORITY, SyncRepConfigData::syncrep_method, SyncRepConfig, and SyncStandbysDefined.

Referenced by SyncRepInitConfig().

◆ SyncRepGetSyncRecPtr()

static bool SyncRepGetSyncRecPtr ( XLogRecPtr *  writePtr,
XLogRecPtr *  flushPtr,
XLogRecPtr *  applyPtr,
bool *  am_sync 
)
static

Definition at line 599 of file syncrep.c.

601{
603 int num_standbys;
604 int i;
605
606 /* Initialize default results */
610 *am_sync = false;
611
612 /* Quick out if not even configured to be synchronous */
613 if (SyncRepConfig == NULL)
614 return false;
615
616 /* Get standbys that are considered as synchronous at this moment */
618
619 /* Am I among the candidate sync standbys? */
620 for (i = 0; i < num_standbys; i++)
621 {
622 if (sync_standbys[i].is_me)
623 {
624 *am_sync = true;
625 break;
626 }
627 }
628
629 /*
630 * Nothing more to do if we are not managing a sync standby or there are
631 * not enough synchronous standbys.
632 */
633 if (!(*am_sync) ||
635 {
637 return false;
638 }
639
640 /*
641 * In a priority-based sync replication, the synced positions are the
642 * oldest ones among sync standbys. In a quorum-based, they are the Nth
643 * latest ones.
644 *
645 * SyncRepGetNthLatestSyncRecPtr() also can calculate the oldest
646 * positions. But we use SyncRepGetOldestSyncRecPtr() for that calculation
647 * because it's a bit more efficient.
648 *
649 * XXX If the numbers of current and requested sync standbys are the same,
650 * we can use SyncRepGetOldestSyncRecPtr() to calculate the synced
651 * positions even in a quorum-based sync replication.
652 */
654 {
657 }
658 else
659 {
663 }
664
666 return true;
667}

References fb(), i, InvalidXLogRecPtr, SyncRepConfigData::num_sync, pfree(), SYNC_REP_PRIORITY, SyncRepConfigData::syncrep_method, SyncRepConfig, SyncRepGetCandidateStandbys(), SyncRepGetNthLatestSyncRecPtr(), and SyncRepGetOldestSyncRecPtr().

Referenced by SyncRepReleaseWaiters().

◆ SyncRepInitConfig()

void SyncRepInitConfig ( void  )

Definition at line 458 of file syncrep.c.

459{
460 int priority;
461
462 /*
463 * Determine if we are a potential sync standby and remember the result
464 * for handling replies from standby.
465 */
468 {
472
474 (errmsg_internal("standby \"%s\" now has synchronous standby priority %d",
476 }
477}

References application_name, DEBUG1, ereport, errmsg_internal(), fb(), WalSnd::mutex, MyWalSnd, SpinLockAcquire(), SpinLockRelease(), WalSnd::sync_standby_priority, and SyncRepGetStandbyPriority().

Referenced by StartLogicalReplication(), StartReplication(), and WalSndHandleConfigReload().

◆ SyncRepQueueInsert()

static void SyncRepQueueInsert ( int  mode)
static

Definition at line 385 of file syncrep.c.

386{
387 dlist_head *queue;
388 dlist_iter iter;
389
391 queue = &WalSndCtl->SyncRepQueue[mode];
392
393 dlist_reverse_foreach(iter, queue)
394 {
395 PGPROC *proc = dlist_container(PGPROC, syncRepLinks, iter.cur);
396
397 /*
398 * Stop at the queue element that we should insert after to ensure the
399 * queue is ordered by LSN.
400 */
401 if (proc->waitLSN < MyProc->waitLSN)
402 {
404 return;
405 }
406 }
407
408 /*
409 * If we get here, the list was either empty, or this process needs to be
410 * at the head.
411 */
413}

References Assert, dlist_iter::cur, dlist_container, dlist_insert_after(), dlist_push_head(), dlist_reverse_foreach, mode, MyProc, NUM_SYNC_REP_WAIT_MODE, PGPROC::syncRepLinks, WalSndCtlData::SyncRepQueue, PGPROC::waitLSN, and WalSndCtl.

Referenced by SyncRepWaitForLSN().

◆ SyncRepReleaseWaiters()

void SyncRepReleaseWaiters ( void  )

Definition at line 487 of file syncrep.c.

488{
493 bool got_recptr;
494 bool am_sync;
495 int numwrite = 0;
496 int numflush = 0;
497 int numapply = 0;
498
499 /*
500 * If this WALSender is serving a standby that is not on the list of
501 * potential sync standbys then we have nothing to do. If we are still
502 * starting up, still running base backup or the current flush position is
503 * still invalid, then leave quickly also. Streaming or stopping WAL
504 * senders are allowed to release waiters.
505 */
510 {
512 return;
513 }
514
515 /*
516 * We're a potential sync standby. Release waiters if there are enough
517 * sync standbys and we are considered as sync.
518 */
520
521 /*
522 * Check whether we are a sync standby or not, and calculate the synced
523 * positions among all sync standbys. (Note: although this step does not
524 * of itself require holding SyncRepLock, it seems like a good idea to do
525 * it after acquiring the lock. This ensures that the WAL pointers we use
526 * to release waiters are newer than any previous execution of this
527 * routine used.)
528 */
530
531 /*
532 * If we are managing a sync standby, though we weren't prior to this,
533 * then announce we are now a sync standby.
534 */
536 {
538
540 ereport(LOG,
541 (errmsg("standby \"%s\" is now a synchronous standby with priority %d",
543 else
544 ereport(LOG,
545 (errmsg("standby \"%s\" is now a candidate for quorum synchronous standby",
547 }
548
549 /*
550 * If the number of sync standbys is less than requested or we aren't
551 * managing a sync standby then just leave.
552 */
553 if (!got_recptr || !am_sync)
554 {
557 return;
558 }
559
560 /*
561 * Set the lsn first so that when we wake backends they will release up to
562 * this location.
563 */
565 {
568 }
570 {
573 }
575 {
578 }
579
581
582 elog(DEBUG3, "released %d procs up to write %X/%08X, %d procs up to flush %X/%08X, %d procs up to apply %X/%08X",
586}

References announce_next_takeover, application_name, DEBUG3, elog, ereport, errmsg, fb(), WalSnd::flush, LOG, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MyWalSnd, WalSnd::state, SYNC_REP_PRIORITY, SYNC_REP_WAIT_APPLY, SYNC_REP_WAIT_FLUSH, SYNC_REP_WAIT_WRITE, WalSnd::sync_standby_priority, SyncRepConfigData::syncrep_method, SyncRepConfig, SyncRepGetSyncRecPtr(), SyncRepWakeQueue(), WalSndCtl, WALSNDSTATE_STOPPING, WALSNDSTATE_STREAMING, and XLogRecPtrIsValid.

Referenced by ProcessStandbyReplyMessage(), and WalSndHandleConfigReload().

◆ SyncRepUpdateSyncStandbysDefined()

void SyncRepUpdateSyncStandbysDefined ( void  )

Definition at line 976 of file syncrep.c.

977{
979
982 {
984
985 /*
986 * If synchronous_standby_names has been reset to empty, it's futile
987 * for backends to continue waiting. Since the user no longer wants
988 * synchronous replication, we'd better wake them up.
989 */
991 {
992 int i;
993
994 for (i = 0; i < NUM_SYNC_REP_WAIT_MODE; i++)
995 SyncRepWakeQueue(true, i);
996 }
997
998 /*
999 * Only allow people to join the queue when there are synchronous
1000 * standbys defined. Without this interlock, there's a race
1001 * condition: we might wake up all the current waiters; then, some
1002 * backend that hasn't yet reloaded its config might go to sleep on
1003 * the queue (and never wake up). This prevents that.
1004 */
1007
1009 }
1011 {
1013
1014 /*
1015 * Note that there is no need to wake up the queues here. We would
1016 * reach this path only if SyncStandbysDefined() returns false, or it
1017 * would mean that some backends are waiting with the GUC set. See
1018 * SyncRepWaitForLSN().
1019 */
1021
1022 /*
1023 * Even if there is no sync standby defined, let the readers of this
1024 * information know that the sync standby data has been initialized.
1025 * This can just be done once, hence the previous check on
1026 * SYNC_STANDBY_INIT to avoid useless work.
1027 */
1029
1031 }
1032}

References Assert, fb(), i, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), NUM_SYNC_REP_WAIT_MODE, SYNC_STANDBY_DEFINED, SYNC_STANDBY_INIT, WalSndCtlData::sync_standbys_status, SyncRepWakeQueue(), SyncStandbysDefined, and WalSndCtl.

Referenced by UpdateSharedMemoryConfig().

◆ SyncRepWaitForLSN()

void SyncRepWaitForLSN ( XLogRecPtr  lsn,
bool  commit 
)

Definition at line 149 of file syncrep.c.

150{
151 int mode;
152
153 /*
154 * This should be called while holding interrupts during a transaction
155 * commit to prevent the follow-up shared memory queue cleanups to be
156 * influenced by external interruptions.
157 */
159
160 /*
161 * Fast exit if user has not requested sync replication, or there are no
162 * sync replication standby names defined.
163 *
164 * Since this routine gets called every commit time, it's important to
165 * exit quickly if sync replication is not requested.
166 *
167 * We check WalSndCtl->sync_standbys_status flag without the lock and exit
168 * immediately if SYNC_STANDBY_INIT is set (the checkpointer has
169 * initialized this data) but SYNC_STANDBY_DEFINED is missing (no sync
170 * replication requested).
171 *
172 * If SYNC_STANDBY_DEFINED is set, we need to check the status again later
173 * while holding the lock, to check the flag and operate the sync rep
174 * queue atomically. This is necessary to avoid the race condition
175 * described in SyncRepUpdateSyncStandbysDefined(). On the other hand, if
176 * SYNC_STANDBY_DEFINED is not set, the lock is not necessary because we
177 * don't touch the queue.
178 */
179 if (!SyncRepRequested() ||
180 ((((volatile WalSndCtlData *) WalSndCtl)->sync_standbys_status) &
182 return;
183
184 /* Cap the level for anything other than commit to remote flush only. */
185 if (commit)
187 else
189
192
195
196 /*
197 * We don't wait for sync rep if SYNC_STANDBY_DEFINED is not set. See
198 * SyncRepUpdateSyncStandbysDefined().
199 *
200 * Also check that the standby hasn't already replied. Unlikely race
201 * condition but we'll be fetching that cache line anyway so it's likely
202 * to be a low cost check.
203 *
204 * If the sync standby data has not been initialized yet
205 * (SYNC_STANDBY_INIT is not set), fall back to a check based on the LSN,
206 * then do a direct GUC check.
207 */
209 {
212 {
214 return;
215 }
216 }
217 else if (lsn <= WalSndCtl->lsn[mode])
218 {
219 /*
220 * The LSN is older than what we need to wait for. The sync standby
221 * data has not been initialized yet, but we are OK to not wait
222 * because we know that there is no point in doing so based on the
223 * LSN.
224 */
226 return;
227 }
228 else if (!SyncStandbysDefined())
229 {
230 /*
231 * If we are here, the sync standby data has not been initialized yet,
232 * and the LSN is newer than what need to wait for, so we have fallen
233 * back to the best thing we could do in this case: a check on
234 * SyncStandbysDefined() to see if the GUC is set or not.
235 *
236 * When the GUC has a value, we wait until the checkpointer updates
237 * the status data because we cannot be sure yet if we should wait or
238 * not. Here, the GUC has *no* value, we are sure that there is no
239 * point to wait; this matters for example when initializing a
240 * cluster, where we should never wait, and no sync standbys is the
241 * default behavior.
242 */
244 return;
245 }
246
247 /*
248 * Set our waitLSN so WALSender will know when to wake us, and add
249 * ourselves to the queue.
250 */
251 MyProc->waitLSN = lsn;
256
257 /* Alter ps display to show waiting for sync rep. */
259 {
260 char buffer[32];
261
262 sprintf(buffer, "waiting for %X/%08X", LSN_FORMAT_ARGS(lsn));
263 set_ps_display_suffix(buffer);
264 }
265
266 /*
267 * Wait for specified LSN to be confirmed.
268 *
269 * Each proc has its own wait latch, so we perform a normal latch
270 * check/wait loop here.
271 */
272 for (;;)
273 {
274 int rc;
275
276 /* Must reset the latch before testing state. */
278
279 /*
280 * Acquiring the lock is not needed, the latch ensures proper
281 * barriers. If it looks like we're done, we must really be done,
282 * because once walsender changes the state to SYNC_REP_WAIT_COMPLETE,
283 * it will never update it again, so we can't be seeing a stale value
284 * in that case.
285 */
287 break;
288
289 /*
290 * If a wait for synchronous replication is pending, we can neither
291 * acknowledge the commit nor raise ERROR or FATAL. The latter would
292 * lead the client to believe that the transaction aborted, which is
293 * not true: it's already committed locally. The former is no good
294 * either: the client has requested synchronous replication, and is
295 * entitled to assume that an acknowledged commit is also replicated,
296 * which might not be true. So in this case we issue a WARNING (which
297 * some clients may be able to interpret) and shut off further output.
298 * We do NOT reset ProcDiePending, so that the process will die after
299 * the commit is cleaned up.
300 */
301 if (ProcDiePending)
302 {
303 /*
304 * ProcDieSenderPid/Uid are read directly from the globals here
305 * rather than copied to locals first; a second SIGTERM could
306 * change them between reads, but that is harmless because the
307 * process is about to die anyway. The signal sender detail is
308 * inlined rather than using a separate errdetail() call because
309 * it must be appended to the existing detail message.
310 */
313 errmsg("canceling the wait for synchronous replication and terminating connection due to administrator command"),
314 errdetail("The transaction has already committed locally, but might not have been replicated to the standby.%s",
315 ProcDieSenderPid == 0 ? "" :
316 psprintf("\nSignal sent by PID %d, UID %d.",
318 (int) ProcDieSenderUid))));
321 break;
322 }
323
324 /*
325 * It's unclear what to do if a query cancel interrupt arrives. We
326 * can't actually abort at this point, but ignoring the interrupt
327 * altogether is not helpful, so we just terminate the wait with a
328 * suitable warning.
329 */
331 {
332 QueryCancelPending = false;
334 (errmsg("canceling wait for synchronous replication due to user request"),
335 errdetail("The transaction has already committed locally, but might not have been replicated to the standby.")));
337 break;
338 }
339
340 /*
341 * Wait on latch. Any condition that should wake us up will set the
342 * latch, so no need for timeout.
343 */
346
347 /*
348 * If the postmaster dies, we'll probably never get an acknowledgment,
349 * because all the wal sender processes will exit. So just bail out.
350 */
351 if (rc & WL_POSTMASTER_DEATH)
352 {
353 ProcDiePending = true;
356 break;
357 }
358 }
359
360 /*
361 * WalSender has checked our LSN and has removed us from queue. Clean up
362 * state and leave. It's OK to reset these shared memory fields without
363 * holding SyncRepLock, because any walsenders will ignore us anyway when
364 * we're not on the queue. We need a read barrier to make sure we see the
365 * changes to the queue link (this might be unnecessary without
366 * assertions, but better safe than sorry).
367 */
372
373 /* reset ps display to remove the suffix */
376}

References Assert, DestNone, dlist_node_is_detached(), ereport, errcode(), errdetail(), errmsg, fb(), InterruptHoldoffCount, InvalidXLogRecPtr, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), Min, mode, MyLatch, MyProc, pg_read_barrier, ProcDiePending, ProcDieSenderPid, ProcDieSenderUid, psprintf(), QueryCancelPending, ResetLatch(), set_ps_display_remove_suffix(), set_ps_display_suffix(), sprintf, SYNC_REP_NOT_WAITING, SYNC_REP_WAIT_COMPLETE, SYNC_REP_WAIT_FLUSH, SYNC_REP_WAITING, SYNC_STANDBY_DEFINED, SYNC_STANDBY_INIT, WalSndCtlData::sync_standbys_status, SyncRepCancelWait(), PGPROC::syncRepLinks, SyncRepQueueInsert(), SyncRepRequested, PGPROC::syncRepState, SyncRepWaitMode, SyncStandbysDefined, update_process_title, WaitLatch(), PGPROC::waitLSN, WalSndCtl, WARNING, whereToSendOutput, WL_LATCH_SET, and WL_POSTMASTER_DEATH.

Referenced by EndPrepare(), RecordTransactionAbortPrepared(), RecordTransactionCommit(), and RecordTransactionCommitPrepared().

◆ SyncRepWakeQueue()

static int SyncRepWakeQueue ( bool  all,
int  mode 
)
static

Definition at line 919 of file syncrep.c.

920{
922 int numprocs = 0;
924
928
930 {
931 PGPROC *proc = dlist_container(PGPROC, syncRepLinks, iter.cur);
932
933 /*
934 * Assume the queue is ordered by LSN
935 */
936 if (!all && walsndctl->lsn[mode] < proc->waitLSN)
937 return numprocs;
938
939 /*
940 * Remove from queue.
941 */
943
944 /*
945 * SyncRepWaitForLSN() reads syncRepState without holding the lock, so
946 * make sure that it sees the queue link being removed before the
947 * syncRepState change.
948 */
950
951 /*
952 * Set state to complete; see SyncRepWaitForLSN() for discussion of
953 * the various states.
954 */
956
957 /*
958 * Wake only when we have set state and removed from queue.
959 */
960 SetLatch(&(proc->procLatch));
961
962 numprocs++;
963 }
964
965 return numprocs;
966}

References Assert, dlist_mutable_iter::cur, dlist_container, dlist_delete_thoroughly(), dlist_foreach_modify, fb(), LW_EXCLUSIVE, LWLockHeldByMeInMode(), mode, NUM_SYNC_REP_WAIT_MODE, pg_write_barrier, PGPROC::procLatch, SetLatch(), SYNC_REP_WAIT_COMPLETE, PGPROC::syncRepLinks, WalSndCtlData::SyncRepQueue, PGPROC::syncRepState, PGPROC::waitLSN, and WalSndCtl.

Referenced by SyncRepReleaseWaiters(), and SyncRepUpdateSyncStandbysDefined().

Variable Documentation

◆ announce_next_takeover

bool announce_next_takeover = true
static

Definition at line 96 of file syncrep.c.

Referenced by SyncRepReleaseWaiters().

◆ SyncRepConfig

◆ SyncRepStandbyNames

char* SyncRepStandbyNames

Definition at line 91 of file syncrep.c.

◆ SyncRepWaitMode

int SyncRepWaitMode = SYNC_REP_NO_WAIT
static

Definition at line 99 of file syncrep.c.

Referenced by assign_synchronous_commit(), and SyncRepWaitForLSN().