PostgreSQL Source Code git master
Loading...
Searching...
No Matches
syncrep.c File Reference
#include "postgres.h"
#include <unistd.h>
#include "access/xact.h"
#include "common/int.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "replication/syncrep.h"
#include "replication/walsender.h"
#include "replication/walsender_private.h"
#include "storage/proc.h"
#include "tcop/tcopprot.h"
#include "utils/guc_hooks.h"
#include "utils/ps_status.h"
Include dependency graph for syncrep.c:

Go to the source code of this file.

Macros

#define SyncStandbysDefined()    (SyncRepStandbyNames != NULL && SyncRepStandbyNames[0] != '\0')
 

Functions

static void SyncRepQueueInsert (int mode)
 
static void SyncRepCancelWait (void)
 
static int SyncRepWakeQueue (bool all, int mode)
 
static bool SyncRepGetSyncRecPtr (XLogRecPtr *writePtr, XLogRecPtr *flushPtr, XLogRecPtr *applyPtr, bool *am_sync)
 
static void SyncRepGetOldestSyncRecPtr (XLogRecPtr *writePtr, XLogRecPtr *flushPtr, XLogRecPtr *applyPtr, SyncRepStandbyData *sync_standbys, int num_standbys)
 
static void SyncRepGetNthLatestSyncRecPtr (XLogRecPtr *writePtr, XLogRecPtr *flushPtr, XLogRecPtr *applyPtr, SyncRepStandbyData *sync_standbys, int num_standbys, uint8 nth)
 
static int SyncRepGetStandbyPriority (void)
 
static int standby_priority_comparator (const void *a, const void *b)
 
static int cmp_lsn (const void *a, const void *b)
 
void SyncRepWaitForLSN (XLogRecPtr lsn, bool commit)
 
void SyncRepCleanupAtProcExit (void)
 
void SyncRepInitConfig (void)
 
void SyncRepReleaseWaiters (void)
 
int SyncRepGetCandidateStandbys (SyncRepStandbyData **standbys)
 
void SyncRepUpdateSyncStandbysDefined (void)
 
bool check_synchronous_standby_names (char **newval, void **extra, GucSource source)
 
void assign_synchronous_standby_names (const char *newval, void *extra)
 
void assign_synchronous_commit (int newval, void *extra)
 

Variables

char * SyncRepStandbyNames
 
static bool announce_next_takeover = true
 
SyncRepConfigData * SyncRepConfig = NULL
 
static int SyncRepWaitMode = SYNC_REP_NO_WAIT
 

Macro Definition Documentation

◆ SyncStandbysDefined

#define SyncStandbysDefined ( )     (SyncRepStandbyNames != NULL && SyncRepStandbyNames[0] != '\0')

Definition at line 92 of file syncrep.c.

148{
149 int mode;
150
151 /*
152 * This should be called while holding interrupts during a transaction
153 * commit to prevent the follow-up shared memory queue cleanups to be
154 * influenced by external interruptions.
155 */
157
158 /*
159 * Fast exit if user has not requested sync replication, or there are no
160 * sync replication standby names defined.
161 *
162 * Since this routine gets called every commit time, it's important to
163 * exit quickly if sync replication is not requested.
164 *
165 * We check WalSndCtl->sync_standbys_status flag without the lock and exit
166 * immediately if SYNC_STANDBY_INIT is set (the checkpointer has
167 * initialized this data) but SYNC_STANDBY_DEFINED is missing (no sync
168 * replication requested).
169 *
170 * If SYNC_STANDBY_DEFINED is set, we need to check the status again later
171 * while holding the lock, to check the flag and operate the sync rep
172 * queue atomically. This is necessary to avoid the race condition
173 * described in SyncRepUpdateSyncStandbysDefined(). On the other hand, if
174 * SYNC_STANDBY_DEFINED is not set, the lock is not necessary because we
175 * don't touch the queue.
176 */
177 if (!SyncRepRequested() ||
178 ((((volatile WalSndCtlData *) WalSndCtl)->sync_standbys_status) &
180 return;
181
182 /* Cap the level for anything other than commit to remote flush only. */
183 if (commit)
185 else
187
190
193
194 /*
195 * We don't wait for sync rep if SYNC_STANDBY_DEFINED is not set. See
196 * SyncRepUpdateSyncStandbysDefined().
197 *
198 * Also check that the standby hasn't already replied. Unlikely race
199 * condition but we'll be fetching that cache line anyway so it's likely
200 * to be a low cost check.
201 *
202 * If the sync standby data has not been initialized yet
203 * (SYNC_STANDBY_INIT is not set), fall back to a check based on the LSN,
204 * then do a direct GUC check.
205 */
207 {
210 {
212 return;
213 }
214 }
215 else if (lsn <= WalSndCtl->lsn[mode])
216 {
217 /*
218 * The LSN is older than what we need to wait for. The sync standby
219 * data has not been initialized yet, but we are OK to not wait
220 * because we know that there is no point in doing so based on the
221 * LSN.
222 */
224 return;
225 }
226 else if (!SyncStandbysDefined())
227 {
228 /*
229 * If we are here, the sync standby data has not been initialized yet,
230 * and the LSN is newer than what we need to wait for, so we have fallen
231 * back to the best thing we could do in this case: a check on
232 * SyncStandbysDefined() to see if the GUC is set or not.
233 *
234 * When the GUC has a value, we wait until the checkpointer updates
235 * the status data because we cannot be sure yet if we should wait or
236 * not. Here, the GUC has *no* value, we are sure that there is no
237 * point to wait; this matters for example when initializing a
238 * cluster, where we should never wait, and no sync standbys is the
239 * default behavior.
240 */
242 return;
243 }
244
245 /*
246 * Set our waitLSN so WALSender will know when to wake us, and add
247 * ourselves to the queue.
248 */
249 MyProc->waitLSN = lsn;
254
255 /* Alter ps display to show waiting for sync rep. */
257 {
258 char buffer[32];
259
260 sprintf(buffer, "waiting for %X/%08X", LSN_FORMAT_ARGS(lsn));
261 set_ps_display_suffix(buffer);
262 }
263
264 /*
265 * Wait for specified LSN to be confirmed.
266 *
267 * Each proc has its own wait latch, so we perform a normal latch
268 * check/wait loop here.
269 */
270 for (;;)
271 {
272 int rc;
273
274 /* Must reset the latch before testing state. */
276
277 /*
278 * Acquiring the lock is not needed, the latch ensures proper
279 * barriers. If it looks like we're done, we must really be done,
280 * because once walsender changes the state to SYNC_REP_WAIT_COMPLETE,
281 * it will never update it again, so we can't be seeing a stale value
282 * in that case.
283 */
285 break;
286
287 /*
288 * If a wait for synchronous replication is pending, we can neither
289 * acknowledge the commit nor raise ERROR or FATAL. The latter would
290 * lead the client to believe that the transaction aborted, which is
291 * not true: it's already committed locally. The former is no good
292 * either: the client has requested synchronous replication, and is
293 * entitled to assume that an acknowledged commit is also replicated,
294 * which might not be true. So in this case we issue a WARNING (which
295 * some clients may be able to interpret) and shut off further output.
296 * We do NOT reset ProcDiePending, so that the process will die after
297 * the commit is cleaned up.
298 */
299 if (ProcDiePending)
300 {
303 errmsg("canceling the wait for synchronous replication and terminating connection due to administrator command"),
304 errdetail("The transaction has already committed locally, but might not have been replicated to the standby.")));
307 break;
308 }
309
310 /*
311 * It's unclear what to do if a query cancel interrupt arrives. We
312 * can't actually abort at this point, but ignoring the interrupt
313 * altogether is not helpful, so we just terminate the wait with a
314 * suitable warning.
315 */
317 {
318 QueryCancelPending = false;
320 (errmsg("canceling wait for synchronous replication due to user request"),
321 errdetail("The transaction has already committed locally, but might not have been replicated to the standby.")));
323 break;
324 }
325
326 /*
327 * Wait on latch. Any condition that should wake us up will set the
328 * latch, so no need for timeout.
329 */
332
333 /*
334 * If the postmaster dies, we'll probably never get an acknowledgment,
335 * because all the wal sender processes will exit. So just bail out.
336 */
337 if (rc & WL_POSTMASTER_DEATH)
338 {
339 ProcDiePending = true;
342 break;
343 }
344 }
345
346 /*
347 * WalSender has checked our LSN and has removed us from queue. Clean up
348 * state and leave. It's OK to reset these shared memory fields without
349 * holding SyncRepLock, because any walsenders will ignore us anyway when
350 * we're not on the queue. We need a read barrier to make sure we see the
351 * changes to the queue link (this might be unnecessary without
352 * assertions, but better safe than sorry).
353 */
358
359 /* reset ps display to remove the suffix */
362}
363
364/*
365 * Insert MyProc into the specified SyncRepQueue, maintaining sorted invariant.
366 *
367 * Usually we will go at tail of queue, though it's possible that we arrive
368 * here out of order, so start at tail and work back to insertion point.
369 */
370static void
372{
373 dlist_head *queue;
374 dlist_iter iter;
375
377 queue = &WalSndCtl->SyncRepQueue[mode];
378
379 dlist_reverse_foreach(iter, queue)
380 {
381 PGPROC *proc = dlist_container(PGPROC, syncRepLinks, iter.cur);
382
383 /*
384 * Stop at the queue element that we should insert after to ensure the
385 * queue is ordered by LSN.
386 */
387 if (proc->waitLSN < MyProc->waitLSN)
388 {
390 return;
391 }
392 }
393
394 /*
395 * If we get here, the list was either empty, or this process needs to be
396 * at the head.
397 */
399}
400
401/*
402 * Acquire SyncRepLock and cancel any wait currently in progress.
403 */
404static void
406{
412}
413
414void
416{
417 /*
418 * First check if we are removed from the queue without the lock to not
419 * slow down backend exit.
420 */
422 {
424
425 /* maybe we have just been removed, so recheck */
428
430 }
431}
432
433/*
434 * ===========================================================
435 * Synchronous Replication functions for wal sender processes
436 * ===========================================================
437 */
438
439/*
440 * Take any action required to initialise sync rep state from config
441 * data. Called at WALSender startup and after each SIGHUP.
442 */
443void
445{
446 int priority;
447
448 /*
449 * Determine if we are a potential sync standby and remember the result
450 * for handling replies from standby.
451 */
454 {
458
460 (errmsg_internal("standby \"%s\" now has synchronous standby priority %d",
462 }
463}
464
465/*
466 * Update the LSNs on each queue based upon our latest state. This
467 * implements a simple policy of first-valid-sync-standby-releases-waiter.
468 *
469 * Other policies are possible, which would change what we do here and
470 * perhaps also which information we store as well.
471 */
472void
474{
479 bool got_recptr;
480 bool am_sync;
481 int numwrite = 0;
482 int numflush = 0;
483 int numapply = 0;
484
485 /*
486 * If this WALSender is serving a standby that is not on the list of
487 * potential sync standbys then we have nothing to do. If we are still
488 * starting up, still running base backup or the current flush position is
489 * still invalid, then leave quickly also. Streaming or stopping WAL
490 * senders are allowed to release waiters.
491 */
496 {
498 return;
499 }
500
501 /*
502 * We're a potential sync standby. Release waiters if there are enough
503 * sync standbys and we are considered as sync.
504 */
506
507 /*
508 * Check whether we are a sync standby or not, and calculate the synced
509 * positions among all sync standbys. (Note: although this step does not
510 * of itself require holding SyncRepLock, it seems like a good idea to do
511 * it after acquiring the lock. This ensures that the WAL pointers we use
512 * to release waiters are newer than any previous execution of this
513 * routine used.)
514 */
516
517 /*
518 * If we are managing a sync standby, though we weren't prior to this,
519 * then announce we are now a sync standby.
520 */
522 {
524
526 ereport(LOG,
527 (errmsg("standby \"%s\" is now a synchronous standby with priority %d",
529 else
530 ereport(LOG,
531 (errmsg("standby \"%s\" is now a candidate for quorum synchronous standby",
533 }
534
535 /*
536 * If the number of sync standbys is less than requested or we aren't
537 * managing a sync standby then just leave.
538 */
539 if (!got_recptr || !am_sync)
540 {
543 return;
544 }
545
546 /*
547 * Set the lsn first so that when we wake backends they will release up to
548 * this location.
549 */
551 {
554 }
556 {
559 }
561 {
564 }
565
567
568 elog(DEBUG3, "released %d procs up to write %X/%08X, %d procs up to flush %X/%08X, %d procs up to apply %X/%08X",
572}
573
574/*
575 * Calculate the synced Write, Flush and Apply positions among sync standbys.
576 *
577 * Return false if the number of sync standbys is less than
578 * synchronous_standby_names specifies. Otherwise return true and
579 * store the positions into *writePtr, *flushPtr and *applyPtr.
580 *
581 * On return, *am_sync is set to true if this walsender is connecting to
582 * sync standby. Otherwise it's set to false.
583 */
584static bool
587{
589 int num_standbys;
590 int i;
591
592 /* Initialize default results */
596 *am_sync = false;
597
598 /* Quick out if not even configured to be synchronous */
599 if (SyncRepConfig == NULL)
600 return false;
601
602 /* Get standbys that are considered as synchronous at this moment */
604
605 /* Am I among the candidate sync standbys? */
606 for (i = 0; i < num_standbys; i++)
607 {
608 if (sync_standbys[i].is_me)
609 {
610 *am_sync = true;
611 break;
612 }
613 }
614
615 /*
616 * Nothing more to do if we are not managing a sync standby or there are
617 * not enough synchronous standbys.
618 */
619 if (!(*am_sync) ||
621 {
623 return false;
624 }
625
626 /*
627 * In a priority-based sync replication, the synced positions are the
628 * oldest ones among sync standbys. In a quorum-based, they are the Nth
629 * latest ones.
630 *
631 * SyncRepGetNthLatestSyncRecPtr() also can calculate the oldest
632 * positions. But we use SyncRepGetOldestSyncRecPtr() for that calculation
633 * because it's a bit more efficient.
634 *
635 * XXX If the numbers of current and requested sync standbys are the same,
636 * we can use SyncRepGetOldestSyncRecPtr() to calculate the synced
637 * positions even in a quorum-based sync replication.
638 */
640 {
643 }
644 else
645 {
649 }
650
652 return true;
653}
654
655/*
656 * Calculate the oldest Write, Flush and Apply positions among sync standbys.
657 */
658static void
663 int num_standbys)
664{
665 int i;
666
667 /*
668 * Scan through all sync standbys and calculate the oldest Write, Flush
669 * and Apply positions. We assume *writePtr et al were initialized to
670 * InvalidXLogRecPtr.
671 */
672 for (i = 0; i < num_standbys; i++)
673 {
675 XLogRecPtr flush = sync_standbys[i].flush;
676 XLogRecPtr apply = sync_standbys[i].apply;
677
679 *writePtr = write;
680 if (!XLogRecPtrIsValid(*flushPtr) || *flushPtr > flush)
681 *flushPtr = flush;
682 if (!XLogRecPtrIsValid(*applyPtr) || *applyPtr > apply)
683 *applyPtr = apply;
684 }
685}
686
687/*
688 * Calculate the Nth latest Write, Flush and Apply positions among sync
689 * standbys.
690 */
691static void
696 int num_standbys,
697 uint8 nth)
698{
702 int i;
703
704 /* Should have enough candidates, or somebody messed up */
705 Assert(nth > 0 && nth <= num_standbys);
706
710
711 for (i = 0; i < num_standbys; i++)
712 {
713 write_array[i] = sync_standbys[i].write;
714 flush_array[i] = sync_standbys[i].flush;
715 apply_array[i] = sync_standbys[i].apply;
716 }
717
718 /* Sort each array in descending order */
722
723 /* Get Nth latest Write, Flush, Apply positions */
724 *writePtr = write_array[nth - 1];
725 *flushPtr = flush_array[nth - 1];
726 *applyPtr = apply_array[nth - 1];
727
731}
732
733/*
734 * Compare lsn in order to sort array in descending order.
735 */
736static int
737cmp_lsn(const void *a, const void *b)
738{
739 XLogRecPtr lsn1 = *((const XLogRecPtr *) a);
740 XLogRecPtr lsn2 = *((const XLogRecPtr *) b);
741
742 return pg_cmp_u64(lsn2, lsn1);
743}
744
745/*
746 * Return data about walsenders that are candidates to be sync standbys.
747 *
748 * *standbys is set to a palloc'd array of structs of per-walsender data,
749 * and the number of valid entries (candidate sync senders) is returned.
750 * (This might be more or fewer than num_sync; caller must check.)
751 */
752int
754{
755 int i;
756 int n;
757
758 /* Create result array */
760
761 /* Quick exit if sync replication is not requested */
762 if (SyncRepConfig == NULL)
763 return 0;
764
765 /* Collect raw data from shared memory */
766 n = 0;
767 for (i = 0; i < max_wal_senders; i++)
768 {
769 volatile WalSnd *walsnd; /* Use volatile pointer to prevent code
770 * rearrangement */
772 WalSndState state; /* not included in SyncRepStandbyData */
773
775 stby = *standbys + n;
776
777 SpinLockAcquire(&walsnd->mutex);
778 stby->pid = walsnd->pid;
779 state = walsnd->state;
780 stby->write = walsnd->write;
781 stby->flush = walsnd->flush;
782 stby->apply = walsnd->apply;
783 stby->sync_standby_priority = walsnd->sync_standby_priority;
784 SpinLockRelease(&walsnd->mutex);
785
786 /* Must be active */
787 if (stby->pid == 0)
788 continue;
789
790 /* Must be streaming or stopping */
793 continue;
794
795 /* Must be synchronous */
796 if (stby->sync_standby_priority == 0)
797 continue;
798
799 /* Must have a valid flush position */
800 if (!XLogRecPtrIsValid(stby->flush))
801 continue;
802
803 /* OK, it's a candidate */
804 stby->walsnd_index = i;
805 stby->is_me = (walsnd == MyWalSnd);
806 n++;
807 }
808
809 /*
810 * In quorum mode, we return all the candidates. In priority mode, if we
811 * have too many candidates then return only the num_sync ones of highest
812 * priority.
813 */
816 {
817 /* Sort by priority ... */
818 qsort(*standbys, n, sizeof(SyncRepStandbyData),
820 /* ... then report just the first num_sync ones */
822 }
823
824 return n;
825}
826
827/*
828 * qsort comparator to sort SyncRepStandbyData entries by priority
829 */
830static int
831standby_priority_comparator(const void *a, const void *b)
832{
833 const SyncRepStandbyData *sa = (const SyncRepStandbyData *) a;
834 const SyncRepStandbyData *sb = (const SyncRepStandbyData *) b;
835
836 /* First, sort by increasing priority value */
837 if (sa->sync_standby_priority != sb->sync_standby_priority)
838 return sa->sync_standby_priority - sb->sync_standby_priority;
839
840 /*
841 * We might have equal priority values; arbitrarily break ties by position
842 * in the WalSnd array. (This is utterly bogus, since that is arrival
843 * order dependent, but there are regression tests that rely on it.)
844 */
845 return sa->walsnd_index - sb->walsnd_index;
846}
847
848
849/*
850 * Check if we are in the list of sync standbys, and if so, determine
851 * priority sequence. Return priority if set, or zero to indicate that
852 * we are not a potential sync standby.
853 *
854 * Compare the parameter SyncRepStandbyNames against the application_name
855 * for this WALSender, or allow any name if we find a wildcard "*".
856 */
857static int
859{
860 const char *standby_name;
861 int priority;
862 bool found = false;
863
864 /*
865 * Since synchronous cascade replication is not allowed, we always set the
866 * priority of cascading walsender to zero.
867 */
869 return 0;
870
872 return 0;
873
876 {
878 strcmp(standby_name, "*") == 0)
879 {
880 found = true;
881 break;
882 }
884 }
885
886 if (!found)
887 return 0;
888
889 /*
890 * In quorum-based sync replication, all the standbys in the list have the
891 * same priority, one.
892 */
894}
895
896/*
897 * Walk the specified queue from head. Set the state of any backends that
898 * need to be woken, remove them from the queue, and then wake them.
899 * Pass all = true to wake whole queue; otherwise, just wake up to
900 * the walsender's LSN.
901 *
902 * The caller must hold SyncRepLock in exclusive mode.
903 */
904static int
905SyncRepWakeQueue(bool all, int mode)
906{
908 int numprocs = 0;
910
914
916 {
917 PGPROC *proc = dlist_container(PGPROC, syncRepLinks, iter.cur);
918
919 /*
920 * Assume the queue is ordered by LSN
921 */
922 if (!all && walsndctl->lsn[mode] < proc->waitLSN)
923 return numprocs;
924
925 /*
926 * Remove from queue.
927 */
929
930 /*
931 * SyncRepWaitForLSN() reads syncRepState without holding the lock, so
932 * make sure that it sees the queue link being removed before the
933 * syncRepState change.
934 */
936
937 /*
938 * Set state to complete; see SyncRepWaitForLSN() for discussion of
939 * the various states.
940 */
942
943 /*
944 * Wake only when we have set state and removed from queue.
945 */
946 SetLatch(&(proc->procLatch));
947
948 numprocs++;
949 }
950
951 return numprocs;
952}
953
954/*
955 * The checkpointer calls this as needed to update the shared
956 * sync_standbys_status flag, so that backends don't remain permanently wedged
957 * if synchronous_standby_names is unset. It's safe to check the current value
958 * without the lock, because it's only ever updated by one process. But we
959 * must take the lock to change it.
960 */
961void
963{
965
968 {
970
971 /*
972 * If synchronous_standby_names has been reset to empty, it's futile
973 * for backends to continue waiting. Since the user no longer wants
974 * synchronous replication, we'd better wake them up.
975 */
977 {
978 int i;
979
980 for (i = 0; i < NUM_SYNC_REP_WAIT_MODE; i++)
981 SyncRepWakeQueue(true, i);
982 }
983
984 /*
985 * Only allow people to join the queue when there are synchronous
986 * standbys defined. Without this interlock, there's a race
987 * condition: we might wake up all the current waiters; then, some
988 * backend that hasn't yet reloaded its config might go to sleep on
989 * the queue (and never wake up). This prevents that.
990 */
993
995 }
997 {
999
1000 /*
1001 * Note that there is no need to wake up the queues here. We would
1002 * reach this path only if SyncStandbysDefined() returns false, or it
1003 * would mean that some backends are waiting with the GUC set. See
1004 * SyncRepWaitForLSN().
1005 */
1007
1008 /*
1009 * Even if there is no sync standby defined, let the readers of this
1010 * information know that the sync standby data has been initialized.
1011 * This can just be done once, hence the previous check on
1012 * SYNC_STANDBY_INIT to avoid useless work.
1013 */
1015
1017 }
1018}
1019
1020#ifdef USE_ASSERT_CHECKING
1021static bool
1023{
1025 dlist_iter iter;
1026
1028
1030
1032 {
1033 PGPROC *proc = dlist_container(PGPROC, syncRepLinks, iter.cur);
1034
1035 /*
1036 * Check the queue is ordered by LSN and that multiple procs don't
1037 * have matching LSNs
1038 */
1039 if (proc->waitLSN <= lastLSN)
1040 return false;
1041
1042 lastLSN = proc->waitLSN;
1043 }
1044
1045 return true;
1046}
1047#endif
1048
1049/*
1050 * ===========================================================
1051 * Synchronous Replication functions executed by any process
1052 * ===========================================================
1053 */
1054
1055bool
1057{
1058 if (*newval != NULL && (*newval)[0] != '\0')
1059 {
1060 yyscan_t scanner;
1061 int parse_rc;
1063
1064 /* Result of parsing is returned in one of these two variables */
1067
1068 /* Parse the synchronous_standby_names string */
1069 syncrep_scanner_init(*newval, &scanner);
1071 syncrep_scanner_finish(scanner);
1072
1073 if (parse_rc != 0 || syncrep_parse_result == NULL)
1074 {
1078 else
1079 GUC_check_errdetail("\"%s\" parser failed.",
1080 "synchronous_standby_names");
1081 return false;
1082 }
1083
1084 if (syncrep_parse_result->num_sync <= 0)
1085 {
1086 GUC_check_errmsg("number of synchronous standbys (%d) must be greater than zero",
1087 syncrep_parse_result->num_sync);
1088 return false;
1089 }
1090
1091 /* GUC extra value must be guc_malloc'd, not palloc'd */
1093 guc_malloc(LOG, syncrep_parse_result->config_size);
1094 if (pconf == NULL)
1095 return false;
1097
1098 *extra = pconf;
1099
1100 /*
1101 * We need not explicitly clean up syncrep_parse_result. It, and any
1102 * other cruft generated during parsing, will be freed when the
1103 * current memory context is deleted. (This code is generally run in
1104 * a short-lived context used for config file processing, so that will
1105 * not be very long.)
1106 */
1107 }
1108 else
1109 *extra = NULL;
1110
1111 return true;
1112}
1113
1114void
1115assign_synchronous_standby_names(const char *newval, void *extra)
1116{
1117 SyncRepConfig = (SyncRepConfigData *) extra;
1118}
1119
1120void
1121assign_synchronous_commit(int newval, void *extra)
1122{
1123 switch (newval)
1124 {
1127 break;
1130 break;
1133 break;
1134 default:
1136 break;
1137 }
1138}
#define pg_read_barrier()
Definition atomics.h:154
#define pg_write_barrier()
Definition atomics.h:155
#define Min(x, y)
Definition c.h:997
uint8_t uint8
Definition c.h:544
#define Assert(condition)
Definition c.h:873
void * yyscan_t
Definition cubedata.h:65
@ DestNone
Definition dest.h:87
int errmsg_internal(const char *fmt,...)
Definition elog.c:1170
int errdetail(const char *fmt,...)
Definition elog.c:1216
int errcode(int sqlerrcode)
Definition elog.c:863
int errmsg(const char *fmt,...)
Definition elog.c:1080
#define LOG
Definition elog.h:31
#define DEBUG3
Definition elog.h:28
#define WARNING
Definition elog.h:36
#define DEBUG1
Definition elog.h:30
#define elog(elevel,...)
Definition elog.h:226
#define ereport(elevel,...)
Definition elog.h:150
#define palloc_array(type, count)
Definition fe_memutils.h:76
volatile uint32 InterruptHoldoffCount
Definition globals.c:43
volatile sig_atomic_t QueryCancelPending
Definition globals.c:33
struct Latch * MyLatch
Definition globals.c:63
volatile sig_atomic_t ProcDiePending
Definition globals.c:34
void GUC_check_errcode(int sqlerrcode)
Definition guc.c:6628
void * guc_malloc(int elevel, size_t size)
Definition guc.c:636
#define newval
#define GUC_check_errmsg
Definition guc.h:501
#define GUC_check_errdetail
Definition guc.h:505
GucSource
Definition guc.h:112
char * application_name
Definition guc_tables.c:561
static void dlist_insert_after(dlist_node *after, dlist_node *node)
Definition ilist.h:381
#define dlist_foreach(iter, lhead)
Definition ilist.h:623
static void dlist_delete_thoroughly(dlist_node *node)
Definition ilist.h:416
static bool dlist_node_is_detached(const dlist_node *node)
Definition ilist.h:525
#define dlist_reverse_foreach(iter, lhead)
Definition ilist.h:654
static void dlist_push_head(dlist_head *head, dlist_node *node)
Definition ilist.h:347
#define dlist_foreach_modify(iter, lhead)
Definition ilist.h:640
#define dlist_container(type, membername, ptr)
Definition ilist.h:593
static int pg_cmp_u64(uint64 a, uint64 b)
Definition int.h:731
#define write(a, b, c)
Definition win32.h:14
int b
Definition isn.c:74
int a
Definition isn.c:73
int i
Definition isn.c:77
void SetLatch(Latch *latch)
Definition latch.c:290
void ResetLatch(Latch *latch)
Definition latch.c:374
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition latch.c:172
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1176
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1955
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1793
@ LW_EXCLUSIVE
Definition lwlock.h:112
void pfree(void *pointer)
Definition mcxt.c:1616
static PgChecksumMode mode
static rewind_source * source
Definition pg_rewind.c:89
int pg_strcasecmp(const char *s1, const char *s2)
#define sprintf
Definition port.h:262
#define qsort(a, b, c, d)
Definition port.h:495
CommandDest whereToSendOutput
Definition postgres.c:92
static int fb(int x)
void set_ps_display_remove_suffix(void)
Definition ps_status.c:439
void set_ps_display_suffix(const char *suffix)
Definition ps_status.c:387
bool update_process_title
Definition ps_status.c:31
#define SpinLockRelease(lock)
Definition spin.h:61
#define SpinLockAcquire(lock)
Definition spin.h:59
PGPROC * MyProc
Definition proc.c:67
Definition proc.h:179
XLogRecPtr waitLSN
Definition proc.h:275
dlist_node syncRepLinks
Definition proc.h:277
int syncRepState
Definition proc.h:276
Latch procLatch
Definition proc.h:186
uint8 syncrep_method
Definition syncrep.h:68
char member_names[FLEXIBLE_ARRAY_MEMBER]
Definition syncrep.h:71
WalSnd walsnds[FLEXIBLE_ARRAY_MEMBER]
dlist_head SyncRepQueue[NUM_SYNC_REP_WAIT_MODE]
slock_t mutex
XLogRecPtr flush
WalSndState state
int sync_standby_priority
dlist_node * cur
Definition ilist.h:179
dlist_node * cur
Definition ilist.h:200
static int SyncRepWaitMode
Definition syncrep.c:98
void SyncRepInitConfig(void)
Definition syncrep.c:445
static bool SyncRepGetSyncRecPtr(XLogRecPtr *writePtr, XLogRecPtr *flushPtr, XLogRecPtr *applyPtr, bool *am_sync)
Definition syncrep.c:586
static void SyncRepGetNthLatestSyncRecPtr(XLogRecPtr *writePtr, XLogRecPtr *flushPtr, XLogRecPtr *applyPtr, SyncRepStandbyData *sync_standbys, int num_standbys, uint8 nth)
Definition syncrep.c:693
void assign_synchronous_commit(int newval, void *extra)
Definition syncrep.c:1122
void assign_synchronous_standby_names(const char *newval, void *extra)
Definition syncrep.c:1116
static int standby_priority_comparator(const void *a, const void *b)
Definition syncrep.c:832
static int SyncRepWakeQueue(bool all, int mode)
Definition syncrep.c:906
SyncRepConfigData * SyncRepConfig
Definition syncrep.c:97
int SyncRepGetCandidateStandbys(SyncRepStandbyData **standbys)
Definition syncrep.c:754
void SyncRepReleaseWaiters(void)
Definition syncrep.c:474
void SyncRepUpdateSyncStandbysDefined(void)
Definition syncrep.c:963
static bool announce_next_takeover
Definition syncrep.c:95
static int SyncRepGetStandbyPriority(void)
Definition syncrep.c:859
static void SyncRepQueueInsert(int mode)
Definition syncrep.c:372
static void SyncRepCancelWait(void)
Definition syncrep.c:406
bool check_synchronous_standby_names(char **newval, void **extra, GucSource source)
Definition syncrep.c:1057
static void SyncRepGetOldestSyncRecPtr(XLogRecPtr *writePtr, XLogRecPtr *flushPtr, XLogRecPtr *applyPtr, SyncRepStandbyData *sync_standbys, int num_standbys)
Definition syncrep.c:660
void SyncRepCleanupAtProcExit(void)
Definition syncrep.c:416
static int cmp_lsn(const void *a, const void *b)
Definition syncrep.c:738
#define SyncStandbysDefined()
Definition syncrep.c:92
#define SYNC_REP_PRIORITY
Definition syncrep.h:35
#define NUM_SYNC_REP_WAIT_MODE
Definition syncrep.h:27
#define SyncRepRequested()
Definition syncrep.h:18
#define SYNC_REP_NO_WAIT
Definition syncrep.h:22
#define SYNC_REP_WAIT_WRITE
Definition syncrep.h:23
#define SYNC_REP_WAITING
Definition syncrep.h:31
#define SYNC_REP_WAIT_COMPLETE
Definition syncrep.h:32
#define SYNC_REP_WAIT_FLUSH
Definition syncrep.h:24
#define SYNC_REP_NOT_WAITING
Definition syncrep.h:30
int syncrep_yyparse(SyncRepConfigData **syncrep_parse_result_p, char **syncrep_parse_error_msg_p, yyscan_t yyscanner)
#define SYNC_REP_WAIT_APPLY
Definition syncrep.h:25
void syncrep_scanner_finish(yyscan_t yyscanner)
void syncrep_scanner_init(const char *str, yyscan_t *yyscannerp)
#define WL_LATCH_SET
#define WL_POSTMASTER_DEATH
WalSnd * MyWalSnd
Definition walsender.c:120
int max_wal_senders
Definition walsender.c:129
bool am_cascading_walsender
Definition walsender.c:124
WalSndCtlData * WalSndCtl
Definition walsender.c:117
#define SYNC_STANDBY_DEFINED
WalSndState
@ WALSNDSTATE_STREAMING
@ WALSNDSTATE_STOPPING
#define SYNC_STANDBY_INIT
@ SYNCHRONOUS_COMMIT_REMOTE_WRITE
Definition xact.h:73
@ SYNCHRONOUS_COMMIT_REMOTE_APPLY
Definition xact.h:76
@ SYNCHRONOUS_COMMIT_REMOTE_FLUSH
Definition xact.h:75
#define XLogRecPtrIsValid(r)
Definition xlogdefs.h:29
#define LSN_FORMAT_ARGS(lsn)
Definition xlogdefs.h:47
uint64 XLogRecPtr
Definition xlogdefs.h:21
#define InvalidXLogRecPtr
Definition xlogdefs.h:28

Function Documentation

◆ assign_synchronous_commit()

void assign_synchronous_commit ( int  newval,
void * extra
)

◆ assign_synchronous_standby_names()

void assign_synchronous_standby_names ( const char * newval,
void * extra
)

Definition at line 1116 of file syncrep.c.

1117{
1118 SyncRepConfig = (SyncRepConfigData *) extra;
1119}

References SyncRepConfig.

◆ check_synchronous_standby_names()

bool check_synchronous_standby_names ( char **  newval,
void **  extra,
GucSource  source 
)

Definition at line 1057 of file syncrep.c.

1058{
1059 if (*newval != NULL && (*newval)[0] != '\0')
1060 {
1061 yyscan_t scanner;
1062 int parse_rc;
1064
1065 /* Result of parsing is returned in one of these two variables */
1068
1069 /* Parse the synchronous_standby_names string */
1070 syncrep_scanner_init(*newval, &scanner);
1072 syncrep_scanner_finish(scanner);
1073
1074 if (parse_rc != 0 || syncrep_parse_result == NULL)
1075 {
1079 else
1080 GUC_check_errdetail("\"%s\" parser failed.",
1081 "synchronous_standby_names");
1082 return false;
1083 }
1084
1085 if (syncrep_parse_result->num_sync <= 0)
1086 {
1087 GUC_check_errmsg("number of synchronous standbys (%d) must be greater than zero",
1088 syncrep_parse_result->num_sync);
1089 return false;
1090 }
1091
1092 /* GUC extra value must be guc_malloc'd, not palloc'd */
1094 guc_malloc(LOG, syncrep_parse_result->config_size);
1095 if (pconf == NULL)
1096 return false;
1098
1099 *extra = pconf;
1100
1101 /*
1102 * We need not explicitly clean up syncrep_parse_result. It, and any
1103 * other cruft generated during parsing, will be freed when the
1104 * current memory context is deleted. (This code is generally run in
1105 * a short-lived context used for config file processing, so that will
1106 * not be very long.)
1107 */
1108 }
1109 else
1110 *extra = NULL;
1111
1112 return true;
1113}

References fb(), GUC_check_errcode(), GUC_check_errdetail, GUC_check_errmsg, guc_malloc(), LOG, newval, syncrep_scanner_finish(), syncrep_scanner_init(), and syncrep_yyparse().

◆ cmp_lsn()

static int cmp_lsn ( const void * a,
const void * b 
)
static

Definition at line 738 of file syncrep.c.

739{
740 XLogRecPtr lsn1 = *((const XLogRecPtr *) a);
741 XLogRecPtr lsn2 = *((const XLogRecPtr *) b);
742
743 return pg_cmp_u64(lsn2, lsn1);
744}

References a, b, fb(), and pg_cmp_u64().

Referenced by SyncRepGetNthLatestSyncRecPtr().

◆ standby_priority_comparator()

static int standby_priority_comparator ( const void * a,
const void * b 
)
static

Definition at line 832 of file syncrep.c.

833{
834 const SyncRepStandbyData *sa = (const SyncRepStandbyData *) a;
835 const SyncRepStandbyData *sb = (const SyncRepStandbyData *) b;
836
837 /* First, sort by increasing priority value */
838 if (sa->sync_standby_priority != sb->sync_standby_priority)
839 return sa->sync_standby_priority - sb->sync_standby_priority;
840
841 /*
842 * We might have equal priority values; arbitrarily break ties by position
843 * in the WalSnd array. (This is utterly bogus, since that is arrival
844 * order dependent, but there are regression tests that rely on it.)
845 */
846 return sa->walsnd_index - sb->walsnd_index;
847}

References a, b, and fb().

Referenced by SyncRepGetCandidateStandbys().

◆ SyncRepCancelWait()

◆ SyncRepCleanupAtProcExit()

void SyncRepCleanupAtProcExit ( void  )

Definition at line 416 of file syncrep.c.

417{
418 /*
419 * First check if we are removed from the queue without the lock to not
420 * slow down backend exit.
421 */
423 {
425
426 /* maybe we have just been removed, so recheck */
429
431 }
432}

References dlist_delete_thoroughly(), dlist_node_is_detached(), fb(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MyProc, and PGPROC::syncRepLinks.

Referenced by ProcKill().

◆ SyncRepGetCandidateStandbys()

int SyncRepGetCandidateStandbys ( SyncRepStandbyData **  standbys)

Definition at line 754 of file syncrep.c.

755{
756 int i;
757 int n;
758
759 /* Create result array */
761
762 /* Quick exit if sync replication is not requested */
763 if (SyncRepConfig == NULL)
764 return 0;
765
766 /* Collect raw data from shared memory */
767 n = 0;
768 for (i = 0; i < max_wal_senders; i++)
769 {
770 volatile WalSnd *walsnd; /* Use volatile pointer to prevent code
771 * rearrangement */
773 WalSndState state; /* not included in SyncRepStandbyData */
774
776 stby = *standbys + n;
777
778 SpinLockAcquire(&walsnd->mutex);
779 stby->pid = walsnd->pid;
780 state = walsnd->state;
781 stby->write = walsnd->write;
782 stby->flush = walsnd->flush;
783 stby->apply = walsnd->apply;
784 stby->sync_standby_priority = walsnd->sync_standby_priority;
785 SpinLockRelease(&walsnd->mutex);
786
787 /* Must be active */
788 if (stby->pid == 0)
789 continue;
790
791 /* Must be streaming or stopping */
794 continue;
795
796 /* Must be synchronous */
797 if (stby->sync_standby_priority == 0)
798 continue;
799
800 /* Must have a valid flush position */
801 if (!XLogRecPtrIsValid(stby->flush))
802 continue;
803
804 /* OK, it's a candidate */
805 stby->walsnd_index = i;
806 stby->is_me = (walsnd == MyWalSnd);
807 n++;
808 }
809
810 /*
811 * In quorum mode, we return all the candidates. In priority mode, if we
812 * have too many candidates then return only the num_sync ones of highest
813 * priority.
814 */
817 {
818 /* Sort by priority ... */
819 qsort(*standbys, n, sizeof(SyncRepStandbyData),
821 /* ... then report just the first num_sync ones */
823 }
824
825 return n;
826}

References fb(), i, max_wal_senders, MyWalSnd, SyncRepConfigData::num_sync, palloc_array, qsort, SpinLockAcquire, SpinLockRelease, standby_priority_comparator(), SYNC_REP_PRIORITY, SyncRepConfigData::syncrep_method, SyncRepConfig, WalSndCtl, WalSndCtlData::walsnds, WALSNDSTATE_STOPPING, WALSNDSTATE_STREAMING, and XLogRecPtrIsValid.

Referenced by pg_stat_get_wal_senders(), and SyncRepGetSyncRecPtr().

◆ SyncRepGetNthLatestSyncRecPtr()

static void SyncRepGetNthLatestSyncRecPtr ( XLogRecPtr * writePtr,
XLogRecPtr * flushPtr,
XLogRecPtr * applyPtr,
SyncRepStandbyData * sync_standbys,
int  num_standbys,
uint8  nth 
)
static

Definition at line 693 of file syncrep.c.

699{
703 int i;
704
705 /* Should have enough candidates, or somebody messed up */
706 Assert(nth > 0 && nth <= num_standbys);
707
711
712 for (i = 0; i < num_standbys; i++)
713 {
714 write_array[i] = sync_standbys[i].write;
715 flush_array[i] = sync_standbys[i].flush;
716 apply_array[i] = sync_standbys[i].apply;
717 }
718
719 /* Sort each array in descending order */
723
724 /* Get Nth latest Write, Flush, Apply positions */
725 *writePtr = write_array[nth - 1];
726 *flushPtr = flush_array[nth - 1];
727 *applyPtr = apply_array[nth - 1];
728
732}

References Assert, cmp_lsn(), fb(), i, palloc_array, pfree(), and qsort.

Referenced by SyncRepGetSyncRecPtr().

◆ SyncRepGetOldestSyncRecPtr()

static void SyncRepGetOldestSyncRecPtr ( XLogRecPtr * writePtr,
XLogRecPtr * flushPtr,
XLogRecPtr * applyPtr,
SyncRepStandbyData * sync_standbys,
int  num_standbys 
)
static

Definition at line 660 of file syncrep.c.

665{
666 int i;
667
668 /*
669 * Scan through all sync standbys and calculate the oldest Write, Flush
670 * and Apply positions. We assume *writePtr et al were initialized to
671 * InvalidXLogRecPtr.
672 */
673 for (i = 0; i < num_standbys; i++)
674 {
676 XLogRecPtr flush = sync_standbys[i].flush;
677 XLogRecPtr apply = sync_standbys[i].apply;
678
680 *writePtr = write;
681 if (!XLogRecPtrIsValid(*flushPtr) || *flushPtr > flush)
682 *flushPtr = flush;
683 if (!XLogRecPtrIsValid(*applyPtr) || *applyPtr > apply)
684 *applyPtr = apply;
685 }
686}

References fb(), i, write, and XLogRecPtrIsValid.

Referenced by SyncRepGetSyncRecPtr().

◆ SyncRepGetStandbyPriority()

static int SyncRepGetStandbyPriority ( void  )
static

Definition at line 859 of file syncrep.c.

860{
861 const char *standby_name;
862 int priority;
863 bool found = false;
864
865 /*
866 * Since synchronous cascade replication is not allowed, we always set the
867 * priority of cascading walsender to zero.
868 */
870 return 0;
871
873 return 0;
874
877 {
879 strcmp(standby_name, "*") == 0)
880 {
881 found = true;
882 break;
883 }
885 }
886
887 if (!found)
888 return 0;
889
890 /*
891 * In quorum-based sync replication, all the standbys in the list have the
892 * same priority, one.
893 */
895}

References am_cascading_walsender, application_name, fb(), SyncRepConfigData::member_names, SyncRepConfigData::nmembers, pg_strcasecmp(), SYNC_REP_PRIORITY, SyncRepConfigData::syncrep_method, SyncRepConfig, and SyncStandbysDefined.

Referenced by SyncRepInitConfig().

◆ SyncRepGetSyncRecPtr()

static bool SyncRepGetSyncRecPtr ( XLogRecPtr * writePtr,
XLogRecPtr * flushPtr,
XLogRecPtr * applyPtr,
bool * am_sync 
)
static

Definition at line 586 of file syncrep.c.

588{
590 int num_standbys;
591 int i;
592
593 /* Initialize default results */
597 *am_sync = false;
598
599 /* Quick out if not even configured to be synchronous */
600 if (SyncRepConfig == NULL)
601 return false;
602
603 /* Get standbys that are considered as synchronous at this moment */
605
606 /* Am I among the candidate sync standbys? */
607 for (i = 0; i < num_standbys; i++)
608 {
609 if (sync_standbys[i].is_me)
610 {
611 *am_sync = true;
612 break;
613 }
614 }
615
616 /*
617 * Nothing more to do if we are not managing a sync standby or there are
618 * not enough synchronous standbys.
619 */
620 if (!(*am_sync) ||
622 {
624 return false;
625 }
626
627 /*
628 * In a priority-based sync replication, the synced positions are the
629 * oldest ones among sync standbys. In a quorum-based, they are the Nth
630 * latest ones.
631 *
632 * SyncRepGetNthLatestSyncRecPtr() also can calculate the oldest
633 * positions. But we use SyncRepGetOldestSyncRecPtr() for that calculation
634 * because it's a bit more efficient.
635 *
636 * XXX If the numbers of current and requested sync standbys are the same,
637 * we can use SyncRepGetOldestSyncRecPtr() to calculate the synced
638 * positions even in a quorum-based sync replication.
639 */
641 {
644 }
645 else
646 {
650 }
651
653 return true;
654}

References fb(), i, InvalidXLogRecPtr, SyncRepConfigData::num_sync, pfree(), SYNC_REP_PRIORITY, SyncRepConfigData::syncrep_method, SyncRepConfig, SyncRepGetCandidateStandbys(), SyncRepGetNthLatestSyncRecPtr(), and SyncRepGetOldestSyncRecPtr().

Referenced by SyncRepReleaseWaiters().

◆ SyncRepInitConfig()

void SyncRepInitConfig ( void  )

Definition at line 445 of file syncrep.c.

446{
447 int priority;
448
449 /*
450 * Determine if we are a potential sync standby and remember the result
451 * for handling replies from standby.
452 */
455 {
459
461 (errmsg_internal("standby \"%s\" now has synchronous standby priority %d",
463 }
464}

References application_name, DEBUG1, ereport, errmsg_internal(), fb(), WalSnd::mutex, MyWalSnd, SpinLockAcquire, SpinLockRelease, WalSnd::sync_standby_priority, and SyncRepGetStandbyPriority().

Referenced by ProcessPendingWrites(), StartLogicalReplication(), StartReplication(), WalSndLoop(), and WalSndWaitForWal().

◆ SyncRepQueueInsert()

static void SyncRepQueueInsert ( int  mode)
static

Definition at line 372 of file syncrep.c.

373{
374 dlist_head *queue;
375 dlist_iter iter;
376
378 queue = &WalSndCtl->SyncRepQueue[mode];
379
380 dlist_reverse_foreach(iter, queue)
381 {
382 PGPROC *proc = dlist_container(PGPROC, syncRepLinks, iter.cur);
383
384 /*
385 * Stop at the queue element that we should insert after to ensure the
386 * queue is ordered by LSN.
387 */
388 if (proc->waitLSN < MyProc->waitLSN)
389 {
391 return;
392 }
393 }
394
395 /*
396 * If we get here, the list was either empty, or this process needs to be
397 * at the head.
398 */
400}

References Assert, dlist_iter::cur, dlist_container, dlist_insert_after(), dlist_push_head(), dlist_reverse_foreach, mode, MyProc, NUM_SYNC_REP_WAIT_MODE, PGPROC::syncRepLinks, WalSndCtlData::SyncRepQueue, PGPROC::waitLSN, and WalSndCtl.

Referenced by SyncRepWaitForLSN().

◆ SyncRepReleaseWaiters()

void SyncRepReleaseWaiters ( void  )

Definition at line 474 of file syncrep.c.

475{
480 bool got_recptr;
481 bool am_sync;
482 int numwrite = 0;
483 int numflush = 0;
484 int numapply = 0;
485
486 /*
487 * If this WALSender is serving a standby that is not on the list of
488 * potential sync standbys then we have nothing to do. If we are still
489 * starting up, still running base backup or the current flush position is
490 * still invalid, then leave quickly also. Streaming or stopping WAL
491 * senders are allowed to release waiters.
492 */
497 {
499 return;
500 }
501
502 /*
503 * We're a potential sync standby. Release waiters if there are enough
504 * sync standbys and we are considered as sync.
505 */
507
508 /*
509 * Check whether we are a sync standby or not, and calculate the synced
510 * positions among all sync standbys. (Note: although this step does not
511 * of itself require holding SyncRepLock, it seems like a good idea to do
512 * it after acquiring the lock. This ensures that the WAL pointers we use
513 * to release waiters are newer than any previous execution of this
514 * routine used.)
515 */
517
518 /*
519 * If we are managing a sync standby, though we weren't prior to this,
520 * then announce we are now a sync standby.
521 */
523 {
525
527 ereport(LOG,
528 (errmsg("standby \"%s\" is now a synchronous standby with priority %d",
530 else
531 ereport(LOG,
532 (errmsg("standby \"%s\" is now a candidate for quorum synchronous standby",
534 }
535
536 /*
537 * If the number of sync standbys is less than requested or we aren't
538 * managing a sync standby then just leave.
539 */
540 if (!got_recptr || !am_sync)
541 {
544 return;
545 }
546
547 /*
548 * Set the lsn first so that when we wake backends they will release up to
549 * this location.
550 */
552 {
555 }
557 {
560 }
562 {
565 }
566
568
569 elog(DEBUG3, "released %d procs up to write %X/%08X, %d procs up to flush %X/%08X, %d procs up to apply %X/%08X",
573}

References announce_next_takeover, application_name, DEBUG3, elog, ereport, errmsg(), fb(), WalSnd::flush, LOG, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MyWalSnd, WalSnd::state, SYNC_REP_PRIORITY, SYNC_REP_WAIT_APPLY, SYNC_REP_WAIT_FLUSH, SYNC_REP_WAIT_WRITE, WalSnd::sync_standby_priority, SyncRepConfigData::syncrep_method, SyncRepConfig, SyncRepGetSyncRecPtr(), SyncRepWakeQueue(), WalSndCtl, WALSNDSTATE_STOPPING, WALSNDSTATE_STREAMING, and XLogRecPtrIsValid.

Referenced by ProcessStandbyReplyMessage().

◆ SyncRepUpdateSyncStandbysDefined()

void SyncRepUpdateSyncStandbysDefined ( void  )

Definition at line 963 of file syncrep.c.

964{
966
969 {
971
972 /*
973 * If synchronous_standby_names has been reset to empty, it's futile
974 * for backends to continue waiting. Since the user no longer wants
975 * synchronous replication, we'd better wake them up.
976 */
978 {
979 int i;
980
981 for (i = 0; i < NUM_SYNC_REP_WAIT_MODE; i++)
982 SyncRepWakeQueue(true, i);
983 }
984
985 /*
986 * Only allow people to join the queue when there are synchronous
987 * standbys defined. Without this interlock, there's a race
988 * condition: we might wake up all the current waiters; then, some
989 * backend that hasn't yet reloaded its config might go to sleep on
990 * the queue (and never wake up). This prevents that.
991 */
994
996 }
998 {
1000
1001 /*
1002 * Note that there is no need to wake up the queues here. We would
1003 * reach this path only if SyncStandbysDefined() returns false, or it
1004 * would mean that some backends are waiting with the GUC set. See
1005 * SyncRepWaitForLSN().
1006 */
1008
1009 /*
1010 * Even if there is no sync standby defined, let the readers of this
1011 * information know that the sync standby data has been initialized.
1012 * This can just be done once, hence the previous check on
1013 * SYNC_STANDBY_INIT to avoid useless work.
1014 */
1016
1018 }
1019}

References Assert, fb(), i, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), NUM_SYNC_REP_WAIT_MODE, SYNC_STANDBY_DEFINED, SYNC_STANDBY_INIT, WalSndCtlData::sync_standbys_status, SyncRepWakeQueue(), SyncStandbysDefined, and WalSndCtl.

Referenced by UpdateSharedMemoryConfig().

◆ SyncRepWaitForLSN()

void SyncRepWaitForLSN ( XLogRecPtr  lsn,
bool  commit 
)

Definition at line 148 of file syncrep.c.

149{
150 int mode;
151
152 /*
153 * This should be called while holding interrupts during a transaction
154 * commit to prevent the follow-up shared memory queue cleanups to be
155 * influenced by external interruptions.
156 */
158
159 /*
160 * Fast exit if user has not requested sync replication, or there are no
161 * sync replication standby names defined.
162 *
163 * Since this routine gets called every commit time, it's important to
164 * exit quickly if sync replication is not requested.
165 *
166 * We check WalSndCtl->sync_standbys_status flag without the lock and exit
167 * immediately if SYNC_STANDBY_INIT is set (the checkpointer has
168 * initialized this data) but SYNC_STANDBY_DEFINED is missing (no sync
169 * replication requested).
170 *
171 * If SYNC_STANDBY_DEFINED is set, we need to check the status again later
172 * while holding the lock, to check the flag and operate the sync rep
173 * queue atomically. This is necessary to avoid the race condition
174 * described in SyncRepUpdateSyncStandbysDefined(). On the other hand, if
175 * SYNC_STANDBY_DEFINED is not set, the lock is not necessary because we
176 * don't touch the queue.
177 */
178 if (!SyncRepRequested() ||
179 ((((volatile WalSndCtlData *) WalSndCtl)->sync_standbys_status) &
181 return;
182
183 /* Cap the level for anything other than commit to remote flush only. */
184 if (commit)
186 else
188
191
194
195 /*
196 * We don't wait for sync rep if SYNC_STANDBY_DEFINED is not set. See
197 * SyncRepUpdateSyncStandbysDefined().
198 *
199 * Also check that the standby hasn't already replied. Unlikely race
200 * condition but we'll be fetching that cache line anyway so it's likely
201 * to be a low cost check.
202 *
203 * If the sync standby data has not been initialized yet
204 * (SYNC_STANDBY_INIT is not set), fall back to a check based on the LSN,
205 * then do a direct GUC check.
206 */
208 {
211 {
213 return;
214 }
215 }
216 else if (lsn <= WalSndCtl->lsn[mode])
217 {
218 /*
219 * The LSN is older than what we need to wait for. The sync standby
220 * data has not been initialized yet, but we are OK to not wait
221 * because we know that there is no point in doing so based on the
222 * LSN.
223 */
225 return;
226 }
227 else if (!SyncStandbysDefined())
228 {
229 /*
230 * If we are here, the sync standby data has not been initialized yet,
231 * and the LSN is newer than what need to wait for, so we have fallen
232 * back to the best thing we could do in this case: a check on
233 * SyncStandbysDefined() to see if the GUC is set or not.
234 *
235 * When the GUC has a value, we wait until the checkpointer updates
236 * the status data because we cannot be sure yet if we should wait or
237 * not. Here, the GUC has *no* value, we are sure that there is no
238 * point to wait; this matters for example when initializing a
239 * cluster, where we should never wait, and no sync standbys is the
240 * default behavior.
241 */
243 return;
244 }
245
246 /*
247 * Set our waitLSN so WALSender will know when to wake us, and add
248 * ourselves to the queue.
249 */
250 MyProc->waitLSN = lsn;
255
256 /* Alter ps display to show waiting for sync rep. */
258 {
259 char buffer[32];
260
261 sprintf(buffer, "waiting for %X/%08X", LSN_FORMAT_ARGS(lsn));
262 set_ps_display_suffix(buffer);
263 }
264
265 /*
266 * Wait for specified LSN to be confirmed.
267 *
268 * Each proc has its own wait latch, so we perform a normal latch
269 * check/wait loop here.
270 */
271 for (;;)
272 {
273 int rc;
274
275 /* Must reset the latch before testing state. */
277
278 /*
279 * Acquiring the lock is not needed, the latch ensures proper
280 * barriers. If it looks like we're done, we must really be done,
281 * because once walsender changes the state to SYNC_REP_WAIT_COMPLETE,
282 * it will never update it again, so we can't be seeing a stale value
283 * in that case.
284 */
286 break;
287
288 /*
289 * If a wait for synchronous replication is pending, we can neither
290 * acknowledge the commit nor raise ERROR or FATAL. The latter would
291 * lead the client to believe that the transaction aborted, which is
292 * not true: it's already committed locally. The former is no good
293 * either: the client has requested synchronous replication, and is
294 * entitled to assume that an acknowledged commit is also replicated,
295 * which might not be true. So in this case we issue a WARNING (which
296 * some clients may be able to interpret) and shut off further output.
297 * We do NOT reset ProcDiePending, so that the process will die after
298 * the commit is cleaned up.
299 */
300 if (ProcDiePending)
301 {
304 errmsg("canceling the wait for synchronous replication and terminating connection due to administrator command"),
305 errdetail("The transaction has already committed locally, but might not have been replicated to the standby.")));
308 break;
309 }
310
311 /*
312 * It's unclear what to do if a query cancel interrupt arrives. We
313 * can't actually abort at this point, but ignoring the interrupt
314 * altogether is not helpful, so we just terminate the wait with a
315 * suitable warning.
316 */
318 {
319 QueryCancelPending = false;
321 (errmsg("canceling wait for synchronous replication due to user request"),
322 errdetail("The transaction has already committed locally, but might not have been replicated to the standby.")));
324 break;
325 }
326
327 /*
328 * Wait on latch. Any condition that should wake us up will set the
329 * latch, so no need for timeout.
330 */
333
334 /*
335 * If the postmaster dies, we'll probably never get an acknowledgment,
336 * because all the wal sender processes will exit. So just bail out.
337 */
338 if (rc & WL_POSTMASTER_DEATH)
339 {
340 ProcDiePending = true;
343 break;
344 }
345 }
346
347 /*
348 * WalSender has checked our LSN and has removed us from queue. Clean up
349 * state and leave. It's OK to reset these shared memory fields without
350 * holding SyncRepLock, because any walsenders will ignore us anyway when
351 * we're not on the queue. We need a read barrier to make sure we see the
352 * changes to the queue link (this might be unnecessary without
353 * assertions, but better safe than sorry).
354 */
359
360 /* reset ps display to remove the suffix */
363}

References Assert, DestNone, dlist_node_is_detached(), ereport, errcode(), errdetail(), errmsg(), fb(), InterruptHoldoffCount, InvalidXLogRecPtr, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), Min, mode, MyLatch, MyProc, pg_read_barrier, ProcDiePending, QueryCancelPending, ResetLatch(), set_ps_display_remove_suffix(), set_ps_display_suffix(), sprintf, SYNC_REP_NOT_WAITING, SYNC_REP_WAIT_COMPLETE, SYNC_REP_WAIT_FLUSH, SYNC_REP_WAITING, SYNC_STANDBY_DEFINED, SYNC_STANDBY_INIT, WalSndCtlData::sync_standbys_status, SyncRepCancelWait(), PGPROC::syncRepLinks, SyncRepQueueInsert(), SyncRepRequested, PGPROC::syncRepState, SyncRepWaitMode, SyncStandbysDefined, update_process_title, WaitLatch(), PGPROC::waitLSN, WalSndCtl, WARNING, whereToSendOutput, WL_LATCH_SET, and WL_POSTMASTER_DEATH.

Referenced by EndPrepare(), RecordTransactionAbortPrepared(), RecordTransactionCommit(), and RecordTransactionCommitPrepared().

◆ SyncRepWakeQueue()

static int SyncRepWakeQueue ( bool  all,
int  mode 
)
static

Definition at line 906 of file syncrep.c.

907{
909 int numprocs = 0;
911
915
917 {
918 PGPROC *proc = dlist_container(PGPROC, syncRepLinks, iter.cur);
919
920 /*
921 * Assume the queue is ordered by LSN
922 */
923 if (!all && walsndctl->lsn[mode] < proc->waitLSN)
924 return numprocs;
925
926 /*
927 * Remove from queue.
928 */
930
931 /*
932 * SyncRepWaitForLSN() reads syncRepState without holding the lock, so
933 * make sure that it sees the queue link being removed before the
934 * syncRepState change.
935 */
937
938 /*
939 * Set state to complete; see SyncRepWaitForLSN() for discussion of
940 * the various states.
941 */
943
944 /*
945 * Wake only when we have set state and removed from queue.
946 */
947 SetLatch(&(proc->procLatch));
948
949 numprocs++;
950 }
951
952 return numprocs;
953}

References Assert, dlist_mutable_iter::cur, dlist_container, dlist_delete_thoroughly(), dlist_foreach_modify, fb(), LW_EXCLUSIVE, LWLockHeldByMeInMode(), mode, NUM_SYNC_REP_WAIT_MODE, pg_write_barrier, PGPROC::procLatch, SetLatch(), SYNC_REP_WAIT_COMPLETE, PGPROC::syncRepLinks, WalSndCtlData::SyncRepQueue, PGPROC::syncRepState, PGPROC::waitLSN, and WalSndCtl.

Referenced by SyncRepReleaseWaiters(), and SyncRepUpdateSyncStandbysDefined().

Variable Documentation

◆ announce_next_takeover

bool announce_next_takeover = true
static

Definition at line 95 of file syncrep.c.

Referenced by SyncRepReleaseWaiters().

◆ SyncRepConfig

◆ SyncRepStandbyNames

char* SyncRepStandbyNames

Definition at line 90 of file syncrep.c.

◆ SyncRepWaitMode

int SyncRepWaitMode = SYNC_REP_NO_WAIT
static

Definition at line 98 of file syncrep.c.

Referenced by assign_synchronous_commit(), and SyncRepWaitForLSN().