PostgreSQL Source Code  git master
syncrep.c File Reference
#include "postgres.h"
#include <unistd.h>
#include "access/xact.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "replication/syncrep.h"
#include "replication/walsender.h"
#include "replication/walsender_private.h"
#include "storage/pmsignal.h"
#include "storage/proc.h"
#include "tcop/tcopprot.h"
#include "utils/builtins.h"
#include "utils/guc_hooks.h"
#include "utils/ps_status.h"
Include dependency graph for syncrep.c:

Go to the source code of this file.

Macros

#define SyncStandbysDefined()    (SyncRepStandbyNames != NULL && SyncRepStandbyNames[0] != '\0')
 

Functions

static void SyncRepQueueInsert (int mode)
 
static void SyncRepCancelWait (void)
 
static int SyncRepWakeQueue (bool all, int mode)
 
static bool SyncRepGetSyncRecPtr (XLogRecPtr *writePtr, XLogRecPtr *flushPtr, XLogRecPtr *applyPtr, bool *am_sync)
 
static void SyncRepGetOldestSyncRecPtr (XLogRecPtr *writePtr, XLogRecPtr *flushPtr, XLogRecPtr *applyPtr, SyncRepStandbyData *sync_standbys, int num_standbys)
 
static void SyncRepGetNthLatestSyncRecPtr (XLogRecPtr *writePtr, XLogRecPtr *flushPtr, XLogRecPtr *applyPtr, SyncRepStandbyData *sync_standbys, int num_standbys, uint8 nth)
 
static int SyncRepGetStandbyPriority (void)
 
static int standby_priority_comparator (const void *a, const void *b)
 
static int cmp_lsn (const void *a, const void *b)
 
void SyncRepWaitForLSN (XLogRecPtr lsn, bool commit)
 
void SyncRepCleanupAtProcExit (void)
 
void SyncRepInitConfig (void)
 
void SyncRepReleaseWaiters (void)
 
int SyncRepGetCandidateStandbys (SyncRepStandbyData **standbys)
 
void SyncRepUpdateSyncStandbysDefined (void)
 
bool check_synchronous_standby_names (char **newval, void **extra, GucSource source)
 
void assign_synchronous_standby_names (const char *newval, void *extra)
 
void assign_synchronous_commit (int newval, void *extra)
 

Variables

char * SyncRepStandbyNames
 
static bool announce_next_takeover = true
 
SyncRepConfigData * SyncRepConfig = NULL
 
static int SyncRepWaitMode = SYNC_REP_NO_WAIT
 

Macro Definition Documentation

◆ SyncStandbysDefined

#define SyncStandbysDefined ( )     (SyncRepStandbyNames != NULL && SyncRepStandbyNames[0] != '\0')

Definition at line 93 of file syncrep.c.

Function Documentation

◆ assign_synchronous_commit()

void assign_synchronous_commit ( int  newval,
void *  extra 
)

Definition at line 1060 of file syncrep.c.

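1061 {
1062  switch (newval)
1063  {
1064  case SYNCHRONOUS_COMMIT_REMOTE_WRITE:
1065  SyncRepWaitMode = SYNC_REP_WAIT_WRITE;
1066  break;
1067  case SYNCHRONOUS_COMMIT_REMOTE_FLUSH:
1068  SyncRepWaitMode = SYNC_REP_WAIT_FLUSH;
1069  break;
1070  case SYNCHRONOUS_COMMIT_REMOTE_APPLY:
1071  SyncRepWaitMode = SYNC_REP_WAIT_APPLY;
1072  break;
1073  default:
1074  SyncRepWaitMode = SYNC_REP_NO_WAIT;
1075  break;
1076  }
1077 }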
#define newval
static int SyncRepWaitMode
Definition: syncrep.c:99
#define SYNC_REP_NO_WAIT
Definition: syncrep.h:22
#define SYNC_REP_WAIT_WRITE
Definition: syncrep.h:23
#define SYNC_REP_WAIT_FLUSH
Definition: syncrep.h:24
#define SYNC_REP_WAIT_APPLY
Definition: syncrep.h:25
@ SYNCHRONOUS_COMMIT_REMOTE_WRITE
Definition: xact.h:72
@ SYNCHRONOUS_COMMIT_REMOTE_APPLY
Definition: xact.h:75
@ SYNCHRONOUS_COMMIT_REMOTE_FLUSH
Definition: xact.h:74

References newval, SYNC_REP_NO_WAIT, SYNC_REP_WAIT_APPLY, SYNC_REP_WAIT_FLUSH, SYNC_REP_WAIT_WRITE, SYNCHRONOUS_COMMIT_REMOTE_APPLY, SYNCHRONOUS_COMMIT_REMOTE_FLUSH, SYNCHRONOUS_COMMIT_REMOTE_WRITE, and SyncRepWaitMode.

◆ assign_synchronous_standby_names()

void assign_synchronous_standby_names ( const char *  newval,
void *  extra 
)

Definition at line 1054 of file syncrep.c.

1055 {
1056  SyncRepConfig = (SyncRepConfigData *) extra;
1057 }
SyncRepConfigData * SyncRepConfig
Definition: syncrep.c:98

References SyncRepConfig.

◆ check_synchronous_standby_names()

bool check_synchronous_standby_names ( char **  newval,
void **  extra,
GucSource  source 
)

Definition at line 997 of file syncrep.c.

998 {
999  if (*newval != NULL && (*newval)[0] != '\0')
1000  {
1001  int parse_rc;
1002  SyncRepConfigData *pconf;
1003 
1004  /* Reset communication variables to ensure a fresh start */
1005  syncrep_parse_result = NULL;
1006  syncrep_parse_error_msg = NULL;
1007 
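1008  /* Parse the synchronous_standby_names string */
1009  syncrep_scanner_init(*newval);
1010  parse_rc = syncrep_yyparse();
1011  syncrep_scanner_finish();
1012 
1013  if (parse_rc != 0 || syncrep_parse_result == NULL)
1014  {
1015  GUC_check_errcode(ERRCODE_SYNTAX_ERROR);
1016  if (syncrep_parse_error_msg)
1017  GUC_check_errdetail("%s", syncrep_parse_error_msg);
1018  else
1019  GUC_check_errdetail("synchronous_standby_names parser failed");
1020  return false;
1021  }
1022 
1023  if (syncrep_parse_result->num_sync <= 0)
1024  {
1025  GUC_check_errmsg("number of synchronous standbys (%d) must be greater than zero",
1026  syncrep_parse_result->num_sync);
1027  return false;
1028  }
1029 
1030  /* GUC extra value must be guc_malloc'd, not palloc'd */
1031  pconf = (SyncRepConfigData *)
1032  guc_malloc(LOG, syncrep_parse_result->config_size);
1033  if (pconf == NULL)
1034  return false;
1035  memcpy(pconf, syncrep_parse_result, syncrep_parse_result->config_size);
1036 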
1037  *extra = (void *) pconf;
1038 
1039  /*
1040  * We need not explicitly clean up syncrep_parse_result. It, and any
1041  * other cruft generated during parsing, will be freed when the
1042  * current memory context is deleted. (This code is generally run in
1043  * a short-lived context used for config file processing, so that will
1044  * not be very long.)
1045  */
1046  }
1047  else
1048  *extra = NULL;
1049 
1050  return true;
1051 }
#define LOG
Definition: elog.h:31
void GUC_check_errcode(int sqlerrcode)
Definition: guc.c:6609
void * guc_malloc(int elevel, size_t size)
Definition: guc.c:631
#define GUC_check_errmsg
Definition: guc.h:432
#define GUC_check_errdetail
Definition: guc.h:436
void syncrep_scanner_init(const char *str)
void syncrep_scanner_finish(void)
int syncrep_yyparse(void)
PGDLLIMPORT SyncRepConfigData * syncrep_parse_result
PGDLLIMPORT char * syncrep_parse_error_msg

References SyncRepConfigData::config_size, GUC_check_errcode(), GUC_check_errdetail, GUC_check_errmsg, guc_malloc(), LOG, newval, SyncRepConfigData::num_sync, syncrep_parse_error_msg, syncrep_parse_result, syncrep_scanner_finish(), syncrep_scanner_init(), and syncrep_yyparse().

◆ cmp_lsn()

static int cmp_lsn ( const void *  a,
const void *  b 
)
static

Definition at line 696 of file syncrep.c.

697 {
698  XLogRecPtr lsn1 = *((const XLogRecPtr *) a);
699  XLogRecPtr lsn2 = *((const XLogRecPtr *) b);
700 
701  if (lsn1 > lsn2)
702  return -1;
703  else if (lsn1 == lsn2)
704  return 0;
705  else
706  return 1;
707 }
int b
Definition: isn.c:70
int a
Definition: isn.c:69
uint64 XLogRecPtr
Definition: xlogdefs.h:21

References a, and b.

Referenced by SyncRepGetNthLatestSyncRecPtr().

◆ standby_priority_comparator()

static int standby_priority_comparator ( const void *  a,
const void *  b 
)
static

Definition at line 796 of file syncrep.c.

797 {
798  const SyncRepStandbyData *sa = (const SyncRepStandbyData *) a;
799  const SyncRepStandbyData *sb = (const SyncRepStandbyData *) b;
800 
801  /* First, sort by increasing priority value */
802  if (sa->sync_standby_priority != sb->sync_standby_priority)
803  return sa->sync_standby_priority - sb->sync_standby_priority;
804 
805  /*
806  * We might have equal priority values; arbitrarily break ties by position
807  * in the WalSnd array. (This is utterly bogus, since that is arrival
808  * order dependent, but there are regression tests that rely on it.)
809  */
810  return sa->walsnd_index - sb->walsnd_index;
811 }
int sync_standby_priority
Definition: syncrep.h:49

References a, b, SyncRepStandbyData::sync_standby_priority, and SyncRepStandbyData::walsnd_index.

Referenced by SyncRepGetCandidateStandbys().

◆ SyncRepCancelWait()

static void SyncRepCancelWait ( void  )
static

Definition at line 364 of file syncrep.c.

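365 {
366  LWLockAcquire(SyncRepLock, LW_EXCLUSIVE);
367  if (!dlist_node_is_detached(&MyProc->syncRepLinks))
368  dlist_delete_thoroughly(&MyProc->syncRepLinks);
369  MyProc->syncRepState = SYNC_REP_NOT_WAITING;
370  LWLockRelease(SyncRepLock);
371 }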
static void dlist_delete_thoroughly(dlist_node *node)
Definition: ilist.h:416
static bool dlist_node_is_detached(const dlist_node *node)
Definition: ilist.h:525
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1195
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1808
@ LW_EXCLUSIVE
Definition: lwlock.h:116
PGPROC * MyProc
Definition: proc.c:66
dlist_node syncRepLinks
Definition: proc.h:245
int syncRepState
Definition: proc.h:244
#define SYNC_REP_NOT_WAITING
Definition: syncrep.h:30

References dlist_delete_thoroughly(), dlist_node_is_detached(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MyProc, SYNC_REP_NOT_WAITING, PGPROC::syncRepLinks, and PGPROC::syncRepState.

Referenced by SyncRepWaitForLSN().

◆ SyncRepCleanupAtProcExit()

void SyncRepCleanupAtProcExit ( void  )

Definition at line 374 of file syncrep.c.

375 {
376  /*
377  * First check if we are removed from the queue without the lock to not
378  * slow down backend exit.
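379  */
380  if (!dlist_node_is_detached(&MyProc->syncRepLinks))
381  {
382  LWLockAcquire(SyncRepLock, LW_EXCLUSIVE);
383 
384  /* maybe we have just been removed, so recheck */
385  if (!dlist_node_is_detached(&MyProc->syncRepLinks))
386  dlist_delete_thoroughly(&MyProc->syncRepLinks);
387 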
388  LWLockRelease(SyncRepLock);
389  }
390 }

References dlist_delete_thoroughly(), dlist_node_is_detached(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MyProc, and PGPROC::syncRepLinks.

Referenced by ProcKill().

◆ SyncRepGetCandidateStandbys()

int SyncRepGetCandidateStandbys ( SyncRepStandbyData **  standbys)

Definition at line 717 of file syncrep.c.

718 {
719  int i;
720  int n;
721 
722  /* Create result array */
723  *standbys = (SyncRepStandbyData *)
724  palloc(max_wal_senders * sizeof(SyncRepStandbyData));
725 
726  /* Quick exit if sync replication is not requested */
727  if (SyncRepConfig == NULL)
728  return 0;
729 
730  /* Collect raw data from shared memory */
731  n = 0;
732  for (i = 0; i < max_wal_senders; i++)
733  {
734  volatile WalSnd *walsnd; /* Use volatile pointer to prevent code
735  * rearrangement */
736  SyncRepStandbyData *stby;
737  WalSndState state; /* not included in SyncRepStandbyData */
738 
739  walsnd = &WalSndCtl->walsnds[i];
740  stby = *standbys + n;
741 
742  SpinLockAcquire(&walsnd->mutex);
743  stby->pid = walsnd->pid;
744  state = walsnd->state;
745  stby->write = walsnd->write;
746  stby->flush = walsnd->flush;
747  stby->apply = walsnd->apply;
748  stby->sync_standby_priority = walsnd->sync_standby_priority;
749  SpinLockRelease(&walsnd->mutex);
750 
751  /* Must be active */
752  if (stby->pid == 0)
753  continue;
754 
755  /* Must be streaming or stopping */
756  if (state != WALSNDSTATE_STREAMING &&
757  state != WALSNDSTATE_STOPPING)
758  continue;
759 
760  /* Must be synchronous */
761  if (stby->sync_standby_priority == 0)
762  continue;
763 
764  /* Must have a valid flush position */
765  if (XLogRecPtrIsInvalid(stby->flush))
766  continue;
767 
768  /* OK, it's a candidate */
769  stby->walsnd_index = i;
770  stby->is_me = (walsnd == MyWalSnd);
771  n++;
772  }
773 
774  /*
775  * In quorum mode, we return all the candidates. In priority mode, if we
776  * have too many candidates then return only the num_sync ones of highest
777  * priority.
778  */
779  if (SyncRepConfig->syncrep_method == SYNC_REP_PRIORITY &&
780  n > SyncRepConfig->num_sync)
781  {
782  /* Sort by priority ... */
783  qsort(*standbys, n, sizeof(SyncRepStandbyData),
784  standby_priority_comparator);
785  /* ... then report just the first num_sync ones */
786  n = SyncRepConfig->num_sync;
787  }
788 
789  return n;
790 }
int i
Definition: isn.c:73
void * palloc(Size size)
Definition: mcxt.c:1226
#define qsort(a, b, c, d)
Definition: port.h:445
#define SpinLockRelease(lock)
Definition: spin.h:64
#define SpinLockAcquire(lock)
Definition: spin.h:62
uint8 syncrep_method
Definition: syncrep.h:68
XLogRecPtr apply
Definition: syncrep.h:48
XLogRecPtr write
Definition: syncrep.h:46
XLogRecPtr flush
Definition: syncrep.h:47
WalSnd walsnds[FLEXIBLE_ARRAY_MEMBER]
slock_t mutex
XLogRecPtr flush
WalSndState state
XLogRecPtr write
int sync_standby_priority
XLogRecPtr apply
Definition: regguts.h:323
static int standby_priority_comparator(const void *a, const void *b)
Definition: syncrep.c:796
#define SYNC_REP_PRIORITY
Definition: syncrep.h:35
WalSnd * MyWalSnd
Definition: walsender.c:113
int max_wal_senders
Definition: walsender.c:122
WalSndCtlData * WalSndCtl
Definition: walsender.c:110
WalSndState
@ WALSNDSTATE_STREAMING
@ WALSNDSTATE_STOPPING
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29

References SyncRepStandbyData::apply, WalSnd::apply, SyncRepStandbyData::flush, WalSnd::flush, i, SyncRepStandbyData::is_me, max_wal_senders, WalSnd::mutex, MyWalSnd, SyncRepConfigData::num_sync, palloc(), SyncRepStandbyData::pid, WalSnd::pid, qsort, SpinLockAcquire, SpinLockRelease, standby_priority_comparator(), WalSnd::state, SYNC_REP_PRIORITY, SyncRepStandbyData::sync_standby_priority, WalSnd::sync_standby_priority, SyncRepConfigData::syncrep_method, SyncRepConfig, SyncRepStandbyData::walsnd_index, WalSndCtl, WalSndCtlData::walsnds, WALSNDSTATE_STOPPING, WALSNDSTATE_STREAMING, SyncRepStandbyData::write, WalSnd::write, and XLogRecPtrIsInvalid.

Referenced by pg_stat_get_wal_senders(), and SyncRepGetSyncRecPtr().

◆ SyncRepGetNthLatestSyncRecPtr()

static void SyncRepGetNthLatestSyncRecPtr ( XLogRecPtr *  writePtr,
XLogRecPtr *  flushPtr,
XLogRecPtr *  applyPtr,
SyncRepStandbyData *  sync_standbys,
int  num_standbys,
uint8  nth 
)
static

Definition at line 651 of file syncrep.c.

657 {
658  XLogRecPtr *write_array;
659  XLogRecPtr *flush_array;
660  XLogRecPtr *apply_array;
661  int i;
662 
663  /* Should have enough candidates, or somebody messed up */
664  Assert(nth > 0 && nth <= num_standbys);
665 
666  write_array = (XLogRecPtr *) palloc(sizeof(XLogRecPtr) * num_standbys);
667  flush_array = (XLogRecPtr *) palloc(sizeof(XLogRecPtr) * num_standbys);
668  apply_array = (XLogRecPtr *) palloc(sizeof(XLogRecPtr) * num_standbys);
669 
670  for (i = 0; i < num_standbys; i++)
671  {
672  write_array[i] = sync_standbys[i].write;
673  flush_array[i] = sync_standbys[i].flush;
674  apply_array[i] = sync_standbys[i].apply;
675  }
676 
677  /* Sort each array in descending order */
678  qsort(write_array, num_standbys, sizeof(XLogRecPtr), cmp_lsn);
679  qsort(flush_array, num_standbys, sizeof(XLogRecPtr), cmp_lsn);
680  qsort(apply_array, num_standbys, sizeof(XLogRecPtr), cmp_lsn);
681 
682  /* Get Nth latest Write, Flush, Apply positions */
683  *writePtr = write_array[nth - 1];
684  *flushPtr = flush_array[nth - 1];
685  *applyPtr = apply_array[nth - 1];
686 
687  pfree(write_array);
688  pfree(flush_array);
689  pfree(apply_array);
690 }
Assert(fmt[strlen(fmt) - 1] !='\n')
void pfree(void *pointer)
Definition: mcxt.c:1456
static int cmp_lsn(const void *a, const void *b)
Definition: syncrep.c:696

References SyncRepStandbyData::apply, Assert(), cmp_lsn(), SyncRepStandbyData::flush, i, palloc(), pfree(), qsort, and SyncRepStandbyData::write.

Referenced by SyncRepGetSyncRecPtr().

◆ SyncRepGetOldestSyncRecPtr()

static void SyncRepGetOldestSyncRecPtr ( XLogRecPtr *  writePtr,
XLogRecPtr *  flushPtr,
XLogRecPtr *  applyPtr,
SyncRepStandbyData *  sync_standbys,
int  num_standbys 
)
static

Definition at line 618 of file syncrep.c.

623 {
624  int i;
625 
626  /*
627  * Scan through all sync standbys and calculate the oldest Write, Flush
628  * and Apply positions. We assume *writePtr et al were initialized to
629  * InvalidXLogRecPtr.
630  */
631  for (i = 0; i < num_standbys; i++)
632  {
633  XLogRecPtr write = sync_standbys[i].write;
634  XLogRecPtr flush = sync_standbys[i].flush;
635  XLogRecPtr apply = sync_standbys[i].apply;
636 
637  if (XLogRecPtrIsInvalid(*writePtr) || *writePtr > write)
638  *writePtr = write;
639  if (XLogRecPtrIsInvalid(*flushPtr) || *flushPtr > flush)
640  *flushPtr = flush;
641  if (XLogRecPtrIsInvalid(*applyPtr) || *applyPtr > apply)
642  *applyPtr = apply;
643  }
644 }
#define write(a, b, c)
Definition: win32.h:14

References SyncRepStandbyData::apply, SyncRepStandbyData::flush, i, SyncRepStandbyData::write, write, and XLogRecPtrIsInvalid.

Referenced by SyncRepGetSyncRecPtr().

◆ SyncRepGetStandbyPriority()

static int SyncRepGetStandbyPriority ( void  )
static

Definition at line 823 of file syncrep.c.

824 {
825  const char *standby_name;
826  int priority;
827  bool found = false;
828 
829  /*
830  * Since synchronous cascade replication is not allowed, we always set the
831  * priority of cascading walsender to zero.
832  */
833  if (am_cascading_walsender)
834  return 0;
835 
836  if (!SyncStandbysDefined() || SyncRepConfig == NULL)
837  return 0;
838 
839  standby_name = SyncRepConfig->member_names;
840  for (priority = 1; priority <= SyncRepConfig->nmembers; priority++)
841  {
842  if (pg_strcasecmp(standby_name, application_name) == 0 ||
843  strcmp(standby_name, "*") == 0)
844  {
845  found = true;
846  break;
847  }
848  standby_name += strlen(standby_name) + 1;
849  }
850 
851  if (!found)
852  return 0;
853 
854  /*
855  * In quorum-based sync replication, all the standbys in the list have the
856  * same priority, one.
857  */
858  return (SyncRepConfig->syncrep_method == SYNC_REP_PRIORITY) ? priority : 1;
859 }
char * application_name
Definition: guc_tables.c:541
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
char member_names[FLEXIBLE_ARRAY_MEMBER]
Definition: syncrep.h:71
#define SyncStandbysDefined()
Definition: syncrep.c:93
bool am_cascading_walsender
Definition: walsender.c:117

References am_cascading_walsender, application_name, SyncRepConfigData::member_names, SyncRepConfigData::nmembers, pg_strcasecmp(), SYNC_REP_PRIORITY, SyncRepConfigData::syncrep_method, SyncRepConfig, and SyncStandbysDefined.

Referenced by SyncRepInitConfig().

◆ SyncRepGetSyncRecPtr()

static bool SyncRepGetSyncRecPtr ( XLogRecPtr *  writePtr,
XLogRecPtr *  flushPtr,
XLogRecPtr *  applyPtr,
bool *  am_sync 
)
static

Definition at line 544 of file syncrep.c.

546 {
547  SyncRepStandbyData *sync_standbys;
548  int num_standbys;
549  int i;
550 
551  /* Initialize default results */
552  *writePtr = InvalidXLogRecPtr;
553  *flushPtr = InvalidXLogRecPtr;
554  *applyPtr = InvalidXLogRecPtr;
555  *am_sync = false;
556 
557  /* Quick out if not even configured to be synchronous */
558  if (SyncRepConfig == NULL)
559  return false;
560 
561  /* Get standbys that are considered as synchronous at this moment */
562  num_standbys = SyncRepGetCandidateStandbys(&sync_standbys);
563 
564  /* Am I among the candidate sync standbys? */
565  for (i = 0; i < num_standbys; i++)
566  {
567  if (sync_standbys[i].is_me)
568  {
569  *am_sync = true;
570  break;
571  }
572  }
573 
574  /*
575  * Nothing more to do if we are not managing a sync standby or there are
576  * not enough synchronous standbys.
577  */
578  if (!(*am_sync) ||
579  num_standbys < SyncRepConfig->num_sync)
580  {
581  pfree(sync_standbys);
582  return false;
583  }
584 
585  /*
586  * In a priority-based sync replication, the synced positions are the
587  * oldest ones among sync standbys. In a quorum-based, they are the Nth
588  * latest ones.
589  *
590  * SyncRepGetNthLatestSyncRecPtr() also can calculate the oldest
591  * positions. But we use SyncRepGetOldestSyncRecPtr() for that calculation
592  * because it's a bit more efficient.
593  *
594  * XXX If the numbers of current and requested sync standbys are the same,
595  * we can use SyncRepGetOldestSyncRecPtr() to calculate the synced
596  * positions even in a quorum-based sync replication.
597  */
598  if (SyncRepConfig->syncrep_method == SYNC_REP_PRIORITY)
599  {
600  SyncRepGetOldestSyncRecPtr(writePtr, flushPtr, applyPtr,
601  sync_standbys, num_standbys);
602  }
603  else
604  {
605  SyncRepGetNthLatestSyncRecPtr(writePtr, flushPtr, applyPtr,
606  sync_standbys, num_standbys,
607  SyncRepConfig->num_sync);
608  }
609 
610  pfree(sync_standbys);
611  return true;
612 }
static void SyncRepGetNthLatestSyncRecPtr(XLogRecPtr *writePtr, XLogRecPtr *flushPtr, XLogRecPtr *applyPtr, SyncRepStandbyData *sync_standbys, int num_standbys, uint8 nth)
Definition: syncrep.c:651
int SyncRepGetCandidateStandbys(SyncRepStandbyData **standbys)
Definition: syncrep.c:717
static void SyncRepGetOldestSyncRecPtr(XLogRecPtr *writePtr, XLogRecPtr *flushPtr, XLogRecPtr *applyPtr, SyncRepStandbyData *sync_standbys, int num_standbys)
Definition: syncrep.c:618
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28

References i, InvalidXLogRecPtr, SyncRepConfigData::num_sync, pfree(), SYNC_REP_PRIORITY, SyncRepConfigData::syncrep_method, SyncRepConfig, SyncRepGetCandidateStandbys(), SyncRepGetNthLatestSyncRecPtr(), and SyncRepGetOldestSyncRecPtr().

Referenced by SyncRepReleaseWaiters().

◆ SyncRepInitConfig()

void SyncRepInitConfig ( void  )

Definition at line 403 of file syncrep.c.

404 {
405  int priority;
406 
407  /*
408  * Determine if we are a potential sync standby and remember the result
409  * for handling replies from standby.
410  */
411  priority = SyncRepGetStandbyPriority();
412  if (MyWalSnd->sync_standby_priority != priority)
413  {
414  SpinLockAcquire(&MyWalSnd->mutex);
415  MyWalSnd->sync_standby_priority = priority;
416  SpinLockRelease(&MyWalSnd->mutex);
417 
418  ereport(DEBUG1,
419  (errmsg_internal("standby \"%s\" now has synchronous standby priority %u",
420  application_name, priority)));
421  }
422 }
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1156
#define DEBUG1
Definition: elog.h:30
#define ereport(elevel,...)
Definition: elog.h:149
static int SyncRepGetStandbyPriority(void)
Definition: syncrep.c:823

References application_name, DEBUG1, ereport, errmsg_internal(), WalSnd::mutex, MyWalSnd, SpinLockAcquire, SpinLockRelease, WalSnd::sync_standby_priority, and SyncRepGetStandbyPriority().

Referenced by ProcessPendingWrites(), StartLogicalReplication(), StartReplication(), WalSndLoop(), and WalSndWaitForWal().

◆ SyncRepQueueInsert()

static void SyncRepQueueInsert ( int  mode)
static

Definition at line 330 of file syncrep.c.

331 {
332  dlist_head *queue;
333  dlist_iter iter;
334 
335  Assert(mode >= 0 && mode < NUM_SYNC_REP_WAIT_MODE);
336  queue = &WalSndCtl->SyncRepQueue[mode];
337 
338  dlist_reverse_foreach(iter, queue)
339  {
340  PGPROC *proc = dlist_container(PGPROC, syncRepLinks, iter.cur);
341 
342  /*
343  * Stop at the queue element that we should insert after to ensure the
344  * queue is ordered by LSN.
345  */
346  if (proc->waitLSN < MyProc->waitLSN)
347  {
348  dlist_insert_after(&proc->syncRepLinks, &MyProc->syncRepLinks);
349  return;
350  }
351  }
352 
353  /*
354  * If we get here, the list was either empty, or this process needs to be
355  * at the head.
356  */
357  dlist_push_head(queue, &MyProc->syncRepLinks);
358 }
static void dlist_insert_after(dlist_node *after, dlist_node *node)
Definition: ilist.h:381
#define dlist_reverse_foreach(iter, lhead)
Definition: ilist.h:654
static void dlist_push_head(dlist_head *head, dlist_node *node)
Definition: ilist.h:347
#define dlist_container(type, membername, ptr)
Definition: ilist.h:593
static PgChecksumMode mode
Definition: pg_checksums.c:56
Definition: proc.h:162
XLogRecPtr waitLSN
Definition: proc.h:243
dlist_head SyncRepQueue[NUM_SYNC_REP_WAIT_MODE]
dlist_node * cur
Definition: ilist.h:179
#define NUM_SYNC_REP_WAIT_MODE
Definition: syncrep.h:27

References Assert(), dlist_iter::cur, dlist_container, dlist_insert_after(), dlist_push_head(), dlist_reverse_foreach, mode, MyProc, NUM_SYNC_REP_WAIT_MODE, PGPROC::syncRepLinks, WalSndCtlData::SyncRepQueue, PGPROC::waitLSN, and WalSndCtl.

Referenced by SyncRepWaitForLSN().

◆ SyncRepReleaseWaiters()

void SyncRepReleaseWaiters ( void  )

Definition at line 432 of file syncrep.c.

433 {
434  volatile WalSndCtlData *walsndctl = WalSndCtl;
435  XLogRecPtr writePtr;
436  XLogRecPtr flushPtr;
437  XLogRecPtr applyPtr;
438  bool got_recptr;
439  bool am_sync;
440  int numwrite = 0;
441  int numflush = 0;
442  int numapply = 0;
443 
444  /*
445  * If this WALSender is serving a standby that is not on the list of
446  * potential sync standbys then we have nothing to do. If we are still
447  * starting up, still running base backup or the current flush position is
448  * still invalid, then leave quickly also. Streaming or stopping WAL
449  * senders are allowed to release waiters.
450  */
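451  if (MyWalSnd->sync_standby_priority == 0 ||
452  (MyWalSnd->state != WALSNDSTATE_STREAMING &&
453  MyWalSnd->state != WALSNDSTATE_STOPPING) ||
454  XLogRecPtrIsInvalid(MyWalSnd->flush))
455  {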
456  announce_next_takeover = true;
457  return;
458  }
459 
460  /*
461  * We're a potential sync standby. Release waiters if there are enough
462  * sync standbys and we are considered as sync.
463  */
464  LWLockAcquire(SyncRepLock, LW_EXCLUSIVE);
465 
466  /*
467  * Check whether we are a sync standby or not, and calculate the synced
468  * positions among all sync standbys. (Note: although this step does not
469  * of itself require holding SyncRepLock, it seems like a good idea to do
470  * it after acquiring the lock. This ensures that the WAL pointers we use
471  * to release waiters are newer than any previous execution of this
472  * routine used.)
473  */
474  got_recptr = SyncRepGetSyncRecPtr(&writePtr, &flushPtr, &applyPtr, &am_sync);
475 
476  /*
477  * If we are managing a sync standby, though we weren't prior to this,
478  * then announce we are now a sync standby.
479  */
480  if (announce_next_takeover && am_sync)
481  {
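482  announce_next_takeover = false;
483 
484  if (SyncRepConfig->syncrep_method == SYNC_REP_PRIORITY)
485  ereport(LOG,
486  (errmsg("standby \"%s\" is now a synchronous standby with priority %u",
487  application_name, MyWalSnd->sync_standby_priority)));
488  else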
489  ereport(LOG,
490  (errmsg("standby \"%s\" is now a candidate for quorum synchronous standby",
491  application_name)));
492  }
493 
494  /*
495  * If the number of sync standbys is less than requested or we aren't
496  * managing a sync standby then just leave.
497  */
498  if (!got_recptr || !am_sync)
499  {
500  LWLockRelease(SyncRepLock);
501  announce_next_takeover = !am_sync;
502  return;
503  }
504 
505  /*
506  * Set the lsn first so that when we wake backends they will release up to
507  * this location.
508  */
509  if (walsndctl->lsn[SYNC_REP_WAIT_WRITE] < writePtr)
510  {
511  walsndctl->lsn[SYNC_REP_WAIT_WRITE] = writePtr;
512  numwrite = SyncRepWakeQueue(false, SYNC_REP_WAIT_WRITE);
513  }
514  if (walsndctl->lsn[SYNC_REP_WAIT_FLUSH] < flushPtr)
515  {
516  walsndctl->lsn[SYNC_REP_WAIT_FLUSH] = flushPtr;
517  numflush = SyncRepWakeQueue(false, SYNC_REP_WAIT_FLUSH);
518  }
519  if (walsndctl->lsn[SYNC_REP_WAIT_APPLY] < applyPtr)
520  {
521  walsndctl->lsn[SYNC_REP_WAIT_APPLY] = applyPtr;
522  numapply = SyncRepWakeQueue(false, SYNC_REP_WAIT_APPLY);
523  }
524 
525  LWLockRelease(SyncRepLock);
526 
527  elog(DEBUG3, "released %d procs up to write %X/%X, %d procs up to flush %X/%X, %d procs up to apply %X/%X",
528  numwrite, LSN_FORMAT_ARGS(writePtr),
529  numflush, LSN_FORMAT_ARGS(flushPtr),
530  numapply, LSN_FORMAT_ARGS(applyPtr));
531 }
int errmsg(const char *fmt,...)
Definition: elog.c:1069
#define DEBUG3
Definition: elog.h:28
XLogRecPtr lsn[NUM_SYNC_REP_WAIT_MODE]
static bool SyncRepGetSyncRecPtr(XLogRecPtr *writePtr, XLogRecPtr *flushPtr, XLogRecPtr *applyPtr, bool *am_sync)
Definition: syncrep.c:544
static int SyncRepWakeQueue(bool all, int mode)
Definition: syncrep.c:870
static bool announce_next_takeover
Definition: syncrep.c:96
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43

References announce_next_takeover, application_name, DEBUG3, elog(), ereport, errmsg(), WalSnd::flush, LOG, WalSndCtlData::lsn, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MyWalSnd, WalSnd::state, SYNC_REP_PRIORITY, SYNC_REP_WAIT_APPLY, SYNC_REP_WAIT_FLUSH, SYNC_REP_WAIT_WRITE, WalSnd::sync_standby_priority, SyncRepConfigData::syncrep_method, SyncRepConfig, SyncRepGetSyncRecPtr(), SyncRepWakeQueue(), WalSndCtl, WALSNDSTATE_STOPPING, WALSNDSTATE_STREAMING, and XLogRecPtrIsInvalid.

Referenced by ProcessStandbyReplyMessage().

◆ SyncRepUpdateSyncStandbysDefined()

void SyncRepUpdateSyncStandbysDefined ( void  )

Definition at line 927 of file syncrep.c.

928 {
929  bool sync_standbys_defined = SyncStandbysDefined();
930 
931  if (sync_standbys_defined != WalSndCtl->sync_standbys_defined)
932  {
933  LWLockAcquire(SyncRepLock, LW_EXCLUSIVE);
934 
935  /*
936  * If synchronous_standby_names has been reset to empty, it's futile
937  * for backends to continue waiting. Since the user no longer wants
938  * synchronous replication, we'd better wake them up.
939  */
940  if (!sync_standbys_defined)
941  {
942  int i;
943 
944  for (i = 0; i < NUM_SYNC_REP_WAIT_MODE; i++)
945  SyncRepWakeQueue(true, i);
946  }
947 
948  /*
949  * Only allow people to join the queue when there are synchronous
950  * standbys defined. Without this interlock, there's a race
951  * condition: we might wake up all the current waiters; then, some
952  * backend that hasn't yet reloaded its config might go to sleep on
953  * the queue (and never wake up). This prevents that.
954  */
955  WalSndCtl->sync_standbys_defined = sync_standbys_defined;
956 
957  LWLockRelease(SyncRepLock);
958  }
959 }

References i, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), NUM_SYNC_REP_WAIT_MODE, WalSndCtlData::sync_standbys_defined, SyncRepWakeQueue(), SyncStandbysDefined, and WalSndCtl.

Referenced by UpdateSharedMemoryConfig().

◆ SyncRepWaitForLSN()

void SyncRepWaitForLSN ( XLogRecPtr  lsn,
bool  commit 
)

Definition at line 149 of file syncrep.c.

150 {
151  int mode;
152 
153  /*
154  * This should be called while holding interrupts during a transaction
155  * commit to prevent the follow-up shared memory queue cleanups to be
156  * influenced by external interruptions.
157  */
158  Assert(InterruptHoldoffCount > 0);
159 
160  /*
161  * Fast exit if user has not requested sync replication, or there are no
162  * sync replication standby names defined.
163  *
164  * Since this routine gets called every commit time, it's important to
165  * exit quickly if sync replication is not requested. So we check
166  * WalSndCtl->sync_standbys_defined flag without the lock and exit
167  * immediately if it's false. If it's true, we need to check it again
168  * later while holding the lock, to check the flag and operate the sync
169  * rep queue atomically. This is necessary to avoid the race condition
170  * described in SyncRepUpdateSyncStandbysDefined(). On the other hand, if
171  * it's false, the lock is not necessary because we don't touch the queue.
172  */
173  if (!SyncRepRequested() ||
174  !((volatile WalSndCtlData *) WalSndCtl)->sync_standbys_defined)
175  return;
176 
177  /* Cap the level for anything other than commit to remote flush only. */
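178  if (commit)
179  mode = SyncRepWaitMode;
180  else
181  mode = Min(SyncRepWaitMode, SYNC_REP_WAIT_FLUSH);
182 
183  Assert(dlist_node_is_detached(&MyProc->syncRepLinks));
184  Assert(WalSndCtl != NULL);
185 
186  LWLockAcquire(SyncRepLock, LW_EXCLUSIVE);
187  Assert(MyProc->syncRepState == SYNC_REP_NOT_WAITING);
188 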
189  /*
190  * We don't wait for sync rep if WalSndCtl->sync_standbys_defined is not
191  * set. See SyncRepUpdateSyncStandbysDefined.
192  *
193  * Also check that the standby hasn't already replied. Unlikely race
194  * condition but we'll be fetching that cache line anyway so it's likely
195  * to be a low cost check.
196  */
197  if (!WalSndCtl->sync_standbys_defined ||
198  lsn <= WalSndCtl->lsn[mode])
199  {
200  LWLockRelease(SyncRepLock);
201  return;
202  }
203 
204  /*
205  * Set our waitLSN so WALSender will know when to wake us, and add
206  * ourselves to the queue.
207  */
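208  MyProc->waitLSN = lsn;
209  MyProc->syncRepState = SYNC_REP_WAITING;
210  SyncRepQueueInsert(mode);
211  Assert(SyncRepQueueIsOrderedByLSN(mode));
212  LWLockRelease(SyncRepLock);
213 
214  /* Alter ps display to show waiting for sync rep. */
215  if (update_process_title)
216  {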
217  char buffer[32];
218 
219  sprintf(buffer, "waiting for %X/%X", LSN_FORMAT_ARGS(lsn));
220  set_ps_display_suffix(buffer);
221  }
222 
223  /*
224  * Wait for specified LSN to be confirmed.
225  *
226  * Each proc has its own wait latch, so we perform a normal latch
227  * check/wait loop here.
228  */
229  for (;;)
230  {
231  int rc;
232 
233  /* Must reset the latch before testing state. */
234  ResetLatch(MyLatch);
235 
236  /*
237  * Acquiring the lock is not needed, the latch ensures proper
238  * barriers. If it looks like we're done, we must really be done,
239  * because once walsender changes the state to SYNC_REP_WAIT_COMPLETE,
240  * it will never update it again, so we can't be seeing a stale value
241  * in that case.
242  */
243  if (MyProc->syncRepState == SYNC_REP_WAIT_COMPLETE)
244  break;
245 
246  /*
247  * If a wait for synchronous replication is pending, we can neither
248  * acknowledge the commit nor raise ERROR or FATAL. The latter would
249  * lead the client to believe that the transaction aborted, which is
250  * not true: it's already committed locally. The former is no good
251  * either: the client has requested synchronous replication, and is
252  * entitled to assume that an acknowledged commit is also replicated,
253  * which might not be true. So in this case we issue a WARNING (which
254  * some clients may be able to interpret) and shut off further output.
255  * We do NOT reset ProcDiePending, so that the process will die after
256  * the commit is cleaned up.
257  */
258  if (ProcDiePending)
259  {
260  ereport(WARNING,
261  (errcode(ERRCODE_ADMIN_SHUTDOWN),
262  errmsg("canceling the wait for synchronous replication and terminating connection due to administrator command"),
263  errdetail("The transaction has already committed locally, but might not have been replicated to the standby.")));
264  whereToSendOutput = DestNone;
265  SyncRepCancelWait();
266  break;
267  }
268 
269  /*
270  * It's unclear what to do if a query cancel interrupt arrives. We
271  * can't actually abort at this point, but ignoring the interrupt
272  * altogether is not helpful, so we just terminate the wait with a
273  * suitable warning.
274  */
275  if (QueryCancelPending)
276  {
277  QueryCancelPending = false;
278  ereport(WARNING,
279  (errmsg("canceling wait for synchronous replication due to user request"),
280  errdetail("The transaction has already committed locally, but might not have been replicated to the standby.")));
281  SyncRepCancelWait();
282  break;
283  }
284 
285  /*
286  * Wait on latch. Any condition that should wake us up will set the
287  * latch, so no need for timeout.
288  */
289  rc = WaitLatch(MyLatch, WL_LATCH_SET | WL_POSTMASTER_DEATH, -1,
290  WAIT_EVENT_SYNC_REP);
291 
292  /*
293  * If the postmaster dies, we'll probably never get an acknowledgment,
294  * because all the wal sender processes will exit. So just bail out.
295  */
296  if (rc & WL_POSTMASTER_DEATH)
297  {
298  ProcDiePending = true;
299  whereToSendOutput = DestNone;
300  SyncRepCancelWait();
301  break;
302  }
303  }
304 
305  /*
306  * WalSender has checked our LSN and has removed us from queue. Clean up
307  * state and leave. It's OK to reset these shared memory fields without
308  * holding SyncRepLock, because any walsenders will ignore us anyway when
309  * we're not on the queue. We need a read barrier to make sure we see the
310  * changes to the queue link (this might be unnecessary without
311  * assertions, but better safe than sorry).
312  */
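313  pg_read_barrier();
314  Assert(dlist_node_is_detached(&MyProc->syncRepLinks));
315  MyProc->syncRepState = SYNC_REP_NOT_WAITING;
316  MyProc->waitLSN = 0;
317 
318  /* reset ps display to remove the suffix */
319  if (update_process_title)
320  set_ps_display_remove_suffix();
321 }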
#define pg_read_barrier()
Definition: atomics.h:153
#define Min(x, y)
Definition: c.h:993
@ DestNone
Definition: dest.h:87
int errdetail(const char *fmt,...)
Definition: elog.c:1202
int errcode(int sqlerrcode)
Definition: elog.c:858
#define WARNING
Definition: elog.h:36
volatile uint32 InterruptHoldoffCount
Definition: globals.c:40
volatile sig_atomic_t QueryCancelPending
Definition: globals.c:31
struct Latch * MyLatch
Definition: globals.c:58
volatile sig_atomic_t ProcDiePending
Definition: globals.c:32
void ResetLatch(Latch *latch)
Definition: latch.c:697
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:490
#define WL_LATCH_SET
Definition: latch.h:125
#define WL_POSTMASTER_DEATH
Definition: latch.h:129
#define sprintf
Definition: port.h:240
CommandDest whereToSendOutput
Definition: postgres.c:88
void set_ps_display_remove_suffix(void)
Definition: ps_status.c:396
void set_ps_display_suffix(const char *suffix)
Definition: ps_status.c:344
bool update_process_title
Definition: ps_status.c:31
static void SyncRepQueueInsert(int mode)
Definition: syncrep.c:330
static void SyncRepCancelWait(void)
Definition: syncrep.c:364
#define SyncRepRequested()
Definition: syncrep.h:18
#define SYNC_REP_WAITING
Definition: syncrep.h:31
#define SYNC_REP_WAIT_COMPLETE
Definition: syncrep.h:32

References Assert(), DestNone, dlist_node_is_detached(), ereport, errcode(), errdetail(), errmsg(), InterruptHoldoffCount, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), Min, mode, MyLatch, MyProc, pg_read_barrier, ProcDiePending, QueryCancelPending, ResetLatch(), set_ps_display_remove_suffix(), set_ps_display_suffix(), sprintf, SYNC_REP_NOT_WAITING, SYNC_REP_WAIT_COMPLETE, SYNC_REP_WAIT_FLUSH, SYNC_REP_WAITING, WalSndCtlData::sync_standbys_defined, SyncRepCancelWait(), PGPROC::syncRepLinks, SyncRepQueueInsert(), SyncRepRequested, PGPROC::syncRepState, SyncRepWaitMode, update_process_title, WaitLatch(), PGPROC::waitLSN, WalSndCtl, WARNING, whereToSendOutput, WL_LATCH_SET, and WL_POSTMASTER_DEATH.

Referenced by EndPrepare(), RecordTransactionAbortPrepared(), RecordTransactionCommit(), and RecordTransactionCommitPrepared().

◆ SyncRepWakeQueue()

static int SyncRepWakeQueue ( bool  all,
int  mode 
)
static

Definition at line 870 of file syncrep.c.

871 {
872  volatile WalSndCtlData *walsndctl = WalSndCtl;
873  int numprocs = 0;
874  dlist_mutable_iter iter;
875 
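876  Assert(mode >= 0 && mode < NUM_SYNC_REP_WAIT_MODE);
877  Assert(LWLockHeldByMeInMode(SyncRepLock, LW_EXCLUSIVE));
878  Assert(SyncRepQueueIsOrderedByLSN(mode));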
879 
880  dlist_foreach_modify(iter, &WalSndCtl->SyncRepQueue[mode])
881  {
882  PGPROC *proc = dlist_container(PGPROC, syncRepLinks, iter.cur);
883 
884  /*
885  * Assume the queue is ordered by LSN
886  */
887  if (!all && walsndctl->lsn[mode] < proc->waitLSN)
888  return numprocs;
889 
890  /*
891  * Remove from queue.
892  */
893  dlist_delete_thoroughly(&proc->syncRepLinks);
894 
895  /*
896  * SyncRepWaitForLSN() reads syncRepState without holding the lock, so
897  * make sure that it sees the queue link being removed before the
898  * syncRepState change.
899  */
900  pg_write_barrier();
901 
902  /*
903  * Set state to complete; see SyncRepWaitForLSN() for discussion of
904  * the various states.
905  */
906  proc->syncRepState = SYNC_REP_WAIT_COMPLETE;
907 
908  /*
909  * Wake only when we have set state and removed from queue.
910  */
911  SetLatch(&(proc->procLatch));
912 
913  numprocs++;
914  }
915 
916  return numprocs;
917 }
#define pg_write_barrier()
Definition: atomics.h:154
#define dlist_foreach_modify(iter, lhead)
Definition: ilist.h:640
void SetLatch(Latch *latch)
Definition: latch.c:605
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1964
Latch procLatch
Definition: proc.h:170
dlist_node * cur
Definition: ilist.h:200

References Assert(), dlist_mutable_iter::cur, dlist_container, dlist_delete_thoroughly(), dlist_foreach_modify, WalSndCtlData::lsn, LW_EXCLUSIVE, LWLockHeldByMeInMode(), mode, NUM_SYNC_REP_WAIT_MODE, pg_write_barrier, PGPROC::procLatch, SetLatch(), SYNC_REP_WAIT_COMPLETE, PGPROC::syncRepLinks, WalSndCtlData::SyncRepQueue, PGPROC::syncRepState, PGPROC::waitLSN, and WalSndCtl.

Referenced by SyncRepReleaseWaiters(), and SyncRepUpdateSyncStandbysDefined().

Variable Documentation

◆ announce_next_takeover

bool announce_next_takeover = true
static

Definition at line 96 of file syncrep.c.

Referenced by SyncRepReleaseWaiters().

◆ SyncRepConfig

SyncRepConfigData* SyncRepConfig = NULL

◆ SyncRepStandbyNames

char* SyncRepStandbyNames

Definition at line 91 of file syncrep.c.

◆ SyncRepWaitMode

int SyncRepWaitMode = SYNC_REP_NO_WAIT
static

Definition at line 99 of file syncrep.c.

Referenced by assign_synchronous_commit(), and SyncRepWaitForLSN().