PostgreSQL Source Code git master
Loading...
Searching...
No Matches
slotsync.c File Reference
#include "postgres.h"
#include <time.h>
#include "access/xlog_internal.h"
#include "access/xlogrecovery.h"
#include "catalog/pg_database.h"
#include "libpq/pqsignal.h"
#include "pgstat.h"
#include "postmaster/interrupt.h"
#include "replication/logical.h"
#include "replication/slotsync.h"
#include "replication/snapbuild.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "storage/subsystems.h"
#include "tcop/tcopprot.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
#include "utils/pg_lsn.h"
#include "utils/ps_status.h"
#include "utils/timeout.h"
#include "utils/wait_event.h"
Include dependency graph for slotsync.c:

Go to the source code of this file.

Data Structures

struct  SlotSyncCtxStruct
 
struct  RemoteSlot
 

Macros

#define MIN_SLOTSYNC_WORKER_NAPTIME_MS   200
 
#define MAX_SLOTSYNC_WORKER_NAPTIME_MS   30000 /* 30s */
 
#define SLOTSYNC_RESTART_INTERVAL_SEC   10
 
#define SLOTSYNC_COLUMN_COUNT   10
 
#define PRIMARY_INFO_OUTPUT_COL_COUNT   2
 

Typedefs

typedef struct SlotSyncCtxStruct SlotSyncCtxStruct
 
typedef struct RemoteSlot RemoteSlot
 

Functions

static void SlotSyncShmemRequest (void *arg)
 
static void SlotSyncShmemInit (void *arg)
 
static void slotsync_failure_callback (int code, Datum arg)
 
static void update_synced_slots_inactive_since (void)
 
static void update_slotsync_skip_stats (SlotSyncSkipReason skip_reason)
 
static bool update_local_synced_slot (RemoteSlot *remote_slot, Oid remote_dbid)
 
static List * get_local_synced_slots (void)
 
static bool local_sync_slot_required (ReplicationSlot *local_slot, List *remote_slots)
 
static void drop_local_obsolete_slots (List *remote_slot_list)
 
static void reserve_wal_for_local_slot (XLogRecPtr restart_lsn)
 
static bool update_and_persist_local_synced_slot (RemoteSlot *remote_slot, Oid remote_dbid, bool *slot_persistence_pending)
 
static bool synchronize_one_slot (RemoteSlot *remote_slot, Oid remote_dbid, bool *slot_persistence_pending)
 
static List * fetch_remote_slots (WalReceiverConn *wrconn, List *slot_names)
 
static bool synchronize_slots (WalReceiverConn *wrconn, List *remote_slot_list, bool *slot_persistence_pending)
 
static void validate_remote_info (WalReceiverConn *wrconn)
 
char * CheckAndGetDbnameFromConninfo (void)
 
bool ValidateSlotSyncParams (int elevel)
 
static void slotsync_reread_config (void)
 
void HandleSlotSyncMessageInterrupt (void)
 
void ProcessSlotSyncMessage (void)
 
static void slotsync_worker_disconnect (int code, Datum arg)
 
static void slotsync_worker_onexit (int code, Datum arg)
 
static void wait_for_slot_activity (bool some_slot_updated)
 
static void check_and_set_sync_info (pid_t sync_process_pid)
 
static void reset_syncing_flag (void)
 
void ReplSlotSyncWorkerMain (const void *startup_data, size_t startup_data_len)
 
void ShutDownSlotSync (void)
 
bool SlotSyncWorkerCanRestart (void)
 
bool IsSyncingReplicationSlots (void)
 
static List * extract_slot_names (List *remote_slots)
 
void SyncReplicationSlots (WalReceiverConn *wrconn)
 

Variables

static SlotSyncCtxStruct * SlotSyncCtx = NULL
 
const ShmemCallbacks SlotSyncShmemCallbacks
 
bool sync_replication_slots = false
 
static long sleep_ms = MIN_SLOTSYNC_WORKER_NAPTIME_MS
 
static bool syncing_slots = false
 
volatile sig_atomic_t SlotSyncShutdownPending = false
 

Macro Definition Documentation

◆ MAX_SLOTSYNC_WORKER_NAPTIME_MS

#define MAX_SLOTSYNC_WORKER_NAPTIME_MS   30000 /* 30s */

Definition at line 140 of file slotsync.c.

◆ MIN_SLOTSYNC_WORKER_NAPTIME_MS

#define MIN_SLOTSYNC_WORKER_NAPTIME_MS   200

Definition at line 139 of file slotsync.c.

◆ PRIMARY_INFO_OUTPUT_COL_COUNT

#define PRIMARY_INFO_OUTPUT_COL_COUNT   2

◆ SLOTSYNC_COLUMN_COUNT

#define SLOTSYNC_COLUMN_COUNT   10

◆ SLOTSYNC_RESTART_INTERVAL_SEC

#define SLOTSYNC_RESTART_INTERVAL_SEC   10

Definition at line 145 of file slotsync.c.

Typedef Documentation

◆ RemoteSlot

◆ SlotSyncCtxStruct

Function Documentation

◆ check_and_set_sync_info()

static void check_and_set_sync_info ( pid_t  sync_process_pid)
static

Definition at line 1479 of file slotsync.c.

1480{
1482
1483 /*
1484 * Exit immediately if promotion has been triggered. This guards against
1485 * a new worker (or a call to pg_sync_replication_slots()) that starts
1486 * after the old worker was stopped by ShutDownSlotSync().
1487 */
1489 {
1491
1493 {
1495 errmsg("replication slot synchronization worker will not start because promotion was triggered"));
1496
1497 proc_exit(0);
1498 }
1499 else
1500 {
1501 /*
1502 * For the backend executing SQL function
1503 * pg_sync_replication_slots().
1504 */
1505 ereport(ERROR,
1507 errmsg("replication slot synchronization will not start because promotion was triggered"));
1508 }
1509 }
1510
1511 if (SlotSyncCtx->syncing)
1512 {
1514 ereport(ERROR,
1516 errmsg("cannot synchronize replication slots concurrently"));
1517 }
1518
1519 /* The pid must not be already assigned in SlotSyncCtx */
1521
1522 SlotSyncCtx->syncing = true;
1523
1524 /*
1525 * Advertise the required PID so that the startup process can kill the
1526 * slot sync process on promotion.
1527 */
1529
1531
1532 syncing_slots = true;
1533}
#define Assert(condition)
Definition c.h:943
int errcode(int sqlerrcode)
Definition elog.c:875
#define DEBUG1
Definition elog.h:31
#define ERROR
Definition elog.h:40
#define ereport(elevel,...)
Definition elog.h:152
void proc_exit(int code)
Definition ipc.c:105
#define AmLogicalSlotSyncWorkerProcess()
Definition miscadmin.h:392
#define InvalidPid
Definition miscadmin.h:32
static char * errmsg
static int fb(int x)
static SlotSyncCtxStruct * SlotSyncCtx
Definition slotsync.c:121
static bool syncing_slots
Definition slotsync.c:152
static void SpinLockRelease(volatile slock_t *lock)
Definition spin.h:62
static void SpinLockAcquire(volatile slock_t *lock)
Definition spin.h:56

References AmLogicalSlotSyncWorkerProcess, Assert, DEBUG1, ereport, errcode(), errmsg, ERROR, fb(), InvalidPid, SlotSyncCtxStruct::mutex, SlotSyncCtxStruct::pid, proc_exit(), SlotSyncCtx, SpinLockAcquire(), SpinLockRelease(), SlotSyncCtxStruct::stopSignaled, SlotSyncCtxStruct::syncing, and syncing_slots.

Referenced by ReplSlotSyncWorkerMain(), and SyncReplicationSlots().

◆ CheckAndGetDbnameFromConninfo()

char * CheckAndGetDbnameFromConninfo ( void  )

Definition at line 1158 of file slotsync.c.

1159{
1160 char *dbname;
1161
1162 /*
1163 * The slot synchronization needs a database connection for walrcv_exec to
1164 * work.
1165 */
1167 if (dbname == NULL)
1168 ereport(ERROR,
1170
1171 /*
1172 * translator: first %s is a connection option; second %s is a GUC
1173 * variable name
1174 */
1175 errmsg("replication slot synchronization requires \"%s\" to be specified in \"%s\"",
1176 "dbname", "primary_conninfo"));
1177 return dbname;
1178}
char * dbname
Definition streamutil.c:49
#define walrcv_get_dbname_from_conninfo(conninfo)
char * PrimaryConnInfo

References dbname, ereport, errcode(), errmsg, ERROR, fb(), PrimaryConnInfo, and walrcv_get_dbname_from_conninfo.

Referenced by pg_sync_replication_slots(), and ReplSlotSyncWorkerMain().

◆ drop_local_obsolete_slots()

static void drop_local_obsolete_slots ( List * remote_slot_list)
static

Definition at line 535 of file slotsync.c.

536{
538
540 {
541 /* Drop the local slot if it is not required to be retained. */
543 {
544 bool synced_slot;
545
546 /*
547 * Use shared lock to prevent a conflict with
548 * ReplicationSlotsDropDBSlots(), trying to drop the same slot
549 * during a drop-database operation.
550 */
552 0, AccessShareLock);
553
554 /*
555 * In the small window between getting the slot to drop and
556 * locking the database, there is a possibility of a parallel
557 * database drop by the startup process and the creation of a new
558 * slot by the user. This new user-created slot may end up using
559 * the same shared memory as that of 'local_slot'. Thus check if
560 * local_slot is still the synced one before performing the actual
561 * drop.
562 */
564 synced_slot = local_slot->in_use && local_slot->data.synced;
566
567 if (synced_slot)
568 {
569 /*
570 * Now acquire and drop the slot. Note we purposely don't
571 * request logical decoding to be disabled here: since this is
572 * a standby, which derives its logical decoding state from
573 * the primary, it would be wrong to do so.
574 */
575 ReplicationSlotAcquire(NameStr(local_slot->data.name), true, false);
577 }
578
580 0, AccessShareLock);
581
582 ereport(LOG,
583 errmsg("dropped replication slot \"%s\" of database with OID %u",
584 NameStr(local_slot->data.name),
585 local_slot->data.database));
586 }
587 }
588}
#define NameStr(name)
Definition c.h:835
#define LOG
Definition elog.h:32
void LockSharedObject(Oid classid, Oid objid, uint16 objsubid, LOCKMODE lockmode)
Definition lmgr.c:1088
void UnlockSharedObject(Oid classid, Oid objid, uint16 objsubid, LOCKMODE lockmode)
Definition lmgr.c:1148
#define AccessShareLock
Definition lockdefs.h:36
#define foreach_ptr(type, var, lst)
Definition pg_list.h:501
void ReplicationSlotAcquire(const char *name, bool nowait, bool error_if_invalid)
Definition slot.c:629
void ReplicationSlotDropAcquired(bool try_disable)
Definition slot.c:1031
static List * get_local_synced_slots(void)
Definition slotsync.c:451
static bool local_sync_slot_required(ReplicationSlot *local_slot, List *remote_slots)
Definition slotsync.c:482
Definition pg_list.h:54

References AccessShareLock, ereport, errmsg, fb(), foreach_ptr, get_local_synced_slots(), local_sync_slot_required(), LockSharedObject(), LOG, NameStr, ReplicationSlotAcquire(), ReplicationSlotDropAcquired(), SpinLockAcquire(), SpinLockRelease(), and UnlockSharedObject().

Referenced by synchronize_slots().

◆ extract_slot_names()

static List * extract_slot_names ( List * remote_slots)
static

Definition at line 1984 of file slotsync.c.

1985{
1986 List *slot_names = NIL;
1987
1989 {
1990 char *slot_name;
1991
1992 slot_name = pstrdup(remote_slot->name);
1993 slot_names = lappend(slot_names, slot_name);
1994 }
1995
1996 return slot_names;
1997}
List * lappend(List *list, void *datum)
Definition list.c:339
char * pstrdup(const char *in)
Definition mcxt.c:1910
#define NIL
Definition pg_list.h:68

References fb(), foreach_ptr, lappend(), NIL, and pstrdup().

Referenced by SyncReplicationSlots().

◆ fetch_remote_slots()

static List * fetch_remote_slots ( WalReceiverConn * wrconn,
List * slot_names 
)
static

Definition at line 901 of file slotsync.c.

902{
903#define SLOTSYNC_COLUMN_COUNT 10
906
907 WalRcvExecResult *res;
908 TupleTableSlot *tupslot;
910 StringInfoData query;
911
912 initStringInfo(&query);
914 "SELECT slot_name, plugin, confirmed_flush_lsn,"
915 " restart_lsn, catalog_xmin, two_phase,"
916 " two_phase_at, failover,"
917 " database, invalidation_reason"
918 " FROM pg_catalog.pg_replication_slots"
919 " WHERE failover and NOT temporary");
920
921 if (slot_names != NIL)
922 {
923 bool first_slot = true;
924
925 /*
926 * Construct the query to fetch only the specified slots
927 */
928 appendStringInfoString(&query, " AND slot_name IN (");
929
930 foreach_ptr(char, slot_name, slot_names)
931 {
932 if (!first_slot)
933 appendStringInfoString(&query, ", ");
934
935 appendStringInfoString(&query, quote_literal_cstr(slot_name));
936 first_slot = false;
937 }
938 appendStringInfoChar(&query, ')');
939 }
940
941 /* Execute the query */
943 pfree(query.data);
944 if (res->status != WALRCV_OK_TUPLES)
946 errmsg("could not fetch failover logical slots info from the primary server: %s",
947 res->err));
948
950 while (tuplestore_gettupleslot(res->tuplestore, true, false, tupslot))
951 {
952 bool isnull;
954 Datum d;
955 int col = 0;
956
958 &isnull));
959 Assert(!isnull);
960
961 remote_slot->plugin = TextDatumGetCString(slot_getattr(tupslot, ++col,
962 &isnull));
963 Assert(!isnull);
964
965 /*
966 * It is possible to get null values for LSN and Xmin if slot is
967 * invalidated on the primary server, so handle accordingly.
968 */
969 d = slot_getattr(tupslot, ++col, &isnull);
970 remote_slot->confirmed_lsn = isnull ? InvalidXLogRecPtr :
971 DatumGetLSN(d);
972
973 d = slot_getattr(tupslot, ++col, &isnull);
974 remote_slot->restart_lsn = isnull ? InvalidXLogRecPtr : DatumGetLSN(d);
975
976 d = slot_getattr(tupslot, ++col, &isnull);
977 remote_slot->catalog_xmin = isnull ? InvalidTransactionId :
979
980 remote_slot->two_phase = DatumGetBool(slot_getattr(tupslot, ++col,
981 &isnull));
982 Assert(!isnull);
983
984 d = slot_getattr(tupslot, ++col, &isnull);
985 remote_slot->two_phase_at = isnull ? InvalidXLogRecPtr : DatumGetLSN(d);
986
987 remote_slot->failover = DatumGetBool(slot_getattr(tupslot, ++col,
988 &isnull));
989 Assert(!isnull);
990
991 remote_slot->database = TextDatumGetCString(slot_getattr(tupslot,
992 ++col, &isnull));
993 Assert(!isnull);
994
995 d = slot_getattr(tupslot, ++col, &isnull);
996 remote_slot->invalidated = isnull ? RS_INVAL_NONE :
998
999 /* Sanity check */
1001
1002 /*
1003 * If restart_lsn, confirmed_lsn or catalog_xmin is invalid but the
1004 * slot is valid, that means we have fetched the remote_slot in its
1005 * RS_EPHEMERAL state. In such a case, don't sync it; we can always
1006 * sync it in the next sync cycle when the remote_slot is persisted
1007 * and has valid lsn(s) and xmin values.
1008 *
1009 * XXX: In future, if we plan to expose 'slot->data.persistency' in
1010 * pg_replication_slots view, then we can avoid fetching RS_EPHEMERAL
1011 * slots in the first place.
1012 */
1013 if ((!XLogRecPtrIsValid(remote_slot->restart_lsn) ||
1014 !XLogRecPtrIsValid(remote_slot->confirmed_lsn) ||
1015 !TransactionIdIsValid(remote_slot->catalog_xmin)) &&
1016 remote_slot->invalidated == RS_INVAL_NONE)
1018 else
1019 /* Create list of remote slots */
1021
1022 ExecClearTuple(tupslot);
1023 }
1024
1027
1028 return remote_slot_list;
1029}
#define TextDatumGetCString(d)
Definition builtins.h:99
TupleTableSlot * MakeSingleTupleTableSlot(TupleDesc tupdesc, const TupleTableSlotOps *tts_ops)
void ExecDropSingleTupleTableSlot(TupleTableSlot *slot)
const TupleTableSlotOps TTSOpsMinimalTuple
Definition execTuples.c:86
#define palloc0_object(type)
Definition fe_memutils.h:90
void pfree(void *pointer)
Definition mcxt.c:1619
static XLogRecPtr DatumGetLSN(Datum X)
Definition pg_lsn.h:25
static bool DatumGetBool(Datum X)
Definition postgres.h:100
uint64_t Datum
Definition postgres.h:70
static TransactionId DatumGetTransactionId(Datum X)
Definition postgres.h:282
unsigned int Oid
char * quote_literal_cstr(const char *rawstr)
Definition quote.c:101
ReplicationSlotInvalidationCause GetSlotInvalidationCause(const char *cause_name)
Definition slot.c:2932
@ RS_INVAL_NONE
Definition slot.h:60
#define SLOTSYNC_COLUMN_COUNT
void appendStringInfoString(StringInfo str, const char *s)
Definition stringinfo.c:230
void appendStringInfoChar(StringInfo str, char ch)
Definition stringinfo.c:242
void initStringInfo(StringInfo str)
Definition stringinfo.c:97
Tuplestorestate * tuplestore
TupleDesc tupledesc
WalRcvExecStatus status
#define InvalidTransactionId
Definition transam.h:31
#define TransactionIdIsValid(xid)
Definition transam.h:41
bool tuplestore_gettupleslot(Tuplestorestate *state, bool forward, bool copy, TupleTableSlot *slot)
static Datum slot_getattr(TupleTableSlot *slot, int attnum, bool *isnull)
Definition tuptable.h:417
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition tuptable.h:476
static WalReceiverConn * wrconn
Definition walreceiver.c:95
@ WALRCV_OK_TUPLES
static void walrcv_clear_result(WalRcvExecResult *walres)
#define walrcv_exec(conn, exec, nRetTypes, retTypes)
#define XLogRecPtrIsValid(r)
Definition xlogdefs.h:29
#define InvalidXLogRecPtr
Definition xlogdefs.h:28

References appendStringInfoChar(), appendStringInfoString(), Assert, StringInfoData::data, DatumGetBool(), DatumGetLSN(), DatumGetTransactionId(), ereport, WalRcvExecResult::err, errmsg, ERROR, ExecClearTuple(), ExecDropSingleTupleTableSlot(), fb(), foreach_ptr, GetSlotInvalidationCause(), initStringInfo(), InvalidTransactionId, InvalidXLogRecPtr, lappend(), MakeSingleTupleTableSlot(), NIL, palloc0_object, pfree(), quote_literal_cstr(), RS_INVAL_NONE, slot_getattr(), SLOTSYNC_COLUMN_COUNT, WalRcvExecResult::status, TextDatumGetCString, TransactionIdIsValid, TTSOpsMinimalTuple, WalRcvExecResult::tupledesc, WalRcvExecResult::tuplestore, tuplestore_gettupleslot(), walrcv_clear_result(), walrcv_exec, WALRCV_OK_TUPLES, wrconn, and XLogRecPtrIsValid.

Referenced by ReplSlotSyncWorkerMain(), and SyncReplicationSlots().

◆ get_local_synced_slots()

static List * get_local_synced_slots ( void  )
static

Definition at line 451 of file slotsync.c.

452{
454
456
458 {
460
461 /* Check if it is a synchronized slot */
462 if (s->in_use && s->data.synced)
463 {
466 }
467 }
468
470
471 return local_slots;
472}
int i
Definition isn.c:77
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1150
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1767
@ LW_SHARED
Definition lwlock.h:105
int max_replication_slots
Definition slot.c:161
ReplicationSlotCtlData * ReplicationSlotCtl
Definition slot.c:147
int max_repack_replication_slots
Definition slot.c:163
#define SlotIsLogical(slot)
Definition slot.h:288
ReplicationSlot replication_slots[1]
Definition slot.h:299
bool in_use
Definition slot.h:186
ReplicationSlotPersistentData data
Definition slot.h:213

References Assert, ReplicationSlot::data, fb(), i, ReplicationSlot::in_use, lappend(), LW_SHARED, LWLockAcquire(), LWLockRelease(), max_repack_replication_slots, max_replication_slots, NIL, ReplicationSlotCtlData::replication_slots, ReplicationSlotCtl, SlotIsLogical, and ReplicationSlotPersistentData::synced.

Referenced by drop_local_obsolete_slots().

◆ HandleSlotSyncMessageInterrupt()

void HandleSlotSyncMessageInterrupt ( void  )

Definition at line 1338 of file slotsync.c.

1339{
1340 InterruptPending = true;
1342 /* latch will be set by procsignal_sigusr1_handler */
1343}
volatile sig_atomic_t InterruptPending
Definition globals.c:32
volatile sig_atomic_t SlotSyncShutdownPending
Definition slotsync.c:159

References InterruptPending, and SlotSyncShutdownPending.

Referenced by procsignal_sigusr1_handler().

◆ IsSyncingReplicationSlots()

bool IsSyncingReplicationSlots ( void  )

Definition at line 1916 of file slotsync.c.

1917{
1918 return syncing_slots;
1919}

References syncing_slots.

Referenced by CreateDecodingContext(), GetStandbyFlushRecPtr(), ProcessSlotSyncMessage(), and ReplicationSlotCreate().

◆ local_sync_slot_required()

static bool local_sync_slot_required ( ReplicationSlot * local_slot,
List * remote_slots 
)
static

Definition at line 482 of file slotsync.c.

483{
484 bool remote_exists = false;
485 bool locally_invalidated = false;
486
488 {
489 if (strcmp(remote_slot->name, NameStr(local_slot->data.name)) == 0)
490 {
491 remote_exists = true;
492
493 /*
494 * If remote slot is not invalidated but local slot is marked as
495 * invalidated, then set locally_invalidated flag.
496 */
499 (remote_slot->invalidated == RS_INVAL_NONE) &&
500 (local_slot->data.invalidated != RS_INVAL_NONE);
502
503 break;
504 }
505 }
506
508}

References fb(), foreach_ptr, NameStr, RS_INVAL_NONE, SpinLockAcquire(), and SpinLockRelease().

Referenced by drop_local_obsolete_slots().

◆ ProcessSlotSyncMessage()

void ProcessSlotSyncMessage ( void  )

Definition at line 1354 of file slotsync.c.

1355{
1357
1359 {
1360 ereport(LOG,
1361 errmsg("replication slot synchronization worker will stop because promotion is triggered"));
1362 proc_exit(0);
1363 }
1364 else
1365 {
1366 /*
1367 * If sync has already completed, there is no need to interrupt the
1368 * caller with an error.
1369 */
1371 return;
1372
1373 ereport(ERROR,
1375 errmsg("replication slot synchronization will stop because promotion is triggered"));
1376 }
1377}
bool IsSyncingReplicationSlots(void)
Definition slotsync.c:1916

References AmLogicalSlotSyncWorkerProcess, ereport, errcode(), errmsg, ERROR, fb(), IsSyncingReplicationSlots(), LOG, proc_exit(), and SlotSyncShutdownPending.

Referenced by ProcessInterrupts().

◆ ReplSlotSyncWorkerMain()

void ReplSlotSyncWorkerMain ( const void * startup_data,
size_t  startup_data_len 
)

Definition at line 1559 of file slotsync.c.

1560{
1562 char *dbname;
1563 char *err;
1566
1568
1569 /* Release postmaster's working memory context */
1571 {
1574 }
1575
1577
1579
1580 /*
1581 * Create a per-backend PGPROC struct in shared memory. We must do this
1582 * before we access any shared memory.
1583 */
1584 InitProcess();
1585
1586 /*
1587 * Early initialization.
1588 */
1589 BaseInit();
1590
1592
1593 /*
1594 * If an exception is encountered, processing resumes here.
1595 *
1596 * We just need to clean up, report the error, and go away.
1597 *
1598 * If we do not have this handling here, then since this worker process
1599 * operates at the bottom of the exception stack, ERRORs turn into FATALs.
1600 * Therefore, we create our own exception handler to catch ERRORs.
1601 */
1602 if (sigsetjmp(local_sigjmp_buf, 1) != 0)
1603 {
1604 /* since not using PG_TRY, must reset error stack by hand */
1606
1607 /* Prevents interrupts while cleaning up */
1609
1610 /* Report the error to the server log */
1612
1613 /*
1614 * We can now go away. Note that because we called InitProcess, a
1615 * callback was registered to do ProcKill, which will clean up
1616 * necessary state.
1617 */
1618 proc_exit(0);
1619 }
1620
1621 /* We can now handle ereport(ERROR) */
1623
1624 /* Setup signal handling */
1633
1635
1636 ereport(LOG, errmsg("slot sync worker started"));
1637
1638 /* Register it as soon as SlotSyncCtx->pid is initialized. */
1640
1641 /*
1642 * Establishes SIGALRM handler and initialize timeout module. It is needed
1643 * by InitPostgres to register different timeouts.
1644 */
1646
1647 /* Load the libpq-specific functions */
1648 load_file("libpqwalreceiver", false);
1649
1650 /*
1651 * Unblock signals (they were blocked when the postmaster forked us)
1652 */
1654
1655 /*
1656 * Set always-secure search path, so malicious users can't redirect user
1657 * code (e.g. operators).
1658 *
1659 * It's not strictly necessary since we won't be scanning or writing to
1660 * any user table locally, but it's good to retain it here for added
1661 * precaution.
1662 */
1663 SetConfigOption("search_path", "", PGC_SUSET, PGC_S_OVERRIDE);
1664
1666
1667 /*
1668 * Connect to the database specified by the user in primary_conninfo. We
1669 * need a database connection for walrcv_exec to work which we use to
1670 * fetch slot information from the remote node. See comments atop
1671 * libpqrcv_exec.
1672 *
1673 * We do not specify a specific user here since the slot sync worker will
1674 * operate as a superuser. This is safe because the slot sync worker does
1675 * not interact with user tables, eliminating the risk of executing
1676 * arbitrary code within triggers.
1677 */
1679
1681
1683 if (cluster_name[0])
1684 appendStringInfo(&app_name, "%s_%s", cluster_name, "slotsync worker");
1685 else
1686 appendStringInfoString(&app_name, "slotsync worker");
1687
1688 /*
1689 * Establish the connection to the primary server for slot
1690 * synchronization.
1691 */
1692 wrconn = walrcv_connect(PrimaryConnInfo, false, false, false,
1693 app_name.data, &err);
1694
1695 if (!wrconn)
1696 ereport(ERROR,
1698 errmsg("synchronization worker \"%s\" could not connect to the primary server: %s",
1699 app_name.data, err));
1700
1701 pfree(app_name.data);
1702
1703 /*
1704 * Register the disconnection callback.
1705 *
1706 * XXX: This can be combined with previous cleanup registration of
1707 * slotsync_worker_onexit() but that will need the connection to be made
1708 * global and we want to avoid introducing global for this purpose.
1709 */
1711
1712 /*
1713 * Using the specified primary server connection, check that we are not a
1714 * cascading standby and slot configured in 'primary_slot_name' exists on
1715 * the primary server.
1716 */
1718
1719 /* Main loop to synchronize slots */
1720 for (;;)
1721 {
1722 bool some_slot_updated = false;
1723 bool started_tx = false;
1725
1727
1730
1731 /*
1732 * The syscache access in fetch_remote_slots() needs a transaction
1733 * env.
1734 */
1735 if (!IsTransactionState())
1736 {
1738 started_tx = true;
1739 }
1740
1744
1745 if (started_tx)
1747
1749 }
1750
1751 /*
1752 * The slot sync worker can't get here because it will only stop when it
1753 * receives a stop request from the startup process, or when there is an
1754 * error.
1755 */
1756 Assert(false);
1757}
sigset_t UnBlockSig
Definition pqsignal.c:22
void load_file(const char *filename, bool restricted)
Definition dfmgr.c:149
void EmitErrorReport(void)
Definition elog.c:1883
ErrorContextCallback * error_context_stack
Definition elog.c:100
sigjmp_buf * PG_exception_stack
Definition elog.c:102
void err(int eval, const char *fmt,...)
Definition err.c:43
int MyProcPid
Definition globals.c:49
void SetConfigOption(const char *name, const char *value, GucContext context, GucSource source)
Definition guc.c:4234
@ PGC_S_OVERRIDE
Definition guc.h:123
@ PGC_SUSET
Definition guc.h:78
char * cluster_name
Definition guc_tables.c:582
volatile sig_atomic_t ConfigReloadPending
Definition interrupt.c:27
void SignalHandlerForConfigReload(SIGNAL_ARGS)
Definition interrupt.c:61
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition ipc.c:344
void list_free_deep(List *list)
Definition list.c:1560
MemoryContext PostmasterContext
Definition mcxt.c:169
void MemoryContextDelete(MemoryContext context)
Definition mcxt.c:475
@ NormalProcessing
Definition miscadmin.h:481
@ InitProcessing
Definition miscadmin.h:480
#define GetProcessingMode()
Definition miscadmin.h:490
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:125
#define HOLD_INTERRUPTS()
Definition miscadmin.h:136
#define SetProcessingMode(mode)
Definition miscadmin.h:492
#define die(msg)
#define pqsignal
Definition port.h:548
#define PG_SIG_IGN
Definition port.h:552
#define PG_SIG_DFL
Definition port.h:551
void FloatExceptionHandler(SIGNAL_ARGS)
Definition postgres.c:3082
void StatementCancelHandler(SIGNAL_ARGS)
Definition postgres.c:3065
#define PointerGetDatum(X)
Definition postgres.h:354
#define InvalidOid
void BaseInit(void)
Definition postinit.c:616
void InitPostgres(const char *in_dbname, Oid dboid, const char *username, Oid useroid, uint32 flags, char *out_dbname)
Definition postinit.c:716
void procsignal_sigusr1_handler(SIGNAL_ARGS)
Definition procsignal.c:696
void init_ps_display(const char *fixed_part)
Definition ps_status.c:286
static void slotsync_worker_disconnect(int code, Datum arg)
Definition slotsync.c:1385
char * CheckAndGetDbnameFromConninfo(void)
Definition slotsync.c:1158
static void wait_for_slot_activity(bool some_slot_updated)
Definition slotsync.c:1444
static void slotsync_reread_config(void)
Definition slotsync.c:1254
static void slotsync_worker_onexit(int code, Datum arg)
Definition slotsync.c:1398
static void validate_remote_info(WalReceiverConn *wrconn)
Definition slotsync.c:1080
static void check_and_set_sync_info(pid_t sync_process_pid)
Definition slotsync.c:1479
static List * fetch_remote_slots(WalReceiverConn *wrconn, List *slot_names)
Definition slotsync.c:901
static bool synchronize_slots(WalReceiverConn *wrconn, List *remote_slot_list, bool *slot_persistence_pending)
Definition slotsync.c:1044
void InitProcess(void)
Definition proc.c:392
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition stringinfo.c:145
void InitializeTimeouts(void)
Definition timeout.c:470
#define walrcv_connect(conninfo, replication, logical, must_use_password, appname, err)
#define SIGCHLD
Definition win32_port.h:168
#define SIGHUP
Definition win32_port.h:158
#define SIGPIPE
Definition win32_port.h:163
#define SIGUSR1
Definition win32_port.h:170
#define SIGUSR2
Definition win32_port.h:171
bool IsTransactionState(void)
Definition xact.c:389
void StartTransactionCommand(void)
Definition xact.c:3109
void CommitTransactionCommand(void)
Definition xact.c:3207

References appendStringInfo(), appendStringInfoString(), Assert, BaseInit(), before_shmem_exit(), check_and_set_sync_info(), CHECK_FOR_INTERRUPTS, CheckAndGetDbnameFromConninfo(), cluster_name, CommitTransactionCommand(), ConfigReloadPending, dbname, die, EmitErrorReport(), ereport, err(), errcode(), errmsg, ERROR, error_context_stack, fb(), fetch_remote_slots(), FloatExceptionHandler(), GetProcessingMode, HOLD_INTERRUPTS, init_ps_display(), InitializeTimeouts(), InitPostgres(), InitProcess(), InitProcessing, initStringInfo(), InvalidOid, IsTransactionState(), list_free_deep(), load_file(), LOG, MemoryContextDelete(), MyProcPid, NIL, NormalProcessing, pfree(), PG_exception_stack, PG_SIG_DFL, PG_SIG_IGN, PGC_S_OVERRIDE, PGC_SUSET, PointerGetDatum, PostmasterContext, pqsignal, PrimaryConnInfo, proc_exit(), procsignal_sigusr1_handler(), SetConfigOption(), SetProcessingMode, SIGCHLD, SIGHUP, SignalHandlerForConfigReload(), SIGPIPE, SIGUSR1, SIGUSR2, slotsync_reread_config(), slotsync_worker_disconnect(), slotsync_worker_onexit(), SlotSyncCtx, StartTransactionCommand(), StatementCancelHandler(), synchronize_slots(), UnBlockSig, validate_remote_info(), wait_for_slot_activity(), walrcv_connect, and wrconn.

◆ reserve_wal_for_local_slot()

static void reserve_wal_for_local_slot ( XLogRecPtr  restart_lsn)
static

Definition at line 598 of file slotsync.c.

599{
602 XLogSegNo segno;
604
605 Assert(slot != NULL);
607
608 /*
609 * Acquire an exclusive lock to prevent the checkpoint process from
610 * concurrently calculating the minimum slot LSN (see
611 * CheckPointReplicationSlots), ensuring that if WAL reservation occurs
612 * first, the checkpoint must wait for the restart_lsn update before
613 * calculating the minimum LSN.
614 *
615 * Note: Unlike ReplicationSlotReserveWal(), this lock does not protect a
616 * newly synced slot from being invalidated if a concurrent checkpoint has
617 * invoked CheckPointReplicationSlots() before the WAL reservation here.
618 * This can happen because the initial restart_lsn received from the
619 * remote server can precede the redo pointer. Therefore, when selecting
620 * the initial restart_lsn, we consider using the redo pointer or the
621 * minimum slot LSN (if those values are greater than the remote
622 * restart_lsn) instead of relying solely on the remote value.
623 */
625
626 /*
627 * Determine the minimum non-removable LSN by comparing the redo pointer
628 * with the minimum slot LSN.
629 *
630 * The minimum slot LSN is considered because the redo pointer advances at
631 * every checkpoint, even when replication slots are present on the
632 * standby. In such scenarios, the redo pointer can exceed the remote
633 * restart_lsn, while WALs preceding the remote restart_lsn remain
634 * protected by a local replication slot.
635 */
638
641
642 /*
643 * If the minimum safe LSN is greater than the given restart_lsn, use it
644 * as the initial restart_lsn for the newly synced slot. Otherwise, use
645 * the given remote restart_lsn.
646 */
647 SpinLockAcquire(&slot->mutex);
648 slot->data.restart_lsn = Max(restart_lsn, min_safe_lsn);
649 SpinLockRelease(&slot->mutex);
650
652
654 if (XLogGetLastRemovedSegno() >= segno)
655 elog(ERROR, "WAL required by replication slot %s has been removed concurrently",
656 NameStr(slot->data.name));
657
659}
#define Max(x, y)
Definition c.h:1085
#define elog(elevel,...)
Definition elog.h:228
@ LW_EXCLUSIVE
Definition lwlock.h:104
ReplicationSlot * MyReplicationSlot
Definition slot.c:158
void ReplicationSlotsComputeRequiredLSN(void)
Definition slot.c:1304
slock_t mutex
Definition slot.h:183
XLogSegNo XLogGetLastRemovedSegno(void)
Definition xlog.c:3809
XLogRecPtr GetRedoRecPtr(void)
Definition xlog.c:6937
XLogRecPtr XLogGetReplicationSlotMinimumLSN(void)
Definition xlog.c:2700
int wal_segment_size
Definition xlog.c:150
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
uint64 XLogRecPtr
Definition xlogdefs.h:21
uint64 XLogSegNo
Definition xlogdefs.h:52

References Assert, ReplicationSlot::data, elog, ERROR, fb(), GetRedoRecPtr(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), Max, ReplicationSlot::mutex, MyReplicationSlot, ReplicationSlotPersistentData::name, NameStr, ReplicationSlotsComputeRequiredLSN(), ReplicationSlotPersistentData::restart_lsn, SpinLockAcquire(), SpinLockRelease(), wal_segment_size, XLByteToSeg, XLogGetLastRemovedSegno(), XLogGetReplicationSlotMinimumLSN(), and XLogRecPtrIsValid.

Referenced by synchronize_one_slot().

◆ reset_syncing_flag()

◆ ShutDownSlotSync()

void ShutDownSlotSync ( void  )

Definition at line 1818 of file slotsync.c.

1819{
1821
1823
1824 SlotSyncCtx->stopSignaled = true;
1825
1826 /*
1827 * Return if neither the slot sync worker is running nor the function
1828 * pg_sync_replication_slots() is executing.
1829 */
1830 if (!SlotSyncCtx->syncing)
1831 {
1834 return;
1835 }
1836
1838
1840
1841 /*
1842 * Signal process doing slotsync, if any, asking it to stop.
1843 */
1847
1848 /* Wait for slot sync to end */
1849 for (;;)
1850 {
1851 int rc;
1852
1853 /* Wait a bit, we don't expect to have to wait long */
1854 rc = WaitLatch(MyLatch,
1857
1858 if (rc & WL_LATCH_SET)
1859 {
1862 }
1863
1865
1866 /* Ensure that no process is syncing the slots. */
1867 if (!SlotSyncCtx->syncing)
1868 break;
1869
1871 }
1872
1874
1876}
struct Latch * MyLatch
Definition globals.c:65
void ResetLatch(Latch *latch)
Definition latch.c:374
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition latch.c:172
#define INVALID_PROC_NUMBER
Definition procnumber.h:26
int SendProcSignal(pid_t pid, ProcSignalReason reason, ProcNumber procNumber)
Definition procsignal.c:296
@ PROCSIG_SLOTSYNC_MESSAGE
Definition procsignal.h:39
static void update_synced_slots_inactive_since(void)
Definition slotsync.c:1766
#define WL_TIMEOUT
#define WL_EXIT_ON_PM_DEATH
#define WL_LATCH_SET

References CHECK_FOR_INTERRUPTS, fb(), INVALID_PROC_NUMBER, InvalidPid, SlotSyncCtxStruct::mutex, MyLatch, SlotSyncCtxStruct::pid, PROCSIG_SLOTSYNC_MESSAGE, ResetLatch(), SendProcSignal(), SlotSyncCtx, SpinLockAcquire(), SpinLockRelease(), SlotSyncCtxStruct::stopSignaled, SlotSyncCtxStruct::syncing, update_synced_slots_inactive_since(), WaitLatch(), WL_EXIT_ON_PM_DEATH, WL_LATCH_SET, and WL_TIMEOUT.

Referenced by FinishWalRecovery().

◆ slotsync_failure_callback()

static void slotsync_failure_callback ( int  code,
Datum  arg 
)
static

Definition at line 1948 of file slotsync.c.

1949{
1951
1952 /*
1953 * We need to do slots cleanup here just like WalSndErrorCleanup() does.
1954 *
1955 * The startup process during promotion invokes ShutDownSlotSync() which
1956 * waits for slot sync to finish and it does that by checking the
1957 * 'syncing' flag. Thus the SQL function must be done with slots' release
1958 * and cleanup to avoid any dangling temporary slots or active slots
1959 * before it marks itself as finished syncing.
1960 */
1961
1962 /* Make sure active replication slots are released */
1963 if (MyReplicationSlot != NULL)
1965
1966 /* Also cleanup the synced temporary slots. */
1968
1969 /*
1970 * If syncing_slots is still set, the process errored out without
1971 * resetting the flag. So, we need to clean up shared memory and reset the
1972 * flag here.
1973 */
1974 if (syncing_slots)
1976
1978}
Datum arg
Definition elog.c:1323
static Pointer DatumGetPointer(Datum X)
Definition postgres.h:332
void ReplicationSlotRelease(void)
Definition slot.c:769
void ReplicationSlotCleanup(bool synced_only)
Definition slot.c:861
static void reset_syncing_flag(void)
Definition slotsync.c:1539
#define walrcv_disconnect(conn)

References arg, DatumGetPointer(), fb(), MyReplicationSlot, ReplicationSlotCleanup(), ReplicationSlotRelease(), reset_syncing_flag(), syncing_slots, walrcv_disconnect, and wrconn.

Referenced by SyncReplicationSlots().

◆ slotsync_reread_config()

static void slotsync_reread_config ( void  )
static

Definition at line 1254 of file slotsync.c.

1255{
1260 bool conninfo_changed;
1263 bool parameter_changed = false;
1264
1267
1268 ConfigReloadPending = false;
1270
1275
1277 {
1279 {
1280 ereport(LOG,
1281 /* translator: %s is a GUC variable name */
1282 errmsg("replication slot synchronization worker will stop because \"%s\" is disabled",
1283 "sync_replication_slots"));
1284
1285 proc_exit(0);
1286 }
1287
1288 parameter_changed = true;
1289 }
1290 else
1291 {
1292 if (conninfo_changed ||
1295 {
1296
1298 {
1299 ereport(LOG,
1300 errmsg("replication slot synchronization worker will restart because of a parameter change"));
1301
1302 /*
1303 * Reset the last-start time for this worker so that the
1304 * postmaster can restart it without waiting for
1305 * SLOTSYNC_RESTART_INTERVAL_SEC.
1306 */
1308
1309 proc_exit(0);
1310 }
1311
1312 parameter_changed = true;
1313 }
1314 }
1315
1316 /*
1317 * If we have reached here with a parameter change, we must be running in
1318 * the SQL function; emit an error in such a case.
1319 */
1321 {
1323 ereport(ERROR,
1325 errmsg("replication slot synchronization will stop because of a parameter change"));
1326 }
1327
1328}
void ProcessConfigFile(GucContext context)
Definition guc-file.l:120
@ PGC_SIGHUP
Definition guc.h:75
bool sync_replication_slots
Definition slotsync.c:132
time_t last_start_time
Definition slotsync.c:117
bool hot_standby_feedback
Definition walreceiver.c:92
char * PrimarySlotName

References AmLogicalSlotSyncWorkerProcess, Assert, ConfigReloadPending, ereport, errcode(), errmsg, ERROR, fb(), hot_standby_feedback, SlotSyncCtxStruct::last_start_time, LOG, pfree(), PGC_SIGHUP, PrimaryConnInfo, PrimarySlotName, proc_exit(), ProcessConfigFile(), pstrdup(), SlotSyncCtx, and sync_replication_slots.

Referenced by ReplSlotSyncWorkerMain(), and SyncReplicationSlots().

◆ slotsync_worker_disconnect()

static void slotsync_worker_disconnect ( int  code,
Datum  arg 
)
static

Definition at line 1385 of file slotsync.c.

References arg, DatumGetPointer(), walrcv_disconnect, and wrconn.

Referenced by ReplSlotSyncWorkerMain().

◆ slotsync_worker_onexit()

static void slotsync_worker_onexit ( int  code,
Datum  arg 
)
static

Definition at line 1398 of file slotsync.c.

1399{
1400 /*
1401 * We need to do slots cleanup here just like WalSndErrorCleanup() does.
1402 *
1403 * The startup process during promotion invokes ShutDownSlotSync() which
1404 * waits for slot sync to finish and it does that by checking the
1405 * 'syncing' flag. Thus the slot sync worker must be done with slots'
1406 * release and cleanup to avoid any dangling temporary slots or active
1407 * slots before it marks itself as finished syncing.
1408 */
1409
1410 /* Make sure active replication slots are released */
1411 if (MyReplicationSlot != NULL)
1413
1414 /* Also cleanup the temporary slots. */
1416
1418
1420
1421 /*
1422 * If syncing_slots is true, it indicates that the process errored out
1423 * without resetting the flag. So, we need to clean up shared memory and
1424 * reset the flag here.
1425 */
1426 if (syncing_slots)
1427 {
1428 SlotSyncCtx->syncing = false;
1429 syncing_slots = false;
1430 }
1431
1433}

References fb(), InvalidPid, SlotSyncCtxStruct::mutex, MyReplicationSlot, SlotSyncCtxStruct::pid, ReplicationSlotCleanup(), ReplicationSlotRelease(), SlotSyncCtx, SpinLockAcquire(), SpinLockRelease(), SlotSyncCtxStruct::syncing, and syncing_slots.

Referenced by ReplSlotSyncWorkerMain().

◆ SlotSyncShmemInit()

static void SlotSyncShmemInit ( void * arg )
static

Definition at line 1937 of file slotsync.c.

1938{
1942}
static void SpinLockInit(volatile slock_t *lock)
Definition spin.h:50

References fb(), InvalidPid, SlotSyncCtxStruct::mutex, SlotSyncCtxStruct::pid, SlotSyncCtx, and SpinLockInit().

◆ SlotSyncShmemRequest()

static void SlotSyncShmemRequest ( void * arg )
static

Definition at line 1925 of file slotsync.c.

1926{
1927 ShmemRequestStruct(.name = "Slot Sync Data",
1928 .size = sizeof(SlotSyncCtxStruct),
1929 .ptr = (void **) &SlotSyncCtx,
1930 );
1931}
#define ShmemRequestStruct(...)
Definition shmem.h:176
const char * name

References name, ShmemRequestStruct, and SlotSyncCtx.

◆ SlotSyncWorkerCanRestart()

bool SlotSyncWorkerCanRestart ( void  )

Definition at line 1891 of file slotsync.c.

1892{
1893 time_t curtime = time(NULL);
1894
1895 /*
1896 * If first time through, or time somehow went backwards, always update
1897 * last_start_time to match the current clock and allow worker start.
1898 * Otherwise allow it only once enough time has elapsed.
1899 */
1900 if (SlotSyncCtx->last_start_time == 0 ||
1901 curtime < SlotSyncCtx->last_start_time ||
1903 {
1905 return true;
1906 }
1907 return false;
1908}
#define SLOTSYNC_RESTART_INTERVAL_SEC
Definition slotsync.c:145

References fb(), SlotSyncCtxStruct::last_start_time, SLOTSYNC_RESTART_INTERVAL_SEC, and SlotSyncCtx.

Referenced by LaunchMissingBackgroundProcesses().

◆ synchronize_one_slot()

static bool synchronize_one_slot ( RemoteSlot * remote_slot,
Oid  remote_dbid,
bool * slot_persistence_pending 
)
static

Definition at line 734 of file slotsync.c.

736{
737 ReplicationSlot *slot;
738 bool slot_updated = false;
739
740 /* Search for the named slot */
741 if ((slot = SearchNamedReplicationSlot(remote_slot->name, true)))
742 {
743 bool synced;
744
745 SpinLockAcquire(&slot->mutex);
746 synced = slot->data.synced;
747 SpinLockRelease(&slot->mutex);
748
749 /* User-created slot with the same name exists, raise ERROR. */
750 if (!synced)
753 errmsg("exiting from slot synchronization because same"
754 " name slot \"%s\" already exists on the standby",
755 remote_slot->name));
756
757 /*
758 * The slot has been synchronized before.
759 *
760 * It is important to acquire the slot here before checking
761 * invalidation. If we don't acquire the slot first, there could be a
762 * race condition in which the local slot is invalidated just after
763 * checking the 'invalidated' flag here, and we could end up
764 * overwriting the 'invalidated' flag with remote_slot's value. See
765 * InvalidatePossiblyObsoleteSlot() where it invalidates slot directly
766 * if the slot is not acquired by other processes.
767 *
768 * XXX: If it ever turns out that slot acquire/release is costly for
769 * cases when none of the slot properties is changed then we can do a
770 * pre-check to ensure that at least one of the slot properties is
771 * changed before acquiring the slot.
772 */
773 ReplicationSlotAcquire(remote_slot->name, true, false);
774
775 Assert(slot == MyReplicationSlot);
776
777 /*
778 * Copy the invalidation cause from remote only if local slot is not
779 * invalidated locally, we don't want to overwrite existing one.
780 */
781 if (slot->data.invalidated == RS_INVAL_NONE &&
782 remote_slot->invalidated != RS_INVAL_NONE)
783 {
784 SpinLockAcquire(&slot->mutex);
785 slot->data.invalidated = remote_slot->invalidated;
786 SpinLockRelease(&slot->mutex);
787
788 /* Make sure the invalidated state persists across server restart */
791
792 slot_updated = true;
793 }
794
795 /* Skip the sync of an invalidated slot */
796 if (slot->data.invalidated != RS_INVAL_NONE)
797 {
799
801 return slot_updated;
802 }
803
804 /* Slot not ready yet, let's attempt to make it sync-ready now. */
805 if (slot->data.persistency == RS_TEMPORARY)
806 {
810 }
811
812 /* Slot ready for sync, so sync it. */
813 else
814 {
815 /*
816 * Sanity check: As long as the invalidations are handled
817 * appropriately as above, this should never happen.
818 *
819 * We don't need to check restart_lsn here. See the comments in
820 * update_local_synced_slot() for details.
821 */
822 if (remote_slot->confirmed_lsn < slot->data.confirmed_flush)
824 errmsg_internal("cannot synchronize local slot \"%s\"",
825 remote_slot->name),
826 errdetail_internal("Local slot's start streaming location LSN(%X/%08X) is ahead of remote slot's LSN(%X/%08X).",
828 LSN_FORMAT_ARGS(remote_slot->confirmed_lsn)));
829
831 }
832 }
833 /* Otherwise create the slot first. */
834 else
835 {
838
839 /* Skip creating the local slot if remote_slot is invalidated already */
840 if (remote_slot->invalidated != RS_INVAL_NONE)
841 return false;
842
843 /*
844 * We create temporary slots instead of ephemeral slots here because
845 * we want the slots to survive after releasing them. This is done to
846 * avoid dropping and re-creating the slots in each synchronization
847 * cycle if the restart_lsn or catalog_xmin of the remote slot has not
848 * caught up.
849 */
851 remote_slot->two_phase,
852 false,
853 remote_slot->failover,
854 true);
855
856 /* For shorter lines. */
857 slot = MyReplicationSlot;
858
859 /* Avoid expensive operations while holding a spinlock. */
861
862 SpinLockAcquire(&slot->mutex);
863 slot->data.database = remote_dbid;
864 slot->data.plugin = plugin_name;
865 SpinLockRelease(&slot->mutex);
866
868
872 SpinLockAcquire(&slot->mutex);
875 SpinLockRelease(&slot->mutex);
879
882
883 slot_updated = true;
884 }
885
887
888 return slot_updated;
889}
uint32 TransactionId
Definition c.h:736
int int errdetail_internal(const char *fmt,...) pg_attribute_printf(1
int int errmsg_internal(const char *fmt,...) pg_attribute_printf(1
void namestrcpy(Name name, const char *str)
Definition name.c:233
TransactionId GetOldestSafeDecodingTransactionId(bool catalogOnly)
Definition procarray.c:2898
void ReplicationSlotMarkDirty(void)
Definition slot.c:1180
void ReplicationSlotCreate(const char *name, bool db_specific, ReplicationSlotPersistency persistency, bool two_phase, bool repack, bool failover, bool synced)
Definition slot.c:378
void ReplicationSlotsComputeRequiredXmin(bool already_locked)
Definition slot.c:1222
void ReplicationSlotSave(void)
Definition slot.c:1162
ReplicationSlot * SearchNamedReplicationSlot(const char *name, bool need_lock)
Definition slot.c:548
@ RS_TEMPORARY
Definition slot.h:47
@ SS_SKIP_INVALID
Definition slot.h:89
static void reserve_wal_for_local_slot(XLogRecPtr restart_lsn)
Definition slotsync.c:598
static void update_slotsync_skip_stats(SlotSyncSkipReason skip_reason)
Definition slotsync.c:189
static bool update_and_persist_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid, bool *slot_persistence_pending)
Definition slotsync.c:673
static bool update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid)
Definition slotsync.c:221
TransactionId catalog_xmin
Definition slot.h:122
ReplicationSlotPersistency persistency
Definition slot.h:106
ReplicationSlotInvalidationCause invalidated
Definition slot.h:128
TransactionId effective_catalog_xmin
Definition slot.h:210
Definition c.h:830
#define LSN_FORMAT_ARGS(lsn)
Definition xlogdefs.h:47

References Assert, ReplicationSlotPersistentData::catalog_xmin, ReplicationSlotPersistentData::confirmed_flush, ReplicationSlot::data, ReplicationSlotPersistentData::database, ReplicationSlot::effective_catalog_xmin, ereport, errcode(), errdetail_internal(), errmsg, errmsg_internal(), ERROR, fb(), GetOldestSafeDecodingTransactionId(), ReplicationSlotPersistentData::invalidated, InvalidTransactionId, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ReplicationSlot::mutex, MyReplicationSlot, namestrcpy(), ReplicationSlotPersistentData::persistency, ReplicationSlotPersistentData::plugin, ReplicationSlotAcquire(), ReplicationSlotCreate(), ReplicationSlotMarkDirty(), ReplicationSlotRelease(), ReplicationSlotSave(), ReplicationSlotsComputeRequiredXmin(), reserve_wal_for_local_slot(), RS_INVAL_NONE, RS_TEMPORARY, SearchNamedReplicationSlot(), SpinLockAcquire(), SpinLockRelease(), SS_SKIP_INVALID, ReplicationSlotPersistentData::synced, update_and_persist_local_synced_slot(), update_local_synced_slot(), and update_slotsync_skip_stats().

Referenced by synchronize_slots().

◆ synchronize_slots()

static bool synchronize_slots ( WalReceiverConn wrconn,
List remote_slot_list,
bool slot_persistence_pending 
)
static

Definition at line 1044 of file slotsync.c.

1046{
1047 bool some_slot_updated = false;
1048
1049 /* Drop local slots that no longer need to be synced. */
1051
1052 /* Now sync the slots locally */
1054 {
1055 Oid remote_dbid = get_database_oid(remote_slot->database, false);
1056
1057 /*
1058 * Use shared lock to prevent a conflict with
1059 * ReplicationSlotsDropDBSlots(), trying to drop the same slot during
1060 * a drop-database operation.
1061 */
1063
1066
1068 }
1069
1070 return some_slot_updated;
1071}
Oid get_database_oid(const char *dbname, bool missing_ok)
static void drop_local_obsolete_slots(List *remote_slot_list)
Definition slotsync.c:535
static bool synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid, bool *slot_persistence_pending)
Definition slotsync.c:734

References AccessShareLock, drop_local_obsolete_slots(), fb(), foreach_ptr, get_database_oid(), LockSharedObject(), synchronize_one_slot(), and UnlockSharedObject().

Referenced by ReplSlotSyncWorkerMain(), and SyncReplicationSlots().

◆ SyncReplicationSlots()

void SyncReplicationSlots ( WalReceiverConn wrconn)

Definition at line 2010 of file slotsync.c.

2011{
2013 {
2015 List *slot_names = NIL; /* List of slot names to track */
2017
2019
2021
2022 /*
2023 * Setup and use a per-sync-cycle memory context, which is reset every
2024 * time we loop below. This avoids having to individually free the memory
2025 * used in each sync cycle.
2026 */
2028 "slot sync retry context",
2030
2031 /* Retry until all the slots are sync-ready */
2032 for (;;)
2033 {
2034 bool slot_persistence_pending = false;
2035 bool some_slot_updated = false;
2037
2038 /* Check for interrupts and config changes */
2040
2043
2044 /* We must be in a valid transaction state */
2046
2049
2050 /*
2051 * Fetch remote slot info for the given slot_names. If slot_names
2052 * is NIL, fetch all failover-enabled slots. Note that we reuse
2053 * slot_names from the first iteration; re-fetching all failover
2054 * slots each time could cause an endless loop. Instead of
2055 * reprocessing only the pending slots in each iteration, it's
2056 * better to process all the slots received in the first
2057 * iteration. This ensures that by the time we're done, all slots
2058 * reflect the latest values.
2059 */
2060 remote_slots = fetch_remote_slots(wrconn, slot_names);
2061
2062 /* Attempt to synchronize slots */
2065
2066 /*
2067 * slot_names must survive later sync_retry_ctx resets, so copy it
2068 * in the outer context.
2069 */
2071
2072 /*
2073 * If slot_persistence_pending is true, extract slot names for
2074 * future iterations (only needed if we haven't done it yet)
2075 */
2076 if (slot_names == NIL && slot_persistence_pending)
2077 slot_names = extract_slot_names(remote_slots);
2078
2079 /* Done if all slots are persisted, i.e., are sync-ready */
2081 break;
2082
2083 /* wait before retrying again */
2085 }
2086
2088
2089 if (slot_names)
2090 list_free_deep(slot_names);
2091
2092 /* Cleanup the synced temporary slots */
2094
2095 /* We are done with sync, so reset sync flag */
2097 }
2099}
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition ipc.h:47
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition ipc.h:52
void MemoryContextReset(MemoryContext context)
Definition mcxt.c:406
MemoryContext CurrentMemoryContext
Definition mcxt.c:161
#define AllocSetContextCreate
Definition memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition memutils.h:160
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:138
static void slotsync_failure_callback(int code, Datum arg)
Definition slotsync.c:1948
static List * extract_slot_names(List *remote_slots)
Definition slotsync.c:1984

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert, check_and_set_sync_info(), CHECK_FOR_INTERRUPTS, ConfigReloadPending, CurrentMemoryContext, extract_slot_names(), fb(), fetch_remote_slots(), IsTransactionState(), list_free_deep(), MemoryContextDelete(), MemoryContextReset(), MemoryContextSwitchTo(), MyProcPid, NIL, PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, PointerGetDatum, ReplicationSlotCleanup(), reset_syncing_flag(), slotsync_failure_callback(), slotsync_reread_config(), synchronize_slots(), validate_remote_info(), wait_for_slot_activity(), and wrconn.

Referenced by pg_sync_replication_slots().

◆ update_and_persist_local_synced_slot()

static bool update_and_persist_local_synced_slot ( RemoteSlot * remote_slot,
Oid  remote_dbid,
bool * slot_persistence_pending 
)
static

Definition at line 673 of file slotsync.c.

675{
677
678 /* Slotsync skip stats are handled in function update_local_synced_slot() */
680
681 /*
682 * Check if the slot cannot be synchronized. Refer to the comment atop the
683 * file for details on this check.
684 */
686 {
687 /*
688 * We reach this point when the remote slot didn't catch up to locally
689 * reserved position, or it cannot reach the consistent point from the
690 * restart_lsn, or the WAL prior to the remote confirmed flush LSN has
691 * not been received and flushed.
692 *
693 * We do not drop the slot because the restart_lsn and confirmed_lsn
694 * can be ahead of the current location when recreating the slot in
695 * the next cycle. It may take more time to create such a slot or
696 * reach the consistent point. Therefore, we keep this slot and
697 * attempt the synchronization in the next cycle.
698 *
699 * We also update the slot_persistence_pending parameter, so the SQL
700 * function can retry.
701 */
704
705 return false;
706 }
707
709
710 ereport(LOG,
711 errmsg("newly created replication slot \"%s\" is sync-ready now",
712 remote_slot->name));
713
714 return true;
715}
void ReplicationSlotPersist(void)
Definition slot.c:1197
@ SS_SKIP_NONE
Definition slot.h:82
SlotSyncSkipReason slotsync_skip_reason
Definition slot.h:284

References ereport, errmsg, fb(), LOG, MyReplicationSlot, ReplicationSlotPersist(), ReplicationSlot::slotsync_skip_reason, SS_SKIP_NONE, and update_local_synced_slot().

Referenced by synchronize_one_slot().

◆ update_local_synced_slot()

static bool update_local_synced_slot ( RemoteSlot * remote_slot,
Oid  remote_dbid 
)
static

Definition at line 221 of file slotsync.c.

222{
224 bool updated_xmin_or_lsn = false;
225 bool updated_config = false;
228
230
231 /*
232 * Make sure that concerned WAL is received and flushed before syncing
233 * slot to target lsn received from the primary server.
234 */
235 if (remote_slot->confirmed_lsn > latestFlushPtr)
236 {
238
239 /*
240 * Can get here only if GUC 'synchronized_standby_slots' on the
241 * primary server was not configured correctly.
242 */
243 ereport(LOG,
245 errmsg("skipping slot synchronization because the received slot sync"
246 " LSN %X/%08X for slot \"%s\" is ahead of the standby position %X/%08X",
247 LSN_FORMAT_ARGS(remote_slot->confirmed_lsn),
248 remote_slot->name,
250
251 return false;
252 }
253
254 /*
255 * Don't overwrite if we already have a newer catalog_xmin or
256 * restart_lsn.
257 */
258 if (remote_slot->restart_lsn < slot->data.restart_lsn ||
260 slot->data.catalog_xmin))
261 {
262 /* Update slot sync skip stats */
264
265 /*
266 * This can happen in the following situations:
267 *
268 * If the slot is temporary, it means either the initial WAL location
269 * reserved for the local slot is ahead of the remote slot's
270 * restart_lsn or the initial xmin_horizon computed for the local slot
271 * is ahead of the remote slot.
272 *
273 * If the slot is persistent, both restart_lsn and catalog_xmin of the
274 * synced slot could still be ahead of the remote slot. Since we use
275 * slot advance functionality to keep snapbuild/slot updated, it is
276 * possible that the restart_lsn and catalog_xmin are advanced to a
277 * later position than it has on the primary. This can happen when
278 * slot advancing machinery finds running xacts record after reaching
279 * the consistent state at a later point than the primary where it
280 * serializes the snapshot and updates the restart_lsn.
281 *
282 * We LOG the message if the slot is temporary as it can help the user
283 * to understand why the slot is not sync-ready. In the case of a
284 * persistent slot, it would be a more common case and won't directly
285 * impact the users, so we used DEBUG1 level to log the message.
286 */
288 errmsg("could not synchronize replication slot \"%s\"",
290 errdetail("Synchronization could lead to data loss, because the remote slot needs WAL at LSN %X/%08X and catalog xmin %u, but the standby has LSN %X/%08X and catalog xmin %u.",
291 LSN_FORMAT_ARGS(remote_slot->restart_lsn),
292 remote_slot->catalog_xmin,
293 LSN_FORMAT_ARGS(slot->data.restart_lsn),
294 slot->data.catalog_xmin));
295
296 /*
297 * Skip updating the configuration. This is required to avoid syncing
298 * two_phase_at without syncing confirmed_lsn. Otherwise, the prepared
299 * transaction between old confirmed_lsn and two_phase_at will
300 * unexpectedly get decoded and sent to the downstream after
301 * promotion. See comments in ReorderBufferFinishPrepared.
302 */
303 return false;
304 }
305
306 /*
307 * Attempt to sync LSNs and xmins only if remote slot is ahead of local
308 * slot.
309 */
310 if (remote_slot->confirmed_lsn > slot->data.confirmed_flush ||
311 remote_slot->restart_lsn > slot->data.restart_lsn ||
312 TransactionIdFollows(remote_slot->catalog_xmin,
313 slot->data.catalog_xmin))
314 {
315 /*
316 * We can't directly copy the remote slot's LSN or xmin unless there
317 * exists a consistent snapshot at that point. Otherwise, after
318 * promotion, the slots may not reach a consistent point before the
319 * confirmed_flush_lsn which can lead to a data loss. To avoid data
320 * loss, we let slot machinery advance the slot which ensures that
321 * snapbuilder/slot statuses are updated properly.
322 */
323 if (SnapBuildSnapshotExists(remote_slot->restart_lsn))
324 {
325 /*
326 * Update the slot info directly if there is a serialized snapshot
327 * at the restart_lsn, as the slot can quickly reach consistency
328 * at restart_lsn by restoring the snapshot.
329 */
330 SpinLockAcquire(&slot->mutex);
331 slot->data.restart_lsn = remote_slot->restart_lsn;
332 slot->data.confirmed_flush = remote_slot->confirmed_lsn;
333 slot->data.catalog_xmin = remote_slot->catalog_xmin;
334 SpinLockRelease(&slot->mutex);
335
336 updated_xmin_or_lsn = true;
337 }
338 else
339 {
344
347
348 /* Sanity check */
349 if (slot->data.confirmed_flush != remote_slot->confirmed_lsn)
351 errmsg_internal("synchronized confirmed_flush for slot \"%s\" differs from remote slot",
352 remote_slot->name),
353 errdetail_internal("Remote slot has LSN %X/%08X but local slot has LSN %X/%08X.",
354 LSN_FORMAT_ARGS(remote_slot->confirmed_lsn),
356
357 /*
358 * If we can't reach a consistent snapshot, the slot won't be
359 * persisted. See update_and_persist_local_synced_slot().
360 */
362 {
364
365 ereport(LOG,
366 errmsg("could not synchronize replication slot \"%s\"",
367 remote_slot->name),
368 errdetail("Synchronization could lead to data loss, because the standby could not build a consistent snapshot to decode WALs at LSN %X/%08X.",
370
372 }
373
374 /*
375 * It is possible that the slot's xmin or LSNs are not updated,
376 * when the synced slot has reached consistent snapshot state or
377 * cannot build one at all.
378 */
382 }
383 }
384
385 /* Update slot sync skip stats */
387
388 if (remote_dbid != slot->data.database ||
389 remote_slot->two_phase != slot->data.two_phase ||
390 remote_slot->failover != slot->data.failover ||
391 strcmp(remote_slot->plugin, NameStr(slot->data.plugin)) != 0 ||
392 remote_slot->two_phase_at != slot->data.two_phase_at)
393 {
395
396 /* Avoid expensive operations while holding a spinlock. */
398
399 SpinLockAcquire(&slot->mutex);
400 slot->data.plugin = plugin_name;
401 slot->data.database = remote_dbid;
402 slot->data.two_phase = remote_slot->two_phase;
403 slot->data.two_phase_at = remote_slot->two_phase_at;
404 slot->data.failover = remote_slot->failover;
405 SpinLockRelease(&slot->mutex);
406
407 updated_config = true;
408
409 /*
410 * Ensure that there is no risk of sending prepared transactions
411 * unexpectedly after the promotion.
412 */
414 }
415
416 /*
417 * We have to write the changed xmin to disk *before* we change the
418 * in-memory value, otherwise after a crash we wouldn't know that some
419 * catalog tuples might have been removed already.
420 */
422 {
425 }
426
427 /*
428 * Now the new xmin is safely on disk, we can let the global value
429 * advance. We do not take ProcArrayLock or similar since we only advance
430 * xmin here and there's not much harm done by a concurrent computation
431 * missing that.
432 */
434 {
435 SpinLockAcquire(&slot->mutex);
436 slot->effective_catalog_xmin = remote_slot->catalog_xmin;
437 SpinLockRelease(&slot->mutex);
438
441 }
442
444}
int errdetail(const char *fmt,...) pg_attribute_printf(1
XLogRecPtr LogicalSlotAdvanceAndCheckSnapState(XLogRecPtr moveto, bool *found_consistent_snapshot)
Definition logical.c:2102
const void * data
SlotSyncSkipReason
Definition slot.h:81
@ SS_SKIP_WAL_NOT_FLUSHED
Definition slot.h:83
@ SS_SKIP_NO_CONSISTENT_SNAPSHOT
Definition slot.h:87
@ SS_SKIP_WAL_OR_ROWS_REMOVED
Definition slot.h:85
bool SnapBuildSnapshotExists(XLogRecPtr lsn)
Definition snapbuild.c:2062
static bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition transam.h:297
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
XLogRecPtr GetStandbyFlushRecPtr(TimeLineID *tli)
Definition walsender.c:3858

References Assert, ReplicationSlotPersistentData::catalog_xmin, ReplicationSlotPersistentData::confirmed_flush, ReplicationSlot::data, ReplicationSlotPersistentData::database, DEBUG1, ReplicationSlot::effective_catalog_xmin, ereport, errcode(), errdetail(), errdetail_internal(), errmsg, errmsg_internal(), ERROR, ReplicationSlotPersistentData::failover, fb(), GetStandbyFlushRecPtr(), ReplicationSlotPersistentData::invalidated, LOG, LogicalSlotAdvanceAndCheckSnapState(), LSN_FORMAT_ARGS, ReplicationSlot::mutex, MyReplicationSlot, NameStr, namestrcpy(), ReplicationSlotPersistentData::persistency, ReplicationSlotPersistentData::plugin, ReplicationSlotMarkDirty(), ReplicationSlotSave(), ReplicationSlotsComputeRequiredLSN(), ReplicationSlotsComputeRequiredXmin(), ReplicationSlotPersistentData::restart_lsn, RS_INVAL_NONE, RS_TEMPORARY, SnapBuildSnapshotExists(), SpinLockAcquire(), SpinLockRelease(), SS_SKIP_NO_CONSISTENT_SNAPSHOT, SS_SKIP_NONE, SS_SKIP_WAL_NOT_FLUSHED, SS_SKIP_WAL_OR_ROWS_REMOVED, TransactionIdFollows(), TransactionIdPrecedes(), ReplicationSlotPersistentData::two_phase, ReplicationSlotPersistentData::two_phase_at, and update_slotsync_skip_stats().

Referenced by synchronize_one_slot(), and update_and_persist_local_synced_slot().

◆ update_slotsync_skip_stats()

static void update_slotsync_skip_stats ( SlotSyncSkipReason  skip_reason)
static

Definition at line 189 of file slotsync.c.

190{
191 ReplicationSlot *slot;
192
194
195 slot = MyReplicationSlot;
196
197 /*
198 * Update the slot sync related stats in pg_stat_replication_slots when a
199 * slot sync is skipped
200 */
203
204 /* Update the slot sync skip reason */
206 {
207 SpinLockAcquire(&slot->mutex);
209 SpinLockRelease(&slot->mutex);
210 }
211}
void pgstat_report_replslotsync(ReplicationSlot *slot)

References Assert, fb(), ReplicationSlot::mutex, MyReplicationSlot, pgstat_report_replslotsync(), ReplicationSlot::slotsync_skip_reason, SpinLockAcquire(), SpinLockRelease(), and SS_SKIP_NONE.

Referenced by synchronize_one_slot(), and update_local_synced_slot().

◆ update_synced_slots_inactive_since()

static void update_synced_slots_inactive_since ( void  )
static

Definition at line 1766 of file slotsync.c.

1767{
1768 TimestampTz now = 0;
1769
1770 /*
1771 * We need to update inactive_since only when we are promoting the standby,
1772 * so that inactive_since is interpreted correctly if the standby gets promoted
1773 * without a restart. We don't want the slots to appear inactive for a
1774 * long time after promotion if they haven't been synchronized recently.
1775 * Whoever acquires the slot, i.e., makes the slot active, will reset it.
1776 */
1777 if (!StandbyMode)
1778 return;
1779
1780 /* The slot sync worker or the SQL function mustn't be running by now */
1782
1784
1786 {
1788
1789 /* Check if it is a synchronized slot */
1790 if (s->in_use && s->data.synced)
1791 {
1793
1794 /* The slot must not be acquired by any process */
1796
1797 /* Use the same inactive_since time for all the slots. */
1798 if (now == 0)
1800
1802 }
1803 }
1804
1806}
TimestampTz GetCurrentTimestamp(void)
Definition timestamp.c:1649
Datum now(PG_FUNCTION_ARGS)
Definition timestamp.c:1613
int64 TimestampTz
Definition timestamp.h:39
static void ReplicationSlotSetInactiveSince(ReplicationSlot *s, TimestampTz ts, bool acquire_lock)
Definition slot.h:306
ProcNumber active_proc
Definition slot.h:192
bool StandbyMode

References ReplicationSlot::active_proc, Assert, ReplicationSlot::data, fb(), GetCurrentTimestamp(), i, ReplicationSlot::in_use, INVALID_PROC_NUMBER, InvalidPid, LW_SHARED, LWLockAcquire(), LWLockRelease(), max_repack_replication_slots, max_replication_slots, now(), SlotSyncCtxStruct::pid, ReplicationSlotCtlData::replication_slots, ReplicationSlotCtl, ReplicationSlotSetInactiveSince(), SlotIsLogical, SlotSyncCtx, StandbyMode, ReplicationSlotPersistentData::synced, and SlotSyncCtxStruct::syncing.

Referenced by ShutDownSlotSync().

◆ validate_remote_info()

static void validate_remote_info ( WalReceiverConn wrconn)
static

Definition at line 1080 of file slotsync.c.

1081{
1082#define PRIMARY_INFO_OUTPUT_COL_COUNT 2
1083 WalRcvExecResult *res;
1085 StringInfoData cmd;
1086 bool isnull;
1087 TupleTableSlot *tupslot;
1088 bool remote_in_recovery;
1089 bool primary_slot_valid;
1090 bool started_tx = false;
1091
1092 initStringInfo(&cmd);
1093 appendStringInfo(&cmd,
1094 "SELECT pg_is_in_recovery(), count(*) = 1"
1095 " FROM pg_catalog.pg_replication_slots"
1096 " WHERE slot_type='physical' AND slot_name=%s",
1098
1099 /* The syscache access in walrcv_exec() needs a transaction env. */
1100 if (!IsTransactionState())
1101 {
1103 started_tx = true;
1104 }
1105
1107 pfree(cmd.data);
1108
1109 if (res->status != WALRCV_OK_TUPLES)
1110 ereport(ERROR,
1111 errmsg("could not fetch primary slot name \"%s\" info from the primary server: %s",
1112 PrimarySlotName, res->err),
1113 errhint("Check if \"primary_slot_name\" is configured correctly."));
1114
1116 if (!tuplestore_gettupleslot(res->tuplestore, true, false, tupslot))
1117 elog(ERROR,
1118 "failed to fetch tuple for the primary server slot specified by \"primary_slot_name\"");
1119
1120 remote_in_recovery = DatumGetBool(slot_getattr(tupslot, 1, &isnull));
1121 Assert(!isnull);
1122
1123 /*
1124 * Slot sync is currently not supported on a cascading standby. This is
1125 * because if we allow it, the primary server needs to wait for all the
1126 * cascading standbys, otherwise, logical subscribers can still be ahead
1127 * of one of the cascading standbys which we plan to promote. Thus, to
1128 * avoid this additional complexity, we restrict it for the time being.
1129 */
1131 ereport(ERROR,
1133 errmsg("cannot synchronize replication slots from a standby server"));
1134
1135 primary_slot_valid = DatumGetBool(slot_getattr(tupslot, 2, &isnull));
1136 Assert(!isnull);
1137
1138 if (!primary_slot_valid)
1139 ereport(ERROR,
1141 /* translator: second %s is a GUC variable name */
1142 errmsg("replication slot \"%s\" specified by \"%s\" does not exist on primary server",
1143 PrimarySlotName, "primary_slot_name"));
1144
1147
1148 if (started_tx)
1150}
int errhint(const char *fmt,...) pg_attribute_printf(1
#define PRIMARY_INFO_OUTPUT_COL_COUNT

References appendStringInfo(), Assert, CommitTransactionCommand(), StringInfoData::data, DatumGetBool(), elog, ereport, WalRcvExecResult::err, errcode(), errhint(), errmsg, ERROR, ExecDropSingleTupleTableSlot(), fb(), initStringInfo(), IsTransactionState(), MakeSingleTupleTableSlot(), pfree(), PRIMARY_INFO_OUTPUT_COL_COUNT, PrimarySlotName, quote_literal_cstr(), slot_getattr(), StartTransactionCommand(), WalRcvExecResult::status, TTSOpsMinimalTuple, WalRcvExecResult::tupledesc, WalRcvExecResult::tuplestore, tuplestore_gettupleslot(), walrcv_clear_result(), walrcv_exec, WALRCV_OK_TUPLES, and wrconn.

Referenced by ReplSlotSyncWorkerMain(), and SyncReplicationSlots().

◆ ValidateSlotSyncParams()

bool ValidateSlotSyncParams ( int  elevel)

Definition at line 1185 of file slotsync.c.

1186{
1187 /*
1188 * Logical slot sync/creation requires logical decoding to be enabled.
1189 */
1191 {
1192 ereport(elevel,
1194 errmsg("replication slot synchronization requires \"effective_wal_level\" >= \"logical\" on the primary"),
1195 errhint("To enable logical decoding on primary, set \"wal_level\" >= \"logical\" or create at least one logical slot when \"wal_level\" = \"replica\"."));
1196
1197 return false;
1198 }
1199
1200 /*
1201 * A physical replication slot (primary_slot_name) is required on the
1202 * primary to ensure that the rows needed by the standby are not removed
1203 * after restarting, so that the synchronized slot on the standby will not
1204 * be invalidated.
1205 */
1206 if (PrimarySlotName == NULL || *PrimarySlotName == '\0')
1207 {
1208 ereport(elevel,
1210 /* translator: %s is a GUC variable name */
1211 errmsg("replication slot synchronization requires \"%s\" to be set", "primary_slot_name"));
1212 return false;
1213 }
1214
1215 /*
1216 * hot_standby_feedback must be enabled to cooperate with the physical
1217 * replication slot, which allows informing the primary about the xmin and
1218 * catalog_xmin values on the standby.
1219 */
1221 {
1222 ereport(elevel,
1224 /* translator: %s is a GUC variable name */
1225 errmsg("replication slot synchronization requires \"%s\" to be enabled",
1226 "hot_standby_feedback"));
1227 return false;
1228 }
1229
1230 /*
1231 * The primary_conninfo is required to make a connection to the primary
1232 * to get slot information.
1233 */
1234 if (PrimaryConnInfo == NULL || *PrimaryConnInfo == '\0')
1235 {
1236 ereport(elevel,
1238 /* translator: %s is a GUC variable name */
1239 errmsg("replication slot synchronization requires \"%s\" to be set",
1240 "primary_conninfo"));
1241 return false;
1242 }
1243
1244 return true;
1245}
bool IsLogicalDecodingEnabled(void)
Definition logicalctl.c:202

References ereport, errcode(), errhint(), errmsg, fb(), hot_standby_feedback, IsLogicalDecodingEnabled(), PrimaryConnInfo, and PrimarySlotName.

Referenced by LaunchMissingBackgroundProcesses(), and pg_sync_replication_slots().

◆ wait_for_slot_activity()

static void wait_for_slot_activity ( bool  some_slot_updated)
static

Definition at line 1444 of file slotsync.c.

1445{
1446 int rc;
1447
1448 if (!some_slot_updated)
1449 {
1450 /*
1451 * No slots were updated, so double the sleep time, but not beyond the
1452 * maximum allowable value.
1453 */
1455 }
1456 else
1457 {
1458 /*
1459 * Some slots were updated since the last sleep, so reset the sleep
1460 * time.
1461 */
1463 }
1464
1465 rc = WaitLatch(MyLatch,
1467 sleep_ms,
1469
1470 if (rc & WL_LATCH_SET)
1472}
#define Min(x, y)
Definition c.h:1091
#define MIN_SLOTSYNC_WORKER_NAPTIME_MS
Definition slotsync.c:139
static long sleep_ms
Definition slotsync.c:142
#define MAX_SLOTSYNC_WORKER_NAPTIME_MS
Definition slotsync.c:140

References fb(), MAX_SLOTSYNC_WORKER_NAPTIME_MS, Min, MIN_SLOTSYNC_WORKER_NAPTIME_MS, MyLatch, ResetLatch(), sleep_ms, WaitLatch(), WL_EXIT_ON_PM_DEATH, WL_LATCH_SET, and WL_TIMEOUT.

Referenced by ReplSlotSyncWorkerMain(), and SyncReplicationSlots().

Variable Documentation

◆ sleep_ms

Definition at line 142 of file slotsync.c.

Referenced by do_watch(), and wait_for_slot_activity().

◆ SlotSyncCtx

◆ SlotSyncShmemCallbacks

const ShmemCallbacks SlotSyncShmemCallbacks
Initial value:
= {
.request_fn = SlotSyncShmemRequest,
.init_fn = SlotSyncShmemInit,
}
static void SlotSyncShmemInit(void *arg)
Definition slotsync.c:1937
static void SlotSyncShmemRequest(void *arg)
Definition slotsync.c:1925

Definition at line 126 of file slotsync.c.

126 {
127 .request_fn = SlotSyncShmemRequest,
128 .init_fn = SlotSyncShmemInit,
129};

◆ SlotSyncShutdownPending

volatile sig_atomic_t SlotSyncShutdownPending = false

◆ sync_replication_slots

bool sync_replication_slots = false

◆ syncing_slots