PostgreSQL Source Code git master
slotsync.c File Reference
#include "postgres.h"
#include <time.h>
#include "access/xlog_internal.h"
#include "access/xlogrecovery.h"
#include "catalog/pg_database.h"
#include "commands/dbcommands.h"
#include "libpq/pqsignal.h"
#include "pgstat.h"
#include "postmaster/interrupt.h"
#include "replication/logical.h"
#include "replication/slotsync.h"
#include "replication/snapbuild.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "tcop/tcopprot.h"
#include "utils/builtins.h"
#include "utils/pg_lsn.h"
#include "utils/ps_status.h"
#include "utils/timeout.h"
Include dependency graph for slotsync.c:

Go to the source code of this file.

Data Structures

struct  SlotSyncCtxStruct
 
struct  RemoteSlot
 

Macros

#define MIN_SLOTSYNC_WORKER_NAPTIME_MS   200
 
#define MAX_SLOTSYNC_WORKER_NAPTIME_MS   30000 /* 30s */
 
#define SLOTSYNC_RESTART_INTERVAL_SEC   10
 
#define SLOTSYNC_COLUMN_COUNT   9
 
#define PRIMARY_INFO_OUTPUT_COL_COUNT   2
 

Typedefs

typedef struct SlotSyncCtxStruct SlotSyncCtxStruct
 
typedef struct RemoteSlot RemoteSlot
 

Functions

static void slotsync_failure_callback (int code, Datum arg)
 
static void update_synced_slots_inactive_since (void)
 
static bool update_local_synced_slot (RemoteSlot *remote_slot, Oid remote_dbid, bool *found_consistent_snapshot, bool *remote_slot_precedes)
 
static List * get_local_synced_slots (void)
 
static bool local_sync_slot_required (ReplicationSlot *local_slot, List *remote_slots)
 
static void drop_local_obsolete_slots (List *remote_slot_list)
 
static void reserve_wal_for_local_slot (XLogRecPtr restart_lsn)
 
static bool update_and_persist_local_synced_slot (RemoteSlot *remote_slot, Oid remote_dbid)
 
static bool synchronize_one_slot (RemoteSlot *remote_slot, Oid remote_dbid)
 
static bool synchronize_slots (WalReceiverConn *wrconn)
 
static void validate_remote_info (WalReceiverConn *wrconn)
 
char * CheckAndGetDbnameFromConninfo (void)
 
bool ValidateSlotSyncParams (int elevel)
 
static void slotsync_reread_config (void)
 
static void ProcessSlotSyncInterrupts (WalReceiverConn *wrconn)
 
static void slotsync_worker_disconnect (int code, Datum arg)
 
static void slotsync_worker_onexit (int code, Datum arg)
 
static void wait_for_slot_activity (bool some_slot_updated)
 
static void check_and_set_sync_info (pid_t worker_pid)
 
static void reset_syncing_flag ()
 
void ReplSlotSyncWorkerMain (char *startup_data, size_t startup_data_len)
 
void ShutDownSlotSync (void)
 
bool SlotSyncWorkerCanRestart (void)
 
bool IsSyncingReplicationSlots (void)
 
Size SlotSyncShmemSize (void)
 
void SlotSyncShmemInit (void)
 
void SyncReplicationSlots (WalReceiverConn *wrconn)
 

Variables

static SlotSyncCtxStruct * SlotSyncCtx = NULL
 
bool sync_replication_slots = false
 
static long sleep_ms = MIN_SLOTSYNC_WORKER_NAPTIME_MS
 
static bool syncing_slots = false
 

Macro Definition Documentation

◆ MAX_SLOTSYNC_WORKER_NAPTIME_MS

#define MAX_SLOTSYNC_WORKER_NAPTIME_MS   30000 /* 30s */

Definition at line 115 of file slotsync.c.

◆ MIN_SLOTSYNC_WORKER_NAPTIME_MS

#define MIN_SLOTSYNC_WORKER_NAPTIME_MS   200

Definition at line 114 of file slotsync.c.

◆ PRIMARY_INFO_OUTPUT_COL_COUNT

#define PRIMARY_INFO_OUTPUT_COL_COUNT   2

◆ SLOTSYNC_COLUMN_COUNT

#define SLOTSYNC_COLUMN_COUNT   9

◆ SLOTSYNC_RESTART_INTERVAL_SEC

#define SLOTSYNC_RESTART_INTERVAL_SEC   10

Definition at line 120 of file slotsync.c.

Typedef Documentation

◆ RemoteSlot

typedef struct RemoteSlot RemoteSlot

◆ SlotSyncCtxStruct

typedef struct SlotSyncCtxStruct SlotSyncCtxStruct

Function Documentation

◆ check_and_set_sync_info()

static void check_and_set_sync_info ( pid_t  worker_pid)
static

Definition at line 1269 of file slotsync.c.

1270{
1271 SpinLockAcquire(&SlotSyncCtx->mutex);
1272
1273 /* The worker pid must not be already assigned in SlotSyncCtx */
1274 Assert(worker_pid == InvalidPid || SlotSyncCtx->pid == InvalidPid);
1275
1276 /*
1277 * Emit an error if startup process signaled the slot sync machinery to
1278 * stop. See comments atop SlotSyncCtxStruct.
1279 */
1280 if (SlotSyncCtx->stopSignaled)
1281 {
1282 SpinLockRelease(&SlotSyncCtx->mutex);
1283 ereport(ERROR,
1284 errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1285 errmsg("cannot synchronize replication slots when standby promotion is ongoing"));
1286 }
1287
1288 if (SlotSyncCtx->syncing)
1289 {
1290 SpinLockRelease(&SlotSyncCtx->mutex);
1291 ereport(ERROR,
1292 errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1293 errmsg("cannot synchronize replication slots concurrently"));
1294 }
1295
1296 SlotSyncCtx->syncing = true;
1297
1298 /*
1299 * Advertise the required PID so that the startup process can kill the
1300 * slot sync worker on promotion.
1301 */
1302 SlotSyncCtx->pid = worker_pid;
1303
1304 SpinLockRelease(&SlotSyncCtx->mutex);
1305
1306 syncing_slots = true;
1307}
#define Assert(condition)
Definition: c.h:815
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
#define InvalidPid
Definition: miscadmin.h:32
static SlotSyncCtxStruct * SlotSyncCtx
Definition: slotsync.c:104
static bool syncing_slots
Definition: slotsync.c:127
#define SpinLockRelease(lock)
Definition: spin.h:61
#define SpinLockAcquire(lock)
Definition: spin.h:59

References Assert, ereport, errcode(), errmsg(), ERROR, InvalidPid, SlotSyncCtxStruct::mutex, SlotSyncCtxStruct::pid, SlotSyncCtx, SpinLockAcquire, SpinLockRelease, SlotSyncCtxStruct::stopSignaled, SlotSyncCtxStruct::syncing, and syncing_slots.

Referenced by ReplSlotSyncWorkerMain(), and SyncReplicationSlots().

◆ CheckAndGetDbnameFromConninfo()

char * CheckAndGetDbnameFromConninfo ( void  )

Definition at line 1010 of file slotsync.c.

1011{
1012 char *dbname;
1013
1014 /*
1015 * The slot synchronization needs a database connection for walrcv_exec to
1016 * work.
1017 */
1018 dbname = walrcv_get_dbname_from_conninfo(PrimaryConnInfo);
1019 if (dbname == NULL)
1020 ereport(ERROR,
1021 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1022
1023 /*
1024 * translator: first %s is a connection option; second %s is a GUC
1025 * variable name
1026 */
1027 errmsg("replication slot synchronization requires \"%s\" to be specified in \"%s\"",
1028 "dbname", "primary_conninfo"));
1029 return dbname;
1030}
char * dbname
Definition: streamutil.c:50
#define walrcv_get_dbname_from_conninfo(conninfo)
Definition: walreceiver.h:445
char * PrimaryConnInfo
Definition: xlogrecovery.c:97

References dbname, ereport, errcode(), errmsg(), ERROR, PrimaryConnInfo, and walrcv_get_dbname_from_conninfo.

Referenced by pg_sync_replication_slots(), and ReplSlotSyncWorkerMain().

◆ drop_local_obsolete_slots()

static void drop_local_obsolete_slots ( List * remote_slot_list)
static

Definition at line 415 of file slotsync.c.

416{
417 List *local_slots = get_local_synced_slots();
418
419 foreach_ptr(ReplicationSlot, local_slot, local_slots)
420 {
421 /* Drop the local slot if it is not required to be retained. */
422 if (!local_sync_slot_required(local_slot, remote_slot_list))
423 {
424 bool synced_slot;
425
426 /*
427 * Use shared lock to prevent a conflict with
428 * ReplicationSlotsDropDBSlots(), trying to drop the same slot
429 * during a drop-database operation.
430 */
431 LockSharedObject(DatabaseRelationId, local_slot->data.database,
432 0, AccessShareLock);
433
434 /*
435 * In the small window between getting the slot to drop and
436 * locking the database, there is a possibility of a parallel
437 * database drop by the startup process and the creation of a new
438 * slot by the user. This new user-created slot may end up using
439 * the same shared memory as that of 'local_slot'. Thus check if
440 * local_slot is still the synced one before performing actual
441 * drop.
442 */
443 SpinLockAcquire(&local_slot->mutex);
444 synced_slot = local_slot->in_use && local_slot->data.synced;
445 SpinLockRelease(&local_slot->mutex);
446
447 if (synced_slot)
448 {
449 ReplicationSlotAcquire(NameStr(local_slot->data.name), true, false);
450 ReplicationSlotDropAcquired();
451 }
452
453 UnlockSharedObject(DatabaseRelationId, local_slot->data.database,
454 0, AccessShareLock);
455
456 ereport(LOG,
457 errmsg("dropped replication slot \"%s\" of database with OID %u",
458 NameStr(local_slot->data.name),
459 local_slot->data.database));
460 }
461 }
462}
#define NameStr(name)
Definition: c.h:703
#define LOG
Definition: elog.h:31
void LockSharedObject(Oid classid, Oid objid, uint16 objsubid, LOCKMODE lockmode)
Definition: lmgr.c:1072
void UnlockSharedObject(Oid classid, Oid objid, uint16 objsubid, LOCKMODE lockmode)
Definition: lmgr.c:1131
#define AccessShareLock
Definition: lockdefs.h:36
#define foreach_ptr(type, var, lst)
Definition: pg_list.h:469
void ReplicationSlotAcquire(const char *name, bool nowait, bool error_if_invalid)
Definition: slot.c:544
void ReplicationSlotDropAcquired(void)
Definition: slot.c:900
static List * get_local_synced_slots(void)
Definition: slotsync.c:331
static bool local_sync_slot_required(ReplicationSlot *local_slot, List *remote_slots)
Definition: slotsync.c:362
Definition: pg_list.h:54

References AccessShareLock, ereport, errmsg(), foreach_ptr, get_local_synced_slots(), local_sync_slot_required(), LockSharedObject(), LOG, NameStr, ReplicationSlotAcquire(), ReplicationSlotDropAcquired(), SpinLockAcquire, SpinLockRelease, and UnlockSharedObject().

Referenced by synchronize_slots().

◆ get_local_synced_slots()

static List * get_local_synced_slots ( void  )
static

Definition at line 331 of file slotsync.c.

332{
333 List *local_slots = NIL;
334
335 LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
336
337 for (int i = 0; i < max_replication_slots; i++)
338 {
339 ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
340
341 /* Check if it is a synchronized slot */
342 if (s->in_use && s->data.synced)
343 {
344 Assert(SlotIsLogical(s));
345 local_slots = lappend(local_slots, s);
346 }
347 }
348
349 LWLockRelease(ReplicationSlotControlLock);
350
351 return local_slots;
352}
int i
Definition: isn.c:72
List * lappend(List *list, void *datum)
Definition: list.c:339
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1168
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1781
@ LW_SHARED
Definition: lwlock.h:115
#define NIL
Definition: pg_list.h:68
int max_replication_slots
Definition: slot.c:141
ReplicationSlotCtlData * ReplicationSlotCtl
Definition: slot.c:135
#define SlotIsLogical(slot)
Definition: slot.h:217
ReplicationSlot replication_slots[1]
Definition: slot.h:228
bool in_use
Definition: slot.h:157
ReplicationSlotPersistentData data
Definition: slot.h:181

References Assert, ReplicationSlot::data, i, ReplicationSlot::in_use, lappend(), LW_SHARED, LWLockAcquire(), LWLockRelease(), max_replication_slots, NIL, ReplicationSlotCtlData::replication_slots, ReplicationSlotCtl, SlotIsLogical, and ReplicationSlotPersistentData::synced.

Referenced by drop_local_obsolete_slots().

◆ IsSyncingReplicationSlots()

bool IsSyncingReplicationSlots ( void  )

Definition at line 1647 of file slotsync.c.

1648{
1649 return syncing_slots;
1650}

References syncing_slots.

Referenced by CreateDecodingContext(), GetStandbyFlushRecPtr(), and ReplicationSlotCreate().

◆ local_sync_slot_required()

static bool local_sync_slot_required ( ReplicationSlot * local_slot,
List * remote_slots 
)
static

Definition at line 362 of file slotsync.c.

363{
364 bool remote_exists = false;
365 bool locally_invalidated = false;
366
367 foreach_ptr(RemoteSlot, remote_slot, remote_slots)
368 {
369 if (strcmp(remote_slot->name, NameStr(local_slot->data.name)) == 0)
370 {
371 remote_exists = true;
372
373 /*
374 * If remote slot is not invalidated but local slot is marked as
375 * invalidated, then set locally_invalidated flag.
376 */
377 SpinLockAcquire(&local_slot->mutex);
378 locally_invalidated =
379 (remote_slot->invalidated == RS_INVAL_NONE) &&
380 (local_slot->data.invalidated != RS_INVAL_NONE);
381 SpinLockRelease(&local_slot->mutex);
382
383 break;
384 }
385 }
386
387 return (remote_exists && !locally_invalidated);
388}
@ RS_INVAL_NONE
Definition: slot.h:52
ReplicationSlotInvalidationCause invalidated
Definition: slot.h:99
slock_t mutex
Definition: slot.h:154

References ReplicationSlot::data, foreach_ptr, ReplicationSlotPersistentData::invalidated, ReplicationSlot::mutex, ReplicationSlotPersistentData::name, NameStr, RS_INVAL_NONE, SpinLockAcquire, and SpinLockRelease.

Referenced by drop_local_obsolete_slots().

◆ ProcessSlotSyncInterrupts()

static void ProcessSlotSyncInterrupts ( WalReceiverConn * wrconn)
static

Definition at line 1153 of file slotsync.c.

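1154{
1155 CHECK_FOR_INTERRUPTS();
1156
1157 if (ShutdownRequestPending)
1158 {
1159 ereport(LOG,
1160 errmsg("replication slot synchronization worker is shutting down on receiving SIGINT"));
1161
1162 proc_exit(0);
1163 }
1164
1165 if (ConfigReloadPending)
1166 slotsync_reread_config();
1167}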
volatile sig_atomic_t ShutdownRequestPending
Definition: interrupt.c:28
volatile sig_atomic_t ConfigReloadPending
Definition: interrupt.c:27
void proc_exit(int code)
Definition: ipc.c:104
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
static void slotsync_reread_config(void)
Definition: slotsync.c:1104

References CHECK_FOR_INTERRUPTS, ConfigReloadPending, ereport, errmsg(), LOG, proc_exit(), ShutdownRequestPending, and slotsync_reread_config().

Referenced by ReplSlotSyncWorkerMain().

◆ ReplSlotSyncWorkerMain()

void ReplSlotSyncWorkerMain ( char *  startup_data,
size_t  startup_data_len 
)

Definition at line 1329 of file slotsync.c.

1330{
1331 WalReceiverConn *wrconn = NULL;
1332 char *dbname;
1333 char *err;
1334 sigjmp_buf local_sigjmp_buf;
1335 StringInfoData app_name;
1336
1337 Assert(startup_data_len == 0);
1338
1339 MyBackendType = B_SLOTSYNC_WORKER;
1340
1341 init_ps_display(NULL);
1342
1343 SetProcessingMode(InitProcessing);
1344
1345 /*
1346 * Create a per-backend PGPROC struct in shared memory. We must do this
1347 * before we access any shared memory.
1348 */
1349 InitProcess();
1350
1351 /*
1352 * Early initialization.
1353 */
1354 BaseInit();
1355
1356 Assert(SlotSyncCtx != NULL);
1357
1358 /*
1359 * If an exception is encountered, processing resumes here.
1360 *
1361 * We just need to clean up, report the error, and go away.
1362 *
1363 * If we do not have this handling here, then since this worker process
1364 * operates at the bottom of the exception stack, ERRORs turn into FATALs.
1365 * Therefore, we create our own exception handler to catch ERRORs.
1366 */
1367 if (sigsetjmp(local_sigjmp_buf, 1) != 0)
1368 {
1369 /* since not using PG_TRY, must reset error stack by hand */
1370 error_context_stack = NULL;
1371
1372 /* Prevents interrupts while cleaning up */
1373 HOLD_INTERRUPTS();
1374
1375 /* Report the error to the server log */
1376 EmitErrorReport();
1377
1378 /*
1379 * We can now go away. Note that because we called InitProcess, a
1380 * callback was registered to do ProcKill, which will clean up
1381 * necessary state.
1382 */
1383 proc_exit(0);
1384 }
1385
1386 /* We can now handle ereport(ERROR) */
1387 PG_exception_stack = &local_sigjmp_buf;
1388
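1389 /* Setup signal handling */
1390 pqsignal(SIGHUP, SignalHandlerForConfigReload);
1391 pqsignal(SIGINT, SignalHandlerForShutdownRequest);
1392 pqsignal(SIGTERM, die);
1393 pqsignal(SIGFPE, FloatExceptionHandler);
1394 pqsignal(SIGUSR1, procsignal_sigusr1_handler);
1395 pqsignal(SIGUSR2, SIG_IGN);
1396 pqsignal(SIGPIPE, SIG_IGN);
1397 pqsignal(SIGCHLD, SIG_DFL);
1398
1399 check_and_set_sync_info(MyProcPid);
1400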
1401 ereport(LOG, errmsg("slot sync worker started"));
1402
1403 /* Register it as soon as SlotSyncCtx->pid is initialized. */
1404 before_shmem_exit(slotsync_worker_onexit, (Datum) 0);
1405
1406 /*
1407 * Establishes SIGALRM handler and initialize timeout module. It is needed
1408 * by InitPostgres to register different timeouts.
1409 */
1410 InitializeTimeouts();
1411
1412 /* Load the libpq-specific functions */
1413 load_file("libpqwalreceiver", false);
1414
1415 /*
1416 * Unblock signals (they were blocked when the postmaster forked us)
1417 */
1418 sigprocmask(SIG_SETMASK, &UnBlockSig, NULL);
1419
1420 /*
1421 * Set always-secure search path, so malicious users can't redirect user
1422 * code (e.g. operators).
1423 *
1424 * It's not strictly necessary since we won't be scanning or writing to
1425 * any user table locally, but it's good to retain it here for added
1426 * precaution.
1427 */
1428 SetConfigOption("search_path", "", PGC_SUSET, PGC_S_OVERRIDE);
1429
1430 dbname = CheckAndGetDbnameFromConninfo();
1431
1432 /*
1433 * Connect to the database specified by the user in primary_conninfo. We
1434 * need a database connection for walrcv_exec to work which we use to
1435 * fetch slot information from the remote node. See comments atop
1436 * libpqrcv_exec.
1437 *
1438 * We do not specify a specific user here since the slot sync worker will
1439 * operate as a superuser. This is safe because the slot sync worker does
1440 * not interact with user tables, eliminating the risk of executing
1441 * arbitrary code within triggers.
1442 */
1443 InitPostgres(dbname, InvalidOid, NULL, InvalidOid, 0, NULL);
1444
1445 SetProcessingMode(NormalProcessing);
1446
1447 initStringInfo(&app_name);
1448 if (cluster_name[0])
1449 appendStringInfo(&app_name, "%s_%s", cluster_name, "slotsync worker");
1450 else
1451 appendStringInfoString(&app_name, "slotsync worker");
1452
1453 /*
1454 * Establish the connection to the primary server for slot
1455 * synchronization.
1456 */
1457 wrconn = walrcv_connect(PrimaryConnInfo, false, false, false,
1458 app_name.data, &err);
1459 pfree(app_name.data);
1460
1461 if (!wrconn)
1462 ereport(ERROR,
1463 errcode(ERRCODE_CONNECTION_FAILURE),
1464 errmsg("synchronization worker \"%s\" could not connect to the primary server: %s",
1465 app_name.data, err));
1466
1467 /*
1468 * Register the disconnection callback.
1469 *
1470 * XXX: This can be combined with previous cleanup registration of
1471 * slotsync_worker_onexit() but that will need the connection to be made
1472 * global and we want to avoid introducing global for this purpose.
1473 */
1474 before_shmem_exit(slotsync_worker_disconnect, PointerGetDatum(wrconn));
1475
1476 /*
1477 * Using the specified primary server connection, check that we are not a
1478 * cascading standby and slot configured in 'primary_slot_name' exists on
1479 * the primary server.
1480 */
1481 validate_remote_info(wrconn);
1482
1483 /* Main loop to synchronize slots */
1484 for (;;)
1485 {
1486 bool some_slot_updated = false;
1487
1488 ProcessSlotSyncInterrupts(wrconn);
1489
1490 some_slot_updated = synchronize_slots(wrconn);
1491
1492 wait_for_slot_activity(some_slot_updated);
1493 }
1494
1495 /*
1496 * The slot sync worker can't get here because it will only stop when it
1497 * receives a SIGINT from the startup process, or when there is an error.
1498 */
1499 Assert(false);
1500}
sigset_t UnBlockSig
Definition: pqsignal.c:22
void load_file(const char *filename, bool restricted)
Definition: dfmgr.c:134
void EmitErrorReport(void)
Definition: elog.c:1687
ErrorContextCallback * error_context_stack
Definition: elog.c:94
sigjmp_buf * PG_exception_stack
Definition: elog.c:96
void err(int eval, const char *fmt,...)
Definition: err.c:43
int MyProcPid
Definition: globals.c:46
void SetConfigOption(const char *name, const char *value, GucContext context, GucSource source)
Definition: guc.c:4332
@ PGC_S_OVERRIDE
Definition: guc.h:123
@ PGC_SUSET
Definition: guc.h:78
char * cluster_name
Definition: guc_tables.c:537
void SignalHandlerForShutdownRequest(SIGNAL_ARGS)
Definition: interrupt.c:105
void SignalHandlerForConfigReload(SIGNAL_ARGS)
Definition: interrupt.c:61
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:337
void pfree(void *pointer)
Definition: mcxt.c:1521
@ NormalProcessing
Definition: miscadmin.h:461
@ InitProcessing
Definition: miscadmin.h:460
#define GetProcessingMode()
Definition: miscadmin.h:470
#define HOLD_INTERRUPTS()
Definition: miscadmin.h:133
#define SetProcessingMode(mode)
Definition: miscadmin.h:472
@ B_SLOTSYNC_WORKER
Definition: miscadmin.h:347
BackendType MyBackendType
Definition: miscinit.c:64
#define die(msg)
#define pqsignal
Definition: port.h:521
void FloatExceptionHandler(SIGNAL_ARGS)
Definition: postgres.c:3048
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:327
uintptr_t Datum
Definition: postgres.h:69
#define InvalidOid
Definition: postgres_ext.h:37
void BaseInit(void)
Definition: postinit.c:606
void InitPostgres(const char *in_dbname, Oid dboid, const char *username, Oid useroid, bits32 flags, char *out_dbname)
Definition: postinit.c:700
void procsignal_sigusr1_handler(SIGNAL_ARGS)
Definition: procsignal.c:671
void init_ps_display(const char *fixed_part)
Definition: ps_status.c:269
static void slotsync_worker_disconnect(int code, Datum arg)
Definition: slotsync.c:1175
char * CheckAndGetDbnameFromConninfo(void)
Definition: slotsync.c:1010
static bool synchronize_slots(WalReceiverConn *wrconn)
Definition: slotsync.c:789
static void wait_for_slot_activity(bool some_slot_updated)
Definition: slotsync.c:1234
static void slotsync_worker_onexit(int code, Datum arg)
Definition: slotsync.c:1188
static void check_and_set_sync_info(pid_t worker_pid)
Definition: slotsync.c:1269
static void validate_remote_info(WalReceiverConn *wrconn)
Definition: slotsync.c:932
static void ProcessSlotSyncInterrupts(WalReceiverConn *wrconn)
Definition: slotsync.c:1153
void InitProcess(void)
Definition: proc.c:341
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:145
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:230
void initStringInfo(StringInfo str)
Definition: stringinfo.c:97
void InitializeTimeouts(void)
Definition: timeout.c:470
static WalReceiverConn * wrconn
Definition: walreceiver.c:92
#define walrcv_connect(conninfo, replication, logical, must_use_password, appname, err)
Definition: walreceiver.h:435
#define SIGCHLD
Definition: win32_port.h:168
#define SIGHUP
Definition: win32_port.h:158
#define SIGPIPE
Definition: win32_port.h:163
#define SIGUSR1
Definition: win32_port.h:170
#define SIGUSR2
Definition: win32_port.h:171

References appendStringInfo(), appendStringInfoString(), Assert, B_SLOTSYNC_WORKER, BaseInit(), before_shmem_exit(), check_and_set_sync_info(), CheckAndGetDbnameFromConninfo(), cluster_name, StringInfoData::data, dbname, die, EmitErrorReport(), ereport, err(), errcode(), errmsg(), ERROR, error_context_stack, FloatExceptionHandler(), GetProcessingMode, HOLD_INTERRUPTS, init_ps_display(), InitializeTimeouts(), InitPostgres(), InitProcess(), InitProcessing, initStringInfo(), InvalidOid, load_file(), LOG, MyBackendType, MyProcPid, NormalProcessing, pfree(), PG_exception_stack, PGC_S_OVERRIDE, PGC_SUSET, PointerGetDatum(), pqsignal, PrimaryConnInfo, proc_exit(), ProcessSlotSyncInterrupts(), procsignal_sigusr1_handler(), SetConfigOption(), SetProcessingMode, SIGCHLD, SIGHUP, SignalHandlerForConfigReload(), SignalHandlerForShutdownRequest(), SIGPIPE, SIGUSR1, SIGUSR2, slotsync_worker_disconnect(), slotsync_worker_onexit(), SlotSyncCtx, synchronize_slots(), UnBlockSig, validate_remote_info(), wait_for_slot_activity(), walrcv_connect, and wrconn.

◆ reserve_wal_for_local_slot()

static void reserve_wal_for_local_slot ( XLogRecPtr  restart_lsn)
static

Definition at line 472 of file slotsync.c.

473{
474 XLogSegNo oldest_segno;
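475 XLogSegNo segno;
476 ReplicationSlot *slot = MyReplicationSlot;
477
478 Assert(slot != NULL);
479 Assert(XLogRecPtrIsInvalid(slot->data.restart_lsn));
480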
481 while (true)
482 {
483 SpinLockAcquire(&slot->mutex);
484 slot->data.restart_lsn = restart_lsn;
485 SpinLockRelease(&slot->mutex);
486
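487 /* Prevent WAL removal as fast as possible */
488 ReplicationSlotsComputeRequiredLSN();
489
490 XLByteToSeg(slot->data.restart_lsn, segno, wal_segment_size);
491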
492 /*
493 * Find the oldest existing WAL segment file.
494 *
495 * Normally, we can determine it by using the last removed segment
496 * number. However, if no WAL segment files have been removed by a
497 * checkpoint since startup, we need to search for the oldest segment
498 * file from the current timeline existing in XLOGDIR.
499 *
500 * XXX: Currently, we are searching for the oldest segment in the
501 * current timeline as there is less chance of the slot's restart_lsn
502 * from being some prior timeline, and even if it happens, in the
503 * worst case, we will wait to sync till the slot's restart_lsn moved
504 * to the current timeline.
505 */
506 oldest_segno = XLogGetLastRemovedSegno() + 1;
507
508 if (oldest_segno == 1)
509 {
510 TimeLineID cur_timeline;
511
512 GetWalRcvFlushRecPtr(NULL, &cur_timeline);
513 oldest_segno = XLogGetOldestSegno(cur_timeline);
514 }
515
516 elog(DEBUG1, "segno: " UINT64_FORMAT " of purposed restart_lsn for the synced slot, oldest_segno: " UINT64_FORMAT " available",
517 segno, oldest_segno);
518
519 /*
520 * If all required WAL is still there, great, otherwise retry. The
521 * slot should prevent further removal of WAL, unless there's a
522 * concurrent ReplicationSlotsComputeRequiredLSN() after we've written
523 * the new restart_lsn above, so normally we should never need to loop
524 * more than twice.
525 */
526 if (segno >= oldest_segno)
527 break;
528
529 /* Retry using the location of the oldest wal segment */
530 XLogSegNoOffsetToRecPtr(oldest_segno, 0, wal_segment_size, restart_lsn);
531 }
532}
#define UINT64_FORMAT
Definition: c.h:507
#define DEBUG1
Definition: elog.h:30
#define elog(elevel,...)
Definition: elog.h:225
ReplicationSlot * MyReplicationSlot
Definition: slot.c:138
void ReplicationSlotsComputeRequiredLSN(void)
Definition: slot.c:1137
XLogRecPtr restart_lsn
Definition: slot.h:96
XLogRecPtr GetWalRcvFlushRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI)
XLogSegNo XLogGetLastRemovedSegno(void)
Definition: xlog.c:3779
int wal_segment_size
Definition: xlog.c:143
XLogSegNo XLogGetOldestSegno(TimeLineID tli)
Definition: xlog.c:3795
#define XLogSegNoOffsetToRecPtr(segno, offset, wal_segsz_bytes, dest)
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint32 TimeLineID
Definition: xlogdefs.h:59
uint64 XLogSegNo
Definition: xlogdefs.h:48

References Assert, ReplicationSlot::data, DEBUG1, elog, GetWalRcvFlushRecPtr(), ReplicationSlot::mutex, MyReplicationSlot, ReplicationSlotsComputeRequiredLSN(), ReplicationSlotPersistentData::restart_lsn, SpinLockAcquire, SpinLockRelease, UINT64_FORMAT, wal_segment_size, XLByteToSeg, XLogGetLastRemovedSegno(), XLogGetOldestSegno(), XLogRecPtrIsInvalid, and XLogSegNoOffsetToRecPtr.

Referenced by synchronize_one_slot().

◆ reset_syncing_flag()

static void reset_syncing_flag ( )
static

◆ ShutDownSlotSync()

void ShutDownSlotSync ( void  )

Definition at line 1559 of file slotsync.c.

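1560{
1561 pid_t worker_pid;
1562
1563 SpinLockAcquire(&SlotSyncCtx->mutex);
1564
1565 SlotSyncCtx->stopSignaled = true;
1566
1567 /*
1568 * Return if neither the slot sync worker is running nor the function
1569 * pg_sync_replication_slots() is executing.
1570 */
1571 if (!SlotSyncCtx->syncing)
1572 {
1573 SpinLockRelease(&SlotSyncCtx->mutex);
1574 update_synced_slots_inactive_since();
1575 return;
1576 }
1577
1578 worker_pid = SlotSyncCtx->pid;
1579
1580 SpinLockRelease(&SlotSyncCtx->mutex);
1581
1582 if (worker_pid != InvalidPid)
1583 kill(worker_pid, SIGINT);
1584
1585 /* Wait for slot sync to end */
1586 for (;;)
1587 {
1588 int rc;
1589
1590 /* Wait a bit, we don't expect to have to wait long */
1591 rc = WaitLatch(MyLatch,
1592 WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
1593 10L, WAIT_EVENT_REPLICATION_SLOTSYNC_SHUTDOWN);
1594
1595 if (rc & WL_LATCH_SET)
1596 {
1597 ResetLatch(MyLatch);
1598 CHECK_FOR_INTERRUPTS();
1599 }
1600
1601 SpinLockAcquire(&SlotSyncCtx->mutex);
1602
1603 /* Ensure that no process is syncing the slots. */
1604 if (!SlotSyncCtx->syncing)
1605 break;
1606
1607 SpinLockRelease(&SlotSyncCtx->mutex);
1608 }
1609
1610 SpinLockRelease(&SlotSyncCtx->mutex);
1611
1612 update_synced_slots_inactive_since();
1613}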
struct Latch * MyLatch
Definition: globals.c:62
void ResetLatch(Latch *latch)
Definition: latch.c:724
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:517
#define WL_TIMEOUT
Definition: latch.h:130
#define WL_EXIT_ON_PM_DEATH
Definition: latch.h:132
#define WL_LATCH_SET
Definition: latch.h:127
static void update_synced_slots_inactive_since(void)
Definition: slotsync.c:1509
#define kill(pid, sig)
Definition: win32_port.h:493

References CHECK_FOR_INTERRUPTS, InvalidPid, kill, SlotSyncCtxStruct::mutex, MyLatch, SlotSyncCtxStruct::pid, ResetLatch(), SlotSyncCtx, SpinLockAcquire, SpinLockRelease, SlotSyncCtxStruct::stopSignaled, SlotSyncCtxStruct::syncing, update_synced_slots_inactive_since(), WaitLatch(), WL_EXIT_ON_PM_DEATH, WL_LATCH_SET, and WL_TIMEOUT.

Referenced by FinishWalRecovery().

◆ slotsync_failure_callback()

static void slotsync_failure_callback ( int  code,
Datum  arg 
)
static

Definition at line 1685 of file slotsync.c.

1686{
1687 WalReceiverConn *wrconn = (WalReceiverConn *) DatumGetPointer(arg);
1688
1689 /*
1690 * We need to do slots cleanup here just like WalSndErrorCleanup() does.
1691 *
1692 * The startup process during promotion invokes ShutDownSlotSync() which
1693 * waits for slot sync to finish and it does that by checking the
1694 * 'syncing' flag. Thus the SQL function must be done with slots' release
1695 * and cleanup to avoid any dangling temporary slots or active slots
1696 * before it marks itself as finished syncing.
1697 */
1698
1699 /* Make sure active replication slots are released */
1700 if (MyReplicationSlot != NULL)
1701 ReplicationSlotRelease();
1702
1703 /* Also cleanup the synced temporary slots. */
1704 ReplicationSlotCleanup(true);
1705
1706 /*
1707 * The set syncing_slots indicates that the process errored out without
1708 * resetting the flag. So, we need to clean up shared memory and reset the
1709 * flag here.
1710 */
1711 if (syncing_slots)
1712 reset_syncing_flag();
1713
1714 walrcv_disconnect(wrconn);
1715}
void * arg
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:317
void ReplicationSlotRelease(void)
Definition: slot.c:667
void ReplicationSlotCleanup(bool synced_only)
Definition: slot.c:756
static void reset_syncing_flag()
Definition: slotsync.c:1313
#define walrcv_disconnect(conn)
Definition: walreceiver.h:467

References arg, DatumGetPointer(), MyReplicationSlot, ReplicationSlotCleanup(), ReplicationSlotRelease(), reset_syncing_flag(), syncing_slots, walrcv_disconnect, and wrconn.

Referenced by SyncReplicationSlots().

◆ slotsync_reread_config()

static void slotsync_reread_config ( void  )
static

Definition at line 1104 of file slotsync.c.

1105{
1106 char *old_primary_conninfo = pstrdup(PrimaryConnInfo);
1107 char *old_primary_slotname = pstrdup(PrimarySlotName);
1108 bool old_sync_replication_slots = sync_replication_slots;
1109 bool old_hot_standby_feedback = hot_standby_feedback;
1110 bool conninfo_changed;
1111 bool primary_slotname_changed;
1112
1113 Assert(sync_replication_slots);
1114
1115 ConfigReloadPending = false;
1116 ProcessConfigFile(PGC_SIGHUP);
1117
1118 conninfo_changed = strcmp(old_primary_conninfo, PrimaryConnInfo) != 0;
1119 primary_slotname_changed = strcmp(old_primary_slotname, PrimarySlotName) != 0;
1120 pfree(old_primary_conninfo);
1121 pfree(old_primary_slotname);
1122
1123 if (old_sync_replication_slots != sync_replication_slots)
1124 {
1125 ereport(LOG,
1126 /* translator: %s is a GUC variable name */
1127 errmsg("replication slot synchronization worker will shut down because \"%s\" is disabled", "sync_replication_slots"));
1128 proc_exit(0);
1129 }
1130
1131 if (conninfo_changed ||
1132 primary_slotname_changed ||
1133 (old_hot_standby_feedback != hot_standby_feedback))
1134 {
1135 ereport(LOG,
1136 errmsg("replication slot synchronization worker will restart because of a parameter change"));
1137
1138 /*
1139 * Reset the last-start time for this worker so that the postmaster
1140 * can restart it without waiting for SLOTSYNC_RESTART_INTERVAL_SEC.
1141 */
1142 SlotSyncCtx->last_start_time = 0;
1143
1144 proc_exit(0);
1145 }
1146
1147}
void ProcessConfigFile(GucContext context)
Definition: guc-file.l:120
@ PGC_SIGHUP
Definition: guc.h:75
char * pstrdup(const char *in)
Definition: mcxt.c:1696
bool sync_replication_slots
Definition: slotsync.c:107
time_t last_start_time
Definition: slotsync.c:100
bool hot_standby_feedback
Definition: walreceiver.c:89
char * PrimarySlotName
Definition: xlogrecovery.c:98

References Assert, ConfigReloadPending, ereport, errmsg(), hot_standby_feedback, SlotSyncCtxStruct::last_start_time, LOG, pfree(), PGC_SIGHUP, PrimaryConnInfo, PrimarySlotName, proc_exit(), ProcessConfigFile(), pstrdup(), SlotSyncCtx, and sync_replication_slots.

Referenced by ProcessSlotSyncInterrupts().

◆ slotsync_worker_disconnect()

static void slotsync_worker_disconnect ( int  code,
Datum  arg 
)
static

Definition at line 1175 of file slotsync.c.

References arg, DatumGetPointer(), walrcv_disconnect, and wrconn.

Referenced by ReplSlotSyncWorkerMain().

◆ slotsync_worker_onexit()

static void slotsync_worker_onexit ( int  code,
Datum  arg 
)
static

Definition at line 1188 of file slotsync.c.

1189{
1190 /*
1191 * We need to do slots cleanup here just like WalSndErrorCleanup() does.
1192 *
1193 * The startup process during promotion invokes ShutDownSlotSync() which
1194 * waits for slot sync to finish and it does that by checking the
1195 * 'syncing' flag. Thus the slot sync worker must be done with slots'
1196 * release and cleanup to avoid any dangling temporary slots or active
1197 * slots before it marks itself as finished syncing.
1198 */
1199
1200 /* Make sure active replication slots are released */
1201 if (MyReplicationSlot != NULL)
1203
1204 /* Also cleanup the temporary slots. */
1206
1208
1210
1211 /*
1212 * If syncing_slots is true, it indicates that the process errored out
1213 * without resetting the flag. So, we need to clean up shared memory and
1214 * reset the flag here.
1215 */
1216 if (syncing_slots)
1217 {
1218 SlotSyncCtx->syncing = false;
1219 syncing_slots = false;
1220 }
1221
1223}

References InvalidPid, SlotSyncCtxStruct::mutex, MyReplicationSlot, SlotSyncCtxStruct::pid, ReplicationSlotCleanup(), ReplicationSlotRelease(), SlotSyncCtx, SpinLockAcquire, SpinLockRelease, SlotSyncCtxStruct::syncing, and syncing_slots.

Referenced by ReplSlotSyncWorkerMain().

◆ SlotSyncShmemInit()

void SlotSyncShmemInit ( void  )

Definition at line 1665 of file slotsync.c.

1666{
1668 bool found;
1669
1671 ShmemInitStruct("Slot Sync Data", size, &found);
1672
1673 if (!found)
1674 {
1675 memset(SlotSyncCtx, 0, size);
1678 }
1679}
size_t Size
Definition: c.h:562
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:382
static pg_noinline void Size size
Definition: slab.c:607
Size SlotSyncShmemSize(void)
Definition: slotsync.c:1656
#define SpinLockInit(lock)
Definition: spin.h:57

References InvalidPid, SlotSyncCtxStruct::mutex, SlotSyncCtxStruct::pid, ShmemInitStruct(), size, SlotSyncCtx, SlotSyncShmemSize(), and SpinLockInit.

Referenced by CreateOrAttachShmemStructs().

◆ SlotSyncShmemSize()

Size SlotSyncShmemSize ( void  )

Definition at line 1656 of file slotsync.c.

1657{
1658 return sizeof(SlotSyncCtxStruct);
1659}
struct SlotSyncCtxStruct SlotSyncCtxStruct

Referenced by CalculateShmemSize(), and SlotSyncShmemInit().

◆ SlotSyncWorkerCanRestart()

bool SlotSyncWorkerCanRestart ( void  )

Definition at line 1627 of file slotsync.c.

1628{
1629 time_t curtime = time(NULL);
1630
1631 /* Return false if too soon since last start. */
1632 if ((unsigned int) (curtime - SlotSyncCtx->last_start_time) <
1633 (unsigned int) SLOTSYNC_RESTART_INTERVAL_SEC)
1634 return false;
1635
1636 SlotSyncCtx->last_start_time = curtime;
1637
1638 return true;
1639}
#define SLOTSYNC_RESTART_INTERVAL_SEC
Definition: slotsync.c:120

References SlotSyncCtxStruct::last_start_time, SLOTSYNC_RESTART_INTERVAL_SEC, and SlotSyncCtx.

Referenced by LaunchMissingBackgroundProcesses().

◆ synchronize_one_slot()

static bool synchronize_one_slot ( RemoteSlot *  remote_slot,
Oid  remote_dbid 
)
static

Definition at line 607 of file slotsync.c.

608{
609 ReplicationSlot *slot;
610 XLogRecPtr latestFlushPtr;
611 bool slot_updated = false;
612
613 /*
614 * Make sure that concerned WAL is received and flushed before syncing
615 * slot to target lsn received from the primary server.
616 */
617 latestFlushPtr = GetStandbyFlushRecPtr(NULL);
618 if (remote_slot->confirmed_lsn > latestFlushPtr)
619 {
620 /*
621 * Can get here only if GUC 'synchronized_standby_slots' on the
622 * primary server was not configured correctly.
623 */
625 errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
626 errmsg("skipping slot synchronization because the received slot sync"
627 " LSN %X/%X for slot \"%s\" is ahead of the standby position %X/%X",
628 LSN_FORMAT_ARGS(remote_slot->confirmed_lsn),
629 remote_slot->name,
630 LSN_FORMAT_ARGS(latestFlushPtr)));
631
632 return false;
633 }
634
635 /* Search for the named slot */
636 if ((slot = SearchNamedReplicationSlot(remote_slot->name, true)))
637 {
638 bool synced;
639
640 SpinLockAcquire(&slot->mutex);
641 synced = slot->data.synced;
642 SpinLockRelease(&slot->mutex);
643
644 /* User-created slot with the same name exists, raise ERROR. */
645 if (!synced)
647 errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
648 errmsg("exiting from slot synchronization because same"
649 " name slot \"%s\" already exists on the standby",
650 remote_slot->name));
651
652 /*
653 * The slot has been synchronized before.
654 *
655 * It is important to acquire the slot here before checking
656 * invalidation. If we don't acquire the slot first, there could be a
657 * race condition that the local slot could be invalidated just after
658 * checking the 'invalidated' flag here and we could end up
659 * overwriting 'invalidated' flag to remote_slot's value. See
660 * InvalidatePossiblyObsoleteSlot() where it invalidates slot directly
661 * if the slot is not acquired by other processes.
662 *
663 * XXX: If it ever turns out that slot acquire/release is costly for
664 * cases when none of the slot properties is changed then we can do a
665 * pre-check to ensure that at least one of the slot properties is
666 * changed before acquiring the slot.
667 */
668 ReplicationSlotAcquire(remote_slot->name, true, false);
669
670 Assert(slot == MyReplicationSlot);
671
672 /*
673 * Copy the invalidation cause from remote only if local slot is not
674 * invalidated locally, we don't want to overwrite existing one.
675 */
676 if (slot->data.invalidated == RS_INVAL_NONE &&
677 remote_slot->invalidated != RS_INVAL_NONE)
678 {
679 SpinLockAcquire(&slot->mutex);
680 slot->data.invalidated = remote_slot->invalidated;
681 SpinLockRelease(&slot->mutex);
682
683 /* Make sure the invalidated state persists across server restart */
686
687 slot_updated = true;
688 }
689
690 /* Skip the sync of an invalidated slot */
691 if (slot->data.invalidated != RS_INVAL_NONE)
692 {
694 return slot_updated;
695 }
696
697 /* Slot not ready yet, let's attempt to make it sync-ready now. */
698 if (slot->data.persistency == RS_TEMPORARY)
699 {
700 slot_updated = update_and_persist_local_synced_slot(remote_slot,
701 remote_dbid);
702 }
703
704 /* Slot ready for sync, so sync it. */
705 else
706 {
707 /*
708 * Sanity check: As long as the invalidations are handled
709 * appropriately as above, this should never happen.
710 *
711 * We don't need to check restart_lsn here. See the comments in
712 * update_local_synced_slot() for details.
713 */
714 if (remote_slot->confirmed_lsn < slot->data.confirmed_flush)
716 errmsg_internal("cannot synchronize local slot \"%s\"",
717 remote_slot->name),
718 errdetail_internal("Local slot's start streaming location LSN(%X/%X) is ahead of remote slot's LSN(%X/%X).",
720 LSN_FORMAT_ARGS(remote_slot->confirmed_lsn)));
721
722 slot_updated = update_local_synced_slot(remote_slot, remote_dbid,
723 NULL, NULL);
724 }
725 }
726 /* Otherwise create the slot first. */
727 else
728 {
729 NameData plugin_name;
730 TransactionId xmin_horizon = InvalidTransactionId;
731
732 /* Skip creating the local slot if remote_slot is invalidated already */
733 if (remote_slot->invalidated != RS_INVAL_NONE)
734 return false;
735
736 /*
737 * We create temporary slots instead of ephemeral slots here because
738 * we want the slots to survive after releasing them. This is done to
739 * avoid dropping and re-creating the slots in each synchronization
740 * cycle if the restart_lsn or catalog_xmin of the remote slot has not
741 * caught up.
742 */
743 ReplicationSlotCreate(remote_slot->name, true, RS_TEMPORARY,
744 remote_slot->two_phase,
745 remote_slot->failover,
746 true);
747
748 /* For shorter lines. */
749 slot = MyReplicationSlot;
750
751 /* Avoid expensive operations while holding a spinlock. */
752 namestrcpy(&plugin_name, remote_slot->plugin);
753
754 SpinLockAcquire(&slot->mutex);
755 slot->data.database = remote_dbid;
756 slot->data.plugin = plugin_name;
757 SpinLockRelease(&slot->mutex);
758
760
761 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
762 xmin_horizon = GetOldestSafeDecodingTransactionId(true);
763 SpinLockAcquire(&slot->mutex);
764 slot->effective_catalog_xmin = xmin_horizon;
765 slot->data.catalog_xmin = xmin_horizon;
766 SpinLockRelease(&slot->mutex);
768 LWLockRelease(ProcArrayLock);
769
770 update_and_persist_local_synced_slot(remote_slot, remote_dbid);
771
772 slot_updated = true;
773 }
774
776
777 return slot_updated;
778}
uint32 TransactionId
Definition: c.h:609
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1157
int errdetail_internal(const char *fmt,...)
Definition: elog.c:1230
@ LW_EXCLUSIVE
Definition: lwlock.h:114
#define AmLogicalSlotSyncWorkerProcess()
Definition: miscadmin.h:384
void namestrcpy(Name name, const char *str)
Definition: name.c:233
TransactionId GetOldestSafeDecodingTransactionId(bool catalogOnly)
Definition: procarray.c:2945
void ReplicationSlotCreate(const char *name, bool db_specific, ReplicationSlotPersistency persistency, bool two_phase, bool failover, bool synced)
Definition: slot.c:309
void ReplicationSlotMarkDirty(void)
Definition: slot.c:1042
void ReplicationSlotsComputeRequiredXmin(bool already_locked)
Definition: slot.c:1081
void ReplicationSlotSave(void)
Definition: slot.c:1024
ReplicationSlot * SearchNamedReplicationSlot(const char *name, bool need_lock)
Definition: slot.c:464
@ RS_TEMPORARY
Definition: slot.h:40
static void reserve_wal_for_local_slot(XLogRecPtr restart_lsn)
Definition: slotsync.c:472
static bool update_and_persist_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid)
Definition: slotsync.c:543
static bool update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid, bool *found_consistent_snapshot, bool *remote_slot_precedes)
Definition: slotsync.c:166
bool two_phase
Definition: slotsync.c:138
char * plugin
Definition: slotsync.c:136
char * name
Definition: slotsync.c:135
bool failover
Definition: slotsync.c:139
ReplicationSlotInvalidationCause invalidated
Definition: slotsync.c:145
XLogRecPtr confirmed_lsn
Definition: slotsync.c:141
XLogRecPtr restart_lsn
Definition: slotsync.c:140
TransactionId catalog_xmin
Definition: slot.h:93
XLogRecPtr confirmed_flush
Definition: slot.h:107
ReplicationSlotPersistency persistency
Definition: slot.h:77
TransactionId effective_catalog_xmin
Definition: slot.h:178
Definition: c.h:698
#define InvalidTransactionId
Definition: transam.h:31
XLogRecPtr GetStandbyFlushRecPtr(TimeLineID *tli)
Definition: walsender.c:3475
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
uint64 XLogRecPtr
Definition: xlogdefs.h:21

References AmLogicalSlotSyncWorkerProcess, Assert, ReplicationSlotPersistentData::catalog_xmin, ReplicationSlotPersistentData::confirmed_flush, RemoteSlot::confirmed_lsn, ReplicationSlot::data, ReplicationSlotPersistentData::database, ReplicationSlot::effective_catalog_xmin, ereport, errcode(), errdetail_internal(), errmsg(), errmsg_internal(), ERROR, RemoteSlot::failover, GetOldestSafeDecodingTransactionId(), GetStandbyFlushRecPtr(), RemoteSlot::invalidated, ReplicationSlotPersistentData::invalidated, InvalidTransactionId, LOG, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ReplicationSlot::mutex, MyReplicationSlot, RemoteSlot::name, namestrcpy(), ReplicationSlotPersistentData::persistency, RemoteSlot::plugin, ReplicationSlotPersistentData::plugin, ReplicationSlotAcquire(), ReplicationSlotCreate(), ReplicationSlotMarkDirty(), ReplicationSlotRelease(), ReplicationSlotSave(), ReplicationSlotsComputeRequiredXmin(), reserve_wal_for_local_slot(), RemoteSlot::restart_lsn, RS_INVAL_NONE, RS_TEMPORARY, SearchNamedReplicationSlot(), SpinLockAcquire, SpinLockRelease, ReplicationSlotPersistentData::synced, RemoteSlot::two_phase, update_and_persist_local_synced_slot(), and update_local_synced_slot().

Referenced by synchronize_slots().

◆ synchronize_slots()

static bool synchronize_slots ( WalReceiverConn *  wrconn)
static

Definition at line 789 of file slotsync.c.

790{
791#define SLOTSYNC_COLUMN_COUNT 9
792 Oid slotRow[SLOTSYNC_COLUMN_COUNT] = {TEXTOID, TEXTOID, LSNOID,
793 LSNOID, XIDOID, BOOLOID, BOOLOID, TEXTOID, TEXTOID};
794
796 TupleTableSlot *tupslot;
797 List *remote_slot_list = NIL;
798 bool some_slot_updated = false;
799 bool started_tx = false;
800 const char *query = "SELECT slot_name, plugin, confirmed_flush_lsn,"
801 " restart_lsn, catalog_xmin, two_phase, failover,"
802 " database, invalidation_reason"
803 " FROM pg_catalog.pg_replication_slots"
804 " WHERE failover and NOT temporary";
805
806 /* The syscache access in walrcv_exec() needs a transaction env. */
807 if (!IsTransactionState())
808 {
810 started_tx = true;
811 }
812
813 /* Execute the query */
814 res = walrcv_exec(wrconn, query, SLOTSYNC_COLUMN_COUNT, slotRow);
815 if (res->status != WALRCV_OK_TUPLES)
817 errmsg("could not fetch failover logical slots info from the primary server: %s",
818 res->err));
819
820 /* Construct the remote_slot tuple and synchronize each slot locally */
821 tupslot = MakeSingleTupleTableSlot(res->tupledesc, &TTSOpsMinimalTuple);
822 while (tuplestore_gettupleslot(res->tuplestore, true, false, tupslot))
823 {
824 bool isnull;
825 RemoteSlot *remote_slot = palloc0(sizeof(RemoteSlot));
826 Datum d;
827 int col = 0;
828
829 remote_slot->name = TextDatumGetCString(slot_getattr(tupslot, ++col,
830 &isnull));
831 Assert(!isnull);
832
833 remote_slot->plugin = TextDatumGetCString(slot_getattr(tupslot, ++col,
834 &isnull));
835 Assert(!isnull);
836
837 /*
838 * It is possible to get null values for LSN and Xmin if slot is
839 * invalidated on the primary server, so handle accordingly.
840 */
841 d = slot_getattr(tupslot, ++col, &isnull);
842 remote_slot->confirmed_lsn = isnull ? InvalidXLogRecPtr :
843 DatumGetLSN(d);
844
845 d = slot_getattr(tupslot, ++col, &isnull);
846 remote_slot->restart_lsn = isnull ? InvalidXLogRecPtr : DatumGetLSN(d);
847
848 d = slot_getattr(tupslot, ++col, &isnull);
849 remote_slot->catalog_xmin = isnull ? InvalidTransactionId :
851
852 remote_slot->two_phase = DatumGetBool(slot_getattr(tupslot, ++col,
853 &isnull));
854 Assert(!isnull);
855
856 remote_slot->failover = DatumGetBool(slot_getattr(tupslot, ++col,
857 &isnull));
858 Assert(!isnull);
859
860 remote_slot->database = TextDatumGetCString(slot_getattr(tupslot,
861 ++col, &isnull));
862 Assert(!isnull);
863
864 d = slot_getattr(tupslot, ++col, &isnull);
865 remote_slot->invalidated = isnull ? RS_INVAL_NONE :
867
868 /* Sanity check */
870
871 /*
872 * If restart_lsn, confirmed_lsn or catalog_xmin is invalid but the
873 * slot is valid, that means we have fetched the remote_slot in its
874 * RS_EPHEMERAL state. In such a case, don't sync it; we can always
875 * sync it in the next sync cycle when the remote_slot is persisted
876 * and has valid lsn(s) and xmin values.
877 *
878 * XXX: In future, if we plan to expose 'slot->data.persistency' in
879 * pg_replication_slots view, then we can avoid fetching RS_EPHEMERAL
880 * slots in the first place.
881 */
882 if ((XLogRecPtrIsInvalid(remote_slot->restart_lsn) ||
883 XLogRecPtrIsInvalid(remote_slot->confirmed_lsn) ||
884 !TransactionIdIsValid(remote_slot->catalog_xmin)) &&
885 remote_slot->invalidated == RS_INVAL_NONE)
886 pfree(remote_slot);
887 else
888 /* Create list of remote slots */
889 remote_slot_list = lappend(remote_slot_list, remote_slot);
890
891 ExecClearTuple(tupslot);
892 }
893
894 /* Drop local slots that no longer need to be synced. */
895 drop_local_obsolete_slots(remote_slot_list);
896
897 /* Now sync the slots locally */
898 foreach_ptr(RemoteSlot, remote_slot, remote_slot_list)
899 {
900 Oid remote_dbid = get_database_oid(remote_slot->database, false);
901
902 /*
903 * Use shared lock to prevent a conflict with
904 * ReplicationSlotsDropDBSlots(), trying to drop the same slot during
905 * a drop-database operation.
906 */
907 LockSharedObject(DatabaseRelationId, remote_dbid, 0, AccessShareLock);
908
909 some_slot_updated |= synchronize_one_slot(remote_slot, remote_dbid);
910
911 UnlockSharedObject(DatabaseRelationId, remote_dbid, 0, AccessShareLock);
912 }
913
914 /* We are done, free remote_slot_list elements */
915 list_free_deep(remote_slot_list);
916
918
919 if (started_tx)
921
922 return some_slot_updated;
923}
#define TextDatumGetCString(d)
Definition: builtins.h:98
Oid get_database_oid(const char *dbname, bool missing_ok)
Definition: dbcommands.c:3140
TupleTableSlot * MakeSingleTupleTableSlot(TupleDesc tupdesc, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1425
const TupleTableSlotOps TTSOpsMinimalTuple
Definition: execTuples.c:86
void list_free_deep(List *list)
Definition: list.c:1560
void * palloc0(Size size)
Definition: mcxt.c:1347
static XLogRecPtr DatumGetLSN(Datum X)
Definition: pg_lsn.h:22
static bool DatumGetBool(Datum X)
Definition: postgres.h:95
static TransactionId DatumGetTransactionId(Datum X)
Definition: postgres.h:267
unsigned int Oid
Definition: postgres_ext.h:32
ReplicationSlotInvalidationCause GetSlotInvalidationCause(const char *invalidation_reason)
Definition: slot.c:2429
static void drop_local_obsolete_slots(List *remote_slot_list)
Definition: slotsync.c:415
#define SLOTSYNC_COLUMN_COUNT
static bool synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid)
Definition: slotsync.c:607
char * database
Definition: slotsync.c:137
TransactionId catalog_xmin
Definition: slotsync.c:142
#define TransactionIdIsValid(xid)
Definition: transam.h:41
bool tuplestore_gettupleslot(Tuplestorestate *state, bool forward, bool copy, TupleTableSlot *slot)
Definition: tuplestore.c:1130
static Datum slot_getattr(TupleTableSlot *slot, int attnum, bool *isnull)
Definition: tuptable.h:395
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:454
@ WALRCV_OK_TUPLES
Definition: walreceiver.h:207
static void walrcv_clear_result(WalRcvExecResult *walres)
Definition: walreceiver.h:471
#define walrcv_exec(conn, exec, nRetTypes, retTypes)
Definition: walreceiver.h:465
bool IsTransactionState(void)
Definition: xact.c:386
void StartTransactionCommand(void)
Definition: xact.c:3051
void CommitTransactionCommand(void)
Definition: xact.c:3149
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28

References AccessShareLock, Assert, RemoteSlot::catalog_xmin, CommitTransactionCommand(), RemoteSlot::confirmed_lsn, RemoteSlot::database, DatumGetBool(), DatumGetLSN(), DatumGetTransactionId(), drop_local_obsolete_slots(), ereport, errmsg(), ERROR, ExecClearTuple(), RemoteSlot::failover, foreach_ptr, get_database_oid(), GetSlotInvalidationCause(), RemoteSlot::invalidated, InvalidTransactionId, InvalidXLogRecPtr, IsTransactionState(), lappend(), list_free_deep(), LockSharedObject(), MakeSingleTupleTableSlot(), RemoteSlot::name, NIL, palloc0(), pfree(), RemoteSlot::plugin, res, RemoteSlot::restart_lsn, RS_INVAL_NONE, slot_getattr(), SLOTSYNC_COLUMN_COUNT, StartTransactionCommand(), synchronize_one_slot(), TextDatumGetCString, TransactionIdIsValid, TTSOpsMinimalTuple, tuplestore_gettupleslot(), RemoteSlot::two_phase, UnlockSharedObject(), walrcv_clear_result(), walrcv_exec, WALRCV_OK_TUPLES, wrconn, and XLogRecPtrIsInvalid.

Referenced by ReplSlotSyncWorkerMain(), and SyncReplicationSlots().

◆ SyncReplicationSlots()

void SyncReplicationSlots ( WalReceiverConn *  wrconn)

Definition at line 1722 of file slotsync.c.

1723{
1725 {
1727
1729
1731
1732 /* Cleanup the synced temporary slots */
1734
1735 /* We are done with sync, so reset sync flag */
1737 }
1739}
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:47
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:52
static void slotsync_failure_callback(int code, Datum arg)
Definition: slotsync.c:1685

References check_and_set_sync_info(), InvalidPid, PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, PointerGetDatum(), ReplicationSlotCleanup(), reset_syncing_flag(), slotsync_failure_callback(), synchronize_slots(), validate_remote_info(), and wrconn.

Referenced by pg_sync_replication_slots().

◆ update_and_persist_local_synced_slot()

static bool update_and_persist_local_synced_slot ( RemoteSlot *  remote_slot,
Oid  remote_dbid 
)
static

Definition at line 543 of file slotsync.c.

544{
546 bool found_consistent_snapshot = false;
547 bool remote_slot_precedes = false;
548
549 (void) update_local_synced_slot(remote_slot, remote_dbid,
550 &found_consistent_snapshot,
551 &remote_slot_precedes);
552
553 /*
554 * Check if the primary server has caught up. Refer to the comment atop
555 * the file for details on this check.
556 */
557 if (remote_slot_precedes)
558 {
559 /*
560 * The remote slot didn't catch up to locally reserved position.
561 *
562 * We do not drop the slot because the restart_lsn can be ahead of the
563 * current location when recreating the slot in the next cycle. It may
564 * take more time to create such a slot. Therefore, we keep this slot
565 * and attempt the synchronization in the next cycle.
566 */
567 return false;
568 }
569
570 /*
571 * Don't persist the slot if it cannot reach the consistent point from the
572 * restart_lsn. See comments atop this file.
573 */
574 if (!found_consistent_snapshot)
575 {
576 ereport(LOG,
577 errmsg("could not synchronize replication slot \"%s\"", remote_slot->name),
578 errdetail("Logical decoding could not find consistent point from local slot's LSN %X/%X.",
580
581 return false;
582 }
583
585
586 ereport(LOG,
587 errmsg("newly created replication slot \"%s\" is sync-ready now",
588 remote_slot->name));
589
590 return true;
591}
int errdetail(const char *fmt,...)
Definition: elog.c:1203
void ReplicationSlotPersist(void)
Definition: slot.c:1059

References ReplicationSlot::data, ereport, errdetail(), errmsg(), LOG, LSN_FORMAT_ARGS, MyReplicationSlot, RemoteSlot::name, ReplicationSlotPersist(), ReplicationSlotPersistentData::restart_lsn, and update_local_synced_slot().

Referenced by synchronize_one_slot().

◆ update_local_synced_slot()

static bool update_local_synced_slot ( RemoteSlot *  remote_slot,
Oid  remote_dbid,
bool *  found_consistent_snapshot,
bool *  remote_slot_precedes 
)
static

Definition at line 166 of file slotsync.c.

169{
171 bool updated_xmin_or_lsn = false;
172 bool updated_config = false;
173
175
176 if (found_consistent_snapshot)
177 *found_consistent_snapshot = false;
178
179 if (remote_slot_precedes)
180 *remote_slot_precedes = false;
181
182 /*
183 * Don't overwrite if we already have a newer catalog_xmin and
184 * restart_lsn.
185 */
186 if (remote_slot->restart_lsn < slot->data.restart_lsn ||
188 slot->data.catalog_xmin))
189 {
190 /*
191 * This can happen in following situations:
192 *
193 * If the slot is temporary, it means either the initial WAL location
194 * reserved for the local slot is ahead of the remote slot's
195 * restart_lsn or the initial xmin_horizon computed for the local slot
196 * is ahead of the remote slot.
197 *
198 * If the slot is persistent, restart_lsn of the synced slot could
199 * still be ahead of the remote slot. Since we use slot advance
200 * functionality to keep snapbuild/slot updated, it is possible that
201 * the restart_lsn is advanced to a later position than it has on the
202 * primary. This can happen when slot advancing machinery finds
203 * running xacts record after reaching the consistent state at a later
204 * point than the primary where it serializes the snapshot and updates
205 * the restart_lsn.
206 *
207 * We LOG the message if the slot is temporary as it can help the user
208 * to understand why the slot is not sync-ready. In the case of a
209 * persistent slot, it would be a more common case and won't directly
210 * impact the users, so we used DEBUG1 level to log the message.
211 */
213 errmsg("could not synchronize replication slot \"%s\" because remote slot precedes local slot",
214 remote_slot->name),
215 errdetail("The remote slot has LSN %X/%X and catalog xmin %u, but the local slot has LSN %X/%X and catalog xmin %u.",
216 LSN_FORMAT_ARGS(remote_slot->restart_lsn),
217 remote_slot->catalog_xmin,
219 slot->data.catalog_xmin));
220
221 if (remote_slot_precedes)
222 *remote_slot_precedes = true;
223 }
224
225 /*
226 * Attempt to sync LSNs and xmins only if remote slot is ahead of local
227 * slot.
228 */
229 else if (remote_slot->confirmed_lsn > slot->data.confirmed_flush ||
230 remote_slot->restart_lsn > slot->data.restart_lsn ||
232 slot->data.catalog_xmin))
233 {
234 /*
235 * We can't directly copy the remote slot's LSN or xmin unless there
236 * exists a consistent snapshot at that point. Otherwise, after
237 * promotion, the slots may not reach a consistent point before the
238 * confirmed_flush_lsn which can lead to a data loss. To avoid data
239 * loss, we let slot machinery advance the slot which ensures that
240 * snapbuilder/slot statuses are updated properly.
241 */
242 if (SnapBuildSnapshotExists(remote_slot->restart_lsn))
243 {
244 /*
245 * Update the slot info directly if there is a serialized snapshot
246 * at the restart_lsn, as the slot can quickly reach consistency
247 * at restart_lsn by restoring the snapshot.
248 */
249 SpinLockAcquire(&slot->mutex);
250 slot->data.restart_lsn = remote_slot->restart_lsn;
251 slot->data.confirmed_flush = remote_slot->confirmed_lsn;
252 slot->data.catalog_xmin = remote_slot->catalog_xmin;
253 SpinLockRelease(&slot->mutex);
254
255 if (found_consistent_snapshot)
256 *found_consistent_snapshot = true;
257 }
258 else
259 {
261 found_consistent_snapshot);
262
263 /* Sanity check */
264 if (slot->data.confirmed_flush != remote_slot->confirmed_lsn)
266 errmsg_internal("synchronized confirmed_flush for slot \"%s\" differs from remote slot",
267 remote_slot->name),
268 errdetail_internal("Remote slot has LSN %X/%X but local slot has LSN %X/%X.",
269 LSN_FORMAT_ARGS(remote_slot->confirmed_lsn),
271 }
272
273 updated_xmin_or_lsn = true;
274 }
275
276 if (remote_dbid != slot->data.database ||
277 remote_slot->two_phase != slot->data.two_phase ||
278 remote_slot->failover != slot->data.failover ||
279 strcmp(remote_slot->plugin, NameStr(slot->data.plugin)) != 0)
280 {
281 NameData plugin_name;
282
283 /* Avoid expensive operations while holding a spinlock. */
284 namestrcpy(&plugin_name, remote_slot->plugin);
285
286 SpinLockAcquire(&slot->mutex);
287 slot->data.plugin = plugin_name;
288 slot->data.database = remote_dbid;
289 slot->data.two_phase = remote_slot->two_phase;
290 slot->data.failover = remote_slot->failover;
291 SpinLockRelease(&slot->mutex);
292
293 updated_config = true;
294 }
295
296 /*
297 * We have to write the changed xmin to disk *before* we change the
298 * in-memory value, otherwise after a crash we wouldn't know that some
299 * catalog tuples might have been removed already.
300 */
301 if (updated_config || updated_xmin_or_lsn)
302 {
305 }
306
307 /*
308 * Now the new xmin is safely on disk, we can let the global value
309 * advance. We do not take ProcArrayLock or similar since we only advance
310 * xmin here and there's not much harm done by a concurrent computation
311 * missing that.
312 */
313 if (updated_xmin_or_lsn)
314 {
315 SpinLockAcquire(&slot->mutex);
316 slot->effective_catalog_xmin = remote_slot->catalog_xmin;
317 SpinLockRelease(&slot->mutex);
318
321 }
322
323 return updated_config || updated_xmin_or_lsn;
324}
XLogRecPtr LogicalSlotAdvanceAndCheckSnapState(XLogRecPtr moveto, bool *found_consistent_snapshot)
Definition: logical.c:2044
bool SnapBuildSnapshotExists(XLogRecPtr lsn)
Definition: snapbuild.c:2010
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition: transam.c:314

References Assert, RemoteSlot::catalog_xmin, ReplicationSlotPersistentData::catalog_xmin, ReplicationSlotPersistentData::confirmed_flush, RemoteSlot::confirmed_lsn, ReplicationSlot::data, ReplicationSlotPersistentData::database, DEBUG1, ReplicationSlot::effective_catalog_xmin, ereport, errdetail(), errdetail_internal(), errmsg(), errmsg_internal(), ERROR, RemoteSlot::failover, ReplicationSlotPersistentData::failover, ReplicationSlotPersistentData::invalidated, LOG, LogicalSlotAdvanceAndCheckSnapState(), LSN_FORMAT_ARGS, ReplicationSlot::mutex, MyReplicationSlot, RemoteSlot::name, NameStr, namestrcpy(), ReplicationSlotPersistentData::persistency, RemoteSlot::plugin, ReplicationSlotPersistentData::plugin, ReplicationSlotMarkDirty(), ReplicationSlotSave(), ReplicationSlotsComputeRequiredLSN(), ReplicationSlotsComputeRequiredXmin(), RemoteSlot::restart_lsn, ReplicationSlotPersistentData::restart_lsn, RS_INVAL_NONE, RS_TEMPORARY, SnapBuildSnapshotExists(), SpinLockAcquire, SpinLockRelease, TransactionIdFollows(), TransactionIdPrecedes(), RemoteSlot::two_phase, and ReplicationSlotPersistentData::two_phase.

Referenced by synchronize_one_slot(), and update_and_persist_local_synced_slot().

◆ update_synced_slots_inactive_since()

static void update_synced_slots_inactive_since ( void  )
static

Definition at line 1509 of file slotsync.c.

1510{
1511 TimestampTz now = 0;
1512
1513 /*
1514 * We need to update inactive_since only when we are promoting standby to
1515 * correctly interpret the inactive_since if the standby gets promoted
1516 * without a restart. We don't want the slots to appear inactive for a
1517 * long time after promotion if they haven't been synchronized recently.
1518 * Whoever acquires the slot, i.e., makes the slot active, will reset it.
1519 */
1520 if (!StandbyMode)
1521 return;
1522
1523 /* The slot sync worker or SQL function mustn't be running by now */
1525
1526 LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1527
1528 for (int i = 0; i < max_replication_slots; i++)
1529 {
1531
1532 /* Check if it is a synchronized slot */
1533 if (s->in_use && s->data.synced)
1534 {
1536
1537 /* The slot must not be acquired by any process */
1538 Assert(s->active_pid == 0);
1539
1540 /* Use the same inactive_since time for all the slots. */
1541 if (now == 0)
1543
1545 }
1546 }
1547
1548 LWLockRelease(ReplicationSlotControlLock);
1549}
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1644
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1608
int64 TimestampTz
Definition: timestamp.h:39
static void ReplicationSlotSetInactiveSince(ReplicationSlot *s, TimestampTz ts, bool acquire_lock)
Definition: slot.h:235
pid_t active_pid
Definition: slot.h:160
bool StandbyMode
Definition: xlogrecovery.c:148

References ReplicationSlot::active_pid, Assert, ReplicationSlot::data, GetCurrentTimestamp(), i, ReplicationSlot::in_use, InvalidPid, LW_SHARED, LWLockAcquire(), LWLockRelease(), max_replication_slots, now(), SlotSyncCtxStruct::pid, ReplicationSlotCtlData::replication_slots, ReplicationSlotCtl, ReplicationSlotSetInactiveSince(), SlotIsLogical, SlotSyncCtx, StandbyMode, ReplicationSlotPersistentData::synced, and SlotSyncCtxStruct::syncing.

Referenced by ShutDownSlotSync().

◆ validate_remote_info()

static void validate_remote_info ( WalReceiverConn wrconn)
static

Definition at line 932 of file slotsync.c.

933{
934#define PRIMARY_INFO_OUTPUT_COL_COUNT 2
935 WalRcvExecResult *res;
936 Oid slotRow[PRIMARY_INFO_OUTPUT_COL_COUNT] = {BOOLOID, BOOLOID};
937 StringInfoData cmd;
938 bool isnull;
939 TupleTableSlot *tupslot;
940 bool remote_in_recovery;
941 bool primary_slot_valid;
942 bool started_tx = false;
943
944 initStringInfo(&cmd);
945 appendStringInfo(&cmd,
946 "SELECT pg_is_in_recovery(), count(*) = 1"
947 " FROM pg_catalog.pg_replication_slots"
948 " WHERE slot_type='physical' AND slot_name=%s",
949 quote_literal_cstr(PrimarySlotName));
950
951 /* The syscache access in walrcv_exec() needs a transaction env. */
952 if (!IsTransactionState())
953 {
954 StartTransactionCommand();
955 started_tx = true;
956 }
957
958 res = walrcv_exec(wrconn, cmd.data, PRIMARY_INFO_OUTPUT_COL_COUNT, slotRow);
959 pfree(cmd.data);
960
961 if (res->status != WALRCV_OK_TUPLES)
962 ereport(ERROR,
963 errmsg("could not fetch primary slot name \"%s\" info from the primary server: %s",
964 PrimarySlotName, res->err),
965 errhint("Check if \"primary_slot_name\" is configured correctly."));
966
967 tupslot = MakeSingleTupleTableSlot(res->tupledesc, &TTSOpsMinimalTuple);
968 if (!tuplestore_gettupleslot(res->tuplestore, true, false, tupslot))
969 elog(ERROR,
970 "failed to fetch tuple for the primary server slot specified by \"primary_slot_name\"");
971
972 remote_in_recovery = DatumGetBool(slot_getattr(tupslot, 1, &isnull));
973 Assert(!isnull);
974
975 /*
976 * Slot sync is currently not supported on a cascading standby. This is
977 * because if we allow it, the primary server needs to wait for all the
978 * cascading standbys, otherwise, logical subscribers can still be ahead
979 * of one of the cascading standbys which we plan to promote. Thus, to
980 * avoid this additional complexity, we restrict it for the time being.
981 */
982 if (remote_in_recovery)
983 ereport(ERROR,
984 errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
985 errmsg("cannot synchronize replication slots from a standby server"));
986
987 primary_slot_valid = DatumGetBool(slot_getattr(tupslot, 2, &isnull));
988 Assert(!isnull);
989
990 if (!primary_slot_valid)
991 ereport(ERROR,
992 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
993 /* translator: second %s is a GUC variable name */
994 errmsg("replication slot \"%s\" specified by \"%s\" does not exist on primary server",
995 PrimarySlotName, "primary_slot_name"));
996
997 ExecClearTuple(tupslot);
998 walrcv_clear_result(res);
999
1000 if (started_tx)
1001 CommitTransactionCommand();
1002}
int errhint(const char *fmt,...)
Definition: elog.c:1317
char * quote_literal_cstr(const char *rawstr)
Definition: quote.c:103
#define PRIMARY_INFO_OUTPUT_COL_COUNT

References appendStringInfo(), Assert, CommitTransactionCommand(), StringInfoData::data, DatumGetBool(), elog, ereport, errcode(), errhint(), errmsg(), ERROR, ExecClearTuple(), initStringInfo(), IsTransactionState(), MakeSingleTupleTableSlot(), pfree(), PRIMARY_INFO_OUTPUT_COL_COUNT, PrimarySlotName, quote_literal_cstr(), res, slot_getattr(), StartTransactionCommand(), TTSOpsMinimalTuple, tuplestore_gettupleslot(), walrcv_clear_result(), walrcv_exec, WALRCV_OK_TUPLES, and wrconn.

Referenced by ReplSlotSyncWorkerMain(), and SyncReplicationSlots().

◆ ValidateSlotSyncParams()

bool ValidateSlotSyncParams ( int  elevel)

Definition at line 1037 of file slotsync.c.

1038{
1039 /*
1040 * Logical slot sync/creation requires wal_level >= logical.
1041 *
1042 * Since altering the wal_level requires a server restart, error out in
1043 * this case regardless of the elevel provided by the caller.
1044 */
1045 if (wal_level < WAL_LEVEL_LOGICAL)
1046 ereport(ERROR,
1047 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1048 errmsg("replication slot synchronization requires \"wal_level\" >= \"logical\""));
1049
1050 /*
1051 * A physical replication slot (primary_slot_name) is required on the
1052 * primary to ensure that the rows needed by the standby are not removed
1053 * after restarting, so that the synchronized slot on the standby will not
1054 * be invalidated.
1055 */
1056 if (PrimarySlotName == NULL || *PrimarySlotName == '\0')
1057 {
1058 ereport(elevel,
1059 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1060 /* translator: %s is a GUC variable name */
1061 errmsg("replication slot synchronization requires \"%s\" to be set", "primary_slot_name"));
1062 return false;
1063 }
1064
1065 /*
1066 * hot_standby_feedback must be enabled to cooperate with the physical
1067 * replication slot, which allows informing the primary about the xmin and
1068 * catalog_xmin values on the standby.
1069 */
1070 if (!hot_standby_feedback)
1071 {
1072 ereport(elevel,
1073 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1074 /* translator: %s is a GUC variable name */
1075 errmsg("replication slot synchronization requires \"%s\" to be enabled",
1076 "hot_standby_feedback"));
1077 return false;
1078 }
1079
1080 /*
1081 * primary_conninfo is required to connect to the primary server and
1082 * fetch the slot information.
1083 */
1084 if (PrimaryConnInfo == NULL || *PrimaryConnInfo == '\0')
1085 {
1086 ereport(elevel,
1087 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1088 /* translator: %s is a GUC variable name */
1089 errmsg("replication slot synchronization requires \"%s\" to be set",
1090 "primary_conninfo"));
1091 return false;
1092 }
1093
1094 return true;
1095}
int wal_level
Definition: xlog.c:131
@ WAL_LEVEL_LOGICAL
Definition: xlog.h:76

References ereport, errcode(), errmsg(), ERROR, hot_standby_feedback, PrimaryConnInfo, PrimarySlotName, wal_level, and WAL_LEVEL_LOGICAL.

Referenced by LaunchMissingBackgroundProcesses(), and pg_sync_replication_slots().

◆ wait_for_slot_activity()

static void wait_for_slot_activity ( bool  some_slot_updated)
static

Definition at line 1234 of file slotsync.c.

1235{
1236 int rc;
1237
1238 if (!some_slot_updated)
1239 {
1240 /*
1241 * No slots were updated, so double the sleep time, but not beyond the
1242 * maximum allowable value.
1243 */
1244 sleep_ms = Min(sleep_ms * 2, MAX_SLOTSYNC_WORKER_NAPTIME_MS);
1245 }
1246 else
1247 {
1248 /*
1249 * Some slots were updated since the last sleep, so reset the sleep
1250 * time.
1251 */
1252 sleep_ms = MIN_SLOTSYNC_WORKER_NAPTIME_MS;
1253 }
1254
1255 rc = WaitLatch(MyLatch,
1256 WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
1257 sleep_ms,
1258 WAIT_EVENT_REPLICATION_SLOTSYNC_MAIN);
1259
1260 if (rc & WL_LATCH_SET)
1261 ResetLatch(MyLatch);
1262}
#define Min(x, y)
Definition: c.h:961
#define MIN_SLOTSYNC_WORKER_NAPTIME_MS
Definition: slotsync.c:114
static long sleep_ms
Definition: slotsync.c:117
#define MAX_SLOTSYNC_WORKER_NAPTIME_MS
Definition: slotsync.c:115

References MAX_SLOTSYNC_WORKER_NAPTIME_MS, Min, MIN_SLOTSYNC_WORKER_NAPTIME_MS, MyLatch, ResetLatch(), sleep_ms, WaitLatch(), WL_EXIT_ON_PM_DEATH, WL_LATCH_SET, and WL_TIMEOUT.

Referenced by ReplSlotSyncWorkerMain().

Variable Documentation

◆ sleep_ms

long sleep_ms = MIN_SLOTSYNC_WORKER_NAPTIME_MS
static

Definition at line 117 of file slotsync.c.

Referenced by do_watch(), and wait_for_slot_activity().

◆ SlotSyncCtx

◆ sync_replication_slots

bool sync_replication_slots = false

Definition at line 107 of file slotsync.c.

Referenced by LaunchMissingBackgroundProcesses(), and slotsync_reread_config().

◆ syncing_slots