PostgreSQL Source Code git master
Loading...
Searching...
No Matches
slotsync.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 * slotsync.c
3 * Functionality for synchronizing slots to a standby server from the
4 * primary server.
5 *
6 * Copyright (c) 2024-2026, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * src/backend/replication/logical/slotsync.c
10 *
11 * This file contains the code for slot synchronization on a physical standby
12 * to fetch logical failover slots information from the primary server, create
13 * the slots on the standby and synchronize them periodically.
14 *
15 * Slot synchronization can be performed either automatically by enabling slot
16 * sync worker or manually by calling SQL function pg_sync_replication_slots().
17 *
18 * If the WAL corresponding to the remote's restart_lsn is not available on the
19 * physical standby, or the remote's catalog_xmin precedes the oldest xid for
20 * which it is guaranteed that rows wouldn't have been removed, then we cannot
21 * create the local standby slot, because that would mean moving the local slot
22 * backward and decoding won't be possible via such a slot. In this case, the
23 * slot will be marked as RS_TEMPORARY. Once the primary server catches up,
24 * the slot will be marked as RS_PERSISTENT (which means sync-ready), after
25 * which the slot sync worker can perform the sync periodically or the user can
26 * call pg_sync_replication_slots() periodically to perform the syncs.
27 *
28 * If synchronized slots fail to build a consistent snapshot from the
29 * restart_lsn before reaching confirmed_flush_lsn, they would become
30 * unreliable after promotion due to potential data loss from changes
31 * before reaching a consistent point. This can happen because the slots can
32 * be synced at some random time and we may not reach the consistent point
33 * at the same WAL location as the primary. So, we mark such slots as
34 * RS_TEMPORARY. Once the decoding from corresponding LSNs can reach a
35 * consistent point, they will be marked as RS_PERSISTENT.
36 *
37 * The slot sync worker waits for some time before the next synchronization,
38 * with the duration varying based on whether any slots were updated during
39 * the last cycle. Refer to the comments above wait_for_slot_activity() for
40 * more details.
41 *
42 * If the SQL function pg_sync_replication_slots() is used to sync the slots,
43 * and if the slots are not ready to be synced and are marked as RS_TEMPORARY
44 * because of any of the reasons mentioned above, then the SQL function also
45 * waits and retries until the slots are marked as RS_PERSISTENT (which means
46 * sync-ready). Refer to the comments in SyncReplicationSlots() for more
47 * details.
48 *
49 * Any standby synchronized slots will be dropped if they no longer need
50 * to be synchronized. See comment atop drop_local_obsolete_slots() for more
51 * details.
52 *---------------------------------------------------------------------------
53 */
54
55#include "postgres.h"
56
57#include <time.h>
58
60#include "access/xlogrecovery.h"
61#include "catalog/pg_database.h"
62#include "libpq/pqsignal.h"
63#include "pgstat.h"
65#include "replication/logical.h"
68#include "storage/ipc.h"
69#include "storage/lmgr.h"
70#include "storage/proc.h"
71#include "storage/procarray.h"
72#include "tcop/tcopprot.h"
73#include "utils/builtins.h"
74#include "utils/memutils.h"
75#include "utils/pg_lsn.h"
76#include "utils/ps_status.h"
77#include "utils/timeout.h"
78
79/*
80 * Struct for sharing information to control slot synchronization.
81 *
82 * The 'pid' is either the slot sync worker's pid or the backend's pid running
83 * the SQL function pg_sync_replication_slots(). When the startup process sets
84 * 'stopSignaled' during promotion, it uses this 'pid' to wake up the currently
85 * synchronizing process so that the process can immediately stop its
86 * synchronizing work on seeing 'stopSignaled' set.
87 * Setting 'stopSignaled' is also used to handle the race condition when the
88 * postmaster has not noticed the promotion yet and thus may end up restarting
89 * the slot sync worker. If 'stopSignaled' is set, the worker will exit in such a
90 * case. The SQL function pg_sync_replication_slots() will also error out if
91 * this flag is set. Note that we don't need to reset this variable as after
92 * promotion the slot sync worker won't be restarted because the pmState
93 * changes to PM_RUN from PM_HOT_STANDBY and we don't support demoting
94 * primary without restarting the server. See LaunchMissingBackgroundProcesses.
95 *
96 * The 'syncing' flag is needed to prevent concurrent slot syncs to avoid slot
97 * overwrites.
98 *
99 * The 'last_start_time' is needed by postmaster to start the slot sync worker
100 * once per SLOTSYNC_RESTART_INTERVAL_SEC. In cases where an immediate restart
101 * is expected (e.g., slot sync GUCs change), slot sync worker will reset
102 * last_start_time before exiting, so that postmaster can start the worker
103 * without waiting for SLOTSYNC_RESTART_INTERVAL_SEC.
104 */
113
115
116/* GUC variable */
118
119/*
120 * The sleep time (ms) between slot-sync cycles varies dynamically
121 * (within a MIN/MAX range) according to slot activity. See
122 * wait_for_slot_activity() for details.
123 */
124#define MIN_SLOTSYNC_WORKER_NAPTIME_MS 200
125#define MAX_SLOTSYNC_WORKER_NAPTIME_MS 30000 /* 30s */
126
128
129/* The restart interval for the slot sync worker, used by postmaster */
130#define SLOTSYNC_RESTART_INTERVAL_SEC 10
131
132/*
133 * Flag to tell if we are syncing replication slots. Unlike the 'syncing' flag
134 * in SlotSyncCtxStruct, this flag is true only if the current process is
135 * performing slot synchronization.
136 */
137static bool syncing_slots = false;
138
139/*
140 * Structure to hold information fetched from the primary server about a logical
141 * replication slot.
142 */
158
159static void slotsync_failure_callback(int code, Datum arg);
160static void update_synced_slots_inactive_since(void);
161
162/*
163 * Update slot sync skip stats. This function requires the caller to acquire
164 * the slot.
165 */
166static void
168{
169 ReplicationSlot *slot;
170
172
173 slot = MyReplicationSlot;
174
175 /*
176 * Update the slot sync related stats in pg_stat_replication_slots when a
177 * slot sync is skipped.
178 */
181
182 /* Update the slot sync skip reason */
184 {
185 SpinLockAcquire(&slot->mutex);
187 SpinLockRelease(&slot->mutex);
188 }
189}
190
191/*
192 * If necessary, update the local synced slot's metadata based on the data
193 * from the remote slot.
194 *
195 * If no update was needed (the data of the remote slot is the same as the
196 * local slot) return false, otherwise true.
197 */
198static bool
200{
202 bool updated_xmin_or_lsn = false;
203 bool updated_config = false;
206
208
209 /*
210 * Make sure that concerned WAL is received and flushed before syncing
211 * slot to target lsn received from the primary server.
212 */
213 if (remote_slot->confirmed_lsn > latestFlushPtr)
214 {
216
217 /*
218 * Can get here only if GUC 'synchronized_standby_slots' on the
219 * primary server was not configured correctly.
220 */
223 errmsg("skipping slot synchronization because the received slot sync"
224 " LSN %X/%08X for slot \"%s\" is ahead of the standby position %X/%08X",
225 LSN_FORMAT_ARGS(remote_slot->confirmed_lsn),
226 remote_slot->name,
228
229 return false;
230 }
231
232 /*
233 * Don't overwrite if we already have a newer catalog_xmin and
234 * restart_lsn.
235 */
236 if (remote_slot->restart_lsn < slot->data.restart_lsn ||
238 slot->data.catalog_xmin))
239 {
240 /* Update slot sync skip stats */
242
243 /*
244 * This can happen in the following situations:
245 *
246 * If the slot is temporary, it means either the initial WAL location
247 * reserved for the local slot is ahead of the remote slot's
248 * restart_lsn or the initial xmin_horizon computed for the local slot
249 * is ahead of the remote slot.
250 *
251 * If the slot is persistent, both restart_lsn and catalog_xmin of the
252 * synced slot could still be ahead of the remote slot. Since we use
253 * slot advance functionality to keep snapbuild/slot updated, it is
254 * possible that the restart_lsn and catalog_xmin are advanced to a
255 * later position than it has on the primary. This can happen when
256 * slot advancing machinery finds running xacts record after reaching
257 * the consistent state at a later point than the primary where it
258 * serializes the snapshot and updates the restart_lsn.
259 *
260 * We LOG the message if the slot is temporary, as it can help the user
261 * to understand why the slot is not sync-ready. For a persistent slot,
262 * this is a more common case and doesn't directly impact users, so we
263 * log the message at DEBUG1 level instead.
264 */
266 errmsg("could not synchronize replication slot \"%s\"",
267 remote_slot->name),
268 errdetail("Synchronization could lead to data loss, because the remote slot needs WAL at LSN %X/%08X and catalog xmin %u, but the standby has LSN %X/%08X and catalog xmin %u.",
269 LSN_FORMAT_ARGS(remote_slot->restart_lsn),
270 remote_slot->catalog_xmin,
272 slot->data.catalog_xmin));
273
274 /*
275 * Skip updating the configuration. This is required to avoid syncing
276 * two_phase_at without syncing confirmed_lsn. Otherwise, the prepared
277 * transaction between old confirmed_lsn and two_phase_at will
278 * unexpectedly get decoded and sent to the downstream after
279 * promotion. See comments in ReorderBufferFinishPrepared.
280 */
281 return false;
282 }
283
284 /*
285 * Attempt to sync LSNs and xmins only if remote slot is ahead of local
286 * slot.
287 */
288 if (remote_slot->confirmed_lsn > slot->data.confirmed_flush ||
289 remote_slot->restart_lsn > slot->data.restart_lsn ||
290 TransactionIdFollows(remote_slot->catalog_xmin,
291 slot->data.catalog_xmin))
292 {
293 /*
294 * We can't directly copy the remote slot's LSN or xmin unless there
295 * exists a consistent snapshot at that point. Otherwise, after
296 * promotion, the slots may not reach a consistent point before the
297 * confirmed_flush_lsn which can lead to a data loss. To avoid data
298 * loss, we let slot machinery advance the slot which ensures that
299 * snapbuilder/slot statuses are updated properly.
300 */
301 if (SnapBuildSnapshotExists(remote_slot->restart_lsn))
302 {
303 /*
304 * Update the slot info directly if there is a serialized snapshot
305 * at the restart_lsn, as the slot can quickly reach consistency
306 * at restart_lsn by restoring the snapshot.
307 */
308 SpinLockAcquire(&slot->mutex);
309 slot->data.restart_lsn = remote_slot->restart_lsn;
310 slot->data.confirmed_flush = remote_slot->confirmed_lsn;
311 slot->data.catalog_xmin = remote_slot->catalog_xmin;
312 SpinLockRelease(&slot->mutex);
313 }
314 else
315 {
317
320
321 /* Sanity check */
322 if (slot->data.confirmed_flush != remote_slot->confirmed_lsn)
324 errmsg_internal("synchronized confirmed_flush for slot \"%s\" differs from remote slot",
325 remote_slot->name),
326 errdetail_internal("Remote slot has LSN %X/%08X but local slot has LSN %X/%08X.",
327 LSN_FORMAT_ARGS(remote_slot->confirmed_lsn),
329
330 /*
331 * If we can't reach a consistent snapshot, the slot won't be
332 * persisted. See update_and_persist_local_synced_slot().
333 */
335 {
337
338 ereport(LOG,
339 errmsg("could not synchronize replication slot \"%s\"",
340 remote_slot->name),
341 errdetail("Synchronization could lead to data loss, because the standby could not build a consistent snapshot to decode WALs at LSN %X/%08X.",
343
345 }
346 }
347
348 updated_xmin_or_lsn = true;
349 }
350
351 /* Update slot sync skip stats */
353
354 if (remote_dbid != slot->data.database ||
355 remote_slot->two_phase != slot->data.two_phase ||
356 remote_slot->failover != slot->data.failover ||
357 strcmp(remote_slot->plugin, NameStr(slot->data.plugin)) != 0 ||
358 remote_slot->two_phase_at != slot->data.two_phase_at)
359 {
361
362 /* Avoid expensive operations while holding a spinlock. */
364
365 SpinLockAcquire(&slot->mutex);
366 slot->data.plugin = plugin_name;
367 slot->data.database = remote_dbid;
368 slot->data.two_phase = remote_slot->two_phase;
369 slot->data.two_phase_at = remote_slot->two_phase_at;
370 slot->data.failover = remote_slot->failover;
371 SpinLockRelease(&slot->mutex);
372
373 updated_config = true;
374
375 /*
376 * Ensure that there is no risk of sending prepared transactions
377 * unexpectedly after the promotion.
378 */
380 }
381
382 /*
383 * We have to write the changed xmin to disk *before* we change the
384 * in-memory value, otherwise after a crash we wouldn't know that some
385 * catalog tuples might have been removed already.
386 */
388 {
391 }
392
393 /*
394 * Now the new xmin is safely on disk, we can let the global value
395 * advance. We do not take ProcArrayLock or similar since we only advance
396 * xmin here and there's not much harm done by a concurrent computation
397 * missing that.
398 */
400 {
401 SpinLockAcquire(&slot->mutex);
402 slot->effective_catalog_xmin = remote_slot->catalog_xmin;
403 SpinLockRelease(&slot->mutex);
404
407 }
408
410}
411
412/*
413 * Get the list of local logical slots that are synchronized from the
414 * primary server.
415 */
416static List *
418{
420
422
423 for (int i = 0; i < max_replication_slots; i++)
424 {
426
427 /* Check if it is a synchronized slot */
428 if (s->in_use && s->data.synced)
429 {
432 }
433 }
434
436
437 return local_slots;
438}
439
440/*
441 * Helper function to check if local_slot is required to be retained.
442 *
443 * Returns false if local_slot either does not exist in the remote_slots list
444 * or is invalidated while the corresponding remote slot is still valid;
445 * otherwise returns true.
446 */
447static bool
449{
450 bool remote_exists = false;
451 bool locally_invalidated = false;
452
454 {
455 if (strcmp(remote_slot->name, NameStr(local_slot->data.name)) == 0)
456 {
457 remote_exists = true;
458
459 /*
460 * If remote slot is not invalidated but local slot is marked as
461 * invalidated, then set locally_invalidated flag.
462 */
465 (remote_slot->invalidated == RS_INVAL_NONE) &&
466 (local_slot->data.invalidated != RS_INVAL_NONE);
468
469 break;
470 }
471 }
472
474}
475
476/*
477 * Drop local obsolete slots.
478 *
479 * Drop the local slots that no longer need to be synced i.e. these either do
480 * not exist on the primary or are no longer enabled for failover.
481 *
482 * Additionally, drop any slots that are valid on the primary but got
483 * invalidated on the standby. This situation may occur due to the following
484 * reasons:
485 * - The 'max_slot_wal_keep_size' on the standby is insufficient to retain WAL
486 * records from the restart_lsn of the slot.
487 * - 'primary_slot_name' is temporarily reset to null and the physical slot is
488 * removed.
489 * These dropped slots will get recreated in next sync-cycle and it is okay to
490 * drop and recreate such slots as long as these are not consumable on the
491 * standby (which is the case currently).
492 *
493 * Note: Change of 'wal_level' on the primary server to a level lower than
494 * logical may also result in slot invalidation and removal on the standby.
495 * This is because such 'wal_level' change is only possible if the logical
496 * slots are removed on the primary server, so it's expected to see the
497 * slots being invalidated and removed on the standby too (and re-created
498 * if they are re-created on the primary server).
499 */
500static void
502{
504
506 {
507 /* Drop the local slot if it is not required to be retained. */
509 {
510 bool synced_slot;
511
512 /*
513 * Use shared lock to prevent a conflict with
514 * ReplicationSlotsDropDBSlots(), trying to drop the same slot
515 * during a drop-database operation.
516 */
518 0, AccessShareLock);
519
520 /*
521 * In the small window between getting the slot to drop and
522 * locking the database, there is a possibility of a parallel
523 * database drop by the startup process and the creation of a new
524 * slot by the user. This new user-created slot may end up using
525 * the same shared memory as that of 'local_slot'. Thus check if
526 * local_slot is still the synced one before performing actual
527 * drop.
528 */
530 synced_slot = local_slot->in_use && local_slot->data.synced;
532
533 if (synced_slot)
534 {
535 ReplicationSlotAcquire(NameStr(local_slot->data.name), true, false);
537 }
538
540 0, AccessShareLock);
541
542 ereport(LOG,
543 errmsg("dropped replication slot \"%s\" of database with OID %u",
544 NameStr(local_slot->data.name),
545 local_slot->data.database));
546 }
547 }
548}
549
550/*
551 * Reserve WAL for the currently active local slot using the specified WAL
552 * location (restart_lsn).
553 *
554 * If the given WAL location has been removed or is at risk of removal,
555 * reserve WAL using the oldest segment that is non-removable.
556 */
557static void
559{
562 XLogSegNo segno;
564
565 Assert(slot != NULL);
567
568 /*
569 * Acquire an exclusive lock to prevent the checkpoint process from
570 * concurrently calculating the minimum slot LSN (see
571 * CheckPointReplicationSlots), ensuring that if WAL reservation occurs
572 * first, the checkpoint must wait for the restart_lsn update before
573 * calculating the minimum LSN.
574 *
575 * Note: Unlike ReplicationSlotReserveWal(), this lock does not protect a
576 * newly synced slot from being invalidated if a concurrent checkpoint has
577 * invoked CheckPointReplicationSlots() before the WAL reservation here.
578 * This can happen because the initial restart_lsn received from the
579 * remote server can precede the redo pointer. Therefore, when selecting
580 * the initial restart_lsn, we consider using the redo pointer or the
581 * minimum slot LSN (if those values are greater than the remote
582 * restart_lsn) instead of relying solely on the remote value.
583 */
585
586 /*
587 * Determine the minimum non-removable LSN by comparing the redo pointer
588 * with the minimum slot LSN.
589 *
590 * The minimum slot LSN is considered because the redo pointer advances at
591 * every checkpoint, even when replication slots are present on the
592 * standby. In such scenarios, the redo pointer can exceed the remote
593 * restart_lsn, while WALs preceding the remote restart_lsn remain
594 * protected by a local replication slot.
595 */
598
601
602 /*
603 * If the minimum safe LSN is greater than the given restart_lsn, use it
604 * as the initial restart_lsn for the newly synced slot. Otherwise, use
605 * the given remote restart_lsn.
606 */
607 SpinLockAcquire(&slot->mutex);
608 slot->data.restart_lsn = Max(restart_lsn, min_safe_lsn);
609 SpinLockRelease(&slot->mutex);
610
612
614 if (XLogGetLastRemovedSegno() >= segno)
615 elog(ERROR, "WAL required by replication slot %s has been removed concurrently",
616 NameStr(slot->data.name));
617
619}
620
621/*
622 * If the remote restart_lsn and catalog_xmin have caught up with the
623 * local ones, then update the LSNs and persist the local synced slot for
624 * future synchronization; otherwise, do nothing.
625 *
626 * *slot_persistence_pending is set to true if any of the slots fail to
627 * persist.
628 *
629 * Return true if the slot is marked as RS_PERSISTENT (sync-ready), otherwise
630 * false.
631 */
632static bool
635{
637
638 /* Slotsync skip stats are handled in function update_local_synced_slot() */
640
641 /*
642 * Check if the slot cannot be synchronized. Refer to the comment atop the
643 * file for details on this check.
644 */
646 {
647 /*
648 * We reach this point when the remote slot didn't catch up to locally
649 * reserved position, or it cannot reach the consistent point from the
650 * restart_lsn, or the WAL prior to the remote confirmed flush LSN has
651 * not been received and flushed.
652 *
653 * We do not drop the slot because the restart_lsn and confirmed_lsn
654 * can be ahead of the current location when recreating the slot in
655 * the next cycle. It may take more time to create such a slot or
656 * reach the consistent point. Therefore, we keep this slot and
657 * attempt the synchronization in the next cycle.
658 *
659 * We also update the slot_persistence_pending parameter, so the SQL
660 * function can retry.
661 */
664
665 return false;
666 }
667
669
670 ereport(LOG,
671 errmsg("newly created replication slot \"%s\" is sync-ready now",
672 remote_slot->name));
673
674 return true;
675}
676
677/*
678 * Synchronize a single slot to the given position.
679 *
680 * This creates a new slot if there is no existing one and updates the
681 * metadata of the slot as per the data received from the primary server.
682 *
683 * The slot is created as a temporary slot and stays in the same state until the
684 * remote_slot catches up with locally reserved position and local slot is
685 * updated. The slot is then persisted and is considered as sync-ready for
686 * periodic syncs.
687 *
688 * *slot_persistence_pending is set to true if any of the slots fail to
689 * persist.
690 *
691 * Returns TRUE if the local slot is updated.
692 */
693static bool
696{
697 ReplicationSlot *slot;
698 bool slot_updated = false;
699
700 /* Search for the named slot */
701 if ((slot = SearchNamedReplicationSlot(remote_slot->name, true)))
702 {
703 bool synced;
704
705 SpinLockAcquire(&slot->mutex);
706 synced = slot->data.synced;
707 SpinLockRelease(&slot->mutex);
708
709 /* User-created slot with the same name exists, raise ERROR. */
710 if (!synced)
713 errmsg("exiting from slot synchronization because same"
714 " name slot \"%s\" already exists on the standby",
715 remote_slot->name));
716
717 /*
718 * The slot has been synchronized before.
719 *
720 * It is important to acquire the slot here before checking
721 * invalidation. If we don't acquire the slot first, there could be a
722 * race condition that the local slot could be invalidated just after
723 * checking the 'invalidated' flag here and we could end up
724 * overwriting 'invalidated' flag to remote_slot's value. See
725 * InvalidatePossiblyObsoleteSlot() where it invalidates slot directly
726 * if the slot is not acquired by other processes.
727 *
728 * XXX: If it ever turns out that slot acquire/release is costly for
729 * cases when none of the slot properties is changed then we can do a
730 * pre-check to ensure that at least one of the slot properties is
731 * changed before acquiring the slot.
732 */
733 ReplicationSlotAcquire(remote_slot->name, true, false);
734
735 Assert(slot == MyReplicationSlot);
736
737 /*
738 * Copy the invalidation cause from the remote slot only if the local slot
739 * is not invalidated locally; we don't want to overwrite an existing cause.
740 */
741 if (slot->data.invalidated == RS_INVAL_NONE &&
742 remote_slot->invalidated != RS_INVAL_NONE)
743 {
744 SpinLockAcquire(&slot->mutex);
745 slot->data.invalidated = remote_slot->invalidated;
746 SpinLockRelease(&slot->mutex);
747
748 /* Make sure the invalidated state persists across server restart */
751
752 slot_updated = true;
753 }
754
755 /* Skip the sync of an invalidated slot */
756 if (slot->data.invalidated != RS_INVAL_NONE)
757 {
759
761 return slot_updated;
762 }
763
764 /* Slot not ready yet, let's attempt to make it sync-ready now. */
765 if (slot->data.persistency == RS_TEMPORARY)
766 {
770 }
771
772 /* Slot ready for sync, so sync it. */
773 else
774 {
775 /*
776 * Sanity check: As long as the invalidations are handled
777 * appropriately as above, this should never happen.
778 *
779 * We don't need to check restart_lsn here. See the comments in
780 * update_local_synced_slot() for details.
781 */
782 if (remote_slot->confirmed_lsn < slot->data.confirmed_flush)
784 errmsg_internal("cannot synchronize local slot \"%s\"",
785 remote_slot->name),
786 errdetail_internal("Local slot's start streaming location LSN(%X/%08X) is ahead of remote slot's LSN(%X/%08X).",
788 LSN_FORMAT_ARGS(remote_slot->confirmed_lsn)));
789
791 }
792 }
793 /* Otherwise create the slot first. */
794 else
795 {
798
799 /* Skip creating the local slot if remote_slot is invalidated already */
800 if (remote_slot->invalidated != RS_INVAL_NONE)
801 return false;
802
803 /*
804 * We create temporary slots instead of ephemeral slots here because
805 * we want the slots to survive after releasing them. This is done to
806 * avoid dropping and re-creating the slots in each synchronization
807 * cycle if the restart_lsn or catalog_xmin of the remote slot has not
808 * caught up.
809 */
811 remote_slot->two_phase,
812 remote_slot->failover,
813 true);
814
815 /* For shorter lines. */
816 slot = MyReplicationSlot;
817
818 /* Avoid expensive operations while holding a spinlock. */
820
821 SpinLockAcquire(&slot->mutex);
822 slot->data.database = remote_dbid;
823 slot->data.plugin = plugin_name;
824 SpinLockRelease(&slot->mutex);
825
827
831 SpinLockAcquire(&slot->mutex);
834 SpinLockRelease(&slot->mutex);
838
841
842 slot_updated = true;
843 }
844
846
847 return slot_updated;
848}
849
850/*
851 * Fetch remote slots.
852 *
853 * If slot_names is NIL, fetches all failover logical slots from the
854 * primary server, otherwise fetches only the ones with names in slot_names.
855 *
856 * Returns a list of remote slot information structures, or NIL if none
857 * are found.
858 */
859static List *
861{
862#define SLOTSYNC_COLUMN_COUNT 10
865
866 WalRcvExecResult *res;
867 TupleTableSlot *tupslot;
869 StringInfoData query;
870
871 initStringInfo(&query);
873 "SELECT slot_name, plugin, confirmed_flush_lsn,"
874 " restart_lsn, catalog_xmin, two_phase,"
875 " two_phase_at, failover,"
876 " database, invalidation_reason"
877 " FROM pg_catalog.pg_replication_slots"
878 " WHERE failover and NOT temporary");
879
880 if (slot_names != NIL)
881 {
882 bool first_slot = true;
883
884 /*
885 * Construct the query to fetch only the specified slots
886 */
887 appendStringInfoString(&query, " AND slot_name IN (");
888
889 foreach_ptr(char, slot_name, slot_names)
890 {
891 if (!first_slot)
892 appendStringInfoString(&query, ", ");
893
894 appendStringInfo(&query, "%s", quote_literal_cstr(slot_name));
895 first_slot = false;
896 }
897 appendStringInfoChar(&query, ')');
898 }
899
900 /* Execute the query */
902 pfree(query.data);
903 if (res->status != WALRCV_OK_TUPLES)
905 errmsg("could not fetch failover logical slots info from the primary server: %s",
906 res->err));
907
909 while (tuplestore_gettupleslot(res->tuplestore, true, false, tupslot))
910 {
911 bool isnull;
913 Datum d;
914 int col = 0;
915
917 &isnull));
918 Assert(!isnull);
919
920 remote_slot->plugin = TextDatumGetCString(slot_getattr(tupslot, ++col,
921 &isnull));
922 Assert(!isnull);
923
924 /*
925 * It is possible to get null values for LSN and Xmin if slot is
926 * invalidated on the primary server, so handle accordingly.
927 */
928 d = slot_getattr(tupslot, ++col, &isnull);
929 remote_slot->confirmed_lsn = isnull ? InvalidXLogRecPtr :
930 DatumGetLSN(d);
931
932 d = slot_getattr(tupslot, ++col, &isnull);
933 remote_slot->restart_lsn = isnull ? InvalidXLogRecPtr : DatumGetLSN(d);
934
935 d = slot_getattr(tupslot, ++col, &isnull);
936 remote_slot->catalog_xmin = isnull ? InvalidTransactionId :
938
939 remote_slot->two_phase = DatumGetBool(slot_getattr(tupslot, ++col,
940 &isnull));
941 Assert(!isnull);
942
943 d = slot_getattr(tupslot, ++col, &isnull);
944 remote_slot->two_phase_at = isnull ? InvalidXLogRecPtr : DatumGetLSN(d);
945
946 remote_slot->failover = DatumGetBool(slot_getattr(tupslot, ++col,
947 &isnull));
948 Assert(!isnull);
949
950 remote_slot->database = TextDatumGetCString(slot_getattr(tupslot,
951 ++col, &isnull));
952 Assert(!isnull);
953
954 d = slot_getattr(tupslot, ++col, &isnull);
955 remote_slot->invalidated = isnull ? RS_INVAL_NONE :
957
958 /* Sanity check */
960
961 /*
962 * If restart_lsn, confirmed_lsn or catalog_xmin is invalid but the
963 * slot is valid, that means we have fetched the remote_slot in its
964 * RS_EPHEMERAL state. In such a case, don't sync it; we can always
965 * sync it in the next sync cycle when the remote_slot is persisted
966 * and has valid lsn(s) and xmin values.
967 *
968 * XXX: In future, if we plan to expose 'slot->data.persistency' in
969 * pg_replication_slots view, then we can avoid fetching RS_EPHEMERAL
970 * slots in the first place.
971 */
972 if ((!XLogRecPtrIsValid(remote_slot->restart_lsn) ||
973 !XLogRecPtrIsValid(remote_slot->confirmed_lsn) ||
974 !TransactionIdIsValid(remote_slot->catalog_xmin)) &&
975 remote_slot->invalidated == RS_INVAL_NONE)
977 else
978 /* Create list of remote slots */
980
981 ExecClearTuple(tupslot);
982 }
983
985
986 return remote_slot_list;
987}
988
989/*
990 * Synchronize slots.
991 *
992 * This function takes a list of remote slots and synchronizes them locally. It
993 * creates the slots if not present on the standby and updates existing ones.
994 *
995 * If slot_persistence_pending is not NULL, it will be set to true if one or
996 * more slots could not be persisted. This allows callers such as
997 * SyncReplicationSlots() to retry those slots.
998 *
999 * Returns TRUE if any of the slots gets updated in this sync-cycle.
1000 */
1001static bool
1004{
1005 bool some_slot_updated = false;
1006
1007 /* Drop local slots that no longer need to be synced. */
1009
1010 /* Now sync the slots locally */
1012 {
1013 Oid remote_dbid = get_database_oid(remote_slot->database, false);
1014
1015 /*
1016 * Use shared lock to prevent a conflict with
1017 * ReplicationSlotsDropDBSlots(), trying to drop the same slot during
1018 * a drop-database operation.
1019 */
1021
1024
1026 }
1027
1028 return some_slot_updated;
1029}
1030
1031/*
1032 * Checks the remote server info.
1033 *
1034 * We ensure that the 'primary_slot_name' exists on the remote server and the
1035 * remote server is not a standby node.
1036 */
1037static void
1039{
1040#define PRIMARY_INFO_OUTPUT_COL_COUNT 2
1041 WalRcvExecResult *res;
1043 StringInfoData cmd;
1044 bool isnull;
1045 TupleTableSlot *tupslot;
1046 bool remote_in_recovery;
1047 bool primary_slot_valid;
1048 bool started_tx = false;
1049
1050 initStringInfo(&cmd);
1051 appendStringInfo(&cmd,
1052 "SELECT pg_is_in_recovery(), count(*) = 1"
1053 " FROM pg_catalog.pg_replication_slots"
1054 " WHERE slot_type='physical' AND slot_name=%s",
1056
1057 /* The syscache access in walrcv_exec() needs a transaction env. */
1058 if (!IsTransactionState())
1059 {
1061 started_tx = true;
1062 }
1063
1065 pfree(cmd.data);
1066
1067 if (res->status != WALRCV_OK_TUPLES)
1068 ereport(ERROR,
1069 errmsg("could not fetch primary slot name \"%s\" info from the primary server: %s",
1070 PrimarySlotName, res->err),
1071 errhint("Check if \"primary_slot_name\" is configured correctly."));
1072
1074 if (!tuplestore_gettupleslot(res->tuplestore, true, false, tupslot))
1075 elog(ERROR,
1076 "failed to fetch tuple for the primary server slot specified by \"primary_slot_name\"");
1077
1078 remote_in_recovery = DatumGetBool(slot_getattr(tupslot, 1, &isnull));
1079 Assert(!isnull);
1080
1081 /*
1082 * Slot sync is currently not supported on a cascading standby. If we
1083 * allowed it, the primary server would need to wait for all the
1084 * cascading standbys; otherwise, logical subscribers could still be ahead
1085 * of one of the cascading standbys which we plan to promote. Thus, to
1086 * avoid this additional complexity, we restrict it for the time being.
1087 */
1089 ereport(ERROR,
1091 errmsg("cannot synchronize replication slots from a standby server"));
1092
1093 primary_slot_valid = DatumGetBool(slot_getattr(tupslot, 2, &isnull));
1094 Assert(!isnull);
1095
1096 if (!primary_slot_valid)
1097 ereport(ERROR,
1099 /* translator: second %s is a GUC variable name */
1100 errmsg("replication slot \"%s\" specified by \"%s\" does not exist on primary server",
1101 PrimarySlotName, "primary_slot_name"));
1102
1103 ExecClearTuple(tupslot);
1105
1106 if (started_tx)
1108}
1109
1110/*
1111 * Check that dbname is specified in 'primary_conninfo'.
1112 *
1113 * Raise an ERROR if it is not specified; otherwise, return it.
1114 */
1115char *
1117{
1118 char *dbname;
1119
1120 /*
1121 * The slot synchronization needs a database connection for walrcv_exec to
1122 * work.
1123 */
1125 if (dbname == NULL)
1126 ereport(ERROR,
1128
1129 /*
1130 * translator: first %s is a connection option; second %s is a GUC
1131 * variable name
1132 */
1133 errmsg("replication slot synchronization requires \"%s\" to be specified in \"%s\"",
1134 "dbname", "primary_conninfo"));
1135 return dbname;
1136}
1137
1138/*
1139 * Return true if all necessary GUCs for slot synchronization are set
1140 * appropriately; otherwise, report the failed check at 'elevel' and return false.
1141 */
1142bool
1144{
1145 /*
1146 * Logical slot sync/creation requires logical decoding to be enabled.
1147 */
1149 {
1150 ereport(elevel,
1152 errmsg("replication slot synchronization requires \"effective_wal_level\" >= \"logical\" on the primary"),
1153 errhint("To enable logical decoding on primary, set \"wal_level\" >= \"logical\" or create at least one logical slot when \"wal_level\" = \"replica\"."));
1154
1155 return false;
1156 }
1157
1158 /*
1159 * A physical replication slot (primary_slot_name) is required on the
1160 * primary to ensure that the rows needed by the standby are not removed
1161 * after restarting, so that the synchronized slot on the standby will not
1162 * be invalidated.
1163 */
1164 if (PrimarySlotName == NULL || *PrimarySlotName == '\0')
1165 {
1166 ereport(elevel,
1168 /* translator: %s is a GUC variable name */
1169 errmsg("replication slot synchronization requires \"%s\" to be set", "primary_slot_name"));
1170 return false;
1171 }
1172
1173 /*
1174 * hot_standby_feedback must be enabled to cooperate with the physical
1175 * replication slot, which allows informing the primary about the xmin and
1176 * catalog_xmin values on the standby.
1177 */
1179 {
1180 ereport(elevel,
1182 /* translator: %s is a GUC variable name */
1183 errmsg("replication slot synchronization requires \"%s\" to be enabled",
1184 "hot_standby_feedback"));
1185 return false;
1186 }
1187
1188 /*
1189 * The primary_conninfo is required to connect to the primary for
1190 * getting slot information.
1191 */
1192 if (PrimaryConnInfo == NULL || *PrimaryConnInfo == '\0')
1193 {
1194 ereport(elevel,
1196 /* translator: %s is a GUC variable name */
1197 errmsg("replication slot synchronization requires \"%s\" to be set",
1198 "primary_conninfo"));
1199 return false;
1200 }
1201
1202 return true;
1203}
1204
1205/*
1206 * Re-read the config file for slot synchronization.
1207 *
1208 * If relevant GUCs have changed, exit (when called from the slot sync worker)
1209 * or throw an error (when called from the SQL function pg_sync_replication_slots()).
1210 */
1211static void
1213{
1218 bool conninfo_changed;
1221 bool parameter_changed = false;
1222
1225
1226 ConfigReloadPending = false;
1228
1233
1235 {
1237 {
1238 ereport(LOG,
1239 /* translator: %s is a GUC variable name */
1240 errmsg("replication slot synchronization worker will stop because \"%s\" is disabled",
1241 "sync_replication_slots"));
1242
1243 proc_exit(0);
1244 }
1245
1246 parameter_changed = true;
1247 }
1248 else
1249 {
1250 if (conninfo_changed ||
1253 {
1254
1256 {
1257 ereport(LOG,
1258 errmsg("replication slot synchronization worker will restart because of a parameter change"));
1259
1260 /*
1261 * Reset the last-start time for this worker so that the
1262 * postmaster can restart it without waiting for
1263 * SLOTSYNC_RESTART_INTERVAL_SEC.
1264 */
1266
1267 proc_exit(0);
1268 }
1269
1270 parameter_changed = true;
1271 }
1272 }
1273
1274 /*
1275 * If we have reached here with a parameter change, we must be running in
1276 * the SQL function; emit an error in that case.
1277 */
1279 {
1281 ereport(ERROR,
1283 errmsg("replication slot synchronization will stop because of a parameter change"));
1284 }
1285
1286}
1287
1288/*
1289 * Interrupt handler for the process performing slot synchronization.
1290 */
1291static void
1293{
1295
1297 {
1299 {
1300 ereport(LOG,
1301 errmsg("replication slot synchronization worker will stop because promotion is triggered"));
1302
1303 proc_exit(0);
1304 }
1305 else
1306 {
1307 /*
1308 * A backend executing the SQL function pg_sync_replication_slots()
1309 * reports the stop as an ERROR instead of exiting.
1310 */
1311 ereport(ERROR,
1313 errmsg("replication slot synchronization will stop because promotion is triggered"));
1314 }
1315 }
1316
1319}
1320
1321/*
1322 * Connection cleanup function for slotsync worker.
1323 *
1324 * Called on slotsync worker exit.
1325 */
1326static void
1333
1334/*
1335 * Cleanup function for the slotsync worker.
1336 *
1337 * Called on slotsync worker exit.
1338 */
1339static void
1341{
1342 /*
1343 * We need to do slots cleanup here just like WalSndErrorCleanup() does.
1344 *
1345 * The startup process during promotion invokes ShutDownSlotSync() which
1346 * waits for slot sync to finish and it does that by checking the
1347 * 'syncing' flag. Thus the slot sync worker must be done with slots'
1348 * release and cleanup to avoid any dangling temporary slots or active
1349 * slots before it marks itself as finished syncing.
1350 */
1351
1352 /* Make sure active replication slots are released */
1353 if (MyReplicationSlot != NULL)
1355
1356 /* Also clean up the temporary slots. */
1358
1360
1362
1363 /*
1364 * If syncing_slots is true, it indicates that the process errored out
1365 * without resetting the flag. So, we need to clean up shared memory and
1366 * reset the flag here.
1367 */
1368 if (syncing_slots)
1369 {
1370 SlotSyncCtx->syncing = false;
1371 syncing_slots = false;
1372 }
1373
1375}
1376
1377/*
1378 * Sleep long enough that the slots on the primary are likely to have been
1379 * updated.
1380 *
1381 * If there is no slot activity, the wait time between sync-cycles will double
1382 * (to a maximum of 30s). If there is some slot activity, the wait time between
1383 * sync-cycles is reset to the minimum (200ms).
1384 */
1385static void
1387{
1388 int rc;
1389
1390 if (!some_slot_updated)
1391 {
1392 /*
1393 * No slots were updated, so double the sleep time, but not beyond the
1394 * maximum allowable value.
1395 */
1397 }
1398 else
1399 {
1400 /*
1401 * Some slots were updated since the last sleep, so reset the sleep
1402 * time.
1403 */
1405 }
1406
1407 rc = WaitLatch(MyLatch,
1409 sleep_ms,
1411
1412 if (rc & WL_LATCH_SET)
1414}
1415
1416/*
1417 * Emit an error if a concurrent sync call is in progress. Otherwise, advertise
1418 * that a sync is in progress (SlotSyncCtx->syncing and syncing_slots are set).
1419 */
1420static void
1422{
1424
1425 if (SlotSyncCtx->syncing)
1426 {
1428 ereport(ERROR,
1430 errmsg("cannot synchronize replication slots concurrently"));
1431 }
1432
1433 /* The pid must not already be assigned in SlotSyncCtx */
1435
1436 SlotSyncCtx->syncing = true;
1437
1438 /*
1439 * Advertise the required PID so that the startup process can kill the
1440 * slot sync process on promotion.
1441 */
1443
1445
1446 syncing_slots = true;
1447}
1448
1449/*
1450 * Reset syncing flag.
1451 */
1452static void
1462
1463/*
1464 * The main loop of our worker process.
1465 *
1466 * It connects to the primary server and periodically fetches information
1467 * about logical failover slots in order to create and sync the slots.
1468 *
1469 * Note: If any changes are made here, check if the corresponding SQL
1470 * function logic in SyncReplicationSlots() also needs to be changed.
1471 */
1472void
1474{
1476 char *dbname;
1477 char *err;
1480
1482
1484
1486
1487 /*
1488 * Create a per-backend PGPROC struct in shared memory. We must do this
1489 * before we access any shared memory.
1490 */
1491 InitProcess();
1492
1493 /*
1494 * Early initialization.
1495 */
1496 BaseInit();
1497
1499
1500 /*
1501 * If an exception is encountered, processing resumes here.
1502 *
1503 * We just need to clean up, report the error, and go away.
1504 *
1505 * If we do not have this handling here, then since this worker process
1506 * operates at the bottom of the exception stack, ERRORs turn into FATALs.
1507 * Therefore, we create our own exception handler to catch ERRORs.
1508 */
1509 if (sigsetjmp(local_sigjmp_buf, 1) != 0)
1510 {
1511 /* Since we are not using PG_TRY, the error stack must be reset by hand */
1513
1514 /* Prevent interrupts while cleaning up */
1516
1517 /* Report the error to the server log */
1519
1520 /*
1521 * We can now go away. Note that because we called InitProcess, a
1522 * callback was registered to do ProcKill, which will clean up
1523 * necessary state.
1524 */
1525 proc_exit(0);
1526 }
1527
1528 /* We can now handle ereport(ERROR) */
1530
1531 /* Set up signal handling */
1540
1542
1543 ereport(LOG, errmsg("slot sync worker started"));
1544
1545 /* Register it as soon as SlotSyncCtx->pid is initialized. */
1547
1548 /*
1549 * Establish the SIGALRM handler and initialize the timeout module. It is
1550 * needed by InitPostgres to register different timeouts.
1551 */
1553
1554 /* Load the libpq-specific functions */
1555 load_file("libpqwalreceiver", false);
1556
1557 /*
1558 * Unblock signals (they were blocked when the postmaster forked us)
1559 */
1561
1562 /*
1563 * Set always-secure search path, so malicious users can't redirect user
1564 * code (e.g. operators).
1565 *
1566 * It's not strictly necessary since we won't be scanning or writing to
1567 * any user table locally, but it's good to retain it here for added
1568 * precaution.
1569 */
1570 SetConfigOption("search_path", "", PGC_SUSET, PGC_S_OVERRIDE);
1571
1573
1574 /*
1575 * Connect to the database specified by the user in primary_conninfo. We
1576 * need a database connection for walrcv_exec to work which we use to
1577 * fetch slot information from the remote node. See comments atop
1578 * libpqrcv_exec.
1579 *
1580 * We do not specify a specific user here since the slot sync worker will
1581 * operate as a superuser. This is safe because the slot sync worker does
1582 * not interact with user tables, eliminating the risk of executing
1583 * arbitrary code within triggers.
1584 */
1586
1588
1590 if (cluster_name[0])
1591 appendStringInfo(&app_name, "%s_%s", cluster_name, "slotsync worker");
1592 else
1593 appendStringInfoString(&app_name, "slotsync worker");
1594
1595 /*
1596 * Establish the connection to the primary server for slot
1597 * synchronization.
1598 */
1599 wrconn = walrcv_connect(PrimaryConnInfo, false, false, false,
1600 app_name.data, &err);
1601
1602 if (!wrconn)
1603 ereport(ERROR,
1605 errmsg("synchronization worker \"%s\" could not connect to the primary server: %s",
1606 app_name.data, err));
1607
1608 pfree(app_name.data);
1609
1610 /*
1611 * Register the disconnection callback.
1612 *
1613 * XXX: This can be combined with previous cleanup registration of
1614 * slotsync_worker_onexit() but that will need the connection to be made
1615 * global and we want to avoid introducing global for this purpose.
1616 */
1618
1619 /*
1620 * Using the specified primary server connection, check that we are not a
1621 * cascading standby and that the slot configured in 'primary_slot_name'
1622 * exists on the primary server.
1623 */
1625
1626 /* Main loop to synchronize slots */
1627 for (;;)
1628 {
1629 bool some_slot_updated = false;
1630 bool started_tx = false;
1632
1634
1635 /*
1636 * The syscache access in fetch_remote_slots() needs a transaction
1637 * env.
1638 */
1639 if (!IsTransactionState())
1640 {
1642 started_tx = true;
1643 }
1644
1648
1649 if (started_tx)
1651
1653 }
1654
1655 /*
1656 * The slot sync worker can't get here because it will only stop when it
1657 * receives a stop request from the startup process, or when there is an
1658 * error.
1659 */
1660 Assert(false);
1661}
1662
1663/*
1664 * Update the inactive_since property for synced slots.
1665 *
1666 * Note that this function is currently called when we shut down the slot
1667 * sync machinery.
1668 */
1669static void
1671{
1672 TimestampTz now = 0;
1673
1674 /*
1675 * We need to update inactive_since only when we are promoting the standby,
1676 * so that inactive_since is interpreted correctly if the standby gets promoted
1677 * without a restart. We don't want the slots to appear inactive for a
1678 * long time after promotion if they haven't been synchronized recently.
1679 * Whoever acquires the slot, i.e., makes the slot active, will reset it.
1680 */
1681 if (!StandbyMode)
1682 return;
1683
1684 /* The slot sync worker or the SQL function mustn't be running by now */
1686
1688
1689 for (int i = 0; i < max_replication_slots; i++)
1690 {
1692
1693 /* Check if it is a synchronized slot */
1694 if (s->in_use && s->data.synced)
1695 {
1697
1698 /* The slot must not be acquired by any process */
1700
1701 /* Use the same inactive_since time for all the slots. */
1702 if (now == 0)
1704
1706 }
1707 }
1708
1710}
1711
1712/*
1713 * Shut down slot synchronization.
1714 *
1715 * This function sets stopSignaled=true and wakes up the slot sync process
1716 * (either the worker or a backend running the SQL function pg_sync_replication_slots())
1717 * so that the worker can exit or the SQL function pg_sync_replication_slots() can
1718 * finish. It also waits until the slot sync worker has exited or
1719 * pg_sync_replication_slots() has finished.
1720 */
1721void
1723{
1725
1727
1728 SlotSyncCtx->stopSignaled = true;
1729
1730 /*
1731 * Return if neither the slot sync worker is running nor the function
1732 * pg_sync_replication_slots() is executing.
1733 */
1734 if (!SlotSyncCtx->syncing)
1735 {
1738 return;
1739 }
1740
1742
1744
1745 /*
1746 * Signal the process doing slotsync, if any. The process will stop upon
1747 * detecting that the stopSignaled flag is set to true.
1748 */
1751
1752 /* Wait for slot sync to end */
1753 for (;;)
1754 {
1755 int rc;
1756
1757 /* Wait a bit; we don't expect to have to wait long */
1758 rc = WaitLatch(MyLatch,
1761
1762 if (rc & WL_LATCH_SET)
1763 {
1766 }
1767
1769
1770 /* Ensure that no process is syncing the slots. */
1771 if (!SlotSyncCtx->syncing)
1772 break;
1773
1775 }
1776
1778
1780}
1781
1782/*
1783 * SlotSyncWorkerCanRestart
1784 *
1785 * Return true, indicating the worker is allowed to restart, if enough time has
1786 * passed since it was last launched to reach SLOTSYNC_RESTART_INTERVAL_SEC.
1787 * Otherwise return false.
1788 *
1789 * This is a safety valve to protect against continuous respawn attempts if the
1790 * worker is dying immediately at launch. Note that since we will retry to
1791 * launch the worker from the postmaster main loop, we will get another
1792 * chance later.
1793 */
1794bool
1796{
1797 time_t curtime = time(NULL);
1798
1799 /*
1800 * If first time through, or time somehow went backwards, always update
1801 * last_start_time to match the current clock and allow worker start.
1802 * Otherwise allow it only once enough time has elapsed.
1803 */
1804 if (SlotSyncCtx->last_start_time == 0 ||
1805 curtime < SlotSyncCtx->last_start_time ||
1807 {
1809 return true;
1810 }
1811 return false;
1812}
1813
1814/*
1815 * Is the current process syncing replication slots?
1816 *
1817 * Could be either a backend executing the SQL function or the slot sync worker. Returns the syncing_slots flag.
1818 */
1819bool
1821{
1822 return syncing_slots;
1823}
1824
1825/*
1826 * Amount of shared memory required for slot synchronization, i.e. the size of SlotSyncCtxStruct.
1827 */
1828Size
1830{
1831 return sizeof(SlotSyncCtxStruct);
1832}
1833
1834/*
1835 * Allocate the shared memory for slot synchronization and, if it did not already exist, zero-initialize it.
1836 */
1837void
1839{
1840 Size size = SlotSyncShmemSize();
1841 bool found;
1842
1844 ShmemInitStruct("Slot Sync Data", size, &found);
1845
1846 if (!found)
1847 {
1848 memset(SlotSyncCtx, 0, size);
1851 }
1852}
1853
1854/*
1855 * Error cleanup callback for the slot sync SQL function.
1856 */
1857static void
1859{
1861
1862 /*
1863 * We need to do slots cleanup here just like WalSndErrorCleanup() does.
1864 *
1865 * The startup process during promotion invokes ShutDownSlotSync() which
1866 * waits for slot sync to finish and it does that by checking the
1867 * 'syncing' flag. Thus the SQL function must be done with slots' release
1868 * and cleanup to avoid any dangling temporary slots or active slots
1869 * before it marks itself as finished syncing.
1870 */
1871
1872 /* Make sure active replication slots are released */
1873 if (MyReplicationSlot != NULL)
1875
1876 /* Also clean up the synced temporary slots. */
1878
1879 /*
1880 * If syncing_slots is still set, the process errored out without
1881 * resetting the flag. So, we need to clean up shared memory and reset the
1882 * flag here.
1883 */
1884 if (syncing_slots)
1886
1888}
1889
1890/*
1891 * Helper function to build a list of pstrdup'd slot names from a list of remote slots.
1892 */
1893static List *
1895{
1896 List *slot_names = NIL;
1897
1899 {
1900 char *slot_name;
1901
1902 slot_name = pstrdup(remote_slot->name);
1903 slot_names = lappend(slot_names, slot_name);
1904 }
1905
1906 return slot_names;
1907}
1908
1909/*
1910 * Synchronize the failover-enabled replication slots using the specified
1911 * primary server connection.
1912 *
1913 * Repeatedly fetches and updates replication slot information from the
1914 * primary until all slots are at least "sync ready".
1915 *
1916 * Exits early if promotion is triggered or certain critical
1917 * configuration parameters have changed.
1918 */
1919void
1921{
1923 {
1925 List *slot_names = NIL; /* List of slot names to track */
1926
1928
1929 /* Check for interrupts and config changes */
1931
1933
1934 /* Retry until all the slots are sync-ready */
1935 for (;;)
1936 {
1937 bool slot_persistence_pending = false;
1938 bool some_slot_updated = false;
1939
1940 /* Check for interrupts and config changes */
1942
1943 /* We must be in a valid transaction state */
1945
1946 /*
1947 * Fetch remote slot info for the given slot_names. If slot_names
1948 * is NIL, fetch all failover-enabled slots. Note that we reuse
1949 * slot_names from the first iteration; re-fetching all failover
1950 * slots each time could cause an endless loop. Instead of
1951 * reprocessing only the pending slots in each iteration, it's
1952 * better to process all the slots received in the first
1953 * iteration. This ensures that by the time we're done, all slots
1954 * reflect the latest values.
1955 */
1956 remote_slots = fetch_remote_slots(wrconn, slot_names);
1957
1958 /* Attempt to synchronize slots */
1961
1962 /*
1963 * If slot_persistence_pending is true, extract slot names for
1964 * future iterations (only needed if we haven't done it yet)
1965 */
1966 if (slot_names == NIL && slot_persistence_pending)
1967 slot_names = extract_slot_names(remote_slots);
1968
1969 /* Free the current remote_slots list */
1971
1972 /* Done if all slots are persisted, i.e., are sync-ready */
1974 break;
1975
1976 /* Wait before retrying */
1978 }
1979
1980 if (slot_names)
1981 list_free_deep(slot_names);
1982
1983 /* Clean up the synced temporary slots */
1985
1986 /* We are done with sync, so reset sync flag */
1988 }
1990}
sigset_t UnBlockSig
Definition pqsignal.c:22
TimestampTz GetCurrentTimestamp(void)
Definition timestamp.c:1645
Datum now(PG_FUNCTION_ARGS)
Definition timestamp.c:1609
#define TextDatumGetCString(d)
Definition builtins.h:99
#define NameStr(name)
Definition c.h:777
#define Min(x, y)
Definition c.h:1019
#define Max(x, y)
Definition c.h:1013
#define Assert(condition)
Definition c.h:885
uint32 TransactionId
Definition c.h:678
size_t Size
Definition c.h:631
int64 TimestampTz
Definition timestamp.h:39
Oid get_database_oid(const char *dbname, bool missing_ok)
void load_file(const char *filename, bool restricted)
Definition dfmgr.c:149
Datum arg
Definition elog.c:1322
void EmitErrorReport(void)
Definition elog.c:1882
ErrorContextCallback * error_context_stack
Definition elog.c:99
int errcode(int sqlerrcode)
Definition elog.c:874
int errmsg(const char *fmt,...)
Definition elog.c:1093
sigjmp_buf * PG_exception_stack
Definition elog.c:101
#define LOG
Definition elog.h:31
int int errdetail_internal(const char *fmt,...) pg_attribute_printf(1
int errhint(const char *fmt,...) pg_attribute_printf(1
int errdetail(const char *fmt,...) pg_attribute_printf(1
int int errmsg_internal(const char *fmt,...) pg_attribute_printf(1
#define DEBUG1
Definition elog.h:30
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define ereport(elevel,...)
Definition elog.h:150
void err(int eval, const char *fmt,...)
Definition err.c:43
TupleTableSlot * MakeSingleTupleTableSlot(TupleDesc tupdesc, const TupleTableSlotOps *tts_ops)
const TupleTableSlotOps TTSOpsMinimalTuple
Definition execTuples.c:86
#define palloc0_object(type)
Definition fe_memutils.h:75
int MyProcPid
Definition globals.c:47
struct Latch * MyLatch
Definition globals.c:63
void ProcessConfigFile(GucContext context)
Definition guc-file.l:120
void SetConfigOption(const char *name, const char *value, GucContext context, GucSource source)
Definition guc.c:4196
@ PGC_S_OVERRIDE
Definition guc.h:123
@ PGC_SUSET
Definition guc.h:78
@ PGC_SIGHUP
Definition guc.h:75
char * cluster_name
Definition guc_tables.c:564
volatile sig_atomic_t ConfigReloadPending
Definition interrupt.c:27
void SignalHandlerForConfigReload(SIGNAL_ARGS)
Definition interrupt.c:61
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition ipc.c:344
void proc_exit(int code)
Definition ipc.c:105
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition ipc.h:47
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition ipc.h:52
int i
Definition isn.c:77
void ResetLatch(Latch *latch)
Definition latch.c:374
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition latch.c:172
List * lappend(List *list, void *datum)
Definition list.c:339
void list_free_deep(List *list)
Definition list.c:1560
void LockSharedObject(Oid classid, Oid objid, uint16 objsubid, LOCKMODE lockmode)
Definition lmgr.c:1088
void UnlockSharedObject(Oid classid, Oid objid, uint16 objsubid, LOCKMODE lockmode)
Definition lmgr.c:1148
#define AccessShareLock
Definition lockdefs.h:36
XLogRecPtr LogicalSlotAdvanceAndCheckSnapState(XLogRecPtr moveto, bool *found_consistent_snapshot)
Definition logical.c:2094
bool IsLogicalDecodingEnabled(void)
Definition logicalctl.c:205
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1176
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1793
@ LW_SHARED
Definition lwlock.h:113
@ LW_EXCLUSIVE
Definition lwlock.h:112
char * pstrdup(const char *in)
Definition mcxt.c:1781
void pfree(void *pointer)
Definition mcxt.c:1616
@ NormalProcessing
Definition miscadmin.h:472
@ InitProcessing
Definition miscadmin.h:471
#define GetProcessingMode()
Definition miscadmin.h:481
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
#define AmLogicalSlotSyncWorkerProcess()
Definition miscadmin.h:386
#define HOLD_INTERRUPTS()
Definition miscadmin.h:134
#define SetProcessingMode(mode)
Definition miscadmin.h:483
#define InvalidPid
Definition miscadmin.h:32
void namestrcpy(Name name, const char *str)
Definition name.c:233
#define NIL
Definition pg_list.h:68
#define foreach_ptr(type, var, lst)
Definition pg_list.h:469
static XLogRecPtr DatumGetLSN(Datum X)
Definition pg_lsn.h:25
#define die(msg)
void pgstat_report_replslotsync(ReplicationSlot *slot)
#define pqsignal
Definition port.h:547
void FloatExceptionHandler(SIGNAL_ARGS)
Definition postgres.c:3058
void StatementCancelHandler(SIGNAL_ARGS)
Definition postgres.c:3041
static bool DatumGetBool(Datum X)
Definition postgres.h:100
static Datum PointerGetDatum(const void *X)
Definition postgres.h:352
uint64_t Datum
Definition postgres.h:70
static Pointer DatumGetPointer(Datum X)
Definition postgres.h:342
static TransactionId DatumGetTransactionId(Datum X)
Definition postgres.h:292
#define InvalidOid
unsigned int Oid
void BaseInit(void)
Definition postinit.c:615
void InitPostgres(const char *in_dbname, Oid dboid, const char *username, Oid useroid, bits32 flags, char *out_dbname)
Definition postinit.c:718
static int fb(int x)
TransactionId GetOldestSafeDecodingTransactionId(bool catalogOnly)
Definition procarray.c:2910
#define INVALID_PROC_NUMBER
Definition procnumber.h:26
void procsignal_sigusr1_handler(SIGNAL_ARGS)
Definition procsignal.c:679
void init_ps_display(const char *fixed_part)
Definition ps_status.c:285
char * quote_literal_cstr(const char *rawstr)
Definition quote.c:101
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition shmem.c:378
void ReplicationSlotAcquire(const char *name, bool nowait, bool error_if_invalid)
Definition slot.c:621
void ReplicationSlotCreate(const char *name, bool db_specific, ReplicationSlotPersistency persistency, bool two_phase, bool failover, bool synced)
Definition slot.c:379
void ReplicationSlotDropAcquired(void)
Definition slot.c:1034
void ReplicationSlotMarkDirty(void)
Definition slot.c:1176
ReplicationSlotInvalidationCause GetSlotInvalidationCause(const char *cause_name)
Definition slot.c:2915
void ReplicationSlotsComputeRequiredXmin(bool already_locked)
Definition slot.c:1218
void ReplicationSlotPersist(void)
Definition slot.c:1193
ReplicationSlot * MyReplicationSlot
Definition slot.c:148
void ReplicationSlotSave(void)
Definition slot.c:1158
ReplicationSlot * SearchNamedReplicationSlot(const char *name, bool need_lock)
Definition slot.c:541
void ReplicationSlotRelease(void)
Definition slot.c:761
int max_replication_slots
Definition slot.c:151
ReplicationSlotCtlData * ReplicationSlotCtl
Definition slot.c:145
void ReplicationSlotsComputeRequiredLSN(void)
Definition slot.c:1300
void ReplicationSlotCleanup(bool synced_only)
Definition slot.c:860
@ RS_TEMPORARY
Definition slot.h:47
ReplicationSlotInvalidationCause
Definition slot.h:59
@ RS_INVAL_NONE
Definition slot.h:60
#define SlotIsLogical(slot)
Definition slot.h:288
static void ReplicationSlotSetInactiveSince(ReplicationSlot *s, TimestampTz ts, bool acquire_lock)
Definition slot.h:306
SlotSyncSkipReason
Definition slot.h:81
@ SS_SKIP_WAL_NOT_FLUSHED
Definition slot.h:83
@ SS_SKIP_NO_CONSISTENT_SNAPSHOT
Definition slot.h:87
@ SS_SKIP_NONE
Definition slot.h:82
@ SS_SKIP_INVALID
Definition slot.h:89
@ SS_SKIP_WAL_OR_ROWS_REMOVED
Definition slot.h:85
static List * get_local_synced_slots(void)
Definition slotsync.c:417
#define MIN_SLOTSYNC_WORKER_NAPTIME_MS
Definition slotsync.c:124
#define PRIMARY_INFO_OUTPUT_COL_COUNT
static void slotsync_worker_disconnect(int code, Datum arg)
Definition slotsync.c:1327
void SyncReplicationSlots(WalReceiverConn *wrconn)
Definition slotsync.c:1920
static bool local_sync_slot_required(ReplicationSlot *local_slot, List *remote_slots)
Definition slotsync.c:448
static void drop_local_obsolete_slots(List *remote_slot_list)
Definition slotsync.c:501
static void reserve_wal_for_local_slot(XLogRecPtr restart_lsn)
Definition slotsync.c:558
static void update_slotsync_skip_stats(SlotSyncSkipReason skip_reason)
Definition slotsync.c:167
void ShutDownSlotSync(void)
Definition slotsync.c:1722
bool sync_replication_slots
Definition slotsync.c:117
static bool synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid, bool *slot_persistence_pending)
Definition slotsync.c:694
static SlotSyncCtxStruct * SlotSyncCtx
Definition slotsync.c:114
static void slotsync_failure_callback(int code, Datum arg)
Definition slotsync.c:1858
#define SLOTSYNC_COLUMN_COUNT
static List * extract_slot_names(List *remote_slots)
Definition slotsync.c:1894
static long sleep_ms
Definition slotsync.c:127
#define SLOTSYNC_RESTART_INTERVAL_SEC
Definition slotsync.c:130
char * CheckAndGetDbnameFromConninfo(void)
Definition slotsync.c:1116
static bool syncing_slots
Definition slotsync.c:137
static void ProcessSlotSyncInterrupts(void)
Definition slotsync.c:1292
#define MAX_SLOTSYNC_WORKER_NAPTIME_MS
Definition slotsync.c:125
static bool update_and_persist_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid, bool *slot_persistence_pending)
Definition slotsync.c:633
bool SlotSyncWorkerCanRestart(void)
Definition slotsync.c:1795
static void wait_for_slot_activity(bool some_slot_updated)
Definition slotsync.c:1386
static void slotsync_reread_config(void)
Definition slotsync.c:1212
static void reset_syncing_flag(void)
Definition slotsync.c:1453
static bool update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid)
Definition slotsync.c:199
void SlotSyncShmemInit(void)
Definition slotsync.c:1838
static void slotsync_worker_onexit(int code, Datum arg)
Definition slotsync.c:1340
static void update_synced_slots_inactive_since(void)
Definition slotsync.c:1670
bool ValidateSlotSyncParams(int elevel)
Definition slotsync.c:1143
static void validate_remote_info(WalReceiverConn *wrconn)
Definition slotsync.c:1038
static void check_and_set_sync_info(pid_t sync_process_pid)
Definition slotsync.c:1421
bool IsSyncingReplicationSlots(void)
Definition slotsync.c:1820
void ReplSlotSyncWorkerMain(const void *startup_data, size_t startup_data_len)
Definition slotsync.c:1473
static List * fetch_remote_slots(WalReceiverConn *wrconn, List *slot_names)
Definition slotsync.c:860
Size SlotSyncShmemSize(void)
Definition slotsync.c:1829
static bool synchronize_slots(WalReceiverConn *wrconn, List *remote_slot_list, bool *slot_persistence_pending)
Definition slotsync.c:1002
bool SnapBuildSnapshotExists(XLogRecPtr lsn)
Definition snapbuild.c:2057
static void SpinLockRelease(volatile slock_t *lock)
Definition spin.h:62
static void SpinLockAcquire(volatile slock_t *lock)
Definition spin.h:56
static void SpinLockInit(volatile slock_t *lock)
Definition spin.h:50
void InitProcess(void)
Definition proc.c:379
char * dbname
Definition streamutil.c:49
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition stringinfo.c:145
void appendStringInfoString(StringInfo str, const char *s)
Definition stringinfo.c:230
void appendStringInfoChar(StringInfo str, char ch)
Definition stringinfo.c:242
void initStringInfo(StringInfo str)
Definition stringinfo.c:97
Definition pg_list.h:54
bool two_phase
Definition slotsync.c:148
char * plugin
Definition slotsync.c:146
char * name
Definition slotsync.c:145
char * database
Definition slotsync.c:147
bool failover
Definition slotsync.c:149
ReplicationSlotInvalidationCause invalidated
Definition slotsync.c:156
XLogRecPtr confirmed_lsn
Definition slotsync.c:151
XLogRecPtr restart_lsn
Definition slotsync.c:150
XLogRecPtr two_phase_at
Definition slotsync.c:152
TransactionId catalog_xmin
Definition slotsync.c:153
ReplicationSlot replication_slots[1]
Definition slot.h:299
TransactionId catalog_xmin
Definition slot.h:122
ReplicationSlotPersistency persistency
Definition slot.h:106
ReplicationSlotInvalidationCause invalidated
Definition slot.h:128
TransactionId effective_catalog_xmin
Definition slot.h:210
slock_t mutex
Definition slot.h:183
SlotSyncSkipReason slotsync_skip_reason
Definition slot.h:284
bool in_use
Definition slot.h:186
ProcNumber active_proc
Definition slot.h:192
ReplicationSlotPersistentData data
Definition slot.h:213
time_t last_start_time
Definition slotsync.c:110
Tuplestorestate * tuplestore
TupleDesc tupledesc
WalRcvExecStatus status
Definition c.h:772
void InitializeTimeouts(void)
Definition timeout.c:470
static bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition transam.h:297
#define InvalidTransactionId
Definition transam.h:31
#define TransactionIdIsValid(xid)
Definition transam.h:41
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
bool tuplestore_gettupleslot(Tuplestorestate *state, bool forward, bool copy, TupleTableSlot *slot)
static Datum slot_getattr(TupleTableSlot *slot, int attnum, bool *isnull)
Definition tuptable.h:398
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition tuptable.h:457
#define WL_TIMEOUT
#define WL_EXIT_ON_PM_DEATH
#define WL_LATCH_SET
static WalReceiverConn * wrconn
Definition walreceiver.c:94
bool hot_standby_feedback
Definition walreceiver.c:91
#define walrcv_connect(conninfo, replication, logical, must_use_password, appname, err)
@ WALRCV_OK_TUPLES
static void walrcv_clear_result(WalRcvExecResult *walres)
#define walrcv_get_dbname_from_conninfo(conninfo)
#define walrcv_exec(conn, exec, nRetTypes, retTypes)
#define walrcv_disconnect(conn)
XLogRecPtr GetStandbyFlushRecPtr(TimeLineID *tli)
Definition walsender.c:3648
#define SIGCHLD
Definition win32_port.h:168
#define SIGHUP
Definition win32_port.h:158
#define SIGPIPE
Definition win32_port.h:163
#define kill(pid, sig)
Definition win32_port.h:490
#define SIGUSR1
Definition win32_port.h:170
#define SIGUSR2
Definition win32_port.h:171
bool IsTransactionState(void)
Definition xact.c:388
void StartTransactionCommand(void)
Definition xact.c:3080
void CommitTransactionCommand(void)
Definition xact.c:3178
XLogSegNo XLogGetLastRemovedSegno(void)
Definition xlog.c:3795
XLogRecPtr GetRedoRecPtr(void)
Definition xlog.c:6563
XLogRecPtr XLogGetReplicationSlotMinimumLSN(void)
Definition xlog.c:2682
int wal_segment_size
Definition xlog.c:146
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
#define XLogRecPtrIsValid(r)
Definition xlogdefs.h:29
#define LSN_FORMAT_ARGS(lsn)
Definition xlogdefs.h:47
uint64 XLogRecPtr
Definition xlogdefs.h:21
#define InvalidXLogRecPtr
Definition xlogdefs.h:28
uint64 XLogSegNo
Definition xlogdefs.h:52
char * PrimarySlotName
bool StandbyMode
char * PrimaryConnInfo