PostgreSQL Source Code git master
Loading...
Searching...
No Matches
slotsync.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 * slotsync.c
3 * Functionality for synchronizing slots to a standby server from the
4 * primary server.
5 *
6 * Copyright (c) 2024-2026, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * src/backend/replication/logical/slotsync.c
10 *
11 * This file contains the code for slot synchronization on a physical standby
12 * to fetch logical failover slots information from the primary server, create
13 * the slots on the standby and synchronize them periodically.
14 *
15 * Slot synchronization can be performed either automatically by enabling slot
16 * sync worker or manually by calling SQL function pg_sync_replication_slots().
17 *
18 * If the WAL corresponding to the remote's restart_lsn is not available on the
19 * physical standby or the remote's catalog_xmin precedes the oldest xid for
20 * which it is guaranteed that rows wouldn't have been removed then we cannot
21 * create the local standby slot because that would mean moving the local slot
22 * backward and decoding won't be possible via such a slot. In this case, the
23 * slot will be marked as RS_TEMPORARY. Once the primary server catches up,
24 * the slot will be marked as RS_PERSISTENT (which means sync-ready) after
25 * which slot sync worker can perform the sync periodically or user can call
26 * pg_sync_replication_slots() periodically to perform the syncs.
27 *
28 * If synchronized slots fail to build a consistent snapshot from the
29 * restart_lsn before reaching confirmed_flush_lsn, they would become
30 * unreliable after promotion due to potential data loss from changes
31 * before reaching a consistent point. This can happen because the slots can
32 * be synced at some random time and we may not reach the consistent point
33 * at the same WAL location as the primary. So, we mark such slots as
34 * RS_TEMPORARY. Once the decoding from corresponding LSNs can reach a
35 * consistent point, they will be marked as RS_PERSISTENT.
36 *
37 * If the WAL prior to the remote slot's confirmed_flush_lsn has not been
38 * flushed on the standby, the slot is marked as RS_TEMPORARY. Once the standby
39 * catches up and flushes that WAL, the slot will be marked as RS_PERSISTENT.
40 *
41 * The slot sync worker waits for some time before the next synchronization,
42 * with the duration varying based on whether any slots were updated during
43 * the last cycle. Refer to the comments above wait_for_slot_activity() for
44 * more details.
45 *
46 * If the SQL function pg_sync_replication_slots() is used to sync the slots,
47 * and if the slots are not ready to be synced and are marked as RS_TEMPORARY
48 * because of any of the reasons mentioned above, then the SQL function also
49 * waits and retries until the slots are marked as RS_PERSISTENT (which means
50 * sync-ready). Refer to the comments in SyncReplicationSlots() for more
51 * details.
52 *
53 * Any standby synchronized slots will be dropped if they no longer need
54 * to be synchronized. See comment atop drop_local_obsolete_slots() for more
55 * details.
56 *---------------------------------------------------------------------------
57 */
58
59#include "postgres.h"
60
61#include <time.h>
62
64#include "access/xlogrecovery.h"
65#include "catalog/pg_database.h"
66#include "libpq/pqsignal.h"
67#include "pgstat.h"
69#include "replication/logical.h"
72#include "storage/ipc.h"
73#include "storage/lmgr.h"
74#include "storage/proc.h"
75#include "storage/procarray.h"
76#include "tcop/tcopprot.h"
77#include "utils/builtins.h"
78#include "utils/memutils.h"
79#include "utils/pg_lsn.h"
80#include "utils/ps_status.h"
81#include "utils/timeout.h"
82#include "utils/wait_event.h"
83
84/*
85 * Struct for sharing information to control slot synchronization.
86 *
87 * The 'pid' is either the slot sync worker's pid or the backend's pid running
88 * the SQL function pg_sync_replication_slots(). When the startup process sets
89 * 'stopSignaled' during promotion, it uses this 'pid' to wake up the currently
90 * synchronizing process so that the process can immediately stop its
91 * synchronizing work on seeing 'stopSignaled' set.
92 * Setting 'stopSignaled' is also used to handle the race condition when the
93 * postmaster has not noticed the promotion yet and thus may end up restarting
94 * the slot sync worker. If 'stopSignaled' is set, the worker will exit in such a
95 * case. The SQL function pg_sync_replication_slots() will also error out if
96 * this flag is set. Note that we don't need to reset this variable as after
97 * promotion the slot sync worker won't be restarted because the pmState
98 * changes to PM_RUN from PM_HOT_STANDBY and we don't support demoting a
99 * primary without restarting the server. See LaunchMissingBackgroundProcesses.
100 *
101 * The 'syncing' flag is needed to prevent concurrent slot syncs to avoid slot
102 * overwrites.
103 *
104 * The 'last_start_time' is needed by postmaster to start the slot sync worker
105 * once per SLOTSYNC_RESTART_INTERVAL_SEC. In cases where an immediate restart
106 * is expected (e.g., slot sync GUCs change), slot sync worker will reset
107 * last_start_time before exiting, so that postmaster can start the worker
108 * without waiting for SLOTSYNC_RESTART_INTERVAL_SEC.
109 */
118
120
121/* GUC variable */
123
124/*
125 * The sleep time (ms) between slot-sync cycles varies dynamically
126 * (within a MIN/MAX range) according to slot activity. See
127 * wait_for_slot_activity() for details.
128 */
129#define MIN_SLOTSYNC_WORKER_NAPTIME_MS 200
130#define MAX_SLOTSYNC_WORKER_NAPTIME_MS 30000 /* 30s */
131
133
134/* The restart interval for the slot sync worker, used by postmaster */
135#define SLOTSYNC_RESTART_INTERVAL_SEC 10
136
137/*
138 * Flag to tell if we are syncing replication slots. Unlike the 'syncing' flag
139 * in SlotSyncCtxStruct, this flag is true only if the current process is
140 * performing slot synchronization.
141 */
142static bool syncing_slots = false;
143
144/*
145 * Structure to hold information fetched from the primary server about a logical
146 * replication slot.
147 */
163
164static void slotsync_failure_callback(int code, Datum arg);
165static void update_synced_slots_inactive_since(void);
166
167/*
168 * Update slot sync skip stats. This function requires the caller to acquire
169 * the slot.
170 */
171static void
173{
174 ReplicationSlot *slot;
175
177
178 slot = MyReplicationSlot;
179
180 /*
181 * Update the slot sync related stats in pg_stat_replication_slots when a
182 * slot sync is skipped
183 */
186
187 /* Update the slot sync skip reason */
189 {
190 SpinLockAcquire(&slot->mutex);
192 SpinLockRelease(&slot->mutex);
193 }
194}
195
196/*
197 * If necessary, update the local synced slot's metadata based on the data
198 * from the remote slot.
199 *
200 * If no update was needed (the data of the remote slot is the same as the
201 * local slot) return false, otherwise true.
202 */
203static bool
205{
207 bool updated_xmin_or_lsn = false;
208 bool updated_config = false;
211
213
214 /*
215 * Make sure that concerned WAL is received and flushed before syncing
216 * slot to target lsn received from the primary server.
217 */
218 if (remote_slot->confirmed_lsn > latestFlushPtr)
219 {
221
222 /*
223 * Can get here only if GUC 'synchronized_standby_slots' on the
224 * primary server was not configured correctly.
225 */
226 ereport(LOG,
228 errmsg("skipping slot synchronization because the received slot sync"
229 " LSN %X/%08X for slot \"%s\" is ahead of the standby position %X/%08X",
230 LSN_FORMAT_ARGS(remote_slot->confirmed_lsn),
231 remote_slot->name,
233
234 return false;
235 }
236
237 /*
238 * Don't overwrite if we already have a newer catalog_xmin and
239 * restart_lsn.
240 */
241 if (remote_slot->restart_lsn < slot->data.restart_lsn ||
243 slot->data.catalog_xmin))
244 {
245 /* Update slot sync skip stats */
247
248 /*
249 * This can happen in the following situations:
250 *
251 * If the slot is temporary, it means either the initial WAL location
252 * reserved for the local slot is ahead of the remote slot's
253 * restart_lsn or the initial xmin_horizon computed for the local slot
254 * is ahead of the remote slot.
255 *
256 * If the slot is persistent, both restart_lsn and catalog_xmin of the
257 * synced slot could still be ahead of the remote slot. Since we use
258 * slot advance functionality to keep snapbuild/slot updated, it is
259 * possible that the restart_lsn and catalog_xmin are advanced to a
260 * later position than it has on the primary. This can happen when
261 * slot advancing machinery finds running xacts record after reaching
262 * the consistent state at a later point than the primary where it
263 * serializes the snapshot and updates the restart_lsn.
264 *
265 * We LOG the message if the slot is temporary as it can help the user
266 * to understand why the slot is not sync-ready. In the case of a
267 * persistent slot, it would be a more common case and won't directly
268 * impact the users, so we used DEBUG1 level to log the message.
269 */
271 errmsg("could not synchronize replication slot \"%s\"",
272 remote_slot->name),
273 errdetail("Synchronization could lead to data loss, because the remote slot needs WAL at LSN %X/%08X and catalog xmin %u, but the standby has LSN %X/%08X and catalog xmin %u.",
274 LSN_FORMAT_ARGS(remote_slot->restart_lsn),
275 remote_slot->catalog_xmin,
277 slot->data.catalog_xmin));
278
279 /*
280 * Skip updating the configuration. This is required to avoid syncing
281 * two_phase_at without syncing confirmed_lsn. Otherwise, the prepared
282 * transaction between old confirmed_lsn and two_phase_at will
283 * unexpectedly get decoded and sent to the downstream after
284 * promotion. See comments in ReorderBufferFinishPrepared.
285 */
286 return false;
287 }
288
289 /*
290 * Attempt to sync LSNs and xmins only if remote slot is ahead of local
291 * slot.
292 */
293 if (remote_slot->confirmed_lsn > slot->data.confirmed_flush ||
294 remote_slot->restart_lsn > slot->data.restart_lsn ||
295 TransactionIdFollows(remote_slot->catalog_xmin,
296 slot->data.catalog_xmin))
297 {
298 /*
299 * We can't directly copy the remote slot's LSN or xmin unless there
300 * exists a consistent snapshot at that point. Otherwise, after
301 * promotion, the slots may not reach a consistent point before the
302 * confirmed_flush_lsn, which can lead to data loss. To avoid data
303 * loss, we let slot machinery advance the slot which ensures that
304 * snapbuilder/slot statuses are updated properly.
305 */
306 if (SnapBuildSnapshotExists(remote_slot->restart_lsn))
307 {
308 /*
309 * Update the slot info directly if there is a serialized snapshot
310 * at the restart_lsn, as the slot can quickly reach consistency
311 * at restart_lsn by restoring the snapshot.
312 */
313 SpinLockAcquire(&slot->mutex);
314 slot->data.restart_lsn = remote_slot->restart_lsn;
315 slot->data.confirmed_flush = remote_slot->confirmed_lsn;
316 slot->data.catalog_xmin = remote_slot->catalog_xmin;
317 SpinLockRelease(&slot->mutex);
318 }
319 else
320 {
322
325
326 /* Sanity check */
327 if (slot->data.confirmed_flush != remote_slot->confirmed_lsn)
329 errmsg_internal("synchronized confirmed_flush for slot \"%s\" differs from remote slot",
330 remote_slot->name),
331 errdetail_internal("Remote slot has LSN %X/%08X but local slot has LSN %X/%08X.",
332 LSN_FORMAT_ARGS(remote_slot->confirmed_lsn),
334
335 /*
336 * If we can't reach a consistent snapshot, the slot won't be
337 * persisted. See update_and_persist_local_synced_slot().
338 */
340 {
342
343 ereport(LOG,
344 errmsg("could not synchronize replication slot \"%s\"",
345 remote_slot->name),
346 errdetail("Synchronization could lead to data loss, because the standby could not build a consistent snapshot to decode WALs at LSN %X/%08X.",
348
350 }
351 }
352
353 updated_xmin_or_lsn = true;
354 }
355
356 /* Update slot sync skip stats */
358
359 if (remote_dbid != slot->data.database ||
360 remote_slot->two_phase != slot->data.two_phase ||
361 remote_slot->failover != slot->data.failover ||
362 strcmp(remote_slot->plugin, NameStr(slot->data.plugin)) != 0 ||
363 remote_slot->two_phase_at != slot->data.two_phase_at)
364 {
366
367 /* Avoid expensive operations while holding a spinlock. */
369
370 SpinLockAcquire(&slot->mutex);
371 slot->data.plugin = plugin_name;
372 slot->data.database = remote_dbid;
373 slot->data.two_phase = remote_slot->two_phase;
374 slot->data.two_phase_at = remote_slot->two_phase_at;
375 slot->data.failover = remote_slot->failover;
376 SpinLockRelease(&slot->mutex);
377
378 updated_config = true;
379
380 /*
381 * Ensure that there is no risk of sending prepared transactions
382 * unexpectedly after the promotion.
383 */
385 }
386
387 /*
388 * We have to write the changed xmin to disk *before* we change the
389 * in-memory value, otherwise after a crash we wouldn't know that some
390 * catalog tuples might have been removed already.
391 */
393 {
396 }
397
398 /*
399 * Now the new xmin is safely on disk, we can let the global value
400 * advance. We do not take ProcArrayLock or similar since we only advance
401 * xmin here and there's not much harm done by a concurrent computation
402 * missing that.
403 */
405 {
406 SpinLockAcquire(&slot->mutex);
407 slot->effective_catalog_xmin = remote_slot->catalog_xmin;
408 SpinLockRelease(&slot->mutex);
409
412 }
413
415}
416
417/*
418 * Get the list of local logical slots that are synchronized from the
419 * primary server.
420 */
421static List *
423{
425
427
428 for (int i = 0; i < max_replication_slots; i++)
429 {
431
432 /* Check if it is a synchronized slot */
433 if (s->in_use && s->data.synced)
434 {
437 }
438 }
439
441
442 return local_slots;
443}
444
445/*
446 * Helper function to check if local_slot is required to be retained.
447 *
448 * Return false either if local_slot does not exist in the remote_slots list
449 * or is invalidated while the corresponding remote slot is still valid,
450 * otherwise true.
451 */
452static bool
454{
455 bool remote_exists = false;
456 bool locally_invalidated = false;
457
459 {
460 if (strcmp(remote_slot->name, NameStr(local_slot->data.name)) == 0)
461 {
462 remote_exists = true;
463
464 /*
465 * If remote slot is not invalidated but local slot is marked as
466 * invalidated, then set locally_invalidated flag.
467 */
470 (remote_slot->invalidated == RS_INVAL_NONE) &&
471 (local_slot->data.invalidated != RS_INVAL_NONE);
473
474 break;
475 }
476 }
477
479}
480
481/*
482 * Drop local obsolete slots.
483 *
484 * Drop the local slots that no longer need to be synced i.e. these either do
485 * not exist on the primary or are no longer enabled for failover.
486 *
487 * Additionally, drop any slots that are valid on the primary but got
488 * invalidated on the standby. This situation may occur due to the following
489 * reasons:
490 * - The 'max_slot_wal_keep_size' on the standby is insufficient to retain WAL
491 * records from the restart_lsn of the slot.
492 * - 'primary_slot_name' is temporarily reset to null and the physical slot is
493 * removed.
494 * These dropped slots will get recreated in the next sync-cycle and it is okay to
495 * drop and recreate such slots as long as these are not consumable on the
496 * standby (which is the case currently).
497 *
498 * Note: Change of 'wal_level' on the primary server to a level lower than
499 * logical may also result in slot invalidation and removal on the standby.
500 * This is because such 'wal_level' change is only possible if the logical
501 * slots are removed on the primary server, so it's expected to see the
502 * slots being invalidated and removed on the standby too (and re-created
503 * if they are re-created on the primary server).
504 */
505static void
507{
509
511 {
512 /* Drop the local slot if it is not required to be retained. */
514 {
515 bool synced_slot;
516
517 /*
518 * Use shared lock to prevent a conflict with
519 * ReplicationSlotsDropDBSlots(), trying to drop the same slot
520 * during a drop-database operation.
521 */
523 0, AccessShareLock);
524
525 /*
526 * In the small window between getting the slot to drop and
527 * locking the database, there is a possibility of a parallel
528 * database drop by the startup process and the creation of a new
529 * slot by the user. This new user-created slot may end up using
530 * the same shared memory as that of 'local_slot'. Thus check if
531 * local_slot is still the synced one before performing actual
532 * drop.
533 */
535 synced_slot = local_slot->in_use && local_slot->data.synced;
537
538 if (synced_slot)
539 {
540 ReplicationSlotAcquire(NameStr(local_slot->data.name), true, false);
542 }
543
545 0, AccessShareLock);
546
547 ereport(LOG,
548 errmsg("dropped replication slot \"%s\" of database with OID %u",
549 NameStr(local_slot->data.name),
550 local_slot->data.database));
551 }
552 }
553}
554
555/*
556 * Reserve WAL for the currently active local slot using the specified WAL
557 * location (restart_lsn).
558 *
559 * If the given WAL location has been removed or is at risk of removal,
560 * reserve WAL using the oldest segment that is non-removable.
561 */
562static void
564{
567 XLogSegNo segno;
569
570 Assert(slot != NULL);
572
573 /*
574 * Acquire an exclusive lock to prevent the checkpoint process from
575 * concurrently calculating the minimum slot LSN (see
576 * CheckPointReplicationSlots), ensuring that if WAL reservation occurs
577 * first, the checkpoint must wait for the restart_lsn update before
578 * calculating the minimum LSN.
579 *
580 * Note: Unlike ReplicationSlotReserveWal(), this lock does not protect a
581 * newly synced slot from being invalidated if a concurrent checkpoint has
582 * invoked CheckPointReplicationSlots() before the WAL reservation here.
583 * This can happen because the initial restart_lsn received from the
584 * remote server can precede the redo pointer. Therefore, when selecting
585 * the initial restart_lsn, we consider using the redo pointer or the
586 * minimum slot LSN (if those values are greater than the remote
587 * restart_lsn) instead of relying solely on the remote value.
588 */
590
591 /*
592 * Determine the minimum non-removable LSN by comparing the redo pointer
593 * with the minimum slot LSN.
594 *
595 * The minimum slot LSN is considered because the redo pointer advances at
596 * every checkpoint, even when replication slots are present on the
597 * standby. In such scenarios, the redo pointer can exceed the remote
598 * restart_lsn, while WALs preceding the remote restart_lsn remain
599 * protected by a local replication slot.
600 */
603
606
607 /*
608 * If the minimum safe LSN is greater than the given restart_lsn, use it
609 * as the initial restart_lsn for the newly synced slot. Otherwise, use
610 * the given remote restart_lsn.
611 */
612 SpinLockAcquire(&slot->mutex);
613 slot->data.restart_lsn = Max(restart_lsn, min_safe_lsn);
614 SpinLockRelease(&slot->mutex);
615
617
619 if (XLogGetLastRemovedSegno() >= segno)
620 elog(ERROR, "WAL required by replication slot %s has been removed concurrently",
621 NameStr(slot->data.name));
622
624}
625
626/*
627 * If the remote restart_lsn and catalog_xmin have caught up with the
628 * local ones, then update the LSNs and persist the local synced slot for
629 * future synchronization; otherwise, do nothing.
630 *
631 * *slot_persistence_pending is set to true if any of the slots fail to
632 * persist.
633 *
634 * Return true if the slot is marked as RS_PERSISTENT (sync-ready), otherwise
635 * false.
636 */
637static bool
640{
642
643 /* Slotsync skip stats are handled in function update_local_synced_slot() */
645
646 /*
647 * Check if the slot cannot be synchronized. Refer to the comment atop the
648 * file for details on this check.
649 */
651 {
652 /*
653 * We reach this point when the remote slot didn't catch up to locally
654 * reserved position, or it cannot reach the consistent point from the
655 * restart_lsn, or the WAL prior to the remote confirmed flush LSN has
656 * not been received and flushed.
657 *
658 * We do not drop the slot because the restart_lsn and confirmed_lsn
659 * can be ahead of the current location when recreating the slot in
660 * the next cycle. It may take more time to create such a slot or
661 * reach the consistent point. Therefore, we keep this slot and
662 * attempt the synchronization in the next cycle.
663 *
664 * We also update the slot_persistence_pending parameter, so the SQL
665 * function can retry.
666 */
669
670 return false;
671 }
672
674
675 ereport(LOG,
676 errmsg("newly created replication slot \"%s\" is sync-ready now",
677 remote_slot->name));
678
679 return true;
680}
681
682/*
683 * Synchronize a single slot to the given position.
684 *
685 * This creates a new slot if there is no existing one and updates the
686 * metadata of the slot as per the data received from the primary server.
687 *
688 * The slot is created as a temporary slot and stays in the same state until the
689 * remote_slot catches up with locally reserved position and local slot is
690 * updated. The slot is then persisted and is considered as sync-ready for
691 * periodic syncs.
692 *
693 * *slot_persistence_pending is set to true if any of the slots fail to
694 * persist.
695 *
696 * Returns TRUE if the local slot is updated.
697 */
698static bool
701{
702 ReplicationSlot *slot;
703 bool slot_updated = false;
704
705 /* Search for the named slot */
706 if ((slot = SearchNamedReplicationSlot(remote_slot->name, true)))
707 {
708 bool synced;
709
710 SpinLockAcquire(&slot->mutex);
711 synced = slot->data.synced;
712 SpinLockRelease(&slot->mutex);
713
714 /* User-created slot with the same name exists, raise ERROR. */
715 if (!synced)
718 errmsg("exiting from slot synchronization because same"
719 " name slot \"%s\" already exists on the standby",
720 remote_slot->name));
721
722 /*
723 * The slot has been synchronized before.
724 *
725 * It is important to acquire the slot here before checking
726 * invalidation. If we don't acquire the slot first, there could be a
727 * race condition that the local slot could be invalidated just after
728 * checking the 'invalidated' flag here and we could end up
729 * overwriting 'invalidated' flag to remote_slot's value. See
730 * InvalidatePossiblyObsoleteSlot() where it invalidates slot directly
731 * if the slot is not acquired by other processes.
732 *
733 * XXX: If it ever turns out that slot acquire/release is costly for
734 * cases when none of the slot properties is changed then we can do a
735 * pre-check to ensure that at least one of the slot properties is
736 * changed before acquiring the slot.
737 */
738 ReplicationSlotAcquire(remote_slot->name, true, false);
739
740 Assert(slot == MyReplicationSlot);
741
742 /*
743 * Copy the invalidation cause from the remote slot only if the local slot
744 * is not already invalidated; we don't want to overwrite an existing cause.
745 */
746 if (slot->data.invalidated == RS_INVAL_NONE &&
747 remote_slot->invalidated != RS_INVAL_NONE)
748 {
749 SpinLockAcquire(&slot->mutex);
750 slot->data.invalidated = remote_slot->invalidated;
751 SpinLockRelease(&slot->mutex);
752
753 /* Make sure the invalidated state persists across server restart */
756
757 slot_updated = true;
758 }
759
760 /* Skip the sync of an invalidated slot */
761 if (slot->data.invalidated != RS_INVAL_NONE)
762 {
764
766 return slot_updated;
767 }
768
769 /* Slot not ready yet, let's attempt to make it sync-ready now. */
770 if (slot->data.persistency == RS_TEMPORARY)
771 {
775 }
776
777 /* Slot ready for sync, so sync it. */
778 else
779 {
780 /*
781 * Sanity check: As long as the invalidations are handled
782 * appropriately as above, this should never happen.
783 *
784 * We don't need to check restart_lsn here. See the comments in
785 * update_local_synced_slot() for details.
786 */
787 if (remote_slot->confirmed_lsn < slot->data.confirmed_flush)
789 errmsg_internal("cannot synchronize local slot \"%s\"",
790 remote_slot->name),
791 errdetail_internal("Local slot's start streaming location LSN(%X/%08X) is ahead of remote slot's LSN(%X/%08X).",
793 LSN_FORMAT_ARGS(remote_slot->confirmed_lsn)));
794
796 }
797 }
798 /* Otherwise create the slot first. */
799 else
800 {
803
804 /* Skip creating the local slot if remote_slot is invalidated already */
805 if (remote_slot->invalidated != RS_INVAL_NONE)
806 return false;
807
808 /*
809 * We create temporary slots instead of ephemeral slots here because
810 * we want the slots to survive after releasing them. This is done to
811 * avoid dropping and re-creating the slots in each synchronization
812 * cycle if the restart_lsn or catalog_xmin of the remote slot has not
813 * caught up.
814 */
816 remote_slot->two_phase,
817 remote_slot->failover,
818 true);
819
820 /* For shorter lines. */
821 slot = MyReplicationSlot;
822
823 /* Avoid expensive operations while holding a spinlock. */
825
826 SpinLockAcquire(&slot->mutex);
827 slot->data.database = remote_dbid;
828 slot->data.plugin = plugin_name;
829 SpinLockRelease(&slot->mutex);
830
832
836 SpinLockAcquire(&slot->mutex);
839 SpinLockRelease(&slot->mutex);
843
846
847 slot_updated = true;
848 }
849
851
852 return slot_updated;
853}
854
855/*
856 * Fetch remote slots.
857 *
858 * If slot_names is NIL, fetches all failover logical slots from the
859 * primary server, otherwise fetches only the ones with names in slot_names.
860 *
861 * Returns a list of remote slot information structures, or NIL if none
862 * are found.
863 */
864static List *
866{
867#define SLOTSYNC_COLUMN_COUNT 10
870
871 WalRcvExecResult *res;
872 TupleTableSlot *tupslot;
874 StringInfoData query;
875
876 initStringInfo(&query);
878 "SELECT slot_name, plugin, confirmed_flush_lsn,"
879 " restart_lsn, catalog_xmin, two_phase,"
880 " two_phase_at, failover,"
881 " database, invalidation_reason"
882 " FROM pg_catalog.pg_replication_slots"
883 " WHERE failover and NOT temporary");
884
885 if (slot_names != NIL)
886 {
887 bool first_slot = true;
888
889 /*
890 * Construct the query to fetch only the specified slots
891 */
892 appendStringInfoString(&query, " AND slot_name IN (");
893
894 foreach_ptr(char, slot_name, slot_names)
895 {
896 if (!first_slot)
897 appendStringInfoString(&query, ", ");
898
899 appendStringInfo(&query, "%s", quote_literal_cstr(slot_name));
900 first_slot = false;
901 }
902 appendStringInfoChar(&query, ')');
903 }
904
905 /* Execute the query */
907 pfree(query.data);
908 if (res->status != WALRCV_OK_TUPLES)
910 errmsg("could not fetch failover logical slots info from the primary server: %s",
911 res->err));
912
914 while (tuplestore_gettupleslot(res->tuplestore, true, false, tupslot))
915 {
916 bool isnull;
918 Datum d;
919 int col = 0;
920
922 &isnull));
923 Assert(!isnull);
924
925 remote_slot->plugin = TextDatumGetCString(slot_getattr(tupslot, ++col,
926 &isnull));
927 Assert(!isnull);
928
929 /*
930 * It is possible to get null values for LSN and Xmin if slot is
931 * invalidated on the primary server, so handle accordingly.
932 */
933 d = slot_getattr(tupslot, ++col, &isnull);
934 remote_slot->confirmed_lsn = isnull ? InvalidXLogRecPtr :
935 DatumGetLSN(d);
936
937 d = slot_getattr(tupslot, ++col, &isnull);
938 remote_slot->restart_lsn = isnull ? InvalidXLogRecPtr : DatumGetLSN(d);
939
940 d = slot_getattr(tupslot, ++col, &isnull);
941 remote_slot->catalog_xmin = isnull ? InvalidTransactionId :
943
944 remote_slot->two_phase = DatumGetBool(slot_getattr(tupslot, ++col,
945 &isnull));
946 Assert(!isnull);
947
948 d = slot_getattr(tupslot, ++col, &isnull);
949 remote_slot->two_phase_at = isnull ? InvalidXLogRecPtr : DatumGetLSN(d);
950
951 remote_slot->failover = DatumGetBool(slot_getattr(tupslot, ++col,
952 &isnull));
953 Assert(!isnull);
954
955 remote_slot->database = TextDatumGetCString(slot_getattr(tupslot,
956 ++col, &isnull));
957 Assert(!isnull);
958
959 d = slot_getattr(tupslot, ++col, &isnull);
960 remote_slot->invalidated = isnull ? RS_INVAL_NONE :
962
963 /* Sanity check */
965
966 /*
967 * If restart_lsn, confirmed_lsn or catalog_xmin is invalid but the
968 * slot is valid, that means we have fetched the remote_slot in its
969 * RS_EPHEMERAL state. In such a case, don't sync it; we can always
970 * sync it in the next sync cycle when the remote_slot is persisted
971 * and has valid lsn(s) and xmin values.
972 *
973 * XXX: In future, if we plan to expose 'slot->data.persistency' in
974 * pg_replication_slots view, then we can avoid fetching RS_EPHEMERAL
975 * slots in the first place.
976 */
977 if ((!XLogRecPtrIsValid(remote_slot->restart_lsn) ||
978 !XLogRecPtrIsValid(remote_slot->confirmed_lsn) ||
979 !TransactionIdIsValid(remote_slot->catalog_xmin)) &&
980 remote_slot->invalidated == RS_INVAL_NONE)
982 else
983 /* Create list of remote slots */
985
986 ExecClearTuple(tupslot);
987 }
988
990
991 return remote_slot_list;
992}
993
994/*
995 * Synchronize slots.
996 *
997 * This function takes a list of remote slots and synchronizes them locally. It
998 * creates the slots if not present on the standby and updates existing ones.
999 *
1000 * If slot_persistence_pending is not NULL, it will be set to true if one or
1001 * more slots could not be persisted. This allows callers such as
1002 * SyncReplicationSlots() to retry those slots.
1003 *
1004 * Returns TRUE if any of the slots gets updated in this sync-cycle.
1005 */
1006static bool
1009{
1010 bool some_slot_updated = false;
1011
1012 /* Drop local slots that no longer need to be synced. */
1014
1015 /* Now sync the slots locally */
1017 {
1018 Oid remote_dbid = get_database_oid(remote_slot->database, false);
1019
1020 /*
1021 * Use shared lock to prevent a conflict with
1022 * ReplicationSlotsDropDBSlots(), trying to drop the same slot during
1023 * a drop-database operation.
1024 */
1026
1029
1031 }
1032
1033 return some_slot_updated;
1034}
1035
1036/*
1037 * validate_remote_info: checks the remote server info.
1038 *
1039 * Raises an ERROR unless the physical slot named by 'primary_slot_name' exists
1040 * on the remote server and the remote server is not a standby node.
1041 */
1042static void
1044{
1045#define PRIMARY_INFO_OUTPUT_COL_COUNT 2
1046 WalRcvExecResult *res;
1048 StringInfoData cmd;
1049 bool isnull;
1050 TupleTableSlot *tupslot;
1051 bool remote_in_recovery;
1052 bool primary_slot_valid;
1053 bool started_tx = false;
1054
1055 initStringInfo(&cmd);
1056 appendStringInfo(&cmd,
1057 "SELECT pg_is_in_recovery(), count(*) = 1"
1058 " FROM pg_catalog.pg_replication_slots"
1059 " WHERE slot_type='physical' AND slot_name=%s",
1061
1062 /* The syscache access in walrcv_exec() needs a transaction environment. */
1063 if (!IsTransactionState())
1064 {
1066 started_tx = true;
1067 }
1068
1070 pfree(cmd.data);
1071
1072 if (res->status != WALRCV_OK_TUPLES)
1073 ereport(ERROR,
1074 errmsg("could not fetch primary slot name \"%s\" info from the primary server: %s",
1075 PrimarySlotName, res->err),
1076 errhint("Check if \"primary_slot_name\" is configured correctly."));
1077
1079 if (!tuplestore_gettupleslot(res->tuplestore, true, false, tupslot))
1080 elog(ERROR,
1081 "failed to fetch tuple for the primary server slot specified by \"primary_slot_name\"");
1082
 /* Column 1 of the query result: pg_is_in_recovery() on the remote server. */
1083 remote_in_recovery = DatumGetBool(slot_getattr(tupslot, 1, &isnull));
1084 Assert(!isnull);
1085
1086 /*
1087 * Slot sync is currently not supported on a cascading standby. This is
1088 * because if we allow it, the primary server needs to wait for all the
1089 * cascading standbys, otherwise, logical subscribers can still be ahead
1090 * of one of the cascading standbys which we plan to promote. Thus, to
1091 * avoid this additional complexity, we restrict it for the time being.
1092 */
1094 ereport(ERROR,
1096 errmsg("cannot synchronize replication slots from a standby server"));
1097
 /* Column 2: true if exactly one physical slot with the given name exists. */
1098 primary_slot_valid = DatumGetBool(slot_getattr(tupslot, 2, &isnull));
1099 Assert(!isnull);
1100
1101 if (!primary_slot_valid)
1102 ereport(ERROR,
1104 /* translator: second %s is a GUC variable name */
1105 errmsg("replication slot \"%s\" specified by \"%s\" does not exist on primary server",
1106 PrimarySlotName, "primary_slot_name"));
1107
1108 ExecClearTuple(tupslot);
1110
1111 if (started_tx)
1113}
1114
1115/*
1116 * CheckAndGetDbnameFromConninfo: checks if dbname is specified in 'primary_conninfo'.
1117 *
1118 * Errors out if it is not specified; otherwise, returns it.
1119 */
1120char *
1122{
1123 char *dbname;
1124
1125 /*
1126 * The slot synchronization needs a database connection for walrcv_exec to
1127 * work.
1128 */
1130 if (dbname == NULL)
1131 ereport(ERROR,
1133
1134 /*
1135 * translator: first %s is a connection option; second %s is a GUC
1136 * variable name
1137 */
1138 errmsg("replication slot synchronization requires \"%s\" to be specified in \"%s\"",
1139 "dbname", "primary_conninfo"));
1140 return dbname;
1141}
1142
1143/*
1144 * ValidateSlotSyncParams: return true if all necessary GUCs for slot synchronization
1145 * are set appropriately; otherwise, report the problem at 'elevel' and return false.
1146 */
1147bool
1149{
1150 /*
1151 * Logical slot sync/creation requires logical decoding to be enabled.
1152 */
1154 {
1155 ereport(elevel,
1157 errmsg("replication slot synchronization requires \"effective_wal_level\" >= \"logical\" on the primary"),
1158 errhint("To enable logical decoding on primary, set \"wal_level\" >= \"logical\" or create at least one logical slot when \"wal_level\" = \"replica\"."));
1159
1160 return false;
1161 }
1162
1163 /*
1164 * A physical replication slot (primary_slot_name) is required on the
1165 * primary to ensure that the rows needed by the standby are not removed
1166 * after restarting, so that the synchronized slot on the standby will not
1167 * be invalidated.
1168 */
1169 if (PrimarySlotName == NULL || *PrimarySlotName == '\0')
1170 {
1171 ereport(elevel,
1173 /* translator: %s is a GUC variable name */
1174 errmsg("replication slot synchronization requires \"%s\" to be set", "primary_slot_name"));
1175 return false;
1176 }
1177
1178 /*
1179 * hot_standby_feedback must be enabled to cooperate with the physical
1180 * replication slot, which allows informing the primary about the xmin and
1181 * catalog_xmin values on the standby.
1182 */
1184 {
1185 ereport(elevel,
1187 /* translator: %s is a GUC variable name */
1188 errmsg("replication slot synchronization requires \"%s\" to be enabled",
1189 "hot_standby_feedback"));
1190 return false;
1191 }
1192
1193 /*
1194 * The primary_conninfo is required to make a connection to the primary
1195 * for getting slots information.
1196 */
1197 if (PrimaryConnInfo == NULL || *PrimaryConnInfo == '\0')
1198 {
1199 ereport(elevel,
1201 /* translator: %s is a GUC variable name */
1202 errmsg("replication slot synchronization requires \"%s\" to be set",
1203 "primary_conninfo"));
1204 return false;
1205 }
1206
1207 return true;
1208}
1209
1210/*
1211 * slotsync_reread_config: re-read the config file for slot synchronization.
1212 *
1213 * If relevant GUCs have changed, the slot sync worker exits, whereas the SQL
1214 * function pg_sync_replication_slots() throws an error.
1215 */
1216static void
1218{
1223 bool conninfo_changed;
1226 bool parameter_changed = false;
1227
1230
1231 ConfigReloadPending = false;
1233
1238
1240 {
1242 {
1243 ereport(LOG,
1244 /* translator: %s is a GUC variable name */
1245 errmsg("replication slot synchronization worker will stop because \"%s\" is disabled",
1246 "sync_replication_slots"));
1247
1248 proc_exit(0);
1249 }
1250
1251 parameter_changed = true;
1252 }
1253 else
1254 {
1255 if (conninfo_changed ||
1258 {
1259
1261 {
1262 ereport(LOG,
1263 errmsg("replication slot synchronization worker will restart because of a parameter change"));
1264
1265 /*
1266 * Reset the last-start time for this worker so that the
1267 * postmaster can restart it without waiting for
1268 * SLOTSYNC_RESTART_INTERVAL_SEC.
1269 */
1271
1272 proc_exit(0);
1273 }
1274
1275 parameter_changed = true;
1276 }
1277 }
1278
1279 /*
1280 * If we have reached here with a parameter change, we must be running in
1281 * the SQL function; emit an error in such a case.
1282 */
1284 {
1286 ereport(ERROR,
1288 errmsg("replication slot synchronization will stop because of a parameter change"));
1289 }
1290
1291}
1292
1293/*
1294 * ProcessSlotSyncInterrupts: interrupt handler for the process performing slot synchronization.
 *
 * When promotion is triggered, the slot sync worker logs a message and exits,
 * whereas a backend executing pg_sync_replication_slots() raises an ERROR.
1295 */
1296static void
1298{
1300
1302 {
1304 {
1305 ereport(LOG,
1306 errmsg("replication slot synchronization worker will stop because promotion is triggered"));
1307
1308 proc_exit(0);
1309 }
1310 else
1311 {
1312 /*
1313 * For the backend executing the SQL function
1314 * pg_sync_replication_slots().
1315 */
1316 ereport(ERROR,
1318 errmsg("replication slot synchronization will stop because promotion is triggered"));
1319 }
1320 }
1321
1324}
1325
1326/*
1327 * slotsync_worker_disconnect: connection cleanup function for the slotsync worker.
1328 *
1329 * Called on slotsync worker exit.
1330 */
1331static void
1338
1339/*
1340 * slotsync_worker_onexit: cleanup function for the slotsync worker.
1341 *
1342 * Called on slotsync worker exit.
1343 */
1344static void
1346{
1347 /*
1348 * We need to do slots cleanup here just like WalSndErrorCleanup() does.
1349 *
1350 * The startup process during promotion invokes ShutDownSlotSync() which
1351 * waits for slot sync to finish and it does that by checking the
1352 * 'syncing' flag. Thus the slot sync worker must be done with slots'
1353 * release and cleanup to avoid any dangling temporary slots or active
1354 * slots before it marks itself as finished syncing.
1355 */
1356
1357 /* Make sure active replication slots are released */
1358 if (MyReplicationSlot != NULL)
1360
1361 /* Also clean up the temporary slots. */
1363
1365
1367
1368 /*
1369 * If syncing_slots is true, it indicates that the process errored out
1370 * without resetting the flag. So, we need to clean up shared memory and
1371 * reset the flag here.
1372 */
1373 if (syncing_slots)
1374 {
1375 SlotSyncCtx->syncing = false;
1376 syncing_slots = false;
1377 }
1378
1380}
1381
1382/*
1383 * wait_for_slot_activity: sleep for long enough that we believe it's likely
1384 * that the slots on the primary get updated.
1385 *
1386 * If there is no slot activity, the wait time between sync-cycles will double
1387 * (to a maximum of 30s). If there is some slot activity, the wait time between
1388 * sync-cycles is reset to the minimum (200ms).
1389 */
1390static void
1392{
1393 int rc;
1394
1395 if (!some_slot_updated)
1396 {
1397 /*
1398 * No slots were updated, so double the sleep time, but not beyond the
1399 * maximum allowable value.
1400 */
1402 }
1403 else
1404 {
1405 /*
1406 * Some slots were updated since the last sleep, so reset the sleep
1407 * time.
1408 */
1410 }
1411
1412 rc = WaitLatch(MyLatch,
1414 sleep_ms,
1416
1417 if (rc & WL_LATCH_SET)
1419}
1420
1421/*
1422 * check_and_set_sync_info: emit an error if a concurrent sync call is in
1423 * progress. Otherwise, advertise that a sync is in progress.
1424 */
1425static void
1427{
1429
1430 if (SlotSyncCtx->syncing)
1431 {
1433 ereport(ERROR,
1435 errmsg("cannot synchronize replication slots concurrently"));
1436 }
1437
1438 /* The pid must not be already assigned in SlotSyncCtx */
1440
1441 SlotSyncCtx->syncing = true;
1442
1443 /*
1444 * Advertise the required PID so that the startup process can kill the
1445 * slot sync process on promotion.
1446 */
1448
1450
 /* Also remember it in the file-static flag for this process. */
1451 syncing_slots = true;
1452}
1453
1454/*
1455 * reset_syncing_flag: reset the syncing flag.
1456 */
1457static void
1467
1468/*
1469 * ReplSlotSyncWorkerMain: the main loop of the slot sync worker process.
1470 *
1471 * It connects to the primary server and periodically fetches logical failover
1472 * slot information in order to create and sync the slots.
1473 *
1474 * Note: If any changes are made here, check if the corresponding SQL
1475 * function logic in SyncReplicationSlots() also needs to be changed.
1476 */
1477void
1479{
1481 char *dbname;
1482 char *err;
1485
1487
1489
1491
1492 /*
1493 * Create a per-backend PGPROC struct in shared memory. We must do this
1494 * before we access any shared memory.
1495 */
1496 InitProcess();
1497
1498 /*
1499 * Early initialization.
1500 */
1501 BaseInit();
1502
1504
1505 /*
1506 * If an exception is encountered, processing resumes here.
1507 *
1508 * We just need to clean up, report the error, and go away.
1509 *
1510 * If we do not have this handling here, then since this worker process
1511 * operates at the bottom of the exception stack, ERRORs turn into FATALs.
1512 * Therefore, we create our own exception handler to catch ERRORs.
1513 */
1514 if (sigsetjmp(local_sigjmp_buf, 1) != 0)
1515 {
1516 /* since not using PG_TRY, must reset error stack by hand */
1518
1519 /* Prevents interrupts while cleaning up */
1521
1522 /* Report the error to the server log */
1524
1525 /*
1526 * We can now go away. Note that because we called InitProcess, a
1527 * callback was registered to do ProcKill, which will clean up
1528 * necessary state.
1529 */
1530 proc_exit(0);
1531 }
1532
1533 /* We can now handle ereport(ERROR) */
1535
1536 /* Setup signal handling */
1545
1547
1548 ereport(LOG, errmsg("slot sync worker started"));
1549
1550 /* Register it as soon as SlotSyncCtx->pid is initialized. */
1552
1553 /*
1554 * Establish the SIGALRM handler and initialize the timeout module. It is
1555 * needed by InitPostgres to register different timeouts.
1556 */
1558
1559 /* Load the libpq-specific functions */
1560 load_file("libpqwalreceiver", false);
1561
1562 /*
1563 * Unblock signals (they were blocked when the postmaster forked us)
1564 */
1566
1567 /*
1568 * Set always-secure search path, so malicious users can't redirect user
1569 * code (e.g. operators).
1570 *
1571 * It's not strictly necessary since we won't be scanning or writing to
1572 * any user table locally, but it's good to retain it here for added
1573 * precaution.
1574 */
1575 SetConfigOption("search_path", "", PGC_SUSET, PGC_S_OVERRIDE);
1576
1578
1579 /*
1580 * Connect to the database specified by the user in primary_conninfo. We
1581 * need a database connection for walrcv_exec to work which we use to
1582 * fetch slot information from the remote node. See comments atop
1583 * libpqrcv_exec.
1584 *
1585 * We do not specify a specific user here since the slot sync worker will
1586 * operate as a superuser. This is safe because the slot sync worker does
1587 * not interact with user tables, eliminating the risk of executing
1588 * arbitrary code within triggers.
1589 */
1591
1593
 /* Build the application name, prefixed with cluster_name when it is set. */
1595 if (cluster_name[0])
1596 appendStringInfo(&app_name, "%s_%s", cluster_name, "slotsync worker");
1597 else
1598 appendStringInfoString(&app_name, "slotsync worker");
1599
1600 /*
1601 * Establish the connection to the primary server for slot
1602 * synchronization.
1603 */
1604 wrconn = walrcv_connect(PrimaryConnInfo, false, false, false,
1605 app_name.data, &err);
1606
1607 if (!wrconn)
1608 ereport(ERROR,
1610 errmsg("synchronization worker \"%s\" could not connect to the primary server: %s",
1611 app_name.data, err));
1612
1613 pfree(app_name.data);
1614
1615 /*
1616 * Register the disconnection callback.
1617 *
1618 * XXX: This can be combined with the previous cleanup registration of
1619 * slotsync_worker_onexit() but that will need the connection to be made
1620 * global and we want to avoid introducing a global for this purpose.
1621 */
1623
1624 /*
1625 * Using the specified primary server connection, check that we are not a
1626 * cascading standby and that the slot configured in 'primary_slot_name'
1627 * exists on the primary server.
1628 */
1630
1631 /* Main loop to synchronize slots */
1632 for (;;)
1633 {
1634 bool some_slot_updated = false;
1635 bool started_tx = false;
1637
1639
1640 /*
1641 * The syscache access in fetch_remote_slots() needs a transaction
1642 * environment.
1643 */
1644 if (!IsTransactionState())
1645 {
1647 started_tx = true;
1648 }
1649
1653
1654 if (started_tx)
1656
1658 }
1659
1660 /*
1661 * The slot sync worker can't get here because it will only stop when it
1662 * receives a stop request from the startup process, or when there is an
1663 * error.
1664 */
1665 Assert(false);
1666}
1667
1668/*
1669 * update_synced_slots_inactive_since: update the inactive_since property for synced slots.
1670 *
1671 * Note that this function is currently called when we shut down the slot
1672 * sync machinery.
1673 */
1674static void
1676{
1677 TimestampTz now = 0;
1678
1679 /*
1680 * We need to update inactive_since only when we are promoting the standby,
1681 * so that inactive_since is interpreted correctly if the standby is promoted
1682 * without a restart. We don't want the slots to appear inactive for a
1683 * long time after promotion if they haven't been synchronized recently.
1684 * Whoever acquires the slot, i.e., makes the slot active, will reset it.
1685 */
1686 if (!StandbyMode)
1687 return;
1688
1689 /* The slot sync worker or the SQL function mustn't be running by now */
1691
1693
1694 for (int i = 0; i < max_replication_slots; i++)
1695 {
1697
1698 /* Check if it is a synchronized slot */
1699 if (s->in_use && s->data.synced)
1700 {
1702
1703 /* The slot must not be acquired by any process */
1705
1706 /* Use the same inactive_since time for all the slots. */
1707 if (now == 0)
1709
1711 }
1712 }
1713
1715}
1716
1717/*
1718 * ShutDownSlotSync: shut down slot synchronization.
1719 *
1720 * This function sets stopSignaled=true and wakes up the slot sync process
1721 * (either the worker or a backend running the SQL function pg_sync_replication_slots())
1722 * so that the worker can exit or the SQL function pg_sync_replication_slots() can
1723 * finish. It also waits until the slot sync worker has exited or
1724 * pg_sync_replication_slots() has finished.
1725 */
1726void
1728{
1730
1732
1733 SlotSyncCtx->stopSignaled = true;
1734
1735 /*
1736 * Return if neither the slot sync worker is running nor the function
1737 * pg_sync_replication_slots() is executing.
1738 */
1739 if (!SlotSyncCtx->syncing)
1740 {
1743 return;
1744 }
1745
1747
1749
1750 /*
1751 * Signal the process doing slotsync, if any. The process will stop upon
1752 * detecting that the stopSignaled flag is set to true.
1753 */
1756
1757 /* Wait for slot sync to end */
1758 for (;;)
1759 {
1760 int rc;
1761
1762 /* Wait a bit; we don't expect to have to wait long */
1763 rc = WaitLatch(MyLatch,
1766
1767 if (rc & WL_LATCH_SET)
1768 {
1771 }
1772
1774
1775 /* Ensure that no process is syncing the slots. */
1776 if (!SlotSyncCtx->syncing)
1777 break;
1778
1780 }
1781
1783
1785}
1786
1787/*
1788 * SlotSyncWorkerCanRestart
1789 *
1790 * Return true, indicating worker is allowed to restart, if enough time has
1791 * passed since it was last launched to reach SLOTSYNC_RESTART_INTERVAL_SEC.
1792 * Otherwise return false.
1793 *
1794 * This is a safety valve to protect against continuous respawn attempts if the
1795 * worker is dying immediately at launch. Note that since the postmaster main
1796 * loop will retry launching the worker, we will get another
1797 * chance later.
1798 */
1799bool
1801{
1802 time_t curtime = time(NULL);
1803
1804 /*
1805 * If first time through, or time somehow went backwards, always update
1806 * last_start_time to match the current clock and allow worker start.
1807 * Otherwise allow it only once enough time has elapsed.
1808 */
1809 if (SlotSyncCtx->last_start_time == 0 ||
1810 curtime < SlotSyncCtx->last_start_time ||
1812 {
1814 return true;
1815 }
1816 return false;
1817}
1818
1819/*
1820 * IsSyncingReplicationSlots: is the current process syncing replication slots?
1821 *
1822 * Could be either a backend executing the SQL function or the slot sync worker.
 * Returns the value of the file-static syncing_slots flag.
1823 */
1824bool
1826{
1827 return syncing_slots;
1828}
1829
1830/*
1831 * SlotSyncShmemSize: amount of shared memory required for slot synchronization,
 * i.e. the size of one SlotSyncCtxStruct.
1832 */
1833Size
1835{
1836 return sizeof(SlotSyncCtxStruct);
1837}
1838
1839/*
1840 * SlotSyncShmemInit: allocate and initialize the shared memory for slot synchronization.
1841 */
1842void
1844{
1845 Size size = SlotSyncShmemSize();
1846 bool found;
1847
1849 ShmemInitStruct("Slot Sync Data", size, &found);
1850
 /* Zero the struct only when ShmemInitStruct() reports it was not found. */
1851 if (!found)
1852 {
1853 memset(SlotSyncCtx, 0, size);
1856 }
1857}
1858
1859/*
1860 * slotsync_failure_callback: error cleanup callback for the slot sync SQL function.
1861 */
1862static void
1864{
1866
1867 /*
1868 * We need to do slots cleanup here just like WalSndErrorCleanup() does.
1869 *
1870 * The startup process during promotion invokes ShutDownSlotSync() which
1871 * waits for slot sync to finish and it does that by checking the
1872 * 'syncing' flag. Thus the SQL function must be done with slots' release
1873 * and cleanup to avoid any dangling temporary slots or active slots
1874 * before it marks itself as finished syncing.
1875 */
1876
1877 /* Make sure active replication slots are released */
1878 if (MyReplicationSlot != NULL)
1880
1881 /* Also clean up the synced temporary slots. */
1883
1884 /*
1885 * If syncing_slots is still set, the process errored out without
1886 * resetting the flag. So, we need to clean up shared memory and reset the
1887 * flag here.
1888 */
1889 if (syncing_slots)
1891
1893}
1894
1895/*
1896 * extract_slot_names: helper function to extract slot names from a list of remote slots.
 *
 * Each name is copied with pstrdup() and appended to the returned list, which
 * is NIL if nothing was appended.
1897 */
1898static List *
1900{
1901 List *slot_names = NIL;
1902
1904 {
1905 char *slot_name;
1906
1907 slot_name = pstrdup(remote_slot->name);
1908 slot_names = lappend(slot_names, slot_name);
1909 }
1910
1911 return slot_names;
1912}
1913
1914/*
1915 * SyncReplicationSlots: synchronize the failover-enabled replication slots
1916 * using the specified primary server connection.
1917 *
1918 * Repeatedly fetches and updates replication slot information from the
1919 * primary until all slots are at least "sync ready".
1920 *
1921 * Exits early if promotion is triggered or certain critical
1922 * configuration parameters have changed.
1923 */
1924void
1926{
1928 {
1930 List *slot_names = NIL; /* List of slot names to track */
1931
1933
1934 /* Check for interrupts and config changes */
1936
1938
1939 /* Retry until all the slots are sync-ready */
1940 for (;;)
1941 {
1942 bool slot_persistence_pending = false;
1943 bool some_slot_updated = false;
1944
1945 /* Check for interrupts and config changes */
1947
1948 /* We must be in a valid transaction state */
1950
1951 /*
1952 * Fetch remote slot info for the given slot_names. If slot_names
1953 * is NIL, fetch all failover-enabled slots. Note that we reuse
1954 * slot_names from the first iteration; re-fetching all failover
1955 * slots each time could cause an endless loop. Instead of
1956 * reprocessing only the pending slots in each iteration, it's
1957 * better to process all the slots received in the first
1958 * iteration. This ensures that by the time we're done, all slots
1959 * reflect the latest values.
1960 */
1961 remote_slots = fetch_remote_slots(wrconn, slot_names);
1962
1963 /* Attempt to synchronize slots */
1966
1967 /*
1968 * If slot_persistence_pending is true, extract slot names for
1969 * future iterations (only needed if we haven't done it yet).
1970 */
1971 if (slot_names == NIL && slot_persistence_pending)
1972 slot_names = extract_slot_names(remote_slots);
1973
1974 /* Free the current remote_slots list */
1976
1977 /* Done if all slots are persisted, i.e., are sync-ready */
1979 break;
1980
1981 /* Wait before retrying */
1983 }
1984
1985 if (slot_names)
1986 list_free_deep(slot_names);
1987
1988 /* Clean up the synced temporary slots */
1990
1991 /* We are done with sync, so reset sync flag */
1993 }
1995}
sigset_t UnBlockSig
Definition pqsignal.c:22
TimestampTz GetCurrentTimestamp(void)
Definition timestamp.c:1636
Datum now(PG_FUNCTION_ARGS)
Definition timestamp.c:1600
#define TextDatumGetCString(d)
Definition builtins.h:99
#define NameStr(name)
Definition c.h:837
#define Min(x, y)
Definition c.h:1093
#define Max(x, y)
Definition c.h:1087
#define Assert(condition)
Definition c.h:945
uint32 TransactionId
Definition c.h:738
size_t Size
Definition c.h:691
int64 TimestampTz
Definition timestamp.h:39
Oid get_database_oid(const char *dbname, bool missing_ok)
void load_file(const char *filename, bool restricted)
Definition dfmgr.c:149
Datum arg
Definition elog.c:1322
void EmitErrorReport(void)
Definition elog.c:1882
ErrorContextCallback * error_context_stack
Definition elog.c:99
int errcode(int sqlerrcode)
Definition elog.c:874
sigjmp_buf * PG_exception_stack
Definition elog.c:101
#define LOG
Definition elog.h:31
int int errdetail_internal(const char *fmt,...) pg_attribute_printf(1
int errhint(const char *fmt,...) pg_attribute_printf(1
int errdetail(const char *fmt,...) pg_attribute_printf(1
int int errmsg_internal(const char *fmt,...) pg_attribute_printf(1
#define DEBUG1
Definition elog.h:30
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define ereport(elevel,...)
Definition elog.h:150
void err(int eval, const char *fmt,...)
Definition err.c:43
TupleTableSlot * MakeSingleTupleTableSlot(TupleDesc tupdesc, const TupleTableSlotOps *tts_ops)
const TupleTableSlotOps TTSOpsMinimalTuple
Definition execTuples.c:86
#define palloc0_object(type)
Definition fe_memutils.h:75
int MyProcPid
Definition globals.c:47
struct Latch * MyLatch
Definition globals.c:63
void ProcessConfigFile(GucContext context)
Definition guc-file.l:120
void SetConfigOption(const char *name, const char *value, GucContext context, GucSource source)
Definition guc.c:4228
@ PGC_S_OVERRIDE
Definition guc.h:123
@ PGC_SUSET
Definition guc.h:78
@ PGC_SIGHUP
Definition guc.h:75
char * cluster_name
Definition guc_tables.c:564
volatile sig_atomic_t ConfigReloadPending
Definition interrupt.c:27
void SignalHandlerForConfigReload(SIGNAL_ARGS)
Definition interrupt.c:61
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition ipc.c:344
void proc_exit(int code)
Definition ipc.c:105
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition ipc.h:47
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition ipc.h:52
int i
Definition isn.c:77
void ResetLatch(Latch *latch)
Definition latch.c:374
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition latch.c:172
List * lappend(List *list, void *datum)
Definition list.c:339
void list_free_deep(List *list)
Definition list.c:1560
void LockSharedObject(Oid classid, Oid objid, uint16 objsubid, LOCKMODE lockmode)
Definition lmgr.c:1088
void UnlockSharedObject(Oid classid, Oid objid, uint16 objsubid, LOCKMODE lockmode)
Definition lmgr.c:1148
#define AccessShareLock
Definition lockdefs.h:36
XLogRecPtr LogicalSlotAdvanceAndCheckSnapState(XLogRecPtr moveto, bool *found_consistent_snapshot)
Definition logical.c:2094
bool IsLogicalDecodingEnabled(void)
Definition logicalctl.c:205
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1177
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1794
@ LW_SHARED
Definition lwlock.h:113
@ LW_EXCLUSIVE
Definition lwlock.h:112
char * pstrdup(const char *in)
Definition mcxt.c:1781
void pfree(void *pointer)
Definition mcxt.c:1616
@ NormalProcessing
Definition miscadmin.h:472
@ InitProcessing
Definition miscadmin.h:471
#define GetProcessingMode()
Definition miscadmin.h:481
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
#define AmLogicalSlotSyncWorkerProcess()
Definition miscadmin.h:386
#define HOLD_INTERRUPTS()
Definition miscadmin.h:134
#define SetProcessingMode(mode)
Definition miscadmin.h:483
#define InvalidPid
Definition miscadmin.h:32
void namestrcpy(Name name, const char *str)
Definition name.c:233
static char * errmsg
#define NIL
Definition pg_list.h:68
#define foreach_ptr(type, var, lst)
Definition pg_list.h:469
static XLogRecPtr DatumGetLSN(Datum X)
Definition pg_lsn.h:25
#define die(msg)
void pgstat_report_replslotsync(ReplicationSlot *slot)
#define pqsignal
Definition port.h:547
void FloatExceptionHandler(SIGNAL_ARGS)
Definition postgres.c:3059
void StatementCancelHandler(SIGNAL_ARGS)
Definition postgres.c:3042
static bool DatumGetBool(Datum X)
Definition postgres.h:100
static Datum PointerGetDatum(const void *X)
Definition postgres.h:342
uint64_t Datum
Definition postgres.h:70
static Pointer DatumGetPointer(Datum X)
Definition postgres.h:332
static TransactionId DatumGetTransactionId(Datum X)
Definition postgres.h:282
#define InvalidOid
unsigned int Oid
void BaseInit(void)
Definition postinit.c:616
void InitPostgres(const char *in_dbname, Oid dboid, const char *username, Oid useroid, bits32 flags, char *out_dbname)
Definition postinit.c:719
static int fb(int x)
TransactionId GetOldestSafeDecodingTransactionId(bool catalogOnly)
Definition procarray.c:2906
#define INVALID_PROC_NUMBER
Definition procnumber.h:26
void procsignal_sigusr1_handler(SIGNAL_ARGS)
Definition procsignal.c:680
void init_ps_display(const char *fixed_part)
Definition ps_status.c:285
char * quote_literal_cstr(const char *rawstr)
Definition quote.c:101
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition shmem.c:381
void ReplicationSlotAcquire(const char *name, bool nowait, bool error_if_invalid)
Definition slot.c:622
void ReplicationSlotCreate(const char *name, bool db_specific, ReplicationSlotPersistency persistency, bool two_phase, bool failover, bool synced)
Definition slot.c:380
void ReplicationSlotDropAcquired(void)
Definition slot.c:1035
void ReplicationSlotMarkDirty(void)
Definition slot.c:1177
ReplicationSlotInvalidationCause GetSlotInvalidationCause(const char *cause_name)
Definition slot.c:2916
void ReplicationSlotsComputeRequiredXmin(bool already_locked)
Definition slot.c:1219
void ReplicationSlotPersist(void)
Definition slot.c:1194
ReplicationSlot * MyReplicationSlot
Definition slot.c:149
void ReplicationSlotSave(void)
Definition slot.c:1159
ReplicationSlot * SearchNamedReplicationSlot(const char *name, bool need_lock)
Definition slot.c:542
void ReplicationSlotRelease(void)
Definition slot.c:762
int max_replication_slots
Definition slot.c:152
ReplicationSlotCtlData * ReplicationSlotCtl
Definition slot.c:146
void ReplicationSlotsComputeRequiredLSN(void)
Definition slot.c:1301
void ReplicationSlotCleanup(bool synced_only)
Definition slot.c:861
@ RS_TEMPORARY
Definition slot.h:47
ReplicationSlotInvalidationCause
Definition slot.h:59
@ RS_INVAL_NONE
Definition slot.h:60
#define SlotIsLogical(slot)
Definition slot.h:288
static void ReplicationSlotSetInactiveSince(ReplicationSlot *s, TimestampTz ts, bool acquire_lock)
Definition slot.h:306
SlotSyncSkipReason
Definition slot.h:81
@ SS_SKIP_WAL_NOT_FLUSHED
Definition slot.h:83
@ SS_SKIP_NO_CONSISTENT_SNAPSHOT
Definition slot.h:87
@ SS_SKIP_NONE
Definition slot.h:82
@ SS_SKIP_INVALID
Definition slot.h:89
@ SS_SKIP_WAL_OR_ROWS_REMOVED
Definition slot.h:85
static List * get_local_synced_slots(void)
Definition slotsync.c:422
#define MIN_SLOTSYNC_WORKER_NAPTIME_MS
Definition slotsync.c:129
#define PRIMARY_INFO_OUTPUT_COL_COUNT
static void slotsync_worker_disconnect(int code, Datum arg)
Definition slotsync.c:1332
void SyncReplicationSlots(WalReceiverConn *wrconn)
Definition slotsync.c:1925
static bool local_sync_slot_required(ReplicationSlot *local_slot, List *remote_slots)
Definition slotsync.c:453
static void drop_local_obsolete_slots(List *remote_slot_list)
Definition slotsync.c:506
static void reserve_wal_for_local_slot(XLogRecPtr restart_lsn)
Definition slotsync.c:563
static void update_slotsync_skip_stats(SlotSyncSkipReason skip_reason)
Definition slotsync.c:172
void ShutDownSlotSync(void)
Definition slotsync.c:1727
bool sync_replication_slots
Definition slotsync.c:122
static bool synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid, bool *slot_persistence_pending)
Definition slotsync.c:699
static SlotSyncCtxStruct * SlotSyncCtx
Definition slotsync.c:119
static void slotsync_failure_callback(int code, Datum arg)
Definition slotsync.c:1863
#define SLOTSYNC_COLUMN_COUNT
static List * extract_slot_names(List *remote_slots)
Definition slotsync.c:1899
static long sleep_ms
Definition slotsync.c:132
#define SLOTSYNC_RESTART_INTERVAL_SEC
Definition slotsync.c:135
char * CheckAndGetDbnameFromConninfo(void)
Definition slotsync.c:1121
static bool syncing_slots
Definition slotsync.c:142
static void ProcessSlotSyncInterrupts(void)
Definition slotsync.c:1297
#define MAX_SLOTSYNC_WORKER_NAPTIME_MS
Definition slotsync.c:130
static bool update_and_persist_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid, bool *slot_persistence_pending)
Definition slotsync.c:638
bool SlotSyncWorkerCanRestart(void)
Definition slotsync.c:1800
static void wait_for_slot_activity(bool some_slot_updated)
Definition slotsync.c:1391
static void slotsync_reread_config(void)
Definition slotsync.c:1217
static void reset_syncing_flag(void)
Definition slotsync.c:1458
static bool update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid)
Definition slotsync.c:204
void SlotSyncShmemInit(void)
Definition slotsync.c:1843
static void slotsync_worker_onexit(int code, Datum arg)
Definition slotsync.c:1345
static void update_synced_slots_inactive_since(void)
Definition slotsync.c:1675
bool ValidateSlotSyncParams(int elevel)
Definition slotsync.c:1148
static void validate_remote_info(WalReceiverConn *wrconn)
Definition slotsync.c:1043
static void check_and_set_sync_info(pid_t sync_process_pid)
Definition slotsync.c:1426
bool IsSyncingReplicationSlots(void)
Definition slotsync.c:1825
void ReplSlotSyncWorkerMain(const void *startup_data, size_t startup_data_len)
Definition slotsync.c:1478
static List * fetch_remote_slots(WalReceiverConn *wrconn, List *slot_names)
Definition slotsync.c:865
Size SlotSyncShmemSize(void)
Definition slotsync.c:1834
static bool synchronize_slots(WalReceiverConn *wrconn, List *remote_slot_list, bool *slot_persistence_pending)
Definition slotsync.c:1007
bool SnapBuildSnapshotExists(XLogRecPtr lsn)
Definition snapbuild.c:2061
static void SpinLockRelease(volatile slock_t *lock)
Definition spin.h:62
static void SpinLockAcquire(volatile slock_t *lock)
Definition spin.h:56
static void SpinLockInit(volatile slock_t *lock)
Definition spin.h:50
void InitProcess(void)
Definition proc.c:380
char * dbname
Definition streamutil.c:49
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition stringinfo.c:145
void appendStringInfoString(StringInfo str, const char *s)
Definition stringinfo.c:230
void appendStringInfoChar(StringInfo str, char ch)
Definition stringinfo.c:242
void initStringInfo(StringInfo str)
Definition stringinfo.c:97
Definition pg_list.h:54
bool two_phase
Definition slotsync.c:153
char * plugin
Definition slotsync.c:151
char * name
Definition slotsync.c:150
char * database
Definition slotsync.c:152
bool failover
Definition slotsync.c:154
ReplicationSlotInvalidationCause invalidated
Definition slotsync.c:161
XLogRecPtr confirmed_lsn
Definition slotsync.c:156
XLogRecPtr restart_lsn
Definition slotsync.c:155
XLogRecPtr two_phase_at
Definition slotsync.c:157
TransactionId catalog_xmin
Definition slotsync.c:158
ReplicationSlot replication_slots[1]
Definition slot.h:299
TransactionId catalog_xmin
Definition slot.h:122
ReplicationSlotPersistency persistency
Definition slot.h:106
ReplicationSlotInvalidationCause invalidated
Definition slot.h:128
TransactionId effective_catalog_xmin
Definition slot.h:210
slock_t mutex
Definition slot.h:183
SlotSyncSkipReason slotsync_skip_reason
Definition slot.h:284
bool in_use
Definition slot.h:186
ProcNumber active_proc
Definition slot.h:192
ReplicationSlotPersistentData data
Definition slot.h:213
time_t last_start_time
Definition slotsync.c:115
Tuplestorestate * tuplestore
TupleDesc tupledesc
WalRcvExecStatus status
Definition c.h:832
void InitializeTimeouts(void)
Definition timeout.c:470
static bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition transam.h:297
#define InvalidTransactionId
Definition transam.h:31
#define TransactionIdIsValid(xid)
Definition transam.h:41
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
bool tuplestore_gettupleslot(Tuplestorestate *state, bool forward, bool copy, TupleTableSlot *slot)
static Datum slot_getattr(TupleTableSlot *slot, int attnum, bool *isnull)
Definition tuptable.h:417
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition tuptable.h:476
#define WL_TIMEOUT
#define WL_EXIT_ON_PM_DEATH
#define WL_LATCH_SET
static WalReceiverConn * wrconn
Definition walreceiver.c:95
bool hot_standby_feedback
Definition walreceiver.c:92
#define walrcv_connect(conninfo, replication, logical, must_use_password, appname, err)
@ WALRCV_OK_TUPLES
static void walrcv_clear_result(WalRcvExecResult *walres)
#define walrcv_get_dbname_from_conninfo(conninfo)
#define walrcv_exec(conn, exec, nRetTypes, retTypes)
#define walrcv_disconnect(conn)
XLogRecPtr GetStandbyFlushRecPtr(TimeLineID *tli)
Definition walsender.c:3660
#define SIGCHLD
Definition win32_port.h:168
#define SIGHUP
Definition win32_port.h:158
#define SIGPIPE
Definition win32_port.h:163
#define kill(pid, sig)
Definition win32_port.h:490
#define SIGUSR1
Definition win32_port.h:170
#define SIGUSR2
Definition win32_port.h:171
bool IsTransactionState(void)
Definition xact.c:389
void StartTransactionCommand(void)
Definition xact.c:3081
void CommitTransactionCommand(void)
Definition xact.c:3179
XLogSegNo XLogGetLastRemovedSegno(void)
Definition xlog.c:3779
XLogRecPtr GetRedoRecPtr(void)
Definition xlog.c:6547
XLogRecPtr XLogGetReplicationSlotMinimumLSN(void)
Definition xlog.c:2666
int wal_segment_size
Definition xlog.c:147
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
#define XLogRecPtrIsValid(r)
Definition xlogdefs.h:29
#define LSN_FORMAT_ARGS(lsn)
Definition xlogdefs.h:47
uint64 XLogRecPtr
Definition xlogdefs.h:21
#define InvalidXLogRecPtr
Definition xlogdefs.h:28
uint64 XLogSegNo
Definition xlogdefs.h:52
char * PrimarySlotName
bool StandbyMode
char * PrimaryConnInfo