PostgreSQL Source Code git master
Loading...
Searching...
No Matches
slotsync.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 * slotsync.c
3 * Functionality for synchronizing slots to a standby server from the
4 * primary server.
5 *
6 * Copyright (c) 2024-2026, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * src/backend/replication/logical/slotsync.c
10 *
11 * This file contains the code for slot synchronization on a physical standby
12 * to fetch logical failover slots information from the primary server, create
13 * the slots on the standby and synchronize them periodically.
14 *
15 * Slot synchronization can be performed either automatically by enabling the
16 * slot sync worker or manually via the SQL function pg_sync_replication_slots().
17 *
18 * If the WAL corresponding to the remote's restart_lsn is not available on the
19 * physical standby, or the remote's catalog_xmin precedes the oldest xid for
20 * which it is guaranteed that rows wouldn't have been removed, then we cannot
21 * create the local standby slot because that would mean moving the local slot
22 * backward and decoding won't be possible via such a slot. In this case, the
23 * slot will be marked as RS_TEMPORARY. Once the primary server catches up,
24 * the slot will be marked as RS_PERSISTENT (which means sync-ready), after
25 * which the slot sync worker can perform the sync periodically or the user can
26 * call pg_sync_replication_slots() periodically to perform the syncs.
27 *
28 * If synchronized slots fail to build a consistent snapshot from the
29 * restart_lsn before reaching confirmed_flush_lsn, they would become
30 * unreliable after promotion due to potential data loss from changes
31 * before reaching a consistent point. This can happen because the slots can
32 * be synced at some random time and we may not reach the consistent point
33 * at the same WAL location as the primary. So, we mark such slots as
34 * RS_TEMPORARY. Once the decoding from corresponding LSNs can reach a
35 * consistent point, they will be marked as RS_PERSISTENT.
36 *
37 * If the WAL prior to the remote slot's confirmed_flush_lsn has not been
38 * flushed on the standby, the slot is marked as RS_TEMPORARY. Once the standby
39 * catches up and flushes that WAL, the slot will be marked as RS_PERSISTENT.
40 *
41 * The slot sync worker waits for some time before the next synchronization,
42 * with the duration varying based on whether any slots were updated during
43 * the last cycle. Refer to the comments above wait_for_slot_activity() for
44 * more details.
45 *
46 * If the SQL function pg_sync_replication_slots() is used to sync the slots,
47 * and if the slots are not ready to be synced and are marked as RS_TEMPORARY
48 * because of any of the reasons mentioned above, then the SQL function also
49 * waits and retries until the slots are marked as RS_PERSISTENT (which means
50 * sync-ready). Refer to the comments in SyncReplicationSlots() for more
51 * details.
52 *
53 * Any standby synchronized slots will be dropped if they no longer need
54 * to be synchronized. See comment atop drop_local_obsolete_slots() for more
55 * details.
56 *---------------------------------------------------------------------------
57 */
58
59#include "postgres.h"
60
61#include <time.h>
62
64#include "access/xlogrecovery.h"
65#include "catalog/pg_database.h"
66#include "libpq/pqsignal.h"
67#include "pgstat.h"
69#include "replication/logical.h"
72#include "storage/ipc.h"
73#include "storage/lmgr.h"
74#include "storage/proc.h"
75#include "storage/procarray.h"
76#include "storage/subsystems.h"
77#include "tcop/tcopprot.h"
78#include "utils/builtins.h"
79#include "utils/memutils.h"
80#include "utils/pg_lsn.h"
81#include "utils/ps_status.h"
82#include "utils/timeout.h"
83#include "utils/wait_event.h"
84
85/*
86 * Struct for sharing information to control slot synchronization.
87 *
88 * The 'pid' is either the slot sync worker's pid or the backend's pid running
89 * the SQL function pg_sync_replication_slots(). On promotion, the startup
90 * process sets 'stopSignaled' and uses this 'pid' to signal the synchronizing
91 * process with PROCSIG_SLOTSYNC_MESSAGE and also to wake it up so that the
92 * process can immediately stop its synchronizing work.
93 * Setting 'stopSignaled' on the other hand is used to handle the race
94 * condition when the postmaster has not noticed the promotion yet and thus may
95 * end up restarting the slot sync worker. If 'stopSignaled' is set, the worker
96 * will exit in such a case. The SQL function pg_sync_replication_slots() will
97 * also error out if this flag is set. Note that we don't need to reset this
98 * variable because, after promotion, the slot sync worker won't be restarted:
99 * the pmState changes to PM_RUN from PM_HOT_STANDBY and we don't support
100 * demoting the primary without restarting the server.
101 * See LaunchMissingBackgroundProcesses.
102 *
103 * The 'syncing' flag is needed to prevent concurrent slot syncs to avoid slot
104 * overwrites.
105 *
106 * The 'last_start_time' is needed by postmaster to start the slot sync worker
107 * once per SLOTSYNC_RESTART_INTERVAL_SEC. In cases where an immediate restart
108 * is expected (e.g., slot sync GUCs change), slot sync worker will reset
109 * last_start_time before exiting, so that postmaster can start the worker
110 * without waiting for SLOTSYNC_RESTART_INTERVAL_SEC.
111 */
120
122
123static void SlotSyncShmemRequest(void *arg);
124static void SlotSyncShmemInit(void *arg);
125
130
131/* GUC variable */
133
134/*
135 * The sleep time (ms) between slot-sync cycles varies dynamically
136 * (within a MIN/MAX range) according to slot activity. See
137 * wait_for_slot_activity() for details.
138 */
139#define MIN_SLOTSYNC_WORKER_NAPTIME_MS 200
140#define MAX_SLOTSYNC_WORKER_NAPTIME_MS 30000 /* 30s */
141
143
144/* The restart interval for the slot sync worker, used by postmaster */
145#define SLOTSYNC_RESTART_INTERVAL_SEC 10
146
147/*
148 * Flag to tell if we are syncing replication slots. Unlike the 'syncing' flag
149 * in SlotSyncCtxStruct, this flag is true only if the current process is
150 * performing slot synchronization.
151 */
152static bool syncing_slots = false;
153
154/*
155 * Interrupt flag set when PROCSIG_SLOTSYNC_MESSAGE is received, asking the
156 * slotsync worker or pg_sync_replication_slots() to stop because
157 * standby promotion has been triggered.
158 */
160
161/*
162 * Structure to hold information fetched from the primary server about a logical
163 * replication slot.
164 */
180
181static void slotsync_failure_callback(int code, Datum arg);
182static void update_synced_slots_inactive_since(void);
183
184/*
185 * Update slot sync skip stats. This function requires the caller to acquire
186 * the slot.
187 */
188static void
190{
191 ReplicationSlot *slot;
192
194
195 slot = MyReplicationSlot;
196
197 /*
198 * Update the slot sync related stats in pg_stat_replication_slots when a
199 * slot sync is skipped
200 */
203
204 /* Update the slot sync skip reason */
206 {
207 SpinLockAcquire(&slot->mutex);
209 SpinLockRelease(&slot->mutex);
210 }
211}
212
213/*
214 * If necessary, update the local synced slot's metadata based on the data
215 * from the remote slot.
216 *
217 * If no update was needed (the data of the remote slot is the same as the
218 * local slot) return false, otherwise true.
219 */
220static bool
222{
224 bool updated_xmin_or_lsn = false;
225 bool updated_config = false;
228
230
231 /*
232 * Make sure that concerned WAL is received and flushed before syncing
233 * slot to target lsn received from the primary server.
234 */
235 if (remote_slot->confirmed_lsn > latestFlushPtr)
236 {
238
239 /*
240 * Can get here only if GUC 'synchronized_standby_slots' on the
241 * primary server was not configured correctly.
242 */
243 ereport(LOG,
245 errmsg("skipping slot synchronization because the received slot sync"
246 " LSN %X/%08X for slot \"%s\" is ahead of the standby position %X/%08X",
247 LSN_FORMAT_ARGS(remote_slot->confirmed_lsn),
248 remote_slot->name,
250
251 return false;
252 }
253
254 /*
255 * Don't overwrite if we already have a newer catalog_xmin and
256 * restart_lsn.
257 */
258 if (remote_slot->restart_lsn < slot->data.restart_lsn ||
260 slot->data.catalog_xmin))
261 {
262 /* Update slot sync skip stats */
264
265 /*
266 * This can happen in the following situations:
267 *
268 * If the slot is temporary, it means either the initial WAL location
269 * reserved for the local slot is ahead of the remote slot's
270 * restart_lsn or the initial xmin_horizon computed for the local slot
271 * is ahead of the remote slot.
272 *
273 * If the slot is persistent, both restart_lsn and catalog_xmin of the
274 * synced slot could still be ahead of the remote slot. Since we use
275 * slot advance functionality to keep snapbuild/slot updated, it is
276 * possible that the restart_lsn and catalog_xmin are advanced to a
277 * later position than they have on the primary. This can happen when
278 * slot advancing machinery finds running xacts record after reaching
279 * the consistent state at a later point than the primary where it
280 * serializes the snapshot and updates the restart_lsn.
281 *
282 * We LOG the message if the slot is temporary as it can help the user
283 * to understand why the slot is not sync-ready. In the case of a
284 * persistent slot, it would be a more common case and won't directly
285 * impact the users, so we used DEBUG1 level to log the message.
286 */
288 errmsg("could not synchronize replication slot \"%s\"",
289 remote_slot->name),
290 errdetail("Synchronization could lead to data loss, because the remote slot needs WAL at LSN %X/%08X and catalog xmin %u, but the standby has LSN %X/%08X and catalog xmin %u.",
291 LSN_FORMAT_ARGS(remote_slot->restart_lsn),
292 remote_slot->catalog_xmin,
294 slot->data.catalog_xmin));
295
296 /*
297 * Skip updating the configuration. This is required to avoid syncing
298 * two_phase_at without syncing confirmed_lsn. Otherwise, the prepared
299 * transaction between old confirmed_lsn and two_phase_at will
300 * unexpectedly get decoded and sent to the downstream after
301 * promotion. See comments in ReorderBufferFinishPrepared.
302 */
303 return false;
304 }
305
306 /*
307 * Attempt to sync LSNs and xmins only if remote slot is ahead of local
308 * slot.
309 */
310 if (remote_slot->confirmed_lsn > slot->data.confirmed_flush ||
311 remote_slot->restart_lsn > slot->data.restart_lsn ||
312 TransactionIdFollows(remote_slot->catalog_xmin,
313 slot->data.catalog_xmin))
314 {
315 /*
316 * We can't directly copy the remote slot's LSN or xmin unless there
317 * exists a consistent snapshot at that point. Otherwise, after
318 * promotion, the slots may not reach a consistent point before the
319 * confirmed_flush_lsn, which can lead to data loss. To avoid data
320 * loss, we let slot machinery advance the slot which ensures that
321 * snapbuilder/slot statuses are updated properly.
322 */
323 if (SnapBuildSnapshotExists(remote_slot->restart_lsn))
324 {
325 /*
326 * Update the slot info directly if there is a serialized snapshot
327 * at the restart_lsn, as the slot can quickly reach consistency
328 * at restart_lsn by restoring the snapshot.
329 */
330 SpinLockAcquire(&slot->mutex);
331 slot->data.restart_lsn = remote_slot->restart_lsn;
332 slot->data.confirmed_flush = remote_slot->confirmed_lsn;
333 slot->data.catalog_xmin = remote_slot->catalog_xmin;
334 SpinLockRelease(&slot->mutex);
335 }
336 else
337 {
339
342
343 /* Sanity check */
344 if (slot->data.confirmed_flush != remote_slot->confirmed_lsn)
346 errmsg_internal("synchronized confirmed_flush for slot \"%s\" differs from remote slot",
347 remote_slot->name),
348 errdetail_internal("Remote slot has LSN %X/%08X but local slot has LSN %X/%08X.",
349 LSN_FORMAT_ARGS(remote_slot->confirmed_lsn),
351
352 /*
353 * If we can't reach a consistent snapshot, the slot won't be
354 * persisted. See update_and_persist_local_synced_slot().
355 */
357 {
359
360 ereport(LOG,
361 errmsg("could not synchronize replication slot \"%s\"",
362 remote_slot->name),
363 errdetail("Synchronization could lead to data loss, because the standby could not build a consistent snapshot to decode WALs at LSN %X/%08X.",
365
367 }
368 }
369
370 updated_xmin_or_lsn = true;
371 }
372
373 /* Update slot sync skip stats */
375
376 if (remote_dbid != slot->data.database ||
377 remote_slot->two_phase != slot->data.two_phase ||
378 remote_slot->failover != slot->data.failover ||
379 strcmp(remote_slot->plugin, NameStr(slot->data.plugin)) != 0 ||
380 remote_slot->two_phase_at != slot->data.two_phase_at)
381 {
383
384 /* Avoid expensive operations while holding a spinlock. */
386
387 SpinLockAcquire(&slot->mutex);
388 slot->data.plugin = plugin_name;
389 slot->data.database = remote_dbid;
390 slot->data.two_phase = remote_slot->two_phase;
391 slot->data.two_phase_at = remote_slot->two_phase_at;
392 slot->data.failover = remote_slot->failover;
393 SpinLockRelease(&slot->mutex);
394
395 updated_config = true;
396
397 /*
398 * Ensure that there is no risk of sending prepared transactions
399 * unexpectedly after the promotion.
400 */
402 }
403
404 /*
405 * We have to write the changed xmin to disk *before* we change the
406 * in-memory value, otherwise after a crash we wouldn't know that some
407 * catalog tuples might have been removed already.
408 */
410 {
413 }
414
415 /*
416 * Now the new xmin is safely on disk, we can let the global value
417 * advance. We do not take ProcArrayLock or similar since we only advance
418 * xmin here and there's not much harm done by a concurrent computation
419 * missing that.
420 */
422 {
423 SpinLockAcquire(&slot->mutex);
424 slot->effective_catalog_xmin = remote_slot->catalog_xmin;
425 SpinLockRelease(&slot->mutex);
426
429 }
430
432}
433
434/*
435 * Get the list of local logical slots that are synchronized from the
436 * primary server.
437 */
438static List *
440{
442
444
446 {
448
449 /* Check if it is a synchronized slot */
450 if (s->in_use && s->data.synced)
451 {
454 }
455 }
456
458
459 return local_slots;
460}
461
462/*
463 * Helper function to check if local_slot is required to be retained.
464 *
465 * Return false either if local_slot does not exist in the remote_slots list
466 * or is invalidated while the corresponding remote slot is still valid,
467 * otherwise true.
468 */
469static bool
471{
472 bool remote_exists = false;
473 bool locally_invalidated = false;
474
476 {
477 if (strcmp(remote_slot->name, NameStr(local_slot->data.name)) == 0)
478 {
479 remote_exists = true;
480
481 /*
482 * If remote slot is not invalidated but local slot is marked as
483 * invalidated, then set locally_invalidated flag.
484 */
487 (remote_slot->invalidated == RS_INVAL_NONE) &&
488 (local_slot->data.invalidated != RS_INVAL_NONE);
490
491 break;
492 }
493 }
494
496}
497
498/*
499 * Drop local obsolete slots.
500 *
501 * Drop the local slots that no longer need to be synced, i.e., those that
502 * either do not exist on the primary or are no longer enabled for failover.
503 *
504 * Additionally, drop any slots that are valid on the primary but got
505 * invalidated on the standby. This situation may occur due to the following
506 * reasons:
507 * - The 'max_slot_wal_keep_size' on the standby is insufficient to retain WAL
508 * records from the restart_lsn of the slot.
509 * - 'primary_slot_name' is temporarily reset to null and the physical slot is
510 * removed.
511 * These dropped slots will get recreated in the next sync cycle, and it is
512 * okay to drop and recreate such slots as long as they are not consumable on
513 * the standby (which is the case currently).
514 *
515 * Note: Change of 'wal_level' on the primary server to a level lower than
516 * logical may also result in slot invalidation and removal on the standby.
517 * This is because such 'wal_level' change is only possible if the logical
518 * slots are removed on the primary server, so it's expected to see the
519 * slots being invalidated and removed on the standby too (and re-created
520 * if they are re-created on the primary server).
521 */
522static void
524{
526
528 {
529 /* Drop the local slot if it is not required to be retained. */
531 {
532 bool synced_slot;
533
534 /*
535 * Use shared lock to prevent a conflict with
536 * ReplicationSlotsDropDBSlots(), trying to drop the same slot
537 * during a drop-database operation.
538 */
540 0, AccessShareLock);
541
542 /*
543 * In the small window between getting the slot to drop and
544 * locking the database, there is a possibility of a parallel
545 * database drop by the startup process and the creation of a new
546 * slot by the user. This new user-created slot may end up using
547 * the same shared memory as that of 'local_slot'. Thus check if
548 * local_slot is still the synced one before performing actual
549 * drop.
550 */
552 synced_slot = local_slot->in_use && local_slot->data.synced;
554
555 if (synced_slot)
556 {
557 ReplicationSlotAcquire(NameStr(local_slot->data.name), true, false);
559 }
560
562 0, AccessShareLock);
563
564 ereport(LOG,
565 errmsg("dropped replication slot \"%s\" of database with OID %u",
566 NameStr(local_slot->data.name),
567 local_slot->data.database));
568 }
569 }
570}
571
572/*
573 * Reserve WAL for the currently active local slot using the specified WAL
574 * location (restart_lsn).
575 *
576 * If the given WAL location has been removed or is at risk of removal,
577 * reserve WAL using the oldest segment that is non-removable.
578 */
579static void
581{
584 XLogSegNo segno;
586
587 Assert(slot != NULL);
589
590 /*
591 * Acquire an exclusive lock to prevent the checkpoint process from
592 * concurrently calculating the minimum slot LSN (see
593 * CheckPointReplicationSlots), ensuring that if WAL reservation occurs
594 * first, the checkpoint must wait for the restart_lsn update before
595 * calculating the minimum LSN.
596 *
597 * Note: Unlike ReplicationSlotReserveWal(), this lock does not protect a
598 * newly synced slot from being invalidated if a concurrent checkpoint has
599 * invoked CheckPointReplicationSlots() before the WAL reservation here.
600 * This can happen because the initial restart_lsn received from the
601 * remote server can precede the redo pointer. Therefore, when selecting
602 * the initial restart_lsn, we consider using the redo pointer or the
603 * minimum slot LSN (if those values are greater than the remote
604 * restart_lsn) instead of relying solely on the remote value.
605 */
607
608 /*
609 * Determine the minimum non-removable LSN by comparing the redo pointer
610 * with the minimum slot LSN.
611 *
612 * The minimum slot LSN is considered because the redo pointer advances at
613 * every checkpoint, even when replication slots are present on the
614 * standby. In such scenarios, the redo pointer can exceed the remote
615 * restart_lsn, while WALs preceding the remote restart_lsn remain
616 * protected by a local replication slot.
617 */
620
623
624 /*
625 * If the minimum safe LSN is greater than the given restart_lsn, use it
626 * as the initial restart_lsn for the newly synced slot. Otherwise, use
627 * the given remote restart_lsn.
628 */
629 SpinLockAcquire(&slot->mutex);
630 slot->data.restart_lsn = Max(restart_lsn, min_safe_lsn);
631 SpinLockRelease(&slot->mutex);
632
634
636 if (XLogGetLastRemovedSegno() >= segno)
637 elog(ERROR, "WAL required by replication slot %s has been removed concurrently",
638 NameStr(slot->data.name));
639
641}
642
643/*
644 * If the remote restart_lsn and catalog_xmin have caught up with the
645 * local ones, then update the LSNs and persist the local synced slot for
646 * future synchronization; otherwise, do nothing.
647 *
648 * *slot_persistence_pending is set to true if any of the slots fail to
649 * persist.
650 *
651 * Return true if the slot is marked as RS_PERSISTENT (sync-ready), otherwise
652 * false.
653 */
654static bool
657{
659
660 /* Slotsync skip stats are handled in function update_local_synced_slot() */
662
663 /*
664 * Check if the slot cannot be synchronized. Refer to the comment atop the
665 * file for details on this check.
666 */
668 {
669 /*
670 * We reach this point when the remote slot didn't catch up to the
671 * locally reserved position, or it cannot reach the consistent point from
672 * the restart_lsn, or the WAL prior to the remote confirmed flush LSN
673 * has not been received and flushed.
674 *
675 * We do not drop the slot because the restart_lsn and confirmed_lsn
676 * can be ahead of the current location when recreating the slot in
677 * the next cycle. It may take more time to create such a slot or
678 * reach the consistent point. Therefore, we keep this slot and
679 * attempt the synchronization in the next cycle.
680 *
681 * We also update the slot_persistence_pending parameter, so the SQL
682 * function can retry.
683 */
686
687 return false;
688 }
689
691
692 ereport(LOG,
693 errmsg("newly created replication slot \"%s\" is sync-ready now",
694 remote_slot->name));
695
696 return true;
697}
698
699/*
700 * Synchronize a single slot to the given position.
701 *
702 * This creates a new slot if there is no existing one and updates the
703 * metadata of the slot as per the data received from the primary server.
704 *
705 * The slot is created as a temporary slot and stays in that state until the
706 * remote_slot catches up with the locally reserved position and the local slot
707 * is updated. The slot is then persisted and is considered sync-ready for
708 * periodic syncs.
709 *
710 * *slot_persistence_pending is set to true if any of the slots fail to
711 * persist.
712 *
713 * Returns TRUE if the local slot is updated.
714 */
715static bool
718{
719 ReplicationSlot *slot;
720 bool slot_updated = false;
721
722 /* Search for the named slot */
723 if ((slot = SearchNamedReplicationSlot(remote_slot->name, true)))
724 {
725 bool synced;
726
727 SpinLockAcquire(&slot->mutex);
728 synced = slot->data.synced;
729 SpinLockRelease(&slot->mutex);
730
731 /* User-created slot with the same name exists, raise ERROR. */
732 if (!synced)
735 errmsg("exiting from slot synchronization because same"
736 " name slot \"%s\" already exists on the standby",
737 remote_slot->name));
738
739 /*
740 * The slot has been synchronized before.
741 *
742 * It is important to acquire the slot here before checking
743 * invalidation. If we don't acquire the slot first, there could be a
744 * race condition in which the local slot is invalidated just after
745 * checking the 'invalidated' flag here, and we could end up
746 * overwriting the 'invalidated' flag with remote_slot's value. See
747 * InvalidatePossiblyObsoleteSlot(), which invalidates a slot directly
748 * if the slot is not acquired by other processes.
749 *
750 * XXX: If it ever turns out that slot acquire/release is costly for
751 * cases when none of the slot properties is changed then we can do a
752 * pre-check to ensure that at least one of the slot properties is
753 * changed before acquiring the slot.
754 */
755 ReplicationSlotAcquire(remote_slot->name, true, false);
756
757 Assert(slot == MyReplicationSlot);
758
759 /*
760 * Copy the invalidation cause from the remote slot only if the local slot
761 * is not invalidated locally; we don't want to overwrite an existing one.
762 */
763 if (slot->data.invalidated == RS_INVAL_NONE &&
764 remote_slot->invalidated != RS_INVAL_NONE)
765 {
766 SpinLockAcquire(&slot->mutex);
767 slot->data.invalidated = remote_slot->invalidated;
768 SpinLockRelease(&slot->mutex);
769
770 /* Make sure the invalidated state persists across server restart */
773
774 slot_updated = true;
775 }
776
777 /* Skip the sync of an invalidated slot */
778 if (slot->data.invalidated != RS_INVAL_NONE)
779 {
781
783 return slot_updated;
784 }
785
786 /* Slot not ready yet, let's attempt to make it sync-ready now. */
787 if (slot->data.persistency == RS_TEMPORARY)
788 {
792 }
793
794 /* Slot ready for sync, so sync it. */
795 else
796 {
797 /*
798 * Sanity check: As long as the invalidations are handled
799 * appropriately as above, this should never happen.
800 *
801 * We don't need to check restart_lsn here. See the comments in
802 * update_local_synced_slot() for details.
803 */
804 if (remote_slot->confirmed_lsn < slot->data.confirmed_flush)
806 errmsg_internal("cannot synchronize local slot \"%s\"",
807 remote_slot->name),
808 errdetail_internal("Local slot's start streaming location LSN(%X/%08X) is ahead of remote slot's LSN(%X/%08X).",
810 LSN_FORMAT_ARGS(remote_slot->confirmed_lsn)));
811
813 }
814 }
815 /* Otherwise create the slot first. */
816 else
817 {
820
821 /* Skip creating the local slot if remote_slot is invalidated already */
822 if (remote_slot->invalidated != RS_INVAL_NONE)
823 return false;
824
825 /*
826 * We create temporary slots instead of ephemeral slots here because
827 * we want the slots to survive after releasing them. This is done to
828 * avoid dropping and re-creating the slots in each synchronization
829 * cycle if the restart_lsn or catalog_xmin of the remote slot has not
830 * caught up.
831 */
833 remote_slot->two_phase,
834 false,
835 remote_slot->failover,
836 true);
837
838 /* For shorter lines. */
839 slot = MyReplicationSlot;
840
841 /* Avoid expensive operations while holding a spinlock. */
843
844 SpinLockAcquire(&slot->mutex);
845 slot->data.database = remote_dbid;
846 slot->data.plugin = plugin_name;
847 SpinLockRelease(&slot->mutex);
848
850
854 SpinLockAcquire(&slot->mutex);
857 SpinLockRelease(&slot->mutex);
861
864
865 slot_updated = true;
866 }
867
869
870 return slot_updated;
871}
872
873/*
874 * Fetch remote slots.
875 *
876 * If slot_names is NIL, fetches all failover logical slots from the
877 * primary server, otherwise fetches only the ones with names in slot_names.
878 *
879 * Returns a list of remote slot information structures, or NIL if none
880 * are found.
881 */
882static List *
884{
885#define SLOTSYNC_COLUMN_COUNT 10
888
889 WalRcvExecResult *res;
890 TupleTableSlot *tupslot;
892 StringInfoData query;
893
894 initStringInfo(&query);
896 "SELECT slot_name, plugin, confirmed_flush_lsn,"
897 " restart_lsn, catalog_xmin, two_phase,"
898 " two_phase_at, failover,"
899 " database, invalidation_reason"
900 " FROM pg_catalog.pg_replication_slots"
901 " WHERE failover and NOT temporary");
902
903 if (slot_names != NIL)
904 {
905 bool first_slot = true;
906
907 /*
908 * Construct the query to fetch only the specified slots
909 */
910 appendStringInfoString(&query, " AND slot_name IN (");
911
912 foreach_ptr(char, slot_name, slot_names)
913 {
914 if (!first_slot)
915 appendStringInfoString(&query, ", ");
916
917 appendStringInfo(&query, "%s", quote_literal_cstr(slot_name));
918 first_slot = false;
919 }
920 appendStringInfoChar(&query, ')');
921 }
922
923 /* Execute the query */
925 pfree(query.data);
926 if (res->status != WALRCV_OK_TUPLES)
928 errmsg("could not fetch failover logical slots info from the primary server: %s",
929 res->err));
930
932 while (tuplestore_gettupleslot(res->tuplestore, true, false, tupslot))
933 {
934 bool isnull;
936 Datum d;
937 int col = 0;
938
940 &isnull));
941 Assert(!isnull);
942
943 remote_slot->plugin = TextDatumGetCString(slot_getattr(tupslot, ++col,
944 &isnull));
945 Assert(!isnull);
946
947 /*
948 * It is possible to get null values for LSN and Xmin if slot is
949 * invalidated on the primary server, so handle accordingly.
950 */
951 d = slot_getattr(tupslot, ++col, &isnull);
952 remote_slot->confirmed_lsn = isnull ? InvalidXLogRecPtr :
953 DatumGetLSN(d);
954
955 d = slot_getattr(tupslot, ++col, &isnull);
956 remote_slot->restart_lsn = isnull ? InvalidXLogRecPtr : DatumGetLSN(d);
957
958 d = slot_getattr(tupslot, ++col, &isnull);
959 remote_slot->catalog_xmin = isnull ? InvalidTransactionId :
961
962 remote_slot->two_phase = DatumGetBool(slot_getattr(tupslot, ++col,
963 &isnull));
964 Assert(!isnull);
965
966 d = slot_getattr(tupslot, ++col, &isnull);
967 remote_slot->two_phase_at = isnull ? InvalidXLogRecPtr : DatumGetLSN(d);
968
969 remote_slot->failover = DatumGetBool(slot_getattr(tupslot, ++col,
970 &isnull));
971 Assert(!isnull);
972
973 remote_slot->database = TextDatumGetCString(slot_getattr(tupslot,
974 ++col, &isnull));
975 Assert(!isnull);
976
977 d = slot_getattr(tupslot, ++col, &isnull);
978 remote_slot->invalidated = isnull ? RS_INVAL_NONE :
980
981 /* Sanity check */
983
984 /*
985 * If restart_lsn, confirmed_lsn or catalog_xmin is invalid but the
986 * slot is valid, that means we have fetched the remote_slot in its
987 * RS_EPHEMERAL state. In such a case, don't sync it; we can always
988 * sync it in the next sync cycle when the remote_slot is persisted
989 * and has valid lsn(s) and xmin values.
990 *
991 * XXX: In future, if we plan to expose 'slot->data.persistency' in
992 * pg_replication_slots view, then we can avoid fetching RS_EPHEMERAL
993 * slots in the first place.
994 */
995 if ((!XLogRecPtrIsValid(remote_slot->restart_lsn) ||
996 !XLogRecPtrIsValid(remote_slot->confirmed_lsn) ||
997 !TransactionIdIsValid(remote_slot->catalog_xmin)) &&
998 remote_slot->invalidated == RS_INVAL_NONE)
1000 else
1001 /* Create list of remote slots */
1003
1004 ExecClearTuple(tupslot);
1005 }
1006
1008
1009 return remote_slot_list;
1010}
1011
1012/*
1013 * Synchronize slots.
1014 *
1015 * This function takes a list of remote slots and synchronizes them locally. It
1016 * creates the slots if not present on the standby and updates existing ones.
1017 *
1018 * If slot_persistence_pending is not NULL, it will be set to true if one or
1019 * more slots could not be persisted. This allows callers such as
1020 * SyncReplicationSlots() to retry those slots.
1021 *
1022 * Returns TRUE if any of the slots gets updated in this sync-cycle.
1023 */
1024static bool
1027{
1028 bool some_slot_updated = false;
1029
1030 /* Drop local slots that no longer need to be synced. */
1032
1033 /* Now sync the slots locally */
1035 {
1036 Oid remote_dbid = get_database_oid(remote_slot->database, false);
1037
1038 /*
1039 * Use shared lock to prevent a conflict with
1040 * ReplicationSlotsDropDBSlots(), trying to drop the same slot during
1041 * a drop-database operation.
1042 */
1044
1047
1049 }
1050
1051 return some_slot_updated;
1052}
1053
1054/*
1055 * Checks the remote server info.
1056 *
1057 * We ensure that the 'primary_slot_name' exists on the remote server and the
1058 * remote server is not a standby node.
1059 */
1060static void
1062{
1063#define PRIMARY_INFO_OUTPUT_COL_COUNT 2
1064 WalRcvExecResult *res;
1066 StringInfoData cmd;
1067 bool isnull;
1068 TupleTableSlot *tupslot;
1069 bool remote_in_recovery;
1070 bool primary_slot_valid;
1071 bool started_tx = false;
1072
1073 initStringInfo(&cmd);
1074 appendStringInfo(&cmd,
1075 "SELECT pg_is_in_recovery(), count(*) = 1"
1076 " FROM pg_catalog.pg_replication_slots"
1077 " WHERE slot_type='physical' AND slot_name=%s",
1079
1080 /* The syscache access in walrcv_exec() needs a transaction env. */
1081 if (!IsTransactionState())
1082 {
1084 started_tx = true;
1085 }
1086
1088 pfree(cmd.data);
1089
1090 if (res->status != WALRCV_OK_TUPLES)
1091 ereport(ERROR,
1092 errmsg("could not fetch primary slot name \"%s\" info from the primary server: %s",
1093 PrimarySlotName, res->err),
1094 errhint("Check if \"primary_slot_name\" is configured correctly."));
1095
1097 if (!tuplestore_gettupleslot(res->tuplestore, true, false, tupslot))
1098 elog(ERROR,
1099 "failed to fetch tuple for the primary server slot specified by \"primary_slot_name\"");
1100
1101 remote_in_recovery = DatumGetBool(slot_getattr(tupslot, 1, &isnull));
1102 Assert(!isnull);
1103
1104 /*
1105 * Slot sync is currently not supported on a cascading standby. This is
1106 * because if we allow it, the primary server needs to wait for all the
1107 * cascading standbys, otherwise, logical subscribers can still be ahead
1108 * of one of the cascading standbys which we plan to promote. Thus, to
1109 * avoid this additional complexity, we restrict it for the time being.
1110 */
1112 ereport(ERROR,
1114 errmsg("cannot synchronize replication slots from a standby server"));
1115
1116 primary_slot_valid = DatumGetBool(slot_getattr(tupslot, 2, &isnull));
1117 Assert(!isnull);
1118
1119 if (!primary_slot_valid)
1120 ereport(ERROR,
1122 /* translator: second %s is a GUC variable name */
1123 errmsg("replication slot \"%s\" specified by \"%s\" does not exist on primary server",
1124 PrimarySlotName, "primary_slot_name"));
1125
1126 ExecClearTuple(tupslot);
1128
1129 if (started_tx)
1131}
1132
1133/*
1134 * Checks if dbname is specified in 'primary_conninfo'.
1135 *
1136 * Error out if not specified otherwise return it.
1137 */
1138char *
1140{
1141 char *dbname;
1142
1143 /*
1144 * The slot synchronization needs a database connection for walrcv_exec to
1145 * work.
1146 */
1148 if (dbname == NULL)
1149 ereport(ERROR,
1151
1152 /*
1153 * translator: first %s is a connection option; second %s is a GUC
1154 * variable name
1155 */
1156 errmsg("replication slot synchronization requires \"%s\" to be specified in \"%s\"",
1157 "dbname", "primary_conninfo"));
1158 return dbname;
1159}
1160
1161/*
1162 * Return true if all necessary GUCs for slot synchronization are set
1163 * appropriately, otherwise, return false.
1164 */
1165bool
1167{
1168 /*
1169 * Logical slot sync/creation requires logical decoding to be enabled.
1170 */
1172 {
1173 ereport(elevel,
1175 errmsg("replication slot synchronization requires \"effective_wal_level\" >= \"logical\" on the primary"),
1176 errhint("To enable logical decoding on primary, set \"wal_level\" >= \"logical\" or create at least one logical slot when \"wal_level\" = \"replica\"."));
1177
1178 return false;
1179 }
1180
1181 /*
1182 * A physical replication slot (primary_slot_name) is required on the
1183 * primary to ensure that the rows needed by the standby are not removed
1184 * after restarting, so that the synchronized slot on the standby will not
1185 * be invalidated.
1186 */
1187 if (PrimarySlotName == NULL || *PrimarySlotName == '\0')
1188 {
1189 ereport(elevel,
1191 /* translator: %s is a GUC variable name */
1192 errmsg("replication slot synchronization requires \"%s\" to be set", "primary_slot_name"));
1193 return false;
1194 }
1195
1196 /*
1197 * hot_standby_feedback must be enabled to cooperate with the physical
1198 * replication slot, which allows informing the primary about the xmin and
1199 * catalog_xmin values on the standby.
1200 */
1202 {
1203 ereport(elevel,
1205 /* translator: %s is a GUC variable name */
1206 errmsg("replication slot synchronization requires \"%s\" to be enabled",
1207 "hot_standby_feedback"));
1208 return false;
1209 }
1210
1211 /*
1212 * The primary_conninfo is required to make connection to primary for
1213 * getting slots information.
1214 */
1215 if (PrimaryConnInfo == NULL || *PrimaryConnInfo == '\0')
1216 {
1217 ereport(elevel,
1219 /* translator: %s is a GUC variable name */
1220 errmsg("replication slot synchronization requires \"%s\" to be set",
1221 "primary_conninfo"));
1222 return false;
1223 }
1224
1225 return true;
1226}
1227
1228/*
1229 * Re-read the config file for slot synchronization.
1230 *
1231 * Exit or throw an error if relevant GUCs have changed, depending on whether
1232 * called from the slot sync worker or from the SQL function pg_sync_replication_slots().
1233 */
1234static void
1236{
1241 bool conninfo_changed;
1244 bool parameter_changed = false;
1245
1248
1249 ConfigReloadPending = false;
1251
1256
1258 {
1260 {
1261 ereport(LOG,
1262 /* translator: %s is a GUC variable name */
1263 errmsg("replication slot synchronization worker will stop because \"%s\" is disabled",
1264 "sync_replication_slots"));
1265
1266 proc_exit(0);
1267 }
1268
1269 parameter_changed = true;
1270 }
1271 else
1272 {
1273 if (conninfo_changed ||
1276 {
1277
1279 {
1280 ereport(LOG,
1281 errmsg("replication slot synchronization worker will restart because of a parameter change"));
1282
1283 /*
1284 * Reset the last-start time for this worker so that the
1285 * postmaster can restart it without waiting for
1286 * SLOTSYNC_RESTART_INTERVAL_SEC.
1287 */
1289
1290 proc_exit(0);
1291 }
1292
1293 parameter_changed = true;
1294 }
1295 }
1296
1297 /*
1298 * If we have reached here with a parameter change, we must be running in
1299 * the SQL function, so emit an error in such a case.
1300 */
1302 {
1304 ereport(ERROR,
1306 errmsg("replication slot synchronization will stop because of a parameter change"));
1307 }
1308
1309}
1310
1311/*
1312 * Handle receipt of an interrupt indicating a slotsync shutdown message.
1313 *
1314 * This is called within the SIGUSR1 handler. All we do here is set a flag
1315 * that will cause the next CHECK_FOR_INTERRUPTS() to invoke
1316 * ProcessSlotSyncMessage().
1317 */
1318void
1320{
1321 InterruptPending = true;
1323 /* latch will be set by procsignal_sigusr1_handler */
1324}
1325
1326/*
1327 * Handle a PROCSIG_SLOTSYNC_MESSAGE signal, called from ProcessInterrupts().
1328 *
1329 * If the current process is the slotsync background worker, log a message
1330 * and exit cleanly. If it is a backend executing pg_sync_replication_slots(),
1331 * raise an error, unless the sync has already finished, in which case there
1332 * is no need to interrupt the caller.
1333 */
1334void
1336{
1338
1340 {
1341 ereport(LOG,
1342 errmsg("replication slot synchronization worker will stop because promotion is triggered"));
1343 proc_exit(0);
1344 }
1345 else
1346 {
1347 /*
1348 * If sync has already completed, there is no need to interrupt the
1349 * caller with an error.
1350 */
1352 return;
1353
1354 ereport(ERROR,
1356 errmsg("replication slot synchronization will stop because promotion is triggered"));
1357 }
1358}
1359
1360/*
1361 * Connection cleanup function for slotsync worker.
1362 *
1363 * Called on slotsync worker exit.
1364 */
1365static void
1372
1373/*
1374 * Cleanup function for slotsync worker.
1375 *
1376 * Called on slotsync worker exit.
1377 */
1378static void
1380{
1381 /*
1382 * We need to do slots cleanup here just like WalSndErrorCleanup() does.
1383 *
1384 * The startup process during promotion invokes ShutDownSlotSync() which
1385 * waits for slot sync to finish and it does that by checking the
1386 * 'syncing' flag. Thus the slot sync worker must be done with slots'
1387 * release and cleanup to avoid any dangling temporary slots or active
1388 * slots before it marks itself as finished syncing.
1389 */
1390
1391 /* Make sure active replication slots are released */
1392 if (MyReplicationSlot != NULL)
1394
1395 /* Also cleanup the temporary slots. */
1397
1399
1401
1402 /*
1403 * If syncing_slots is true, it indicates that the process errored out
1404 * without resetting the flag. So, we need to clean up shared memory and
1405 * reset the flag here.
1406 */
1407 if (syncing_slots)
1408 {
1409 SlotSyncCtx->syncing = false;
1410 syncing_slots = false;
1411 }
1412
1414}
1415
1416/*
1417 * Sleep for long enough that we believe it's likely that the slots on primary
1418 * get updated.
1419 *
1420 * If there is no slot activity the wait time between sync-cycles will double
1421 * (to a maximum of 30s). If there is some slot activity the wait time between
1422 * sync-cycles is reset to the minimum (200ms).
1423 */
1424static void
1426{
1427 int rc;
1428
1429 if (!some_slot_updated)
1430 {
1431 /*
1432 * No slots were updated, so double the sleep time, but not beyond the
1433 * maximum allowable value.
1434 */
1436 }
1437 else
1438 {
1439 /*
1440 * Some slots were updated since the last sleep, so reset the sleep
1441 * time.
1442 */
1444 }
1445
1446 rc = WaitLatch(MyLatch,
1448 sleep_ms,
1450
1451 if (rc & WL_LATCH_SET)
1453}
1454
1455/*
1456 * Emit an error if a concurrent sync call is in progress.
1457 * Otherwise, advertise that a sync is in progress.
1458 */
1459static void
1461{
1463
1464 /*
1465 * Exit immediately if promotion has been triggered. This guards against
1466 * a new worker (or a call to pg_sync_replication_slots()) that starts
1467 * after the old worker was stopped by ShutDownSlotSync().
1468 */
1470 {
1472
1474 {
1476 errmsg("replication slot synchronization worker will not start because promotion was triggered"));
1477
1478 proc_exit(0);
1479 }
1480 else
1481 {
1482 /*
1483 * For the backend executing SQL function
1484 * pg_sync_replication_slots().
1485 */
1486 ereport(ERROR,
1488 errmsg("replication slot synchronization will not start because promotion was triggered"));
1489 }
1490 }
1491
1492 if (SlotSyncCtx->syncing)
1493 {
1495 ereport(ERROR,
1497 errmsg("cannot synchronize replication slots concurrently"));
1498 }
1499
1500 /* The pid must not be already assigned in SlotSyncCtx */
1502
1503 SlotSyncCtx->syncing = true;
1504
1505 /*
1506 * Advertise the required PID so that the startup process can kill the
1507 * slot sync process on promotion.
1508 */
1510
1512
1513 syncing_slots = true;
1514}
1515
1516/*
1517 * Reset syncing flag.
1518 */
1519static void
1529
1530/*
1531 * The main loop of our worker process.
1532 *
1533 * It connects to the primary server, fetches logical failover slots
1534 * information periodically in order to create and sync the slots.
1535 *
1536 * Note: If any changes are made here, check if the corresponding SQL
1537 * function logic in SyncReplicationSlots() also needs to be changed.
1538 */
1539void
1541{
1543 char *dbname;
1544 char *err;
1547
1549
1550 /* Release postmaster's working memory context */
1552 {
1555 }
1556
1558
1560
1561 /*
1562 * Create a per-backend PGPROC struct in shared memory. We must do this
1563 * before we access any shared memory.
1564 */
1565 InitProcess();
1566
1567 /*
1568 * Early initialization.
1569 */
1570 BaseInit();
1571
1573
1574 /*
1575 * If an exception is encountered, processing resumes here.
1576 *
1577 * We just need to clean up, report the error, and go away.
1578 *
1579 * If we do not have this handling here, then since this worker process
1580 * operates at the bottom of the exception stack, ERRORs turn into FATALs.
1581 * Therefore, we create our own exception handler to catch ERRORs.
1582 */
1583 if (sigsetjmp(local_sigjmp_buf, 1) != 0)
1584 {
1585 /* since not using PG_TRY, must reset error stack by hand */
1587
1588 /* Prevents interrupts while cleaning up */
1590
1591 /* Report the error to the server log */
1593
1594 /*
1595 * We can now go away. Note that because we called InitProcess, a
1596 * callback was registered to do ProcKill, which will clean up
1597 * necessary state.
1598 */
1599 proc_exit(0);
1600 }
1601
1602 /* We can now handle ereport(ERROR) */
1604
1605 /* Setup signal handling */
1614
1616
1617 ereport(LOG, errmsg("slot sync worker started"));
1618
1619 /* Register it as soon as SlotSyncCtx->pid is initialized. */
1621
1622 /*
1623 * Establish the SIGALRM handler and initialize the timeout module. It is needed
1624 * by InitPostgres to register different timeouts.
1625 */
1627
1628 /* Load the libpq-specific functions */
1629 load_file("libpqwalreceiver", false);
1630
1631 /*
1632 * Unblock signals (they were blocked when the postmaster forked us)
1633 */
1635
1636 /*
1637 * Set always-secure search path, so malicious users can't redirect user
1638 * code (e.g. operators).
1639 *
1640 * It's not strictly necessary since we won't be scanning or writing to
1641 * any user table locally, but it's good to retain it here for added
1642 * precaution.
1643 */
1644 SetConfigOption("search_path", "", PGC_SUSET, PGC_S_OVERRIDE);
1645
1647
1648 /*
1649 * Connect to the database specified by the user in primary_conninfo. We
1650 * need a database connection for walrcv_exec to work which we use to
1651 * fetch slot information from the remote node. See comments atop
1652 * libpqrcv_exec.
1653 *
1654 * We do not specify a specific user here since the slot sync worker will
1655 * operate as a superuser. This is safe because the slot sync worker does
1656 * not interact with user tables, eliminating the risk of executing
1657 * arbitrary code within triggers.
1658 */
1660
1662
1664 if (cluster_name[0])
1665 appendStringInfo(&app_name, "%s_%s", cluster_name, "slotsync worker");
1666 else
1667 appendStringInfoString(&app_name, "slotsync worker");
1668
1669 /*
1670 * Establish the connection to the primary server for slot
1671 * synchronization.
1672 */
1673 wrconn = walrcv_connect(PrimaryConnInfo, false, false, false,
1674 app_name.data, &err);
1675
1676 if (!wrconn)
1677 ereport(ERROR,
1679 errmsg("synchronization worker \"%s\" could not connect to the primary server: %s",
1680 app_name.data, err));
1681
1682 pfree(app_name.data);
1683
1684 /*
1685 * Register the disconnection callback.
1686 *
1687 * XXX: This can be combined with previous cleanup registration of
1688 * slotsync_worker_onexit() but that will need the connection to be made
1689 * global and we want to avoid introducing global for this purpose.
1690 */
1692
1693 /*
1694 * Using the specified primary server connection, check that we are not a
1695 * cascading standby and slot configured in 'primary_slot_name' exists on
1696 * the primary server.
1697 */
1699
1700 /* Main loop to synchronize slots */
1701 for (;;)
1702 {
1703 bool some_slot_updated = false;
1704 bool started_tx = false;
1706
1708
1711
1712 /*
1713 * The syscache access in fetch_remote_slots() needs a transaction
1714 * env.
1715 */
1716 if (!IsTransactionState())
1717 {
1719 started_tx = true;
1720 }
1721
1725
1726 if (started_tx)
1728
1730 }
1731
1732 /*
1733 * The slot sync worker can't get here because it will only stop when it
1734 * receives a stop request from the startup process, or when there is an
1735 * error.
1736 */
1737 Assert(false);
1738}
1739
1740/*
1741 * Update the inactive_since property for synced slots.
1742 *
1743 * Note that this function is currently called when we shutdown the slot
1744 * sync machinery.
1745 */
1746static void
1748{
1749 TimestampTz now = 0;
1750
1751 /*
1752 * We need to update inactive_since only when we are promoting standby to
1753 * correctly interpret the inactive_since if the standby gets promoted
1754 * without a restart. We don't want the slots to appear inactive for a
1755 * long time after promotion if they haven't been synchronized recently.
1756 * Whoever acquires the slot, i.e., makes the slot active, will reset it.
1757 */
1758 if (!StandbyMode)
1759 return;
1760
1761 /* The slot sync worker or the SQL function mustn't be running by now */
1763
1765
1767 {
1769
1770 /* Check if it is a synchronized slot */
1771 if (s->in_use && s->data.synced)
1772 {
1774
1775 /* The slot must not be acquired by any process */
1777
1778 /* Use the same inactive_since time for all the slots. */
1779 if (now == 0)
1781
1783 }
1784 }
1785
1787}
1788
1789/*
1790 * Shut down slot synchronization.
1791 *
1792 * This function sets stopSignaled=true and wakes up the slot sync process
1793 * (either worker or backend running the SQL function pg_sync_replication_slots())
1794 * so that worker can exit or the SQL function pg_sync_replication_slots() can
1795 * finish. It also waits till the slot sync worker has exited or
1796 * pg_sync_replication_slots() has finished.
1797 */
1798void
1800{
1802
1804
1805 SlotSyncCtx->stopSignaled = true;
1806
1807 /*
1808 * Return if neither the slot sync worker is running nor the function
1809 * pg_sync_replication_slots() is executing.
1810 */
1811 if (!SlotSyncCtx->syncing)
1812 {
1815 return;
1816 }
1817
1819
1821
1822 /*
1823 * Signal process doing slotsync, if any, asking it to stop.
1824 */
1828
1829 /* Wait for slot sync to end */
1830 for (;;)
1831 {
1832 int rc;
1833
1834 /* Wait a bit, we don't expect to have to wait long */
1835 rc = WaitLatch(MyLatch,
1838
1839 if (rc & WL_LATCH_SET)
1840 {
1843 }
1844
1846
1847 /* Ensure that no process is syncing the slots. */
1848 if (!SlotSyncCtx->syncing)
1849 break;
1850
1852 }
1853
1855
1857}
1858
1859/*
1860 * SlotSyncWorkerCanRestart
1861 *
1862 * Return true, indicating worker is allowed to restart, if enough time has
1863 * passed since it was last launched to reach SLOTSYNC_RESTART_INTERVAL_SEC.
1864 * Otherwise return false.
1865 *
1866 * This is a safety valve to protect against continuous respawn attempts if the
1867 * worker is dying immediately at launch. Note that since we will retry to
1868 * launch the worker from the postmaster main loop, we will get another
1869 * chance later.
1870 */
1871bool
1873{
1874 time_t curtime = time(NULL);
1875
1876 /*
1877 * If first time through, or time somehow went backwards, always update
1878 * last_start_time to match the current clock and allow worker start.
1879 * Otherwise allow it only once enough time has elapsed.
1880 */
1881 if (SlotSyncCtx->last_start_time == 0 ||
1882 curtime < SlotSyncCtx->last_start_time ||
1884 {
1886 return true;
1887 }
1888 return false;
1889}
1890
1891/*
1892 * Is current process syncing replication slots?
1893 *
1894 * Could be either backend executing SQL function or slot sync worker.
1895 */
1896bool
1898{
1899 return syncing_slots;
1900}
1901
1902/*
1903 * Register shared memory space needed for slot synchronization.
1904 */
1905static void
1907{
1908 ShmemRequestStruct(.name = "Slot Sync Data",
1909 .size = sizeof(SlotSyncCtxStruct),
1910 .ptr = (void **) &SlotSyncCtx,
1911 );
1912}
1913
1914/*
1915 * Initialize shared memory for slot synchronization.
1916 */
1917static void
1924
1925/*
1926 * Error cleanup callback for slot sync SQL function.
1927 */
1928static void
1930{
1932
1933 /*
1934 * We need to do slots cleanup here just like WalSndErrorCleanup() does.
1935 *
1936 * The startup process during promotion invokes ShutDownSlotSync() which
1937 * waits for slot sync to finish and it does that by checking the
1938 * 'syncing' flag. Thus the SQL function must be done with slots' release
1939 * and cleanup to avoid any dangling temporary slots or active slots
1940 * before it marks itself as finished syncing.
1941 */
1942
1943 /* Make sure active replication slots are released */
1944 if (MyReplicationSlot != NULL)
1946
1947 /* Also cleanup the synced temporary slots. */
1949
1950 /*
1951 * If syncing_slots is set, it indicates that the process errored out without
1952 * resetting the flag. So, we need to clean up shared memory and reset the
1953 * flag here.
1954 */
1955 if (syncing_slots)
1957
1959}
1960
1961/*
1962 * Helper function to extract slot names from a list of remote slots
1963 */
1964static List *
1966{
1967 List *slot_names = NIL;
1968
1970 {
1971 char *slot_name;
1972
1973 slot_name = pstrdup(remote_slot->name);
1974 slot_names = lappend(slot_names, slot_name);
1975 }
1976
1977 return slot_names;
1978}
1979
1980/*
1981 * Synchronize the failover enabled replication slots using the specified
1982 * primary server connection.
1983 *
1984 * Repeatedly fetches and updates replication slot information from the
1985 * primary until all slots are at least "sync ready".
1986 *
1987 * Exits early if promotion is triggered or certain critical
1988 * configuration parameters have changed.
1989 */
1990void
1992{
1994 {
1996 List *slot_names = NIL; /* List of slot names to track */
1997
1999
2001
2002 /* Retry until all the slots are sync-ready */
2003 for (;;)
2004 {
2005 bool slot_persistence_pending = false;
2006 bool some_slot_updated = false;
2007
2008 /* Check for interrupts and config changes */
2010
2013
2014 /* We must be in a valid transaction state */
2016
2017 /*
2018 * Fetch remote slot info for the given slot_names. If slot_names
2019 * is NIL, fetch all failover-enabled slots. Note that we reuse
2020 * slot_names from the first iteration; re-fetching all failover
2021 * slots each time could cause an endless loop. Instead of
2022 * reprocessing only the pending slots in each iteration, it's
2023 * better to process all the slots received in the first
2024 * iteration. This ensures that by the time we're done, all slots
2025 * reflect the latest values.
2026 */
2027 remote_slots = fetch_remote_slots(wrconn, slot_names);
2028
2029 /* Attempt to synchronize slots */
2032
2033 /*
2034 * If slot_persistence_pending is true, extract slot names for
2035 * future iterations (only needed if we haven't done it yet)
2036 */
2037 if (slot_names == NIL && slot_persistence_pending)
2038 slot_names = extract_slot_names(remote_slots);
2039
2040 /* Free the current remote_slots list */
2042
2043 /* Done if all slots are persisted, i.e., are sync-ready */
2045 break;
2046
2047 /* Wait before retrying */
2049 }
2050
2051 if (slot_names)
2052 list_free_deep(slot_names);
2053
2054 /* Cleanup the synced temporary slots */
2056
2057 /* We are done with sync, so reset sync flag */
2059 }
2061}
sigset_t UnBlockSig
Definition pqsignal.c:22
TimestampTz GetCurrentTimestamp(void)
Definition timestamp.c:1639
Datum now(PG_FUNCTION_ARGS)
Definition timestamp.c:1603
#define TextDatumGetCString(d)
Definition builtins.h:99
#define NameStr(name)
Definition c.h:835
#define Min(x, y)
Definition c.h:1091
#define Max(x, y)
Definition c.h:1085
#define Assert(condition)
Definition c.h:943
uint32 TransactionId
Definition c.h:736
int64 TimestampTz
Definition timestamp.h:39
Oid get_database_oid(const char *dbname, bool missing_ok)
void load_file(const char *filename, bool restricted)
Definition dfmgr.c:149
Datum arg
Definition elog.c:1322
void EmitErrorReport(void)
Definition elog.c:1882
ErrorContextCallback * error_context_stack
Definition elog.c:99
int errcode(int sqlerrcode)
Definition elog.c:874
sigjmp_buf * PG_exception_stack
Definition elog.c:101
#define LOG
Definition elog.h:32
int int errdetail_internal(const char *fmt,...) pg_attribute_printf(1
int errhint(const char *fmt,...) pg_attribute_printf(1
int errdetail(const char *fmt,...) pg_attribute_printf(1
int int errmsg_internal(const char *fmt,...) pg_attribute_printf(1
#define DEBUG1
Definition elog.h:31
#define ERROR
Definition elog.h:40
#define elog(elevel,...)
Definition elog.h:228
#define ereport(elevel,...)
Definition elog.h:152
void err(int eval, const char *fmt,...)
Definition err.c:43
TupleTableSlot * MakeSingleTupleTableSlot(TupleDesc tupdesc, const TupleTableSlotOps *tts_ops)
const TupleTableSlotOps TTSOpsMinimalTuple
Definition execTuples.c:86
#define palloc0_object(type)
Definition fe_memutils.h:75
volatile sig_atomic_t InterruptPending
Definition globals.c:32
int MyProcPid
Definition globals.c:49
struct Latch * MyLatch
Definition globals.c:65
void ProcessConfigFile(GucContext context)
Definition guc-file.l:120
void SetConfigOption(const char *name, const char *value, GucContext context, GucSource source)
Definition guc.c:4234
@ PGC_S_OVERRIDE
Definition guc.h:123
@ PGC_SUSET
Definition guc.h:78
@ PGC_SIGHUP
Definition guc.h:75
char * cluster_name
Definition guc_tables.c:582
volatile sig_atomic_t ConfigReloadPending
Definition interrupt.c:27
void SignalHandlerForConfigReload(SIGNAL_ARGS)
Definition interrupt.c:61
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition ipc.c:344
void proc_exit(int code)
Definition ipc.c:105
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition ipc.h:47
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition ipc.h:52
int i
Definition isn.c:77
void ResetLatch(Latch *latch)
Definition latch.c:374
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition latch.c:172
List * lappend(List *list, void *datum)
Definition list.c:339
void list_free_deep(List *list)
Definition list.c:1560
void LockSharedObject(Oid classid, Oid objid, uint16 objsubid, LOCKMODE lockmode)
Definition lmgr.c:1088
void UnlockSharedObject(Oid classid, Oid objid, uint16 objsubid, LOCKMODE lockmode)
Definition lmgr.c:1148
#define AccessShareLock
Definition lockdefs.h:36
XLogRecPtr LogicalSlotAdvanceAndCheckSnapState(XLogRecPtr moveto, bool *found_consistent_snapshot)
Definition logical.c:2099
bool IsLogicalDecodingEnabled(void)
Definition logicalctl.c:202
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1150
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1767
@ LW_SHARED
Definition lwlock.h:105
@ LW_EXCLUSIVE
Definition lwlock.h:104
char * pstrdup(const char *in)
Definition mcxt.c:1781
void pfree(void *pointer)
Definition mcxt.c:1616
MemoryContext PostmasterContext
Definition mcxt.c:168
void MemoryContextDelete(MemoryContext context)
Definition mcxt.c:472
@ NormalProcessing
Definition miscadmin.h:490
@ InitProcessing
Definition miscadmin.h:489
#define GetProcessingMode()
Definition miscadmin.h:499
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:125
#define AmLogicalSlotSyncWorkerProcess()
Definition miscadmin.h:401
#define HOLD_INTERRUPTS()
Definition miscadmin.h:136
#define SetProcessingMode(mode)
Definition miscadmin.h:501
#define InvalidPid
Definition miscadmin.h:32
void namestrcpy(Name name, const char *str)
Definition name.c:233
static char * errmsg
#define NIL
Definition pg_list.h:68
#define foreach_ptr(type, var, lst)
Definition pg_list.h:501
static XLogRecPtr DatumGetLSN(Datum X)
Definition pg_lsn.h:25
#define die(msg)
void pgstat_report_replslotsync(ReplicationSlot *slot)
#define pqsignal
Definition port.h:547
void FloatExceptionHandler(SIGNAL_ARGS)
Definition postgres.c:3070
void StatementCancelHandler(SIGNAL_ARGS)
Definition postgres.c:3053
static bool DatumGetBool(Datum X)
Definition postgres.h:100
static Datum PointerGetDatum(const void *X)
Definition postgres.h:342
uint64_t Datum
Definition postgres.h:70
static Pointer DatumGetPointer(Datum X)
Definition postgres.h:332
static TransactionId DatumGetTransactionId(Datum X)
Definition postgres.h:282
#define InvalidOid
unsigned int Oid
void BaseInit(void)
Definition postinit.c:616
void InitPostgres(const char *in_dbname, Oid dboid, const char *username, Oid useroid, uint32 flags, char *out_dbname)
Definition postinit.c:719
static int fb(int x)
TransactionId GetOldestSafeDecodingTransactionId(bool catalogOnly)
Definition procarray.c:2919
#define INVALID_PROC_NUMBER
Definition procnumber.h:26
int SendProcSignal(pid_t pid, ProcSignalReason reason, ProcNumber procNumber)
Definition procsignal.c:288
void procsignal_sigusr1_handler(SIGNAL_ARGS)
Definition procsignal.c:688
@ PROCSIG_SLOTSYNC_MESSAGE
Definition procsignal.h:39
void init_ps_display(const char *fixed_part)
Definition ps_status.c:286
char * quote_literal_cstr(const char *rawstr)
Definition quote.c:101
#define ShmemRequestStruct(...)
Definition shmem.h:176
void ReplicationSlotAcquire(const char *name, bool nowait, bool error_if_invalid)
Definition slot.c:629
void ReplicationSlotDropAcquired(void)
Definition slot.c:1042
void ReplicationSlotMarkDirty(void)
Definition slot.c:1184
void ReplicationSlotCreate(const char *name, bool db_specific, ReplicationSlotPersistency persistency, bool two_phase, bool repack, bool failover, bool synced)
Definition slot.c:378
ReplicationSlotInvalidationCause GetSlotInvalidationCause(const char *cause_name)
Definition slot.c:2936
void ReplicationSlotsComputeRequiredXmin(bool already_locked)
Definition slot.c:1226
void ReplicationSlotPersist(void)
Definition slot.c:1201
ReplicationSlot * MyReplicationSlot
Definition slot.c:158
void ReplicationSlotSave(void)
Definition slot.c:1166
ReplicationSlot * SearchNamedReplicationSlot(const char *name, bool need_lock)
Definition slot.c:548
void ReplicationSlotRelease(void)
Definition slot.c:769
int max_replication_slots
Definition slot.c:161
ReplicationSlotCtlData * ReplicationSlotCtl
Definition slot.c:147
void ReplicationSlotsComputeRequiredLSN(void)
Definition slot.c:1308
void ReplicationSlotCleanup(bool synced_only)
Definition slot.c:868
int max_repack_replication_slots
Definition slot.c:163
@ RS_TEMPORARY
Definition slot.h:47
ReplicationSlotInvalidationCause
Definition slot.h:59
@ RS_INVAL_NONE
Definition slot.h:60
#define SlotIsLogical(slot)
Definition slot.h:288
static void ReplicationSlotSetInactiveSince(ReplicationSlot *s, TimestampTz ts, bool acquire_lock)
Definition slot.h:306
SlotSyncSkipReason
Definition slot.h:81
@ SS_SKIP_WAL_NOT_FLUSHED
Definition slot.h:83
@ SS_SKIP_NO_CONSISTENT_SNAPSHOT
Definition slot.h:87
@ SS_SKIP_NONE
Definition slot.h:82
@ SS_SKIP_INVALID
Definition slot.h:89
@ SS_SKIP_WAL_OR_ROWS_REMOVED
Definition slot.h:85
static List * get_local_synced_slots(void)
Definition slotsync.c:439
#define MIN_SLOTSYNC_WORKER_NAPTIME_MS
Definition slotsync.c:139
#define PRIMARY_INFO_OUTPUT_COL_COUNT
static void slotsync_worker_disconnect(int code, Datum arg)
Definition slotsync.c:1366
void SyncReplicationSlots(WalReceiverConn *wrconn)
Definition slotsync.c:1991
static bool local_sync_slot_required(ReplicationSlot *local_slot, List *remote_slots)
Definition slotsync.c:470
void ProcessSlotSyncMessage(void)
Definition slotsync.c:1335
static void drop_local_obsolete_slots(List *remote_slot_list)
Definition slotsync.c:523
static void reserve_wal_for_local_slot(XLogRecPtr restart_lsn)
Definition slotsync.c:580
const ShmemCallbacks SlotSyncShmemCallbacks
Definition slotsync.c:126
static void update_slotsync_skip_stats(SlotSyncSkipReason skip_reason)
Definition slotsync.c:189
void ShutDownSlotSync(void)
Definition slotsync.c:1799
bool sync_replication_slots
Definition slotsync.c:132
static bool synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid, bool *slot_persistence_pending)
Definition slotsync.c:716
static SlotSyncCtxStruct * SlotSyncCtx
Definition slotsync.c:121
static void slotsync_failure_callback(int code, Datum arg)
Definition slotsync.c:1929
#define SLOTSYNC_COLUMN_COUNT
static List * extract_slot_names(List *remote_slots)
Definition slotsync.c:1965
static long sleep_ms
Definition slotsync.c:142
#define SLOTSYNC_RESTART_INTERVAL_SEC
Definition slotsync.c:145
char * CheckAndGetDbnameFromConninfo(void)
Definition slotsync.c:1139
static bool syncing_slots
Definition slotsync.c:152
void HandleSlotSyncMessageInterrupt(void)
Definition slotsync.c:1319
#define MAX_SLOTSYNC_WORKER_NAPTIME_MS
Definition slotsync.c:140
static bool update_and_persist_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid, bool *slot_persistence_pending)
Definition slotsync.c:655
bool SlotSyncWorkerCanRestart(void)
Definition slotsync.c:1872
static void wait_for_slot_activity(bool some_slot_updated)
Definition slotsync.c:1425
static void slotsync_reread_config(void)
Definition slotsync.c:1235
static void reset_syncing_flag(void)
Definition slotsync.c:1520
static bool update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid)
Definition slotsync.c:221
static void slotsync_worker_onexit(int code, Datum arg)
Definition slotsync.c:1379
static void update_synced_slots_inactive_since(void)
Definition slotsync.c:1747
bool ValidateSlotSyncParams(int elevel)
Definition slotsync.c:1166
static void SlotSyncShmemInit(void *arg)
Definition slotsync.c:1918
static void validate_remote_info(WalReceiverConn *wrconn)
Definition slotsync.c:1061
static void check_and_set_sync_info(pid_t sync_process_pid)
Definition slotsync.c:1460
bool IsSyncingReplicationSlots(void)
Definition slotsync.c:1897
volatile sig_atomic_t SlotSyncShutdownPending
Definition slotsync.c:159
void ReplSlotSyncWorkerMain(const void *startup_data, size_t startup_data_len)
Definition slotsync.c:1540
static void SlotSyncShmemRequest(void *arg)
Definition slotsync.c:1906
static List * fetch_remote_slots(WalReceiverConn *wrconn, List *slot_names)
Definition slotsync.c:883
static bool synchronize_slots(WalReceiverConn *wrconn, List *remote_slot_list, bool *slot_persistence_pending)
Definition slotsync.c:1025
bool SnapBuildSnapshotExists(XLogRecPtr lsn)
Definition snapbuild.c:2118
static void SpinLockRelease(volatile slock_t *lock)
Definition spin.h:62
static void SpinLockAcquire(volatile slock_t *lock)
Definition spin.h:56
static void SpinLockInit(volatile slock_t *lock)
Definition spin.h:50
void InitProcess(void)
Definition proc.c:392
char * dbname
Definition streamutil.c:49
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition stringinfo.c:145
void appendStringInfoString(StringInfo str, const char *s)
Definition stringinfo.c:230
void appendStringInfoChar(StringInfo str, char ch)
Definition stringinfo.c:242
void initStringInfo(StringInfo str)
Definition stringinfo.c:97
Definition pg_list.h:54
bool two_phase
Definition slotsync.c:170
char * plugin
Definition slotsync.c:168
char * name
Definition slotsync.c:167
char * database
Definition slotsync.c:169
bool failover
Definition slotsync.c:171
ReplicationSlotInvalidationCause invalidated
Definition slotsync.c:178
XLogRecPtr confirmed_lsn
Definition slotsync.c:173
XLogRecPtr restart_lsn
Definition slotsync.c:172
XLogRecPtr two_phase_at
Definition slotsync.c:174
TransactionId catalog_xmin
Definition slotsync.c:175
ReplicationSlot replication_slots[1]
Definition slot.h:299
TransactionId catalog_xmin
Definition slot.h:122
ReplicationSlotPersistency persistency
Definition slot.h:106
ReplicationSlotInvalidationCause invalidated
Definition slot.h:128
TransactionId effective_catalog_xmin
Definition slot.h:210
slock_t mutex
Definition slot.h:183
SlotSyncSkipReason slotsync_skip_reason
Definition slot.h:284
bool in_use
Definition slot.h:186
ProcNumber active_proc
Definition slot.h:192
ReplicationSlotPersistentData data
Definition slot.h:213
ShmemRequestCallback request_fn
Definition shmem.h:133
time_t last_start_time
Definition slotsync.c:117
Tuplestorestate * tuplestore
TupleDesc tupledesc
WalRcvExecStatus status
Definition c.h:830
void InitializeTimeouts(void)
Definition timeout.c:470
static bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition transam.h:297
#define InvalidTransactionId
Definition transam.h:31
#define TransactionIdIsValid(xid)
Definition transam.h:41
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
bool tuplestore_gettupleslot(Tuplestorestate *state, bool forward, bool copy, TupleTableSlot *slot)
static Datum slot_getattr(TupleTableSlot *slot, int attnum, bool *isnull)
Definition tuptable.h:417
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition tuptable.h:476
const char * name
#define WL_TIMEOUT
#define WL_EXIT_ON_PM_DEATH
#define WL_LATCH_SET
static WalReceiverConn * wrconn
Definition walreceiver.c:95
bool hot_standby_feedback
Definition walreceiver.c:92
#define walrcv_connect(conninfo, replication, logical, must_use_password, appname, err)
@ WALRCV_OK_TUPLES
static void walrcv_clear_result(WalRcvExecResult *walres)
#define walrcv_get_dbname_from_conninfo(conninfo)
#define walrcv_exec(conn, exec, nRetTypes, retTypes)
#define walrcv_disconnect(conn)
XLogRecPtr GetStandbyFlushRecPtr(TimeLineID *tli)
Definition walsender.c:3804
#define SIGCHLD
Definition win32_port.h:168
#define SIGHUP
Definition win32_port.h:158
#define SIGPIPE
Definition win32_port.h:163
#define SIGUSR1
Definition win32_port.h:170
#define SIGUSR2
Definition win32_port.h:171
bool IsTransactionState(void)
Definition xact.c:389
void StartTransactionCommand(void)
Definition xact.c:3109
void CommitTransactionCommand(void)
Definition xact.c:3207
XLogSegNo XLogGetLastRemovedSegno(void)
Definition xlog.c:3813
XLogRecPtr GetRedoRecPtr(void)
Definition xlog.c:6933
XLogRecPtr XLogGetReplicationSlotMinimumLSN(void)
Definition xlog.c:2700
int wal_segment_size
Definition xlog.c:150
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
#define XLogRecPtrIsValid(r)
Definition xlogdefs.h:29
#define LSN_FORMAT_ARGS(lsn)
Definition xlogdefs.h:47
uint64 XLogRecPtr
Definition xlogdefs.h:21
#define InvalidXLogRecPtr
Definition xlogdefs.h:28
uint64 XLogSegNo
Definition xlogdefs.h:52
char * PrimarySlotName
bool StandbyMode
char * PrimaryConnInfo