PostgreSQL Source Code  git master
slot.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * slot.c
4  * Replication slot management.
5  *
6  *
7  * Copyright (c) 2012-2024, PostgreSQL Global Development Group
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/replication/slot.c
12  *
13  * NOTES
14  *
15  * Replication slots are used to keep state about replication streams
16  * originating from this cluster. Their primary purpose is to prevent the
17  * premature removal of WAL or of old tuple versions in a manner that would
18  * interfere with replication; they are also useful for monitoring purposes.
19  * Slots need to be permanent (to allow restarts), crash-safe, and allocatable
20  * on standbys (to support cascading setups). The requirement that slots be
21  * usable on standbys precludes storing them in the system catalogs.
22  *
23  * Each replication slot gets its own directory inside the directory
24  * $PGDATA / PG_REPLSLOT_DIR. Inside that directory the state file will
25  * contain the slot's own data. Additional data can be stored alongside that
26  * file if required. While the server is running, the state data is also
27  * cached in memory for efficiency.
28  *
29  * ReplicationSlotAllocationLock must be taken in exclusive mode to allocate
30  * or free a slot. ReplicationSlotControlLock must be taken in shared mode
31  * to iterate over the slots, and in exclusive mode to change the in_use flag
32  * of a slot. The remaining data in each slot is protected by its mutex.
33  *
34  *-------------------------------------------------------------------------
35  */
36 
37 #include "postgres.h"
38 
39 #include <unistd.h>
40 #include <sys/stat.h>
41 
42 #include "access/transam.h"
43 #include "access/xlog_internal.h"
44 #include "access/xlogrecovery.h"
45 #include "common/file_utils.h"
46 #include "common/string.h"
47 #include "miscadmin.h"
48 #include "pgstat.h"
49 #include "postmaster/interrupt.h"
50 #include "replication/slotsync.h"
51 #include "replication/slot.h"
53 #include "storage/fd.h"
54 #include "storage/ipc.h"
55 #include "storage/proc.h"
56 #include "storage/procarray.h"
57 #include "utils/builtins.h"
58 #include "utils/guc_hooks.h"
59 #include "utils/varlena.h"
60 
61 /*
62  * Replication slot on-disk data structure.
63  */
64 typedef struct ReplicationSlotOnDisk
65 {
66  /* first part of this struct needs to be version independent */
67 
68  /* data not covered by checksum */
71 
72  /* data covered by checksum */
75 
76  /*
77  * The actual data in the slot that follows can differ based on the above
78  * 'version'.
79  */
80 
83 
84 /*
85  * Struct for the configuration of synchronized_standby_slots.
86  *
87  * Note: this must be a flat representation that can be held in a single chunk
88  * of guc_malloc'd memory, so that it can be stored as the "extra" data for the
89  * synchronized_standby_slots GUC.
90  */
91 typedef struct
92 {
93  /* Number of slot names in the slot_names[] */
95 
96  /*
97  * slot_names contains 'nslotnames' consecutive null-terminated C strings.
98  */
99  char slot_names[FLEXIBLE_ARRAY_MEMBER];
101 
102 /*
103  * Lookup table for slot invalidation causes.
104  */
105 const char *const SlotInvalidationCauses[] = {
	/*
	 * Designated initializers: each string is stored at the array index
	 * given by its RS_INVAL_* constant, so the table can be indexed directly
	 * by that constant regardless of the order of the entries below.
	 */
106  [RS_INVAL_NONE] = "none",
107  [RS_INVAL_WAL_REMOVED] = "wal_removed",
108  [RS_INVAL_HORIZON] = "rows_removed",
109  [RS_INVAL_WAL_LEVEL] = "wal_level_insufficient",
110 };
111 
112 /* Maximum number of invalidation causes */
113 #define RS_INVAL_MAX_CAUSES RS_INVAL_WAL_LEVEL
114 
116  "array length mismatch");
117 
/*
 * Size helpers describing the layout of ReplicationSlotOnDisk.
 *
 * Every expansion is fully parenthesized so that the macros can be used
 * safely inside larger expressions (e.g. multiplied, or subtracted from
 * another value) without operator precedence changing the result.
 */

/* size of version independent data */
#define ReplicationSlotOnDiskConstantSize \
	(offsetof(ReplicationSlotOnDisk, slotdata))
/* size of the part of the slot not covered by the checksum */
#define ReplicationSlotOnDiskNotChecksummedSize \
	(offsetof(ReplicationSlotOnDisk, version))
/* size of the part covered by the checksum */
#define ReplicationSlotOnDiskChecksummedSize \
	(sizeof(ReplicationSlotOnDisk) - ReplicationSlotOnDiskNotChecksummedSize)
/* size of the slot data that is version dependent */
#define ReplicationSlotOnDiskV2Size \
	(sizeof(ReplicationSlotOnDisk) - ReplicationSlotOnDiskConstantSize)

#define SLOT_MAGIC		0x1051CA1	/* format identifier */
#define SLOT_VERSION	5			/* version for new files */
133 
134 /* Control array for replication slot management */
136 
137 /* My backend's replication slot in the shared memory array */
139 
140 /* GUC variables */
141 int max_replication_slots = 10; /* the maximum number of replication
142  * slots */
143 
144 /*
145  * This GUC lists streaming replication standby server slot names that
146  * logical WAL sender processes will wait for.
147  */
149 
150 /* This is the parsed and cached configuration for synchronized_standby_slots */
152 
153 /*
154  * Oldest LSN that has been confirmed to be flushed to the standbys
155  * corresponding to the physical slots specified in the synchronized_standby_slots GUC.
156  */
158 
159 static void ReplicationSlotShmemExit(int code, Datum arg);
160 static void ReplicationSlotDropPtr(ReplicationSlot *slot);
161 
162 /* internal persistency functions */
163 static void RestoreSlotFromDisk(const char *name);
164 static void CreateSlotOnDisk(ReplicationSlot *slot);
165 static void SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel);
166 
167 /*
168  * Report shared-memory space needed by ReplicationSlotsShmemInit.
169  */
170 Size
172 {
173  Size size = 0;
174 
175  if (max_replication_slots == 0)
176  return size;
177 
178  size = offsetof(ReplicationSlotCtlData, replication_slots);
179  size = add_size(size,
181 
182  return size;
183 }
184 
185 /*
186  * Allocate and initialize shared memory for replication slots.
187  */
188 void
190 {
191  bool found;
192 
193  if (max_replication_slots == 0)
194  return;
195 
197  ShmemInitStruct("ReplicationSlot Ctl", ReplicationSlotsShmemSize(),
198  &found);
199 
200  if (!found)
201  {
202  int i;
203 
204  /* First time through, so initialize */
206 
207  for (i = 0; i < max_replication_slots; i++)
208  {
210 
211  /* everything else is zeroed by the memset above */
212  SpinLockInit(&slot->mutex);
216  }
217  }
218 }
219 
220 /*
221  * Register the callback for replication slot cleanup and releasing.
222  */
223 void
225 {
227 }
228 
229 /*
230  * Release and cleanup replication slots.
231  */
232 static void
234 {
235  /* Make sure active replication slots are released */
236  if (MyReplicationSlot != NULL)
238 
239  /* Also cleanup all the temporary slots. */
240  ReplicationSlotCleanup(false);
241 }
242 
243 /*
244  * Check whether the passed slot name is valid and report errors at elevel.
245  *
246  * Slot names may consist out of [a-z0-9_]{1,NAMEDATALEN-1} which should allow
247  * the name to be used as a directory name on every supported OS.
248  *
249  * Returns whether the directory name is valid or not if elevel < ERROR.
250  */
251 bool
252 ReplicationSlotValidateName(const char *name, int elevel)
253 {
254  const char *cp;
255 
256  if (strlen(name) == 0)
257  {
258  ereport(elevel,
259  (errcode(ERRCODE_INVALID_NAME),
260  errmsg("replication slot name \"%s\" is too short",
261  name)));
262  return false;
263  }
264 
265  if (strlen(name) >= NAMEDATALEN)
266  {
267  ereport(elevel,
268  (errcode(ERRCODE_NAME_TOO_LONG),
269  errmsg("replication slot name \"%s\" is too long",
270  name)));
271  return false;
272  }
273 
274  for (cp = name; *cp; cp++)
275  {
276  if (!((*cp >= 'a' && *cp <= 'z')
277  || (*cp >= '0' && *cp <= '9')
278  || (*cp == '_')))
279  {
280  ereport(elevel,
281  (errcode(ERRCODE_INVALID_NAME),
282  errmsg("replication slot name \"%s\" contains invalid character",
283  name),
284  errhint("Replication slot names may only contain lower case letters, numbers, and the underscore character.")));
285  return false;
286  }
287  }
288  return true;
289 }
290 
291 /*
292  * Create a new replication slot and mark it as used by this backend.
293  *
294  * name: Name of the slot
295  * db_specific: logical decoding is db specific; if the slot is going to
296  * be used for that pass true, otherwise false.
297  * two_phase: Allows decoding of prepared transactions. We allow this option
298  * to be enabled only at the slot creation time. If we allow this option
299  * to be changed during decoding then it is quite possible that we skip
300  * prepare first time because this option was not enabled. Now next time
301  * during getting changes, if the two_phase option is enabled it can skip
302  * prepare because by that time start decoding point has been moved. So the
303  * user will only get commit prepared.
304  * failover: If enabled, allows the slot to be synced to standbys so
305  * that logical replication can be resumed after failover.
306  * synced: True if the slot is synchronized from the primary server.
307  */
308 void
309 ReplicationSlotCreate(const char *name, bool db_specific,
310  ReplicationSlotPersistency persistency,
311  bool two_phase, bool failover, bool synced)
312 {
313  ReplicationSlot *slot = NULL;
314  int i;
315 
316  Assert(MyReplicationSlot == NULL);
317 
319 
320  if (failover)
321  {
322  /*
323  * Do not allow users to create the failover enabled slots on the
324  * standby as we do not support sync to the cascading standby.
325  *
326  * However, failover enabled slots can be created during slot
327  * synchronization because we need to retain the same values as the
328  * remote slot.
329  */
331  ereport(ERROR,
332  errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
333  errmsg("cannot enable failover for a replication slot created on the standby"));
334 
335  /*
336  * Do not allow users to create failover enabled temporary slots,
337  * because temporary slots will not be synced to the standby.
338  *
339  * However, failover enabled temporary slots can be created during
340  * slot synchronization. See the comments atop slotsync.c for details.
341  */
342  if (persistency == RS_TEMPORARY && !IsSyncingReplicationSlots())
343  ereport(ERROR,
344  errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
345  errmsg("cannot enable failover for a temporary replication slot"));
346  }
347 
348  /*
349  * If some other backend ran this code concurrently with us, we'd likely
350  * both allocate the same slot, and that would be bad. We'd also be at
351  * risk of missing a name collision. Also, we don't want to try to create
352  * a new slot while somebody's busy cleaning up an old one, because we
353  * might both be monkeying with the same directory.
354  */
355  LWLockAcquire(ReplicationSlotAllocationLock, LW_EXCLUSIVE);
356 
357  /*
358  * Check for name collision, and identify an allocatable slot. We need to
359  * hold ReplicationSlotControlLock in shared mode for this, so that nobody
360  * else can change the in_use flags while we're looking at them.
361  */
362  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
363  for (i = 0; i < max_replication_slots; i++)
364  {
366 
367  if (s->in_use && strcmp(name, NameStr(s->data.name)) == 0)
368  ereport(ERROR,
370  errmsg("replication slot \"%s\" already exists", name)));
371  if (!s->in_use && slot == NULL)
372  slot = s;
373  }
374  LWLockRelease(ReplicationSlotControlLock);
375 
376  /* If all slots are in use, we're out of luck. */
377  if (slot == NULL)
378  ereport(ERROR,
379  (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
380  errmsg("all replication slots are in use"),
381  errhint("Free one or increase \"max_replication_slots\".")));
382 
383  /*
384  * Since this slot is not in use, nobody should be looking at any part of
385  * it other than the in_use field unless they're trying to allocate it.
386  * And since we hold ReplicationSlotAllocationLock, nobody except us can
387  * be doing that. So it's safe to initialize the slot.
388  */
389  Assert(!slot->in_use);
390  Assert(slot->active_pid == 0);
391 
392  /* first initialize persistent data */
393  memset(&slot->data, 0, sizeof(ReplicationSlotPersistentData));
394  namestrcpy(&slot->data.name, name);
395  slot->data.database = db_specific ? MyDatabaseId : InvalidOid;
396  slot->data.persistency = persistency;
397  slot->data.two_phase = two_phase;
399  slot->data.failover = failover;
400  slot->data.synced = synced;
401 
402  /* and then data only present in shared memory */
403  slot->just_dirtied = false;
404  slot->dirty = false;
412  slot->inactive_since = 0;
413 
414  /*
415  * Create the slot on disk. We haven't actually marked the slot allocated
416  * yet, so no special cleanup is required if this errors out.
417  */
418  CreateSlotOnDisk(slot);
419 
420  /*
421  * We need to briefly prevent any other backend from iterating over the
422  * slots while we flip the in_use flag. We also need to set the active
423  * flag while holding the ControlLock as otherwise a concurrent
424  * ReplicationSlotAcquire() could acquire the slot as well.
425  */
426  LWLockAcquire(ReplicationSlotControlLock, LW_EXCLUSIVE);
427 
428  slot->in_use = true;
429 
430  /* We can now mark the slot active, and that makes it our slot. */
431  SpinLockAcquire(&slot->mutex);
432  Assert(slot->active_pid == 0);
433  slot->active_pid = MyProcPid;
434  SpinLockRelease(&slot->mutex);
435  MyReplicationSlot = slot;
436 
437  LWLockRelease(ReplicationSlotControlLock);
438 
439  /*
440  * Create statistics entry for the new logical slot. We don't collect any
441  * stats for physical slots, so no need to create an entry for the same.
442  * See ReplicationSlotDropPtr for why we need to do this before releasing
443  * ReplicationSlotAllocationLock.
444  */
445  if (SlotIsLogical(slot))
447 
448  /*
449  * Now that the slot has been marked as in_use and active, it's safe to
450  * let somebody else try to allocate a slot.
451  */
452  LWLockRelease(ReplicationSlotAllocationLock);
453 
454  /* Let everybody know we've modified this slot */
456 }
457 
458 /*
459  * Search for the named replication slot.
460  *
461  * Return the replication slot if found, otherwise NULL.
462  */
464 SearchNamedReplicationSlot(const char *name, bool need_lock)
465 {
466  int i;
467  ReplicationSlot *slot = NULL;
468 
469  if (need_lock)
470  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
471 
472  for (i = 0; i < max_replication_slots; i++)
473  {
475 
476  if (s->in_use && strcmp(name, NameStr(s->data.name)) == 0)
477  {
478  slot = s;
479  break;
480  }
481  }
482 
483  if (need_lock)
484  LWLockRelease(ReplicationSlotControlLock);
485 
486  return slot;
487 }
488 
489 /*
490  * Return the index of the replication slot in
491  * ReplicationSlotCtl->replication_slots.
492  *
493  * This is mainly useful to have an efficient key for storing replication slot
494  * stats.
495  */
496 int
498 {
500  slot < ReplicationSlotCtl->replication_slots + max_replication_slots);
501 
502  return slot - ReplicationSlotCtl->replication_slots;
503 }
504 
505 /*
506  * If the slot at 'index' is unused, return false. Otherwise 'name' is set to
507  * the slot's name and true is returned.
508  *
509  * This likely is only useful for pgstat_replslot.c during shutdown, in other
510  * cases there are obvious TOCTOU issues.
511  */
512 bool
514 {
515  ReplicationSlot *slot;
516  bool found;
517 
519 
520  /*
521  * Ensure that the slot cannot be dropped while we copy the name. Don't
522  * need the spinlock as the name of an existing slot cannot change.
523  */
524  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
525  found = slot->in_use;
526  if (slot->in_use)
527  namestrcpy(name, NameStr(slot->data.name));
528  LWLockRelease(ReplicationSlotControlLock);
529 
530  return found;
531 }
532 
533 /*
534  * Find a previously created slot and mark it as used by this process.
535  *
536  * An error is raised if nowait is true and the slot is currently in use. If
537  * nowait is false, we sleep until the slot is released by the owning process.
538  */
539 void
540 ReplicationSlotAcquire(const char *name, bool nowait)
541 {
542  ReplicationSlot *s;
543  int active_pid;
544 
545  Assert(name != NULL);
546 
547 retry:
548  Assert(MyReplicationSlot == NULL);
549 
550  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
551 
552  /* Check if the slot exists with the given name. */
553  s = SearchNamedReplicationSlot(name, false);
554  if (s == NULL || !s->in_use)
555  {
556  LWLockRelease(ReplicationSlotControlLock);
557 
558  ereport(ERROR,
559  (errcode(ERRCODE_UNDEFINED_OBJECT),
560  errmsg("replication slot \"%s\" does not exist",
561  name)));
562  }
563 
564  /*
565  * This is the slot we want; check if it's active under some other
566  * process. In single user mode, we don't need this check.
567  */
568  if (IsUnderPostmaster)
569  {
570  /*
571  * Get ready to sleep on the slot in case it is active. (We may end
572  * up not sleeping, but we don't want to do this while holding the
573  * spinlock.)
574  */
575  if (!nowait)
577 
578  SpinLockAcquire(&s->mutex);
579  if (s->active_pid == 0)
580  s->active_pid = MyProcPid;
581  active_pid = s->active_pid;
582  SpinLockRelease(&s->mutex);
583  }
584  else
585  active_pid = MyProcPid;
586  LWLockRelease(ReplicationSlotControlLock);
587 
588  /*
589  * If we found the slot but it's already active in another process, we
590  * wait until the owning process signals us that it's been released, or
591  * error out.
592  */
593  if (active_pid != MyProcPid)
594  {
595  if (!nowait)
596  {
597  /* Wait here until we get signaled, and then restart */
599  WAIT_EVENT_REPLICATION_SLOT_DROP);
601  goto retry;
602  }
603 
604  ereport(ERROR,
605  (errcode(ERRCODE_OBJECT_IN_USE),
606  errmsg("replication slot \"%s\" is active for PID %d",
607  NameStr(s->data.name), active_pid)));
608  }
609  else if (!nowait)
610  ConditionVariableCancelSleep(); /* no sleep needed after all */
611 
612  /* Let everybody know we've modified this slot */
614 
615  /* We made this slot active, so it's ours now. */
616  MyReplicationSlot = s;
617 
618  /*
619  * The call to pgstat_acquire_replslot() protects against stats for a
620  * different slot, from before a restart or such, being present during
621  * pgstat_report_replslot().
622  */
623  if (SlotIsLogical(s))
625 
626  /*
627  * Reset the time since the slot has become inactive as the slot is active
628  * now.
629  */
630  SpinLockAcquire(&s->mutex);
631  s->inactive_since = 0;
632  SpinLockRelease(&s->mutex);
633 
634  if (am_walsender)
635  {
637  SlotIsLogical(s)
638  ? errmsg("acquired logical replication slot \"%s\"",
639  NameStr(s->data.name))
640  : errmsg("acquired physical replication slot \"%s\"",
641  NameStr(s->data.name)));
642  }
643 }
644 
645 /*
646  * Release the replication slot that this backend considers itself to own.
647  *
648  * This or another backend can re-acquire the slot later.
649  * Resources this slot requires will be preserved.
650  */
651 void
653 {
655  char *slotname = NULL; /* keep compiler quiet */
656  bool is_logical = false; /* keep compiler quiet */
657  TimestampTz now = 0;
658 
659  Assert(slot != NULL && slot->active_pid != 0);
660 
661  if (am_walsender)
662  {
663  slotname = pstrdup(NameStr(slot->data.name));
664  is_logical = SlotIsLogical(slot);
665  }
666 
667  if (slot->data.persistency == RS_EPHEMERAL)
668  {
669  /*
670  * Delete the slot. There is no !PANIC case where this is allowed to
671  * fail, all that may happen is an incomplete cleanup of the on-disk
672  * data.
673  */
675  }
676 
677  /*
678  * If slot needed to temporarily restrain both data and catalog xmin to
679  * create the catalog snapshot, remove that temporary constraint.
680  * Snapshots can only be exported while the initial snapshot is still
681  * acquired.
682  */
683  if (!TransactionIdIsValid(slot->data.xmin) &&
685  {
686  SpinLockAcquire(&slot->mutex);
688  SpinLockRelease(&slot->mutex);
690  }
691 
692  /*
693  * Set the time since the slot has become inactive. We get the current
694  * time beforehand to avoid system call while holding the spinlock.
695  */
697 
698  if (slot->data.persistency == RS_PERSISTENT)
699  {
700  /*
701  * Mark persistent slot inactive. We're not freeing it, just
702  * disconnecting, but wake up others that may be waiting for it.
703  */
704  SpinLockAcquire(&slot->mutex);
705  slot->active_pid = 0;
706  slot->inactive_since = now;
707  SpinLockRelease(&slot->mutex);
709  }
710  else
711  {
712  SpinLockAcquire(&slot->mutex);
713  slot->inactive_since = now;
714  SpinLockRelease(&slot->mutex);
715  }
716 
717  MyReplicationSlot = NULL;
718 
719  /* might not have been set when we've been a plain slot */
720  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
723  LWLockRelease(ProcArrayLock);
724 
725  if (am_walsender)
726  {
728  is_logical
729  ? errmsg("released logical replication slot \"%s\"",
730  slotname)
731  : errmsg("released physical replication slot \"%s\"",
732  slotname));
733 
734  pfree(slotname);
735  }
736 }
737 
738 /*
739  * Cleanup temporary slots created in current session.
740  *
741  * Cleanup only synced temporary slots if 'synced_only' is true, else
742  * cleanup all temporary slots.
743  */
744 void
745 ReplicationSlotCleanup(bool synced_only)
746 {
747  int i;
748 
749  Assert(MyReplicationSlot == NULL);
750 
751 restart:
752  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
753  for (i = 0; i < max_replication_slots; i++)
754  {
756 
757  if (!s->in_use)
758  continue;
759 
760  SpinLockAcquire(&s->mutex);
761  if ((s->active_pid == MyProcPid &&
762  (!synced_only || s->data.synced)))
763  {
765  SpinLockRelease(&s->mutex);
766  LWLockRelease(ReplicationSlotControlLock); /* avoid deadlock */
767 
769 
771  goto restart;
772  }
773  else
774  SpinLockRelease(&s->mutex);
775  }
776 
777  LWLockRelease(ReplicationSlotControlLock);
778 }
779 
780 /*
781  * Permanently drop replication slot identified by the passed in name.
782  */
783 void
784 ReplicationSlotDrop(const char *name, bool nowait)
785 {
786  Assert(MyReplicationSlot == NULL);
787 
788  ReplicationSlotAcquire(name, nowait);
789 
790  /*
791  * Do not allow users to drop the slots which are currently being synced
792  * from the primary to the standby.
793  */
795  ereport(ERROR,
796  errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
797  errmsg("cannot drop replication slot \"%s\"", name),
798  errdetail("This replication slot is being synchronized from the primary server."));
799 
801 }
802 
803 /*
804  * Change the definition of the slot identified by the specified name.
805  */
806 void
807 ReplicationSlotAlter(const char *name, const bool *failover,
808  const bool *two_phase)
809 {
810  bool update_slot = false;
811 
812  Assert(MyReplicationSlot == NULL);
813  Assert(failover || two_phase);
814 
816 
818  ereport(ERROR,
819  errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
820  errmsg("cannot use %s with a physical replication slot",
821  "ALTER_REPLICATION_SLOT"));
822 
823  if (RecoveryInProgress())
824  {
825  /*
826  * Do not allow users to alter the slots which are currently being
827  * synced from the primary to the standby.
828  */
830  ereport(ERROR,
831  errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
832  errmsg("cannot alter replication slot \"%s\"", name),
833  errdetail("This replication slot is being synchronized from the primary server."));
834 
835  /*
836  * Do not allow users to enable failover on the standby as we do not
837  * support sync to the cascading standby.
838  */
839  if (failover && *failover)
840  ereport(ERROR,
841  errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
842  errmsg("cannot enable failover for a replication slot"
843  " on the standby"));
844  }
845 
846  if (failover)
847  {
848  /*
849  * Do not allow users to enable failover for temporary slots as we do
850  * not support syncing temporary slots to the standby.
851  */
852  if (*failover && MyReplicationSlot->data.persistency == RS_TEMPORARY)
853  ereport(ERROR,
854  errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
855  errmsg("cannot enable failover for a temporary replication slot"));
856 
857  if (MyReplicationSlot->data.failover != *failover)
858  {
860  MyReplicationSlot->data.failover = *failover;
862 
863  update_slot = true;
864  }
865  }
866 
868  {
872 
873  update_slot = true;
874  }
875 
876  if (update_slot)
877  {
880  }
881 
883 }
884 
885 /*
886  * Permanently drop the currently acquired replication slot.
887  */
888 void
890 {
892 
893  Assert(MyReplicationSlot != NULL);
894 
895  /* slot isn't acquired anymore */
896  MyReplicationSlot = NULL;
897 
899 }
900 
901 /*
902  * Permanently drop the replication slot which will be released by the point
903  * this function returns.
904  */
905 static void
907 {
908  char path[MAXPGPATH];
909  char tmppath[MAXPGPATH];
910 
911  /*
912  * If some other backend ran this code concurrently with us, we might try
913  * to delete a slot with a certain name while someone else was trying to
914  * create a slot with the same name.
915  */
916  LWLockAcquire(ReplicationSlotAllocationLock, LW_EXCLUSIVE);
917 
918  /* Generate pathnames. */
919  sprintf(path, "%s/%s", PG_REPLSLOT_DIR, NameStr(slot->data.name));
920  sprintf(tmppath, "%s/%s.tmp", PG_REPLSLOT_DIR, NameStr(slot->data.name));
921 
922  /*
923  * Rename the slot directory on disk, so that we'll no longer recognize
924  * this as a valid slot. Note that if this fails, we've got to mark the
925  * slot inactive before bailing out. If we're dropping an ephemeral or a
926  * temporary slot, we better never fail hard as the caller won't expect
927  * the slot to survive and this might get called during error handling.
928  */
929  if (rename(path, tmppath) == 0)
930  {
931  /*
932  * We need to fsync() the directory we just renamed and its parent to
933  * make sure that our changes are on disk in a crash-safe fashion. If
934  * fsync() fails, we can't be sure whether the changes are on disk or
935  * not. For now, we handle that by panicking;
936  * StartupReplicationSlots() will try to straighten it out after
937  * restart.
938  */
940  fsync_fname(tmppath, true);
943  }
944  else
945  {
946  bool fail_softly = slot->data.persistency != RS_PERSISTENT;
947 
948  SpinLockAcquire(&slot->mutex);
949  slot->active_pid = 0;
950  SpinLockRelease(&slot->mutex);
951 
952  /* wake up anyone waiting on this slot */
954 
955  ereport(fail_softly ? WARNING : ERROR,
957  errmsg("could not rename file \"%s\" to \"%s\": %m",
958  path, tmppath)));
959  }
960 
961  /*
962  * The slot is definitely gone. Lock out concurrent scans of the array
963  * long enough to kill it. It's OK to clear the active PID here without
964  * grabbing the mutex because nobody else can be scanning the array here,
965  * and nobody can be attached to this slot and thus access it without
966  * scanning the array.
967  *
968  * Also wake up processes waiting for it.
969  */
970  LWLockAcquire(ReplicationSlotControlLock, LW_EXCLUSIVE);
971  slot->active_pid = 0;
972  slot->in_use = false;
973  LWLockRelease(ReplicationSlotControlLock);
975 
976  /*
977  * Slot is dead and doesn't prevent resource removal anymore, recompute
978  * limits.
979  */
982 
983  /*
984  * If removing the directory fails, the worst thing that will happen is
985  * that the user won't be able to create a new slot with the same name
986  * until the next server restart. We warn about it, but that's all.
987  */
988  if (!rmtree(tmppath, true))
990  (errmsg("could not remove directory \"%s\"", tmppath)));
991 
992  /*
993  * Drop the statistics entry for the replication slot. Do this while
994  * holding ReplicationSlotAllocationLock so that we don't drop a
995  * statistics entry for another slot with the same name just created in
996  * another session.
997  */
998  if (SlotIsLogical(slot))
999  pgstat_drop_replslot(slot);
1000 
1001  /*
1002  * We release this at the very end, so that nobody starts trying to create
1003  * a slot while we're still cleaning up the detritus of the old one.
1004  */
1005  LWLockRelease(ReplicationSlotAllocationLock);
1006 }
1007 
1008 /*
1009  * Serialize the currently acquired slot's state from memory to disk, thereby
1010  * guaranteeing the current state will survive a crash.
1011  */
1012 void
1014 {
1015  char path[MAXPGPATH];
1016 
1017  Assert(MyReplicationSlot != NULL);
1018 
1021 }
1022 
1023 /*
1024  * Signal that it would be useful if the currently acquired slot would be
1025  * flushed out to disk.
1026  *
1027  * Note that the actual flush to disk can be delayed for a long time; if a
1028  * flush is required for correctness, explicitly do a ReplicationSlotSave().
1029  */
1030 void
1032 {
1034 
1035  Assert(MyReplicationSlot != NULL);
1036 
1037  SpinLockAcquire(&slot->mutex);
1039  MyReplicationSlot->dirty = true;
1040  SpinLockRelease(&slot->mutex);
1041 }
1042 
1043 /*
1044  * Convert a slot that's marked as RS_EPHEMERAL or RS_TEMPORARY to a
1045  * RS_PERSISTENT slot, guaranteeing it will be there after an eventual crash.
1046  */
1047 void
1049 {
1051 
1052  Assert(slot != NULL);
1054 
1055  SpinLockAcquire(&slot->mutex);
1056  slot->data.persistency = RS_PERSISTENT;
1057  SpinLockRelease(&slot->mutex);
1058 
1061 }
1062 
1063 /*
1064  * Compute the oldest xmin across all slots and store it in the ProcArray.
1065  *
1066  * If already_locked is true, ProcArrayLock has already been acquired
1067  * exclusively.
1068  */
1069 void
1071 {
1072  int i;
1074  TransactionId agg_catalog_xmin = InvalidTransactionId;
1075 
1076  Assert(ReplicationSlotCtl != NULL);
1077 
1078  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1079 
1080  for (i = 0; i < max_replication_slots; i++)
1081  {
1083  TransactionId effective_xmin;
1084  TransactionId effective_catalog_xmin;
1085  bool invalidated;
1086 
1087  if (!s->in_use)
1088  continue;
1089 
1090  SpinLockAcquire(&s->mutex);
1091  effective_xmin = s->effective_xmin;
1092  effective_catalog_xmin = s->effective_catalog_xmin;
1093  invalidated = s->data.invalidated != RS_INVAL_NONE;
1094  SpinLockRelease(&s->mutex);
1095 
1096  /* invalidated slots need not apply */
1097  if (invalidated)
1098  continue;
1099 
1100  /* check the data xmin */
1101  if (TransactionIdIsValid(effective_xmin) &&
1102  (!TransactionIdIsValid(agg_xmin) ||
1103  TransactionIdPrecedes(effective_xmin, agg_xmin)))
1104  agg_xmin = effective_xmin;
1105 
1106  /* check the catalog xmin */
1107  if (TransactionIdIsValid(effective_catalog_xmin) &&
1108  (!TransactionIdIsValid(agg_catalog_xmin) ||
1109  TransactionIdPrecedes(effective_catalog_xmin, agg_catalog_xmin)))
1110  agg_catalog_xmin = effective_catalog_xmin;
1111  }
1112 
1113  LWLockRelease(ReplicationSlotControlLock);
1114 
1115  ProcArraySetReplicationSlotXmin(agg_xmin, agg_catalog_xmin, already_locked);
1116 }
1117 
1118 /*
1119  * Compute the oldest restart LSN across all slots and inform xlog module.
1120  *
1121  * Note: while max_slot_wal_keep_size is theoretically relevant for this
1122  * purpose, we don't try to account for that, because this module doesn't
1123  * know what to compare against.
1124  */
1125 void
1127 {
1128  int i;
1129  XLogRecPtr min_required = InvalidXLogRecPtr;
1130 
1131  Assert(ReplicationSlotCtl != NULL);
1132 
1133  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1134  for (i = 0; i < max_replication_slots; i++)
1135  {
1137  XLogRecPtr restart_lsn;
1138  bool invalidated;
1139 
1140  if (!s->in_use)
1141  continue;
1142 
1143  SpinLockAcquire(&s->mutex);
1144  restart_lsn = s->data.restart_lsn;
1145  invalidated = s->data.invalidated != RS_INVAL_NONE;
1146  SpinLockRelease(&s->mutex);
1147 
1148  /* invalidated slots need not apply */
1149  if (invalidated)
1150  continue;
1151 
1152  if (restart_lsn != InvalidXLogRecPtr &&
1153  (min_required == InvalidXLogRecPtr ||
1154  restart_lsn < min_required))
1155  min_required = restart_lsn;
1156  }
1157  LWLockRelease(ReplicationSlotControlLock);
1158 
1159  XLogSetReplicationSlotMinimumLSN(min_required);
1160 }
1161 
1162 /*
1163  * Compute the oldest WAL LSN required by *logical* decoding slots.
1164  *
1165  * Returns InvalidXLogRecPtr if logical decoding is disabled or no logical
1166  * slots exist.
1167  *
1168  * NB: this returns a value >= ReplicationSlotsComputeRequiredLSN(), since it
1169  * ignores physical replication slots.
1170  *
1171  * The results aren't required frequently, so we don't maintain a precomputed
1172  * value like we do for ComputeRequiredLSN() and ComputeRequiredXmin().
1173  */
1174 XLogRecPtr
1176 {
1177  XLogRecPtr result = InvalidXLogRecPtr;
1178  int i;
1179 
 /* no slots configured, so there is nothing to scan */
1180  if (max_replication_slots <= 0)
1181  return InvalidXLogRecPtr;
1182 
1183  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1184 
1185  for (i = 0; i < max_replication_slots; i++)
1186  {
1187  ReplicationSlot *s;
1188  XLogRecPtr restart_lsn;
1189  bool invalidated;
1190 
1192 
1193  /* cannot change while ReplicationSlotControlLock is held */
1194  if (!s->in_use)
1195  continue;
1196 
1197  /* we're only interested in logical slots */
1198  if (!SlotIsLogical(s))
1199  continue;
1200 
1201  /* read once, it's ok if it increases while we're checking */
1202  SpinLockAcquire(&s->mutex);
1203  restart_lsn = s->data.restart_lsn;
1204  invalidated = s->data.invalidated != RS_INVAL_NONE;
1205  SpinLockRelease(&s->mutex);
1206 
1207  /* invalidated slots need not apply */
1208  if (invalidated)
1209  continue;
1210 
 /* a slot without a restart_lsn does not constrain the result */
1211  if (restart_lsn == InvalidXLogRecPtr)
1212  continue;
1213 
 /* keep the smallest restart_lsn among the qualifying slots */
1214  if (result == InvalidXLogRecPtr ||
1215  restart_lsn < result)
1216  result = restart_lsn;
1217  }
1218 
1219  LWLockRelease(ReplicationSlotControlLock);
1220 
1221  return result;
1222 }
1223 
1224 /*
1225  * ReplicationSlotsCountDBSlots -- count the number of slots that refer to the
1226  * passed database oid.
1227  *
1228  * Returns true if there are any slots referencing the database. *nslots will
1229  * be set to the absolute number of slots in the database, *nactive to ones
1230  * currently active.
 *
 * Both output counters are reset to zero on entry, so they are valid even
 * when the function returns false early because max_replication_slots <= 0.
1231  */
1232 bool
1233 ReplicationSlotsCountDBSlots(Oid dboid, int *nslots, int *nactive)
1234 {
1235  int i;
1236 
1237  *nslots = *nactive = 0;
1238 
1239  if (max_replication_slots <= 0)
1240  return false;
1241 
1242  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1243  for (i = 0; i < max_replication_slots; i++)
1244  {
1245  ReplicationSlot *s;
1246 
1248 
1249  /* cannot change while ReplicationSlotControlLock is held */
1250  if (!s->in_use)
1251  continue;
1252 
1253  /* only logical slots are database specific, skip */
1254  if (!SlotIsLogical(s))
1255  continue;
1256 
1257  /* not our database, skip */
1258  if (s->data.database != dboid)
1259  continue;
1260 
1261  /* NB: intentionally counting invalidated slots */
1262 
1263  /* count slots with spinlock held */
1264  SpinLockAcquire(&s->mutex);
1265  (*nslots)++;
 /* a nonzero active_pid means some process currently holds the slot */
1266  if (s->active_pid != 0)
1267  (*nactive)++;
1268  SpinLockRelease(&s->mutex);
1269  }
1270  LWLockRelease(ReplicationSlotControlLock);
1271 
1272  if (*nslots > 0)
1273  return true;
1274  return false;
1275 }
1276 
1277 /*
1278  * ReplicationSlotsDropDBSlots -- Drop all db-specific slots relating to the
1279  * passed database oid. The caller should hold an exclusive lock on the
1280  * pg_database oid for the database to prevent creation of new slots on the db
1281  * or replay from existing slots.
1282  *
1283  * Another session that concurrently acquires an existing slot on the target DB
1284  * (most likely to drop it) may cause this function to ERROR. If that happens
1285  * it may have dropped some but not all slots.
1286  *
1287  * This routine isn't as efficient as it could be - but we don't drop
1288  * databases often, especially databases with lots of slots.
1289  */
1290 void
1292 {
1293  int i;
1294 
1295  if (max_replication_slots <= 0)
1296  return;
1297 
 /* the scan starts over from here after every slot that gets dropped */
1298 restart:
1299  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1300  for (i = 0; i < max_replication_slots; i++)
1301  {
1302  ReplicationSlot *s;
1303  char *slotname;
1304  int active_pid;
1305 
1307 
1308  /* cannot change while ReplicationSlotControlLock is held */
1309  if (!s->in_use)
1310  continue;
1311 
1312  /* only logical slots are database specific, skip */
1313  if (!SlotIsLogical(s))
1314  continue;
1315 
1316  /* not our database, skip */
1317  if (s->data.database != dboid)
1318  continue;
1319 
1320  /* NB: intentionally including invalidated slots */
1321 
1322  /* acquire slot, so ReplicationSlotDropAcquired can be reused */
1323  SpinLockAcquire(&s->mutex);
1324  /* can't change while ReplicationSlotControlLock is held */
1325  slotname = NameStr(s->data.name);
1326  active_pid = s->active_pid;
 /* nobody holds the slot: claim it for this backend */
1327  if (active_pid == 0)
1328  {
1329  MyReplicationSlot = s;
1330  s->active_pid = MyProcPid;
1331  }
1332  SpinLockRelease(&s->mutex);
1333 
1334  /*
1335  * Even though we hold an exclusive lock on the database object a
1336  * logical slot for that DB can still be active, e.g. if it's
1337  * concurrently being dropped by a backend connected to another DB.
1338  *
1339  * That's fairly unlikely in practice, so we'll just bail out.
1340  *
1341  * The slot sync worker holds a shared lock on the database before
1342  * operating on synced logical slots to avoid conflict with the drop
1343  * happening here. The persistent synced slots are thus safe but there
1344  * is a possibility that the slot sync worker has created a temporary
1345  * slot (which stays active even on release) and we are trying to drop
1346  * that here. In practice, the chances of hitting this scenario are
1347  * low, because during slot synchronization the temporary slot is
1348  * immediately converted to persistent and thus is safe due to the
1349  * shared lock taken on the database. So, we'll just bail out in such
1350  * a case.
1351  *
1352  * XXX: We can consider shutting down the slot sync worker before
1353  * trying to drop synced temporary slots here.
1354  */
1355  if (active_pid)
1356  ereport(ERROR,
1357  (errcode(ERRCODE_OBJECT_IN_USE),
1358  errmsg("replication slot \"%s\" is active for PID %d",
1359  slotname, active_pid)));
1360 
1361  /*
1362  * To avoid duplicating ReplicationSlotDropAcquired() and to avoid
1363  * holding ReplicationSlotControlLock over filesystem operations,
1364  * release ReplicationSlotControlLock and use
1365  * ReplicationSlotDropAcquired.
1366  *
1367  * As that means the set of slots could change, restart scan from the
1368  * beginning each time we release the lock.
1369  */
1370  LWLockRelease(ReplicationSlotControlLock);
1372  goto restart;
1373  }
 /* reached only when a full pass found no matching slot left to drop */
1374  LWLockRelease(ReplicationSlotControlLock);
1375 }
1376 
1377 
1378 /*
1379  * Check whether the server's configuration supports using replication
1380  * slots.
 *
 * Does not return a status: an unmet requirement is reported with
 * ereport(ERROR) using ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE.
1381  */
1382 void
1384 {
1385  /*
1386  * NB: Adding a new requirement likely means that RestoreSlotFromDisk()
1387  * needs the same check.
1388  */
1389 
 /* slots are unusable when none are configured */
1390  if (max_replication_slots == 0)
1391  ereport(ERROR,
1392  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1393  errmsg("replication slots can only be used if \"max_replication_slots\" > 0")));
1394 
1396  ereport(ERROR,
1397  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1398  errmsg("replication slots can only be used if \"wal_level\" >= \"replica\"")));
1399 }
1400 
1401 /*
1402  * Check whether the user has privilege to use replication slots.
 *
 * Raises an ERROR with ERRCODE_INSUFFICIENT_PRIVILEGE when
 * has_rolreplication() rejects the current user; otherwise simply returns.
1403  */
1404 void
1406 {
1407  if (!has_rolreplication(GetUserId()))
1408  ereport(ERROR,
1409  (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
1410  errmsg("permission denied to use replication slots"),
1411  errdetail("Only roles with the %s attribute may use replication slots.",
1412  "REPLICATION")));
1413 }
1414 
1415 /*
1416  * Reserve WAL for the currently active slot.
1417  *
1418  * Compute and set restart_lsn in a manner that's appropriate for the type of
1419  * the slot and concurrency safe.
 *
 * The starting point is chosen as follows: physical slots use the last redo
 * pointer; logical slots use the replay pointer while in recovery and the
 * WAL insert pointer otherwise. When not in recovery, a logical slot
 * additionally gets a standby snapshot logged and flushed before returning.
1420  */
1421 void
1423 {
1425 
1426  Assert(slot != NULL);
1428 
1429  /*
1430  * The replication slot mechanism is used to prevent removal of required
1431  * WAL. As there is no interlock between this routine and checkpoints, WAL
1432  * segments could concurrently be removed when a now stale return value of
1433  * ReplicationSlotsComputeRequiredLSN() is used. In the unlikely case that
1434  * this happens we'll just retry.
1435  */
1436  while (true)
1437  {
1438  XLogSegNo segno;
1439  XLogRecPtr restart_lsn;
1440 
1441  /*
1442  * For logical slots log a standby snapshot and start logical decoding
1443  * at exactly that position. That allows the slot to start up more
1444  * quickly. But on a standby we cannot do WAL writes, so just use the
1445  * replay pointer; effectively, an attempt to create a logical slot on
1446  * standby will cause it to wait for an xl_running_xact record to be
1447  * logged independently on the primary, so that a snapshot can be
1448  * built using the record.
1449  *
1450  * None of this is needed (or indeed helpful) for physical slots as
1451  * they'll start replay at the last logged checkpoint anyway. Instead
1452  * return the location of the last redo LSN. While that slightly
1453  * increases the chance that we have to retry, it's where a base
1454  * backup has to start replay at.
1455  */
1456  if (SlotIsPhysical(slot))
1457  restart_lsn = GetRedoRecPtr();
1458  else if (RecoveryInProgress())
1459  restart_lsn = GetXLogReplayRecPtr(NULL);
1460  else
1461  restart_lsn = GetXLogInsertRecPtr();
1462 
 /* store the candidate position in the slot under its mutex */
1463  SpinLockAcquire(&slot->mutex);
1464  slot->data.restart_lsn = restart_lsn;
1465  SpinLockRelease(&slot->mutex);
1466 
1467  /* prevent WAL removal as fast as possible */
1469 
1470  /*
1471  * If all required WAL is still there, great, otherwise retry. The
1472  * slot should prevent further removal of WAL, unless there's a
1473  * concurrent ReplicationSlotsComputeRequiredLSN() after we've written
1474  * the new restart_lsn above, so normally we should never need to loop
1475  * more than twice.
1476  */
 /* the last removed segment precedes segno, so nothing we need is gone */
1478  if (XLogGetLastRemovedSegno() < segno)
1479  break;
1480  }
1481 
1482  if (!RecoveryInProgress() && SlotIsLogical(slot))
1483  {
1484  XLogRecPtr flushptr;
1485 
1486  /* make sure we have enough information to start */
1487  flushptr = LogStandbySnapshot();
1488 
1489  /* and make sure it's fsynced to disk */
1490  XLogFlush(flushptr);
1491  }
1492 }
1493 
1494 /*
1495  * Report that replication slot needs to be invalidated
 *
 * Emits a single LOG message. If 'terminating' is true the message says that
 * process 'pid' is being terminated to release the slot; otherwise it says
 * that the slot is being invalidated. The errdetail text depends on the
 * invalidation cause, and a hint about max_slot_wal_keep_size is attached
 * only for RS_INVAL_WAL_REMOVED.
1496  */
1497 static void
1499  bool terminating,
1500  int pid,
1501  NameData slotname,
1502  XLogRecPtr restart_lsn,
1503  XLogRecPtr oldestLSN,
1504  TransactionId snapshotConflictHorizon)
1505 {
1506  StringInfoData err_detail;
1507  bool hint = false;
1508 
1509  initStringInfo(&err_detail);
1510 
 /* build the cause-specific detail text */
1511  switch (cause)
1512  {
1513  case RS_INVAL_WAL_REMOVED:
1514  {
 /* distance from restart_lsn to oldestLSN, reported in bytes */
1515  unsigned long long ex = oldestLSN - restart_lsn;
1516 
1517  hint = true;
1518  appendStringInfo(&err_detail,
1519  ngettext("The slot's restart_lsn %X/%X exceeds the limit by %llu byte.",
1520  "The slot's restart_lsn %X/%X exceeds the limit by %llu bytes.",
1521  ex),
1522  LSN_FORMAT_ARGS(restart_lsn),
1523  ex);
1524  break;
1525  }
1526  case RS_INVAL_HORIZON:
1527  appendStringInfo(&err_detail, _("The slot conflicted with xid horizon %u."),
1528  snapshotConflictHorizon);
1529  break;
1530 
1531  case RS_INVAL_WAL_LEVEL:
1532  appendStringInfoString(&err_detail, _("Logical decoding on standby requires \"wal_level\" >= \"logical\" on the primary server."));
1533  break;
 /* callers are expected to pass a real invalidation cause */
1534  case RS_INVAL_NONE:
1535  pg_unreachable();
1536  }
1537 
 /* the detail was already translated above, hence errdetail_internal */
1538  ereport(LOG,
1539  terminating ?
1540  errmsg("terminating process %d to release replication slot \"%s\"",
1541  pid, NameStr(slotname)) :
1542  errmsg("invalidating obsolete replication slot \"%s\"",
1543  NameStr(slotname)),
1544  errdetail_internal("%s", err_detail.data),
1545  hint ? errhint("You might need to increase \"%s\".", "max_slot_wal_keep_size") : 0);
1546 
1547  pfree(err_detail.data);
1548 }
1549 
1550 /*
1551  * Helper for InvalidateObsoleteReplicationSlots
1552  *
1553  * Acquires the given slot and marks it invalid, if necessary and possible.
1554  *
1555  * Returns whether ReplicationSlotControlLock was released in the interim (and
1556  * in that case we're not holding the lock at return, otherwise we are).
1557  *
1558  * Sets *invalidated true if the slot was invalidated. (Untouched otherwise.)
1559  *
1560  * This is inherently racy, because we release the LWLock
1561  * for syscalls, so caller must restart if we return true.
1562  */
1563 static bool
1565  ReplicationSlot *s,
1566  XLogRecPtr oldestLSN,
1567  Oid dboid, TransactionId snapshotConflictHorizon,
1568  bool *invalidated)
1569 {
1570  int last_signaled_pid = 0;
1571  bool released_lock = false;
1572  bool terminated = false;
1573  TransactionId initial_effective_xmin = InvalidTransactionId;
1574  TransactionId initial_catalog_effective_xmin = InvalidTransactionId;
1575  XLogRecPtr initial_restart_lsn = InvalidXLogRecPtr;
1577 
1578  for (;;)
1579  {
1580  XLogRecPtr restart_lsn;
1581  NameData slotname;
1582  int active_pid = 0;
1583  ReplicationSlotInvalidationCause invalidation_cause = RS_INVAL_NONE;
1584 
1585  Assert(LWLockHeldByMeInMode(ReplicationSlotControlLock, LW_SHARED));
1586 
1587  if (!s->in_use)
1588  {
 /* if we report the lock as released, we must not hold it at return */
1589  if (released_lock)
1590  LWLockRelease(ReplicationSlotControlLock);
1591  break;
1592  }
1593 
1594  /*
1595  * Check if the slot needs to be invalidated. If it needs to be
1596  * invalidated, and is not currently acquired, acquire it and mark it
1597  * as having been invalidated. We do this with the spinlock held to
1598  * avoid race conditions -- for example the restart_lsn could move
1599  * forward, or the slot could be dropped.
1600  */
1601  SpinLockAcquire(&s->mutex);
1602 
1603  restart_lsn = s->data.restart_lsn;
1604 
1605  /* we do nothing if the slot is already invalid */
1606  if (s->data.invalidated == RS_INVAL_NONE)
1607  {
1608  /*
1609  * The slot's mutex will be released soon, and it is possible that
1610  * those values change since the process holding the slot has been
1611  * terminated (if any), so record them here to ensure that we
1612  * would report the correct invalidation cause.
1613  */
1614  if (!terminated)
1615  {
1616  initial_restart_lsn = s->data.restart_lsn;
1617  initial_effective_xmin = s->effective_xmin;
1618  initial_catalog_effective_xmin = s->effective_catalog_xmin;
1619  }
1620 
 /* decide, using the recorded values, whether 'cause' applies */
1621  switch (cause)
1622  {
1623  case RS_INVAL_WAL_REMOVED:
1624  if (initial_restart_lsn != InvalidXLogRecPtr &&
1625  initial_restart_lsn < oldestLSN)
1626  invalidation_cause = cause;
1627  break;
1628  case RS_INVAL_HORIZON:
1629  if (!SlotIsLogical(s))
1630  break;
1631  /* invalid DB oid signals a shared relation */
1632  if (dboid != InvalidOid && dboid != s->data.database)
1633  break;
1634  if (TransactionIdIsValid(initial_effective_xmin) &&
1635  TransactionIdPrecedesOrEquals(initial_effective_xmin,
1636  snapshotConflictHorizon))
1637  invalidation_cause = cause;
1638  else if (TransactionIdIsValid(initial_catalog_effective_xmin) &&
1639  TransactionIdPrecedesOrEquals(initial_catalog_effective_xmin,
1640  snapshotConflictHorizon))
1641  invalidation_cause = cause;
1642  break;
1643  case RS_INVAL_WAL_LEVEL:
1644  if (SlotIsLogical(s))
1645  invalidation_cause = cause;
1646  break;
1647  case RS_INVAL_NONE:
1648  pg_unreachable();
1649  }
1650  }
1651 
1652  /*
1653  * The invalidation cause recorded previously should not change while
1654  * the process owning the slot (if any) has been terminated.
1655  */
1656  Assert(!(invalidation_cause_prev != RS_INVAL_NONE && terminated &&
1657  invalidation_cause_prev != invalidation_cause));
1658 
1659  /* if there's no invalidation, we're done */
1660  if (invalidation_cause == RS_INVAL_NONE)
1661  {
1662  SpinLockRelease(&s->mutex);
1663  if (released_lock)
1664  LWLockRelease(ReplicationSlotControlLock);
1665  break;
1666  }
1667 
 /* copy name and owner while still holding the spinlock */
1668  slotname = s->data.name;
1669  active_pid = s->active_pid;
1670 
1671  /*
1672  * If the slot can be acquired, do so and mark it invalidated
1673  * immediately. Otherwise we'll signal the owning process, below, and
1674  * retry.
1675  */
1676  if (active_pid == 0)
1677  {
1678  MyReplicationSlot = s;
1679  s->active_pid = MyProcPid;
1680  s->data.invalidated = invalidation_cause;
1681 
1682  /*
1683  * XXX: We should consider not overwriting restart_lsn and instead
1684  * just rely on .invalidated.
1685  */
1686  if (invalidation_cause == RS_INVAL_WAL_REMOVED)
1688 
1689  /* Let caller know */
1690  *invalidated = true;
1691  }
1692 
1693  SpinLockRelease(&s->mutex);
1694 
1695  /*
1696  * The logical replication slots shouldn't be invalidated as GUC
1697  * max_slot_wal_keep_size is set to -1 during the binary upgrade. See
1698  * check_old_cluster_for_valid_slots() where we ensure that no slots
1699  * were invalidated before the upgrade.
1700  */
1701  Assert(!(*invalidated && SlotIsLogical(s) && IsBinaryUpgrade));
1702 
1703  if (active_pid != 0)
1704  {
1705  /*
1706  * Prepare the sleep on the slot's condition variable before
1707  * releasing the lock, to close a possible race condition if the
1708  * slot is released before the sleep below.
1709  */
1711 
1712  LWLockRelease(ReplicationSlotControlLock);
1713  released_lock = true;
1714 
1715  /*
1716  * Signal to terminate the process that owns the slot, if we
1717  * haven't already signalled it. (Avoidance of repeated
1718  * signalling is the only reason for there to be a loop in this
1719  * routine; otherwise we could rely on caller's restart loop.)
1720  *
1721  * There is the race condition that other process may own the slot
1722  * after its current owner process is terminated and before this
1723  * process owns it. To handle that, we signal only if the PID of
1724  * the owning process has changed from the previous time. (This
1725  * logic assumes that the same PID is not reused very quickly.)
1726  */
1727  if (last_signaled_pid != active_pid)
1728  {
1729  ReportSlotInvalidation(invalidation_cause, true, active_pid,
1730  slotname, restart_lsn,
1731  oldestLSN, snapshotConflictHorizon);
1732 
 /* the startup process uses SendProcSignal(); others send SIGTERM */
1733  if (MyBackendType == B_STARTUP)
1734  (void) SendProcSignal(active_pid,
1737  else
1738  (void) kill(active_pid, SIGTERM);
1739 
 /* remember what we signalled so the next iteration won't repeat it */
1740  last_signaled_pid = active_pid;
1741  terminated = true;
1742  invalidation_cause_prev = invalidation_cause;
1743  }
1744 
1745  /* Wait until the slot is released. */
1747  WAIT_EVENT_REPLICATION_SLOT_DROP);
1748 
1749  /*
1750  * Re-acquire lock and start over; we expect to invalidate the
1751  * slot next time (unless another process acquires the slot in the
1752  * meantime).
1753  */
1754  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1755  continue;
1756  }
1757  else
1758  {
1759  /*
1760  * We hold the slot now and have already invalidated it; flush it
1761  * to ensure that state persists.
1762  *
1763  * Don't want to hold ReplicationSlotControlLock across file
1764  * system operations, so release it now but be sure to tell caller
1765  * to restart from scratch.
1766  */
1767  LWLockRelease(ReplicationSlotControlLock);
1768  released_lock = true;
1769 
1770  /* Make sure the invalidated state persists across server restart */
1774 
1775  ReportSlotInvalidation(invalidation_cause, false, active_pid,
1776  slotname, restart_lsn,
1777  oldestLSN, snapshotConflictHorizon);
1778 
1779  /* done with this slot for now */
1780  break;
1781  }
1782  }
1783 
 /* cross-check the documented contract between return value and lock */
1784  Assert(released_lock == !LWLockHeldByMe(ReplicationSlotControlLock));
1785 
1786  return released_lock;
1787 }
1788 
1789 /*
1790  * Invalidate slots that require resources about to be removed.
1791  *
1792  * Returns true when any slot has been invalidated.
1793  *
1794  * Whether a slot needs to be invalidated depends on the cause. A slot is
1795  * invalidated if it:
1796  * - RS_INVAL_WAL_REMOVED: requires a LSN older than the given segment
1797  * - RS_INVAL_HORIZON: requires a snapshot <= the given horizon in the given
1798  * db; dboid may be InvalidOid for shared relations
1799  * - RS_INVAL_WAL_LEVEL: is logical
1800  *
1801  * NB - this runs as part of checkpoint, so avoid raising errors if possible.
1802  */
1803 bool
1805  XLogSegNo oldestSegno, Oid dboid,
1806  TransactionId snapshotConflictHorizon)
1807 {
1808  XLogRecPtr oldestLSN;
1809  bool invalidated = false;
1810 
 /* each cause requires its own argument to be meaningful */
1811  Assert(cause != RS_INVAL_HORIZON || TransactionIdIsValid(snapshotConflictHorizon));
1812  Assert(cause != RS_INVAL_WAL_REMOVED || oldestSegno > 0);
1813  Assert(cause != RS_INVAL_NONE);
1814 
1815  if (max_replication_slots == 0)
1816  return invalidated;
1817 
 /* convert the segment number into the LSN at the start of that segment */
1818  XLogSegNoOffsetToRecPtr(oldestSegno, 0, wal_segment_size, oldestLSN);
1819 
1820 restart:
1821  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1822  for (int i = 0; i < max_replication_slots; i++)
1823  {
1825 
1826  if (!s->in_use)
1827  continue;
1828 
1829  if (InvalidatePossiblyObsoleteSlot(cause, s, oldestLSN, dboid,
1830  snapshotConflictHorizon,
1831  &invalidated))
1832  {
1833  /* if the lock was released, start from scratch */
1834  goto restart;
1835  }
1836  }
1837  LWLockRelease(ReplicationSlotControlLock);
1838 
1839  /*
1840  * If any slots have been invalidated, recalculate the resource limits.
1841  */
1842  if (invalidated)
1843  {
1846  }
1847 
1848  return invalidated;
1849 }
1850 
1851 /*
1852  * Flush all replication slots to disk.
1853  *
1854  * It is convenient to flush dirty replication slots at the time of checkpoint.
1855  * Additionally, in case of a shutdown checkpoint, we also identify the slots
1856  * for which the confirmed_flush LSN has been updated since the last time it
1857  * was saved and flush them.
 *
 * Write failures are reported at LOG level by SaveSlotToPath() rather than
 * raised as errors, so the loop continues with the remaining slots.
1858  */
1859 void
1861 {
1862  int i;
1863 
1864  elog(DEBUG1, "performing replication slot checkpoint");
1865 
1866  /*
1867  * Prevent any slot from being created/dropped while we're active. As we
1868  * explicitly do *not* want to block iterating over replication_slots or
1869  * acquiring a slot we cannot take the control lock - but that's OK,
1870  * because holding ReplicationSlotAllocationLock is strictly stronger, and
1871  * enough to guarantee that nobody can change the in_use bits on us.
1872  */
1873  LWLockAcquire(ReplicationSlotAllocationLock, LW_SHARED);
1874 
1875  for (i = 0; i < max_replication_slots; i++)
1876  {
1878  char path[MAXPGPATH];
1879 
1880  if (!s->in_use)
1881  continue;
1882 
1883  /* save the slot to disk, locking is handled in SaveSlotToPath() */
1884  sprintf(path, "%s/%s", PG_REPLSLOT_DIR, NameStr(s->data.name));
1885 
1886  /*
1887  * Slot's data is not flushed each time the confirmed_flush LSN is
1888  * updated as that could lead to frequent writes. However, we decide
1889  * to force a flush of all logical slot's data at the time of shutdown
1890  * if the confirmed_flush LSN is changed since we last flushed it to
1891  * disk. This helps in avoiding an unnecessary retreat of the
1892  * confirmed_flush LSN after restart.
1893  */
1894  if (is_shutdown && SlotIsLogical(s))
1895  {
1896  SpinLockAcquire(&s->mutex);
1897 
 /* mark the slot dirty so that SaveSlotToPath() below writes it out */
1898  if (s->data.invalidated == RS_INVAL_NONE &&
1900  {
1901  s->just_dirtied = true;
1902  s->dirty = true;
1903  }
1904  SpinLockRelease(&s->mutex);
1905  }
1906 
 /* no-op for slots that are not dirty */
1907  SaveSlotToPath(s, path, LOG);
1908  }
1909  LWLockRelease(ReplicationSlotAllocationLock);
1910 }
1911 
1912 /*
1913  * Load all replication slots from disk into memory at server startup. This
1914  * needs to be run before we start crash recovery.
 *
 * Every entry of PG_REPLSLOT_DIR is examined: directories whose name ends in
 * ".tmp" are leftovers and get removed, other directories are handed to
 * RestoreSlotFromDisk().
1915  */
1916 void
1918 {
1919  DIR *replication_dir;
1920  struct dirent *replication_de;
1921 
1922  elog(DEBUG1, "starting up replication slots");
1923 
1924  /* restore all slots by iterating over all on-disk entries */
1925  replication_dir = AllocateDir(PG_REPLSLOT_DIR);
1926  while ((replication_de = ReadDir(replication_dir, PG_REPLSLOT_DIR)) != NULL)
1927  {
1928  char path[MAXPGPATH + sizeof(PG_REPLSLOT_DIR)];
1929  PGFileType de_type;
1930 
1931  if (strcmp(replication_de->d_name, ".") == 0 ||
1932  strcmp(replication_de->d_name, "..") == 0)
1933  continue;
1934 
1935  snprintf(path, sizeof(path), "%s/%s", PG_REPLSLOT_DIR, replication_de->d_name);
1936  de_type = get_dirent_type(path, replication_de, false, DEBUG1);
1937 
1938  /*
 * We're only creating directories here, skip if it's not ours. Note
 * that entries whose type could not be determined (PGFILETYPE_ERROR)
 * are not skipped by this test.
 */
1939  if (de_type != PGFILETYPE_ERROR && de_type != PGFILETYPE_DIR)
1940  continue;
1941 
1942  /* we crashed while a slot was being setup or deleted, clean up */
1943  if (pg_str_endswith(replication_de->d_name, ".tmp"))
1944  {
 /* failure to remove is only a WARNING; move on to the next entry */
1945  if (!rmtree(path, true))
1946  {
1947  ereport(WARNING,
1948  (errmsg("could not remove directory \"%s\"",
1949  path)));
1950  continue;
1951  }
1953  continue;
1954  }
1955 
1956  /* looks like a slot in a normal state, restore */
1957  RestoreSlotFromDisk(replication_de->d_name);
1958  }
1959  FreeDir(replication_dir);
1960 
1961  /* currently no slots exist, we're done. */
1962  if (max_replication_slots <= 0)
1963  return;
1964 
1965  /* Now that we have recovered all the data, compute replication xmin */
1968 }
1969 
1970 /* ----
1971  * Manipulation of on-disk state of replication slots
1972  *
1973  * NB: none of the routines below should take any notice whether a slot is the
1974  * current one or not, that's all handled a layer above.
1975  * ----
1976  */

/*
 * Create the on-disk directory and initial state file for a slot.
 *
 * The state is first written into "<name>.tmp" under PG_REPLSLOT_DIR, and
 * that directory is then renamed to its final name. Failures to create the
 * directory, write the state, or rename are raised as ERROR.
 */
1977 static void
1979 {
1980  char tmppath[MAXPGPATH];
1981  char path[MAXPGPATH];
1982  struct stat st;
1983 
1984  /*
1985  * No need to take out the io_in_progress_lock, nobody else can see this
1986  * slot yet, so nobody else will write. We're reusing SaveSlotToPath which
1987  * takes out the lock, if we'd take the lock here, we'd deadlock.
1988  */
1989 
 /* final location, and the temporary one the slot is built in first */
1990  sprintf(path, "%s/%s", PG_REPLSLOT_DIR, NameStr(slot->data.name));
1991  sprintf(tmppath, "%s/%s.tmp", PG_REPLSLOT_DIR, NameStr(slot->data.name));
1992 
1993  /*
1994  * It's just barely possible that some previous effort to create or drop a
1995  * slot with this name left a temp directory lying around. If that seems
1996  * to be the case, try to remove it. If the rmtree() fails, we'll error
1997  * out at the MakePGDirectory() below, so we don't bother checking
1998  * success.
1999  */
2000  if (stat(tmppath, &st) == 0 && S_ISDIR(st.st_mode))
2001  rmtree(tmppath, true);
2002 
2003  /* Create and fsync the temporary slot directory. */
2004  if (MakePGDirectory(tmppath) < 0)
2005  ereport(ERROR,
2007  errmsg("could not create directory \"%s\": %m",
2008  tmppath)));
2009  fsync_fname(tmppath, true);
2010 
2011  /* Write the actual state file. */
2012  slot->dirty = true; /* signal that we really need to write */
2013  SaveSlotToPath(slot, tmppath, ERROR);
2014 
2015  /* Rename the directory into place. */
2016  if (rename(tmppath, path) != 0)
2017  ereport(ERROR,
2019  errmsg("could not rename file \"%s\" to \"%s\": %m",
2020  tmppath, path)));
2021 
2022  /*
2023  * If we'd now fail - really unlikely - we wouldn't know whether this slot
2024  * would persist after an OS crash or not - so, force a restart. The
2025  * restart would try to fsync this again till it works.
2026  */
2028 
 /* make the renamed directory itself durable */
2029  fsync_fname(path, true);
2031 
2032  END_CRIT_SECTION();
2033 }
2034 
2035 /*
2036  * Shared functionality between saving and creating a replication slot.
 *
 * Writes the slot's persistent data to "<dir>/state" by way of a temporary
 * file "<dir>/state.tmp" that is written, fsynced, closed and then renamed
 * into place. Does nothing if the slot is not marked dirty.
 *
 * Failures are reported at 'elevel'. If that level does not throw, the
 * function returns early and the slot's dirty flag is left set.
2037  */
2038 static void
2039 SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
2040 {
2041  char tmppath[MAXPGPATH];
2042  char path[MAXPGPATH];
2043  int fd;
2045  bool was_dirty;
2046 
2047  /* first check whether there's something to write out */
2048  SpinLockAcquire(&slot->mutex);
2049  was_dirty = slot->dirty;
 /* reset so we can tell at the end whether somebody dirtied it again */
2050  slot->just_dirtied = false;
2051  SpinLockRelease(&slot->mutex);
2052 
2053  /* and don't do anything if there's nothing to write */
2054  if (!was_dirty)
2055  return;
2056 
2058 
2059  /* silence valgrind :( */
2060  memset(&cp, 0, sizeof(ReplicationSlotOnDisk));
2061 
2062  sprintf(tmppath, "%s/state.tmp", dir);
2063  sprintf(path, "%s/state", dir);
2064 
 /* O_EXCL: fail rather than reuse a state.tmp that already exists */
2065  fd = OpenTransientFile(tmppath, O_CREAT | O_EXCL | O_WRONLY | PG_BINARY);
2066  if (fd < 0)
2067  {
2068  /*
2069  * If not an ERROR, then release the lock before returning. In case
2070  * of an ERROR, the error recovery path automatically releases the
2071  * lock, but no harm in explicitly releasing even in that case. Note
2072  * that LWLockRelease() could affect errno.
2073  */
2074  int save_errno = errno;
2075 
2077  errno = save_errno;
2078  ereport(elevel,
2080  errmsg("could not create file \"%s\": %m",
2081  tmppath)));
2082  return;
2083  }
2084 
 /* fill in the on-disk header fields */
2085  cp.magic = SLOT_MAGIC;
2086  INIT_CRC32C(cp.checksum);
2087  cp.version = SLOT_VERSION;
2089 
 /* copy the persistent data under the slot's mutex */
2090  SpinLockAcquire(&slot->mutex);
2091 
2092  memcpy(&cp.slotdata, &slot->data, sizeof(ReplicationSlotPersistentData));
2093 
2094  SpinLockRelease(&slot->mutex);
2095 
 /* checksum starts after the leading part that is not checksummed */
2096  COMP_CRC32C(cp.checksum,
2097  (char *) (&cp) + ReplicationSlotOnDiskNotChecksummedSize,
2099  FIN_CRC32C(cp.checksum);
2100 
 /* clear errno so a short write that doesn't set it can be detected */
2101  errno = 0;
2102  pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_WRITE);
2103  if ((write(fd, &cp, sizeof(cp))) != sizeof(cp))
2104  {
2105  int save_errno = errno;
2106 
2110 
2111  /* if write didn't set errno, assume problem is no disk space */
2112  errno = save_errno ? save_errno : ENOSPC;
2113  ereport(elevel,
2115  errmsg("could not write to file \"%s\": %m",
2116  tmppath)));
2117  return;
2118  }
2120 
2121  /* fsync the temporary file */
2122  pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_SYNC);
2123  if (pg_fsync(fd) != 0)
2124  {
2125  int save_errno = errno;
2126 
2130  errno = save_errno;
2131  ereport(elevel,
2133  errmsg("could not fsync file \"%s\": %m",
2134  tmppath)));
2135  return;
2136  }
2138 
2139  if (CloseTransientFile(fd) != 0)
2140  {
2141  int save_errno = errno;
2142 
2144  errno = save_errno;
2145  ereport(elevel,
2147  errmsg("could not close file \"%s\": %m",
2148  tmppath)));
2149  return;
2150  }
2151 
2152  /* rename to permanent file, fsync file and directory */
2153  if (rename(tmppath, path) != 0)
2154  {
2155  int save_errno = errno;
2156 
2158  errno = save_errno;
2159  ereport(elevel,
2161  errmsg("could not rename file \"%s\" to \"%s\": %m",
2162  tmppath, path)));
2163  return;
2164  }
2165 
2166  /*
2167  * Check CreateSlotOnDisk() for the reasoning of using a critical section.
2168  */
2170 
2171  fsync_fname(path, false);
2172  fsync_fname(dir, true);
2174 
2175  END_CRIT_SECTION();
2176 
2177  /*
2178  * Successfully wrote, unset dirty bit, unless somebody dirtied again
2179  * already and remember the confirmed_flush LSN value.
2180  */
2181  SpinLockAcquire(&slot->mutex);
2182  if (!slot->just_dirtied)
2183  slot->dirty = false;
2185  SpinLockRelease(&slot->mutex);
2186 
2188 }
2189 
2190 /*
2191  * Load a single slot from disk into memory.
2192  */
2193 static void
2195 {
2197  int i;
2198  char slotdir[MAXPGPATH + sizeof(PG_REPLSLOT_DIR)];
2199  char path[MAXPGPATH + sizeof(PG_REPLSLOT_DIR) + 10];
2200  int fd;
2201  bool restored = false;
2202  int readBytes;
2203  pg_crc32c checksum;
2204 
2205  /* no need to lock here, no concurrent access allowed yet */
2206 
2207  /* delete temp file if it exists */
2208  sprintf(slotdir, "%s/%s", PG_REPLSLOT_DIR, name);
2209  sprintf(path, "%s/state.tmp", slotdir);
2210  if (unlink(path) < 0 && errno != ENOENT)
2211  ereport(PANIC,
2213  errmsg("could not remove file \"%s\": %m", path)));
2214 
2215  sprintf(path, "%s/state", slotdir);
2216 
2217  elog(DEBUG1, "restoring replication slot from \"%s\"", path);
2218 
2219  /* on some operating systems fsyncing a file requires O_RDWR */
2220  fd = OpenTransientFile(path, O_RDWR | PG_BINARY);
2221 
2222  /*
2223  * We do not need to handle this as we are rename()ing the directory into
2224  * place only after we fsync()ed the state file.
2225  */
2226  if (fd < 0)
2227  ereport(PANIC,
2229  errmsg("could not open file \"%s\": %m", path)));
2230 
2231  /*
2232  * Sync state file before we're reading from it. We might have crashed
2233  * while it wasn't synced yet and we shouldn't continue on that basis.
2234  */
2235  pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_RESTORE_SYNC);
2236  if (pg_fsync(fd) != 0)
2237  ereport(PANIC,
2239  errmsg("could not fsync file \"%s\": %m",
2240  path)));
2242 
2243  /* Also sync the parent directory */
2245  fsync_fname(slotdir, true);
2246  END_CRIT_SECTION();
2247 
2248  /* read part of statefile that's guaranteed to be version independent */
2249  pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_READ);
2250  readBytes = read(fd, &cp, ReplicationSlotOnDiskConstantSize);
2252  if (readBytes != ReplicationSlotOnDiskConstantSize)
2253  {
2254  if (readBytes < 0)
2255  ereport(PANIC,
2257  errmsg("could not read file \"%s\": %m", path)));
2258  else
2259  ereport(PANIC,
2261  errmsg("could not read file \"%s\": read %d of %zu",
2262  path, readBytes,
2264  }
2265 
2266  /* verify magic */
2267  if (cp.magic != SLOT_MAGIC)
2268  ereport(PANIC,
2270  errmsg("replication slot file \"%s\" has wrong magic number: %u instead of %u",
2271  path, cp.magic, SLOT_MAGIC)));
2272 
2273  /* verify version */
2274  if (cp.version != SLOT_VERSION)
2275  ereport(PANIC,
2277  errmsg("replication slot file \"%s\" has unsupported version %u",
2278  path, cp.version)));
2279 
2280  /* boundary check on length */
2282  ereport(PANIC,
2284  errmsg("replication slot file \"%s\" has corrupted length %u",
2285  path, cp.length)));
2286 
2287  /* Now that we know the size, read the entire file */
2288  pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_READ);
2289  readBytes = read(fd,
2290  (char *) &cp + ReplicationSlotOnDiskConstantSize,
2291  cp.length);
2293  if (readBytes != cp.length)
2294  {
2295  if (readBytes < 0)
2296  ereport(PANIC,
2298  errmsg("could not read file \"%s\": %m", path)));
2299  else
2300  ereport(PANIC,
2302  errmsg("could not read file \"%s\": read %d of %zu",
2303  path, readBytes, (Size) cp.length)));
2304  }
2305 
2306  if (CloseTransientFile(fd) != 0)
2307  ereport(PANIC,
2309  errmsg("could not close file \"%s\": %m", path)));
2310 
2311  /* now verify the CRC */
2312  INIT_CRC32C(checksum);
2313  COMP_CRC32C(checksum,
2316  FIN_CRC32C(checksum);
2317 
2318  if (!EQ_CRC32C(checksum, cp.checksum))
2319  ereport(PANIC,
2320  (errmsg("checksum mismatch for replication slot file \"%s\": is %u, should be %u",
2321  path, checksum, cp.checksum)));
2322 
2323  /*
2324  * If we crashed with an ephemeral slot active, don't restore but delete
2325  * it.
2326  */
2328  {
2329  if (!rmtree(slotdir, true))
2330  {
2331  ereport(WARNING,
2332  (errmsg("could not remove directory \"%s\"",
2333  slotdir)));
2334  }
2336  return;
2337  }
2338 
2339  /*
2340  * Verify that requirements for the specific slot type are met. That's
2341  * important because if these aren't met we're not guaranteed to retain
2342  * all the necessary resources for the slot.
2343  *
2344  * NB: We have to do so *after* the above checks for ephemeral slots,
2345  * because otherwise a slot that shouldn't exist anymore could prevent
2346  * restarts.
2347  *
2348  * NB: Changing the requirements here also requires adapting
2349  * CheckSlotRequirements() and CheckLogicalDecodingRequirements().
2350  */
2352  ereport(FATAL,
2353  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2354  errmsg("logical replication slot \"%s\" exists, but \"wal_level\" < \"logical\"",
2355  NameStr(cp.slotdata.name)),
2356  errhint("Change \"wal_level\" to be \"logical\" or higher.")));
2357  else if (wal_level < WAL_LEVEL_REPLICA)
2358  ereport(FATAL,
2359  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2360  errmsg("physical replication slot \"%s\" exists, but \"wal_level\" < \"replica\"",
2361  NameStr(cp.slotdata.name)),
2362  errhint("Change \"wal_level\" to be \"replica\" or higher.")));
2363 
2364  /* nothing can be active yet, don't lock anything */
2365  for (i = 0; i < max_replication_slots; i++)
2366  {
2367  ReplicationSlot *slot;
2368 
2370 
2371  if (slot->in_use)
2372  continue;
2373 
2374  /* restore the entire set of persistent data */
2375  memcpy(&slot->data, &cp.slotdata,
2377 
2378  /* initialize in memory state */
2379  slot->effective_xmin = cp.slotdata.xmin;
2382 
2387 
2388  slot->in_use = true;
2389  slot->active_pid = 0;
2390 
2391  /*
2392  * Set the time since the slot has become inactive after loading the
2393  * slot from the disk into memory. Whoever acquires the slot i.e.
2394  * makes the slot active will reset it.
2395  */
2397 
2398  restored = true;
2399  break;
2400  }
2401 
2402  if (!restored)
2403  ereport(FATAL,
2404  (errmsg("too many replication slots active before shutdown"),
2405  errhint("Increase \"max_replication_slots\" and try again.")));
2406 }
2407 
2408 /*
2409  * Maps an invalidation reason for a replication slot to
2410  * ReplicationSlotInvalidationCause.
2411  */
2413 GetSlotInvalidationCause(const char *invalidation_reason)
2414 {
2417  bool found PG_USED_FOR_ASSERTS_ONLY = false;
2418 
2419  Assert(invalidation_reason);
2420 
2421  for (cause = RS_INVAL_NONE; cause <= RS_INVAL_MAX_CAUSES; cause++)
2422  {
2423  if (strcmp(SlotInvalidationCauses[cause], invalidation_reason) == 0)
2424  {
2425  found = true;
2426  result = cause;
2427  break;
2428  }
2429  }
2430 
2431  Assert(found);
2432  return result;
2433 }
2434 
2435 /*
2436  * A helper function to validate slots specified in GUC synchronized_standby_slots.
2437  *
2438  * The rawname will be parsed, and the result will be saved into *elemlist.
2439  */
2440 static bool
2441 validate_sync_standby_slots(char *rawname, List **elemlist)
2442 {
2443  bool ok;
2444 
2445  /* Verify syntax and parse string into a list of identifiers */
2446  ok = SplitIdentifierString(rawname, ',', elemlist);
2447 
2448  if (!ok)
2449  {
2450  GUC_check_errdetail("List syntax is invalid.");
2451  }
2452  else if (!ReplicationSlotCtl)
2453  {
2454  /*
2455  * We cannot validate the replication slot if the replication slots'
2456  * data has not been initialized. This is ok as we will anyway
2457  * validate the specified slot when waiting for them to catch up. See
2458  * StandbySlotsHaveCaughtup() for details.
2459  */
2460  }
2461  else
2462  {
2463  /* Check that the specified slots exist and are logical slots */
2464  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
2465 
2466  foreach_ptr(char, name, *elemlist)
2467  {
2468  ReplicationSlot *slot;
2469 
2470  slot = SearchNamedReplicationSlot(name, false);
2471 
2472  if (!slot)
2473  {
2474  GUC_check_errdetail("replication slot \"%s\" does not exist",
2475  name);
2476  ok = false;
2477  break;
2478  }
2479 
2480  if (!SlotIsPhysical(slot))
2481  {
2482  GUC_check_errdetail("\"%s\" is not a physical replication slot",
2483  name);
2484  ok = false;
2485  break;
2486  }
2487  }
2488 
2489  LWLockRelease(ReplicationSlotControlLock);
2490  }
2491 
2492  return ok;
2493 }
2494 
2495 /*
2496  * GUC check_hook for synchronized_standby_slots
2497  */
2498 bool
2500 {
2501  char *rawname;
2502  char *ptr;
2503  List *elemlist;
2504  int size;
2505  bool ok;
2507 
2508  if ((*newval)[0] == '\0')
2509  return true;
2510 
2511  /* Need a modifiable copy of the GUC string */
2512  rawname = pstrdup(*newval);
2513 
2514  /* Now verify if the specified slots exist and have correct type */
2515  ok = validate_sync_standby_slots(rawname, &elemlist);
2516 
2517  if (!ok || elemlist == NIL)
2518  {
2519  pfree(rawname);
2520  list_free(elemlist);
2521  return ok;
2522  }
2523 
2524  /* Compute the size required for the SyncStandbySlotsConfigData struct */
2525  size = offsetof(SyncStandbySlotsConfigData, slot_names);
2526  foreach_ptr(char, slot_name, elemlist)
2527  size += strlen(slot_name) + 1;
2528 
2529  /* GUC extra value must be guc_malloc'd, not palloc'd */
2531 
2532  /* Transform the data into SyncStandbySlotsConfigData */
2533  config->nslotnames = list_length(elemlist);
2534 
2535  ptr = config->slot_names;
2536  foreach_ptr(char, slot_name, elemlist)
2537  {
2538  strcpy(ptr, slot_name);
2539  ptr += strlen(slot_name) + 1;
2540  }
2541 
2542  *extra = (void *) config;
2543 
2544  pfree(rawname);
2545  list_free(elemlist);
2546  return true;
2547 }
2548 
2549 /*
2550  * GUC assign_hook for synchronized_standby_slots
2551  */
2552 void
2554 {
2555  /*
2556  * The standby slots may have changed, so we must recompute the oldest
2557  * LSN.
2558  */
2560 
2562 }
2563 
2564 /*
2565  * Check if the passed slot_name is specified in the synchronized_standby_slots GUC.
2566  */
2567 bool
2568 SlotExistsInSyncStandbySlots(const char *slot_name)
2569 {
2570  const char *standby_slot_name;
2571 
2572  /* Return false if there is no value in synchronized_standby_slots */
2574  return false;
2575 
2576  /*
2577  * XXX: We are not expecting this list to be long so a linear search
2578  * shouldn't hurt but if that turns out not to be true then we can cache
2579  * this information for each WalSender as well.
2580  */
2581  standby_slot_name = synchronized_standby_slots_config->slot_names;
2582  for (int i = 0; i < synchronized_standby_slots_config->nslotnames; i++)
2583  {
2584  if (strcmp(standby_slot_name, slot_name) == 0)
2585  return true;
2586 
2587  standby_slot_name += strlen(standby_slot_name) + 1;
2588  }
2589 
2590  return false;
2591 }
2592 
2593 /*
2594  * Return true if the slots specified in synchronized_standby_slots have caught up to
2595  * the given WAL location, false otherwise.
2596  *
2597  * The elevel parameter specifies the error level used for logging messages
2598  * related to slots that do not exist, are invalidated, or are inactive.
2599  */
2600 bool
2601 StandbySlotsHaveCaughtup(XLogRecPtr wait_for_lsn, int elevel)
2602 {
2603  const char *name;
2604  int caught_up_slot_num = 0;
2605  XLogRecPtr min_restart_lsn = InvalidXLogRecPtr;
2606 
2607  /*
2608  * Don't need to wait for the standbys to catch up if there is no value in
2609  * synchronized_standby_slots.
2610  */
2612  return true;
2613 
2614  /*
2615  * Don't need to wait for the standbys to catch up if we are on a standby
2616  * server, since we do not support syncing slots to cascading standbys.
2617  */
2618  if (RecoveryInProgress())
2619  return true;
2620 
2621  /*
2622  * Don't need to wait for the standbys to catch up if they are already
2623  * beyond the specified WAL location.
2624  */
2626  ss_oldest_flush_lsn >= wait_for_lsn)
2627  return true;
2628 
2629  /*
2630  * To prevent concurrent slot dropping and creation while filtering the
2631  * slots, take the ReplicationSlotControlLock outside of the loop.
2632  */
2633  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
2634 
2636  for (int i = 0; i < synchronized_standby_slots_config->nslotnames; i++)
2637  {
2638  XLogRecPtr restart_lsn;
2639  bool invalidated;
2640  bool inactive;
2641  ReplicationSlot *slot;
2642 
2643  slot = SearchNamedReplicationSlot(name, false);
2644 
2645  if (!slot)
2646  {
2647  /*
2648  * If a slot name provided in synchronized_standby_slots does not
2649  * exist, report a message and exit the loop. A user can specify a
2650  * slot name that does not exist just before the server startup.
2651  * The GUC check_hook(validate_sync_standby_slots) cannot validate
2652  * such a slot during startup as the ReplicationSlotCtl shared
2653  * memory is not initialized at that time. It is also possible for
2654  * a user to drop the slot in synchronized_standby_slots
2655  * afterwards.
2656  */
2657  ereport(elevel,
2658  errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2659  errmsg("replication slot \"%s\" specified in parameter \"%s\" does not exist",
2660  name, "synchronized_standby_slots"),
2661  errdetail("Logical replication is waiting on the standby associated with replication slot \"%s\".",
2662  name),
2663  errhint("Create the replication slot \"%s\" or amend parameter \"%s\".",
2664  name, "synchronized_standby_slots"));
2665  break;
2666  }
2667 
2668  if (SlotIsLogical(slot))
2669  {
2670  /*
2671  * If a logical slot name is provided in
2672  * synchronized_standby_slots, report a message and exit the loop.
2673  * Similar to the non-existent case, a user can specify a logical
2674  * slot name in synchronized_standby_slots before the server
2675  * startup, or drop an existing physical slot and recreate a
2676  * logical slot with the same name.
2677  */
2678  ereport(elevel,
2679  errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2680  errmsg("cannot specify logical replication slot \"%s\" in parameter \"%s\"",
2681  name, "synchronized_standby_slots"),
2682  errdetail("Logical replication is waiting for correction on replication slot \"%s\".",
2683  name),
2684  errhint("Remove the logical replication slot \"%s\" from parameter \"%s\".",
2685  name, "synchronized_standby_slots"));
2686  break;
2687  }
2688 
2689  SpinLockAcquire(&slot->mutex);
2690  restart_lsn = slot->data.restart_lsn;
2691  invalidated = slot->data.invalidated != RS_INVAL_NONE;
2692  inactive = slot->active_pid == 0;
2693  SpinLockRelease(&slot->mutex);
2694 
2695  if (invalidated)
2696  {
2697  /* Specified physical slot has been invalidated */
2698  ereport(elevel,
2699  errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2700  errmsg("physical replication slot \"%s\" specified in parameter \"%s\" has been invalidated",
2701  name, "synchronized_standby_slots"),
2702  errdetail("Logical replication is waiting on the standby associated with replication slot \"%s\".",
2703  name),
2704  errhint("Drop and recreate the replication slot \"%s\", or amend parameter \"%s\".",
2705  name, "synchronized_standby_slots"));
2706  break;
2707  }
2708 
2709  if (XLogRecPtrIsInvalid(restart_lsn) || restart_lsn < wait_for_lsn)
2710  {
2711  /* Log a message if no active_pid for this physical slot */
2712  if (inactive)
2713  ereport(elevel,
2714  errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2715  errmsg("replication slot \"%s\" specified in parameter \"%s\" does not have active_pid",
2716  name, "synchronized_standby_slots"),
2717  errdetail("Logical replication is waiting on the standby associated with replication slot \"%s\".",
2718  name),
2719  errhint("Start the standby associated with the replication slot \"%s\", or amend parameter \"%s\".",
2720  name, "synchronized_standby_slots"));
2721 
2722  /* Continue if the current slot hasn't caught up. */
2723  break;
2724  }
2725 
2726  Assert(restart_lsn >= wait_for_lsn);
2727 
2728  if (XLogRecPtrIsInvalid(min_restart_lsn) ||
2729  min_restart_lsn > restart_lsn)
2730  min_restart_lsn = restart_lsn;
2731 
2732  caught_up_slot_num++;
2733 
2734  name += strlen(name) + 1;
2735  }
2736 
2737  LWLockRelease(ReplicationSlotControlLock);
2738 
2739  /*
2740  * Return false if not all the standbys have caught up to the specified
2741  * WAL location.
2742  */
2743  if (caught_up_slot_num != synchronized_standby_slots_config->nslotnames)
2744  return false;
2745 
2746  /* The ss_oldest_flush_lsn must not retreat. */
2748  min_restart_lsn >= ss_oldest_flush_lsn);
2749 
2750  ss_oldest_flush_lsn = min_restart_lsn;
2751 
2752  return true;
2753 }
2754 
2755 /*
2756  * Wait for physical standbys to confirm receiving the given lsn.
2757  *
2758  * Used by logical decoding SQL functions. It waits for physical standbys
2759  * corresponding to the physical slots specified in the synchronized_standby_slots GUC.
2760  */
2761 void
2763 {
2764  /*
2765  * Don't need to wait for the standby to catch up if the current acquired
2766  * slot is not a logical failover slot, or there is no value in
2767  * synchronized_standby_slots.
2768  */
2770  return;
2771 
2773 
2774  for (;;)
2775  {
2777 
2778  if (ConfigReloadPending)
2779  {
2780  ConfigReloadPending = false;
2782  }
2783 
2784  /* Exit if done waiting for every slot. */
2785  if (StandbySlotsHaveCaughtup(wait_for_lsn, WARNING))
2786  break;
2787 
2788  /*
2789  * Wait for the slots in the synchronized_standby_slots to catch up,
2790  * but use a timeout (1s) so we can also check if the
2791  * synchronized_standby_slots has been changed.
2792  */
2794  WAIT_EVENT_WAIT_FOR_STANDBY_CONFIRMATION);
2795  }
2796 
2798 }
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1644
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1608
#define NameStr(name)
Definition: c.h:746
unsigned int uint32
Definition: c.h:506
#define ngettext(s, p, n)
Definition: c.h:1181
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:182
#define Assert(condition)
Definition: c.h:858
#define PG_BINARY
Definition: c.h:1273
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:398
#define pg_unreachable()
Definition: c.h:296
#define lengthof(array)
Definition: c.h:788
#define MemSet(start, val, len)
Definition: c.h:1020
uint32 TransactionId
Definition: c.h:652
size_t Size
Definition: c.h:605
bool ConditionVariableCancelSleep(void)
bool ConditionVariableTimedSleep(ConditionVariable *cv, long timeout, uint32 wait_event_info)
void ConditionVariableBroadcast(ConditionVariable *cv)
void ConditionVariablePrepareToSleep(ConditionVariable *cv)
void ConditionVariableInit(ConditionVariable *cv)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
int64 TimestampTz
Definition: timestamp.h:39
int errdetail_internal(const char *fmt,...)
Definition: elog.c:1230
int errcode_for_file_access(void)
Definition: elog.c:876
int errdetail(const char *fmt,...)
Definition: elog.c:1203
int errhint(const char *fmt,...)
Definition: elog.c:1317
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define _(x)
Definition: elog.c:90
#define LOG
Definition: elog.h:31
#define FATAL
Definition: elog.h:41
#define WARNING
Definition: elog.h:36
#define PANIC
Definition: elog.h:42
#define DEBUG1
Definition: elog.h:30
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2932
int MakePGDirectory(const char *directoryName)
Definition: fd.c:3937
int FreeDir(DIR *dir)
Definition: fd.c:2984
int CloseTransientFile(int fd)
Definition: fd.c:2832
void fsync_fname(const char *fname, bool isdir)
Definition: fd.c:756
int pg_fsync(int fd)
Definition: fd.c:386
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2656
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2866
PGFileType get_dirent_type(const char *path, const struct dirent *de, bool look_through_symlinks, int elevel)
Definition: file_utils.c:526
PGFileType
Definition: file_utils.h:19
@ PGFILETYPE_DIR
Definition: file_utils.h:23
@ PGFILETYPE_ERROR
Definition: file_utils.h:20
bool IsBinaryUpgrade
Definition: globals.c:120
int MyProcPid
Definition: globals.c:46
bool IsUnderPostmaster
Definition: globals.c:119
Oid MyDatabaseId
Definition: globals.c:93
void * guc_malloc(int elevel, size_t size)
Definition: guc.c:637
#define newval
#define GUC_check_errdetail
Definition: guc.h:476
GucSource
Definition: guc.h:108
@ PGC_SIGHUP
Definition: guc.h:71
void ProcessConfigFile(GucContext context)
#define write(a, b, c)
Definition: win32.h:14
#define read(a, b, c)
Definition: win32.h:13
volatile sig_atomic_t ConfigReloadPending
Definition: interrupt.c:27
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:337
int i
Definition: isn.c:73
void list_free(List *list)
Definition: list.c:1546
bool LWLockHeldByMe(LWLock *lock)
Definition: lwlock.c:1893
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1168
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1937
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1781
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:707
@ LWTRANCHE_REPLICATION_SLOT_IO
Definition: lwlock.h:189
@ LW_SHARED
Definition: lwlock.h:115
@ LW_EXCLUSIVE
Definition: lwlock.h:114
char * pstrdup(const char *in)
Definition: mcxt.c:1696
void pfree(void *pointer)
Definition: mcxt.c:1521
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
@ B_STARTUP
Definition: miscadmin.h:355
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
Oid GetUserId(void)
Definition: miscinit.c:514
BackendType MyBackendType
Definition: miscinit.c:63
bool has_rolreplication(Oid roleid)
Definition: miscinit.c:711
void namestrcpy(Name name, const char *str)
Definition: name.c:233
void * arg
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
#define NAMEDATALEN
#define MAXPGPATH
uint32 pg_crc32c
Definition: pg_crc32c.h:38
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:98
#define EQ_CRC32C(c1, c2)
Definition: pg_crc32c.h:42
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:103
static int list_length(const List *l)
Definition: pg_list.h:152
#define NIL
Definition: pg_list.h:68
#define foreach_ptr(type, var, lst)
Definition: pg_list.h:469
static bool two_phase
static rewind_source * source
Definition: pg_rewind.c:89
void pgstat_create_replslot(ReplicationSlot *slot)
void pgstat_acquire_replslot(ReplicationSlot *slot)
void pgstat_drop_replslot(ReplicationSlot *slot)
#define sprintf
Definition: port.h:240
#define snprintf
Definition: port.h:238
uintptr_t Datum
Definition: postgres.h:64
#define InvalidOid
Definition: postgres_ext.h:36
unsigned int Oid
Definition: postgres_ext.h:31
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define PROC_IN_LOGICAL_DECODING
Definition: proc.h:61
void ProcArraySetReplicationSlotXmin(TransactionId xmin, TransactionId catalog_xmin, bool already_locked)
Definition: procarray.c:3947
#define INVALID_PROC_NUMBER
Definition: procnumber.h:26
int SendProcSignal(pid_t pid, ProcSignalReason reason, ProcNumber procNumber)
Definition: procsignal.c:281
@ PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT
Definition: procsignal.h:46
bool rmtree(const char *path, bool rmtopdir)
Definition: rmtree.c:50
Size add_size(Size s1, Size s2)
Definition: shmem.c:493
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:387
Size mul_size(Size s1, Size s2)
Definition: shmem.c:510
static pg_noinline void Size size
Definition: slab.c:607
ReplicationSlot * SearchNamedReplicationSlot(const char *name, bool need_lock)
Definition: slot.c:464
int ReplicationSlotIndex(ReplicationSlot *slot)
Definition: slot.c:497
char * synchronized_standby_slots
Definition: slot.c:148
void assign_synchronized_standby_slots(const char *newval, void *extra)
Definition: slot.c:2553
#define ReplicationSlotOnDiskChecksummedSize
Definition: slot.c:125
void CheckPointReplicationSlots(bool is_shutdown)
Definition: slot.c:1860
void ReplicationSlotCreate(const char *name, bool db_specific, ReplicationSlotPersistency persistency, bool two_phase, bool failover, bool synced)
Definition: slot.c:309
void ReplicationSlotDropAcquired(void)
Definition: slot.c:889
void ReplicationSlotMarkDirty(void)
Definition: slot.c:1031
void ReplicationSlotReserveWal(void)
Definition: slot.c:1422
bool ReplicationSlotsCountDBSlots(Oid dboid, int *nslots, int *nactive)
Definition: slot.c:1233
void ReplicationSlotAcquire(const char *name, bool nowait)
Definition: slot.c:540
bool InvalidateObsoleteReplicationSlots(ReplicationSlotInvalidationCause cause, XLogSegNo oldestSegno, Oid dboid, TransactionId snapshotConflictHorizon)
Definition: slot.c:1804
static XLogRecPtr ss_oldest_flush_lsn
Definition: slot.c:157
ReplicationSlotInvalidationCause GetSlotInvalidationCause(const char *invalidation_reason)
Definition: slot.c:2413
void ReplicationSlotsDropDBSlots(Oid dboid)
Definition: slot.c:1291
#define ReplicationSlotOnDiskNotChecksummedSize
Definition: slot.c:122
XLogRecPtr ReplicationSlotsComputeLogicalRestartLSN(void)
Definition: slot.c:1175
void ReplicationSlotsComputeRequiredXmin(bool already_locked)
Definition: slot.c:1070
static void RestoreSlotFromDisk(const char *name)
Definition: slot.c:2194
#define RS_INVAL_MAX_CAUSES
Definition: slot.c:113
void ReplicationSlotPersist(void)
Definition: slot.c:1048
ReplicationSlot * MyReplicationSlot
Definition: slot.c:138
static void SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
Definition: slot.c:2039
void ReplicationSlotDrop(const char *name, bool nowait)
Definition: slot.c:784
bool SlotExistsInSyncStandbySlots(const char *slot_name)
Definition: slot.c:2568
static bool validate_sync_standby_slots(char *rawname, List **elemlist)
Definition: slot.c:2441
void ReplicationSlotSave(void)
Definition: slot.c:1013
static void CreateSlotOnDisk(ReplicationSlot *slot)
Definition: slot.c:1978
#define ReplicationSlotOnDiskV2Size
Definition: slot.c:128
void CheckSlotPermissions(void)
Definition: slot.c:1405
bool ReplicationSlotName(int index, Name name)
Definition: slot.c:513
bool check_synchronized_standby_slots(char **newval, void **extra, GucSource source)
Definition: slot.c:2499
void ReplicationSlotsShmemInit(void)
Definition: slot.c:189
const char *const SlotInvalidationCauses[]
Definition: slot.c:105
void ReplicationSlotAlter(const char *name, const bool *failover, const bool *two_phase)
Definition: slot.c:807
void ReplicationSlotRelease(void)
Definition: slot.c:652
int max_replication_slots
Definition: slot.c:141
StaticAssertDecl(lengthof(SlotInvalidationCauses)==(RS_INVAL_MAX_CAUSES+1), "array length mismatch")
ReplicationSlotCtlData * ReplicationSlotCtl
Definition: slot.c:135
#define SLOT_VERSION
Definition: slot.c:132
struct ReplicationSlotOnDisk ReplicationSlotOnDisk
void WaitForStandbyConfirmation(XLogRecPtr wait_for_lsn)
Definition: slot.c:2762
bool StandbySlotsHaveCaughtup(XLogRecPtr wait_for_lsn, int elevel)
Definition: slot.c:2601
void ReplicationSlotsComputeRequiredLSN(void)
Definition: slot.c:1126
void ReplicationSlotCleanup(bool synced_only)
Definition: slot.c:745
void ReplicationSlotInitialize(void)
Definition: slot.c:224
static void ReplicationSlotDropPtr(ReplicationSlot *slot)
Definition: slot.c:906
static bool InvalidatePossiblyObsoleteSlot(ReplicationSlotInvalidationCause cause, ReplicationSlot *s, XLogRecPtr oldestLSN, Oid dboid, TransactionId snapshotConflictHorizon, bool *invalidated)
Definition: slot.c:1564
void StartupReplicationSlots(void)
Definition: slot.c:1917
void CheckSlotRequirements(void)
Definition: slot.c:1383
#define SLOT_MAGIC
Definition: slot.c:131
static void ReportSlotInvalidation(ReplicationSlotInvalidationCause cause, bool terminating, int pid, NameData slotname, XLogRecPtr restart_lsn, XLogRecPtr oldestLSN, TransactionId snapshotConflictHorizon)
Definition: slot.c:1498
static SyncStandbySlotsConfigData * synchronized_standby_slots_config
Definition: slot.c:151
#define ReplicationSlotOnDiskConstantSize
Definition: slot.c:119
Size ReplicationSlotsShmemSize(void)
Definition: slot.c:171
bool ReplicationSlotValidateName(const char *name, int elevel)
Definition: slot.c:252
static void ReplicationSlotShmemExit(int code, Datum arg)
Definition: slot.c:233
ReplicationSlotPersistency
Definition: slot.h:37
@ RS_PERSISTENT
Definition: slot.h:38
@ RS_EPHEMERAL
Definition: slot.h:39
@ RS_TEMPORARY
Definition: slot.h:40
#define SlotIsPhysical(slot)
Definition: slot.h:212
#define PG_REPLSLOT_DIR
Definition: slot.h:21
ReplicationSlotInvalidationCause
Definition: slot.h:51
@ RS_INVAL_WAL_REMOVED
Definition: slot.h:54
@ RS_INVAL_HORIZON
Definition: slot.h:56
@ RS_INVAL_WAL_LEVEL
Definition: slot.h:58
@ RS_INVAL_NONE
Definition: slot.h:52
#define SlotIsLogical(slot)
Definition: slot.h:213
bool IsSyncingReplicationSlots(void)
Definition: slotsync.c:1651
#define SpinLockInit(lock)
Definition: spin.h:57
#define SpinLockRelease(lock)
Definition: spin.h:61
#define SpinLockAcquire(lock)
Definition: spin.h:59
PGPROC * MyProc
Definition: proc.c:67
PROC_HDR * ProcGlobal
Definition: proc.c:79
XLogRecPtr LogStandbySnapshot(void)
Definition: standby.c:1285
#define ERRCODE_DUPLICATE_OBJECT
Definition: streamutil.c:32
bool pg_str_endswith(const char *str, const char *end)
Definition: string.c:32
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:97
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:182
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
Definition: dirent.c:26
Definition: pg_list.h:54
uint8 statusFlags
Definition: proc.h:237
int pgxactoff
Definition: proc.h:179
uint8 * statusFlags
Definition: proc.h:394
ReplicationSlot replication_slots[1]
Definition: slot.h:224
uint32 version
Definition: slot.c:73
ReplicationSlotPersistentData slotdata
Definition: slot.c:81
pg_crc32c checksum
Definition: slot.c:70
TransactionId xmin
Definition: slot.h:85
TransactionId catalog_xmin
Definition: slot.h:93
XLogRecPtr restart_lsn
Definition: slot.h:96
XLogRecPtr confirmed_flush
Definition: slot.h:107
ReplicationSlotPersistency persistency
Definition: slot.h:77
ReplicationSlotInvalidationCause invalidated
Definition: slot.h:99
XLogRecPtr candidate_xmin_lsn
Definition: slot.h:197
TransactionId effective_catalog_xmin
Definition: slot.h:178
slock_t mutex
Definition: slot.h:154
XLogRecPtr candidate_restart_valid
Definition: slot.h:198
XLogRecPtr last_saved_confirmed_flush
Definition: slot.h:206
pid_t active_pid
Definition: slot.h:160
bool in_use
Definition: slot.h:157
TransactionId effective_xmin
Definition: slot.h:177
bool just_dirtied
Definition: slot.h:163
XLogRecPtr candidate_restart_lsn
Definition: slot.h:199
LWLock io_in_progress_lock
Definition: slot.h:184
ConditionVariable active_cv
Definition: slot.h:187
TransactionId candidate_catalog_xmin
Definition: slot.h:196
bool dirty
Definition: slot.h:164
ReplicationSlotPersistentData data
Definition: slot.h:181
TimestampTz inactive_since
Definition: slot.h:209
char slot_names[FLEXIBLE_ARRAY_MEMBER]
Definition: slot.c:99
ConditionVariable wal_confirm_rcv_cv
Definition: dirent.h:10
char d_name[MAX_PATH]
Definition: dirent.h:15
Definition: type.h:95
Definition: c.h:741
unsigned short st_mode
Definition: win32_port.h:268
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:299
#define InvalidTransactionId
Definition: transam.h:31
#define TransactionIdIsValid(xid)
Definition: transam.h:41
bool SplitIdentifierString(char *rawstring, char separator, List **namelist)
Definition: varlena.c:3437
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:85
static void pgstat_report_wait_end(void)
Definition: wait_event.h:101
const char * name
bool am_walsender
Definition: walsender.c:115
bool log_replication_commands
Definition: walsender.c:125
WalSndCtlData * WalSndCtl
Definition: walsender.c:109
#define stat
Definition: win32_port.h:284
#define S_ISDIR(m)
Definition: win32_port.h:325
#define kill(pid, sig)
Definition: win32_port.h:503
bool RecoveryInProgress(void)
Definition: xlog.c:6333
XLogSegNo XLogGetLastRemovedSegno(void)
Definition: xlog.c:3751
XLogRecPtr GetRedoRecPtr(void)
Definition: xlog.c:6436
int wal_level
Definition: xlog.c:130
int wal_segment_size
Definition: xlog.c:142
void XLogSetReplicationSlotMinimumLSN(XLogRecPtr lsn)
Definition: xlog.c:2681
XLogRecPtr GetXLogInsertRecPtr(void)
Definition: xlog.c:9427
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2795
@ WAL_LEVEL_REPLICA
Definition: xlog.h:75
@ WAL_LEVEL_LOGICAL
Definition: xlog.h:76
#define XLogSegNoOffsetToRecPtr(segno, offset, wal_segsz_bytes, dest)
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
uint64 XLogSegNo
Definition: xlogdefs.h:48
XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI)