PostgreSQL Source Code  git master
slot.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * slot.c
4  * Replication slot management.
5  *
6  *
7  * Copyright (c) 2012-2024, PostgreSQL Global Development Group
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/replication/slot.c
12  *
13  * NOTES
14  *
15  * Replication slots are used to keep state about replication streams
16  * originating from this cluster. Their primary purpose is to prevent the
17  * premature removal of WAL or of old tuple versions in a manner that would
18  * interfere with replication; they are also useful for monitoring purposes.
19  * Slots need to be permanent (to allow restarts), crash-safe, and allocatable
20  * on standbys (to support cascading setups). The requirement that slots be
21  * usable on standbys precludes storing them in the system catalogs.
22  *
23  * Each replication slot gets its own directory inside the $PGDATA/pg_replslot
24  * directory. Inside that directory the state file will contain the slot's
25  * own data. Additional data can be stored alongside that file if required.
26  * While the server is running, the state data is also cached in memory for
27  * efficiency.
28  *
29  * ReplicationSlotAllocationLock must be taken in exclusive mode to allocate
30  * or free a slot. ReplicationSlotControlLock must be taken in shared mode
31  * to iterate over the slots, and in exclusive mode to change the in_use flag
32  * of a slot. The remaining data in each slot is protected by its mutex.
33  *
34  *-------------------------------------------------------------------------
35  */
36 
37 #include "postgres.h"
38 
39 #include <unistd.h>
40 #include <sys/stat.h>
41 
42 #include "access/transam.h"
43 #include "access/xlog_internal.h"
44 #include "access/xlogrecovery.h"
45 #include "common/file_utils.h"
46 #include "common/string.h"
47 #include "miscadmin.h"
48 #include "pgstat.h"
49 #include "postmaster/interrupt.h"
50 #include "replication/slotsync.h"
51 #include "replication/slot.h"
53 #include "storage/fd.h"
54 #include "storage/ipc.h"
55 #include "storage/proc.h"
56 #include "storage/procarray.h"
57 #include "utils/builtins.h"
58 #include "utils/guc_hooks.h"
59 #include "utils/varlena.h"
60 
61 /*
62  * Replication slot on-disk data structure.
63  */
64 typedef struct ReplicationSlotOnDisk
65 {
66  /* first part of this struct needs to be version independent */
67 
68  /* data not covered by checksum */
71 
72  /* data covered by checksum */
75 
76  /*
77  * The actual data in the slot that follows can differ based on the above
78  * 'version'.
79  */
80 
83 
84 /*
85  * Struct for the configuration of standby_slot_names.
86  *
87  * Note: this must be a flat representation that can be held in a single chunk
88  * of guc_malloc'd memory, so that it can be stored as the "extra" data for the
89  * standby_slot_names GUC.
90  */
91 typedef struct
92 {
93  /* Number of slot names in the slot_names[] */
95 
96  /*
97  * slot_names contains 'nslotnames' consecutive null-terminated C strings.
98  */
99  char slot_names[FLEXIBLE_ARRAY_MEMBER];
101 
102 /*
103  * Lookup table for slot invalidation causes.
104  */
105 const char *const SlotInvalidationCauses[] = {
106  [RS_INVAL_NONE] = "none",
107  [RS_INVAL_WAL_REMOVED] = "wal_removed",
108  [RS_INVAL_HORIZON] = "rows_removed",
109  [RS_INVAL_WAL_LEVEL] = "wal_level_insufficient",
110 };
111 
112 /* Maximum number of invalidation causes */
113 #define RS_INVAL_MAX_CAUSES RS_INVAL_WAL_LEVEL
114 
116  "array length mismatch");
117 
/*
 * Sizes of the different regions of ReplicationSlotOnDisk.
 *
 * Every expansion is wrapped in parentheses so that the macros keep their
 * meaning when embedded in a larger expression (for example when they are
 * multiplied, or subtracted from another value).
 */

/* size of version independent data: everything before 'slotdata' */
#define ReplicationSlotOnDiskConstantSize \
	(offsetof(ReplicationSlotOnDisk, slotdata))
/* size of the part of the slot not covered by the checksum: everything before 'version' */
#define ReplicationSlotOnDiskNotChecksummedSize \
	(offsetof(ReplicationSlotOnDisk, version))
/* size of the part covered by the checksum: from 'version' to the end of the struct */
#define ReplicationSlotOnDiskChecksummedSize \
	(sizeof(ReplicationSlotOnDisk) - ReplicationSlotOnDiskNotChecksummedSize)
/* size of the slot data that is version dependent: from 'slotdata' to the end of the struct */
#define ReplicationSlotOnDiskV2Size \
	(sizeof(ReplicationSlotOnDisk) - ReplicationSlotOnDiskConstantSize)
130 
131 #define SLOT_MAGIC 0x1051CA1 /* format identifier */
132 #define SLOT_VERSION 5 /* version for new files */
133 
134 /* Control array for replication slot management */
136 
137 /* My backend's replication slot in the shared memory array */
139 
140 /* GUC variables */
141 int max_replication_slots = 10; /* the maximum number of replication
142  * slots */
143 
144 /*
145  * This GUC lists streaming replication standby server slot names that
146  * logical WAL sender processes will wait for.
147  */
149 
150 /* This is the parsed and cached configuration for standby_slot_names */
152 
153 /*
154  * Oldest LSN that has been confirmed to be flushed to the standbys
155  * corresponding to the physical slots specified in the standby_slot_names GUC.
156  */
158 
159 static void ReplicationSlotShmemExit(int code, Datum arg);
160 static void ReplicationSlotDropPtr(ReplicationSlot *slot);
161 
162 /* internal persistency functions */
163 static void RestoreSlotFromDisk(const char *name);
164 static void CreateSlotOnDisk(ReplicationSlot *slot);
165 static void SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel);
166 
167 /*
168  * Report shared-memory space needed by ReplicationSlotsShmemInit.
169  */
170 Size
172 {
173  Size size = 0;
174 
175  if (max_replication_slots == 0)
176  return size;
177 
178  size = offsetof(ReplicationSlotCtlData, replication_slots);
179  size = add_size(size,
181 
182  return size;
183 }
184 
185 /*
186  * Allocate and initialize shared memory for replication slots.
187  */
188 void
190 {
191  bool found;
192 
193  if (max_replication_slots == 0)
194  return;
195 
197  ShmemInitStruct("ReplicationSlot Ctl", ReplicationSlotsShmemSize(),
198  &found);
199 
200  if (!found)
201  {
202  int i;
203 
204  /* First time through, so initialize */
206 
207  for (i = 0; i < max_replication_slots; i++)
208  {
210 
211  /* everything else is zeroed by the memset above */
212  SpinLockInit(&slot->mutex);
216  }
217  }
218 }
219 
220 /*
221  * Register the callback for replication slot cleanup and releasing.
222  */
223 void
225 {
227 }
228 
229 /*
230  * Release and cleanup replication slots.
231  */
232 static void
234 {
235  /* Make sure active replication slots are released */
236  if (MyReplicationSlot != NULL)
238 
239  /* Also cleanup all the temporary slots. */
241 }
242 
243 /*
244  * Check whether the passed slot name is valid and report errors at elevel.
245  *
246  * Slot names may consist out of [a-z0-9_]{1,NAMEDATALEN-1} which should allow
247  * the name to be used as a directory name on every supported OS.
248  *
249  * Returns whether the directory name is valid or not if elevel < ERROR.
250  */
251 bool
252 ReplicationSlotValidateName(const char *name, int elevel)
253 {
254  const char *cp;
255 
256  if (strlen(name) == 0)
257  {
258  ereport(elevel,
259  (errcode(ERRCODE_INVALID_NAME),
260  errmsg("replication slot name \"%s\" is too short",
261  name)));
262  return false;
263  }
264 
265  if (strlen(name) >= NAMEDATALEN)
266  {
267  ereport(elevel,
268  (errcode(ERRCODE_NAME_TOO_LONG),
269  errmsg("replication slot name \"%s\" is too long",
270  name)));
271  return false;
272  }
273 
274  for (cp = name; *cp; cp++)
275  {
276  if (!((*cp >= 'a' && *cp <= 'z')
277  || (*cp >= '0' && *cp <= '9')
278  || (*cp == '_')))
279  {
280  ereport(elevel,
281  (errcode(ERRCODE_INVALID_NAME),
282  errmsg("replication slot name \"%s\" contains invalid character",
283  name),
284  errhint("Replication slot names may only contain lower case letters, numbers, and the underscore character.")));
285  return false;
286  }
287  }
288  return true;
289 }
290 
291 /*
292  * Create a new replication slot and mark it as used by this backend.
293  *
294  * name: Name of the slot
295  * db_specific: logical decoding is db specific; if the slot is going to
296  * be used for that pass true, otherwise false.
297  * two_phase: Allows decoding of prepared transactions. We allow this option
298  * to be enabled only at the slot creation time. If we allow this option
299  * to be changed during decoding then it is quite possible that we skip
300  * prepare first time because this option was not enabled. Now next time
301  * during getting changes, if the two_phase option is enabled it can skip
302  * prepare because by that time start decoding point has been moved. So the
303  * user will only get commit prepared.
304  * failover: If enabled, allows the slot to be synced to standbys so
305  * that logical replication can be resumed after failover.
306  * synced: True if the slot is synchronized from the primary server.
307  */
308 void
309 ReplicationSlotCreate(const char *name, bool db_specific,
310  ReplicationSlotPersistency persistency,
311  bool two_phase, bool failover, bool synced)
312 {
313  ReplicationSlot *slot = NULL;
314  int i;
315 
316  Assert(MyReplicationSlot == NULL);
317 
319 
320  if (failover)
321  {
322  /*
323  * Do not allow users to create the failover enabled slots on the
324  * standby as we do not support sync to the cascading standby.
325  *
326  * However, failover enabled slots can be created during slot
327  * synchronization because we need to retain the same values as the
328  * remote slot.
329  */
331  ereport(ERROR,
332  errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
333  errmsg("cannot enable failover for a replication slot created on the standby"));
334 
335  /*
336  * Do not allow users to create failover enabled temporary slots,
337  * because temporary slots will not be synced to the standby.
338  *
339  * However, failover enabled temporary slots can be created during
340  * slot synchronization. See the comments atop slotsync.c for details.
341  */
342  if (persistency == RS_TEMPORARY && !IsSyncingReplicationSlots())
343  ereport(ERROR,
344  errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
345  errmsg("cannot enable failover for a temporary replication slot"));
346  }
347 
348  /*
349  * If some other backend ran this code concurrently with us, we'd likely
350  * both allocate the same slot, and that would be bad. We'd also be at
351  * risk of missing a name collision. Also, we don't want to try to create
352  * a new slot while somebody's busy cleaning up an old one, because we
353  * might both be monkeying with the same directory.
354  */
355  LWLockAcquire(ReplicationSlotAllocationLock, LW_EXCLUSIVE);
356 
357  /*
358  * Check for name collision, and identify an allocatable slot. We need to
359  * hold ReplicationSlotControlLock in shared mode for this, so that nobody
360  * else can change the in_use flags while we're looking at them.
361  */
362  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
363  for (i = 0; i < max_replication_slots; i++)
364  {
366 
367  if (s->in_use && strcmp(name, NameStr(s->data.name)) == 0)
368  ereport(ERROR,
370  errmsg("replication slot \"%s\" already exists", name)));
371  if (!s->in_use && slot == NULL)
372  slot = s;
373  }
374  LWLockRelease(ReplicationSlotControlLock);
375 
376  /* If all slots are in use, we're out of luck. */
377  if (slot == NULL)
378  ereport(ERROR,
379  (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
380  errmsg("all replication slots are in use"),
381  errhint("Free one or increase max_replication_slots.")));
382 
383  /*
384  * Since this slot is not in use, nobody should be looking at any part of
385  * it other than the in_use field unless they're trying to allocate it.
386  * And since we hold ReplicationSlotAllocationLock, nobody except us can
387  * be doing that. So it's safe to initialize the slot.
388  */
389  Assert(!slot->in_use);
390  Assert(slot->active_pid == 0);
391 
392  /* first initialize persistent data */
393  memset(&slot->data, 0, sizeof(ReplicationSlotPersistentData));
394  namestrcpy(&slot->data.name, name);
395  slot->data.database = db_specific ? MyDatabaseId : InvalidOid;
396  slot->data.persistency = persistency;
397  slot->data.two_phase = two_phase;
399  slot->data.failover = failover;
400  slot->data.synced = synced;
401 
402  /* and then data only present in shared memory */
403  slot->just_dirtied = false;
404  slot->dirty = false;
412  slot->inactive_since = 0;
413 
414  /*
415  * Create the slot on disk. We haven't actually marked the slot allocated
416  * yet, so no special cleanup is required if this errors out.
417  */
418  CreateSlotOnDisk(slot);
419 
420  /*
421  * We need to briefly prevent any other backend from iterating over the
422  * slots while we flip the in_use flag. We also need to set the active
423  * flag while holding the ControlLock as otherwise a concurrent
424  * ReplicationSlotAcquire() could acquire the slot as well.
425  */
426  LWLockAcquire(ReplicationSlotControlLock, LW_EXCLUSIVE);
427 
428  slot->in_use = true;
429 
430  /* We can now mark the slot active, and that makes it our slot. */
431  SpinLockAcquire(&slot->mutex);
432  Assert(slot->active_pid == 0);
433  slot->active_pid = MyProcPid;
434  SpinLockRelease(&slot->mutex);
435  MyReplicationSlot = slot;
436 
437  LWLockRelease(ReplicationSlotControlLock);
438 
439  /*
440  * Create statistics entry for the new logical slot. We don't collect any
441  * stats for physical slots, so no need to create an entry for the same.
442  * See ReplicationSlotDropPtr for why we need to do this before releasing
443  * ReplicationSlotAllocationLock.
444  */
445  if (SlotIsLogical(slot))
447 
448  /*
449  * Now that the slot has been marked as in_use and active, it's safe to
450  * let somebody else try to allocate a slot.
451  */
452  LWLockRelease(ReplicationSlotAllocationLock);
453 
454  /* Let everybody know we've modified this slot */
456 }
457 
458 /*
459  * Search for the named replication slot.
460  *
461  * Return the replication slot if found, otherwise NULL.
462  */
464 SearchNamedReplicationSlot(const char *name, bool need_lock)
465 {
466  int i;
467  ReplicationSlot *slot = NULL;
468 
469  if (need_lock)
470  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
471 
472  for (i = 0; i < max_replication_slots; i++)
473  {
475 
476  if (s->in_use && strcmp(name, NameStr(s->data.name)) == 0)
477  {
478  slot = s;
479  break;
480  }
481  }
482 
483  if (need_lock)
484  LWLockRelease(ReplicationSlotControlLock);
485 
486  return slot;
487 }
488 
489 /*
490  * Return the index of the replication slot in
491  * ReplicationSlotCtl->replication_slots.
492  *
493  * This is mainly useful to have an efficient key for storing replication slot
494  * stats.
495  */
496 int
498 {
500  slot < ReplicationSlotCtl->replication_slots + max_replication_slots);
501 
502  return slot - ReplicationSlotCtl->replication_slots;
503 }
504 
505 /*
506  * If the slot at 'index' is unused, return false. Otherwise 'name' is set to
507  * the slot's name and true is returned.
508  *
509  * This likely is only useful for pgstat_replslot.c during shutdown, in other
510  * cases there are obvious TOCTOU issues.
511  */
512 bool
514 {
515  ReplicationSlot *slot;
516  bool found;
517 
519 
520  /*
521  * Ensure that the slot cannot be dropped while we copy the name. Don't
522  * need the spinlock as the name of an existing slot cannot change.
523  */
524  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
525  found = slot->in_use;
526  if (slot->in_use)
527  namestrcpy(name, NameStr(slot->data.name));
528  LWLockRelease(ReplicationSlotControlLock);
529 
530  return found;
531 }
532 
533 /*
534  * Find a previously created slot and mark it as used by this process.
535  *
536  * An error is raised if nowait is true and the slot is currently in use. If
537  * nowait is false, we sleep until the slot is released by the owning process.
538  */
539 void
540 ReplicationSlotAcquire(const char *name, bool nowait)
541 {
542  ReplicationSlot *s;
543  int active_pid;
544 
545  Assert(name != NULL);
546 
547 retry:
548  Assert(MyReplicationSlot == NULL);
549 
550  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
551 
552  /* Check if the slot exists with the given name. */
553  s = SearchNamedReplicationSlot(name, false);
554  if (s == NULL || !s->in_use)
555  {
556  LWLockRelease(ReplicationSlotControlLock);
557 
558  ereport(ERROR,
559  (errcode(ERRCODE_UNDEFINED_OBJECT),
560  errmsg("replication slot \"%s\" does not exist",
561  name)));
562  }
563 
564  /*
565  * This is the slot we want; check if it's active under some other
566  * process. In single user mode, we don't need this check.
567  */
568  if (IsUnderPostmaster)
569  {
570  /*
571  * Get ready to sleep on the slot in case it is active. (We may end
572  * up not sleeping, but we don't want to do this while holding the
573  * spinlock.)
574  */
575  if (!nowait)
577 
578  SpinLockAcquire(&s->mutex);
579  if (s->active_pid == 0)
580  s->active_pid = MyProcPid;
581  active_pid = s->active_pid;
582  SpinLockRelease(&s->mutex);
583  }
584  else
585  active_pid = MyProcPid;
586  LWLockRelease(ReplicationSlotControlLock);
587 
588  /*
589  * If we found the slot but it's already active in another process, we
590  * wait until the owning process signals us that it's been released, or
591  * error out.
592  */
593  if (active_pid != MyProcPid)
594  {
595  if (!nowait)
596  {
597  /* Wait here until we get signaled, and then restart */
599  WAIT_EVENT_REPLICATION_SLOT_DROP);
601  goto retry;
602  }
603 
604  ereport(ERROR,
605  (errcode(ERRCODE_OBJECT_IN_USE),
606  errmsg("replication slot \"%s\" is active for PID %d",
607  NameStr(s->data.name), active_pid)));
608  }
609  else if (!nowait)
610  ConditionVariableCancelSleep(); /* no sleep needed after all */
611 
612  /* Let everybody know we've modified this slot */
614 
615  /* We made this slot active, so it's ours now. */
616  MyReplicationSlot = s;
617 
618  /*
619  * The call to pgstat_acquire_replslot() protects against stats for a
620  * different slot, from before a restart or such, being present during
621  * pgstat_report_replslot().
622  */
623  if (SlotIsLogical(s))
625 
626  /*
627  * Reset the time since the slot has become inactive as the slot is active
628  * now.
629  */
630  SpinLockAcquire(&s->mutex);
631  s->inactive_since = 0;
632  SpinLockRelease(&s->mutex);
633 
634  if (am_walsender)
635  {
637  SlotIsLogical(s)
638  ? errmsg("acquired logical replication slot \"%s\"",
639  NameStr(s->data.name))
640  : errmsg("acquired physical replication slot \"%s\"",
641  NameStr(s->data.name)));
642  }
643 }
644 
645 /*
646  * Release the replication slot that this backend considers itself to own.
647  *
648  * This or another backend can re-acquire the slot later.
649  * Resources this slot requires will be preserved.
650  */
651 void
653 {
655  char *slotname = NULL; /* keep compiler quiet */
656  bool is_logical = false; /* keep compiler quiet */
657  TimestampTz now = 0;
658 
659  Assert(slot != NULL && slot->active_pid != 0);
660 
661  if (am_walsender)
662  {
663  slotname = pstrdup(NameStr(slot->data.name));
664  is_logical = SlotIsLogical(slot);
665  }
666 
667  if (slot->data.persistency == RS_EPHEMERAL)
668  {
669  /*
670  * Delete the slot. There is no !PANIC case where this is allowed to
671  * fail, all that may happen is an incomplete cleanup of the on-disk
672  * data.
673  */
675  }
676 
677  /*
678  * If slot needed to temporarily restrain both data and catalog xmin to
679  * create the catalog snapshot, remove that temporary constraint.
680  * Snapshots can only be exported while the initial snapshot is still
681  * acquired.
682  */
683  if (!TransactionIdIsValid(slot->data.xmin) &&
685  {
686  SpinLockAcquire(&slot->mutex);
688  SpinLockRelease(&slot->mutex);
690  }
691 
692  /*
693  * Set the time since the slot has become inactive. We get the current
694  * time beforehand to avoid system call while holding the spinlock.
695  */
697 
698  if (slot->data.persistency == RS_PERSISTENT)
699  {
700  /*
701  * Mark persistent slot inactive. We're not freeing it, just
702  * disconnecting, but wake up others that may be waiting for it.
703  */
704  SpinLockAcquire(&slot->mutex);
705  slot->active_pid = 0;
706  slot->inactive_since = now;
707  SpinLockRelease(&slot->mutex);
709  }
710  else
711  {
712  SpinLockAcquire(&slot->mutex);
713  slot->inactive_since = now;
714  SpinLockRelease(&slot->mutex);
715  }
716 
717  MyReplicationSlot = NULL;
718 
719  /* might not have been set when we've been a plain slot */
720  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
723  LWLockRelease(ProcArrayLock);
724 
725  if (am_walsender)
726  {
728  is_logical
729  ? errmsg("released logical replication slot \"%s\"",
730  slotname)
731  : errmsg("released physical replication slot \"%s\"",
732  slotname));
733 
734  pfree(slotname);
735  }
736 }
737 
738 /*
739  * Cleanup all temporary slots created in current session.
740  */
741 void
743 {
744  int i;
745 
746  Assert(MyReplicationSlot == NULL);
747 
748 restart:
749  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
750  for (i = 0; i < max_replication_slots; i++)
751  {
753 
754  if (!s->in_use)
755  continue;
756 
757  SpinLockAcquire(&s->mutex);
758  if (s->active_pid == MyProcPid)
759  {
761  SpinLockRelease(&s->mutex);
762  LWLockRelease(ReplicationSlotControlLock); /* avoid deadlock */
763 
765 
767  goto restart;
768  }
769  else
770  SpinLockRelease(&s->mutex);
771  }
772 
773  LWLockRelease(ReplicationSlotControlLock);
774 }
775 
776 /*
777  * Permanently drop replication slot identified by the passed in name.
778  */
779 void
780 ReplicationSlotDrop(const char *name, bool nowait)
781 {
782  Assert(MyReplicationSlot == NULL);
783 
784  ReplicationSlotAcquire(name, nowait);
785 
786  /*
787  * Do not allow users to drop the slots which are currently being synced
788  * from the primary to the standby.
789  */
791  ereport(ERROR,
792  errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
793  errmsg("cannot drop replication slot \"%s\"", name),
794  errdetail("This slot is being synced from the primary server."));
795 
797 }
798 
799 /*
800  * Change the definition of the slot identified by the specified name.
801  */
802 void
803 ReplicationSlotAlter(const char *name, bool failover)
804 {
805  Assert(MyReplicationSlot == NULL);
806 
808 
810  ereport(ERROR,
811  errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
812  errmsg("cannot use %s with a physical replication slot",
813  "ALTER_REPLICATION_SLOT"));
814 
815  if (RecoveryInProgress())
816  {
817  /*
818  * Do not allow users to alter the slots which are currently being
819  * synced from the primary to the standby.
820  */
822  ereport(ERROR,
823  errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
824  errmsg("cannot alter replication slot \"%s\"", name),
825  errdetail("This slot is being synced from the primary server."));
826 
827  /*
828  * Do not allow users to enable failover on the standby as we do not
829  * support sync to the cascading standby.
830  */
831  if (failover)
832  ereport(ERROR,
833  errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
834  errmsg("cannot enable failover for a replication slot"
835  " on the standby"));
836  }
837 
838  /*
839  * Do not allow users to enable failover for temporary slots as we do not
840  * support syncing temporary slots to the standby.
841  */
842  if (failover && MyReplicationSlot->data.persistency == RS_TEMPORARY)
843  ereport(ERROR,
844  errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
845  errmsg("cannot enable failover for a temporary replication slot"));
846 
847  if (MyReplicationSlot->data.failover != failover)
848  {
850  MyReplicationSlot->data.failover = failover;
852 
855  }
856 
858 }
859 
860 /*
861  * Permanently drop the currently acquired replication slot.
862  */
863 void
865 {
867 
868  Assert(MyReplicationSlot != NULL);
869 
870  /* slot isn't acquired anymore */
871  MyReplicationSlot = NULL;
872 
874 }
875 
876 /*
877  * Permanently drop the replication slot which will be released by the point
878  * this function returns.
879  */
880 static void
882 {
883  char path[MAXPGPATH];
884  char tmppath[MAXPGPATH];
885 
886  /*
887  * If some other backend ran this code concurrently with us, we might try
888  * to delete a slot with a certain name while someone else was trying to
889  * create a slot with the same name.
890  */
891  LWLockAcquire(ReplicationSlotAllocationLock, LW_EXCLUSIVE);
892 
893  /* Generate pathnames. */
894  sprintf(path, "pg_replslot/%s", NameStr(slot->data.name));
895  sprintf(tmppath, "pg_replslot/%s.tmp", NameStr(slot->data.name));
896 
897  /*
898  * Rename the slot directory on disk, so that we'll no longer recognize
899  * this as a valid slot. Note that if this fails, we've got to mark the
900  * slot inactive before bailing out. If we're dropping an ephemeral or a
901  * temporary slot, we better never fail hard as the caller won't expect
902  * the slot to survive and this might get called during error handling.
903  */
904  if (rename(path, tmppath) == 0)
905  {
906  /*
907  * We need to fsync() the directory we just renamed and its parent to
908  * make sure that our changes are on disk in a crash-safe fashion. If
909  * fsync() fails, we can't be sure whether the changes are on disk or
910  * not. For now, we handle that by panicking;
911  * StartupReplicationSlots() will try to straighten it out after
912  * restart.
913  */
915  fsync_fname(tmppath, true);
916  fsync_fname("pg_replslot", true);
918  }
919  else
920  {
921  bool fail_softly = slot->data.persistency != RS_PERSISTENT;
922 
923  SpinLockAcquire(&slot->mutex);
924  slot->active_pid = 0;
925  SpinLockRelease(&slot->mutex);
926 
927  /* wake up anyone waiting on this slot */
929 
930  ereport(fail_softly ? WARNING : ERROR,
932  errmsg("could not rename file \"%s\" to \"%s\": %m",
933  path, tmppath)));
934  }
935 
936  /*
937  * The slot is definitely gone. Lock out concurrent scans of the array
938  * long enough to kill it. It's OK to clear the active PID here without
939  * grabbing the mutex because nobody else can be scanning the array here,
940  * and nobody can be attached to this slot and thus access it without
941  * scanning the array.
942  *
943  * Also wake up processes waiting for it.
944  */
945  LWLockAcquire(ReplicationSlotControlLock, LW_EXCLUSIVE);
946  slot->active_pid = 0;
947  slot->in_use = false;
948  LWLockRelease(ReplicationSlotControlLock);
950 
951  /*
952  * Slot is dead and doesn't prevent resource removal anymore, recompute
953  * limits.
954  */
957 
958  /*
959  * If removing the directory fails, the worst thing that will happen is
960  * that the user won't be able to create a new slot with the same name
961  * until the next server restart. We warn about it, but that's all.
962  */
963  if (!rmtree(tmppath, true))
965  (errmsg("could not remove directory \"%s\"", tmppath)));
966 
967  /*
968  * Drop the statistics entry for the replication slot. Do this while
969  * holding ReplicationSlotAllocationLock so that we don't drop a
970  * statistics entry for another slot with the same name just created in
971  * another session.
972  */
973  if (SlotIsLogical(slot))
974  pgstat_drop_replslot(slot);
975 
976  /*
977  * We release this at the very end, so that nobody starts trying to create
978  * a slot while we're still cleaning up the detritus of the old one.
979  */
980  LWLockRelease(ReplicationSlotAllocationLock);
981 }
982 
983 /*
984  * Serialize the currently acquired slot's state from memory to disk, thereby
985  * guaranteeing the current state will survive a crash.
986  */
987 void
989 {
990  char path[MAXPGPATH];
991 
992  Assert(MyReplicationSlot != NULL);
993 
994  sprintf(path, "pg_replslot/%s", NameStr(MyReplicationSlot->data.name));
996 }
997 
998 /*
999  * Signal that it would be useful if the currently acquired slot would be
1000  * flushed out to disk.
1001  *
1002  * Note that the actual flush to disk can be delayed for a long time; if an
1003  * immediate flush is required for correctness, explicitly do a ReplicationSlotSave().
1004  */
1005 void
1007 {
1009 
1010  Assert(MyReplicationSlot != NULL);
1011 
1012  SpinLockAcquire(&slot->mutex);
1014  MyReplicationSlot->dirty = true;
1015  SpinLockRelease(&slot->mutex);
1016 }
1017 
1018 /*
1019  * Convert a slot that's marked as RS_EPHEMERAL or RS_TEMPORARY to a
1020  * RS_PERSISTENT slot, guaranteeing it will be there after an eventual crash.
1021  */
1022 void
1024 {
1026 
1027  Assert(slot != NULL);
1029 
1030  SpinLockAcquire(&slot->mutex);
1031  slot->data.persistency = RS_PERSISTENT;
1032  SpinLockRelease(&slot->mutex);
1033 
1036 }
1037 
1038 /*
1039  * Compute the oldest xmin across all slots and store it in the ProcArray.
1040  *
1041  * If already_locked is true, ProcArrayLock has already been acquired
1042  * exclusively.
1043  */
1044 void
1046 {
1047  int i;
1049  TransactionId agg_catalog_xmin = InvalidTransactionId;
1050 
1051  Assert(ReplicationSlotCtl != NULL);
1052 
1053  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1054 
1055  for (i = 0; i < max_replication_slots; i++)
1056  {
1058  TransactionId effective_xmin;
1059  TransactionId effective_catalog_xmin;
1060  bool invalidated;
1061 
1062  if (!s->in_use)
1063  continue;
1064 
1065  SpinLockAcquire(&s->mutex);
1066  effective_xmin = s->effective_xmin;
1067  effective_catalog_xmin = s->effective_catalog_xmin;
1068  invalidated = s->data.invalidated != RS_INVAL_NONE;
1069  SpinLockRelease(&s->mutex);
1070 
1071  /* invalidated slots need not apply */
1072  if (invalidated)
1073  continue;
1074 
1075  /* check the data xmin */
1076  if (TransactionIdIsValid(effective_xmin) &&
1077  (!TransactionIdIsValid(agg_xmin) ||
1078  TransactionIdPrecedes(effective_xmin, agg_xmin)))
1079  agg_xmin = effective_xmin;
1080 
1081  /* check the catalog xmin */
1082  if (TransactionIdIsValid(effective_catalog_xmin) &&
1083  (!TransactionIdIsValid(agg_catalog_xmin) ||
1084  TransactionIdPrecedes(effective_catalog_xmin, agg_catalog_xmin)))
1085  agg_catalog_xmin = effective_catalog_xmin;
1086  }
1087 
1088  LWLockRelease(ReplicationSlotControlLock);
1089 
1090  ProcArraySetReplicationSlotXmin(agg_xmin, agg_catalog_xmin, already_locked);
1091 }
1092 
1093 /*
1094  * Compute the oldest restart LSN across all slots and inform xlog module.
1095  *
1096  * Note: while max_slot_wal_keep_size is theoretically relevant for this
1097  * purpose, we don't try to account for that, because this module doesn't
1098  * know what to compare against.
1099  */
1100 void
1102 {
1103  int i;
1104  XLogRecPtr min_required = InvalidXLogRecPtr;
1105 
1106  Assert(ReplicationSlotCtl != NULL);
1107 
1108  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1109  for (i = 0; i < max_replication_slots; i++)
1110  {
1112  XLogRecPtr restart_lsn;
1113  bool invalidated;
1114 
1115  if (!s->in_use)
1116  continue;
1117 
1118  SpinLockAcquire(&s->mutex);
1119  restart_lsn = s->data.restart_lsn;
1120  invalidated = s->data.invalidated != RS_INVAL_NONE;
1121  SpinLockRelease(&s->mutex);
1122 
1123  /* invalidated slots need not apply */
1124  if (invalidated)
1125  continue;
1126 
1127  if (restart_lsn != InvalidXLogRecPtr &&
1128  (min_required == InvalidXLogRecPtr ||
1129  restart_lsn < min_required))
1130  min_required = restart_lsn;
1131  }
1132  LWLockRelease(ReplicationSlotControlLock);
1133 
1134  XLogSetReplicationSlotMinimumLSN(min_required);
1135 }
1136 
1137 /*
1138  * Compute the oldest WAL LSN required by *logical* decoding slots.
1139  *
1140  * Returns InvalidXLogRecPtr if logical decoding is disabled or no logical
1141  * slots exist.
1142  *
1143  * NB: this returns a value >= ReplicationSlotsComputeRequiredLSN(), since it
1144  * ignores physical replication slots.
1145  *
1146  * The results aren't required frequently, so we don't maintain a precomputed
1147  * value like we do for ComputeRequiredLSN() and ComputeRequiredXmin().
1148  */
1149 XLogRecPtr
1151 {
1152  XLogRecPtr result = InvalidXLogRecPtr;
1153  int i;
1154 
1155  if (max_replication_slots <= 0)
1156  return InvalidXLogRecPtr;
1157 
1158  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1159 
1160  for (i = 0; i < max_replication_slots; i++)
1161  {
1162  ReplicationSlot *s;
1163  XLogRecPtr restart_lsn;
1164  bool invalidated;
1165 
1167 
1168  /* cannot change while ReplicationSlotCtlLock is held */
1169  if (!s->in_use)
1170  continue;
1171 
1172  /* we're only interested in logical slots */
1173  if (!SlotIsLogical(s))
1174  continue;
1175 
1176  /* read once, it's ok if it increases while we're checking */
1177  SpinLockAcquire(&s->mutex);
1178  restart_lsn = s->data.restart_lsn;
1179  invalidated = s->data.invalidated != RS_INVAL_NONE;
1180  SpinLockRelease(&s->mutex);
1181 
1182  /* invalidated slots need not apply */
1183  if (invalidated)
1184  continue;
1185 
1186  if (restart_lsn == InvalidXLogRecPtr)
1187  continue;
1188 
1189  if (result == InvalidXLogRecPtr ||
1190  restart_lsn < result)
1191  result = restart_lsn;
1192  }
1193 
1194  LWLockRelease(ReplicationSlotControlLock);
1195 
1196  return result;
1197 }
1198 
1199 /*
1200  * ReplicationSlotsCountDBSlots -- count the number of slots that refer to the
1201  * passed database oid.
1202  *
1203  * Returns true if there are any slots referencing the database. *nslots is
1204  * set to the total number of logical slots for that database (invalidated
1205  * slots included), *nactive to how many of those have a nonzero active_pid.
      * Both are set to 0, and false is returned, when max_replication_slots <= 0.
1206  */
1207 bool
1208 ReplicationSlotsCountDBSlots(Oid dboid, int *nslots, int *nactive)
1209 {
1210  int i;
1211 
      /* initialize the outputs before any early return */
1212  *nslots = *nactive = 0;
1213 
1214  if (max_replication_slots <= 0)
1215  return false;
1216 
1217  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1218  for (i = 0; i < max_replication_slots; i++)
1219  {
1220  ReplicationSlot *s;
1221 
1223 
1224  /* cannot change while ReplicationSlotControlLock is held */
1225  if (!s->in_use)
1226  continue;
1227 
1228  /* only logical slots are database specific, skip */
1229  if (!SlotIsLogical(s))
1230  continue;
1231 
1232  /* not our database, skip */
1233  if (s->data.database != dboid)
1234  continue;
1235 
1236  /* NB: intentionally counting invalidated slots */
1237 
1238  /* count slots with spinlock held, since active_pid is read here */
1239  SpinLockAcquire(&s->mutex);
1240  (*nslots)++;
1241  if (s->active_pid != 0)
1242  (*nactive)++;
1243  SpinLockRelease(&s->mutex);
1244  }
1245  LWLockRelease(ReplicationSlotControlLock);
1246 
1247  if (*nslots > 0)
1248  return true;
1249  return false;
1250 }
1251 
1252 /*
1253  * ReplicationSlotsDropDBSlots -- Drop all db-specific slots relating to the
1254  * passed database oid. The caller should hold an exclusive lock on the
1255  * pg_database oid for the database to prevent creation of new slots on the db
1256  * or replay from existing slots.
1257  *
1258  * Another session that concurrently acquires an existing slot on the target DB
1259  * (most likely to drop it) may cause this function to ERROR. If that happens
1260  * it may have dropped some but not all slots.
1261  *
1262  * This routine isn't as efficient as it could be - but we don't drop
1263  * databases often, especially databases with lots of slots.
1264  */
1265 void
1267 {
1268  int i;
1269 
1270  if (max_replication_slots <= 0)
1271  return;
1272 
      /* the scan starts over from here after every slot that gets handled */
1273 restart:
1274  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1275  for (i = 0; i < max_replication_slots; i++)
1276  {
1277  ReplicationSlot *s;
1278  char *slotname;
1279  int active_pid;
1280 
1282 
1283  /* cannot change while ReplicationSlotControlLock is held */
1284  if (!s->in_use)
1285  continue;
1286 
1287  /* only logical slots are database specific, skip */
1288  if (!SlotIsLogical(s))
1289  continue;
1290 
1291  /* not our database, skip */
1292  if (s->data.database != dboid)
1293  continue;
1294 
1295  /* NB: intentionally including invalidated slots */
1296 
1297  /* acquire slot, so ReplicationSlotDropAcquired can be reused */
1298  SpinLockAcquire(&s->mutex);
1299  /* can't change while ReplicationSlotControlLock is held */
1300  slotname = NameStr(s->data.name);
1301  active_pid = s->active_pid;
      /* claim the slot for this backend only if nobody else has it */
1302  if (active_pid == 0)
1303  {
1304  MyReplicationSlot = s;
1305  s->active_pid = MyProcPid;
1306  }
1307  SpinLockRelease(&s->mutex);
1308 
1309  /*
1310  * Even though we hold an exclusive lock on the database object a
1311  * logical slot for that DB can still be active, e.g. if it's
1312  * concurrently being dropped by a backend connected to another DB.
1313  *
1314  * That's fairly unlikely in practice, so we'll just bail out.
1315  *
1316  * The slot sync worker holds a shared lock on the database before
1317  * operating on synced logical slots to avoid conflict with the drop
1318  * happening here. The persistent synced slots are thus safe but there
1319  * is a possibility that the slot sync worker has created a temporary
1320  * slot (which stays active even on release) and we are trying to drop
1321  * that here. In practice, the chances of hitting this scenario are
1322  * low because, during slot synchronization, the temporary slot is
1323  * immediately converted to persistent and thus is safe due to the
1324  * shared lock taken on the database. So, we'll just bail out in such
1325  * a case.
1326  *
1327  * XXX: We can consider shutting down the slot sync worker before
1328  * trying to drop synced temporary slots here.
1329  */
1330  if (active_pid)
1331  ereport(ERROR,
1332  (errcode(ERRCODE_OBJECT_IN_USE),
1333  errmsg("replication slot \"%s\" is active for PID %d",
1334  slotname, active_pid)));
1335 
1336  /*
1337  * To avoid duplicating ReplicationSlotDropAcquired() and to avoid
1338  * holding ReplicationSlotControlLock over filesystem operations,
1339  * release ReplicationSlotControlLock and use
1340  * ReplicationSlotDropAcquired.
1341  *
1342  * As that means the set of slots could change, restart scan from the
1343  * beginning each time we release the lock.
1344  */
1345  LWLockRelease(ReplicationSlotControlLock);
1347  goto restart;
1348  }
      /* reached only when a full pass found no slot left to handle */
1349  LWLockRelease(ReplicationSlotControlLock);
1350 }
1351 
1352 
1353 /*
1354  * Check whether the server's configuration supports using replication
1355  * slots.
      *
      * Raises an ERROR with ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE when
      * max_replication_slots is 0, or when the second check below fails (its
      * message states that wal_level >= replica is required).
1356  */
1357 void
1359 {
1360  /*
1361  * NB: Adding a new requirement likely means that RestoreSlotFromDisk()
1362  * needs the same check.
1363  */
1364 
1365  if (max_replication_slots == 0)
1366  ereport(ERROR,
1367  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1368  errmsg("replication slots can only be used if max_replication_slots > 0")));
1369 
1371  ereport(ERROR,
1372  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1373  errmsg("replication slots can only be used if wal_level >= replica")));
1374 }
1375 
1376 /*
1377  * Check whether the user has privilege to use replication slots.
      *
      * Raises an ERROR with ERRCODE_INSUFFICIENT_PRIVILEGE unless
      * has_rolreplication() returns true for the role given by GetUserId().
1378  */
1379 void
1381 {
1382  if (!has_rolreplication(GetUserId()))
1383  ereport(ERROR,
1384  (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
1385  errmsg("permission denied to use replication slots"),
1386  errdetail("Only roles with the %s attribute may use replication slots.",
1387  "REPLICATION")));
1388 }
1389 
1390 /*
1391  * Reserve WAL for the currently active slot.
1392  *
1393  * Compute and set restart_lsn in a manner that's appropriate for the type of
1394  * the slot and concurrency safe.
      *
      * Once a restart_lsn has been settled, and only for a logical slot outside
      * of recovery, a standby snapshot is logged and flushed to disk.
1395  */
1396 void
1398 {
1400 
1401  Assert(slot != NULL);
1403 
1404  /*
1405  * The replication slot mechanism is used to prevent removal of required
1406  * WAL. As there is no interlock between this routine and checkpoints, WAL
1407  * segments could concurrently be removed when a now stale return value of
1408  * ReplicationSlotsComputeRequiredLSN() is used. In the unlikely case that
1409  * this happens we'll just retry.
1410  */
1411  while (true)
1412  {
1413  XLogSegNo segno;
1414  XLogRecPtr restart_lsn;
1415 
1416  /*
1417  * For logical slots log a standby snapshot and start logical decoding
1418  * at exactly that position. That allows the slot to start up more
1419  * quickly. But on a standby we cannot do WAL writes, so just use the
1420  * replay pointer; effectively, an attempt to create a logical slot on
1421  * standby will cause it to wait for an xl_running_xact record to be
1422  * logged independently on the primary, so that a snapshot can be
1423  * built using the record.
1424  *
1425  * None of this is needed (or indeed helpful) for physical slots as
1426  * they'll start replay at the last logged checkpoint anyway. Instead
1427  * return the location of the last redo LSN. While that slightly
1428  * increases the chance that we have to retry, it's where a base
1429  * backup has to start replay at.
1430  */
1431  if (SlotIsPhysical(slot))
1432  restart_lsn = GetRedoRecPtr();
1433  else if (RecoveryInProgress())
1434  restart_lsn = GetXLogReplayRecPtr(NULL);
1435  else
1436  restart_lsn = GetXLogInsertRecPtr();
1437 
      /* publish the chosen position in the slot, under its spinlock */
1438  SpinLockAcquire(&slot->mutex);
1439  slot->data.restart_lsn = restart_lsn;
1440  SpinLockRelease(&slot->mutex);
1441 
1442  /* prevent WAL removal as fast as possible */
1444 
1445  /*
1446  * If all required WAL is still there, great, otherwise retry. The
1447  * slot should prevent further removal of WAL, unless there's a
1448  * concurrent ReplicationSlotsComputeRequiredLSN() after we've written
1449  * the new restart_lsn above, so normally we should never need to loop
1450  * more than twice.
1451  */
1453  if (XLogGetLastRemovedSegno() < segno)
1454  break;
1455  }
1456 
1457  if (!RecoveryInProgress() && SlotIsLogical(slot))
1458  {
1459  XLogRecPtr flushptr;
1460 
1461  /* make sure we have enough information to start */
1462  flushptr = LogStandbySnapshot();
1463 
1464  /* and make sure it's fsynced to disk */
1465  XLogFlush(flushptr);
1466  }
1467 }
1468 
1469 /*
1470  * Report that replication slot needs to be invalidated
      *
      * Emits one LOG message. When 'terminating' is true the message says that
      * process 'pid' is being terminated to release the slot; otherwise it says
      * that the obsolete slot is being invalidated. The detail text depends on
      * 'cause': RS_INVAL_WAL_REMOVED reports restart_lsn and the byte distance
      * (oldestLSN - restart_lsn), RS_INVAL_HORIZON reports
      * snapshotConflictHorizon, and RS_INVAL_WAL_LEVEL uses a fixed text. Only
      * RS_INVAL_WAL_REMOVED adds the hint about max_slot_wal_keep_size.
1471  */
1472 static void
1474  bool terminating,
1475  int pid,
1476  NameData slotname,
1477  XLogRecPtr restart_lsn,
1478  XLogRecPtr oldestLSN,
1479  TransactionId snapshotConflictHorizon)
1480 {
1481  StringInfoData err_detail;
1482  bool hint = false;
1483 
1484  initStringInfo(&err_detail);
1485 
      /* build the cause-specific detail text */
1486  switch (cause)
1487  {
1488  case RS_INVAL_WAL_REMOVED:
1489  {
1490  unsigned long long ex = oldestLSN - restart_lsn;
1491 
1492  hint = true;
1493  appendStringInfo(&err_detail,
1494  ngettext("The slot's restart_lsn %X/%X exceeds the limit by %llu byte.",
1495  "The slot's restart_lsn %X/%X exceeds the limit by %llu bytes.",
1496  ex),
1497  LSN_FORMAT_ARGS(restart_lsn),
1498  ex);
1499  break;
1500  }
1501  case RS_INVAL_HORIZON:
1502  appendStringInfo(&err_detail, _("The slot conflicted with xid horizon %u."),
1503  snapshotConflictHorizon);
1504  break;
1505 
1506  case RS_INVAL_WAL_LEVEL:
1507  appendStringInfoString(&err_detail, _("Logical decoding on standby requires wal_level >= logical on the primary server."));
1508  break;
1509  case RS_INVAL_NONE:
1510  pg_unreachable();
1511  }
1512 
      /* the detail text was translated above, hence errdetail_internal() */
1513  ereport(LOG,
1514  terminating ?
1515  errmsg("terminating process %d to release replication slot \"%s\"",
1516  pid, NameStr(slotname)) :
1517  errmsg("invalidating obsolete replication slot \"%s\"",
1518  NameStr(slotname)),
1519  errdetail_internal("%s", err_detail.data),
1520  hint ? errhint("You might need to increase %s.", "max_slot_wal_keep_size") : 0);
1521 
1522  pfree(err_detail.data);
1523 }
1524 
1525 /*
1526  * Helper for InvalidateObsoleteReplicationSlots
1527  *
1528  * Acquires the given slot and marks it invalid, if necessary and possible.
1529  *
1530  * Returns whether ReplicationSlotControlLock was released in the interim (and
1531  * in that case we're not holding the lock at return, otherwise we are).
1532  *
1533  * Sets *invalidated true if the slot was invalidated. (Untouched otherwise.)
1534  *
1535  * This is inherently racy, because we release the LWLock
1536  * for syscalls, so caller must restart if we return true.
1537  */
1538 static bool
1540  ReplicationSlot *s,
1541  XLogRecPtr oldestLSN,
1542  Oid dboid, TransactionId snapshotConflictHorizon,
1543  bool *invalidated)
1544 {
1545  int last_signaled_pid = 0;
1546  bool released_lock = false;
1547  bool terminated = false;
1548  TransactionId initial_effective_xmin = InvalidTransactionId;
1549  TransactionId initial_catalog_effective_xmin = InvalidTransactionId;
1550  XLogRecPtr initial_restart_lsn = InvalidXLogRecPtr;
1552 
1553  for (;;)
1554  {
1555  XLogRecPtr restart_lsn;
1556  NameData slotname;
1557  int active_pid = 0;
1558  ReplicationSlotInvalidationCause invalidation_cause = RS_INVAL_NONE;
1559 
1560  Assert(LWLockHeldByMeInMode(ReplicationSlotControlLock, LW_SHARED));
1561 
      /*
       * The slot is no longer in use. If we released the lock at some
       * point we must not hold it at return, so drop it again here.
       */
1562  if (!s->in_use)
1563  {
1564  if (released_lock)
1565  LWLockRelease(ReplicationSlotControlLock);
1566  break;
1567  }
1568 
1569  /*
1570  * Check if the slot needs to be invalidated. If it needs to be
1571  * invalidated, and is not currently acquired, acquire it and mark it
1572  * as having been invalidated. We do this with the spinlock held to
1573  * avoid race conditions -- for example the restart_lsn could move
1574  * forward, or the slot could be dropped.
1575  */
1576  SpinLockAcquire(&s->mutex);
1577 
1578  restart_lsn = s->data.restart_lsn;
1579 
1580  /* we do nothing if the slot is already invalid */
1581  if (s->data.invalidated == RS_INVAL_NONE)
1582  {
1583  /*
1584  * The slot's mutex will be released soon, and it is possible that
1585  * those values change since the process holding the slot has been
1586  * terminated (if any), so record them here to ensure that we
1587  * would report the correct invalidation cause.
1588  */
1589  if (!terminated)
1590  {
1591  initial_restart_lsn = s->data.restart_lsn;
1592  initial_effective_xmin = s->effective_xmin;
1593  initial_catalog_effective_xmin = s->effective_catalog_xmin;
1594  }
1595 
      /* decide, per requested cause, whether this slot qualifies */
1596  switch (cause)
1597  {
1598  case RS_INVAL_WAL_REMOVED:
1599  if (initial_restart_lsn != InvalidXLogRecPtr &&
1600  initial_restart_lsn < oldestLSN)
1601  invalidation_cause = cause;
1602  break;
1603  case RS_INVAL_HORIZON:
1604  if (!SlotIsLogical(s))
1605  break;
1606  /* invalid DB oid signals a shared relation */
1607  if (dboid != InvalidOid && dboid != s->data.database)
1608  break;
1609  if (TransactionIdIsValid(initial_effective_xmin) &&
1610  TransactionIdPrecedesOrEquals(initial_effective_xmin,
1611  snapshotConflictHorizon))
1612  invalidation_cause = cause;
1613  else if (TransactionIdIsValid(initial_catalog_effective_xmin) &&
1614  TransactionIdPrecedesOrEquals(initial_catalog_effective_xmin,
1615  snapshotConflictHorizon))
1616  invalidation_cause = cause;
1617  break;
1618  case RS_INVAL_WAL_LEVEL:
1619  if (SlotIsLogical(s))
1620  invalidation_cause = cause;
1621  break;
1622  case RS_INVAL_NONE:
1623  pg_unreachable();
1624  }
1625  }
1626 
1627  /*
1628  * The invalidation cause recorded previously should not change once
1629  * the process owning the slot (if any) has been terminated.
1630  */
1631  Assert(!(invalidation_cause_prev != RS_INVAL_NONE && terminated &&
1632  invalidation_cause_prev != invalidation_cause));
1633 
1634  /* if there's no invalidation, we're done */
1635  if (invalidation_cause == RS_INVAL_NONE)
1636  {
1637  SpinLockRelease(&s->mutex);
1638  if (released_lock)
1639  LWLockRelease(ReplicationSlotControlLock);
1640  break;
1641  }
1642 
      /* copy what we need for reporting while still holding the spinlock */
1643  slotname = s->data.name;
1644  active_pid = s->active_pid;
1645 
1646  /*
1647  * If the slot can be acquired, do so and mark it invalidated
1648  * immediately. Otherwise we'll signal the owning process, below, and
1649  * retry.
1650  */
1651  if (active_pid == 0)
1652  {
1653  MyReplicationSlot = s;
1654  s->active_pid = MyProcPid;
1655  s->data.invalidated = invalidation_cause;
1656 
1657  /*
1658  * XXX: We should consider not overwriting restart_lsn and instead
1659  * just rely on .invalidated.
1660  */
1661  if (invalidation_cause == RS_INVAL_WAL_REMOVED)
1663 
1664  /* Let caller know */
1665  *invalidated = true;
1666  }
1667 
1668  SpinLockRelease(&s->mutex);
1669 
1670  /*
1671  * The logical replication slots shouldn't be invalidated as GUC
1672  * max_slot_wal_keep_size is set to -1 during the binary upgrade. See
1673  * check_old_cluster_for_valid_slots() where we ensure that no slot was
1674  * invalidated before the upgrade.
1675  */
1676  Assert(!(*invalidated && SlotIsLogical(s) && IsBinaryUpgrade));
1677 
1678  if (active_pid != 0)
1679  {
1680  /*
1681  * Prepare the sleep on the slot's condition variable before
1682  * releasing the lock, to close a possible race condition if the
1683  * slot is released before the sleep below.
1684  */
1686 
1687  LWLockRelease(ReplicationSlotControlLock);
1688  released_lock = true;
1689 
1690  /*
1691  * Signal to terminate the process that owns the slot, if we
1692  * haven't already signalled it. (Avoidance of repeated
1693  * signalling is the only reason for there to be a loop in this
1694  * routine; otherwise we could rely on caller's restart loop.)
1695  *
1696  * There is a race condition in which another process may acquire
1697  * the slot after its current owner process is terminated and before
1698  * this process acquires it. To handle that, we signal only if the PID
1699  * of the owning process has changed from the previous time. (This
1700  * logic assumes that the same PID is not reused very quickly.)
1701  */
1702  if (last_signaled_pid != active_pid)
1703  {
1704  ReportSlotInvalidation(invalidation_cause, true, active_pid,
1705  slotname, restart_lsn,
1706  oldestLSN, snapshotConflictHorizon);
1707 
      /* the startup process uses SendProcSignal(); others send SIGTERM */
1708  if (MyBackendType == B_STARTUP)
1709  (void) SendProcSignal(active_pid,
1712  else
1713  (void) kill(active_pid, SIGTERM);
1714 
1715  last_signaled_pid = active_pid;
1716  terminated = true;
1717  invalidation_cause_prev = invalidation_cause;
1718  }
1719 
1720  /* Wait until the slot is released. */
1722  WAIT_EVENT_REPLICATION_SLOT_DROP);
1723 
1724  /*
1725  * Re-acquire lock and start over; we expect to invalidate the
1726  * slot next time (unless another process acquires the slot in the
1727  * meantime).
1728  */
1729  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1730  continue;
1731  }
1732  else
1733  {
1734  /*
1735  * We hold the slot now and have already invalidated it; flush it
1736  * to ensure that state persists.
1737  *
1738  * Don't want to hold ReplicationSlotControlLock across file
1739  * system operations, so release it now but be sure to tell caller
1740  * to restart from scratch.
1741  */
1742  LWLockRelease(ReplicationSlotControlLock);
1743  released_lock = true;
1744 
1745  /* Make sure the invalidated state persists across server restart */
1749 
      /* active_pid is 0 on this path; 'terminating' is false */
1750  ReportSlotInvalidation(invalidation_cause, false, active_pid,
1751  slotname, restart_lsn,
1752  oldestLSN, snapshotConflictHorizon);
1753 
1754  /* done with this slot for now */
1755  break;
1756  }
1757  }
1758 
      /* the return value must agree with whether we still hold the lock */
1759  Assert(released_lock == !LWLockHeldByMe(ReplicationSlotControlLock));
1760 
1761  return released_lock;
1762 }
1763 
1764 /*
1765  * Invalidate slots that require resources about to be removed.
1766  *
1767  * Returns true if any slot was invalidated.
1768  *
1769  * Whether a slot needs to be invalidated depends on the cause. A slot is
1770  * invalidated if it:
1771  * - RS_INVAL_WAL_REMOVED: requires a LSN older than the given segment
1772  * - RS_INVAL_HORIZON: requires a snapshot <= the given horizon in the given
1773  * db; dboid may be InvalidOid for shared relations
1774  * - RS_INVAL_WAL_LEVEL: is logical
1775  *
1776  * NB - this runs as part of checkpoint, so avoid raising errors if possible.
1777  */
1778 bool
1780  XLogSegNo oldestSegno, Oid dboid,
1781  TransactionId snapshotConflictHorizon)
1782 {
1783  XLogRecPtr oldestLSN;
1784  bool invalidated = false;
1785 
1786  Assert(cause != RS_INVAL_HORIZON || TransactionIdIsValid(snapshotConflictHorizon));
1787  Assert(cause != RS_INVAL_WAL_REMOVED || oldestSegno > 0);
1788  Assert(cause != RS_INVAL_NONE);
1789 
1790  if (max_replication_slots == 0)
1791  return invalidated;
1792 
      /* convert the segment number to the LSN at offset 0 of that segment */
1793  XLogSegNoOffsetToRecPtr(oldestSegno, 0, wal_segment_size, oldestLSN);
1794 
1795 restart:
1796  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1797  for (int i = 0; i < max_replication_slots; i++)
1798  {
1800 
1801  if (!s->in_use)
1802  continue;
1803 
1804  if (InvalidatePossiblyObsoleteSlot(cause, s, oldestLSN, dboid,
1805  snapshotConflictHorizon,
1806  &invalidated))
1807  {
1808  /* if the lock was released, start from scratch */
1809  goto restart;
1810  }
1811  }
1812  LWLockRelease(ReplicationSlotControlLock);
1813 
1814  /*
1815  * If any slots have been invalidated, recalculate the resource limits.
1816  */
1817  if (invalidated)
1818  {
1821  }
1822 
1823  return invalidated;
1824 }
1825 
1826 /*
1827  * Flush all replication slots to disk.
1828  *
1829  * It is convenient to flush dirty replication slots at the time of checkpoint.
1830  * Additionally, in case of a shutdown checkpoint, we also identify the slots
1831  * for which the confirmed_flush LSN has been updated since the last time it
1832  * was saved and flush them.
      *
      * Each slot is saved with elevel LOG, so a failure to write one slot is
      * logged rather than raised as an ERROR.
1833  */
1834 void
1836 {
1837  int i;
1838 
1839  elog(DEBUG1, "performing replication slot checkpoint");
1840 
1841  /*
1842  * Prevent any slot from being created/dropped while we're active. As we
1843  * explicitly do *not* want to block iterating over replication_slots or
1844  * acquiring a slot we cannot take the control lock - but that's OK,
1845  * because holding ReplicationSlotAllocationLock is strictly stronger, and
1846  * enough to guarantee that nobody can change the in_use bits on us.
1847  */
1848  LWLockAcquire(ReplicationSlotAllocationLock, LW_SHARED);
1849 
1850  for (i = 0; i < max_replication_slots; i++)
1851  {
1853  char path[MAXPGPATH];
1854 
1855  if (!s->in_use)
1856  continue;
1857 
1858  /* save the slot to disk, locking is handled in SaveSlotToPath() */
1859  sprintf(path, "pg_replslot/%s", NameStr(s->data.name));
1860 
1861  /*
1862  * Slot's data is not flushed each time the confirmed_flush LSN is
1863  * updated as that could lead to frequent writes. However, we decide
1864  * to force a flush of all logical slot's data at the time of shutdown
1865  * if the confirmed_flush LSN is changed since we last flushed it to
1866  * disk. This helps in avoiding an unnecessary retreat of the
1867  * confirmed_flush LSN after restart.
1868  */
1869  if (is_shutdown && SlotIsLogical(s))
1870  {
1871  SpinLockAcquire(&s->mutex);
1872 
1874 
      /* mark the slot dirty so that SaveSlotToPath() writes it out */
1875  if (s->data.invalidated == RS_INVAL_NONE &&
1877  {
1878  s->just_dirtied = true;
1879  s->dirty = true;
1880  }
1881  SpinLockRelease(&s->mutex);
1882  }
1883 
1884  SaveSlotToPath(s, path, LOG);
1885  }
1886  LWLockRelease(ReplicationSlotAllocationLock);
1887 }
1888 
1889 /*
1890  * Load all replication slots from disk into memory at server startup. This
1891  * needs to be run before we start crash recovery.
      *
      * Entries under "pg_replslot" whose name ends in ".tmp" are leftovers from
      * an interrupted slot creation or deletion; they are removed instead of
      * being restored.
1892  */
1893 void
1895 {
1896  DIR *replication_dir;
1897  struct dirent *replication_de;
1898 
1899  elog(DEBUG1, "starting up replication slots");
1900 
1901  /* restore all slots by iterating over all on-disk entries */
1902  replication_dir = AllocateDir("pg_replslot");
1903  while ((replication_de = ReadDir(replication_dir, "pg_replslot")) != NULL)
1904  {
1905  char path[MAXPGPATH + 12];
1906  PGFileType de_type;
1907 
1908  if (strcmp(replication_de->d_name, ".") == 0 ||
1909  strcmp(replication_de->d_name, "..") == 0)
1910  continue;
1911 
1912  snprintf(path, sizeof(path), "pg_replslot/%s", replication_de->d_name);
1913  de_type = get_dirent_type(path, replication_de, false, DEBUG1);
1914 
1915  /* we only ever create directories here, skip anything that isn't ours */
1916  if (de_type != PGFILETYPE_ERROR && de_type != PGFILETYPE_DIR)
1917  continue;
1918 
1919  /* we crashed while a slot was being set up or deleted, clean up */
1920  if (pg_str_endswith(replication_de->d_name, ".tmp"))
1921  {
1922  if (!rmtree(path, true))
1923  {
1924  ereport(WARNING,
1925  (errmsg("could not remove directory \"%s\"",
1926  path)));
1927  continue;
1928  }
      /* make the removal durable by syncing the parent directory */
1929  fsync_fname("pg_replslot", true);
1930  continue;
1931  }
1932 
1933  /* looks like a slot in a normal state, restore */
1934  RestoreSlotFromDisk(replication_de->d_name);
1935  }
1936  FreeDir(replication_dir);
1937 
1938  /* currently no slots exist, we're done. */
1939  if (max_replication_slots <= 0)
1940  return;
1941 
1942  /* Now that we have recovered all the data, compute replication xmin */
1945 }
1946 
1947 /* ----
1948  * Manipulation of on-disk state of replication slots
1949  *
1950  * NB: none of the routines below should take any notice whether a slot is the
1951  * current one or not, that's all handled a layer above.
1952  * ----
1953  */
1954 static void
1956 {
1957  char tmppath[MAXPGPATH];
1958  char path[MAXPGPATH];
1959  struct stat st;
1960 
      /*
       * Create the on-disk representation of the slot: the directory is built
       * as "pg_replslot/<name>.tmp", the state file is written into it, and
       * only then is the directory renamed to "pg_replslot/<name>" and
       * fsynced. Failures up to and including the rename raise ERROR.
       */

1961  /*
1962  * No need to take out the io_in_progress_lock, nobody else can see this
1963  * slot yet, so nobody else will write. We're reusing SaveSlotToPath which
1964  * takes out the lock, if we'd take the lock here, we'd deadlock.
1965  */
1966 
1967  sprintf(path, "pg_replslot/%s", NameStr(slot->data.name));
1968  sprintf(tmppath, "pg_replslot/%s.tmp", NameStr(slot->data.name));
1969 
1970  /*
1971  * It's just barely possible that some previous effort to create or drop a
1972  * slot with this name left a temp directory lying around. If that seems
1973  * to be the case, try to remove it. If the rmtree() fails, we'll error
1974  * out at the MakePGDirectory() below, so we don't bother checking
1975  * success.
1976  */
1977  if (stat(tmppath, &st) == 0 && S_ISDIR(st.st_mode))
1978  rmtree(tmppath, true);
1979 
1980  /* Create and fsync the temporary slot directory. */
1981  if (MakePGDirectory(tmppath) < 0)
1982  ereport(ERROR,
1984  errmsg("could not create directory \"%s\": %m",
1985  tmppath)));
1986  fsync_fname(tmppath, true);
1987 
1988  /* Write the actual state file. */
1989  slot->dirty = true; /* signal that we really need to write */
1990  SaveSlotToPath(slot, tmppath, ERROR);
1991 
1992  /* Rename the directory into place. */
1993  if (rename(tmppath, path) != 0)
1994  ereport(ERROR,
1996  errmsg("could not rename file \"%s\" to \"%s\": %m",
1997  tmppath, path)));
1998 
1999  /*
2000  * If we'd now fail - really unlikely - we wouldn't know whether this slot
2001  * would persist after an OS crash or not - so, force a restart. The
2002  * restart would try to fsync this again till it works.
2003  */
2005 
2006  fsync_fname(path, true);
2007  fsync_fname("pg_replslot", true);
2008 
2009  END_CRIT_SECTION();
2010 }
2011 
2012 /*
2013  * Shared functionality between saving and creating a replication slot.
      *
      * Writes the slot's persistent data to "<dir>/state" by first writing and
      * fsyncing "<dir>/state.tmp" and then renaming it into place. Does nothing
      * if the slot is not marked dirty.
      *
      * Failures up to and including the rename are reported at 'elevel'; if
      * that report returns, the function returns early and leaves the dirty
      * flag set.
2014  */
2015 static void
2016 SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
2017 {
2018  char tmppath[MAXPGPATH];
2019  char path[MAXPGPATH];
2020  int fd;
2022  bool was_dirty;
2023 
2024  /* first check whether there's something to write out */
2025  SpinLockAcquire(&slot->mutex);
2026  was_dirty = slot->dirty;
      /* reset so we can tell below whether the slot was dirtied again */
2027  slot->just_dirtied = false;
2028  SpinLockRelease(&slot->mutex);
2029 
2030  /* and don't do anything if there's nothing to write */
2031  if (!was_dirty)
2032  return;
2033 
2035 
2036  /* silence valgrind :( */
2037  memset(&cp, 0, sizeof(ReplicationSlotOnDisk));
2038 
2039  sprintf(tmppath, "%s/state.tmp", dir);
2040  sprintf(path, "%s/state", dir);
2041 
      /* O_EXCL: creating the temp file fails if it already exists */
2042  fd = OpenTransientFile(tmppath, O_CREAT | O_EXCL | O_WRONLY | PG_BINARY);
2043  if (fd < 0)
2044  {
2045  /*
2046  * If not an ERROR, then release the lock before returning. In case
2047  * of an ERROR, the error recovery path automatically releases the
2048  * lock, but no harm in explicitly releasing even in that case. Note
2049  * that LWLockRelease() could affect errno.
2050  */
2051  int save_errno = errno;
2052 
2054  errno = save_errno;
2055  ereport(elevel,
2057  errmsg("could not create file \"%s\": %m",
2058  tmppath)));
2059  return;
2060  }
2061 
      /* fill in the on-disk header fields */
2062  cp.magic = SLOT_MAGIC;
2063  INIT_CRC32C(cp.checksum);
2064  cp.version = SLOT_VERSION;
2066 
      /* copy the persistent data while holding the slot's spinlock */
2067  SpinLockAcquire(&slot->mutex);
2068 
2069  memcpy(&cp.slotdata, &slot->data, sizeof(ReplicationSlotPersistentData));
2070 
2071  SpinLockRelease(&slot->mutex);
2072 
      /* checksum starts after the first ReplicationSlotOnDiskNotChecksummedSize bytes */
2073  COMP_CRC32C(cp.checksum,
2074  (char *) (&cp) + ReplicationSlotOnDiskNotChecksummedSize,
2076  FIN_CRC32C(cp.checksum);
2077 
      /* clear errno so a short write that leaves it unset can be detected */
2078  errno = 0;
2079  pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_WRITE);
2080  if ((write(fd, &cp, sizeof(cp))) != sizeof(cp))
2081  {
2082  int save_errno = errno;
2083 
2087 
2088  /* if write didn't set errno, assume problem is no disk space */
2089  errno = save_errno ? save_errno : ENOSPC;
2090  ereport(elevel,
2092  errmsg("could not write to file \"%s\": %m",
2093  tmppath)));
2094  return;
2095  }
2097 
2098  /* fsync the temporary file */
2099  pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_SYNC);
2100  if (pg_fsync(fd) != 0)
2101  {
2102  int save_errno = errno;
2103 
2107  errno = save_errno;
2108  ereport(elevel,
2110  errmsg("could not fsync file \"%s\": %m",
2111  tmppath)));
2112  return;
2113  }
2115 
2116  if (CloseTransientFile(fd) != 0)
2117  {
2118  int save_errno = errno;
2119 
2121  errno = save_errno;
2122  ereport(elevel,
2124  errmsg("could not close file \"%s\": %m",
2125  tmppath)));
2126  return;
2127  }
2128 
2129  /* rename to permanent file, fsync file and directory */
2130  if (rename(tmppath, path) != 0)
2131  {
2132  int save_errno = errno;
2133 
2135  errno = save_errno;
2136  ereport(elevel,
2138  errmsg("could not rename file \"%s\" to \"%s\": %m",
2139  tmppath, path)));
2140  return;
2141  }
2142 
2143  /*
2144  * Check CreateSlotOnDisk() for the reasoning of using a critical section.
2145  */
2147 
2148  fsync_fname(path, false);
2149  fsync_fname(dir, true);
2150  fsync_fname("pg_replslot", true);
2151 
2152  END_CRIT_SECTION();
2153 
2154  /*
2155  * Successfully wrote; unset the dirty bit, unless somebody dirtied the
2156  * slot again already, and remember the confirmed_flush LSN value.
2157  */
2158  SpinLockAcquire(&slot->mutex);
2159  if (!slot->just_dirtied)
2160  slot->dirty = false;
2162  SpinLockRelease(&slot->mutex);
2163 
2165 }
2166 
2167 /*
2168  * Load a single slot from disk into memory.
 *
 * The state file pg_replslot/<name>/state is fsync'ed, read, and checked for
 * the expected magic number, version, length and CRC; any failure there is
 * reported at PANIC. The persistent data is then copied into the first
 * in-memory slot that is not yet in_use. If every slot is already taken, a
 * FATAL error suggests raising max_replication_slots.
2169  */
2170 static void
2172 {
2174  int i;
2175  char slotdir[MAXPGPATH + 12];
2176  char path[MAXPGPATH + 22];
2177  int fd;
2178  bool restored = false;
2179  int readBytes;
2180  pg_crc32c checksum;
2181 
2182  /* no need to lock here, no concurrent access allowed yet */
2183 
2184  /* delete temp file if it exists */
2185  sprintf(slotdir, "pg_replslot/%s", name);
2186  sprintf(path, "%s/state.tmp", slotdir);
2187  if (unlink(path) < 0 && errno != ENOENT)
2188  ereport(PANIC,
2190  errmsg("could not remove file \"%s\": %m", path)));
2191 
2192  sprintf(path, "%s/state", slotdir);
2193 
2194  elog(DEBUG1, "restoring replication slot from \"%s\"", path);
2195 
2196  /* on some operating systems fsyncing a file requires O_RDWR */
2197  fd = OpenTransientFile(path, O_RDWR | PG_BINARY);
2198 
2199  /*
2200  * A missing state file needs no special handling: the slot directory is
2201  * rename()d into place only after the state file has been fsync()ed, so
 * any failure to open it is treated as a PANIC.
2202  */
2203  if (fd < 0)
2204  ereport(PANIC,
2206  errmsg("could not open file \"%s\": %m", path)));
2207 
2208  /*
2209  * Sync state file before we're reading from it. We might have crashed
2210  * while it wasn't synced yet and we shouldn't continue on that basis.
2211  */
2212  pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_RESTORE_SYNC);
2213  if (pg_fsync(fd) != 0)
2214  ereport(PANIC,
2216  errmsg("could not fsync file \"%s\": %m",
2217  path)));
2219 
2220  /* Also sync the parent directory */
2222  fsync_fname(slotdir, true);
2223  END_CRIT_SECTION();
2224 
2225  /* read part of statefile that's guaranteed to be version independent */
2226  pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_READ);
2227  readBytes = read(fd, &cp, ReplicationSlotOnDiskConstantSize);
2229  if (readBytes != ReplicationSlotOnDiskConstantSize)
2230  {
2231  if (readBytes < 0)
2232  ereport(PANIC,
2234  errmsg("could not read file \"%s\": %m", path)));
2235  else
2236  ereport(PANIC,
2238  errmsg("could not read file \"%s\": read %d of %zu",
2239  path, readBytes,
2241  }
2242 
2243  /* verify magic */
2244  if (cp.magic != SLOT_MAGIC)
2245  ereport(PANIC,
2247  errmsg("replication slot file \"%s\" has wrong magic number: %u instead of %u",
2248  path, cp.magic, SLOT_MAGIC)));
2249 
2250  /* verify version */
2251  if (cp.version != SLOT_VERSION)
2252  ereport(PANIC,
2254  errmsg("replication slot file \"%s\" has unsupported version %u",
2255  path, cp.version)));
2256 
2257  /* boundary check on length */
2259  ereport(PANIC,
2261  errmsg("replication slot file \"%s\" has corrupted length %u",
2262  path, cp.length)));
2263 
2264  /* Now that we know the size, read the entire file */
2265  pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_READ);
2266  readBytes = read(fd,
2267  (char *) &cp + ReplicationSlotOnDiskConstantSize,
2268  cp.length);
2270  if (readBytes != cp.length)
2271  {
2272  if (readBytes < 0)
2273  ereport(PANIC,
2275  errmsg("could not read file \"%s\": %m", path)));
2276  else
2277  ereport(PANIC,
2279  errmsg("could not read file \"%s\": read %d of %zu",
2280  path, readBytes, (Size) cp.length)));
2281  }
2282 
2283  if (CloseTransientFile(fd) != 0)
2284  ereport(PANIC,
2286  errmsg("could not close file \"%s\": %m", path)));
2287 
2288  /* now verify the CRC */
2289  INIT_CRC32C(checksum);
2290  COMP_CRC32C(checksum,
2293  FIN_CRC32C(checksum);
2294 
2295  if (!EQ_CRC32C(checksum, cp.checksum))
2296  ereport(PANIC,
2297  (errmsg("checksum mismatch for replication slot file \"%s\": is %u, should be %u",
2298  path, checksum, cp.checksum)));
2299 
2300  /*
2301  * If we crashed with an ephemeral slot active, don't restore but delete
2302  * it.
2303  */
2305  {
2306  if (!rmtree(slotdir, true))
2307  {
2308  ereport(WARNING,
2309  (errmsg("could not remove directory \"%s\"",
2310  slotdir)));
2311  }
2312  fsync_fname("pg_replslot", true);
2313  return;
2314  }
2315 
2316  /*
2317  * Verify that requirements for the specific slot type are met. That's
2318  * important because if these aren't met we're not guaranteed to retain
2319  * all the necessary resources for the slot.
2320  *
2321  * NB: We have to do so *after* the above checks for ephemeral slots,
2322  * because otherwise a slot that shouldn't exist anymore could prevent
2323  * restarts.
2324  *
2325  * NB: Changing the requirements here also requires adapting
2326  * CheckSlotRequirements() and CheckLogicalDecodingRequirements().
2327  */
2329  ereport(FATAL,
2330  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2331  errmsg("logical replication slot \"%s\" exists, but wal_level < logical",
2332  NameStr(cp.slotdata.name)),
2333  errhint("Change wal_level to be logical or higher.")));
2334  else if (wal_level < WAL_LEVEL_REPLICA)
2335  ereport(FATAL,
2336  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2337  errmsg("physical replication slot \"%s\" exists, but wal_level < replica",
2338  NameStr(cp.slotdata.name)),
2339  errhint("Change wal_level to be replica or higher.")));
2340 
2341  /* nothing can be active yet, don't lock anything */
2342  for (i = 0; i < max_replication_slots; i++)
2343  {
2344  ReplicationSlot *slot;
2345 
2347 
2348  if (slot->in_use)
2349  continue;
2350 
2351  /* restore the entire set of persistent data */
2352  memcpy(&slot->data, &cp.slotdata,
2354 
2355  /* initialize in memory state */
2356  slot->effective_xmin = cp.slotdata.xmin;
2359 
2364 
2365  slot->in_use = true;
2366  slot->active_pid = 0;
2367 
2368  /*
2369  * Set the time since the slot has become inactive after loading the
2370  * slot from the disk into memory. Whoever acquires the slot i.e.
2371  * makes the slot active will reset it.
2372  */
2374 
2375  restored = true;
2376  break;
2377  }
2378 
2379  if (!restored)
2380  ereport(FATAL,
2381  (errmsg("too many replication slots active before shutdown"),
2382  errhint("Increase max_replication_slots and try again.")));
2383 }
2384 
2385 /*
2386  * Maps an invalidation reason for a replication slot to
2387  * ReplicationSlotInvalidationCause.
 *
 * The reason string is compared with strcmp() against the entries of
 * SlotInvalidationCauses[], starting at RS_INVAL_NONE. invalidation_reason
 * is asserted to be non-NULL, and assert-enabled builds additionally check
 * that a matching entry was found.
2388  */
2390 GetSlotInvalidationCause(const char *invalidation_reason)
2391 {
2394  bool found PG_USED_FOR_ASSERTS_ONLY = false;
2395 
2396  Assert(invalidation_reason);
2397 
 /* The bound is inclusive: the array holds RS_INVAL_MAX_CAUSES + 1 entries. */
2398  for (cause = RS_INVAL_NONE; cause <= RS_INVAL_MAX_CAUSES; cause++)
2399  {
2400  if (strcmp(SlotInvalidationCauses[cause], invalidation_reason) == 0)
2401  {
2402  found = true;
2403  result = cause;
2404  break;
2405  }
2406  }
2407 
2408  Assert(found);
2409  return result;
2410 }
2411 
2412 /*
2413  * A helper function to validate slots specified in GUC standby_slot_names.
2414  *
2415  * The rawname will be parsed, and the result will be saved into *elemlist.
 *
 * Returns false, after setting a GUC check error detail, if the list syntax
 * is invalid or if a named slot does not exist or is not a physical slot.
 * Returns true otherwise, including the case where ReplicationSlotCtl is not
 * set yet and the slots therefore cannot be looked up.
2416  */
2417 static bool
2418 validate_standby_slots(char *rawname, List **elemlist)
2419 {
2420  bool ok;
2421 
2422  /* Verify syntax and parse string into a list of identifiers */
2423  ok = SplitIdentifierString(rawname, ',', elemlist);
2424 
2425  if (!ok)
2426  {
2427  GUC_check_errdetail("List syntax is invalid.");
2428  }
2429  else if (!ReplicationSlotCtl)
2430  {
2431  /*
2432  * We cannot validate the replication slot if the replication slots'
2433  * data has not been initialized. This is ok as we will anyway
2434  * validate the specified slot when waiting for them to catch up. See
2435  * StandbySlotsHaveCaughtup() for details.
2436  */
2437  }
2438  else
2439  {
2440  /* Check that the specified slots exist and are physical slots */
2441  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
2442 
2443  foreach_ptr(char, name, *elemlist)
2444  {
2445  ReplicationSlot *slot;
2446 
2447  slot = SearchNamedReplicationSlot(name, false);
2448 
2449  if (!slot)
2450  {
2451  GUC_check_errdetail("replication slot \"%s\" does not exist",
2452  name);
2453  ok = false;
2454  break;
2455  }
2456 
2457  if (!SlotIsPhysical(slot))
2458  {
2459  GUC_check_errdetail("\"%s\" is not a physical replication slot",
2460  name);
2461  ok = false;
2462  break;
2463  }
2464  }
2465 
2466  LWLockRelease(ReplicationSlotControlLock);
2467  }
2468 
2469  return ok;
2470 }
2471 
2472 /*
2473  * GUC check_hook for standby_slot_names
 *
 * An empty string is accepted as-is. Otherwise the value is validated by
 * validate_standby_slots() and, for a non-empty list, *extra is set to a
 * StandbySlotNamesConfigData holding the number of names followed by the
 * names themselves, stored back to back as NUL-terminated strings.
2474  */
2475 bool
2477 {
2478  char *rawname;
2479  char *ptr;
2480  List *elemlist;
2481  int size;
2482  bool ok;
2484 
2485  if ((*newval)[0] == '\0')
2486  return true;
2487 
2488  /* Need a modifiable copy of the GUC string */
2489  rawname = pstrdup(*newval);
2490 
2491  /* Now verify if the specified slots exist and have correct type */
2492  ok = validate_standby_slots(rawname, &elemlist);
2493 
 /* Validation failed, or the list is empty: nothing to store in *extra */
2494  if (!ok || elemlist == NIL)
2495  {
2496  pfree(rawname);
2497  list_free(elemlist);
2498  return ok;
2499  }
2500 
2501  /* Compute the size required for the StandbySlotNamesConfigData struct */
2502  size = offsetof(StandbySlotNamesConfigData, slot_names);
2503  foreach_ptr(char, slot_name, elemlist)
2504  size += strlen(slot_name) + 1;
2505 
2506  /* GUC extra value must be guc_malloc'd, not palloc'd */
2508 
2509  /* Transform the data into StandbySlotNamesConfigData */
2510  config->nslotnames = list_length(elemlist);
2511 
 /* Copy each name, including its terminating NUL, right after the previous one */
2512  ptr = config->slot_names;
2513  foreach_ptr(char, slot_name, elemlist)
2514  {
2515  strcpy(ptr, slot_name);
2516  ptr += strlen(slot_name) + 1;
2517  }
2518 
2519  *extra = (void *) config;
2520 
2521  pfree(rawname);
2522  list_free(elemlist);
2523  return true;
2524 }
2525 
2526 /*
2527  * GUC assign_hook for standby_slot_names
 *
 * NOTE(review): presumably "extra" is the StandbySlotNamesConfigData built by
 * check_standby_slot_names() -- confirm against the caller.
2528  */
2529 void
2530 assign_standby_slot_names(const char *newval, void *extra)
2531 {
2532  /*
2533  * The standby slots may have changed, so we must recompute the oldest
2534  * LSN.
2535  */
2537 
2539 }
2540 
2541 /*
2542  * Check if the passed slot_name is specified in the standby_slot_names GUC.
 *
 * Returns true only on an exact strcmp() match with one of the nslotnames
 * entries of standby_slot_names_config; returns false when that
 * configuration is NULL or no entry matches.
2543  */
2544 bool
2545 SlotExistsInStandbySlotNames(const char *slot_name)
2546 {
2547  const char *standby_slot_name;
2548 
2549  /* Return false if there is no value in standby_slot_names */
2550  if (standby_slot_names_config == NULL)
2551  return false;
2552 
2553  /*
2554  * XXX: We are not expecting this list to be long so a linear search
2555  * shouldn't hurt but if that turns out not to be true then we can cache
2556  * this information for each WalSender as well.
2557  */
2558  standby_slot_name = standby_slot_names_config->slot_names;
2559  for (int i = 0; i < standby_slot_names_config->nslotnames; i++)
2560  {
2561  if (strcmp(standby_slot_name, slot_name) == 0)
2562  return true;
2563 
 /* Advance to the next name, which starts right after this one's NUL */
2564  standby_slot_name += strlen(standby_slot_name) + 1;
2565  }
2566 
2567  return false;
2568 }
2569 
2570 /*
2571  * Return true if the slots specified in standby_slot_names have caught up to
2572  * the given WAL location, false otherwise.
2573  *
2574  * The elevel parameter specifies the error level used for logging messages
2575  * related to slots that do not exist, are invalidated, or are inactive.
 *
 * Checking stops at the first listed slot that does not exist, is a logical
 * slot, has been invalidated, or has a restart_lsn that is invalid or behind
 * wait_for_lsn. When every listed slot has caught up, the smallest
 * restart_lsn seen is stored in ss_oldest_flush_lsn before returning true.
2576  */
2577 bool
2578 StandbySlotsHaveCaughtup(XLogRecPtr wait_for_lsn, int elevel)
2579 {
2580  const char *name;
2581  int caught_up_slot_num = 0;
2582  XLogRecPtr min_restart_lsn = InvalidXLogRecPtr;
2583 
2584  /*
2585  * Don't need to wait for the standbys to catch up if there is no value in
2586  * standby_slot_names.
2587  */
2588  if (standby_slot_names_config == NULL)
2589  return true;
2590 
2591  /*
2592  * Don't need to wait for the standbys to catch up if we are on a standby
2593  * server, since we do not support syncing slots to cascading standbys.
2594  */
2595  if (RecoveryInProgress())
2596  return true;
2597 
2598  /*
2599  * Don't need to wait for the standbys to catch up if they are already
2600  * beyond the specified WAL location.
2601  */
2603  ss_oldest_flush_lsn >= wait_for_lsn)
2604  return true;
2605 
2606  /*
2607  * To prevent concurrent slot dropping and creation while filtering the
2608  * slots, take the ReplicationSlotControlLock outside of the loop.
2609  */
2610  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
2611 
2613  for (int i = 0; i < standby_slot_names_config->nslotnames; i++)
2614  {
2615  XLogRecPtr restart_lsn;
2616  bool invalidated;
2617  bool inactive;
2618  ReplicationSlot *slot;
2619 
2620  slot = SearchNamedReplicationSlot(name, false);
2621 
2622  if (!slot)
2623  {
2624  /*
2625  * If a slot name provided in standby_slot_names does not exist,
2626  * report a message and exit the loop. A user can specify a slot
2627  * name that does not exist just before the server startup. The
2628  * GUC check_hook(validate_standby_slots) cannot validate such a
2629  * slot during startup as the ReplicationSlotCtl shared memory is
2630  * not initialized at that time. It is also possible for a user to
2631  * drop the slot in standby_slot_names afterwards.
2632  */
2633  ereport(elevel,
2634  errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2635  errmsg("replication slot \"%s\" specified in parameter %s does not exist",
2636  name, "standby_slot_names"),
2637  errdetail("Logical replication is waiting on the standby associated with \"%s\".",
2638  name),
2639  errhint("Consider creating the slot \"%s\" or amend parameter %s.",
2640  name, "standby_slot_names"));
2641  break;
2642  }
2643 
2644  if (SlotIsLogical(slot))
2645  {
2646  /*
2647  * If a logical slot name is provided in standby_slot_names,
2648  * report a message and exit the loop. Similar to the non-existent
2649  * case, a user can specify a logical slot name in
2650  * standby_slot_names before the server startup, or drop an
2651  * existing physical slot and recreate a logical slot with the
2652  * same name.
2653  */
2654  ereport(elevel,
2655  errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2656  errmsg("cannot have logical replication slot \"%s\" in parameter %s",
2657  name, "standby_slot_names"),
2658  errdetail("Logical replication is waiting for correction on \"%s\".",
2659  name),
2660  errhint("Consider removing logical slot \"%s\" from parameter %s.",
2661  name, "standby_slot_names"));
2662  break;
2663  }
2664 
2665  SpinLockAcquire(&slot->mutex);
2666  restart_lsn = slot->data.restart_lsn;
2667  invalidated = slot->data.invalidated != RS_INVAL_NONE;
2668  inactive = slot->active_pid == 0;
2669  SpinLockRelease(&slot->mutex);
2670 
2671  if (invalidated)
2672  {
2673  /* Specified physical slot has been invalidated */
2674  ereport(elevel,
2675  errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2676  errmsg("physical slot \"%s\" specified in parameter %s has been invalidated",
2677  name, "standby_slot_names"),
2678  errdetail("Logical replication is waiting on the standby associated with \"%s\".",
2679  name),
2680  errhint("Consider dropping and recreating the slot \"%s\" or amend parameter %s.",
2681  name, "standby_slot_names"));
2682  break;
2683  }
2684 
2685  if (XLogRecPtrIsInvalid(restart_lsn) || restart_lsn < wait_for_lsn)
2686  {
2687  /* Log a message if no active_pid for this physical slot */
2688  if (inactive)
2689  ereport(elevel,
2690  errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2691  errmsg("replication slot \"%s\" specified in parameter %s does not have active_pid",
2692  name, "standby_slot_names"),
2693  errdetail("Logical replication is waiting on the standby associated with \"%s\".",
2694  name),
2695  errhint("Consider starting standby associated with \"%s\" or amend parameter %s.",
2696  name, "standby_slot_names"));
2697 
2698  /* Stop checking: the current slot hasn't caught up. */
2699  break;
2700  }
2701 
2702  Assert(restart_lsn >= wait_for_lsn);
2703 
2704  if (XLogRecPtrIsInvalid(min_restart_lsn) ||
2705  min_restart_lsn > restart_lsn)
2706  min_restart_lsn = restart_lsn;
2707 
2708  caught_up_slot_num++;
2709 
 /* Advance to the next name, which starts right after this one's NUL */
2710  name += strlen(name) + 1;
2711  }
2712 
2713  LWLockRelease(ReplicationSlotControlLock);
2714 
2715  /*
2716  * Return false if not all the standbys have caught up to the specified
2717  * WAL location.
2718  */
2719  if (caught_up_slot_num != standby_slot_names_config->nslotnames)
2720  return false;
2721 
2722  /* The ss_oldest_flush_lsn must not retreat. */
2724  min_restart_lsn >= ss_oldest_flush_lsn);
2725 
2726  ss_oldest_flush_lsn = min_restart_lsn;
2727 
2728  return true;
2729 }
2730 
2731 /*
2732  * Wait for physical standbys to confirm receiving the given lsn.
2733  *
2734  * Used by logical decoding SQL functions. It waits for physical standbys
2735  * corresponding to the physical slots specified in the standby_slot_names GUC.
 *
 * The loop ends once StandbySlotsHaveCaughtup() returns true for
 * wait_for_lsn; that function is called with WARNING as the level for its
 * slot-related messages.
2736  */
2737 void
2739 {
2740  /*
2741  * Don't need to wait for the standby to catch up if the current acquired
2742  * slot is not a logical failover slot, or there is no value in
2743  * standby_slot_names.
2744  */
2746  return;
2747 
2749 
2750  for (;;)
2751  {
2753 
2754  if (ConfigReloadPending)
2755  {
2756  ConfigReloadPending = false;
2758  }
2759 
2760  /* Exit if done waiting for every slot. */
2761  if (StandbySlotsHaveCaughtup(wait_for_lsn, WARNING))
2762  break;
2763 
2764  /*
2765  * Wait for the slots in the standby_slot_names to catch up, but use a
2766  * timeout (1s) so we can also check if the standby_slot_names has
2767  * been changed.
2768  */
2770  WAIT_EVENT_WAIT_FOR_STANDBY_CONFIRMATION);
2771  }
2772 
2774 }
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1654
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1618
#define NameStr(name)
Definition: c.h:746
unsigned int uint32
Definition: c.h:506
#define ngettext(s, p, n)
Definition: c.h:1181
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:182
#define Assert(condition)
Definition: c.h:858
#define PG_BINARY
Definition: c.h:1273
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:398
#define pg_unreachable()
Definition: c.h:296
#define lengthof(array)
Definition: c.h:788
#define MemSet(start, val, len)
Definition: c.h:1020
uint32 TransactionId
Definition: c.h:652
size_t Size
Definition: c.h:605
bool ConditionVariableCancelSleep(void)
bool ConditionVariableTimedSleep(ConditionVariable *cv, long timeout, uint32 wait_event_info)
void ConditionVariableBroadcast(ConditionVariable *cv)
void ConditionVariablePrepareToSleep(ConditionVariable *cv)
void ConditionVariableInit(ConditionVariable *cv)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
int64 TimestampTz
Definition: timestamp.h:39
int errdetail_internal(const char *fmt,...)
Definition: elog.c:1232
int errcode_for_file_access(void)
Definition: elog.c:882
int errdetail(const char *fmt,...)
Definition: elog.c:1205
int errhint(const char *fmt,...)
Definition: elog.c:1319
int errcode(int sqlerrcode)
Definition: elog.c:859
int errmsg(const char *fmt,...)
Definition: elog.c:1072
#define _(x)
Definition: elog.c:90
#define LOG
Definition: elog.h:31
#define FATAL
Definition: elog.h:41
#define WARNING
Definition: elog.h:36
#define PANIC
Definition: elog.h:42
#define DEBUG1
Definition: elog.h:30
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
#define ereport(elevel,...)
Definition: elog.h:149
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2909
int MakePGDirectory(const char *directoryName)
Definition: fd.c:3913
int FreeDir(DIR *dir)
Definition: fd.c:2961
int CloseTransientFile(int fd)
Definition: fd.c:2809
void fsync_fname(const char *fname, bool isdir)
Definition: fd.c:756
int pg_fsync(int fd)
Definition: fd.c:386
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2633
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2843
PGFileType get_dirent_type(const char *path, const struct dirent *de, bool look_through_symlinks, int elevel)
Definition: file_utils.c:525
PGFileType
Definition: file_utils.h:19
@ PGFILETYPE_DIR
Definition: file_utils.h:23
@ PGFILETYPE_ERROR
Definition: file_utils.h:20
bool IsBinaryUpgrade
Definition: globals.c:118
int MyProcPid
Definition: globals.c:45
bool IsUnderPostmaster
Definition: globals.c:117
Oid MyDatabaseId
Definition: globals.c:91
void * guc_malloc(int elevel, size_t size)
Definition: guc.c:640
#define newval
#define GUC_check_errdetail
Definition: guc.h:448
GucSource
Definition: guc.h:108
@ PGC_SIGHUP
Definition: guc.h:71
void ProcessConfigFile(GucContext context)
#define write(a, b, c)
Definition: win32.h:14
#define read(a, b, c)
Definition: win32.h:13
volatile sig_atomic_t ConfigReloadPending
Definition: interrupt.c:27
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:337
int i
Definition: isn.c:73
void list_free(List *list)
Definition: list.c:1546
bool LWLockHeldByMe(LWLock *lock)
Definition: lwlock.c:1895
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1170
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1939
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1783
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:709
@ LWTRANCHE_REPLICATION_SLOT_IO
Definition: lwlock.h:189
@ LW_SHARED
Definition: lwlock.h:115
@ LW_EXCLUSIVE
Definition: lwlock.h:114
char * pstrdup(const char *in)
Definition: mcxt.c:1695
void pfree(void *pointer)
Definition: mcxt.c:1520
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
@ B_STARTUP
Definition: miscadmin.h:358
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
Oid GetUserId(void)
Definition: miscinit.c:514
BackendType MyBackendType
Definition: miscinit.c:63
bool has_rolreplication(Oid roleid)
Definition: miscinit.c:711
void namestrcpy(Name name, const char *str)
Definition: name.c:233
void * arg
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
#define NAMEDATALEN
#define MAXPGPATH
uint32 pg_crc32c
Definition: pg_crc32c.h:38
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:98
#define EQ_CRC32C(c1, c2)
Definition: pg_crc32c.h:42
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:103
static int list_length(const List *l)
Definition: pg_list.h:152
#define NIL
Definition: pg_list.h:68
#define foreach_ptr(type, var, lst)
Definition: pg_list.h:469
static bool two_phase
static rewind_source * source
Definition: pg_rewind.c:89
void pgstat_create_replslot(ReplicationSlot *slot)
void pgstat_acquire_replslot(ReplicationSlot *slot)
void pgstat_drop_replslot(ReplicationSlot *slot)
#define sprintf
Definition: port.h:240
#define snprintf
Definition: port.h:238
uintptr_t Datum
Definition: postgres.h:64
#define InvalidOid
Definition: postgres_ext.h:36
unsigned int Oid
Definition: postgres_ext.h:31
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define PROC_IN_LOGICAL_DECODING
Definition: proc.h:61
void ProcArraySetReplicationSlotXmin(TransactionId xmin, TransactionId catalog_xmin, bool already_locked)
Definition: procarray.c:3927
#define INVALID_PROC_NUMBER
Definition: procnumber.h:26
int SendProcSignal(pid_t pid, ProcSignalReason reason, ProcNumber procNumber)
Definition: procsignal.c:257
@ PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT
Definition: procsignal.h:46
bool rmtree(const char *path, bool rmtopdir)
Definition: rmtree.c:50
Size add_size(Size s1, Size s2)
Definition: shmem.c:493
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:387
Size mul_size(Size s1, Size s2)
Definition: shmem.c:510
static pg_noinline void Size size
Definition: slab.c:607
ReplicationSlot * SearchNamedReplicationSlot(const char *name, bool need_lock)
Definition: slot.c:464
void ReplicationSlotAlter(const char *name, bool failover)
Definition: slot.c:803
int ReplicationSlotIndex(ReplicationSlot *slot)
Definition: slot.c:497
#define ReplicationSlotOnDiskChecksummedSize
Definition: slot.c:125
void CheckPointReplicationSlots(bool is_shutdown)
Definition: slot.c:1835
void ReplicationSlotCreate(const char *name, bool db_specific, ReplicationSlotPersistency persistency, bool two_phase, bool failover, bool synced)
Definition: slot.c:309
void ReplicationSlotCleanup(void)
Definition: slot.c:742
void ReplicationSlotDropAcquired(void)
Definition: slot.c:864
void ReplicationSlotMarkDirty(void)
Definition: slot.c:1006
void ReplicationSlotReserveWal(void)
Definition: slot.c:1397
char * standby_slot_names
Definition: slot.c:148
bool ReplicationSlotsCountDBSlots(Oid dboid, int *nslots, int *nactive)
Definition: slot.c:1208
void ReplicationSlotAcquire(const char *name, bool nowait)
Definition: slot.c:540
bool SlotExistsInStandbySlotNames(const char *slot_name)
Definition: slot.c:2545
bool InvalidateObsoleteReplicationSlots(ReplicationSlotInvalidationCause cause, XLogSegNo oldestSegno, Oid dboid, TransactionId snapshotConflictHorizon)
Definition: slot.c:1779
static bool validate_standby_slots(char *rawname, List **elemlist)
Definition: slot.c:2418
static XLogRecPtr ss_oldest_flush_lsn
Definition: slot.c:157
ReplicationSlotInvalidationCause GetSlotInvalidationCause(const char *invalidation_reason)
Definition: slot.c:2390
void ReplicationSlotsDropDBSlots(Oid dboid)
Definition: slot.c:1266
#define ReplicationSlotOnDiskNotChecksummedSize
Definition: slot.c:122
XLogRecPtr ReplicationSlotsComputeLogicalRestartLSN(void)
Definition: slot.c:1150
void ReplicationSlotsComputeRequiredXmin(bool already_locked)
Definition: slot.c:1045
static void RestoreSlotFromDisk(const char *name)
Definition: slot.c:2171
#define RS_INVAL_MAX_CAUSES
Definition: slot.c:113
void ReplicationSlotPersist(void)
Definition: slot.c:1023
ReplicationSlot * MyReplicationSlot
Definition: slot.c:138
static void SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
Definition: slot.c:2016
void ReplicationSlotDrop(const char *name, bool nowait)
Definition: slot.c:780
void ReplicationSlotSave(void)
Definition: slot.c:988
static void CreateSlotOnDisk(ReplicationSlot *slot)
Definition: slot.c:1955
#define ReplicationSlotOnDiskV2Size
Definition: slot.c:128
void CheckSlotPermissions(void)
Definition: slot.c:1380
bool ReplicationSlotName(int index, Name name)
Definition: slot.c:513
void ReplicationSlotsShmemInit(void)
Definition: slot.c:189
const char *const SlotInvalidationCauses[]
Definition: slot.c:105
static StandbySlotNamesConfigData * standby_slot_names_config
Definition: slot.c:151
void ReplicationSlotRelease(void)
Definition: slot.c:652
int max_replication_slots
Definition: slot.c:141
StaticAssertDecl(lengthof(SlotInvalidationCauses)==(RS_INVAL_MAX_CAUSES+1), "array length mismatch")
ReplicationSlotCtlData * ReplicationSlotCtl
Definition: slot.c:135
#define SLOT_VERSION
Definition: slot.c:132
struct ReplicationSlotOnDisk ReplicationSlotOnDisk
void WaitForStandbyConfirmation(XLogRecPtr wait_for_lsn)
Definition: slot.c:2738
bool StandbySlotsHaveCaughtup(XLogRecPtr wait_for_lsn, int elevel)
Definition: slot.c:2578
void ReplicationSlotsComputeRequiredLSN(void)
Definition: slot.c:1101
void ReplicationSlotInitialize(void)
Definition: slot.c:224
static void ReplicationSlotDropPtr(ReplicationSlot *slot)
Definition: slot.c:881
static bool InvalidatePossiblyObsoleteSlot(ReplicationSlotInvalidationCause cause, ReplicationSlot *s, XLogRecPtr oldestLSN, Oid dboid, TransactionId snapshotConflictHorizon, bool *invalidated)
Definition: slot.c:1539
void StartupReplicationSlots(void)
Definition: slot.c:1894
void CheckSlotRequirements(void)
Definition: slot.c:1358
#define SLOT_MAGIC
Definition: slot.c:131
void assign_standby_slot_names(const char *newval, void *extra)
Definition: slot.c:2530
bool check_standby_slot_names(char **newval, void **extra, GucSource source)
Definition: slot.c:2476
static void ReportSlotInvalidation(ReplicationSlotInvalidationCause cause, bool terminating, int pid, NameData slotname, XLogRecPtr restart_lsn, XLogRecPtr oldestLSN, TransactionId snapshotConflictHorizon)
Definition: slot.c:1473
#define ReplicationSlotOnDiskConstantSize
Definition: slot.c:119
Size ReplicationSlotsShmemSize(void)
Definition: slot.c:171
bool ReplicationSlotValidateName(const char *name, int elevel)
Definition: slot.c:252
static void ReplicationSlotShmemExit(int code, Datum arg)
Definition: slot.c:233
ReplicationSlotPersistency
Definition: slot.h:34
@ RS_PERSISTENT
Definition: slot.h:35
@ RS_EPHEMERAL
Definition: slot.h:36
@ RS_TEMPORARY
Definition: slot.h:37
#define SlotIsPhysical(slot)
Definition: slot.h:209
ReplicationSlotInvalidationCause
Definition: slot.h:48
@ RS_INVAL_WAL_REMOVED
Definition: slot.h:51
@ RS_INVAL_HORIZON
Definition: slot.h:53
@ RS_INVAL_WAL_LEVEL
Definition: slot.h:55
@ RS_INVAL_NONE
Definition: slot.h:49
#define SlotIsLogical(slot)
Definition: slot.h:210
bool IsSyncingReplicationSlots(void)
Definition: slotsync.c:1569
#define SpinLockInit(lock)
Definition: spin.h:60
#define SpinLockRelease(lock)
Definition: spin.h:64
#define SpinLockAcquire(lock)
Definition: spin.h:62
PGPROC * MyProc
Definition: proc.c:66
PROC_HDR * ProcGlobal
Definition: proc.c:78
XLogRecPtr LogStandbySnapshot(void)
Definition: standby.c:1285
#define ERRCODE_DUPLICATE_OBJECT
Definition: streamutil.c:32
bool pg_str_endswith(const char *str, const char *end)
Definition: string.c:32
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:97
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:182
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
Definition: dirent.c:26
Definition: pg_list.h:54
uint8 statusFlags
Definition: proc.h:238
int pgxactoff
Definition: proc.h:180
uint8 * statusFlags
Definition: proc.h:395
ReplicationSlot replication_slots[1]
Definition: slot.h:221
uint32 version
Definition: slot.c:73
ReplicationSlotPersistentData slotdata
Definition: slot.c:81
pg_crc32c checksum
Definition: slot.c:70
TransactionId xmin
Definition: slot.h:82
TransactionId catalog_xmin
Definition: slot.h:90
XLogRecPtr restart_lsn
Definition: slot.h:93
XLogRecPtr confirmed_flush
Definition: slot.h:104
ReplicationSlotPersistency persistency
Definition: slot.h:74
ReplicationSlotInvalidationCause invalidated
Definition: slot.h:96
XLogRecPtr candidate_xmin_lsn
Definition: slot.h:194
TransactionId effective_catalog_xmin
Definition: slot.h:175
slock_t mutex
Definition: slot.h:151
XLogRecPtr candidate_restart_valid
Definition: slot.h:195
XLogRecPtr last_saved_confirmed_flush
Definition: slot.h:203
pid_t active_pid
Definition: slot.h:157
bool in_use
Definition: slot.h:154
TransactionId effective_xmin
Definition: slot.h:174
bool just_dirtied
Definition: slot.h:160
XLogRecPtr candidate_restart_lsn
Definition: slot.h:196
LWLock io_in_progress_lock
Definition: slot.h:181
ConditionVariable active_cv
Definition: slot.h:184
TransactionId candidate_catalog_xmin
Definition: slot.h:193
bool dirty
Definition: slot.h:161
ReplicationSlotPersistentData data
Definition: slot.h:178
TimestampTz inactive_since
Definition: slot.h:206
char slot_names[FLEXIBLE_ARRAY_MEMBER]
Definition: slot.c:99
ConditionVariable wal_confirm_rcv_cv
Definition: dirent.h:10
char d_name[MAX_PATH]
Definition: dirent.h:15
Definition: type.h:95
Definition: c.h:741
unsigned short st_mode
Definition: win32_port.h:268
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:299
#define InvalidTransactionId
Definition: transam.h:31
#define TransactionIdIsValid(xid)
Definition: transam.h:41
bool SplitIdentifierString(char *rawstring, char separator, List **namelist)
Definition: varlena.c:3457
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:88
static void pgstat_report_wait_end(void)
Definition: wait_event.h:104
const char * name
bool am_walsender
Definition: walsender.c:115
bool log_replication_commands
Definition: walsender.c:125
WalSndCtlData * WalSndCtl
Definition: walsender.c:109
#define stat
Definition: win32_port.h:284
#define S_ISDIR(m)
Definition: win32_port.h:325
#define kill(pid, sig)
Definition: win32_port.h:485
bool RecoveryInProgress(void)
Definition: xlog.c:6290
XLogSegNo XLogGetLastRemovedSegno(void)
Definition: xlog.c:3747
XLogRecPtr GetRedoRecPtr(void)
Definition: xlog.c:6393
int wal_level
Definition: xlog.c:131
int wal_segment_size
Definition: xlog.c:143
void XLogSetReplicationSlotMinimumLSN(XLogRecPtr lsn)
Definition: xlog.c:2677
XLogRecPtr GetXLogInsertRecPtr(void)
Definition: xlog.c:9355
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2791
@ WAL_LEVEL_REPLICA
Definition: xlog.h:73
@ WAL_LEVEL_LOGICAL
Definition: xlog.h:74
#define XLogSegNoOffsetToRecPtr(segno, offset, wal_segsz_bytes, dest)
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
uint64 XLogSegNo
Definition: xlogdefs.h:48
XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI)