PostgreSQL Source Code  git master
slot.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * slot.c
4  * Replication slot management.
5  *
6  *
7  * Copyright (c) 2012-2024, PostgreSQL Global Development Group
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/replication/slot.c
12  *
13  * NOTES
14  *
15  * Replication slots are used to keep state about replication streams
16  * originating from this cluster. Their primary purpose is to prevent the
17  * premature removal of WAL or of old tuple versions in a manner that would
18  * interfere with replication; they are also useful for monitoring purposes.
19  * Slots need to be permanent (to allow restarts), crash-safe, and allocatable
20  * on standbys (to support cascading setups). The requirement that slots be
21  * usable on standbys precludes storing them in the system catalogs.
22  *
23  * Each replication slot gets its own directory inside the $PGDATA/pg_replslot
24  * directory. Inside that directory the state file will contain the slot's
25  * own data. Additional data can be stored alongside that file if required.
26  * While the server is running, the state data is also cached in memory for
27  * efficiency.
28  *
29  * ReplicationSlotAllocationLock must be taken in exclusive mode to allocate
30  * or free a slot. ReplicationSlotControlLock must be taken in shared mode
31  * to iterate over the slots, and in exclusive mode to change the in_use flag
32  * of a slot. The remaining data in each slot is protected by its mutex.
33  *
34  *-------------------------------------------------------------------------
35  */
36 
37 #include "postgres.h"
38 
39 #include <unistd.h>
40 #include <sys/stat.h>
41 
42 #include "access/transam.h"
43 #include "access/xlog_internal.h"
44 #include "access/xlogrecovery.h"
45 #include "common/file_utils.h"
46 #include "common/string.h"
47 #include "miscadmin.h"
48 #include "pgstat.h"
49 #include "postmaster/interrupt.h"
50 #include "replication/slotsync.h"
51 #include "replication/slot.h"
53 #include "storage/fd.h"
54 #include "storage/ipc.h"
55 #include "storage/proc.h"
56 #include "storage/procarray.h"
57 #include "utils/builtins.h"
58 #include "utils/guc_hooks.h"
59 #include "utils/varlena.h"
60 
61 /*
62  * Replication slot on-disk data structure.
63  */
64 typedef struct ReplicationSlotOnDisk
65 {
66  /* first part of this struct needs to be version independent */
67 
68  /* data not covered by checksum */
71 
72  /* data covered by checksum */
75 
76  /*
77  * The actual data in the slot that follows can differ based on the above
78  * 'version'.
79  */
80 
83 
84 /*
85  * Struct for the configuration of standby_slot_names.
86  *
87  * Note: this must be a flat representation that can be held in a single chunk
88  * of guc_malloc'd memory, so that it can be stored as the "extra" data for the
89  * standby_slot_names GUC.
90  */
91 typedef struct
92 {
93  /* Number of slot names in the slot_names[] */
95 
96  /*
97  * slot_names contains 'nslotnames' consecutive null-terminated C strings.
98  */
99  char slot_names[FLEXIBLE_ARRAY_MEMBER];
101 
102 /*
103  * Lookup table for slot invalidation causes.
104  */
105 const char *const SlotInvalidationCauses[] = {
106  [RS_INVAL_NONE] = "none",
107  [RS_INVAL_WAL_REMOVED] = "wal_removed",
108  [RS_INVAL_HORIZON] = "rows_removed",
109  [RS_INVAL_WAL_LEVEL] = "wal_level_insufficient",
110 };
111 
112 /* Maximum number of invalidation causes */
113 #define RS_INVAL_MAX_CAUSES RS_INVAL_WAL_LEVEL
114 
116  "array length mismatch");
117 
/* size of version independent data */
#define ReplicationSlotOnDiskConstantSize \
	offsetof(ReplicationSlotOnDisk, slotdata)
/* size of the part of the slot not covered by the checksum */
#define ReplicationSlotOnDiskNotChecksummedSize \
	offsetof(ReplicationSlotOnDisk, version)
/*
 * size of the part covered by the checksum
 *
 * The replacement list is parenthesized so that the subtraction is evaluated
 * as a unit no matter what expression the macro is expanded into.
 */
#define ReplicationSlotOnDiskChecksummedSize \
	(sizeof(ReplicationSlotOnDisk) - ReplicationSlotOnDiskNotChecksummedSize)
/*
 * size of the slot data that is version dependent
 *
 * Parenthesized for the same reason as ReplicationSlotOnDiskChecksummedSize.
 */
#define ReplicationSlotOnDiskV2Size \
	(sizeof(ReplicationSlotOnDisk) - ReplicationSlotOnDiskConstantSize)
130 
131 #define SLOT_MAGIC 0x1051CA1 /* format identifier */
132 #define SLOT_VERSION 5 /* version for new files */
133 
134 /* Control array for replication slot management */
136 
137 /* My backend's replication slot in the shared memory array */
139 
140 /* GUC variables */
141 int max_replication_slots = 10; /* the maximum number of replication
142  * slots */
143 
144 /*
145  * This GUC lists streaming replication standby server slot names that
146  * logical WAL sender processes will wait for.
147  */
149 
150 /* This is the parsed and cached configuration for standby_slot_names */
152 
153 /*
154  * Oldest LSN that has been confirmed to be flushed to the standbys
155  * corresponding to the physical slots specified in the standby_slot_names GUC.
156  */
158 
159 static void ReplicationSlotShmemExit(int code, Datum arg);
160 static void ReplicationSlotDropPtr(ReplicationSlot *slot);
161 
162 /* internal persistency functions */
163 static void RestoreSlotFromDisk(const char *name);
164 static void CreateSlotOnDisk(ReplicationSlot *slot);
165 static void SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel);
166 
167 /*
168  * Report shared-memory space needed by ReplicationSlotsShmemInit.
169  */
170 Size
172 {
173  Size size = 0;
174 
175  if (max_replication_slots == 0)
176  return size;
177 
178  size = offsetof(ReplicationSlotCtlData, replication_slots);
179  size = add_size(size,
181 
182  return size;
183 }
184 
185 /*
186  * Allocate and initialize shared memory for replication slots.
187  */
188 void
190 {
191  bool found;
192 
193  if (max_replication_slots == 0)
194  return;
195 
197  ShmemInitStruct("ReplicationSlot Ctl", ReplicationSlotsShmemSize(),
198  &found);
199 
200  if (!found)
201  {
202  int i;
203 
204  /* First time through, so initialize */
206 
207  for (i = 0; i < max_replication_slots; i++)
208  {
210 
211  /* everything else is zeroed by the memset above */
212  SpinLockInit(&slot->mutex);
216  }
217  }
218 }
219 
220 /*
221  * Register the callback for replication slot cleanup and releasing.
222  */
223 void
225 {
227 }
228 
229 /*
230  * Release and cleanup replication slots.
231  */
232 static void
234 {
235  /* Make sure active replication slots are released */
236  if (MyReplicationSlot != NULL)
238 
239  /* Also cleanup all the temporary slots. */
240  ReplicationSlotCleanup(false);
241 }
242 
243 /*
244  * Check whether the passed slot name is valid and report errors at elevel.
245  *
246  * Slot names may consist out of [a-z0-9_]{1,NAMEDATALEN-1} which should allow
247  * the name to be used as a directory name on every supported OS.
248  *
249  * Returns whether the directory name is valid or not if elevel < ERROR.
250  */
251 bool
252 ReplicationSlotValidateName(const char *name, int elevel)
253 {
254  const char *cp;
255 
256  if (strlen(name) == 0)
257  {
258  ereport(elevel,
259  (errcode(ERRCODE_INVALID_NAME),
260  errmsg("replication slot name \"%s\" is too short",
261  name)));
262  return false;
263  }
264 
265  if (strlen(name) >= NAMEDATALEN)
266  {
267  ereport(elevel,
268  (errcode(ERRCODE_NAME_TOO_LONG),
269  errmsg("replication slot name \"%s\" is too long",
270  name)));
271  return false;
272  }
273 
274  for (cp = name; *cp; cp++)
275  {
276  if (!((*cp >= 'a' && *cp <= 'z')
277  || (*cp >= '0' && *cp <= '9')
278  || (*cp == '_')))
279  {
280  ereport(elevel,
281  (errcode(ERRCODE_INVALID_NAME),
282  errmsg("replication slot name \"%s\" contains invalid character",
283  name),
284  errhint("Replication slot names may only contain lower case letters, numbers, and the underscore character.")));
285  return false;
286  }
287  }
288  return true;
289 }
290 
291 /*
292  * Create a new replication slot and mark it as used by this backend.
293  *
294  * name: Name of the slot
295  * db_specific: logical decoding is db specific; if the slot is going to
296  * be used for that pass true, otherwise false.
297  * two_phase: Allows decoding of prepared transactions. We allow this option
298  * to be enabled only at the slot creation time. If we allow this option
299  * to be changed during decoding then it is quite possible that we skip
300  * prepare first time because this option was not enabled. Now next time
301  * during getting changes, if the two_phase option is enabled it can skip
302  * prepare because by that time start decoding point has been moved. So the
303  * user will only get commit prepared.
304  * failover: If enabled, allows the slot to be synced to standbys so
305  * that logical replication can be resumed after failover.
306  * synced: True if the slot is synchronized from the primary server.
307  */
308 void
309 ReplicationSlotCreate(const char *name, bool db_specific,
310  ReplicationSlotPersistency persistency,
311  bool two_phase, bool failover, bool synced)
312 {
313  ReplicationSlot *slot = NULL;
314  int i;
315 
316  Assert(MyReplicationSlot == NULL);
317 
319 
320  if (failover)
321  {
322  /*
323  * Do not allow users to create the failover enabled slots on the
324  * standby as we do not support sync to the cascading standby.
325  *
326  * However, failover enabled slots can be created during slot
327  * synchronization because we need to retain the same values as the
328  * remote slot.
329  */
331  ereport(ERROR,
332  errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
333  errmsg("cannot enable failover for a replication slot created on the standby"));
334 
335  /*
336  * Do not allow users to create failover enabled temporary slots,
337  * because temporary slots will not be synced to the standby.
338  *
339  * However, failover enabled temporary slots can be created during
340  * slot synchronization. See the comments atop slotsync.c for details.
341  */
342  if (persistency == RS_TEMPORARY && !IsSyncingReplicationSlots())
343  ereport(ERROR,
344  errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
345  errmsg("cannot enable failover for a temporary replication slot"));
346  }
347 
348  /*
349  * If some other backend ran this code concurrently with us, we'd likely
350  * both allocate the same slot, and that would be bad. We'd also be at
351  * risk of missing a name collision. Also, we don't want to try to create
352  * a new slot while somebody's busy cleaning up an old one, because we
353  * might both be monkeying with the same directory.
354  */
355  LWLockAcquire(ReplicationSlotAllocationLock, LW_EXCLUSIVE);
356 
357  /*
358  * Check for name collision, and identify an allocatable slot. We need to
359  * hold ReplicationSlotControlLock in shared mode for this, so that nobody
360  * else can change the in_use flags while we're looking at them.
361  */
362  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
363  for (i = 0; i < max_replication_slots; i++)
364  {
366 
367  if (s->in_use && strcmp(name, NameStr(s->data.name)) == 0)
368  ereport(ERROR,
370  errmsg("replication slot \"%s\" already exists", name)));
371  if (!s->in_use && slot == NULL)
372  slot = s;
373  }
374  LWLockRelease(ReplicationSlotControlLock);
375 
376  /* If all slots are in use, we're out of luck. */
377  if (slot == NULL)
378  ereport(ERROR,
379  (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
380  errmsg("all replication slots are in use"),
381  errhint("Free one or increase \"max_replication_slots\".")));
382 
383  /*
384  * Since this slot is not in use, nobody should be looking at any part of
385  * it other than the in_use field unless they're trying to allocate it.
386  * And since we hold ReplicationSlotAllocationLock, nobody except us can
387  * be doing that. So it's safe to initialize the slot.
388  */
389  Assert(!slot->in_use);
390  Assert(slot->active_pid == 0);
391 
392  /* first initialize persistent data */
393  memset(&slot->data, 0, sizeof(ReplicationSlotPersistentData));
394  namestrcpy(&slot->data.name, name);
395  slot->data.database = db_specific ? MyDatabaseId : InvalidOid;
396  slot->data.persistency = persistency;
397  slot->data.two_phase = two_phase;
399  slot->data.failover = failover;
400  slot->data.synced = synced;
401 
402  /* and then data only present in shared memory */
403  slot->just_dirtied = false;
404  slot->dirty = false;
412  slot->inactive_since = 0;
413 
414  /*
415  * Create the slot on disk. We haven't actually marked the slot allocated
416  * yet, so no special cleanup is required if this errors out.
417  */
418  CreateSlotOnDisk(slot);
419 
420  /*
421  * We need to briefly prevent any other backend from iterating over the
422  * slots while we flip the in_use flag. We also need to set the active
423  * flag while holding the ControlLock as otherwise a concurrent
424  * ReplicationSlotAcquire() could acquire the slot as well.
425  */
426  LWLockAcquire(ReplicationSlotControlLock, LW_EXCLUSIVE);
427 
428  slot->in_use = true;
429 
430  /* We can now mark the slot active, and that makes it our slot. */
431  SpinLockAcquire(&slot->mutex);
432  Assert(slot->active_pid == 0);
433  slot->active_pid = MyProcPid;
434  SpinLockRelease(&slot->mutex);
435  MyReplicationSlot = slot;
436 
437  LWLockRelease(ReplicationSlotControlLock);
438 
439  /*
440  * Create statistics entry for the new logical slot. We don't collect any
441  * stats for physical slots, so no need to create an entry for the same.
442  * See ReplicationSlotDropPtr for why we need to do this before releasing
443  * ReplicationSlotAllocationLock.
444  */
445  if (SlotIsLogical(slot))
447 
448  /*
449  * Now that the slot has been marked as in_use and active, it's safe to
450  * let somebody else try to allocate a slot.
451  */
452  LWLockRelease(ReplicationSlotAllocationLock);
453 
454  /* Let everybody know we've modified this slot */
456 }
457 
458 /*
459  * Search for the named replication slot.
460  *
461  * Return the replication slot if found, otherwise NULL.
462  */
464 SearchNamedReplicationSlot(const char *name, bool need_lock)
465 {
466  int i;
467  ReplicationSlot *slot = NULL;
468 
469  if (need_lock)
470  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
471 
472  for (i = 0; i < max_replication_slots; i++)
473  {
475 
476  if (s->in_use && strcmp(name, NameStr(s->data.name)) == 0)
477  {
478  slot = s;
479  break;
480  }
481  }
482 
483  if (need_lock)
484  LWLockRelease(ReplicationSlotControlLock);
485 
486  return slot;
487 }
488 
489 /*
490  * Return the index of the replication slot in
491  * ReplicationSlotCtl->replication_slots.
492  *
493  * This is mainly useful to have an efficient key for storing replication slot
494  * stats.
495  */
496 int
498 {
500  slot < ReplicationSlotCtl->replication_slots + max_replication_slots);
501 
502  return slot - ReplicationSlotCtl->replication_slots;
503 }
504 
505 /*
506  * If the slot at 'index' is unused, return false. Otherwise 'name' is set to
507  * the slot's name and true is returned.
508  *
509  * This likely is only useful for pgstat_replslot.c during shutdown, in other
510  * cases there are obvious TOCTOU issues.
511  */
512 bool
514 {
515  ReplicationSlot *slot;
516  bool found;
517 
519 
520  /*
521  * Ensure that the slot cannot be dropped while we copy the name. Don't
522  * need the spinlock as the name of an existing slot cannot change.
523  */
524  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
525  found = slot->in_use;
526  if (slot->in_use)
527  namestrcpy(name, NameStr(slot->data.name));
528  LWLockRelease(ReplicationSlotControlLock);
529 
530  return found;
531 }
532 
533 /*
534  * Find a previously created slot and mark it as used by this process.
535  *
536  * An error is raised if nowait is true and the slot is currently in use. If
537  * nowait is false, we sleep until the slot is released by the owning process.
538  */
539 void
540 ReplicationSlotAcquire(const char *name, bool nowait)
541 {
542  ReplicationSlot *s;
543  int active_pid;
544 
545  Assert(name != NULL);
546 
547 retry:
548  Assert(MyReplicationSlot == NULL);
549 
550  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
551 
552  /* Check if the slot exists with the given name. */
553  s = SearchNamedReplicationSlot(name, false);
554  if (s == NULL || !s->in_use)
555  {
556  LWLockRelease(ReplicationSlotControlLock);
557 
558  ereport(ERROR,
559  (errcode(ERRCODE_UNDEFINED_OBJECT),
560  errmsg("replication slot \"%s\" does not exist",
561  name)));
562  }
563 
564  /*
565  * This is the slot we want; check if it's active under some other
566  * process. In single user mode, we don't need this check.
567  */
568  if (IsUnderPostmaster)
569  {
570  /*
571  * Get ready to sleep on the slot in case it is active. (We may end
572  * up not sleeping, but we don't want to do this while holding the
573  * spinlock.)
574  */
575  if (!nowait)
577 
578  SpinLockAcquire(&s->mutex);
579  if (s->active_pid == 0)
580  s->active_pid = MyProcPid;
581  active_pid = s->active_pid;
582  SpinLockRelease(&s->mutex);
583  }
584  else
585  active_pid = MyProcPid;
586  LWLockRelease(ReplicationSlotControlLock);
587 
588  /*
589  * If we found the slot but it's already active in another process, we
590  * wait until the owning process signals us that it's been released, or
591  * error out.
592  */
593  if (active_pid != MyProcPid)
594  {
595  if (!nowait)
596  {
597  /* Wait here until we get signaled, and then restart */
599  WAIT_EVENT_REPLICATION_SLOT_DROP);
601  goto retry;
602  }
603 
604  ereport(ERROR,
605  (errcode(ERRCODE_OBJECT_IN_USE),
606  errmsg("replication slot \"%s\" is active for PID %d",
607  NameStr(s->data.name), active_pid)));
608  }
609  else if (!nowait)
610  ConditionVariableCancelSleep(); /* no sleep needed after all */
611 
612  /* Let everybody know we've modified this slot */
614 
615  /* We made this slot active, so it's ours now. */
616  MyReplicationSlot = s;
617 
618  /*
619  * The call to pgstat_acquire_replslot() protects against stats for a
620  * different slot, from before a restart or such, being present during
621  * pgstat_report_replslot().
622  */
623  if (SlotIsLogical(s))
625 
626  /*
627  * Reset the time since the slot has become inactive as the slot is active
628  * now.
629  */
630  SpinLockAcquire(&s->mutex);
631  s->inactive_since = 0;
632  SpinLockRelease(&s->mutex);
633 
634  if (am_walsender)
635  {
637  SlotIsLogical(s)
638  ? errmsg("acquired logical replication slot \"%s\"",
639  NameStr(s->data.name))
640  : errmsg("acquired physical replication slot \"%s\"",
641  NameStr(s->data.name)));
642  }
643 }
644 
645 /*
646  * Release the replication slot that this backend considers itself to own.
647  *
648  * This or another backend can re-acquire the slot later.
649  * Resources this slot requires will be preserved.
650  */
651 void
653 {
655  char *slotname = NULL; /* keep compiler quiet */
656  bool is_logical = false; /* keep compiler quiet */
657  TimestampTz now = 0;
658 
659  Assert(slot != NULL && slot->active_pid != 0);
660 
661  if (am_walsender)
662  {
663  slotname = pstrdup(NameStr(slot->data.name));
664  is_logical = SlotIsLogical(slot);
665  }
666 
667  if (slot->data.persistency == RS_EPHEMERAL)
668  {
669  /*
670  * Delete the slot. There is no !PANIC case where this is allowed to
671  * fail, all that may happen is an incomplete cleanup of the on-disk
672  * data.
673  */
675  }
676 
677  /*
678  * If slot needed to temporarily restrain both data and catalog xmin to
679  * create the catalog snapshot, remove that temporary constraint.
680  * Snapshots can only be exported while the initial snapshot is still
681  * acquired.
682  */
683  if (!TransactionIdIsValid(slot->data.xmin) &&
685  {
686  SpinLockAcquire(&slot->mutex);
688  SpinLockRelease(&slot->mutex);
690  }
691 
692  /*
693  * Set the time since the slot has become inactive. We get the current
694  * time beforehand to avoid system call while holding the spinlock.
695  */
697 
698  if (slot->data.persistency == RS_PERSISTENT)
699  {
700  /*
701  * Mark persistent slot inactive. We're not freeing it, just
702  * disconnecting, but wake up others that may be waiting for it.
703  */
704  SpinLockAcquire(&slot->mutex);
705  slot->active_pid = 0;
706  slot->inactive_since = now;
707  SpinLockRelease(&slot->mutex);
709  }
710  else
711  {
712  SpinLockAcquire(&slot->mutex);
713  slot->inactive_since = now;
714  SpinLockRelease(&slot->mutex);
715  }
716 
717  MyReplicationSlot = NULL;
718 
719  /* might not have been set when we've been a plain slot */
720  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
723  LWLockRelease(ProcArrayLock);
724 
725  if (am_walsender)
726  {
728  is_logical
729  ? errmsg("released logical replication slot \"%s\"",
730  slotname)
731  : errmsg("released physical replication slot \"%s\"",
732  slotname));
733 
734  pfree(slotname);
735  }
736 }
737 
738 /*
739  * Cleanup temporary slots created in current session.
740  *
741  * Cleanup only synced temporary slots if 'synced_only' is true, else
742  * cleanup all temporary slots.
743  */
744 void
745 ReplicationSlotCleanup(bool synced_only)
746 {
747  int i;
748 
749  Assert(MyReplicationSlot == NULL);
750 
751 restart:
752  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
753  for (i = 0; i < max_replication_slots; i++)
754  {
756 
757  if (!s->in_use)
758  continue;
759 
760  SpinLockAcquire(&s->mutex);
761  if ((s->active_pid == MyProcPid &&
762  (!synced_only || s->data.synced)))
763  {
765  SpinLockRelease(&s->mutex);
766  LWLockRelease(ReplicationSlotControlLock); /* avoid deadlock */
767 
769 
771  goto restart;
772  }
773  else
774  SpinLockRelease(&s->mutex);
775  }
776 
777  LWLockRelease(ReplicationSlotControlLock);
778 }
779 
780 /*
781  * Permanently drop replication slot identified by the passed in name.
782  */
783 void
784 ReplicationSlotDrop(const char *name, bool nowait)
785 {
786  Assert(MyReplicationSlot == NULL);
787 
788  ReplicationSlotAcquire(name, nowait);
789 
790  /*
791  * Do not allow users to drop the slots which are currently being synced
792  * from the primary to the standby.
793  */
795  ereport(ERROR,
796  errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
797  errmsg("cannot drop replication slot \"%s\"", name),
798  errdetail("This slot is being synced from the primary server."));
799 
801 }
802 
803 /*
804  * Change the definition of the slot identified by the specified name.
805  */
806 void
807 ReplicationSlotAlter(const char *name, bool failover)
808 {
809  Assert(MyReplicationSlot == NULL);
810 
812 
814  ereport(ERROR,
815  errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
816  errmsg("cannot use %s with a physical replication slot",
817  "ALTER_REPLICATION_SLOT"));
818 
819  if (RecoveryInProgress())
820  {
821  /*
822  * Do not allow users to alter the slots which are currently being
823  * synced from the primary to the standby.
824  */
826  ereport(ERROR,
827  errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
828  errmsg("cannot alter replication slot \"%s\"", name),
829  errdetail("This slot is being synced from the primary server."));
830 
831  /*
832  * Do not allow users to enable failover on the standby as we do not
833  * support sync to the cascading standby.
834  */
835  if (failover)
836  ereport(ERROR,
837  errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
838  errmsg("cannot enable failover for a replication slot"
839  " on the standby"));
840  }
841 
842  /*
843  * Do not allow users to enable failover for temporary slots as we do not
844  * support syncing temporary slots to the standby.
845  */
846  if (failover && MyReplicationSlot->data.persistency == RS_TEMPORARY)
847  ereport(ERROR,
848  errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
849  errmsg("cannot enable failover for a temporary replication slot"));
850 
851  if (MyReplicationSlot->data.failover != failover)
852  {
854  MyReplicationSlot->data.failover = failover;
856 
859  }
860 
862 }
863 
864 /*
865  * Permanently drop the currently acquired replication slot.
866  */
867 void
869 {
871 
872  Assert(MyReplicationSlot != NULL);
873 
874  /* slot isn't acquired anymore */
875  MyReplicationSlot = NULL;
876 
878 }
879 
880 /*
881  * Permanently drop the replication slot which will be released by the point
882  * this function returns.
883  */
884 static void
886 {
887  char path[MAXPGPATH];
888  char tmppath[MAXPGPATH];
889 
890  /*
891  * If some other backend ran this code concurrently with us, we might try
892  * to delete a slot with a certain name while someone else was trying to
893  * create a slot with the same name.
894  */
895  LWLockAcquire(ReplicationSlotAllocationLock, LW_EXCLUSIVE);
896 
897  /* Generate pathnames. */
898  sprintf(path, "pg_replslot/%s", NameStr(slot->data.name));
899  sprintf(tmppath, "pg_replslot/%s.tmp", NameStr(slot->data.name));
900 
901  /*
902  * Rename the slot directory on disk, so that we'll no longer recognize
903  * this as a valid slot. Note that if this fails, we've got to mark the
904  * slot inactive before bailing out. If we're dropping an ephemeral or a
905  * temporary slot, we better never fail hard as the caller won't expect
906  * the slot to survive and this might get called during error handling.
907  */
908  if (rename(path, tmppath) == 0)
909  {
910  /*
911  * We need to fsync() the directory we just renamed and its parent to
912  * make sure that our changes are on disk in a crash-safe fashion. If
913  * fsync() fails, we can't be sure whether the changes are on disk or
914  * not. For now, we handle that by panicking;
915  * StartupReplicationSlots() will try to straighten it out after
916  * restart.
917  */
919  fsync_fname(tmppath, true);
920  fsync_fname("pg_replslot", true);
922  }
923  else
924  {
925  bool fail_softly = slot->data.persistency != RS_PERSISTENT;
926 
927  SpinLockAcquire(&slot->mutex);
928  slot->active_pid = 0;
929  SpinLockRelease(&slot->mutex);
930 
931  /* wake up anyone waiting on this slot */
933 
934  ereport(fail_softly ? WARNING : ERROR,
936  errmsg("could not rename file \"%s\" to \"%s\": %m",
937  path, tmppath)));
938  }
939 
940  /*
941  * The slot is definitely gone. Lock out concurrent scans of the array
942  * long enough to kill it. It's OK to clear the active PID here without
943  * grabbing the mutex because nobody else can be scanning the array here,
944  * and nobody can be attached to this slot and thus access it without
945  * scanning the array.
946  *
947  * Also wake up processes waiting for it.
948  */
949  LWLockAcquire(ReplicationSlotControlLock, LW_EXCLUSIVE);
950  slot->active_pid = 0;
951  slot->in_use = false;
952  LWLockRelease(ReplicationSlotControlLock);
954 
955  /*
956  * Slot is dead and doesn't prevent resource removal anymore, recompute
957  * limits.
958  */
961 
962  /*
963  * If removing the directory fails, the worst thing that will happen is
964  * that the user won't be able to create a new slot with the same name
965  * until the next server restart. We warn about it, but that's all.
966  */
967  if (!rmtree(tmppath, true))
969  (errmsg("could not remove directory \"%s\"", tmppath)));
970 
971  /*
972  * Drop the statistics entry for the replication slot. Do this while
973  * holding ReplicationSlotAllocationLock so that we don't drop a
974  * statistics entry for another slot with the same name just created in
975  * another session.
976  */
977  if (SlotIsLogical(slot))
978  pgstat_drop_replslot(slot);
979 
980  /*
981  * We release this at the very end, so that nobody starts trying to create
982  * a slot while we're still cleaning up the detritus of the old one.
983  */
984  LWLockRelease(ReplicationSlotAllocationLock);
985 }
986 
987 /*
988  * Serialize the currently acquired slot's state from memory to disk, thereby
989  * guaranteeing the current state will survive a crash.
990  */
991 void
993 {
994  char path[MAXPGPATH];
995 
996  Assert(MyReplicationSlot != NULL);
997 
998  sprintf(path, "pg_replslot/%s", NameStr(MyReplicationSlot->data.name));
1000 }
1001 
1002 /*
1003  * Signal that it would be useful if the currently acquired slot would be
1004  * flushed out to disk.
1005  *
1006  * Note that the actual flush to disk can be delayed for a long time; if
1007  * correctness requires it, explicitly call ReplicationSlotSave().
1008  */
1009 void
1011 {
1013 
1014  Assert(MyReplicationSlot != NULL);
1015 
1016  SpinLockAcquire(&slot->mutex);
1018  MyReplicationSlot->dirty = true;
1019  SpinLockRelease(&slot->mutex);
1020 }
1021 
1022 /*
1023  * Convert a slot that's marked as RS_EPHEMERAL or RS_TEMPORARY to a
1024  * RS_PERSISTENT slot, guaranteeing it will be there after an eventual crash.
1025  */
1026 void
1028 {
1030 
1031  Assert(slot != NULL);
1033 
1034  SpinLockAcquire(&slot->mutex);
1035  slot->data.persistency = RS_PERSISTENT;
1036  SpinLockRelease(&slot->mutex);
1037 
1040 }
1041 
1042 /*
1043  * Compute the oldest xmin across all slots and store it in the ProcArray.
1044  *
1045  * If already_locked is true, ProcArrayLock has already been acquired
1046  * exclusively.
1047  */
1048 void
1050 {
1051  int i;
1053  TransactionId agg_catalog_xmin = InvalidTransactionId;
1054 
1055  Assert(ReplicationSlotCtl != NULL);
1056 
1057  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1058 
1059  for (i = 0; i < max_replication_slots; i++)
1060  {
1062  TransactionId effective_xmin;
1063  TransactionId effective_catalog_xmin;
1064  bool invalidated;
1065 
1066  if (!s->in_use)
1067  continue;
1068 
1069  SpinLockAcquire(&s->mutex);
1070  effective_xmin = s->effective_xmin;
1071  effective_catalog_xmin = s->effective_catalog_xmin;
1072  invalidated = s->data.invalidated != RS_INVAL_NONE;
1073  SpinLockRelease(&s->mutex);
1074 
1075  /* invalidated slots need not apply */
1076  if (invalidated)
1077  continue;
1078 
1079  /* check the data xmin */
1080  if (TransactionIdIsValid(effective_xmin) &&
1081  (!TransactionIdIsValid(agg_xmin) ||
1082  TransactionIdPrecedes(effective_xmin, agg_xmin)))
1083  agg_xmin = effective_xmin;
1084 
1085  /* check the catalog xmin */
1086  if (TransactionIdIsValid(effective_catalog_xmin) &&
1087  (!TransactionIdIsValid(agg_catalog_xmin) ||
1088  TransactionIdPrecedes(effective_catalog_xmin, agg_catalog_xmin)))
1089  agg_catalog_xmin = effective_catalog_xmin;
1090  }
1091 
1092  LWLockRelease(ReplicationSlotControlLock);
1093 
1094  ProcArraySetReplicationSlotXmin(agg_xmin, agg_catalog_xmin, already_locked);
1095 }
1096 
1097 /*
1098  * Compute the oldest restart LSN across all slots and inform xlog module.
1099  *
1100  * Note: while max_slot_wal_keep_size is theoretically relevant for this
1101  * purpose, we don't try to account for that, because this module doesn't
1102  * know what to compare against.
1103  */
1104 void
1106 {
1107  int i;
1108  XLogRecPtr min_required = InvalidXLogRecPtr;
1109 
1110  Assert(ReplicationSlotCtl != NULL);
1111 
1112  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1113  for (i = 0; i < max_replication_slots; i++)
1114  {
1116  XLogRecPtr restart_lsn;
1117  bool invalidated;
1118 
1119  if (!s->in_use)
1120  continue;
1121 
1122  SpinLockAcquire(&s->mutex);
1123  restart_lsn = s->data.restart_lsn;
1124  invalidated = s->data.invalidated != RS_INVAL_NONE;
1125  SpinLockRelease(&s->mutex);
1126 
1127  /* invalidated slots need not apply */
1128  if (invalidated)
1129  continue;
1130 
1131  if (restart_lsn != InvalidXLogRecPtr &&
1132  (min_required == InvalidXLogRecPtr ||
1133  restart_lsn < min_required))
1134  min_required = restart_lsn;
1135  }
1136  LWLockRelease(ReplicationSlotControlLock);
1137 
1138  XLogSetReplicationSlotMinimumLSN(min_required);
1139 }
1140 
1141 /*
1142  * Compute the oldest WAL LSN required by *logical* decoding slots.
1143  *
1144  * Returns InvalidXLogRecPtr if logical decoding is disabled or no logical
1145  * slots exist.
1146  *
1147  * NB: this returns a value >= ReplicationSlotsComputeRequiredLSN(), since it
1148  * ignores physical replication slots.
1149  *
1150  * The results aren't required frequently, so we don't maintain a precomputed
1151  * value like we do for ComputeRequiredLSN() and ComputeRequiredXmin().
1152  */
1153 XLogRecPtr
1155 {
1156  XLogRecPtr result = InvalidXLogRecPtr;
1157  int i;
1158 
1159  if (max_replication_slots <= 0)
1160  return InvalidXLogRecPtr;
1161 
1162  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1163 
1164  for (i = 0; i < max_replication_slots; i++)
1165  {
1166  ReplicationSlot *s;
1167  XLogRecPtr restart_lsn;
1168  bool invalidated;
1169 
1171 
1172  /* cannot change while ReplicationSlotCtlLock is held */
1173  if (!s->in_use)
1174  continue;
1175 
1176  /* we're only interested in logical slots */
1177  if (!SlotIsLogical(s))
1178  continue;
1179 
1180  /* read once, it's ok if it increases while we're checking */
1181  SpinLockAcquire(&s->mutex);
1182  restart_lsn = s->data.restart_lsn;
1183  invalidated = s->data.invalidated != RS_INVAL_NONE;
1184  SpinLockRelease(&s->mutex);
1185 
1186  /* invalidated slots need not apply */
1187  if (invalidated)
1188  continue;
1189 
1190  if (restart_lsn == InvalidXLogRecPtr)
1191  continue;
1192 
1193  if (result == InvalidXLogRecPtr ||
1194  restart_lsn < result)
1195  result = restart_lsn;
1196  }
1197 
1198  LWLockRelease(ReplicationSlotControlLock);
1199 
1200  return result;
1201 }
1202 
1203 /*
1204  * ReplicationSlotsCountDBSlots -- count the number of slots that refer to the
1205  * passed database oid.
1206  *
1207  * Returns true if there are any slots referencing the database. *nslots will
1208  * be set to the absolute number of slots in the database, *nactive to ones
1209  * currently active.
1210  */
1211 bool
1212 ReplicationSlotsCountDBSlots(Oid dboid, int *nslots, int *nactive)
1213 {
1214  int i;
1215 
1216  *nslots = *nactive = 0;
1217 
1218  if (max_replication_slots <= 0)
1219  return false;
1220 
1221  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1222  for (i = 0; i < max_replication_slots; i++)
1223  {
1224  ReplicationSlot *s;
1225 
1227 
1228  /* cannot change while ReplicationSlotCtlLock is held */
1229  if (!s->in_use)
1230  continue;
1231 
1232  /* only logical slots are database specific, skip */
1233  if (!SlotIsLogical(s))
1234  continue;
1235 
1236  /* not our database, skip */
1237  if (s->data.database != dboid)
1238  continue;
1239 
1240  /* NB: intentionally counting invalidated slots */
1241 
1242  /* count slots with spinlock held */
1243  SpinLockAcquire(&s->mutex);
1244  (*nslots)++;
1245  if (s->active_pid != 0)
1246  (*nactive)++;
1247  SpinLockRelease(&s->mutex);
1248  }
1249  LWLockRelease(ReplicationSlotControlLock);
1250 
1251  if (*nslots > 0)
1252  return true;
1253  return false;
1254 }
1255 
1256 /*
1257  * ReplicationSlotsDropDBSlots -- Drop all db-specific slots relating to the
1258  * passed database oid. The caller should hold an exclusive lock on the
1259  * pg_database oid for the database to prevent creation of new slots on the db
1260  * or replay from existing slots.
1261  *
1262  * Another session that concurrently acquires an existing slot on the target DB
1263  * (most likely to drop it) may cause this function to ERROR. If that happens
1264  * it may have dropped some but not all slots.
1265  *
1266  * This routine isn't as efficient as it could be - but we don't drop
1267  * databases often, especially databases with lots of slots.
1268  */
1269 void
1271 {
1272  int i;
1273 
1274  if (max_replication_slots <= 0)
1275  return;
1276 
1277 restart:
1278  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1279  for (i = 0; i < max_replication_slots; i++)
1280  {
1281  ReplicationSlot *s;
1282  char *slotname;
1283  int active_pid;
1284 
1286 
1287  /* cannot change while ReplicationSlotCtlLock is held */
1288  if (!s->in_use)
1289  continue;
1290 
1291  /* only logical slots are database specific, skip */
1292  if (!SlotIsLogical(s))
1293  continue;
1294 
1295  /* not our database, skip */
1296  if (s->data.database != dboid)
1297  continue;
1298 
1299  /* NB: intentionally including invalidated slots */
1300 
1301  /* acquire slot, so ReplicationSlotDropAcquired can be reused */
1302  SpinLockAcquire(&s->mutex);
1303  /* can't change while ReplicationSlotControlLock is held */
1304  slotname = NameStr(s->data.name);
1305  active_pid = s->active_pid;
1306  if (active_pid == 0)
1307  {
1308  MyReplicationSlot = s;
1309  s->active_pid = MyProcPid;
1310  }
1311  SpinLockRelease(&s->mutex);
1312 
1313  /*
1314  * Even though we hold an exclusive lock on the database object a
1315  * logical slot for that DB can still be active, e.g. if it's
1316  * concurrently being dropped by a backend connected to another DB.
1317  *
1318  * That's fairly unlikely in practice, so we'll just bail out.
1319  *
1320  * The slot sync worker holds a shared lock on the database before
1321  * operating on synced logical slots to avoid conflict with the drop
1322  * happening here. The persistent synced slots are thus safe but there
1323  * is a possibility that the slot sync worker has created a temporary
1324  * slot (which stays active even on release) and we are trying to drop
1325  * that here. In practice, the chances of hitting this scenario are
1326  * less as during slot synchronization, the temporary slot is
1327  * immediately converted to persistent and thus is safe due to the
1328  * shared lock taken on the database. So, we'll just bail out in such
1329  * a case.
1330  *
1331  * XXX: We can consider shutting down the slot sync worker before
1332  * trying to drop synced temporary slots here.
1333  */
1334  if (active_pid)
1335  ereport(ERROR,
1336  (errcode(ERRCODE_OBJECT_IN_USE),
1337  errmsg("replication slot \"%s\" is active for PID %d",
1338  slotname, active_pid)));
1339 
1340  /*
1341  * To avoid duplicating ReplicationSlotDropAcquired() and to avoid
1342  * holding ReplicationSlotControlLock over filesystem operations,
1343  * release ReplicationSlotControlLock and use
1344  * ReplicationSlotDropAcquired.
1345  *
1346  * As that means the set of slots could change, restart scan from the
1347  * beginning each time we release the lock.
1348  */
1349  LWLockRelease(ReplicationSlotControlLock);
1351  goto restart;
1352  }
1353  LWLockRelease(ReplicationSlotControlLock);
1354 }
1355 
1356 
1357 /*
1358  * Check whether the server's configuration supports using replication
1359  * slots.
1360  */
1361 void
1363 {
1364  /*
1365  * NB: Adding a new requirement likely means that RestoreSlotFromDisk()
1366  * needs the same check.
1367  */
1368 
1369  if (max_replication_slots == 0)
1370  ereport(ERROR,
1371  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1372  errmsg("replication slots can only be used if \"max_replication_slots\" > 0")));
1373 
1375  ereport(ERROR,
1376  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1377  errmsg("replication slots can only be used if \"wal_level\" >= \"replica\"")));
1378 }
1379 
1380 /*
1381  * Check whether the user has privilege to use replication slots.
1382  */
1383 void
1385 {
1386  if (!has_rolreplication(GetUserId()))
1387  ereport(ERROR,
1388  (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
1389  errmsg("permission denied to use replication slots"),
1390  errdetail("Only roles with the %s attribute may use replication slots.",
1391  "REPLICATION")));
1392 }
1393 
1394 /*
1395  * Reserve WAL for the currently active slot.
1396  *
1397  * Compute and set restart_lsn in a manner that's appropriate for the type of
1398  * the slot and concurrency safe.
1399  */
1400 void
1402 {
1404 
1405  Assert(slot != NULL);
1407 
1408  /*
1409  * The replication slot mechanism is used to prevent removal of required
1410  * WAL. As there is no interlock between this routine and checkpoints, WAL
1411  * segments could concurrently be removed when a now stale return value of
1412  * ReplicationSlotsComputeRequiredLSN() is used. In the unlikely case that
1413  * this happens we'll just retry.
1414  */
1415  while (true)
1416  {
1417  XLogSegNo segno;
1418  XLogRecPtr restart_lsn;
1419 
1420  /*
1421  * For logical slots log a standby snapshot and start logical decoding
1422  * at exactly that position. That allows the slot to start up more
1423  * quickly. But on a standby we cannot do WAL writes, so just use the
1424  * replay pointer; effectively, an attempt to create a logical slot on
1425  * standby will cause it to wait for an xl_running_xact record to be
1426  * logged independently on the primary, so that a snapshot can be
1427  * built using the record.
1428  *
1429  * None of this is needed (or indeed helpful) for physical slots as
1430  * they'll start replay at the last logged checkpoint anyway. Instead
1431  * return the location of the last redo LSN. While that slightly
1432  * increases the chance that we have to retry, it's where a base
1433  * backup has to start replay at.
1434  */
1435  if (SlotIsPhysical(slot))
1436  restart_lsn = GetRedoRecPtr();
1437  else if (RecoveryInProgress())
1438  restart_lsn = GetXLogReplayRecPtr(NULL);
1439  else
1440  restart_lsn = GetXLogInsertRecPtr();
1441 
1442  SpinLockAcquire(&slot->mutex);
1443  slot->data.restart_lsn = restart_lsn;
1444  SpinLockRelease(&slot->mutex);
1445 
1446  /* prevent WAL removal as fast as possible */
1448 
1449  /*
1450  * If all required WAL is still there, great, otherwise retry. The
1451  * slot should prevent further removal of WAL, unless there's a
1452  * concurrent ReplicationSlotsComputeRequiredLSN() after we've written
1453  * the new restart_lsn above, so normally we should never need to loop
1454  * more than twice.
1455  */
1457  if (XLogGetLastRemovedSegno() < segno)
1458  break;
1459  }
1460 
1461  if (!RecoveryInProgress() && SlotIsLogical(slot))
1462  {
1463  XLogRecPtr flushptr;
1464 
1465  /* make sure we have enough information to start */
1466  flushptr = LogStandbySnapshot();
1467 
1468  /* and make sure it's fsynced to disk */
1469  XLogFlush(flushptr);
1470  }
1471 }
1472 
1473 /*
1474  * Report that replication slot needs to be invalidated
1475  */
1476 static void
1478  bool terminating,
1479  int pid,
1480  NameData slotname,
1481  XLogRecPtr restart_lsn,
1482  XLogRecPtr oldestLSN,
1483  TransactionId snapshotConflictHorizon)
1484 {
1485  StringInfoData err_detail;
1486  bool hint = false;
1487 
1488  initStringInfo(&err_detail);
1489 
1490  switch (cause)
1491  {
1492  case RS_INVAL_WAL_REMOVED:
1493  {
1494  unsigned long long ex = oldestLSN - restart_lsn;
1495 
1496  hint = true;
1497  appendStringInfo(&err_detail,
1498  ngettext("The slot's restart_lsn %X/%X exceeds the limit by %llu byte.",
1499  "The slot's restart_lsn %X/%X exceeds the limit by %llu bytes.",
1500  ex),
1501  LSN_FORMAT_ARGS(restart_lsn),
1502  ex);
1503  break;
1504  }
1505  case RS_INVAL_HORIZON:
1506  appendStringInfo(&err_detail, _("The slot conflicted with xid horizon %u."),
1507  snapshotConflictHorizon);
1508  break;
1509 
1510  case RS_INVAL_WAL_LEVEL:
1511  appendStringInfoString(&err_detail, _("Logical decoding on standby requires \"wal_level\" >= \"logical\" on the primary server."));
1512  break;
1513  case RS_INVAL_NONE:
1514  pg_unreachable();
1515  }
1516 
1517  ereport(LOG,
1518  terminating ?
1519  errmsg("terminating process %d to release replication slot \"%s\"",
1520  pid, NameStr(slotname)) :
1521  errmsg("invalidating obsolete replication slot \"%s\"",
1522  NameStr(slotname)),
1523  errdetail_internal("%s", err_detail.data),
1524  hint ? errhint("You might need to increase \"%s\".", "max_slot_wal_keep_size") : 0);
1525 
1526  pfree(err_detail.data);
1527 }
1528 
1529 /*
1530  * Helper for InvalidateObsoleteReplicationSlots
1531  *
1532  * Acquires the given slot and mark it invalid, if necessary and possible.
1533  *
1534  * Returns whether ReplicationSlotControlLock was released in the interim (and
1535  * in that case we're not holding the lock at return, otherwise we are).
1536  *
1537  * Sets *invalidated true if the slot was invalidated. (Untouched otherwise.)
1538  *
1539  * This is inherently racy, because we release the LWLock
1540  * for syscalls, so caller must restart if we return true.
1541  */
1542 static bool
1544  ReplicationSlot *s,
1545  XLogRecPtr oldestLSN,
1546  Oid dboid, TransactionId snapshotConflictHorizon,
1547  bool *invalidated)
1548 {
1549  int last_signaled_pid = 0;
1550  bool released_lock = false;
1551  bool terminated = false;
1552  TransactionId initial_effective_xmin = InvalidTransactionId;
1553  TransactionId initial_catalog_effective_xmin = InvalidTransactionId;
1554  XLogRecPtr initial_restart_lsn = InvalidXLogRecPtr;
1556 
1557  for (;;)
1558  {
1559  XLogRecPtr restart_lsn;
1560  NameData slotname;
1561  int active_pid = 0;
1562  ReplicationSlotInvalidationCause invalidation_cause = RS_INVAL_NONE;
1563 
1564  Assert(LWLockHeldByMeInMode(ReplicationSlotControlLock, LW_SHARED));
1565 
1566  if (!s->in_use)
1567  {
1568  if (released_lock)
1569  LWLockRelease(ReplicationSlotControlLock);
1570  break;
1571  }
1572 
1573  /*
1574  * Check if the slot needs to be invalidated. If it needs to be
1575  * invalidated, and is not currently acquired, acquire it and mark it
1576  * as having been invalidated. We do this with the spinlock held to
1577  * avoid race conditions -- for example the restart_lsn could move
1578  * forward, or the slot could be dropped.
1579  */
1580  SpinLockAcquire(&s->mutex);
1581 
1582  restart_lsn = s->data.restart_lsn;
1583 
1584  /* we do nothing if the slot is already invalid */
1585  if (s->data.invalidated == RS_INVAL_NONE)
1586  {
1587  /*
1588  * The slot's mutex will be released soon, and it is possible that
1589  * those values change since the process holding the slot has been
1590  * terminated (if any), so record them here to ensure that we
1591  * would report the correct invalidation cause.
1592  */
1593  if (!terminated)
1594  {
1595  initial_restart_lsn = s->data.restart_lsn;
1596  initial_effective_xmin = s->effective_xmin;
1597  initial_catalog_effective_xmin = s->effective_catalog_xmin;
1598  }
1599 
1600  switch (cause)
1601  {
1602  case RS_INVAL_WAL_REMOVED:
1603  if (initial_restart_lsn != InvalidXLogRecPtr &&
1604  initial_restart_lsn < oldestLSN)
1605  invalidation_cause = cause;
1606  break;
1607  case RS_INVAL_HORIZON:
1608  if (!SlotIsLogical(s))
1609  break;
1610  /* invalid DB oid signals a shared relation */
1611  if (dboid != InvalidOid && dboid != s->data.database)
1612  break;
1613  if (TransactionIdIsValid(initial_effective_xmin) &&
1614  TransactionIdPrecedesOrEquals(initial_effective_xmin,
1615  snapshotConflictHorizon))
1616  invalidation_cause = cause;
1617  else if (TransactionIdIsValid(initial_catalog_effective_xmin) &&
1618  TransactionIdPrecedesOrEquals(initial_catalog_effective_xmin,
1619  snapshotConflictHorizon))
1620  invalidation_cause = cause;
1621  break;
1622  case RS_INVAL_WAL_LEVEL:
1623  if (SlotIsLogical(s))
1624  invalidation_cause = cause;
1625  break;
1626  case RS_INVAL_NONE:
1627  pg_unreachable();
1628  }
1629  }
1630 
1631  /*
1632  * The invalidation cause recorded previously should not change while
1633  * the process owning the slot (if any) has been terminated.
1634  */
1635  Assert(!(invalidation_cause_prev != RS_INVAL_NONE && terminated &&
1636  invalidation_cause_prev != invalidation_cause));
1637 
1638  /* if there's no invalidation, we're done */
1639  if (invalidation_cause == RS_INVAL_NONE)
1640  {
1641  SpinLockRelease(&s->mutex);
1642  if (released_lock)
1643  LWLockRelease(ReplicationSlotControlLock);
1644  break;
1645  }
1646 
1647  slotname = s->data.name;
1648  active_pid = s->active_pid;
1649 
1650  /*
1651  * If the slot can be acquired, do so and mark it invalidated
1652  * immediately. Otherwise we'll signal the owning process, below, and
1653  * retry.
1654  */
1655  if (active_pid == 0)
1656  {
1657  MyReplicationSlot = s;
1658  s->active_pid = MyProcPid;
1659  s->data.invalidated = invalidation_cause;
1660 
1661  /*
1662  * XXX: We should consider not overwriting restart_lsn and instead
1663  * just rely on .invalidated.
1664  */
1665  if (invalidation_cause == RS_INVAL_WAL_REMOVED)
1667 
1668  /* Let caller know */
1669  *invalidated = true;
1670  }
1671 
1672  SpinLockRelease(&s->mutex);
1673 
1674  /*
1675  * The logical replication slots shouldn't be invalidated as GUC
1676  * max_slot_wal_keep_size is set to -1 during the binary upgrade. See
1677  * check_old_cluster_for_valid_slots() where we ensure that no
1678  * invalidated before the upgrade.
1679  */
1680  Assert(!(*invalidated && SlotIsLogical(s) && IsBinaryUpgrade));
1681 
1682  if (active_pid != 0)
1683  {
1684  /*
1685  * Prepare the sleep on the slot's condition variable before
1686  * releasing the lock, to close a possible race condition if the
1687  * slot is released before the sleep below.
1688  */
1690 
1691  LWLockRelease(ReplicationSlotControlLock);
1692  released_lock = true;
1693 
1694  /*
1695  * Signal to terminate the process that owns the slot, if we
1696  * haven't already signalled it. (Avoidance of repeated
1697  * signalling is the only reason for there to be a loop in this
1698  * routine; otherwise we could rely on caller's restart loop.)
1699  *
1700  * There is the race condition that other process may own the slot
1701  * after its current owner process is terminated and before this
1702  * process owns it. To handle that, we signal only if the PID of
1703  * the owning process has changed from the previous time. (This
1704  * logic assumes that the same PID is not reused very quickly.)
1705  */
1706  if (last_signaled_pid != active_pid)
1707  {
1708  ReportSlotInvalidation(invalidation_cause, true, active_pid,
1709  slotname, restart_lsn,
1710  oldestLSN, snapshotConflictHorizon);
1711 
1712  if (MyBackendType == B_STARTUP)
1713  (void) SendProcSignal(active_pid,
1716  else
1717  (void) kill(active_pid, SIGTERM);
1718 
1719  last_signaled_pid = active_pid;
1720  terminated = true;
1721  invalidation_cause_prev = invalidation_cause;
1722  }
1723 
1724  /* Wait until the slot is released. */
1726  WAIT_EVENT_REPLICATION_SLOT_DROP);
1727 
1728  /*
1729  * Re-acquire lock and start over; we expect to invalidate the
1730  * slot next time (unless another process acquires the slot in the
1731  * meantime).
1732  */
1733  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1734  continue;
1735  }
1736  else
1737  {
1738  /*
1739  * We hold the slot now and have already invalidated it; flush it
1740  * to ensure that state persists.
1741  *
1742  * Don't want to hold ReplicationSlotControlLock across file
1743  * system operations, so release it now but be sure to tell caller
1744  * to restart from scratch.
1745  */
1746  LWLockRelease(ReplicationSlotControlLock);
1747  released_lock = true;
1748 
1749  /* Make sure the invalidated state persists across server restart */
1753 
1754  ReportSlotInvalidation(invalidation_cause, false, active_pid,
1755  slotname, restart_lsn,
1756  oldestLSN, snapshotConflictHorizon);
1757 
1758  /* done with this slot for now */
1759  break;
1760  }
1761  }
1762 
1763  Assert(released_lock == !LWLockHeldByMe(ReplicationSlotControlLock));
1764 
1765  return released_lock;
1766 }
1767 
1768 /*
1769  * Invalidate slots that require resources about to be removed.
1770  *
1771  * Returns true when any slot have got invalidated.
1772  *
1773  * Whether a slot needs to be invalidated depends on the cause. A slot is
1774  * removed if it:
1775  * - RS_INVAL_WAL_REMOVED: requires a LSN older than the given segment
1776  * - RS_INVAL_HORIZON: requires a snapshot <= the given horizon in the given
1777  * db; dboid may be InvalidOid for shared relations
1778  * - RS_INVAL_WAL_LEVEL: is logical
1779  *
1780  * NB - this runs as part of checkpoint, so avoid raising errors if possible.
1781  */
1782 bool
1784  XLogSegNo oldestSegno, Oid dboid,
1785  TransactionId snapshotConflictHorizon)
1786 {
1787  XLogRecPtr oldestLSN;
1788  bool invalidated = false;
1789 
1790  Assert(cause != RS_INVAL_HORIZON || TransactionIdIsValid(snapshotConflictHorizon));
1791  Assert(cause != RS_INVAL_WAL_REMOVED || oldestSegno > 0);
1792  Assert(cause != RS_INVAL_NONE);
1793 
1794  if (max_replication_slots == 0)
1795  return invalidated;
1796 
1797  XLogSegNoOffsetToRecPtr(oldestSegno, 0, wal_segment_size, oldestLSN);
1798 
1799 restart:
1800  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1801  for (int i = 0; i < max_replication_slots; i++)
1802  {
1804 
1805  if (!s->in_use)
1806  continue;
1807 
1808  if (InvalidatePossiblyObsoleteSlot(cause, s, oldestLSN, dboid,
1809  snapshotConflictHorizon,
1810  &invalidated))
1811  {
1812  /* if the lock was released, start from scratch */
1813  goto restart;
1814  }
1815  }
1816  LWLockRelease(ReplicationSlotControlLock);
1817 
1818  /*
1819  * If any slots have been invalidated, recalculate the resource limits.
1820  */
1821  if (invalidated)
1822  {
1825  }
1826 
1827  return invalidated;
1828 }
1829 
1830 /*
1831  * Flush all replication slots to disk.
1832  *
1833  * It is convenient to flush dirty replication slots at the time of checkpoint.
1834  * Additionally, in case of a shutdown checkpoint, we also identify the slots
1835  * for which the confirmed_flush LSN has been updated since the last time it
1836  * was saved and flush them.
1837  */
1838 void
1840 {
1841  int i;
1842 
1843  elog(DEBUG1, "performing replication slot checkpoint");
1844 
1845  /*
1846  * Prevent any slot from being created/dropped while we're active. As we
1847  * explicitly do *not* want to block iterating over replication_slots or
1848  * acquiring a slot we cannot take the control lock - but that's OK,
1849  * because holding ReplicationSlotAllocationLock is strictly stronger, and
1850  * enough to guarantee that nobody can change the in_use bits on us.
1851  */
1852  LWLockAcquire(ReplicationSlotAllocationLock, LW_SHARED);
1853 
1854  for (i = 0; i < max_replication_slots; i++)
1855  {
1857  char path[MAXPGPATH];
1858 
1859  if (!s->in_use)
1860  continue;
1861 
1862  /* save the slot to disk, locking is handled in SaveSlotToPath() */
1863  sprintf(path, "pg_replslot/%s", NameStr(s->data.name));
1864 
1865  /*
1866  * Slot's data is not flushed each time the confirmed_flush LSN is
1867  * updated as that could lead to frequent writes. However, we decide
1868  * to force a flush of all logical slot's data at the time of shutdown
1869  * if the confirmed_flush LSN is changed since we last flushed it to
1870  * disk. This helps in avoiding an unnecessary retreat of the
1871  * confirmed_flush LSN after restart.
1872  */
1873  if (is_shutdown && SlotIsLogical(s))
1874  {
1875  SpinLockAcquire(&s->mutex);
1876 
1877  if (s->data.invalidated == RS_INVAL_NONE &&
1879  {
1880  s->just_dirtied = true;
1881  s->dirty = true;
1882  }
1883  SpinLockRelease(&s->mutex);
1884  }
1885 
1886  SaveSlotToPath(s, path, LOG);
1887  }
1888  LWLockRelease(ReplicationSlotAllocationLock);
1889 }
1890 
1891 /*
1892  * Load all replication slots from disk into memory at server startup. This
1893  * needs to be run before we start crash recovery.
1894  */
1895 void
1897 {
1898  DIR *replication_dir;
1899  struct dirent *replication_de;
1900 
1901  elog(DEBUG1, "starting up replication slots");
1902 
1903  /* restore all slots by iterating over all on-disk entries */
1904  replication_dir = AllocateDir("pg_replslot");
1905  while ((replication_de = ReadDir(replication_dir, "pg_replslot")) != NULL)
1906  {
1907  char path[MAXPGPATH + 12];
1908  PGFileType de_type;
1909 
1910  if (strcmp(replication_de->d_name, ".") == 0 ||
1911  strcmp(replication_de->d_name, "..") == 0)
1912  continue;
1913 
1914  snprintf(path, sizeof(path), "pg_replslot/%s", replication_de->d_name);
1915  de_type = get_dirent_type(path, replication_de, false, DEBUG1);
1916 
1917  /* we're only creating directories here, skip if it's not our's */
1918  if (de_type != PGFILETYPE_ERROR && de_type != PGFILETYPE_DIR)
1919  continue;
1920 
1921  /* we crashed while a slot was being setup or deleted, clean up */
1922  if (pg_str_endswith(replication_de->d_name, ".tmp"))
1923  {
1924  if (!rmtree(path, true))
1925  {
1926  ereport(WARNING,
1927  (errmsg("could not remove directory \"%s\"",
1928  path)));
1929  continue;
1930  }
1931  fsync_fname("pg_replslot", true);
1932  continue;
1933  }
1934 
1935  /* looks like a slot in a normal state, restore */
1936  RestoreSlotFromDisk(replication_de->d_name);
1937  }
1938  FreeDir(replication_dir);
1939 
1940  /* currently no slots exist, we're done. */
1941  if (max_replication_slots <= 0)
1942  return;
1943 
1944  /* Now that we have recovered all the data, compute replication xmin */
1947 }
1948 
1949 /* ----
1950  * Manipulation of on-disk state of replication slots
1951  *
1952  * NB: none of the routines below should take any notice whether a slot is the
1953  * current one or not, that's all handled a layer above.
1954  * ----
1955  */
1956 static void
1958 {
1959  char tmppath[MAXPGPATH];
1960  char path[MAXPGPATH];
1961  struct stat st;
1962 
1963  /*
1964  * No need to take out the io_in_progress_lock, nobody else can see this
1965  * slot yet, so nobody else will write. We're reusing SaveSlotToPath which
1966  * takes out the lock, if we'd take the lock here, we'd deadlock.
1967  */
1968 
1969  sprintf(path, "pg_replslot/%s", NameStr(slot->data.name));
1970  sprintf(tmppath, "pg_replslot/%s.tmp", NameStr(slot->data.name));
1971 
1972  /*
1973  * It's just barely possible that some previous effort to create or drop a
1974  * slot with this name left a temp directory lying around. If that seems
1975  * to be the case, try to remove it. If the rmtree() fails, we'll error
1976  * out at the MakePGDirectory() below, so we don't bother checking
1977  * success.
1978  */
1979  if (stat(tmppath, &st) == 0 && S_ISDIR(st.st_mode))
1980  rmtree(tmppath, true);
1981 
1982  /* Create and fsync the temporary slot directory. */
1983  if (MakePGDirectory(tmppath) < 0)
1984  ereport(ERROR,
1986  errmsg("could not create directory \"%s\": %m",
1987  tmppath)));
1988  fsync_fname(tmppath, true);
1989 
1990  /* Write the actual state file. */
1991  slot->dirty = true; /* signal that we really need to write */
1992  SaveSlotToPath(slot, tmppath, ERROR);
1993 
1994  /* Rename the directory into place. */
1995  if (rename(tmppath, path) != 0)
1996  ereport(ERROR,
1998  errmsg("could not rename file \"%s\" to \"%s\": %m",
1999  tmppath, path)));
2000 
2001  /*
2002  * If we'd now fail - really unlikely - we wouldn't know whether this slot
2003  * would persist after an OS crash or not - so, force a restart. The
2004  * restart would try to fsync this again till it works.
2005  */
2007 
2008  fsync_fname(path, true);
2009  fsync_fname("pg_replslot", true);
2010 
2011  END_CRIT_SECTION();
2012 }
2013 
2014 /*
2015  * Shared functionality between saving and creating a replication slot.
2016  */
2017 static void
2018 SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
2019 {
2020  char tmppath[MAXPGPATH];
2021  char path[MAXPGPATH];
2022  int fd;
2024  bool was_dirty;
2025 
2026  /* first check whether there's something to write out */
2027  SpinLockAcquire(&slot->mutex);
2028  was_dirty = slot->dirty;
2029  slot->just_dirtied = false;
2030  SpinLockRelease(&slot->mutex);
2031 
2032  /* and don't do anything if there's nothing to write */
2033  if (!was_dirty)
2034  return;
2035 
2037 
2038  /* silence valgrind :( */
2039  memset(&cp, 0, sizeof(ReplicationSlotOnDisk));
2040 
2041  sprintf(tmppath, "%s/state.tmp", dir);
2042  sprintf(path, "%s/state", dir);
2043 
2044  fd = OpenTransientFile(tmppath, O_CREAT | O_EXCL | O_WRONLY | PG_BINARY);
2045  if (fd < 0)
2046  {
2047  /*
2048  * If not an ERROR, then release the lock before returning. In case
2049  * of an ERROR, the error recovery path automatically releases the
2050  * lock, but no harm in explicitly releasing even in that case. Note
2051  * that LWLockRelease() could affect errno.
2052  */
2053  int save_errno = errno;
2054 
2056  errno = save_errno;
2057  ereport(elevel,
2059  errmsg("could not create file \"%s\": %m",
2060  tmppath)));
2061  return;
2062  }
2063 
2064  cp.magic = SLOT_MAGIC;
2065  INIT_CRC32C(cp.checksum);
2066  cp.version = SLOT_VERSION;
2068 
2069  SpinLockAcquire(&slot->mutex);
2070 
2071  memcpy(&cp.slotdata, &slot->data, sizeof(ReplicationSlotPersistentData));
2072 
2073  SpinLockRelease(&slot->mutex);
2074 
2075  COMP_CRC32C(cp.checksum,
2076  (char *) (&cp) + ReplicationSlotOnDiskNotChecksummedSize,
2078  FIN_CRC32C(cp.checksum);
2079 
2080  errno = 0;
2081  pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_WRITE);
2082  if ((write(fd, &cp, sizeof(cp))) != sizeof(cp))
2083  {
2084  int save_errno = errno;
2085 
2089 
2090  /* if write didn't set errno, assume problem is no disk space */
2091  errno = save_errno ? save_errno : ENOSPC;
2092  ereport(elevel,
2094  errmsg("could not write to file \"%s\": %m",
2095  tmppath)));
2096  return;
2097  }
2099 
2100  /* fsync the temporary file */
2101  pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_SYNC);
2102  if (pg_fsync(fd) != 0)
2103  {
2104  int save_errno = errno;
2105 
2109  errno = save_errno;
2110  ereport(elevel,
2112  errmsg("could not fsync file \"%s\": %m",
2113  tmppath)));
2114  return;
2115  }
2117 
2118  if (CloseTransientFile(fd) != 0)
2119  {
2120  int save_errno = errno;
2121 
2123  errno = save_errno;
2124  ereport(elevel,
2126  errmsg("could not close file \"%s\": %m",
2127  tmppath)));
2128  return;
2129  }
2130 
2131  /* rename to permanent file, fsync file and directory */
2132  if (rename(tmppath, path) != 0)
2133  {
2134  int save_errno = errno;
2135 
2137  errno = save_errno;
2138  ereport(elevel,
2140  errmsg("could not rename file \"%s\" to \"%s\": %m",
2141  tmppath, path)));
2142  return;
2143  }
2144 
2145  /*
2146  * Check CreateSlotOnDisk() for the reasoning of using a critical section.
2147  */
2149 
2150  fsync_fname(path, false);
2151  fsync_fname(dir, true);
2152  fsync_fname("pg_replslot", true);
2153 
2154  END_CRIT_SECTION();
2155 
2156  /*
2157  * Successfully wrote, unset dirty bit, unless somebody dirtied again
2158  * already and remember the confirmed_flush LSN value.
2159  */
2160  SpinLockAcquire(&slot->mutex);
2161  if (!slot->just_dirtied)
2162  slot->dirty = false;
2164  SpinLockRelease(&slot->mutex);
2165 
2167 }
2168 
2169 /*
2170  * Load a single slot from disk into memory.
2171  */
2172 static void
2174 {
2176  int i;
2177  char slotdir[MAXPGPATH + 12];
2178  char path[MAXPGPATH + 22];
2179  int fd;
2180  bool restored = false;
2181  int readBytes;
2182  pg_crc32c checksum;
2183 
2184  /* no need to lock here, no concurrent access allowed yet */
2185 
2186  /* delete temp file if it exists */
2187  sprintf(slotdir, "pg_replslot/%s", name);
2188  sprintf(path, "%s/state.tmp", slotdir);
2189  if (unlink(path) < 0 && errno != ENOENT)
2190  ereport(PANIC,
2192  errmsg("could not remove file \"%s\": %m", path)));
2193 
2194  sprintf(path, "%s/state", slotdir);
2195 
2196  elog(DEBUG1, "restoring replication slot from \"%s\"", path);
2197 
2198  /* on some operating systems fsyncing a file requires O_RDWR */
2199  fd = OpenTransientFile(path, O_RDWR | PG_BINARY);
2200 
2201  /*
2202  * We do not need to handle this as we are rename()ing the directory into
2203  * place only after we fsync()ed the state file.
2204  */
2205  if (fd < 0)
2206  ereport(PANIC,
2208  errmsg("could not open file \"%s\": %m", path)));
2209 
2210  /*
2211  * Sync state file before we're reading from it. We might have crashed
2212  * while it wasn't synced yet and we shouldn't continue on that basis.
2213  */
2214  pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_RESTORE_SYNC);
2215  if (pg_fsync(fd) != 0)
2216  ereport(PANIC,
2218  errmsg("could not fsync file \"%s\": %m",
2219  path)));
2221 
2222  /* Also sync the parent directory */
2224  fsync_fname(slotdir, true);
2225  END_CRIT_SECTION();
2226 
2227  /* read part of statefile that's guaranteed to be version independent */
2228  pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_READ);
2229  readBytes = read(fd, &cp, ReplicationSlotOnDiskConstantSize);
2231  if (readBytes != ReplicationSlotOnDiskConstantSize)
2232  {
2233  if (readBytes < 0)
2234  ereport(PANIC,
2236  errmsg("could not read file \"%s\": %m", path)));
2237  else
2238  ereport(PANIC,
2240  errmsg("could not read file \"%s\": read %d of %zu",
2241  path, readBytes,
2243  }
2244 
2245  /* verify magic */
2246  if (cp.magic != SLOT_MAGIC)
2247  ereport(PANIC,
2249  errmsg("replication slot file \"%s\" has wrong magic number: %u instead of %u",
2250  path, cp.magic, SLOT_MAGIC)));
2251 
2252  /* verify version */
2253  if (cp.version != SLOT_VERSION)
2254  ereport(PANIC,
2256  errmsg("replication slot file \"%s\" has unsupported version %u",
2257  path, cp.version)));
2258 
2259  /* boundary check on length */
2261  ereport(PANIC,
2263  errmsg("replication slot file \"%s\" has corrupted length %u",
2264  path, cp.length)));
2265 
2266  /* Now that we know the size, read the entire file */
2267  pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_READ);
2268  readBytes = read(fd,
2269  (char *) &cp + ReplicationSlotOnDiskConstantSize,
2270  cp.length);
2272  if (readBytes != cp.length)
2273  {
2274  if (readBytes < 0)
2275  ereport(PANIC,
2277  errmsg("could not read file \"%s\": %m", path)));
2278  else
2279  ereport(PANIC,
2281  errmsg("could not read file \"%s\": read %d of %zu",
2282  path, readBytes, (Size) cp.length)));
2283  }
2284 
2285  if (CloseTransientFile(fd) != 0)
2286  ereport(PANIC,
2288  errmsg("could not close file \"%s\": %m", path)));
2289 
2290  /* now verify the CRC */
2291  INIT_CRC32C(checksum);
2292  COMP_CRC32C(checksum,
2295  FIN_CRC32C(checksum);
2296 
2297  if (!EQ_CRC32C(checksum, cp.checksum))
2298  ereport(PANIC,
2299  (errmsg("checksum mismatch for replication slot file \"%s\": is %u, should be %u",
2300  path, checksum, cp.checksum)));
2301 
2302  /*
2303  * If we crashed with an ephemeral slot active, don't restore but delete
2304  * it.
2305  */
2307  {
2308  if (!rmtree(slotdir, true))
2309  {
2310  ereport(WARNING,
2311  (errmsg("could not remove directory \"%s\"",
2312  slotdir)));
2313  }
2314  fsync_fname("pg_replslot", true);
2315  return;
2316  }
2317 
2318  /*
2319  * Verify that requirements for the specific slot type are met. That's
2320  * important because if these aren't met we're not guaranteed to retain
2321  * all the necessary resources for the slot.
2322  *
2323  * NB: We have to do so *after* the above checks for ephemeral slots,
2324  * because otherwise a slot that shouldn't exist anymore could prevent
2325  * restarts.
2326  *
2327  * NB: Changing the requirements here also requires adapting
2328  * CheckSlotRequirements() and CheckLogicalDecodingRequirements().
2329  */
2331  ereport(FATAL,
2332  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2333  errmsg("logical replication slot \"%s\" exists, but \"wal_level\" < \"logical\"",
2334  NameStr(cp.slotdata.name)),
2335  errhint("Change \"wal_level\" to be \"logical\" or higher.")));
2336  else if (wal_level < WAL_LEVEL_REPLICA)
2337  ereport(FATAL,
2338  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2339  errmsg("physical replication slot \"%s\" exists, but \"wal_level\" < \"replica\"",
2340  NameStr(cp.slotdata.name)),
2341  errhint("Change \"wal_level\" to be \"replica\" or higher.")));
2342 
2343  /* nothing can be active yet, don't lock anything */
2344  for (i = 0; i < max_replication_slots; i++)
2345  {
2346  ReplicationSlot *slot;
2347 
2349 
2350  if (slot->in_use)
2351  continue;
2352 
2353  /* restore the entire set of persistent data */
2354  memcpy(&slot->data, &cp.slotdata,
2356 
2357  /* initialize in memory state */
2358  slot->effective_xmin = cp.slotdata.xmin;
2361 
2366 
2367  slot->in_use = true;
2368  slot->active_pid = 0;
2369 
2370  /*
2371  * Set the time since the slot has become inactive after loading the
2372  * slot from the disk into memory. Whoever acquires the slot i.e.
2373  * makes the slot active will reset it.
2374  */
2376 
2377  restored = true;
2378  break;
2379  }
2380 
2381  if (!restored)
2382  ereport(FATAL,
2383  (errmsg("too many replication slots active before shutdown"),
2384  errhint("Increase \"max_replication_slots\" and try again.")));
2385 }
2386 
2387 /*
2388  * Maps an invalidation reason for a replication slot to
2389  * ReplicationSlotInvalidationCause.
2390  */
2392 GetSlotInvalidationCause(const char *invalidation_reason)
2393 {
2396  bool found PG_USED_FOR_ASSERTS_ONLY = false;
2397 
2398  Assert(invalidation_reason);
2399 
2400  for (cause = RS_INVAL_NONE; cause <= RS_INVAL_MAX_CAUSES; cause++)
2401  {
2402  if (strcmp(SlotInvalidationCauses[cause], invalidation_reason) == 0)
2403  {
2404  found = true;
2405  result = cause;
2406  break;
2407  }
2408  }
2409 
2410  Assert(found);
2411  return result;
2412 }
2413 
2414 /*
2415  * A helper function to validate slots specified in GUC standby_slot_names.
2416  *
2417  * The rawname will be parsed, and the result will be saved into *elemlist.
2418  */
2419 static bool
2420 validate_standby_slots(char *rawname, List **elemlist)
2421 {
2422  bool ok;
2423 
2424  /* Verify syntax and parse string into a list of identifiers */
2425  ok = SplitIdentifierString(rawname, ',', elemlist);
2426 
2427  if (!ok)
2428  {
2429  GUC_check_errdetail("List syntax is invalid.");
2430  }
2431  else if (!ReplicationSlotCtl)
2432  {
2433  /*
2434  * We cannot validate the replication slot if the replication slots'
2435  * data has not been initialized. This is ok as we will anyway
2436  * validate the specified slot when waiting for them to catch up. See
2437  * StandbySlotsHaveCaughtup() for details.
2438  */
2439  }
2440  else
2441  {
2442  /* Check that the specified slots exist and are logical slots */
2443  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
2444 
2445  foreach_ptr(char, name, *elemlist)
2446  {
2447  ReplicationSlot *slot;
2448 
2449  slot = SearchNamedReplicationSlot(name, false);
2450 
2451  if (!slot)
2452  {
2453  GUC_check_errdetail("replication slot \"%s\" does not exist",
2454  name);
2455  ok = false;
2456  break;
2457  }
2458 
2459  if (!SlotIsPhysical(slot))
2460  {
2461  GUC_check_errdetail("\"%s\" is not a physical replication slot",
2462  name);
2463  ok = false;
2464  break;
2465  }
2466  }
2467 
2468  LWLockRelease(ReplicationSlotControlLock);
2469  }
2470 
2471  return ok;
2472 }
2473 
2474 /*
2475  * GUC check_hook for standby_slot_names
2476  */
2477 bool
2479 {
2480  char *rawname;
2481  char *ptr;
2482  List *elemlist;
2483  int size;
2484  bool ok;
2486 
2487  if ((*newval)[0] == '\0')
2488  return true;
2489 
2490  /* Need a modifiable copy of the GUC string */
2491  rawname = pstrdup(*newval);
2492 
2493  /* Now verify if the specified slots exist and have correct type */
2494  ok = validate_standby_slots(rawname, &elemlist);
2495 
2496  if (!ok || elemlist == NIL)
2497  {
2498  pfree(rawname);
2499  list_free(elemlist);
2500  return ok;
2501  }
2502 
2503  /* Compute the size required for the StandbySlotNamesConfigData struct */
2504  size = offsetof(StandbySlotNamesConfigData, slot_names);
2505  foreach_ptr(char, slot_name, elemlist)
2506  size += strlen(slot_name) + 1;
2507 
2508  /* GUC extra value must be guc_malloc'd, not palloc'd */
2510 
2511  /* Transform the data into StandbySlotNamesConfigData */
2512  config->nslotnames = list_length(elemlist);
2513 
2514  ptr = config->slot_names;
2515  foreach_ptr(char, slot_name, elemlist)
2516  {
2517  strcpy(ptr, slot_name);
2518  ptr += strlen(slot_name) + 1;
2519  }
2520 
2521  *extra = (void *) config;
2522 
2523  pfree(rawname);
2524  list_free(elemlist);
2525  return true;
2526 }
2527 
2528 /*
2529  * GUC assign_hook for standby_slot_names
2530  */
2531 void
2532 assign_standby_slot_names(const char *newval, void *extra)
2533 {
2534  /*
2535  * The standby slots may have changed, so we must recompute the oldest
2536  * LSN.
2537  */
2539 
2541 }
2542 
2543 /*
2544  * Check if the passed slot_name is specified in the standby_slot_names GUC.
2545  */
2546 bool
2547 SlotExistsInStandbySlotNames(const char *slot_name)
2548 {
2549  const char *standby_slot_name;
2550 
2551  /* Return false if there is no value in standby_slot_names */
2552  if (standby_slot_names_config == NULL)
2553  return false;
2554 
2555  /*
2556  * XXX: We are not expecting this list to be long so a linear search
2557  * shouldn't hurt but if that turns out not to be true then we can cache
2558  * this information for each WalSender as well.
2559  */
2560  standby_slot_name = standby_slot_names_config->slot_names;
2561  for (int i = 0; i < standby_slot_names_config->nslotnames; i++)
2562  {
2563  if (strcmp(standby_slot_name, slot_name) == 0)
2564  return true;
2565 
2566  standby_slot_name += strlen(standby_slot_name) + 1;
2567  }
2568 
2569  return false;
2570 }
2571 
2572 /*
2573  * Return true if the slots specified in standby_slot_names have caught up to
2574  * the given WAL location, false otherwise.
2575  *
2576  * The elevel parameter specifies the error level used for logging messages
2577  * related to slots that do not exist, are invalidated, or are inactive.
2578  */
2579 bool
2580 StandbySlotsHaveCaughtup(XLogRecPtr wait_for_lsn, int elevel)
2581 {
2582  const char *name;
2583  int caught_up_slot_num = 0;
2584  XLogRecPtr min_restart_lsn = InvalidXLogRecPtr;
2585 
2586  /*
2587  * Don't need to wait for the standbys to catch up if there is no value in
2588  * standby_slot_names.
2589  */
2590  if (standby_slot_names_config == NULL)
2591  return true;
2592 
2593  /*
2594  * Don't need to wait for the standbys to catch up if we are on a standby
2595  * server, since we do not support syncing slots to cascading standbys.
2596  */
2597  if (RecoveryInProgress())
2598  return true;
2599 
2600  /*
2601  * Don't need to wait for the standbys to catch up if they are already
2602  * beyond the specified WAL location.
2603  */
2605  ss_oldest_flush_lsn >= wait_for_lsn)
2606  return true;
2607 
2608  /*
2609  * To prevent concurrent slot dropping and creation while filtering the
2610  * slots, take the ReplicationSlotControlLock outside of the loop.
2611  */
2612  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
2613 
2615  for (int i = 0; i < standby_slot_names_config->nslotnames; i++)
2616  {
2617  XLogRecPtr restart_lsn;
2618  bool invalidated;
2619  bool inactive;
2620  ReplicationSlot *slot;
2621 
2622  slot = SearchNamedReplicationSlot(name, false);
2623 
2624  if (!slot)
2625  {
2626  /*
2627  * If a slot name provided in standby_slot_names does not exist,
2628  * report a message and exit the loop. A user can specify a slot
2629  * name that does not exist just before the server startup. The
2630  * GUC check_hook(validate_standby_slots) cannot validate such a
2631  * slot during startup as the ReplicationSlotCtl shared memory is
2632  * not initialized at that time. It is also possible for a user to
2633  * drop the slot in standby_slot_names afterwards.
2634  */
2635  ereport(elevel,
2636  errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2637  errmsg("replication slot \"%s\" specified in parameter %s does not exist",
2638  name, "standby_slot_names"),
2639  errdetail("Logical replication is waiting on the standby associated with \"%s\".",
2640  name),
2641  errhint("Consider creating the slot \"%s\" or amend parameter %s.",
2642  name, "standby_slot_names"));
2643  break;
2644  }
2645 
2646  if (SlotIsLogical(slot))
2647  {
2648  /*
2649  * If a logical slot name is provided in standby_slot_names,
2650  * report a message and exit the loop. Similar to the non-existent
2651  * case, a user can specify a logical slot name in
2652  * standby_slot_names before the server startup, or drop an
2653  * existing physical slot and recreate a logical slot with the
2654  * same name.
2655  */
2656  ereport(elevel,
2657  errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2658  errmsg("cannot have logical replication slot \"%s\" in parameter %s",
2659  name, "standby_slot_names"),
2660  errdetail("Logical replication is waiting for correction on \"%s\".",
2661  name),
2662  errhint("Consider removing logical slot \"%s\" from parameter %s.",
2663  name, "standby_slot_names"));
2664  break;
2665  }
2666 
2667  SpinLockAcquire(&slot->mutex);
2668  restart_lsn = slot->data.restart_lsn;
2669  invalidated = slot->data.invalidated != RS_INVAL_NONE;
2670  inactive = slot->active_pid == 0;
2671  SpinLockRelease(&slot->mutex);
2672 
2673  if (invalidated)
2674  {
2675  /* Specified physical slot has been invalidated */
2676  ereport(elevel,
2677  errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2678  errmsg("physical slot \"%s\" specified in parameter %s has been invalidated",
2679  name, "standby_slot_names"),
2680  errdetail("Logical replication is waiting on the standby associated with \"%s\".",
2681  name),
2682  errhint("Consider dropping and recreating the slot \"%s\" or amend parameter %s.",
2683  name, "standby_slot_names"));
2684  break;
2685  }
2686 
2687  if (XLogRecPtrIsInvalid(restart_lsn) || restart_lsn < wait_for_lsn)
2688  {
2689  /* Log a message if no active_pid for this physical slot */
2690  if (inactive)
2691  ereport(elevel,
2692  errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2693  errmsg("replication slot \"%s\" specified in parameter %s does not have active_pid",
2694  name, "standby_slot_names"),
2695  errdetail("Logical replication is waiting on the standby associated with \"%s\".",
2696  name),
2697  errhint("Consider starting standby associated with \"%s\" or amend parameter %s.",
2698  name, "standby_slot_names"));
2699 
2700  /* Continue if the current slot hasn't caught up. */
2701  break;
2702  }
2703 
2704  Assert(restart_lsn >= wait_for_lsn);
2705 
2706  if (XLogRecPtrIsInvalid(min_restart_lsn) ||
2707  min_restart_lsn > restart_lsn)
2708  min_restart_lsn = restart_lsn;
2709 
2710  caught_up_slot_num++;
2711 
2712  name += strlen(name) + 1;
2713  }
2714 
2715  LWLockRelease(ReplicationSlotControlLock);
2716 
2717  /*
2718  * Return false if not all the standbys have caught up to the specified
2719  * WAL location.
2720  */
2721  if (caught_up_slot_num != standby_slot_names_config->nslotnames)
2722  return false;
2723 
2724  /* The ss_oldest_flush_lsn must not retreat. */
2726  min_restart_lsn >= ss_oldest_flush_lsn);
2727 
2728  ss_oldest_flush_lsn = min_restart_lsn;
2729 
2730  return true;
2731 }
2732 
2733 /*
2734  * Wait for physical standbys to confirm receiving the given lsn.
2735  *
2736  * Used by logical decoding SQL functions. It waits for physical standbys
2737  * corresponding to the physical slots specified in the standby_slot_names GUC.
2738  */
2739 void
2741 {
2742  /*
2743  * Don't need to wait for the standby to catch up if the current acquired
2744  * slot is not a logical failover slot, or there is no value in
2745  * standby_slot_names.
2746  */
2748  return;
2749 
2751 
2752  for (;;)
2753  {
2755 
2756  if (ConfigReloadPending)
2757  {
2758  ConfigReloadPending = false;
2760  }
2761 
2762  /* Exit if done waiting for every slot. */
2763  if (StandbySlotsHaveCaughtup(wait_for_lsn, WARNING))
2764  break;
2765 
2766  /*
2767  * Wait for the slots in the standby_slot_names to catch up, but use a
2768  * timeout (1s) so we can also check if the standby_slot_names has
2769  * been changed.
2770  */
2772  WAIT_EVENT_WAIT_FOR_STANDBY_CONFIRMATION);
2773  }
2774 
2776 }
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1654
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1618
#define NameStr(name)
Definition: c.h:746
unsigned int uint32
Definition: c.h:506
#define ngettext(s, p, n)
Definition: c.h:1181
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:182
#define Assert(condition)
Definition: c.h:858
#define PG_BINARY
Definition: c.h:1273
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:398
#define pg_unreachable()
Definition: c.h:296
#define lengthof(array)
Definition: c.h:788
#define MemSet(start, val, len)
Definition: c.h:1020
uint32 TransactionId
Definition: c.h:652
size_t Size
Definition: c.h:605
bool ConditionVariableCancelSleep(void)
bool ConditionVariableTimedSleep(ConditionVariable *cv, long timeout, uint32 wait_event_info)
void ConditionVariableBroadcast(ConditionVariable *cv)
void ConditionVariablePrepareToSleep(ConditionVariable *cv)
void ConditionVariableInit(ConditionVariable *cv)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
int64 TimestampTz
Definition: timestamp.h:39
int errdetail_internal(const char *fmt,...)
Definition: elog.c:1230
int errcode_for_file_access(void)
Definition: elog.c:880
int errdetail(const char *fmt,...)
Definition: elog.c:1203
int errhint(const char *fmt,...)
Definition: elog.c:1317
int errcode(int sqlerrcode)
Definition: elog.c:857
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define _(x)
Definition: elog.c:90
#define LOG
Definition: elog.h:31
#define FATAL
Definition: elog.h:41
#define WARNING
Definition: elog.h:36
#define PANIC
Definition: elog.h:42
#define DEBUG1
Definition: elog.h:30
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
#define ereport(elevel,...)
Definition: elog.h:149
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2909
int MakePGDirectory(const char *directoryName)
Definition: fd.c:3913
int FreeDir(DIR *dir)
Definition: fd.c:2961
int CloseTransientFile(int fd)
Definition: fd.c:2809
void fsync_fname(const char *fname, bool isdir)
Definition: fd.c:756
int pg_fsync(int fd)
Definition: fd.c:386
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2633
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2843
PGFileType get_dirent_type(const char *path, const struct dirent *de, bool look_through_symlinks, int elevel)
Definition: file_utils.c:525
PGFileType
Definition: file_utils.h:19
@ PGFILETYPE_DIR
Definition: file_utils.h:23
@ PGFILETYPE_ERROR
Definition: file_utils.h:20
bool IsBinaryUpgrade
Definition: globals.c:118
int MyProcPid
Definition: globals.c:45
bool IsUnderPostmaster
Definition: globals.c:117
Oid MyDatabaseId
Definition: globals.c:91
void * guc_malloc(int elevel, size_t size)
Definition: guc.c:640
#define newval
#define GUC_check_errdetail
Definition: guc.h:447
GucSource
Definition: guc.h:108
@ PGC_SIGHUP
Definition: guc.h:71
void ProcessConfigFile(GucContext context)
#define write(a, b, c)
Definition: win32.h:14
#define read(a, b, c)
Definition: win32.h:13
volatile sig_atomic_t ConfigReloadPending
Definition: interrupt.c:27
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:337
int i
Definition: isn.c:73
void list_free(List *list)
Definition: list.c:1546
bool LWLockHeldByMe(LWLock *lock)
Definition: lwlock.c:1895
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1170
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1939
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1783
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:709
@ LWTRANCHE_REPLICATION_SLOT_IO
Definition: lwlock.h:189
@ LW_SHARED
Definition: lwlock.h:115
@ LW_EXCLUSIVE
Definition: lwlock.h:114
char * pstrdup(const char *in)
Definition: mcxt.c:1695
void pfree(void *pointer)
Definition: mcxt.c:1520
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
@ B_STARTUP
Definition: miscadmin.h:358
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
Oid GetUserId(void)
Definition: miscinit.c:514
BackendType MyBackendType
Definition: miscinit.c:63
bool has_rolreplication(Oid roleid)
Definition: miscinit.c:711
void namestrcpy(Name name, const char *str)
Definition: name.c:233
void * arg
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
#define NAMEDATALEN
#define MAXPGPATH
uint32 pg_crc32c
Definition: pg_crc32c.h:38
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:98
#define EQ_CRC32C(c1, c2)
Definition: pg_crc32c.h:42
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:103
static int list_length(const List *l)
Definition: pg_list.h:152
#define NIL
Definition: pg_list.h:68
#define foreach_ptr(type, var, lst)
Definition: pg_list.h:469
static bool two_phase
static rewind_source * source
Definition: pg_rewind.c:89
void pgstat_create_replslot(ReplicationSlot *slot)
void pgstat_acquire_replslot(ReplicationSlot *slot)
void pgstat_drop_replslot(ReplicationSlot *slot)
#define sprintf
Definition: port.h:240
#define snprintf
Definition: port.h:238
uintptr_t Datum
Definition: postgres.h:64
#define InvalidOid
Definition: postgres_ext.h:36
unsigned int Oid
Definition: postgres_ext.h:31
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define PROC_IN_LOGICAL_DECODING
Definition: proc.h:61
void ProcArraySetReplicationSlotXmin(TransactionId xmin, TransactionId catalog_xmin, bool already_locked)
Definition: procarray.c:3930
#define INVALID_PROC_NUMBER
Definition: procnumber.h:26
int SendProcSignal(pid_t pid, ProcSignalReason reason, ProcNumber procNumber)
Definition: procsignal.c:257
@ PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT
Definition: procsignal.h:46
bool rmtree(const char *path, bool rmtopdir)
Definition: rmtree.c:50
Size add_size(Size s1, Size s2)
Definition: shmem.c:493
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:387
Size mul_size(Size s1, Size s2)
Definition: shmem.c:510
static pg_noinline void Size size
Definition: slab.c:607
ReplicationSlot * SearchNamedReplicationSlot(const char *name, bool need_lock)
Definition: slot.c:464
void ReplicationSlotAlter(const char *name, bool failover)
Definition: slot.c:807
int ReplicationSlotIndex(ReplicationSlot *slot)
Definition: slot.c:497
#define ReplicationSlotOnDiskChecksummedSize
Definition: slot.c:125
void CheckPointReplicationSlots(bool is_shutdown)
Definition: slot.c:1839
void ReplicationSlotCreate(const char *name, bool db_specific, ReplicationSlotPersistency persistency, bool two_phase, bool failover, bool synced)
Definition: slot.c:309
void ReplicationSlotDropAcquired(void)
Definition: slot.c:868
void ReplicationSlotMarkDirty(void)
Definition: slot.c:1010
void ReplicationSlotReserveWal(void)
Definition: slot.c:1401
char * standby_slot_names
Definition: slot.c:148
bool ReplicationSlotsCountDBSlots(Oid dboid, int *nslots, int *nactive)
Definition: slot.c:1212
void ReplicationSlotAcquire(const char *name, bool nowait)
Definition: slot.c:540
bool SlotExistsInStandbySlotNames(const char *slot_name)
Definition: slot.c:2547
bool InvalidateObsoleteReplicationSlots(ReplicationSlotInvalidationCause cause, XLogSegNo oldestSegno, Oid dboid, TransactionId snapshotConflictHorizon)
Definition: slot.c:1783
static bool validate_standby_slots(char *rawname, List **elemlist)
Definition: slot.c:2420
static XLogRecPtr ss_oldest_flush_lsn
Definition: slot.c:157
ReplicationSlotInvalidationCause GetSlotInvalidationCause(const char *invalidation_reason)
Definition: slot.c:2392
void ReplicationSlotsDropDBSlots(Oid dboid)
Definition: slot.c:1270
#define ReplicationSlotOnDiskNotChecksummedSize
Definition: slot.c:122
XLogRecPtr ReplicationSlotsComputeLogicalRestartLSN(void)
Definition: slot.c:1154
void ReplicationSlotsComputeRequiredXmin(bool already_locked)
Definition: slot.c:1049
static void RestoreSlotFromDisk(const char *name)
Definition: slot.c:2173
#define RS_INVAL_MAX_CAUSES
Definition: slot.c:113
void ReplicationSlotPersist(void)
Definition: slot.c:1027
ReplicationSlot * MyReplicationSlot
Definition: slot.c:138
static void SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
Definition: slot.c:2018
void ReplicationSlotDrop(const char *name, bool nowait)
Definition: slot.c:784
void ReplicationSlotSave(void)
Definition: slot.c:992
static void CreateSlotOnDisk(ReplicationSlot *slot)
Definition: slot.c:1957
#define ReplicationSlotOnDiskV2Size
Definition: slot.c:128
void CheckSlotPermissions(void)
Definition: slot.c:1384
bool ReplicationSlotName(int index, Name name)
Definition: slot.c:513
void ReplicationSlotsShmemInit(void)
Definition: slot.c:189
const char *const SlotInvalidationCauses[]
Definition: slot.c:105
static StandbySlotNamesConfigData * standby_slot_names_config
Definition: slot.c:151
void ReplicationSlotRelease(void)
Definition: slot.c:652
int max_replication_slots
Definition: slot.c:141
StaticAssertDecl(lengthof(SlotInvalidationCauses)==(RS_INVAL_MAX_CAUSES+1), "array length mismatch")
ReplicationSlotCtlData * ReplicationSlotCtl
Definition: slot.c:135
#define SLOT_VERSION
Definition: slot.c:132
struct ReplicationSlotOnDisk ReplicationSlotOnDisk
void WaitForStandbyConfirmation(XLogRecPtr wait_for_lsn)
Definition: slot.c:2740
bool StandbySlotsHaveCaughtup(XLogRecPtr wait_for_lsn, int elevel)
Definition: slot.c:2580
void ReplicationSlotsComputeRequiredLSN(void)
Definition: slot.c:1105
void ReplicationSlotCleanup(bool synced_only)
Definition: slot.c:745
void ReplicationSlotInitialize(void)
Definition: slot.c:224
static void ReplicationSlotDropPtr(ReplicationSlot *slot)
Definition: slot.c:885
static bool InvalidatePossiblyObsoleteSlot(ReplicationSlotInvalidationCause cause, ReplicationSlot *s, XLogRecPtr oldestLSN, Oid dboid, TransactionId snapshotConflictHorizon, bool *invalidated)
Definition: slot.c:1543
void StartupReplicationSlots(void)
Definition: slot.c:1896
void CheckSlotRequirements(void)
Definition: slot.c:1362
#define SLOT_MAGIC
Definition: slot.c:131
void assign_standby_slot_names(const char *newval, void *extra)
Definition: slot.c:2532
bool check_standby_slot_names(char **newval, void **extra, GucSource source)
Definition: slot.c:2478
static void ReportSlotInvalidation(ReplicationSlotInvalidationCause cause, bool terminating, int pid, NameData slotname, XLogRecPtr restart_lsn, XLogRecPtr oldestLSN, TransactionId snapshotConflictHorizon)
Definition: slot.c:1477
#define ReplicationSlotOnDiskConstantSize
Definition: slot.c:119
Size ReplicationSlotsShmemSize(void)
Definition: slot.c:171
bool ReplicationSlotValidateName(const char *name, int elevel)
Definition: slot.c:252
static void ReplicationSlotShmemExit(int code, Datum arg)
Definition: slot.c:233
ReplicationSlotPersistency
Definition: slot.h:34
@ RS_PERSISTENT
Definition: slot.h:35
@ RS_EPHEMERAL
Definition: slot.h:36
@ RS_TEMPORARY
Definition: slot.h:37
#define SlotIsPhysical(slot)
Definition: slot.h:209
ReplicationSlotInvalidationCause
Definition: slot.h:48
@ RS_INVAL_WAL_REMOVED
Definition: slot.h:51
@ RS_INVAL_HORIZON
Definition: slot.h:53
@ RS_INVAL_WAL_LEVEL
Definition: slot.h:55
@ RS_INVAL_NONE
Definition: slot.h:49
#define SlotIsLogical(slot)
Definition: slot.h:210
bool IsSyncingReplicationSlots(void)
Definition: slotsync.c:1650
#define SpinLockInit(lock)
Definition: spin.h:60
#define SpinLockRelease(lock)
Definition: spin.h:64
#define SpinLockAcquire(lock)
Definition: spin.h:62
PGPROC * MyProc
Definition: proc.c:66
PROC_HDR * ProcGlobal
Definition: proc.c:78
XLogRecPtr LogStandbySnapshot(void)
Definition: standby.c:1285
#define ERRCODE_DUPLICATE_OBJECT
Definition: streamutil.c:32
bool pg_str_endswith(const char *str, const char *end)
Definition: string.c:32
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:97
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:182
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
Definition: dirent.c:26
Definition: pg_list.h:54
uint8 statusFlags
Definition: proc.h:238
int pgxactoff
Definition: proc.h:180
uint8 * statusFlags
Definition: proc.h:395
ReplicationSlot replication_slots[1]
Definition: slot.h:221
uint32 version
Definition: slot.c:73
ReplicationSlotPersistentData slotdata
Definition: slot.c:81
pg_crc32c checksum
Definition: slot.c:70
TransactionId xmin
Definition: slot.h:82
TransactionId catalog_xmin
Definition: slot.h:90
XLogRecPtr restart_lsn
Definition: slot.h:93
XLogRecPtr confirmed_flush
Definition: slot.h:104
ReplicationSlotPersistency persistency
Definition: slot.h:74
ReplicationSlotInvalidationCause invalidated
Definition: slot.h:96
XLogRecPtr candidate_xmin_lsn
Definition: slot.h:194
TransactionId effective_catalog_xmin
Definition: slot.h:175
slock_t mutex
Definition: slot.h:151
XLogRecPtr candidate_restart_valid
Definition: slot.h:195
XLogRecPtr last_saved_confirmed_flush
Definition: slot.h:203
pid_t active_pid
Definition: slot.h:157
bool in_use
Definition: slot.h:154
TransactionId effective_xmin
Definition: slot.h:174
bool just_dirtied
Definition: slot.h:160
XLogRecPtr candidate_restart_lsn
Definition: slot.h:196
LWLock io_in_progress_lock
Definition: slot.h:181
ConditionVariable active_cv
Definition: slot.h:184
TransactionId candidate_catalog_xmin
Definition: slot.h:193
bool dirty
Definition: slot.h:161
ReplicationSlotPersistentData data
Definition: slot.h:178
TimestampTz inactive_since
Definition: slot.h:206
char slot_names[FLEXIBLE_ARRAY_MEMBER]
Definition: slot.c:99
ConditionVariable wal_confirm_rcv_cv
Definition: dirent.h:10
char d_name[MAX_PATH]
Definition: dirent.h:15
Definition: type.h:95
Definition: c.h:741
unsigned short st_mode
Definition: win32_port.h:268
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:299
#define InvalidTransactionId
Definition: transam.h:31
#define TransactionIdIsValid(xid)
Definition: transam.h:41
bool SplitIdentifierString(char *rawstring, char separator, List **namelist)
Definition: varlena.c:3457
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:82
static void pgstat_report_wait_end(void)
Definition: wait_event.h:98
const char * name
bool am_walsender
Definition: walsender.c:115
bool log_replication_commands
Definition: walsender.c:125
WalSndCtlData * WalSndCtl
Definition: walsender.c:109
#define stat
Definition: win32_port.h:284
#define S_ISDIR(m)
Definition: win32_port.h:325
#define kill(pid, sig)
Definition: win32_port.h:485
bool RecoveryInProgress(void)
Definition: xlog.c:6290
XLogSegNo XLogGetLastRemovedSegno(void)
Definition: xlog.c:3747
XLogRecPtr GetRedoRecPtr(void)
Definition: xlog.c:6393
int wal_level
Definition: xlog.c:131
int wal_segment_size
Definition: xlog.c:143
void XLogSetReplicationSlotMinimumLSN(XLogRecPtr lsn)
Definition: xlog.c:2677
XLogRecPtr GetXLogInsertRecPtr(void)
Definition: xlog.c:9355
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2791
@ WAL_LEVEL_REPLICA
Definition: xlog.h:73
@ WAL_LEVEL_LOGICAL
Definition: xlog.h:74
#define XLogSegNoOffsetToRecPtr(segno, offset, wal_segsz_bytes, dest)
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
uint64 XLogSegNo
Definition: xlogdefs.h:48
XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI)