PostgreSQL Source Code  git master
slot.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * slot.c
4  * Replication slot management.
5  *
6  *
7  * Copyright (c) 2012-2024, PostgreSQL Global Development Group
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/replication/slot.c
12  *
13  * NOTES
14  *
15  * Replication slots are used to keep state about replication streams
16  * originating from this cluster. Their primary purpose is to prevent the
17  * premature removal of WAL or of old tuple versions in a manner that would
18  * interfere with replication; they are also useful for monitoring purposes.
19  * Slots need to be permanent (to allow restarts), crash-safe, and allocatable
20  * on standbys (to support cascading setups). The requirement that slots be
21  * usable on standbys precludes storing them in the system catalogs.
22  *
23  * Each replication slot gets its own directory inside the $PGDATA/pg_replslot
24  * directory. Inside that directory the state file will contain the slot's
25  * own data. Additional data can be stored alongside that file if required.
26  * While the server is running, the state data is also cached in memory for
27  * efficiency.
28  *
29  * ReplicationSlotAllocationLock must be taken in exclusive mode to allocate
30  * or free a slot. ReplicationSlotControlLock must be taken in shared mode
31  * to iterate over the slots, and in exclusive mode to change the in_use flag
32  * of a slot. The remaining data in each slot is protected by its mutex.
33  *
34  *-------------------------------------------------------------------------
35  */
36 
37 #include "postgres.h"
38 
39 #include <unistd.h>
40 #include <sys/stat.h>
41 
42 #include "access/transam.h"
43 #include "access/xlog_internal.h"
44 #include "access/xlogrecovery.h"
45 #include "common/file_utils.h"
46 #include "common/string.h"
47 #include "miscadmin.h"
48 #include "pgstat.h"
49 #include "postmaster/interrupt.h"
50 #include "replication/slotsync.h"
51 #include "replication/slot.h"
53 #include "storage/fd.h"
54 #include "storage/ipc.h"
55 #include "storage/proc.h"
56 #include "storage/procarray.h"
57 #include "utils/builtins.h"
58 #include "utils/guc_hooks.h"
59 #include "utils/varlena.h"
60 
61 /*
62  * Replication slot on-disk data structure.
63  */
64 typedef struct ReplicationSlotOnDisk
65 {
66  /* first part of this struct needs to be version independent */
67 
68  /* data not covered by checksum */
71 
72  /* data covered by checksum */
75 
76  /*
77  * The actual data in the slot that follows can differ based on the above
78  * 'version'.
79  */
80 
83 
84 /*
85  * Struct for the configuration of standby_slot_names.
86  *
87  * Note: this must be a flat representation that can be held in a single chunk
88  * of guc_malloc'd memory, so that it can be stored as the "extra" data for the
89  * standby_slot_names GUC.
90  */
91 typedef struct
92 {
93  /* Number of slot names in the slot_names[] */
95 
96  /*
97  * slot_names contains 'nslotnames' consecutive null-terminated C strings.
98  */
99  char slot_names[FLEXIBLE_ARRAY_MEMBER];
101 
102 /*
103  * Lookup table for slot invalidation causes.
104  */
105 const char *const SlotInvalidationCauses[] = {
106  [RS_INVAL_NONE] = "none",
107  [RS_INVAL_WAL_REMOVED] = "wal_removed",
108  [RS_INVAL_HORIZON] = "rows_removed",
109  [RS_INVAL_WAL_LEVEL] = "wal_level_insufficient",
110 };
111 
112 /* Highest invalidation cause value, i.e. the last index of SlotInvalidationCauses[] */
113 #define RS_INVAL_MAX_CAUSES RS_INVAL_WAL_LEVEL
114 
116  "array length mismatch");
117 
118 /* size of version independent data */
119 #define ReplicationSlotOnDiskConstantSize \
120  offsetof(ReplicationSlotOnDisk, slotdata)
121 /* size of the part of the slot not covered by the checksum */
122 #define ReplicationSlotOnDiskNotChecksummedSize \
123  offsetof(ReplicationSlotOnDisk, version)
124 /* size of the part covered by the checksum */
125 #define ReplicationSlotOnDiskChecksummedSize \
126  sizeof(ReplicationSlotOnDisk) - ReplicationSlotOnDiskNotChecksummedSize
127 /* size of the slot data that is version dependent */
128 #define ReplicationSlotOnDiskV2Size \
129  sizeof(ReplicationSlotOnDisk) - ReplicationSlotOnDiskConstantSize
130 
131 #define SLOT_MAGIC 0x1051CA1 /* format identifier */
132 #define SLOT_VERSION 5 /* version for new files */
133 
134 /* Control array for replication slot management */
136 
137 /* My backend's replication slot in the shared memory array */
139 
140 /* GUC variables */
141 int max_replication_slots = 10; /* the maximum number of replication
142  * slots */
143 
144 /*
145  * This GUC lists streaming replication standby server slot names that
146  * logical WAL sender processes will wait for.
147  */
149 
150 /* This is the parsed and cached configuration for standby_slot_names */
152 
153 /*
154  * Oldest LSN that has been confirmed to be flushed to the standbys
155  * corresponding to the physical slots specified in the standby_slot_names GUC.
156  */
158 
159 static void ReplicationSlotShmemExit(int code, Datum arg);
160 static void ReplicationSlotDropPtr(ReplicationSlot *slot);
161 
162 /* internal persistency functions */
163 static void RestoreSlotFromDisk(const char *name);
164 static void CreateSlotOnDisk(ReplicationSlot *slot);
165 static void SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel);
166 
167 /*
168  * Report shared-memory space needed by ReplicationSlotsShmemInit.
169  */
170 Size
172 {
173  Size size = 0;
174 
175  if (max_replication_slots == 0)
176  return size;
177 
178  size = offsetof(ReplicationSlotCtlData, replication_slots);
179  size = add_size(size,
181 
182  return size;
183 }
184 
185 /*
186  * Allocate and initialize shared memory for replication slots.
187  */
188 void
190 {
191  bool found;
192 
193  if (max_replication_slots == 0)
194  return;
195 
197  ShmemInitStruct("ReplicationSlot Ctl", ReplicationSlotsShmemSize(),
198  &found);
199 
200  if (!found)
201  {
202  int i;
203 
204  /* First time through, so initialize */
206 
207  for (i = 0; i < max_replication_slots; i++)
208  {
210 
211  /* everything else is zeroed by the memset above */
212  SpinLockInit(&slot->mutex);
216  }
217  }
218 }
219 
220 /*
221  * Register the callback for replication slot cleanup and releasing.
222  */
223 void
225 {
227 }
228 
229 /*
230  * Release and cleanup replication slots.
231  */
232 static void
234 {
235  /* Make sure active replication slots are released */
236  if (MyReplicationSlot != NULL)
238 
239  /* Also cleanup all the temporary slots. */
241 }
242 
243 /*
244  * Check whether the passed slot name is valid and report errors at elevel.
245  *
246  * Slot names may consist out of [a-z0-9_]{1,NAMEDATALEN-1} which should allow
247  * the name to be used as a directory name on every supported OS.
248  *
249  * Returns whether the directory name is valid or not if elevel < ERROR.
250  */
251 bool
252 ReplicationSlotValidateName(const char *name, int elevel)
253 {
254  const char *cp;
255 
256  if (strlen(name) == 0)
257  {
258  ereport(elevel,
259  (errcode(ERRCODE_INVALID_NAME),
260  errmsg("replication slot name \"%s\" is too short",
261  name)));
262  return false;
263  }
264 
265  if (strlen(name) >= NAMEDATALEN)
266  {
267  ereport(elevel,
268  (errcode(ERRCODE_NAME_TOO_LONG),
269  errmsg("replication slot name \"%s\" is too long",
270  name)));
271  return false;
272  }
273 
274  for (cp = name; *cp; cp++)
275  {
276  if (!((*cp >= 'a' && *cp <= 'z')
277  || (*cp >= '0' && *cp <= '9')
278  || (*cp == '_')))
279  {
280  ereport(elevel,
281  (errcode(ERRCODE_INVALID_NAME),
282  errmsg("replication slot name \"%s\" contains invalid character",
283  name),
284  errhint("Replication slot names may only contain lower case letters, numbers, and the underscore character.")));
285  return false;
286  }
287  }
288  return true;
289 }
290 
291 /*
292  * Create a new replication slot and mark it as used by this backend.
293  *
294  * name: Name of the slot
295  * db_specific: logical decoding is db specific; if the slot is going to
296  * be used for that pass true, otherwise false.
 * persistency: Persistency level recorded in the slot's persistent data
 * (slot->data.persistency). RS_TEMPORARY cannot be combined with failover
 * unless we are in slot synchronization.
297  * two_phase: Allows decoding of prepared transactions. We allow this option
298  * to be enabled only at the slot creation time. If we allow this option
299  * to be changed during decoding then it is quite possible that we skip
300  * prepare first time because this option was not enabled. Now next time
301  * during getting changes, if the two_phase option is enabled it can skip
302  * prepare because by that time start decoding point has been moved. So the
303  * user will only get commit prepared.
304  * failover: If enabled, allows the slot to be synced to standbys so
305  * that logical replication can be resumed after failover.
306  * synced: True if the slot is synchronized from the primary server.
 *
 * The caller must not already have a slot acquired (MyReplicationSlot must
 * be NULL). An ERROR is raised if an in-use slot with the same name already
 * exists, or if every slot is in use. On success MyReplicationSlot points
 * at the new slot, which is marked in_use and active for this process.
307  */
308 void
309 ReplicationSlotCreate(const char *name, bool db_specific,
310  ReplicationSlotPersistency persistency,
311  bool two_phase, bool failover, bool synced)
312 {
313  ReplicationSlot *slot = NULL;
314  int i;
315 
316  Assert(MyReplicationSlot == NULL);
317 
319 
320  if (failover)
321  {
322  /*
323  * Do not allow users to create the failover enabled slots on the
324  * standby as we do not support sync to the cascading standby.
325  *
326  * However, failover enabled slots can be created during slot
327  * synchronization because we need to retain the same values as the
328  * remote slot.
329  */
331  ereport(ERROR,
332  errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
333  errmsg("cannot enable failover for a replication slot created on the standby"));
334 
335  /*
336  * Do not allow users to create failover enabled temporary slots,
337  * because temporary slots will not be synced to the standby.
338  *
339  * However, failover enabled temporary slots can be created during
340  * slot synchronization. See the comments atop slotsync.c for details.
341  */
342  if (persistency == RS_TEMPORARY && !IsSyncingReplicationSlots())
343  ereport(ERROR,
344  errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
345  errmsg("cannot enable failover for a temporary replication slot"));
346  }
347 
348  /*
349  * If some other backend ran this code concurrently with us, we'd likely
350  * both allocate the same slot, and that would be bad. We'd also be at
351  * risk of missing a name collision. Also, we don't want to try to create
352  * a new slot while somebody's busy cleaning up an old one, because we
353  * might both be monkeying with the same directory.
354  */
355  LWLockAcquire(ReplicationSlotAllocationLock, LW_EXCLUSIVE);
356 
357  /*
358  * Check for name collision, and identify an allocatable slot. We need to
359  * hold ReplicationSlotControlLock in shared mode for this, so that nobody
360  * else can change the in_use flags while we're looking at them.
361  */
362  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
363  for (i = 0; i < max_replication_slots; i++)
364  {
366 
367  if (s->in_use && strcmp(name, NameStr(s->data.name)) == 0)
368  ereport(ERROR,
370  errmsg("replication slot \"%s\" already exists", name)));
  /* remember the first free entry; keep scanning to catch name collisions */
371  if (!s->in_use && slot == NULL)
372  slot = s;
373  }
374  LWLockRelease(ReplicationSlotControlLock);
375 
376  /* If all slots are in use, we're out of luck. */
377  if (slot == NULL)
378  ereport(ERROR,
379  (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
380  errmsg("all replication slots are in use"),
381  errhint("Free one or increase max_replication_slots.")));
382 
383  /*
384  * Since this slot is not in use, nobody should be looking at any part of
385  * it other than the in_use field unless they're trying to allocate it.
386  * And since we hold ReplicationSlotAllocationLock, nobody except us can
387  * be doing that. So it's safe to initialize the slot.
388  */
389  Assert(!slot->in_use);
390  Assert(slot->active_pid == 0);
391 
392  /* first initialize persistent data */
393  memset(&slot->data, 0, sizeof(ReplicationSlotPersistentData));
394  namestrcpy(&slot->data.name, name);
395  slot->data.database = db_specific ? MyDatabaseId : InvalidOid;
396  slot->data.persistency = persistency;
397  slot->data.two_phase = two_phase;
399  slot->data.failover = failover;
400  slot->data.synced = synced;
401 
402  /* and then data only present in shared memory */
403  slot->just_dirtied = false;
404  slot->dirty = false;
412  slot->inactive_since = 0;
413 
414  /*
415  * Create the slot on disk. We haven't actually marked the slot allocated
416  * yet, so no special cleanup is required if this errors out.
417  */
418  CreateSlotOnDisk(slot);
419 
420  /*
421  * We need to briefly prevent any other backend from iterating over the
422  * slots while we flip the in_use flag. We also need to set the active
423  * flag while holding the ControlLock as otherwise a concurrent
424  * ReplicationSlotAcquire() could acquire the slot as well.
425  */
426  LWLockAcquire(ReplicationSlotControlLock, LW_EXCLUSIVE);
427 
428  slot->in_use = true;
429 
430  /* We can now mark the slot active, and that makes it our slot. */
431  SpinLockAcquire(&slot->mutex);
432  Assert(slot->active_pid == 0);
433  slot->active_pid = MyProcPid;
434  SpinLockRelease(&slot->mutex);
435  MyReplicationSlot = slot;
436 
437  LWLockRelease(ReplicationSlotControlLock);
438 
439  /*
440  * Create statistics entry for the new logical slot. We don't collect any
441  * stats for physical slots, so no need to create an entry for the same.
442  * See ReplicationSlotDropPtr for why we need to do this before releasing
443  * ReplicationSlotAllocationLock.
444  */
445  if (SlotIsLogical(slot))
447 
448  /*
449  * Now that the slot has been marked as in_use and active, it's safe to
450  * let somebody else try to allocate a slot.
451  */
452  LWLockRelease(ReplicationSlotAllocationLock);
453 
454  /* Let everybody know we've modified this slot */
456 }
457 
458 /*
459  * Search for the named replication slot.
460  *
 * Walks slot indexes 0 .. max_replication_slots - 1 and stops at the first
 * in-use slot whose name equals 'name'.
 *
 * If need_lock is true, ReplicationSlotControlLock is acquired in shared
 * mode around the scan and released before returning. If it is false, no
 * lock is taken here; ReplicationSlotAcquire(), for example, calls this
 * while already holding that lock in shared mode.
 *
461  * Return the replication slot if found, otherwise NULL.
462  */
464 SearchNamedReplicationSlot(const char *name, bool need_lock)
465 {
466  int i;
467  ReplicationSlot *slot = NULL;
468 
469  if (need_lock)
470  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
471 
472  for (i = 0; i < max_replication_slots; i++)
473  {
475 
  /* only in-use entries are compared by name */
476  if (s->in_use && strcmp(name, NameStr(s->data.name)) == 0)
477  {
478  slot = s;
479  break;
480  }
481  }
482 
483  if (need_lock)
484  LWLockRelease(ReplicationSlotControlLock);
485 
486  return slot;
487 }
488 
489 /*
490  * Return the index of the replication slot in
491  * ReplicationSlotCtl->replication_slots.
492  *
493  * This is mainly useful to have an efficient key for storing replication slot
494  * stats.
495  */
496 int
498 {
500  slot < ReplicationSlotCtl->replication_slots + max_replication_slots);
501 
502  return slot - ReplicationSlotCtl->replication_slots;
503 }
504 
505 /*
506  * If the slot at 'index' is unused, return false. Otherwise 'name' is set to
507  * the slot's name and true is returned.
508  *
509  * This likely is only useful for pgstat_replslot.c during shutdown; in other
510  * cases there are obvious TOCTOU issues.
511  */
512 bool
514 {
515  ReplicationSlot *slot;
516  bool found;
517 
519 
520  /*
521  * Ensure that the slot cannot be dropped while we copy the name. Don't
522  * need the spinlock as the name of an existing slot cannot change.
523  */
524  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
525  found = slot->in_use;
526  if (slot->in_use)
527  namestrcpy(name, NameStr(slot->data.name));
528  LWLockRelease(ReplicationSlotControlLock);
529 
530  return found;
531 }
532 
533 /*
534  * Find a previously created slot and mark it as used by this process.
535  *
536  * An error is raised if nowait is true and the slot is currently in use. If
537  * nowait is false, we sleep until the slot is released by the owning process.
 *
 * An error is also raised if no in-use slot named 'name' exists. On success
 * MyReplicationSlot points at the acquired slot and the slot's
 * inactive_since is reset to 0.
538  */
539 void
540 ReplicationSlotAcquire(const char *name, bool nowait)
541 {
542  ReplicationSlot *s;
543  int active_pid;
544 
545  Assert(name != NULL);
546 
547 retry:
548  Assert(MyReplicationSlot == NULL);
549 
550  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
551 
552  /* Check if the slot exists with the given name. */
553  s = SearchNamedReplicationSlot(name, false);
554  if (s == NULL || !s->in_use)
555  {
556  LWLockRelease(ReplicationSlotControlLock);
557 
558  ereport(ERROR,
559  (errcode(ERRCODE_UNDEFINED_OBJECT),
560  errmsg("replication slot \"%s\" does not exist",
561  name)));
562  }
563 
564  /*
565  * This is the slot we want; check if it's active under some other
566  * process. In single user mode, we don't need this check.
567  */
568  if (IsUnderPostmaster)
569  {
570  /*
571  * Get ready to sleep on the slot in case it is active. (We may end
572  * up not sleeping, but we don't want to do this while holding the
573  * spinlock.)
574  */
575  if (!nowait)
577 
  /* claim the slot if nobody has it; either way remember who owns it now */
578  SpinLockAcquire(&s->mutex);
579  if (s->active_pid == 0)
580  s->active_pid = MyProcPid;
581  active_pid = s->active_pid;
582  SpinLockRelease(&s->mutex);
583  }
584  else
585  active_pid = MyProcPid;
586  LWLockRelease(ReplicationSlotControlLock);
587 
588  /*
589  * If we found the slot but it's already active in another process, we
590  * wait until the owning process signals us that it's been released, or
591  * error out.
592  */
593  if (active_pid != MyProcPid)
594  {
595  if (!nowait)
596  {
597  /* Wait here until we get signaled, and then restart */
599  WAIT_EVENT_REPLICATION_SLOT_DROP);
601  goto retry;
602  }
603 
604  ereport(ERROR,
605  (errcode(ERRCODE_OBJECT_IN_USE),
606  errmsg("replication slot \"%s\" is active for PID %d",
607  NameStr(s->data.name), active_pid)));
608  }
609  else if (!nowait)
610  ConditionVariableCancelSleep(); /* no sleep needed after all */
611 
612  /* Let everybody know we've modified this slot */
614 
615  /* We made this slot active, so it's ours now. */
616  MyReplicationSlot = s;
617 
618  /*
619  * The call to pgstat_acquire_replslot() protects against stats for a
620  * different slot, from before a restart or such, being present during
621  * pgstat_report_replslot().
622  */
623  if (SlotIsLogical(s))
625 
626  /*
627  * Reset the time since the slot has become inactive as the slot is active
628  * now.
629  */
630  SpinLockAcquire(&s->mutex);
631  s->inactive_since = 0;
632  SpinLockRelease(&s->mutex);
633 
634  if (am_walsender)
635  {
637  SlotIsLogical(s)
638  ? errmsg("acquired logical replication slot \"%s\"",
639  NameStr(s->data.name))
640  : errmsg("acquired physical replication slot \"%s\"",
641  NameStr(s->data.name)));
642  }
643 }
644 
645 /*
646  * Release the replication slot that this backend considers itself to own.
647  *
648  * This or another backend can re-acquire the slot later.
649  * Resources this slot requires will be preserved.
650  */
651 void
653 {
655  char *slotname = NULL; /* keep compiler quiet */
656  bool is_logical = false; /* keep compiler quiet */
657  TimestampTz now = 0;
658 
659  Assert(slot != NULL && slot->active_pid != 0);
660 
661  if (am_walsender)
662  {
663  slotname = pstrdup(NameStr(slot->data.name));
664  is_logical = SlotIsLogical(slot);
665  }
666 
667  if (slot->data.persistency == RS_EPHEMERAL)
668  {
669  /*
670  * Delete the slot. There is no !PANIC case where this is allowed to
671  * fail, all that may happen is an incomplete cleanup of the on-disk
672  * data.
673  */
675  }
676 
677  /*
678  * If slot needed to temporarily restrain both data and catalog xmin to
679  * create the catalog snapshot, remove that temporary constraint.
680  * Snapshots can only be exported while the initial snapshot is still
681  * acquired.
682  */
683  if (!TransactionIdIsValid(slot->data.xmin) &&
685  {
686  SpinLockAcquire(&slot->mutex);
688  SpinLockRelease(&slot->mutex);
690  }
691 
692  /*
693  * Set the last inactive time after marking the slot inactive. We don't
694  * set it for the slots currently being synced from the primary to the
695  * standby because such slots are typically inactive as decoding is not
696  * allowed on those.
697  */
698  if (!(RecoveryInProgress() && slot->data.synced))
700 
701  if (slot->data.persistency == RS_PERSISTENT)
702  {
703  /*
704  * Mark persistent slot inactive. We're not freeing it, just
705  * disconnecting, but wake up others that may be waiting for it.
706  */
707  SpinLockAcquire(&slot->mutex);
708  slot->active_pid = 0;
709  slot->inactive_since = now;
710  SpinLockRelease(&slot->mutex);
712  }
713  else
714  {
715  SpinLockAcquire(&slot->mutex);
716  slot->inactive_since = now;
717  SpinLockRelease(&slot->mutex);
718  }
719 
720  MyReplicationSlot = NULL;
721 
722  /* might not have been set when we've been a plain slot */
723  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
726  LWLockRelease(ProcArrayLock);
727 
728  if (am_walsender)
729  {
731  is_logical
732  ? errmsg("released logical replication slot \"%s\"",
733  slotname)
734  : errmsg("released physical replication slot \"%s\"",
735  slotname));
736 
737  pfree(slotname);
738  }
739 }
740 
741 /*
742  * Cleanup all temporary slots created in current session.
743  */
744 void
746 {
747  int i;
748 
749  Assert(MyReplicationSlot == NULL);
750 
751 restart:
752  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
753  for (i = 0; i < max_replication_slots; i++)
754  {
756 
757  if (!s->in_use)
758  continue;
759 
760  SpinLockAcquire(&s->mutex);
761  if (s->active_pid == MyProcPid)
762  {
764  SpinLockRelease(&s->mutex);
765  LWLockRelease(ReplicationSlotControlLock); /* avoid deadlock */
766 
768 
770  goto restart;
771  }
772  else
773  SpinLockRelease(&s->mutex);
774  }
775 
776  LWLockRelease(ReplicationSlotControlLock);
777 }
778 
779 /*
780  * Permanently drop replication slot identified by the passed in name.
 *
 * The slot is first acquired with ReplicationSlotAcquire(name, nowait), so
 * 'nowait' has the meaning documented there. The caller must not already
 * have a slot acquired (MyReplicationSlot must be NULL).
 *
 * An ERROR is reported for a slot that is being synced from the primary
 * server.
781  */
782 void
783 ReplicationSlotDrop(const char *name, bool nowait)
784 {
785  Assert(MyReplicationSlot == NULL);
786 
787  ReplicationSlotAcquire(name, nowait);
788 
789  /*
790  * Do not allow users to drop the slots which are currently being synced
791  * from the primary to the standby.
792  */
794  ereport(ERROR,
795  errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
796  errmsg("cannot drop replication slot \"%s\"", name),
797  errdetail("This slot is being synced from the primary server."));
798 
800 }
801 
802 /*
803  * Change the definition of the slot identified by the specified name.
 *
 * 'failover' is the requested value of the slot's failover property; the
 * slot's data is only modified when it differs from the current setting.
 *
 * The caller must not already have a slot acquired (MyReplicationSlot must
 * be NULL on entry).
 *
 * Errors are reported for physical slots, for slots being synced from the
 * primary server while in recovery, for enabling failover while in
 * recovery, and for enabling failover on an RS_TEMPORARY slot.
804  */
805 void
806 ReplicationSlotAlter(const char *name, bool failover)
807 {
808  Assert(MyReplicationSlot == NULL);
809 
811 
813  ereport(ERROR,
814  errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
815  errmsg("cannot use %s with a physical replication slot",
816  "ALTER_REPLICATION_SLOT"));
817 
818  if (RecoveryInProgress())
819  {
820  /*
821  * Do not allow users to alter the slots which are currently being
822  * synced from the primary to the standby.
823  */
825  ereport(ERROR,
826  errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
827  errmsg("cannot alter replication slot \"%s\"", name),
828  errdetail("This slot is being synced from the primary server."));
829 
830  /*
831  * Do not allow users to enable failover on the standby as we do not
832  * support sync to the cascading standby.
833  */
834  if (failover)
835  ereport(ERROR,
836  errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
837  errmsg("cannot enable failover for a replication slot"
838  " on the standby"));
839  }
840 
841  /*
842  * Do not allow users to enable failover for temporary slots as we do not
843  * support syncing temporary slots to the standby.
844  */
845  if (failover && MyReplicationSlot->data.persistency == RS_TEMPORARY)
846  ereport(ERROR,
847  errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
848  errmsg("cannot enable failover for a temporary replication slot"));
849 
  /* nothing to change when the requested value is already in effect */
850  if (MyReplicationSlot->data.failover != failover)
851  {
853  MyReplicationSlot->data.failover = failover;
855 
858  }
859 
861 }
862 
863 /*
864  * Permanently drop the currently acquired replication slot.
865  */
866 void
868 {
870 
871  Assert(MyReplicationSlot != NULL);
872 
873  /* slot isn't acquired anymore */
874  MyReplicationSlot = NULL;
875 
877 }
878 
879 /*
880  * Permanently drop the replication slot which will be released by the point
881  * this function returns.
882  */
883 static void
885 {
886  char path[MAXPGPATH];
887  char tmppath[MAXPGPATH];
888 
889  /*
890  * If some other backend ran this code concurrently with us, we might try
891  * to delete a slot with a certain name while someone else was trying to
892  * create a slot with the same name.
893  */
894  LWLockAcquire(ReplicationSlotAllocationLock, LW_EXCLUSIVE);
895 
896  /* Generate pathnames. */
897  sprintf(path, "pg_replslot/%s", NameStr(slot->data.name));
898  sprintf(tmppath, "pg_replslot/%s.tmp", NameStr(slot->data.name));
899 
900  /*
901  * Rename the slot directory on disk, so that we'll no longer recognize
902  * this as a valid slot. Note that if this fails, we've got to mark the
903  * slot inactive before bailing out. If we're dropping an ephemeral or a
904  * temporary slot, we better never fail hard as the caller won't expect
905  * the slot to survive and this might get called during error handling.
906  */
907  if (rename(path, tmppath) == 0)
908  {
909  /*
910  * We need to fsync() the directory we just renamed and its parent to
911  * make sure that our changes are on disk in a crash-safe fashion. If
912  * fsync() fails, we can't be sure whether the changes are on disk or
913  * not. For now, we handle that by panicking;
914  * StartupReplicationSlots() will try to straighten it out after
915  * restart.
916  */
918  fsync_fname(tmppath, true);
919  fsync_fname("pg_replslot", true);
921  }
922  else
923  {
924  bool fail_softly = slot->data.persistency != RS_PERSISTENT;
925 
926  SpinLockAcquire(&slot->mutex);
927  slot->active_pid = 0;
928  SpinLockRelease(&slot->mutex);
929 
930  /* wake up anyone waiting on this slot */
932 
933  ereport(fail_softly ? WARNING : ERROR,
935  errmsg("could not rename file \"%s\" to \"%s\": %m",
936  path, tmppath)));
937  }
938 
939  /*
940  * The slot is definitely gone. Lock out concurrent scans of the array
941  * long enough to kill it. It's OK to clear the active PID here without
942  * grabbing the mutex because nobody else can be scanning the array here,
943  * and nobody can be attached to this slot and thus access it without
944  * scanning the array.
945  *
946  * Also wake up processes waiting for it.
947  */
948  LWLockAcquire(ReplicationSlotControlLock, LW_EXCLUSIVE);
949  slot->active_pid = 0;
950  slot->in_use = false;
951  LWLockRelease(ReplicationSlotControlLock);
953 
954  /*
955  * Slot is dead and doesn't prevent resource removal anymore, recompute
956  * limits.
957  */
960 
961  /*
962  * If removing the directory fails, the worst thing that will happen is
963  * that the user won't be able to create a new slot with the same name
964  * until the next server restart. We warn about it, but that's all.
965  */
966  if (!rmtree(tmppath, true))
968  (errmsg("could not remove directory \"%s\"", tmppath)));
969 
970  /*
971  * Drop the statistics entry for the replication slot. Do this while
972  * holding ReplicationSlotAllocationLock so that we don't drop a
973  * statistics entry for another slot with the same name just created in
974  * another session.
975  */
976  if (SlotIsLogical(slot))
977  pgstat_drop_replslot(slot);
978 
979  /*
980  * We release this at the very end, so that nobody starts trying to create
981  * a slot while we're still cleaning up the detritus of the old one.
982  */
983  LWLockRelease(ReplicationSlotAllocationLock);
984 }
985 
986 /*
987  * Serialize the currently acquired slot's state from memory to disk, thereby
988  * guaranteeing the current state will survive a crash.
989  */
990 void
992 {
993  char path[MAXPGPATH];
994 
995  Assert(MyReplicationSlot != NULL);
996 
997  sprintf(path, "pg_replslot/%s", NameStr(MyReplicationSlot->data.name));
999 }
1000 
1001 /*
1002  * Signal that it would be useful if the currently acquired slot would be
1003  * flushed out to disk.
1004  *
1005  * Note that the actual flush to disk can be delayed for a long time. If a
1006  * flush is required for correctness, explicitly call ReplicationSlotSave().
1007  */
1008 void
1010 {
1012 
1013  Assert(MyReplicationSlot != NULL);
1014 
1015  SpinLockAcquire(&slot->mutex);
1017  MyReplicationSlot->dirty = true;
1018  SpinLockRelease(&slot->mutex);
1019 }
1020 
1021 /*
1022  * Convert a slot that's marked as RS_EPHEMERAL or RS_TEMPORARY to a
1023  * RS_PERSISTENT slot, guaranteeing it will be there after an eventual crash.
1024  */
1025 void
1027 {
1029 
1030  Assert(slot != NULL);
1032 
1033  SpinLockAcquire(&slot->mutex);
1034  slot->data.persistency = RS_PERSISTENT;
1035  SpinLockRelease(&slot->mutex);
1036 
1039 }
1040 
1041 /*
1042  * Compute the oldest xmin across all slots and store it in the ProcArray.
1043  *
1044  * If already_locked is true, ProcArrayLock has already been acquired
1045  * exclusively.
1046  */
1047 void
1049 {
1050  int i;
1052  TransactionId agg_catalog_xmin = InvalidTransactionId;
1053 
1054  Assert(ReplicationSlotCtl != NULL);
1055 
1056  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1057 
1058  for (i = 0; i < max_replication_slots; i++)
1059  {
1061  TransactionId effective_xmin;
1062  TransactionId effective_catalog_xmin;
1063  bool invalidated;
1064 
1065  if (!s->in_use)
1066  continue;
1067 
1068  SpinLockAcquire(&s->mutex);
1069  effective_xmin = s->effective_xmin;
1070  effective_catalog_xmin = s->effective_catalog_xmin;
1071  invalidated = s->data.invalidated != RS_INVAL_NONE;
1072  SpinLockRelease(&s->mutex);
1073 
1074  /* invalidated slots need not apply */
1075  if (invalidated)
1076  continue;
1077 
1078  /* check the data xmin */
1079  if (TransactionIdIsValid(effective_xmin) &&
1080  (!TransactionIdIsValid(agg_xmin) ||
1081  TransactionIdPrecedes(effective_xmin, agg_xmin)))
1082  agg_xmin = effective_xmin;
1083 
1084  /* check the catalog xmin */
1085  if (TransactionIdIsValid(effective_catalog_xmin) &&
1086  (!TransactionIdIsValid(agg_catalog_xmin) ||
1087  TransactionIdPrecedes(effective_catalog_xmin, agg_catalog_xmin)))
1088  agg_catalog_xmin = effective_catalog_xmin;
1089  }
1090 
1091  LWLockRelease(ReplicationSlotControlLock);
1092 
1093  ProcArraySetReplicationSlotXmin(agg_xmin, agg_catalog_xmin, already_locked);
1094 }
1095 
1096 /*
1097  * Compute the oldest restart LSN across all slots and inform xlog module.
1098  *
1099  * Note: while max_slot_wal_keep_size is theoretically relevant for this
1100  * purpose, we don't try to account for that, because this module doesn't
1101  * know what to compare against.
1102  */
1103 void
1105 {
1106  int i;
1107  XLogRecPtr min_required = InvalidXLogRecPtr;
1108 
1109  Assert(ReplicationSlotCtl != NULL);
1110 
1111  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1112  for (i = 0; i < max_replication_slots; i++)
1113  {
1115  XLogRecPtr restart_lsn;
1116  bool invalidated;
1117 
1118  if (!s->in_use)
1119  continue;
1120 
1121  SpinLockAcquire(&s->mutex);
1122  restart_lsn = s->data.restart_lsn;
1123  invalidated = s->data.invalidated != RS_INVAL_NONE;
1124  SpinLockRelease(&s->mutex);
1125 
1126  /* invalidated slots need not apply */
1127  if (invalidated)
1128  continue;
1129 
1130  if (restart_lsn != InvalidXLogRecPtr &&
1131  (min_required == InvalidXLogRecPtr ||
1132  restart_lsn < min_required))
1133  min_required = restart_lsn;
1134  }
1135  LWLockRelease(ReplicationSlotControlLock);
1136 
1137  XLogSetReplicationSlotMinimumLSN(min_required);
1138 }
1139 
1140 /*
1141  * Compute the oldest WAL LSN required by *logical* decoding slots.
1142  *
1143  * Returns InvalidXLogRecPtr if logical decoding is disabled or no logical
1144  * slots exist.
1145  *
1146  * NB: this returns a value >= ReplicationSlotsComputeRequiredLSN(), since it
1147  * ignores physical replication slots.
1148  *
1149  * The results aren't required frequently, so we don't maintain a precomputed
1150  * value like we do for ComputeRequiredLSN() and ComputeRequiredXmin().
1151  */
1152 XLogRecPtr
1154 {
1155  XLogRecPtr result = InvalidXLogRecPtr;
1156  int i;
1157 
1158  if (max_replication_slots <= 0)
1159  return InvalidXLogRecPtr;
1160 
1161  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1162 
1163  for (i = 0; i < max_replication_slots; i++)
1164  {
1165  ReplicationSlot *s;
1166  XLogRecPtr restart_lsn;
1167  bool invalidated;
1168 
1170 
1171  /* cannot change while ReplicationSlotCtlLock is held */
1172  if (!s->in_use)
1173  continue;
1174 
1175  /* we're only interested in logical slots */
1176  if (!SlotIsLogical(s))
1177  continue;
1178 
1179  /* read once, it's ok if it increases while we're checking */
1180  SpinLockAcquire(&s->mutex);
1181  restart_lsn = s->data.restart_lsn;
1182  invalidated = s->data.invalidated != RS_INVAL_NONE;
1183  SpinLockRelease(&s->mutex);
1184 
1185  /* invalidated slots need not apply */
1186  if (invalidated)
1187  continue;
1188 
1189  if (restart_lsn == InvalidXLogRecPtr)
1190  continue;
1191 
1192  if (result == InvalidXLogRecPtr ||
1193  restart_lsn < result)
1194  result = restart_lsn;
1195  }
1196 
1197  LWLockRelease(ReplicationSlotControlLock);
1198 
1199  return result;
1200 }
1201 
1202 /*
1203  * ReplicationSlotsCountDBSlots -- count the number of slots that refer to the
1204  * passed database oid.
1205  *
1206  * Returns true if there are any slots referencing the database. *nslots will
1207  * be set to the absolute number of slots in the database, *nactive to ones
1208  * currently active.
1209  */
1210 bool
1211 ReplicationSlotsCountDBSlots(Oid dboid, int *nslots, int *nactive)
1212 {
1213  int i;
1214 
1215  *nslots = *nactive = 0;
1216 
1217  if (max_replication_slots <= 0)
1218  return false;
1219 
1220  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1221  for (i = 0; i < max_replication_slots; i++)
1222  {
1223  ReplicationSlot *s;
1224 
1226 
1227  /* cannot change while ReplicationSlotCtlLock is held */
1228  if (!s->in_use)
1229  continue;
1230 
1231  /* only logical slots are database specific, skip */
1232  if (!SlotIsLogical(s))
1233  continue;
1234 
1235  /* not our database, skip */
1236  if (s->data.database != dboid)
1237  continue;
1238 
1239  /* NB: intentionally counting invalidated slots */
1240 
1241  /* count slots with spinlock held */
1242  SpinLockAcquire(&s->mutex);
1243  (*nslots)++;
1244  if (s->active_pid != 0)
1245  (*nactive)++;
1246  SpinLockRelease(&s->mutex);
1247  }
1248  LWLockRelease(ReplicationSlotControlLock);
1249 
1250  if (*nslots > 0)
1251  return true;
1252  return false;
1253 }
1254 
1255 /*
1256  * ReplicationSlotsDropDBSlots -- Drop all db-specific slots relating to the
1257  * passed database oid. The caller should hold an exclusive lock on the
1258  * pg_database oid for the database to prevent creation of new slots on the db
1259  * or replay from existing slots.
1260  *
1261  * Another session that concurrently acquires an existing slot on the target DB
1262  * (most likely to drop it) may cause this function to ERROR. If that happens
1263  * it may have dropped some but not all slots.
1264  *
1265  * This routine isn't as efficient as it could be - but we don't drop
1266  * databases often, especially databases with lots of slots.
1267  */
1268 void
1270 {
1271  int i;
1272 
1273  if (max_replication_slots <= 0)
1274  return;
1275 
1276 restart:
1277  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1278  for (i = 0; i < max_replication_slots; i++)
1279  {
1280  ReplicationSlot *s;
1281  char *slotname;
1282  int active_pid;
1283 
1285 
1286  /* cannot change while ReplicationSlotCtlLock is held */
1287  if (!s->in_use)
1288  continue;
1289 
1290  /* only logical slots are database specific, skip */
1291  if (!SlotIsLogical(s))
1292  continue;
1293 
1294  /* not our database, skip */
1295  if (s->data.database != dboid)
1296  continue;
1297 
1298  /* NB: intentionally including invalidated slots */
1299 
1300  /* acquire slot, so ReplicationSlotDropAcquired can be reused */
1301  SpinLockAcquire(&s->mutex);
1302  /* can't change while ReplicationSlotControlLock is held */
1303  slotname = NameStr(s->data.name);
1304  active_pid = s->active_pid;
1305  if (active_pid == 0)
1306  {
1307  MyReplicationSlot = s;
1308  s->active_pid = MyProcPid;
1309  }
1310  SpinLockRelease(&s->mutex);
1311 
1312  /*
1313  * Even though we hold an exclusive lock on the database object a
1314  * logical slot for that DB can still be active, e.g. if it's
1315  * concurrently being dropped by a backend connected to another DB.
1316  *
1317  * That's fairly unlikely in practice, so we'll just bail out.
1318  *
1319  * The slot sync worker holds a shared lock on the database before
1320  * operating on synced logical slots to avoid conflict with the drop
1321  * happening here. The persistent synced slots are thus safe but there
1322  * is a possibility that the slot sync worker has created a temporary
1323  * slot (which stays active even on release) and we are trying to drop
1324  * that here. In practice, the chances of hitting this scenario are
1325  * less as during slot synchronization, the temporary slot is
1326  * immediately converted to persistent and thus is safe due to the
1327  * shared lock taken on the database. So, we'll just bail out in such
1328  * a case.
1329  *
1330  * XXX: We can consider shutting down the slot sync worker before
1331  * trying to drop synced temporary slots here.
1332  */
1333  if (active_pid)
1334  ereport(ERROR,
1335  (errcode(ERRCODE_OBJECT_IN_USE),
1336  errmsg("replication slot \"%s\" is active for PID %d",
1337  slotname, active_pid)));
1338 
1339  /*
1340  * To avoid duplicating ReplicationSlotDropAcquired() and to avoid
1341  * holding ReplicationSlotControlLock over filesystem operations,
1342  * release ReplicationSlotControlLock and use
1343  * ReplicationSlotDropAcquired.
1344  *
1345  * As that means the set of slots could change, restart scan from the
1346  * beginning each time we release the lock.
1347  */
1348  LWLockRelease(ReplicationSlotControlLock);
1350  goto restart;
1351  }
1352  LWLockRelease(ReplicationSlotControlLock);
1353 }
1354 
1355 
1356 /*
1357  * Check whether the server's configuration supports using replication
1358  * slots.
1359  */
1360 void
1362 {
1363  /*
1364  * NB: Adding a new requirement likely means that RestoreSlotFromDisk()
1365  * needs the same check.
1366  */
1367 
1368  if (max_replication_slots == 0)
1369  ereport(ERROR,
1370  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1371  errmsg("replication slots can only be used if max_replication_slots > 0")));
1372 
1374  ereport(ERROR,
1375  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1376  errmsg("replication slots can only be used if wal_level >= replica")));
1377 }
1378 
1379 /*
1380  * Check whether the user has privilege to use replication slots.
1381  */
1382 void
1384 {
1385  if (!has_rolreplication(GetUserId()))
1386  ereport(ERROR,
1387  (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
1388  errmsg("permission denied to use replication slots"),
1389  errdetail("Only roles with the %s attribute may use replication slots.",
1390  "REPLICATION")));
1391 }
1392 
1393 /*
1394  * Reserve WAL for the currently active slot.
1395  *
1396  * Compute and set restart_lsn in a manner that's appropriate for the type of
1397  * the slot and concurrency safe.
1398  */
1399 void
1401 {
1403 
1404  Assert(slot != NULL);
1406 
1407  /*
1408  * The replication slot mechanism is used to prevent removal of required
1409  * WAL. As there is no interlock between this routine and checkpoints, WAL
1410  * segments could concurrently be removed when a now stale return value of
1411  * ReplicationSlotsComputeRequiredLSN() is used. In the unlikely case that
1412  * this happens we'll just retry.
1413  */
1414  while (true)
1415  {
1416  XLogSegNo segno;
1417  XLogRecPtr restart_lsn;
1418 
1419  /*
1420  * For logical slots log a standby snapshot and start logical decoding
1421  * at exactly that position. That allows the slot to start up more
1422  * quickly. But on a standby we cannot do WAL writes, so just use the
1423  * replay pointer; effectively, an attempt to create a logical slot on
1424  * standby will cause it to wait for an xl_running_xact record to be
1425  * logged independently on the primary, so that a snapshot can be
1426  * built using the record.
1427  *
1428  * None of this is needed (or indeed helpful) for physical slots as
1429  * they'll start replay at the last logged checkpoint anyway. Instead
1430  * return the location of the last redo LSN. While that slightly
1431  * increases the chance that we have to retry, it's where a base
1432  * backup has to start replay at.
1433  */
1434  if (SlotIsPhysical(slot))
1435  restart_lsn = GetRedoRecPtr();
1436  else if (RecoveryInProgress())
1437  restart_lsn = GetXLogReplayRecPtr(NULL);
1438  else
1439  restart_lsn = GetXLogInsertRecPtr();
1440 
1441  SpinLockAcquire(&slot->mutex);
1442  slot->data.restart_lsn = restart_lsn;
1443  SpinLockRelease(&slot->mutex);
1444 
1445  /* prevent WAL removal as fast as possible */
1447 
1448  /*
1449  * If all required WAL is still there, great, otherwise retry. The
1450  * slot should prevent further removal of WAL, unless there's a
1451  * concurrent ReplicationSlotsComputeRequiredLSN() after we've written
1452  * the new restart_lsn above, so normally we should never need to loop
1453  * more than twice.
1454  */
1456  if (XLogGetLastRemovedSegno() < segno)
1457  break;
1458  }
1459 
1460  if (!RecoveryInProgress() && SlotIsLogical(slot))
1461  {
1462  XLogRecPtr flushptr;
1463 
1464  /* make sure we have enough information to start */
1465  flushptr = LogStandbySnapshot();
1466 
1467  /* and make sure it's fsynced to disk */
1468  XLogFlush(flushptr);
1469  }
1470 }
1471 
1472 /*
1473  * Report that replication slot needs to be invalidated
1474  */
1475 static void
1477  bool terminating,
1478  int pid,
1479  NameData slotname,
1480  XLogRecPtr restart_lsn,
1481  XLogRecPtr oldestLSN,
1482  TransactionId snapshotConflictHorizon)
1483 {
1484  StringInfoData err_detail;
1485  bool hint = false;
1486 
1487  initStringInfo(&err_detail);
1488 
1489  switch (cause)
1490  {
1491  case RS_INVAL_WAL_REMOVED:
1492  {
1493  unsigned long long ex = oldestLSN - restart_lsn;
1494 
1495  hint = true;
1496  appendStringInfo(&err_detail,
1497  ngettext("The slot's restart_lsn %X/%X exceeds the limit by %llu byte.",
1498  "The slot's restart_lsn %X/%X exceeds the limit by %llu bytes.",
1499  ex),
1500  LSN_FORMAT_ARGS(restart_lsn),
1501  ex);
1502  break;
1503  }
1504  case RS_INVAL_HORIZON:
1505  appendStringInfo(&err_detail, _("The slot conflicted with xid horizon %u."),
1506  snapshotConflictHorizon);
1507  break;
1508 
1509  case RS_INVAL_WAL_LEVEL:
1510  appendStringInfoString(&err_detail, _("Logical decoding on standby requires wal_level >= logical on the primary server."));
1511  break;
1512  case RS_INVAL_NONE:
1513  pg_unreachable();
1514  }
1515 
1516  ereport(LOG,
1517  terminating ?
1518  errmsg("terminating process %d to release replication slot \"%s\"",
1519  pid, NameStr(slotname)) :
1520  errmsg("invalidating obsolete replication slot \"%s\"",
1521  NameStr(slotname)),
1522  errdetail_internal("%s", err_detail.data),
1523  hint ? errhint("You might need to increase %s.", "max_slot_wal_keep_size") : 0);
1524 
1525  pfree(err_detail.data);
1526 }
1527 
1528 /*
1529  * Helper for InvalidateObsoleteReplicationSlots
1530  *
1531  * Acquires the given slot and marks it invalid, if necessary and possible.
1532  *
1533  * Returns whether ReplicationSlotControlLock was released in the interim (and
1534  * in that case we're not holding the lock at return, otherwise we are).
1535  *
1536  * Sets *invalidated true if the slot was invalidated. (Untouched otherwise.)
1537  *
1538  * This is inherently racy, because we release the LWLock
1539  * for syscalls, so caller must restart if we return true.
 *
 * The caller must hold ReplicationSlotControlLock in shared mode on entry
 * (asserted at the top of the loop below).
1540  */
1541 static bool
/*
 * NOTE(review): the line carrying the function name and its first parameter
 * appears to be absent from this listing; "cause" is used in the body but is
 * not among the parameters visible below.
 */
1543  ReplicationSlot *s,
1544  XLogRecPtr oldestLSN,
1545  Oid dboid, TransactionId snapshotConflictHorizon,
1546  bool *invalidated)
1547 {
1548  int last_signaled_pid = 0;
1549  bool released_lock = false;
1550  bool terminated = false;
/*
 * NOTE(review): the next two variables are assigned from s->effective_xmin
 * and s->effective_catalog_xmin and are passed to TransactionIdIsValid()
 * below, yet they are declared as XLogRecPtr and initialized with
 * InvalidXLogRecPtr.  Presumably they should be TransactionId /
 * InvalidTransactionId -- confirm and fix.
 */
1551  XLogRecPtr initial_effective_xmin = InvalidXLogRecPtr;
1552  XLogRecPtr initial_catalog_effective_xmin = InvalidXLogRecPtr;
1553  XLogRecPtr initial_restart_lsn = InvalidXLogRecPtr;
/*
 * NOTE(review): invalidation_cause_prev is read in an Assert and assigned
 * further down, but no declaration of it is visible here.
 */
1555 
1556  for (;;)
1557  {
1558  XLogRecPtr restart_lsn;
1559  NameData slotname;
1560  int active_pid = 0;
1561  ReplicationSlotInvalidationCause invalidation_cause = RS_INVAL_NONE;
1562 
1563  Assert(LWLockHeldByMeInMode(ReplicationSlotControlLock, LW_SHARED));
1564 
/*
 * The slot is no longer in use: nothing to do.  If we re-acquired the
 * control lock on a previous iteration, release it so that the return value
 * (released_lock) matches the lock state.
 */
1565  if (!s->in_use)
1566  {
1567  if (released_lock)
1568  LWLockRelease(ReplicationSlotControlLock);
1569  break;
1570  }
1571 
1572  /*
1573  * Check if the slot needs to be invalidated. If it needs to be
1574  * invalidated, and is not currently acquired, acquire it and mark it
1575  * as having been invalidated. We do this with the spinlock held to
1576  * avoid race conditions -- for example the restart_lsn could move
1577  * forward, or the slot could be dropped.
1578  */
1579  SpinLockAcquire(&s->mutex);
1580 
1581  restart_lsn = s->data.restart_lsn;
1582 
1583  /* we do nothing if the slot is already invalid */
1584  if (s->data.invalidated == RS_INVAL_NONE)
1585  {
1586  /*
1587  * The slot's mutex will be released soon, and it is possible that
1588  * those values change since the process holding the slot has been
1589  * terminated (if any), so record them here to ensure that we
1590  * would report the correct invalidation cause.
1591  */
1592  if (!terminated)
1593  {
1594  initial_restart_lsn = s->data.restart_lsn;
1595  initial_effective_xmin = s->effective_xmin;
1596  initial_catalog_effective_xmin = s->effective_catalog_xmin;
1597  }
1598 
/* decide, from the recorded initial values, whether "cause" applies */
1599  switch (cause)
1600  {
1601  case RS_INVAL_WAL_REMOVED:
1602  if (initial_restart_lsn != InvalidXLogRecPtr &&
1603  initial_restart_lsn < oldestLSN)
1604  invalidation_cause = cause;
1605  break;
1606  case RS_INVAL_HORIZON:
1607  if (!SlotIsLogical(s))
1608  break;
1609  /* invalid DB oid signals a shared relation */
1610  if (dboid != InvalidOid && dboid != s->data.database)
1611  break;
1612  if (TransactionIdIsValid(initial_effective_xmin) &&
1613  TransactionIdPrecedesOrEquals(initial_effective_xmin,
1614  snapshotConflictHorizon))
1615  invalidation_cause = cause;
1616  else if (TransactionIdIsValid(initial_catalog_effective_xmin) &&
1617  TransactionIdPrecedesOrEquals(initial_catalog_effective_xmin,
1618  snapshotConflictHorizon))
1619  invalidation_cause = cause;
1620  break;
1621  case RS_INVAL_WAL_LEVEL:
1622  if (SlotIsLogical(s))
1623  invalidation_cause = cause;
1624  break;
1625  case RS_INVAL_NONE:
1626  pg_unreachable();
1627  }
1628  }
1629 
1630  /*
1631  * The invalidation cause recorded previously should not change while
1632  * the process owning the slot (if any) has been terminated.
1633  */
1634  Assert(!(invalidation_cause_prev != RS_INVAL_NONE && terminated &&
1635  invalidation_cause_prev != invalidation_cause));
1636 
1637  /* if there's no invalidation, we're done */
1638  if (invalidation_cause == RS_INVAL_NONE)
1639  {
1640  SpinLockRelease(&s->mutex);
1641  if (released_lock)
1642  LWLockRelease(ReplicationSlotControlLock);
1643  break;
1644  }
1645 
/* copy what we need for reporting while still holding the spinlock */
1646  slotname = s->data.name;
1647  active_pid = s->active_pid;
1648 
1649  /*
1650  * If the slot can be acquired, do so and mark it invalidated
1651  * immediately. Otherwise we'll signal the owning process, below, and
1652  * retry.
1653  */
1654  if (active_pid == 0)
1655  {
1656  MyReplicationSlot = s;
1657  s->active_pid = MyProcPid;
1658  s->data.invalidated = invalidation_cause;
1659 
1660  /*
1661  * XXX: We should consider not overwriting restart_lsn and instead
1662  * just rely on .invalidated.
1663  */
/*
 * NOTE(review): the statement governed by this "if" appears to be missing
 * from the listing (presumably the restart_lsn overwrite that the XXX
 * comment above refers to).  As shown, the "if" would instead control the
 * "*invalidated = true" assignment below -- verify against the real file.
 */
1664  if (invalidation_cause == RS_INVAL_WAL_REMOVED)
1666 
1667  /* Let caller know */
1668  *invalidated = true;
1669  }
1670 
1671  SpinLockRelease(&s->mutex);
1672 
1673  /*
1674  * The logical replication slots shouldn't be invalidated as GUC
1675  * max_slot_wal_keep_size is set to -1 during the binary upgrade. See
1676  * check_old_cluster_for_valid_slots() where we ensure that no
1677  * slots were invalidated before the upgrade.
1678  */
1679  Assert(!(*invalidated && SlotIsLogical(s) && IsBinaryUpgrade));
1680 
1681  if (active_pid != 0)
1682  {
1683  /*
1684  * Prepare the sleep on the slot's condition variable before
1685  * releasing the lock, to close a possible race condition if the
1686  * slot is released before the sleep below.
1687  */
/*
 * NOTE(review): the call that the comment above describes is not present in
 * this listing.
 */
1689 
1690  LWLockRelease(ReplicationSlotControlLock);
1691  released_lock = true;
1692 
1693  /*
1694  * Signal to terminate the process that owns the slot, if we
1695  * haven't already signalled it. (Avoidance of repeated
1696  * signalling is the only reason for there to be a loop in this
1697  * routine; otherwise we could rely on caller's restart loop.)
1698  *
1699  * There is the race condition that other process may own the slot
1700  * after its current owner process is terminated and before this
1701  * process owns it. To handle that, we signal only if the PID of
1702  * the owning process has changed from the previous time. (This
1703  * logic assumes that the same PID is not reused very quickly.)
1704  */
1705  if (last_signaled_pid != active_pid)
1706  {
1707  ReportSlotInvalidation(invalidation_cause, true, active_pid,
1708  slotname, restart_lsn,
1709  oldestLSN, snapshotConflictHorizon);
1710 
/*
 * The startup process uses SendProcSignal(); every other process sends
 * SIGTERM directly.
 * NOTE(review): the remaining SendProcSignal() arguments are cut off in this
 * listing.
 */
1711  if (MyBackendType == B_STARTUP)
1712  (void) SendProcSignal(active_pid,
1715  else
1716  (void) kill(active_pid, SIGTERM);
1717 
1718  last_signaled_pid = active_pid;
1719  terminated = true;
1720  invalidation_cause_prev = invalidation_cause;
1721  }
1722 
1723  /* Wait until the slot is released. */
/*
 * NOTE(review): the start of the wait call is missing from this listing;
 * only its trailing wait-event argument is visible.
 */
1725  WAIT_EVENT_REPLICATION_SLOT_DROP);
1726 
1727  /*
1728  * Re-acquire lock and start over; we expect to invalidate the
1729  * slot next time (unless another process acquires the slot in the
1730  * meantime).
1731  */
1732  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1733  continue;
1734  }
1735  else
1736  {
1737  /*
1738  * We hold the slot now and have already invalidated it; flush it
1739  * to ensure that state persists.
1740  *
1741  * Don't want to hold ReplicationSlotControlLock across file
1742  * system operations, so release it now but be sure to tell caller
1743  * to restart from scratch.
1744  */
1745  LWLockRelease(ReplicationSlotControlLock);
1746  released_lock = true;
1747 
1748  /* Make sure the invalidated state persists across server restart */
/*
 * NOTE(review): the statements that persist and release the slot are not
 * present in this listing.
 */
1752 
1753  ReportSlotInvalidation(invalidation_cause, false, active_pid,
1754  slotname, restart_lsn,
1755  oldestLSN, snapshotConflictHorizon);
1756 
1757  /* done with this slot for now */
1758  break;
1759  }
1760  }
1761 
/* the return value must agree with whether we still hold the control lock */
1762  Assert(released_lock == !LWLockHeldByMe(ReplicationSlotControlLock));
1763 
1764  return released_lock;
1765 }
1766 
1767 /*
1768  * Invalidate slots that require resources about to be removed.
1769  *
1770  * Returns true when any slot have got invalidated.
1771  *
1772  * Whether a slot needs to be invalidated depends on the cause. A slot is
1773  * removed if it:
1774  * - RS_INVAL_WAL_REMOVED: requires a LSN older than the given segment
1775  * - RS_INVAL_HORIZON: requires a snapshot <= the given horizon in the given
1776  * db; dboid may be InvalidOid for shared relations
1777  * - RS_INVAL_WAL_LEVEL: is logical
1778  *
1779  * NB - this runs as part of checkpoint, so avoid raising errors if possible.
1780  */
1781 bool
1783  XLogSegNo oldestSegno, Oid dboid,
1784  TransactionId snapshotConflictHorizon)
1785 {
1786  XLogRecPtr oldestLSN;
1787  bool invalidated = false;
1788 
1789  Assert(cause != RS_INVAL_HORIZON || TransactionIdIsValid(snapshotConflictHorizon));
1790  Assert(cause != RS_INVAL_WAL_REMOVED || oldestSegno > 0);
1791  Assert(cause != RS_INVAL_NONE);
1792 
1793  if (max_replication_slots == 0)
1794  return invalidated;
1795 
1796  XLogSegNoOffsetToRecPtr(oldestSegno, 0, wal_segment_size, oldestLSN);
1797 
1798 restart:
1799  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1800  for (int i = 0; i < max_replication_slots; i++)
1801  {
1803 
1804  if (!s->in_use)
1805  continue;
1806 
1807  if (InvalidatePossiblyObsoleteSlot(cause, s, oldestLSN, dboid,
1808  snapshotConflictHorizon,
1809  &invalidated))
1810  {
1811  /* if the lock was released, start from scratch */
1812  goto restart;
1813  }
1814  }
1815  LWLockRelease(ReplicationSlotControlLock);
1816 
1817  /*
1818  * If any slots have been invalidated, recalculate the resource limits.
1819  */
1820  if (invalidated)
1821  {
1824  }
1825 
1826  return invalidated;
1827 }
1828 
1829 /*
1830  * Flush all replication slots to disk.
1831  *
1832  * It is convenient to flush dirty replication slots at the time of checkpoint.
1833  * Additionally, in case of a shutdown checkpoint, we also identify the slots
1834  * for which the confirmed_flush LSN has been updated since the last time it
1835  * was saved and flush them.
1836  */
1837 void
1839 {
1840  int i;
1841 
1842  elog(DEBUG1, "performing replication slot checkpoint");
1843 
1844  /*
1845  * Prevent any slot from being created/dropped while we're active. As we
1846  * explicitly do *not* want to block iterating over replication_slots or
1847  * acquiring a slot we cannot take the control lock - but that's OK,
1848  * because holding ReplicationSlotAllocationLock is strictly stronger, and
1849  * enough to guarantee that nobody can change the in_use bits on us.
1850  */
1851  LWLockAcquire(ReplicationSlotAllocationLock, LW_SHARED);
1852 
1853  for (i = 0; i < max_replication_slots; i++)
1854  {
1856  char path[MAXPGPATH];
1857 
1858  if (!s->in_use)
1859  continue;
1860 
1861  /* save the slot to disk, locking is handled in SaveSlotToPath() */
1862  sprintf(path, "pg_replslot/%s", NameStr(s->data.name));
1863 
1864  /*
1865  * Slot's data is not flushed each time the confirmed_flush LSN is
1866  * updated as that could lead to frequent writes. However, we decide
1867  * to force a flush of all logical slot's data at the time of shutdown
1868  * if the confirmed_flush LSN is changed since we last flushed it to
1869  * disk. This helps in avoiding an unnecessary retreat of the
1870  * confirmed_flush LSN after restart.
1871  */
1872  if (is_shutdown && SlotIsLogical(s))
1873  {
1874  SpinLockAcquire(&s->mutex);
1875 
1877 
1878  if (s->data.invalidated == RS_INVAL_NONE &&
1880  {
1881  s->just_dirtied = true;
1882  s->dirty = true;
1883  }
1884  SpinLockRelease(&s->mutex);
1885  }
1886 
1887  SaveSlotToPath(s, path, LOG);
1888  }
1889  LWLockRelease(ReplicationSlotAllocationLock);
1890 }
1891 
1892 /*
1893  * Load all replication slots from disk into memory at server startup. This
1894  * needs to be run before we start crash recovery.
1895  */
1896 void
1898 {
1899  DIR *replication_dir;
1900  struct dirent *replication_de;
1901 
1902  elog(DEBUG1, "starting up replication slots");
1903 
1904  /* restore all slots by iterating over all on-disk entries */
1905  replication_dir = AllocateDir("pg_replslot");
1906  while ((replication_de = ReadDir(replication_dir, "pg_replslot")) != NULL)
1907  {
1908  char path[MAXPGPATH + 12];
1909  PGFileType de_type;
1910 
1911  if (strcmp(replication_de->d_name, ".") == 0 ||
1912  strcmp(replication_de->d_name, "..") == 0)
1913  continue;
1914 
1915  snprintf(path, sizeof(path), "pg_replslot/%s", replication_de->d_name);
1916  de_type = get_dirent_type(path, replication_de, false, DEBUG1);
1917 
1918  /* we're only creating directories here, skip if it's not our's */
1919  if (de_type != PGFILETYPE_ERROR && de_type != PGFILETYPE_DIR)
1920  continue;
1921 
1922  /* we crashed while a slot was being setup or deleted, clean up */
1923  if (pg_str_endswith(replication_de->d_name, ".tmp"))
1924  {
1925  if (!rmtree(path, true))
1926  {
1927  ereport(WARNING,
1928  (errmsg("could not remove directory \"%s\"",
1929  path)));
1930  continue;
1931  }
1932  fsync_fname("pg_replslot", true);
1933  continue;
1934  }
1935 
1936  /* looks like a slot in a normal state, restore */
1937  RestoreSlotFromDisk(replication_de->d_name);
1938  }
1939  FreeDir(replication_dir);
1940 
1941  /* currently no slots exist, we're done. */
1942  if (max_replication_slots <= 0)
1943  return;
1944 
1945  /* Now that we have recovered all the data, compute replication xmin */
1948 }
1949 
1950 /* ----
1951  * Manipulation of on-disk state of replication slots
1952  *
1953  * NB: none of the routines below should take any notice whether a slot is the
1954  * current one or not, that's all handled a layer above.
1955  * ----
1956  */
1957 static void
1959 {
1960  char tmppath[MAXPGPATH];
1961  char path[MAXPGPATH];
1962  struct stat st;
1963 
1964  /*
1965  * No need to take out the io_in_progress_lock, nobody else can see this
1966  * slot yet, so nobody else will write. We're reusing SaveSlotToPath which
1967  * takes out the lock, if we'd take the lock here, we'd deadlock.
1968  */
1969 
1970  sprintf(path, "pg_replslot/%s", NameStr(slot->data.name));
1971  sprintf(tmppath, "pg_replslot/%s.tmp", NameStr(slot->data.name));
1972 
1973  /*
1974  * It's just barely possible that some previous effort to create or drop a
1975  * slot with this name left a temp directory lying around. If that seems
1976  * to be the case, try to remove it. If the rmtree() fails, we'll error
1977  * out at the MakePGDirectory() below, so we don't bother checking
1978  * success.
1979  */
1980  if (stat(tmppath, &st) == 0 && S_ISDIR(st.st_mode))
1981  rmtree(tmppath, true);
1982 
1983  /* Create and fsync the temporary slot directory. */
1984  if (MakePGDirectory(tmppath) < 0)
1985  ereport(ERROR,
1987  errmsg("could not create directory \"%s\": %m",
1988  tmppath)));
1989  fsync_fname(tmppath, true);
1990 
1991  /* Write the actual state file. */
1992  slot->dirty = true; /* signal that we really need to write */
1993  SaveSlotToPath(slot, tmppath, ERROR);
1994 
1995  /* Rename the directory into place. */
1996  if (rename(tmppath, path) != 0)
1997  ereport(ERROR,
1999  errmsg("could not rename file \"%s\" to \"%s\": %m",
2000  tmppath, path)));
2001 
2002  /*
2003  * If we'd now fail - really unlikely - we wouldn't know whether this slot
2004  * would persist after an OS crash or not - so, force a restart. The
2005  * restart would try to fsync this again till it works.
2006  */
2008 
2009  fsync_fname(path, true);
2010  fsync_fname("pg_replslot", true);
2011 
2012  END_CRIT_SECTION();
2013 }
2014 
2015 /*
2016  * Shared functionality between saving and creating a replication slot.
2017  */
2018 static void
2019 SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
2020 {
2021  char tmppath[MAXPGPATH];
2022  char path[MAXPGPATH];
2023  int fd;
2025  bool was_dirty;
2026 
2027  /* first check whether there's something to write out */
2028  SpinLockAcquire(&slot->mutex);
2029  was_dirty = slot->dirty;
2030  slot->just_dirtied = false;
2031  SpinLockRelease(&slot->mutex);
2032 
2033  /* and don't do anything if there's nothing to write */
2034  if (!was_dirty)
2035  return;
2036 
2038 
2039  /* silence valgrind :( */
2040  memset(&cp, 0, sizeof(ReplicationSlotOnDisk));
2041 
2042  sprintf(tmppath, "%s/state.tmp", dir);
2043  sprintf(path, "%s/state", dir);
2044 
2045  fd = OpenTransientFile(tmppath, O_CREAT | O_EXCL | O_WRONLY | PG_BINARY);
2046  if (fd < 0)
2047  {
2048  /*
2049  * If not an ERROR, then release the lock before returning. In case
2050  * of an ERROR, the error recovery path automatically releases the
2051  * lock, but no harm in explicitly releasing even in that case. Note
2052  * that LWLockRelease() could affect errno.
2053  */
2054  int save_errno = errno;
2055 
2057  errno = save_errno;
2058  ereport(elevel,
2060  errmsg("could not create file \"%s\": %m",
2061  tmppath)));
2062  return;
2063  }
2064 
2065  cp.magic = SLOT_MAGIC;
2066  INIT_CRC32C(cp.checksum);
2067  cp.version = SLOT_VERSION;
2069 
2070  SpinLockAcquire(&slot->mutex);
2071 
2072  memcpy(&cp.slotdata, &slot->data, sizeof(ReplicationSlotPersistentData));
2073 
2074  SpinLockRelease(&slot->mutex);
2075 
2076  COMP_CRC32C(cp.checksum,
2077  (char *) (&cp) + ReplicationSlotOnDiskNotChecksummedSize,
2079  FIN_CRC32C(cp.checksum);
2080 
2081  errno = 0;
2082  pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_WRITE);
2083  if ((write(fd, &cp, sizeof(cp))) != sizeof(cp))
2084  {
2085  int save_errno = errno;
2086 
2090 
2091  /* if write didn't set errno, assume problem is no disk space */
2092  errno = save_errno ? save_errno : ENOSPC;
2093  ereport(elevel,
2095  errmsg("could not write to file \"%s\": %m",
2096  tmppath)));
2097  return;
2098  }
2100 
2101  /* fsync the temporary file */
2102  pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_SYNC);
2103  if (pg_fsync(fd) != 0)
2104  {
2105  int save_errno = errno;
2106 
2110  errno = save_errno;
2111  ereport(elevel,
2113  errmsg("could not fsync file \"%s\": %m",
2114  tmppath)));
2115  return;
2116  }
2118 
2119  if (CloseTransientFile(fd) != 0)
2120  {
2121  int save_errno = errno;
2122 
2124  errno = save_errno;
2125  ereport(elevel,
2127  errmsg("could not close file \"%s\": %m",
2128  tmppath)));
2129  return;
2130  }
2131 
2132  /* rename to permanent file, fsync file and directory */
2133  if (rename(tmppath, path) != 0)
2134  {
2135  int save_errno = errno;
2136 
2138  errno = save_errno;
2139  ereport(elevel,
2141  errmsg("could not rename file \"%s\" to \"%s\": %m",
2142  tmppath, path)));
2143  return;
2144  }
2145 
2146  /*
2147  * Check CreateSlotOnDisk() for the reasoning of using a critical section.
2148  */
2150 
2151  fsync_fname(path, false);
2152  fsync_fname(dir, true);
2153  fsync_fname("pg_replslot", true);
2154 
2155  END_CRIT_SECTION();
2156 
2157  /*
2158  * Successfully wrote, unset dirty bit, unless somebody dirtied again
2159  * already and remember the confirmed_flush LSN value.
2160  */
2161  SpinLockAcquire(&slot->mutex);
2162  if (!slot->just_dirtied)
2163  slot->dirty = false;
2165  SpinLockRelease(&slot->mutex);
2166 
2168 }
2169 
2170 /*
2171  * Load a single slot from disk into memory.
 *
 * The slot's directory is pg_replslot/<name>. Its "state" file is fsynced,
 * read, and checked (magic number, version, length, CRC); the persistent
 * data is then copied into the first in-memory slot whose in_use flag is
 * not set.
 *
 * Failure to remove a leftover temp file, or to open, fsync, read, validate
 * or close the state file, is reported at PANIC. A wal_level too low for the
 * slot, or finding no unused in-memory slot, is reported at FATAL.
 *
 * NOTE(review): this listing omits several lines of the function (among
 * them the signature and the declaration of "cp"); confirm details against
 * the full source.
2172  */
2173 static void
2175 {
2177  int i;
2178  char slotdir[MAXPGPATH + 12];
2179  char path[MAXPGPATH + 22];
2180  int fd;
2181  bool restored = false;
2182  int readBytes;
2183  pg_crc32c checksum;
2184 
2185  /* no need to lock here, no concurrent access allowed yet */
2186 
2187  /* delete temp file if it exists */
2188  sprintf(slotdir, "pg_replslot/%s", name);
2189  sprintf(path, "%s/state.tmp", slotdir);
2190  if (unlink(path) < 0 && errno != ENOENT)
2191  ereport(PANIC,
2193  errmsg("could not remove file \"%s\": %m", path)));
2194 
2195  sprintf(path, "%s/state", slotdir);
2196 
2197  elog(DEBUG1, "restoring replication slot from \"%s\"", path);
2198 
2199  /* on some operating systems fsyncing a file requires O_RDWR */
2200  fd = OpenTransientFile(path, O_RDWR | PG_BINARY);
2201 
2202  /*
2203  * A failure to open the state file is not handled gracefully: the
2204  * directory is rename()d into place only after the state file was fsync()ed.
2205  */
2206  if (fd < 0)
2207  ereport(PANIC,
2209  errmsg("could not open file \"%s\": %m", path)));
2210 
2211  /*
2212  * Sync state file before we're reading from it. We might have crashed
2213  * while it wasn't synced yet and we shouldn't continue on that basis.
2214  */
2215  pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_RESTORE_SYNC);
2216  if (pg_fsync(fd) != 0)
2217  ereport(PANIC,
2219  errmsg("could not fsync file \"%s\": %m",
2220  path)));
2222 
2223  /* Also sync the parent directory */
2225  fsync_fname(slotdir, true);
2226  END_CRIT_SECTION();
2227 
2228  /* read part of statefile that's guaranteed to be version independent */
2229  pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_READ);
2230  readBytes = read(fd, &cp, ReplicationSlotOnDiskConstantSize);
2232  if (readBytes != ReplicationSlotOnDiskConstantSize)
2233  {
 /* a negative count is an I/O error; anything else is a short read */
2234  if (readBytes < 0)
2235  ereport(PANIC,
2237  errmsg("could not read file \"%s\": %m", path)));
2238  else
2239  ereport(PANIC,
2241  errmsg("could not read file \"%s\": read %d of %zu",
2242  path, readBytes,
2244  }
2245 
2246  /* verify magic */
2247  if (cp.magic != SLOT_MAGIC)
2248  ereport(PANIC,
2250  errmsg("replication slot file \"%s\" has wrong magic number: %u instead of %u",
2251  path, cp.magic, SLOT_MAGIC)));
2252 
2253  /* verify version */
2254  if (cp.version != SLOT_VERSION)
2255  ereport(PANIC,
2257  errmsg("replication slot file \"%s\" has unsupported version %u",
2258  path, cp.version)));
2259 
2260  /* boundary check on length */
2262  ereport(PANIC,
2264  errmsg("replication slot file \"%s\" has corrupted length %u",
2265  path, cp.length)));
2266 
2267  /* Now that we know the size, read the rest of the file (cp.length bytes) */
2268  pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_READ);
2269  readBytes = read(fd,
2270  (char *) &cp + ReplicationSlotOnDiskConstantSize,
2271  cp.length);
2273  if (readBytes != cp.length)
2274  {
2275  if (readBytes < 0)
2276  ereport(PANIC,
2278  errmsg("could not read file \"%s\": %m", path)));
2279  else
2280  ereport(PANIC,
2282  errmsg("could not read file \"%s\": read %d of %zu",
2283  path, readBytes, (Size) cp.length)));
2284  }
2285 
2286  if (CloseTransientFile(fd) != 0)
2287  ereport(PANIC,
2289  errmsg("could not close file \"%s\": %m", path)));
2290 
2291  /* now verify the CRC */
2292  INIT_CRC32C(checksum);
2293  COMP_CRC32C(checksum,
2296  FIN_CRC32C(checksum);
2297 
2298  if (!EQ_CRC32C(checksum, cp.checksum))
2299  ereport(PANIC,
2300  (errmsg("checksum mismatch for replication slot file \"%s\": is %u, should be %u",
2301  path, checksum, cp.checksum)));
2302 
2303  /*
2304  * If we crashed with an ephemeral slot active, don't restore but delete
2305  * it.
2306  */
2308  {
 /* a failed removal only warns; the slot is skipped either way */
2309  if (!rmtree(slotdir, true))
2310  {
2311  ereport(WARNING,
2312  (errmsg("could not remove directory \"%s\"",
2313  slotdir)));
2314  }
2315  fsync_fname("pg_replslot", true);
2316  return;
2317  }
2318 
2319  /*
2320  * Verify that requirements for the specific slot type are met. That's
2321  * important because if these aren't met we're not guaranteed to retain
2322  * all the necessary resources for the slot.
2323  *
2324  * NB: We have to do so *after* the above checks for ephemeral slots,
2325  * because otherwise a slot that shouldn't exist anymore could prevent
2326  * restarts.
2327  *
2328  * NB: Changing the requirements here also requires adapting
2329  * CheckSlotRequirements() and CheckLogicalDecodingRequirements().
2330  */
2332  ereport(FATAL,
2333  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2334  errmsg("logical replication slot \"%s\" exists, but wal_level < logical",
2335  NameStr(cp.slotdata.name)),
2336  errhint("Change wal_level to be logical or higher.")));
2337  else if (wal_level < WAL_LEVEL_REPLICA)
2338  ereport(FATAL,
2339  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2340  errmsg("physical replication slot \"%s\" exists, but wal_level < replica",
2341  NameStr(cp.slotdata.name)),
2342  errhint("Change wal_level to be replica or higher.")));
2343 
2344  /* nothing can be active yet, don't lock anything */
2345  for (i = 0; i < max_replication_slots; i++)
2346  {
2347  ReplicationSlot *slot;
2348 
2350 
 /* skip entries already marked in_use */
2351  if (slot->in_use)
2352  continue;
2353 
2354  /* restore the entire set of persistent data */
2355  memcpy(&slot->data, &cp.slotdata,
2357 
2358  /* initialize in-memory state */
2359  slot->effective_xmin = cp.slotdata.xmin;
2362 
2367 
2368  slot->in_use = true;
2369  slot->active_pid = 0;
2370 
2371  /*
2372  * We set the last inactive time after loading the slot from the disk
2373  * into memory. Whoever acquires the slot i.e. makes the slot active
2374  * will reset it. We don't set it for the slots currently being synced
2375  * from the primary to the standby because such slots are typically
2376  * inactive as decoding is not allowed on those.
2377  */
2378  if (!(RecoveryInProgress() && slot->data.synced))
2380  else
2381  slot->inactive_since = 0;
2382 
2383  restored = true;
2384  break;
2385  }
2386 
 /* the loop found no entry with in_use unset */
2387  if (!restored)
2388  ereport(FATAL,
2389  (errmsg("too many replication slots active before shutdown"),
2390  errhint("Increase max_replication_slots and try again.")));
2391 }
2392 
2393 /*
2394  * Maps an invalidation reason for a replication slot to
2395  * ReplicationSlotInvalidationCause.
 *
 * invalidation_reason is compared with strcmp() against each entry of
 * SlotInvalidationCauses[], from RS_INVAL_NONE up to and including
 * RS_INVAL_MAX_CAUSES; the index of the first matching entry is returned.
 *
 * A NULL argument and an unmatched string are both caught only by
 * assertions. NOTE(review): the declaration of "result" is not visible in
 * this listing, so the value returned for an unmatched string in a
 * non-assert build should be confirmed against the full source.
2396  */
2398 GetSlotInvalidationCause(const char *invalidation_reason)
2399 {
2402  bool found PG_USED_FOR_ASSERTS_ONLY = false;
2403 
2404  Assert(invalidation_reason);
2405 
2406  for (cause = RS_INVAL_NONE; cause <= RS_INVAL_MAX_CAUSES; cause++)
2407  {
2408  if (strcmp(SlotInvalidationCauses[cause], invalidation_reason) == 0)
2409  {
2410  found = true;
2411  result = cause;
2412  break;
2413  }
2414  }
2415 
2416  Assert(found);
2417  return result;
2418 }
2419 
2420 /*
2421  * A helper function to validate slots specified in GUC standby_slot_names.
2422  *
2423  * The rawname will be parsed, and the result will be saved into *elemlist.
 *
 * Returns false, after setting a GUC_check_errdetail message, if the list
 * syntax is invalid, or if ReplicationSlotCtl is set and one of the named
 * slots does not exist or is not a physical slot. Returns true otherwise,
 * including the case where ReplicationSlotCtl is not set and the slots
 * therefore cannot be looked up.
2424  */
2425 static bool
2426 validate_standby_slots(char *rawname, List **elemlist)
2427 {
2428  bool ok;
2429 
2430  /* Verify syntax and parse string into a list of identifiers */
2431  ok = SplitIdentifierString(rawname, ',', elemlist);
2432 
2433  if (!ok)
2434  {
2435  GUC_check_errdetail("List syntax is invalid.");
2436  }
2437  else if (!ReplicationSlotCtl)
2438  {
2439  /*
2440  * We cannot validate the replication slot if the replication slots'
2441  * data has not been initialized. This is ok as we will anyway
2442  * validate the specified slot when waiting for them to catch up. See
2443  * StandbySlotsHaveCaughtup() for details.
2444  */
2445  }
2446  else
2447  {
2448  /* Check that the specified slots exist and are physical slots */
2449  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
2450 
2451  foreach_ptr(char, name, *elemlist)
2452  {
2453  ReplicationSlot *slot;
2454 
 /* need_lock is false: the control lock is already held above */
2455  slot = SearchNamedReplicationSlot(name, false);
2456 
2457  if (!slot)
2458  {
2459  GUC_check_errdetail("replication slot \"%s\" does not exist",
2460  name);
2461  ok = false;
2462  break;
2463  }
2464 
2465  if (!SlotIsPhysical(slot))
2466  {
2467  GUC_check_errdetail("\"%s\" is not a physical replication slot",
2468  name);
2469  ok = false;
2470  break;
2471  }
2472  }
2473 
2474  LWLockRelease(ReplicationSlotControlLock);
2475  }
2476 
2477  return ok;
2478 }
2479 
2480 /*
2481  * GUC check_hook for standby_slot_names
 *
 * An empty string is accepted without producing an extra value. Otherwise
 * the value is validated with validate_standby_slots(); on success *extra is
 * set to a StandbySlotNamesConfigData holding the number of names and the
 * names themselves, stored back to back with each one NUL-terminated.
 *
 * NOTE(review): the signature, the declaration of "config" and the
 * statement that allocates it are not visible in this listing. Confirm in
 * the full source that a failed allocation is handled before config is
 * dereferenced.
2482  */
2483 bool
2485 {
2486  char *rawname;
2487  char *ptr;
2488  List *elemlist;
2489  int size;
2490  bool ok;
2492 
2493  if ((*newval)[0] == '\0')
2494  return true;
2495 
2496  /* Need a modifiable copy of the GUC string */
2497  rawname = pstrdup(*newval);
2498 
2499  /* Now verify if the specified slots exist and have correct type */
2500  ok = validate_standby_slots(rawname, &elemlist);
2501 
 /*
 * Stop here on a validation failure (returns false) and also when the
 * parsed list is empty (returns true without setting *extra).
 */
2502  if (!ok || elemlist == NIL)
2503  {
2504  pfree(rawname);
2505  list_free(elemlist);
2506  return ok;
2507  }
2508 
2509  /* Compute the size required for the StandbySlotNamesConfigData struct */
2510  size = offsetof(StandbySlotNamesConfigData, slot_names);
2511  foreach_ptr(char, slot_name, elemlist)
2512  size += strlen(slot_name) + 1;
2513 
2514  /* GUC extra value must be guc_malloc'd, not palloc'd */
2516 
2517  /* Transform the data into StandbySlotNamesConfigData */
2518  config->nslotnames = list_length(elemlist);
2519 
 /* copy each name, including its terminating NUL, one after another */
2520  ptr = config->slot_names;
2521  foreach_ptr(char, slot_name, elemlist)
2522  {
2523  strcpy(ptr, slot_name);
2524  ptr += strlen(slot_name) + 1;
2525  }
2526 
2527  *extra = (void *) config;
2528 
2529  pfree(rawname);
2530  list_free(elemlist);
2531  return true;
2532 }
2533 
2534 /*
2535  * GUC assign_hook for standby_slot_names
 *
 * NOTE(review): the statements of this hook are not visible in this
 * listing. Presumably they reset the cached oldest LSN and install "extra"
 * as the active configuration; confirm against the full source.
2536  */
2537 void
2538 assign_standby_slot_names(const char *newval, void *extra)
2539 {
2540  /*
2541  * The standby slots may have changed, so we must recompute the oldest
2542  * LSN.
2543  */
2545 
2547 }
2548 
2549 /*
2550  * Check if the passed slot_name is specified in the standby_slot_names GUC.
 *
 * Returns true if slot_name compares equal (strcmp) to one of the
 * nslotnames entries stored in standby_slot_names_config, and false if it
 * matches none of them or if standby_slot_names_config is NULL.
2551  */
2552 bool
2553 SlotExistsInStandbySlotNames(const char *slot_name)
2554 {
2555  const char *standby_slot_name;
2556 
2557  /* Return false if there is no value in standby_slot_names */
2558  if (standby_slot_names_config == NULL)
2559  return false;
2560 
2561  /*
2562  * XXX: We are not expecting this list to be long so a linear search
2563  * shouldn't hurt but if that turns out not to be true then we can cache
2564  * this information for each WalSender as well.
2565  */
2566  standby_slot_name = standby_slot_names_config->slot_names;
2567  for (int i = 0; i < standby_slot_names_config->nslotnames; i++)
2568  {
2569  if (strcmp(standby_slot_name, slot_name) == 0)
2570  return true;
2571 
 /* step over this name and its terminating NUL to reach the next one */
2572  standby_slot_name += strlen(standby_slot_name) + 1;
2573  }
2574 
2575  return false;
2576 }
2577 
2578 /*
2579  * Return true if the slots specified in standby_slot_names have caught up to
2580  * the given WAL location, false otherwise.
2581  *
2582  * The elevel parameter specifies the error level used for logging messages
2583  * related to slots that do not exist, are invalidated, or are inactive.
 * It is also used for the message about a logical slot being listed.
 *
 * Every configured slot must exist, be physical, not be invalidated, and
 * have a restart_lsn at or beyond wait_for_lsn. The first slot that fails
 * any of these tests ends the scan and makes the function return false. When
 * all slots pass, the smallest restart_lsn seen is stored in
 * ss_oldest_flush_lsn.
2584  */
2585 bool
2586 StandbySlotsHaveCaughtup(XLogRecPtr wait_for_lsn, int elevel)
2587 {
2588  const char *name;
2589  int caught_up_slot_num = 0;
2590  XLogRecPtr min_restart_lsn = InvalidXLogRecPtr;
2591 
2592  /*
2593  * Don't need to wait for the standbys to catch up if there is no value in
2594  * standby_slot_names.
2595  */
2596  if (standby_slot_names_config == NULL)
2597  return true;
2598 
2599  /*
2600  * Don't need to wait for the standbys to catch up if we are on a standby
2601  * server, since we do not support syncing slots to cascading standbys.
2602  */
2603  if (RecoveryInProgress())
2604  return true;
2605 
2606  /*
2607  * Don't need to wait for the standbys to catch up if they are already
2608  * beyond the specified WAL location.
2609  */
2611  ss_oldest_flush_lsn >= wait_for_lsn)
2612  return true;
2613 
2614  /*
2615  * To prevent concurrent slot dropping and creation while filtering the
2616  * slots, take the ReplicationSlotControlLock outside of the loop.
2617  */
2618  LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
2619 
2621  for (int i = 0; i < standby_slot_names_config->nslotnames; i++)
2622  {
2623  XLogRecPtr restart_lsn;
2624  bool invalidated;
2625  bool inactive;
2626  ReplicationSlot *slot;
2627 
2628  slot = SearchNamedReplicationSlot(name, false);
2629 
2630  if (!slot)
2631  {
2632  /*
2633  * If a slot name provided in standby_slot_names does not exist,
2634  * report a message and exit the loop. A user can specify a slot
2635  * name that does not exist just before the server startup. The
2636  * GUC check_hook(validate_standby_slots) cannot validate such a
2637  * slot during startup as the ReplicationSlotCtl shared memory is
2638  * not initialized at that time. It is also possible for a user to
2639  * drop the slot in standby_slot_names afterwards.
2640  */
2641  ereport(elevel,
2642  errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2643  errmsg("replication slot \"%s\" specified in parameter %s does not exist",
2644  name, "standby_slot_names"),
2645  errdetail("Logical replication is waiting on the standby associated with \"%s\".",
2646  name),
2647  errhint("Consider creating the slot \"%s\" or amend parameter %s.",
2648  name, "standby_slot_names"));
2649  break;
2650  }
2651 
2652  if (SlotIsLogical(slot))
2653  {
2654  /*
2655  * If a logical slot name is provided in standby_slot_names,
2656  * report a message and exit the loop. Similar to the non-existent
2657  * case, a user can specify a logical slot name in
2658  * standby_slot_names before the server startup, or drop an
2659  * existing physical slot and recreate a logical slot with the
2660  * same name.
2661  */
2662  ereport(elevel,
2663  errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2664  errmsg("cannot have logical replication slot \"%s\" in parameter %s",
2665  name, "standby_slot_names"),
2666  errdetail("Logical replication is waiting for correction on \"%s\".",
2667  name),
2668  errhint("Consider removing logical slot \"%s\" from parameter %s.",
2669  name, "standby_slot_names"));
2670  break;
2671  }
2672 
 /* read the slot fields under its mutex so they are mutually consistent */
2673  SpinLockAcquire(&slot->mutex);
2674  restart_lsn = slot->data.restart_lsn;
2675  invalidated = slot->data.invalidated != RS_INVAL_NONE;
2676  inactive = slot->active_pid == 0;
2677  SpinLockRelease(&slot->mutex);
2678 
2679  if (invalidated)
2680  {
2681  /* Specified physical slot has been invalidated */
2682  ereport(elevel,
2683  errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2684  errmsg("physical slot \"%s\" specified in parameter %s has been invalidated",
2685  name, "standby_slot_names"),
2686  errdetail("Logical replication is waiting on the standby associated with \"%s\".",
2687  name),
2688  errhint("Consider dropping and recreating the slot \"%s\" or amend parameter %s.",
2689  name, "standby_slot_names"));
2690  break;
2691  }
2692 
2693  if (XLogRecPtrIsInvalid(restart_lsn) || restart_lsn < wait_for_lsn)
2694  {
2695  /* Log a message if no active_pid for this physical slot */
2696  if (inactive)
2697  ereport(elevel,
2698  errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2699  errmsg("replication slot \"%s\" specified in parameter %s does not have active_pid",
2700  name, "standby_slot_names"),
2701  errdetail("Logical replication is waiting on the standby associated with \"%s\".",
2702  name),
2703  errhint("Consider starting standby associated with \"%s\" or amend parameter %s.",
2704  name, "standby_slot_names"));
2705 
2706  /* Stop scanning: the current slot hasn't caught up. */
2707  break;
2708  }
2709 
2710  Assert(restart_lsn >= wait_for_lsn);
2711 
 /* track the smallest restart_lsn among the slots that have caught up */
2712  if (XLogRecPtrIsInvalid(min_restart_lsn) ||
2713  min_restart_lsn > restart_lsn)
2714  min_restart_lsn = restart_lsn;
2715 
2716  caught_up_slot_num++;
2717 
 /* advance past this name and its terminating NUL to the next name */
2718  name += strlen(name) + 1;
2719  }
2720 
2721  LWLockRelease(ReplicationSlotControlLock);
2722 
2723  /*
2724  * Return false if not all the standbys have caught up to the specified
2725  * WAL location.
2726  */
2727  if (caught_up_slot_num != standby_slot_names_config->nslotnames)
2728  return false;
2729 
2730  /* The ss_oldest_flush_lsn must not retreat. */
2732  min_restart_lsn >= ss_oldest_flush_lsn);
2733 
2734  ss_oldest_flush_lsn = min_restart_lsn;
2735 
2736  return true;
2737 }
2738 
2739 /*
2740  * Wait for physical standbys to confirm receiving the given lsn.
2741  *
2742  * Used by logical decoding SQL functions. It waits for physical standbys
2743  * corresponding to the physical slots specified in the standby_slot_names GUC.
 *
 * The loop below ends only once StandbySlotsHaveCaughtup(wait_for_lsn,
 * WARNING) returns true; messages about problematic slots are therefore
 * emitted at WARNING level while waiting.
 *
 * NOTE(review): several statements of this function (the signature, the
 * early-return condition, and the sleep/wakeup calls) are not visible in
 * this listing; confirm details against the full source.
2744  */
2745 void
2747 {
2748  /*
2749  * Don't need to wait for the standby to catch up if the current acquired
2750  * slot is not a logical failover slot, or there is no value in
2751  * standby_slot_names.
2752  */
2754  return;
2755 
2757 
2758  for (;;)
2759  {
2761 
 /* clear the pending-reload flag before acting on it */
2762  if (ConfigReloadPending)
2763  {
2764  ConfigReloadPending = false;
2766  }
2767 
2768  /* Exit if done waiting for every slot. */
2769  if (StandbySlotsHaveCaughtup(wait_for_lsn, WARNING))
2770  break;
2771 
2772  /*
2773  * Wait for the slots in the standby_slot_names to catch up, but use a
2774  * timeout (1s) so we can also check if the standby_slot_names has
2775  * been changed.
2776  */
2778  WAIT_EVENT_WAIT_FOR_STANDBY_CONFIRMATION);
2779  }
2780 
2782 }
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1654
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1618
#define NameStr(name)
Definition: c.h:733
unsigned int uint32
Definition: c.h:493
#define ngettext(s, p, n)
Definition: c.h:1168
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:169
#define PG_BINARY
Definition: c.h:1260
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:385
#define pg_unreachable()
Definition: c.h:283
#define lengthof(array)
Definition: c.h:775
#define MemSet(start, val, len)
Definition: c.h:1007
uint32 TransactionId
Definition: c.h:639
size_t Size
Definition: c.h:592
bool ConditionVariableCancelSleep(void)
bool ConditionVariableTimedSleep(ConditionVariable *cv, long timeout, uint32 wait_event_info)
void ConditionVariableBroadcast(ConditionVariable *cv)
void ConditionVariablePrepareToSleep(ConditionVariable *cv)
void ConditionVariableInit(ConditionVariable *cv)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
int64 TimestampTz
Definition: timestamp.h:39
int errdetail_internal(const char *fmt,...)
Definition: elog.c:1232
int errcode_for_file_access(void)
Definition: elog.c:882
int errdetail(const char *fmt,...)
Definition: elog.c:1205
int errhint(const char *fmt,...)
Definition: elog.c:1319
int errcode(int sqlerrcode)
Definition: elog.c:859
int errmsg(const char *fmt,...)
Definition: elog.c:1072
#define _(x)
Definition: elog.c:90
#define LOG
Definition: elog.h:31
#define FATAL
Definition: elog.h:41
#define WARNING
Definition: elog.h:36
#define PANIC
Definition: elog.h:42
#define DEBUG1
Definition: elog.h:30
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
#define ereport(elevel,...)
Definition: elog.h:149
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2909
int MakePGDirectory(const char *directoryName)
Definition: fd.c:3913
int FreeDir(DIR *dir)
Definition: fd.c:2961
int CloseTransientFile(int fd)
Definition: fd.c:2809
void fsync_fname(const char *fname, bool isdir)
Definition: fd.c:756
int pg_fsync(int fd)
Definition: fd.c:386
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2633
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2843
PGFileType get_dirent_type(const char *path, const struct dirent *de, bool look_through_symlinks, int elevel)
Definition: file_utils.c:525
PGFileType
Definition: file_utils.h:19
@ PGFILETYPE_DIR
Definition: file_utils.h:23
@ PGFILETYPE_ERROR
Definition: file_utils.h:20
bool IsBinaryUpgrade
Definition: globals.c:118
int MyProcPid
Definition: globals.c:45
bool IsUnderPostmaster
Definition: globals.c:117
Oid MyDatabaseId
Definition: globals.c:91
void * guc_malloc(int elevel, size_t size)
Definition: guc.c:640
#define newval
#define GUC_check_errdetail
Definition: guc.h:447
GucSource
Definition: guc.h:108
@ PGC_SIGHUP
Definition: guc.h:71
void ProcessConfigFile(GucContext context)
#define write(a, b, c)
Definition: win32.h:14
#define read(a, b, c)
Definition: win32.h:13
volatile sig_atomic_t ConfigReloadPending
Definition: interrupt.c:27
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:337
int i
Definition: isn.c:73
Assert(fmt[strlen(fmt) - 1] !='\n')
void list_free(List *list)
Definition: list.c:1546
bool LWLockHeldByMe(LWLock *lock)
Definition: lwlock.c:1894
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1169
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1938
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1782
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:708
@ LWTRANCHE_REPLICATION_SLOT_IO
Definition: lwlock.h:189
@ LW_SHARED
Definition: lwlock.h:115
@ LW_EXCLUSIVE
Definition: lwlock.h:114
char * pstrdup(const char *in)
Definition: mcxt.c:1683
void pfree(void *pointer)
Definition: mcxt.c:1508
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
@ B_STARTUP
Definition: miscadmin.h:358
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
Oid GetUserId(void)
Definition: miscinit.c:514
BackendType MyBackendType
Definition: miscinit.c:63
bool has_rolreplication(Oid roleid)
Definition: miscinit.c:711
void namestrcpy(Name name, const char *str)
Definition: name.c:233
void * arg
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
#define NAMEDATALEN
#define MAXPGPATH
uint32 pg_crc32c
Definition: pg_crc32c.h:38
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:98
#define EQ_CRC32C(c1, c2)
Definition: pg_crc32c.h:42
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:103
static int list_length(const List *l)
Definition: pg_list.h:152
#define NIL
Definition: pg_list.h:68
#define foreach_ptr(type, var, lst)
Definition: pg_list.h:469
static bool two_phase
static rewind_source * source
Definition: pg_rewind.c:89
void pgstat_create_replslot(ReplicationSlot *slot)
void pgstat_acquire_replslot(ReplicationSlot *slot)
void pgstat_drop_replslot(ReplicationSlot *slot)
#define sprintf
Definition: port.h:240
#define snprintf
Definition: port.h:238
uintptr_t Datum
Definition: postgres.h:64
#define InvalidOid
Definition: postgres_ext.h:36
unsigned int Oid
Definition: postgres_ext.h:31
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define PROC_IN_LOGICAL_DECODING
Definition: proc.h:61
void ProcArraySetReplicationSlotXmin(TransactionId xmin, TransactionId catalog_xmin, bool already_locked)
Definition: procarray.c:3927
#define INVALID_PROC_NUMBER
Definition: procnumber.h:26
int SendProcSignal(pid_t pid, ProcSignalReason reason, ProcNumber procNumber)
Definition: procsignal.c:257
@ PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT
Definition: procsignal.h:46
bool rmtree(const char *path, bool rmtopdir)
Definition: rmtree.c:50
Size add_size(Size s1, Size s2)
Definition: shmem.c:493
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:387
Size mul_size(Size s1, Size s2)
Definition: shmem.c:510
static pg_noinline void Size size
Definition: slab.c:607
ReplicationSlot * SearchNamedReplicationSlot(const char *name, bool need_lock)
Definition: slot.c:464
void ReplicationSlotAlter(const char *name, bool failover)
Definition: slot.c:806
int ReplicationSlotIndex(ReplicationSlot *slot)
Definition: slot.c:497
#define ReplicationSlotOnDiskChecksummedSize
Definition: slot.c:125
void CheckPointReplicationSlots(bool is_shutdown)
Definition: slot.c:1838
void ReplicationSlotCreate(const char *name, bool db_specific, ReplicationSlotPersistency persistency, bool two_phase, bool failover, bool synced)
Definition: slot.c:309
void ReplicationSlotCleanup(void)
Definition: slot.c:745
void ReplicationSlotDropAcquired(void)
Definition: slot.c:867
void ReplicationSlotMarkDirty(void)
Definition: slot.c:1009
void ReplicationSlotReserveWal(void)
Definition: slot.c:1400
char * standby_slot_names
Definition: slot.c:148
bool ReplicationSlotsCountDBSlots(Oid dboid, int *nslots, int *nactive)
Definition: slot.c:1211
void ReplicationSlotAcquire(const char *name, bool nowait)
Definition: slot.c:540
bool SlotExistsInStandbySlotNames(const char *slot_name)
Definition: slot.c:2553
bool InvalidateObsoleteReplicationSlots(ReplicationSlotInvalidationCause cause, XLogSegNo oldestSegno, Oid dboid, TransactionId snapshotConflictHorizon)
Definition: slot.c:1782
static bool validate_standby_slots(char *rawname, List **elemlist)
Definition: slot.c:2426
static XLogRecPtr ss_oldest_flush_lsn
Definition: slot.c:157
ReplicationSlotInvalidationCause GetSlotInvalidationCause(const char *invalidation_reason)
Definition: slot.c:2398
void ReplicationSlotsDropDBSlots(Oid dboid)
Definition: slot.c:1269
#define ReplicationSlotOnDiskNotChecksummedSize
Definition: slot.c:122
XLogRecPtr ReplicationSlotsComputeLogicalRestartLSN(void)
Definition: slot.c:1153
void ReplicationSlotsComputeRequiredXmin(bool already_locked)
Definition: slot.c:1048
static void RestoreSlotFromDisk(const char *name)
Definition: slot.c:2174
#define RS_INVAL_MAX_CAUSES
Definition: slot.c:113
void ReplicationSlotPersist(void)
Definition: slot.c:1026
ReplicationSlot * MyReplicationSlot
Definition: slot.c:138
static void SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
Definition: slot.c:2019
void ReplicationSlotDrop(const char *name, bool nowait)
Definition: slot.c:783
void ReplicationSlotSave(void)
Definition: slot.c:991
static void CreateSlotOnDisk(ReplicationSlot *slot)
Definition: slot.c:1958
#define ReplicationSlotOnDiskV2Size
Definition: slot.c:128
void CheckSlotPermissions(void)
Definition: slot.c:1383
bool ReplicationSlotName(int index, Name name)
Definition: slot.c:513
void ReplicationSlotsShmemInit(void)
Definition: slot.c:189
const char *const SlotInvalidationCauses[]
Definition: slot.c:105
static StandbySlotNamesConfigData * standby_slot_names_config
Definition: slot.c:151
void ReplicationSlotRelease(void)
Definition: slot.c:652
int max_replication_slots
Definition: slot.c:141
StaticAssertDecl(lengthof(SlotInvalidationCauses)==(RS_INVAL_MAX_CAUSES+1), "array length mismatch")
ReplicationSlotCtlData * ReplicationSlotCtl
Definition: slot.c:135
#define SLOT_VERSION
Definition: slot.c:132
struct ReplicationSlotOnDisk ReplicationSlotOnDisk
void WaitForStandbyConfirmation(XLogRecPtr wait_for_lsn)
Definition: slot.c:2746
bool StandbySlotsHaveCaughtup(XLogRecPtr wait_for_lsn, int elevel)
Definition: slot.c:2586
void ReplicationSlotsComputeRequiredLSN(void)
Definition: slot.c:1104
void ReplicationSlotInitialize(void)
Definition: slot.c:224
static void ReplicationSlotDropPtr(ReplicationSlot *slot)
Definition: slot.c:884
static bool InvalidatePossiblyObsoleteSlot(ReplicationSlotInvalidationCause cause, ReplicationSlot *s, XLogRecPtr oldestLSN, Oid dboid, TransactionId snapshotConflictHorizon, bool *invalidated)
Definition: slot.c:1542
void StartupReplicationSlots(void)
Definition: slot.c:1897
void CheckSlotRequirements(void)
Definition: slot.c:1361
#define SLOT_MAGIC
Definition: slot.c:131
void assign_standby_slot_names(const char *newval, void *extra)
Definition: slot.c:2538
bool check_standby_slot_names(char **newval, void **extra, GucSource source)
Definition: slot.c:2484
static void ReportSlotInvalidation(ReplicationSlotInvalidationCause cause, bool terminating, int pid, NameData slotname, XLogRecPtr restart_lsn, XLogRecPtr oldestLSN, TransactionId snapshotConflictHorizon)
Definition: slot.c:1476
#define ReplicationSlotOnDiskConstantSize
Definition: slot.c:119
Size ReplicationSlotsShmemSize(void)
Definition: slot.c:171
bool ReplicationSlotValidateName(const char *name, int elevel)
Definition: slot.c:252
static void ReplicationSlotShmemExit(int code, Datum arg)
Definition: slot.c:233
ReplicationSlotPersistency
Definition: slot.h:34
@ RS_PERSISTENT
Definition: slot.h:35
@ RS_EPHEMERAL
Definition: slot.h:36
@ RS_TEMPORARY
Definition: slot.h:37
#define SlotIsPhysical(slot)
Definition: slot.h:209
ReplicationSlotInvalidationCause
Definition: slot.h:48
@ RS_INVAL_WAL_REMOVED
Definition: slot.h:51
@ RS_INVAL_HORIZON
Definition: slot.h:53
@ RS_INVAL_WAL_LEVEL
Definition: slot.h:55
@ RS_INVAL_NONE
Definition: slot.h:49
#define SlotIsLogical(slot)
Definition: slot.h:210
bool IsSyncingReplicationSlots(void)
Definition: slotsync.c:1378
#define SpinLockInit(lock)
Definition: spin.h:60
#define SpinLockRelease(lock)
Definition: spin.h:64
#define SpinLockAcquire(lock)
Definition: spin.h:62
PGPROC * MyProc
Definition: proc.c:66
PROC_HDR * ProcGlobal
Definition: proc.c:78
XLogRecPtr LogStandbySnapshot(void)
Definition: standby.c:1285
#define ERRCODE_DUPLICATE_OBJECT
Definition: streamutil.c:32
bool pg_str_endswith(const char *str, const char *end)
Definition: string.c:32
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:97
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:182
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
Definition: dirent.c:26
Definition: pg_list.h:54
uint8 statusFlags
Definition: proc.h:238
int pgxactoff
Definition: proc.h:180
uint8 * statusFlags
Definition: proc.h:395
ReplicationSlot replication_slots[1]
Definition: slot.h:221
uint32 version
Definition: slot.c:73
ReplicationSlotPersistentData slotdata
Definition: slot.c:81
pg_crc32c checksum
Definition: slot.c:70
TransactionId xmin
Definition: slot.h:82
TransactionId catalog_xmin
Definition: slot.h:90
XLogRecPtr restart_lsn
Definition: slot.h:93
XLogRecPtr confirmed_flush
Definition: slot.h:104
ReplicationSlotPersistency persistency
Definition: slot.h:74
ReplicationSlotInvalidationCause invalidated
Definition: slot.h:96
XLogRecPtr candidate_xmin_lsn
Definition: slot.h:194
TransactionId effective_catalog_xmin
Definition: slot.h:175
slock_t mutex
Definition: slot.h:151
XLogRecPtr candidate_restart_valid
Definition: slot.h:195
XLogRecPtr last_saved_confirmed_flush
Definition: slot.h:203
pid_t active_pid
Definition: slot.h:157
bool in_use
Definition: slot.h:154
TransactionId effective_xmin
Definition: slot.h:174
bool just_dirtied
Definition: slot.h:160
XLogRecPtr candidate_restart_lsn
Definition: slot.h:196
LWLock io_in_progress_lock
Definition: slot.h:181
ConditionVariable active_cv
Definition: slot.h:184
TransactionId candidate_catalog_xmin
Definition: slot.h:193
bool dirty
Definition: slot.h:161
ReplicationSlotPersistentData data
Definition: slot.h:178
TimestampTz inactive_since
Definition: slot.h:206
char slot_names[FLEXIBLE_ARRAY_MEMBER]
Definition: slot.c:99
ConditionVariable wal_confirm_rcv_cv
Definition: dirent.h:10
char d_name[MAX_PATH]
Definition: dirent.h:15
Definition: type.h:95
Definition: c.h:728
unsigned short st_mode
Definition: win32_port.h:268
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:299
#define InvalidTransactionId
Definition: transam.h:31
#define TransactionIdIsValid(xid)
Definition: transam.h:41
bool SplitIdentifierString(char *rawstring, char separator, List **namelist)
Definition: varlena.c:3457
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:88
static void pgstat_report_wait_end(void)
Definition: wait_event.h:104
const char * name
bool am_walsender
Definition: walsender.c:115
bool log_replication_commands
Definition: walsender.c:125
WalSndCtlData * WalSndCtl
Definition: walsender.c:109
#define stat
Definition: win32_port.h:284
#define S_ISDIR(m)
Definition: win32_port.h:325
#define kill(pid, sig)
Definition: win32_port.h:485
bool RecoveryInProgress(void)
Definition: xlog.c:6201
XLogSegNo XLogGetLastRemovedSegno(void)
Definition: xlog.c:3688
XLogRecPtr GetRedoRecPtr(void)
Definition: xlog.c:6304
int wal_level
Definition: xlog.c:131
int wal_segment_size
Definition: xlog.c:143
void XLogSetReplicationSlotMinimumLSN(XLogRecPtr lsn)
Definition: xlog.c:2614
XLogRecPtr GetXLogInsertRecPtr(void)
Definition: xlog.c:9266
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2728
@ WAL_LEVEL_REPLICA
Definition: xlog.h:73
@ WAL_LEVEL_LOGICAL
Definition: xlog.h:74
#define XLogSegNoOffsetToRecPtr(segno, offset, wal_segsz_bytes, dest)
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
uint64 XLogSegNo
Definition: xlogdefs.h:48
XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI)