PostgreSQL Source Code  git master
origin.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * origin.c
4  * Logical replication progress tracking support.
5  *
6  * Copyright (c) 2013-2019, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * src/backend/replication/logical/origin.c
10  *
11  * NOTES
12  *
13  * This file provides the following:
14  * * An infrastructure to name nodes in a replication setup
15  * * A facility to store and persist replication progress in an
16  * efficient and durable manner.
17  *
18  * Replication origins consist of a descriptive, user defined, external
19  * name and a short, thus space efficient, internal 2 byte one. This split
20  * exists because replication origins have to be stored in WAL and shared
21  * memory and long descriptors would be inefficient. For now we only use 2
22  * bytes for the internal id of a replication origin as it seems unlikely that
23  * there soon will be more than 65k nodes in one replication setup; and using
24  * only two bytes allows us to be more space efficient.
25  *
26  * Replication progress is tracked in a shared memory table
27  * (ReplicationState) that's dumped to disk every checkpoint. Entries
28  * ('slots') in this table are identified by the internal id. That's the case
29  * because it allows to increase replication progress during crash
30  * recovery. To allow doing so we store the original LSN (from the originating
31  * system) of a transaction in the commit record. That allows to recover the
32  * precise replayed state after crash recovery; without requiring synchronous
33  * commits. Allowing logical replication to use asynchronous commit is
34  * generally good for performance, but especially important as it allows a
35  * single threaded replay process to keep up with a source that has multiple
36  * backends generating changes concurrently. For efficiency and simplicity
37  * reasons a backend can setup one replication origin that's from then used as
38  * the source of changes produced by the backend, until reset again.
39  *
40  * This infrastructure is intended to be used in cooperation with logical
41  * decoding. When replaying from a remote system the configured origin is
42  * provided to output plugins, allowing prevention of replication loops and
43  * other filtering.
44  *
45  * There are several levels of locking at work:
46  *
47  * * To create and drop replication origins an exclusive lock on
48  * pg_replication_origin is required for the duration. That allows us to
49  * safely and conflict free assign new origins using a dirty snapshot.
50  *
51  * * When creating an in-memory replication progress slot the ReplicationOrigin
52  * LWLock has to be held exclusively; when iterating over the replication
53  * progress a shared lock has to be held, the same when advancing the
54  * replication progress of an individual backend that has not setup as the
55  * session's replication origin.
56  *
57  * * When manipulating or looking at the remote_lsn and local_lsn fields of a
58  * replication progress slot that slot's lwlock has to be held. That's
59  * primarily because we do not assume 8 byte writes (the LSN) are atomic on
60  * all our platforms, but it also simplifies memory ordering concerns
61  * between the remote and local lsn. We use a lwlock instead of a spinlock
62  * so it's less harmful to hold the lock over a WAL write
63  * (cf. AdvanceReplicationProgress).
64  *
65  * ---------------------------------------------------------------------------
66  */
67 
68 #include "postgres.h"
69 
70 #include <unistd.h>
71 #include <sys/stat.h>
72 
73 #include "access/genam.h"
74 #include "access/htup_details.h"
75 #include "access/table.h"
76 #include "access/xact.h"
77 #include "catalog/catalog.h"
78 #include "catalog/indexing.h"
79 #include "funcapi.h"
80 #include "miscadmin.h"
81 #include "nodes/execnodes.h"
82 #include "pgstat.h"
83 #include "replication/logical.h"
84 #include "replication/origin.h"
86 #include "storage/copydir.h"
87 #include "storage/fd.h"
88 #include "storage/ipc.h"
89 #include "storage/lmgr.h"
90 #include "utils/builtins.h"
91 #include "utils/fmgroids.h"
92 #include "utils/pg_lsn.h"
93 #include "utils/rel.h"
94 #include "utils/snapmgr.h"
95 #include "utils/syscache.h"
96 
97 /*
98  * Replay progress of a single remote node.
99  */
100 typedef struct ReplicationState
101 {
102  /*
103  * Local identifier for the remote node.
104  */
106 
107  /*
108  * Location of the latest commit from the remote side.
109  */
111 
112  /*
113  * Remember the local lsn of the commit record so we can XLogFlush() to it
114  * during a checkpoint so we know the commit record actually is safe on
115  * disk.
116  */
118 
119  /*
120  * PID of backend that's acquired slot, or 0 if none.
121  */
123 
124  /*
125  * Condition variable that's signalled when acquired_by changes.
126  */
128 
129  /*
130  * Lock protecting remote_lsn and local_lsn.
131  */
134 
135 /*
136  * On disk version of ReplicationState.
137  */
139 {
143 
144 
145 typedef struct ReplicationStateCtl
146 {
148  ReplicationState states[FLEXIBLE_ARRAY_MEMBER];
150 
151 /* external variables */
155 
156 /*
157  * Base address into a shared memory array of replication states of size
158  * max_replication_slots.
159  *
160  * XXX: Should we use a separate variable to size this rather than
161  * max_replication_slots?
162  */
165 
166 /*
167  * Backend-local, cached element from ReplicationState for use in a backend
168  * replaying remote commits, so we don't have to search ReplicationState for
169  * the backend's current RepOriginId.
170  */
172 
173 /* Magic for on disk files. */
174 #define REPLICATION_STATE_MAGIC ((uint32) 0x1257DADE)
175 
176 static void
177 replorigin_check_prerequisites(bool check_slots, bool recoveryOK)
178 {
179  if (!superuser())
180  ereport(ERROR,
181  (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
182  errmsg("only superusers can query or manipulate replication origins")));
183 
184  if (check_slots && max_replication_slots == 0)
185  ereport(ERROR,
186  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
187  errmsg("cannot query or manipulate replication origin when max_replication_slots = 0")));
188 
189  if (!recoveryOK && RecoveryInProgress())
190  ereport(ERROR,
191  (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
192  errmsg("cannot manipulate replication origins during recovery")));
193 
194 }
195 
196 
197 /* ---------------------------------------------------------------------------
198  * Functions for working with replication origins themselves.
199  * ---------------------------------------------------------------------------
200  */
201 
202 /*
203  * Check for a persistent replication origin identified by name.
204  *
205  * Returns InvalidOid if the node isn't known yet and missing_ok is true.
206  */
208 replorigin_by_name(char *roname, bool missing_ok)
209 {
212  HeapTuple tuple;
213  Datum roname_d;
214 
215  roname_d = CStringGetTextDatum(roname);
216 
217  tuple = SearchSysCache1(REPLORIGNAME, roname_d);
218  if (HeapTupleIsValid(tuple))
219  {
220  ident = (Form_pg_replication_origin) GETSTRUCT(tuple);
221  roident = ident->roident;
222  ReleaseSysCache(tuple);
223  }
224  else if (!missing_ok)
225  ereport(ERROR,
226  (errcode(ERRCODE_UNDEFINED_OBJECT),
227  errmsg("replication origin \"%s\" does not exist",
228  roname)));
229 
230  return roident;
231 }
232 
233 /*
234  * Create a replication origin.
235  *
236  * Needs to be called in a transaction.
237  */
239 replorigin_create(char *roname)
240 {
241  Oid roident;
242  HeapTuple tuple = NULL;
243  Relation rel;
244  Datum roname_d;
245  SnapshotData SnapshotDirty;
246  SysScanDesc scan;
248 
249  roname_d = CStringGetTextDatum(roname);
250 
252 
253  /*
254  * We need the numeric replication origin to be 16bit wide, so we cannot
255  * rely on the normal oid allocation. Instead we simply scan
256  * pg_replication_origin for the first unused id. That's not particularly
257  * efficient, but this should be a fairly infrequent operation - we can
258  * easily spend a bit more code on this when it turns out it needs to be
259  * faster.
260  *
261  * We handle concurrency by taking an exclusive lock (allowing reads!)
262  * over the table for the duration of the search. Because we use a "dirty
263  * snapshot" we can read rows that other in-progress sessions have
264  * written, even though they would be invisible with normal snapshots. Due
265  * to the exclusive lock there's no danger that new rows can appear while
266  * we're checking.
267  */
268  InitDirtySnapshot(SnapshotDirty);
269 
270  rel = table_open(ReplicationOriginRelationId, ExclusiveLock);
271 
272  for (roident = InvalidOid + 1; roident < PG_UINT16_MAX; roident++)
273  {
274  bool nulls[Natts_pg_replication_origin];
275  Datum values[Natts_pg_replication_origin];
276  bool collides;
277 
279 
280  ScanKeyInit(&key,
281  Anum_pg_replication_origin_roident,
282  BTEqualStrategyNumber, F_OIDEQ,
283  ObjectIdGetDatum(roident));
284 
286  true /* indexOK */ ,
287  &SnapshotDirty,
288  1, &key);
289 
290  collides = HeapTupleIsValid(systable_getnext(scan));
291 
292  systable_endscan(scan);
293 
294  if (!collides)
295  {
296  /*
297  * Ok, found an unused roident, insert the new row and do a CCI,
298  * so our callers can look it up if they want to.
299  */
300  memset(&nulls, 0, sizeof(nulls));
301 
302  values[Anum_pg_replication_origin_roident - 1] = ObjectIdGetDatum(roident);
303  values[Anum_pg_replication_origin_roname - 1] = roname_d;
304 
305  tuple = heap_form_tuple(RelationGetDescr(rel), values, nulls);
306  CatalogTupleInsert(rel, tuple);
308  break;
309  }
310  }
311 
312  /* now release lock again, */
314 
315  if (tuple == NULL)
316  ereport(ERROR,
317  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
318  errmsg("could not find free replication origin OID")));
319 
320  heap_freetuple(tuple);
321  return roident;
322 }
323 
324 
325 /*
326  * Drop replication origin.
327  *
328  * Needs to be called in a transaction.
329  */
330 void
332 {
333  HeapTuple tuple;
334  Relation rel;
335  int i;
336 
338 
339  /*
340  * To interlock against concurrent drops, we hold ExclusiveLock on
341  * pg_replication_origin throughout this function.
342  */
343  rel = table_open(ReplicationOriginRelationId, ExclusiveLock);
344 
345  /*
346  * First, clean up the slot state info, if there is any matching slot.
347  */
348 restart:
349  tuple = NULL;
350  LWLockAcquire(ReplicationOriginLock, LW_EXCLUSIVE);
351 
352  for (i = 0; i < max_replication_slots; i++)
353  {
354  ReplicationState *state = &replication_states[i];
355 
356  if (state->roident == roident)
357  {
358  /* found our slot, is it busy? */
359  if (state->acquired_by != 0)
360  {
361  ConditionVariable *cv;
362 
363  if (nowait)
364  ereport(ERROR,
365  (errcode(ERRCODE_OBJECT_IN_USE),
366  errmsg("could not drop replication origin with OID %d, in use by PID %d",
367  state->roident,
368  state->acquired_by)));
369 
370  /*
371  * We must wait and then retry. Since we don't know which CV
372  * to wait on until here, we can't readily use
373  * ConditionVariablePrepareToSleep (calling it here would be
374  * wrong, since we could miss the signal if we did so); just
375  * use ConditionVariableSleep directly.
376  */
377  cv = &state->origin_cv;
378 
379  LWLockRelease(ReplicationOriginLock);
380 
382  goto restart;
383  }
384 
385  /* first make a WAL log entry */
386  {
387  xl_replorigin_drop xlrec;
388 
389  xlrec.node_id = roident;
390  XLogBeginInsert();
391  XLogRegisterData((char *) (&xlrec), sizeof(xlrec));
392  XLogInsert(RM_REPLORIGIN_ID, XLOG_REPLORIGIN_DROP);
393  }
394 
395  /* then clear the in-memory slot */
396  state->roident = InvalidRepOriginId;
397  state->remote_lsn = InvalidXLogRecPtr;
398  state->local_lsn = InvalidXLogRecPtr;
399  break;
400  }
401  }
402  LWLockRelease(ReplicationOriginLock);
404 
405  /*
406  * Now, we can delete the catalog entry.
407  */
409  if (!HeapTupleIsValid(tuple))
410  elog(ERROR, "cache lookup failed for replication origin with oid %u",
411  roident);
412 
413  CatalogTupleDelete(rel, &tuple->t_self);
414  ReleaseSysCache(tuple);
415 
417 
418  /* now release lock again */
420 }
421 
422 
423 /*
424  * Lookup replication origin via its oid and return the name.
425  *
426  * The external name is palloc'd in the calling context.
427  *
428  * Returns true if the origin is known, false otherwise.
429  */
430 bool
431 replorigin_by_oid(RepOriginId roident, bool missing_ok, char **roname)
432 {
433  HeapTuple tuple;
435 
436  Assert(OidIsValid((Oid) roident));
437  Assert(roident != InvalidRepOriginId);
438  Assert(roident != DoNotReplicateId);
439 
441  ObjectIdGetDatum((Oid) roident));
442 
443  if (HeapTupleIsValid(tuple))
444  {
445  ric = (Form_pg_replication_origin) GETSTRUCT(tuple);
446  *roname = text_to_cstring(&ric->roname);
447  ReleaseSysCache(tuple);
448 
449  return true;
450  }
451  else
452  {
453  *roname = NULL;
454 
455  if (!missing_ok)
456  ereport(ERROR,
457  (errcode(ERRCODE_UNDEFINED_OBJECT),
458  errmsg("replication origin with OID %u does not exist",
459  roident)));
460 
461  return false;
462  }
463 }
464 
465 
466 /* ---------------------------------------------------------------------------
467  * Functions for handling replication progress.
468  * ---------------------------------------------------------------------------
469  */
470 
471 Size
473 {
474  Size size = 0;
475 
476  /*
477  * XXX: max_replication_slots is arguably the wrong thing to use, as here
478  * we keep the replay state of *remote* transactions. But for now it seems
479  * sufficient to reuse it, lest we introduce a separate GUC.
480  */
481  if (max_replication_slots == 0)
482  return size;
483 
484  size = add_size(size, offsetof(ReplicationStateCtl, states));
485 
486  size = add_size(size,
488  return size;
489 }
490 
491 void
493 {
494  bool found;
495 
496  if (max_replication_slots == 0)
497  return;
498 
499  replication_states_ctl = (ReplicationStateCtl *)
500  ShmemInitStruct("ReplicationOriginState",
502  &found);
503  replication_states = replication_states_ctl->states;
504 
505  if (!found)
506  {
507  int i;
508 
509  replication_states_ctl->tranche_id = LWTRANCHE_REPLICATION_ORIGIN;
510 
511  MemSet(replication_states, 0, ReplicationOriginShmemSize());
512 
513  for (i = 0; i < max_replication_slots; i++)
514  {
515  LWLockInitialize(&replication_states[i].lock,
516  replication_states_ctl->tranche_id);
517  ConditionVariableInit(&replication_states[i].origin_cv);
518  }
519  }
520 
521  LWLockRegisterTranche(replication_states_ctl->tranche_id,
522  "replication_origin");
523 }
524 
525 /* ---------------------------------------------------------------------------
526  * Perform a checkpoint of each replication origin's progress with respect to
527  * the replayed remote_lsn. Make sure that all transactions we refer to in the
528  * checkpoint (local_lsn) are actually on-disk. This might not yet be the case
529  * if the transactions were originally committed asynchronously.
530  *
531  * We store checkpoints in the following format:
532  * +-------+------------------------+------------------+-----+--------+
533  * | MAGIC | ReplicationStateOnDisk | struct Replic... | ... | CRC32C | EOF
534  * +-------+------------------------+------------------+-----+--------+
535  *
536  * So it's just the magic, followed by the statically sized
537  * ReplicationStateOnDisk structs. Note that the maximum number of
538  * ReplicationState is determined by max_replication_slots.
539  * ---------------------------------------------------------------------------
540  */
541 void
543 {
544  const char *tmppath = "pg_logical/replorigin_checkpoint.tmp";
545  const char *path = "pg_logical/replorigin_checkpoint";
546  int tmpfd;
547  int i;
549  pg_crc32c crc;
550 
551  if (max_replication_slots == 0)
552  return;
553 
554  INIT_CRC32C(crc);
555 
556  /* make sure no old temp file is remaining */
557  if (unlink(tmppath) < 0 && errno != ENOENT)
558  ereport(PANIC,
560  errmsg("could not remove file \"%s\": %m",
561  tmppath)));
562 
563  /*
564  * no other backend can perform this at the same time, we're protected by
565  * CheckpointLock.
566  */
567  tmpfd = OpenTransientFile(tmppath,
568  O_CREAT | O_EXCL | O_WRONLY | PG_BINARY);
569  if (tmpfd < 0)
570  ereport(PANIC,
572  errmsg("could not create file \"%s\": %m",
573  tmppath)));
574 
575  /* write magic */
576  errno = 0;
577  if ((write(tmpfd, &magic, sizeof(magic))) != sizeof(magic))
578  {
579  /* if write didn't set errno, assume problem is no disk space */
580  if (errno == 0)
581  errno = ENOSPC;
582  ereport(PANIC,
584  errmsg("could not write to file \"%s\": %m",
585  tmppath)));
586  }
587  COMP_CRC32C(crc, &magic, sizeof(magic));
588 
589  /* prevent concurrent creations/drops */
590  LWLockAcquire(ReplicationOriginLock, LW_SHARED);
591 
592  /* write actual data */
593  for (i = 0; i < max_replication_slots; i++)
594  {
595  ReplicationStateOnDisk disk_state;
596  ReplicationState *curstate = &replication_states[i];
598 
599  if (curstate->roident == InvalidRepOriginId)
600  continue;
601 
602  /* zero, to avoid uninitialized padding bytes */
603  memset(&disk_state, 0, sizeof(disk_state));
604 
605  LWLockAcquire(&curstate->lock, LW_SHARED);
606 
607  disk_state.roident = curstate->roident;
608 
609  disk_state.remote_lsn = curstate->remote_lsn;
610  local_lsn = curstate->local_lsn;
611 
612  LWLockRelease(&curstate->lock);
613 
614  /* make sure we only write out a commit that's persistent */
615  XLogFlush(local_lsn);
616 
617  errno = 0;
618  if ((write(tmpfd, &disk_state, sizeof(disk_state))) !=
619  sizeof(disk_state))
620  {
621  /* if write didn't set errno, assume problem is no disk space */
622  if (errno == 0)
623  errno = ENOSPC;
624  ereport(PANIC,
626  errmsg("could not write to file \"%s\": %m",
627  tmppath)));
628  }
629 
630  COMP_CRC32C(crc, &disk_state, sizeof(disk_state));
631  }
632 
633  LWLockRelease(ReplicationOriginLock);
634 
635  /* write out the CRC */
636  FIN_CRC32C(crc);
637  errno = 0;
638  if ((write(tmpfd, &crc, sizeof(crc))) != sizeof(crc))
639  {
640  /* if write didn't set errno, assume problem is no disk space */
641  if (errno == 0)
642  errno = ENOSPC;
643  ereport(PANIC,
645  errmsg("could not write to file \"%s\": %m",
646  tmppath)));
647  }
648 
649  if (CloseTransientFile(tmpfd) != 0)
650  ereport(PANIC,
652  errmsg("could not close file \"%s\": %m",
653  tmppath)));
654 
655  /* fsync, rename to permanent file, fsync file and directory */
656  durable_rename(tmppath, path, PANIC);
657 }
658 
659 /*
660  * Recover replication replay status from checkpoint data saved earlier by
661  * CheckPointReplicationOrigin.
662  *
663  * This only needs to be called at startup and *not* during every checkpoint
664  * read during recovery (e.g. in HS or PITR from a base backup) afterwards. All
665  * state thereafter can be recovered by looking at commit records.
666  */
667 void
669 {
670  const char *path = "pg_logical/replorigin_checkpoint";
671  int fd;
672  int readBytes;
674  int last_state = 0;
675  pg_crc32c file_crc;
676  pg_crc32c crc;
677 
678  /* don't want to overwrite already existing state */
679 #ifdef USE_ASSERT_CHECKING
680  static bool already_started = false;
681 
682  Assert(!already_started);
683  already_started = true;
684 #endif
685 
686  if (max_replication_slots == 0)
687  return;
688 
689  INIT_CRC32C(crc);
690 
691  elog(DEBUG2, "starting up replication origin progress state");
692 
693  fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
694 
695  /*
696  * might have had max_replication_slots == 0 last run, or we just brought
697  * up a standby.
698  */
699  if (fd < 0 && errno == ENOENT)
700  return;
701  else if (fd < 0)
702  ereport(PANIC,
704  errmsg("could not open file \"%s\": %m",
705  path)));
706 
707  /* verify magic, that is written even if nothing was active */
708  readBytes = read(fd, &magic, sizeof(magic));
709  if (readBytes != sizeof(magic))
710  {
711  if (readBytes < 0)
712  ereport(PANIC,
714  errmsg("could not read file \"%s\": %m",
715  path)));
716  else
717  ereport(PANIC,
719  errmsg("could not read file \"%s\": read %d of %zu",
720  path, readBytes, sizeof(magic))));
721  }
722  COMP_CRC32C(crc, &magic, sizeof(magic));
723 
724  if (magic != REPLICATION_STATE_MAGIC)
725  ereport(PANIC,
726  (errmsg("replication checkpoint has wrong magic %u instead of %u",
727  magic, REPLICATION_STATE_MAGIC)));
728 
729  /* we can skip locking here, no other access is possible */
730 
731  /* recover individual states, until there are no more to be found */
732  while (true)
733  {
734  ReplicationStateOnDisk disk_state;
735 
736  readBytes = read(fd, &disk_state, sizeof(disk_state));
737 
738  /* no further data */
739  if (readBytes == sizeof(crc))
740  {
741  /* not pretty, but simple ... */
742  file_crc = *(pg_crc32c *) &disk_state;
743  break;
744  }
745 
746  if (readBytes < 0)
747  {
748  ereport(PANIC,
750  errmsg("could not read file \"%s\": %m",
751  path)));
752  }
753 
754  if (readBytes != sizeof(disk_state))
755  {
756  ereport(PANIC,
758  errmsg("could not read file \"%s\": read %d of %zu",
759  path, readBytes, sizeof(disk_state))));
760  }
761 
762  COMP_CRC32C(crc, &disk_state, sizeof(disk_state));
763 
764  if (last_state == max_replication_slots)
765  ereport(PANIC,
766  (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
767  errmsg("could not find free replication state, increase max_replication_slots")));
768 
769  /* copy data to shared memory */
770  replication_states[last_state].roident = disk_state.roident;
771  replication_states[last_state].remote_lsn = disk_state.remote_lsn;
772  last_state++;
773 
774  elog(LOG, "recovered replication state of node %u to %X/%X",
775  disk_state.roident,
776  (uint32) (disk_state.remote_lsn >> 32),
777  (uint32) disk_state.remote_lsn);
778  }
779 
780  /* now check checksum */
781  FIN_CRC32C(crc);
782  if (file_crc != crc)
783  ereport(PANIC,
784  (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
785  errmsg("replication slot checkpoint has wrong checksum %u, expected %u",
786  crc, file_crc)));
787 
788  if (CloseTransientFile(fd) != 0)
789  ereport(PANIC,
791  errmsg("could not close file \"%s\": %m",
792  path)));
793 }
794 
795 void
797 {
798  uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
799 
800  switch (info)
801  {
802  case XLOG_REPLORIGIN_SET:
803  {
804  xl_replorigin_set *xlrec =
805  (xl_replorigin_set *) XLogRecGetData(record);
806 
808  xlrec->remote_lsn, record->EndRecPtr,
809  xlrec->force /* backward */ ,
810  false /* WAL log */ );
811  break;
812  }
814  {
815  xl_replorigin_drop *xlrec;
816  int i;
817 
818  xlrec = (xl_replorigin_drop *) XLogRecGetData(record);
819 
820  for (i = 0; i < max_replication_slots; i++)
821  {
822  ReplicationState *state = &replication_states[i];
823 
824  /* found our slot */
825  if (state->roident == xlrec->node_id)
826  {
827  /* reset entry */
828  state->roident = InvalidRepOriginId;
829  state->remote_lsn = InvalidXLogRecPtr;
830  state->local_lsn = InvalidXLogRecPtr;
831  break;
832  }
833  }
834  break;
835  }
836  default:
837  elog(PANIC, "replorigin_redo: unknown op code %u", info);
838  }
839 }
840 
841 
842 /*
843  * Tell the replication origin progress machinery that a commit from 'node'
844  * that originated at the LSN remote_commit on the remote node was replayed
845  * successfully and that we don't need to do so again. In combination with
846  * setting up replorigin_session_origin_lsn and replorigin_session_origin
847  * that ensures we won't lose knowledge about that after a crash if the
848  * transaction had a persistent effect (think of asynchronous commits).
849  *
850  * local_commit needs to be a local LSN of the commit so that we can make sure
851  * upon a checkpoint that enough WAL has been persisted to disk.
852  *
853  * Needs to be called with a RowExclusiveLock on pg_replication_origin,
854  * unless running in recovery.
855  */
856 void
858  XLogRecPtr remote_commit, XLogRecPtr local_commit,
859  bool go_backward, bool wal_log)
860 {
861  int i;
862  ReplicationState *replication_state = NULL;
863  ReplicationState *free_state = NULL;
864 
865  Assert(node != InvalidRepOriginId);
866 
867  /* we don't track DoNotReplicateId */
868  if (node == DoNotReplicateId)
869  return;
870 
871  /*
872  * XXX: For the case where this is called by WAL replay, it'd be more
873  * efficient to restore into a backend local hashtable and only dump into
874  * shmem after recovery is finished. Let's wait with implementing that
875  * till it's shown to be a measurable expense
876  */
877 
878  /* Lock exclusively, as we may have to create a new table entry. */
879  LWLockAcquire(ReplicationOriginLock, LW_EXCLUSIVE);
880 
881  /*
882  * Search for either an existing slot for the origin, or a free one we can
883  * use.
884  */
885  for (i = 0; i < max_replication_slots; i++)
886  {
887  ReplicationState *curstate = &replication_states[i];
888 
889  /* remember where to insert if necessary */
890  if (curstate->roident == InvalidRepOriginId &&
891  free_state == NULL)
892  {
893  free_state = curstate;
894  continue;
895  }
896 
897  /* not our slot */
898  if (curstate->roident != node)
899  {
900  continue;
901  }
902 
903  /* ok, found slot */
904  replication_state = curstate;
905 
906  LWLockAcquire(&replication_state->lock, LW_EXCLUSIVE);
907 
908  /* Make sure it's not used by somebody else */
909  if (replication_state->acquired_by != 0)
910  {
911  ereport(ERROR,
912  (errcode(ERRCODE_OBJECT_IN_USE),
913  errmsg("replication origin with OID %d is already active for PID %d",
914  replication_state->roident,
915  replication_state->acquired_by)));
916  }
917 
918  break;
919  }
920 
921  if (replication_state == NULL && free_state == NULL)
922  ereport(ERROR,
923  (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
924  errmsg("could not find free replication state slot for replication origin with OID %u",
925  node),
926  errhint("Increase max_replication_slots and try again.")));
927 
928  if (replication_state == NULL)
929  {
930  /* initialize new slot */
931  LWLockAcquire(&free_state->lock, LW_EXCLUSIVE);
932  replication_state = free_state;
933  Assert(replication_state->remote_lsn == InvalidXLogRecPtr);
934  Assert(replication_state->local_lsn == InvalidXLogRecPtr);
935  replication_state->roident = node;
936  }
937 
938  Assert(replication_state->roident != InvalidRepOriginId);
939 
940  /*
941  * If somebody "forcefully" sets this slot, WAL log it, so it's durable
942  * and the standby gets the message. Primarily this will be called during
943  * WAL replay (of commit records) where no WAL logging is necessary.
944  */
945  if (wal_log)
946  {
947  xl_replorigin_set xlrec;
948 
949  xlrec.remote_lsn = remote_commit;
950  xlrec.node_id = node;
951  xlrec.force = go_backward;
952 
953  XLogBeginInsert();
954  XLogRegisterData((char *) (&xlrec), sizeof(xlrec));
955 
956  XLogInsert(RM_REPLORIGIN_ID, XLOG_REPLORIGIN_SET);
957  }
958 
959  /*
960  * Due to - harmless - race conditions during a checkpoint we could see
961  * values here that are older than the ones we already have in memory.
962  * Don't overwrite those.
963  */
964  if (go_backward || replication_state->remote_lsn < remote_commit)
965  replication_state->remote_lsn = remote_commit;
966  if (local_commit != InvalidXLogRecPtr &&
967  (go_backward || replication_state->local_lsn < local_commit))
968  replication_state->local_lsn = local_commit;
969  LWLockRelease(&replication_state->lock);
970 
971  /*
972  * Release *after* changing the LSNs, slot isn't acquired and thus could
973  * otherwise be dropped anytime.
974  */
975  LWLockRelease(ReplicationOriginLock);
976 }
977 
978 
981 {
982  int i;
985 
986  /* prevent slots from being concurrently dropped */
987  LWLockAcquire(ReplicationOriginLock, LW_SHARED);
988 
989  for (i = 0; i < max_replication_slots; i++)
990  {
992 
993  state = &replication_states[i];
994 
995  if (state->roident == node)
996  {
997  LWLockAcquire(&state->lock, LW_SHARED);
998 
999  remote_lsn = state->remote_lsn;
1000  local_lsn = state->local_lsn;
1001 
1002  LWLockRelease(&state->lock);
1003 
1004  break;
1005  }
1006  }
1007 
1008  LWLockRelease(ReplicationOriginLock);
1009 
1010  if (flush && local_lsn != InvalidXLogRecPtr)
1011  XLogFlush(local_lsn);
1012 
1013  return remote_lsn;
1014 }
1015 
1016 /*
1017  * Tear down a (possibly) configured session replication origin during process
1018  * exit.
1019  */
1020 static void
1022 {
1023  ConditionVariable *cv = NULL;
1024 
1025  LWLockAcquire(ReplicationOriginLock, LW_EXCLUSIVE);
1026 
1027  if (session_replication_state != NULL &&
1028  session_replication_state->acquired_by == MyProcPid)
1029  {
1030  cv = &session_replication_state->origin_cv;
1031 
1032  session_replication_state->acquired_by = 0;
1033  session_replication_state = NULL;
1034  }
1035 
1036  LWLockRelease(ReplicationOriginLock);
1037 
1038  if (cv)
1040 }
1041 
1042 /*
1043  * Setup a replication origin in the shared memory struct if it doesn't
1044  * already exist and cache access to the specific ReplicationSlot so the
1045  * array doesn't have to be searched when calling
1046  * replorigin_session_advance().
1047  *
1048  * Obviously only one such cached origin can exist per process and the current
1049  * cached value can only be set again after the previous value is torn down
1050  * with replorigin_session_reset().
1051  */
1052 void
1054 {
1055  static bool registered_cleanup;
1056  int i;
1057  int free_slot = -1;
1058 
1059  if (!registered_cleanup)
1060  {
1062  registered_cleanup = true;
1063  }
1064 
1066 
1067  if (session_replication_state != NULL)
1068  ereport(ERROR,
1069  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1070  errmsg("cannot setup replication origin when one is already setup")));
1071 
1072  /* Lock exclusively, as we may have to create a new table entry. */
1073  LWLockAcquire(ReplicationOriginLock, LW_EXCLUSIVE);
1074 
1075  /*
1076  * Search for either an existing slot for the origin, or a free one we can
1077  * use.
1078  */
1079  for (i = 0; i < max_replication_slots; i++)
1080  {
1081  ReplicationState *curstate = &replication_states[i];
1082 
1083  /* remember where to insert if necessary */
1084  if (curstate->roident == InvalidRepOriginId &&
1085  free_slot == -1)
1086  {
1087  free_slot = i;
1088  continue;
1089  }
1090 
1091  /* not our slot */
1092  if (curstate->roident != node)
1093  continue;
1094 
1095  else if (curstate->acquired_by != 0)
1096  {
1097  ereport(ERROR,
1098  (errcode(ERRCODE_OBJECT_IN_USE),
1099  errmsg("replication origin with OID %d is already active for PID %d",
1100  curstate->roident, curstate->acquired_by)));
1101  }
1102 
1103  /* ok, found slot */
1104  session_replication_state = curstate;
1105  }
1106 
1107 
1108  if (session_replication_state == NULL && free_slot == -1)
1109  ereport(ERROR,
1110  (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
1111  errmsg("could not find free replication state slot for replication origin with OID %u",
1112  node),
1113  errhint("Increase max_replication_slots and try again.")));
1114  else if (session_replication_state == NULL)
1115  {
1116  /* initialize new slot */
1117  session_replication_state = &replication_states[free_slot];
1118  Assert(session_replication_state->remote_lsn == InvalidXLogRecPtr);
1119  Assert(session_replication_state->local_lsn == InvalidXLogRecPtr);
1120  session_replication_state->roident = node;
1121  }
1122 
1123 
1124  Assert(session_replication_state->roident != InvalidRepOriginId);
1125 
1126  session_replication_state->acquired_by = MyProcPid;
1127 
1128  LWLockRelease(ReplicationOriginLock);
1129 
1130  /* probably this one is pointless */
1131  ConditionVariableBroadcast(&session_replication_state->origin_cv);
1132 }
1133 
1134 /*
1135  * Reset replay state previously set up in this session.
1136  *
1137  * This function may only be called if an origin was set up with
1138  * replorigin_session_setup(); otherwise an ERROR with
      * ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE is raised.
      *
      * While holding ReplicationOriginLock exclusively, the slot's acquired_by
      * is reset to 0 and this session's pointer to the slot is cleared.
1139  */
1140 void
1142 {
1143  ConditionVariable *cv;
1144 
1146 
1147  if (session_replication_state == NULL)
1148  ereport(ERROR,
1149  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1150  errmsg("no replication origin is configured")));
1151 
1152  LWLockAcquire(ReplicationOriginLock, LW_EXCLUSIVE);
1153 
1154  session_replication_state->acquired_by = 0;
      /* save the CV's address first: the session pointer is cleared next */
1155  cv = &session_replication_state->origin_cv;
1156  session_replication_state = NULL;
1157 
1158  LWLockRelease(ReplicationOriginLock);
1159 
1161 }
1162 
1163 /*
1164  * Do the same work replorigin_advance() does, just on the session's
1165  * configured origin.
1166  *
1167  * This is noticeably cheaper than using replorigin_advance().
      *
      * The stored local_lsn and remote_lsn are only ever moved forward: each is
      * overwritten only when the passed-in value is larger than the stored one.
      * Both updates happen while the slot's own lock is held exclusively.
      *
      * The caller must already have a session origin set up; this is only
      * asserted here.
1168  */
1169 void
1171 {
1172  Assert(session_replication_state != NULL);
1173  Assert(session_replication_state->roident != InvalidRepOriginId);
1174 
1175  LWLockAcquire(&session_replication_state->lock, LW_EXCLUSIVE);
      /* never move either LSN backwards */
1176  if (session_replication_state->local_lsn < local_commit)
1177  session_replication_state->local_lsn = local_commit;
1178  if (session_replication_state->remote_lsn < remote_commit)
1179  session_replication_state->remote_lsn = remote_commit;
1180  LWLockRelease(&session_replication_state->lock);
1181 }
1182 
1183 /*
1184  * Ask the machinery about the point up to which we successfully replayed
1185  * changes from an already set up replication origin.
      *
      * Returns the remote_lsn stored in the session's slot.  If 'flush' is true
      * and the slot's local_lsn is valid, XLogFlush(local_lsn) is called before
      * returning.
      *
      * The caller must already have a session origin set up; this is only
      * asserted here.
1186  */
1187 XLogRecPtr
1189 {
1192 
1193  Assert(session_replication_state != NULL);
1194 
      /* copy both LSNs out under the slot's lock, held in shared mode */
1195  LWLockAcquire(&session_replication_state->lock, LW_SHARED);
1196  remote_lsn = session_replication_state->remote_lsn;
1197  local_lsn = session_replication_state->local_lsn;
1198  LWLockRelease(&session_replication_state->lock);
1199 
      /* the flush is done only after the slot lock has been released */
1200  if (flush && local_lsn != InvalidXLogRecPtr)
1201  XLogFlush(local_lsn);
1202 
1203  return remote_lsn;
1204 }
1205 
1206 
1207 
1208 /* ---------------------------------------------------------------------------
1209  * SQL functions for working with replication origin.
1210  *
1211  * These mostly should be fairly short wrappers around more generic functions.
1212  * ---------------------------------------------------------------------------
1213  */
1214 
1215 /*
1216  * Create replication origin for the passed in name, and return the assigned
1217  * oid.
      *
      * Names for which IsReservedName() is true are rejected with
      * ERRCODE_RESERVED_NAME before anything is created.  The id returned by
      * replorigin_create() is handed back via PG_RETURN_OID().
1218  */
1219 Datum
1221 {
1222  char *name;
1224 
1225  replorigin_check_prerequisites(false, false);
1226 
1228 
1229  /* Replication origins "pg_xxx" are reserved for internal use */
1230  if (IsReservedName(name))
1231  ereport(ERROR,
1232  (errcode(ERRCODE_RESERVED_NAME),
1233  errmsg("replication origin name \"%s\" is reserved",
1234  name),
1235  errdetail("Origin names starting with \"pg_\" are reserved.")));
1236 
1237  /*
1238  * If built with appropriate switch, whine when regression-testing
1239  * conventions for replication origin names are violated.
      * This is a WARNING only; creation still proceeds below.
1240  */
1241 #ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS
1242  if (strncmp(name, "regress_", 8) != 0)
1243  elog(WARNING, "replication origins created by regression test cases should have names starting with \"regress_\"");
1244 #endif
1245 
1246  roident = replorigin_create(name);
1247 
1248  pfree(name);
1249 
1250  PG_RETURN_OID(roident);
1251 }
1252 
1253 /*
1254  * Drop replication origin.
      *
      * The origin is looked up by name with missing_ok = false, and the
      * resulting id is passed to replorigin_drop() with nowait = true.
      * Returns void.
1255  */
1256 Datum
1258 {
1259  char *name;
1261 
1262  replorigin_check_prerequisites(false, false);
1263 
1265 
      /* missing_ok is false, so a valid id is expected here */
1266  roident = replorigin_by_name(name, false);
1267  Assert(OidIsValid(roident));
1268 
1269  replorigin_drop(roident, true);
1270 
1271  pfree(name);
1272 
1273  PG_RETURN_VOID();
1274 }
1275 
1276 /*
1277  * Return oid of a replication origin.
      *
      * The lookup is done with missing_ok = true; when it does not yield a
      * valid id, SQL NULL is returned instead.
1278  */
1279 Datum
1281 {
1282  char *name;
1284 
1285  replorigin_check_prerequisites(false, false);
1286 
1288  roident = replorigin_by_name(name, true);
1289 
1290  pfree(name);
1291 
      /* an invalid id from the missing_ok lookup is reported as NULL */
1292  if (OidIsValid(roident))
1293  PG_RETURN_OID(roident);
1294  PG_RETURN_NULL();
1295 }
1296 
1297 /*
1298  * Set up a replication origin for this session.
      *
      * The origin is looked up by name (missing_ok = false), handed to
      * replorigin_session_setup(), and then also recorded in
      * replorigin_session_origin.  Returns void.
1299  */
1300 Datum
1302 {
1303  char *name;
1304  RepOriginId origin;
1305 
      /* unlike the create/drop/oid functions, this passes check_slots = true */
1306  replorigin_check_prerequisites(true, false);
1307 
1309  origin = replorigin_by_name(name, false);
1310  replorigin_session_setup(origin);
1311 
      /* recorded only after replorigin_session_setup() has returned */
1312  replorigin_session_origin = origin;
1313 
1314  pfree(name);
1315 
1316  PG_RETURN_VOID();
1317 }
1318 
1319 /*
1320  * Reset the origin previously set up in this session.
      *
      * NOTE(review): only the prerequisite check and the void return are
      * visible in this block; the statements performing the reset itself
      * should be confirmed against the complete source.
1321  */
1322 Datum
1324 {
1325  replorigin_check_prerequisites(true, false);
1326 
1328 
1332 
1333  PG_RETURN_VOID();
1334 }
1335 
1336 /*
1337  * Report whether a replication origin has been set up for this session.
      *
      * NOTE(review): the statement producing the result is not visible in
      * this block; presumably it returns a boolean - confirm against the
      * complete source.
1338  */
1339 Datum
1341 {
1342  replorigin_check_prerequisites(false, false);
1343 
1345 }
1346 
1347 
1348 /*
1349  * Return the replication progress for the origin set up in the current
      * session.
1350  *
1351  * If 'flush' is set to true it is ensured that the returned value corresponds
1352  * to a local transaction that has been flushed. This is useful if asynchronous
1353  * commits are used when replaying replicated transactions.
      *
      * Raises ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE if no origin is
      * configured for the session.  Returns SQL NULL when the reported
      * remote LSN is InvalidXLogRecPtr.
1354  */
1355 Datum
1357 {
1359  bool flush = PG_GETARG_BOOL(0);
1360 
1361  replorigin_check_prerequisites(true, false);
1362 
1363  if (session_replication_state == NULL)
1364  ereport(ERROR,
1365  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1366  errmsg("no replication origin is configured")));
1367 
1368  remote_lsn = replorigin_session_get_progress(flush);
1369 
      /* an invalid LSN is reported as NULL */
1370  if (remote_lsn == InvalidXLogRecPtr)
1371  PG_RETURN_NULL();
1372 
1373  PG_RETURN_LSN(remote_lsn);
1374 }
1375 
/*
 * Store the LSN passed as argument 0 in replorigin_session_origin_lsn.
 *
 * Raises ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE if no replication origin
 * is configured for the session.  Returns void.
 *
 * NOTE(review): judging by the function's name this is per-transaction
 * origin state; a statement following the LSN assignment is not visible in
 * this block, so confirm the complete set of fields written against the
 * full source.
 */
1376 Datum
1378 {
1379  XLogRecPtr location = PG_GETARG_LSN(0);
1380 
1381  replorigin_check_prerequisites(true, false);
1382 
1383  if (session_replication_state == NULL)
1384  ereport(ERROR,
1385  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1386  errmsg("no replication origin is configured")));
1387 
1388  replorigin_session_origin_lsn = location;
1390 
1391  PG_RETURN_VOID();
1392 }
1393 
/*
 * SQL-callable pg_replication_origin_xact_reset(); returns void.
 *
 * NOTE(review): only the prerequisite check and the void return are visible
 * in this block.  Presumably the elided statements clear the
 * per-transaction origin state (e.g. replorigin_session_origin_lsn) -
 * confirm against the complete source.
 */
1394 Datum
1396 {
1397  replorigin_check_prerequisites(true, false);
1398 
1401 
1402  PG_RETURN_VOID();
1403 }
1404 
1405 
/*
 * Set the replication progress of the origin named by argument 0 to the
 * remote LSN given as argument 1.
 *
 * The origin is looked up by name (missing_ok = false) while a
 * RowExclusiveLock on ReplicationOriginRelationId is held; the lock is
 * released again before returning.  replorigin_advance() is called with
 * go_backward = true and wal_log = true, and with InvalidXLogRecPtr as the
 * local commit LSN.  Returns void.
 */
1406 Datum
1408 {
1409  text *name = PG_GETARG_TEXT_PP(0);
1410  XLogRecPtr remote_commit = PG_GETARG_LSN(1);
1411  RepOriginId node;
1412 
1413  replorigin_check_prerequisites(true, false);
1414 
1415  /* lock to prevent the replication origin from vanishing */
1416  LockRelationOid(ReplicationOriginRelationId, RowExclusiveLock);
1417 
1418  node = replorigin_by_name(text_to_cstring(name), false);
1419 
1420  /*
1421  * Can't sensibly pass a local commit to be flushed at checkpoint - this
1422  * xact hasn't committed yet. This is why this function should be used to
1423  * set up the initial replication state, but not for replay.
1424  */
1425  replorigin_advance(node, remote_commit, InvalidXLogRecPtr,
1426  true /* go backward */ , true /* WAL log */ );
1427 
1428  UnlockRelationOid(ReplicationOriginRelationId, RowExclusiveLock);
1429 
1430  PG_RETURN_VOID();
1431 }
1432 
1433 
1434 /*
1435  * Return the replication progress for an individual replication origin.
1436  *
1437  * If 'flush' is set to true it is ensured that the returned value corresponds
1438  * to a local transaction that has been flushed. This is useful if asynchronous
1439  * commits are used when replaying replicated transactions.
      *
      * The origin is looked up by name with missing_ok = false.  Returns SQL
      * NULL when the reported remote LSN is InvalidXLogRecPtr.
1440  */
1441 Datum
1443 {
1444  char *name;
1445  bool flush;
1448 
      /* this function passes recoveryOK = true */
1449  replorigin_check_prerequisites(true, true);
1450 
1452  flush = PG_GETARG_BOOL(1);
1453 
      /* missing_ok is false, so a valid id is expected here */
1454  roident = replorigin_by_name(name, false);
1455  Assert(OidIsValid(roident));
1456 
1457  remote_lsn = replorigin_get_progress(roident, flush);
1458 
      /* an invalid LSN is reported as NULL */
1459  if (remote_lsn == InvalidXLogRecPtr)
1460  PG_RETURN_NULL();
1461 
1462  PG_RETURN_LSN(remote_lsn);
1463 }
1464 
1465 
/*
 * Set-returning function listing the in-use entries of replication_states.
 *
 * The result is materialized into a tuplestore that is handed back through
 * rsinfo (returnMode = SFRM_Materialize, setResult, setDesc); the function's
 * own return value is (Datum) 0.
 *
 * One row is emitted per entry whose roident is not InvalidRepOriginId:
 *   column 0 - roident
 *   column 1 - origin name, or NULL if replorigin_by_oid() finds none
 *   column 2 - remote_lsn
 *   column 3 - local_lsn
 *
 * Errors are raised if the caller cannot accept a materialized set, or if
 * the declared result type is not a row type with
 * REPLICATION_ORIGIN_PROGRESS_COLS attributes.
 */
1466 Datum
1468 {
1469  ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1470  TupleDesc tupdesc;
1471  Tuplestorestate *tupstore;
1472  MemoryContext per_query_ctx;
1473  MemoryContext oldcontext;
1474  int i;
1476 
1477  /* we want to return 0 rows if max_replication_slots is set to zero */
1478  replorigin_check_prerequisites(false, true);
1479 
1480  if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
1481  ereport(ERROR,
1482  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1483  errmsg("set-valued function called in context that cannot accept a set")));
1484  if (!(rsinfo->allowedModes & SFRM_Materialize))
1485  ereport(ERROR,
1486  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1487  errmsg("materialize mode required, but it is not allowed in this context")));
1488  if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1489  elog(ERROR, "return type must be a row type");
1490 
1491  if (tupdesc->natts != REPLICATION_ORIGIN_PROGRESS_COLS)
1492  elog(ERROR, "wrong function definition");
1493 
      /*
       * Create the result tuplestore while switched to the per-query memory
       * context, then switch back to the previous context.
       */
1494  per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
1495  oldcontext = MemoryContextSwitchTo(per_query_ctx);
1496 
1497  tupstore = tuplestore_begin_heap(true, false, work_mem);
1498  rsinfo->returnMode = SFRM_Materialize;
1499  rsinfo->setResult = tupstore;
1500  rsinfo->setDesc = tupdesc;
1501 
1502  MemoryContextSwitchTo(oldcontext);
1503 
1504 
1505  /* prevent slots from being concurrently dropped */
1506  LWLockAcquire(ReplicationOriginLock, LW_SHARED);
1507 
1508  /*
1509  * Iterate through all possible replication_states, display if they are
1510  * filled. ReplicationOriginLock stays held in shared mode for the whole
1511  * scan, and each slot's own lock is taken in shared mode while its two
      * LSNs are copied out.
1512  */
1513  for (i = 0; i < max_replication_slots; i++)
1514  {
1518  char *roname;
1519 
1520  state = &replication_states[i];
1521 
1522  /* unused slot, nothing to display */
1523  if (state->roident == InvalidRepOriginId)
1524  continue;
1525 
      /* start with every column NULL; filled columns are flipped below */
1526  memset(values, 0, sizeof(values));
1527  memset(nulls, 1, sizeof(nulls));
1528 
1529  values[0] = ObjectIdGetDatum(state->roident);
1530  nulls[0] = false;
1531 
1532  /*
1533  * We're not preventing the origin from being dropped concurrently, so
1534  * silently accept that it might be gone; column 1 then stays NULL.
1535  */
1536  if (replorigin_by_oid(state->roident, true,
1537  &roname))
1538  {
1539  values[1] = CStringGetTextDatum(roname);
1540  nulls[1] = false;
1541  }
1542 
1543  LWLockAcquire(&state->lock, LW_SHARED);
1544 
1545  values[2] = LSNGetDatum(state->remote_lsn);
1546  nulls[2] = false;
1547 
1548  values[3] = LSNGetDatum(state->local_lsn);
1549  nulls[3] = false;
1550 
1551  LWLockRelease(&state->lock);
1552 
1553  tuplestore_putvalues(tupstore, tupdesc, values, nulls);
1554  }
1555 
1556  tuplestore_donestoring(tupstore);
1557 
1558  LWLockRelease(ReplicationOriginLock);
1559 
1560 #undef REPLICATION_ORIGIN_PROGRESS_COLS
1561 
      /* the rows travel via rsinfo->setResult, not via the return value */
1562  return (Datum) 0;
1563 }
static ReplicationState * session_replication_state
Definition: origin.c:171
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, Datum *values, bool *isnull)
Definition: tuplestore.c:750
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
static void replorigin_check_prerequisites(bool check_slots, bool recoveryOK)
Definition: origin.c:177
Definition: lwlock.h:32
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
#define IsA(nodeptr, _type_)
Definition: nodes.h:576
#define InitDirtySnapshot(snapshotdata)
Definition: snapmgr.h:76
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition: funcapi.c:196
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:133
int MyProcPid
Definition: globals.c:40
int errhint(const char *fmt,...)
Definition: elog.c:1069
XLogRecPtr local_lsn
Definition: origin.c:117
void systable_endscan(SysScanDesc sysscan)
Definition: genam.c:525
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
#define RelationGetDescr(relation)
Definition: rel.h:448
#define DoNotReplicateId
Definition: origin.h:34
Datum pg_replication_origin_xact_setup(PG_FUNCTION_ARGS)
Definition: origin.c:1377
void UnlockRelationOid(Oid relid, LOCKMODE lockmode)
Definition: lmgr.c:199
#define write(a, b, c)
Definition: win32.h:14
Datum pg_replication_origin_drop(PG_FUNCTION_ARGS)
Definition: origin.c:1257
#define ExclusiveLock
Definition: lockdefs.h:44
int64 TimestampTz
Definition: timestamp.h:39
XLogRecPtr remote_lsn
Definition: origin.c:141
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:263
uint32 pg_crc32c
Definition: pg_crc32c.h:38
static void ReplicationOriginExitCleanup(int code, Datum arg)
Definition: origin.c:1021
RepOriginId roident
Definition: origin.c:140
void replorigin_drop(RepOriginId roident, bool nowait)
Definition: origin.c:331
#define tuplestore_donestoring(state)
Definition: tuplestore.h:60
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
unsigned char uint8
Definition: c.h:357
uint16 RepOriginId
Definition: xlogdefs.h:58
void ConditionVariableBroadcast(ConditionVariable *cv)
XLogRecPtr replorigin_session_get_progress(bool flush)
Definition: origin.c:1188
int errcode(int sqlerrcode)
Definition: elog.c:608
#define LSNGetDatum(X)
Definition: pg_lsn.h:22
bool superuser(void)
Definition: superuser.c:46
#define MemSet(start, val, len)
Definition: c.h:962
void CatalogTupleDelete(Relation heapRel, ItemPointer tid)
Definition: indexing.c:269
void ReplicationOriginShmemInit(void)
Definition: origin.c:492
void replorigin_advance(RepOriginId node, XLogRecPtr remote_commit, XLogRecPtr local_commit, bool go_backward, bool wal_log)
Definition: origin.c:857
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull)
Definition: heaptuple.c:1020
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:269
void replorigin_session_setup(RepOriginId node)
Definition: origin.c:1053
bool replorigin_by_oid(RepOriginId roident, bool missing_ok, char **roname)
Definition: origin.c:431
#define LOG
Definition: elog.h:26
void heap_freetuple(HeapTuple htup)
Definition: heaptuple.c:1338
unsigned int Oid
Definition: postgres_ext.h:31
bool RecoveryInProgress(void)
Definition: xlog.c:7935
bool IsReservedName(const char *name)
Definition: catalog.c:213
#define OidIsValid(objectId)
Definition: c.h:645
#define PANIC
Definition: elog.h:53
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2805
static int fd(const char *x, int i)
Definition: preproc-init.c:105
SysScanDesc systable_beginscan(Relation heapRelation, Oid indexId, bool indexOK, Snapshot snapshot, int nkeys, ScanKey key)
Definition: genam.c:352
#define PG_BINARY
Definition: c.h:1222
RepOriginId replorigin_by_name(char *roname, bool missing_ok)
Definition: origin.c:208
RepOriginId roident
Definition: origin.c:105
XLogRecPtr EndRecPtr
Definition: xlogreader.h:132
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:303
void LWLockRegisterTranche(int tranche_id, const char *tranche_name)
Definition: lwlock.c:603
Datum pg_replication_origin_advance(PG_FUNCTION_ARGS)
Definition: origin.c:1407
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1726
#define PG_RETURN_LSN(x)
Definition: pg_lsn.h:25
void CheckPointReplicationOrigin(void)
Definition: origin.c:542
void ConditionVariableInit(ConditionVariable *cv)
void replorigin_redo(XLogReaderState *record)
Definition: origin.c:796
HeapTuple systable_getnext(SysScanDesc sysscan)
Definition: genam.c:444
void pfree(void *pointer)
Definition: mcxt.c:1056
#define XLogRecGetData(decoder)
Definition: xlogreader.h:283
void ConditionVariableCancelSleep(void)
#define ReplicationOriginIdentIndex
Definition: indexing.h:340
#define ObjectIdGetDatum(X)
Definition: postgres.h:507
#define ERROR
Definition: elog.h:43
#define PG_UINT16_MAX
Definition: c.h:440
void replorigin_session_reset(void)
Definition: origin.c:1141
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2292
FormData_pg_replication_origin * Form_pg_replication_origin
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:372
LWLock lock
Definition: origin.c:132
ItemPointerData t_self
Definition: htup.h:65
#define DEBUG2
Definition: elog.h:24
XLogRecPtr replorigin_session_origin_lsn
Definition: origin.c:153
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:361
TimestampTz replorigin_session_origin_timestamp
Definition: origin.c:154
#define XLOG_REPLORIGIN_SET
Definition: origin.h:30
void StartupReplicationOrigin(void)
Definition: origin.c:668
#define RowExclusiveLock
Definition: lockdefs.h:38
int errdetail(const char *fmt,...)
Definition: elog.c:955
int errcode_for_file_access(void)
Definition: elog.c:631
struct ReplicationState ReplicationState
unsigned int uint32
Definition: c.h:359
Datum pg_replication_origin_session_setup(PG_FUNCTION_ARGS)
Definition: origin.c:1301
Datum pg_replication_origin_session_reset(PG_FUNCTION_ARGS)
Definition: origin.c:1323
RepOriginId replorigin_create(char *roname)
Definition: origin.c:239
Datum pg_show_replication_origin_status(PG_FUNCTION_ARGS)
Definition: origin.c:1467
#define ereport(elevel, rest)
Definition: elog.h:141
#define XLogRecGetInfo(decoder)
Definition: xlogreader.h:279
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition: fd.c:643
Datum pg_replication_origin_session_is_setup(PG_FUNCTION_ARGS)
Definition: origin.c:1340
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:45
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:678
int CloseTransientFile(int fd)
Definition: fd.c:2469
#define WARNING
Definition: elog.h:40
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Definition: syscache.c:1116
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:323
Tuplestorestate * tuplestore_begin_heap(bool randomAccess, bool interXact, int maxKBytes)
Definition: tuplestore.c:318
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:415
#define XLOG_REPLORIGIN_DROP
Definition: origin.h:31
Size mul_size(Size s1, Size s2)
Definition: shmem.c:492
#define PG_GETARG_LSN(n)
Definition: pg_lsn.h:24
struct ReplicationStateOnDisk ReplicationStateOnDisk
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:349
uintptr_t Datum
Definition: postgres.h:367
void CommandCounterIncrement(void)
Definition: xact.c:1005
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:1164
Size add_size(Size s1, Size s2)
Definition: shmem.c:475
int work_mem
Definition: globals.c:121
Size ReplicationOriginShmemSize(void)
Definition: origin.c:472
#define REPLICATION_STATE_MAGIC
Definition: origin.c:174
#define InvalidOid
Definition: postgres_ext.h:36
static ReplicationStateCtl * replication_states_ctl
Definition: origin.c:164
Datum pg_replication_origin_xact_reset(PG_FUNCTION_ARGS)
Definition: origin.c:1395
int allowedModes
Definition: execnodes.h:302
#define PG_RETURN_VOID()
Definition: fmgr.h:339
struct ReplicationStateCtl ReplicationStateCtl
SetFunctionReturnMode returnMode
Definition: execnodes.h:304
int max_replication_slots
Definition: slot.c:99
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
XLogRecPtr remote_lsn
Definition: origin.c:110
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
Datum pg_replication_origin_progress(PG_FUNCTION_ARGS)
Definition: origin.c:1442
RepOriginId node_id
Definition: origin.h:27
uint64 XLogRecPtr
Definition: xlogdefs.h:21
ReplicationState states[FLEXIBLE_ARRAY_MEMBER]
Definition: origin.c:148
#define Assert(condition)
Definition: c.h:739
#define XLR_INFO_MASK
Definition: xlogrecord.h:62
RepOriginId replorigin_session_origin
Definition: origin.c:152
Definition: regguts.h:298
RepOriginId node_id
Definition: origin.h:21
ConditionVariable origin_cv
Definition: origin.c:127
size_t Size
Definition: c.h:467
void replorigin_session_advance(XLogRecPtr remote_commit, XLogRecPtr local_commit)
Definition: origin.c:1170
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1122
bool IsTransactionState(void)
Definition: xact.c:355
XLogRecPtr remote_lsn
Definition: origin.h:20
MemoryContext ecxt_per_query_memory
Definition: execnodes.h:230
const char * name
Definition: encode.c:521
#define InvalidRepOriginId
Definition: origin.h:33
Tuplestorestate * setResult
Definition: execnodes.h:307
#define DatumGetPointer(X)
Definition: postgres.h:549
static Datum values[MAXATTR]
Definition: bootstrap.c:167
char * text_to_cstring(const text *t)
Definition: varlena.c:204
ExprContext * econtext
Definition: execnodes.h:300
TupleDesc setDesc
Definition: execnodes.h:308
int errmsg(const char *fmt,...)
Definition: elog.c:822
Datum pg_replication_origin_oid(PG_FUNCTION_ARGS)
Definition: origin.c:1280
#define PG_GETARG_TIMESTAMPTZ(n)
Definition: timestamp.h:36
#define elog(elevel,...)
Definition: elog.h:228
int i
Datum pg_replication_origin_session_progress(PG_FUNCTION_ARGS)
Definition: origin.c:1356
static ReplicationState * replication_states
Definition: origin.c:163
void ScanKeyInit(ScanKey entry, AttrNumber attributeNumber, StrategyNumber strategy, RegProcedure procedure, Datum argument)
Definition: scankey.c:76
#define CStringGetTextDatum(s)
Definition: builtins.h:83
void * arg
Definition: c.h:556
#define PG_FUNCTION_ARGS
Definition: fmgr.h:188
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
void LockRelationOid(Oid relid, LOCKMODE lockmode)
Definition: lmgr.c:108
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:89
Datum pg_replication_origin_create(PG_FUNCTION_ARGS)
Definition: origin.c:1220
#define REPLICATION_ORIGIN_PROGRESS_COLS
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:39
void XLogBeginInsert(void)
Definition: xloginsert.c:120
#define PG_RETURN_OID(x)
Definition: fmgr.h:350
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:94
XLogRecPtr replorigin_get_progress(RepOriginId node, bool flush)
Definition: origin.c:980
void CatalogTupleInsert(Relation heapRel, HeapTuple tup)
Definition: indexing.c:183
#define PG_RETURN_NULL()
Definition: fmgr.h:335
#define read(a, b, c)
Definition: win32.h:13
#define BTEqualStrategyNumber
Definition: stratnum.h:31
#define offsetof(type, field)
Definition: c.h:662