PostgreSQL Source Code  git master
origin.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * origin.c
4  * Logical replication progress tracking support.
5  *
6  * Copyright (c) 2013-2019, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * src/backend/replication/logical/origin.c
10  *
11  * NOTES
12  *
13  * This file provides the following:
14  * * An infrastructure to name nodes in a replication setup
15  * * A facility to store and persist replication progress in an
16  * efficient and durable manner.
17  *
18  * A replication origin consists of a descriptive, user-defined, external
19  * name and a short, thus space-efficient, internal 2 byte one. This split
20  * exists because replication origins have to be stored in WAL and shared
21  * memory and long descriptors would be inefficient. For now we only use 2 bytes
22  * for the internal id of a replication origin as it seems unlikely that there
23  * soon will be more than 65k nodes in one replication setup; and using only
24  * two bytes allows us to be more space efficient.
25  *
26  * Replication progress is tracked in a shared memory table
27  * (ReplicationState) that's dumped to disk every checkpoint. Entries
28  * ('slots') in this table are identified by the internal id. That's the case
29  * because it allows to increase replication progress during crash
30  * recovery. To allow doing so we store the original LSN (from the originating
31  * system) of a transaction in the commit record. That allows to recover the
32  * precise replayed state after crash recovery; without requiring synchronous
33  * commits. Allowing logical replication to use asynchronous commit is
34  * generally good for performance, but especially important as it allows a
35  * single threaded replay process to keep up with a source that has multiple
36  * backends generating changes concurrently. For efficiency and simplicity
37  * reasons a backend can setup one replication origin that's from then used as
38  * the source of changes produced by the backend, until reset again.
39  *
40  * This infrastructure is intended to be used in cooperation with logical
41  * decoding. When replaying from a remote system the configured origin is
42  * provided to output plugins, allowing prevention of replication loops and
43  * other filtering.
44  *
45  * There are several levels of locking at work:
46  *
47  * * To create and drop replication origins an exclusive lock on
48  * pg_replication_origin is required for the duration. That allows us to
49  * safely and conflict free assign new origins using a dirty snapshot.
50  *
51  * * When creating an in-memory replication progress slot the ReplicationOrigin
52  * LWLock has to be held exclusively; when iterating over the replication
53  * progress a shared lock has to be held, the same when advancing the
54  * replication progress of an individual backend that has not setup as the
55  * session's replication origin.
56  *
57  * * When manipulating or looking at the remote_lsn and local_lsn fields of a
58  * replication progress slot that slot's lwlock has to be held. That's
59  * primarily because we do not assume 8 byte writes (the LSN) are atomic on
60  * all our platforms, but it also simplifies memory ordering concerns
61  * between the remote and local lsn. We use a lwlock instead of a spinlock
62  * so it's less harmful to hold the lock over a WAL write
63  * (cf. replorigin_advance).
64  *
65  * ---------------------------------------------------------------------------
66  */
67 
68 #include "postgres.h"
69 
70 #include <unistd.h>
71 #include <sys/stat.h>
72 
73 #include "funcapi.h"
74 #include "miscadmin.h"
75 
76 #include "access/genam.h"
77 #include "access/htup_details.h"
78 #include "access/table.h"
79 #include "access/xact.h"
80 
81 #include "catalog/catalog.h"
82 #include "catalog/indexing.h"
83 #include "nodes/execnodes.h"
84 
85 #include "replication/origin.h"
86 #include "replication/logical.h"
87 #include "pgstat.h"
88 #include "storage/fd.h"
89 #include "storage/ipc.h"
90 #include "storage/lmgr.h"
92 #include "storage/copydir.h"
93 
94 #include "utils/builtins.h"
95 #include "utils/fmgroids.h"
96 #include "utils/pg_lsn.h"
97 #include "utils/rel.h"
98 #include "utils/syscache.h"
99 #include "utils/snapmgr.h"
100 
101 /*
102  * Replay progress of a single remote node.
103  */
104 typedef struct ReplicationState
105 {
106  /*
107  * Local identifier for the remote node.
108  */
110 
111  /*
112  * Location of the latest commit from the remote side.
113  */
115 
116  /*
117  * Remember the local lsn of the commit record so we can XLogFlush() to it
118  * during a checkpoint so we know the commit record actually is safe on
119  * disk.
120  */
122 
123  /*
124  * PID of backend that's acquired slot, or 0 if none.
125  */
127 
128  /*
129  * Condition variable that's signalled when acquired_by changes.
130  */
132 
133  /*
134  * Lock protecting remote_lsn and local_lsn.
135  */
138 
139 /*
140  * On disk version of ReplicationState.
141  */
143 {
147 
148 
149 typedef struct ReplicationStateCtl
150 {
152  ReplicationState states[FLEXIBLE_ARRAY_MEMBER];
154 
155 /* external variables */
159 
160 /*
161  * Base address into a shared memory array of replication states of size
162  * max_replication_slots.
163  *
164  * XXX: Should we use a separate variable to size this rather than
165  * max_replication_slots?
166  */
169 
170 /*
171  * Backend-local, cached element from ReplicationState for use in a backend
172  * replaying remote commits, so we don't have to search ReplicationState for
173  * the backend's current RepOriginId.
174  */
176 
177 /* Magic for on disk files. */
178 #define REPLICATION_STATE_MAGIC ((uint32) 0x1257DADE)
179 
180 static void
181 replorigin_check_prerequisites(bool check_slots, bool recoveryOK)
182 {
183  if (!superuser())
184  ereport(ERROR,
185  (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
186  errmsg("only superusers can query or manipulate replication origins")));
187 
188  if (check_slots && max_replication_slots == 0)
189  ereport(ERROR,
190  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
191  errmsg("cannot query or manipulate replication origin when max_replication_slots = 0")));
192 
193  if (!recoveryOK && RecoveryInProgress())
194  ereport(ERROR,
195  (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
196  errmsg("cannot manipulate replication origins during recovery")));
197 
198 }
199 
200 
201 /* ---------------------------------------------------------------------------
202  * Functions for working with replication origins themselves.
203  * ---------------------------------------------------------------------------
204  */
205 
206 /*
207  * Check for a persistent replication origin identified by name.
208  *
209  * Returns InvalidOid if the node isn't known yet and missing_ok is true.
210  */
212 replorigin_by_name(char *roname, bool missing_ok)
213 {
216  HeapTuple tuple;
217  Datum roname_d;
218 
219  roname_d = CStringGetTextDatum(roname);
220 
221  tuple = SearchSysCache1(REPLORIGNAME, roname_d);
222  if (HeapTupleIsValid(tuple))
223  {
224  ident = (Form_pg_replication_origin) GETSTRUCT(tuple);
225  roident = ident->roident;
226  ReleaseSysCache(tuple);
227  }
228  else if (!missing_ok)
229  ereport(ERROR,
230  (errcode(ERRCODE_UNDEFINED_OBJECT),
231  errmsg("replication origin \"%s\" does not exist",
232  roname)));
233 
234  return roident;
235 }
236 
237 /*
238  * Create a replication origin.
239  *
240  * Needs to be called in a transaction.
241  */
243 replorigin_create(char *roname)
244 {
245  Oid roident;
246  HeapTuple tuple = NULL;
247  Relation rel;
248  Datum roname_d;
249  SnapshotData SnapshotDirty;
250  SysScanDesc scan;
252 
253  roname_d = CStringGetTextDatum(roname);
254 
256 
257  /*
258  * We need the numeric replication origin to be 16bit wide, so we cannot
259  * rely on the normal oid allocation. Instead we simply scan
260  * pg_replication_origin for the first unused id. That's not particularly
261  * efficient, but this should be a fairly infrequent operation - we can
262  * easily spend a bit more code on this when it turns out it needs to be
263  * faster.
264  *
265  * We handle concurrency by taking an exclusive lock (allowing reads!)
266  * over the table for the duration of the search. Because we use a "dirty
267  * snapshot" we can read rows that other in-progress sessions have
268  * written, even though they would be invisible with normal snapshots. Due
269  * to the exclusive lock there's no danger that new rows can appear while
270  * we're checking.
271  */
272  InitDirtySnapshot(SnapshotDirty);
273 
274  rel = table_open(ReplicationOriginRelationId, ExclusiveLock);
275 
276  for (roident = InvalidOid + 1; roident < PG_UINT16_MAX; roident++)
277  {
278  bool nulls[Natts_pg_replication_origin];
279  Datum values[Natts_pg_replication_origin];
280  bool collides;
281 
283 
284  ScanKeyInit(&key,
285  Anum_pg_replication_origin_roident,
286  BTEqualStrategyNumber, F_OIDEQ,
287  ObjectIdGetDatum(roident));
288 
290  true /* indexOK */ ,
291  &SnapshotDirty,
292  1, &key);
293 
294  collides = HeapTupleIsValid(systable_getnext(scan));
295 
296  systable_endscan(scan);
297 
298  if (!collides)
299  {
300  /*
301  * Ok, found an unused roident, insert the new row and do a CCI,
302  * so our callers can look it up if they want to.
303  */
304  memset(&nulls, 0, sizeof(nulls));
305 
306  values[Anum_pg_replication_origin_roident - 1] = ObjectIdGetDatum(roident);
307  values[Anum_pg_replication_origin_roname - 1] = roname_d;
308 
309  tuple = heap_form_tuple(RelationGetDescr(rel), values, nulls);
310  CatalogTupleInsert(rel, tuple);
312  break;
313  }
314  }
315 
316  /* now release lock again, */
318 
319  if (tuple == NULL)
320  ereport(ERROR,
321  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
322  errmsg("could not find free replication origin OID")));
323 
324  heap_freetuple(tuple);
325  return roident;
326 }
327 
328 
329 /*
330  * Drop replication origin.
331  *
332  * Needs to be called in a transaction.
333  */
334 void
336 {
337  HeapTuple tuple;
338  Relation rel;
339  int i;
340 
342 
343  /*
344  * To interlock against concurrent drops, we hold ExclusiveLock on
345  * pg_replication_origin throughout this function.
346  */
347  rel = table_open(ReplicationOriginRelationId, ExclusiveLock);
348 
349  /*
350  * First, clean up the slot state info, if there is any matching slot.
351  */
352 restart:
353  tuple = NULL;
354  LWLockAcquire(ReplicationOriginLock, LW_EXCLUSIVE);
355 
356  for (i = 0; i < max_replication_slots; i++)
357  {
358  ReplicationState *state = &replication_states[i];
359 
360  if (state->roident == roident)
361  {
362  /* found our slot, is it busy? */
363  if (state->acquired_by != 0)
364  {
365  ConditionVariable *cv;
366 
367  if (nowait)
368  ereport(ERROR,
369  (errcode(ERRCODE_OBJECT_IN_USE),
370  errmsg("could not drop replication origin with OID %d, in use by PID %d",
371  state->roident,
372  state->acquired_by)));
373 
374  /*
375  * We must wait and then retry. Since we don't know which CV
376  * to wait on until here, we can't readily use
377  * ConditionVariablePrepareToSleep (calling it here would be
378  * wrong, since we could miss the signal if we did so); just
379  * use ConditionVariableSleep directly.
380  */
381  cv = &state->origin_cv;
382 
383  LWLockRelease(ReplicationOriginLock);
384 
386  goto restart;
387  }
388 
389  /* first make a WAL log entry */
390  {
391  xl_replorigin_drop xlrec;
392 
393  xlrec.node_id = roident;
394  XLogBeginInsert();
395  XLogRegisterData((char *) (&xlrec), sizeof(xlrec));
396  XLogInsert(RM_REPLORIGIN_ID, XLOG_REPLORIGIN_DROP);
397  }
398 
399  /* then clear the in-memory slot */
400  state->roident = InvalidRepOriginId;
401  state->remote_lsn = InvalidXLogRecPtr;
402  state->local_lsn = InvalidXLogRecPtr;
403  break;
404  }
405  }
406  LWLockRelease(ReplicationOriginLock);
408 
409  /*
410  * Now, we can delete the catalog entry.
411  */
413  if (!HeapTupleIsValid(tuple))
414  elog(ERROR, "cache lookup failed for replication origin with oid %u",
415  roident);
416 
417  CatalogTupleDelete(rel, &tuple->t_self);
418  ReleaseSysCache(tuple);
419 
421 
422  /* now release lock again */
424 }
425 
426 
427 /*
428  * Look up a replication origin via its OID and return the name.
429  *
430  * The external name is palloc'd in the calling context.
431  *
432  * Returns true if the origin is known, false otherwise.
433  */
434 bool
435 replorigin_by_oid(RepOriginId roident, bool missing_ok, char **roname)
436 {
437  HeapTuple tuple;
439 
440  Assert(OidIsValid((Oid) roident));
441  Assert(roident != InvalidRepOriginId);
442  Assert(roident != DoNotReplicateId);
443 
445  ObjectIdGetDatum((Oid) roident));
446 
447  if (HeapTupleIsValid(tuple))
448  {
449  ric = (Form_pg_replication_origin) GETSTRUCT(tuple);
450  *roname = text_to_cstring(&ric->roname);
451  ReleaseSysCache(tuple);
452 
453  return true;
454  }
455  else
456  {
457  *roname = NULL;
458 
459  if (!missing_ok)
460  ereport(ERROR,
461  (errcode(ERRCODE_UNDEFINED_OBJECT),
462  errmsg("replication origin with OID %u does not exist",
463  roident)));
464 
465  return false;
466  }
467 }
468 
469 
470 /* ---------------------------------------------------------------------------
471  * Functions for handling replication progress.
472  * ---------------------------------------------------------------------------
473  */
474 
475 Size
477 {
478  Size size = 0;
479 
480  /*
481  * XXX: max_replication_slots is arguably the wrong thing to use, as here
482  * we keep the replay state of *remote* transactions. But for now it seems
483  * sufficient to reuse it, lest we introduce a separate GUC.
484  */
485  if (max_replication_slots == 0)
486  return size;
487 
488  size = add_size(size, offsetof(ReplicationStateCtl, states));
489 
490  size = add_size(size,
492  return size;
493 }
494 
495 void
497 {
498  bool found;
499 
500  if (max_replication_slots == 0)
501  return;
502 
503  replication_states_ctl = (ReplicationStateCtl *)
504  ShmemInitStruct("ReplicationOriginState",
506  &found);
507  replication_states = replication_states_ctl->states;
508 
509  if (!found)
510  {
511  int i;
512 
513  replication_states_ctl->tranche_id = LWTRANCHE_REPLICATION_ORIGIN;
514 
515  MemSet(replication_states, 0, ReplicationOriginShmemSize());
516 
517  for (i = 0; i < max_replication_slots; i++)
518  {
519  LWLockInitialize(&replication_states[i].lock,
520  replication_states_ctl->tranche_id);
521  ConditionVariableInit(&replication_states[i].origin_cv);
522  }
523  }
524 
525  LWLockRegisterTranche(replication_states_ctl->tranche_id,
526  "replication_origin");
527 }
528 
529 /* ---------------------------------------------------------------------------
530  * Perform a checkpoint of each replication origin's progress with respect to
531  * the replayed remote_lsn. Make sure that all transactions we refer to in the
532  * checkpoint (local_lsn) are actually on-disk. This might not yet be the case
533  * if the transactions were originally committed asynchronously.
534  *
535  * We store checkpoints in the following format:
536  * +-------+------------------------+------------------+-----+--------+
537  * | MAGIC | ReplicationStateOnDisk | struct Replic... | ... | CRC32C | EOF
538  * +-------+------------------------+------------------+-----+--------+
539  *
540  * So it's just the magic, followed by the statically sized
541  * ReplicationStateOnDisk structs. Note that the maximum number of
542  * ReplicationState is determined by max_replication_slots.
543  * ---------------------------------------------------------------------------
544  */
545 void
547 {
548  const char *tmppath = "pg_logical/replorigin_checkpoint.tmp";
549  const char *path = "pg_logical/replorigin_checkpoint";
550  int tmpfd;
551  int i;
553  pg_crc32c crc;
554 
555  if (max_replication_slots == 0)
556  return;
557 
558  INIT_CRC32C(crc);
559 
560  /* make sure no old temp file is remaining */
561  if (unlink(tmppath) < 0 && errno != ENOENT)
562  ereport(PANIC,
564  errmsg("could not remove file \"%s\": %m",
565  tmppath)));
566 
567  /*
568  * no other backend can perform this at the same time, we're protected by
569  * CheckpointLock.
570  */
571  tmpfd = OpenTransientFile(tmppath,
572  O_CREAT | O_EXCL | O_WRONLY | PG_BINARY);
573  if (tmpfd < 0)
574  ereport(PANIC,
576  errmsg("could not create file \"%s\": %m",
577  tmppath)));
578 
579  /* write magic */
580  errno = 0;
581  if ((write(tmpfd, &magic, sizeof(magic))) != sizeof(magic))
582  {
583  /* if write didn't set errno, assume problem is no disk space */
584  if (errno == 0)
585  errno = ENOSPC;
586  ereport(PANIC,
588  errmsg("could not write to file \"%s\": %m",
589  tmppath)));
590  }
591  COMP_CRC32C(crc, &magic, sizeof(magic));
592 
593  /* prevent concurrent creations/drops */
594  LWLockAcquire(ReplicationOriginLock, LW_SHARED);
595 
596  /* write actual data */
597  for (i = 0; i < max_replication_slots; i++)
598  {
599  ReplicationStateOnDisk disk_state;
600  ReplicationState *curstate = &replication_states[i];
602 
603  if (curstate->roident == InvalidRepOriginId)
604  continue;
605 
606  /* zero, to avoid uninitialized padding bytes */
607  memset(&disk_state, 0, sizeof(disk_state));
608 
609  LWLockAcquire(&curstate->lock, LW_SHARED);
610 
611  disk_state.roident = curstate->roident;
612 
613  disk_state.remote_lsn = curstate->remote_lsn;
614  local_lsn = curstate->local_lsn;
615 
616  LWLockRelease(&curstate->lock);
617 
618  /* make sure we only write out a commit that's persistent */
619  XLogFlush(local_lsn);
620 
621  errno = 0;
622  if ((write(tmpfd, &disk_state, sizeof(disk_state))) !=
623  sizeof(disk_state))
624  {
625  /* if write didn't set errno, assume problem is no disk space */
626  if (errno == 0)
627  errno = ENOSPC;
628  ereport(PANIC,
630  errmsg("could not write to file \"%s\": %m",
631  tmppath)));
632  }
633 
634  COMP_CRC32C(crc, &disk_state, sizeof(disk_state));
635  }
636 
637  LWLockRelease(ReplicationOriginLock);
638 
639  /* write out the CRC */
640  FIN_CRC32C(crc);
641  errno = 0;
642  if ((write(tmpfd, &crc, sizeof(crc))) != sizeof(crc))
643  {
644  /* if write didn't set errno, assume problem is no disk space */
645  if (errno == 0)
646  errno = ENOSPC;
647  ereport(PANIC,
649  errmsg("could not write to file \"%s\": %m",
650  tmppath)));
651  }
652 
653  if (CloseTransientFile(tmpfd) != 0)
654  ereport(PANIC,
656  errmsg("could not close file \"%s\": %m",
657  tmppath)));
658 
659  /* fsync, rename to permanent file, fsync file and directory */
660  durable_rename(tmppath, path, PANIC);
661 }
662 
663 /*
664  * Recover replication replay status from checkpoint data saved earlier by
665  * CheckPointReplicationOrigin.
666  *
667  * This only needs to be called at startup and *not* during every checkpoint
668  * read during recovery (e.g. in HS or PITR from a base backup) afterwards. All
669  * state thereafter can be recovered by looking at commit records.
670  */
671 void
673 {
674  const char *path = "pg_logical/replorigin_checkpoint";
675  int fd;
676  int readBytes;
678  int last_state = 0;
679  pg_crc32c file_crc;
680  pg_crc32c crc;
681 
682  /* don't want to overwrite already existing state */
683 #ifdef USE_ASSERT_CHECKING
684  static bool already_started = false;
685 
686  Assert(!already_started);
687  already_started = true;
688 #endif
689 
690  if (max_replication_slots == 0)
691  return;
692 
693  INIT_CRC32C(crc);
694 
695  elog(DEBUG2, "starting up replication origin progress state");
696 
697  fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
698 
699  /*
700  * might have had max_replication_slots == 0 last run, or we just brought
701  * up a standby.
702  */
703  if (fd < 0 && errno == ENOENT)
704  return;
705  else if (fd < 0)
706  ereport(PANIC,
708  errmsg("could not open file \"%s\": %m",
709  path)));
710 
711  /* verify magic, that is written even if nothing was active */
712  readBytes = read(fd, &magic, sizeof(magic));
713  if (readBytes != sizeof(magic))
714  {
715  if (readBytes < 0)
716  ereport(PANIC,
718  errmsg("could not read file \"%s\": %m",
719  path)));
720  else
721  ereport(PANIC,
723  errmsg("could not read file \"%s\": read %d of %zu",
724  path, readBytes, sizeof(magic))));
725  }
726  COMP_CRC32C(crc, &magic, sizeof(magic));
727 
728  if (magic != REPLICATION_STATE_MAGIC)
729  ereport(PANIC,
730  (errmsg("replication checkpoint has wrong magic %u instead of %u",
731  magic, REPLICATION_STATE_MAGIC)));
732 
733  /* we can skip locking here, no other access is possible */
734 
735  /* recover individual states, until there are no more to be found */
736  while (true)
737  {
738  ReplicationStateOnDisk disk_state;
739 
740  readBytes = read(fd, &disk_state, sizeof(disk_state));
741 
742  /* no further data */
743  if (readBytes == sizeof(crc))
744  {
745  /* not pretty, but simple ... */
746  file_crc = *(pg_crc32c *) &disk_state;
747  break;
748  }
749 
750  if (readBytes < 0)
751  {
752  ereport(PANIC,
754  errmsg("could not read file \"%s\": %m",
755  path)));
756  }
757 
758  if (readBytes != sizeof(disk_state))
759  {
760  ereport(PANIC,
762  errmsg("could not read file \"%s\": read %d of %zu",
763  path, readBytes, sizeof(disk_state))));
764  }
765 
766  COMP_CRC32C(crc, &disk_state, sizeof(disk_state));
767 
768  if (last_state == max_replication_slots)
769  ereport(PANIC,
770  (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
771  errmsg("could not find free replication state, increase max_replication_slots")));
772 
773  /* copy data to shared memory */
774  replication_states[last_state].roident = disk_state.roident;
775  replication_states[last_state].remote_lsn = disk_state.remote_lsn;
776  last_state++;
777 
778  elog(LOG, "recovered replication state of node %u to %X/%X",
779  disk_state.roident,
780  (uint32) (disk_state.remote_lsn >> 32),
781  (uint32) disk_state.remote_lsn);
782  }
783 
784  /* now check checksum */
785  FIN_CRC32C(crc);
786  if (file_crc != crc)
787  ereport(PANIC,
788  (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
789  errmsg("replication slot checkpoint has wrong checksum %u, expected %u",
790  crc, file_crc)));
791 
792  if (CloseTransientFile(fd) != 0)
793  ereport(PANIC,
795  errmsg("could not close file \"%s\": %m",
796  path)));
797 }
798 
799 void
801 {
802  uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
803 
804  switch (info)
805  {
806  case XLOG_REPLORIGIN_SET:
807  {
808  xl_replorigin_set *xlrec =
809  (xl_replorigin_set *) XLogRecGetData(record);
810 
812  xlrec->remote_lsn, record->EndRecPtr,
813  xlrec->force /* backward */ ,
814  false /* WAL log */ );
815  break;
816  }
818  {
819  xl_replorigin_drop *xlrec;
820  int i;
821 
822  xlrec = (xl_replorigin_drop *) XLogRecGetData(record);
823 
824  for (i = 0; i < max_replication_slots; i++)
825  {
826  ReplicationState *state = &replication_states[i];
827 
828  /* found our slot */
829  if (state->roident == xlrec->node_id)
830  {
831  /* reset entry */
832  state->roident = InvalidRepOriginId;
833  state->remote_lsn = InvalidXLogRecPtr;
834  state->local_lsn = InvalidXLogRecPtr;
835  break;
836  }
837  }
838  break;
839  }
840  default:
841  elog(PANIC, "replorigin_redo: unknown op code %u", info);
842  }
843 }
844 
845 
846 /*
847  * Tell the replication origin progress machinery that a commit from 'node'
848  * that originated at the LSN remote_commit on the remote node was replayed
849  * successfully and that we don't need to do so again. In combination with
850  * setting up replorigin_session_origin_lsn and replorigin_session_origin
851  * that ensures we won't lose knowledge about that after a crash if the
852  * transaction had a persistent effect (think of asynchronous commits).
853  *
854  * local_commit needs to be a local LSN of the commit so that we can make sure
855  * upon a checkpoint that enough WAL has been persisted to disk.
856  *
857  * Needs to be called with a RowExclusiveLock on pg_replication_origin,
858  * unless running in recovery.
859  */
860 void
862  XLogRecPtr remote_commit, XLogRecPtr local_commit,
863  bool go_backward, bool wal_log)
864 {
865  int i;
866  ReplicationState *replication_state = NULL;
867  ReplicationState *free_state = NULL;
868 
869  Assert(node != InvalidRepOriginId);
870 
871  /* we don't track DoNotReplicateId */
872  if (node == DoNotReplicateId)
873  return;
874 
875  /*
876  * XXX: For the case where this is called by WAL replay, it'd be more
877  * efficient to restore into a backend local hashtable and only dump into
878  * shmem after recovery is finished. Let's wait with implementing that
879  * till it's shown to be a measurable expense
880  */
881 
882  /* Lock exclusively, as we may have to create a new table entry. */
883  LWLockAcquire(ReplicationOriginLock, LW_EXCLUSIVE);
884 
885  /*
886  * Search for either an existing slot for the origin, or a free one we can
887  * use.
888  */
889  for (i = 0; i < max_replication_slots; i++)
890  {
891  ReplicationState *curstate = &replication_states[i];
892 
893  /* remember where to insert if necessary */
894  if (curstate->roident == InvalidRepOriginId &&
895  free_state == NULL)
896  {
897  free_state = curstate;
898  continue;
899  }
900 
901  /* not our slot */
902  if (curstate->roident != node)
903  {
904  continue;
905  }
906 
907  /* ok, found slot */
908  replication_state = curstate;
909 
910  LWLockAcquire(&replication_state->lock, LW_EXCLUSIVE);
911 
912  /* Make sure it's not used by somebody else */
913  if (replication_state->acquired_by != 0)
914  {
915  ereport(ERROR,
916  (errcode(ERRCODE_OBJECT_IN_USE),
917  errmsg("replication origin with OID %d is already active for PID %d",
918  replication_state->roident,
919  replication_state->acquired_by)));
920  }
921 
922  break;
923  }
924 
925  if (replication_state == NULL && free_state == NULL)
926  ereport(ERROR,
927  (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
928  errmsg("could not find free replication state slot for replication origin with OID %u",
929  node),
930  errhint("Increase max_replication_slots and try again.")));
931 
932  if (replication_state == NULL)
933  {
934  /* initialize new slot */
935  LWLockAcquire(&free_state->lock, LW_EXCLUSIVE);
936  replication_state = free_state;
937  Assert(replication_state->remote_lsn == InvalidXLogRecPtr);
938  Assert(replication_state->local_lsn == InvalidXLogRecPtr);
939  replication_state->roident = node;
940  }
941 
942  Assert(replication_state->roident != InvalidRepOriginId);
943 
944  /*
945  * If somebody "forcefully" sets this slot, WAL log it, so it's durable
946  * and the standby gets the message. Primarily this will be called during
947  * WAL replay (of commit records) where no WAL logging is necessary.
948  */
949  if (wal_log)
950  {
951  xl_replorigin_set xlrec;
952 
953  xlrec.remote_lsn = remote_commit;
954  xlrec.node_id = node;
955  xlrec.force = go_backward;
956 
957  XLogBeginInsert();
958  XLogRegisterData((char *) (&xlrec), sizeof(xlrec));
959 
960  XLogInsert(RM_REPLORIGIN_ID, XLOG_REPLORIGIN_SET);
961  }
962 
963  /*
964  * Due to - harmless - race conditions during a checkpoint we could see
965  * values here that are older than the ones we already have in memory.
966  * Don't overwrite those.
967  */
968  if (go_backward || replication_state->remote_lsn < remote_commit)
969  replication_state->remote_lsn = remote_commit;
970  if (local_commit != InvalidXLogRecPtr &&
971  (go_backward || replication_state->local_lsn < local_commit))
972  replication_state->local_lsn = local_commit;
973  LWLockRelease(&replication_state->lock);
974 
975  /*
976  * Release *after* changing the LSNs, slot isn't acquired and thus could
977  * otherwise be dropped anytime.
978  */
979  LWLockRelease(ReplicationOriginLock);
980 }
981 
982 
985 {
986  int i;
989 
990  /* prevent slots from being concurrently dropped */
991  LWLockAcquire(ReplicationOriginLock, LW_SHARED);
992 
993  for (i = 0; i < max_replication_slots; i++)
994  {
996 
997  state = &replication_states[i];
998 
999  if (state->roident == node)
1000  {
1001  LWLockAcquire(&state->lock, LW_SHARED);
1002 
1003  remote_lsn = state->remote_lsn;
1004  local_lsn = state->local_lsn;
1005 
1006  LWLockRelease(&state->lock);
1007 
1008  break;
1009  }
1010  }
1011 
1012  LWLockRelease(ReplicationOriginLock);
1013 
1014  if (flush && local_lsn != InvalidXLogRecPtr)
1015  XLogFlush(local_lsn);
1016 
1017  return remote_lsn;
1018 }
1019 
1020 /*
1021  * Tear down a (possibly) configured session replication origin during process
1022  * exit.
1023  */
1024 static void
1026 {
1027  ConditionVariable *cv = NULL;
1028 
1029  LWLockAcquire(ReplicationOriginLock, LW_EXCLUSIVE);
1030 
1031  if (session_replication_state != NULL &&
1032  session_replication_state->acquired_by == MyProcPid)
1033  {
1034  cv = &session_replication_state->origin_cv;
1035 
1036  session_replication_state->acquired_by = 0;
1037  session_replication_state = NULL;
1038  }
1039 
1040  LWLockRelease(ReplicationOriginLock);
1041 
1042  if (cv)
1044 }
1045 
1046 /*
1047  * Setup a replication origin in the shared memory struct if it doesn't
1048  * already exist and cache access to the specific ReplicationState so the
1049  * array doesn't have to be searched when calling
1050  * replorigin_session_advance().
1051  *
1052  * Obviously only one such cached origin can exist per process and the current
1053  * cached value can only be set again after the previous value is torn down
1054  * with replorigin_session_reset().
1055  */
1056 void
1058 {
1059  static bool registered_cleanup;
1060  int i;
1061  int free_slot = -1;
1062 
1063  if (!registered_cleanup)
1064  {
1066  registered_cleanup = true;
1067  }
1068 
1070 
1071  if (session_replication_state != NULL)
1072  ereport(ERROR,
1073  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1074  errmsg("cannot setup replication origin when one is already setup")));
1075 
1076  /* Lock exclusively, as we may have to create a new table entry. */
1077  LWLockAcquire(ReplicationOriginLock, LW_EXCLUSIVE);
1078 
1079  /*
1080  * Search for either an existing slot for the origin, or a free one we can
1081  * use.
1082  */
1083  for (i = 0; i < max_replication_slots; i++)
1084  {
1085  ReplicationState *curstate = &replication_states[i];
1086 
1087  /* remember where to insert if necessary */
1088  if (curstate->roident == InvalidRepOriginId &&
1089  free_slot == -1)
1090  {
1091  free_slot = i;
1092  continue;
1093  }
1094 
1095  /* not our slot */
1096  if (curstate->roident != node)
1097  continue;
1098 
1099  else if (curstate->acquired_by != 0)
1100  {
1101  ereport(ERROR,
1102  (errcode(ERRCODE_OBJECT_IN_USE),
1103  errmsg("replication origin %d is already active for PID %d",
1104  curstate->roident, curstate->acquired_by)));
1105  }
1106 
1107  /* ok, found slot */
1108  session_replication_state = curstate;
1109  }
1110 
1111 
1112  if (session_replication_state == NULL && free_slot == -1)
1113  ereport(ERROR,
1114  (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
1115  errmsg("could not find free replication state slot for replication origin with OID %u",
1116  node),
1117  errhint("Increase max_replication_slots and try again.")));
1118  else if (session_replication_state == NULL)
1119  {
1120  /* initialize new slot */
1121  session_replication_state = &replication_states[free_slot];
1122  Assert(session_replication_state->remote_lsn == InvalidXLogRecPtr);
1123  Assert(session_replication_state->local_lsn == InvalidXLogRecPtr);
1124  session_replication_state->roident = node;
1125  }
1126 
1127 
1128  Assert(session_replication_state->roident != InvalidRepOriginId);
1129 
1130  session_replication_state->acquired_by = MyProcPid;
1131 
1132  LWLockRelease(ReplicationOriginLock);
1133 
1134  /* probably this one is pointless */
1135  ConditionVariableBroadcast(&session_replication_state->origin_cv);
1136 }
1137 
1138 /*
1139  * Reset replay state previously set up in this session.
1140  *
1141  * This function may only be called if an origin was set up with
1142  * replorigin_session_setup().
      *
      * Raises an ERROR ("no replication origin is configured") when the session
      * has no origin.  Otherwise, while holding ReplicationOriginLock in
      * exclusive mode, it clears the entry's acquired_by field and drops the
      * session's pointer to the entry.
      *
      * NOTE(review): this listing skips source lines 1145, 1149 and 1164.  The
      * cross-reference index at the end of the listing places
      * "void replorigin_session_reset(void)" at line 1145.  'cv' is assigned
      * below but its use is not visible; presumably the skipped line 1164
      * consumes it -- confirm against the complete file.
1143  */
1144 void
1146 {
1147  ConditionVariable *cv;
1148 
1150 
1151  if (session_replication_state == NULL)
1152  ereport(ERROR,
1153  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1154  errmsg("no replication origin is configured")));
1155 
1156  LWLockAcquire(ReplicationOriginLock, LW_EXCLUSIVE);
1157 
1158  session_replication_state->acquired_by = 0;
      /* capture the CV address now; the session pointer is cleared next */
1159  cv = &session_replication_state->origin_cv;
1160  session_replication_state = NULL;
1161 
1162  LWLockRelease(ReplicationOriginLock);
1163 
1165 }
1166 
1167 /*
1168  * Do the same work replorigin_advance() does, just on the session's
1169  * configured origin.
1170  *
1171  * This is noticeably cheaper than using replorigin_advance().
      *
      * The session must already have an origin configured; this is checked
      * only by Assert.  With the entry's own lock held exclusively, local_lsn
      * and remote_lsn are each replaced by the supplied value only when the
      * stored value is smaller, so neither position is moved backwards here.
      *
      * NOTE(review): this listing skips source line 1174.  The cross-reference
      * index at the end of the listing places
      * "replorigin_session_advance(XLogRecPtr remote_commit, XLogRecPtr
      * local_commit)" there.
1172  */
1173 void
1175 {
1176  Assert(session_replication_state != NULL);
1177  Assert(session_replication_state->roident != InvalidRepOriginId);
1178 
1179  LWLockAcquire(&session_replication_state->lock, LW_EXCLUSIVE);
1180  if (session_replication_state->local_lsn < local_commit)
1181  session_replication_state->local_lsn = local_commit;
1182  if (session_replication_state->remote_lsn < remote_commit)
1183  session_replication_state->remote_lsn = remote_commit;
1184  LWLockRelease(&session_replication_state->lock);
1185 }
1186 
1187 /*
1188  * Ask the machinery about the point up to which we successfully replayed
1189  * changes from an already set up replication origin.
      *
      * remote_lsn and local_lsn are copied out while the entry's lock is held
      * in shared mode.  If 'flush' is true and local_lsn is valid,
      * XLogFlush(local_lsn) is called after that lock has been released.  The
      * return value is remote_lsn.  The session must have an origin configured;
      * this is checked only by Assert.
      *
      * NOTE(review): this listing skips source lines 1192 and 1194-1195.  The
      * cross-reference index at the end of the listing places
      * "replorigin_session_get_progress(bool flush)" at line 1192; the
      * declarations of remote_lsn and local_lsn are presumably on the other
      * two skipped lines.
1190  */
1191 XLogRecPtr
1193 {
1196 
1197  Assert(session_replication_state != NULL);
1198 
1199  LWLockAcquire(&session_replication_state->lock, LW_SHARED);
1200  remote_lsn = session_replication_state->remote_lsn;
1201  local_lsn = session_replication_state->local_lsn;
1202  LWLockRelease(&session_replication_state->lock);
1203 
1204  if (flush && local_lsn != InvalidXLogRecPtr)
1205  XLogFlush(local_lsn);
1206 
1207  return remote_lsn;
1208 }
1209 
1210 
1211 
1212 /* ---------------------------------------------------------------------------
1213  * SQL functions for working with replication origin.
1214  *
1215  * These mostly should be fairly short wrappers around more generic functions.
1216  * ---------------------------------------------------------------------------
1217  */
1218 
1219 /*
1220  * Create a replication origin for the passed-in name, and return the
1221  * assigned oid.
      *
      * Raises an ERROR with ERRCODE_RESERVED_NAME when IsReservedName() rejects
      * the name.  When built with ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS, a
      * WARNING is emitted for names that do not start with "regress_"; creation
      * still proceeds in that case.
      *
      * NOTE(review): this listing skips source lines 1224, 1227 and 1231.  The
      * cross-reference index at the end of the listing places
      * "pg_replication_origin_create(PG_FUNCTION_ARGS)" at line 1224.  The
      * declaration of roident and the assignment of 'name' are not visible;
      * presumably they are on the other two skipped lines.
1222  */
1223 Datum
1225 {
1226  char *name;
1228 
1229  replorigin_check_prerequisites(false, false);
1230 
1232 
1233  /* Replication origins "pg_xxx" are reserved for internal use */
1234  if (IsReservedName(name))
1235  ereport(ERROR,
1236  (errcode(ERRCODE_RESERVED_NAME),
1237  errmsg("replication origin name \"%s\" is reserved",
1238  name),
1239  errdetail("Origin names starting with \"pg_\" are reserved.")));
1240 
1241  /*
1242  * If built with appropriate switch, whine when regression-testing
1243  * conventions for replication origin names are violated.
1244  */
1245 #ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS
1246  if (strncmp(name, "regress_", 8) != 0)
1247  elog(WARNING, "replication origins created by regression test cases should have names starting with \"regress_\"");
1248 #endif
1249 
1250  roident = replorigin_create(name);
1251 
1252  pfree(name);
1253 
1254  PG_RETURN_OID(roident);
1255 }
1256 
1257 /*
1258  * Drop replication origin.
      *
      * The origin is looked up by name with missing_ok = false, so the lookup
      * is expected to yield a valid id (checked by Assert).  It is then passed
      * to replorigin_drop() with nowait = true.
      *
      * NOTE(review): this listing skips source lines 1261, 1264 and 1268.  The
      * cross-reference index at the end of the listing places
      * "pg_replication_origin_drop(PG_FUNCTION_ARGS)" at line 1261.  The
      * declaration of roident and the assignment of 'name' are not visible;
      * presumably they are on the other two skipped lines.
1259  */
1260 Datum
1262 {
1263  char *name;
1265 
1266  replorigin_check_prerequisites(false, false);
1267 
1269 
1270  roident = replorigin_by_name(name, false);
1271  Assert(OidIsValid(roident));
1272 
1273  replorigin_drop(roident, true);
1274 
1275  pfree(name);
1276 
1277  PG_RETURN_VOID();
1278 }
1279 
1280 /*
1281  * Return oid of a replication origin.
      *
      * The lookup uses missing_ok = true.  When it yields a valid id that id is
      * returned; otherwise the SQL result is NULL.
      *
      * NOTE(review): this listing skips source lines 1284, 1287 and 1291.  The
      * cross-reference index at the end of the listing places
      * "pg_replication_origin_oid(PG_FUNCTION_ARGS)" at line 1284.  The
      * declaration of roident and the assignment of 'name' are not visible;
      * presumably they are on the other two skipped lines.
1282  */
1283 Datum
1285 {
1286  char *name;
1288 
1289  replorigin_check_prerequisites(false, false);
1290 
1292  roident = replorigin_by_name(name, true);
1293 
1294  pfree(name);
1295 
1296  if (OidIsValid(roident))
1297  PG_RETURN_OID(roident);
1298  PG_RETURN_NULL();
1299 }
1300 
1301 /*
1302  * Set up a replication origin for this session.
      *
      * The origin is looked up by name with missing_ok = false and handed to
      * replorigin_session_setup().  Only after that call returns is the id
      * stored in replorigin_session_origin.
      *
      * NOTE(review): this listing skips source lines 1305 and 1312.  The
      * cross-reference index at the end of the listing places
      * "pg_replication_origin_session_setup(PG_FUNCTION_ARGS)" at line 1305.
      * The assignment of 'name' is not visible; presumably it is on line 1312.
1303  */
1304 Datum
1306 {
1307  char *name;
1308  RepOriginId origin;
1309 
1310  replorigin_check_prerequisites(true, false);
1311 
1313  origin = replorigin_by_name(name, false);
1314  replorigin_session_setup(origin);
1315 
1316  replorigin_session_origin = origin;
1317 
1318  pfree(name);
1319 
1320  PG_RETURN_VOID();
1321 }
1322 
1323 /*
1324  * Reset the origin previously set up in this session.
      *
      * NOTE(review): this listing skips source lines 1327, 1331 and 1333-1335,
      * so only the prerequisite check and the void return are visible here.
      * The cross-reference index at the end of the listing places
      * "pg_replication_origin_session_reset(PG_FUNCTION_ARGS)" at line 1327.
      * The statements that perform the reset are among the skipped lines;
      * consult the complete file before relying on details.
1325  */
1326 Datum
1328 {
1329  replorigin_check_prerequisites(true, false);
1330 
1332 
1336 
1337  PG_RETURN_VOID();
1338 }
1339 
1340 /*
1341  * Report whether a replication origin has been set up for this session.
      *
      * NOTE(review): this listing skips source lines 1344 and 1348, so only the
      * prerequisite check is visible here.  The cross-reference index at the
      * end of the listing places
      * "pg_replication_origin_session_is_setup(PG_FUNCTION_ARGS)" at line 1344.
      * The statement that produces the result is presumably on line 1348;
      * confirm against the complete file.
1342  */
1343 Datum
1345 {
1346  replorigin_check_prerequisites(false, false);
1347 
1349 }
1350 
1351 
1352 /*
1353  * Return the replication progress for the origin set up in the current
1354  * session.
1355  *
1356  * If 'flush' is set to true it is ensured that the returned value corresponds
1357  * to a local transaction that has been flushed. This is useful if asynchronous
      * commits are used when replaying replicated transactions.
      *
      * Raises an ERROR ("no replication origin is configured") when the session
      * has no origin.  The SQL result is NULL when the reported position is
      * InvalidXLogRecPtr.
      *
      * NOTE(review): this listing skips source lines 1360 and 1362.  The
      * cross-reference index at the end of the listing places
      * "pg_replication_origin_session_progress(PG_FUNCTION_ARGS)" at line 1360;
      * the declaration of remote_lsn is presumably on line 1362.
1358  */
1359 Datum
1361 {
1363  bool flush = PG_GETARG_BOOL(0);
1364 
1365  replorigin_check_prerequisites(true, false);
1366 
1367  if (session_replication_state == NULL)
1368  ereport(ERROR,
1369  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1370  errmsg("no replication origin is configured")));
1371 
1372  remote_lsn = replorigin_session_get_progress(flush);
1373 
1374  if (remote_lsn == InvalidXLogRecPtr)
1375  PG_RETURN_NULL();
1376 
1377  PG_RETURN_LSN(remote_lsn);
1378 }
1379 
/*
 * Store the LSN passed as argument 0 in replorigin_session_origin_lsn.
 *
 * Raises an ERROR ("no replication origin is configured") when the session
 * has no origin.
 *
 * NOTE(review): this listing skips source lines 1381 and 1393.  The
 * cross-reference index at the end of the listing places
 * "pg_replication_origin_xact_setup(PG_FUNCTION_ARGS)" at line 1381.  Line
 * 1393 sits directly after the LSN assignment and is not visible; consult
 * the complete file for what else is recorded there.
 */
1380 Datum
1382 {
1383  XLogRecPtr location = PG_GETARG_LSN(0);
1384 
1385  replorigin_check_prerequisites(true, false);
1386 
1387  if (session_replication_state == NULL)
1388  ereport(ERROR,
1389  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1390  errmsg("no replication origin is configured")));
1391 
1392  replorigin_session_origin_lsn = location;
1394 
1395  PG_RETURN_VOID();
1396 }
1397 
/*
 * NOTE(review): this listing skips source lines 1399 and 1403-1404, so only
 * the prerequisite check and the void return are visible here.  The
 * cross-reference index at the end of the listing places
 * "pg_replication_origin_xact_reset(PG_FUNCTION_ARGS)" at line 1399.
 * Presumably the skipped lines clear the per-transaction origin values --
 * confirm against the complete file.
 */
1398 Datum
1400 {
1401  replorigin_check_prerequisites(true, false);
1402 
1405 
1406  PG_RETURN_VOID();
1407 }
1408 
1409 
/*
 * Move the replay position of the origin named by argument 0 to the LSN
 * given as argument 1.
 *
 * The origin is looked up by name with missing_ok = false.
 * replorigin_advance() is then called with local_commit = InvalidXLogRecPtr,
 * go_backward = true and wal_log = true.  RowExclusiveLock on
 * ReplicationOriginRelationId is held across both the lookup and the
 * advance, and released before returning.
 *
 * NOTE(review): this listing skips source line 1411.  The cross-reference
 * index at the end of the listing places
 * "pg_replication_origin_advance(PG_FUNCTION_ARGS)" there.
 */
1410 Datum
1412 {
1413  text *name = PG_GETARG_TEXT_PP(0);
1414  XLogRecPtr remote_commit = PG_GETARG_LSN(1);
1415  RepOriginId node;
1416 
1417  replorigin_check_prerequisites(true, false);
1418 
1419  /* lock to prevent the replication origin from vanishing */
1420  LockRelationOid(ReplicationOriginRelationId, RowExclusiveLock);
1421 
1422  node = replorigin_by_name(text_to_cstring(name), false);
1423 
1424  /*
1425  * Can't sensibly pass a local commit to be flushed at checkpoint - this
1426  * xact hasn't committed yet. This is why this function should be used to
1427  * set up the initial replication state, but not for replay.
1428  */
1429  replorigin_advance(node, remote_commit, InvalidXLogRecPtr,
1430  true /* go backward */ , true /* WAL log */ );
1431 
1432  UnlockRelationOid(ReplicationOriginRelationId, RowExclusiveLock);
1433 
1434  PG_RETURN_VOID();
1435 }
1436 
1437 
1438 /*
1439  * Return the replication progress for an individual replication origin.
1440  *
1441  * If 'flush' is set to true it is ensured that the returned value corresponds
1442  * to a local transaction that has been flushed. This is useful if asynchronous
1443  * commits are used when replaying replicated transactions.
      *
      * The prerequisite check is made with recoveryOK = true.  The origin is
      * looked up by name with missing_ok = false.  The SQL result is NULL when
      * the reported position is InvalidXLogRecPtr.
      *
      * NOTE(review): this listing skips source lines 1446, 1450-1451 and 1455.
      * The cross-reference index at the end of the listing places
      * "pg_replication_origin_progress(PG_FUNCTION_ARGS)" at line 1446.  The
      * declarations of roident and remote_lsn and the assignment of 'name' are
      * not visible; presumably they are on the other skipped lines.
1444  */
1445 Datum
1447 {
1448  char *name;
1449  bool flush;
1452 
1453  replorigin_check_prerequisites(true, true);
1454 
1456  flush = PG_GETARG_BOOL(1);
1457 
1458  roident = replorigin_by_name(name, false);
1459  Assert(OidIsValid(roident));
1460 
1461  remote_lsn = replorigin_get_progress(roident, flush);
1462 
1463  if (remote_lsn == InvalidXLogRecPtr)
1464  PG_RETURN_NULL();
1465 
1466  PG_RETURN_LSN(remote_lsn);
1467 }
1468 
1469 
/*
 * Set-returning function that emits one row for every in-use entry of
 * replication_states.
 *
 * The caller must allow materialize mode; rows are collected in a tuplestore
 * created in the per-query memory context.  Row layout:
 *   column 0: the entry's roident
 *   column 1: the origin's name, left NULL when replorigin_by_oid() returns
 *             false
 *   column 2: the entry's remote_lsn
 *   column 3: the entry's local_lsn
 *
 * NOTE(review): this listing skips source lines 1471, 1479 and 1519-1521.
 * The cross-reference index at the end of the listing places
 * "pg_show_replication_origin_status(PG_FUNCTION_ARGS)" at line 1471.  The
 * definition of REPLICATION_ORIGIN_PROGRESS_COLS and the declarations of
 * 'state', 'values' and 'nulls' are not visible; presumably they are on the
 * other skipped lines.
 */
1470 Datum
1472 {
1473  ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1474  TupleDesc tupdesc;
1475  Tuplestorestate *tupstore;
1476  MemoryContext per_query_ctx;
1477  MemoryContext oldcontext;
1478  int i;
1480 
1481  /* we want to return 0 rows if slot is set to zero */
1482  replorigin_check_prerequisites(false, true);
1483 
1484  if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
1485  ereport(ERROR,
1486  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1487  errmsg("set-valued function called in context that cannot accept a set")));
1488  if (!(rsinfo->allowedModes & SFRM_Materialize))
1489  ereport(ERROR,
1490  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1491  errmsg("materialize mode required, but it is not allowed in this context")));
1492  if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1493  elog(ERROR, "return type must be a row type");
1494 
1495  if (tupdesc->natts != REPLICATION_ORIGIN_PROGRESS_COLS)
1496  elog(ERROR, "wrong function definition");
1497 
      /* the tuplestore must live in the per-query context */
1498  per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
1499  oldcontext = MemoryContextSwitchTo(per_query_ctx);
1500 
1501  tupstore = tuplestore_begin_heap(true, false, work_mem);
1502  rsinfo->returnMode = SFRM_Materialize;
1503  rsinfo->setResult = tupstore;
1504  rsinfo->setDesc = tupdesc;
1505 
1506  MemoryContextSwitchTo(oldcontext);
1507 
1508 
1509  /* prevent slots from being concurrently dropped */
1510  LWLockAcquire(ReplicationOriginLock, LW_SHARED);
1511 
1512  /*
1513  * Iterate through all possible replication_states, display if they are
1514  * filled. ReplicationOriginLock is held in shared mode for the whole
1515  * loop, and each entry's LSNs are read while holding that entry's own
1516  * lock in shared mode.
1517  */
1517  for (i = 0; i < max_replication_slots; i++)
1518  {
1522  char *roname;
1523 
1524  state = &replication_states[i];
1525 
1526  /* unused slot, nothing to display */
1527  if (state->roident == InvalidRepOriginId)
1528  continue;
1529 
      /* start with every column NULL; columns are un-nulled as they are set */
1530  memset(values, 0, sizeof(values));
1531  memset(nulls, 1, sizeof(nulls));
1532 
1533  values[0] = ObjectIdGetDatum(state->roident);
1534  nulls[0] = false;
1535 
1536  /*
1537  * We're not preventing the origin from being dropped concurrently, so
1538  * silently accept that it might be gone.
1539  */
1540  if (replorigin_by_oid(state->roident, true,
1541  &roname))
1542  {
1543  values[1] = CStringGetTextDatum(roname);
1544  nulls[1] = false;
1545  }
1546 
1547  LWLockAcquire(&state->lock, LW_SHARED);
1548 
1549  values[2] = LSNGetDatum(state->remote_lsn);
1550  nulls[2] = false;
1551 
1552  values[3] = LSNGetDatum(state->local_lsn);
1553  nulls[3] = false;
1554 
1555  LWLockRelease(&state->lock);
1556 
1557  tuplestore_putvalues(tupstore, tupdesc, values, nulls);
1558  }
1559 
1560  tuplestore_donestoring(tupstore);
1561 
1562  LWLockRelease(ReplicationOriginLock);
1563 
1564 #undef REPLICATION_ORIGIN_PROGRESS_COLS
1565 
1566  return (Datum) 0;
1567 }
static ReplicationState * session_replication_state
Definition: origin.c:175
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, Datum *values, bool *isnull)
Definition: tuplestore.c:750
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
static void replorigin_check_prerequisites(bool check_slots, bool recoveryOK)
Definition: origin.c:181
Definition: lwlock.h:32
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
#define IsA(nodeptr, _type_)
Definition: nodes.h:576
#define InitDirtySnapshot(snapshotdata)
Definition: snapmgr.h:76
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition: funcapi.c:196
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:133
int MyProcPid
Definition: globals.c:40
int errhint(const char *fmt,...)
Definition: elog.c:974
XLogRecPtr local_lsn
Definition: origin.c:121
void systable_endscan(SysScanDesc sysscan)
Definition: genam.c:525
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
#define RelationGetDescr(relation)
Definition: rel.h:445
#define DoNotReplicateId
Definition: origin.h:34
Datum pg_replication_origin_xact_setup(PG_FUNCTION_ARGS)
Definition: origin.c:1381
void UnlockRelationOid(Oid relid, LOCKMODE lockmode)
Definition: lmgr.c:199
#define write(a, b, c)
Definition: win32.h:14
Datum pg_replication_origin_drop(PG_FUNCTION_ARGS)
Definition: origin.c:1261
#define ExclusiveLock
Definition: lockdefs.h:44
int64 TimestampTz
Definition: timestamp.h:39
XLogRecPtr remote_lsn
Definition: origin.c:145
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:263
uint32 pg_crc32c
Definition: pg_crc32c.h:38
static void ReplicationOriginExitCleanup(int code, Datum arg)
Definition: origin.c:1025
RepOriginId roident
Definition: origin.c:144
void replorigin_drop(RepOriginId roident, bool nowait)
Definition: origin.c:335
#define tuplestore_donestoring(state)
Definition: tuplestore.h:60
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
unsigned char uint8
Definition: c.h:356
uint16 RepOriginId
Definition: xlogdefs.h:58
void ConditionVariableBroadcast(ConditionVariable *cv)
XLogRecPtr replorigin_session_get_progress(bool flush)
Definition: origin.c:1192
int errcode(int sqlerrcode)
Definition: elog.c:570
#define LSNGetDatum(X)
Definition: pg_lsn.h:22
bool superuser(void)
Definition: superuser.c:47
#define MemSet(start, val, len)
Definition: c.h:955
void CatalogTupleDelete(Relation heapRel, ItemPointer tid)
Definition: indexing.c:269
void ReplicationOriginShmemInit(void)
Definition: origin.c:496
void replorigin_advance(RepOriginId node, XLogRecPtr remote_commit, XLogRecPtr local_commit, bool go_backward, bool wal_log)
Definition: origin.c:861
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull)
Definition: heaptuple.c:1020
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:269
void replorigin_session_setup(RepOriginId node)
Definition: origin.c:1057
bool replorigin_by_oid(RepOriginId roident, bool missing_ok, char **roname)
Definition: origin.c:435
#define LOG
Definition: elog.h:26
void heap_freetuple(HeapTuple htup)
Definition: heaptuple.c:1338
unsigned int Oid
Definition: postgres_ext.h:31
bool RecoveryInProgress(void)
Definition: xlog.c:7917
bool IsReservedName(const char *name)
Definition: catalog.c:214
#define OidIsValid(objectId)
Definition: c.h:638
#define PANIC
Definition: elog.h:53
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2798
static int fd(const char *x, int i)
Definition: preproc-init.c:105
SysScanDesc systable_beginscan(Relation heapRelation, Oid indexId, bool indexOK, Snapshot snapshot, int nkeys, ScanKey key)
Definition: genam.c:352
#define PG_BINARY
Definition: c.h:1191
RepOriginId replorigin_by_name(char *roname, bool missing_ok)
Definition: origin.c:212
RepOriginId roident
Definition: origin.c:109
XLogRecPtr EndRecPtr
Definition: xlogreader.h:133
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:303
void LWLockRegisterTranche(int tranche_id, const char *tranche_name)
Definition: lwlock.c:603
Datum pg_replication_origin_advance(PG_FUNCTION_ARGS)
Definition: origin.c:1411
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1726
#define PG_RETURN_LSN(x)
Definition: pg_lsn.h:25
void CheckPointReplicationOrigin(void)
Definition: origin.c:546
void ConditionVariableInit(ConditionVariable *cv)
void replorigin_redo(XLogReaderState *record)
Definition: origin.c:800
HeapTuple systable_getnext(SysScanDesc sysscan)
Definition: genam.c:444
void pfree(void *pointer)
Definition: mcxt.c:1056
#define XLogRecGetData(decoder)
Definition: xlogreader.h:246
void ConditionVariableCancelSleep(void)
#define ReplicationOriginIdentIndex
Definition: indexing.h:340
#define ObjectIdGetDatum(X)
Definition: postgres.h:507
#define ERROR
Definition: elog.h:43
#define PG_UINT16_MAX
Definition: c.h:439
void replorigin_session_reset(void)
Definition: origin.c:1145
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2255
FormData_pg_replication_origin * Form_pg_replication_origin
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:372
LWLock lock
Definition: origin.c:136
ItemPointerData t_self
Definition: htup.h:65
#define DEBUG2
Definition: elog.h:24
XLogRecPtr replorigin_session_origin_lsn
Definition: origin.c:157
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:361
TimestampTz replorigin_session_origin_timestamp
Definition: origin.c:158
#define XLOG_REPLORIGIN_SET
Definition: origin.h:30
void StartupReplicationOrigin(void)
Definition: origin.c:672
#define RowExclusiveLock
Definition: lockdefs.h:38
int errdetail(const char *fmt,...)
Definition: elog.c:860
int errcode_for_file_access(void)
Definition: elog.c:593
struct ReplicationState ReplicationState
unsigned int uint32
Definition: c.h:358
Datum pg_replication_origin_session_setup(PG_FUNCTION_ARGS)
Definition: origin.c:1305
Datum pg_replication_origin_session_reset(PG_FUNCTION_ARGS)
Definition: origin.c:1327
RepOriginId replorigin_create(char *roname)
Definition: origin.c:243
Datum pg_show_replication_origin_status(PG_FUNCTION_ARGS)
Definition: origin.c:1471
#define ereport(elevel, rest)
Definition: elog.h:141
#define XLogRecGetInfo(decoder)
Definition: xlogreader.h:242
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition: fd.c:606
Datum pg_replication_origin_session_is_setup(PG_FUNCTION_ARGS)
Definition: origin.c:1344
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:45
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:678
int CloseTransientFile(int fd)
Definition: fd.c:2432
#define WARNING
Definition: elog.h:40
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Definition: syscache.c:1124
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:323
Tuplestorestate * tuplestore_begin_heap(bool randomAccess, bool interXact, int maxKBytes)
Definition: tuplestore.c:318
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:415
#define XLOG_REPLORIGIN_DROP
Definition: origin.h:31
Size mul_size(Size s1, Size s2)
Definition: shmem.c:492
#define PG_GETARG_LSN(n)
Definition: pg_lsn.h:24
struct ReplicationStateOnDisk ReplicationStateOnDisk
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:349
uintptr_t Datum
Definition: postgres.h:367
void CommandCounterIncrement(void)
Definition: xact.c:1003
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:1172
Size add_size(Size s1, Size s2)
Definition: shmem.c:475
int work_mem
Definition: globals.c:121
Size ReplicationOriginShmemSize(void)
Definition: origin.c:476
#define REPLICATION_STATE_MAGIC
Definition: origin.c:178
#define InvalidOid
Definition: postgres_ext.h:36
static ReplicationStateCtl * replication_states_ctl
Definition: origin.c:168
Datum pg_replication_origin_xact_reset(PG_FUNCTION_ARGS)
Definition: origin.c:1399
int allowedModes
Definition: execnodes.h:303
#define PG_RETURN_VOID()
Definition: fmgr.h:339
struct ReplicationStateCtl ReplicationStateCtl
SetFunctionReturnMode returnMode
Definition: execnodes.h:305
int max_replication_slots
Definition: slot.c:99
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
XLogRecPtr remote_lsn
Definition: origin.c:114
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
Datum pg_replication_origin_progress(PG_FUNCTION_ARGS)
Definition: origin.c:1446
RepOriginId node_id
Definition: origin.h:27
uint64 XLogRecPtr
Definition: xlogdefs.h:21
ReplicationState states[FLEXIBLE_ARRAY_MEMBER]
Definition: origin.c:152
#define Assert(condition)
Definition: c.h:732
#define XLR_INFO_MASK
Definition: xlogrecord.h:62
RepOriginId replorigin_session_origin
Definition: origin.c:156
Definition: regguts.h:298
RepOriginId node_id
Definition: origin.h:21
ConditionVariable origin_cv
Definition: origin.c:131
size_t Size
Definition: c.h:466
void replorigin_session_advance(XLogRecPtr remote_commit, XLogRecPtr local_commit)
Definition: origin.c:1174
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1122
bool IsTransactionState(void)
Definition: xact.c:356
XLogRecPtr remote_lsn
Definition: origin.h:20
MemoryContext ecxt_per_query_memory
Definition: execnodes.h:231
const char * name
Definition: encode.c:521
#define InvalidRepOriginId
Definition: origin.h:33
Tuplestorestate * setResult
Definition: execnodes.h:308
#define DatumGetPointer(X)
Definition: postgres.h:549
static Datum values[MAXATTR]
Definition: bootstrap.c:167
char * text_to_cstring(const text *t)
Definition: varlena.c:204
ExprContext * econtext
Definition: execnodes.h:301
TupleDesc setDesc
Definition: execnodes.h:309
int errmsg(const char *fmt,...)
Definition: elog.c:784
Datum pg_replication_origin_oid(PG_FUNCTION_ARGS)
Definition: origin.c:1284
#define PG_GETARG_TIMESTAMPTZ(n)
Definition: timestamp.h:36
#define elog(elevel,...)
Definition: elog.h:226
int i
Datum pg_replication_origin_session_progress(PG_FUNCTION_ARGS)
Definition: origin.c:1360
static ReplicationState * replication_states
Definition: origin.c:167
void ScanKeyInit(ScanKey entry, AttrNumber attributeNumber, StrategyNumber strategy, RegProcedure procedure, Datum argument)
Definition: scankey.c:76
#define CStringGetTextDatum(s)
Definition: builtins.h:83
void * arg
Definition: c.h:549
#define PG_FUNCTION_ARGS
Definition: fmgr.h:188
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
void LockRelationOid(Oid relid, LOCKMODE lockmode)
Definition: lmgr.c:108
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:89
Datum pg_replication_origin_create(PG_FUNCTION_ARGS)
Definition: origin.c:1224
#define REPLICATION_ORIGIN_PROGRESS_COLS
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:39
void XLogBeginInsert(void)
Definition: xloginsert.c:120
#define PG_RETURN_OID(x)
Definition: fmgr.h:350
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:94
XLogRecPtr replorigin_get_progress(RepOriginId node, bool flush)
Definition: origin.c:984
void CatalogTupleInsert(Relation heapRel, HeapTuple tup)
Definition: indexing.c:183
#define PG_RETURN_NULL()
Definition: fmgr.h:335
#define read(a, b, c)
Definition: win32.h:13
#define BTEqualStrategyNumber
Definition: stratnum.h:31
#define offsetof(type, field)
Definition: c.h:655