PostgreSQL Source Code  git master
origin.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * origin.c
4  * Logical replication progress tracking support.
5  *
6  * Copyright (c) 2013-2018, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * src/backend/replication/logical/origin.c
10  *
11  * NOTES
12  *
13  * This file provides the following:
14  * * An infrastructure to name nodes in a replication setup
15  * * A facility to store and persist replication progress in an
16  * efficient and durable manner.
17  *
18  * A replication origin consists of a descriptive, user-defined, external
19  * name and a short, thus space-efficient, internal 2 byte one. This split
20  * exists because replication origins have to be stored in WAL and shared
21  * memory, and long descriptors would be inefficient. For now we only use 2
22  * bytes for the internal id of a replication origin, as it seems unlikely that
23  * there will soon be more than 65k nodes in one replication setup; and using
24  * only two bytes allows us to be more space efficient.
25  *
26  * Replication progress is tracked in a shared memory table
27  * (ReplicationState) that's dumped to disk every checkpoint. Entries
28  * ('slots') in this table are identified by the internal id. That's the case
29  * because it allows to increase replication progress during crash
30  * recovery. To allow doing so we store the original LSN (from the originating
31  * system) of a transaction in the commit record. That allows to recover the
32  * precise replayed state after crash recovery; without requiring synchronous
33  * commits. Allowing logical replication to use asynchronous commit is
34  * generally good for performance, but especially important as it allows a
35  * single threaded replay process to keep up with a source that has multiple
36  * backends generating changes concurrently. For efficiency and simplicity
37  * reasons a backend can setup one replication origin that's from then used as
38  * the source of changes produced by the backend, until reset again.
39  *
40  * This infrastructure is intended to be used in cooperation with logical
41  * decoding. When replaying from a remote system the configured origin is
42  * provided to output plugins, allowing prevention of replication loops and
43  * other filtering.
44  *
45  * There are several levels of locking at work:
46  *
47  * * To create and drop replication origins an exclusive lock on
48  * pg_replication_origin is required for the duration. That allows us to
49  * safely and conflict free assign new origins using a dirty snapshot.
50  *
51  * * When creating an in-memory replication progress slot the ReplicationOrigin
52  * LWLock has to be held exclusively; when iterating over the replication
53  * progress a shared lock has to be held, the same when advancing the
54  * replication progress of an individual backend that has not setup as the
55  * session's replication origin.
56  *
57  * * When manipulating or looking at the remote_lsn and local_lsn fields of a
58  * replication progress slot that slot's lwlock has to be held. That's
59  * primarily because we do not assume 8 byte writes (the LSN) is atomic on
60  * all our platforms, but it also simplifies memory ordering concerns
61  * between the remote and local lsn. We use a lwlock instead of a spinlock
62  * so it's less harmful to hold the lock over a WAL write
63  * (cf. replorigin_advance()).
64  *
65  * ---------------------------------------------------------------------------
66  */
67 
68 #include "postgres.h"
69 
70 #include <unistd.h>
71 #include <sys/stat.h>
72 
73 #include "funcapi.h"
74 #include "miscadmin.h"
75 
76 #include "access/genam.h"
77 #include "access/heapam.h"
78 #include "access/htup_details.h"
79 #include "access/xact.h"
80 
81 #include "catalog/indexing.h"
82 #include "nodes/execnodes.h"
83 
84 #include "replication/origin.h"
85 #include "replication/logical.h"
86 #include "pgstat.h"
87 #include "storage/fd.h"
88 #include "storage/ipc.h"
89 #include "storage/lmgr.h"
91 #include "storage/copydir.h"
92 
93 #include "utils/builtins.h"
94 #include "utils/fmgroids.h"
95 #include "utils/pg_lsn.h"
96 #include "utils/rel.h"
97 #include "utils/syscache.h"
98 #include "utils/tqual.h"
99 
100 /*
101  * Replay progress of a single remote node.
102  */
103 typedef struct ReplicationState
104 {
105  /*
106  * Local identifier for the remote node.
107  */
109 
110  /*
111  * Location of the latest commit from the remote side.
112  */
114 
115  /*
116  * Remember the local lsn of the commit record so we can XLogFlush() to it
117  * during a checkpoint so we know the commit record actually is safe on
118  * disk.
119  */
121 
122  /*
123  * PID of backend that's acquired slot, or 0 if none.
124  */
126 
127  /*
128  * Condition variable that's signalled when acquired_by changes.
129  */
131 
132  /*
133  * Lock protecting remote_lsn and local_lsn.
134  */
137 
138 /*
139  * On disk version of ReplicationState.
140  */
142 {
146 
147 
148 typedef struct ReplicationStateCtl
149 {
151  ReplicationState states[FLEXIBLE_ARRAY_MEMBER];
153 
154 /* external variables */
158 
159 /*
160  * Base address into a shared memory array of replication states of size
161  * max_replication_slots.
162  *
163  * XXX: Should we use a separate variable to size this rather than
164  * max_replication_slots?
165  */
168 
169 /*
170  * Backend-local, cached element from ReplicationState for use in a backend
171  * replaying remote commits, so we don't have to search ReplicationState for
172  * the backends current RepOriginId.
173  */
175 
176 /* Magic for on disk files. */
177 #define REPLICATION_STATE_MAGIC ((uint32) 0x1257DADE)
178 
/*
 * Shared precondition checks for code that queries or manipulates
 * replication origins.  Raises an ERROR when any of these holds, tested in
 * this order:
 *   - the caller is not a superuser;
 *   - check_slots is true and max_replication_slots is 0;
 *   - recoveryOK is false and RecoveryInProgress() returns true.
 *
 * Returns normally, with no other effect, when every check passes.
 */
179 static void
180 replorigin_check_prerequisites(bool check_slots, bool recoveryOK)
181 {
 /* the superuser requirement applies regardless of either flag */
182  if (!superuser())
183  ereport(ERROR,
184  (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
185  errmsg("only superusers can query or manipulate replication origins")));
186 
 /*
  * The shared-memory setup code in this file returns early when
  * max_replication_slots is 0, so presumably callers that touch the
  * shared progress array are the ones passing check_slots -- confirm.
  */
187  if (check_slots && max_replication_slots == 0)
188  ereport(ERROR,
189  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
190  errmsg("cannot query or manipulate replication origin when max_replication_slots = 0")));
191 
 /* callers that only read may pass recoveryOK = true to skip this check */
192  if (!recoveryOK && RecoveryInProgress())
193  ereport(ERROR,
194  (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
195  errmsg("cannot manipulate replication origins during recovery")));
196 
197 }
198 
199 
200 /* ---------------------------------------------------------------------------
201  * Functions for working with replication origins themselves.
202  * ---------------------------------------------------------------------------
203  */
204 
205 /*
206  * Check for a persistent replication origin identified by name.
207  *
208  * Returns InvalidOid if the node isn't known yet and missing_ok is true.
209  */
211 replorigin_by_name(char *roname, bool missing_ok)
212 {
215  HeapTuple tuple;
216  Datum roname_d;
217 
218  roname_d = CStringGetTextDatum(roname);
219 
220  tuple = SearchSysCache1(REPLORIGNAME, roname_d);
221  if (HeapTupleIsValid(tuple))
222  {
223  ident = (Form_pg_replication_origin) GETSTRUCT(tuple);
224  roident = ident->roident;
225  ReleaseSysCache(tuple);
226  }
227  else if (!missing_ok)
228  ereport(ERROR,
229  (errcode(ERRCODE_UNDEFINED_OBJECT),
230  errmsg("replication origin \"%s\" does not exist",
231  roname)));
232 
233  return roident;
234 }
235 
236 /*
237  * Create a replication origin.
238  *
239  * Needs to be called in a transaction.
240  */
242 replorigin_create(char *roname)
243 {
244  Oid roident;
245  HeapTuple tuple = NULL;
246  Relation rel;
247  Datum roname_d;
248  SnapshotData SnapshotDirty;
249  SysScanDesc scan;
250  ScanKeyData key;
251 
252  roname_d = CStringGetTextDatum(roname);
253 
255 
256  /*
257  * We need the numeric replication origin to be 16bit wide, so we cannot
258  * rely on the normal oid allocation. Instead we simply scan
259  * pg_replication_origin for the first unused id. That's not particularly
260  * efficient, but this should be a fairly infrequent operation - we can
261  * easily spend a bit more code on this when it turns out it needs to be
262  * faster.
263  *
264  * We handle concurrency by taking an exclusive lock (allowing reads!)
265  * over the table for the duration of the search. Because we use a "dirty
266  * snapshot" we can read rows that other in-progress sessions have
267  * written, even though they would be invisible with normal snapshots. Due
268  * to the exclusive lock there's no danger that new rows can appear while
269  * we're checking.
270  */
271  InitDirtySnapshot(SnapshotDirty);
272 
273  rel = heap_open(ReplicationOriginRelationId, ExclusiveLock);
274 
275  for (roident = InvalidOid + 1; roident < PG_UINT16_MAX; roident++)
276  {
277  bool nulls[Natts_pg_replication_origin];
278  Datum values[Natts_pg_replication_origin];
279  bool collides;
280 
282 
283  ScanKeyInit(&key,
284  Anum_pg_replication_origin_roident,
285  BTEqualStrategyNumber, F_OIDEQ,
286  ObjectIdGetDatum(roident));
287 
289  true /* indexOK */ ,
290  &SnapshotDirty,
291  1, &key);
292 
293  collides = HeapTupleIsValid(systable_getnext(scan));
294 
295  systable_endscan(scan);
296 
297  if (!collides)
298  {
299  /*
300  * Ok, found an unused roident, insert the new row and do a CCI,
301  * so our callers can look it up if they want to.
302  */
303  memset(&nulls, 0, sizeof(nulls));
304 
305  values[Anum_pg_replication_origin_roident - 1] = ObjectIdGetDatum(roident);
306  values[Anum_pg_replication_origin_roname - 1] = roname_d;
307 
308  tuple = heap_form_tuple(RelationGetDescr(rel), values, nulls);
309  CatalogTupleInsert(rel, tuple);
311  break;
312  }
313  }
314 
315  /* now release lock again, */
317 
318  if (tuple == NULL)
319  ereport(ERROR,
320  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
321  errmsg("could not find free replication origin OID")));
322 
323  heap_freetuple(tuple);
324  return roident;
325 }
326 
327 
328 /*
329  * Drop replication origin.
330  *
331  * Needs to be called in a transaction.
332  */
333 void
335 {
336  HeapTuple tuple;
337  Relation rel;
338  int i;
339 
341 
342  /*
343  * To interlock against concurrent drops, we hold ExclusiveLock on
344  * pg_replication_origin throughout this function.
345  */
346  rel = heap_open(ReplicationOriginRelationId, ExclusiveLock);
347 
348  /*
349  * First, clean up the slot state info, if there is any matching slot.
350  */
351 restart:
352  tuple = NULL;
353  LWLockAcquire(ReplicationOriginLock, LW_EXCLUSIVE);
354 
355  for (i = 0; i < max_replication_slots; i++)
356  {
357  ReplicationState *state = &replication_states[i];
358 
359  if (state->roident == roident)
360  {
361  /* found our slot, is it busy? */
362  if (state->acquired_by != 0)
363  {
364  ConditionVariable *cv;
365 
366  if (nowait)
367  ereport(ERROR,
368  (errcode(ERRCODE_OBJECT_IN_USE),
369  errmsg("could not drop replication origin with OID %d, in use by PID %d",
370  state->roident,
371  state->acquired_by)));
372 
373  /*
374  * We must wait and then retry. Since we don't know which CV
375  * to wait on until here, we can't readily use
376  * ConditionVariablePrepareToSleep (calling it here would be
377  * wrong, since we could miss the signal if we did so); just
378  * use ConditionVariableSleep directly.
379  */
380  cv = &state->origin_cv;
381 
382  LWLockRelease(ReplicationOriginLock);
383 
385  goto restart;
386  }
387 
388  /* first make a WAL log entry */
389  {
390  xl_replorigin_drop xlrec;
391 
392  xlrec.node_id = roident;
393  XLogBeginInsert();
394  XLogRegisterData((char *) (&xlrec), sizeof(xlrec));
395  XLogInsert(RM_REPLORIGIN_ID, XLOG_REPLORIGIN_DROP);
396  }
397 
398  /* then clear the in-memory slot */
399  state->roident = InvalidRepOriginId;
400  state->remote_lsn = InvalidXLogRecPtr;
401  state->local_lsn = InvalidXLogRecPtr;
402  break;
403  }
404  }
405  LWLockRelease(ReplicationOriginLock);
407 
408  /*
409  * Now, we can delete the catalog entry.
410  */
412  if (!HeapTupleIsValid(tuple))
413  elog(ERROR, "cache lookup failed for replication origin with oid %u",
414  roident);
415 
416  CatalogTupleDelete(rel, &tuple->t_self);
417  ReleaseSysCache(tuple);
418 
420 
421  /* now release lock again */
423 }
424 
425 
426 /*
427  * Look up a replication origin via its oid and return the name.
428  *
429  * The external name is palloc'd in the calling context.
430  *
431  * Returns true if the origin is known, false otherwise.
432  */
433 bool
434 replorigin_by_oid(RepOriginId roident, bool missing_ok, char **roname)
435 {
436  HeapTuple tuple;
438 
439  Assert(OidIsValid((Oid) roident));
440  Assert(roident != InvalidRepOriginId);
441  Assert(roident != DoNotReplicateId);
442 
444  ObjectIdGetDatum((Oid) roident));
445 
446  if (HeapTupleIsValid(tuple))
447  {
448  ric = (Form_pg_replication_origin) GETSTRUCT(tuple);
449  *roname = text_to_cstring(&ric->roname);
450  ReleaseSysCache(tuple);
451 
452  return true;
453  }
454  else
455  {
456  *roname = NULL;
457 
458  if (!missing_ok)
459  ereport(ERROR,
460  (errcode(ERRCODE_UNDEFINED_OBJECT),
461  errmsg("replication origin with OID %u does not exist",
462  roident)));
463 
464  return false;
465  }
466 }
467 
468 
469 /* ---------------------------------------------------------------------------
470  * Functions for handling replication progress.
471  * ---------------------------------------------------------------------------
472  */
473 
474 Size
476 {
477  Size size = 0;
478 
479  /*
480  * XXX: max_replication_slots is arguably the wrong thing to use, as here
481  * we keep the replay state of *remote* transactions. But for now it seems
482  * sufficient to reuse it, lest we introduce a separate GUC.
483  */
484  if (max_replication_slots == 0)
485  return size;
486 
487  size = add_size(size, offsetof(ReplicationStateCtl, states));
488 
489  size = add_size(size,
491  return size;
492 }
493 
494 void
496 {
497  bool found;
498 
499  if (max_replication_slots == 0)
500  return;
501 
502  replication_states_ctl = (ReplicationStateCtl *)
503  ShmemInitStruct("ReplicationOriginState",
505  &found);
506  replication_states = replication_states_ctl->states;
507 
508  if (!found)
509  {
510  int i;
511 
512  replication_states_ctl->tranche_id = LWTRANCHE_REPLICATION_ORIGIN;
513 
514  MemSet(replication_states, 0, ReplicationOriginShmemSize());
515 
516  for (i = 0; i < max_replication_slots; i++)
517  {
518  LWLockInitialize(&replication_states[i].lock,
519  replication_states_ctl->tranche_id);
520  ConditionVariableInit(&replication_states[i].origin_cv);
521  }
522  }
523 
524  LWLockRegisterTranche(replication_states_ctl->tranche_id,
525  "replication_origin");
526 }
527 
528 /* ---------------------------------------------------------------------------
529  * Perform a checkpoint of each replication origin's progress with respect to
530  * the replayed remote_lsn. Make sure that all transactions we refer to in the
531  * checkpoint (local_lsn) are actually on-disk. This might not yet be the case
532  * if the transactions were originally committed asynchronously.
533  *
534  * We store checkpoints in the following format:
535  * +-------+------------------------+------------------+-----+--------+
536  * | MAGIC | ReplicationStateOnDisk | struct Replic... | ... | CRC32C | EOF
537  * +-------+------------------------+------------------+-----+--------+
538  *
539  * So it's just the magic, followed by the statically sized
540  * ReplicationStateOnDisk structs. Note that the maximum number of
541  * ReplicationState is determined by max_replication_slots.
542  * ---------------------------------------------------------------------------
543  */
/*
 * Writes the magic, one ReplicationStateOnDisk record per in-use entry of
 * replication_states (roident != InvalidRepOriginId), and a CRC32C computed
 * over the magic and those records to a temporary file, then moves the file
 * into place with durable_rename().  Every failure path visible here reports
 * at PANIC.  Returns immediately when max_replication_slots is 0.
 */
544 void
546 {
547  const char *tmppath = "pg_logical/replorigin_checkpoint.tmp";
548  const char *path = "pg_logical/replorigin_checkpoint";
549  int tmpfd;
550  int i;
552  pg_crc32c crc;
553 
554  if (max_replication_slots == 0)
555  return;
556 
557  INIT_CRC32C(crc);
558 
559  /* make sure no old temp file is remaining */
560  if (unlink(tmppath) < 0 && errno != ENOENT)
561  ereport(PANIC,
563  errmsg("could not remove file \"%s\": %m",
564  tmppath)));
565 
566  /*
567  * no other backend can perform this at the same time, we're protected by
568  * CheckpointLock.
569  */
 /* O_EXCL is safe because any stale temp file was unlinked just above */
570  tmpfd = OpenTransientFile(tmppath,
571  O_CREAT | O_EXCL | O_WRONLY | PG_BINARY);
572  if (tmpfd < 0)
573  ereport(PANIC,
575  errmsg("could not create file \"%s\": %m",
576  tmppath)));
577 
578  /* write magic */
 /*
  * NOTE(review): on a failed or short write, CloseTransientFile() runs
  * before ereport() expands %m.  If closing can modify errno, the message
  * would name the wrong cause; a short write may also leave errno unset.
  * Confirm, and consider saving errno (or defaulting it) before the close.
  * The same pattern recurs for the record and CRC writes below.
  */
579  if ((write(tmpfd, &magic, sizeof(magic))) != sizeof(magic))
580  {
581  CloseTransientFile(tmpfd);
582  ereport(PANIC,
584  errmsg("could not write to file \"%s\": %m",
585  tmppath)));
586  }
587  COMP_CRC32C(crc, &magic, sizeof(magic));
588 
589  /* prevent concurrent creations/drops */
590  LWLockAcquire(ReplicationOriginLock, LW_SHARED);
591 
592  /* write actual data */
593  for (i = 0; i < max_replication_slots; i++)
594  {
595  ReplicationStateOnDisk disk_state;
596  ReplicationState *curstate = &replication_states[i];
598 
 /* unused entries are not written, so the file holds only live origins */
599  if (curstate->roident == InvalidRepOriginId)
600  continue;
601 
602  /* zero, to avoid uninitialized padding bytes */
603  memset(&disk_state, 0, sizeof(disk_state));
604 
 /* the per-entry lock gives a consistent remote_lsn/local_lsn pair */
605  LWLockAcquire(&curstate->lock, LW_SHARED);
606 
607  disk_state.roident = curstate->roident;
608 
609  disk_state.remote_lsn = curstate->remote_lsn;
 /* local_lsn is not stored in the file; it is only used for the flush */
610  local_lsn = curstate->local_lsn;
611 
612  LWLockRelease(&curstate->lock);
613 
614  /* make sure we only write out a commit that's persistent */
 /* note: this flush happens with ReplicationOriginLock still held shared */
615  XLogFlush(local_lsn);
616 
617  if ((write(tmpfd, &disk_state, sizeof(disk_state))) !=
618  sizeof(disk_state))
619  {
620  CloseTransientFile(tmpfd);
621  ereport(PANIC,
623  errmsg("could not write to file \"%s\": %m",
624  tmppath)));
625  }
626 
627  COMP_CRC32C(crc, &disk_state, sizeof(disk_state));
628  }
629 
630  LWLockRelease(ReplicationOriginLock);
631 
632  /* write out the CRC */
633  FIN_CRC32C(crc);
634  if ((write(tmpfd, &crc, sizeof(crc))) != sizeof(crc))
635  {
636  CloseTransientFile(tmpfd);
637  ereport(PANIC,
639  errmsg("could not write to file \"%s\": %m",
640  tmppath)));
641  }
642 
 /*
  * NOTE(review): the result of this final close is not checked, unlike
  * every other file operation in this function -- confirm whether a close
  * failure should also be reported.
  */
643  CloseTransientFile(tmpfd);
644 
645  /* fsync, rename to permanent file, fsync file and directory */
646  durable_rename(tmppath, path, PANIC);
647 }
648 
649 /*
650  * Recover replication replay status from checkpoint data saved earlier by
651  * CheckPointReplicationOrigin.
652  *
653  * This only needs to be called at startup and *not* during every checkpoint
654  * read during recovery (e.g. in HS or PITR from a base backup) afterwards. All
655  * state thereafter can be recovered by looking at commit records.
656  */
/*
 * Reads "pg_logical/replorigin_checkpoint" and loads each stored record into
 * consecutive entries of replication_states, then verifies the trailing
 * CRC32C.  Returns without doing anything when max_replication_slots is 0 or
 * when the file does not exist; every other failure visible here is a PANIC.
 */
657 void
659 {
660  const char *path = "pg_logical/replorigin_checkpoint";
661  int fd;
662  int readBytes;
664  int last_state = 0;
665  pg_crc32c file_crc;
666  pg_crc32c crc;
667 
668  /* don't want to overwrite already existing state */
669 #ifdef USE_ASSERT_CHECKING
670  static bool already_started = false;
671 
672  Assert(!already_started);
673  already_started = true;
674 #endif
675 
676  if (max_replication_slots == 0)
677  return;
678 
679  INIT_CRC32C(crc);
680 
681  elog(DEBUG2, "starting up replication origin progress state");
682 
683  fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
684 
685  /*
686  * might have had max_replication_slots == 0 last run, or we just brought
687  * up a standby.
688  */
689  if (fd < 0 && errno == ENOENT)
690  return;
691  else if (fd < 0)
692  ereport(PANIC,
694  errmsg("could not open file \"%s\": %m",
695  path)));
696 
697  /* verify magic, that is written even if nothing was active */
 /*
  * NOTE(review): a short read (0 <= readBytes < sizeof(magic)) takes the
  * same path as a failed read and is reported with %m, although read()
  * only sets errno on failure.  The record loop below reports the two
  * cases separately; consider doing the same here.
  */
698  readBytes = read(fd, &magic, sizeof(magic));
699  if (readBytes != sizeof(magic))
700  ereport(PANIC,
701  (errmsg("could not read file \"%s\": %m",
702  path)));
703  COMP_CRC32C(crc, &magic, sizeof(magic));
704 
705  if (magic != REPLICATION_STATE_MAGIC)
706  ereport(PANIC,
707  (errmsg("replication checkpoint has wrong magic %u instead of %u",
708  magic, REPLICATION_STATE_MAGIC)));
709 
710  /* we can skip locking here, no other access is possible */
711 
712  /* recover individual states, until there are no more to be found */
713  while (true)
714  {
715  ReplicationStateOnDisk disk_state;
716 
717  readBytes = read(fd, &disk_state, sizeof(disk_state));
718 
719  /* no further data */
 /* a read of exactly sizeof(crc) bytes is taken to be the CRC trailer */
720  if (readBytes == sizeof(crc))
721  {
722  /* not pretty, but simple ... */
723  file_crc = *(pg_crc32c *) &disk_state;
724  break;
725  }
726 
727  if (readBytes < 0)
728  {
729  ereport(PANIC,
731  errmsg("could not read file \"%s\": %m",
732  path)));
733  }
734 
 /* any other length, including 0 (EOF with no trailer), is a truncation */
735  if (readBytes != sizeof(disk_state))
736  {
737  ereport(PANIC,
739  errmsg("could not read file \"%s\": read %d of %zu",
740  path, readBytes, sizeof(disk_state))));
741  }
742 
743  COMP_CRC32C(crc, &disk_state, sizeof(disk_state));
744 
 /* refuse to index past the end of the shared array */
745  if (last_state == max_replication_slots)
746  ereport(PANIC,
747  (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
748  errmsg("could not find free replication state, increase max_replication_slots")));
749 
750  /* copy data to shared memory */
 /* only roident and remote_lsn are restored; local_lsn is not set here */
751  replication_states[last_state].roident = disk_state.roident;
752  replication_states[last_state].remote_lsn = disk_state.remote_lsn;
753  last_state++;
754 
755  elog(LOG, "recovered replication state of node %u to %X/%X",
756  disk_state.roident,
757  (uint32) (disk_state.remote_lsn >> 32),
758  (uint32) disk_state.remote_lsn);
759  }
760 
761  /* now check checksum */
 /*
  * NOTE(review): a checksum mismatch is reported with
  * ERRCODE_CONFIGURATION_LIMIT_EXCEEDED, the same code as the capacity
  * error above, which looks like a copy-paste; a data-corruption code
  * (e.g. ERRCODE_DATA_CORRUPTED) seems the intended one -- confirm.
  * The message also says "replication slot checkpoint" although this file
  * holds replication origin state, and it prints the computed CRC first
  * and the CRC read from the file second.
  */
762  FIN_CRC32C(crc);
763  if (file_crc != crc)
764  ereport(PANIC,
765  (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
766  errmsg("replication slot checkpoint has wrong checksum %u, expected %u",
767  crc, file_crc)));
768 
769  CloseTransientFile(fd);
770 }
771 
772 void
774 {
775  uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
776 
777  switch (info)
778  {
779  case XLOG_REPLORIGIN_SET:
780  {
781  xl_replorigin_set *xlrec =
782  (xl_replorigin_set *) XLogRecGetData(record);
783 
785  xlrec->remote_lsn, record->EndRecPtr,
786  xlrec->force /* backward */ ,
787  false /* WAL log */ );
788  break;
789  }
791  {
792  xl_replorigin_drop *xlrec;
793  int i;
794 
795  xlrec = (xl_replorigin_drop *) XLogRecGetData(record);
796 
797  for (i = 0; i < max_replication_slots; i++)
798  {
799  ReplicationState *state = &replication_states[i];
800 
801  /* found our slot */
802  if (state->roident == xlrec->node_id)
803  {
804  /* reset entry */
805  state->roident = InvalidRepOriginId;
806  state->remote_lsn = InvalidXLogRecPtr;
807  state->local_lsn = InvalidXLogRecPtr;
808  break;
809  }
810  }
811  break;
812  }
813  default:
814  elog(PANIC, "replorigin_redo: unknown op code %u", info);
815  }
816 }
817 
818 
819 /*
820  * Tell the replication origin progress machinery that a commit from 'node'
821  * that originated at the LSN remote_commit on the remote node was replayed
822  * successfully and that we don't need to do so again. In combination with
823  * setting up replorigin_session_origin_lsn and replorigin_session_origin
824  * that ensures we won't lose knowledge about that after a crash if the
825  * transaction had a persistent effect (think of asynchronous commits).
826  *
827  * local_commit needs to be a local LSN of the commit so that we can make sure
828  * upon a checkpoint that enough WAL has been persisted to disk.
829  *
830  * Needs to be called with a RowExclusiveLock on pg_replication_origin,
831  * unless running in recovery.
832  */
833 void
835  XLogRecPtr remote_commit, XLogRecPtr local_commit,
836  bool go_backward, bool wal_log)
837 {
838  int i;
839  ReplicationState *replication_state = NULL;
840  ReplicationState *free_state = NULL;
841 
842  Assert(node != InvalidRepOriginId);
843 
844  /* we don't track DoNotReplicateId */
845  if (node == DoNotReplicateId)
846  return;
847 
848  /*
849  * XXX: For the case where this is called by WAL replay, it'd be more
850  * efficient to restore into a backend local hashtable and only dump into
851  * shmem after recovery is finished. Let's wait with implementing that
852  * till it's shown to be a measurable expense
853  */
854 
855  /* Lock exclusively, as we may have to create a new table entry. */
856  LWLockAcquire(ReplicationOriginLock, LW_EXCLUSIVE);
857 
858  /*
859  * Search for either an existing slot for the origin, or a free one we can
860  * use.
861  */
862  for (i = 0; i < max_replication_slots; i++)
863  {
864  ReplicationState *curstate = &replication_states[i];
865 
866  /* remember where to insert if necessary */
867  if (curstate->roident == InvalidRepOriginId &&
868  free_state == NULL)
869  {
870  free_state = curstate;
871  continue;
872  }
873 
874  /* not our slot */
875  if (curstate->roident != node)
876  {
877  continue;
878  }
879 
880  /* ok, found slot */
881  replication_state = curstate;
882 
883  LWLockAcquire(&replication_state->lock, LW_EXCLUSIVE);
884 
885  /* Make sure it's not used by somebody else */
886  if (replication_state->acquired_by != 0)
887  {
888  ereport(ERROR,
889  (errcode(ERRCODE_OBJECT_IN_USE),
890  errmsg("replication origin with OID %d is already active for PID %d",
891  replication_state->roident,
892  replication_state->acquired_by)));
893  }
894 
895  break;
896  }
897 
898  if (replication_state == NULL && free_state == NULL)
899  ereport(ERROR,
900  (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
901  errmsg("could not find free replication state slot for replication origin with OID %u",
902  node),
903  errhint("Increase max_replication_slots and try again.")));
904 
905  if (replication_state == NULL)
906  {
907  /* initialize new slot */
908  LWLockAcquire(&free_state->lock, LW_EXCLUSIVE);
909  replication_state = free_state;
910  Assert(replication_state->remote_lsn == InvalidXLogRecPtr);
911  Assert(replication_state->local_lsn == InvalidXLogRecPtr);
912  replication_state->roident = node;
913  }
914 
915  Assert(replication_state->roident != InvalidRepOriginId);
916 
917  /*
918  * If somebody "forcefully" sets this slot, WAL log it, so it's durable
919  * and the standby gets the message. Primarily this will be called during
920  * WAL replay (of commit records) where no WAL logging is necessary.
921  */
922  if (wal_log)
923  {
924  xl_replorigin_set xlrec;
925 
926  xlrec.remote_lsn = remote_commit;
927  xlrec.node_id = node;
928  xlrec.force = go_backward;
929 
930  XLogBeginInsert();
931  XLogRegisterData((char *) (&xlrec), sizeof(xlrec));
932 
933  XLogInsert(RM_REPLORIGIN_ID, XLOG_REPLORIGIN_SET);
934  }
935 
936  /*
937  * Due to - harmless - race conditions during a checkpoint we could see
938  * values here that are older than the ones we already have in memory.
939  * Don't overwrite those.
940  */
941  if (go_backward || replication_state->remote_lsn < remote_commit)
942  replication_state->remote_lsn = remote_commit;
943  if (local_commit != InvalidXLogRecPtr &&
944  (go_backward || replication_state->local_lsn < local_commit))
945  replication_state->local_lsn = local_commit;
946  LWLockRelease(&replication_state->lock);
947 
948  /*
949  * Release *after* changing the LSNs, slot isn't acquired and thus could
950  * otherwise be dropped anytime.
951  */
952  LWLockRelease(ReplicationOriginLock);
953 }
954 
955 
958 {
959  int i;
962 
963  /* prevent slots from being concurrently dropped */
964  LWLockAcquire(ReplicationOriginLock, LW_SHARED);
965 
966  for (i = 0; i < max_replication_slots; i++)
967  {
969 
970  state = &replication_states[i];
971 
972  if (state->roident == node)
973  {
974  LWLockAcquire(&state->lock, LW_SHARED);
975 
976  remote_lsn = state->remote_lsn;
977  local_lsn = state->local_lsn;
978 
979  LWLockRelease(&state->lock);
980 
981  break;
982  }
983  }
984 
985  LWLockRelease(ReplicationOriginLock);
986 
987  if (flush && local_lsn != InvalidXLogRecPtr)
988  XLogFlush(local_lsn);
989 
990  return remote_lsn;
991 }
992 
993 /*
994  * Tear down a (possibly) configured session replication origin during process
995  * exit.
996  */
997 static void
999 {
1000  ConditionVariable *cv = NULL;
1001 
1002  LWLockAcquire(ReplicationOriginLock, LW_EXCLUSIVE);
1003 
1004  if (session_replication_state != NULL &&
1005  session_replication_state->acquired_by == MyProcPid)
1006  {
1007  cv = &session_replication_state->origin_cv;
1008 
1009  session_replication_state->acquired_by = 0;
1010  session_replication_state = NULL;
1011  }
1012 
1013  LWLockRelease(ReplicationOriginLock);
1014 
1015  if (cv)
1017 }
1018 
1019 /*
1020  * Set up a replication origin in the shared memory struct if it doesn't
1021  * already exist and cache access to the specific ReplicationState so the
1022  * array doesn't have to be searched when calling
1023  * replorigin_session_advance().
1024  *
1025  * Obviously only one such cached origin can exist per process and the current
1026  * cached value can only be set again after the previous value is torn down
1027  * with replorigin_session_reset().
1028  */
/*
 * Binds this backend to the replication_states entry for `node`: reuses the
 * entry whose roident matches, or claims the first unused entry, records
 * MyProcPid in acquired_by and caches the entry in session_replication_state.
 *
 * Raises an ERROR when a session origin is already cached, when the matching
 * entry is held by another PID, or when no matching or unused entry exists.
 */
1029 void
1031 {
1032  static bool registered_cleanup;
1033  int i;
1034  int free_slot = -1;
1035 
 /* one-time per-process setup, guarded by the static flag */
1036  if (!registered_cleanup)
1037  {
1039  registered_cleanup = true;
1040  }
1041 
1043 
1044  if (session_replication_state != NULL)
1045  ereport(ERROR,
1046  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1047  errmsg("cannot setup replication origin when one is already setup")));
1048 
1049  /* Lock exclusively, as we may have to create a new table entry. */
1050  LWLockAcquire(ReplicationOriginLock, LW_EXCLUSIVE);
1051 
1052  /*
1053  * Search for either an existing slot for the origin, or a free one we can
1054  * use.
1055  */
 /*
  * NOTE(review): the scan does not stop once a matching entry is found;
  * it keeps walking the rest of the array.  Harmless if at most one entry
  * can carry a given roident -- confirm -- but a break would make that
  * explicit.
  */
1056  for (i = 0; i < max_replication_slots; i++)
1057  {
1058  ReplicationState *curstate = &replication_states[i];
1059 
1060  /* remember where to insert if necessary */
1061  if (curstate->roident == InvalidRepOriginId &&
1062  free_slot == -1)
1063  {
1064  free_slot = i;
1065  continue;
1066  }
1067 
1068  /* not our slot */
1069  if (curstate->roident != node)
1070  continue;
1071 
 /*
  * NOTE(review): this message says "replication identifier" while the
  * equivalent check elsewhere in this file says "replication origin
  * with OID %d is already active for PID %d"; consider aligning them.
  */
1072  else if (curstate->acquired_by != 0)
1073  {
1074  ereport(ERROR,
1075  (errcode(ERRCODE_OBJECT_IN_USE),
1076  errmsg("replication identifier %d is already active for PID %d",
1077  curstate->roident, curstate->acquired_by)));
1078  }
1079 
1080  /* ok, found slot */
1081  session_replication_state = curstate;
1082  }
1083 
1084 
1085  if (session_replication_state == NULL && free_slot == -1)
1086  ereport(ERROR,
1087  (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
1088  errmsg("could not find free replication state slot for replication origin with OID %u",
1089  node),
1090  errhint("Increase max_replication_slots and try again.")));
1091  else if (session_replication_state == NULL)
1092  {
1093  /* initialize new slot */
1094  session_replication_state = &replication_states[free_slot];
1095  Assert(session_replication_state->remote_lsn == InvalidXLogRecPtr);
1096  Assert(session_replication_state->local_lsn == InvalidXLogRecPtr);
1097  session_replication_state->roident = node;
1098  }
1099 
1100 
1101  Assert(session_replication_state->roident != InvalidRepOriginId);
1102 
 /* ownership is recorded while ReplicationOriginLock is still held */
1103  session_replication_state->acquired_by = MyProcPid;
1104 
1105  LWLockRelease(ReplicationOriginLock);
1106 
1107  /* probably this one is pointless */
1108  ConditionVariableBroadcast(&session_replication_state->origin_cv);
1109 }
1110 
1111 /*
1112  * Reset replay state previously setup in this session.
1113  *
1114  * This function may only be called if an origin was setup with
1115  * replorigin_session_setup().
      *
      * Raises an ERROR ("no replication origin is configured") when the session
      * currently has no replication state.  Otherwise, while holding
      * ReplicationOriginLock exclusively, it clears the state's acquired_by
      * field and drops the session's pointer to that state.
      *
      * NOTE(review): listing lines 1118, 1122 and 1137 are absent from this
      * extract.  Per the cross-reference index the definition at origin.c:1118
      * is replorigin_session_reset(void).  The statement that consumes 'cv'
      * after the lock is released is not visible here.
1116  */
1117 void
1119 {
1120  ConditionVariable *cv;
1121 
1123 
1124  if (session_replication_state == NULL)
1125  ereport(ERROR,
1126  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1127  errmsg("no replication origin is configured")));
1128 
1129  LWLockAcquire(ReplicationOriginLock, LW_EXCLUSIVE);
1130 
      /* mark the state as no longer owned by any process */
1131  session_replication_state->acquired_by = 0;
      /* keep the CV address: the session pointer is set to NULL on the next line */
1132  cv = &session_replication_state->origin_cv;
1133  session_replication_state = NULL;
1134 
1135  LWLockRelease(ReplicationOriginLock);
1136 
1138 }
1139 
1140 /*
1141  * Do the same work replorigin_advance() does, just on the session's
1142  * configured origin.
1143  *
1144  * This is noticeably cheaper than using replorigin_advance().
      *
      * Both LSNs only ever move forward here: local_lsn and remote_lsn are each
      * overwritten only when the passed-in value is greater than the stored one.
      * The update happens under the state's own lock, held exclusively.
      *
      * The session must already have a replication state with a valid roident
      * (checked by assertions only).
      *
      * NOTE(review): the function-name line (listing line 1147) is absent from
      * this extract.  Per the cross-reference index it is
      * replorigin_session_advance(XLogRecPtr remote_commit,
      * XLogRecPtr local_commit).
1145  */
1146 void
1148 {
1149  Assert(session_replication_state != NULL);
1150  Assert(session_replication_state->roident != InvalidRepOriginId);
1151 
1152  LWLockAcquire(&session_replication_state->lock, LW_EXCLUSIVE);
      /* advance only; never move either position backwards */
1153  if (session_replication_state->local_lsn < local_commit)
1154  session_replication_state->local_lsn = local_commit;
1155  if (session_replication_state->remote_lsn < remote_commit)
1156  session_replication_state->remote_lsn = remote_commit;
1157  LWLockRelease(&session_replication_state->lock);
1158 }
1159 
1160 /*
1161  * Ask the machinery about the point up to which we successfully replayed
1162  * changes from an already setup replication origin.
      *
      * Returns the session state's remote_lsn.  When 'flush' is true and the
      * state's local_lsn is valid, XLogFlush() is called on that local_lsn
      * before returning.
      *
      * The session must already have a replication state (asserted).
      *
      * NOTE(review): listing lines 1165, 1167 and 1168 are absent from this
      * extract.  Per the cross-reference index the function is
      * replorigin_session_get_progress(bool flush); the declarations of
      * 'remote_lsn' and 'local_lsn' are not visible here.
1163  */
1164 XLogRecPtr
1166 {
1169 
1170  Assert(session_replication_state != NULL);
1171 
      /* copy both positions under the state's lock (shared mode) */
1172  LWLockAcquire(&session_replication_state->lock, LW_SHARED);
1173  remote_lsn = session_replication_state->remote_lsn;
1174  local_lsn = session_replication_state->local_lsn;
1175  LWLockRelease(&session_replication_state->lock);
1176 
      /* the flush is performed after the lock has been released */
1177  if (flush && local_lsn != InvalidXLogRecPtr)
1178  XLogFlush(local_lsn);
1179 
1180  return remote_lsn;
1181 }
1182 
1183 
1184 
1185 /* ---------------------------------------------------------------------------
1186  * SQL functions for working with replication origin.
1187  *
1188  * These mostly should be fairly short wrappers around more generic functions.
1189  * ---------------------------------------------------------------------------
1190  */
1191 
1192 /*
1193  * Create replication origin for the passed in name, and return the assigned
1194  * oid.
      *
      * The work is delegated to replorigin_create(); the C-string copy of the
      * name is released with pfree() before returning.
      *
      * NOTE(review): listing lines 1197, 1200 and 1204 are absent from this
      * extract.  Per the cross-reference index the function is
      * pg_replication_origin_create(PG_FUNCTION_ARGS); the declaration of
      * 'roident' and the statement that assigns 'name' are not visible here.
1195  */
1196 Datum
1198 {
1199  char *name;
1201 
      /* arguments are (check_slots, recoveryOK) */
1202  replorigin_check_prerequisites(false, false);
1203 
1205  roident = replorigin_create(name);
1206 
1207  pfree(name);
1208 
1209  PG_RETURN_OID(roident);
1210 }
1211 
1212 /*
1213  * Drop replication origin.
      *
      * Looks the origin up by name with missing_ok = false, then calls
      * replorigin_drop() with nowait = true.  Returns void.
      *
      * NOTE(review): listing lines 1216, 1219 and 1223 are absent from this
      * extract.  Per the cross-reference index the function is
      * pg_replication_origin_drop(PG_FUNCTION_ARGS); the declaration of
      * 'roident' and the statement that assigns 'name' are not visible here.
1214  */
1215 Datum
1217 {
1218  char *name;
1220 
      /* arguments are (check_slots, recoveryOK) */
1221  replorigin_check_prerequisites(false, false);
1222 
1224 
      /* second argument is missing_ok; the result is asserted to be valid */
1225  roident = replorigin_by_name(name, false);
1226  Assert(OidIsValid(roident));
1227 
      /* second argument is nowait */
1228  replorigin_drop(roident, true);
1229 
1230  pfree(name);
1231 
1232  PG_RETURN_VOID();
1233 }
1234 
1235 /*
1236  * Return oid of a replication origin.
      *
      * The lookup uses missing_ok = true; when the result is not a valid OID
      * the function returns SQL NULL instead.
      *
      * NOTE(review): listing lines 1239, 1242 and 1246 are absent from this
      * extract.  Per the cross-reference index the function is
      * pg_replication_origin_oid(PG_FUNCTION_ARGS); the declaration of
      * 'roident' and the statement that assigns 'name' are not visible here.
1237  */
1238 Datum
1240 {
1241  char *name;
1243 
      /* arguments are (check_slots, recoveryOK) */
1244  replorigin_check_prerequisites(false, false);
1245 
      /* second argument is missing_ok */
1247  roident = replorigin_by_name(name, true);
1248 
1249  pfree(name);
1250 
1251  if (OidIsValid(roident))
1252  PG_RETURN_OID(roident);
      /* no valid OID was found for that name */
1253  PG_RETURN_NULL();
1254 }
1255 
1256 /*
1257  * Setup a replication origin for this session.
      *
      * Resolves the origin by name (missing_ok = false), hands it to
      * replorigin_session_setup(), and then records the id in the global
      * replorigin_session_origin.  Returns void.
      *
      * NOTE(review): listing lines 1260 and 1267 are absent from this extract.
      * Per the cross-reference index the function is
      * pg_replication_origin_session_setup(PG_FUNCTION_ARGS); the statement
      * that assigns 'name' is not visible here.
1258  */
1259 Datum
1261 {
1262  char *name;
1263  RepOriginId origin;
1264 
      /* arguments are (check_slots, recoveryOK) */
1265  replorigin_check_prerequisites(true, false);
1266 
1268  origin = replorigin_by_name(name, false);
1269  replorigin_session_setup(origin);
1270 
      /* only recorded once replorigin_session_setup() has returned */
1271  replorigin_session_origin = origin;
1272 
1273  pfree(name);
1274 
1275  PG_RETURN_VOID();
1276 }
1277 
1278 /*
1279  * Reset previously setup origin in this session
      *
      * Returns void.
      *
      * NOTE(review): listing lines 1282, 1286 and 1288-1290 are absent from
      * this extract, so the statements that perform the reset are not visible
      * here.  Per the cross-reference index the function is
      * pg_replication_origin_session_reset(PG_FUNCTION_ARGS).
1280  */
1281 Datum
1283 {
      /* arguments are (check_slots, recoveryOK) */
1284  replorigin_check_prerequisites(true, false);
1285 
1287 
1291 
1292  PG_RETURN_VOID();
1293 }
1294 
1295 /*
1296  * Has a replication origin been setup for this session.
      *
      * NOTE(review): listing lines 1299 and 1303 are absent from this extract,
      * so the return statement is not visible here.  Per the cross-reference
      * index the function is
      * pg_replication_origin_session_is_setup(PG_FUNCTION_ARGS).
1297  */
1298 Datum
1300 {
      /* arguments are (check_slots, recoveryOK) */
1301  replorigin_check_prerequisites(false, false);
1302 
1304 }
1305 
1306 
1307 /*
1308  * Return the replication progress for origin setup in the current session.
1309  *
1310  * If 'flush' is set to true it is ensured that the returned value corresponds
1311  * to a local transaction that has been flushed. This is useful if asynchronous
1312  * commits are used when replaying replicated transactions.
      *
      * Raises an ERROR ("no replication origin is configured") when the session
      * has no replication state.  Returns SQL NULL when the reported remote LSN
      * is InvalidXLogRecPtr, otherwise the LSN itself.
      *
      * NOTE(review): listing lines 1315 and 1317 are absent from this extract.
      * Per the cross-reference index the function is
      * pg_replication_origin_session_progress(PG_FUNCTION_ARGS); the
      * declaration of 'remote_lsn' is not visible here.
1313  */
1314 Datum
1316 {
1318  bool flush = PG_GETARG_BOOL(0);
1319 
      /* arguments are (check_slots, recoveryOK) */
1320  replorigin_check_prerequisites(true, false);
1321 
1322  if (session_replication_state == NULL)
1323  ereport(ERROR,
1324  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1325  errmsg("no replication origin is configured")));
1326 
1327  remote_lsn = replorigin_session_get_progress(flush);
1328 
      /* an invalid LSN is reported to SQL as NULL */
1329  if (remote_lsn == InvalidXLogRecPtr)
1330  PG_RETURN_NULL();
1331 
1332  PG_RETURN_LSN(remote_lsn);
1333 }
1334 
/*
 * Store the LSN passed as argument 0 in replorigin_session_origin_lsn.
 *
 * Raises an ERROR ("no replication origin is configured") when the session
 * has no replication state.  Returns void.
 *
 * NOTE(review): listing lines 1336 and 1348 are absent from this extract.
 * Per the cross-reference index the function is
 * pg_replication_origin_xact_setup(PG_FUNCTION_ARGS).  The index also lists
 * PG_GETARG_TIMESTAMPTZ and replorigin_session_origin_timestamp, so the
 * missing line 1348 presumably stores a timestamp argument -- confirm
 * against the full source.
 */
1335 Datum
1337 {
1338  XLogRecPtr location = PG_GETARG_LSN(0);
1339 
      /* arguments are (check_slots, recoveryOK) */
1340  replorigin_check_prerequisites(true, false);
1341 
1342  if (session_replication_state == NULL)
1343  ereport(ERROR,
1344  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1345  errmsg("no replication origin is configured")));
1346 
1347  replorigin_session_origin_lsn = location;
1349 
1350  PG_RETURN_VOID();
1351 }
1352 
/*
 * SQL-callable function; returns void.
 *
 * NOTE(review): listing lines 1354, 1358 and 1359 are absent from this
 * extract, so the statements that do the actual work are not visible here.
 * Per the cross-reference index the function is
 * pg_replication_origin_xact_reset(PG_FUNCTION_ARGS); presumably it clears
 * the values stored by pg_replication_origin_xact_setup() -- confirm against
 * the full source.
 */
1353 Datum
1355 {
      /* arguments are (check_slots, recoveryOK) */
1356  replorigin_check_prerequisites(true, false);
1357 
1360 
1361  PG_RETURN_VOID();
1362 }
1363 
1364 
/*
 * Set the remote position of the replication origin named by argument 0 to
 * the LSN given as argument 1.
 *
 * The origin is looked up by name with missing_ok = false.
 * replorigin_advance() is then called with an invalid local LSN and with
 * both go_backward and wal_log set to true.  A RowExclusiveLock on
 * ReplicationOriginRelationId is held around the lookup and the advance, and
 * released explicitly before returning void.
 *
 * NOTE(review): the function-name line (listing line 1366) is absent from
 * this extract.  Per the cross-reference index the function is
 * pg_replication_origin_advance(PG_FUNCTION_ARGS).
 */
1365 Datum
1367 {
1368  text *name = PG_GETARG_TEXT_PP(0);
1369  XLogRecPtr remote_commit = PG_GETARG_LSN(1);
1370  RepOriginId node;
1371 
      /* arguments are (check_slots, recoveryOK) */
1372  replorigin_check_prerequisites(true, false);
1373 
1374  /* lock to prevent the replication origin from vanishing */
1375  LockRelationOid(ReplicationOriginRelationId, RowExclusiveLock);
1376 
      /* second argument is missing_ok */
1377  node = replorigin_by_name(text_to_cstring(name), false);
1378 
1379  /*
1380  * Can't sensibly pass a local commit to be flushed at checkpoint - this
1381  * xact hasn't committed yet. This is why this function should be used to
1382  * set up the initial replication state, but not for replay.
1383  */
1384  replorigin_advance(node, remote_commit, InvalidXLogRecPtr,
1385  true /* go backward */ , true /* WAL log */ );
1386 
1387  UnlockRelationOid(ReplicationOriginRelationId, RowExclusiveLock);
1388 
1389  PG_RETURN_VOID();
1390 }
1391 
1392 
1393 /*
1394  * Return the replication progress for an individual replication origin.
1395  *
1396  * If 'flush' is set to true it is ensured that the returned value corresponds
1397  * to a local transaction that has been flushed. This is useful if asynchronous
1398  * commits are used when replaying replicated transactions.
      *
      * 'flush' is argument 1.  The origin is looked up by name with
      * missing_ok = false and the progress is obtained from
      * replorigin_get_progress().  Returns SQL NULL when that progress is
      * InvalidXLogRecPtr, otherwise the LSN itself.
      *
      * NOTE(review): listing lines 1401, 1405, 1406 and 1410 are absent from
      * this extract.  Per the cross-reference index the function is
      * pg_replication_origin_progress(PG_FUNCTION_ARGS); the declarations of
      * 'roident' and 'remote_lsn' and the statement that assigns 'name' are
      * not visible here.
1399  */
1400 Datum
1402 {
1403  char *name;
1404  bool flush;
1407 
      /* arguments are (check_slots, recoveryOK); here both are true */
1408  replorigin_check_prerequisites(true, true);
1409 
1411  flush = PG_GETARG_BOOL(1);
1412 
      /* second argument is missing_ok; the result is asserted to be valid */
1413  roident = replorigin_by_name(name, false);
1414  Assert(OidIsValid(roident));
1415 
1416  remote_lsn = replorigin_get_progress(roident, flush);
1417 
      /* an invalid LSN is reported to SQL as NULL */
1418  if (remote_lsn == InvalidXLogRecPtr)
1419  PG_RETURN_NULL();
1420 
1421  PG_RETURN_LSN(remote_lsn);
1422 }
1423 
1424 
/*
 * Set-returning function that emits one row per in-use entry of
 * replication_states.
 *
 * Each row has four columns: the origin id (roident), the origin name (NULL
 * if replorigin_by_oid() does not find it), remote_lsn and local_lsn.  The
 * rows are collected in a tuplestore created in the per-query memory context
 * and handed back through rsinfo in materialize mode; the function's own
 * return value is (Datum) 0.
 *
 * Errors out if the caller cannot accept a set, does not allow materialize
 * mode, or if the declared result type is not a row type with
 * REPLICATION_ORIGIN_PROGRESS_COLS attributes.
 *
 * NOTE(review): listing lines 1426, 1434 and 1474-1476 are absent from this
 * extract.  Per the cross-reference index the function is
 * pg_show_replication_origin_status(PG_FUNCTION_ARGS); the #define of
 * REPLICATION_ORIGIN_PROGRESS_COLS and the declarations of 'state', 'values'
 * and 'nulls' are not visible here.
 */
1425 Datum
1427 {
1428  ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1429  TupleDesc tupdesc;
1430  Tuplestorestate *tupstore;
1431  MemoryContext per_query_ctx;
1432  MemoryContext oldcontext;
1433  int i;
1435 
1436  /* we want to return 0 rows if slot is set to zero */
1437  replorigin_check_prerequisites(false, true);
1438 
1439  if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
1440  ereport(ERROR,
1441  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1442  errmsg("set-valued function called in context that cannot accept a set")));
1443  if (!(rsinfo->allowedModes & SFRM_Materialize))
1444  ereport(ERROR,
1445  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1446  errmsg("materialize mode required, but it is not allowed in this context")));
1447  if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1448  elog(ERROR, "return type must be a row type");
1449 
1450  if (tupdesc->natts != REPLICATION_ORIGIN_PROGRESS_COLS)
1451  elog(ERROR, "wrong function definition");
1452 
      /* the tuplestore is created while the per-query context is current */
1453  per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
1454  oldcontext = MemoryContextSwitchTo(per_query_ctx);
1455 
1456  tupstore = tuplestore_begin_heap(true, false, work_mem);
1457  rsinfo->returnMode = SFRM_Materialize;
1458  rsinfo->setResult = tupstore;
1459  rsinfo->setDesc = tupdesc;
1460 
1461  MemoryContextSwitchTo(oldcontext);
1462 
1463 
1464  /* prevent slots from being concurrently dropped */
1465  LWLockAcquire(ReplicationOriginLock, LW_SHARED);
1466 
1467  /*
1468  * Iterate through all possible replication_states, display if they are
1469  * filled. ReplicationOriginLock is held in shared mode for the whole scan,
1470  * and each state's own lock is taken in shared mode while its LSNs are read.
1471  */
1472  for (i = 0; i < max_replication_slots; i++)
1473  {
1477  char *roname;
1478 
1479  state = &replication_states[i];
1480 
1481  /* unused slot, nothing to display */
1482  if (state->roident == InvalidRepOriginId)
1483  continue;
1484 
      /* start with every column marked NULL; filled columns are flipped below */
1485  memset(values, 0, sizeof(values));
1486  memset(nulls, 1, sizeof(nulls));
1487 
1488  values[0] = ObjectIdGetDatum(state->roident);
1489  nulls[0] = false;
1490 
1491  /*
1492  * We're not preventing the origin to be dropped concurrently, so
1493  * silently accept that it might be gone.
1494  */
1495  if (replorigin_by_oid(state->roident, true,
1496  &roname))
1497  {
1498  values[1] = CStringGetTextDatum(roname);
1499  nulls[1] = false;
1500  }
1501 
      /* read both LSNs under the per-state lock */
1502  LWLockAcquire(&state->lock, LW_SHARED);
1503 
1504  values[2] = LSNGetDatum(state->remote_lsn);
1505  nulls[2] = false;
1506 
1507  values[3] = LSNGetDatum(state->local_lsn);
1508  nulls[3] = false;
1509 
1510  LWLockRelease(&state->lock);
1511 
1512  tuplestore_putvalues(tupstore, tupdesc, values, nulls);
1513  }
1514 
1515  tuplestore_donestoring(tupstore);
1516 
1517  LWLockRelease(ReplicationOriginLock);
1518 
1519 #undef REPLICATION_ORIGIN_PROGRESS_COLS
1520 
1521  return (Datum) 0;
1522 }
static ReplicationState * session_replication_state
Definition: origin.c:174
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, Datum *values, bool *isnull)
Definition: tuplestore.c:750
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
static void replorigin_check_prerequisites(bool check_slots, bool recoveryOK)
Definition: origin.c:180
Definition: lwlock.h:32
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
#define IsA(nodeptr, _type_)
Definition: nodes.h:567
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition: funcapi.c:211
int MyProcPid
Definition: globals.c:40
int errhint(const char *fmt,...)
Definition: elog.c:987
XLogRecPtr local_lsn
Definition: origin.c:120
void systable_endscan(SysScanDesc sysscan)
Definition: genam.c:502
#define GETSTRUCT(TUP)
Definition: htup_details.h:668
#define RelationGetDescr(relation)
Definition: rel.h:433
#define DoNotReplicateId
Definition: origin.h:35
Datum pg_replication_origin_xact_setup(PG_FUNCTION_ARGS)
Definition: origin.c:1336
void UnlockRelationOid(Oid relid, LOCKMODE lockmode)
Definition: lmgr.c:182
#define write(a, b, c)
Definition: win32.h:14
Datum pg_replication_origin_drop(PG_FUNCTION_ARGS)
Definition: origin.c:1216
#define ExclusiveLock
Definition: lockdefs.h:44
int64 TimestampTz
Definition: timestamp.h:39
XLogRecPtr remote_lsn
Definition: origin.c:144
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:238
uint32 pg_crc32c
Definition: pg_crc32c.h:38
static void ReplicationOriginExitCleanup(int code, Datum arg)
Definition: origin.c:998
RepOriginId roident
Definition: origin.c:143
void replorigin_drop(RepOriginId roident, bool nowait)
Definition: origin.c:334
#define tuplestore_donestoring(state)
Definition: tuplestore.h:60
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
unsigned char uint8
Definition: c.h:323
uint16 RepOriginId
Definition: xlogdefs.h:51
void ConditionVariableBroadcast(ConditionVariable *cv)
XLogRecPtr replorigin_session_get_progress(bool flush)
Definition: origin.c:1165
int errcode(int sqlerrcode)
Definition: elog.c:575
#define LSNGetDatum(X)
Definition: pg_lsn.h:22
bool superuser(void)
Definition: superuser.c:47
#define MemSet(start, val, len)
Definition: c.h:908
void CatalogTupleDelete(Relation heapRel, ItemPointer tid)
Definition: indexing.c:256
void ReplicationOriginShmemInit(void)
Definition: origin.c:495
void replorigin_advance(RepOriginId node, XLogRecPtr remote_commit, XLogRecPtr local_commit, bool go_backward, bool wal_log)
Definition: origin.c:834
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull)
Definition: heaptuple.c:1074
#define heap_close(r, l)
Definition: heapam.h:97
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:244
void replorigin_session_setup(RepOriginId node)
Definition: origin.c:1030
bool replorigin_by_oid(RepOriginId roident, bool missing_ok, char **roname)
Definition: origin.c:434
#define LOG
Definition: elog.h:26
void heap_freetuple(HeapTuple htup)
Definition: heaptuple.c:1773
unsigned int Oid
Definition: postgres_ext.h:31
bool RecoveryInProgress(void)
Definition: xlog.c:7949
#define OidIsValid(objectId)
Definition: c.h:605
#define PANIC
Definition: elog.h:53
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2783
static int fd(const char *x, int i)
Definition: preproc-init.c:105
SysScanDesc systable_beginscan(Relation heapRelation, Oid indexId, bool indexOK, Snapshot snapshot, int nkeys, ScanKey key)
Definition: genam.c:331
#define PG_BINARY
Definition: c.h:1080
RepOriginId replorigin_by_name(char *roname, bool missing_ok)
Definition: origin.c:211
RepOriginId roident
Definition: origin.c:108
XLogRecPtr EndRecPtr
Definition: xlogreader.h:120
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:278
void LWLockRegisterTranche(int tranche_id, const char *tranche_name)
Definition: lwlock.c:602
Datum pg_replication_origin_advance(PG_FUNCTION_ARGS)
Definition: origin.c:1366
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1725
#define PG_RETURN_LSN(x)
Definition: pg_lsn.h:25
void CheckPointReplicationOrigin(void)
Definition: origin.c:545
void ConditionVariableInit(ConditionVariable *cv)
void replorigin_redo(XLogReaderState *record)
Definition: origin.c:773
HeapTuple systable_getnext(SysScanDesc sysscan)
Definition: genam.c:419
void pfree(void *pointer)
Definition: mcxt.c:1031
#define XLogRecGetData(decoder)
Definition: xlogreader.h:230
void ConditionVariableCancelSleep(void)
#define ReplicationOriginIdentIndex
Definition: indexing.h:334
#define ObjectIdGetDatum(X)
Definition: postgres.h:492
#define ERROR
Definition: elog.h:43
#define PG_UINT16_MAX
Definition: c.h:406
void replorigin_session_reset(void)
Definition: origin.c:1118
Oid CatalogTupleInsert(Relation heapRel, HeapTuple tup)
Definition: indexing.c:163
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2386
FormData_pg_replication_origin * Form_pg_replication_origin
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:372
#define InitDirtySnapshot(snapshotdata)
Definition: tqual.h:103
LWLock lock
Definition: origin.c:135
ItemPointerData t_self
Definition: htup.h:65
#define DEBUG2
Definition: elog.h:24
XLogRecPtr replorigin_session_origin_lsn
Definition: origin.c:156
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:359
TimestampTz replorigin_session_origin_timestamp
Definition: origin.c:157
#define XLOG_REPLORIGIN_SET
Definition: origin.h:31
void StartupReplicationOrigin(void)
Definition: origin.c:658
#define RowExclusiveLock
Definition: lockdefs.h:38
int errcode_for_file_access(void)
Definition: elog.c:598
struct ReplicationState ReplicationState
unsigned int uint32
Definition: c.h:325
Datum pg_replication_origin_session_setup(PG_FUNCTION_ARGS)
Definition: origin.c:1260
Datum pg_replication_origin_session_reset(PG_FUNCTION_ARGS)
Definition: origin.c:1282
RepOriginId replorigin_create(char *roname)
Definition: origin.c:242
Datum pg_show_replication_origin_status(PG_FUNCTION_ARGS)
Definition: origin.c:1426
#define ereport(elevel, rest)
Definition: elog.h:122
#define XLogRecGetInfo(decoder)
Definition: xlogreader.h:226
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition: fd.c:601
Datum pg_replication_origin_session_is_setup(PG_FUNCTION_ARGS)
Definition: origin.c:1299
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:677
int CloseTransientFile(int fd)
Definition: fd.c:2556
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Definition: syscache.c:1112
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:323
Tuplestorestate * tuplestore_begin_heap(bool randomAccess, bool interXact, int maxKBytes)
Definition: tuplestore.c:318
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:415
#define XLOG_REPLORIGIN_DROP
Definition: origin.h:32
Size mul_size(Size s1, Size s2)
Definition: shmem.c:492
#define PG_GETARG_LSN(n)
Definition: pg_lsn.h:24
struct ReplicationStateOnDisk ReplicationStateOnDisk
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:324
uintptr_t Datum
Definition: postgres.h:367
void CommandCounterIncrement(void)
Definition: xact.c:914
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:1160
Size add_size(Size s1, Size s2)
Definition: shmem.c:475
Relation heap_open(Oid relationId, LOCKMODE lockmode)
Definition: heapam.c:1294
int work_mem
Definition: globals.c:120
Size ReplicationOriginShmemSize(void)
Definition: origin.c:475
#define REPLICATION_STATE_MAGIC
Definition: origin.c:177
#define InvalidOid
Definition: postgres_ext.h:36
static ReplicationStateCtl * replication_states_ctl
Definition: origin.c:167
Datum pg_replication_origin_xact_reset(PG_FUNCTION_ARGS)
Definition: origin.c:1354
int allowedModes
Definition: execnodes.h:297
#define PG_RETURN_VOID()
Definition: fmgr.h:314
struct ReplicationStateCtl ReplicationStateCtl
SetFunctionReturnMode returnMode
Definition: execnodes.h:299
int max_replication_slots
Definition: slot.c:99
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
XLogRecPtr remote_lsn
Definition: origin.c:113
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
Datum pg_replication_origin_progress(PG_FUNCTION_ARGS)
Definition: origin.c:1401
RepOriginId node_id
Definition: origin.h:28
uint64 XLogRecPtr
Definition: xlogdefs.h:21
ReplicationState states[FLEXIBLE_ARRAY_MEMBER]
Definition: origin.c:151
#define Assert(condition)
Definition: c.h:699
#define XLR_INFO_MASK
Definition: xlogrecord.h:62
RepOriginId replorigin_session_origin
Definition: origin.c:155
Definition: regguts.h:298
RepOriginId node_id
Definition: origin.h:22
ConditionVariable origin_cv
Definition: origin.c:130
size_t Size
Definition: c.h:433
void replorigin_session_advance(XLogRecPtr remote_commit, XLogRecPtr local_commit)
Definition: origin.c:1147
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1121
bool IsTransactionState(void)
Definition: xact.c:350
XLogRecPtr remote_lsn
Definition: origin.h:21
MemoryContext ecxt_per_query_memory
Definition: execnodes.h:225
const char * name
Definition: encode.c:521
#define InvalidRepOriginId
Definition: origin.h:34
Tuplestorestate * setResult
Definition: execnodes.h:302
#define DatumGetPointer(X)
Definition: postgres.h:534
static Datum values[MAXATTR]
Definition: bootstrap.c:164
char * text_to_cstring(const text *t)
Definition: varlena.c:182
ExprContext * econtext
Definition: execnodes.h:295
TupleDesc setDesc
Definition: execnodes.h:303
int errmsg(const char *fmt,...)
Definition: elog.c:797
Datum pg_replication_origin_oid(PG_FUNCTION_ARGS)
Definition: origin.c:1239
#define PG_GETARG_TIMESTAMPTZ(n)
Definition: timestamp.h:36
int i
Datum pg_replication_origin_session_progress(PG_FUNCTION_ARGS)
Definition: origin.c:1315
static ReplicationState * replication_states
Definition: origin.c:166
void ScanKeyInit(ScanKey entry, AttrNumber attributeNumber, StrategyNumber strategy, RegProcedure procedure, Datum argument)
Definition: scankey.c:76
#define CStringGetTextDatum(s)
Definition: builtins.h:95
void * arg
Definition: c.h:516
#define PG_FUNCTION_ARGS
Definition: fmgr.h:163
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:98
#define elog
Definition: elog.h:219
void LockRelationOid(Oid relid, LOCKMODE lockmode)
Definition: lmgr.c:105
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:89
Datum pg_replication_origin_create(PG_FUNCTION_ARGS)
Definition: origin.c:1197
#define REPLICATION_ORIGIN_PROGRESS_COLS
void XLogBeginInsert(void)
Definition: xloginsert.c:120
#define PG_RETURN_OID(x)
Definition: fmgr.h:325
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:94
XLogRecPtr replorigin_get_progress(RepOriginId node, bool flush)
Definition: origin.c:957
#define PG_RETURN_NULL()
Definition: fmgr.h:310
#define read(a, b, c)
Definition: win32.h:13
#define BTEqualStrategyNumber
Definition: stratnum.h:31
#define offsetof(type, field)
Definition: c.h:622