PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
origin.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * origin.c
4  * Logical replication progress tracking support.
5  *
6  * Copyright (c) 2013-2017, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * src/backend/replication/logical/origin.c
10  *
11  * NOTES
12  *
13  * This file provides the following:
14  * * An infrastructure to name nodes in a replication setup
15  * * A facility to store and persist replication progress in an
16  * efficient and durable manner.
17  *
18  * A replication origin consists of a descriptive, user-defined, external
19  * name and a short, thus space-efficient, internal 2 byte one. This split
20  * exists because replication origins have to be stored in WAL and shared
21  * memory and long descriptors would be inefficient. For now only use 2 bytes
22  * for the internal id of a replication origin as it seems unlikely that there
23  * soon will be more than 65k nodes in one replication setup; and using only
24  * two bytes allows us to be more space efficient.
25  *
26  * Replication progress is tracked in a shared memory table
27  * (ReplicationState) that's dumped to disk every checkpoint. Entries
28  * ('slots') in this table are identified by the internal id. That's the case
29  * because it allows to increase replication progress during crash
30  * recovery. To allow doing so we store the original LSN (from the originating
31  * system) of a transaction in the commit record. That allows to recover the
32  * precise replayed state after crash recovery; without requiring synchronous
33  * commits. Allowing logical replication to use asynchronous commit is
34  * generally good for performance, but especially important as it allows a
35  * single threaded replay process to keep up with a source that has multiple
36  * backends generating changes concurrently. For efficiency and simplicity
37  * reasons a backend can setup one replication origin that's from then used as
38  * the source of changes produced by the backend, until reset again.
39  *
40  * This infrastructure is intended to be used in cooperation with logical
41  * decoding. When replaying from a remote system the configured origin is
42  * provided to output plugins, allowing prevention of replication loops and
43  * other filtering.
44  *
45  * There are several levels of locking at work:
46  *
47  * * To create and drop replication origins an exclusive lock on
48  * pg_replication_origin is required for the duration. That allows us to
49  * safely and conflict free assign new origins using a dirty snapshot.
50  *
51  * * When creating an in-memory replication progress slot the ReplicationOrigin
52  * LWLock has to be held exclusively; when iterating over the replication
53  * progress a shared lock has to be held, the same when advancing the
54  * replication progress of an individual backend that has not setup as the
55  * session's replication origin.
56  *
57  * * When manipulating or looking at the remote_lsn and local_lsn fields of a
58  * replication progress slot that slot's lwlock has to be held. That's
59  * primarily because we do not assume 8 byte writes (the LSN) are atomic on
60  * all our platforms, but it also simplifies memory ordering concerns
61  * between the remote and local lsn. We use a lwlock instead of a spinlock
62  * so it's less harmful to hold the lock over a WAL write
63  * (cf. replorigin_advance).
64  *
65  * ---------------------------------------------------------------------------
66  */
67 
68 #include "postgres.h"
69 
70 #include <unistd.h>
71 #include <sys/stat.h>
72 
73 #include "funcapi.h"
74 #include "miscadmin.h"
75 
76 #include "access/genam.h"
77 #include "access/heapam.h"
78 #include "access/htup_details.h"
79 #include "access/xact.h"
80 
81 #include "catalog/indexing.h"
82 
83 #include "nodes/execnodes.h"
84 
85 #include "replication/origin.h"
86 #include "replication/logical.h"
87 
88 #include "storage/fd.h"
89 #include "storage/ipc.h"
90 #include "storage/lmgr.h"
91 #include "storage/copydir.h"
92 
93 #include "utils/builtins.h"
94 #include "utils/fmgroids.h"
95 #include "utils/pg_lsn.h"
96 #include "utils/rel.h"
97 #include "utils/syscache.h"
98 #include "utils/tqual.h"
99 
100 /*
101  * Replay progress of a single remote node.
102  */
103 typedef struct ReplicationState
104 {
105  /*
106  * Local identifier for the remote node.
107  */
109 
110  /*
111  * Location of the latest commit from the remote side.
112  */
114 
115  /*
116  * Remember the local lsn of the commit record so we can XLogFlush() to it
117  * during a checkpoint so we know the commit record actually is safe on
118  * disk.
119  */
121 
122  /*
123  * PID of backend that's acquired slot, or 0 if none.
124  */
126 
127  /*
128  * Lock protecting remote_lsn and local_lsn.
129  */
132 
133 /*
134  * On disk version of ReplicationState.
135  */
137 {
141 
142 
143 typedef struct ReplicationStateCtl
144 {
146  ReplicationState states[FLEXIBLE_ARRAY_MEMBER];
148 
149 /* external variables */
153 
154 /*
155  * Base address into a shared memory array of replication states of size
156  * max_replication_slots.
157  *
158  * XXX: Should we use a separate variable to size this rather than
159  * max_replication_slots?
160  */
163 
164 /*
165  * Backend-local, cached element from ReplicationState for use in a backend
166  * replaying remote commits, so we don't have to search ReplicationState for
167  * the backend's current RepOriginId.
168  */
170 
171 /* Magic for on disk files. */
172 #define REPLICATION_STATE_MAGIC ((uint32) 0x1257DADE)
173 
/*
 * Common precondition checks for the SQL-callable replication origin
 * functions.
 *
 * check_slots: when true, raise an ERROR if max_replication_slots is 0.
 * recoveryOK:  when false, raise an ERROR if RecoveryInProgress() reports
 *              true.
 *
 * Independently of both flags, an ERROR is raised when the caller is not a
 * superuser. The checks run in the order written, so the privilege error
 * takes precedence over the other two. Returns normally if every check
 * passes.
 */
174 static void
175 replorigin_check_prerequisites(bool check_slots, bool recoveryOK)
176 {
177  if (!superuser())
178  ereport(ERROR,
179  (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
180  errmsg("only superusers can query or manipulate replication origins")));
181 
182  if (check_slots && max_replication_slots == 0)
183  ereport(ERROR,
184  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
185  errmsg("cannot query or manipulate replication origin when max_replication_slots = 0")));
186 
187  if (!recoveryOK && RecoveryInProgress())
188  ereport(ERROR,
189  (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
190  errmsg("cannot manipulate replication origins during recovery")));
191 
192 }
193 
194 
195 /* ---------------------------------------------------------------------------
196  * Functions for working with replication origins themselves.
197  * ---------------------------------------------------------------------------
198  */
199 
200 /*
201  * Check for a persistent replication origin identified by name.
202  *
203  * Returns InvalidOid if the node isn't known yet and missing_ok is true.
204  */
206 replorigin_by_name(char *roname, bool missing_ok)
207 {
209  Oid roident = InvalidOid;
210  HeapTuple tuple;
211  Datum roname_d;
212 
213  roname_d = CStringGetTextDatum(roname);
214 
215  tuple = SearchSysCache1(REPLORIGNAME, roname_d);
216  if (HeapTupleIsValid(tuple))
217  {
218  ident = (Form_pg_replication_origin) GETSTRUCT(tuple);
219  roident = ident->roident;
220  ReleaseSysCache(tuple);
221  }
222  else if (!missing_ok)
223  elog(ERROR, "cache lookup failed for replication origin '%s'",
224  roname);
225 
226  return roident;
227 }
228 
229 /*
230  * Create a replication origin.
231  *
232  * Needs to be called in a transaction.
233  */
235 replorigin_create(char *roname)
236 {
237  Oid roident;
238  HeapTuple tuple = NULL;
239  Relation rel;
240  Datum roname_d;
241  SnapshotData SnapshotDirty;
242  SysScanDesc scan;
243  ScanKeyData key;
244 
245  roname_d = CStringGetTextDatum(roname);
246 
248 
249  /*
250  * We need the numeric replication origin to be 16bit wide, so we cannot
251  * rely on the normal oid allocation. Instead we simply scan
252  * pg_replication_origin for the first unused id. That's not particularly
253  * efficient, but this should be a fairly infrequent operation - we can
254  * easily spend a bit more code on this when it turns out it needs to be
255  * faster.
256  *
257  * We handle concurrency by taking an exclusive lock (allowing reads!)
258  * over the table for the duration of the search. Because we use a "dirty
259  * snapshot" we can read rows that other in-progress sessions have
260  * written, even though they would be invisible with normal snapshots. Due
261  * to the exclusive lock there's no danger that new rows can appear while
262  * we're checking.
263  */
264  InitDirtySnapshot(SnapshotDirty);
265 
267 
268  for (roident = InvalidOid + 1; roident < PG_UINT16_MAX; roident++)
269  {
270  bool nulls[Natts_pg_replication_origin];
272  bool collides;
273 
275 
276  ScanKeyInit(&key,
278  BTEqualStrategyNumber, F_OIDEQ,
279  ObjectIdGetDatum(roident));
280 
282  true /* indexOK */ ,
283  &SnapshotDirty,
284  1, &key);
285 
286  collides = HeapTupleIsValid(systable_getnext(scan));
287 
288  systable_endscan(scan);
289 
290  if (!collides)
291  {
292  /*
293  * Ok, found an unused roident, insert the new row and do a CCI,
294  * so our callers can look it up if they want to.
295  */
296  memset(&nulls, 0, sizeof(nulls));
297 
299  values[Anum_pg_replication_origin_roname - 1] = roname_d;
300 
301  tuple = heap_form_tuple(RelationGetDescr(rel), values, nulls);
302  CatalogTupleInsert(rel, tuple);
304  break;
305  }
306  }
307 
308  /* now release lock again, */
310 
311  if (tuple == NULL)
312  ereport(ERROR,
313  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
314  errmsg("could not find free replication origin OID")));
315 
316  heap_freetuple(tuple);
317  return roident;
318 }
319 
320 
321 /*
322  * Drop replication origin.
323  *
324  * Needs to be called in a transaction.
325  */
326 void
328 {
329  HeapTuple tuple = NULL;
330  Relation rel;
331  int i;
332 
334 
336 
337  /* cleanup the slot state info */
338  LWLockAcquire(ReplicationOriginLock, LW_EXCLUSIVE);
339 
340  for (i = 0; i < max_replication_slots; i++)
341  {
342  ReplicationState *state = &replication_states[i];
343 
344  /* found our slot */
345  if (state->roident == roident)
346  {
347  if (state->acquired_by != 0)
348  {
349  ereport(ERROR,
350  (errcode(ERRCODE_OBJECT_IN_USE),
351  errmsg("could not drop replication origin with OID %d, in use by PID %d",
352  state->roident,
353  state->acquired_by)));
354  }
355 
356  /* first WAL log */
357  {
358  xl_replorigin_drop xlrec;
359 
360  xlrec.node_id = roident;
361  XLogBeginInsert();
362  XLogRegisterData((char *) (&xlrec), sizeof(xlrec));
363  XLogInsert(RM_REPLORIGIN_ID, XLOG_REPLORIGIN_DROP);
364  }
365 
366  /* then reset the in-memory entry */
367  state->roident = InvalidRepOriginId;
368  state->remote_lsn = InvalidXLogRecPtr;
369  state->local_lsn = InvalidXLogRecPtr;
370  break;
371  }
372  }
373  LWLockRelease(ReplicationOriginLock);
374 
376  if (!HeapTupleIsValid(tuple))
377  elog(ERROR, "cache lookup failed for replication origin with oid %u",
378  roident);
379 
380  CatalogTupleDelete(rel, &tuple->t_self);
381  ReleaseSysCache(tuple);
382 
384 
385  /* now release lock again, */
387 }
388 
389 
390 /*
391  * Lookup replication origin via its oid and return the name.
392  *
393  * The external name is palloc'd in the calling context.
394  *
395  * Returns true if the origin is known, false otherwise.
396  */
397 bool
398 replorigin_by_oid(RepOriginId roident, bool missing_ok, char **roname)
399 {
400  HeapTuple tuple;
402 
403  Assert(OidIsValid((Oid) roident));
404  Assert(roident != InvalidRepOriginId);
405  Assert(roident != DoNotReplicateId);
406 
408  ObjectIdGetDatum((Oid) roident));
409 
410  if (HeapTupleIsValid(tuple))
411  {
412  ric = (Form_pg_replication_origin) GETSTRUCT(tuple);
413  *roname = text_to_cstring(&ric->roname);
414  ReleaseSysCache(tuple);
415 
416  return true;
417  }
418  else
419  {
420  *roname = NULL;
421 
422  if (!missing_ok)
423  elog(ERROR, "cache lookup failed for replication origin with oid %u",
424  roident);
425 
426  return false;
427  }
428 }
429 
430 
431 /* ---------------------------------------------------------------------------
432  * Functions for handling replication progress.
433  * ---------------------------------------------------------------------------
434  */
435 
436 Size
438 {
439  Size size = 0;
440 
441  /*
442  * XXX: max_replication_slots is arguably the wrong thing to use, as here
443  * we keep the replay state of *remote* transactions. But for now it seems
444  * sufficient to reuse it, lest we introduce a separate GUC.
445  */
446  if (max_replication_slots == 0)
447  return size;
448 
449  size = add_size(size, offsetof(ReplicationStateCtl, states));
450 
451  size = add_size(size,
453  return size;
454 }
455 
/*
 * Attach to the "ReplicationOriginState" shared memory area and, if it was
 * not found already initialized, zero it and set up the per-state LWLocks.
 *
 * Does nothing when max_replication_slots is 0. The LWLock tranche is
 * registered on every call, whether or not the area was freshly created.
 */
456 void
458 {
459  bool found;
460 
461  if (max_replication_slots == 0)
462  return;
463 
464  replication_states_ctl = (ReplicationStateCtl *)
465  ShmemInitStruct("ReplicationOriginState",
467  &found);
468  replication_states = replication_states_ctl->states;
469 
470  if (!found)
471  {
472  int i;
473 
 /*
  * Zero the area starting at the control struct, not at states[]:
  * ReplicationOriginShmemSize() appears to include the header that
  * precedes states[] (offsetof(ReplicationStateCtl, states)), so
  * clearing that many bytes from states[] would write past the end.
  */
474  MemSet(replication_states_ctl, 0, ReplicationOriginShmemSize());
475 
 /* must come after the MemSet, which now clears the header too */
476  replication_states_ctl->tranche_id = LWTRANCHE_REPLICATION_ORIGIN;
477 
478  for (i = 0; i < max_replication_slots; i++)
479  LWLockInitialize(&replication_states[i].lock,
480  replication_states_ctl->tranche_id);
481  }
482 
483  LWLockRegisterTranche(replication_states_ctl->tranche_id,
484  "replication_origin");
485 }
486 
487 /* ---------------------------------------------------------------------------
488  * Perform a checkpoint of each replication origin's progress with respect to
489  * the replayed remote_lsn. Make sure that all transactions we refer to in the
490  * checkpoint (local_lsn) are actually on-disk. This might not yet be the case
491  * if the transactions were originally committed asynchronously.
492  *
493  * We store checkpoints in the following format:
494  * +-------+------------------------+------------------+-----+--------+
495  * | MAGIC | ReplicationStateOnDisk | struct Replic... | ... | CRC32C | EOF
496  * +-------+------------------------+------------------+-----+--------+
497  *
498  * So it's just the magic, followed by the statically sized
499  * ReplicationStateOnDisk structs. Note that the maximum number of
500  * ReplicationState is determined by max_replication_slots.
501  * ---------------------------------------------------------------------------
502  */
503 void
505 {
506  const char *tmppath = "pg_logical/replorigin_checkpoint.tmp";
507  const char *path = "pg_logical/replorigin_checkpoint";
508  int tmpfd;
509  int i;
511  pg_crc32c crc;
512 
513  if (max_replication_slots == 0)
514  return;
515 
516  INIT_CRC32C(crc);
517 
518  /* make sure no old temp file is remaining */
519  if (unlink(tmppath) < 0 && errno != ENOENT)
520  ereport(PANIC,
522  errmsg("could not remove file \"%s\": %m",
523  tmppath)));
524 
525  /*
526  * no other backend can perform this at the same time, we're protected by
527  * CheckpointLock.
528  */
529  tmpfd = OpenTransientFile((char *) tmppath,
530  O_CREAT | O_EXCL | O_WRONLY | PG_BINARY,
531  S_IRUSR | S_IWUSR);
532  if (tmpfd < 0)
533  ereport(PANIC,
535  errmsg("could not create file \"%s\": %m",
536  tmppath)));
537 
538  /* write magic */
539  if ((write(tmpfd, &magic, sizeof(magic))) != sizeof(magic))
540  {
541  CloseTransientFile(tmpfd);
542  ereport(PANIC,
544  errmsg("could not write to file \"%s\": %m",
545  tmppath)));
546  }
547  COMP_CRC32C(crc, &magic, sizeof(magic));
548 
549  /* prevent concurrent creations/drops */
550  LWLockAcquire(ReplicationOriginLock, LW_SHARED);
551 
552  /* write actual data */
553  for (i = 0; i < max_replication_slots; i++)
554  {
555  ReplicationStateOnDisk disk_state;
556  ReplicationState *curstate = &replication_states[i];
557  XLogRecPtr local_lsn;
558 
559  if (curstate->roident == InvalidRepOriginId)
560  continue;
561 
562  /* zero, to avoid uninitialized padding bytes */
563  memset(&disk_state, 0, sizeof(disk_state));
564 
565  LWLockAcquire(&curstate->lock, LW_SHARED);
566 
567  disk_state.roident = curstate->roident;
568 
569  disk_state.remote_lsn = curstate->remote_lsn;
570  local_lsn = curstate->local_lsn;
571 
572  LWLockRelease(&curstate->lock);
573 
574  /* make sure we only write out a commit that's persistent */
575  XLogFlush(local_lsn);
576 
577  if ((write(tmpfd, &disk_state, sizeof(disk_state))) !=
578  sizeof(disk_state))
579  {
580  CloseTransientFile(tmpfd);
581  ereport(PANIC,
583  errmsg("could not write to file \"%s\": %m",
584  tmppath)));
585  }
586 
587  COMP_CRC32C(crc, &disk_state, sizeof(disk_state));
588  }
589 
590  LWLockRelease(ReplicationOriginLock);
591 
592  /* write out the CRC */
593  FIN_CRC32C(crc);
594  if ((write(tmpfd, &crc, sizeof(crc))) != sizeof(crc))
595  {
596  CloseTransientFile(tmpfd);
597  ereport(PANIC,
599  errmsg("could not write to file \"%s\": %m",
600  tmppath)));
601  }
602 
603  CloseTransientFile(tmpfd);
604 
605  /* fsync, rename to permanent file, fsync file and directory */
606  durable_rename(tmppath, path, PANIC);
607 }
608 
609 /*
610  * Recover replication replay status from checkpoint data saved earlier by
611  * CheckPointReplicationOrigin.
612  *
613  * This only needs to be called at startup and *not* during every checkpoint
614  * read during recovery (e.g. in HS or PITR from a base backup) afterwards. All
615  * state thereafter can be recovered by looking at commit records.
 *
 * Returns without touching shared memory if max_replication_slots is 0 or
 * the checkpoint file does not exist. Any other failure (open/read error,
 * wrong magic, truncated entry, more entries than max_replication_slots, or
 * a checksum mismatch) raises PANIC.
616  */
617 void
619 {
620  const char *path = "pg_logical/replorigin_checkpoint";
621  int fd;
622  int readBytes;
624  int last_state = 0;
625  pg_crc32c file_crc;
626  pg_crc32c crc;
627 
628  /* don't want to overwrite already existing state */
629 #ifdef USE_ASSERT_CHECKING
630  static bool already_started = false;
631 
632  Assert(!already_started);
633  already_started = true;
634 #endif
635 
636  if (max_replication_slots == 0)
637  return;
638 
639  INIT_CRC32C(crc);
640 
641  elog(DEBUG2, "starting up replication origin progress state");
642 
643  fd = OpenTransientFile((char *) path, O_RDONLY | PG_BINARY, 0);
644 
645  /*
646  * might have had max_replication_slots == 0 last run, or we just brought
647  * up a standby.
648  */
649  if (fd < 0 && errno == ENOENT)
650  return;
651  else if (fd < 0)
652  ereport(PANIC,
654  errmsg("could not open file \"%s\": %m",
655  path)));
656 
657  /* verify magic, that is written even if nothing was active */
658  readBytes = read(fd, &magic, sizeof(magic));
659  if (readBytes != sizeof(magic))
660  ereport(PANIC,
661  (errmsg("could not read file \"%s\": %m",
662  path)));
663  COMP_CRC32C(crc, &magic, sizeof(magic));
664 
665  if (magic != REPLICATION_STATE_MAGIC)
666  ereport(PANIC,
667  (errmsg("replication checkpoint has wrong magic %u instead of %u",
668  magic, REPLICATION_STATE_MAGIC)));
669 
670  /* we can skip locking here, no other access is possible */
671 
672  /* recover individual states, until there are no more to be found */
673  while (true)
674  {
675  ReplicationStateOnDisk disk_state;
676 
677  readBytes = read(fd, &disk_state, sizeof(disk_state));
678 
679  /* no further data */
680  if (readBytes == sizeof(crc))
681  {
682  /* not pretty, but simple ... */
683  file_crc = *(pg_crc32c *) &disk_state;
684  break;
685  }
686 
687  if (readBytes < 0)
688  {
689  ereport(PANIC,
691  errmsg("could not read file \"%s\": %m",
692  path)));
693  }
694 
695  if (readBytes != sizeof(disk_state))
696  {
697  ereport(PANIC,
699  errmsg("could not read file \"%s\": read %d of %zu",
700  path, readBytes, sizeof(disk_state))));
701  }
702 
703  COMP_CRC32C(crc, &disk_state, sizeof(disk_state));
704 
705  if (last_state == max_replication_slots)
706  ereport(PANIC,
707  (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
708  errmsg("could not find free replication state, increase max_replication_slots")));
709 
710  /* copy data to shared memory */
711  replication_states[last_state].roident = disk_state.roident;
712  replication_states[last_state].remote_lsn = disk_state.remote_lsn;
713  last_state++;
714 
715  elog(LOG, "recovered replication state of node %u to %X/%X",
716  disk_state.roident,
717  (uint32) (disk_state.remote_lsn >> 32),
718  (uint32) disk_state.remote_lsn);
719  }
720 
721  /* now check checksum */
722  FIN_CRC32C(crc);
 /* a mismatch means the file is damaged, not that a configured limit was hit */
723  if (file_crc != crc)
724  ereport(PANIC,
725  (errcode(ERRCODE_DATA_CORRUPTED),
726  errmsg("replication slot checkpoint has wrong checksum %u, expected %u",
727  crc, file_crc)));
728 
729  CloseTransientFile(fd);
730 }
731 
732 void
734 {
735  uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
736 
737  switch (info)
738  {
739  case XLOG_REPLORIGIN_SET:
740  {
741  xl_replorigin_set *xlrec =
742  (xl_replorigin_set *) XLogRecGetData(record);
743 
745  xlrec->remote_lsn, record->EndRecPtr,
746  xlrec->force /* backward */ ,
747  false /* WAL log */ );
748  break;
749  }
751  {
752  xl_replorigin_drop *xlrec;
753  int i;
754 
755  xlrec = (xl_replorigin_drop *) XLogRecGetData(record);
756 
757  for (i = 0; i < max_replication_slots; i++)
758  {
759  ReplicationState *state = &replication_states[i];
760 
761  /* found our slot */
762  if (state->roident == xlrec->node_id)
763  {
764  /* reset entry */
765  state->roident = InvalidRepOriginId;
766  state->remote_lsn = InvalidXLogRecPtr;
767  state->local_lsn = InvalidXLogRecPtr;
768  break;
769  }
770  }
771  break;
772  }
773  default:
774  elog(PANIC, "replorigin_redo: unknown op code %u", info);
775  }
776 }
777 
778 
779 /*
780  * Tell the replication origin progress machinery that a commit from 'node'
781  * that originated at the LSN remote_commit on the remote node was replayed
782  * successfully and that we don't need to do so again. In combination with
783  * setting up replorigin_session_origin_lsn and replorigin_session_origin
784  * that ensures we won't lose knowledge about that after a crash if the
785  * transaction had a persistent effect (think of asynchronous commits).
786  *
787  * local_commit needs to be a local LSN of the commit so that we can make sure
788  * upon a checkpoint that enough WAL has been persisted to disk.
789  *
790  * Needs to be called with a RowExclusiveLock on pg_replication_origin,
791  * unless running in recovery.
792  */
793 void
795  XLogRecPtr remote_commit, XLogRecPtr local_commit,
796  bool go_backward, bool wal_log)
797 {
798  int i;
799  ReplicationState *replication_state = NULL;
800  ReplicationState *free_state = NULL;
801 
802  Assert(node != InvalidRepOriginId);
803 
804  /* we don't track DoNotReplicateId */
805  if (node == DoNotReplicateId)
806  return;
807 
808  /*
809  * XXX: For the case where this is called by WAL replay, it'd be more
810  * efficient to restore into a backend local hashtable and only dump into
811  * shmem after recovery is finished. Let's wait with implementing that
812  * till it's shown to be a measurable expense
813  */
814 
815  /* Lock exclusively, as we may have to create a new table entry. */
816  LWLockAcquire(ReplicationOriginLock, LW_EXCLUSIVE);
817 
818  /*
819  * Search for either an existing slot for the origin, or a free one we can
820  * use.
821  */
822  for (i = 0; i < max_replication_slots; i++)
823  {
824  ReplicationState *curstate = &replication_states[i];
825 
826  /* remember where to insert if necessary */
827  if (curstate->roident == InvalidRepOriginId &&
828  free_state == NULL)
829  {
830  free_state = curstate;
831  continue;
832  }
833 
834  /* not our slot */
835  if (curstate->roident != node)
836  {
837  continue;
838  }
839 
840  /* ok, found slot */
841  replication_state = curstate;
842 
843  LWLockAcquire(&replication_state->lock, LW_EXCLUSIVE);
844 
845  /* Make sure it's not used by somebody else */
846  if (replication_state->acquired_by != 0)
847  {
848  ereport(ERROR,
849  (errcode(ERRCODE_OBJECT_IN_USE),
850  errmsg("replication origin with OID %d is already active for PID %d",
851  replication_state->roident,
852  replication_state->acquired_by)));
853  }
854 
855  break;
856  }
857 
858  if (replication_state == NULL && free_state == NULL)
859  ereport(ERROR,
860  (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
861  errmsg("could not find free replication state slot for replication origin with OID %u",
862  node),
863  errhint("Increase max_replication_slots and try again.")));
864 
865  if (replication_state == NULL)
866  {
867  /* initialize new slot */
868  LWLockAcquire(&free_state->lock, LW_EXCLUSIVE);
869  replication_state = free_state;
870  Assert(replication_state->remote_lsn == InvalidXLogRecPtr);
871  Assert(replication_state->local_lsn == InvalidXLogRecPtr);
872  replication_state->roident = node;
873  }
874 
875  Assert(replication_state->roident != InvalidRepOriginId);
876 
877  /*
878  * If somebody "forcefully" sets this slot, WAL log it, so it's durable
879  * and the standby gets the message. Primarily this will be called during
880  * WAL replay (of commit records) where no WAL logging is necessary.
881  */
882  if (wal_log)
883  {
884  xl_replorigin_set xlrec;
885 
886  xlrec.remote_lsn = remote_commit;
887  xlrec.node_id = node;
888  xlrec.force = go_backward;
889 
890  XLogBeginInsert();
891  XLogRegisterData((char *) (&xlrec), sizeof(xlrec));
892 
893  XLogInsert(RM_REPLORIGIN_ID, XLOG_REPLORIGIN_SET);
894  }
895 
896  /*
897  * Due to - harmless - race conditions during a checkpoint we could see
898  * values here that are older than the ones we already have in memory.
899  * Don't overwrite those.
900  */
901  if (go_backward || replication_state->remote_lsn < remote_commit)
902  replication_state->remote_lsn = remote_commit;
903  if (local_commit != InvalidXLogRecPtr &&
904  (go_backward || replication_state->local_lsn < local_commit))
905  replication_state->local_lsn = local_commit;
906  LWLockRelease(&replication_state->lock);
907 
908  /*
909  * Release *after* changing the LSNs, slot isn't acquired and thus could
910  * otherwise be dropped anytime.
911  */
912  LWLockRelease(ReplicationOriginLock);
913 }
914 
915 
918 {
919  int i;
920  XLogRecPtr local_lsn = InvalidXLogRecPtr;
921  XLogRecPtr remote_lsn = InvalidXLogRecPtr;
922 
923  /* prevent slots from being concurrently dropped */
924  LWLockAcquire(ReplicationOriginLock, LW_SHARED);
925 
926  for (i = 0; i < max_replication_slots; i++)
927  {
929 
930  state = &replication_states[i];
931 
932  if (state->roident == node)
933  {
934  LWLockAcquire(&state->lock, LW_SHARED);
935 
936  remote_lsn = state->remote_lsn;
937  local_lsn = state->local_lsn;
938 
939  LWLockRelease(&state->lock);
940 
941  break;
942  }
943  }
944 
945  LWLockRelease(ReplicationOriginLock);
946 
947  if (flush && local_lsn != InvalidXLogRecPtr)
948  XLogFlush(local_lsn);
949 
950  return remote_lsn;
951 }
952 
953 /*
954  * Tear down a (possibly) configured session replication origin during process
955  * exit.
956  */
957 static void
959 {
960  LWLockAcquire(ReplicationOriginLock, LW_EXCLUSIVE);
961 
962  if (session_replication_state != NULL &&
963  session_replication_state->acquired_by == MyProcPid)
964  {
965  session_replication_state->acquired_by = 0;
966  session_replication_state = NULL;
967  }
968 
969  LWLockRelease(ReplicationOriginLock);
970 }
971 
972 /*
973  * Setup a replication origin in the shared memory struct if it doesn't
974  * already exist and cache access to the specific ReplicationState so the
975  * array doesn't have to be searched when calling
976  * replorigin_session_advance().
977  *
978  * Obviously only one such cached origin can exist per process and the current
979  * cached value can only be set again after the previous value is torn down
980  * with replorigin_session_reset().
981  */
982 void
984 {
985  static bool registered_cleanup;
986  int i;
987  int free_slot = -1;
988 
989  if (!registered_cleanup)
990  {
992  registered_cleanup = true;
993  }
994 
996 
997  if (session_replication_state != NULL)
998  ereport(ERROR,
999  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1000  errmsg("cannot setup replication origin when one is already setup")));
1001 
1002  /* Lock exclusively, as we may have to create a new table entry. */
1003  LWLockAcquire(ReplicationOriginLock, LW_EXCLUSIVE);
1004 
1005  /*
1006  * Search for either an existing slot for the origin, or a free one we can
1007  * use.
1008  */
1009  for (i = 0; i < max_replication_slots; i++)
1010  {
1011  ReplicationState *curstate = &replication_states[i];
1012 
1013  /* remember where to insert if necessary */
1014  if (curstate->roident == InvalidRepOriginId &&
1015  free_slot == -1)
1016  {
1017  free_slot = i;
1018  continue;
1019  }
1020 
1021  /* not our slot */
1022  if (curstate->roident != node)
1023  continue;
1024 
1025  else if (curstate->acquired_by != 0)
1026  {
1027  ereport(ERROR,
1028  (errcode(ERRCODE_OBJECT_IN_USE),
1029  errmsg("replication identifier %d is already active for PID %d",
1030  curstate->roident, curstate->acquired_by)));
1031  }
1032 
1033  /* ok, found slot */
1034  session_replication_state = curstate;
1035  }
1036 
1037 
1038  if (session_replication_state == NULL && free_slot == -1)
1039  ereport(ERROR,
1040  (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
1041  errmsg("could not find free replication state slot for replication origin with OID %u",
1042  node),
1043  errhint("Increase max_replication_slots and try again.")));
1044  else if (session_replication_state == NULL)
1045  {
1046  /* initialize new slot */
1047  session_replication_state = &replication_states[free_slot];
1048  Assert(session_replication_state->remote_lsn == InvalidXLogRecPtr);
1049  Assert(session_replication_state->local_lsn == InvalidXLogRecPtr);
1050  session_replication_state->roident = node;
1051  }
1052 
1053 
1054  Assert(session_replication_state->roident != InvalidRepOriginId);
1055 
1056  session_replication_state->acquired_by = MyProcPid;
1057 
1058  LWLockRelease(ReplicationOriginLock);
1059 }
1060 
1061 /*
1062  * Reset replay state previously setup in this session.
1063  *
1064  * This function may only be called if an origin was setup with
1065  * replorigin_session_setup().
1066  */
1067 void
1069 {
1071 
1072  if (session_replication_state == NULL)
1073  ereport(ERROR,
1074  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1075  errmsg("no replication origin is configured")));
1076 
1077  LWLockAcquire(ReplicationOriginLock, LW_EXCLUSIVE);
1078 
1079  session_replication_state->acquired_by = 0;
1080  session_replication_state = NULL;
1081 
1082  LWLockRelease(ReplicationOriginLock);
1083 }
1084 
1085 /*
1086  * Do the same work replorigin_advance() does, just on the session's
1087  * configured origin.
1088  *
1089  * This is noticeably cheaper than using replorigin_advance().
1090  */
1091 void
1093 {
1094  Assert(session_replication_state != NULL);
1095  Assert(session_replication_state->roident != InvalidRepOriginId);
1096 
1097  LWLockAcquire(&session_replication_state->lock, LW_EXCLUSIVE);
1098  if (session_replication_state->local_lsn < local_commit)
1099  session_replication_state->local_lsn = local_commit;
1100  if (session_replication_state->remote_lsn < remote_commit)
1101  session_replication_state->remote_lsn = remote_commit;
1102  LWLockRelease(&session_replication_state->lock);
1103 }
1104 
1105 /*
1106  * Ask the machinery about the point up to which we successfully replayed
1107  * changes from an already set up replication origin.
      *
      * Returns the session state's remote_lsn. If 'flush' is true and a valid
      * local_lsn is recorded, XLogFlush() is called on that local_lsn before
      * returning. A session origin must already be configured (asserted).
      *
      * NOTE(review): listing line 1110 (the name/parameter line) is absent from
      * this extract. The cross-reference index lists the function as
      * replorigin_session_get_progress(bool flush), origin.c:1110.
1108  */
1109 XLogRecPtr
1111 {
1112  XLogRecPtr remote_lsn;
1113  XLogRecPtr local_lsn;
1114 
1115  Assert(session_replication_state != NULL);
1116 
      /* copy both LSNs while holding the per-state lock in shared mode */
1117  LWLockAcquire(&session_replication_state->lock, LW_SHARED);
1118  remote_lsn = session_replication_state->remote_lsn;
1119  local_lsn = session_replication_state->local_lsn;
1120  LWLockRelease(&session_replication_state->lock);
1121 
      /* the flush happens after the lock has been released */
1122  if (flush && local_lsn != InvalidXLogRecPtr)
1123  XLogFlush(local_lsn);
1124 
1125  return remote_lsn;
1126 }
1127 
1128 
1129 
1130 /* ---------------------------------------------------------------------------
1131  * SQL functions for working with replication origin.
1132  *
1133  * These mostly should be fairly short wrappers around more generic functions.
1134  * ---------------------------------------------------------------------------
1135  */
1136 
1137 /*
1138  * Create replication origin for the passed in name, and return the assigned
1139  * oid.
      *
      * NOTE(review): listing lines 1142 (the name/parameter line) and 1149 are
      * absent from this extract. The cross-reference index lists the function
      * as pg_replication_origin_create(PG_FUNCTION_ARGS), origin.c:1142.
      * 'name' is never assigned in the visible lines; presumably line 1149
      * derives it from argument 0 -- confirm against the real file.
1140  */
1141 Datum
1143 {
1144  char *name;
1145  RepOriginId roident;
1146 
      /* arguments are (check_slots, recoveryOK) */
1147  replorigin_check_prerequisites(false, false);
1148 
1150  roident = replorigin_create(name);
1151 
1152  pfree(name);
1153 
1154  PG_RETURN_OID(roident);
1155 }
1156 
1157 /*
1158  * Drop replication origin.
      *
      * Looks the origin up by name and passes its id to replorigin_drop().
      *
      * NOTE(review): listing lines 1161 (the name/parameter line) and 1168 are
      * absent from this extract. The cross-reference index lists the function
      * as pg_replication_origin_drop(PG_FUNCTION_ARGS), origin.c:1161.
      * 'name' is never assigned in the visible lines; presumably line 1168
      * derives it from argument 0 -- confirm against the real file.
1159  */
1160 Datum
1162 {
1163  char *name;
1164  RepOriginId roident;
1165 
      /* arguments are (check_slots, recoveryOK) */
1166  replorigin_check_prerequisites(false, false);
1167 
1169 
      /*
       * missing_ok is false; presumably the lookup itself reports an error for
       * an unknown name, which is what the Assert below relies on.
       */
1170  roident = replorigin_by_name(name, false);
1171  Assert(OidIsValid(roident));
1172 
1173  replorigin_drop(roident);
1174 
1175  pfree(name);
1176 
1177  PG_RETURN_VOID();
1178 }
1179 
1180 /*
1181  * Return oid of a replication origin, or SQL NULL if the lookup does not
      * yield a valid id.
      *
      * NOTE(review): listing lines 1184 (the name/parameter line) and 1191 are
      * absent from this extract. The cross-reference index lists the function
      * as pg_replication_origin_oid(PG_FUNCTION_ARGS), origin.c:1184.
      * 'name' is never assigned in the visible lines; presumably line 1191
      * derives it from argument 0 -- confirm against the real file.
1182  */
1183 Datum
1185 {
1186  char *name;
1187  RepOriginId roident;
1188 
      /* arguments are (check_slots, recoveryOK) */
1189  replorigin_check_prerequisites(false, false);
1190 
      /* missing_ok is true here, so an invalid id is handled below */
1192  roident = replorigin_by_name(name, true);
1193 
1194  pfree(name);
1195 
1196  if (OidIsValid(roident))
1197  PG_RETURN_OID(roident);
1198  PG_RETURN_NULL();
1199 }
1200 
1201 /*
1202  * Set up a replication origin for this session.
      *
      * Looks the origin up by name, hands it to replorigin_session_setup() and
      * records its id in replorigin_session_origin.
      *
      * NOTE(review): listing lines 1205 (the name/parameter line) and 1212 are
      * absent from this extract. The cross-reference index lists the function
      * as pg_replication_origin_session_setup(PG_FUNCTION_ARGS),
      * origin.c:1205. 'name' is never assigned in the visible lines;
      * presumably line 1212 derives it from argument 0 -- confirm.
1203  */
1204 Datum
1206 {
1207  char *name;
1208  RepOriginId origin;
1209 
      /* arguments are (check_slots, recoveryOK) */
1210  replorigin_check_prerequisites(true, false);
1211 
      /* missing_ok is false for this lookup */
1213  origin = replorigin_by_name(name, false);
1214  replorigin_session_setup(origin);
1215 
1216  replorigin_session_origin = origin;
1217 
1218  pfree(name);
1219 
1220  PG_RETURN_VOID();
1221 }
1222 
1223 /*
1224  * Reset the origin previously set up in this session.
      *
      * NOTE(review): listing lines 1227 (the name/parameter line), 1231 and
      * 1233-1235 are absent from this extract, so the statements that perform
      * the reset are not visible here. The cross-reference index lists the
      * function as pg_replication_origin_session_reset(PG_FUNCTION_ARGS),
      * origin.c:1227.
1225  */
1226 Datum
1228 {
      /* arguments are (check_slots, recoveryOK) */
1229  replorigin_check_prerequisites(true, false);
1230 
1232 
1236 
1237  PG_RETURN_VOID();
1238 }
1239 
1240 /*
1241  * Has a replication origin been set up for this session?
      *
      * NOTE(review): listing lines 1244 (the name/parameter line) and 1248 are
      * absent from this extract, so the statement producing the result is not
      * visible here. The cross-reference index lists the function as
      * pg_replication_origin_session_is_setup(PG_FUNCTION_ARGS),
      * origin.c:1244.
1242  */
1243 Datum
1245 {
      /* arguments are (check_slots, recoveryOK) */
1246  replorigin_check_prerequisites(false, false);
1247 
1249 }
1250 
1251 
1252 /*
1253  * Return the replication progress for origin set up in the current session.
1254  *
1255  * If 'flush' is set to true it is ensured that the returned value corresponds
1256  * to a local transaction that has been flushed. This is useful if asynchronous
1257  * commits are used when replaying replicated transactions.
      *
      * Reports an ERROR if no origin is configured for the session, and returns
      * SQL NULL when the recorded remote LSN is InvalidXLogRecPtr.
      *
      * NOTE(review): listing line 1260 (the name/parameter line) is absent from
      * this extract. The cross-reference index lists the function as
      * pg_replication_origin_session_progress(PG_FUNCTION_ARGS),
      * origin.c:1260.
1258  */
1259 Datum
1261 {
1262  XLogRecPtr remote_lsn = InvalidXLogRecPtr;
1263  bool flush = PG_GETARG_BOOL(0);
1264 
      /* arguments are (check_slots, recoveryOK) */
1265  replorigin_check_prerequisites(true, false);
1266 
1267  if (session_replication_state == NULL)
1268  ereport(ERROR,
1269  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1270  errmsg("no replication origin is configured")));
1271 
1272  remote_lsn = replorigin_session_get_progress(flush);
1273 
      /* an invalid LSN is surfaced to SQL as NULL */
1274  if (remote_lsn == InvalidXLogRecPtr)
1275  PG_RETURN_NULL();
1276 
1277  PG_RETURN_LSN(remote_lsn);
1278 }
1279 
/*
 * Record the origin LSN passed as argument 0 in
 * replorigin_session_origin_lsn. Reports an ERROR if no origin is
 * configured for the session.
 *
 * NOTE(review): listing lines 1281 (the name/parameter line) and 1293 are
 * absent from this extract. The cross-reference index lists the function as
 * pg_replication_origin_xact_setup(PG_FUNCTION_ARGS), origin.c:1281.
 * Presumably line 1293 stores a timestamp argument as well -- confirm
 * against the real file.
 */
1280 Datum
1282 {
1283  XLogRecPtr location = PG_GETARG_LSN(0);
1284 
      /* arguments are (check_slots, recoveryOK) */
1285  replorigin_check_prerequisites(true, false);
1286 
1287  if (session_replication_state == NULL)
1288  ereport(ERROR,
1289  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1290  errmsg("no replication origin is configured")));
1291 
1292  replorigin_session_origin_lsn = location;
1294 
1295  PG_RETURN_VOID();
1296 }
1297 
/*
 * NOTE(review): listing lines 1299 (the name/parameter line) and 1303-1304
 * are absent from this extract, so the statements that do the actual work
 * are not visible here. The cross-reference index lists the function as
 * pg_replication_origin_xact_reset(PG_FUNCTION_ARGS), origin.c:1299.
 * Presumably it clears what pg_replication_origin_xact_setup() stores --
 * confirm against the real file.
 */
1298 Datum
1300 {
      /* arguments are (check_slots, recoveryOK) */
1301  replorigin_check_prerequisites(true, false);
1302 
1305 
1306  PG_RETURN_VOID();
1307 }
1308 
1309 
/*
 * Set the remote LSN of the origin named by argument 0 to the LSN given as
 * argument 1, via replorigin_advance() with go_backward and wal_log both
 * true and no local commit LSN.
 *
 * NOTE(review): listing lines 1311 (the name/parameter line), 1320 and 1332
 * are absent from this extract. The cross-reference index lists the function
 * as pg_replication_origin_advance(PG_FUNCTION_ARGS), origin.c:1311.
 * Presumably the two missing body lines take and release the lock that the
 * comment below refers to -- confirm against the real file.
 */
1310 Datum
1312 {
1313  text *name = PG_GETARG_TEXT_PP(0);
1314  XLogRecPtr remote_commit = PG_GETARG_LSN(1);
1315  RepOriginId node;
1316 
      /* arguments are (check_slots, recoveryOK) */
1317  replorigin_check_prerequisites(true, false);
1318 
1319  /* lock to prevent the replication origin from vanishing */
1321 
      /* missing_ok is false for this lookup */
1322  node = replorigin_by_name(text_to_cstring(name), false);
1323 
1324  /*
1325  * Can't sensibly pass a local commit to be flushed at checkpoint - this
1326  * xact hasn't committed yet. This is why this function should be used to
1327  * set up the initial replication state, but not for replay.
1328  */
1329  replorigin_advance(node, remote_commit, InvalidXLogRecPtr,
1330  true /* go backward */ , true /* WAL log */ );
1331 
1333 
1334  PG_RETURN_VOID();
1335 }
1336 
1337 
1338 /*
1339  * Return the replication progress for an individual replication origin.
1340  *
1341  * If 'flush' is set to true it is ensured that the returned value corresponds
1342  * to a local transaction that has been flushed. This is useful if asynchronous
1343  * commits are used when replaying replicated transactions.
      *
      * Returns SQL NULL when the reported remote LSN is InvalidXLogRecPtr.
      *
      * NOTE(review): listing lines 1346 (the name/parameter line) and 1355 are
      * absent from this extract. The cross-reference index lists the function
      * as pg_replication_origin_progress(PG_FUNCTION_ARGS), origin.c:1346.
      * 'name' is never assigned in the visible lines; presumably line 1355
      * derives it from argument 0 -- confirm against the real file.
1344  */
1345 Datum
1347 {
1348  char *name;
1349  bool flush;
1350  RepOriginId roident;
1351  XLogRecPtr remote_lsn = InvalidXLogRecPtr;
1352 
      /* arguments are (check_slots, recoveryOK); recoveryOK is true here */
1353  replorigin_check_prerequisites(true, true);
1354 
1356  flush = PG_GETARG_BOOL(1);
1357 
      /* missing_ok is false for this lookup */
1358  roident = replorigin_by_name(name, false);
1359  Assert(OidIsValid(roident));
1360 
1361  remote_lsn = replorigin_get_progress(roident, flush);
1362 
      /* an invalid LSN is surfaced to SQL as NULL */
1363  if (remote_lsn == InvalidXLogRecPtr)
1364  PG_RETURN_NULL();
1365 
1366  PG_RETURN_LSN(remote_lsn);
1367 }
1368 
1369 
/*
 * Set-returning function: materializes one row per in-use entry of
 * replication_states into a tuplestore. The four columns filled are the
 * origin id, the origin name (left NULL if the name lookup fails), the
 * remote LSN and the local LSN.
 *
 * NOTE(review): listing lines 1371 (the name/parameter line), 1379 and
 * 1419-1421 are absent from this extract. The cross-reference index lists
 * the function as pg_show_replication_origin_status(PG_FUNCTION_ARGS),
 * origin.c:1371. Presumably line 1379 defines
 * REPLICATION_ORIGIN_PROGRESS_COLS and lines 1419-1421 declare 'state',
 * 'values' and 'nulls' -- confirm against the real file.
 */
1370 Datum
1372 {
1373  ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1374  TupleDesc tupdesc;
1375  Tuplestorestate *tupstore;
1376  MemoryContext per_query_ctx;
1377  MemoryContext oldcontext;
1378  int i;
1380 
1381  /* we want to return 0 rows if slot is set to zero (hence check_slots = false) */
1382  replorigin_check_prerequisites(false, true);
1383 
      /* the caller must accept a materialized set with a row-type result */
1384  if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
1385  ereport(ERROR,
1386  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1387  errmsg("set-valued function called in context that cannot accept a set")));
1388  if (!(rsinfo->allowedModes & SFRM_Materialize))
1389  ereport(ERROR,
1390  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1391  errmsg("materialize mode required, but it is not allowed in this context")));
1392  if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1393  elog(ERROR, "return type must be a row type");
1394 
1395  if (tupdesc->natts != REPLICATION_ORIGIN_PROGRESS_COLS)
1396  elog(ERROR, "wrong function definition");
1397 
      /* create the tuplestore in the per-query memory context */
1398  per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
1399  oldcontext = MemoryContextSwitchTo(per_query_ctx);
1400 
1401  tupstore = tuplestore_begin_heap(true, false, work_mem);
1402  rsinfo->returnMode = SFRM_Materialize;
1403  rsinfo->setResult = tupstore;
1404  rsinfo->setDesc = tupdesc;
1405 
1406  MemoryContextSwitchTo(oldcontext);
1407 
1408 
1409  /* prevent slots from being concurrently dropped */
1410  LWLockAcquire(ReplicationOriginLock, LW_SHARED);
1411 
1412  /*
1413  * Iterate through all possible replication_states, display if they are
1414  * filled. ReplicationOriginLock is held in shared mode for the whole loop
1415  * and each state's own lock is taken in shared mode while its LSNs are read.
1416  */
1417  for (i = 0; i < max_replication_slots; i++)
1418  {
1422  char *roname;
1423 
1424  state = &replication_states[i];
1425 
1426  /* unused slot, nothing to display */
1427  if (state->roident == InvalidRepOriginId)
1428  continue;
1429 
      /* start with every column marked NULL; filled columns are cleared below */
1430  memset(values, 0, sizeof(values));
1431  memset(nulls, 1, sizeof(nulls));
1432 
1433  values[0] = ObjectIdGetDatum(state->roident);
1434  nulls[0] = false;
1435 
1436  /*
1437  * We're not preventing the origin from being dropped concurrently, so
1438  * silently accept that it might be gone.
1439  */
1440  if (replorigin_by_oid(state->roident, true,
1441  &roname))
1442  {
1443  values[1] = CStringGetTextDatum(roname);
1444  nulls[1] = false;
1445  }
1446 
      /* read both LSNs under the per-state lock */
1447  LWLockAcquire(&state->lock, LW_SHARED);
1448 
1449  values[2] = LSNGetDatum(state->remote_lsn);
1450  nulls[2] = false;
1451 
1452  values[3] = LSNGetDatum(state->local_lsn);
1453  nulls[3] = false;
1454 
1455  LWLockRelease(&state->lock);
1456 
1457  tuplestore_putvalues(tupstore, tupdesc, values, nulls);
1458  }
1459 
1460  tuplestore_donestoring(tupstore);
1461 
1462  LWLockRelease(ReplicationOriginLock);
1463 
1464 #undef REPLICATION_ORIGIN_PROGRESS_COLS
1465 
      /* rows are delivered through rsinfo->setResult, not the return value */
1466  return (Datum) 0;
1467 }
static ReplicationState * session_replication_state
Definition: origin.c:169
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, Datum *values, bool *isnull)
Definition: tuplestore.c:750
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
static void replorigin_check_prerequisites(bool check_slots, bool recoveryOK)
Definition: origin.c:175
Definition: lwlock.h:32
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
#define IsA(nodeptr, _type_)
Definition: nodes.h:560
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition: funcapi.c:211
int MyProcPid
Definition: globals.c:39
int errhint(const char *fmt,...)
Definition: elog.c:987
XLogRecPtr local_lsn
Definition: origin.c:120
void systable_endscan(SysScanDesc sysscan)
Definition: genam.c:499
#define GETSTRUCT(TUP)
Definition: htup_details.h:656
#define Anum_pg_replication_origin_roident
#define RelationGetDescr(relation)
Definition: rel.h:428
#define DoNotReplicateId
Definition: origin.h:35
Datum pg_replication_origin_xact_setup(PG_FUNCTION_ARGS)
Definition: origin.c:1281
void UnlockRelationOid(Oid relid, LOCKMODE lockmode)
Definition: lmgr.c:182
#define write(a, b, c)
Definition: win32.h:14
Datum pg_replication_origin_drop(PG_FUNCTION_ARGS)
Definition: origin.c:1161
#define ExclusiveLock
Definition: lockdefs.h:44
int64 TimestampTz
Definition: timestamp.h:39
XLogRecPtr remote_lsn
Definition: origin.c:139
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:233
uint32 pg_crc32c
Definition: pg_crc32c.h:38
static void ReplicationOriginExitCleanup(int code, Datum arg)
Definition: origin.c:958
RepOriginId roident
Definition: origin.c:138
#define tuplestore_donestoring(state)
Definition: tuplestore.h:60
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
unsigned char uint8
Definition: c.h:266
uint16 RepOriginId
Definition: xlogdefs.h:51
XLogRecPtr replorigin_session_get_progress(bool flush)
Definition: origin.c:1110
int errcode(int sqlerrcode)
Definition: elog.c:575
#define LSNGetDatum(X)
Definition: pg_lsn.h:22
bool superuser(void)
Definition: superuser.c:47
#define MemSet(start, val, len)
Definition: c.h:857
void CatalogTupleDelete(Relation heapRel, ItemPointer tid)
Definition: indexing.c:255
void ReplicationOriginShmemInit(void)
Definition: origin.c:457
void replorigin_advance(RepOriginId node, XLogRecPtr remote_commit, XLogRecPtr local_commit, bool go_backward, bool wal_log)
Definition: origin.c:794
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull)
Definition: heaptuple.c:692
#define heap_close(r, l)
Definition: heapam.h:97
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:239
void replorigin_session_setup(RepOriginId node)
Definition: origin.c:983
bool replorigin_by_oid(RepOriginId roident, bool missing_ok, char **roname)
Definition: origin.c:398
#define LOG
Definition: elog.h:26
void heap_freetuple(HeapTuple htup)
Definition: heaptuple.c:1372
unsigned int Oid
Definition: postgres_ext.h:31
bool RecoveryInProgress(void)
Definition: xlog.c:7872
#define OidIsValid(objectId)
Definition: c.h:538
#define PANIC
Definition: elog.h:53
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2757
static int fd(const char *x, int i)
Definition: preproc-init.c:105
SysScanDesc systable_beginscan(Relation heapRelation, Oid indexId, bool indexOK, Snapshot snapshot, int nkeys, ScanKey key)
Definition: genam.c:328
#define PG_BINARY
Definition: c.h:1038
RepOriginId replorigin_by_name(char *roname, bool missing_ok)
Definition: origin.c:206
#define SearchSysCache1(cacheId, key1)
Definition: syscache.h:156
RepOriginId roident
Definition: origin.c:108
XLogRecPtr EndRecPtr
Definition: xlogreader.h:115
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:273
Datum pg_replication_origin_advance(PG_FUNCTION_ARGS)
Definition: origin.c:1311
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1715
#define PG_RETURN_LSN(x)
Definition: pg_lsn.h:25
void CheckPointReplicationOrigin(void)
Definition: origin.c:504
void replorigin_redo(XLogReaderState *record)
Definition: origin.c:733
HeapTuple systable_getnext(SysScanDesc sysscan)
Definition: genam.c:416
void pfree(void *pointer)
Definition: mcxt.c:950
#define XLogRecGetData(decoder)
Definition: xlogreader.h:220
#define Anum_pg_replication_origin_roname
#define ReplicationOriginIdentIndex
Definition: indexing.h:333
#define ObjectIdGetDatum(X)
Definition: postgres.h:513
#define ERROR
Definition: elog.h:43
#define PG_UINT16_MAX
Definition: c.h:338
void replorigin_session_reset(void)
Definition: origin.c:1068
Oid CatalogTupleInsert(Relation heapRel, HeapTuple tup)
Definition: indexing.c:162
FormData_pg_replication_origin * Form_pg_replication_origin
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:372
#define InitDirtySnapshot(snapshotdata)
Definition: tqual.h:100
LWLock lock
Definition: origin.c:130
ItemPointerData t_self
Definition: htup.h:65
#define DEBUG2
Definition: elog.h:24
XLogRecPtr replorigin_session_origin_lsn
Definition: origin.c:151
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:348
TimestampTz replorigin_session_origin_timestamp
Definition: origin.c:152
#define XLOG_REPLORIGIN_SET
Definition: origin.h:31
void StartupReplicationOrigin(void)
Definition: origin.c:618
#define RowExclusiveLock
Definition: lockdefs.h:38
int OpenTransientFile(FileName fileName, int fileFlags, int fileMode)
Definition: fd.c:2144
int errcode_for_file_access(void)
Definition: elog.c:598
struct ReplicationState ReplicationState
unsigned int uint32
Definition: c.h:268
Datum pg_replication_origin_session_setup(PG_FUNCTION_ARGS)
Definition: origin.c:1205
Datum pg_replication_origin_session_reset(PG_FUNCTION_ARGS)
Definition: origin.c:1227
RepOriginId replorigin_create(char *roname)
Definition: origin.c:235
Datum pg_show_replication_origin_status(PG_FUNCTION_ARGS)
Definition: origin.c:1371
int unlink(const char *filename)
#define ereport(elevel, rest)
Definition: elog.h:122
#define XLogRecGetInfo(decoder)
Definition: xlogreader.h:216
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition: fd.c:593
Datum pg_replication_origin_session_is_setup(PG_FUNCTION_ARGS)
Definition: origin.c:1244
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:667
int CloseTransientFile(int fd)
Definition: fd.c:2305
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:323
void replorigin_drop(RepOriginId roident)
Definition: origin.c:327
Tuplestorestate * tuplestore_begin_heap(bool randomAccess, bool interXact, int maxKBytes)
Definition: tuplestore.c:318
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:415
#define XLOG_REPLORIGIN_DROP
Definition: origin.h:32
Size mul_size(Size s1, Size s2)
Definition: shmem.c:492
#define PG_GETARG_LSN(n)
Definition: pg_lsn.h:24
struct ReplicationStateOnDisk ReplicationStateOnDisk
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:319
uintptr_t Datum
Definition: postgres.h:372
void CommandCounterIncrement(void)
Definition: xact.c:922
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:1117
Size add_size(Size s1, Size s2)
Definition: shmem.c:475
Relation heap_open(Oid relationId, LOCKMODE lockmode)
Definition: heapam.c:1284
int work_mem
Definition: globals.c:113
Size ReplicationOriginShmemSize(void)
Definition: origin.c:437
#define REPLICATION_STATE_MAGIC
Definition: origin.c:172
#define InvalidOid
Definition: postgres_ext.h:36
static ReplicationStateCtl * replication_states_ctl
Definition: origin.c:162
Datum pg_replication_origin_xact_reset(PG_FUNCTION_ARGS)
Definition: origin.c:1299
int allowedModes
Definition: execnodes.h:268
#define PG_RETURN_VOID()
Definition: fmgr.h:309
struct ReplicationStateCtl ReplicationStateCtl
SetFunctionReturnMode returnMode
Definition: execnodes.h:270
int max_replication_slots
Definition: slot.c:99
XLogRecPtr remote_lsn
Definition: origin.c:113
#define HeapTupleIsValid(tuple)
Definition: htup.h:77
Datum pg_replication_origin_progress(PG_FUNCTION_ARGS)
Definition: origin.c:1346
RepOriginId node_id
Definition: origin.h:28
#define NULL
Definition: c.h:229
uint64 XLogRecPtr
Definition: xlogdefs.h:21
ReplicationState states[FLEXIBLE_ARRAY_MEMBER]
Definition: origin.c:146
#define Assert(condition)
Definition: c.h:675
#define XLR_INFO_MASK
Definition: xlogrecord.h:62
RepOriginId replorigin_session_origin
Definition: origin.c:150
Definition: regguts.h:298
RepOriginId node_id
Definition: origin.h:22
size_t Size
Definition: c.h:356
void replorigin_session_advance(XLogRecPtr remote_commit, XLogRecPtr local_commit)
Definition: origin.c:1092
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1111
bool IsTransactionState(void)
Definition: xact.c:350
#define Natts_pg_replication_origin
void LWLockRegisterTranche(int tranche_id, char *tranche_name)
Definition: lwlock.c:592
XLogRecPtr remote_lsn
Definition: origin.h:21
MemoryContext ecxt_per_query_memory
Definition: execnodes.h:202
const char * name
Definition: encode.c:521
#define InvalidRepOriginId
Definition: origin.h:34
Tuplestorestate * setResult
Definition: execnodes.h:273
#define DatumGetPointer(X)
Definition: postgres.h:555
static Datum values[MAXATTR]
Definition: bootstrap.c:163
char * text_to_cstring(const text *t)
Definition: varlena.c:182
ExprContext * econtext
Definition: execnodes.h:266
TupleDesc setDesc
Definition: execnodes.h:274
#define ReplicationOriginRelationId
int errmsg(const char *fmt,...)
Definition: elog.c:797
Datum pg_replication_origin_oid(PG_FUNCTION_ARGS)
Definition: origin.c:1184
#define PG_GETARG_TIMESTAMPTZ(n)
Definition: timestamp.h:36
int i
Datum pg_replication_origin_session_progress(PG_FUNCTION_ARGS)
Definition: origin.c:1260
static ReplicationState * replication_states
Definition: origin.c:161
void ScanKeyInit(ScanKey entry, AttrNumber attributeNumber, StrategyNumber strategy, RegProcedure procedure, Datum argument)
Definition: scankey.c:76
#define CStringGetTextDatum(s)
Definition: builtins.h:91
void * arg
Definition: c.h:439
#define PG_FUNCTION_ARGS
Definition: fmgr.h:158
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:100
#define elog
Definition: elog.h:219
void LockRelationOid(Oid relid, LOCKMODE lockmode)
Definition: lmgr.c:105
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:73
Datum pg_replication_origin_create(PG_FUNCTION_ARGS)
Definition: origin.c:1142
#define REPLICATION_ORIGIN_PROGRESS_COLS
void XLogBeginInsert(void)
Definition: xloginsert.c:120
#define PG_RETURN_OID(x)
Definition: fmgr.h:320
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:78
XLogRecPtr replorigin_get_progress(RepOriginId node, bool flush)
Definition: origin.c:917
#define PG_RETURN_NULL()
Definition: fmgr.h:305
#define read(a, b, c)
Definition: win32.h:13
#define BTEqualStrategyNumber
Definition: stratnum.h:31
#define offsetof(type, field)
Definition: c.h:555