PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
origin.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * origin.c
4 * Logical replication progress tracking support.
5 *
6 * Copyright (c) 2013-2025, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * src/backend/replication/logical/origin.c
10 *
11 * NOTES
12 *
13 * This file provides the following:
14 * * An infrastructure to name nodes in a replication setup
15 * * A facility to store and persist replication progress in an
16 * efficient and durable manner.
17 *
18 * Replication origins consist of a descriptive, user defined, external
19 * name and a short, thus space efficient, internal 2 byte one. This split
20 * exists because replication origins have to be stored in WAL and shared
21 * memory and long descriptors would be inefficient. For now only use 2 bytes
22 * for the internal id of a replication origin as it seems unlikely that there
23 * soon will be more than 65k nodes in one replication setup; and using only
24 * two bytes allows us to be more space efficient.
25 *
26 * Replication progress is tracked in a shared memory table
27 * (ReplicationState) that's dumped to disk every checkpoint. Entries
28 * ('slots') in this table are identified by the internal id. That's the case
29 * because it allows to increase replication progress during crash
30 * recovery. To allow doing so we store the original LSN (from the originating
31 * system) of a transaction in the commit record. That allows to recover the
32 * precise replayed state after crash recovery; without requiring synchronous
33 * commits. Allowing logical replication to use asynchronous commit is
34 * generally good for performance, but especially important as it allows a
35 * single threaded replay process to keep up with a source that has multiple
36 * backends generating changes concurrently. For efficiency and simplicity
37 * reasons a backend can setup one replication origin that's from then used as
38 * the source of changes produced by the backend, until reset again.
39 *
40 * This infrastructure is intended to be used in cooperation with logical
41 * decoding. When replaying from a remote system the configured origin is
42 * provided to output plugins, allowing prevention of replication loops and
43 * other filtering.
44 *
45 * There are several levels of locking at work:
46 *
47 * * To create and drop replication origins an exclusive lock on
48 * pg_replication_origin is required for the duration. That allows us to
49 * safely and conflict free assign new origins using a dirty snapshot.
50 *
51 * * When creating an in-memory replication progress slot the ReplicationOrigin
52 * LWLock has to be held exclusively; when iterating over the replication
53 * progress a shared lock has to be held, the same when advancing the
54 * replication progress of an individual backend that has not setup as the
55 * session's replication origin.
56 *
57 * * When manipulating or looking at the remote_lsn and local_lsn fields of a
58 * replication progress slot that slot's lwlock has to be held. That's
59 * primarily because we do not assume 8 byte writes (the LSN) are atomic on
60 * all our platforms, but it also simplifies memory ordering concerns
61 * between the remote and local lsn. We use a lwlock instead of a spinlock
62 * so it's less harmful to hold the lock over a WAL write
63 * (cf. replorigin_advance).
64 *
65 * ---------------------------------------------------------------------------
66 */
67
68#include "postgres.h"
69
70#include <unistd.h>
71#include <sys/stat.h>
72
73#include "access/genam.h"
74#include "access/htup_details.h"
75#include "access/table.h"
76#include "access/xact.h"
77#include "access/xloginsert.h"
78#include "catalog/catalog.h"
79#include "catalog/indexing.h"
81#include "funcapi.h"
82#include "miscadmin.h"
83#include "nodes/execnodes.h"
84#include "pgstat.h"
85#include "replication/origin.h"
86#include "replication/slot.h"
88#include "storage/fd.h"
89#include "storage/ipc.h"
90#include "storage/lmgr.h"
91#include "utils/builtins.h"
92#include "utils/fmgroids.h"
93#include "utils/guc.h"
94#include "utils/pg_lsn.h"
95#include "utils/rel.h"
96#include "utils/snapmgr.h"
97#include "utils/syscache.h"
98
99/* paths for replication origin checkpoint files */
100#define PG_REPLORIGIN_CHECKPOINT_FILENAME PG_LOGICAL_DIR "/replorigin_checkpoint"
101#define PG_REPLORIGIN_CHECKPOINT_TMPFILE PG_REPLORIGIN_CHECKPOINT_FILENAME ".tmp"
102
103/* GUC variables */
105
106/*
107 * Replay progress of a single remote node.
108 */
109typedef struct ReplicationState
110{
111 /*
112 * Local identifier for the remote node.
113 */
115
116 /*
117 * Location of the latest commit from the remote side.
118 */
120
121 /*
122 * Remember the local lsn of the commit record so we can XLogFlush() to it
123 * during a checkpoint so we know the commit record actually is safe on
124 * disk.
125 */
127
128 /*
129 * PID of backend that's acquired slot, or 0 if none.
130 */
132
133 /*
134 * Condition variable that's signaled when acquired_by changes.
135 */
137
138 /*
139 * Lock protecting remote_lsn and local_lsn.
140 */
143
144/*
145 * On disk version of ReplicationState.
146 */
148{
152
153
155{
156 /* Tranche to use for per-origin LWLocks */
158 /* Array of length max_active_replication_origins */
161
162/* external variables */
166
167/*
168 * Base address into a shared memory array of replication states of size
169 * max_active_replication_origins.
170 */
172
173/*
174 * Actual shared memory block (replication_states[] is now part of this).
175 */
177
178/*
179 * We keep a pointer to this backend's ReplicationState to avoid having to
180 * search the replication_states array in replorigin_session_advance for each
181 * remote commit. (Ownership of a backend's own entry can only be changed by
182 * that backend.)
183 */
185
186/* Magic for on disk files. */
187#define REPLICATION_STATE_MAGIC ((uint32) 0x1257DADE)
188
/*
 * Validate the preconditions for working with replication origins.
 *
 * check_origins: when true, max_active_replication_origins must be nonzero;
 *		otherwise the ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE condition
 *		below applies.
 * recoveryOK: when false, RecoveryInProgress() must be false; otherwise the
 *		ERRCODE_READ_ONLY_SQL_TRANSACTION condition below applies.
 *
 * NOTE(review): the statements that consume the two error descriptors below
 * are not visible in this listing (presumably ereport(ERROR, ...)); confirm
 * against the upstream file.
 */
189static void
190replorigin_check_prerequisites(bool check_origins, bool recoveryOK)
191{
192 if (check_origins && max_active_replication_origins == 0)
194 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
195 errmsg("cannot query or manipulate replication origin when \"max_active_replication_origins\" is 0")));
196
197 if (!recoveryOK && RecoveryInProgress())
199 (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
200 errmsg("cannot manipulate replication origins during recovery")));
201}
202
203
204/*
205 * IsReservedOriginName
206 * True iff name is either "none" or "any".
207 */
208static bool
210{
211 return ((pg_strcasecmp(name, LOGICALREP_ORIGIN_NONE) == 0) ||
212 (pg_strcasecmp(name, LOGICALREP_ORIGIN_ANY) == 0));
213}
214
215/* ---------------------------------------------------------------------------
216 * Functions for working with replication origins themselves.
217 * ---------------------------------------------------------------------------
218 */
219
220/*
221 * Check for a persistent replication origin identified by name.
222 *
223 * Returns InvalidOid if the node isn't known yet and missing_ok is true.
224 */
226replorigin_by_name(const char *roname, bool missing_ok)
227{
	/*
	 * roname: external name of the origin to look up.
	 * missing_ok: when true, an unknown name is not an error and the
	 *		initial value of roident (InvalidOid) is returned.
	 *
	 * NOTE(review): the return type line, the declaration/assignment of
	 * "ident" and the opener of the error report are not visible in this
	 * listing; confirm against the upstream file.
	 */
229 Oid roident = InvalidOid;
230 HeapTuple tuple;
231 Datum roname_d;
232
	/* The syscache lookup takes a Datum key, so convert the C string first. */
233 roname_d = CStringGetTextDatum(roname);
234
235 tuple = SearchSysCache1(REPLORIGNAME, roname_d);
236 if (HeapTupleIsValid(tuple))
237 {
	/* Found: copy out the identifier, then release the cache reference. */
239 roident = ident->roident;
240 ReleaseSysCache(tuple);
241 }
242 else if (!missing_ok)
	/* Not found and the caller did not allow that: undefined-object error. */
244 (errcode(ERRCODE_UNDEFINED_OBJECT),
245 errmsg("replication origin \"%s\" does not exist",
246 roname)));
247
248 return roident;
249}
250
251/*
252 * Create a replication origin.
253 *
254 * Needs to be called in a transaction.
255 */
257replorigin_create(const char *roname)
258{
	/*
	 * roname: external name to store in the new catalog row.
	 *
	 * Returns the identifier chosen for the new row (the value of roident at
	 * the point the insert happened).
	 *
	 * NOTE(review): several lines are not visible in this listing (the
	 * declaration of "key", the call that initializes it, the statement that
	 * releases the relation after the loop, and the opener of the error
	 * report); confirm against the upstream file.
	 */
259 Oid roident;
260 HeapTuple tuple = NULL;
261 Relation rel;
262 Datum roname_d;
263 SnapshotData SnapshotDirty;
264 SysScanDesc scan;
266
	/* Converted once up front; reused as the roname column value below. */
267 roname_d = CStringGetTextDatum(roname);
268
270
271 /*
272 * We need the numeric replication origin to be 16bit wide, so we cannot
273 * rely on the normal oid allocation. Instead we simply scan
274 * pg_replication_origin for the first unused id. That's not particularly
275 * efficient, but this should be a fairly infrequent operation - we can
276 * easily spend a bit more code on this when it turns out it needs to be
277 * faster.
278 *
279 * We handle concurrency by taking an exclusive lock (allowing reads!)
280 * over the table for the duration of the search. Because we use a "dirty
281 * snapshot" we can read rows that other in-progress sessions have
282 * written, even though they would be invisible with normal snapshots. Due
283 * to the exclusive lock there's no danger that new rows can appear while
284 * we're checking.
285 */
286 InitDirtySnapshot(SnapshotDirty);
287
288 rel = table_open(ReplicationOriginRelationId, ExclusiveLock);
289
	/*
	 * Candidate ids run from InvalidOid + 1 up to, but not including,
	 * PG_UINT16_MAX; the first one without an existing row wins.
	 */
290 for (roident = InvalidOid + 1; roident < PG_UINT16_MAX; roident++)
291 {
292 bool nulls[Natts_pg_replication_origin];
293 Datum values[Natts_pg_replication_origin];
294 bool collides;
295
297
	/* Scan key arguments: equality match on the roident column. */
299 Anum_pg_replication_origin_roident,
300 BTEqualStrategyNumber, F_OIDEQ,
301 ObjectIdGetDatum(roident));
302
303 scan = systable_beginscan(rel, ReplicationOriginIdentIndex,
304 true /* indexOK */ ,
305 &SnapshotDirty,
306 1, &key);
307
	/* Any tuple returned under the dirty snapshot means the id is taken. */
308 collides = HeapTupleIsValid(systable_getnext(scan));
309
310 systable_endscan(scan);
311
312 if (!collides)
313 {
314 /*
315 * Ok, found an unused roident, insert the new row and do a CCI,
316 * so our callers can look it up if they want to.
317 */
	/* No column of the new row is NULL. */
318 memset(&nulls, 0, sizeof(nulls));
319
320 values[Anum_pg_replication_origin_roident - 1] = ObjectIdGetDatum(roident);
321 values[Anum_pg_replication_origin_roname - 1] = roname_d;
322
323 tuple = heap_form_tuple(RelationGetDescr(rel), values, nulls);
324 CatalogTupleInsert(rel, tuple);
326 break;
327 }
328 }
329
330 /* now release lock again, */
332
	/*
	 * tuple is only assigned when an insert happened, so NULL here means
	 * every candidate id collided with an existing row.
	 */
333 if (tuple == NULL)
335 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
336 errmsg("could not find free replication origin ID")));
337
338 heap_freetuple(tuple);
339 return roident;
340}
341
342/*
343 * Helper function to drop a replication origin.
344 */
345static void
347{
348 int i;
349
350 /*
351 * Clean up the slot state info, if there is any matching slot.
352 */
353restart:
354 LWLockAcquire(ReplicationOriginLock, LW_EXCLUSIVE);
355
356 for (i = 0; i < max_active_replication_origins; i++)
357 {
359
360 if (state->roident == roident)
361 {
362 /* found our slot, is it busy? */
363 if (state->acquired_by != 0)
364 {
366
367 if (nowait)
369 (errcode(ERRCODE_OBJECT_IN_USE),
370 errmsg("could not drop replication origin with ID %d, in use by PID %d",
371 state->roident,
372 state->acquired_by)));
373
374 /*
375 * We must wait and then retry. Since we don't know which CV
376 * to wait on until here, we can't readily use
377 * ConditionVariablePrepareToSleep (calling it here would be
378 * wrong, since we could miss the signal if we did so); just
379 * use ConditionVariableSleep directly.
380 */
381 cv = &state->origin_cv;
382
383 LWLockRelease(ReplicationOriginLock);
384
385 ConditionVariableSleep(cv, WAIT_EVENT_REPLICATION_ORIGIN_DROP);
386 goto restart;
387 }
388
389 /* first make a WAL log entry */
390 {
391 xl_replorigin_drop xlrec;
392
393 xlrec.node_id = roident;
395 XLogRegisterData(&xlrec, sizeof(xlrec));
396 XLogInsert(RM_REPLORIGIN_ID, XLOG_REPLORIGIN_DROP);
397 }
398
399 /* then clear the in-memory slot */
400 state->roident = InvalidRepOriginId;
401 state->remote_lsn = InvalidXLogRecPtr;
402 state->local_lsn = InvalidXLogRecPtr;
403 break;
404 }
405 }
406 LWLockRelease(ReplicationOriginLock);
408}
409
410/*
411 * Drop replication origin (by name).
412 *
413 * Needs to be called in a transaction.
414 */
415void
416replorigin_drop_by_name(const char *name, bool missing_ok, bool nowait)
417{
	/*
	 * name: external name of the origin to drop.
	 * missing_ok: forwarded to replorigin_by_name(); also suppresses the
	 *		"cache lookup failed" error below, in which case the function
	 *		returns without deleting anything.
	 * nowait: forwarded unchanged to replorigin_state_clear().
	 *
	 * NOTE(review): some lines are not visible in this listing (including the
	 * trailing arguments of LockSharedObject/UnlockSharedObject); confirm
	 * against the upstream file.
	 */
418 RepOriginId roident;
419 Relation rel;
420 HeapTuple tuple;
421
423
424 rel = table_open(ReplicationOriginRelationId, RowExclusiveLock);
425
	/* Translate the external name into the catalog identifier. */
426 roident = replorigin_by_name(name, missing_ok);
427
428 /* Lock the origin to prevent concurrent drops. */
429 LockSharedObject(ReplicationOriginRelationId, roident, 0,
431
	/* Look the row up again by identifier now that the object lock is held. */
432 tuple = SearchSysCache1(REPLORIGIDENT, ObjectIdGetDatum(roident));
433 if (!HeapTupleIsValid(tuple))
434 {
435 if (!missing_ok)
436 elog(ERROR, "cache lookup failed for replication origin with ID %d",
437 roident);
438
439 /*
440 * We don't need to retain the locks if the origin is already dropped.
441 */
442 UnlockSharedObject(ReplicationOriginRelationId, roident, 0,
445 return;
446 }
447
	/* Clear the origin's state first; the catalog row is removed afterwards. */
448 replorigin_state_clear(roident, nowait);
449
450 /*
451 * Now, we can delete the catalog entry.
452 */
453 CatalogTupleDelete(rel, &tuple->t_self);
454 ReleaseSysCache(tuple);
455
457
458 /* We keep the lock on pg_replication_origin until commit */
459 table_close(rel, NoLock);
460}
461
462/*
463 * Lookup replication origin via its oid and return the name.
464 *
465 * The external name is palloc'd in the calling context.
466 *
467 * Returns true if the origin is known, false otherwise.
468 */
469bool
470replorigin_by_oid(RepOriginId roident, bool missing_ok, char **roname)
471{
	/*
	 * roident: identifier to look up; asserted below to be neither
	 *		InvalidRepOriginId nor DoNotReplicateId.
	 * missing_ok: when true, an unknown identifier yields a false return
	 *		instead of the undefined-object error.
	 * roname: output; receives the name as a C string on success and NULL
	 *		when no matching row is found.
	 *
	 * NOTE(review): the declaration/assignment of "ric" and the opener of the
	 * error report are not visible in this listing; confirm against the
	 * upstream file.
	 */
472 HeapTuple tuple;
474
475 Assert(OidIsValid((Oid) roident));
476 Assert(roident != InvalidRepOriginId);
477 Assert(roident != DoNotReplicateId);
478
479 tuple = SearchSysCache1(REPLORIGIDENT,
480 ObjectIdGetDatum((Oid) roident));
481
482 if (HeapTupleIsValid(tuple))
483 {
	/* Copy the name out before the cache reference is released. */
485 *roname = text_to_cstring(&ric->roname);
486 ReleaseSysCache(tuple);
487
488 return true;
489 }
490 else
491 {
	/* Give the caller a defined output value even on the failure path. */
492 *roname = NULL;
493
494 if (!missing_ok)
496 (errcode(ERRCODE_UNDEFINED_OBJECT),
497 errmsg("replication origin with ID %d does not exist",
498 roident)));
499
500 return false;
501 }
502}
503
504
505/* ---------------------------------------------------------------------------
506 * Functions for handling replication progress.
507 * ---------------------------------------------------------------------------
508 */
509
510Size
512{
513 Size size = 0;
514
516 return size;
517
518 size = add_size(size, offsetof(ReplicationStateCtl, states));
519
520 size = add_size(size,
522 return size;
523}
524
525void
527{
528 bool found;
529
531 return;
532
534 ShmemInitStruct("ReplicationOriginState",
536 &found);
538
539 if (!found)
540 {
541 int i;
542
544
546
547 for (i = 0; i < max_active_replication_origins; i++)
548 {
552 }
553 }
554}
555
556/* ---------------------------------------------------------------------------
557 * Perform a checkpoint of each replication origin's progress with respect to
558 * the replayed remote_lsn. Make sure that all transactions we refer to in the
559 * checkpoint (local_lsn) are actually on-disk. This might not yet be the case
560 * if the transactions were originally committed asynchronously.
561 *
562 * We store checkpoints in the following format:
563 * +-------+------------------------+------------------+-----+--------+
564 * | MAGIC | ReplicationStateOnDisk | struct Replic... | ... | CRC32C | EOF
565 * +-------+------------------------+------------------+-----+--------+
566 *
567 * So it's just the magic, followed by the statically sized
568 * ReplicationStateOnDisk structs. Note that the maximum number of
569 * ReplicationState is determined by max_active_replication_origins.
570 * ---------------------------------------------------------------------------
571 */
572void
574{
575 const char *tmppath = PG_REPLORIGIN_CHECKPOINT_TMPFILE;
576 const char *path = PG_REPLORIGIN_CHECKPOINT_FILENAME;
577 int tmpfd;
578 int i;
581
583 return;
584
586
587 /* make sure no old temp file is remaining */
588 if (unlink(tmppath) < 0 && errno != ENOENT)
591 errmsg("could not remove file \"%s\": %m",
592 tmppath)));
593
594 /*
595 * no other backend can perform this at the same time; only one checkpoint
596 * can happen at a time.
597 */
598 tmpfd = OpenTransientFile(tmppath,
599 O_CREAT | O_EXCL | O_WRONLY | PG_BINARY);
600 if (tmpfd < 0)
603 errmsg("could not create file \"%s\": %m",
604 tmppath)));
605
606 /* write magic */
607 errno = 0;
608 if ((write(tmpfd, &magic, sizeof(magic))) != sizeof(magic))
609 {
610 /* if write didn't set errno, assume problem is no disk space */
611 if (errno == 0)
612 errno = ENOSPC;
615 errmsg("could not write to file \"%s\": %m",
616 tmppath)));
617 }
618 COMP_CRC32C(crc, &magic, sizeof(magic));
619
620 /* prevent concurrent creations/drops */
621 LWLockAcquire(ReplicationOriginLock, LW_SHARED);
622
623 /* write actual data */
624 for (i = 0; i < max_active_replication_origins; i++)
625 {
626 ReplicationStateOnDisk disk_state;
628 XLogRecPtr local_lsn;
629
630 if (curstate->roident == InvalidRepOriginId)
631 continue;
632
633 /* zero, to avoid uninitialized padding bytes */
634 memset(&disk_state, 0, sizeof(disk_state));
635
636 LWLockAcquire(&curstate->lock, LW_SHARED);
637
638 disk_state.roident = curstate->roident;
639
640 disk_state.remote_lsn = curstate->remote_lsn;
641 local_lsn = curstate->local_lsn;
642
643 LWLockRelease(&curstate->lock);
644
645 /* make sure we only write out a commit that's persistent */
646 XLogFlush(local_lsn);
647
648 errno = 0;
649 if ((write(tmpfd, &disk_state, sizeof(disk_state))) !=
650 sizeof(disk_state))
651 {
652 /* if write didn't set errno, assume problem is no disk space */
653 if (errno == 0)
654 errno = ENOSPC;
657 errmsg("could not write to file \"%s\": %m",
658 tmppath)));
659 }
660
661 COMP_CRC32C(crc, &disk_state, sizeof(disk_state));
662 }
663
664 LWLockRelease(ReplicationOriginLock);
665
666 /* write out the CRC */
668 errno = 0;
669 if ((write(tmpfd, &crc, sizeof(crc))) != sizeof(crc))
670 {
671 /* if write didn't set errno, assume problem is no disk space */
672 if (errno == 0)
673 errno = ENOSPC;
676 errmsg("could not write to file \"%s\": %m",
677 tmppath)));
678 }
679
680 if (CloseTransientFile(tmpfd) != 0)
683 errmsg("could not close file \"%s\": %m",
684 tmppath)));
685
686 /* fsync, rename to permanent file, fsync file and directory */
687 durable_rename(tmppath, path, PANIC);
688}
689
690/*
691 * Recover replication replay status from checkpoint data saved earlier by
692 * CheckPointReplicationOrigin.
693 *
694 * This only needs to be called at startup and *not* during every checkpoint
695 * read during recovery (e.g. in HS or PITR from a base backup) afterwards. All
696 * state thereafter can be recovered by looking at commit records.
697 */
698void
700{
701 const char *path = PG_REPLORIGIN_CHECKPOINT_FILENAME;
702 int fd;
703 int readBytes;
705 int last_state = 0;
706 pg_crc32c file_crc;
708
709 /* don't want to overwrite already existing state */
710#ifdef USE_ASSERT_CHECKING
711 static bool already_started = false;
712
713 Assert(!already_started);
714 already_started = true;
715#endif
716
718 return;
719
721
722 elog(DEBUG2, "starting up replication origin progress state");
723
724 fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
725
726 /*
727 * might have had max_active_replication_origins == 0 last run, or we just
728 * brought up a standby.
729 */
730 if (fd < 0 && errno == ENOENT)
731 return;
732 else if (fd < 0)
735 errmsg("could not open file \"%s\": %m",
736 path)));
737
738 /* verify magic, that is written even if nothing was active */
739 readBytes = read(fd, &magic, sizeof(magic));
740 if (readBytes != sizeof(magic))
741 {
742 if (readBytes < 0)
745 errmsg("could not read file \"%s\": %m",
746 path)));
747 else
750 errmsg("could not read file \"%s\": read %d of %zu",
751 path, readBytes, sizeof(magic))));
752 }
753 COMP_CRC32C(crc, &magic, sizeof(magic));
754
755 if (magic != REPLICATION_STATE_MAGIC)
757 (errmsg("replication checkpoint has wrong magic %u instead of %u",
758 magic, REPLICATION_STATE_MAGIC)));
759
760 /* we can skip locking here, no other access is possible */
761
762 /* recover individual states, until there are no more to be found */
763 while (true)
764 {
765 ReplicationStateOnDisk disk_state;
766
767 readBytes = read(fd, &disk_state, sizeof(disk_state));
768
769 /* no further data */
770 if (readBytes == sizeof(crc))
771 {
772 /* not pretty, but simple ... */
773 file_crc = *(pg_crc32c *) &disk_state;
774 break;
775 }
776
777 if (readBytes < 0)
778 {
781 errmsg("could not read file \"%s\": %m",
782 path)));
783 }
784
785 if (readBytes != sizeof(disk_state))
786 {
789 errmsg("could not read file \"%s\": read %d of %zu",
790 path, readBytes, sizeof(disk_state))));
791 }
792
793 COMP_CRC32C(crc, &disk_state, sizeof(disk_state));
794
795 if (last_state == max_active_replication_origins)
797 (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
798 errmsg("could not find free replication state, increase \"max_active_replication_origins\"")));
799
800 /* copy data to shared memory */
801 replication_states[last_state].roident = disk_state.roident;
802 replication_states[last_state].remote_lsn = disk_state.remote_lsn;
803 last_state++;
804
805 ereport(LOG,
806 (errmsg("recovered replication state of node %d to %X/%X",
807 disk_state.roident,
808 LSN_FORMAT_ARGS(disk_state.remote_lsn))));
809 }
810
811 /* now check checksum */
813 if (file_crc != crc)
816 errmsg("replication slot checkpoint has wrong checksum %u, expected %u",
817 crc, file_crc)));
818
819 if (CloseTransientFile(fd) != 0)
822 errmsg("could not close file \"%s\": %m",
823 path)));
824}
825
826void
828{
829 uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
830
831 switch (info)
832 {
834 {
835 xl_replorigin_set *xlrec =
837
839 xlrec->remote_lsn, record->EndRecPtr,
840 xlrec->force /* backward */ ,
841 false /* WAL log */ );
842 break;
843 }
845 {
846 xl_replorigin_drop *xlrec;
847 int i;
848
849 xlrec = (xl_replorigin_drop *) XLogRecGetData(record);
850
851 for (i = 0; i < max_active_replication_origins; i++)
852 {
854
855 /* found our slot */
856 if (state->roident == xlrec->node_id)
857 {
858 /* reset entry */
859 state->roident = InvalidRepOriginId;
860 state->remote_lsn = InvalidXLogRecPtr;
861 state->local_lsn = InvalidXLogRecPtr;
862 break;
863 }
864 }
865 break;
866 }
867 default:
868 elog(PANIC, "replorigin_redo: unknown op code %u", info);
869 }
870}
871
872
873/*
874 * Tell the replication origin progress machinery that a commit from 'node'
875 * that originated at the LSN remote_commit on the remote node was replayed
876 * successfully and that we don't need to do so again. In combination with
877 * setting up replorigin_session_origin_lsn and replorigin_session_origin
878 * that ensures we won't lose knowledge about that after a crash if the
879 * transaction had a persistent effect (think of asynchronous commits).
880 *
881 * local_commit needs to be a local LSN of the commit so that we can make sure
882 * upon a checkpoint that enough WAL has been persisted to disk.
883 *
884 * Needs to be called with a RowExclusiveLock on pg_replication_origin,
885 * unless running in recovery.
886 */
887void
889 XLogRecPtr remote_commit, XLogRecPtr local_commit,
890 bool go_backward, bool wal_log)
891{
892 int i;
893 ReplicationState *replication_state = NULL;
894 ReplicationState *free_state = NULL;
895
896 Assert(node != InvalidRepOriginId);
897
898 /* we don't track DoNotReplicateId */
899 if (node == DoNotReplicateId)
900 return;
901
902 /*
903 * XXX: For the case where this is called by WAL replay, it'd be more
904 * efficient to restore into a backend local hashtable and only dump into
905 * shmem after recovery is finished. Let's wait with implementing that
906 * till it's shown to be a measurable expense
907 */
908
909 /* Lock exclusively, as we may have to create a new table entry. */
910 LWLockAcquire(ReplicationOriginLock, LW_EXCLUSIVE);
911
912 /*
913 * Search for either an existing slot for the origin, or a free one we can
914 * use.
915 */
916 for (i = 0; i < max_active_replication_origins; i++)
917 {
919
920 /* remember where to insert if necessary */
921 if (curstate->roident == InvalidRepOriginId &&
922 free_state == NULL)
923 {
924 free_state = curstate;
925 continue;
926 }
927
928 /* not our slot */
929 if (curstate->roident != node)
930 {
931 continue;
932 }
933
934 /* ok, found slot */
935 replication_state = curstate;
936
937 LWLockAcquire(&replication_state->lock, LW_EXCLUSIVE);
938
939 /* Make sure it's not used by somebody else */
940 if (replication_state->acquired_by != 0)
941 {
943 (errcode(ERRCODE_OBJECT_IN_USE),
944 errmsg("replication origin with ID %d is already active for PID %d",
945 replication_state->roident,
946 replication_state->acquired_by)));
947 }
948
949 break;
950 }
951
952 if (replication_state == NULL && free_state == NULL)
954 (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
955 errmsg("could not find free replication state slot for replication origin with ID %d",
956 node),
957 errhint("Increase \"max_active_replication_origins\" and try again.")));
958
959 if (replication_state == NULL)
960 {
961 /* initialize new slot */
962 LWLockAcquire(&free_state->lock, LW_EXCLUSIVE);
963 replication_state = free_state;
964 Assert(replication_state->remote_lsn == InvalidXLogRecPtr);
965 Assert(replication_state->local_lsn == InvalidXLogRecPtr);
966 replication_state->roident = node;
967 }
968
969 Assert(replication_state->roident != InvalidRepOriginId);
970
971 /*
972 * If somebody "forcefully" sets this slot, WAL log it, so it's durable
973 * and the standby gets the message. Primarily this will be called during
974 * WAL replay (of commit records) where no WAL logging is necessary.
975 */
976 if (wal_log)
977 {
978 xl_replorigin_set xlrec;
979
980 xlrec.remote_lsn = remote_commit;
981 xlrec.node_id = node;
982 xlrec.force = go_backward;
983
985 XLogRegisterData(&xlrec, sizeof(xlrec));
986
987 XLogInsert(RM_REPLORIGIN_ID, XLOG_REPLORIGIN_SET);
988 }
989
990 /*
991 * Due to - harmless - race conditions during a checkpoint we could see
992 * values here that are older than the ones we already have in memory. We
993 * could also see older values for prepared transactions when the prepare
994 * is sent at a later point of time along with commit prepared and there
995 * are other transactions commits between prepare and commit prepared. See
996 * ReorderBufferFinishPrepared. Don't overwrite those.
997 */
998 if (go_backward || replication_state->remote_lsn < remote_commit)
999 replication_state->remote_lsn = remote_commit;
1000 if (local_commit != InvalidXLogRecPtr &&
1001 (go_backward || replication_state->local_lsn < local_commit))
1002 replication_state->local_lsn = local_commit;
1003 LWLockRelease(&replication_state->lock);
1004
1005 /*
1006 * Release *after* changing the LSNs, slot isn't acquired and thus could
1007 * otherwise be dropped anytime.
1008 */
1009 LWLockRelease(ReplicationOriginLock);
1010}
1011
1012
1015{
1016 int i;
1017 XLogRecPtr local_lsn = InvalidXLogRecPtr;
1018 XLogRecPtr remote_lsn = InvalidXLogRecPtr;
1019
1020 /* prevent slots from being concurrently dropped */
1021 LWLockAcquire(ReplicationOriginLock, LW_SHARED);
1022
1023 for (i = 0; i < max_active_replication_origins; i++)
1024 {
1026
1028
1029 if (state->roident == node)
1030 {
1031 LWLockAcquire(&state->lock, LW_SHARED);
1032
1033 remote_lsn = state->remote_lsn;
1034 local_lsn = state->local_lsn;
1035
1036 LWLockRelease(&state->lock);
1037
1038 break;
1039 }
1040 }
1041
1042 LWLockRelease(ReplicationOriginLock);
1043
1044 if (flush && local_lsn != InvalidXLogRecPtr)
1045 XLogFlush(local_lsn);
1046
1047 return remote_lsn;
1048}
1049
1050/*
1051 * Tear down a (possibly) configured session replication origin during process
1052 * exit.
1053 */
1054static void
1056{
1057 ConditionVariable *cv = NULL;
1058
1059 if (session_replication_state == NULL)
1060 return;
1061
1062 LWLockAcquire(ReplicationOriginLock, LW_EXCLUSIVE);
1063
1065 {
1067
1070 }
1071
1072 LWLockRelease(ReplicationOriginLock);
1073
1074 if (cv)
1076}
1077
1078/*
1079 * Setup a replication origin in the shared memory struct if it doesn't
1080 * already exist and cache access to the specific ReplicationState so the
1081 * array doesn't have to be searched when calling
1082 * replorigin_session_advance().
1083 *
1084 * Normally only one such cached origin can exist per process so the cached
1085 * value can only be set again after the previous value is torn down with
1086 * replorigin_session_reset(). For this normal case pass acquired_by = 0
1087 * (meaning the slot is not allowed to be already acquired by another process).
1088 *
1089 * However, sometimes multiple processes can safely re-use the same origin slot
1090 * (for example, multiple parallel apply processes can safely use the same
1091 * origin, provided they maintain commit order by allowing only one process to
1092 * commit at a time). For this case the first process must pass acquired_by =
1093 * 0, and then the other processes sharing that same origin can pass
1094 * acquired_by = PID of the first process.
1095 */
1096void
1098{
1099 static bool registered_cleanup;
1100 int i;
1101 int free_slot = -1;
1102
1103 if (!registered_cleanup)
1104 {
1106 registered_cleanup = true;
1107 }
1108
1110
1111 if (session_replication_state != NULL)
1112 ereport(ERROR,
1113 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1114 errmsg("cannot setup replication origin when one is already setup")));
1115
1116 /* Lock exclusively, as we may have to create a new table entry. */
1117 LWLockAcquire(ReplicationOriginLock, LW_EXCLUSIVE);
1118
1119 /*
1120 * Search for either an existing slot for the origin, or a free one we can
1121 * use.
1122 */
1123 for (i = 0; i < max_active_replication_origins; i++)
1124 {
1126
1127 /* remember where to insert if necessary */
1128 if (curstate->roident == InvalidRepOriginId &&
1129 free_slot == -1)
1130 {
1131 free_slot = i;
1132 continue;
1133 }
1134
1135 /* not our slot */
1136 if (curstate->roident != node)
1137 continue;
1138
1139 else if (curstate->acquired_by != 0 && acquired_by == 0)
1140 {
1141 ereport(ERROR,
1142 (errcode(ERRCODE_OBJECT_IN_USE),
1143 errmsg("replication origin with ID %d is already active for PID %d",
1144 curstate->roident, curstate->acquired_by)));
1145 }
1146
1147 /* ok, found slot */
1148 session_replication_state = curstate;
1149 break;
1150 }
1151
1152
1153 if (session_replication_state == NULL && free_slot == -1)
1154 ereport(ERROR,
1155 (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
1156 errmsg("could not find free replication state slot for replication origin with ID %d",
1157 node),
1158 errhint("Increase \"max_active_replication_origins\" and try again.")));
1159 else if (session_replication_state == NULL)
1160 {
1161 /* initialize new slot */
1166 }
1167
1168
1170
1171 if (acquired_by == 0)
1173 else if (session_replication_state->acquired_by != acquired_by)
1174 elog(ERROR, "could not find replication state slot for replication origin with OID %u which was acquired by %d",
1175 node, acquired_by);
1176
1177 LWLockRelease(ReplicationOriginLock);
1178
1179 /* probably this one is pointless */
1181}
1182
1183/*
1184 * Reset replay state previously setup in this session.
1185 *
1186 * This function may only be called if an origin was setup with
1187 * replorigin_session_setup().
1188 */
1189void
1191{
1193
1195
1196 if (session_replication_state == NULL)
1197 ereport(ERROR,
1198 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1199 errmsg("no replication origin is configured")));
1200
1201 LWLockAcquire(ReplicationOriginLock, LW_EXCLUSIVE);
1202
1206
1207 LWLockRelease(ReplicationOriginLock);
1208
1210}
1211
1212/*
1213 * Do the same work replorigin_advance() does, just on the session's
1214 * configured origin.
1215 *
1216 * This is noticeably cheaper than using replorigin_advance().
1217 */
1218void
1220{
1223
1225 if (session_replication_state->local_lsn < local_commit)
1226 session_replication_state->local_lsn = local_commit;
1227 if (session_replication_state->remote_lsn < remote_commit)
1228 session_replication_state->remote_lsn = remote_commit;
1230}
1231
1232/*
1233 * Ask the machinery about the point up to which we successfully replayed
1234 * changes from an already setup replication origin.
1235 */
1238{
1239 XLogRecPtr remote_lsn;
1240 XLogRecPtr local_lsn;
1241
1243
1248
1249 if (flush && local_lsn != InvalidXLogRecPtr)
1250 XLogFlush(local_lsn);
1251
1252 return remote_lsn;
1253}
1254
1255
1256
1257/* ---------------------------------------------------------------------------
1258 * SQL functions for working with replication origin.
1259 *
1260 * These mostly should be fairly short wrappers around more generic functions.
1261 * ---------------------------------------------------------------------------
1262 */
1263
1264/*
1265 * Create replication origin for the passed in name, and return the assigned
1266 * oid.
1267 */
1268Datum
1270{
1271 char *name;
1272 RepOriginId roident;
1273
1274 replorigin_check_prerequisites(false, false);
1275
1277
1278 /*
1279 * Replication origins "any" and "none" are reserved for system options.
1280 * The origins "pg_xxx" are reserved for internal use.
1281 */
1283 ereport(ERROR,
1284 (errcode(ERRCODE_RESERVED_NAME),
1285 errmsg("replication origin name \"%s\" is reserved",
1286 name),
1287 errdetail("Origin names \"%s\", \"%s\", and names starting with \"pg_\" are reserved.",
1288 LOGICALREP_ORIGIN_ANY, LOGICALREP_ORIGIN_NONE)));
1289
1290 /*
1291 * If built with appropriate switch, whine when regression-testing
1292 * conventions for replication origin names are violated.
1293 */
1294#ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS
1295 if (strncmp(name, "regress_", 8) != 0)
1296 elog(WARNING, "replication origins created by regression test cases should have names starting with \"regress_\"");
1297#endif
1298
1299 roident = replorigin_create(name);
1300
1301 pfree(name);
1302
1303 PG_RETURN_OID(roident);
1304}
1305
1306/*
1307 * Drop replication origin.
1308 */
1309Datum
1311{
1312 char *name;
1313
1314 replorigin_check_prerequisites(false, false);
1315
1317
1318 replorigin_drop_by_name(name, false, true);
1319
1320 pfree(name);
1321
1323}
1324
1325/*
1326 * Return oid of a replication origin.
1327 */
1328Datum
1330{
1331 char *name;
1332 RepOriginId roident;
1333
1334 replorigin_check_prerequisites(false, false);
1335
1337 roident = replorigin_by_name(name, true);
1338
1339 pfree(name);
1340
1341 if (OidIsValid(roident))
1342 PG_RETURN_OID(roident);
1344}
1345
1346/*
1347 * Setup a replication origin for this session.
1348 */
1349Datum
1351{
1352 char *name;
1353 RepOriginId origin;
1354
1355 replorigin_check_prerequisites(true, false);
1356
1358 origin = replorigin_by_name(name, false);
1359 replorigin_session_setup(origin, 0);
1360
1362
1363 pfree(name);
1364
1366}
1367
1368/*
1369 * Reset previously setup origin in this session
1370 */
1371Datum
1373{
1374 replorigin_check_prerequisites(true, false);
1375
1377
1381
1383}
1384
1385/*
1386 * Has a replication origin been set up for this session?
1387 */
1388Datum
1390{
1391 replorigin_check_prerequisites(false, false);
1392
1394}
1395
1396
1397/*
1398 * Return the replication progress for origin setup in the current session.
1399 *
1400 * If 'flush' is set to true it is ensured that the returned value corresponds
1401 * to a local transaction that has been flushed. This is useful if asynchronous
1402 * commits are used when replaying replicated transactions.
1403 */
1404Datum
1406{
1407 XLogRecPtr remote_lsn = InvalidXLogRecPtr;
1408 bool flush = PG_GETARG_BOOL(0);
1409
1410 replorigin_check_prerequisites(true, false);
1411
1412 if (session_replication_state == NULL)
1413 ereport(ERROR,
1414 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1415 errmsg("no replication origin is configured")));
1416
1417 remote_lsn = replorigin_session_get_progress(flush);
1418
1419 if (remote_lsn == InvalidXLogRecPtr)
1421
1422 PG_RETURN_LSN(remote_lsn);
1423}
1424
1425Datum
1427{
1428 XLogRecPtr location = PG_GETARG_LSN(0);
1429
1430 replorigin_check_prerequisites(true, false);
1431
1432 if (session_replication_state == NULL)
1433 ereport(ERROR,
1434 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1435 errmsg("no replication origin is configured")));
1436
1439
1441}
1442
1443Datum
1445{
1446 replorigin_check_prerequisites(true, false);
1447
1450
1452}
1453
1454
1455Datum
1457{
1459 XLogRecPtr remote_commit = PG_GETARG_LSN(1);
1460 RepOriginId node;
1461
1462 replorigin_check_prerequisites(true, false);
1463
1464 /* lock to prevent the replication origin from vanishing */
1465 LockRelationOid(ReplicationOriginRelationId, RowExclusiveLock);
1466
1467 node = replorigin_by_name(text_to_cstring(name), false);
1468
1469 /*
1470 * Can't sensibly pass a local commit to be flushed at checkpoint - this
1471 * xact hasn't committed yet. This is why this function should be used to
1472 * set up the initial replication state, but not for replay.
1473 */
1474 replorigin_advance(node, remote_commit, InvalidXLogRecPtr,
1475 true /* go backward */ , true /* WAL log */ );
1476
1477 UnlockRelationOid(ReplicationOriginRelationId, RowExclusiveLock);
1478
1480}
1481
1482
1483/*
1484 * Return the replication progress for an individual replication origin.
1485 *
1486 * If 'flush' is set to true it is ensured that the returned value corresponds
1487 * to a local transaction that has been flushed. This is useful if asynchronous
1488 * commits are used when replaying replicated transactions.
1489 */
1490Datum
1492{
1493 char *name;
1494 bool flush;
1495 RepOriginId roident;
1496 XLogRecPtr remote_lsn = InvalidXLogRecPtr;
1497
1499
1501 flush = PG_GETARG_BOOL(1);
1502
1503 roident = replorigin_by_name(name, false);
1504 Assert(OidIsValid(roident));
1505
1506 remote_lsn = replorigin_get_progress(roident, flush);
1507
1508 if (remote_lsn == InvalidXLogRecPtr)
1510
1511 PG_RETURN_LSN(remote_lsn);
1512}
1513
1514
1515Datum
1517{
1518 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1519 int i;
1521
1522 /* we want to return 0 rows if max_active_replication_origins is set to zero */
1523 replorigin_check_prerequisites(false, true);
1524
1525 InitMaterializedSRF(fcinfo, 0);
1526
1527 /* prevent slots from being concurrently dropped */
1528 LWLockAcquire(ReplicationOriginLock, LW_SHARED);
1529
1530 /*
1531 * Iterate through all possible replication_states, display if they are
1532 * filled. ReplicationOriginLock is held in shared mode for the whole scan,
1533 * and each slot's own lock is taken while its LSNs are read.
1534 */
1535 for (i = 0; i < max_active_replication_origins; i++)
1536 {
1540 char *roname;
1541
1543
1544 /* unused slot, nothing to display */
1545 if (state->roident == InvalidRepOriginId)
1546 continue;
1547
1548 memset(values, 0, sizeof(values));
1549 memset(nulls, 1, sizeof(nulls));
1550
1551 values[0] = ObjectIdGetDatum(state->roident);
1552 nulls[0] = false;
1553
1554 /*
1555 * We're not preventing the origin from being dropped concurrently, so
1556 * silently accept that it might be gone.
1557 */
1558 if (replorigin_by_oid(state->roident, true,
1559 &roname))
1560 {
1561 values[1] = CStringGetTextDatum(roname);
1562 nulls[1] = false;
1563 }
1564
1565 LWLockAcquire(&state->lock, LW_SHARED);
1566
1567 values[2] = LSNGetDatum(state->remote_lsn);
1568 nulls[2] = false;
1569
1570 values[3] = LSNGetDatum(state->local_lsn);
1571 nulls[3] = false;
1572
1573 LWLockRelease(&state->lock);
1574
1575 tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
1576 values, nulls);
1577 }
1578
1579 LWLockRelease(ReplicationOriginLock);
1580
1581#undef REPLICATION_ORIGIN_PROGRESS_COLS
1582
1583 return (Datum) 0;
1584}
static Datum values[MAXATTR]
Definition: bootstrap.c:151
#define CStringGetTextDatum(s)
Definition: builtins.h:97
uint8_t uint8
Definition: c.h:500
#define PG_BINARY
Definition: c.h:1244
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:434
uint32_t uint32
Definition: c.h:502
#define PG_UINT16_MAX
Definition: c.h:558
#define MemSet(start, val, len)
Definition: c.h:991
#define OidIsValid(objectId)
Definition: c.h:746
size_t Size
Definition: c.h:576
bool IsReservedName(const char *name)
Definition: catalog.c:278
bool ConditionVariableCancelSleep(void)
void ConditionVariableBroadcast(ConditionVariable *cv)
void ConditionVariableInit(ConditionVariable *cv)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
int64 TimestampTz
Definition: timestamp.h:39
int errcode_for_file_access(void)
Definition: elog.c:877
int errdetail(const char *fmt,...)
Definition: elog.c:1204
int errhint(const char *fmt,...)
Definition: elog.c:1318
int errcode(int sqlerrcode)
Definition: elog.c:854
int errmsg(const char *fmt,...)
Definition: elog.c:1071
#define LOG
Definition: elog.h:31
#define WARNING
Definition: elog.h:36
#define DEBUG2
Definition: elog.h:29
#define PANIC
Definition: elog.h:42
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition: fd.c:782
int CloseTransientFile(int fd)
Definition: fd.c:2871
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2694
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:309
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define PG_RETURN_NULL()
Definition: fmgr.h:345
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:274
#define PG_RETURN_OID(x)
Definition: fmgr.h:360
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
void InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags)
Definition: funcapi.c:76
void systable_endscan(SysScanDesc sysscan)
Definition: genam.c:603
HeapTuple systable_getnext(SysScanDesc sysscan)
Definition: genam.c:514
SysScanDesc systable_beginscan(Relation heapRelation, Oid indexId, bool indexOK, Snapshot snapshot, int nkeys, ScanKey key)
Definition: genam.c:388
int MyProcPid
Definition: globals.c:48
Assert(PointerIsAligned(start, uint64))
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition: heaptuple.c:1117
void heap_freetuple(HeapTuple htup)
Definition: heaptuple.c:1435
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
static void * GETSTRUCT(const HeapTupleData *tuple)
Definition: htup_details.h:728
#define ident
Definition: indent_codes.h:47
void CatalogTupleInsert(Relation heapRel, HeapTuple tup)
Definition: indexing.c:233
void CatalogTupleDelete(Relation heapRel, ItemPointer tid)
Definition: indexing.c:365
#define write(a, b, c)
Definition: win32.h:14
#define read(a, b, c)
Definition: win32.h:13
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:365
int i
Definition: isn.c:77
void LockSharedObject(Oid classid, Oid objid, uint16 objsubid, LOCKMODE lockmode)
Definition: lmgr.c:1082
void UnlockRelationOid(Oid relid, LOCKMODE lockmode)
Definition: lmgr.c:229
void LockRelationOid(Oid relid, LOCKMODE lockmode)
Definition: lmgr.c:107
void UnlockSharedObject(Oid classid, Oid objid, uint16 objsubid, LOCKMODE lockmode)
Definition: lmgr.c:1142
#define NoLock
Definition: lockdefs.h:34
#define AccessExclusiveLock
Definition: lockdefs.h:43
#define ExclusiveLock
Definition: lockdefs.h:42
#define RowExclusiveLock
Definition: lockdefs.h:38
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1182
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1902
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:721
@ LWTRANCHE_REPLICATION_ORIGIN_STATE
Definition: lwlock.h:192
@ LW_SHARED
Definition: lwlock.h:115
@ LW_EXCLUSIVE
Definition: lwlock.h:114
void pfree(void *pointer)
Definition: mcxt.c:2150
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:123
TimestampTz replorigin_session_origin_timestamp
Definition: origin.c:165
static ReplicationStateCtl * replication_states_ctl
Definition: origin.c:176
RepOriginId replorigin_by_name(const char *roname, bool missing_ok)
Definition: origin.c:226
Size ReplicationOriginShmemSize(void)
Definition: origin.c:511
RepOriginId replorigin_create(const char *roname)
Definition: origin.c:257
Datum pg_replication_origin_progress(PG_FUNCTION_ARGS)
Definition: origin.c:1491
void replorigin_session_reset(void)
Definition: origin.c:1190
struct ReplicationState ReplicationState
static bool IsReservedOriginName(const char *name)
Definition: origin.c:209
void replorigin_session_advance(XLogRecPtr remote_commit, XLogRecPtr local_commit)
Definition: origin.c:1219
bool replorigin_by_oid(RepOriginId roident, bool missing_ok, char **roname)
Definition: origin.c:470
int max_active_replication_origins
Definition: origin.c:104
Datum pg_replication_origin_advance(PG_FUNCTION_ARGS)
Definition: origin.c:1456
XLogRecPtr replorigin_get_progress(RepOriginId node, bool flush)
Definition: origin.c:1014
#define PG_REPLORIGIN_CHECKPOINT_TMPFILE
Definition: origin.c:101
Datum pg_replication_origin_session_progress(PG_FUNCTION_ARGS)
Definition: origin.c:1405
static ReplicationState * replication_states
Definition: origin.c:171
#define PG_REPLORIGIN_CHECKPOINT_FILENAME
Definition: origin.c:100
Datum pg_replication_origin_session_reset(PG_FUNCTION_ARGS)
Definition: origin.c:1372
Datum pg_replication_origin_xact_setup(PG_FUNCTION_ARGS)
Definition: origin.c:1426
Datum pg_replication_origin_session_is_setup(PG_FUNCTION_ARGS)
Definition: origin.c:1389
Datum pg_replication_origin_oid(PG_FUNCTION_ARGS)
Definition: origin.c:1329
Datum pg_replication_origin_session_setup(PG_FUNCTION_ARGS)
Definition: origin.c:1350
static void ReplicationOriginExitCleanup(int code, Datum arg)
Definition: origin.c:1055
void StartupReplicationOrigin(void)
Definition: origin.c:699
void replorigin_drop_by_name(const char *name, bool missing_ok, bool nowait)
Definition: origin.c:416
RepOriginId replorigin_session_origin
Definition: origin.c:163
void replorigin_advance(RepOriginId node, XLogRecPtr remote_commit, XLogRecPtr local_commit, bool go_backward, bool wal_log)
Definition: origin.c:888
static void replorigin_state_clear(RepOriginId roident, bool nowait)
Definition: origin.c:346
void replorigin_session_setup(RepOriginId node, int acquired_by)
Definition: origin.c:1097
void CheckPointReplicationOrigin(void)
Definition: origin.c:573
static void replorigin_check_prerequisites(bool check_origins, bool recoveryOK)
Definition: origin.c:190
static ReplicationState * session_replication_state
Definition: origin.c:184
Datum pg_replication_origin_drop(PG_FUNCTION_ARGS)
Definition: origin.c:1310
#define REPLICATION_ORIGIN_PROGRESS_COLS
XLogRecPtr replorigin_session_get_progress(bool flush)
Definition: origin.c:1237
void ReplicationOriginShmemInit(void)
Definition: origin.c:526
Datum pg_show_replication_origin_status(PG_FUNCTION_ARGS)
Definition: origin.c:1516
#define REPLICATION_STATE_MAGIC
Definition: origin.c:187
XLogRecPtr replorigin_session_origin_lsn
Definition: origin.c:164
Datum pg_replication_origin_create(PG_FUNCTION_ARGS)
Definition: origin.c:1269
Datum pg_replication_origin_xact_reset(PG_FUNCTION_ARGS)
Definition: origin.c:1444
void replorigin_redo(XLogReaderState *record)
Definition: origin.c:827
struct ReplicationStateCtl ReplicationStateCtl
struct ReplicationStateOnDisk ReplicationStateOnDisk
#define DoNotReplicateId
Definition: origin.h:34
#define InvalidRepOriginId
Definition: origin.h:33
#define XLOG_REPLORIGIN_DROP
Definition: origin.h:31
#define XLOG_REPLORIGIN_SET
Definition: origin.h:30
void * arg
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
uint32 pg_crc32c
Definition: pg_crc32c.h:38
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:153
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:158
return crc
#define PG_GETARG_LSN(n)
Definition: pg_lsn.h:33
static Datum LSNGetDatum(XLogRecPtr X)
Definition: pg_lsn.h:28
#define PG_RETURN_LSN(x)
Definition: pg_lsn.h:34
FormData_pg_replication_origin * Form_pg_replication_origin
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
uintptr_t Datum
Definition: postgres.h:69
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:257
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:317
#define InvalidOid
Definition: postgres_ext.h:35
unsigned int Oid
Definition: postgres_ext.h:30
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define RelationGetDescr(relation)
Definition: rel.h:542
void ScanKeyInit(ScanKey entry, AttrNumber attributeNumber, StrategyNumber strategy, RegProcedure procedure, Datum argument)
Definition: scankey.c:76
Size add_size(Size s1, Size s2)
Definition: shmem.c:493
Size mul_size(Size s1, Size s2)
Definition: shmem.c:510
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:387
#define InitDirtySnapshot(snapshotdata)
Definition: snapmgr.h:42
#define BTEqualStrategyNumber
Definition: stratnum.h:31
ItemPointerData t_self
Definition: htup.h:65
Definition: lwlock.h:42
ReplicationState states[FLEXIBLE_ARRAY_MEMBER]
Definition: origin.c:159
XLogRecPtr remote_lsn
Definition: origin.c:150
RepOriginId roident
Definition: origin.c:149
XLogRecPtr remote_lsn
Definition: origin.c:119
XLogRecPtr local_lsn
Definition: origin.c:126
ConditionVariable origin_cv
Definition: origin.c:136
RepOriginId roident
Definition: origin.c:114
LWLock lock
Definition: origin.c:141
TupleDesc setDesc
Definition: execnodes.h:359
Tuplestorestate * setResult
Definition: execnodes.h:358
XLogRecPtr EndRecPtr
Definition: xlogreader.h:207
Definition: regguts.h:323
Definition: c.h:658
RepOriginId node_id
Definition: origin.h:27
RepOriginId node_id
Definition: origin.h:21
XLogRecPtr remote_lsn
Definition: origin.h:20
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:269
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Definition: syscache.c:221
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:126
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:40
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, const Datum *values, const bool *isnull)
Definition: tuplestore.c:784
#define PG_GETARG_TIMESTAMPTZ(n)
Definition: timestamp.h:64
char * text_to_cstring(const text *t)
Definition: varlena.c:225
const char * name
bool IsTransactionState(void)
Definition: xact.c:387
void CommandCounterIncrement(void)
Definition: xact.c:1100
bool RecoveryInProgress(void)
Definition: xlog.c:6522
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2923
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
uint16 RepOriginId
Definition: xlogdefs.h:65
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474
void XLogRegisterData(const void *data, uint32 len)
Definition: xloginsert.c:364
void XLogBeginInsert(void)
Definition: xloginsert.c:149
#define XLogRecGetInfo(decoder)
Definition: xlogreader.h:410
#define XLogRecGetData(decoder)
Definition: xlogreader.h:415