PostgreSQL Source Code  git master
tablesync.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  * tablesync.c
3  * PostgreSQL logical replication: initial table data synchronization
4  *
5  * Copyright (c) 2012-2020, PostgreSQL Global Development Group
6  *
7  * IDENTIFICATION
8  * src/backend/replication/logical/tablesync.c
9  *
10  * NOTES
11  * This file contains code for initial table data synchronization for
12  * logical replication.
13  *
14  * The initial data synchronization is done separately for each table,
15  * in a separate apply worker that only fetches the initial snapshot data
16  * from the publisher and then synchronizes the position in the stream with
17  * the main apply worker.
18  *
19  * There are several reasons for doing the synchronization this way:
20  * - It allows us to parallelize the initial data synchronization
21  * which lowers the time needed for it to happen.
22  * - The initial synchronization does not have to hold the xid and LSN
23  * for the time it takes to copy data of all tables, causing less
24  * bloat and lower disk consumption compared to doing the
25  * synchronization in a single process for the whole database.
26  * - It allows us to synchronize any tables added after the initial
27  * synchronization has finished.
28  *
29  * The stream position synchronization works in multiple steps:
30  * - Apply worker requests a tablesync worker to start, setting the new
31  * table state to INIT.
32  * - Tablesync worker starts; changes table state from INIT to DATASYNC while
33  * copying.
34  * - Tablesync worker finishes the copy and sets table state to SYNCWAIT;
35  * waits for state change.
36  * - Apply worker periodically checks for tables in SYNCWAIT state. When
37  * any appear, it sets the table state to CATCHUP and starts loop-waiting
38  * until either the table state is set to SYNCDONE or the sync worker
39  * exits.
40  * - After the sync worker has seen the state change to CATCHUP, it will
41  * read the stream and apply changes (acting like an apply worker) until
42  * it catches up to the specified stream position. Then it sets the
43  * state to SYNCDONE. There might be zero changes applied between
44  * CATCHUP and SYNCDONE, because the sync worker might be ahead of the
45  * apply worker.
46  * - Once the state is set to SYNCDONE, the apply will continue tracking
47  * the table until it reaches the SYNCDONE stream position, at which
48  * point it sets state to READY and stops tracking. Again, there might
49  * be zero changes in between.
50  *
51  * So the state progression is always: INIT -> DATASYNC -> SYNCWAIT ->
52  * CATCHUP -> SYNCDONE -> READY.
53  *
54  * The catalog pg_subscription_rel is used to keep information about
55  * subscribed tables and their state. Some transient state during data
56  * synchronization is kept in shared memory. The states SYNCWAIT and
57  * CATCHUP only appear in memory.
58  *
59  * Example flows look like this:
60  * - Apply is in front:
61  * sync:8
62  * -> set in memory SYNCWAIT
63  * apply:10
64  * -> set in memory CATCHUP
65  * -> enter wait-loop
66  * sync:10
67  * -> set in catalog SYNCDONE
68  * -> exit
69  * apply:10
70  * -> exit wait-loop
71  * -> continue rep
72  * apply:11
73  * -> set in catalog READY
74  *
75  * - Sync is in front:
76  * sync:10
77  * -> set in memory SYNCWAIT
78  * apply:8
79  * -> set in memory CATCHUP
80  * -> continue per-table filtering
81  * sync:10
82  * -> set in catalog SYNCDONE
83  * -> exit
84  * apply:10
85  * -> set in catalog READY
86  * -> stop per-table filtering
87  * -> continue rep
88  *-------------------------------------------------------------------------
89  */
90 
#include "postgres.h"

#include "access/table.h"
#include "access/xact.h"
#include "catalog/pg_subscription_rel.h"
#include "catalog/pg_type.h"
#include "commands/copy.h"
#include "miscadmin.h"
#include "parser/parse_relation.h"
#include "pgstat.h"
#include "replication/logicallauncher.h"
#include "replication/logicalrelation.h"
#include "replication/walreceiver.h"
#include "replication/worker_internal.h"
#include "storage/ipc.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/snapmgr.h"
110 
111 static bool table_states_valid = false;
112 
114 
115 /*
116  * Exit routine for synchronization worker.
117  */
118 static void
120 finish_sync_worker(void)
121 {
122  /*
123  * Commit any outstanding transaction. This is the usual case, unless
124  * there was nothing to do for the table.
125  */
126  if (IsTransactionState())
127  {
129  pgstat_report_stat(false);
130  }
131 
132  /* And flush all writes. */
134 
136  ereport(LOG,
137  (errmsg("logical replication table synchronization worker for subscription \"%s\", table \"%s\" has finished",
141 
142  /* Find the main apply worker and signal it. */
144 
145  /* Stop gracefully */
146  proc_exit(0);
147 }
148 
149 /*
150  * Wait until the relation sync state is set in the catalog to the expected
151  * one; return true when it happens.
152  *
153  * Returns false if the table sync worker or the table itself have
154  * disappeared, or the table state has been reset.
155  *
156  * Currently, this is used in the apply worker when transitioning from
157  * CATCHUP state to SYNCDONE.
158  */
159 static bool
160 wait_for_relation_state_change(Oid relid, char expected_state)
161 {
162  char state;
163 
164  for (;;)
165  {
166  LogicalRepWorker *worker;
167  XLogRecPtr statelsn;
168 
170 
173  relid, &statelsn);
174 
175  if (state == SUBREL_STATE_UNKNOWN)
176  break;
177 
178  if (state == expected_state)
179  return true;
180 
181  /* Check if the sync worker is still running and bail if not. */
182  LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
184  false);
185  LWLockRelease(LogicalRepWorkerLock);
186  if (!worker)
187  break;
188 
189  (void) WaitLatch(MyLatch,
192 
194  }
195 
196  return false;
197 }
198 
199 /*
200  * Wait until the apply worker changes the state of our synchronization
201  * worker to the expected one.
202  *
203  * Used when transitioning from SYNCWAIT state to CATCHUP.
204  *
205  * Returns false if the apply worker has disappeared.
206  */
207 static bool
208 wait_for_worker_state_change(char expected_state)
209 {
210  int rc;
211 
212  for (;;)
213  {
214  LogicalRepWorker *worker;
215 
217 
218  /*
219  * Done if already in correct state. (We assume this fetch is atomic
220  * enough to not give a misleading answer if we do it with no lock.)
221  */
222  if (MyLogicalRepWorker->relstate == expected_state)
223  return true;
224 
225  /*
226  * Bail out if the apply worker has died, else signal it we're
227  * waiting.
228  */
229  LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
231  InvalidOid, false);
232  if (worker && worker->proc)
234  LWLockRelease(LogicalRepWorkerLock);
235  if (!worker)
236  break;
237 
238  /*
239  * Wait. We expect to get a latch signal back from the apply worker,
240  * but use a timeout in case it dies without sending one.
241  */
242  rc = WaitLatch(MyLatch,
245 
246  if (rc & WL_LATCH_SET)
248  }
249 
250  return false;
251 }
252 
253 /*
254  * Callback from syscache invalidation.
255  */
256 void
258 {
259  table_states_valid = false;
260 }
261 
262 /*
263  * Handle table synchronization cooperation from the synchronization
264  * worker.
265  *
266  * If the sync worker is in CATCHUP state and reached (or passed) the
267  * predetermined synchronization point in the WAL stream, mark the table as
268  * SYNCDONE and finish.
269  */
270 static void
272 {
274 
276 
277  if (MyLogicalRepWorker->relstate == SUBREL_STATE_CATCHUP &&
278  current_lsn >= MyLogicalRepWorker->relstate_lsn)
279  {
280  TimeLineID tli;
281 
282  MyLogicalRepWorker->relstate = SUBREL_STATE_SYNCDONE;
283  MyLogicalRepWorker->relstate_lsn = current_lsn;
284 
286 
291 
293  finish_sync_worker();
294  }
295  else
297 }
298 
299 /*
300  * Handle table synchronization cooperation from the apply worker.
301  *
302  * Walk over all subscription tables that are individually tracked by the
303  * apply process (currently, all that have state other than
304  * SUBREL_STATE_READY) and manage synchronization for them.
305  *
306  * If there are tables that need synchronizing and are not being synchronized
307  * yet, start sync workers for them (if there are free slots for sync
308  * workers). To prevent starting the sync worker for the same relation at a
309  * high frequency after a failure, we store its last start time with each sync
310  * state info. We start the sync worker for the same relation after waiting
311  * at least wal_retrieve_retry_interval.
312  *
313  * For tables that are being synchronized already, check if sync workers
314  * either need action from the apply worker or have finished. This is the
315  * SYNCWAIT to CATCHUP transition.
316  *
317  * If the synchronization position is reached (SYNCDONE), then the table can
318  * be marked as READY and is no longer tracked.
319  */
320 static void
322 {
323  struct tablesync_start_time_mapping
324  {
325  Oid relid;
326  TimestampTz last_start_time;
327  };
328  static List *table_states = NIL;
329  static HTAB *last_start_times = NULL;
330  ListCell *lc;
331  bool started_tx = false;
332 
334 
335  /* We need up-to-date sync state info for subscription tables here. */
336  if (!table_states_valid)
337  {
338  MemoryContext oldctx;
339  List *rstates;
340  ListCell *lc;
341  SubscriptionRelState *rstate;
342 
343  /* Clean the old list. */
344  list_free_deep(table_states);
345  table_states = NIL;
346 
348  started_tx = true;
349 
350  /* Fetch all non-ready tables. */
352 
353  /* Allocate the tracking info in a permanent memory context. */
355  foreach(lc, rstates)
356  {
357  rstate = palloc(sizeof(SubscriptionRelState));
358  memcpy(rstate, lfirst(lc), sizeof(SubscriptionRelState));
359  table_states = lappend(table_states, rstate);
360  }
361  MemoryContextSwitchTo(oldctx);
362 
363  table_states_valid = true;
364  }
365 
366  /*
367  * Prepare a hash table for tracking last start times of workers, to avoid
368  * immediate restarts. We don't need it if there are no tables that need
369  * syncing.
370  */
371  if (table_states && !last_start_times)
372  {
373  HASHCTL ctl;
374 
375  memset(&ctl, 0, sizeof(ctl));
376  ctl.keysize = sizeof(Oid);
377  ctl.entrysize = sizeof(struct tablesync_start_time_mapping);
378  last_start_times = hash_create("Logical replication table sync worker start times",
379  256, &ctl, HASH_ELEM | HASH_BLOBS);
380  }
381 
382  /*
383  * Clean up the hash table when we're done with all tables (just to
384  * release the bit of memory).
385  */
386  else if (!table_states && last_start_times)
387  {
388  hash_destroy(last_start_times);
389  last_start_times = NULL;
390  }
391 
392  /*
393  * Process all tables that are being synchronized.
394  */
395  foreach(lc, table_states)
396  {
398 
399  if (rstate->state == SUBREL_STATE_SYNCDONE)
400  {
401  /*
402  * Apply has caught up to the position where the table sync has
403  * finished. Mark the table as ready so that the apply will just
404  * continue to replicate it normally.
405  */
406  if (current_lsn >= rstate->lsn)
407  {
408  rstate->state = SUBREL_STATE_READY;
409  rstate->lsn = current_lsn;
410  if (!started_tx)
411  {
413  started_tx = true;
414  }
415 
417  rstate->relid, rstate->state,
418  rstate->lsn);
419  }
420  }
421  else
422  {
423  LogicalRepWorker *syncworker;
424 
425  /*
426  * Look for a sync worker for this relation.
427  */
428  LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
429 
431  rstate->relid, false);
432 
433  if (syncworker)
434  {
435  /* Found one, update our copy of its state */
436  SpinLockAcquire(&syncworker->relmutex);
437  rstate->state = syncworker->relstate;
438  rstate->lsn = syncworker->relstate_lsn;
439  if (rstate->state == SUBREL_STATE_SYNCWAIT)
440  {
441  /*
442  * Sync worker is waiting for apply. Tell sync worker it
443  * can catchup now.
444  */
445  syncworker->relstate = SUBREL_STATE_CATCHUP;
446  syncworker->relstate_lsn =
447  Max(syncworker->relstate_lsn, current_lsn);
448  }
449  SpinLockRelease(&syncworker->relmutex);
450 
451  /* If we told worker to catch up, wait for it. */
452  if (rstate->state == SUBREL_STATE_SYNCWAIT)
453  {
454  /* Signal the sync worker, as it may be waiting for us. */
455  if (syncworker->proc)
456  logicalrep_worker_wakeup_ptr(syncworker);
457 
458  /* Now safe to release the LWLock */
459  LWLockRelease(LogicalRepWorkerLock);
460 
461  /*
462  * Enter busy loop and wait for synchronization worker to
463  * reach expected state (or die trying).
464  */
465  if (!started_tx)
466  {
468  started_tx = true;
469  }
470 
472  SUBREL_STATE_SYNCDONE);
473  }
474  else
475  LWLockRelease(LogicalRepWorkerLock);
476  }
477  else
478  {
479  /*
480  * If there is no sync worker for this table yet, count
481  * running sync workers for this subscription, while we have
482  * the lock.
483  */
484  int nsyncworkers =
486 
487  /* Now safe to release the LWLock */
488  LWLockRelease(LogicalRepWorkerLock);
489 
490  /*
491  * If there are free sync worker slot(s), start a new sync
492  * worker for the table.
493  */
494  if (nsyncworkers < max_sync_workers_per_subscription)
495  {
497  struct tablesync_start_time_mapping *hentry;
498  bool found;
499 
500  hentry = hash_search(last_start_times, &rstate->relid,
501  HASH_ENTER, &found);
502 
503  if (!found ||
504  TimestampDifferenceExceeds(hentry->last_start_time, now,
506  {
511  rstate->relid);
512  hentry->last_start_time = now;
513  }
514  }
515  }
516  }
517  }
518 
519  if (started_tx)
520  {
522  pgstat_report_stat(false);
523  }
524 }
525 
526 /*
527  * Process possible state change(s) of tables that are being synchronized.
528  */
529 void
531 {
532  if (am_tablesync_worker())
533  process_syncing_tables_for_sync(current_lsn);
534  else
536 }
537 
538 /*
539  * Create list of columns for COPY based on logical relation mapping.
540  */
541 static List *
543 {
544  List *attnamelist = NIL;
545  int i;
546 
547  for (i = 0; i < rel->remoterel.natts; i++)
548  {
549  attnamelist = lappend(attnamelist,
550  makeString(rel->remoterel.attnames[i]));
551  }
552 
553 
554  return attnamelist;
555 }
556 
557 /*
558  * Data source callback for the COPY FROM, which reads from the remote
559  * connection and passes the data back to our local COPY.
560  */
561 static int
562 copy_read_data(void *outbuf, int minread, int maxread)
563 {
564  int bytesread = 0;
565  int avail;
566 
567  /* If there are some leftover data from previous read, use it. */
568  avail = copybuf->len - copybuf->cursor;
569  if (avail)
570  {
571  if (avail > maxread)
572  avail = maxread;
573  memcpy(outbuf, &copybuf->data[copybuf->cursor], avail);
574  copybuf->cursor += avail;
575  maxread -= avail;
576  bytesread += avail;
577  }
578 
579  while (maxread > 0 && bytesread < minread)
580  {
582  int len;
583  char *buf = NULL;
584 
585  for (;;)
586  {
587  /* Try read the data. */
588  len = walrcv_receive(wrconn, &buf, &fd);
589 
591 
592  if (len == 0)
593  break;
594  else if (len < 0)
595  return bytesread;
596  else
597  {
598  /* Process the data */
599  copybuf->data = buf;
600  copybuf->len = len;
601  copybuf->cursor = 0;
602 
603  avail = copybuf->len - copybuf->cursor;
604  if (avail > maxread)
605  avail = maxread;
606  memcpy(outbuf, &copybuf->data[copybuf->cursor], avail);
607  outbuf = (void *) ((char *) outbuf + avail);
608  copybuf->cursor += avail;
609  maxread -= avail;
610  bytesread += avail;
611  }
612 
613  if (maxread <= 0 || bytesread >= minread)
614  return bytesread;
615  }
616 
617  /*
618  * Wait for more data or latch.
619  */
620  (void) WaitLatchOrSocket(MyLatch,
623  fd, 1000L, WAIT_EVENT_LOGICAL_SYNC_DATA);
624 
626  }
627 
628  return bytesread;
629 }
630 
631 
632 /*
633  * Get information about remote relation in similar fashion the RELATION
634  * message provides during replication.
635  */
636 static void
637 fetch_remote_table_info(char *nspname, char *relname,
638  LogicalRepRelation *lrel)
639 {
640  WalRcvExecResult *res;
641  StringInfoData cmd;
642  TupleTableSlot *slot;
643  Oid tableRow[] = {OIDOID, CHAROID, CHAROID};
644  Oid attrRow[] = {TEXTOID, OIDOID, INT4OID, BOOLOID};
645  bool isnull;
646  int natt;
647 
648  lrel->nspname = nspname;
649  lrel->relname = relname;
650 
651  /* First fetch Oid and replica identity. */
652  initStringInfo(&cmd);
653  appendStringInfo(&cmd, "SELECT c.oid, c.relreplident, c.relkind"
654  " FROM pg_catalog.pg_class c"
655  " INNER JOIN pg_catalog.pg_namespace n"
656  " ON (c.relnamespace = n.oid)"
657  " WHERE n.nspname = %s"
658  " AND c.relname = %s",
659  quote_literal_cstr(nspname),
660  quote_literal_cstr(relname));
661  res = walrcv_exec(wrconn, cmd.data, lengthof(tableRow), tableRow);
662 
663  if (res->status != WALRCV_OK_TUPLES)
664  ereport(ERROR,
665  (errmsg("could not fetch table info for table \"%s.%s\" from publisher: %s",
666  nspname, relname, res->err)));
667 
669  if (!tuplestore_gettupleslot(res->tuplestore, true, false, slot))
670  ereport(ERROR,
671  (errmsg("table \"%s.%s\" not found on publisher",
672  nspname, relname)));
673 
674  lrel->remoteid = DatumGetObjectId(slot_getattr(slot, 1, &isnull));
675  Assert(!isnull);
676  lrel->replident = DatumGetChar(slot_getattr(slot, 2, &isnull));
677  Assert(!isnull);
678  lrel->relkind = DatumGetChar(slot_getattr(slot, 3, &isnull));
679  Assert(!isnull);
680 
682  walrcv_clear_result(res);
683 
684  /* Now fetch columns. */
685  resetStringInfo(&cmd);
686  appendStringInfo(&cmd,
687  "SELECT a.attname,"
688  " a.atttypid,"
689  " a.atttypmod,"
690  " a.attnum = ANY(i.indkey)"
691  " FROM pg_catalog.pg_attribute a"
692  " LEFT JOIN pg_catalog.pg_index i"
693  " ON (i.indexrelid = pg_get_replica_identity_index(%u))"
694  " WHERE a.attnum > 0::pg_catalog.int2"
695  " AND NOT a.attisdropped %s"
696  " AND a.attrelid = %u"
697  " ORDER BY a.attnum",
698  lrel->remoteid,
699  (walrcv_server_version(wrconn) >= 120000 ? "AND a.attgenerated = ''" : ""),
700  lrel->remoteid);
701  res = walrcv_exec(wrconn, cmd.data, lengthof(attrRow), attrRow);
702 
703  if (res->status != WALRCV_OK_TUPLES)
704  ereport(ERROR,
705  (errmsg("could not fetch table info for table \"%s.%s\": %s",
706  nspname, relname, res->err)));
707 
708  /* We don't know the number of rows coming, so allocate enough space. */
709  lrel->attnames = palloc0(MaxTupleAttributeNumber * sizeof(char *));
710  lrel->atttyps = palloc0(MaxTupleAttributeNumber * sizeof(Oid));
711  lrel->attkeys = NULL;
712 
713  natt = 0;
715  while (tuplestore_gettupleslot(res->tuplestore, true, false, slot))
716  {
717  lrel->attnames[natt] =
718  TextDatumGetCString(slot_getattr(slot, 1, &isnull));
719  Assert(!isnull);
720  lrel->atttyps[natt] = DatumGetObjectId(slot_getattr(slot, 2, &isnull));
721  Assert(!isnull);
722  if (DatumGetBool(slot_getattr(slot, 4, &isnull)))
723  lrel->attkeys = bms_add_member(lrel->attkeys, natt);
724 
725  /* Should never happen. */
726  if (++natt >= MaxTupleAttributeNumber)
727  elog(ERROR, "too many columns in remote table \"%s.%s\"",
728  nspname, relname);
729 
730  ExecClearTuple(slot);
731  }
733 
734  lrel->natts = natt;
735 
736  walrcv_clear_result(res);
737  pfree(cmd.data);
738 }
739 
740 /*
741  * Copy existing data of a table from publisher.
742  *
743  * Caller is responsible for locking the local relation.
744  */
745 static void
747 {
748  LogicalRepRelMapEntry *relmapentry;
749  LogicalRepRelation lrel;
750  WalRcvExecResult *res;
751  StringInfoData cmd;
752  CopyState cstate;
753  List *attnamelist;
754  ParseState *pstate;
755 
756  /* Get the publisher relation info. */
758  RelationGetRelationName(rel), &lrel);
759 
760  /* Put the relation into relmap. */
762 
763  /* Map the publisher relation to local one. */
764  relmapentry = logicalrep_rel_open(lrel.remoteid, NoLock);
765  Assert(rel == relmapentry->localrel);
766 
767  /* Start copy on the publisher. */
768  initStringInfo(&cmd);
769  if (lrel.relkind == RELKIND_RELATION)
770  appendStringInfo(&cmd, "COPY %s TO STDOUT",
772  else
773  {
774  /*
775  * For non-tables, we need to do COPY (SELECT ...), but we can't just
776  * do SELECT * because we need to not copy generated columns.
777  */
778  appendStringInfoString(&cmd, "COPY (SELECT ");
779  for (int i = 0; i < lrel.natts; i++)
780  {
782  if (i < lrel.natts - 1)
783  appendStringInfoString(&cmd, ", ");
784  }
785  appendStringInfo(&cmd, " FROM %s) TO STDOUT",
787  }
788  res = walrcv_exec(wrconn, cmd.data, 0, NULL);
789  pfree(cmd.data);
790  if (res->status != WALRCV_OK_COPY_OUT)
791  ereport(ERROR,
792  (errmsg("could not start initial contents copy for table \"%s.%s\": %s",
793  lrel.nspname, lrel.relname, res->err)));
794  walrcv_clear_result(res);
795 
796  copybuf = makeStringInfo();
797 
798  pstate = make_parsestate(NULL);
800  NULL, false, false);
801 
802  attnamelist = make_copy_attnamelist(relmapentry);
803  cstate = BeginCopyFrom(pstate, rel, NULL, false, copy_read_data, attnamelist, NIL);
804 
805  /* Do the copy */
806  (void) CopyFrom(cstate);
807 
808  logicalrep_rel_close(relmapentry, NoLock);
809 }
810 
811 /*
812  * Start syncing the table in the sync worker.
813  *
814  * If nothing needs to be done to sync the table, we exit the worker without
815  * any further action.
816  *
817  * The returned slot name is palloc'ed in current memory context.
818  */
819 char *
821 {
822  char *slotname;
823  char *err;
824  char relstate;
825  XLogRecPtr relstate_lsn;
826  Relation rel;
827  WalRcvExecResult *res;
828 
829  /* Check the state of the table synchronization. */
833  &relstate_lsn);
835 
837  MyLogicalRepWorker->relstate = relstate;
838  MyLogicalRepWorker->relstate_lsn = relstate_lsn;
840 
841  /*
842  * If synchronization is already done or no longer necessary, exit now
843  * that we've updated shared memory state.
844  */
845  switch (relstate)
846  {
847  case SUBREL_STATE_SYNCDONE:
848  case SUBREL_STATE_READY:
849  case SUBREL_STATE_UNKNOWN:
850  finish_sync_worker(); /* doesn't return */
851  }
852 
853  /*
854  * To build a slot name for the sync work, we are limited to NAMEDATALEN -
855  * 1 characters. We cut the original slot name to NAMEDATALEN - 28 chars
856  * and append _%u_sync_%u (1 + 10 + 6 + 10 + '\0'). (It's actually the
857  * NAMEDATALEN on the remote that matters, but this scheme will also work
858  * reasonably if that is different.)
859  */
860  StaticAssertStmt(NAMEDATALEN >= 32, "NAMEDATALEN too small"); /* for sanity */
861  slotname = psprintf("%.*s_%u_sync_%u",
862  NAMEDATALEN - 28,
866 
867  /*
868  * Here we use the slot name instead of the subscription name as the
869  * application_name, so that it is different from the main apply worker,
870  * so that synchronous replication can distinguish them.
871  */
872  wrconn = walrcv_connect(MySubscription->conninfo, true, slotname, &err);
873  if (wrconn == NULL)
874  ereport(ERROR,
875  (errmsg("could not connect to the publisher: %s", err)));
876 
877  Assert(MyLogicalRepWorker->relstate == SUBREL_STATE_INIT ||
878  MyLogicalRepWorker->relstate == SUBREL_STATE_DATASYNC);
879 
881  MyLogicalRepWorker->relstate = SUBREL_STATE_DATASYNC;
884 
885  /* Update the state and make it visible to others. */
892  pgstat_report_stat(false);
893 
894  /*
895  * We want to do the table data sync in a single transaction.
896  */
898 
899  /*
900  * Use a standard write lock here. It might be better to disallow access
901  * to the table while it's being synchronized. But we don't want to block
902  * the main apply process from working and it has to open the relation in
903  * RowExclusiveLock when remapping remote relation id to local one.
904  */
906 
907  /*
908  * Start a transaction in the remote node in REPEATABLE READ mode. This
909  * ensures that both the replication slot we create (see below) and the
910  * COPY are consistent with each other.
911  */
912  res = walrcv_exec(wrconn,
913  "BEGIN READ ONLY ISOLATION LEVEL REPEATABLE READ",
914  0, NULL);
915  if (res->status != WALRCV_OK_COMMAND)
916  ereport(ERROR,
917  (errmsg("table copy could not start transaction on publisher"),
918  errdetail("The error was: %s", res->err)));
919  walrcv_clear_result(res);
920 
921  /*
922  * Create a new temporary logical decoding slot. This slot will be used
923  * for the catchup phase after COPY is done, so tell it to use the
924  * snapshot to make the final data consistent.
925  */
926  walrcv_create_slot(wrconn, slotname, true,
927  CRS_USE_SNAPSHOT, origin_startpos);
928 
929  /* Now do the initial data copy */
931  copy_table(rel);
933 
934  res = walrcv_exec(wrconn, "COMMIT", 0, NULL);
935  if (res->status != WALRCV_OK_COMMAND)
936  ereport(ERROR,
937  (errmsg("table copy could not finish transaction on publisher"),
938  errdetail("The error was: %s", res->err)));
939  walrcv_clear_result(res);
940 
941  table_close(rel, NoLock);
942 
943  /* Make the copy visible. */
945 
946  /*
947  * We are done with the initial data synchronization, update the state.
948  */
950  MyLogicalRepWorker->relstate = SUBREL_STATE_SYNCWAIT;
951  MyLogicalRepWorker->relstate_lsn = *origin_startpos;
953 
954  /*
955  * Finally, wait until the main apply worker tells us to catch up and then
956  * return to let LogicalRepApplyLoop do it.
957  */
958  wait_for_worker_state_change(SUBREL_STATE_CATCHUP);
959  return slotname;
960 }
Subscription * MySubscription
Definition: worker.c:163
Value * makeString(char *str)
Definition: value.c:53
#define NIL
Definition: pg_list.h:65
void hash_destroy(HTAB *hashp)
Definition: dynahash.c:827
WalReceiverConn * wrconn
Definition: worker.c:161
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
#define walrcv_endstreaming(conn, next_tli)
Definition: walreceiver.h:414
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:167
char * quote_literal_cstr(const char *rawstr)
Definition: quote.c:102
uint32 TimeLineID
Definition: xlogdefs.h:52
#define HASH_ELEM
Definition: hsearch.h:85
#define WL_TIMEOUT
Definition: latch.h:127
const char * quote_identifier(const char *ident)
Definition: ruleutils.c:10709
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:425
#define MaxTupleAttributeNumber
Definition: htup_details.h:33
void process_syncing_tables(XLogRecPtr current_lsn)
Definition: tablesync.c:530
TupleTableSlot * MakeSingleTupleTableSlot(TupleDesc tupdesc, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1208
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1578
void logicalrep_worker_wakeup(Oid subid, Oid relid)
Definition: launcher.c:598
int64 TimestampTz
Definition: timestamp.h:39
static void process_syncing_tables_for_apply(XLogRecPtr current_lsn)
Definition: tablesync.c:321
CopyState BeginCopyFrom(ParseState *pstate, Relation rel, const char *filename, bool is_program, copy_data_source_cb data_source_cb, List *attnamelist, List *options)
Definition: copy.c:3355
#define DatumGetObjectId(X)
Definition: postgres.h:500
void CommitTransactionCommand(void)
Definition: xact.c:2947
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
StringInfo makeStringInfo(void)
Definition: stringinfo.c:41
#define walrcv_receive(conn, buffer, wait_fd)
Definition: walreceiver.h:416
#define walrcv_server_version(conn)
Definition: walreceiver.h:408
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
#define AccessShareLock
Definition: lockdefs.h:36
Size entrysize
Definition: hsearch.h:72
static void copy_table(Relation rel)
Definition: tablesync.c:746
void proc_exit(int code)
Definition: ipc.c:104
static bool wait_for_relation_state_change(Oid relid, char expected_state)
Definition: tablesync.c:160
uint64 CopyFrom(CopyState cstate)
Definition: copy.c:2704
void logicalrep_rel_close(LogicalRepRelMapEntry *rel, LOCKMODE lockmode)
Definition: relation.c:449
#define WL_SOCKET_READABLE
Definition: latch.h:125
void PopActiveSnapshot(void)
Definition: snapmgr.c:759
#define lengthof(array)
Definition: c.h:676
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:919
#define LOG
Definition: elog.h:26
NameData relname
Definition: pg_class.h:38
unsigned int Oid
Definition: postgres_ext.h:31
static bool table_states_valid
Definition: tablesync.c:111
char * LogicalRepSyncTableStart(XLogRecPtr *origin_startpos)
Definition: tablesync.c:820
#define walrcv_create_slot(conn, slotname, temporary, snapshot_action, lsn)
Definition: walreceiver.h:420
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1677
Snapshot GetTransactionSnapshot(void)
Definition: snapmgr.c:250
void list_free_deep(List *list)
Definition: list.c:1390
Bitmapset * attkeys
Definition: logicalproto.h:71
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2847
static int fd(const char *x, int i)
Definition: preproc-init.c:105
void ResetLatch(Latch *latch)
Definition: latch.c:588
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:390
LogicalRepRelId remoteid
Definition: logicalproto.h:63
XLogRecPtr relstate_lsn
ParseNamespaceItem * addRangeTableEntryForRelation(ParseState *pstate, Relation rel, int lockmode, Alias *alias, bool inh, bool inFromCl)
ParseState * make_parsestate(ParseState *parentParseState)
Definition: parse_node.c:43
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1812
#define NAMEDATALEN
void logicalrep_worker_wakeup_ptr(LogicalRepWorker *worker)
Definition: launcher.c:618
#define StaticAssertStmt(condition, errmessage)
Definition: c.h:860
void logicalrep_relmap_update(LogicalRepRelation *remoterel)
Definition: relation.c:173
#define SpinLockAcquire(lock)
Definition: spin.h:62
Definition: dynahash.c:218
static void walrcv_clear_result(WalRcvExecResult *walres)
Definition: walreceiver.h:430
void pfree(void *pointer)
Definition: mcxt.c:1057
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:91
TupleDesc tupledesc
Definition: walreceiver.h:215
LogicalRepWorker * MyLogicalRepWorker
Definition: launcher.c:57
#define ERROR
Definition: elog.h:43
LogicalRepRelation remoterel
int max_sync_workers_per_subscription
Definition: launcher.c:55
static bool am_tablesync_worker(void)
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:176
char * get_namespace_name(Oid nspid)
Definition: lsyscache.c:3191
#define NoLock
Definition: lockdefs.h:34
static char * buf
Definition: pg_test_fsync.c:68
void PushActiveSnapshot(Snapshot snap)
Definition: snapmgr.c:680
void ExecDropSingleTupleTableSlot(TupleTableSlot *slot)
Definition: execTuples.c:1224
#define RowExclusiveLock
Definition: lockdefs.h:38
int errdetail(const char *fmt,...)
Definition: elog.c:954
#define DatumGetBool(X)
Definition: postgres.h:393
#define RelationGetRelationName(relation)
Definition: rel.h:490
void resetStringInfo(StringInfo str)
Definition: stringinfo.c:75
int WaitLatchOrSocket(Latch *latch, int wakeEvents, pgsocket sock, long timeout, uint32 wait_event_info)
Definition: latch.c:438
unsigned int uint32
Definition: c.h:375
int pgsocket
Definition: port.h:31
LogicalRepRelMapEntry * logicalrep_rel_open(LogicalRepRelId remoteid, LOCKMODE lockmode)
Definition: relation.c:274
void InvalidateCatalogSnapshot(void)
Definition: snapmgr.c:456
List * lappend(List *list, void *datum)
Definition: list.c:321
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
char * quote_qualified_identifier(const char *qualifier, const char *ident)
Definition: ruleutils.c:10793
int wal_retrieve_retry_interval
Definition: xlog.c:110
#define SpinLockRelease(lock)
Definition: spin.h:64
#define HASH_BLOBS
Definition: hsearch.h:86
#define TextDatumGetCString(d)
Definition: builtins.h:87
static void pg_attribute_noreturn()
Definition: tablesync.c:119
void * palloc0(Size size)
Definition: mcxt.c:981
char GetSubscriptionRelState(Oid subid, Oid relid, XLogRecPtr *sublsn)
HTAB * hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
Definition: dynahash.c:326
uintptr_t Datum
Definition: postgres.h:367
void CommandCounterIncrement(void)
Definition: xact.c:1021
#define PGINVALID_SOCKET
Definition: port.h:33
#define DatumGetChar(X)
Definition: postgres.h:409
void UpdateSubscriptionRelState(Oid subid, Oid relid, char state, XLogRecPtr sublsn)
Size keysize
Definition: hsearch.h:71
StringInfo copybuf
Definition: tablesync.c:113
static void fetch_remote_table_info(char *nspname, char *relname, LogicalRepRelation *lrel)
Definition: tablesync.c:637
int logicalrep_sync_worker_count(Oid subid)
Definition: launcher.c:724
#define InvalidOid
Definition: postgres_ext.h:36
static Datum slot_getattr(TupleTableSlot *slot, int attnum, bool *isnull)
Definition: tuptable.h:381
#define ereport(elevel,...)
Definition: elog.h:144
XLogRecPtr GetXLogWriteRecPtr(void)
Definition: xlog.c:11535
#define Max(x, y)
Definition: c.h:922
bool tuplestore_gettupleslot(Tuplestorestate *state, bool forward, bool copy, TupleTableSlot *slot)
Definition: tuplestore.c:1078
Tuplestorestate * tuplestore
Definition: walreceiver.h:214
void logicalrep_worker_launch(Oid dbid, Oid subid, const char *subname, Oid userid, Oid relid)
Definition: launcher.c:286
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:746
#define lfirst(lc)
Definition: pg_list.h:169
WalRcvExecStatus status
Definition: walreceiver.h:212
Definition: regguts.h:298
LogicalRepWorker * logicalrep_worker_find(Oid subid, Oid relid, bool only_running)
Definition: launcher.c:235
void StartTransactionCommand(void)
Definition: xact.c:2846
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1208
bool IsTransactionState(void)
Definition: xact.c:371
Bitmapset * bms_add_member(Bitmapset *a, int x)
Definition: bitmapset.c:736
static void process_syncing_tables_for_sync(XLogRecPtr current_lsn)
Definition: tablesync.c:271
void * palloc(Size size)
Definition: mcxt.c:950
int errmsg(const char *fmt,...)
Definition: elog.c:821
#define elog(elevel,...)
Definition: elog.h:214
int i
void * arg
struct Latch * MyLatch
Definition: globals.c:54
static int copy_read_data(void *outbuf, int minread, int maxread)
Definition: tablesync.c:562
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:39
static List * make_copy_attnamelist(LogicalRepRelMapEntry *rel)
Definition: tablesync.c:542
Definition: pg_list.h:50
List * GetSubscriptionNotReadyRelations(Oid subid)
char * get_rel_name(Oid relid)
Definition: lsyscache.c:1840
#define WL_LATCH_SET
Definition: latch.h:124
static bool wait_for_worker_state_change(char expected_state)
Definition: tablesync.c:208
const TupleTableSlotOps TTSOpsMinimalTuple
Definition: execTuples.c:85
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1542
#define WL_EXIT_ON_PM_DEATH
Definition: latch.h:129
#define walrcv_exec(conn, exec, nRetTypes, retTypes)
Definition: walreceiver.h:424
void invalidate_syncing_table_states(Datum arg, int cacheid, uint32 hashvalue)
Definition: tablesync.c:257
MemoryContext CacheMemoryContext
Definition: mcxt.c:47
#define RelationGetNamespace(relation)
Definition: rel.h:497
void pgstat_report_stat(bool force)
Definition: pgstat.c:850
#define walrcv_connect(conninfo, logical, appname, err)
Definition: walreceiver.h:398