PostgreSQL Source Code (git master)
launcher.c
1 /*-------------------------------------------------------------------------
2  * launcher.c
3  * PostgreSQL logical replication worker launcher process
4  *
5  * Copyright (c) 2016-2017, PostgreSQL Global Development Group
6  *
7  * IDENTIFICATION
8  * src/backend/replication/logical/launcher.c
9  *
10  * NOTES
11  * This module contains the logical replication worker launcher which
12  * uses the background worker infrastructure to start the logical
13  * replication workers for every enabled subscription.
14  *
15  *-------------------------------------------------------------------------
16  */
17 
18 #include "postgres.h"
19 
20 #include "funcapi.h"
21 #include "miscadmin.h"
22 #include "pgstat.h"
23 
24 #include "access/heapam.h"
25 #include "access/htup.h"
26 #include "access/htup_details.h"
27 #include "access/xact.h"
28 
29 #include "catalog/pg_subscription.h"
30 #include "catalog/pg_subscription_rel.h"
31 
32 #include "libpq/pqsignal.h"
33 
34 #include "postmaster/bgworker.h"
35 #include "postmaster/fork_process.h"
36 #include "postmaster/postmaster.h"
37 
38 #include "replication/logicallauncher.h"
39 #include "replication/logicalworker.h"
40 #include "replication/slot.h"
41 #include "replication/walreceiver.h"
42 #include "replication/worker_internal.h"
43 
44 #include "storage/ipc.h"
45 #include "storage/proc.h"
46 #include "storage/procarray.h"
47 #include "storage/procsignal.h"
48 
49 #include "tcop/tcopprot.h"
50 
51 #include "utils/memutils.h"
52 #include "utils/pg_lsn.h"
53 #include "utils/ps_status.h"
54 #include "utils/timeout.h"
55 #include "utils/snapmgr.h"
56 
57 /* max sleep time between cycles (3min) */
58 #define DEFAULT_NAPTIME_PER_CYCLE 180000L
59 
60 int max_logical_replication_workers = 4;
61 int max_sync_workers_per_subscription = 2;
62 
63 LogicalRepWorker *MyLogicalRepWorker = NULL;
64 
65 typedef struct LogicalRepCtxStruct
66 {
67  /* Supervisor process. */
68  pid_t launcher_pid;
69 
70  /* Background workers. */
71  LogicalRepWorker workers[FLEXIBLE_ARRAY_MEMBER];
72 } LogicalRepCtxStruct;
73 
74 LogicalRepCtxStruct *LogicalRepCtx;
75 
76 typedef struct LogicalRepWorkerId
77 {
78  Oid subid;
79  Oid relid;
80 } LogicalRepWorkerId;
81 
82 static List *on_commit_stop_workers = NIL;
83 
84 static void ApplyLauncherWakeup(void);
85 static void logicalrep_launcher_onexit(int code, Datum arg);
86 static void logicalrep_worker_onexit(int code, Datum arg);
87 static void logicalrep_worker_detach(void);
88 static void logicalrep_worker_cleanup(LogicalRepWorker *worker);
89 
90 /* Flags set by signal handlers */
91 static volatile sig_atomic_t got_SIGHUP = false;
92 
93 static bool on_commit_launcher_wakeup = false;
94 
95 Datum pg_stat_get_subscription(PG_FUNCTION_ARGS);
96 
97 
98 /*
99  * Load the list of subscriptions.
100  *
101  * Only the fields interesting for worker start/stop functions are filled for
102  * each subscription.
103  */
104 static List *
105 get_subscription_list(void)
106 {
107  List *res = NIL;
108  Relation rel;
109  HeapScanDesc scan;
110  HeapTuple tup;
111  MemoryContext resultcxt;
112 
113  /* This is the context that we will allocate our output data in */
114  resultcxt = CurrentMemoryContext;
115 
116  /*
117  * Start a transaction so we can access pg_subscription, and get a snapshot.
118  * We don't have a use for the snapshot itself, but we're interested in
119  * the secondary effect that it sets RecentGlobalXmin. (This is critical
120  * for anything that reads heap pages, because HOT may decide to prune
121  * them even if the process doesn't attempt to modify any tuples.)
122  */
123  StartTransactionCommand();
124  (void) GetTransactionSnapshot();
125 
126  rel = heap_open(SubscriptionRelationId, AccessShareLock);
127  scan = heap_beginscan_catalog(rel, 0, NULL);
128 
129  while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection)))
130  {
131  Form_pg_subscription subform = (Form_pg_subscription) GETSTRUCT(tup);
132  Subscription *sub;
133  MemoryContext oldcxt;
134 
135  /*
136  * Allocate our results in the caller's context, not the
137  * transaction's. We do this inside the loop, and restore the original
138  * context at the end, so that leaky things like heap_getnext() are
139  * not called in a potentially long-lived context.
140  */
141  oldcxt = MemoryContextSwitchTo(resultcxt);
142 
143  sub = (Subscription *) palloc0(sizeof(Subscription));
144  sub->oid = HeapTupleGetOid(tup);
145  sub->dbid = subform->subdbid;
146  sub->owner = subform->subowner;
147  sub->enabled = subform->subenabled;
148  sub->name = pstrdup(NameStr(subform->subname));
149  /* We don't fill fields we are not interested in. */
150 
151  res = lappend(res, sub);
152  MemoryContextSwitchTo(oldcxt);
153  }
154 
155  heap_endscan(scan);
156  heap_close(rel, AccessShareLock);
157 
158  CommitTransactionCommand();
159 
160  return res;
161 }
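The pattern above, a short transaction for the catalog scan with the result objects allocated in the caller's memory context, is what lets the returned list outlive CommitTransactionCommand(). A minimal sketch of a hypothetical caller (not part of this file, assuming the usual backend environment), mirroring how ApplyLauncherMain() consumes the list further down:

static void
log_enabled_subscriptions(void)
{
	List	   *sublist = get_subscription_list();
	ListCell   *lc;

	/* Entries were palloc'd in our context, so they survive the commit. */
	foreach(lc, sublist)
	{
		Subscription *sub = (Subscription *) lfirst(lc);

		if (sub->enabled)
			elog(DEBUG1, "subscription %u (\"%s\") in database %u is enabled",
				 sub->oid, sub->name, sub->dbid);
	}
}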
162 
163 /*
164  * Wait for a background worker to start up and attach to the shmem context.
165  *
166  * This is only needed for cleaning up the shared memory in case the worker
167  * fails to attach.
168  */
169 static void
170 WaitForReplicationWorkerAttach(LogicalRepWorker *worker,
171  BackgroundWorkerHandle *handle)
172 {
173  BgwHandleStatus status;
174  int rc;
175  uint16 generation;
176 
177  /* Remember generation for future identification. */
178  generation = worker->generation;
179 
180  for (;;)
181  {
182  pid_t pid;
183 
184  CHECK_FOR_INTERRUPTS();
185 
186  LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
187 
188  /* Worker either died or has started; no need to do anything. */
189  if (!worker->in_use || worker->proc)
190  {
191  LWLockRelease(LogicalRepWorkerLock);
192  return;
193  }
194 
195  LWLockRelease(LogicalRepWorkerLock);
196 
197  /* Check if worker has died before attaching, and clean up after it. */
198  status = GetBackgroundWorkerPid(handle, &pid);
199 
200  if (status == BGWH_STOPPED)
201  {
202  LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
203  /* Ensure that this was indeed the worker we waited for. */
204  if (generation == worker->generation)
205  logicalrep_worker_cleanup(worker);
206  LWLockRelease(LogicalRepWorkerLock);
207  return;
208  }
209 
210  /*
211  * We need a timeout because we generally don't get notified via latch
212  * about the worker attach. But we don't expect to have to wait long.
213  */
214  rc = WaitLatch(MyLatch,
215  WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
216  10L, WAIT_EVENT_BGWORKER_STARTUP);
217 
218  /* emergency bailout if postmaster has died */
219  if (rc & WL_POSTMASTER_DEATH)
220  proc_exit(1);
221 
222  if (rc & WL_LATCH_SET)
223  {
224  ResetLatch(MyLatch);
225  CHECK_FOR_INTERRUPTS();
226  }
227  }
228 
229  return;
230 }
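The generation counter checked above is what distinguishes the worker we launched from a different worker that has since been assigned the same slot. A hypothetical helper illustrating the idiom (not in this file):

static bool
worker_is_still_ours(LogicalRepWorker *worker, uint16 generation)
{
	Assert(LWLockHeldByMe(LogicalRepWorkerLock));

	/* false if the slot was freed or recycled for a newer worker */
	return worker->in_use && worker->generation == generation;
}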
231 
232 /*
233  * Walks the workers array and searches for one that matches given
234  * subscription id and relid.
235  */
236 LogicalRepWorker *
237 logicalrep_worker_find(Oid subid, Oid relid, bool only_running)
238 {
239  int i;
240  LogicalRepWorker *res = NULL;
241 
242  Assert(LWLockHeldByMe(LogicalRepWorkerLock));
243 
244  /* Search for attached worker for a given subscription id. */
245  for (i = 0; i < max_logical_replication_workers; i++)
246  {
247  LogicalRepWorker *w = &LogicalRepCtx->workers[i];
248 
249  if (w->in_use && w->subid == subid && w->relid == relid &&
250  (!only_running || w->proc))
251  {
252  res = w;
253  break;
254  }
255  }
256 
257  return res;
258 }
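Callers must hold LogicalRepWorkerLock around the lookup, since the returned pointer refers to a shared-memory slot that can be recycled at any time. A hypothetical usage sketch, mirroring logicalrep_worker_wakeup() later in this file:

static bool
subscription_has_running_apply_worker(Oid subid)
{
	LogicalRepWorker *w;

	LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
	w = logicalrep_worker_find(subid, InvalidOid, true);
	LWLockRelease(LogicalRepWorkerLock);

	/* Only the NULL-ness is meaningful once the lock is released. */
	return w != NULL;
}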
259 
260 /*
261  * Similar to logicalrep_worker_find(), but returns list of all workers for
262  * the subscription, instead of just one.
263  */
264 List *
265 logicalrep_workers_find(Oid subid, bool only_running)
266 {
267  int i;
268  List *res = NIL;
269 
270  Assert(LWLockHeldByMe(LogicalRepWorkerLock));
271 
272  /* Search for attached worker for a given subscription id. */
273  for (i = 0; i < max_logical_replication_workers; i++)
274  {
275  LogicalRepWorker *w = &LogicalRepCtx->workers[i];
276 
277  if (w->in_use && w->subid == subid && (!only_running || w->proc))
278  res = lappend(res, w);
279  }
280 
281  return res;
282 }
283 
284 /*
285  * Start new apply background worker.
286  */
287 void
288 logicalrep_worker_launch(Oid dbid, Oid subid, const char *subname, Oid userid,
289  Oid relid)
290 {
291  BackgroundWorker bgw;
292  BackgroundWorkerHandle *bgw_handle;
293  int i;
294  int slot = 0;
295  LogicalRepWorker *worker = NULL;
296  int nsyncworkers;
297  TimestampTz now;
298 
299  ereport(DEBUG1,
300  (errmsg("starting logical replication worker for subscription \"%s\"",
301  subname)));
302 
303  /* Report this after the initial starting message for consistency. */
304  if (max_replication_slots == 0)
305  ereport(ERROR,
306  (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
307  errmsg("cannot start logical replication workers when max_replication_slots = 0")));
308 
309  /*
310  * We need to modify the shared memory under lock so that we have a
311  * consistent view.
312  */
313  LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
314 
315 retry:
316  /* Find unused worker slot. */
317  for (i = 0; i < max_logical_replication_workers; i++)
318  {
319  LogicalRepWorker *w = &LogicalRepCtx->workers[i];
320 
321  if (!w->in_use)
322  {
323  worker = w;
324  slot = i;
325  break;
326  }
327  }
328 
329  nsyncworkers = logicalrep_sync_worker_count(subid);
330 
331  now = GetCurrentTimestamp();
332 
333  /*
334  * If we didn't find a free slot, try to do garbage collection. The
335  * reason we do this is because if some worker failed to start up and its
336  * parent has crashed while waiting, the in_use state was never cleared.
337  */
338  if (worker == NULL || nsyncworkers >= max_sync_workers_per_subscription)
339  {
340  bool did_cleanup = false;
341 
342  for (i = 0; i < max_logical_replication_workers; i++)
343  {
344  LogicalRepWorker *w = &LogicalRepCtx->workers[i];
345 
346  /*
347  * If the worker was marked in use but didn't manage to attach in
348  * time, clean it up.
349  */
350  if (w->in_use && !w->proc &&
351  TimestampDifferenceExceeds(w->launch_time, now,
352  wal_receiver_timeout))
353  {
354  elog(WARNING,
355  "logical replication worker for subscription %u took too long to start; canceled",
356  w->subid);
357 
358  logicalrep_worker_cleanup(w);
359  did_cleanup = true;
360  }
361  }
362 
363  if (did_cleanup)
364  goto retry;
365  }
366 
367  /*
368  * If we reached the sync worker limit per subscription, just exit
369  * silently as we might get here because of an otherwise harmless race
370  * condition.
371  */
372  if (nsyncworkers >= max_sync_workers_per_subscription)
373  {
374  LWLockRelease(LogicalRepWorkerLock);
375  return;
376  }
377 
378  /*
379  * However, if there are no free worker slots, inform the user about it
380  * before exiting.
381  */
382  if (worker == NULL)
383  {
384  LWLockRelease(LogicalRepWorkerLock);
385  ereport(WARNING,
386  (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
387  errmsg("out of logical replication worker slots"),
388  errhint("You might need to increase max_logical_replication_workers.")));
389  return;
390  }
391 
392  /* Prepare the worker slot. */
393  worker->launch_time = now;
394  worker->in_use = true;
395  worker->generation++;
396  worker->proc = NULL;
397  worker->dbid = dbid;
398  worker->userid = userid;
399  worker->subid = subid;
400  worker->relid = relid;
401  worker->relstate = SUBREL_STATE_UNKNOWN;
402  worker->relstate_lsn = InvalidXLogRecPtr;
403  worker->last_lsn = InvalidXLogRecPtr;
404  TIMESTAMP_NOBEGIN(worker->last_send_time);
405  TIMESTAMP_NOBEGIN(worker->last_recv_time);
406  worker->reply_lsn = InvalidXLogRecPtr;
407  TIMESTAMP_NOBEGIN(worker->reply_time);
408 
409  LWLockRelease(LogicalRepWorkerLock);
410 
411  /* Register the new dynamic worker. */
412  memset(&bgw, 0, sizeof(bgw));
413  bgw.bgw_flags = BGWORKER_SHMEM_ACCESS |
414  BGWORKER_BACKEND_DATABASE_CONNECTION;
415  bgw.bgw_start_time = BgWorkerStart_RecoveryFinished;
416  snprintf(bgw.bgw_library_name, BGW_MAXLEN, "postgres");
417  snprintf(bgw.bgw_function_name, BGW_MAXLEN, "ApplyWorkerMain");
418  if (OidIsValid(relid))
419  snprintf(bgw.bgw_name, BGW_MAXLEN,
420  "logical replication worker for subscription %u sync %u", subid, relid);
421  else
422  snprintf(bgw.bgw_name, BGW_MAXLEN,
423  "logical replication worker for subscription %u", subid);
424 
425  bgw.bgw_restart_time = BGW_NEVER_RESTART;
426  bgw.bgw_notify_pid = MyProcPid;
427  bgw.bgw_main_arg = Int32GetDatum(slot);
428 
429  if (!RegisterDynamicBackgroundWorker(&bgw, &bgw_handle))
430  {
431  ereport(WARNING,
432  (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
433  errmsg("out of background worker slots"),
434  errhint("You might need to increase max_worker_processes.")));
435  return;
436  }
437 
438  /* Now wait until it attaches. */
439  WaitForReplicationWorkerAttach(worker, bgw_handle);
440 }
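A hypothetical call site (not in this file) that starts the main apply worker for a subscription returned by get_subscription_list(); per the code above, relid is InvalidOid for the apply worker and a table OID for a tablesync worker:

static void
start_apply_worker_for(Subscription *sub)
{
	/* The launcher does exactly this for enabled subscriptions. */
	logicalrep_worker_launch(sub->dbid, sub->oid, sub->name,
							 sub->owner, InvalidOid);
}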
441 
442 /*
443  * Stop the logical replication worker for subid/relid, if any, and wait until
444  * it detaches from the slot.
445  */
446 void
447 logicalrep_worker_stop(Oid subid, Oid relid)
448 {
449  LogicalRepWorker *worker;
450  uint16 generation;
451 
452  LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
453 
454  worker = logicalrep_worker_find(subid, relid, false);
455 
456  /* No worker, nothing to do. */
457  if (!worker)
458  {
459  LWLockRelease(LogicalRepWorkerLock);
460  return;
461  }
462 
463  /*
464  * Remember which generation was our worker so we can check if what we see
465  * is still the same one.
466  */
467  generation = worker->generation;
468 
469  /*
470  * If we found a worker but it does not have proc set then it is still
471  * starting up; wait for it to finish starting and then kill it.
472  */
473  while (worker->in_use && !worker->proc)
474  {
475  int rc;
476 
477  LWLockRelease(LogicalRepWorkerLock);
478 
479  /* Wait a bit --- we don't expect to have to wait long. */
480  rc = WaitLatch(MyLatch,
481  WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
482  10L, WAIT_EVENT_BGWORKER_STARTUP);
483 
484  /* emergency bailout if postmaster has died */
485  if (rc & WL_POSTMASTER_DEATH)
486  proc_exit(1);
487 
488  if (rc & WL_LATCH_SET)
489  {
490  ResetLatch(MyLatch);
491  CHECK_FOR_INTERRUPTS();
492  }
493 
494  /* Recheck worker status. */
495  LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
496 
497  /*
498  * Check whether the worker slot is no longer used, which would mean
499  * that the worker has exited, or whether the worker generation is
500  * different, meaning that a different worker has taken the slot.
501  */
502  if (!worker->in_use || worker->generation != generation)
503  {
504  LWLockRelease(LogicalRepWorkerLock);
505  return;
506  }
507 
508  /* Worker has assigned proc, so it has started. */
509  if (worker->proc)
510  break;
511  }
512 
513  /* Now terminate the worker ... */
514  kill(worker->proc->pid, SIGTERM);
515 
516  /* ... and wait for it to die. */
517  for (;;)
518  {
519  int rc;
520 
521  /* is it gone? */
522  if (!worker->proc || worker->generation != generation)
523  break;
524 
525  LWLockRelease(LogicalRepWorkerLock);
526 
527  /* Wait a bit --- we don't expect to have to wait long. */
528  rc = WaitLatch(MyLatch,
529  WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
530  10L, WAIT_EVENT_BGWORKER_SHUTDOWN);
531 
532  /* emergency bailout if postmaster has died */
533  if (rc & WL_POSTMASTER_DEATH)
534  proc_exit(1);
535 
536  if (rc & WL_LATCH_SET)
537  {
538  ResetLatch(MyLatch);
539  CHECK_FOR_INTERRUPTS();
540  }
541 
542  LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
543  }
544 
545  LWLockRelease(LogicalRepWorkerLock);
546 }
547 
548 /*
549  * Request worker for specified sub/rel to be stopped on commit.
550  */
551 void
552 logicalrep_worker_stop_at_commit(Oid subid, Oid relid)
553 {
554  LogicalRepWorkerId *wid;
555  MemoryContext oldctx;
556 
557  /* Make sure we store the info in context that survives until commit. */
558  oldctx = MemoryContextSwitchTo(TopTransactionContext);
559 
560  wid = palloc(sizeof(LogicalRepWorkerId));
561  wid->subid = subid;
562  wid->relid = relid;
563 
564  on_commit_stop_workers = lappend(on_commit_stop_workers, wid);
565 
566  MemoryContextSwitchTo(oldctx);
567 }
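A hypothetical DDL-side sketch: queue every worker of a subscription to be stopped when the transaction commits. The actual stop happens in AtEOXact_ApplyLauncher() further down, which calls logicalrep_worker_stop() for each queued entry:

static void
stop_subscription_workers_at_commit(Oid subid)
{
	List	   *workers;
	ListCell   *lc;

	LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
	workers = logicalrep_workers_find(subid, false);
	foreach(lc, workers)
	{
		LogicalRepWorker *w = (LogicalRepWorker *) lfirst(lc);

		logicalrep_worker_stop_at_commit(w->subid, w->relid);
	}
	LWLockRelease(LogicalRepWorkerLock);
}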
568 
569 /*
570  * Wake up (using latch) any logical replication worker for specified sub/rel.
571  */
572 void
573 logicalrep_worker_wakeup(Oid subid, Oid relid)
574 {
575  LogicalRepWorker *worker;
576 
577  LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
578 
579  worker = logicalrep_worker_find(subid, relid, true);
580 
581  if (worker)
582  logicalrep_worker_wakeup_ptr(worker);
583 
584  LWLockRelease(LogicalRepWorkerLock);
585 }
586 
587 /*
588  * Wake up (using latch) the specified logical replication worker.
589  *
590  * Caller must hold lock, else worker->proc could change under us.
591  */
592 void
593 logicalrep_worker_wakeup_ptr(LogicalRepWorker *worker)
594 {
595  Assert(LWLockHeldByMe(LogicalRepWorkerLock));
596 
597  SetLatch(&worker->proc->procLatch);
598 }
599 
600 /*
601  * Attach to a slot.
602  */
603 void
604 logicalrep_worker_attach(int slot)
605 {
606  /* Block concurrent access. */
607  LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
608 
609  Assert(slot >= 0 && slot < max_logical_replication_workers);
610  MyLogicalRepWorker = &LogicalRepCtx->workers[slot];
611 
612  if (!MyLogicalRepWorker->in_use)
613  {
614  LWLockRelease(LogicalRepWorkerLock);
615  ereport(ERROR,
616  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
617  errmsg("logical replication worker slot %d is empty, cannot attach",
618  slot)));
619  }
620 
621  if (MyLogicalRepWorker->proc)
622  {
623  LWLockRelease(LogicalRepWorkerLock);
624  ereport(ERROR,
625  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
626  errmsg("logical replication worker slot %d is already used by "
627  "another worker, cannot attach", slot)));
628  }
629 
630  MyLogicalRepWorker->proc = MyProc;
631  before_shmem_exit(logicalrep_worker_onexit, (Datum) 0);
632 
633  LWLockRelease(LogicalRepWorkerLock);
634 }
635 
636 /*
637  * Detach the worker (cleans up the worker info).
638  */
639 static void
640 logicalrep_worker_detach(void)
641 {
642  /* Block concurrent access. */
643  LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
644 
645  logicalrep_worker_cleanup(MyLogicalRepWorker);
646 
647  LWLockRelease(LogicalRepWorkerLock);
648 }
649 
650 /*
651  * Clean up worker info.
652  */
653 static void
654 logicalrep_worker_cleanup(LogicalRepWorker *worker)
655 {
656  Assert(LWLockHeldByMeInMode(LogicalRepWorkerLock, LW_EXCLUSIVE));
657 
658  worker->in_use = false;
659  worker->proc = NULL;
660  worker->dbid = InvalidOid;
661  worker->userid = InvalidOid;
662  worker->subid = InvalidOid;
663  worker->relid = InvalidOid;
664 }
665 
666 /*
667  * Cleanup function for logical replication launcher.
668  *
669  * Called on logical replication launcher exit.
670  */
671 static void
672 logicalrep_launcher_onexit(int code, Datum arg)
673 {
674  LogicalRepCtx->launcher_pid = 0;
675 }
676 
677 /*
678  * Cleanup function.
679  *
680  * Called on logical replication worker exit.
681  */
682 static void
683 logicalrep_worker_onexit(int code, Datum arg)
684 {
685  /* Disconnect gracefully from the remote side. */
686  if (wrconn)
687  walrcv_disconnect(wrconn);
688 
689  logicalrep_worker_detach();
690 
691  ApplyLauncherWakeup();
692 }
693 
694 /* SIGHUP: set flag to reload configuration at next convenient time */
695 static void
696 logicalrep_launcher_sighup(SIGNAL_ARGS)
697 {
698  int save_errno = errno;
699 
700  got_SIGHUP = true;
701 
702  /* Waken anything waiting on the process latch */
703  SetLatch(MyLatch);
704 
705  errno = save_errno;
706 }
707 
708 /*
709  * Count the number of registered (not necessarily running) sync workers
710  * for a subscription.
711  */
712 int
713 logicalrep_sync_worker_count(Oid subid)
714 {
715  int i;
716  int res = 0;
717 
718  Assert(LWLockHeldByMe(LogicalRepWorkerLock));
719 
720  /* Search for attached worker for a given subscription id. */
721  for (i = 0; i < max_logical_replication_workers; i++)
722  {
723  LogicalRepWorker *w = &LogicalRepCtx->workers[i];
724 
725  if (w->subid == subid && OidIsValid(w->relid))
726  res++;
727  }
728 
729  return res;
730 }
731 
732 /*
733  * ApplyLauncherShmemSize
734  * Compute space needed for replication launcher shared memory
735  */
736 Size
737 ApplyLauncherShmemSize(void)
738 {
739  Size size;
740 
741  /*
742  * Need the fixed struct and the array of LogicalRepWorker.
743  */
744  size = sizeof(LogicalRepCtxStruct);
745  size = MAXALIGN(size);
746  size = add_size(size, mul_size(max_logical_replication_workers,
747  sizeof(LogicalRepWorker)));
748  return size;
749 }
750 
751 /*
752  * ApplyLauncherRegister
753  * Register a background worker running the logical replication launcher.
754  */
755 void
756 ApplyLauncherRegister(void)
757 {
758  BackgroundWorker bgw;
759 
760  if (max_logical_replication_workers == 0)
761  return;
762 
763  memset(&bgw, 0, sizeof(bgw));
764  bgw.bgw_flags = BGWORKER_SHMEM_ACCESS |
765  BGWORKER_BACKEND_DATABASE_CONNECTION;
766  bgw.bgw_start_time = BgWorkerStart_RecoveryFinished;
767  snprintf(bgw.bgw_library_name, BGW_MAXLEN, "postgres");
768  snprintf(bgw.bgw_function_name, BGW_MAXLEN, "ApplyLauncherMain");
769  snprintf(bgw.bgw_name, BGW_MAXLEN,
770  "logical replication launcher");
771  bgw.bgw_restart_time = 5;
772  bgw.bgw_notify_pid = 0;
773  bgw.bgw_main_arg = (Datum) 0;
774 
775  RegisterBackgroundWorker(&bgw);
776 }
777 
778 /*
779  * ApplyLauncherShmemInit
780  * Allocate and initialize replication launcher shared memory
781  */
782 void
783 ApplyLauncherShmemInit(void)
784 {
785  bool found;
786 
787  LogicalRepCtx = (LogicalRepCtxStruct *)
788  ShmemInitStruct("Logical Replication Launcher Data",
789  ApplyLauncherShmemSize(),
790  &found);
791 
792  if (!found)
793  {
794  int slot;
795 
796  memset(LogicalRepCtx, 0, ApplyLauncherShmemSize());
797 
798  /* Initialize memory and spin locks for each worker slot. */
799  for (slot = 0; slot < max_logical_replication_workers; slot++)
800  {
801  LogicalRepWorker *worker = &LogicalRepCtx->workers[slot];
802 
803  memset(worker, 0, sizeof(LogicalRepWorker));
804  SpinLockInit(&worker->relmutex);
805  }
806  }
807 }
808 
809 /*
810  * Check whether current transaction has manipulated logical replication
811  * workers.
812  */
813 bool
814 XactManipulatesLogicalReplicationWorkers(void)
815 {
816  return (on_commit_stop_workers != NIL);
817 }
818 
819 /*
820  * Wakeup the launcher on commit if requested.
821  */
822 void
823 AtEOXact_ApplyLauncher(bool isCommit)
824 {
825  if (isCommit)
826  {
827  ListCell *lc;
828 
829  foreach(lc, on_commit_stop_workers)
830  {
831  LogicalRepWorkerId *wid = lfirst(lc);
832 
833  logicalrep_worker_stop(wid->subid, wid->relid);
834  }
835 
836  if (on_commit_launcher_wakeup)
837  ApplyLauncherWakeup();
838  }
839 
840  /*
841  * No need to pfree on_commit_stop_workers. It was allocated in
842  * transaction memory context, which is going to be cleaned soon.
843  */
844  on_commit_stop_workers = NIL;
845  on_commit_launcher_wakeup = false;
846 }
847 
848 /*
849  * Request wakeup of the launcher on commit of the transaction.
850  *
851  * This is used to send the launcher a signal to stop sleeping and process
852  * the subscriptions when the current transaction commits. Should be used
853  * when a new tuple was added to the pg_subscription catalog.
854 */
855 void
856 ApplyLauncherWakeupAtCommit(void)
857 {
858  if (!on_commit_launcher_wakeup)
859  on_commit_launcher_wakeup = true;
860 }
861 
862 static void
863 ApplyLauncherWakeup(void)
864 {
865  if (LogicalRepCtx->launcher_pid != 0)
866  kill(LogicalRepCtx->launcher_pid, SIGUSR1);
867 }
868 
869 /*
870  * Main loop for the apply launcher process.
871  */
872 void
873 ApplyLauncherMain(Datum main_arg)
874 {
875  TimestampTz last_start_time = 0;
876 
877  ereport(DEBUG1,
878  (errmsg("logical replication launcher started")));
879 
880  before_shmem_exit(logicalrep_launcher_onexit, (Datum) 0);
881 
882  Assert(LogicalRepCtx->launcher_pid == 0);
883  LogicalRepCtx->launcher_pid = MyProcPid;
884 
885  /* Establish signal handlers. */
886  pqsignal(SIGHUP, logicalrep_launcher_sighup);
887  pqsignal(SIGTERM, die);
888  BackgroundWorkerUnblockSignals();
889 
890  /*
891  * Establish connection to nailed catalogs (we only ever access
892  * pg_subscription).
893  */
894  BackgroundWorkerInitializeConnection(NULL, NULL);
895 
896  /* Enter main loop */
897  for (;;)
898  {
899  int rc;
900  List *sublist;
901  ListCell *lc;
902  MemoryContext subctx;
903  MemoryContext oldctx;
904  TimestampTz now;
905  long wait_time = DEFAULT_NAPTIME_PER_CYCLE;
906 
907  CHECK_FOR_INTERRUPTS();
908 
909  now = GetCurrentTimestamp();
910 
911  /* Limit the start retry to once a wal_retrieve_retry_interval */
912  if (TimestampDifferenceExceeds(last_start_time, now,
913  wal_retrieve_retry_interval))
914  {
915  /* Use temporary context for the database list and worker info. */
916  subctx = AllocSetContextCreate(TopMemoryContext,
917  "Logical Replication Launcher sublist",
918  ALLOCSET_DEFAULT_MINSIZE,
919  ALLOCSET_DEFAULT_INITSIZE,
920  ALLOCSET_DEFAULT_MAXSIZE);
921  oldctx = MemoryContextSwitchTo(subctx);
922 
923  /* search for subscriptions to start or stop. */
924  sublist = get_subscription_list();
925 
926  /* Start the missing workers for enabled subscriptions. */
927  foreach(lc, sublist)
928  {
929  Subscription *sub = (Subscription *) lfirst(lc);
930  LogicalRepWorker *w;
931 
932  if (!sub->enabled)
933  continue;
934 
935  LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
936  w = logicalrep_worker_find(sub->oid, InvalidOid, false);
937  LWLockRelease(LogicalRepWorkerLock);
938 
939  if (w == NULL)
940  {
941  last_start_time = now;
942  wait_time = wal_retrieve_retry_interval;
943 
944  logicalrep_worker_launch(sub->dbid, sub->oid, sub->name,
945  sub->owner, InvalidOid);
946  }
947  }
948 
949  /* Switch back to original memory context. */
950  MemoryContextSwitchTo(oldctx);
951  /* Clean the temporary memory. */
952  MemoryContextDelete(subctx);
953  }
954  else
955  {
956  /*
957  * The wait in the previous cycle was interrupted in less than
958  * wal_retrieve_retry_interval since the last worker was started. This
959  * usually means the worker crashed, so wait wal_retrieve_retry_interval
960  * again before retrying.
961  */
962  wait_time = wal_retrieve_retry_interval;
963  }
964 
965  /* Wait for more work. */
966  rc = WaitLatch(MyLatch,
967  WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
968  wait_time,
969  WAIT_EVENT_LOGICAL_LAUNCHER_MAIN);
970 
971  /* emergency bailout if postmaster has died */
972  if (rc & WL_POSTMASTER_DEATH)
973  proc_exit(1);
974 
975  if (rc & WL_LATCH_SET)
976  {
977  ResetLatch(MyLatch);
978  CHECK_FOR_INTERRUPTS();
979  }
980 
981  if (got_SIGHUP)
982  {
983  got_SIGHUP = false;
984  ProcessConfigFile(PGC_SIGHUP);
985  }
986  }
987 
988  /* Not reachable */
989 }
990 
991 /*
992  * Is current process the logical replication launcher?
993  */
994 bool
995 IsLogicalLauncher(void)
996 {
997  return LogicalRepCtx->launcher_pid == MyProcPid;
998 }
999 
1000 /*
1001  * Returns state of the subscriptions.
1002  */
1003 Datum
1004 pg_stat_get_subscription(PG_FUNCTION_ARGS)
1005 {
1006 #define PG_STAT_GET_SUBSCRIPTION_COLS 8
1007  Oid subid = PG_ARGISNULL(0) ? InvalidOid : PG_GETARG_OID(0);
1008  int i;
1009  ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1010  TupleDesc tupdesc;
1011  Tuplestorestate *tupstore;
1012  MemoryContext per_query_ctx;
1013  MemoryContext oldcontext;
1014 
1015  /* check to see if caller supports us returning a tuplestore */
1016  if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
1017  ereport(ERROR,
1018  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1019  errmsg("set-valued function called in context that cannot accept a set")));
1020  if (!(rsinfo->allowedModes & SFRM_Materialize))
1021  ereport(ERROR,
1022  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1023  errmsg("materialize mode required, but it is not " \
1024  "allowed in this context")));
1025 
1026  /* Build a tuple descriptor for our result type */
1027  if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1028  elog(ERROR, "return type must be a row type");
1029 
1030  per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
1031  oldcontext = MemoryContextSwitchTo(per_query_ctx);
1032 
1033  tupstore = tuplestore_begin_heap(true, false, work_mem);
1034  rsinfo->returnMode = SFRM_Materialize;
1035  rsinfo->setResult = tupstore;
1036  rsinfo->setDesc = tupdesc;
1037 
1038  MemoryContextSwitchTo(oldcontext);
1039 
1040  /* Make sure we get consistent view of the workers. */
1041  LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
1042 
1043  for (i = 0; i < max_logical_replication_workers; i++)
1044  {
1045  /* for each row */
1046  Datum values[PG_STAT_GET_SUBSCRIPTION_COLS];
1047  bool nulls[PG_STAT_GET_SUBSCRIPTION_COLS];
1048  int worker_pid;
1049  LogicalRepWorker worker;
1050 
1051  memcpy(&worker, &LogicalRepCtx->workers[i],
1052  sizeof(LogicalRepWorker));
1053  if (!worker.proc || !IsBackendPid(worker.proc->pid))
1054  continue;
1055 
1056  if (OidIsValid(subid) && worker.subid != subid)
1057  continue;
1058 
1059  worker_pid = worker.proc->pid;
1060 
1061  MemSet(values, 0, sizeof(values));
1062  MemSet(nulls, 0, sizeof(nulls));
1063 
1064  values[0] = ObjectIdGetDatum(worker.subid);
1065  if (OidIsValid(worker.relid))
1066  values[1] = ObjectIdGetDatum(worker.relid);
1067  else
1068  nulls[1] = true;
1069  values[2] = Int32GetDatum(worker_pid);
1070  if (XLogRecPtrIsInvalid(worker.last_lsn))
1071  nulls[3] = true;
1072  else
1073  values[3] = LSNGetDatum(worker.last_lsn);
1074  if (worker.last_send_time == 0)
1075  nulls[4] = true;
1076  else
1077  values[4] = TimestampTzGetDatum(worker.last_send_time);
1078  if (worker.last_recv_time == 0)
1079  nulls[5] = true;
1080  else
1081  values[5] = TimestampTzGetDatum(worker.last_recv_time);
1082  if (XLogRecPtrIsInvalid(worker.reply_lsn))
1083  nulls[6] = true;
1084  else
1085  values[6] = LSNGetDatum(worker.reply_lsn);
1086  if (worker.reply_time == 0)
1087  nulls[7] = true;
1088  else
1089  values[7] = TimestampTzGetDatum(worker.reply_time);
1090 
1091  tuplestore_putvalues(tupstore, tupdesc, values, nulls);
1092 
1093  /*
1094  * If only a single subscription was requested, and we found it,
1095  * break.
1096  */
1097  if (OidIsValid(subid))
1098  break;
1099  }
1100 
1101  LWLockRelease(LogicalRepWorkerLock);
1102 
1103  /* clean up and return the tuplestore */
1104  tuplestore_donestoring(tupstore);
1105 
1106  return (Datum) 0;
1107 }