PostgreSQL Source Code  git master
launcher.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  * launcher.c
3  * PostgreSQL logical replication worker launcher process
4  *
5  * Copyright (c) 2016-2019, PostgreSQL Global Development Group
6  *
7  * IDENTIFICATION
8  * src/backend/replication/logical/launcher.c
9  *
10  * NOTES
11  * This module contains the logical replication worker launcher which
12  * uses the background worker infrastructure to start the logical
13  * replication workers for every enabled subscription.
14  *
15  *-------------------------------------------------------------------------
16  */
17 
18 #include "postgres.h"
19 
20 #include "access/heapam.h"
21 #include "access/htup.h"
22 #include "access/htup_details.h"
23 #include "access/tableam.h"
24 #include "access/xact.h"
27 #include "funcapi.h"
28 #include "libpq/pqsignal.h"
29 #include "miscadmin.h"
30 #include "pgstat.h"
31 #include "postmaster/bgworker.h"
33 #include "postmaster/postmaster.h"
36 #include "replication/slot.h"
39 #include "storage/ipc.h"
40 #include "storage/proc.h"
41 #include "storage/procarray.h"
42 #include "storage/procsignal.h"
43 #include "tcop/tcopprot.h"
44 #include "utils/memutils.h"
45 #include "utils/pg_lsn.h"
46 #include "utils/ps_status.h"
47 #include "utils/snapmgr.h"
48 #include "utils/timeout.h"
49 
50 /* max sleep time between cycles (3min) */
51 #define DEFAULT_NAPTIME_PER_CYCLE 180000L
52 
55 
57 
58 typedef struct LogicalRepCtxStruct
59 {
60  /* Supervisor process. */
61  pid_t launcher_pid;
62 
63  /* Background workers. */
64  LogicalRepWorker workers[FLEXIBLE_ARRAY_MEMBER];
66 
68 
69 typedef struct LogicalRepWorkerId
70 {
74 
75 typedef struct StopWorkersData
76 {
77  int nestDepth; /* Sub-transaction nest level */
78  List *workers; /* List of LogicalRepWorkerId */
79  struct StopWorkersData *parent; /* This need not be an immediate
80  * subtransaction parent */
82 
83 /*
84  * Stack of StopWorkersData elements. Each stack element contains the workers
85  * to be stopped for that subtransaction.
86  */
88 
89 static void ApplyLauncherWakeup(void);
90 static void logicalrep_launcher_onexit(int code, Datum arg);
91 static void logicalrep_worker_onexit(int code, Datum arg);
92 static void logicalrep_worker_detach(void);
93 static void logicalrep_worker_cleanup(LogicalRepWorker *worker);
94 
95 /* Flags set by signal handlers */
96 static volatile sig_atomic_t got_SIGHUP = false;
97 
98 static bool on_commit_launcher_wakeup = false;
99 
101 
102 
103 /*
104  * Load the list of subscriptions.
105  *
106  * Only the fields interesting for worker start/stop functions are filled for
107  * each subscription.
108  */
109 static List *
111 {
112  List *res = NIL;
113  Relation rel;
114  TableScanDesc scan;
115  HeapTuple tup;
116  MemoryContext resultcxt;
117 
118  /* This is the context that we will allocate our output data in */
119  resultcxt = CurrentMemoryContext;
120 
121  /*
122  * Start a transaction so we can access pg_subscription, and get a snapshot.
123  * We don't have a use for the snapshot itself, but we're interested in
124  * the secondary effect that it sets RecentGlobalXmin. (This is critical
125  * for anything that reads heap pages, because HOT may decide to prune
126  * them even if the process doesn't attempt to modify any tuples.)
127  */
129  (void) GetTransactionSnapshot();
130 
131  rel = table_open(SubscriptionRelationId, AccessShareLock);
132  scan = table_beginscan_catalog(rel, 0, NULL);
133 
135  {
137  Subscription *sub;
138  MemoryContext oldcxt;
139 
140  /*
141  * Allocate our results in the caller's context, not the
142  * transaction's. We do this inside the loop, and restore the original
143  * context at the end, so that leaky things like heap_getnext() are
144  * not called in a potentially long-lived context.
145  */
146  oldcxt = MemoryContextSwitchTo(resultcxt);
147 
148  sub = (Subscription *) palloc0(sizeof(Subscription));
149  sub->oid = subform->oid;
150  sub->dbid = subform->subdbid;
151  sub->owner = subform->subowner;
152  sub->enabled = subform->subenabled;
153  sub->name = pstrdup(NameStr(subform->subname));
154  /* We don't fill fields we are not interested in. */
155 
156  res = lappend(res, sub);
157  MemoryContextSwitchTo(oldcxt);
158  }
159 
160  table_endscan(scan);
162 
164 
165  return res;
166 }
167 
168 /*
169  * Wait for a background worker to start up and attach to the shmem context.
170  *
171  * This is only needed for cleaning up the shared memory in case the worker
172  * fails to attach.
173  */
174 static void
176  uint16 generation,
177  BackgroundWorkerHandle *handle)
178 {
180  int rc;
181 
182  for (;;)
183  {
184  pid_t pid;
185 
187 
188  LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
189 
190  /* Worker either died or has started; no need to do anything. */
191  if (!worker->in_use || worker->proc)
192  {
193  LWLockRelease(LogicalRepWorkerLock);
194  return;
195  }
196 
197  LWLockRelease(LogicalRepWorkerLock);
198 
199  /* Check if worker has died before attaching, and clean up after it. */
200  status = GetBackgroundWorkerPid(handle, &pid);
201 
202  if (status == BGWH_STOPPED)
203  {
204  LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
205  /* Ensure that this was indeed the worker we waited for. */
206  if (generation == worker->generation)
208  LWLockRelease(LogicalRepWorkerLock);
209  return;
210  }
211 
212  /*
213  * We need timeout because we generally don't get notified via latch
214  * about the worker attach. But we don't expect to have to wait long.
215  */
216  rc = WaitLatch(MyLatch,
219 
220  if (rc & WL_LATCH_SET)
221  {
224  }
225  }
226 }
227 
228 /*
229  * Walks the workers array and searches for one that matches given
230  * subscription id and relid.
231  */
233 logicalrep_worker_find(Oid subid, Oid relid, bool only_running)
234 {
235  int i;
236  LogicalRepWorker *res = NULL;
237 
238  Assert(LWLockHeldByMe(LogicalRepWorkerLock));
239 
240  /* Search for attached worker for a given subscription id. */
241  for (i = 0; i < max_logical_replication_workers; i++)
242  {
243  LogicalRepWorker *w = &LogicalRepCtx->workers[i];
244 
245  if (w->in_use && w->subid == subid && w->relid == relid &&
246  (!only_running || w->proc))
247  {
248  res = w;
249  break;
250  }
251  }
252 
253  return res;
254 }
255 
256 /*
257  * Similar to logicalrep_worker_find(), but returns list of all workers for
258  * the subscription, instead just one.
259  */
260 List *
261 logicalrep_workers_find(Oid subid, bool only_running)
262 {
263  int i;
264  List *res = NIL;
265 
266  Assert(LWLockHeldByMe(LogicalRepWorkerLock));
267 
268  /* Search for attached worker for a given subscription id. */
269  for (i = 0; i < max_logical_replication_workers; i++)
270  {
271  LogicalRepWorker *w = &LogicalRepCtx->workers[i];
272 
273  if (w->in_use && w->subid == subid && (!only_running || w->proc))
274  res = lappend(res, w);
275  }
276 
277  return res;
278 }
279 
280 /*
281  * Start new apply background worker, if possible.
282  */
283 void
284 logicalrep_worker_launch(Oid dbid, Oid subid, const char *subname, Oid userid,
285  Oid relid)
286 {
287  BackgroundWorker bgw;
288  BackgroundWorkerHandle *bgw_handle;
289  uint16 generation;
290  int i;
291  int slot = 0;
292  LogicalRepWorker *worker = NULL;
293  int nsyncworkers;
295 
296  ereport(DEBUG1,
297  (errmsg("starting logical replication worker for subscription \"%s\"",
298  subname)));
299 
300  /* Report this after the initial starting message for consistency. */
301  if (max_replication_slots == 0)
302  ereport(ERROR,
303  (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
304  errmsg("cannot start logical replication workers when max_replication_slots = 0")));
305 
306  /*
307  * We need to do the modification of the shared memory under lock so that
308  * we have consistent view.
309  */
310  LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
311 
312 retry:
313  /* Find unused worker slot. */
314  for (i = 0; i < max_logical_replication_workers; i++)
315  {
316  LogicalRepWorker *w = &LogicalRepCtx->workers[i];
317 
318  if (!w->in_use)
319  {
320  worker = w;
321  slot = i;
322  break;
323  }
324  }
325 
326  nsyncworkers = logicalrep_sync_worker_count(subid);
327 
328  now = GetCurrentTimestamp();
329 
330  /*
331  * If we didn't find a free slot, try to do garbage collection. The
332  * reason we do this is because if some worker failed to start up and its
333  * parent has crashed while waiting, the in_use state was never cleared.
334  */
335  if (worker == NULL || nsyncworkers >= max_sync_workers_per_subscription)
336  {
337  bool did_cleanup = false;
338 
339  for (i = 0; i < max_logical_replication_workers; i++)
340  {
341  LogicalRepWorker *w = &LogicalRepCtx->workers[i];
342 
343  /*
344  * If the worker was marked in use but didn't manage to attach in
345  * time, clean it up.
346  */
347  if (w->in_use && !w->proc &&
350  {
351  elog(WARNING,
352  "logical replication worker for subscription %u took too long to start; canceled",
353  w->subid);
354 
356  did_cleanup = true;
357  }
358  }
359 
360  if (did_cleanup)
361  goto retry;
362  }
363 
364  /*
365  * If we reached the sync worker limit per subscription, just exit
366  * silently as we might get here because of an otherwise harmless race
367  * condition.
368  */
369  if (nsyncworkers >= max_sync_workers_per_subscription)
370  {
371  LWLockRelease(LogicalRepWorkerLock);
372  return;
373  }
374 
375  /*
376  * However if there are no more free worker slots, inform user about it
377  * before exiting.
378  */
379  if (worker == NULL)
380  {
381  LWLockRelease(LogicalRepWorkerLock);
383  (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
384  errmsg("out of logical replication worker slots"),
385  errhint("You might need to increase max_logical_replication_workers.")));
386  return;
387  }
388 
389  /* Prepare the worker slot. */
390  worker->launch_time = now;
391  worker->in_use = true;
392  worker->generation++;
393  worker->proc = NULL;
394  worker->dbid = dbid;
395  worker->userid = userid;
396  worker->subid = subid;
397  worker->relid = relid;
398  worker->relstate = SUBREL_STATE_UNKNOWN;
400  worker->last_lsn = InvalidXLogRecPtr;
403  worker->reply_lsn = InvalidXLogRecPtr;
404  TIMESTAMP_NOBEGIN(worker->reply_time);
405 
406  /* Before releasing lock, remember generation for future identification. */
407  generation = worker->generation;
408 
409  LWLockRelease(LogicalRepWorkerLock);
410 
411  /* Register the new dynamic worker. */
412  memset(&bgw, 0, sizeof(bgw));
416  snprintf(bgw.bgw_library_name, BGW_MAXLEN, "postgres");
417  snprintf(bgw.bgw_function_name, BGW_MAXLEN, "ApplyWorkerMain");
418  if (OidIsValid(relid))
420  "logical replication worker for subscription %u sync %u", subid, relid);
421  else
423  "logical replication worker for subscription %u", subid);
424  snprintf(bgw.bgw_type, BGW_MAXLEN, "logical replication worker");
425 
428  bgw.bgw_main_arg = Int32GetDatum(slot);
429 
430  if (!RegisterDynamicBackgroundWorker(&bgw, &bgw_handle))
431  {
432  /* Failed to start worker, so clean up the worker slot. */
433  LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
434  Assert(generation == worker->generation);
436  LWLockRelease(LogicalRepWorkerLock);
437 
439  (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
440  errmsg("out of background worker slots"),
441  errhint("You might need to increase max_worker_processes.")));
442  return;
443  }
444 
445  /* Now wait until it attaches. */
446  WaitForReplicationWorkerAttach(worker, generation, bgw_handle);
447 }
448 
449 /*
450  * Stop the logical replication worker for subid/relid, if any, and wait until
451  * it detaches from the slot.
452  */
453 void
455 {
456  LogicalRepWorker *worker;
457  uint16 generation;
458 
459  LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
460 
461  worker = logicalrep_worker_find(subid, relid, false);
462 
463  /* No worker, nothing to do. */
464  if (!worker)
465  {
466  LWLockRelease(LogicalRepWorkerLock);
467  return;
468  }
469 
470  /*
471  * Remember which generation was our worker so we can check if what we see
472  * is still the same one.
473  */
474  generation = worker->generation;
475 
476  /*
477  * If we found a worker but it does not have proc set then it is still
478  * starting up; wait for it to finish starting and then kill it.
479  */
480  while (worker->in_use && !worker->proc)
481  {
482  int rc;
483 
484  LWLockRelease(LogicalRepWorkerLock);
485 
486  /* Wait a bit --- we don't expect to have to wait long. */
487  rc = WaitLatch(MyLatch,
490 
491  if (rc & WL_LATCH_SET)
492  {
495  }
496 
497  /* Recheck worker status. */
498  LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
499 
500  /*
501  * Check whether the worker slot is no longer used, which would mean
502  * that the worker has exited, or whether the worker generation is
503  * different, meaning that a different worker has taken the slot.
504  */
505  if (!worker->in_use || worker->generation != generation)
506  {
507  LWLockRelease(LogicalRepWorkerLock);
508  return;
509  }
510 
511  /* Worker has assigned proc, so it has started. */
512  if (worker->proc)
513  break;
514  }
515 
516  /* Now terminate the worker ... */
517  kill(worker->proc->pid, SIGTERM);
518 
519  /* ... and wait for it to die. */
520  for (;;)
521  {
522  int rc;
523 
524  /* is it gone? */
525  if (!worker->proc || worker->generation != generation)
526  break;
527 
528  LWLockRelease(LogicalRepWorkerLock);
529 
530  /* Wait a bit --- we don't expect to have to wait long. */
531  rc = WaitLatch(MyLatch,
534 
535  if (rc & WL_LATCH_SET)
536  {
539  }
540 
541  LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
542  }
543 
544  LWLockRelease(LogicalRepWorkerLock);
545 }
546 
547 /*
548  * Request worker for specified sub/rel to be stopped on commit.
549  */
550 void
552 {
554  LogicalRepWorkerId *wid;
555  MemoryContext oldctx;
556 
557  /* Make sure we store the info in context that survives until commit. */
559 
560  /* Check that previous transactions were properly cleaned up. */
561  Assert(on_commit_stop_workers == NULL ||
562  nestDepth >= on_commit_stop_workers->nestDepth);
563 
564  /*
565  * Push a new stack element if we don't already have one for the current
566  * nestDepth.
567  */
568  if (on_commit_stop_workers == NULL ||
569  nestDepth > on_commit_stop_workers->nestDepth)
570  {
571  StopWorkersData *newdata = palloc(sizeof(StopWorkersData));
572 
573  newdata->nestDepth = nestDepth;
574  newdata->workers = NIL;
575  newdata->parent = on_commit_stop_workers;
576  on_commit_stop_workers = newdata;
577  }
578 
579  /*
580  * Finally add a new worker into the worker list of the current
581  * subtransaction.
582  */
583  wid = palloc(sizeof(LogicalRepWorkerId));
584  wid->subid = subid;
585  wid->relid = relid;
586  on_commit_stop_workers->workers =
587  lappend(on_commit_stop_workers->workers, wid);
588 
589  MemoryContextSwitchTo(oldctx);
590 }
591 
592 /*
593  * Wake up (using latch) any logical replication worker for specified sub/rel.
594  */
595 void
597 {
598  LogicalRepWorker *worker;
599 
600  LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
601 
602  worker = logicalrep_worker_find(subid, relid, true);
603 
604  if (worker)
606 
607  LWLockRelease(LogicalRepWorkerLock);
608 }
609 
610 /*
611  * Wake up (using latch) the specified logical replication worker.
612  *
613  * Caller must hold lock, else worker->proc could change under us.
614  */
615 void
617 {
618  Assert(LWLockHeldByMe(LogicalRepWorkerLock));
619 
620  SetLatch(&worker->proc->procLatch);
621 }
622 
623 /*
624  * Attach to a slot.
625  */
626 void
628 {
629  /* Block concurrent access. */
630  LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
631 
632  Assert(slot >= 0 && slot < max_logical_replication_workers);
633  MyLogicalRepWorker = &LogicalRepCtx->workers[slot];
634 
635  if (!MyLogicalRepWorker->in_use)
636  {
637  LWLockRelease(LogicalRepWorkerLock);
638  ereport(ERROR,
639  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
640  errmsg("logical replication worker slot %d is empty, cannot attach",
641  slot)));
642  }
643 
644  if (MyLogicalRepWorker->proc)
645  {
646  LWLockRelease(LogicalRepWorkerLock);
647  ereport(ERROR,
648  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
649  errmsg("logical replication worker slot %d is already used by "
650  "another worker, cannot attach", slot)));
651  }
652 
653  MyLogicalRepWorker->proc = MyProc;
655 
656  LWLockRelease(LogicalRepWorkerLock);
657 }
658 
659 /*
660  * Detach the worker (cleans up the worker info).
661  */
662 static void
664 {
665  /* Block concurrent access. */
666  LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
667 
668  logicalrep_worker_cleanup(MyLogicalRepWorker);
669 
670  LWLockRelease(LogicalRepWorkerLock);
671 }
672 
673 /*
674  * Clean up worker info.
675  */
676 static void
678 {
679  Assert(LWLockHeldByMeInMode(LogicalRepWorkerLock, LW_EXCLUSIVE));
680 
681  worker->in_use = false;
682  worker->proc = NULL;
683  worker->dbid = InvalidOid;
684  worker->userid = InvalidOid;
685  worker->subid = InvalidOid;
686  worker->relid = InvalidOid;
687 }
688 
689 /*
690  * Cleanup function for logical replication launcher.
691  *
692  * Called on logical replication launcher exit.
693  */
694 static void
696 {
697  LogicalRepCtx->launcher_pid = 0;
698 }
699 
700 /*
701  * Cleanup function.
702  *
703  * Called on logical replication worker exit.
704  */
705 static void
707 {
708  /* Disconnect gracefully from the remote side. */
709  if (wrconn)
711 
713 
715 }
716 
717 /* SIGHUP: set flag to reload configuration at next convenient time */
718 static void
720 {
721  int save_errno = errno;
722 
723  got_SIGHUP = true;
724 
725  /* Waken anything waiting on the process latch */
726  SetLatch(MyLatch);
727 
728  errno = save_errno;
729 }
730 
731 /*
732  * Count the number of registered (not necessarily running) sync workers
733  * for a subscription.
734  */
735 int
737 {
738  int i;
739  int res = 0;
740 
741  Assert(LWLockHeldByMe(LogicalRepWorkerLock));
742 
743  /* Search for attached worker for a given subscription id. */
744  for (i = 0; i < max_logical_replication_workers; i++)
745  {
746  LogicalRepWorker *w = &LogicalRepCtx->workers[i];
747 
748  if (w->subid == subid && OidIsValid(w->relid))
749  res++;
750  }
751 
752  return res;
753 }
754 
755 /*
756  * ApplyLauncherShmemSize
757  * Compute space needed for replication launcher shared memory
758  */
759 Size
761 {
762  Size size;
763 
764  /*
765  * Need the fixed struct and the array of LogicalRepWorker.
766  */
767  size = sizeof(LogicalRepCtxStruct);
768  size = MAXALIGN(size);
770  sizeof(LogicalRepWorker)));
771  return size;
772 }
773 
774 /*
775  * ApplyLauncherRegister
776  * Register a background worker running the logical replication launcher.
777  */
778 void
780 {
781  BackgroundWorker bgw;
782 
784  return;
785 
786  memset(&bgw, 0, sizeof(bgw));
790  snprintf(bgw.bgw_library_name, BGW_MAXLEN, "postgres");
791  snprintf(bgw.bgw_function_name, BGW_MAXLEN, "ApplyLauncherMain");
793  "logical replication launcher");
795  "logical replication launcher");
796  bgw.bgw_restart_time = 5;
797  bgw.bgw_notify_pid = 0;
798  bgw.bgw_main_arg = (Datum) 0;
799 
801 }
802 
803 /*
804  * ApplyLauncherShmemInit
805  * Allocate and initialize replication launcher shared memory
806  */
807 void
809 {
810  bool found;
811 
812  LogicalRepCtx = (LogicalRepCtxStruct *)
813  ShmemInitStruct("Logical Replication Launcher Data",
815  &found);
816 
817  if (!found)
818  {
819  int slot;
820 
821  memset(LogicalRepCtx, 0, ApplyLauncherShmemSize());
822 
823  /* Initialize memory and spin locks for each worker slot. */
824  for (slot = 0; slot < max_logical_replication_workers; slot++)
825  {
826  LogicalRepWorker *worker = &LogicalRepCtx->workers[slot];
827 
828  memset(worker, 0, sizeof(LogicalRepWorker));
829  SpinLockInit(&worker->relmutex);
830  }
831  }
832 }
833 
834 /*
835  * Check whether current transaction has manipulated logical replication
836  * workers.
837  */
838 bool
840 {
841  return (on_commit_stop_workers != NULL);
842 }
843 
844 /*
845  * Wakeup the launcher on commit if requested.
846  */
847 void
849 {
850 
851  Assert(on_commit_stop_workers == NULL ||
852  (on_commit_stop_workers->nestDepth == 1 &&
853  on_commit_stop_workers->parent == NULL));
854 
855  if (isCommit)
856  {
857  ListCell *lc;
858 
859  if (on_commit_stop_workers != NULL)
860  {
861  List *workers = on_commit_stop_workers->workers;
862 
863  foreach(lc, workers)
864  {
865  LogicalRepWorkerId *wid = lfirst(lc);
866 
867  logicalrep_worker_stop(wid->subid, wid->relid);
868  }
869  }
870 
873  }
874 
875  /*
876  * No need to pfree on_commit_stop_workers. It was allocated in
877  * transaction memory context, which is going to be cleaned soon.
878  */
879  on_commit_stop_workers = NULL;
881 }
882 
883 /*
884  * On commit, merge the current on_commit_stop_workers list into the
885  * immediate parent, if present.
886  * On rollback, discard the current on_commit_stop_workers list.
887  * Pop out the stack.
888  */
889 void
891 {
893 
894  /* Exit immediately if there's no work to do at this level. */
895  if (on_commit_stop_workers == NULL ||
896  on_commit_stop_workers->nestDepth < nestDepth)
897  return;
898 
899  Assert(on_commit_stop_workers->nestDepth == nestDepth);
900 
901  parent = on_commit_stop_workers->parent;
902 
903  if (isCommit)
904  {
905  /*
906  * If the upper stack element is not an immediate parent
907  * subtransaction, just decrement the notional nesting depth without
908  * doing any real work. Else, we need to merge the current workers
909  * list into the parent.
910  */
911  if (!parent || parent->nestDepth < nestDepth - 1)
912  {
913  on_commit_stop_workers->nestDepth--;
914  return;
915  }
916 
917  parent->workers =
918  list_concat(parent->workers, on_commit_stop_workers->workers);
919  }
920  else
921  {
922  /*
923  * Abandon everything that was done at this nesting level. Explicitly
924  * free memory to avoid a transaction-lifespan leak.
925  */
926  list_free_deep(on_commit_stop_workers->workers);
927  }
928 
929  /*
930  * We have taken care of the current subtransaction workers list for both
931  * abort or commit. So we are ready to pop the stack.
932  */
933  pfree(on_commit_stop_workers);
934  on_commit_stop_workers = parent;
935 }
936 
937 /*
938  * Request wakeup of the launcher on commit of the transaction.
939  *
940  * This is used to send launcher signal to stop sleeping and process the
941  * subscriptions when current transaction commits. Should be used when new
942  * tuple was added to the pg_subscription catalog.
943 */
944 void
946 {
949 }
950 
951 static void
953 {
954  if (LogicalRepCtx->launcher_pid != 0)
955  kill(LogicalRepCtx->launcher_pid, SIGUSR1);
956 }
957 
958 /*
959  * Main loop for the apply launcher process.
960  */
961 void
963 {
964  TimestampTz last_start_time = 0;
965 
966  ereport(DEBUG1,
967  (errmsg("logical replication launcher started")));
968 
970 
971  Assert(LogicalRepCtx->launcher_pid == 0);
972  LogicalRepCtx->launcher_pid = MyProcPid;
973 
974  /* Establish signal handlers. */
976  pqsignal(SIGTERM, die);
978 
979  /*
980  * Establish connection to nailed catalogs (we only ever access
981  * pg_subscription).
982  */
984 
985  /* Enter main loop */
986  for (;;)
987  {
988  int rc;
989  List *sublist;
990  ListCell *lc;
991  MemoryContext subctx;
992  MemoryContext oldctx;
994  long wait_time = DEFAULT_NAPTIME_PER_CYCLE;
995 
997 
998  now = GetCurrentTimestamp();
999 
1000  /* Limit the start retry to once a wal_retrieve_retry_interval */
1001  if (TimestampDifferenceExceeds(last_start_time, now,
1003  {
1004  /* Use temporary context for the subscription list and worker info. */
1006  "Logical Replication Launcher sublist",
1008  oldctx = MemoryContextSwitchTo(subctx);
1009 
1010  /* search for subscriptions to start or stop. */
1011  sublist = get_subscription_list();
1012 
1013  /* Start the missing workers for enabled subscriptions. */
1014  foreach(lc, sublist)
1015  {
1016  Subscription *sub = (Subscription *) lfirst(lc);
1017  LogicalRepWorker *w;
1018 
1019  if (!sub->enabled)
1020  continue;
1021 
1022  LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
1023  w = logicalrep_worker_find(sub->oid, InvalidOid, false);
1024  LWLockRelease(LogicalRepWorkerLock);
1025 
1026  if (w == NULL)
1027  {
1028  last_start_time = now;
1029  wait_time = wal_retrieve_retry_interval;
1030 
1031  logicalrep_worker_launch(sub->dbid, sub->oid, sub->name,
1032  sub->owner, InvalidOid);
1033  }
1034  }
1035 
1036  /* Switch back to original memory context. */
1037  MemoryContextSwitchTo(oldctx);
1038  /* Clean the temporary memory. */
1039  MemoryContextDelete(subctx);
1040  }
1041  else
1042  {
1043  /*
1044  * The wait in previous cycle was interrupted in less than
1045  * wal_retrieve_retry_interval since last worker was started, this
1046  * usually means crash of the worker, so we should retry in
1047  * wal_retrieve_retry_interval again.
1048  */
1049  wait_time = wal_retrieve_retry_interval;
1050  }
1051 
1052  /* Wait for more work. */
1053  rc = WaitLatch(MyLatch,
1055  wait_time,
1057 
1058  if (rc & WL_LATCH_SET)
1059  {
1062  }
1063 
1064  if (got_SIGHUP)
1065  {
1066  got_SIGHUP = false;
1068  }
1069  }
1070 
1071  /* Not reachable */
1072 }
1073 
1074 /*
1075  * Is current process the logical replication launcher?
1076  */
1077 bool
1079 {
1080  return LogicalRepCtx->launcher_pid == MyProcPid;
1081 }
1082 
1083 /*
1084  * Returns state of the subscriptions.
1085  */
1086 Datum
1088 {
1089 #define PG_STAT_GET_SUBSCRIPTION_COLS 8
1090  Oid subid = PG_ARGISNULL(0) ? InvalidOid : PG_GETARG_OID(0);
1091  int i;
1092  ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1093  TupleDesc tupdesc;
1094  Tuplestorestate *tupstore;
1095  MemoryContext per_query_ctx;
1096  MemoryContext oldcontext;
1097 
1098  /* check to see if caller supports us returning a tuplestore */
1099  if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
1100  ereport(ERROR,
1101  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1102  errmsg("set-valued function called in context that cannot accept a set")));
1103  if (!(rsinfo->allowedModes & SFRM_Materialize))
1104  ereport(ERROR,
1105  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1106  errmsg("materialize mode required, but it is not " \
1107  "allowed in this context")));
1108 
1109  /* Build a tuple descriptor for our result type */
1110  if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1111  elog(ERROR, "return type must be a row type");
1112 
1113  per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
1114  oldcontext = MemoryContextSwitchTo(per_query_ctx);
1115 
1116  tupstore = tuplestore_begin_heap(true, false, work_mem);
1117  rsinfo->returnMode = SFRM_Materialize;
1118  rsinfo->setResult = tupstore;
1119  rsinfo->setDesc = tupdesc;
1120 
1121  MemoryContextSwitchTo(oldcontext);
1122 
1123  /* Make sure we get consistent view of the workers. */
1124  LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
1125 
1126  for (i = 0; i <= max_logical_replication_workers; i++)
1127  {
1128  /* for each row */
1130  bool nulls[PG_STAT_GET_SUBSCRIPTION_COLS];
1131  int worker_pid;
1132  LogicalRepWorker worker;
1133 
1134  memcpy(&worker, &LogicalRepCtx->workers[i],
1135  sizeof(LogicalRepWorker));
1136  if (!worker.proc || !IsBackendPid(worker.proc->pid))
1137  continue;
1138 
1139  if (OidIsValid(subid) && worker.subid != subid)
1140  continue;
1141 
1142  worker_pid = worker.proc->pid;
1143 
1144  MemSet(values, 0, sizeof(values));
1145  MemSet(nulls, 0, sizeof(nulls));
1146 
1147  values[0] = ObjectIdGetDatum(worker.subid);
1148  if (OidIsValid(worker.relid))
1149  values[1] = ObjectIdGetDatum(worker.relid);
1150  else
1151  nulls[1] = true;
1152  values[2] = Int32GetDatum(worker_pid);
1153  if (XLogRecPtrIsInvalid(worker.last_lsn))
1154  nulls[3] = true;
1155  else
1156  values[3] = LSNGetDatum(worker.last_lsn);
1157  if (worker.last_send_time == 0)
1158  nulls[4] = true;
1159  else
1160  values[4] = TimestampTzGetDatum(worker.last_send_time);
1161  if (worker.last_recv_time == 0)
1162  nulls[5] = true;
1163  else
1164  values[5] = TimestampTzGetDatum(worker.last_recv_time);
1165  if (XLogRecPtrIsInvalid(worker.reply_lsn))
1166  nulls[6] = true;
1167  else
1168  values[6] = LSNGetDatum(worker.reply_lsn);
1169  if (worker.reply_time == 0)
1170  nulls[7] = true;
1171  else
1172  values[7] = TimestampTzGetDatum(worker.reply_time);
1173 
1174  tuplestore_putvalues(tupstore, tupdesc, values, nulls);
1175 
1176  /*
1177  * If only a single subscription was requested, and we found it,
1178  * break.
1179  */
1180  if (OidIsValid(subid))
1181  break;
1182  }
1183 
1184  LWLockRelease(LogicalRepWorkerLock);
1185 
1186  /* clean up and return the tuplestore */
1187  tuplestore_donestoring(tupstore);
1188 
1189  return (Datum) 0;
1190 }
void AtEOXact_ApplyLauncher(bool isCommit)
Definition: launcher.c:848
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, Datum *values, bool *isnull)
Definition: tuplestore.c:750
static volatile sig_atomic_t got_SIGHUP
Definition: launcher.c:96
#define NIL
Definition: pg_list.h:65
WalReceiverConn * wrconn
Definition: worker.c:100
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
#define IsA(nodeptr, _type_)
Definition: nodes.h:576
void RegisterBackgroundWorker(BackgroundWorker *worker)
Definition: bgworker.c:848
List * logicalrep_workers_find(Oid subid, bool only_running)
Definition: launcher.c:261
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:211
bool LWLockHeldByMeInMode(LWLock *l, LWLockMode mode)
Definition: lwlock.c:1860
#define AllocSetContextCreate
Definition: memutils.h:170
#define DEBUG1
Definition: elog.h:25
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition: funcapi.c:196
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:133
int MyProcPid
Definition: globals.c:40
int errhint(const char *fmt,...)
Definition: elog.c:1069
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
MemoryContext TopTransactionContext
Definition: mcxt.c:49
#define WL_TIMEOUT
Definition: latch.h:127
void ProcessConfigFile(GucContext context)
void ApplyLauncherMain(Datum main_arg)
Definition: launcher.c:962
TableScanDesc table_beginscan_catalog(Relation relation, int nkeys, struct ScanKeyData *key)
Definition: tableam.c:98
#define SIGUSR1
Definition: win32_port.h:166
bool LWLockHeldByMe(LWLock *l)
Definition: lwlock.c:1842
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1583
void logicalrep_worker_wakeup(Oid subid, Oid relid)
Definition: launcher.c:596
PGPROC * MyProc
Definition: proc.c:67
int64 TimestampTz
Definition: timestamp.h:39
char * pstrdup(const char *in)
Definition: mcxt.c:1186
void CommitTransactionCommand(void)
Definition: xact.c:2898
#define SpinLockInit(lock)
Definition: spin.h:60
List * workers
Definition: launcher.c:78
#define tuplestore_donestoring(state)
Definition: tuplestore.h:60
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
#define AccessShareLock
Definition: lockdefs.h:36
LogicalRepWorker workers[FLEXIBLE_ARRAY_MEMBER]
Definition: launcher.c:64
TimestampTz last_send_time
XLogRecPtr last_lsn
int bgw_restart_time
Definition: bgworker.h:94
List * list_concat(List *list1, const List *list2)
Definition: list.c:516
int errcode(int sqlerrcode)
Definition: elog.c:608
#define LSNGetDatum(X)
Definition: pg_lsn.h:22
static StopWorkersData * on_commit_stop_workers
Definition: launcher.c:87
#define MemSet(start, val, len)
Definition: c.h:962
#define kill(pid, sig)
Definition: win32_port.h:426
FormData_pg_subscription * Form_pg_subscription
unsigned int Oid
Definition: postgres_ext.h:31
void SetLatch(Latch *latch)
Definition: latch.c:436
NameData subname
#define BGWORKER_SHMEM_ACCESS
Definition: bgworker.h:52
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1682
Snapshot GetTransactionSnapshot(void)
Definition: snapmgr.c:306
#define OidIsValid(objectId)
Definition: c.h:645
void list_free_deep(List *list)
Definition: list.c:1391
char bgw_function_name[BGW_MAXLEN]
Definition: bgworker.h:96
void ResetLatch(Latch *latch)
Definition: latch.c:519
int wal_receiver_timeout
Definition: walreceiver.c:77
Latch procLatch
Definition: proc.h:104
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:344
XLogRecPtr relstate_lsn
static void logicalrep_worker_detach(void)
Definition: launcher.c:663
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1726
void logicalrep_worker_wakeup_ptr(LogicalRepWorker *worker)
Definition: launcher.c:616
Datum bgw_main_arg
Definition: bgworker.h:97
unsigned short uint16
Definition: c.h:358
void pfree(void *pointer)
Definition: mcxt.c:1056
LogicalRepWorker * MyLogicalRepWorker
Definition: launcher.c:56
#define ObjectIdGetDatum(X)
Definition: postgres.h:507
#define ERROR
Definition: elog.h:43
int max_sync_workers_per_subscription
Definition: launcher.c:54
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:372
#define TimestampTzGetDatum(X)
Definition: timestamp.h:32
XLogRecPtr reply_lsn
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:192
static void logicalrep_worker_cleanup(LogicalRepWorker *worker)
Definition: launcher.c:677
void logicalrep_worker_attach(int slot)
Definition: launcher.c:627
HeapTuple heap_getnext(TableScanDesc sscan, ScanDirection direction)
Definition: heapam.c:1290
#define PG_GETARG_OID(n)
Definition: fmgr.h:270
void logicalrep_worker_stop(Oid subid, Oid relid)
Definition: launcher.c:454
void AtEOSubXact_ApplyLauncher(bool isCommit, int nestDepth)
Definition: launcher.c:890
#define SIGHUP
Definition: win32_port.h:154
Size ApplyLauncherShmemSize(void)
Definition: launcher.c:760
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:333
#define BGW_NEVER_RESTART
Definition: bgworker.h:84
static void logicalrep_launcher_onexit(int code, Datum arg)
Definition: launcher.c:695
#define TIMESTAMP_NOBEGIN(j)
Definition: timestamp.h:112
MemoryContext CurrentMemoryContext
Definition: mcxt.c:38
BgwHandleStatus
Definition: bgworker.h:102
static bool on_commit_launcher_wakeup
Definition: launcher.c:98
#define ereport(elevel, rest)
Definition: elog.h:141
MemoryContext TopMemoryContext
Definition: mcxt.c:44
Definition: guc.h:72
List * lappend(List *list, void *datum)
Definition: list.c:322
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
#define WARNING
Definition: elog.h:40
int wal_retrieve_retry_interval
Definition: xlog.c:106
Tuplestorestate * tuplestore_begin_heap(bool randomAccess, bool interXact, int maxKBytes)
Definition: tuplestore.c:318
void BackgroundWorkerInitializeConnection(const char *dbname, const char *username, uint32 flags)
Definition: postmaster.c:5685
Size mul_size(Size s1, Size s2)
Definition: shmem.c:492
static void logicalrep_worker_onexit(int code, Datum arg)
Definition: launcher.c:706
void * palloc0(Size size)
Definition: mcxt.c:980
#define DEFAULT_NAPTIME_PER_CYCLE
Definition: launcher.c:51
uintptr_t Datum
Definition: postgres.h:367
void ApplyLauncherRegister(void)
Definition: launcher.c:779
static void logicalrep_launcher_sighup(SIGNAL_ARGS)
Definition: launcher.c:719
#define PG_STAT_GET_SUBSCRIPTION_COLS
Size add_size(Size s1, Size s2)
Definition: shmem.c:475
int work_mem
Definition: globals.c:121
int logicalrep_sync_worker_count(Oid subid)
Definition: launcher.c:736
static void WaitForReplicationWorkerAttach(LogicalRepWorker *worker, uint16 generation, BackgroundWorkerHandle *handle)
Definition: launcher.c:175
#define InvalidOid
Definition: postgres_ext.h:36
int allowedModes
Definition: execnodes.h:302
TimestampTz launch_time
pqsigfunc pqsignal(int signum, pqsigfunc handler)
Definition: signal.c:170
static List * get_subscription_list(void)
Definition: launcher.c:110
int GetCurrentTransactionNestLevel(void)
Definition: xact.c:841
struct StopWorkersData StopWorkersData
SetFunctionReturnMode returnMode
Definition: execnodes.h:304
int max_replication_slots
Definition: slot.c:99
TimestampTz last_recv_time
#define PG_ARGISNULL(n)
Definition: fmgr.h:204
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define SIGNAL_ARGS
Definition: c.h:1288
void logicalrep_worker_launch(Oid dbid, Oid subid, const char *subname, Oid userid, Oid relid)
Definition: launcher.c:284
char bgw_name[BGW_MAXLEN]
Definition: bgworker.h:90
#define Assert(condition)
Definition: c.h:739
#define lfirst(lc)
Definition: pg_list.h:190
#define BGWORKER_BACKEND_DATABASE_CONNECTION
Definition: bgworker.h:59
LogicalRepWorker * logicalrep_worker_find(Oid subid, Oid relid, bool only_running)
Definition: launcher.c:233
void StartTransactionCommand(void)
Definition: xact.c:2797
int max_logical_replication_workers
Definition: launcher.c:53
#define BGW_MAXLEN
Definition: bgworker.h:85
size_t Size
Definition: c.h:467
BgWorkerStartTime bgw_start_time
Definition: bgworker.h:93
struct StopWorkersData * parent
Definition: launcher.c:79
bool RegisterDynamicBackgroundWorker(BackgroundWorker *worker, BackgroundWorkerHandle **handle)
Definition: bgworker.c:932
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1122
#define MAXALIGN(LEN)
Definition: c.h:692
void ApplyLauncherShmemInit(void)
Definition: launcher.c:808
#define walrcv_disconnect(conn)
Definition: walreceiver.h:281
MemoryContext ecxt_per_query_memory
Definition: execnodes.h:230
struct LogicalRepCtxStruct LogicalRepCtxStruct
Tuplestorestate * setResult
Definition: execnodes.h:307
static void table_endscan(TableScanDesc scan)
Definition: tableam.h:831
static Datum values[MAXATTR]
Definition: bootstrap.c:167
ExprContext * econtext
Definition: execnodes.h:300
#define Int32GetDatum(X)
Definition: postgres.h:479
char bgw_type[BGW_MAXLEN]
Definition: bgworker.h:91
TupleDesc setDesc
Definition: execnodes.h:308
void * palloc(Size size)
Definition: mcxt.c:949
int errmsg(const char *fmt,...)
Definition: elog.c:822
pid_t bgw_notify_pid
Definition: bgworker.h:99
bool IsLogicalLauncher(void)
Definition: launcher.c:1078
#define elog(elevel,...)
Definition: elog.h:228
LogicalRepCtxStruct * LogicalRepCtx
Definition: launcher.c:67
int i
#define NameStr(name)
Definition: c.h:616
bool IsBackendPid(int pid)
Definition: procarray.c:2460
void * arg
struct Latch * MyLatch
Definition: globals.c:54
#define PG_FUNCTION_ARGS
Definition: fmgr.h:188
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
Datum pg_stat_get_subscription(PG_FUNCTION_ARGS)
Definition: launcher.c:1087
void logicalrep_worker_stop_at_commit(Oid subid, Oid relid)
Definition: launcher.c:551
static void static void status(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:226
static void ApplyLauncherWakeup(void)
Definition: launcher.c:952
void ApplyLauncherWakeupAtCommit(void)
Definition: launcher.c:945
bool XactManipulatesLogicalReplicationWorkers(void)
Definition: launcher.c:839
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:39
char bgw_library_name[BGW_MAXLEN]
Definition: bgworker.h:95
Definition: pg_list.h:50
#define snprintf
Definition: port.h:192
int pid
Definition: proc.h:109
#define WL_LATCH_SET
Definition: latch.h:124
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1547
#define die(msg)
Definition: pg_test_fsync.c:96
BgwHandleStatus GetBackgroundWorkerPid(BackgroundWorkerHandle *handle, pid_t *pidp)
Definition: bgworker.c:1044
#define WL_EXIT_ON_PM_DEATH
Definition: latch.h:129
TimestampTz reply_time
struct LogicalRepWorkerId LogicalRepWorkerId
void BackgroundWorkerUnblockSignals(void)
Definition: postmaster.c:5737