launcher.c (PostgreSQL source code, git master)
1 /*-------------------------------------------------------------------------
2  * launcher.c
3  * PostgreSQL logical replication worker launcher process
4  *
5  * Copyright (c) 2016-2017, PostgreSQL Global Development Group
6  *
7  * IDENTIFICATION
8  * src/backend/replication/logical/launcher.c
9  *
10  * NOTES
11  * This module contains the logical replication worker launcher which
12  * uses the background worker infrastructure to start the logical
13  * replication workers for every enabled subscription.
14  *
15  *-------------------------------------------------------------------------
16  */
17 
18 #include "postgres.h"
19 
20 #include "funcapi.h"
21 #include "miscadmin.h"
22 #include "pgstat.h"
23 
24 #include "access/heapam.h"
25 #include "access/htup.h"
26 #include "access/htup_details.h"
27 #include "access/xact.h"
28 
 29 #include "catalog/pg_subscription.h"
 30 #include "catalog/pg_subscription_rel.h"
 31 
32 #include "libpq/pqsignal.h"
33 
34 #include "postmaster/bgworker.h"
 35 #include "postmaster/fork_process.h"
 36 #include "postmaster/postmaster.h"
37 
 38 #include "replication/logicallauncher.h"
 39 #include "replication/logicalworker.h"
 40 #include "replication/slot.h"
 41 #include "replication/walreceiver.h"
 42 #include "replication/worker_internal.h"
 43 
44 #include "storage/ipc.h"
45 #include "storage/proc.h"
46 #include "storage/procarray.h"
47 #include "storage/procsignal.h"
48 
49 #include "tcop/tcopprot.h"
50 
51 #include "utils/memutils.h"
52 #include "utils/pg_lsn.h"
53 #include "utils/ps_status.h"
54 #include "utils/timeout.h"
55 #include "utils/snapmgr.h"
56 
57 /* max sleep time between cycles (3min) */
58 #define DEFAULT_NAPTIME_PER_CYCLE 180000L
59 
 60 int max_logical_replication_workers = 4;
 61 int max_sync_workers_per_subscription = 2;
 62 
 63 LogicalRepWorker *MyLogicalRepWorker = NULL;
 64 
65 typedef struct LogicalRepCtxStruct
66 {
67  /* Supervisor process. */
68  pid_t launcher_pid;
69 
70  /* Background workers. */
71  LogicalRepWorker workers[FLEXIBLE_ARRAY_MEMBER];
 72 } LogicalRepCtxStruct;
 73 
 74 LogicalRepCtxStruct *LogicalRepCtx;
 75 
76 typedef struct LogicalRepWorkerId
77 {
 78  Oid subid;
 79  Oid relid;
 80 } LogicalRepWorkerId;
 81 
 82 static List *on_commit_stop_workers = NIL;
 83 
84 static void ApplyLauncherWakeup(void);
85 static void logicalrep_launcher_onexit(int code, Datum arg);
86 static void logicalrep_worker_onexit(int code, Datum arg);
87 static void logicalrep_worker_detach(void);
88 static void logicalrep_worker_cleanup(LogicalRepWorker *worker);
89 
90 /* Flags set by signal handlers */
91 static volatile sig_atomic_t got_SIGHUP = false;
92 
93 static bool on_commit_launcher_wakeup = false;
94 
 95 Datum pg_stat_get_subscription(PG_FUNCTION_ARGS);
 96 
97 
98 /*
99  * Load the list of subscriptions.
100  *
101  * Only the fields interesting for worker start/stop functions are filled for
102  * each subscription.
103  */
104 static List *
 105 get_subscription_list(void)
 106 {
107  List *res = NIL;
108  Relation rel;
109  HeapScanDesc scan;
110  HeapTuple tup;
111  MemoryContext resultcxt;
112 
113  /* This is the context that we will allocate our output data in */
114  resultcxt = CurrentMemoryContext;
115 
116  /*
 117  * Start a transaction so we can access pg_subscription, and get a snapshot.
118  * We don't have a use for the snapshot itself, but we're interested in
119  * the secondary effect that it sets RecentGlobalXmin. (This is critical
120  * for anything that reads heap pages, because HOT may decide to prune
121  * them even if the process doesn't attempt to modify any tuples.)
122  */
 123  StartTransactionCommand();
 124  (void) GetTransactionSnapshot();
125 
 126  rel = heap_open(SubscriptionRelationId, AccessShareLock);
 127  scan = heap_beginscan_catalog(rel, 0, NULL);
128 
 129  while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection)))
 130  {
 131  Form_pg_subscription subform = (Form_pg_subscription) GETSTRUCT(tup);
 132  Subscription *sub;
133  MemoryContext oldcxt;
134 
135  /*
136  * Allocate our results in the caller's context, not the
137  * transaction's. We do this inside the loop, and restore the original
138  * context at the end, so that leaky things like heap_getnext() are
139  * not called in a potentially long-lived context.
140  */
141  oldcxt = MemoryContextSwitchTo(resultcxt);
142 
143  sub = (Subscription *) palloc0(sizeof(Subscription));
144  sub->oid = HeapTupleGetOid(tup);
145  sub->dbid = subform->subdbid;
146  sub->owner = subform->subowner;
147  sub->enabled = subform->subenabled;
148  sub->name = pstrdup(NameStr(subform->subname));
149  /* We don't fill fields we are not interested in. */
150 
151  res = lappend(res, sub);
152  MemoryContextSwitchTo(oldcxt);
153  }
154 
155  heap_endscan(scan);
 156  heap_close(rel, AccessShareLock);
 157 
 158  CommitTransactionCommand();
 159 
160  return res;
161 }
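
/*
 * Usage sketch (mirrors ApplyLauncherMain below): because the result is
 * allocated in CurrentMemoryContext, callers typically switch to a
 * short-lived context first and delete it when done:
 *
 *	oldctx = MemoryContextSwitchTo(subctx);
 *	sublist = get_subscription_list();
 *	... walk sublist, launch missing workers ...
 *	MemoryContextSwitchTo(oldctx);
 *	MemoryContextDelete(subctx);
 */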
162 
163 /*
164  * Wait for a background worker to start up and attach to the shmem context.
165  *
166  * This is only needed for cleaning up the shared memory in case the worker
167  * fails to attach.
168  */
169 static void
 170 WaitForReplicationWorkerAttach(LogicalRepWorker *worker,
 171  uint16 generation,
172  BackgroundWorkerHandle *handle)
173 {
 174  BgwHandleStatus status;
 175  int rc;
176 
177  for (;;)
178  {
179  pid_t pid;
180 
 181  CHECK_FOR_INTERRUPTS();
 182 
183  LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
184 
185  /* Worker either died or has started; no need to do anything. */
186  if (!worker->in_use || worker->proc)
187  {
188  LWLockRelease(LogicalRepWorkerLock);
189  return;
190  }
191 
192  LWLockRelease(LogicalRepWorkerLock);
193 
194  /* Check if worker has died before attaching, and clean up after it. */
195  status = GetBackgroundWorkerPid(handle, &pid);
196 
197  if (status == BGWH_STOPPED)
198  {
199  LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
200  /* Ensure that this was indeed the worker we waited for. */
201  if (generation == worker->generation)
 202  logicalrep_worker_cleanup(worker);
 203  LWLockRelease(LogicalRepWorkerLock);
204  return;
205  }
206 
207  /*
 208  * We need a timeout because we generally don't get notified via latch
 209  * about the worker attach. But we don't expect to have to wait long.
210  */
211  rc = WaitLatch(MyLatch,
 212  WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
 213  10L, WAIT_EVENT_BGWORKER_STARTUP);
 214 
215  /* emergency bailout if postmaster has died */
216  if (rc & WL_POSTMASTER_DEATH)
217  proc_exit(1);
218 
219  if (rc & WL_LATCH_SET)
220  {
 221  ResetLatch(MyLatch);
 222  CHECK_FOR_INTERRUPTS();
 223  }
224  }
225 
226  return;
227 }
228 
229 /*
230  * Walks the workers array and searches for one that matches given
231  * subscription id and relid.
232  */
 233 LogicalRepWorker *
 234 logicalrep_worker_find(Oid subid, Oid relid, bool only_running)
235 {
236  int i;
237  LogicalRepWorker *res = NULL;
238 
239  Assert(LWLockHeldByMe(LogicalRepWorkerLock));
240 
241  /* Search for attached worker for a given subscription id. */
242  for (i = 0; i < max_logical_replication_workers; i++)
243  {
244  LogicalRepWorker *w = &LogicalRepCtx->workers[i];
245 
246  if (w->in_use && w->subid == subid && w->relid == relid &&
247  (!only_running || w->proc))
248  {
249  res = w;
250  break;
251  }
252  }
253 
254  return res;
255 }
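
/*
 * Minimal usage sketch: callers must hold LogicalRepWorkerLock (shared mode
 * suffices for lookups), as logicalrep_worker_wakeup() further below does:
 *
 *	LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
 *	worker = logicalrep_worker_find(subid, InvalidOid, true);
 *	if (worker)
 *		logicalrep_worker_wakeup_ptr(worker);
 *	LWLockRelease(LogicalRepWorkerLock);
 */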
256 
257 /*
258  * Similar to logicalrep_worker_find(), but returns list of all workers for
 259  * the subscription, instead of just one.
260  */
261 List *
262 logicalrep_workers_find(Oid subid, bool only_running)
263 {
264  int i;
265  List *res = NIL;
266 
267  Assert(LWLockHeldByMe(LogicalRepWorkerLock));
268 
269  /* Search for attached worker for a given subscription id. */
270  for (i = 0; i < max_logical_replication_workers; i++)
271  {
272  LogicalRepWorker *w = &LogicalRepCtx->workers[i];
273 
274  if (w->in_use && w->subid == subid && (!only_running || w->proc))
275  res = lappend(res, w);
276  }
277 
278  return res;
279 }
280 
281 /*
282  * Start new apply background worker, if possible.
283  */
284 void
285 logicalrep_worker_launch(Oid dbid, Oid subid, const char *subname, Oid userid,
286  Oid relid)
287 {
288  BackgroundWorker bgw;
289  BackgroundWorkerHandle *bgw_handle;
290  uint16 generation;
291  int i;
292  int slot = 0;
293  LogicalRepWorker *worker = NULL;
294  int nsyncworkers;
 295  TimestampTz now;
 296 
297  ereport(DEBUG1,
298  (errmsg("starting logical replication worker for subscription \"%s\"",
299  subname)));
300 
301  /* Report this after the initial starting message for consistency. */
302  if (max_replication_slots == 0)
303  ereport(ERROR,
304  (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
305  errmsg("cannot start logical replication workers when max_replication_slots = 0")));
306 
307  /*
308  * We need to do the modification of the shared memory under lock so that
309  * we have consistent view.
310  */
311  LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
312 
313 retry:
314  /* Find unused worker slot. */
315  for (i = 0; i < max_logical_replication_workers; i++)
316  {
317  LogicalRepWorker *w = &LogicalRepCtx->workers[i];
318 
319  if (!w->in_use)
320  {
321  worker = w;
322  slot = i;
323  break;
324  }
325  }
326 
327  nsyncworkers = logicalrep_sync_worker_count(subid);
328 
329  now = GetCurrentTimestamp();
330 
331  /*
332  * If we didn't find a free slot, try to do garbage collection. The
333  * reason we do this is because if some worker failed to start up and its
334  * parent has crashed while waiting, the in_use state was never cleared.
335  */
336  if (worker == NULL || nsyncworkers >= max_sync_workers_per_subscription)
337  {
338  bool did_cleanup = false;
339 
340  for (i = 0; i < max_logical_replication_workers; i++)
341  {
342  LogicalRepWorker *w = &LogicalRepCtx->workers[i];
343 
344  /*
345  * If the worker was marked in use but didn't manage to attach in
346  * time, clean it up.
347  */
348  if (w->in_use && !w->proc &&
 349  TimestampDifferenceExceeds(w->launch_time, now,
 350  wal_receiver_timeout))
 351  {
352  elog(WARNING,
353  "logical replication worker for subscription %u took too long to start; canceled",
354  w->subid);
355 
 356  logicalrep_worker_cleanup(w);
 357  did_cleanup = true;
358  }
359  }
360 
361  if (did_cleanup)
362  goto retry;
363  }
364 
365  /*
366  * If we reached the sync worker limit per subscription, just exit
367  * silently as we might get here because of an otherwise harmless race
368  * condition.
369  */
370  if (nsyncworkers >= max_sync_workers_per_subscription)
371  {
372  LWLockRelease(LogicalRepWorkerLock);
373  return;
374  }
375 
376  /*
377  * However if there are no more free worker slots, inform user about it
378  * before exiting.
379  */
380  if (worker == NULL)
381  {
382  LWLockRelease(LogicalRepWorkerLock);
 383  ereport(WARNING,
 384  (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
385  errmsg("out of logical replication worker slots"),
386  errhint("You might need to increase max_logical_replication_workers.")));
387  return;
388  }
389 
390  /* Prepare the worker slot. */
391  worker->launch_time = now;
392  worker->in_use = true;
393  worker->generation++;
394  worker->proc = NULL;
395  worker->dbid = dbid;
396  worker->userid = userid;
397  worker->subid = subid;
398  worker->relid = relid;
399  worker->relstate = SUBREL_STATE_UNKNOWN;
 400  worker->relstate_lsn = InvalidXLogRecPtr;
 401  worker->last_lsn = InvalidXLogRecPtr;
 402  TIMESTAMP_NOBEGIN(worker->last_send_time);
 403  TIMESTAMP_NOBEGIN(worker->last_recv_time);
 404  worker->reply_lsn = InvalidXLogRecPtr;
405  TIMESTAMP_NOBEGIN(worker->reply_time);
406 
407  /* Before releasing lock, remember generation for future identification. */
408  generation = worker->generation;
409 
410  LWLockRelease(LogicalRepWorkerLock);
411 
412  /* Register the new dynamic worker. */
413  memset(&bgw, 0, sizeof(bgw));
 414  bgw.bgw_flags = BGWORKER_SHMEM_ACCESS |
 415  BGWORKER_BACKEND_DATABASE_CONNECTION;
 416  bgw.bgw_start_time = BgWorkerStart_RecoveryFinished;
 417  snprintf(bgw.bgw_library_name, BGW_MAXLEN, "postgres");
418  snprintf(bgw.bgw_function_name, BGW_MAXLEN, "ApplyWorkerMain");
419  if (OidIsValid(relid))
 420  snprintf(bgw.bgw_name, BGW_MAXLEN,
 421  "logical replication worker for subscription %u sync %u", subid, relid);
422  else
 423  snprintf(bgw.bgw_name, BGW_MAXLEN,
 424  "logical replication worker for subscription %u", subid);
425  snprintf(bgw.bgw_type, BGW_MAXLEN, "logical replication worker");
426 
 427  bgw.bgw_restart_time = BGW_NEVER_RESTART;
 428  bgw.bgw_notify_pid = MyProcPid;
 429  bgw.bgw_main_arg = Int32GetDatum(slot);
430 
431  if (!RegisterDynamicBackgroundWorker(&bgw, &bgw_handle))
432  {
433  /* Failed to start worker, so clean up the worker slot. */
434  LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
435  Assert(generation == worker->generation);
 436  logicalrep_worker_cleanup(worker);
 437  LWLockRelease(LogicalRepWorkerLock);
438 
 439  ereport(WARNING,
 440  (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
441  errmsg("out of background worker slots"),
442  errhint("You might need to increase max_worker_processes.")));
443  return;
444  }
445 
446  /* Now wait until it attaches. */
447  WaitForReplicationWorkerAttach(worker, generation, bgw_handle);
448 }
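
/*
 * Call sketch: passing InvalidOid as relid starts the per-subscription apply
 * worker, while a valid relid starts a table synchronization worker for that
 * relation (the bgw_name built above differs accordingly). The launcher's
 * main loop below launches the apply worker like this:
 *
 *	logicalrep_worker_launch(sub->dbid, sub->oid, sub->name,
 *							 sub->owner, InvalidOid);
 */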
449 
450 /*
451  * Stop the logical replication worker for subid/relid, if any, and wait until
452  * it detaches from the slot.
453  */
454 void
 455 logicalrep_worker_stop(Oid subid, Oid relid)
 456 {
457  LogicalRepWorker *worker;
458  uint16 generation;
459 
460  LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
461 
462  worker = logicalrep_worker_find(subid, relid, false);
463 
464  /* No worker, nothing to do. */
465  if (!worker)
466  {
467  LWLockRelease(LogicalRepWorkerLock);
468  return;
469  }
470 
471  /*
 472  * Remember which generation our worker was, so we can check later whether
 473  * what we see is still the same one.
474  */
475  generation = worker->generation;
476 
477  /*
478  * If we found a worker but it does not have proc set then it is still
479  * starting up; wait for it to finish starting and then kill it.
480  */
481  while (worker->in_use && !worker->proc)
482  {
483  int rc;
484 
485  LWLockRelease(LogicalRepWorkerLock);
486 
487  /* Wait a bit --- we don't expect to have to wait long. */
488  rc = WaitLatch(MyLatch,
 489  WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
 490  10L, WAIT_EVENT_BGWORKER_STARTUP);
 491 
492  /* emergency bailout if postmaster has died */
493  if (rc & WL_POSTMASTER_DEATH)
494  proc_exit(1);
495 
496  if (rc & WL_LATCH_SET)
497  {
 498  ResetLatch(MyLatch);
 499  CHECK_FOR_INTERRUPTS();
 500  }
501 
502  /* Recheck worker status. */
503  LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
504 
505  /*
506  * Check whether the worker slot is no longer used, which would mean
507  * that the worker has exited, or whether the worker generation is
508  * different, meaning that a different worker has taken the slot.
509  */
510  if (!worker->in_use || worker->generation != generation)
511  {
512  LWLockRelease(LogicalRepWorkerLock);
513  return;
514  }
515 
516  /* Worker has assigned proc, so it has started. */
517  if (worker->proc)
518  break;
519  }
520 
521  /* Now terminate the worker ... */
522  kill(worker->proc->pid, SIGTERM);
523 
524  /* ... and wait for it to die. */
525  for (;;)
526  {
527  int rc;
528 
529  /* is it gone? */
530  if (!worker->proc || worker->generation != generation)
531  break;
532 
533  LWLockRelease(LogicalRepWorkerLock);
534 
535  /* Wait a bit --- we don't expect to have to wait long. */
536  rc = WaitLatch(MyLatch,
 537  WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
 538  10L, WAIT_EVENT_BGWORKER_SHUTDOWN);
 539 
540  /* emergency bailout if postmaster has died */
541  if (rc & WL_POSTMASTER_DEATH)
542  proc_exit(1);
543 
544  if (rc & WL_LATCH_SET)
545  {
 546  ResetLatch(MyLatch);
 547  CHECK_FOR_INTERRUPTS();
 548  }
549 
550  LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
551  }
552 
553  LWLockRelease(LogicalRepWorkerLock);
554 }
555 
556 /*
557  * Request worker for specified sub/rel to be stopped on commit.
558  */
559 void
 560 logicalrep_worker_stop_at_commit(Oid subid, Oid relid)
 561 {
562  LogicalRepWorkerId *wid;
563  MemoryContext oldctx;
564 
565  /* Make sure we store the info in context that survives until commit. */
 566  oldctx = MemoryContextSwitchTo(TopTransactionContext);
 567 
568  wid = palloc(sizeof(LogicalRepWorkerId));
569  wid->subid = subid;
570  wid->relid = relid;
571 
572  on_commit_stop_workers = lappend(on_commit_stop_workers, wid);
573 
574  MemoryContextSwitchTo(oldctx);
575 }
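
/*
 * Note: nothing is stopped here; the recorded (subid, relid) pairs are
 * processed at commit by AtEOXact_ApplyLauncher(true) below, which calls
 * logicalrep_worker_stop() for each entry in on_commit_stop_workers.
 */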
576 
577 /*
578  * Wake up (using latch) any logical replication worker for specified sub/rel.
579  */
580 void
 581 logicalrep_worker_wakeup(Oid subid, Oid relid)
 582 {
583  LogicalRepWorker *worker;
584 
585  LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
586 
587  worker = logicalrep_worker_find(subid, relid, true);
588 
589  if (worker)
 590  logicalrep_worker_wakeup_ptr(worker);
 591 
592  LWLockRelease(LogicalRepWorkerLock);
593 }
594 
595 /*
596  * Wake up (using latch) the specified logical replication worker.
597  *
598  * Caller must hold lock, else worker->proc could change under us.
599  */
600 void
 601 logicalrep_worker_wakeup_ptr(LogicalRepWorker *worker)
 602 {
603  Assert(LWLockHeldByMe(LogicalRepWorkerLock));
604 
605  SetLatch(&worker->proc->procLatch);
606 }
607 
608 /*
609  * Attach to a slot.
610  */
611 void
 612 logicalrep_worker_attach(int slot)
 613 {
614  /* Block concurrent access. */
615  LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
616 
617  Assert(slot >= 0 && slot < max_logical_replication_workers);
618  MyLogicalRepWorker = &LogicalRepCtx->workers[slot];
619 
620  if (!MyLogicalRepWorker->in_use)
621  {
622  LWLockRelease(LogicalRepWorkerLock);
623  ereport(ERROR,
624  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
625  errmsg("logical replication worker slot %d is empty, cannot attach",
626  slot)));
627  }
628 
629  if (MyLogicalRepWorker->proc)
630  {
631  LWLockRelease(LogicalRepWorkerLock);
632  ereport(ERROR,
633  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
634  errmsg("logical replication worker slot %d is already used by "
635  "another worker, cannot attach", slot)));
636  }
637 
638  MyLogicalRepWorker->proc = MyProc;
 639  before_shmem_exit(logicalrep_worker_onexit, (Datum) 0);
 640 
641  LWLockRelease(LogicalRepWorkerLock);
642 }
643 
644 /*
645  * Detach the worker (cleans up the worker info).
646  */
647 static void
 648 logicalrep_worker_detach(void)
 649 {
650  /* Block concurrent access. */
651  LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
652 
653  logicalrep_worker_cleanup(MyLogicalRepWorker);
654 
655  LWLockRelease(LogicalRepWorkerLock);
656 }
657 
658 /*
659  * Clean up worker info.
660  */
661 static void
 662 logicalrep_worker_cleanup(LogicalRepWorker *worker)
 663 {
664  Assert(LWLockHeldByMeInMode(LogicalRepWorkerLock, LW_EXCLUSIVE));
665 
666  worker->in_use = false;
667  worker->proc = NULL;
668  worker->dbid = InvalidOid;
669  worker->userid = InvalidOid;
670  worker->subid = InvalidOid;
671  worker->relid = InvalidOid;
672 }
673 
674 /*
675  * Cleanup function for logical replication launcher.
676  *
677  * Called on logical replication launcher exit.
678  */
679 static void
 680 logicalrep_launcher_onexit(int code, Datum arg)
 681 {
682  LogicalRepCtx->launcher_pid = 0;
683 }
684 
685 /*
686  * Cleanup function.
687  *
688  * Called on logical replication worker exit.
689  */
690 static void
 691 logicalrep_worker_onexit(int code, Datum arg)
 692 {
693  /* Disconnect gracefully from the remote side. */
694  if (wrconn)
 695  walrcv_disconnect(wrconn);
 696 
 697  logicalrep_worker_detach();
 698 
 699  ApplyLauncherWakeup();
 700 
701 
702 /* SIGHUP: set flag to reload configuration at next convenient time */
703 static void
 704 logicalrep_launcher_sighup(SIGNAL_ARGS)
 705 {
706  int save_errno = errno;
707 
708  got_SIGHUP = true;
709 
710  /* Waken anything waiting on the process latch */
711  SetLatch(MyLatch);
712 
713  errno = save_errno;
714 }
715 
716 /*
717  * Count the number of registered (not necessarily running) sync workers
718  * for a subscription.
719  */
720 int
 721 logicalrep_sync_worker_count(Oid subid)
 722 {
723  int i;
724  int res = 0;
725 
726  Assert(LWLockHeldByMe(LogicalRepWorkerLock));
727 
728  /* Search for attached worker for a given subscription id. */
729  for (i = 0; i < max_logical_replication_workers; i++)
730  {
731  LogicalRepWorker *w = &LogicalRepCtx->workers[i];
732 
733  if (w->subid == subid && OidIsValid(w->relid))
734  res++;
735  }
736 
737  return res;
738 }
739 
740 /*
741  * ApplyLauncherShmemSize
742  * Compute space needed for replication launcher shared memory
743  */
744 Size
 745 ApplyLauncherShmemSize(void)
 746 {
747  Size size;
748 
749  /*
750  * Need the fixed struct and the array of LogicalRepWorker.
751  */
752  size = sizeof(LogicalRepCtxStruct);
753  size = MAXALIGN(size);
 754  size = add_size(size, mul_size(max_logical_replication_workers,
 755  sizeof(LogicalRepWorker)));
756  return size;
757 }
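
/*
 * Worked example (sketch): the returned size is essentially
 *
 *	MAXALIGN(sizeof(LogicalRepCtxStruct))
 *		+ max_logical_replication_workers * sizeof(LogicalRepWorker)
 *
 * i.e. the fixed header plus one worker slot per allowed worker, with
 * add_size/mul_size guarding against overflow.
 */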
758 
759 /*
760  * ApplyLauncherRegister
761  * Register a background worker running the logical replication launcher.
762  */
763 void
 764 ApplyLauncherRegister(void)
 765 {
766  BackgroundWorker bgw;
767 
 768  if (max_logical_replication_workers == 0)
 769  return;
770 
771  memset(&bgw, 0, sizeof(bgw));
 772  bgw.bgw_flags = BGWORKER_SHMEM_ACCESS |
 773  BGWORKER_BACKEND_DATABASE_CONNECTION;
 774  bgw.bgw_start_time = BgWorkerStart_RecoveryFinished;
 775  snprintf(bgw.bgw_library_name, BGW_MAXLEN, "postgres");
776  snprintf(bgw.bgw_function_name, BGW_MAXLEN, "ApplyLauncherMain");
 777  snprintf(bgw.bgw_name, BGW_MAXLEN,
 778  "logical replication launcher");
 779  snprintf(bgw.bgw_type, BGW_MAXLEN,
 780  "logical replication launcher");
781  bgw.bgw_restart_time = 5;
782  bgw.bgw_notify_pid = 0;
783  bgw.bgw_main_arg = (Datum) 0;
784 
 785  RegisterBackgroundWorker(&bgw);
 786 }
787 
788 /*
789  * ApplyLauncherShmemInit
790  * Allocate and initialize replication launcher shared memory
791  */
792 void
 793 ApplyLauncherShmemInit(void)
 794 {
795  bool found;
796 
797  LogicalRepCtx = (LogicalRepCtxStruct *)
798  ShmemInitStruct("Logical Replication Launcher Data",
 799  ApplyLauncherShmemSize(),
 800  &found);
801 
802  if (!found)
803  {
804  int slot;
805 
806  memset(LogicalRepCtx, 0, ApplyLauncherShmemSize());
807 
808  /* Initialize memory and spin locks for each worker slot. */
809  for (slot = 0; slot < max_logical_replication_workers; slot++)
810  {
811  LogicalRepWorker *worker = &LogicalRepCtx->workers[slot];
812 
813  memset(worker, 0, sizeof(LogicalRepWorker));
814  SpinLockInit(&worker->relmutex);
815  }
816  }
817 }
818 
819 /*
820  * Check whether current transaction has manipulated logical replication
821  * workers.
822  */
823 bool
 824 XactManipulatesLogicalReplicationWorkers(void)
 825 {
826  return (on_commit_stop_workers != NIL);
827 }
828 
829 /*
830  * Wakeup the launcher on commit if requested.
831  */
832 void
 833 AtEOXact_ApplyLauncher(bool isCommit)
 834 {
835  if (isCommit)
836  {
837  ListCell *lc;
838 
839  foreach(lc, on_commit_stop_workers)
840  {
841  LogicalRepWorkerId *wid = lfirst(lc);
842 
843  logicalrep_worker_stop(wid->subid, wid->relid);
844  }
845 
 846  if (on_commit_launcher_wakeup)
 847  ApplyLauncherWakeup();
 848  }
849 
850  /*
851  * No need to pfree on_commit_stop_workers. It was allocated in
852  * transaction memory context, which is going to be cleaned soon.
853  */
854  on_commit_stop_workers = NIL;
 855  on_commit_launcher_wakeup = false;
 856 }
857 
858 /*
859  * Request wakeup of the launcher on commit of the transaction.
860  *
 861  * This is used to signal the launcher to stop sleeping and process the
 862  * subscriptions when the current transaction commits. Should be used when a
 863  * new tuple was added to the pg_subscription catalog.
864 */
865 void
 866 ApplyLauncherWakeupAtCommit(void)
 867 {
 868  if (!on_commit_launcher_wakeup)
 869  on_commit_launcher_wakeup = true;
 870 }
871 
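/*
 * Typical flow (sketch): code that inserts a new pg_subscription tuple
 * (e.g. CREATE SUBSCRIPTION) is expected to call ApplyLauncherWakeupAtCommit();
 * the flag is consumed by AtEOXact_ApplyLauncher() above, which signals the
 * launcher via ApplyLauncherWakeup() so the new subscription is noticed
 * without waiting out DEFAULT_NAPTIME_PER_CYCLE.
 */
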
872 static void
 873 ApplyLauncherWakeup(void)
 874 {
875  if (LogicalRepCtx->launcher_pid != 0)
876  kill(LogicalRepCtx->launcher_pid, SIGUSR1);
877 }
878 
879 /*
880  * Main loop for the apply launcher process.
881  */
882 void
 883 ApplyLauncherMain(Datum main_arg)
 884 {
885  TimestampTz last_start_time = 0;
886 
887  ereport(DEBUG1,
888  (errmsg("logical replication launcher started")));
889 
 890  before_shmem_exit(logicalrep_launcher_onexit, (Datum) 0);
 891 
892  Assert(LogicalRepCtx->launcher_pid == 0);
893  LogicalRepCtx->launcher_pid = MyProcPid;
894 
895  /* Establish signal handlers. */
 896  pqsignal(SIGHUP, logicalrep_launcher_sighup);
 897  pqsignal(SIGTERM, die);
 898  BackgroundWorkerUnblockSignals();
 899 
900  /*
901  * Establish connection to nailed catalogs (we only ever access
902  * pg_subscription).
903  */
 904  BackgroundWorkerInitializeConnection(NULL, NULL);
 905 
906  /* Enter main loop */
907  for (;;)
908  {
909  int rc;
910  List *sublist;
911  ListCell *lc;
912  MemoryContext subctx;
913  MemoryContext oldctx;
 914  TimestampTz now;
 915  long wait_time = DEFAULT_NAPTIME_PER_CYCLE;
916 
 917  CHECK_FOR_INTERRUPTS();
 918 
919  now = GetCurrentTimestamp();
920 
 921  /* Limit the start retry to once per wal_retrieve_retry_interval */
922  if (TimestampDifferenceExceeds(last_start_time, now,
 923  wal_retrieve_retry_interval))
 924  {
925  /* Use temporary context for the database list and worker info. */
 926  subctx = AllocSetContextCreate(TopMemoryContext,
 927  "Logical Replication Launcher sublist",
 928  ALLOCSET_DEFAULT_MINSIZE,
 929  ALLOCSET_DEFAULT_INITSIZE,
 930  ALLOCSET_DEFAULT_MAXSIZE);
 931  oldctx = MemoryContextSwitchTo(subctx);
932 
933  /* search for subscriptions to start or stop. */
934  sublist = get_subscription_list();
935 
936  /* Start the missing workers for enabled subscriptions. */
937  foreach(lc, sublist)
938  {
939  Subscription *sub = (Subscription *) lfirst(lc);
940  LogicalRepWorker *w;
941 
942  if (!sub->enabled)
943  continue;
944 
945  LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
946  w = logicalrep_worker_find(sub->oid, InvalidOid, false);
947  LWLockRelease(LogicalRepWorkerLock);
948 
949  if (w == NULL)
950  {
951  last_start_time = now;
952  wait_time = wal_retrieve_retry_interval;
953 
954  logicalrep_worker_launch(sub->dbid, sub->oid, sub->name,
955  sub->owner, InvalidOid);
956  }
957  }
958 
959  /* Switch back to original memory context. */
960  MemoryContextSwitchTo(oldctx);
961  /* Clean the temporary memory. */
962  MemoryContextDelete(subctx);
963  }
964  else
965  {
966  /*
 967  * The wait in the previous cycle was interrupted less than
 968  * wal_retrieve_retry_interval after the last worker was started, which
 969  * usually means the worker crashed, so retry after
 970  * wal_retrieve_retry_interval again.
971  */
972  wait_time = wal_retrieve_retry_interval;
973  }
974 
975  /* Wait for more work. */
976  rc = WaitLatch(MyLatch,
 977  WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
 978  wait_time,
 979  WAIT_EVENT_LOGICAL_LAUNCHER_MAIN);
 980 
981  /* emergency bailout if postmaster has died */
982  if (rc & WL_POSTMASTER_DEATH)
983  proc_exit(1);
984 
985  if (rc & WL_LATCH_SET)
986  {
 987  ResetLatch(MyLatch);
 988  CHECK_FOR_INTERRUPTS();
 989  }
990 
991  if (got_SIGHUP)
992  {
993  got_SIGHUP = false;
 994  ProcessConfigFile(PGC_SIGHUP);
 995  }
996  }
997 
998  /* Not reachable */
999 }
1000 
1001 /*
1002  * Is current process the logical replication launcher?
1003  */
1004 bool
 1005 IsLogicalLauncher(void)
 1006 {
1007  return LogicalRepCtx->launcher_pid == MyProcPid;
1008 }
1009 
1010 /*
1011  * Returns state of the subscriptions.
1012  */
1013 Datum
 1014 pg_stat_get_subscription(PG_FUNCTION_ARGS)
 1015 {
1016 #define PG_STAT_GET_SUBSCRIPTION_COLS 8
1017  Oid subid = PG_ARGISNULL(0) ? InvalidOid : PG_GETARG_OID(0);
1018  int i;
1019  ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1020  TupleDesc tupdesc;
1021  Tuplestorestate *tupstore;
1022  MemoryContext per_query_ctx;
1023  MemoryContext oldcontext;
1024 
1025  /* check to see if caller supports us returning a tuplestore */
1026  if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
1027  ereport(ERROR,
1028  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1029  errmsg("set-valued function called in context that cannot accept a set")));
1030  if (!(rsinfo->allowedModes & SFRM_Materialize))
1031  ereport(ERROR,
1032  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1033  errmsg("materialize mode required, but it is not " \
1034  "allowed in this context")));
1035 
1036  /* Build a tuple descriptor for our result type */
1037  if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1038  elog(ERROR, "return type must be a row type");
1039 
1040  per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
1041  oldcontext = MemoryContextSwitchTo(per_query_ctx);
1042 
1043  tupstore = tuplestore_begin_heap(true, false, work_mem);
1044  rsinfo->returnMode = SFRM_Materialize;
1045  rsinfo->setResult = tupstore;
1046  rsinfo->setDesc = tupdesc;
1047 
1048  MemoryContextSwitchTo(oldcontext);
1049 
1050  /* Make sure we get consistent view of the workers. */
1051  LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
1052 
 1053  for (i = 0; i < max_logical_replication_workers; i++)
1054  {
1055  /* for each row */
 1056  Datum values[PG_STAT_GET_SUBSCRIPTION_COLS];
 1057  bool nulls[PG_STAT_GET_SUBSCRIPTION_COLS];
1058  int worker_pid;
1059  LogicalRepWorker worker;
1060 
1061  memcpy(&worker, &LogicalRepCtx->workers[i],
1062  sizeof(LogicalRepWorker));
1063  if (!worker.proc || !IsBackendPid(worker.proc->pid))
1064  continue;
1065 
1066  if (OidIsValid(subid) && worker.subid != subid)
1067  continue;
1068 
1069  worker_pid = worker.proc->pid;
1070 
1071  MemSet(values, 0, sizeof(values));
1072  MemSet(nulls, 0, sizeof(nulls));
1073 
1074  values[0] = ObjectIdGetDatum(worker.subid);
1075  if (OidIsValid(worker.relid))
1076  values[1] = ObjectIdGetDatum(worker.relid);
1077  else
1078  nulls[1] = true;
1079  values[2] = Int32GetDatum(worker_pid);
1080  if (XLogRecPtrIsInvalid(worker.last_lsn))
1081  nulls[3] = true;
1082  else
1083  values[3] = LSNGetDatum(worker.last_lsn);
1084  if (worker.last_send_time == 0)
1085  nulls[4] = true;
1086  else
1087  values[4] = TimestampTzGetDatum(worker.last_send_time);
1088  if (worker.last_recv_time == 0)
1089  nulls[5] = true;
1090  else
1091  values[5] = TimestampTzGetDatum(worker.last_recv_time);
1092  if (XLogRecPtrIsInvalid(worker.reply_lsn))
1093  nulls[6] = true;
1094  else
1095  values[6] = LSNGetDatum(worker.reply_lsn);
1096  if (worker.reply_time == 0)
1097  nulls[7] = true;
1098  else
1099  values[7] = TimestampTzGetDatum(worker.reply_time);
1100 
1101  tuplestore_putvalues(tupstore, tupdesc, values, nulls);
1102 
1103  /*
1104  * If only a single subscription was requested, and we found it,
1105  * break.
1106  */
1107  if (OidIsValid(subid))
1108  break;
1109  }
1110 
1111  LWLockRelease(LogicalRepWorkerLock);
1112 
1113  /* clean up and return the tuplestore */
1114  tuplestore_donestoring(tupstore);
1115 
1116  return (Datum) 0;
1117 }
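
/*
 * Usage sketch: this set-returning function backs the pg_stat_subscription
 * view (the view name is assumed here; its definition lives elsewhere in the
 * catalogs). Passing NULL returns one row per running worker, e.g.:
 *
 *	SELECT * FROM pg_stat_subscription;
 */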