PostgreSQL Source Code  git master
bgworker.c
Go to the documentation of this file.
1 /*--------------------------------------------------------------------
2  * bgworker.c
3  * POSTGRES pluggable background workers implementation
4  *
5  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
6  *
7  * IDENTIFICATION
8  * src/backend/postmaster/bgworker.c
9  *
10  *-------------------------------------------------------------------------
11  */
12 
13 #include "postgres.h"
14 
15 #include "access/parallel.h"
16 #include "libpq/pqsignal.h"
17 #include "miscadmin.h"
18 #include "pgstat.h"
19 #include "port/atomics.h"
21 #include "postmaster/interrupt.h"
22 #include "postmaster/postmaster.h"
25 #include "storage/dsm.h"
26 #include "storage/ipc.h"
27 #include "storage/latch.h"
28 #include "storage/lwlock.h"
29 #include "storage/pg_shmem.h"
30 #include "storage/pmsignal.h"
31 #include "storage/proc.h"
32 #include "storage/procsignal.h"
33 #include "storage/shmem.h"
34 #include "tcop/tcopprot.h"
35 #include "utils/ascii.h"
36 #include "utils/ps_status.h"
37 #include "utils/timeout.h"
38 
39 /*
40  * The postmaster's list of registered background workers, in private memory.
41  */
43 
44 /*
45  * BackgroundWorkerSlots exist in shared memory and can be accessed (via
46  * the BackgroundWorkerArray) by both the postmaster and by regular backends.
47  * However, the postmaster cannot take locks, even spinlocks, because this
48  * might allow it to crash or become wedged if shared memory gets corrupted.
49  * Such an outcome is intolerable. Therefore, we need a lockless protocol
50  * for coordinating access to this data.
51  *
52  * The 'in_use' flag is used to hand off responsibility for the slot between
53  * the postmaster and the rest of the system. When 'in_use' is false,
54  * the postmaster will ignore the slot entirely, except for the 'in_use' flag
55  * itself, which it may read. In this state, regular backends may modify the
56  * slot. Once a backend sets 'in_use' to true, the slot becomes the
57  * responsibility of the postmaster. Regular backends may no longer modify it,
58  * but the postmaster may examine it. Thus, a backend initializing a slot
59  * must fully initialize the slot - and insert a write memory barrier - before
60  * marking it as in use.
61  *
62  * As an exception, however, even when the slot is in use, regular backends
63  * may set the 'terminate' flag for a slot, telling the postmaster not
64  * to restart it. Once the background worker is no longer running, the slot
65  * will be released for reuse.
66  *
67  * In addition to coordinating with the postmaster, backends modifying this
68  * data structure must coordinate with each other. Since they can take locks,
69  * this is straightforward: any backend wishing to manipulate a slot must
70  * take BackgroundWorkerLock in exclusive mode. Backends wishing to read
71  * data that might get concurrently modified by other backends should take
72  * this lock in shared mode. No matter what, backends reading this data
73  * structure must be able to tolerate concurrent modifications by the
74  * postmaster.
75  */
76 typedef struct BackgroundWorkerSlot
77 {
78  bool in_use;
79  bool terminate;
80  pid_t pid; /* InvalidPid = not started yet; 0 = dead */
81  uint64 generation; /* incremented when slot is recycled */
84 
85 /*
86  * In order to limit the total number of parallel workers (according to
87  * max_parallel_workers GUC), we maintain the number of active parallel
88  * workers. Since the postmaster cannot take locks, two variables are used for
89  * this purpose: the number of registered parallel workers (modified by the
90  * backends, protected by BackgroundWorkerLock) and the number of terminated
91  * parallel workers (modified only by the postmaster, lockless). The active
92  * number of parallel workers is the number of registered workers minus the
93  * terminated ones. These counters can of course overflow, but it's not
94  * important here since the subtraction will still give the right number.
95  */
96 typedef struct BackgroundWorkerArray
97 {
103 
105 {
106  int slot;
107  uint64 generation;
108 };
109 
111 
112 /*
113  * List of internal background worker entry points. We need this for
114  * reasons explained in LookupBackgroundWorkerFunction(), below.
115  */
116 static const struct
117 {
118  const char *fn_name;
120 } InternalBGWorkers[] =
121 
122 {
123  {
124  "ParallelWorkerMain", ParallelWorkerMain
125  },
126  {
127  "ApplyLauncherMain", ApplyLauncherMain
128  },
129  {
130  "ApplyWorkerMain", ApplyWorkerMain
131  },
132  {
133  "ParallelApplyWorkerMain", ParallelApplyWorkerMain
134  }
135 };
136 
137 /* Private functions. */
138 static bgworker_main_type LookupBackgroundWorkerFunction(const char *libraryname, const char *funcname);
139 
140 
141 /*
142  * Calculate shared memory needed.
143  */
144 Size
146 {
147  Size size;
148 
149  /* Array of workers is variably sized. */
150  size = offsetof(BackgroundWorkerArray, slot);
152  sizeof(BackgroundWorkerSlot)));
153 
154  return size;
155 }
156 
157 /*
158  * Initialize shared memory.
159  */
160 void
162 {
163  bool found;
164 
165  BackgroundWorkerData = ShmemInitStruct("Background Worker Data",
167  &found);
168  if (!IsUnderPostmaster)
169  {
170  slist_iter siter;
171  int slotno = 0;
172 
176 
177  /*
178  * Copy contents of worker list into shared memory. Record the shared
179  * memory slot assigned to each worker. This ensures a 1-to-1
180  * correspondence between the postmaster's private list and the array
181  * in shared memory.
182  */
184  {
186  RegisteredBgWorker *rw;
187 
188  rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
189  Assert(slotno < max_worker_processes);
190  slot->in_use = true;
191  slot->terminate = false;
192  slot->pid = InvalidPid;
193  slot->generation = 0;
194  rw->rw_shmem_slot = slotno;
195  rw->rw_worker.bgw_notify_pid = 0; /* might be reinit after crash */
196  memcpy(&slot->worker, &rw->rw_worker, sizeof(BackgroundWorker));
197  ++slotno;
198  }
199 
200  /*
201  * Mark any remaining slots as not in use.
202  */
203  while (slotno < max_worker_processes)
204  {
206 
207  slot->in_use = false;
208  ++slotno;
209  }
210  }
211  else
212  Assert(found);
213 }
214 
215 /*
216  * Search the postmaster's backend-private list of RegisteredBgWorker objects
217  * for the one that maps to the given slot number.
218  */
219 static RegisteredBgWorker *
221 {
222  slist_iter siter;
223 
225  {
226  RegisteredBgWorker *rw;
227 
228  rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
229  if (rw->rw_shmem_slot == slotno)
230  return rw;
231  }
232 
233  return NULL;
234 }
235 
236 /*
237  * Notice changes to shared memory made by other backends.
238  * Accept new worker requests only if allow_new_workers is true.
239  *
240  * This code runs in the postmaster, so we must be very careful not to assume
241  * that shared memory contents are sane. Otherwise, a rogue backend could
242  * take out the postmaster.
243  */
244 void
245 BackgroundWorkerStateChange(bool allow_new_workers)
246 {
247  int slotno;
248 
249  /*
250  * The total number of slots stored in shared memory should match our
251  * notion of max_worker_processes. If it does not, something is very
252  * wrong. Further down, we always refer to this value as
253  * max_worker_processes, in case shared memory gets corrupted while we're
254  * looping.
255  */
257  {
258  ereport(LOG,
259  (errmsg("inconsistent background worker state (max_worker_processes=%d, total_slots=%d)",
262  return;
263  }
264 
265  /*
266  * Iterate through slots, looking for newly-registered workers or workers
267  * who must die.
268  */
269  for (slotno = 0; slotno < max_worker_processes; ++slotno)
270  {
272  RegisteredBgWorker *rw;
273 
274  if (!slot->in_use)
275  continue;
276 
277  /*
278  * Make sure we don't see the in_use flag before the updated slot
279  * contents.
280  */
281  pg_read_barrier();
282 
283  /* See whether we already know about this worker. */
285  if (rw != NULL)
286  {
287  /*
288  * In general, the worker data can't change after it's initially
289  * registered. However, someone can set the terminate flag.
290  */
291  if (slot->terminate && !rw->rw_terminate)
292  {
293  rw->rw_terminate = true;
294  if (rw->rw_pid != 0)
295  kill(rw->rw_pid, SIGTERM);
296  else
297  {
298  /* Report never-started, now-terminated worker as dead. */
300  }
301  }
302  continue;
303  }
304 
305  /*
306  * If we aren't allowing new workers, then immediately mark it for
307  * termination; the next stanza will take care of cleaning it up.
308  * Doing this ensures that any process waiting for the worker will get
309  * awoken, even though the worker will never be allowed to run.
310  */
311  if (!allow_new_workers)
312  slot->terminate = true;
313 
314  /*
315  * If the worker is marked for termination, we don't need to add it to
316  * the registered workers list; we can just free the slot. However, if
317  * bgw_notify_pid is set, the process that registered the worker may
318  * need to know that we've processed the terminate request, so be sure
319  * to signal it.
320  */
321  if (slot->terminate)
322  {
323  int notify_pid;
324 
325  /*
326  * We need a memory barrier here to make sure that the load of
327  * bgw_notify_pid and the update of parallel_terminate_count
328  * complete before the store to in_use.
329  */
330  notify_pid = slot->worker.bgw_notify_pid;
331  if ((slot->worker.bgw_flags & BGWORKER_CLASS_PARALLEL) != 0)
333  slot->pid = 0;
334 
336  slot->in_use = false;
337 
338  if (notify_pid != 0)
339  kill(notify_pid, SIGUSR1);
340 
341  continue;
342  }
343 
344  /*
345  * Copy the registration data into the registered workers list.
346  */
347  rw = malloc(sizeof(RegisteredBgWorker));
348  if (rw == NULL)
349  {
350  ereport(LOG,
351  (errcode(ERRCODE_OUT_OF_MEMORY),
352  errmsg("out of memory")));
353  return;
354  }
355 
356  /*
357  * Copy strings in a paranoid way. If shared memory is corrupted, the
358  * source data might not even be NUL-terminated.
359  */
361  slot->worker.bgw_name, BGW_MAXLEN);
363  slot->worker.bgw_type, BGW_MAXLEN);
368 
369  /*
370  * Copy various fixed-size fields.
371  *
372  * flags, start_time, and restart_time are examined by the postmaster,
373  * but nothing too bad will happen if they are corrupted. The
374  * remaining fields will only be examined by the child process. It
375  * might crash, but we won't.
376  */
377  rw->rw_worker.bgw_flags = slot->worker.bgw_flags;
381  memcpy(rw->rw_worker.bgw_extra, slot->worker.bgw_extra, BGW_EXTRALEN);
382 
383  /*
384  * Copy the PID to be notified about state changes, but only if the
385  * postmaster knows about a backend with that PID. It isn't an error
386  * if the postmaster doesn't know about the PID, because the backend
387  * that requested the worker could have died (or been killed) just
388  * after doing so. Nonetheless, at least until we get some experience
389  * with how this plays out in the wild, log a message at a relatively
390  * high debug level.
391  */
394  {
395  elog(DEBUG1, "worker notification PID %d is not valid",
396  (int) rw->rw_worker.bgw_notify_pid);
397  rw->rw_worker.bgw_notify_pid = 0;
398  }
399 
400  /* Initialize postmaster bookkeeping. */
401  rw->rw_backend = NULL;
402  rw->rw_pid = 0;
403  rw->rw_child_slot = 0;
404  rw->rw_crashed_at = 0;
405  rw->rw_shmem_slot = slotno;
406  rw->rw_terminate = false;
407 
408  /* Log it! */
409  ereport(DEBUG1,
410  (errmsg_internal("registering background worker \"%s\"",
411  rw->rw_worker.bgw_name)));
412 
414  }
415 }
416 
417 /*
418  * Forget about a background worker that's no longer needed.
419  *
420  * The worker must be identified by passing an slist_mutable_iter that
421  * points to it. This convention allows deletion of workers during
422  * searches of the worker list, and saves having to search the list again.
423  *
424  * Caller is responsible for notifying bgw_notify_pid, if appropriate.
425  *
426  * This function must be invoked only in the postmaster.
427  */
428 void
430 {
431  RegisteredBgWorker *rw;
432  BackgroundWorkerSlot *slot;
433 
434  rw = slist_container(RegisteredBgWorker, rw_lnode, cur->cur);
435 
438  Assert(slot->in_use);
439 
440  /*
441  * We need a memory barrier here to make sure that the update of
442  * parallel_terminate_count completes before the store to in_use.
443  */
446 
448  slot->in_use = false;
449 
450  ereport(DEBUG1,
451  (errmsg_internal("unregistering background worker \"%s\"",
452  rw->rw_worker.bgw_name)));
453 
455  free(rw);
456 }
457 
458 /*
459  * Report the PID of a newly-launched background worker in shared memory.
460  *
461  * This function should only be called from the postmaster.
462  */
463 void
465 {
466  BackgroundWorkerSlot *slot;
467 
470  slot->pid = rw->rw_pid;
471 
472  if (rw->rw_worker.bgw_notify_pid != 0)
474 }
475 
476 /*
477  * Report that the PID of a background worker is now zero because a
478  * previously-running background worker has exited.
479  *
480  * This function should only be called from the postmaster.
481  */
482 void
484 {
485  RegisteredBgWorker *rw;
486  BackgroundWorkerSlot *slot;
487  int notify_pid;
488 
489  rw = slist_container(RegisteredBgWorker, rw_lnode, cur->cur);
490 
493  slot->pid = rw->rw_pid;
494  notify_pid = rw->rw_worker.bgw_notify_pid;
495 
496  /*
497  * If this worker is slated for deregistration, do that before notifying
498  * the process which started it. Otherwise, if that process tries to
499  * reuse the slot immediately, it might not be available yet. In theory
500  * that could happen anyway if the process checks slot->pid at just the
501  * wrong moment, but this makes the window narrower.
502  */
503  if (rw->rw_terminate ||
506 
507  if (notify_pid != 0)
508  kill(notify_pid, SIGUSR1);
509 }
510 
511 /*
512  * Cancel SIGUSR1 notifications for a PID belonging to an exiting backend.
513  *
514  * This function should only be called from the postmaster.
515  */
516 void
518 {
519  slist_iter siter;
520 
522  {
523  RegisteredBgWorker *rw;
524 
525  rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
526  if (rw->rw_worker.bgw_notify_pid == pid)
527  rw->rw_worker.bgw_notify_pid = 0;
528  }
529 }
530 
531 /*
532  * Cancel any not-yet-started worker requests that have waiting processes.
533  *
534  * This is called during a normal ("smart" or "fast") database shutdown.
535  * After this point, no new background workers will be started, so anything
536  * that might be waiting for them needs to be kicked off its wait. We do
537  * that by canceling the bgworker registration entirely, which is perhaps
538  * overkill, but since we're shutting down it does not matter whether the
539  * registration record sticks around.
540  *
541  * This function should only be called from the postmaster.
542  */
543 void
545 {
546  slist_mutable_iter iter;
547 
549  {
550  RegisteredBgWorker *rw;
551  BackgroundWorkerSlot *slot;
552 
553  rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
556 
557  /* If it's not yet started, and there's someone waiting ... */
558  if (slot->pid == InvalidPid &&
559  rw->rw_worker.bgw_notify_pid != 0)
560  {
561  /* ... then zap it, and notify the waiter */
562  int notify_pid = rw->rw_worker.bgw_notify_pid;
563 
564  ForgetBackgroundWorker(&iter);
565  if (notify_pid != 0)
566  kill(notify_pid, SIGUSR1);
567  }
568  }
569 }
570 
571 /*
572  * Reset background worker crash state.
573  *
574  * We assume that, after a crash-and-restart cycle, background workers without
575  * the never-restart flag should be restarted immediately, instead of waiting
576  * for bgw_restart_time to elapse. On the other hand, workers with that flag
577  * should be forgotten immediately, since we won't ever restart them.
578  *
579  * This function should only be called from the postmaster.
580  */
581 void
583 {
584  slist_mutable_iter iter;
585 
587  {
588  RegisteredBgWorker *rw;
589 
590  rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
591 
593  {
594  /*
595  * Workers marked BGW_NEVER_RESTART shouldn't get relaunched after
596  * the crash, so forget about them. (If we wait until after the
597  * crash to forget about them, and they are parallel workers,
598  * parallel_terminate_count will get incremented after we've
599  * already zeroed parallel_register_count, which would be bad.)
600  */
601  ForgetBackgroundWorker(&iter);
602  }
603  else
604  {
605  /*
606  * The accounting which we do via parallel_register_count and
607  * parallel_terminate_count would get messed up if a worker marked
608  * parallel could survive a crash and restart cycle. All such
609  * workers should be marked BGW_NEVER_RESTART, and thus control
610  * should never reach this branch.
611  */
613 
614  /*
615  * Allow this worker to be restarted immediately after we finish
616  * resetting.
617  */
618  rw->rw_crashed_at = 0;
619 
620  /*
621  * If there was anyone waiting for it, they're history.
622  */
623  rw->rw_worker.bgw_notify_pid = 0;
624  }
625  }
626 }
627 
628 #ifdef EXEC_BACKEND
629 /*
630  * In EXEC_BACKEND mode, workers use this to retrieve their details from
631  * shared memory.
632  */
634 BackgroundWorkerEntry(int slotno)
635 {
636  static BackgroundWorker myEntry; /* function-static: the returned pointer always refers to this one object, so a later call overwrites an earlier result */
637  BackgroundWorkerSlot *slot;
638 
639  Assert(slotno < BackgroundWorkerData->total_slots); /* only the upper bound is checked; a negative slotno is not caught here */
640  slot = &BackgroundWorkerData->slot[slotno];
641  Assert(slot->in_use); /* the slot is expected to be marked in use when this is called */
642 
643  /* must copy this in case we don't intend to retain shmem access */
644  memcpy(&myEntry, &slot->worker, sizeof myEntry);
645  return &myEntry; /* address of the static copy, not of the shared-memory slot */
646 }
647 #endif
648 
649 /*
650  * Complain about the BackgroundWorker definition using error level elevel.
651  * Return true if it looks ok, false if not (unless elevel >= ERROR, in
652  * which case we won't return at all in the not-OK case).
653  */
654 static bool
656 {
657  /* sanity check for flags */
658 
659  /*
660  * We used to support workers not connected to shared memory, but don't
661  * anymore. Thus this is a required flag now. We're not removing the flag
662  * for compatibility reasons and because the flag still provides some
663  * signal when reading code.
664  */
665  if (!(worker->bgw_flags & BGWORKER_SHMEM_ACCESS))
666  {
667  ereport(elevel,
668  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
669  errmsg("background worker \"%s\": background workers without shared memory access are not supported",
670  worker->bgw_name)));
671  return false;
672  }
673 
675  {
677  {
678  ereport(elevel,
679  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
680  errmsg("background worker \"%s\": cannot request database access if starting at postmaster start",
681  worker->bgw_name)));
682  return false;
683  }
684 
685  /* XXX other checks? */
686  }
687 
688  if ((worker->bgw_restart_time < 0 &&
689  worker->bgw_restart_time != BGW_NEVER_RESTART) ||
690  (worker->bgw_restart_time > USECS_PER_DAY / 1000))
691  {
692  ereport(elevel,
693  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
694  errmsg("background worker \"%s\": invalid restart interval",
695  worker->bgw_name)));
696  return false;
697  }
698 
699  /*
700  * Parallel workers may not be configured for restart, because the
701  * parallel_register_count/parallel_terminate_count accounting can't
702  * handle parallel workers lasting through a crash-and-restart cycle.
703  */
704  if (worker->bgw_restart_time != BGW_NEVER_RESTART &&
705  (worker->bgw_flags & BGWORKER_CLASS_PARALLEL) != 0)
706  {
707  ereport(elevel,
708  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
709  errmsg("background worker \"%s\": parallel workers may not be configured for restart",
710  worker->bgw_name)));
711  return false;
712  }
713 
714  /*
715  * If bgw_type is not filled in, use bgw_name.
716  */
717  if (strcmp(worker->bgw_type, "") == 0)
718  strcpy(worker->bgw_type, worker->bgw_name);
719 
720  return true;
721 }
722 
723 /*
724  * Standard SIGTERM handler for background workers
725  */
726 static void
728 {
729  sigprocmask(SIG_SETMASK, &BlockSig, NULL);
730 
731  ereport(FATAL,
732  (errcode(ERRCODE_ADMIN_SHUTDOWN),
733  errmsg("terminating background worker \"%s\" due to administrator command",
735 }
736 
737 /*
738  * Start a new background worker
739  *
740  * This is the main entry point for background worker, to be called from
741  * postmaster.
742  */
743 void
745 {
746  sigjmp_buf local_sigjmp_buf;
748  bgworker_main_type entrypt;
749 
750  if (worker == NULL)
751  elog(FATAL, "unable to find bgworker entry");
752 
753  IsBackgroundWorker = true;
754 
756  init_ps_display(worker->bgw_name);
757 
759 
760  /* Apply PostAuthDelay */
761  if (PostAuthDelay > 0)
762  pg_usleep(PostAuthDelay * 1000000L);
763 
764  /*
765  * Set up signal handlers.
766  */
768  {
769  /*
770  * SIGINT is used to signal canceling the current action
771  */
775 
776  /* XXX Any other handlers needed here? */
777  }
778  else
779  {
780  pqsignal(SIGINT, SIG_IGN);
782  pqsignal(SIGFPE, SIG_IGN);
783  }
784  pqsignal(SIGTERM, bgworker_die);
785  /* SIGQUIT handler was already set up by InitPostmasterChild */
787 
788  InitializeTimeouts(); /* establishes SIGALRM handler */
789 
793 
794  /*
795  * If an exception is encountered, processing resumes here.
796  *
797  * We just need to clean up, report the error, and go away.
798  */
799  if (sigsetjmp(local_sigjmp_buf, 1) != 0)
800  {
801  /* Since not using PG_TRY, must reset error stack by hand */
802  error_context_stack = NULL;
803 
804  /* Prevent interrupts while cleaning up */
805  HOLD_INTERRUPTS();
806 
807  /*
808  * sigsetjmp will have blocked all signals, but we may need to accept
809  * signals while communicating with our parallel leader. Once we've
810  * done HOLD_INTERRUPTS() it should be safe to unblock signals.
811  */
813 
814  /* Report the error to the parallel leader and the server log */
815  EmitErrorReport();
816 
817  /*
818  * Do we need more cleanup here? For shmem-connected bgworkers, we
819  * will call InitProcess below, which will install ProcKill as exit
820  * callback. That will take care of releasing locks, etc.
821  */
822 
823  /* and go away */
824  proc_exit(1);
825  }
826 
827  /* We can now handle ereport(ERROR) */
828  PG_exception_stack = &local_sigjmp_buf;
829 
830  /*
831  * Create a per-backend PGPROC struct in shared memory, except in the
832  * EXEC_BACKEND case where this was done in SubPostmasterMain. We must do
833  * this before we can use LWLocks (and in the EXEC_BACKEND case we already
834  * had to do some stuff with LWLocks).
835  */
836 #ifndef EXEC_BACKEND
837  InitProcess();
838 #endif
839 
840  /*
841  * Early initialization.
842  */
843  BaseInit();
844 
845  /*
846  * Look up the entry point function, loading its library if necessary.
847  */
849  worker->bgw_function_name);
850 
851  /*
852  * Note that in normal processes, we would call InitPostgres here. For a
853  * worker, however, we don't know what database to connect to, yet; so we
854  * need to wait until the user code does it via
855  * BackgroundWorkerInitializeConnection().
856  */
857 
858  /*
859  * Now invoke the user-defined worker code
860  */
861  entrypt(worker->bgw_main_arg);
862 
863  /* ... and if it returns, we're done */
864  proc_exit(0);
865 }
866 
867 /*
868  * Register a new static background worker.
869  *
870  * This can only be called directly from postmaster or in the _PG_init
871  * function of a module library that's loaded by shared_preload_libraries;
872  * otherwise it will have no effect.
873  */
874 void
876 {
877  RegisteredBgWorker *rw;
878  static int numworkers = 0;
879 
880  if (!IsUnderPostmaster)
881  ereport(DEBUG1,
882  (errmsg_internal("registering background worker \"%s\"", worker->bgw_name)));
883 
885  strcmp(worker->bgw_library_name, "postgres") != 0)
886  {
887  if (!IsUnderPostmaster)
888  ereport(LOG,
889  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
890  errmsg("background worker \"%s\": must be registered in shared_preload_libraries",
891  worker->bgw_name)));
892  return;
893  }
894 
895  if (!SanityCheckBackgroundWorker(worker, LOG))
896  return;
897 
898  if (worker->bgw_notify_pid != 0)
899  {
900  ereport(LOG,
901  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
902  errmsg("background worker \"%s\": only dynamic background workers can request notification",
903  worker->bgw_name)));
904  return;
905  }
906 
907  /*
908  * Enforce maximum number of workers. Note this is overly restrictive: we
909  * could allow more non-shmem-connected workers, because these don't count
910  * towards the MAX_BACKENDS limit elsewhere. For now, it doesn't seem
911  * important to relax this restriction.
912  */
913  if (++numworkers > max_worker_processes)
914  {
915  ereport(LOG,
916  (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
917  errmsg("too many background workers"),
918  errdetail_plural("Up to %d background worker can be registered with the current settings.",
919  "Up to %d background workers can be registered with the current settings.",
922  errhint("Consider increasing the configuration parameter \"max_worker_processes\".")));
923  return;
924  }
925 
926  /*
927  * Copy the registration data into the registered workers list.
928  */
929  rw = malloc(sizeof(RegisteredBgWorker));
930  if (rw == NULL)
931  {
932  ereport(LOG,
933  (errcode(ERRCODE_OUT_OF_MEMORY),
934  errmsg("out of memory")));
935  return;
936  }
937 
938  rw->rw_worker = *worker;
939  rw->rw_backend = NULL;
940  rw->rw_pid = 0;
941  rw->rw_child_slot = 0;
942  rw->rw_crashed_at = 0;
943  rw->rw_terminate = false;
944 
946 }
947 
948 /*
949  * Register a new background worker from a regular backend.
950  *
951  * Returns true on success and false on failure. Failure typically indicates
952  * that no background worker slots are currently available.
953  *
954  * If handle != NULL, we'll set *handle to a pointer that can subsequently
955  * be used as an argument to GetBackgroundWorkerPid(). The caller can
956  * free this pointer using pfree(), if desired.
957  */
958 bool
960  BackgroundWorkerHandle **handle)
961 {
962  int slotno;
963  bool success = false;
964  bool parallel;
965  uint64 generation = 0;
966 
967  /*
968  * We can't register dynamic background workers from the postmaster. If
969  * this is a standalone backend, we're the only process and can't start
970  * any more. In a multi-process environment, it might be theoretically
971  * possible, but we don't currently support it due to locking
972  * considerations; see comments on the BackgroundWorkerSlot data
973  * structure.
974  */
975  if (!IsUnderPostmaster)
976  return false;
977 
978  if (!SanityCheckBackgroundWorker(worker, ERROR))
979  return false;
980 
981  parallel = (worker->bgw_flags & BGWORKER_CLASS_PARALLEL) != 0;
982 
983  LWLockAcquire(BackgroundWorkerLock, LW_EXCLUSIVE);
984 
985  /*
986  * If this is a parallel worker, check whether there are already too many
987  * parallel workers; if so, don't register another one. Our view of
988  * parallel_terminate_count may be slightly stale, but that doesn't really
989  * matter: we would have gotten the same result if we'd arrived here
990  * slightly earlier anyway. There's no help for it, either, since the
991  * postmaster must not take locks; a memory barrier wouldn't guarantee
992  * anything useful.
993  */
997  {
1001  LWLockRelease(BackgroundWorkerLock);
1002  return false;
1003  }
1004 
1005  /*
1006  * Look for an unused slot. If we find one, grab it.
1007  */
1008  for (slotno = 0; slotno < BackgroundWorkerData->total_slots; ++slotno)
1009  {
1011 
1012  if (!slot->in_use)
1013  {
1014  memcpy(&slot->worker, worker, sizeof(BackgroundWorker));
1015  slot->pid = InvalidPid; /* indicates not started yet */
1016  slot->generation++;
1017  slot->terminate = false;
1018  generation = slot->generation;
1019  if (parallel)
1021 
1022  /*
1023  * Make sure postmaster doesn't see the slot as in use before it
1024  * sees the new contents.
1025  */
1026  pg_write_barrier();
1027 
1028  slot->in_use = true;
1029  success = true;
1030  break;
1031  }
1032  }
1033 
1034  LWLockRelease(BackgroundWorkerLock);
1035 
1036  /* If we found a slot, tell the postmaster to notice the change. */
1037  if (success)
1039 
1040  /*
1041  * If we found a slot and the user has provided a handle, initialize it.
1042  */
1043  if (success && handle)
1044  {
1045  *handle = palloc(sizeof(BackgroundWorkerHandle));
1046  (*handle)->slot = slotno;
1047  (*handle)->generation = generation;
1048  }
1049 
1050  return success;
1051 }
1052 
1053 /*
1054  * Get the PID of a dynamically-registered background worker.
1055  *
1056  * If the worker is determined to be running, the return value will be
1057  * BGWH_STARTED and *pidp will get the PID of the worker process. If the
1058  * postmaster has not yet attempted to start the worker, the return value will
1059  * be BGWH_NOT_YET_STARTED. Otherwise, the return value is BGWH_STOPPED.
1060  *
1061  * BGWH_STOPPED can indicate either that the worker is temporarily stopped
1062  * (because it is configured for automatic restart and exited non-zero),
1063  * or that the worker is permanently stopped (because it exited with exit
1064  * code 0, or was not configured for automatic restart), or even that the
1065  * worker was unregistered without ever starting (either because startup
1066  * failed and the worker is not configured for automatic restart, or because
1067  * TerminateBackgroundWorker was used before the worker was successfully
1068  * started).
1069  */
1072 {
1073  BackgroundWorkerSlot *slot;
1074  pid_t pid;
1075 
1076  Assert(handle->slot < max_worker_processes);
1077  slot = &BackgroundWorkerData->slot[handle->slot];
1078 
1079  /*
1080  * We could probably arrange to synchronize access to data using memory
1081  * barriers only, but for now, let's just keep it simple and grab the
1082  * lock. It seems unlikely that there will be enough traffic here to
1083  * result in meaningful contention.
1084  */
1085  LWLockAcquire(BackgroundWorkerLock, LW_SHARED);
1086 
1087  /*
1088  * The generation number can't be concurrently changed while we hold the
1089  * lock. The pid, which is updated by the postmaster, can change at any
1090  * time, but we assume such changes are atomic. So the value we read
1091  * won't be garbage, but it might be out of date by the time the caller
1092  * examines it (but that's unavoidable anyway).
1093  *
1094  * The in_use flag could be in the process of changing from true to false,
1095  * but if it is already false then it can't change further.
1096  */
1097  if (handle->generation != slot->generation || !slot->in_use)
1098  pid = 0;
1099  else
1100  pid = slot->pid;
1101 
1102  /* All done. */
1103  LWLockRelease(BackgroundWorkerLock);
1104 
1105  if (pid == 0)
1106  return BGWH_STOPPED;
1107  else if (pid == InvalidPid)
1108  return BGWH_NOT_YET_STARTED;
1109  *pidp = pid;
1110  return BGWH_STARTED;
1111 }
1112 
1113 /*
1114  * Wait for a background worker to start up.
1115  *
1116  * This is like GetBackgroundWorkerPid(), except that if the worker has not
1117  * yet started, we wait for it to do so; thus, BGWH_NOT_YET_STARTED is never
1118  * returned. However, if the postmaster has died, we give up and return
1119  * BGWH_POSTMASTER_DIED, since in that case we know that startup will not
1120  * take place.
1121  *
1122  * The caller *must* have set our PID as the worker's bgw_notify_pid,
1123  * else we will not be awoken promptly when the worker's state changes.
1124  */
1127 {
1129  int rc;
1130 
1131  for (;;)
1132  {
1133  pid_t pid;
1134 
1136 
1137  status = GetBackgroundWorkerPid(handle, &pid);
1138  if (status == BGWH_STARTED)
1139  *pidp = pid;
1141  break;
1142 
1143  rc = WaitLatch(MyLatch,
1146 
1147  if (rc & WL_POSTMASTER_DEATH)
1148  {
1150  break;
1151  }
1152 
1154  }
1155 
1156  return status;
1157 }
1158 
1159 /*
1160  * Wait for a background worker to stop.
1161  *
1162  * If the worker hasn't yet started, or is running, we wait for it to stop
1163  * and then return BGWH_STOPPED. However, if the postmaster has died, we give
1164  * up and return BGWH_POSTMASTER_DIED, because it's the postmaster that
1165  * notifies us when a worker's state changes.
1166  *
1167  * The caller *must* have set our PID as the worker's bgw_notify_pid,
1168  * else we will not be awoken promptly when the worker's state changes.
1169  */
1172 {
1174  int rc;
1175 
1176  for (;;)
1177  {
1178  pid_t pid;
1179 
1181 
1182  status = GetBackgroundWorkerPid(handle, &pid);
1183  if (status == BGWH_STOPPED)
1184  break;
1185 
1186  rc = WaitLatch(MyLatch,
1189 
1190  if (rc & WL_POSTMASTER_DEATH)
1191  {
1193  break;
1194  }
1195 
1197  }
1198 
1199  return status;
1200 }
1201 
1202 /*
1203  * Instruct the postmaster to terminate a background worker.
1204  *
1205  * Note that it's safe to do this without regard to whether the worker is
1206  * still running, or even if the worker may already have exited and been
1207  * unregistered.
1208  */
1209 void
1211 {
1212  BackgroundWorkerSlot *slot;
1213  bool signal_postmaster = false;
1214 
1215  Assert(handle->slot < max_worker_processes);
1216  slot = &BackgroundWorkerData->slot[handle->slot];
1217 
1218  /* Set terminate flag in shared memory, unless slot has been reused. */
1219  LWLockAcquire(BackgroundWorkerLock, LW_EXCLUSIVE);
1220  if (handle->generation == slot->generation)
1221  {
1222  slot->terminate = true;
1223  signal_postmaster = true;
1224  }
1225  LWLockRelease(BackgroundWorkerLock);
1226 
1227  /* Make sure the postmaster notices the change to shared memory. */
1228  if (signal_postmaster)
1230 }
1231 
1232 /*
1233  * Look up (and possibly load) a bgworker entry point function.
1234  *
1235  * For functions contained in the core code, we use library name "postgres"
1236  * and consult the InternalBGWorkers array. External functions are
1237  * looked up, and loaded if necessary, using load_external_function().
1238  *
1239  * The point of this is to pass function names as strings across process
1240  * boundaries. We can't pass actual function addresses because of the
1241  * possibility that the function has been loaded at a different address
1242  * in a different process. This is obviously a hazard for functions in
1243  * loadable libraries, but it can happen even for functions in the core code
1244  * on platforms using EXEC_BACKEND (e.g., Windows).
1245  *
1246  * At some point it might be worthwhile to get rid of InternalBGWorkers[]
1247  * in favor of applying load_external_function() for core functions too;
1248  * but that raises portability issues that are not worth addressing now.
1249  */
1250 static bgworker_main_type
1251 LookupBackgroundWorkerFunction(const char *libraryname, const char *funcname)
1252 {
1253  /*
1254  * If the function is to be loaded from postgres itself, search the
1255  * InternalBGWorkers array.
1256  */
1257  if (strcmp(libraryname, "postgres") == 0)
1258  {
1259  int i;
1260 
1261  for (i = 0; i < lengthof(InternalBGWorkers); i++)
1262  {
1263  if (strcmp(InternalBGWorkers[i].fn_name, funcname) == 0)
1264  return InternalBGWorkers[i].fn_addr;
1265  }
1266 
1267  /* We can only reach this by programming error. */
1268  elog(ERROR, "internal function \"%s\" not found", funcname);
1269  }
1270 
1271  /* Otherwise load from external library. */
1272  return (bgworker_main_type)
1273  load_external_function(libraryname, funcname, true, NULL);
1274 }
1275 
1276 /*
1277  * Given a PID, get the bgw_type of the background worker. Returns NULL if
1278  * not a valid background worker.
1279  *
1280  * The return value is in static memory belonging to this function, so it has
1281  * to be used before calling this function again. This is so that the caller
1282  * doesn't have to worry about the background worker locking protocol.
1283  */
1284 const char *
1286 {
1287  int slotno;
1288  bool found = false;
1289  static char result[BGW_MAXLEN];
1290 
1291  LWLockAcquire(BackgroundWorkerLock, LW_SHARED);
1292 
1293  for (slotno = 0; slotno < BackgroundWorkerData->total_slots; slotno++)
1294  {
1296 
1297  if (slot->pid > 0 && slot->pid == pid)
1298  {
1299  strcpy(result, slot->worker.bgw_type);
1300  found = true;
1301  break;
1302  }
1303  }
1304 
1305  LWLockRelease(BackgroundWorkerLock);
1306 
1307  if (!found)
1308  return NULL;
1309 
1310  return result;
1311 }
void ParallelApplyWorkerMain(Datum main_arg)
void ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz)
Definition: ascii.c:174
#define pg_memory_barrier()
Definition: atomics.h:140
#define pg_read_barrier()
Definition: atomics.h:153
#define pg_write_barrier()
Definition: atomics.h:154
void ParallelWorkerMain(Datum main_arg)
Definition: parallel.c:1262
sigset_t BlockSig
Definition: pqsignal.c:23
void ApplyWorkerMain(Datum main_arg)
Definition: worker.c:4473
void RegisterBackgroundWorker(BackgroundWorker *worker)
Definition: bgworker.c:875
static RegisteredBgWorker * FindRegisteredWorkerBySlotNumber(int slotno)
Definition: bgworker.c:220
BgwHandleStatus WaitForBackgroundWorkerStartup(BackgroundWorkerHandle *handle, pid_t *pidp)
Definition: bgworker.c:1126
static bool SanityCheckBackgroundWorker(BackgroundWorker *worker, int elevel)
Definition: bgworker.c:655
void ReportBackgroundWorkerPID(RegisteredBgWorker *rw)
Definition: bgworker.c:464
void TerminateBackgroundWorker(BackgroundWorkerHandle *handle)
Definition: bgworker.c:1210
BgwHandleStatus WaitForBackgroundWorkerShutdown(BackgroundWorkerHandle *handle)
Definition: bgworker.c:1171
void ResetBackgroundWorkerCrashTimes(void)
Definition: bgworker.c:582
void BackgroundWorkerShmemInit(void)
Definition: bgworker.c:161
void StartBackgroundWorker(void)
Definition: bgworker.c:744
struct BackgroundWorkerSlot BackgroundWorkerSlot
const char * GetBackgroundWorkerTypeByPid(pid_t pid)
Definition: bgworker.c:1285
void ForgetBackgroundWorker(slist_mutable_iter *cur)
Definition: bgworker.c:429
slist_head BackgroundWorkerList
Definition: bgworker.c:42
const char * fn_name
Definition: bgworker.c:118
BgwHandleStatus GetBackgroundWorkerPid(BackgroundWorkerHandle *handle, pid_t *pidp)
Definition: bgworker.c:1071
static BackgroundWorkerArray * BackgroundWorkerData
Definition: bgworker.c:110
static bgworker_main_type LookupBackgroundWorkerFunction(const char *libraryname, const char *funcname)
Definition: bgworker.c:1251
static void bgworker_die(SIGNAL_ARGS)
Definition: bgworker.c:727
void BackgroundWorkerStopNotifications(pid_t pid)
Definition: bgworker.c:517
Size BackgroundWorkerShmemSize(void)
Definition: bgworker.c:145
void BackgroundWorkerStateChange(bool allow_new_workers)
Definition: bgworker.c:245
static const struct @15 InternalBGWorkers[]
bool RegisterDynamicBackgroundWorker(BackgroundWorker *worker, BackgroundWorkerHandle **handle)
Definition: bgworker.c:959
void ReportBackgroundWorkerExit(slist_mutable_iter *cur)
Definition: bgworker.c:483
bgworker_main_type fn_addr
Definition: bgworker.c:119
void ForgetUnstartedBackgroundWorkers(void)
Definition: bgworker.c:544
struct BackgroundWorkerArray BackgroundWorkerArray
#define BGW_NEVER_RESTART
Definition: bgworker.h:85
#define BGW_EXTRALEN
Definition: bgworker.h:87
#define BGWORKER_CLASS_PARALLEL
Definition: bgworker.h:68
BgwHandleStatus
Definition: bgworker.h:104
@ BGWH_POSTMASTER_DIED
Definition: bgworker.h:108
@ BGWH_STARTED
Definition: bgworker.h:105
@ BGWH_NOT_YET_STARTED
Definition: bgworker.h:106
@ BGWH_STOPPED
Definition: bgworker.h:107
@ BgWorkerStart_PostmasterStart
Definition: bgworker.h:79
#define BGWORKER_BACKEND_DATABASE_CONNECTION
Definition: bgworker.h:60
#define BGWORKER_SHMEM_ACCESS
Definition: bgworker.h:53
void(* bgworker_main_type)(Datum main_arg)
Definition: bgworker.h:72
#define BGW_MAXLEN
Definition: bgworker.h:86
#define MAX_PARALLEL_WORKER_LIMIT
unsigned int uint32
Definition: c.h:490
#define SIGNAL_ARGS
Definition: c.h:1332
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:382
#define lengthof(array)
Definition: c.h:772
size_t Size
Definition: c.h:589
#define USECS_PER_DAY
Definition: timestamp.h:130
void * load_external_function(const char *filename, const char *funcname, bool signalNotFound, void **filehandle)
Definition: dfmgr.c:105
struct cursor * cur
Definition: ecpg.c:28
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1156
void EmitErrorReport(void)
Definition: elog.c:1669
ErrorContextCallback * error_context_stack
Definition: elog.c:95
int errdetail_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition: elog.c:1294
int errhint(const char *fmt,...)
Definition: elog.c:1316
int errcode(int sqlerrcode)
Definition: elog.c:858
int errmsg(const char *fmt,...)
Definition: elog.c:1069
sigjmp_buf * PG_exception_stack
Definition: elog.c:97
#define LOG
Definition: elog.h:31
#define FATAL
Definition: elog.h:41
#define DEBUG1
Definition: elog.h:30
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
bool IsUnderPostmaster
Definition: globals.c:113
bool IsBackgroundWorker
Definition: globals.c:115
int max_parallel_workers
Definition: globals.c:139
struct Latch * MyLatch
Definition: globals.c:58
int max_worker_processes
Definition: globals.c:138
#define free(a)
Definition: header.h:65
#define malloc(a)
Definition: header.h:50
static void slist_delete_current(slist_mutable_iter *iter)
Definition: ilist.h:1084
#define slist_foreach_modify(iter, lhead)
Definition: ilist.h:1148
#define SLIST_STATIC_INIT(name)
Definition: ilist.h:283
static void slist_push_head(slist_head *head, slist_node *node)
Definition: ilist.h:1006
#define slist_container(type, membername, ptr)
Definition: ilist.h:1106
#define slist_foreach(iter, lhead)
Definition: ilist.h:1132
#define funcname
Definition: indent_codes.h:69
static bool success
Definition: initdb.c:178
void proc_exit(int code)
Definition: ipc.c:104
int i
Definition: isn.c:73
void ResetLatch(Latch *latch)
Definition: latch.c:699
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:492
#define WL_LATCH_SET
Definition: latch.h:125
#define WL_POSTMASTER_DEATH
Definition: latch.h:129
void ApplyLauncherMain(Datum main_arg)
Definition: launcher.c:1078
Assert(fmt[strlen(fmt) - 1] !='\n')
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1195
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1803
@ LW_SHARED
Definition: lwlock.h:116
@ LW_EXCLUSIVE
Definition: lwlock.h:115
void * palloc(Size size)
Definition: mcxt.c:1210
@ InitProcessing
Definition: miscadmin.h:399
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:121
#define HOLD_INTERRUPTS()
Definition: miscadmin.h:132
#define SetProcessingMode(mode)
Definition: miscadmin.h:411
@ B_BG_WORKER
Definition: miscadmin.h:324
#define InvalidPid
Definition: miscadmin.h:32
BackendType MyBackendType
Definition: miscinit.c:63
bool process_shared_preload_libraries_in_progress
Definition: miscinit.c:1782
static void static void status(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:224
void SendPostmasterSignal(PMSignalReason reason)
Definition: pmsignal.c:181
@ PMSIGNAL_BACKGROUND_WORKER_CHANGE
Definition: pmsignal.h:40
pqsigfunc pqsignal(int signo, pqsigfunc func)
int PostAuthDelay
Definition: postgres.c:95
void FloatExceptionHandler(SIGNAL_ARGS)
Definition: postgres.c:2990
void StatementCancelHandler(SIGNAL_ARGS)
Definition: postgres.c:2969
void BaseInit(void)
Definition: postinit.c:636
void BackgroundWorkerUnblockSignals(void)
Definition: postmaster.c:5660
BackgroundWorker * MyBgworkerEntry
Definition: postmaster.c:193
bool PostmasterMarkPIDForWorkerNotify(int pid)
Definition: postmaster.c:6013
void procsignal_sigusr1_handler(SIGNAL_ARGS)
Definition: procsignal.c:639
void init_ps_display(const char *fixed_part)
Definition: ps_status.c:242
Size add_size(Size s1, Size s2)
Definition: shmem.c:502
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:396
Size mul_size(Size s1, Size s2)
Definition: shmem.c:519
void pg_usleep(long microsec)
Definition: signal.c:53
void InitProcess(void)
Definition: proc.c:297
uint32 parallel_terminate_count
Definition: bgworker.c:100
uint32 parallel_register_count
Definition: bgworker.c:99
BackgroundWorkerSlot slot[FLEXIBLE_ARRAY_MEMBER]
Definition: bgworker.c:101
BackgroundWorker worker
Definition: bgworker.c:82
char bgw_function_name[BGW_MAXLEN]
Definition: bgworker.h:97
Datum bgw_main_arg
Definition: bgworker.h:98
char bgw_name[BGW_MAXLEN]
Definition: bgworker.h:91
int bgw_restart_time
Definition: bgworker.h:95
char bgw_type[BGW_MAXLEN]
Definition: bgworker.h:92
BgWorkerStartTime bgw_start_time
Definition: bgworker.h:94
char bgw_extra[BGW_EXTRALEN]
Definition: bgworker.h:99
pid_t bgw_notify_pid
Definition: bgworker.h:100
char bgw_library_name[BGW_MAXLEN]
Definition: bgworker.h:96
struct bkend * rw_backend
BackgroundWorker rw_worker
slist_node * cur
Definition: ilist.h:259
slist_node * cur
Definition: ilist.h:274
void InitializeTimeouts(void)
Definition: timeout.c:474
@ WAIT_EVENT_BGWORKER_STARTUP
Definition: wait_event.h:88
@ WAIT_EVENT_BGWORKER_SHUTDOWN
Definition: wait_event.h:87
#define SIGCHLD
Definition: win32_port.h:186
#define SIGHUP
Definition: win32_port.h:176
#define SIG_DFL
Definition: win32_port.h:171
#define SIGPIPE
Definition: win32_port.h:181
#define kill(pid, sig)
Definition: win32_port.h:489
#define SIGUSR1
Definition: win32_port.h:188
#define SIGUSR2
Definition: win32_port.h:189
#define SIG_IGN
Definition: win32_port.h:173