/*-------------------------------------------------------------------------
 *
 * parallel.c
 *    Infrastructure for launching parallel workers
 *
 * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *    src/backend/access/transam/parallel.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "access/heapam.h"
#include "access/nbtree.h"
#include "access/parallel.h"
#include "access/session.h"
#include "access/xact.h"
#include "access/xlog.h"
#include "catalog/index.h"
#include "catalog/namespace.h"
#include "catalog/pg_enum.h"
#include "commands/async.h"
#include "executor/execParallel.h"
#include "libpq/libpq.h"
#include "libpq/pqformat.h"
#include "libpq/pqmq.h"
#include "miscadmin.h"
#include "optimizer/optimizer.h"
#include "pgstat.h"
#include "storage/ipc.h"
#include "storage/predicate.h"
#include "storage/sinval.h"
#include "storage/spin.h"
#include "tcop/tcopprot.h"
#include "utils/combocid.h"
#include "utils/guc.h"
#include "utils/inval.h"
#include "utils/memutils.h"
#include "utils/relmapper.h"
#include "utils/snapmgr.h"
#include "utils/typcache.h"

/*
 * We don't want to waste a lot of memory on an error queue which, most of
 * the time, will process only a handful of small messages.  However, it is
 * desirable to make it large enough that a typical ErrorResponse can be sent
 * without blocking.  That way, a worker that errors out can write the whole
 * message into the queue and terminate without waiting for the user backend.
 */
#define PARALLEL_ERROR_QUEUE_SIZE           16384

/* Magic number for parallel context TOC. */
#define PARALLEL_MAGIC                      0x50477c7c

/*
 * Magic numbers for per-context parallel state sharing.  Higher-level code
 * should use smaller values, leaving these very large ones for use by this
 * module.
 */
#define PARALLEL_KEY_FIXED                  UINT64CONST(0xFFFFFFFFFFFF0001)
#define PARALLEL_KEY_ERROR_QUEUE            UINT64CONST(0xFFFFFFFFFFFF0002)
#define PARALLEL_KEY_LIBRARY                UINT64CONST(0xFFFFFFFFFFFF0003)
#define PARALLEL_KEY_GUC                    UINT64CONST(0xFFFFFFFFFFFF0004)
#define PARALLEL_KEY_COMBO_CID              UINT64CONST(0xFFFFFFFFFFFF0005)
#define PARALLEL_KEY_TRANSACTION_SNAPSHOT   UINT64CONST(0xFFFFFFFFFFFF0006)
#define PARALLEL_KEY_ACTIVE_SNAPSHOT        UINT64CONST(0xFFFFFFFFFFFF0007)
#define PARALLEL_KEY_TRANSACTION_STATE      UINT64CONST(0xFFFFFFFFFFFF0008)
#define PARALLEL_KEY_ENTRYPOINT             UINT64CONST(0xFFFFFFFFFFFF0009)
#define PARALLEL_KEY_SESSION_DSM            UINT64CONST(0xFFFFFFFFFFFF000A)
#define PARALLEL_KEY_REINDEX_STATE          UINT64CONST(0xFFFFFFFFFFFF000B)
#define PARALLEL_KEY_RELMAPPER_STATE        UINT64CONST(0xFFFFFFFFFFFF000C)
#define PARALLEL_KEY_ENUMBLACKLIST          UINT64CONST(0xFFFFFFFFFFFF000D)

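/*
 * Illustrative sketch (not part of the original file): higher-level callers
 * layer their own state into the same table of contents under small keys,
 * which cannot collide with the huge module-private values above.  A
 * hypothetical caller might write:
 *
 *      #define MYMODULE_KEY_PLAN       UINT64CONST(1)
 *      #define MYMODULE_KEY_RESULTS    UINT64CONST(2)
 *
 * and then estimate, allocate, and insert chunks under those keys, as
 * sketched after InitializeParallelDSM() below.
 */
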
/* Fixed-size parallel state. */
typedef struct FixedParallelState
{
    /* Fixed-size state that workers must restore. */
    Oid         database_id;
    Oid         authenticated_user_id;
    Oid         current_user_id;
    Oid         outer_user_id;
    Oid         temp_namespace_id;
    Oid         temp_toast_namespace_id;
    int         sec_context;
    bool        is_superuser;
    PGPROC     *parallel_master_pgproc;
    pid_t       parallel_master_pid;
    BackendId   parallel_master_backend_id;
    TimestampTz xact_ts;
    TimestampTz stmt_ts;
    SerializableXactHandle serializable_xact_handle;

    /* Mutex protects remaining fields. */
    slock_t     mutex;

    /* Maximum XactLastRecEnd of any worker. */
    XLogRecPtr  last_xlog_end;
} FixedParallelState;

/*
 * Our parallel worker number.  We initialize this to -1, meaning that we are
 * not a parallel worker.  In parallel workers, it will be set to a value >= 0
 * and < the number of workers before any user code is invoked; each parallel
 * worker will get a different parallel worker number.
 */
int         ParallelWorkerNumber = -1;

/* Is there a parallel message pending which we need to receive? */
volatile bool ParallelMessagePending = false;

/* Are we initializing a parallel worker? */
bool        InitializingParallelWorker = false;

/* Pointer to our fixed parallel state. */
static FixedParallelState *MyFixedParallelState;

/* List of active parallel contexts. */
static dlist_head pcxt_list = DLIST_STATIC_INIT(pcxt_list);

/* Backend-local copy of data from FixedParallelState. */
static pid_t ParallelMasterPid;

/*
 * List of internal parallel worker entry points.  We need this for
 * reasons explained in LookupParallelWorkerFunction(), below.
 */
static const struct
{
    const char *fn_name;
    parallel_worker_main_type fn_addr;
}           InternalParallelWorkers[] =

{
    {
        "ParallelQueryMain", ParallelQueryMain
    },
    {
        "_bt_parallel_build_main", _bt_parallel_build_main
    },
    {
        "parallel_vacuum_main", parallel_vacuum_main
    }
};

/* Private functions. */
static void HandleParallelMessage(ParallelContext *pcxt, int i, StringInfo msg);
static void WaitForParallelWorkersToExit(ParallelContext *pcxt);
static parallel_worker_main_type LookupParallelWorkerFunction(const char *libraryname, const char *funcname);
static void ParallelWorkerShutdown(int code, Datum arg);


/*
 * Establish a new parallel context.  This should be done after entering
 * parallel mode, and (unless there is an error) the context should be
 * destroyed before exiting the current subtransaction.
 */
ParallelContext *
CreateParallelContext(const char *library_name, const char *function_name,
                      int nworkers)
{
    MemoryContext oldcontext;
    ParallelContext *pcxt;

    /* It is unsafe to create a parallel context if not in parallel mode. */
    Assert(IsInParallelMode());

    /* Number of workers should be non-negative. */
    Assert(nworkers >= 0);

    /* We might be running in a short-lived memory context. */
    oldcontext = MemoryContextSwitchTo(TopTransactionContext);

    /* Initialize a new ParallelContext. */
    pcxt = palloc0(sizeof(ParallelContext));
    pcxt->subid = GetCurrentSubTransactionId();
    pcxt->nworkers = nworkers;
    pcxt->nworkers_to_launch = nworkers;
    pcxt->library_name = pstrdup(library_name);
    pcxt->function_name = pstrdup(function_name);
    pcxt->error_context_stack = error_context_stack;
    shm_toc_initialize_estimator(&pcxt->estimator);
    dlist_push_head(&pcxt_list, &pcxt->node);

    /* Restore previous memory context. */
    MemoryContextSwitchTo(oldcontext);

    return pcxt;
}

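/*
 * A minimal sketch of the expected calling sequence, for orientation only
 * (error handling and caller-specific state omitted; see also
 * src/backend/access/transam/README.parallel):
 *
 *      EnterParallelMode();
 *      pcxt = CreateParallelContext("postgres", "ParallelQueryMain", nworkers);
 *      ... shm_toc_estimate_chunk/shm_toc_estimate_keys on pcxt->estimator ...
 *      InitializeParallelDSM(pcxt);
 *      ... shm_toc_allocate/shm_toc_insert caller-specific state ...
 *      LaunchParallelWorkers(pcxt);
 *      ... do leader-side work, exchanging data with the workers ...
 *      WaitForParallelWorkersToFinish(pcxt);
 *      ... read final results back from the DSM segment ...
 *      DestroyParallelContext(pcxt);
 *      ExitParallelMode();
 */
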
/*
 * Establish the dynamic shared memory segment for a parallel context and
 * copy state and other bookkeeping information that will be needed by
 * parallel workers into it.
 */
void
InitializeParallelDSM(ParallelContext *pcxt)
{
    MemoryContext oldcontext;
    Size        library_len = 0;
    Size        guc_len = 0;
    Size        combocidlen = 0;
    Size        tsnaplen = 0;
    Size        asnaplen = 0;
    Size        tstatelen = 0;
    Size        reindexlen = 0;
    Size        relmapperlen = 0;
    Size        enumblacklistlen = 0;
    Size        segsize = 0;
    int         i;
    FixedParallelState *fps;
    dsm_handle  session_dsm_handle = DSM_HANDLE_INVALID;
    Snapshot    transaction_snapshot = GetTransactionSnapshot();
    Snapshot    active_snapshot = GetActiveSnapshot();

    /* We might be running in a very short-lived memory context. */
    oldcontext = MemoryContextSwitchTo(TopTransactionContext);

    /* Allow space to store the fixed-size parallel state. */
    shm_toc_estimate_chunk(&pcxt->estimator, sizeof(FixedParallelState));
    shm_toc_estimate_keys(&pcxt->estimator, 1);

    /*
     * Normally, the user will have requested at least one worker process, but
     * if by chance they have not, we can skip a bunch of things here.
     */
    if (pcxt->nworkers > 0)
    {
        /* Get (or create) the per-session DSM segment's handle. */
        session_dsm_handle = GetSessionDsmHandle();

        /*
         * If we weren't able to create a per-session DSM segment, then we can
         * continue but we can't safely launch any workers because their
         * record typmods would be incompatible so they couldn't exchange
         * tuples.
         */
        if (session_dsm_handle == DSM_HANDLE_INVALID)
            pcxt->nworkers = 0;
    }

    if (pcxt->nworkers > 0)
    {
        /* Estimate space for various kinds of state sharing. */
        library_len = EstimateLibraryStateSpace();
        shm_toc_estimate_chunk(&pcxt->estimator, library_len);
        guc_len = EstimateGUCStateSpace();
        shm_toc_estimate_chunk(&pcxt->estimator, guc_len);
        combocidlen = EstimateComboCIDStateSpace();
        shm_toc_estimate_chunk(&pcxt->estimator, combocidlen);
        tsnaplen = EstimateSnapshotSpace(transaction_snapshot);
        shm_toc_estimate_chunk(&pcxt->estimator, tsnaplen);
        asnaplen = EstimateSnapshotSpace(active_snapshot);
        shm_toc_estimate_chunk(&pcxt->estimator, asnaplen);
        tstatelen = EstimateTransactionStateSpace();
        shm_toc_estimate_chunk(&pcxt->estimator, tstatelen);
        shm_toc_estimate_chunk(&pcxt->estimator, sizeof(dsm_handle));
        reindexlen = EstimateReindexStateSpace();
        shm_toc_estimate_chunk(&pcxt->estimator, reindexlen);
        relmapperlen = EstimateRelationMapSpace();
        shm_toc_estimate_chunk(&pcxt->estimator, relmapperlen);
        enumblacklistlen = EstimateEnumBlacklistSpace();
        shm_toc_estimate_chunk(&pcxt->estimator, enumblacklistlen);
        /* If you add more chunks here, you probably need to add keys. */
        shm_toc_estimate_keys(&pcxt->estimator, 10);

        /* Estimate space needed for error queues. */
        StaticAssertStmt(BUFFERALIGN(PARALLEL_ERROR_QUEUE_SIZE) ==
                         PARALLEL_ERROR_QUEUE_SIZE,
                         "parallel error queue size not buffer-aligned");
        shm_toc_estimate_chunk(&pcxt->estimator,
                               mul_size(PARALLEL_ERROR_QUEUE_SIZE,
                                        pcxt->nworkers));
        shm_toc_estimate_keys(&pcxt->estimator, 1);

        /* Estimate how much we'll need for the entrypoint info. */
        shm_toc_estimate_chunk(&pcxt->estimator, strlen(pcxt->library_name) +
                               strlen(pcxt->function_name) + 2);
        shm_toc_estimate_keys(&pcxt->estimator, 1);
    }

    /*
     * Create DSM and initialize with new table of contents.  But if the user
     * didn't request any workers, then don't bother creating a dynamic shared
     * memory segment; instead, just use backend-private memory.
     *
     * Also, if we can't create a dynamic shared memory segment because the
     * maximum number of segments has already been created, then fall back to
     * backend-private memory, and plan not to use any workers.  We hope this
     * won't happen very often, but it's better to abandon the use of
     * parallelism than to fail outright.
     */
    segsize = shm_toc_estimate(&pcxt->estimator);
    if (pcxt->nworkers > 0)
        pcxt->seg = dsm_create(segsize, DSM_CREATE_NULL_IF_MAXSEGMENTS);
    if (pcxt->seg != NULL)
        pcxt->toc = shm_toc_create(PARALLEL_MAGIC,
                                   dsm_segment_address(pcxt->seg),
                                   segsize);
    else
    {
        pcxt->nworkers = 0;
        pcxt->private_memory = MemoryContextAlloc(TopMemoryContext, segsize);
        pcxt->toc = shm_toc_create(PARALLEL_MAGIC, pcxt->private_memory,
                                   segsize);
    }

    /* Initialize fixed-size state in shared memory. */
    fps = (FixedParallelState *)
        shm_toc_allocate(pcxt->toc, sizeof(FixedParallelState));
    fps->database_id = MyDatabaseId;
    fps->authenticated_user_id = GetAuthenticatedUserId();
    fps->outer_user_id = GetCurrentRoleId();
    fps->is_superuser = session_auth_is_superuser;
    GetUserIdAndSecContext(&fps->current_user_id, &fps->sec_context);
    GetTempNamespaceState(&fps->temp_namespace_id,
                          &fps->temp_toast_namespace_id);
    fps->parallel_master_pgproc = MyProc;
    fps->parallel_master_pid = MyProcPid;
    fps->parallel_master_backend_id = MyBackendId;
    fps->xact_ts = GetCurrentTransactionStartTimestamp();
    fps->stmt_ts = GetCurrentStatementStartTimestamp();
    fps->serializable_xact_handle = ShareSerializableXact();
    SpinLockInit(&fps->mutex);
    fps->last_xlog_end = 0;
    shm_toc_insert(pcxt->toc, PARALLEL_KEY_FIXED, fps);

    /* We can skip the rest of this if we're not budgeting for any workers. */
    if (pcxt->nworkers > 0)
    {
        char       *libraryspace;
        char       *gucspace;
        char       *combocidspace;
        char       *tsnapspace;
        char       *asnapspace;
        char       *tstatespace;
        char       *reindexspace;
        char       *relmapperspace;
        char       *error_queue_space;
        char       *session_dsm_handle_space;
        char       *entrypointstate;
        char       *enumblacklistspace;
        Size        lnamelen;

        /* Serialize shared libraries we have loaded. */
        libraryspace = shm_toc_allocate(pcxt->toc, library_len);
        SerializeLibraryState(library_len, libraryspace);
        shm_toc_insert(pcxt->toc, PARALLEL_KEY_LIBRARY, libraryspace);

        /* Serialize GUC settings. */
        gucspace = shm_toc_allocate(pcxt->toc, guc_len);
        SerializeGUCState(guc_len, gucspace);
        shm_toc_insert(pcxt->toc, PARALLEL_KEY_GUC, gucspace);

        /* Serialize combo CID state. */
        combocidspace = shm_toc_allocate(pcxt->toc, combocidlen);
        SerializeComboCIDState(combocidlen, combocidspace);
        shm_toc_insert(pcxt->toc, PARALLEL_KEY_COMBO_CID, combocidspace);

        /* Serialize transaction snapshot and active snapshot. */
        tsnapspace = shm_toc_allocate(pcxt->toc, tsnaplen);
        SerializeSnapshot(transaction_snapshot, tsnapspace);
        shm_toc_insert(pcxt->toc, PARALLEL_KEY_TRANSACTION_SNAPSHOT,
                       tsnapspace);
        asnapspace = shm_toc_allocate(pcxt->toc, asnaplen);
        SerializeSnapshot(active_snapshot, asnapspace);
        shm_toc_insert(pcxt->toc, PARALLEL_KEY_ACTIVE_SNAPSHOT, asnapspace);

        /* Provide the handle for per-session segment. */
        session_dsm_handle_space = shm_toc_allocate(pcxt->toc,
                                                    sizeof(dsm_handle));
        *(dsm_handle *) session_dsm_handle_space = session_dsm_handle;
        shm_toc_insert(pcxt->toc, PARALLEL_KEY_SESSION_DSM,
                       session_dsm_handle_space);

        /* Serialize transaction state. */
        tstatespace = shm_toc_allocate(pcxt->toc, tstatelen);
        SerializeTransactionState(tstatelen, tstatespace);
        shm_toc_insert(pcxt->toc, PARALLEL_KEY_TRANSACTION_STATE, tstatespace);

        /* Serialize reindex state. */
        reindexspace = shm_toc_allocate(pcxt->toc, reindexlen);
        SerializeReindexState(reindexlen, reindexspace);
        shm_toc_insert(pcxt->toc, PARALLEL_KEY_REINDEX_STATE, reindexspace);

        /* Serialize relmapper state. */
        relmapperspace = shm_toc_allocate(pcxt->toc, relmapperlen);
        SerializeRelationMap(relmapperlen, relmapperspace);
        shm_toc_insert(pcxt->toc, PARALLEL_KEY_RELMAPPER_STATE,
                       relmapperspace);

        /* Serialize enum blacklist state. */
        enumblacklistspace = shm_toc_allocate(pcxt->toc, enumblacklistlen);
        SerializeEnumBlacklist(enumblacklistspace, enumblacklistlen);
        shm_toc_insert(pcxt->toc, PARALLEL_KEY_ENUMBLACKLIST,
                       enumblacklistspace);

        /* Allocate space for worker information. */
        pcxt->worker = palloc0(sizeof(ParallelWorkerInfo) * pcxt->nworkers);

        /*
         * Establish error queues in dynamic shared memory.
         *
         * These queues should be used only for transmitting ErrorResponse,
         * NoticeResponse, and NotifyResponse protocol messages.  Tuple data
         * should be transmitted via separate (possibly larger?) queues.
         */
        error_queue_space =
            shm_toc_allocate(pcxt->toc,
                             mul_size(PARALLEL_ERROR_QUEUE_SIZE,
                                      pcxt->nworkers));
        for (i = 0; i < pcxt->nworkers; ++i)
        {
            char       *start;
            shm_mq     *mq;

            start = error_queue_space + i * PARALLEL_ERROR_QUEUE_SIZE;
            mq = shm_mq_create(start, PARALLEL_ERROR_QUEUE_SIZE);
            shm_mq_set_receiver(mq, MyProc);
            pcxt->worker[i].error_mqh = shm_mq_attach(mq, pcxt->seg, NULL);
        }
        shm_toc_insert(pcxt->toc, PARALLEL_KEY_ERROR_QUEUE, error_queue_space);

        /*
         * Serialize entrypoint information.  It's unsafe to pass function
         * pointers across processes, as the function pointer may be different
         * in each process in EXEC_BACKEND builds, so we always pass library
         * and function name.  (We use library name "postgres" for functions
         * in the core backend.)
         */
        lnamelen = strlen(pcxt->library_name);
        entrypointstate = shm_toc_allocate(pcxt->toc, lnamelen +
                                           strlen(pcxt->function_name) + 2);
        strcpy(entrypointstate, pcxt->library_name);
        strcpy(entrypointstate + lnamelen + 1, pcxt->function_name);
        shm_toc_insert(pcxt->toc, PARALLEL_KEY_ENTRYPOINT, entrypointstate);
    }

    /* Restore previous memory context. */
    MemoryContextSwitchTo(oldcontext);
}

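/*
 * Sketch of how a caller typically wraps the function above with its own
 * state (hypothetical key and struct names, shown for illustration only):
 *
 *      shm_toc_estimate_chunk(&pcxt->estimator, sizeof(MyModuleShared));
 *      shm_toc_estimate_keys(&pcxt->estimator, 1);
 *      InitializeParallelDSM(pcxt);
 *      shared = shm_toc_allocate(pcxt->toc, sizeof(MyModuleShared));
 *      ... initialize *shared ...
 *      shm_toc_insert(pcxt->toc, MYMODULE_KEY_PLAN, shared);
 */
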
/*
 * Reinitialize the dynamic shared memory segment for a parallel context such
 * that we could launch workers for it again.
 */
void
ReinitializeParallelDSM(ParallelContext *pcxt)
{
    FixedParallelState *fps;

    /* Wait for any old workers to exit. */
    if (pcxt->nworkers_launched > 0)
    {
        WaitForParallelWorkersToFinish(pcxt);
        WaitForParallelWorkersToExit(pcxt);
        pcxt->nworkers_launched = 0;
        if (pcxt->known_attached_workers)
        {
            pfree(pcxt->known_attached_workers);
            pcxt->known_attached_workers = NULL;
            pcxt->nknown_attached_workers = 0;
        }
    }

    /* Reset a few bits of fixed parallel state to a clean state. */
    fps = shm_toc_lookup(pcxt->toc, PARALLEL_KEY_FIXED, false);
    fps->last_xlog_end = 0;

    /* Recreate error queues (if they exist). */
    if (pcxt->nworkers > 0)
    {
        char       *error_queue_space;
        int         i;

        error_queue_space =
            shm_toc_lookup(pcxt->toc, PARALLEL_KEY_ERROR_QUEUE, false);
        for (i = 0; i < pcxt->nworkers; ++i)
        {
            char       *start;
            shm_mq     *mq;

            start = error_queue_space + i * PARALLEL_ERROR_QUEUE_SIZE;
            mq = shm_mq_create(start, PARALLEL_ERROR_QUEUE_SIZE);
            shm_mq_set_receiver(mq, MyProc);
            pcxt->worker[i].error_mqh = shm_mq_attach(mq, pcxt->seg, NULL);
        }
    }
}

/*
 * Reinitialize parallel workers for a parallel context such that we can
 * launch a different number of workers.  This is required for cases where
 * we need to reuse the same DSM segment, but the number of workers can
 * vary from run to run.
 */
void
ReinitializeParallelWorkers(ParallelContext *pcxt, int nworkers_to_launch)
{
    /*
     * The number of workers to be launched must not exceed the number of
     * workers with which the parallel context was initialized.
     */
    Assert(pcxt->nworkers >= nworkers_to_launch);
    pcxt->nworkers_to_launch = nworkers_to_launch;
}

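/*
 * Together, the two functions above let a parallel context be reused across
 * multiple executions, e.g. for rescans of a parallel query.  A sketch of
 * that pattern (illustration only):
 *
 *      ReinitializeParallelDSM(pcxt);
 *      ... reset caller-specific state in the existing DSM segment ...
 *      ReinitializeParallelWorkers(pcxt, new_nworkers);    (optional)
 *      LaunchParallelWorkers(pcxt);
 */
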
/*
 * Launch parallel workers.
 */
void
LaunchParallelWorkers(ParallelContext *pcxt)
{
    MemoryContext oldcontext;
    BackgroundWorker worker;
    int         i;
    bool        any_registrations_failed = false;

    /* Skip this if we have no workers. */
    if (pcxt->nworkers == 0 || pcxt->nworkers_to_launch == 0)
        return;

    /* We need to be a lock group leader. */
    BecomeLockGroupLeader();

    /* If we do have workers, we'd better have a DSM segment. */
    Assert(pcxt->seg != NULL);

    /* We might be running in a short-lived memory context. */
    oldcontext = MemoryContextSwitchTo(TopTransactionContext);

    /* Configure a worker. */
    memset(&worker, 0, sizeof(worker));
    snprintf(worker.bgw_name, BGW_MAXLEN, "parallel worker for PID %d",
             MyProcPid);
    snprintf(worker.bgw_type, BGW_MAXLEN, "parallel worker");
    worker.bgw_flags =
        BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION
        | BGWORKER_CLASS_PARALLEL;
    worker.bgw_start_time = BgWorkerStart_ConsistentState;
    worker.bgw_restart_time = BGW_NEVER_RESTART;
    sprintf(worker.bgw_library_name, "postgres");
    sprintf(worker.bgw_function_name, "ParallelWorkerMain");
    worker.bgw_main_arg = UInt32GetDatum(dsm_segment_handle(pcxt->seg));
    worker.bgw_notify_pid = MyProcPid;

    /*
     * Start workers.
     *
     * The caller must be able to tolerate ending up with fewer workers than
     * expected, so there is no need to throw an error here if registration
     * fails.  It wouldn't help much anyway, because registering the worker in
     * no way guarantees that it will start up and initialize successfully.
     */
    for (i = 0; i < pcxt->nworkers_to_launch; ++i)
    {
        memcpy(worker.bgw_extra, &i, sizeof(int));
        if (!any_registrations_failed &&
            RegisterDynamicBackgroundWorker(&worker,
                                            &pcxt->worker[i].bgwhandle))
        {
            shm_mq_set_handle(pcxt->worker[i].error_mqh,
                              pcxt->worker[i].bgwhandle);
            pcxt->nworkers_launched++;
        }
        else
        {
            /*
             * If we weren't able to register the worker, then we've bumped up
             * against the max_worker_processes limit, and future
             * registrations will probably fail too, so arrange to skip them.
             * But we still have to execute this code for the remaining slots
             * to make sure that we forget about the error queues we budgeted
             * for those workers.  Otherwise, we'll wait for them to start,
             * but they never will.
             */
            any_registrations_failed = true;
            pcxt->worker[i].bgwhandle = NULL;
            shm_mq_detach(pcxt->worker[i].error_mqh);
            pcxt->worker[i].error_mqh = NULL;
        }
    }

    /*
     * Now that nworkers_launched has taken its final value, we can initialize
     * known_attached_workers.
     */
    if (pcxt->nworkers_launched > 0)
    {
        pcxt->known_attached_workers =
            palloc0(sizeof(bool) * pcxt->nworkers_launched);
        pcxt->nknown_attached_workers = 0;
    }

    /* Restore previous memory context. */
    MemoryContextSwitchTo(oldcontext);
}

/*
 * Wait for all workers to attach to their error queues, and throw an error if
 * any worker fails to do this.
 *
 * Callers can assume that if this function returns successfully, then the
 * number of workers given by pcxt->nworkers_launched have initialized and
 * attached to their error queues.  Whether or not these workers are guaranteed
 * to still be running depends on what code the caller asked them to run;
 * this function does not guarantee that they have not exited.  However, it
 * does guarantee that any workers which exited must have done so cleanly and
 * after successfully performing the work with which they were tasked.
 *
 * If this function is not called, then some of the workers that were launched
 * may not have been started due to a fork() failure, or may have exited during
 * early startup prior to attaching to the error queue, so nworkers_launched
 * cannot be viewed as completely reliable.  It will never be less than the
 * number of workers which actually started, but it might be more.  Any workers
 * that failed to start will still be discovered by
 * WaitForParallelWorkersToFinish and an error will be thrown at that time,
 * provided that function is eventually reached.
 *
 * In general, the leader process should do as much work as possible before
 * calling this function.  fork() failures and other early-startup failures
 * are very uncommon, and having the leader sit idle when it could be doing
 * useful work is undesirable.  However, if the leader needs to wait for
 * all of its workers or for a specific worker, it may want to call this
 * function before doing so.  If not, it must make some other provision for
 * the failure-to-start case, lest it wait forever.  On the other hand, a
 * leader which never waits for a worker that might not be started yet, or
 * at least never does so prior to WaitForParallelWorkersToFinish(), need not
 * call this function at all.
 */
void
WaitForParallelWorkersToAttach(ParallelContext *pcxt)
{
    int         i;

    /* Skip this if we have no launched workers. */
    if (pcxt->nworkers_launched == 0)
        return;

    for (;;)
    {
        /*
         * This will process any parallel messages that are pending and it may
         * also throw an error propagated from a worker.
         */
        CHECK_FOR_INTERRUPTS();

        for (i = 0; i < pcxt->nworkers_launched; ++i)
        {
            BgwHandleStatus status;
            shm_mq     *mq;
            int         rc;
            pid_t       pid;

            if (pcxt->known_attached_workers[i])
                continue;

            /*
             * If error_mqh is NULL, then the worker has already exited
             * cleanly.
             */
            if (pcxt->worker[i].error_mqh == NULL)
            {
                pcxt->known_attached_workers[i] = true;
                ++pcxt->nknown_attached_workers;
                continue;
            }

            status = GetBackgroundWorkerPid(pcxt->worker[i].bgwhandle, &pid);
            if (status == BGWH_STARTED)
            {
                /* Has the worker attached to the error queue? */
                mq = shm_mq_get_queue(pcxt->worker[i].error_mqh);
                if (shm_mq_get_sender(mq) != NULL)
                {
                    /* Yes, so it is known to be attached. */
                    pcxt->known_attached_workers[i] = true;
                    ++pcxt->nknown_attached_workers;
                }
            }
            else if (status == BGWH_STOPPED)
            {
                /*
                 * If the worker stopped without attaching to the error queue,
                 * throw an error.
                 */
                mq = shm_mq_get_queue(pcxt->worker[i].error_mqh);
                if (shm_mq_get_sender(mq) == NULL)
                    ereport(ERROR,
                            (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                             errmsg("parallel worker failed to initialize"),
                             errhint("More details may be available in the server log.")));

                pcxt->known_attached_workers[i] = true;
                ++pcxt->nknown_attached_workers;
            }
            else
            {
                /*
                 * Worker not yet started, so we must wait.  The postmaster
                 * will notify us if the worker's state changes.  Our latch
                 * might also get set for some other reason, but if so we'll
                 * just end up waiting for the same worker again.
                 */
                rc = WaitLatch(MyLatch,
                               WL_LATCH_SET | WL_EXIT_ON_PM_DEATH, -1,
                               WAIT_EVENT_BGWORKER_STARTUP);

                if (rc & WL_LATCH_SET)
                    ResetLatch(MyLatch);
            }
        }

        /* If all workers are known to have started, we're done. */
        if (pcxt->nknown_attached_workers >= pcxt->nworkers_launched)
        {
            Assert(pcxt->nknown_attached_workers == pcxt->nworkers_launched);
            break;
        }
    }
}

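/*
 * For example (sketch only): a leader that blocks on per-worker queues
 * before doing any work of its own should first make sure every launched
 * worker actually came up:
 *
 *      LaunchParallelWorkers(pcxt);
 *      WaitForParallelWorkersToAttach(pcxt);
 *      ... now it is safe to wait for data from a specific worker ...
 */
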
/*
 * Wait for all workers to finish computing.
 *
 * Even if the parallel operation seems to have completed successfully, it's
 * important to call this function afterwards.  We must not miss any errors
 * the workers may have thrown during the parallel operation, or any that they
 * may yet throw while shutting down.
 *
 * Also, we want to update our notion of XactLastRecEnd based on worker
 * feedback.
 */
void
WaitForParallelWorkersToFinish(ParallelContext *pcxt)
{
    for (;;)
    {
        bool        anyone_alive = false;
        int         nfinished = 0;
        int         i;

        /*
         * This will process any parallel messages that are pending, which may
         * change the outcome of the loop that follows.  It may also throw an
         * error propagated from a worker.
         */
        CHECK_FOR_INTERRUPTS();

        for (i = 0; i < pcxt->nworkers_launched; ++i)
        {
            /*
             * If error_mqh is NULL, then the worker has already exited
             * cleanly.  If we have received a message through error_mqh from
             * the worker, we know it started up cleanly, and therefore we're
             * certain to be notified when it exits.
             */
            if (pcxt->worker[i].error_mqh == NULL)
                ++nfinished;
            else if (pcxt->known_attached_workers[i])
            {
                anyone_alive = true;
                break;
            }
        }

        if (!anyone_alive)
        {
            /* If all workers are known to have finished, we're done. */
            if (nfinished >= pcxt->nworkers_launched)
            {
                Assert(nfinished == pcxt->nworkers_launched);
                break;
            }

            /*
             * We didn't detect any living workers, but not all workers are
             * known to have exited cleanly.  Either not all workers have
             * launched yet, or maybe some of them failed to start or
             * terminated abnormally.
             */
            for (i = 0; i < pcxt->nworkers_launched; ++i)
            {
                pid_t       pid;
                shm_mq     *mq;

                /*
                 * If the worker is BGWH_NOT_YET_STARTED or BGWH_STARTED, we
                 * should just keep waiting.  If it is BGWH_STOPPED, then
                 * further investigation is needed.
                 */
                if (pcxt->worker[i].error_mqh == NULL ||
                    pcxt->worker[i].bgwhandle == NULL ||
                    GetBackgroundWorkerPid(pcxt->worker[i].bgwhandle,
                                           &pid) != BGWH_STOPPED)
                    continue;

                /*
                 * Check whether the worker ended up stopped without ever
                 * attaching to the error queue.  If so, the postmaster was
                 * unable to fork the worker or it exited without initializing
                 * properly.  We must throw an error, since the caller may
                 * have been expecting the worker to do some work before
                 * exiting.
                 */
                mq = shm_mq_get_queue(pcxt->worker[i].error_mqh);
                if (shm_mq_get_sender(mq) == NULL)
                    ereport(ERROR,
                            (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                             errmsg("parallel worker failed to initialize"),
                             errhint("More details may be available in the server log.")));

                /*
                 * The worker is stopped, but is attached to the error queue.
                 * Unless there's a bug somewhere, this will only happen when
                 * the worker writes messages and terminates after the
                 * CHECK_FOR_INTERRUPTS() near the top of this function and
                 * before the call to GetBackgroundWorkerPid().  In that case,
                 * our latch should have been set as well and the right things
                 * will happen on the next pass through the loop.
                 */
            }
        }

        (void) WaitLatch(MyLatch, WL_LATCH_SET | WL_EXIT_ON_PM_DEATH, -1,
                         WAIT_EVENT_PARALLEL_FINISH);
        ResetLatch(MyLatch);
    }

    if (pcxt->toc != NULL)
    {
        FixedParallelState *fps;

        fps = shm_toc_lookup(pcxt->toc, PARALLEL_KEY_FIXED, false);
        if (fps->last_xlog_end > XactLastRecEnd)
            XactLastRecEnd = fps->last_xlog_end;
    }
}

/*
 * Wait for all workers to exit.
 *
 * This function ensures that workers have been completely shut down.  The
 * difference between WaitForParallelWorkersToFinish and this function is
 * that the former just ensures that the last message sent by a worker
 * backend has been received by the master backend, whereas this one ensures
 * that the workers have shut down completely.
 */
static void
WaitForParallelWorkersToExit(ParallelContext *pcxt)
{
    int         i;

    /* Wait until the workers actually die. */
    for (i = 0; i < pcxt->nworkers_launched; ++i)
    {
        BgwHandleStatus status;

        if (pcxt->worker == NULL || pcxt->worker[i].bgwhandle == NULL)
            continue;

        status = WaitForBackgroundWorkerShutdown(pcxt->worker[i].bgwhandle);

        /*
         * If the postmaster kicked the bucket, we have no chance of cleaning
         * up safely -- we won't be able to tell when our workers are actually
         * dead.  This doesn't necessitate a PANIC since they will all abort
         * eventually, but we can't safely continue this session.
         */
        if (status == BGWH_POSTMASTER_DIED)
            ereport(FATAL,
                    (errcode(ERRCODE_ADMIN_SHUTDOWN),
                     errmsg("postmaster exited during a parallel transaction")));

        /* Release memory. */
        pfree(pcxt->worker[i].bgwhandle);
        pcxt->worker[i].bgwhandle = NULL;
    }
}

/*
 * Destroy a parallel context.
 *
 * If expecting a clean exit, you should use WaitForParallelWorkersToFinish()
 * first, before calling this function.  When this function is invoked, any
 * remaining workers are forcibly killed; the dynamic shared memory segment
 * is unmapped; and we then wait (uninterruptibly) for the workers to exit.
 */
void
DestroyParallelContext(ParallelContext *pcxt)
{
    int         i;

    /*
     * Be careful about order of operations here!  We remove the parallel
     * context from the list before we do anything else; otherwise, if an
     * error occurs during a subsequent step, we might try to nuke it again
     * from AtEOXact_Parallel or AtEOSubXact_Parallel.
     */
    dlist_delete(&pcxt->node);

    /* Kill each worker in turn, and forget their error queues. */
    if (pcxt->worker != NULL)
    {
        for (i = 0; i < pcxt->nworkers_launched; ++i)
        {
            if (pcxt->worker[i].error_mqh != NULL)
            {
                TerminateBackgroundWorker(pcxt->worker[i].bgwhandle);

                shm_mq_detach(pcxt->worker[i].error_mqh);
                pcxt->worker[i].error_mqh = NULL;
            }
        }
    }

    /*
     * If we have allocated a shared memory segment, detach it.  This will
     * implicitly detach the error queues, and any other shared memory queues,
     * stored there.
     */
    if (pcxt->seg != NULL)
    {
        dsm_detach(pcxt->seg);
        pcxt->seg = NULL;
    }

    /*
     * If this parallel context is actually in backend-private memory rather
     * than shared memory, free that memory instead.
     */
    if (pcxt->private_memory != NULL)
    {
        pfree(pcxt->private_memory);
        pcxt->private_memory = NULL;
    }

    /*
     * We can't finish transaction commit or abort until all of the workers
     * have exited.  This means, in particular, that we can't respond to
     * interrupts at this stage.
     */
    HOLD_INTERRUPTS();
    WaitForParallelWorkersToExit(pcxt);
    RESUME_INTERRUPTS();

    /* Free the worker array itself. */
    if (pcxt->worker != NULL)
    {
        pfree(pcxt->worker);
        pcxt->worker = NULL;
    }

    /* Free memory. */
    pfree(pcxt->library_name);
    pfree(pcxt->function_name);
    pfree(pcxt);
}

/*
 * Are there any parallel contexts currently active?
 */
bool
ParallelContextActive(void)
{
    return !dlist_is_empty(&pcxt_list);
}

/*
 * Handle receipt of an interrupt indicating a parallel worker message.
 *
 * Note: this is called within a signal handler!  All we can do is set
 * a flag that will cause the next CHECK_FOR_INTERRUPTS() to invoke
 * HandleParallelMessages().
 */
void
HandleParallelMessageInterrupt(void)
{
    InterruptPending = true;
    ParallelMessagePending = true;
    SetLatch(MyLatch);
}

/*
 * Handle any queued protocol messages received from parallel workers.
 */
void
HandleParallelMessages(void)
{
    dlist_iter  iter;
    MemoryContext oldcontext;

    static MemoryContext hpm_context = NULL;

    /*
     * This is invoked from ProcessInterrupts(), and since some of the
     * functions it calls contain CHECK_FOR_INTERRUPTS(), there is a potential
     * for recursive calls if more signals are received while this runs.  It's
     * unclear that recursive entry would be safe, and it doesn't seem useful
     * even if it is safe, so let's block interrupts until done.
     */
    HOLD_INTERRUPTS();

    /*
     * Moreover, CurrentMemoryContext might be pointing almost anywhere.  We
     * don't want to risk leaking data into long-lived contexts, so let's do
     * our work here in a private context that we can reset on each use.
     */
    if (hpm_context == NULL)    /* first time through? */
        hpm_context = AllocSetContextCreate(TopMemoryContext,
                                            "HandleParallelMessages",
                                            ALLOCSET_DEFAULT_SIZES);
    else
        MemoryContextReset(hpm_context);

    oldcontext = MemoryContextSwitchTo(hpm_context);

    /* OK to process messages.  Reset the flag saying there are more to do. */
    ParallelMessagePending = false;

    dlist_foreach(iter, &pcxt_list)
    {
        ParallelContext *pcxt;
        int         i;

        pcxt = dlist_container(ParallelContext, node, iter.cur);
        if (pcxt->worker == NULL)
            continue;

        for (i = 0; i < pcxt->nworkers_launched; ++i)
        {
            /*
             * Read as many messages as we can from each worker, but stop when
             * either (1) the worker's error queue goes away, which can happen
             * if we receive a Terminate message from the worker; or (2) no
             * more messages can be read from the worker without blocking.
             */
            while (pcxt->worker[i].error_mqh != NULL)
            {
                shm_mq_result res;
                Size        nbytes;
                void       *data;

                res = shm_mq_receive(pcxt->worker[i].error_mqh, &nbytes,
                                     &data, true);
                if (res == SHM_MQ_WOULD_BLOCK)
                    break;
                else if (res == SHM_MQ_SUCCESS)
                {
                    StringInfoData msg;

                    initStringInfo(&msg);
                    appendBinaryStringInfo(&msg, data, nbytes);
                    HandleParallelMessage(pcxt, i, &msg);
                    pfree(msg.data);
                }
                else
                    ereport(ERROR,
                            (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                             errmsg("lost connection to parallel worker")));
            }
        }
    }

    MemoryContextSwitchTo(oldcontext);

    /* Might as well clear the context on our way out */
    MemoryContextReset(hpm_context);

    RESUME_INTERRUPTS();
}

/*
 * Handle a single protocol message received from a single parallel worker.
 */
static void
HandleParallelMessage(ParallelContext *pcxt, int i, StringInfo msg)
{
    char        msgtype;

    if (pcxt->known_attached_workers != NULL &&
        !pcxt->known_attached_workers[i])
    {
        pcxt->known_attached_workers[i] = true;
        pcxt->nknown_attached_workers++;
    }

    msgtype = pq_getmsgbyte(msg);

    switch (msgtype)
    {
        case 'K':               /* BackendKeyData */
            {
                int32       pid = pq_getmsgint(msg, 4);

                (void) pq_getmsgint(msg, 4);    /* discard cancel key */
                (void) pq_getmsgend(msg);
                pcxt->worker[i].pid = pid;
                break;
            }

        case 'E':               /* ErrorResponse */
        case 'N':               /* NoticeResponse */
            {
                ErrorData   edata;
                ErrorContextCallback *save_error_context_stack;

                /* Parse ErrorResponse or NoticeResponse. */
                pq_parse_errornotice(msg, &edata);

                /* Death of a worker isn't enough justification for suicide. */
                edata.elevel = Min(edata.elevel, ERROR);

                /*
                 * If desired, add a context line to show that this is a
                 * message propagated from a parallel worker.  Otherwise, it
                 * can sometimes be confusing to understand what actually
                 * happened.  (We don't do this in FORCE_PARALLEL_REGRESS mode
                 * because it causes test-result instability depending on
                 * whether a parallel worker is actually used or not.)
                 */
                if (force_parallel_mode != FORCE_PARALLEL_REGRESS)
                {
                    if (edata.context)
                        edata.context = psprintf("%s\n%s", edata.context,
                                                 _("parallel worker"));
                    else
                        edata.context = pstrdup(_("parallel worker"));
                }

                /*
                 * Context beyond that should use the error context callbacks
                 * that were in effect when the ParallelContext was created,
                 * not the current ones.
                 */
                save_error_context_stack = error_context_stack;
                error_context_stack = pcxt->error_context_stack;

                /* Rethrow error or print notice. */
                ThrowErrorData(&edata);

                /* Not an error, so restore previous context stack. */
                error_context_stack = save_error_context_stack;

                break;
            }

        case 'A':               /* NotifyResponse */
            {
                /* Propagate NotifyResponse. */
                int32       pid;
                const char *channel;
                const char *payload;

                pid = pq_getmsgint(msg, 4);
                channel = pq_getmsgrawstring(msg);
                payload = pq_getmsgrawstring(msg);
                pq_endmessage(msg);

                NotifyMyFrontEnd(channel, payload, pid);

                break;
            }

        case 'X':               /* Terminate, indicating clean exit */
            {
                shm_mq_detach(pcxt->worker[i].error_mqh);
                pcxt->worker[i].error_mqh = NULL;
                break;
            }

        default:
            {
                elog(ERROR, "unrecognized message type received from parallel worker: %c (message length %d bytes)",
                     msgtype, msg->len);
            }
    }
}

/*
 * End-of-subtransaction cleanup for parallel contexts.
 *
 * Currently, it's forbidden to enter or leave a subtransaction while
 * parallel mode is in effect, so we could just blow away everything.  But
 * we may want to relax that restriction in the future, so this code
 * contemplates that there may be multiple subtransaction IDs in pcxt_list.
 */
void
AtEOSubXact_Parallel(bool isCommit, SubTransactionId mySubId)
{
    while (!dlist_is_empty(&pcxt_list))
    {
        ParallelContext *pcxt;

        pcxt = dlist_head_element(ParallelContext, node, &pcxt_list);
        if (pcxt->subid != mySubId)
            break;
        if (isCommit)
            elog(WARNING, "leaked parallel context");
        DestroyParallelContext(pcxt);
    }
}

/*
 * End-of-transaction cleanup for parallel contexts.
 */
void
AtEOXact_Parallel(bool isCommit)
{
    while (!dlist_is_empty(&pcxt_list))
    {
        ParallelContext *pcxt;

        pcxt = dlist_head_element(ParallelContext, node, &pcxt_list);
        if (isCommit)
            elog(WARNING, "leaked parallel context");
        DestroyParallelContext(pcxt);
    }
}

/*
 * Main entrypoint for parallel workers.
 */
void
ParallelWorkerMain(Datum main_arg)
{
    dsm_segment *seg;
    shm_toc    *toc;
    FixedParallelState *fps;
    char       *error_queue_space;
    shm_mq     *mq;
    shm_mq_handle *mqh;
    char       *libraryspace;
    char       *entrypointstate;
    char       *library_name;
    char       *function_name;
    parallel_worker_main_type entrypt;
    char       *gucspace;
    char       *combocidspace;
    char       *tsnapspace;
    char       *asnapspace;
    char       *tstatespace;
    char       *reindexspace;
    char       *relmapperspace;
    char       *enumblacklistspace;
    StringInfoData msgbuf;
    char       *session_dsm_handle_space;

    /* Set flag to indicate that we're initializing a parallel worker. */
    InitializingParallelWorker = true;

    /* Establish signal handlers. */
    pqsignal(SIGTERM, die);
    BackgroundWorkerUnblockSignals();

    /* Determine and set our parallel worker number. */
    Assert(ParallelWorkerNumber == -1);
    memcpy(&ParallelWorkerNumber, MyBgworkerEntry->bgw_extra, sizeof(int));

    /* Set up a memory context to work in, just for cleanliness. */
    CurrentMemoryContext = AllocSetContextCreate(TopMemoryContext,
                                                 "Parallel worker",
                                                 ALLOCSET_DEFAULT_SIZES);

    /*
     * Attach to the dynamic shared memory segment for the parallel query, and
     * find its table of contents.
     *
     * Note: at this point, we have not created any ResourceOwner in this
     * process.  This will result in our DSM mapping surviving until process
     * exit, which is fine.  If there were a ResourceOwner, it would acquire
     * ownership of the mapping, but we have no need for that.
     */
    seg = dsm_attach(DatumGetUInt32(main_arg));
    if (seg == NULL)
        ereport(ERROR,
                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                 errmsg("could not map dynamic shared memory segment")));
    toc = shm_toc_attach(PARALLEL_MAGIC, dsm_segment_address(seg));
    if (toc == NULL)
        ereport(ERROR,
                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                 errmsg("invalid magic number in dynamic shared memory segment")));

    /* Look up fixed parallel state. */
    fps = shm_toc_lookup(toc, PARALLEL_KEY_FIXED, false);
    MyFixedParallelState = fps;

    /* Arrange to signal the leader if we exit. */
    ParallelMasterPid = fps->parallel_master_pid;
    ParallelMasterBackendId = fps->parallel_master_backend_id;
    on_shmem_exit(ParallelWorkerShutdown, (Datum) 0);

    /*
     * Now we can find and attach to the error queue provided for us.  That's
     * good, because until we do that, any errors that happen here will not be
     * reported back to the process that requested that this worker be
     * launched.
     */
    error_queue_space = shm_toc_lookup(toc, PARALLEL_KEY_ERROR_QUEUE, false);
    mq = (shm_mq *) (error_queue_space +
                     ParallelWorkerNumber * PARALLEL_ERROR_QUEUE_SIZE);
    shm_mq_set_sender(mq, MyProc);
    mqh = shm_mq_attach(mq, seg, NULL);
    pq_redirect_to_shm_mq(seg, mqh);
    pq_set_parallel_master(fps->parallel_master_pid,
                           fps->parallel_master_backend_id);

    /*
     * Send a BackendKeyData message to the process that initiated parallelism
     * so that it has access to our PID before it receives any other messages
     * from us.  Our cancel key is sent, too, since that's the way the
     * protocol message is defined, but it won't actually be used for anything
     * in this case.
     */
    pq_beginmessage(&msgbuf, 'K');
    pq_sendint32(&msgbuf, (int32) MyProcPid);
    pq_sendint32(&msgbuf, (int32) MyCancelKey);
    pq_endmessage(&msgbuf);

    /*
     * Hooray!  Primary initialization is complete.  Now, we need to set up
     * our backend-local state to match the original backend.
     */

    /*
     * Join locking group.  We must do this before anything that could try to
     * acquire a heavyweight lock, because any heavyweight locks acquired to
     * this point could block either directly against the parallel group
     * leader or against some process which in turn waits for a lock that
     * conflicts with the parallel group leader, causing an undetected
     * deadlock.  (If we can't join the lock group, the leader has gone away,
     * so just exit quietly.)
     */
    if (!BecomeLockGroupMember(fps->parallel_master_pgproc,
                               fps->parallel_master_pid))
        return;

    /*
     * Restore transaction and statement start-time timestamps.  This must
     * happen before anything that would start a transaction, else asserts in
     * xact.c will fire.
     */
    SetParallelStartTimestamps(fps->xact_ts, fps->stmt_ts);

    /*
     * Identify the entry point to be called.  In theory this could result in
     * loading an additional library, though most likely the entry point is in
     * the core backend or in a library we just loaded.
     */
    entrypointstate = shm_toc_lookup(toc, PARALLEL_KEY_ENTRYPOINT, false);
    library_name = entrypointstate;
    function_name = entrypointstate + strlen(library_name) + 1;

    entrypt = LookupParallelWorkerFunction(library_name, function_name);

    /* Restore database connection. */
    BackgroundWorkerInitializeConnectionByOid(fps->database_id,
                                              fps->authenticated_user_id,
                                              0);

    /*
     * Set the client encoding to the database encoding, since that is what
     * the leader will expect.
     */
    SetClientEncoding(GetDatabaseEncoding());

    /*
     * Load libraries that were loaded by original backend.  We want to do
     * this before restoring GUCs, because the libraries might define custom
     * variables.
     */
    libraryspace = shm_toc_lookup(toc, PARALLEL_KEY_LIBRARY, false);
    StartTransactionCommand();
    RestoreLibraryState(libraryspace);

    /* Restore GUC values from launching backend. */
    gucspace = shm_toc_lookup(toc, PARALLEL_KEY_GUC, false);
    RestoreGUCState(gucspace);
    CommitTransactionCommand();

    /* Crank up a transaction state appropriate to a parallel worker. */
    tstatespace = shm_toc_lookup(toc, PARALLEL_KEY_TRANSACTION_STATE, false);
    StartParallelWorkerTransaction(tstatespace);

    /* Restore combo CID state. */
    combocidspace = shm_toc_lookup(toc, PARALLEL_KEY_COMBO_CID, false);
    RestoreComboCIDState(combocidspace);

    /* Attach to the per-session DSM segment and contained objects. */
    session_dsm_handle_space =
        shm_toc_lookup(toc, PARALLEL_KEY_SESSION_DSM, false);
    AttachSession(*(dsm_handle *) session_dsm_handle_space);

    /* Restore transaction snapshot. */
    tsnapspace = shm_toc_lookup(toc, PARALLEL_KEY_TRANSACTION_SNAPSHOT, false);
    RestoreTransactionSnapshot(RestoreSnapshot(tsnapspace),
                               fps->parallel_master_pgproc);

    /* Restore active snapshot. */
    asnapspace = shm_toc_lookup(toc, PARALLEL_KEY_ACTIVE_SNAPSHOT, false);
    PushActiveSnapshot(RestoreSnapshot(asnapspace));

    /*
     * We've changed which tuples we can see, and must therefore invalidate
     * system caches.
     */
    InvalidateSystemCaches();

    /*
     * Restore current role id.  Skip verifying whether session user is
     * allowed to become this role and blindly restore the leader's state for
     * current role.
     */
    SetCurrentRoleId(fps->outer_user_id, fps->is_superuser);

    /* Restore user ID and security context. */
    SetUserIdAndSecContext(fps->current_user_id, fps->sec_context);

    /* Restore temp-namespace state to ensure search path matches leader's. */
    SetTempNamespaceState(fps->temp_namespace_id,
                          fps->temp_toast_namespace_id);

    /* Restore reindex state. */
    reindexspace = shm_toc_lookup(toc, PARALLEL_KEY_REINDEX_STATE, false);
    RestoreReindexState(reindexspace);

    /* Restore relmapper state. */
    relmapperspace = shm_toc_lookup(toc, PARALLEL_KEY_RELMAPPER_STATE, false);
    RestoreRelationMap(relmapperspace);

    /* Restore enum blacklist. */
    enumblacklistspace = shm_toc_lookup(toc, PARALLEL_KEY_ENUMBLACKLIST,
                                        false);
    RestoreEnumBlacklist(enumblacklistspace);

    /* Attach to the leader's serializable transaction, if SERIALIZABLE. */
    AttachSerializableXact(fps->serializable_xact_handle);

    /*
     * We've initialized all of our state now; nothing should change
     * hereafter.
     */
    InitializingParallelWorker = false;
    EnterParallelMode();

    /*
     * Time to do the real work: invoke the caller-supplied code.
     */
    entrypt(seg, toc);

    /* Must exit parallel mode to pop active snapshot. */
    ExitParallelMode();

    /* Must pop active snapshot so snapmgr.c doesn't complain. */
    PopActiveSnapshot();

    /* Shut down the parallel-worker transaction. */
    EndParallelWorkerTransaction();

    /* Detach from the per-session DSM segment. */
    DetachSession();

    /* Report success. */
    pq_putmessage('X', NULL, 0);
}

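/*
 * Illustration (hypothetical names): an entrypoint reached from the function
 * above must match parallel_worker_main_type; it receives the segment and
 * its table of contents and recovers whatever the leader stashed there:
 *
 *      void
 *      mymodule_worker_main(dsm_segment *seg, shm_toc *toc)
 *      {
 *          MyModuleShared *shared;
 *
 *          shared = shm_toc_lookup(toc, MYMODULE_KEY_PLAN, false);
 *          ... do the caller-defined work ...
 *      }
 */
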
/*
 * Update shared memory with the ending location of the last WAL record we
 * wrote, if it's greater than the value already stored there.
 */
void
ParallelWorkerReportLastRecEnd(XLogRecPtr last_xlog_end)
{
    FixedParallelState *fps = MyFixedParallelState;

    Assert(fps != NULL);
    SpinLockAcquire(&fps->mutex);
    if (fps->last_xlog_end < last_xlog_end)
        fps->last_xlog_end = last_xlog_end;
    SpinLockRelease(&fps->mutex);
}

/*
 * Make sure the leader tries to read from our error queue one more time.
 * This guards against the case where we exit uncleanly without sending an
 * ErrorResponse to the leader, for example because some code calls proc_exit
 * directly.
 */
static void
ParallelWorkerShutdown(int code, Datum arg)
{
    SendProcSignal(ParallelMasterPid,
                   PROCSIG_PARALLEL_MESSAGE,
                   ParallelMasterBackendId);
}

/*
 * Look up (and possibly load) a parallel worker entry point function.
 *
 * For functions contained in the core code, we use library name "postgres"
 * and consult the InternalParallelWorkers array.  External functions are
 * looked up, and loaded if necessary, using load_external_function().
 *
 * The point of this is to pass function names as strings across process
 * boundaries.  We can't pass actual function addresses because of the
 * possibility that the function has been loaded at a different address
 * in a different process.  This is obviously a hazard for functions in
 * loadable libraries, but it can happen even for functions in the core code
 * on platforms using EXEC_BACKEND (e.g., Windows).
 *
 * At some point it might be worthwhile to get rid of InternalParallelWorkers[]
 * in favor of applying load_external_function() for core functions too;
 * but that raises portability issues that are not worth addressing now.
 */
static parallel_worker_main_type
LookupParallelWorkerFunction(const char *libraryname, const char *funcname)
{
    /*
     * If the function is to be loaded from postgres itself, search the
     * InternalParallelWorkers array.
     */
    if (strcmp(libraryname, "postgres") == 0)
    {
        int         i;

        for (i = 0; i < lengthof(InternalParallelWorkers); i++)
        {
            if (strcmp(InternalParallelWorkers[i].fn_name, funcname) == 0)
                return InternalParallelWorkers[i].fn_addr;
        }

        /* We can only reach this by programming error. */
        elog(ERROR, "internal function \"%s\" not found", funcname);
    }

    /* Otherwise load from external library. */
    return (parallel_worker_main_type)
        load_external_function(libraryname, funcname, true, NULL);
}
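
/*
 * Consequently, an extension reaches its own entrypoint by naming its shared
 * library rather than by passing a function pointer; a sketch with
 * hypothetical names:
 *
 *      pcxt = CreateParallelContext("mymodule", "mymodule_worker_main", 4);
 *
 * which this function resolves inside each worker via
 * load_external_function().
 */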
char bgw_extra[BGW_EXTRALEN]
Definition: bgworker.h:98
#define DatumGetUInt32(X)
Definition: postgres.h:486
int slock_t
Definition: s_lock.h:934
void SerializeEnumBlacklist(void *space, Size size)
Definition: pg_enum.c:709
#define PARALLEL_ERROR_QUEUE_SIZE
Definition: parallel.c:54
#define AllocSetContextCreate
Definition: memutils.h:170
shm_toc * shm_toc_create(uint64 magic, void *address, Size nbytes)
Definition: shm_toc.c:40
int MyProcPid
Definition: globals.c:40
int errhint(const char *fmt,...)
Definition: elog.c:1071
BackendId MyBackendId
Definition: globals.c:81
Snapshot RestoreSnapshot(char *start_address)
Definition: snapmgr.c:2161
MemoryContext TopTransactionContext
Definition: mcxt.c:49
uint32 dsm_handle
Definition: dsm_impl.h:54
ParallelContext * CreateParallelContext(const char *library_name, const char *function_name, int nworkers)
Definition: parallel.c:162
void SetUserIdAndSecContext(Oid userid, int sec_context)
Definition: miscinit.c:551
static void dlist_push_head(dlist_head *head, dlist_node *node)
Definition: ilist.h:300
XLogRecPtr XactLastRecEnd
Definition: xlog.c:362
void AttachSerializableXact(SerializableXactHandle handle)
Definition: predicate.c:5123
void shm_mq_detach(shm_mq_handle *mqh)
Definition: shm_mq.c:793
PGPROC * MyProc
Definition: proc.c:67
int64 TimestampTz
Definition: timestamp.h:39
dsm_segment * seg
Definition: parallel.h:43
static void WaitForParallelWorkersToExit(ParallelContext *pcxt)
Definition: parallel.c:852
#define dlist_foreach(iter, lhead)
Definition: ilist.h:507
void SerializeReindexState(Size maxsize, char *start_address)
Definition: index.c:3944
char * pstrdup(const char *in)
Definition: mcxt.c:1186
void CommitTransactionCommand(void)
Definition: xact.c:2898
shm_toc_estimator estimator
Definition: parallel.h:42
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
void EndParallelWorkerTransaction(void)
Definition: xact.c:5275
#define SpinLockInit(lock)
Definition: spin.h:60
void GetTempNamespaceState(Oid *tempNamespaceId, Oid *tempToastNamespaceId)
Definition: namespace.c:3313
void RestoreTransactionSnapshot(Snapshot snapshot, void *master_pgproc)
Definition: snapmgr.c:2225
dsm_segment * dsm_attach(dsm_handle h)
Definition: dsm.c:527
void _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc)
Definition: nbtsort.c:1758
#define Min(x, y)
Definition: c.h:920
PGPROC * shm_mq_get_sender(shm_mq *mq)
Definition: shm_mq.c:249
Oid authenticated_user_id
Definition: parallel.c:83
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
Snapshot GetActiveSnapshot(void)
Definition: snapmgr.c:841
#define PARALLEL_KEY_ENUMBLACKLIST
Definition: parallel.c:76
dsm_handle dsm_segment_handle(dsm_segment *seg)
Definition: dsm.c:960
int bgw_restart_time
Definition: bgworker.h:94
int errcode(int sqlerrcode)
Definition: elog.c:610
PGPROC * parallel_master_pgproc
Definition: parallel.c:90
Oid temp_toast_namespace_id
Definition: parallel.c:87
#define BGWORKER_CLASS_PARALLEL
Definition: bgworker.h:67
void DetachSession(void)
Definition: session.c:201
BackgroundWorker * MyBgworkerEntry
Definition: postmaster.c:192
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:136
bool BecomeLockGroupMember(PGPROC *leader, int pid)
Definition: proc.c:1880
void PopActiveSnapshot(void)
Definition: snapmgr.c:814
int nknown_attached_workers
Definition: parallel.h:47
uint32 SubTransactionId
Definition: c.h:517
#define PARALLEL_KEY_RELMAPPER_STATE
Definition: parallel.c:75
Size shm_toc_estimate(shm_toc_estimator *e)
Definition: shm_toc.c:263
#define lengthof(array)
Definition: c.h:668
void SerializeTransactionState(Size maxsize, char *start_address)
Definition: xact.c:5180
parallel_worker_main_type fn_addr
Definition: parallel.c:134
const char * pq_getmsgrawstring(StringInfo msg)
Definition: pqformat.c:610
unsigned int Oid
Definition: postgres_ext.h:31
#define shm_toc_estimate_chunk(e, sz)
Definition: shm_toc.h:51
void SetLatch(Latch *latch)
Definition: latch.c:457
#define BGWORKER_SHMEM_ACCESS
Definition: bgworker.h:52
Snapshot GetTransactionSnapshot(void)
Definition: snapmgr.c:306
void InvalidateSystemCaches(void)
Definition: inval.c:643
int nworkers_to_launch
Definition: parallel.h:37
char bgw_function_name[BGW_MAXLEN]
Definition: bgworker.h:96
static pid_t ParallelMasterPid
Definition: parallel.c:125
void ReinitializeParallelWorkers(ParallelContext *pcxt, int nworkers_to_launch)
Definition: parallel.c:501
void ResetLatch(Latch *latch)
Definition: latch.c:540
void pq_beginmessage(StringInfo buf, char msgtype)
Definition: pqformat.c:87
void RestoreComboCIDState(char *comboCIDstate)
Definition: combocid.c:343
signed int int32
Definition: c.h:355
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:365
SubTransactionId subid
Definition: parallel.h:35
Oid GetCurrentRoleId(void)
Definition: miscinit.c:798
#define RESUME_INTERRUPTS()
Definition: miscadmin.h:118
ErrorContextCallback * error_context_stack
Definition: elog.c:92
SerializableXactHandle serializable_xact_handle
Definition: parallel.c:95
volatile bool ParallelMessagePending
Definition: parallel.c:113
static void pq_sendint32(StringInfo buf, uint32 i)
Definition: pqformat.h:145
#define DSM_HANDLE_INVALID
Definition: dsm.h:23
#define sprintf
Definition: port.h:194
#define StaticAssertStmt(condition, errmessage)
Definition: c.h:852
void WaitForParallelWorkersToFinish(ParallelContext *pcxt)
Definition: parallel.c:738
#define SpinLockAcquire(lock)
Definition: spin.h:62
void DestroyParallelContext(ParallelContext *pcxt)
Definition: parallel.c:892
int SendProcSignal(pid_t pid, ProcSignalReason reason, BackendId backendId)
Definition: procsignal.c:250
#define dlist_container(type, membername, ptr)
Definition: ilist.h:477
ParallelWorkerInfo * worker
Definition: parallel.h:46
Datum bgw_main_arg
Definition: bgworker.h:97
void pfree(void *pointer)
Definition: mcxt.c:1056
bool IsInParallelMode(void)
Definition: xact.c:996
void SerializeLibraryState(Size maxsize, char *start_address)
Definition: dfmgr.c:727
#define ERROR
Definition: elog.h:43
BgwHandleStatus WaitForBackgroundWorkerShutdown(BackgroundWorkerHandle *handle)
Definition: bgworker.c:1117
void ParallelQueryMain(dsm_segment *seg, shm_toc *toc)
Oid GetAuthenticatedUserId(void)
Definition: miscinit.c:498
#define PARALLEL_KEY_TRANSACTION_SNAPSHOT
Definition: parallel.c:69
char * function_name
Definition: parallel.h:40
void SerializeSnapshot(Snapshot snapshot, char *start_address)
Definition: snapmgr.c:2102
int32 MyCancelKey
Definition: globals.c:44
void pq_parse_errornotice(StringInfo msg, ErrorData *edata)
Definition: pqmq.c:215
#define FATAL
Definition: elog.h:52
shm_mq * shm_mq_create(void *address, Size size)
Definition: shm_mq.c:169
void ExitParallelMode(void)
Definition: xact.c:976
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:192
#define PARALLEL_KEY_FIXED
Definition: parallel.c:64
void HandleParallelMessages(void)
Definition: parallel.c:990
#define PARALLEL_KEY_ERROR_QUEUE
Definition: parallel.c:65
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:361
static const struct @20 InternalParallelWorkers[]
void SetTempNamespaceState(Oid tempNamespaceId, Oid tempToastNamespaceId)
Definition: namespace.c:3329
static void HandleParallelMessage(ParallelContext *pcxt, int i, StringInfo msg)
Definition: parallel.c:1079
#define DSM_CREATE_NULL_IF_MAXSEGMENTS
Definition: dsm.h:20
void PushActiveSnapshot(Snapshot snap)
Definition: snapmgr.c:735
void GetUserIdAndSecContext(Oid *userid, int *sec_context)
Definition: miscinit.c:544
shm_mq_handle * error_mqh
Definition: parallel.h:28
#define PARALLEL_KEY_GUC
Definition: parallel.c:67
int SetClientEncoding(int encoding)
Definition: mbutils.c:208
int ParallelWorkerNumber
Definition: parallel.c:110
BackgroundWorkerHandle * bgwhandle
Definition: parallel.h:27
void SerializeRelationMap(Size maxSize, char *startAddress)
Definition: relmapper.c:657
Size EstimateGUCStateSpace(void)
Definition: guc.c:10177
void AttachSession(dsm_handle handle)
Definition: session.c:155
#define BGW_NEVER_RESTART
Definition: bgworker.h:84
#define shm_toc_initialize_estimator(e)
Definition: shm_toc.h:49
Size EstimateComboCIDStateSpace(void)
Definition: combocid.c:298
#define UInt32GetDatum(X)
Definition: postgres.h:493
MemoryContext CurrentMemoryContext
Definition: mcxt.c:38
static void ParallelWorkerShutdown(int code, Datum arg)
Definition: parallel.c:1493
static void dlist_delete(dlist_node *node)
Definition: ilist.h:358
int nworkers_launched
Definition: parallel.h:38
XLogRecPtr last_xlog_end
Definition: parallel.c:101
BgwHandleStatus
Definition: bgworker.h:102
Size EstimateEnumBlacklistSpace(void)
Definition: pg_enum.c:695
PGFunction load_external_function(const char *filename, const char *funcname, bool signalNotFound, void **filehandle)
Definition: dfmgr.c:107
void LaunchParallelWorkers(ParallelContext *pcxt)
Definition: parallel.c:515
Size EstimateReindexStateSpace(void)
Definition: index.c:3933
void shm_mq_set_sender(shm_mq *mq, PGPROC *proc)
Definition: shm_mq.c:216
void BecomeLockGroupLeader(void)
Definition: proc.c:1850
MemoryContext TopMemoryContext
Definition: mcxt.c:44
TimestampTz GetCurrentTransactionStartTimestamp(void)
Definition: xact.c:782
void ThrowErrorData(ErrorData *edata)
Definition: elog.c:1593
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
#define DLIST_STATIC_INIT(name)
Definition: ilist.h:248
#define WARNING
Definition: elog.h:40
void InitializeParallelDSM(ParallelContext *pcxt)
Definition: parallel.c:200
int elevel
Definition: elog.h:353
bool * known_attached_workers
Definition: parallel.h:48
bool ParallelContextActive(void)
Definition: parallel.c:966
#define SpinLockRelease(lock)
Definition: spin.h:64
#define dlist_head_element(type, membername, lhead)
Definition: ilist.h:487
Size EstimateSnapshotSpace(Snapshot snap)
Definition: snapmgr.c:2078
Size mul_size(Size s1, Size s2)
Definition: shmem.c:515
void * palloc0(Size size)
Definition: mcxt.c:980
static parallel_worker_main_type LookupParallelWorkerFunction(const char *libraryname, const char *funcname)
Definition: parallel.c:1519
void RestoreLibraryState(char *start_address)
Definition: dfmgr.c:749
uintptr_t Datum
Definition: postgres.h:367
void RestoreEnumBlacklist(void *space)
Definition: pg_enum.c:741
dsm_segment * dsm_create(Size size, int flags)
Definition: dsm.c:432
shm_toc * shm_toc_attach(uint64 magic, void *address)
Definition: shm_toc.c:64
int GetDatabaseEncoding(void)
Definition: mbutils.c:1151
int BackendId
Definition: backendid.h:21
Oid MyDatabaseId
Definition: globals.c:85
pid_t parallel_master_pid
Definition: parallel.c:91
Size EstimateLibraryStateSpace(void)
Definition: dfmgr.c:710
void ReinitializeParallelDSM(ParallelContext *pcxt)
Definition: parallel.c:451
void shm_mq_set_handle(shm_mq_handle *mqh, BackgroundWorkerHandle *handle)
Definition: shm_mq.c:310
dlist_node * cur
Definition: ilist.h:161
void BackgroundWorkerInitializeConnectionByOid(Oid dboid, Oid useroid, uint32 flags)
Definition: postmaster.c:5726
#define PARALLEL_MAGIC
Definition: parallel.c:57
void pq_redirect_to_shm_mq(dsm_segment *seg, shm_mq_handle *mqh)
Definition: pqmq.c:55
void * SerializableXactHandle
Definition: predicate.h:37
void ParallelWorkerReportLastRecEnd(XLogRecPtr last_xlog_end)
Definition: parallel.c:1475
void TerminateBackgroundWorker(BackgroundWorkerHandle *handle)
Definition: bgworker.c:1156
#define ereport(elevel,...)
Definition: elog.h:144
pqsigfunc pqsignal(int signum, pqsigfunc handler)
Definition: signal.c:170
int pq_getmsgbyte(StringInfo msg)
Definition: pqformat.c:401
shm_mq_result
Definition: shm_mq.h:36
TimestampTz xact_ts
Definition: parallel.c:93
char * library_name
Definition: parallel.h:39
BackendId parallel_master_backend_id
Definition: parallel.c:92
int force_parallel_mode
Definition: planner.c:70
void * dsm_segment_address(dsm_segment *seg)
Definition: dsm.c:932
uint64 XLogRecPtr
Definition: xlogdefs.h:21
char bgw_name[BGW_MAXLEN]
Definition: bgworker.h:90
#define Assert(condition)
Definition: c.h:738
BackendId ParallelMasterBackendId
Definition: globals.c:83
void StartParallelWorkerTransaction(char *tstatespace)
Definition: xact.c:5250
#define BGWORKER_BACKEND_DATABASE_CONNECTION
Definition: bgworker.h:59
SubTransactionId GetCurrentSubTransactionId(void)
Definition: xact.c:707
Size EstimateTransactionStateSpace(void)
Definition: xact.c:5152
void StartTransactionCommand(void)
Definition: xact.c:2797
#define PARALLEL_KEY_REINDEX_STATE
Definition: parallel.c:74
const char * fn_name
Definition: parallel.c:133
static bool dlist_is_empty(dlist_head *head)
Definition: ilist.h:289
#define BGW_MAXLEN
Definition: bgworker.h:85
size_t Size
Definition: c.h:466
BgWorkerStartTime bgw_start_time
Definition: bgworker.h:93
dlist_node node
Definition: parallel.h:34
void parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
Definition: vacuumlazy.c:3396
#define shm_toc_estimate_keys(e, cnt)
Definition: shm_toc.h:53
shm_mq * shm_mq_get_queue(shm_mq_handle *mqh)
Definition: shm_mq.c:848
bool RegisterDynamicBackgroundWorker(BackgroundWorker *worker, BackgroundWorkerHandle **handle)
Definition: bgworker.c:911
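A hedged sketch of registering one such worker directly (field values are illustrative; parallel.c itself fills these in from the ParallelContext):

BackgroundWorker worker;
BackgroundWorkerHandle *handle;

memset(&worker, 0, sizeof(worker));
worker.bgw_flags = BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION;
worker.bgw_start_time = BgWorkerStart_ConsistentState;
worker.bgw_restart_time = BGW_NEVER_RESTART;
snprintf(worker.bgw_library_name, BGW_MAXLEN, "mylib");
snprintf(worker.bgw_function_name, BGW_MAXLEN, "myworker_main");
worker.bgw_notify_pid = MyProcPid;

if (!RegisterDynamicBackgroundWorker(&worker, &handle))
	ereport(ERROR,
			(errmsg("could not register background worker")));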
void EnterParallelMode(void)
Definition: xact.c:963
volatile sig_atomic_t InterruptPending
Definition: globals.c:30
Size EstimateRelationMapSpace(void)
Definition: relmapper.c:646
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition: shm_toc.c:88
char * context
Definition: elog.h:369
shm_mq_handle * shm_mq_attach(shm_mq *mq, dsm_segment *seg, BackgroundWorkerHandle *handle)
Definition: shm_mq.c:282
ErrorContextCallback * error_context_stack
Definition: parallel.h:41
void pq_set_parallel_master(pid_t pid, BackendId backend_id)
Definition: pqmq.c:80
void SetParallelStartTimestamps(TimestampTz xact_ts, TimestampTz stmt_ts)
Definition: xact.c:771
#define PARALLEL_KEY_TRANSACTION_STATE
Definition: parallel.c:71
char bgw_type[BGW_MAXLEN]
Definition: bgworker.h:91
void dsm_detach(dsm_segment *seg)
Definition: dsm.c:658
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition: shm_toc.c:171
void RestoreReindexState(void *reindexstate)
Definition: index.c:3962
int errmsg(const char *fmt,...)
Definition: elog.c:824
void (*parallel_worker_main_type)(dsm_segment *seg, shm_toc *toc)
Definition: parallel.h:23
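An entry point matching this typedef is what ParallelWorkerMain looks up and calls; a skeletal example (PARALLEL_KEY_MY_STATE and do_one_chunk are invented names):

void
myworker_main(dsm_segment *seg, shm_toc *toc)
{
	/* PARALLEL_KEY_MY_STATE is a hypothetical caller-defined TOC key */
	char	   *state = shm_toc_lookup(toc, PARALLEL_KEY_MY_STATE, false);

	/* ParallelWorkerNumber runs from 0 to nworkers_launched - 1 */
	do_one_chunk(state, ParallelWorkerNumber);	/* placeholder work */
}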
void pq_endmessage(StringInfo buf)
Definition: pqformat.c:298
void ParallelWorkerMain(Datum main_arg)
Definition: parallel.c:1227
pid_t bgw_notify_pid
Definition: bgworker.h:99
static FixedParallelState * MyFixedParallelState
Definition: parallel.c:119
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:796
void SetCurrentRoleId(Oid roleid, bool is_superuser)
Definition: miscinit.c:819
#define HOLD_INTERRUPTS()
Definition: miscadmin.h:116
#define elog(elevel,...)
Definition: elog.h:214
bool InitializingParallelWorker
Definition: parallel.c:116
int i
TimestampTz stmt_ts
Definition: parallel.c:94
Definition: shm_mq.c:70
void RestoreRelationMap(char *startAddress)
Definition: relmapper.c:674
#define PARALLEL_KEY_SESSION_DSM
Definition: parallel.c:73
#define BUFFERALIGN(LEN)
Definition: c.h:693
void * arg
struct Latch * MyLatch
Definition: globals.c:54
void HandleParallelMessageInterrupt(void)
Definition: parallel.c:979
unsigned int pq_getmsgint(StringInfo msg, int b)
Definition: pqformat.c:417
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
void shm_mq_set_receiver(shm_mq *mq, PGPROC *proc)
Definition: shm_mq.c:198
static dlist_head pcxt_list
Definition: parallel.c:122
void pq_getmsgend(StringInfo msg)
Definition: pqformat.c:637
#define pq_putmessage(msgtype, s, len)
Definition: libpq.h:42
static void status(const char *fmt,...) pg_attribute_printf(1, 2)
Definition: pg_regress.c:225
struct FixedParallelState FixedParallelState
dsm_handle GetSessionDsmHandle(void)
Definition: session.c:70
shm_mq_result shm_mq_receive(shm_mq_handle *mqh, Size *nbytesp, void **datap, bool nowait)
Definition: shm_mq.c:540
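The final argument controls blocking; a non-blocking poll typically looks like this (handle_message is a placeholder):

Size		nbytes;
void	   *data;
shm_mq_result res;

res = shm_mq_receive(mqh, &nbytes, &data, true);	/* nowait = true */
if (res == SHM_MQ_SUCCESS)
	handle_message(data, nbytes);
else if (res == SHM_MQ_WOULD_BLOCK)
	;							/* nothing available yet; try again later */
else
	Assert(res == SHM_MQ_DETACHED); /* peer detached or died */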
char bgw_library_name[BGW_MAXLEN]
Definition: bgworker.h:95
void WaitForParallelWorkersToAttach(ParallelContext *pcxt)
Definition: parallel.c:635
bool session_auth_is_superuser
Definition: guc.c:540
SerializableXactHandle ShareSerializableXact(void)
Definition: predicate.c:5114
void AtEOXact_Parallel(bool isCommit)
Definition: parallel.c:1210
Definition: proc.h:95
#define PARALLEL_KEY_ENTRYPOINT
Definition: parallel.c:72
#define snprintf
Definition: port.h:192
#define PARALLEL_KEY_COMBO_CID
Definition: parallel.c:68
#define WL_LATCH_SET
Definition: latch.h:124
#define _(x)
Definition: elog.c:88
void AtEOSubXact_Parallel(bool isCommit, SubTransactionId mySubId)
Definition: parallel.c:1191
void SerializeGUCState(Size maxsize, char *start_address)
Definition: guc.c:10319
void appendBinaryStringInfo(StringInfo str, const char *data, int datalen)
Definition: stringinfo.c:227
#define PARALLEL_KEY_ACTIVE_SNAPSHOT
Definition: parallel.c:70
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition: shm_toc.c:232
#define die(msg)
Definition: pg_test_fsync.c:96
void SerializeComboCIDState(Size maxsize, char *start_address)
Definition: combocid.c:317
#define PARALLEL_KEY_LIBRARY
Definition: parallel.c:66
BgwHandleStatus GetBackgroundWorkerPid(BackgroundWorkerHandle *handle, pid_t *pidp)
Definition: bgworker.c:1023
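The returned BgwHandleStatus distinguishes a worker that has not started yet from one that has already exited; a brief sketch:

pid_t		pid;
BgwHandleStatus wstatus = GetBackgroundWorkerPid(handle, &pid);

if (wstatus == BGWH_STARTED)
	elog(DEBUG1, "worker is running as PID %d", (int) pid);
else if (wstatus == BGWH_STOPPED)
	elog(DEBUG1, "worker has already exited");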
void RestoreGUCState(void *gucstate)
Definition: guc.c:10399
shm_toc * toc
Definition: parallel.h:45
#define WL_EXIT_ON_PM_DEATH
Definition: latch.h:129
void NotifyMyFrontEnd(const char *channel, const char *payload, int32 srcPid)
Definition: async.c:2255
TimestampTz GetCurrentStatementStartTimestamp(void)
Definition: xact.c:791
void * private_memory
Definition: parallel.h:44
void BackgroundWorkerUnblockSignals(void)
Definition: postmaster.c:5755