PostgreSQL Source Code (git master)
parallel.c
1 /*-------------------------------------------------------------------------
2  *
3  * parallel.c
4  * Infrastructure for launching parallel workers
5  *
6  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * IDENTIFICATION
10  * src/backend/access/transam/parallel.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 
15 #include "postgres.h"
16 
17 #include "access/nbtree.h"
18 #include "access/parallel.h"
19 #include "access/session.h"
20 #include "access/xact.h"
21 #include "access/xlog.h"
22 #include "catalog/pg_enum.h"
23 #include "catalog/index.h"
24 #include "catalog/namespace.h"
25 #include "commands/async.h"
26 #include "executor/execParallel.h"
27 #include "libpq/libpq.h"
28 #include "libpq/pqformat.h"
29 #include "libpq/pqmq.h"
30 #include "miscadmin.h"
31 #include "optimizer/optimizer.h"
32 #include "pgstat.h"
33 #include "storage/ipc.h"
34 #include "storage/predicate.h"
35 #include "storage/sinval.h"
36 #include "storage/spin.h"
37 #include "tcop/tcopprot.h"
38 #include "utils/combocid.h"
39 #include "utils/guc.h"
40 #include "utils/inval.h"
41 #include "utils/memutils.h"
42 #include "utils/relmapper.h"
43 #include "utils/snapmgr.h"
44 #include "utils/typcache.h"
45 
46 
47 /*
48  * We don't want to waste a lot of memory on an error queue which, most of
49  * the time, will process only a handful of small messages. However, it is
50  * desirable to make it large enough that a typical ErrorResponse can be sent
51  * without blocking. That way, a worker that errors out can write the whole
52  * message into the queue and terminate without waiting for the user backend.
53  */
54 #define PARALLEL_ERROR_QUEUE_SIZE 16384
55 
56 /* Magic number for parallel context TOC. */
57 #define PARALLEL_MAGIC 0x50477c7c
58 
59 /*
60  * Magic numbers for per-context parallel state sharing. Higher-level code
61  * should use smaller values, leaving these very large ones for use by this
62  * module.
63  */
64 #define PARALLEL_KEY_FIXED UINT64CONST(0xFFFFFFFFFFFF0001)
65 #define PARALLEL_KEY_ERROR_QUEUE UINT64CONST(0xFFFFFFFFFFFF0002)
66 #define PARALLEL_KEY_LIBRARY UINT64CONST(0xFFFFFFFFFFFF0003)
67 #define PARALLEL_KEY_GUC UINT64CONST(0xFFFFFFFFFFFF0004)
68 #define PARALLEL_KEY_COMBO_CID UINT64CONST(0xFFFFFFFFFFFF0005)
69 #define PARALLEL_KEY_TRANSACTION_SNAPSHOT UINT64CONST(0xFFFFFFFFFFFF0006)
70 #define PARALLEL_KEY_ACTIVE_SNAPSHOT UINT64CONST(0xFFFFFFFFFFFF0007)
71 #define PARALLEL_KEY_TRANSACTION_STATE UINT64CONST(0xFFFFFFFFFFFF0008)
72 #define PARALLEL_KEY_ENTRYPOINT UINT64CONST(0xFFFFFFFFFFFF0009)
73 #define PARALLEL_KEY_SESSION_DSM UINT64CONST(0xFFFFFFFFFFFF000A)
74 #define PARALLEL_KEY_REINDEX_STATE UINT64CONST(0xFFFFFFFFFFFF000B)
75 #define PARALLEL_KEY_RELMAPPER_STATE UINT64CONST(0xFFFFFFFFFFFF000C)
76 #define PARALLEL_KEY_ENUMBLACKLIST UINT64CONST(0xFFFFFFFFFFFF000D)
77 
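Higher-level callers layer their own state into the same table of contents under smaller keys of their own choosing. For illustration only (the name and value below are hypothetical, not from this file):

    #define MYMODULE_KEY_SHARED_STATE   UINT64CONST(0x0000000000000001)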
78 /* Fixed-size parallel state. */
79 typedef struct FixedParallelState
80 {
81  /* Fixed-size state that workers must restore. */
82  Oid database_id;
83  Oid authenticated_user_id;
84  Oid current_user_id;
85  Oid outer_user_id;
86  Oid temp_namespace_id;
87  Oid temp_toast_namespace_id;
88  int sec_context;
89  bool is_superuser;
90  PGPROC *parallel_master_pgproc;
91  pid_t parallel_master_pid;
92  BackendId parallel_master_backend_id;
93  TimestampTz xact_ts;
94  TimestampTz stmt_ts;
95  SerializableXactHandle serializable_xact_handle;
96 
97  /* Mutex protects remaining fields. */
98  slock_t mutex;
99 
100  /* Maximum XactLastRecEnd of any worker. */
101  XLogRecPtr last_xlog_end;
102 } FixedParallelState;
103 
104 /*
105  * Our parallel worker number. We initialize this to -1, meaning that we are
106  * not a parallel worker. In parallel workers, it will be set to a value >= 0
107  * and < the number of workers before any user code is invoked; each parallel
108  * worker will get a different parallel worker number.
109  */
110 int ParallelWorkerNumber = -1;
111 
112 /* Is there a parallel message pending which we need to receive? */
113 volatile bool ParallelMessagePending = false;
114 
115 /* Are we initializing a parallel worker? */
116 bool InitializingParallelWorker = false;
117 
118 /* Pointer to our fixed parallel state. */
119 static FixedParallelState *MyFixedParallelState;
120 
121 /* List of active parallel contexts. */
122 static dlist_head pcxt_list = DLIST_STATIC_INIT(pcxt_list);
123 
124 /* Backend-local copy of data from FixedParallelState. */
125 static pid_t ParallelMasterPid;
126 
127 /*
128  * List of internal parallel worker entry points. We need this for
129  * reasons explained in LookupParallelWorkerFunction(), below.
130  */
131 static const struct
132 {
133  const char *fn_name;
134  parallel_worker_main_type fn_addr;
135 } InternalParallelWorkers[] =
136 
137 {
138  {
139  "ParallelQueryMain", ParallelQueryMain
140  },
141  {
142  "_bt_parallel_build_main", _bt_parallel_build_main
143  }
144 };
145 
146 /* Private functions. */
147 static void HandleParallelMessage(ParallelContext *pcxt, int i, StringInfo msg);
148 static void WaitForParallelWorkersToExit(ParallelContext *pcxt);
149 static parallel_worker_main_type LookupParallelWorkerFunction(const char *libraryname, const char *funcname);
150 static void ParallelWorkerShutdown(int code, Datum arg);
151 
152 
153 /*
154  * Establish a new parallel context. This should be done after entering
155  * parallel mode, and (unless there is an error) the context should be
156  * destroyed before exiting the current subtransaction.
157  */
158 ParallelContext *
159 CreateParallelContext(const char *library_name, const char *function_name,
160  int nworkers)
161 {
162  MemoryContext oldcontext;
163  ParallelContext *pcxt;
164 
165  /* It is unsafe to create a parallel context if not in parallel mode. */
166  Assert(IsInParallelMode());
167 
168  /* Number of workers should be non-negative. */
169  Assert(nworkers >= 0);
170 
171  /* We might be running in a short-lived memory context. */
172  oldcontext = MemoryContextSwitchTo(TopTransactionContext);
173 
174  /* Initialize a new ParallelContext. */
175  pcxt = palloc0(sizeof(ParallelContext));
176  pcxt->subid = GetCurrentSubTransactionId();
177  pcxt->nworkers = nworkers;
178  pcxt->library_name = pstrdup(library_name);
179  pcxt->function_name = pstrdup(function_name);
180  pcxt->error_context_stack = error_context_stack;
181  shm_toc_initialize_estimator(&pcxt->estimator);
182  dlist_push_head(&pcxt_list, &pcxt->node);
183 
184  /* Restore previous memory context. */
185  MemoryContextSwitchTo(oldcontext);
186 
187  return pcxt;
188 }
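A minimal caller-side sketch of the lifecycle this function begins, following the pattern described in src/backend/access/transam/README.parallel (the entry point name, size, and key below are placeholders, not part of this file):

    EnterParallelMode();            /* prohibit unsafe state changes */
    pcxt = CreateParallelContext("postgres", "my_worker_main", nworkers);
    shm_toc_estimate_chunk(&pcxt->estimator, size);   /* caller-specific state */
    shm_toc_estimate_keys(&pcxt->estimator, 1);
    InitializeParallelDSM(pcxt);    /* create DSM and copy state to it */
    space = shm_toc_allocate(pcxt->toc, size);
    shm_toc_insert(pcxt->toc, key, space);
    LaunchParallelWorkers(pcxt);
    /* do parallel stuff, tolerating fewer workers than requested */
    WaitForParallelWorkersToFinish(pcxt);
    /* read any final results from dynamic shared memory */
    DestroyParallelContext(pcxt);
    ExitParallelMode();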
189 
190 /*
191  * Establish the dynamic shared memory segment for a parallel context and
192  * copy state and other bookkeeping information that will be needed by
193  * parallel workers into it.
194  */
195 void
196 InitializeParallelDSM(ParallelContext *pcxt)
197 {
198  MemoryContext oldcontext;
199  Size library_len = 0;
200  Size guc_len = 0;
201  Size combocidlen = 0;
202  Size tsnaplen = 0;
203  Size asnaplen = 0;
204  Size tstatelen = 0;
205  Size reindexlen = 0;
206  Size relmapperlen = 0;
207  Size enumblacklistlen = 0;
208  Size segsize = 0;
209  int i;
210  FixedParallelState *fps;
211  dsm_handle session_dsm_handle = DSM_HANDLE_INVALID;
212  Snapshot transaction_snapshot = GetTransactionSnapshot();
213  Snapshot active_snapshot = GetActiveSnapshot();
214 
215  /* We might be running in a very short-lived memory context. */
216  oldcontext = MemoryContextSwitchTo(TopTransactionContext);
217 
218  /* Allow space to store the fixed-size parallel state. */
219  shm_toc_estimate_chunk(&pcxt->estimator, sizeof(FixedParallelState));
220  shm_toc_estimate_keys(&pcxt->estimator, 1);
221 
222  /*
223  * Normally, the user will have requested at least one worker process, but
224  * if by chance they have not, we can skip a bunch of things here.
225  */
226  if (pcxt->nworkers > 0)
227  {
228  /* Get (or create) the per-session DSM segment's handle. */
229  session_dsm_handle = GetSessionDsmHandle();
230 
231  /*
232  * If we weren't able to create a per-session DSM segment, then we can
233  * continue but we can't safely launch any workers because their
234  * record typmods would be incompatible so they couldn't exchange
235  * tuples.
236  */
237  if (session_dsm_handle == DSM_HANDLE_INVALID)
238  pcxt->nworkers = 0;
239  }
240 
241  if (pcxt->nworkers > 0)
242  {
243  /* Estimate space for various kinds of state sharing. */
244  library_len = EstimateLibraryStateSpace();
245  shm_toc_estimate_chunk(&pcxt->estimator, library_len);
246  guc_len = EstimateGUCStateSpace();
247  shm_toc_estimate_chunk(&pcxt->estimator, guc_len);
248  combocidlen = EstimateComboCIDStateSpace();
249  shm_toc_estimate_chunk(&pcxt->estimator, combocidlen);
250  tsnaplen = EstimateSnapshotSpace(transaction_snapshot);
251  shm_toc_estimate_chunk(&pcxt->estimator, tsnaplen);
252  asnaplen = EstimateSnapshotSpace(active_snapshot);
253  shm_toc_estimate_chunk(&pcxt->estimator, asnaplen);
254  tstatelen = EstimateTransactionStateSpace();
255  shm_toc_estimate_chunk(&pcxt->estimator, tstatelen);
256  shm_toc_estimate_chunk(&pcxt->estimator, sizeof(dsm_handle));
257  reindexlen = EstimateReindexStateSpace();
258  shm_toc_estimate_chunk(&pcxt->estimator, reindexlen);
259  relmapperlen = EstimateRelationMapSpace();
260  shm_toc_estimate_chunk(&pcxt->estimator, relmapperlen);
261  enumblacklistlen = EstimateEnumBlacklistSpace();
262  shm_toc_estimate_chunk(&pcxt->estimator, enumblacklistlen);
263  /* If you add more chunks here, you probably need to add keys. */
264  shm_toc_estimate_keys(&pcxt->estimator, 10);
265 
266  /* Estimate space need for error queues. */
267  StaticAssertStmt(BUFFERALIGN(PARALLEL_ERROR_QUEUE_SIZE) ==
268  PARALLEL_ERROR_QUEUE_SIZE,
269  "parallel error queue size not buffer-aligned");
270  shm_toc_estimate_chunk(&pcxt->estimator,
271  mul_size(PARALLEL_ERROR_QUEUE_SIZE,
272  pcxt->nworkers));
273  shm_toc_estimate_keys(&pcxt->estimator, 1);
274 
275  /* Estimate how much we'll need for the entrypoint info. */
276  shm_toc_estimate_chunk(&pcxt->estimator, strlen(pcxt->library_name) +
277  strlen(pcxt->function_name) + 2);
278  shm_toc_estimate_keys(&pcxt->estimator, 1);
279  }
280 
281  /*
282  * Create DSM and initialize with new table of contents. But if the user
283  * didn't request any workers, then don't bother creating a dynamic shared
284  * memory segment; instead, just use backend-private memory.
285  *
286  * Also, if we can't create a dynamic shared memory segment because the
287  * maximum number of segments have already been created, then fall back to
288  * backend-private memory, and plan not to use any workers. We hope this
289  * won't happen very often, but it's better to abandon the use of
290  * parallelism than to fail outright.
291  */
292  segsize = shm_toc_estimate(&pcxt->estimator);
293  if (pcxt->nworkers > 0)
294  pcxt->seg = dsm_create(segsize, DSM_CREATE_NULL_IF_MAXSEGMENTS);
295  if (pcxt->seg != NULL)
296  pcxt->toc = shm_toc_create(PARALLEL_MAGIC,
297  dsm_segment_address(pcxt->seg),
298  segsize);
299  else
300  {
301  pcxt->nworkers = 0;
302  pcxt->private_memory = MemoryContextAlloc(TopTransactionContext, segsize);
303  pcxt->toc = shm_toc_create(PARALLEL_MAGIC, pcxt->private_memory,
304  segsize);
305  }
306 
307  /* Initialize fixed-size state in shared memory. */
308  fps = (FixedParallelState *)
309  shm_toc_allocate(pcxt->toc, sizeof(FixedParallelState));
310  fps->database_id = MyDatabaseId;
311  fps->authenticated_user_id = GetAuthenticatedUserId();
312  fps->outer_user_id = GetCurrentRoleId();
313  fps->is_superuser = session_auth_is_superuser;
314  GetUserIdAndSecContext(&fps->current_user_id, &fps->sec_context);
315  GetTempNamespaceState(&fps->temp_namespace_id,
316  &fps->temp_toast_namespace_id);
317  fps->parallel_master_pgproc = MyProc;
318  fps->parallel_master_pid = MyProcPid;
319  fps->parallel_master_backend_id = MyBackendId;
320  fps->xact_ts = GetCurrentTransactionStartTimestamp();
321  fps->stmt_ts = GetCurrentStatementStartTimestamp();
322  fps->serializable_xact_handle = ShareSerializableXact();
323  SpinLockInit(&fps->mutex);
324  fps->last_xlog_end = 0;
325  shm_toc_insert(pcxt->toc, PARALLEL_KEY_FIXED, fps);
326 
327  /* We can skip the rest of this if we're not budgeting for any workers. */
328  if (pcxt->nworkers > 0)
329  {
330  char *libraryspace;
331  char *gucspace;
332  char *combocidspace;
333  char *tsnapspace;
334  char *asnapspace;
335  char *tstatespace;
336  char *reindexspace;
337  char *relmapperspace;
338  char *error_queue_space;
339  char *session_dsm_handle_space;
340  char *entrypointstate;
341  char *enumblacklistspace;
342  Size lnamelen;
343 
344  /* Serialize shared libraries we have loaded. */
345  libraryspace = shm_toc_allocate(pcxt->toc, library_len);
346  SerializeLibraryState(library_len, libraryspace);
347  shm_toc_insert(pcxt->toc, PARALLEL_KEY_LIBRARY, libraryspace);
348 
349  /* Serialize GUC settings. */
350  gucspace = shm_toc_allocate(pcxt->toc, guc_len);
351  SerializeGUCState(guc_len, gucspace);
352  shm_toc_insert(pcxt->toc, PARALLEL_KEY_GUC, gucspace);
353 
354  /* Serialize combo CID state. */
355  combocidspace = shm_toc_allocate(pcxt->toc, combocidlen);
356  SerializeComboCIDState(combocidlen, combocidspace);
357  shm_toc_insert(pcxt->toc, PARALLEL_KEY_COMBO_CID, combocidspace);
358 
359  /* Serialize transaction snapshot and active snapshot. */
360  tsnapspace = shm_toc_allocate(pcxt->toc, tsnaplen);
361  SerializeSnapshot(transaction_snapshot, tsnapspace);
362  shm_toc_insert(pcxt->toc, PARALLEL_KEY_TRANSACTION_SNAPSHOT,
363  tsnapspace);
364  asnapspace = shm_toc_allocate(pcxt->toc, asnaplen);
365  SerializeSnapshot(active_snapshot, asnapspace);
366  shm_toc_insert(pcxt->toc, PARALLEL_KEY_ACTIVE_SNAPSHOT, asnapspace);
367 
368  /* Provide the handle for per-session segment. */
369  session_dsm_handle_space = shm_toc_allocate(pcxt->toc,
370  sizeof(dsm_handle));
371  *(dsm_handle *) session_dsm_handle_space = session_dsm_handle;
372  shm_toc_insert(pcxt->toc, PARALLEL_KEY_SESSION_DSM,
373  session_dsm_handle_space);
374 
375  /* Serialize transaction state. */
376  tstatespace = shm_toc_allocate(pcxt->toc, tstatelen);
377  SerializeTransactionState(tstatelen, tstatespace);
378  shm_toc_insert(pcxt->toc, PARALLEL_KEY_TRANSACTION_STATE, tstatespace);
379 
380  /* Serialize reindex state. */
381  reindexspace = shm_toc_allocate(pcxt->toc, reindexlen);
382  SerializeReindexState(reindexlen, reindexspace);
383  shm_toc_insert(pcxt->toc, PARALLEL_KEY_REINDEX_STATE, reindexspace);
384 
385  /* Serialize relmapper state. */
386  relmapperspace = shm_toc_allocate(pcxt->toc, relmapperlen);
387  SerializeRelationMap(relmapperlen, relmapperspace);
388  shm_toc_insert(pcxt->toc, PARALLEL_KEY_RELMAPPER_STATE,
389  relmapperspace);
390 
391  /* Serialize enum blacklist state. */
392  enumblacklistspace = shm_toc_allocate(pcxt->toc, enumblacklistlen);
393  SerializeEnumBlacklist(enumblacklistspace, enumblacklistlen);
394  shm_toc_insert(pcxt->toc, PARALLEL_KEY_ENUMBLACKLIST,
395  enumblacklistspace);
396 
397  /* Allocate space for worker information. */
398  pcxt->worker = palloc0(sizeof(ParallelWorkerInfo) * pcxt->nworkers);
399 
400  /*
401  * Establish error queues in dynamic shared memory.
402  *
403  * These queues should be used only for transmitting ErrorResponse,
404  * NoticeResponse, and NotifyResponse protocol messages. Tuple data
405  * should be transmitted via separate (possibly larger?) queues.
406  */
407  error_queue_space =
408  shm_toc_allocate(pcxt->toc,
409  mul_size(PARALLEL_ERROR_QUEUE_SIZE,
410  pcxt->nworkers));
411  for (i = 0; i < pcxt->nworkers; ++i)
412  {
413  char *start;
414  shm_mq *mq;
415 
416  start = error_queue_space + i * PARALLEL_ERROR_QUEUE_SIZE;
417  mq = shm_mq_create(start, PARALLEL_ERROR_QUEUE_SIZE);
418  shm_mq_set_receiver(mq, MyProc);
419  pcxt->worker[i].error_mqh = shm_mq_attach(mq, pcxt->seg, NULL);
420  }
421  shm_toc_insert(pcxt->toc, PARALLEL_KEY_ERROR_QUEUE, error_queue_space);
422 
423  /*
424  * Serialize entrypoint information. It's unsafe to pass function
425  * pointers across processes, as the function pointer may be different
426  * in each process in EXEC_BACKEND builds, so we always pass library
427  * and function name. (We use library name "postgres" for functions
428  * in the core backend.)
429  */
430  lnamelen = strlen(pcxt->library_name);
431  entrypointstate = shm_toc_allocate(pcxt->toc, lnamelen +
432  strlen(pcxt->function_name) + 2);
433  strcpy(entrypointstate, pcxt->library_name);
434  strcpy(entrypointstate + lnamelen + 1, pcxt->function_name);
435  shm_toc_insert(pcxt->toc, PARALLEL_KEY_ENTRYPOINT, entrypointstate);
436  }
437 
438  /* Restore previous memory context. */
439  MemoryContextSwitchTo(oldcontext);
440 }
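The contract here is two-phase: anything a caller will later shm_toc_allocate must first be counted with shm_toc_estimate_chunk, and each planned shm_toc_insert needs a matching shm_toc_estimate_keys, all before this function is called. A caller-side sketch (MYMODULE_KEY_SHARED_STATE and MySharedState are hypothetical):

    /* Before InitializeParallelDSM: reserve space and a TOC key. */
    shm_toc_estimate_chunk(&pcxt->estimator, sizeof(MySharedState));
    shm_toc_estimate_keys(&pcxt->estimator, 1);
    InitializeParallelDSM(pcxt);
    /* After it: the TOC exists, so allocate and publish the state. */
    mystate = shm_toc_allocate(pcxt->toc, sizeof(MySharedState));
    shm_toc_insert(pcxt->toc, MYMODULE_KEY_SHARED_STATE, mystate);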
441 
442 /*
443  * Reinitialize the dynamic shared memory segment for a parallel context such
444  * that we could launch workers for it again.
445  */
446 void
447 ReinitializeParallelDSM(ParallelContext *pcxt)
448 {
449  FixedParallelState *fps;
450 
451  /* Wait for any old workers to exit. */
452  if (pcxt->nworkers_launched > 0)
453  {
454  WaitForParallelWorkersToFinish(pcxt);
455  WaitForParallelWorkersToExit(pcxt);
456  pcxt->nworkers_launched = 0;
457  if (pcxt->known_attached_workers)
458  {
459  pfree(pcxt->known_attached_workers);
460  pcxt->known_attached_workers = NULL;
461  pcxt->nknown_attached_workers = 0;
462  }
463  }
464 
465  /* Reset a few bits of fixed parallel state to a clean state. */
466  fps = shm_toc_lookup(pcxt->toc, PARALLEL_KEY_FIXED, false);
467  fps->last_xlog_end = 0;
468 
469  /* Recreate error queues (if they exist). */
470  if (pcxt->nworkers > 0)
471  {
472  char *error_queue_space;
473  int i;
474 
475  error_queue_space =
476  shm_toc_lookup(pcxt->toc, PARALLEL_KEY_ERROR_QUEUE, false);
477  for (i = 0; i < pcxt->nworkers; ++i)
478  {
479  char *start;
480  shm_mq *mq;
481 
482  start = error_queue_space + i * PARALLEL_ERROR_QUEUE_SIZE;
483  mq = shm_mq_create(start, PARALLEL_ERROR_QUEUE_SIZE);
484  shm_mq_set_receiver(mq, MyProc);
485  pcxt->worker[i].error_mqh = shm_mq_attach(mq, pcxt->seg, NULL);
486  }
487  }
488 }
489 
490 /*
491  * Launch parallel workers.
492  */
493 void
494 LaunchParallelWorkers(ParallelContext *pcxt)
495 {
496  MemoryContext oldcontext;
497  BackgroundWorker worker;
498  int i;
499  bool any_registrations_failed = false;
500 
501  /* Skip this if we have no workers. */
502  if (pcxt->nworkers == 0)
503  return;
504 
505  /* We need to be a lock group leader. */
506  BecomeLockGroupLeader();
507 
508  /* If we do have workers, we'd better have a DSM segment. */
509  Assert(pcxt->seg != NULL);
510 
511  /* We might be running in a short-lived memory context. */
512  oldcontext = MemoryContextSwitchTo(TopTransactionContext);
513 
514  /* Configure a worker. */
515  memset(&worker, 0, sizeof(worker));
516  snprintf(worker.bgw_name, BGW_MAXLEN, "parallel worker for PID %d",
517  MyProcPid);
518  snprintf(worker.bgw_type, BGW_MAXLEN, "parallel worker");
519  worker.bgw_flags =
520  BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION |
521  BGWORKER_CLASS_PARALLEL;
522  worker.bgw_start_time = BgWorkerStart_ConsistentState;
523  worker.bgw_restart_time = BGW_NEVER_RESTART;
524  sprintf(worker.bgw_library_name, "postgres");
525  sprintf(worker.bgw_function_name, "ParallelWorkerMain");
526  worker.bgw_main_arg = UInt32GetDatum(dsm_segment_handle(pcxt->seg));
527  worker.bgw_notify_pid = MyProcPid;
528 
529  /*
530  * Start workers.
531  *
532  * The caller must be able to tolerate ending up with fewer workers than
533  * expected, so there is no need to throw an error here if registration
534  * fails. It wouldn't help much anyway, because registering the worker in
535  * no way guarantees that it will start up and initialize successfully.
536  */
537  for (i = 0; i < pcxt->nworkers; ++i)
538  {
539  memcpy(worker.bgw_extra, &i, sizeof(int));
540  if (!any_registrations_failed &&
541  RegisterDynamicBackgroundWorker(&worker,
542  &pcxt->worker[i].bgwhandle))
543  {
544  shm_mq_set_handle(pcxt->worker[i].error_mqh,
545  pcxt->worker[i].bgwhandle);
546  pcxt->nworkers_launched++;
547  }
548  else
549  {
550  /*
551  * If we weren't able to register the worker, then we've bumped up
552  * against the max_worker_processes limit, and future
553  * registrations will probably fail too, so arrange to skip them.
554  * But we still have to execute this code for the remaining slots
555  * to make sure that we forget about the error queues we budgeted
556  * for those workers. Otherwise, we'll wait for them to start,
557  * but they never will.
558  */
559  any_registrations_failed = true;
560  pcxt->worker[i].bgwhandle = NULL;
561  shm_mq_detach(pcxt->worker[i].error_mqh);
562  pcxt->worker[i].error_mqh = NULL;
563  }
564  }
565 
566  /*
567  * Now that nworkers_launched has taken its final value, we can initialize
568  * known_attached_workers.
569  */
570  if (pcxt->nworkers_launched > 0)
571  {
572  pcxt->known_attached_workers =
573  palloc0(sizeof(bool) * pcxt->nworkers_launched);
574  pcxt->nknown_attached_workers = 0;
575  }
576 
577  /* Restore previous memory context. */
578  MemoryContextSwitchTo(oldcontext);
579 }
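Because registration can fail silently here, callers typically recheck pcxt->nworkers_launched afterwards and fall back to doing the work themselves; a sketch:

    LaunchParallelWorkers(pcxt);
    if (pcxt->nworkers_launched == 0)
    {
        /* no workers could be started; the leader performs the whole task */
    }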
580 
581 /*
582  * Wait for all workers to attach to their error queues, and throw an error if
583  * any worker fails to do this.
584  *
585  * Callers can assume that if this function returns successfully, then the
586  * number of workers given by pcxt->nworkers_launched have initialized and
587  * attached to their error queues. Whether or not these workers are guaranteed
588  * to still be running depends on what code the caller asked them to run;
589  * this function does not guarantee that they have not exited. However, it
590  * does guarantee that any workers which exited must have done so cleanly and
591  * after successfully performing the work with which they were tasked.
592  *
593  * If this function is not called, then some of the workers that were launched
594  * may not have been started due to a fork() failure, or may have exited during
595  * early startup prior to attaching to the error queue, so nworkers_launched
596  * cannot be viewed as completely reliable. It will never be less than the
597  * number of workers which actually started, but it might be more. Any workers
598  * that failed to start will still be discovered by
599  * WaitForParallelWorkersToFinish and an error will be thrown at that time,
600  * provided that function is eventually reached.
601  *
602  * In general, the leader process should do as much work as possible before
603  * calling this function. fork() failures and other early-startup failures
604  * are very uncommon, and having the leader sit idle when it could be doing
605  * useful work is undesirable. However, if the leader needs to wait for
606  * all of its workers or for a specific worker, it may want to call this
607  * function before doing so. If not, it must make some other provision for
608  * the failure-to-start case, lest it wait forever. On the other hand, a
609  * leader which never waits for a worker that might not be started yet, or
610  * at least never does so prior to WaitForParallelWorkersToFinish(), need not
611  * call this function at all.
612  */
613 void
614 WaitForParallelWorkersToAttach(ParallelContext *pcxt)
615 {
616  int i;
617 
618  /* Skip this if we have no launched workers. */
619  if (pcxt->nworkers_launched == 0)
620  return;
621 
622  for (;;)
623  {
624  /*
625  * This will process any parallel messages that are pending and it may
626  * also throw an error propagated from a worker.
627  */
628  CHECK_FOR_INTERRUPTS();
629 
630  for (i = 0; i < pcxt->nworkers_launched; ++i)
631  {
632  BgwHandleStatus status;
633  shm_mq *mq;
634  int rc;
635  pid_t pid;
636 
637  if (pcxt->known_attached_workers[i])
638  continue;
639 
640  /*
641  * If error_mqh is NULL, then the worker has already exited
642  * cleanly.
643  */
644  if (pcxt->worker[i].error_mqh == NULL)
645  {
646  pcxt->known_attached_workers[i] = true;
647  ++pcxt->nknown_attached_workers;
648  continue;
649  }
650 
651  status = GetBackgroundWorkerPid(pcxt->worker[i].bgwhandle, &pid);
652  if (status == BGWH_STARTED)
653  {
654  /* Has the worker attached to the error queue? */
655  mq = shm_mq_get_queue(pcxt->worker[i].error_mqh);
656  if (shm_mq_get_sender(mq) != NULL)
657  {
658  /* Yes, so it is known to be attached. */
659  pcxt->known_attached_workers[i] = true;
660  ++pcxt->nknown_attached_workers;
661  }
662  }
663  else if (status == BGWH_STOPPED)
664  {
665  /*
666  * If the worker stopped without attaching to the error queue,
667  * throw an error.
668  */
669  mq = shm_mq_get_queue(pcxt->worker[i].error_mqh);
670  if (shm_mq_get_sender(mq) == NULL)
671  ereport(ERROR,
672  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
673  errmsg("parallel worker failed to initialize"),
674  errhint("More details may be available in the server log.")));
675 
676  pcxt->known_attached_workers[i] = true;
677  ++pcxt->nknown_attached_workers;
678  }
679  else
680  {
681  /*
682  * Worker not yet started, so we must wait. The postmaster
683  * will notify us if the worker's state changes. Our latch
684  * might also get set for some other reason, but if so we'll
685  * just end up waiting for the same worker again.
686  */
687  rc = WaitLatch(MyLatch,
688  WL_LATCH_SET | WL_EXIT_ON_PM_DEATH, -1,
689  WAIT_EVENT_BGWORKER_STARTUP);
690 
691  if (rc & WL_LATCH_SET)
692  ResetLatch(MyLatch);
693  }
694  }
695 
696  /* If all workers are known to have started, we're done. */
697  if (pcxt->nknown_attached_workers >= pcxt->nworkers_launched)
698  {
699  Assert(pcxt->nknown_attached_workers == pcxt->nworkers_launched);
700  break;
701  }
702  }
703 }
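For example, the parallel B-tree build in nbtsort.c calls this function right after launching its workers, because its leader will later block waiting for them; a sketch of that pattern:

    LaunchParallelWorkers(pcxt);
    if (pcxt->nworkers_launched > 0)
        WaitForParallelWorkersToAttach(pcxt);   /* now safe to block on workers */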
704 
705 /*
706  * Wait for all workers to finish computing.
707  *
708  * Even if the parallel operation seems to have completed successfully, it's
709  * important to call this function afterwards. We must not miss any errors
710  * the workers may have thrown during the parallel operation, or any that they
711  * may yet throw while shutting down.
712  *
713  * Also, we want to update our notion of XactLastRecEnd based on worker
714  * feedback.
715  */
716 void
717 WaitForParallelWorkersToFinish(ParallelContext *pcxt)
718 {
719  for (;;)
720  {
721  bool anyone_alive = false;
722  int nfinished = 0;
723  int i;
724 
725  /*
726  * This will process any parallel messages that are pending, which may
727  * change the outcome of the loop that follows. It may also throw an
728  * error propagated from a worker.
729  */
730  CHECK_FOR_INTERRUPTS();
731 
732  for (i = 0; i < pcxt->nworkers_launched; ++i)
733  {
734  /*
735  * If error_mqh is NULL, then the worker has already exited
736  * cleanly. If we have received a message through error_mqh from
737  * the worker, we know it started up cleanly, and therefore we're
738  * certain to be notified when it exits.
739  */
740  if (pcxt->worker[i].error_mqh == NULL)
741  ++nfinished;
742  else if (pcxt->known_attached_workers[i])
743  {
744  anyone_alive = true;
745  break;
746  }
747  }
748 
749  if (!anyone_alive)
750  {
751  /* If all workers are known to have finished, we're done. */
752  if (nfinished >= pcxt->nworkers_launched)
753  {
754  Assert(nfinished == pcxt->nworkers_launched);
755  break;
756  }
757 
758  /*
759  * We didn't detect any living workers, but not all workers are
760  * known to have exited cleanly. Either not all workers have
761  * launched yet, or maybe some of them failed to start or
762  * terminated abnormally.
763  */
764  for (i = 0; i < pcxt->nworkers_launched; ++i)
765  {
766  pid_t pid;
767  shm_mq *mq;
768 
769  /*
770  * If the worker is BGWH_NOT_YET_STARTED or BGWH_STARTED, we
771  * should just keep waiting. If it is BGWH_STOPPED, then
772  * further investigation is needed.
773  */
774  if (pcxt->worker[i].error_mqh == NULL ||
775  pcxt->worker[i].bgwhandle == NULL ||
776  GetBackgroundWorkerPid(pcxt->worker[i].bgwhandle,
777  &pid) != BGWH_STOPPED)
778  continue;
779 
780  /*
781  * Check whether the worker ended up stopped without ever
782  * attaching to the error queue. If so, the postmaster was
783  * unable to fork the worker or it exited without initializing
784  * properly. We must throw an error, since the caller may
785  * have been expecting the worker to do some work before
786  * exiting.
787  */
788  mq = shm_mq_get_queue(pcxt->worker[i].error_mqh);
789  if (shm_mq_get_sender(mq) == NULL)
790  ereport(ERROR,
791  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
792  errmsg("parallel worker failed to initialize"),
793  errhint("More details may be available in the server log.")));
794 
795  /*
796  * The worker is stopped, but is attached to the error queue.
797  * Unless there's a bug somewhere, this will only happen when
798  * the worker writes messages and terminates after the
799  * CHECK_FOR_INTERRUPTS() near the top of this function and
800  * before the call to GetBackgroundWorkerPid(). In that case,
801  * our latch should have been set as well and the right things
802  * will happen on the next pass through the loop.
803  */
804  }
805  }
806 
807  (void) WaitLatch(MyLatch, WL_LATCH_SET | WL_EXIT_ON_PM_DEATH, -1,
808  WAIT_EVENT_PARALLEL_FINISH);
809  ResetLatch(MyLatch);
810  }
811 
812  if (pcxt->toc != NULL)
813  {
814  FixedParallelState *fps;
815 
816  fps = shm_toc_lookup(pcxt->toc, PARALLEL_KEY_FIXED, false);
817  if (fps->last_xlog_end > XactLastRecEnd)
818  XactLastRecEnd = fps->last_xlog_end;
819  }
820 }
821 
822 /*
823  * Wait for all workers to exit.
824  *
825  * This function ensures that workers have been completely shutdown. The
826  * difference between WaitForParallelWorkersToFinish and this function is
827  * that the former just ensures that the last message sent by a worker backend
828  * is received by the master backend, whereas this one ensures complete shutdown.
829  */
830 static void
831 WaitForParallelWorkersToExit(ParallelContext *pcxt)
832 {
833  int i;
834 
835  /* Wait until the workers actually die. */
836  for (i = 0; i < pcxt->nworkers_launched; ++i)
837  {
838  BgwHandleStatus status;
839 
840  if (pcxt->worker == NULL || pcxt->worker[i].bgwhandle == NULL)
841  continue;
842 
843  status = WaitForBackgroundWorkerShutdown(pcxt->worker[i].bgwhandle);
844 
845  /*
846  * If the postmaster kicked the bucket, we have no chance of cleaning
847  * up safely -- we won't be able to tell when our workers are actually
848  * dead. This doesn't necessitate a PANIC since they will all abort
849  * eventually, but we can't safely continue this session.
850  */
851  if (status == BGWH_POSTMASTER_DIED)
852  ereport(FATAL,
853  (errcode(ERRCODE_ADMIN_SHUTDOWN),
854  errmsg("postmaster exited during a parallel transaction")));
855 
856  /* Release memory. */
857  pfree(pcxt->worker[i].bgwhandle);
858  pcxt->worker[i].bgwhandle = NULL;
859  }
860 }
861 
862 /*
863  * Destroy a parallel context.
864  *
865  * If expecting a clean exit, you should use WaitForParallelWorkersToFinish()
866  * first, before calling this function. When this function is invoked, any
867  * remaining workers are forcibly killed; the dynamic shared memory segment
868  * is unmapped; and we then wait (uninterruptibly) for the workers to exit.
869  */
870 void
871 DestroyParallelContext(ParallelContext *pcxt)
872 {
873  int i;
874 
875  /*
876  * Be careful about order of operations here! We remove the parallel
877  * context from the list before we do anything else; otherwise, if an
878  * error occurs during a subsequent step, we might try to nuke it again
879  * from AtEOXact_Parallel or AtEOSubXact_Parallel.
880  */
881  dlist_delete(&pcxt->node);
882 
883  /* Kill each worker in turn, and forget their error queues. */
884  if (pcxt->worker != NULL)
885  {
886  for (i = 0; i < pcxt->nworkers_launched; ++i)
887  {
888  if (pcxt->worker[i].error_mqh != NULL)
889  {
890  TerminateBackgroundWorker(pcxt->worker[i].bgwhandle);
891 
892  shm_mq_detach(pcxt->worker[i].error_mqh);
893  pcxt->worker[i].error_mqh = NULL;
894  }
895  }
896  }
897 
898  /*
899  * If we have allocated a shared memory segment, detach it. This will
900  * implicitly detach the error queues, and any other shared memory queues,
901  * stored there.
902  */
903  if (pcxt->seg != NULL)
904  {
905  dsm_detach(pcxt->seg);
906  pcxt->seg = NULL;
907  }
908 
909  /*
910  * If this parallel context is actually in backend-private memory rather
911  * than shared memory, free that memory instead.
912  */
913  if (pcxt->private_memory != NULL)
914  {
915  pfree(pcxt->private_memory);
916  pcxt->private_memory = NULL;
917  }
918 
919  /*
920  * We can't finish transaction commit or abort until all of the workers
921  * have exited. This means, in particular, that we can't respond to
922  * interrupts at this stage.
923  */
924  HOLD_INTERRUPTS();
925  WaitForParallelWorkersToExit(pcxt);
926  RESUME_INTERRUPTS();
927 
928  /* Free the worker array itself. */
929  if (pcxt->worker != NULL)
930  {
931  pfree(pcxt->worker);
932  pcxt->worker = NULL;
933  }
934 
935  /* Free memory. */
936  pfree(pcxt->library_name);
937  pfree(pcxt->function_name);
938  pfree(pcxt);
939 }
940 
941 /*
942  * Are there any parallel contexts currently active?
943  */
944 bool
945 ParallelContextActive(void)
946 {
947  return !dlist_is_empty(&pcxt_list);
948 }
949 
950 /*
951  * Handle receipt of an interrupt indicating a parallel worker message.
952  *
953  * Note: this is called within a signal handler! All we can do is set
954  * a flag that will cause the next CHECK_FOR_INTERRUPTS() to invoke
955  * HandleParallelMessages().
956  */
957 void
958 HandleParallelMessageInterrupt(void)
959 {
960  InterruptPending = true;
961  ParallelMessagePending = true;
962  SetLatch(MyLatch);
963 }
964 
965 /*
966  * Handle any queued protocol messages received from parallel workers.
967  */
968 void
969 HandleParallelMessages(void)
970 {
971  dlist_iter iter;
972  MemoryContext oldcontext;
973 
974  static MemoryContext hpm_context = NULL;
975 
976  /*
977  * This is invoked from ProcessInterrupts(), and since some of the
978  * functions it calls contain CHECK_FOR_INTERRUPTS(), there is a potential
979  * for recursive calls if more signals are received while this runs. It's
980  * unclear that recursive entry would be safe, and it doesn't seem useful
981  * even if it is safe, so let's block interrupts until done.
982  */
983  HOLD_INTERRUPTS();
984 
985  /*
986  * Moreover, CurrentMemoryContext might be pointing almost anywhere. We
987  * don't want to risk leaking data into long-lived contexts, so let's do
988  * our work here in a private context that we can reset on each use.
989  */
990  if (hpm_context == NULL) /* first time through? */
991  hpm_context = AllocSetContextCreate(TopMemoryContext,
992  "HandleParallelMessages",
993  ALLOCSET_DEFAULT_SIZES);
994  else
995  MemoryContextReset(hpm_context);
996 
997  oldcontext = MemoryContextSwitchTo(hpm_context);
998 
999  /* OK to process messages. Reset the flag saying there are more to do. */
1000  ParallelMessagePending = false;
1001 
1002  dlist_foreach(iter, &pcxt_list)
1003  {
1004  ParallelContext *pcxt;
1005  int i;
1006 
1007  pcxt = dlist_container(ParallelContext, node, iter.cur);
1008  if (pcxt->worker == NULL)
1009  continue;
1010 
1011  for (i = 0; i < pcxt->nworkers_launched; ++i)
1012  {
1013  /*
1014  * Read as many messages as we can from each worker, but stop when
1015  * either (1) the worker's error queue goes away, which can happen
1016  * if we receive a Terminate message from the worker; or (2) no
1017  * more messages can be read from the worker without blocking.
1018  */
1019  while (pcxt->worker[i].error_mqh != NULL)
1020  {
1021  shm_mq_result res;
1022  Size nbytes;
1023  void *data;
1024 
1025  res = shm_mq_receive(pcxt->worker[i].error_mqh, &nbytes,
1026  &data, true);
1027  if (res == SHM_MQ_WOULD_BLOCK)
1028  break;
1029  else if (res == SHM_MQ_SUCCESS)
1030  {
1031  StringInfoData msg;
1032 
1033  initStringInfo(&msg);
1034  appendBinaryStringInfo(&msg, data, nbytes);
1035  HandleParallelMessage(pcxt, i, &msg);
1036  pfree(msg.data);
1037  }
1038  else
1039  ereport(ERROR,
1040  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1041  errmsg("lost connection to parallel worker")));
1042  }
1043  }
1044  }
1045 
1046  MemoryContextSwitchTo(oldcontext);
1047 
1048  /* Might as well clear the context on our way out */
1049  MemoryContextReset(hpm_context);
1050 
1051  RESUME_INTERRUPTS();
1052 }
1053 
1054 /*
1055  * Handle a single protocol message received from a single parallel worker.
1056  */
1057 static void
1058 HandleParallelMessage(ParallelContext *pcxt, int i, StringInfo msg)
1059 {
1060  char msgtype;
1061 
1062  if (pcxt->known_attached_workers != NULL &&
1063  !pcxt->known_attached_workers[i])
1064  {
1065  pcxt->known_attached_workers[i] = true;
1066  pcxt->nknown_attached_workers++;
1067  }
1068 
1069  msgtype = pq_getmsgbyte(msg);
1070 
1071  switch (msgtype)
1072  {
1073  case 'K': /* BackendKeyData */
1074  {
1075  int32 pid = pq_getmsgint(msg, 4);
1076 
1077  (void) pq_getmsgint(msg, 4); /* discard cancel key */
1078  (void) pq_getmsgend(msg);
1079  pcxt->worker[i].pid = pid;
1080  break;
1081  }
1082 
1083  case 'E': /* ErrorResponse */
1084  case 'N': /* NoticeResponse */
1085  {
1086  ErrorData edata;
1087  ErrorContextCallback *save_error_context_stack;
1088 
1089  /* Parse ErrorResponse or NoticeResponse. */
1090  pq_parse_errornotice(msg, &edata);
1091 
1092  /* Death of a worker isn't enough justification for suicide. */
1093  edata.elevel = Min(edata.elevel, ERROR);
1094 
1095  /*
1096  * If desired, add a context line to show that this is a
1097  * message propagated from a parallel worker. Otherwise, it
1098  * can sometimes be confusing to understand what actually
1099  * happened. (We don't do this in FORCE_PARALLEL_REGRESS mode
1100  * because it causes test-result instability depending on
1101  * whether a parallel worker is actually used or not.)
1102  */
1103  if (force_parallel_mode != FORCE_PARALLEL_REGRESS)
1104  {
1105  if (edata.context)
1106  edata.context = psprintf("%s\n%s", edata.context,
1107  _("parallel worker"));
1108  else
1109  edata.context = pstrdup(_("parallel worker"));
1110  }
1111 
1112  /*
1113  * Context beyond that should use the error context callbacks
1114  * that were in effect when the ParallelContext was created,
1115  * not the current ones.
1116  */
1117  save_error_context_stack = error_context_stack;
1118  error_context_stack = pcxt->error_context_stack;
1119 
1120  /* Rethrow error or print notice. */
1121  ThrowErrorData(&edata);
1122 
1123  /* Not an error, so restore previous context stack. */
1124  error_context_stack = save_error_context_stack;
1125 
1126  break;
1127  }
1128 
1129  case 'A': /* NotifyResponse */
1130  {
1131  /* Propagate NotifyResponse. */
1132  int32 pid;
1133  const char *channel;
1134  const char *payload;
1135 
1136  pid = pq_getmsgint(msg, 4);
1137  channel = pq_getmsgrawstring(msg);
1138  payload = pq_getmsgrawstring(msg);
1139  pq_endmessage(msg);
1140 
1141  NotifyMyFrontEnd(channel, payload, pid);
1142 
1143  break;
1144  }
1145 
1146  case 'X': /* Terminate, indicating clean exit */
1147  {
1148  shm_mq_detach(pcxt->worker[i].error_mqh);
1149  pcxt->worker[i].error_mqh = NULL;
1150  break;
1151  }
1152 
1153  default:
1154  {
1155  elog(ERROR, "unrecognized message type received from parallel worker: %c (message length %d bytes)",
1156  msgtype, msg->len);
1157  }
1158  }
1159 }
1160 
1161 /*
1162  * End-of-subtransaction cleanup for parallel contexts.
1163  *
1164  * Currently, it's forbidden to enter or leave a subtransaction while
1165  * parallel mode is in effect, so we could just blow away everything. But
1166  * we may want to relax that restriction in the future, so this code
1167  * contemplates that there may be multiple subtransaction IDs in pcxt_list.
1168  */
1169 void
1170 AtEOSubXact_Parallel(bool isCommit, SubTransactionId mySubId)
1171 {
1172  while (!dlist_is_empty(&pcxt_list))
1173  {
1174  ParallelContext *pcxt;
1175 
1176  pcxt = dlist_head_element(ParallelContext, node, &pcxt_list);
1177  if (pcxt->subid != mySubId)
1178  break;
1179  if (isCommit)
1180  elog(WARNING, "leaked parallel context");
1181  DestroyParallelContext(pcxt);
1182  }
1183 }
1184 
1185 /*
1186  * End-of-transaction cleanup for parallel contexts.
1187  */
1188 void
1189 AtEOXact_Parallel(bool isCommit)
1190 {
1191  while (!dlist_is_empty(&pcxt_list))
1192  {
1193  ParallelContext *pcxt;
1194 
1195  pcxt = dlist_head_element(ParallelContext, node, &pcxt_list);
1196  if (isCommit)
1197  elog(WARNING, "leaked parallel context");
1198  DestroyParallelContext(pcxt);
1199  }
1200 }
1201 
1202 /*
1203  * Main entrypoint for parallel workers.
1204  */
1205 void
1206 ParallelWorkerMain(Datum main_arg)
1207 {
1208  dsm_segment *seg;
1209  shm_toc *toc;
1210  FixedParallelState *fps;
1211  char *error_queue_space;
1212  shm_mq *mq;
1213  shm_mq_handle *mqh;
1214  char *libraryspace;
1215  char *entrypointstate;
1216  char *library_name;
1217  char *function_name;
1218  parallel_worker_main_type entrypt;
1219  char *gucspace;
1220  char *combocidspace;
1221  char *tsnapspace;
1222  char *asnapspace;
1223  char *tstatespace;
1224  char *reindexspace;
1225  char *relmapperspace;
1226  char *enumblacklistspace;
1227  StringInfoData msgbuf;
1228  char *session_dsm_handle_space;
1229 
1230  /* Set flag to indicate that we're initializing a parallel worker. */
1231  InitializingParallelWorker = true;
1232 
1233  /* Establish signal handlers. */
1234  pqsignal(SIGTERM, die);
1235  BackgroundWorkerUnblockSignals();
1236 
1237  /* Determine and set our parallel worker number. */
1238  Assert(ParallelWorkerNumber == -1);
1239  memcpy(&ParallelWorkerNumber, MyBgworkerEntry->bgw_extra, sizeof(int));
1240 
1241  /* Set up a memory context to work in, just for cleanliness. */
1242  CurrentMemoryContext = AllocSetContextCreate(TopMemoryContext,
1243  "Parallel worker",
1244  ALLOCSET_DEFAULT_SIZES);
1245 
1246  /*
1247  * Attach to the dynamic shared memory segment for the parallel query, and
1248  * find its table of contents.
1249  *
1250  * Note: at this point, we have not created any ResourceOwner in this
1251  * process. This will result in our DSM mapping surviving until process
1252  * exit, which is fine. If there were a ResourceOwner, it would acquire
1253  * ownership of the mapping, but we have no need for that.
1254  */
1255  seg = dsm_attach(DatumGetUInt32(main_arg));
1256  if (seg == NULL)
1257  ereport(ERROR,
1258  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1259  errmsg("could not map dynamic shared memory segment")));
1260  toc = shm_toc_attach(PARALLEL_MAGIC, dsm_segment_address(seg));
1261  if (toc == NULL)
1262  ereport(ERROR,
1263  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1264  errmsg("invalid magic number in dynamic shared memory segment")));
1265 
1266  /* Look up fixed parallel state. */
1267  fps = shm_toc_lookup(toc, PARALLEL_KEY_FIXED, false);
1268  MyFixedParallelState = fps;
1269 
1270  /* Arrange to signal the leader if we exit. */
1271  ParallelMasterPid = fps->parallel_master_pid;
1272  ParallelMasterBackendId = fps->parallel_master_backend_id;
1273  on_shmem_exit(ParallelWorkerShutdown, (Datum) 0);
1274 
1275  /*
1276  * Now we can find and attach to the error queue provided for us. That's
1277  * good, because until we do that, any errors that happen here will not be
1278  * reported back to the process that requested that this worker be
1279  * launched.
1280  */
1281  error_queue_space = shm_toc_lookup(toc, PARALLEL_KEY_ERROR_QUEUE, false);
1282  mq = (shm_mq *) (error_queue_space +
1283  ParallelWorkerNumber * PARALLEL_ERROR_QUEUE_SIZE);
1284  shm_mq_set_sender(mq, MyProc);
1285  mqh = shm_mq_attach(mq, seg, NULL);
1286  pq_redirect_to_shm_mq(seg, mqh);
1287  pq_set_parallel_master(fps->parallel_master_pid,
1288  fps->parallel_master_backend_id);
1289 
1290  /*
1291  * Send a BackendKeyData message to the process that initiated parallelism
1292  * so that it has access to our PID before it receives any other messages
1293  * from us. Our cancel key is sent, too, since that's the way the
1294  * protocol message is defined, but it won't actually be used for anything
1295  * in this case.
1296  */
1297  pq_beginmessage(&msgbuf, 'K');
1298  pq_sendint32(&msgbuf, (int32) MyProcPid);
1299  pq_sendint32(&msgbuf, (int32) MyCancelKey);
1300  pq_endmessage(&msgbuf);
1301 
1302  /*
1303  * Hooray! Primary initialization is complete. Now, we need to set up our
1304  * backend-local state to match the original backend.
1305  */
1306 
1307  /*
1308  * Join locking group. We must do this before anything that could try to
1309  * acquire a heavyweight lock, because any heavyweight locks acquired to
1310  * this point could block either directly against the parallel group
1311  * leader or against some process which in turn waits for a lock that
1312  * conflicts with the parallel group leader, causing an undetected
1313  * deadlock. (If we can't join the lock group, the leader has gone away,
1314  * so just exit quietly.)
1315  */
1316  if (!BecomeLockGroupMember(fps->parallel_master_pgproc,
1317  fps->parallel_master_pid))
1318  return;
1319 
1320  /*
1321  * Restore transaction and statement start-time timestamps. This must
1322  * happen before anything that would start a transaction, else asserts in
1323  * xact.c will fire.
1324  */
1325  SetParallelStartTimestamps(fps->xact_ts, fps->stmt_ts);
1326 
1327  /*
1328  * Identify the entry point to be called. In theory this could result in
1329  * loading an additional library, though most likely the entry point is in
1330  * the core backend or in a library we just loaded.
1331  */
1332  entrypointstate = shm_toc_lookup(toc, PARALLEL_KEY_ENTRYPOINT, false);
1333  library_name = entrypointstate;
1334  function_name = entrypointstate + strlen(library_name) + 1;
1335 
1336  entrypt = LookupParallelWorkerFunction(library_name, function_name);
1337 
1338  /* Restore database connection. */
1339  BackgroundWorkerInitializeConnectionByOid(fps->database_id,
1340  fps->authenticated_user_id,
1341  0);
1342 
1343  /*
1344  * Set the client encoding to the database encoding, since that is what
1345  * the leader will expect.
1346  */
1347  SetClientEncoding(GetDatabaseEncoding());
1348 
1349  /*
1350  * Load libraries that were loaded by original backend. We want to do
1351  * this before restoring GUCs, because the libraries might define custom
1352  * variables.
1353  */
1354  libraryspace = shm_toc_lookup(toc, PARALLEL_KEY_LIBRARY, false);
1355  StartTransactionCommand();
1356  RestoreLibraryState(libraryspace);
1357 
1358  /* Restore GUC values from launching backend. */
1359  gucspace = shm_toc_lookup(toc, PARALLEL_KEY_GUC, false);
1360  RestoreGUCState(gucspace);
1361  CommitTransactionCommand();
1362 
1363  /* Crank up a transaction state appropriate to a parallel worker. */
1364  tstatespace = shm_toc_lookup(toc, PARALLEL_KEY_TRANSACTION_STATE, false);
1365  StartParallelWorkerTransaction(tstatespace);
1366 
1367  /* Restore combo CID state. */
1368  combocidspace = shm_toc_lookup(toc, PARALLEL_KEY_COMBO_CID, false);
1369  RestoreComboCIDState(combocidspace);
1370 
1371  /* Attach to the per-session DSM segment and contained objects. */
1372  session_dsm_handle_space =
1373  shm_toc_lookup(toc, PARALLEL_KEY_SESSION_DSM, false);
1374  AttachSession(*(dsm_handle *) session_dsm_handle_space);
1375 
1376  /* Restore transaction snapshot. */
1377  tsnapspace = shm_toc_lookup(toc, PARALLEL_KEY_TRANSACTION_SNAPSHOT, false);
1378  RestoreTransactionSnapshot(RestoreSnapshot(tsnapspace),
1379  fps->parallel_master_pgproc);
1380 
1381  /* Restore active snapshot. */
1382  asnapspace = shm_toc_lookup(toc, PARALLEL_KEY_ACTIVE_SNAPSHOT, false);
1383  PushActiveSnapshot(RestoreSnapshot(asnapspace));
1384 
1385  /*
1386  * We've changed which tuples we can see, and must therefore invalidate
1387  * system caches.
1388  */
1389  InvalidateSystemCaches();
1390 
1391  /*
1392  * Restore current role id. Skip verifying whether session user is
1393  * allowed to become this role and blindly restore the leader's state for
1394  * current role.
1395  */
1396  SetCurrentRoleId(fps->outer_user_id, fps->is_superuser);
1397 
1398  /* Restore user ID and security context. */
1399  SetUserIdAndSecContext(fps->current_user_id, fps->sec_context);
1400 
1401  /* Restore temp-namespace state to ensure search path matches leader's. */
1402  SetTempNamespaceState(fps->temp_namespace_id,
1403  fps->temp_toast_namespace_id);
1404 
1405  /* Restore reindex state. */
1406  reindexspace = shm_toc_lookup(toc, PARALLEL_KEY_REINDEX_STATE, false);
1407  RestoreReindexState(reindexspace);
1408 
1409  /* Restore relmapper state. */
1410  relmapperspace = shm_toc_lookup(toc, PARALLEL_KEY_RELMAPPER_STATE, false);
1411  RestoreRelationMap(relmapperspace);
1412 
1413  /* Restore enum blacklist. */
1414  enumblacklistspace = shm_toc_lookup(toc, PARALLEL_KEY_ENUMBLACKLIST,
1415  false);
1416  RestoreEnumBlacklist(enumblacklistspace);
1417 
1418  /* Attach to the leader's serializable transaction, if SERIALIZABLE. */
1419  AttachSerializableXact(fps->serializable_xact_handle);
1420 
1421  /*
1422  * We've initialized all of our state now; nothing should change
1423  * hereafter.
1424  */
1425  InitializingParallelWorker = false;
1426  EnterParallelMode();
1427 
1428  /*
1429  * Time to do the real work: invoke the caller-supplied code.
1430  */
1431  entrypt(seg, toc);
1432 
1433  /* Must exit parallel mode to pop active snapshot. */
1434  ExitParallelMode();
1435 
1436  /* Must pop active snapshot so snapmgr.c doesn't complain. */
1437  PopActiveSnapshot();
1438 
1439  /* Shut down the parallel-worker transaction. */
1440  EndParallelWorkerTransaction();
1441 
1442  /* Detach from the per-session DSM segment. */
1443  DetachSession();
1444 
1445  /* Report success. */
1446  pq_putmessage('X', NULL, 0);
1447 }
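On the worker side of this handshake, the entry point invoked above has the parallel_worker_main_type signature and pulls its state back out of the table of contents. An illustrative sketch (the key and struct are hypothetical):

    void
    my_worker_main(dsm_segment *seg, shm_toc *toc)
    {
        MySharedState *mystate;

        mystate = shm_toc_lookup(toc, MYMODULE_KEY_SHARED_STATE, false);
        /* ... perform this worker's share of the work ... */
    }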
1448 
1449 /*
1450  * Update shared memory with the ending location of the last WAL record we
1451  * wrote, if it's greater than the value already stored there.
1452  */
1453 void
1454 ParallelWorkerReportLastRecEnd(XLogRecPtr last_xlog_end)
1455 {
1456  FixedParallelState *fps = MyFixedParallelState;
1457 
1458  Assert(fps != NULL);
1459  SpinLockAcquire(&fps->mutex);
1460  if (fps->last_xlog_end < last_xlog_end)
1461  fps->last_xlog_end = last_xlog_end;
1462  SpinLockRelease(&fps->mutex);
1463 }
1464 
1465 /*
1466  * Make sure the leader tries to read from our error queue one more time.
1467  * This guards against the case where we exit uncleanly without sending an
1468  * ErrorResponse to the leader, for example because some code calls proc_exit
1469  * directly.
1470  */
1471 static void
1472 ParallelWorkerShutdown(int code, Datum arg)
1473 {
1474  SendProcSignal(ParallelMasterPid,
1475  PROCSIG_PARALLEL_MESSAGE,
1476  ParallelMasterBackendId);
1477 }
1478 
1479 /*
1480  * Look up (and possibly load) a parallel worker entry point function.
1481  *
1482  * For functions contained in the core code, we use library name "postgres"
1483  * and consult the InternalParallelWorkers array. External functions are
1484  * looked up, and loaded if necessary, using load_external_function().
1485  *
1486  * The point of this is to pass function names as strings across process
1487  * boundaries. We can't pass actual function addresses because of the
1488  * possibility that the function has been loaded at a different address
1489  * in a different process. This is obviously a hazard for functions in
1490  * loadable libraries, but it can happen even for functions in the core code
1491  * on platforms using EXEC_BACKEND (e.g., Windows).
1492  *
1493  * At some point it might be worthwhile to get rid of InternalParallelWorkers[]
1494  * in favor of applying load_external_function() for core functions too;
1495  * but that raises portability issues that are not worth addressing now.
1496  */
1497 static parallel_worker_main_type
1498 LookupParallelWorkerFunction(const char *libraryname, const char *funcname)
1499 {
1500  /*
1501  * If the function is to be loaded from postgres itself, search the
1502  * InternalParallelWorkers array.
1503  */
1504  if (strcmp(libraryname, "postgres") == 0)
1505  {
1506  int i;
1507 
1508  for (i = 0; i < lengthof(InternalParallelWorkers); i++)
1509  {
1510  if (strcmp(InternalParallelWorkers[i].fn_name, funcname) == 0)
1511  return InternalParallelWorkers[i].fn_addr;
1512  }
1513 
1514  /* We can only reach this by programming error. */
1515  elog(ERROR, "internal function \"%s\" not found", funcname);
1516  }
1517 
1518  /* Otherwise load from external library. */
1519  return (parallel_worker_main_type)
1520  load_external_function(libraryname, funcname, true, NULL);
1521 }
Definition: postmaster.c:5737