PostgreSQL Source Code  git master
parallel.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * parallel.c
4  * Infrastructure for launching parallel workers
5  *
6  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * IDENTIFICATION
10  * src/backend/access/transam/parallel.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 
15 #include "postgres.h"
16 
17 #include "access/heapam.h"
18 #include "access/nbtree.h"
19 #include "access/parallel.h"
20 #include "access/session.h"
21 #include "access/xact.h"
22 #include "access/xlog.h"
23 #include "catalog/index.h"
24 #include "catalog/namespace.h"
25 #include "catalog/pg_enum.h"
26 #include "catalog/storage.h"
27 #include "commands/async.h"
28 #include "executor/execParallel.h"
29 #include "libpq/libpq.h"
30 #include "libpq/pqformat.h"
31 #include "libpq/pqmq.h"
32 #include "miscadmin.h"
33 #include "optimizer/optimizer.h"
34 #include "pgstat.h"
35 #include "storage/ipc.h"
36 #include "storage/predicate.h"
37 #include "storage/sinval.h"
38 #include "storage/spin.h"
39 #include "tcop/tcopprot.h"
40 #include "utils/combocid.h"
41 #include "utils/guc.h"
42 #include "utils/inval.h"
43 #include "utils/memutils.h"
44 #include "utils/relmapper.h"
45 #include "utils/snapmgr.h"
46 #include "utils/typcache.h"
47 
48 /*
49  * We don't want to waste a lot of memory on an error queue which, most of
50  * the time, will process only a handful of small messages. However, it is
51  * desirable to make it large enough that a typical ErrorResponse can be sent
52  * without blocking. That way, a worker that errors out can write the whole
53  * message into the queue and terminate without waiting for the user backend.
54  */
55 #define PARALLEL_ERROR_QUEUE_SIZE 16384
56 
57 /* Magic number for parallel context TOC. */
58 #define PARALLEL_MAGIC 0x50477c7c
59 
60 /*
61  * Magic numbers for per-context parallel state sharing. Higher-level code
62  * should use smaller values, leaving these very large ones for use by this
63  * module.
64  */
65 #define PARALLEL_KEY_FIXED UINT64CONST(0xFFFFFFFFFFFF0001)
66 #define PARALLEL_KEY_ERROR_QUEUE UINT64CONST(0xFFFFFFFFFFFF0002)
67 #define PARALLEL_KEY_LIBRARY UINT64CONST(0xFFFFFFFFFFFF0003)
68 #define PARALLEL_KEY_GUC UINT64CONST(0xFFFFFFFFFFFF0004)
69 #define PARALLEL_KEY_COMBO_CID UINT64CONST(0xFFFFFFFFFFFF0005)
70 #define PARALLEL_KEY_TRANSACTION_SNAPSHOT UINT64CONST(0xFFFFFFFFFFFF0006)
71 #define PARALLEL_KEY_ACTIVE_SNAPSHOT UINT64CONST(0xFFFFFFFFFFFF0007)
72 #define PARALLEL_KEY_TRANSACTION_STATE UINT64CONST(0xFFFFFFFFFFFF0008)
73 #define PARALLEL_KEY_ENTRYPOINT UINT64CONST(0xFFFFFFFFFFFF0009)
74 #define PARALLEL_KEY_SESSION_DSM UINT64CONST(0xFFFFFFFFFFFF000A)
75 #define PARALLEL_KEY_PENDING_SYNCS UINT64CONST(0xFFFFFFFFFFFF000B)
76 #define PARALLEL_KEY_REINDEX_STATE UINT64CONST(0xFFFFFFFFFFFF000C)
77 #define PARALLEL_KEY_RELMAPPER_STATE UINT64CONST(0xFFFFFFFFFFFF000D)
78 #define PARALLEL_KEY_ENUMBLACKLIST UINT64CONST(0xFFFFFFFFFFFF000E)
79 
80 /* Fixed-size parallel state. */
81 typedef struct FixedParallelState
82 {
83  /* Fixed-size state that workers must restore. */
98 
99  /* Mutex protects remaining fields. */
101 
102  /* Maximum XactLastRecEnd of any worker. */
105 
106 /*
107  * Our parallel worker number. We initialize this to -1, meaning that we are
108  * not a parallel worker. In parallel workers, it will be set to a value >= 0
109  * and < the number of workers before any user code is invoked; each parallel
110  * worker will get a different parallel worker number.
111  */
113 
114 /* Is there a parallel message pending which we need to receive? */
115 volatile bool ParallelMessagePending = false;
116 
117 /* Are we initializing a parallel worker? */
119 
120 /* Pointer to our fixed parallel state. */
122 
123 /* List of active parallel contexts. */
125 
126 /* Backend-local copy of data from FixedParallelState. */
127 static pid_t ParallelLeaderPid;
128 
129 /*
130  * List of internal parallel worker entry points. We need this for
131  * reasons explained in LookupParallelWorkerFunction(), below.
132  */
133 static const struct
134 {
135  const char *fn_name;
138 
139 {
140  {
141  "ParallelQueryMain", ParallelQueryMain
142  },
143  {
144  "_bt_parallel_build_main", _bt_parallel_build_main
145  },
146  {
147  "parallel_vacuum_main", parallel_vacuum_main
148  }
149 };
150 
151 /* Private functions. */
152 static void HandleParallelMessage(ParallelContext *pcxt, int i, StringInfo msg);
154 static parallel_worker_main_type LookupParallelWorkerFunction(const char *libraryname, const char *funcname);
155 static void ParallelWorkerShutdown(int code, Datum arg);
156 
157 
158 /*
159  * Establish a new parallel context. This should be done after entering
160  * parallel mode, and (unless there is an error) the context should be
161  * destroyed before exiting the current subtransaction.
 *
 * library_name and function_name are duplicated with pstrdup(), so the
 * caller's strings are not retained. nworkers must be non-negative (this is
 * checked only by an Assert); nworkers_to_launch starts out equal to it.
 *
 * The new context is allocated zero-filled, pushed onto the head of
 * pcxt_list, and returned to the caller.
162  */
164 CreateParallelContext(const char *library_name, const char *function_name,
165  int nworkers)
166 {
167  MemoryContext oldcontext;
168  ParallelContext *pcxt;
169 
170  /* It is unsafe to create a parallel context if not in parallel mode. */
172 
173  /* Number of workers should be non-negative. */
174  Assert(nworkers >= 0);
175 
176  /* We might be running in a short-lived memory context. */
178 
179  /* Initialize a new, zero-filled ParallelContext from the arguments. */
180  pcxt = palloc0(sizeof(ParallelContext));
182  pcxt->nworkers = nworkers;
183  pcxt->nworkers_to_launch = nworkers;
184  pcxt->library_name = pstrdup(library_name);
185  pcxt->function_name = pstrdup(function_name);
188  dlist_push_head(&pcxt_list, &pcxt->node);
189 
190  /* Restore previous memory context. */
191  MemoryContextSwitchTo(oldcontext);
192 
193  return pcxt;
194 }
195 
196 /*
197  * Establish the dynamic shared memory segment for a parallel context and
198  * copy state and other bookkeeping information that will be needed by
199  * parallel workers into it.
 *
 * The work is done in two passes: first the space for every piece of state
 * is estimated, then the table of contents is created and each piece is
 * allocated, serialized and inserted under its PARALLEL_KEY_* key.
 *
 * pcxt->nworkers is forced to zero if no per-session DSM handle is
 * available or if no segment could be obtained; in the latter case the
 * fixed-size state still gets set up, but in a table of contents that does
 * not live in a DSM segment.
200  */
201 void
203 {
204  MemoryContext oldcontext;
205  Size library_len = 0;
206  Size guc_len = 0;
207  Size combocidlen = 0;
208  Size tsnaplen = 0;
209  Size asnaplen = 0;
210  Size tstatelen = 0;
211  Size pendingsyncslen = 0;
212  Size reindexlen = 0;
213  Size relmapperlen = 0;
214  Size enumblacklistlen = 0;
215  Size segsize = 0;
216  int i;
217  FixedParallelState *fps;
218  dsm_handle session_dsm_handle = DSM_HANDLE_INVALID;
219  Snapshot transaction_snapshot = GetTransactionSnapshot();
220  Snapshot active_snapshot = GetActiveSnapshot();
221 
222  /* We might be running in a very short-lived memory context. */
224 
225  /* Allow space to store the fixed-size parallel state. */
227  shm_toc_estimate_keys(&pcxt->estimator, 1);
228 
229  /*
230  * Normally, the user will have requested at least one worker process, but
231  * if by chance they have not, we can skip a bunch of things here.
232  */
233  if (pcxt->nworkers > 0)
234  {
235  /* Get (or create) the per-session DSM segment's handle. */
236  session_dsm_handle = GetSessionDsmHandle();
237 
238  /*
239  * If we weren't able to create a per-session DSM segment, then we can
240  * continue but we can't safely launch any workers because their
241  * record typmods would be incompatible so they couldn't exchange
242  * tuples.
243  */
244  if (session_dsm_handle == DSM_HANDLE_INVALID)
245  pcxt->nworkers = 0;
246  }
247 
248  if (pcxt->nworkers > 0)
249  {
250  /* Estimate space for various kinds of state sharing. */
251  library_len = EstimateLibraryStateSpace();
252  shm_toc_estimate_chunk(&pcxt->estimator, library_len);
253  guc_len = EstimateGUCStateSpace();
254  shm_toc_estimate_chunk(&pcxt->estimator, guc_len);
255  combocidlen = EstimateComboCIDStateSpace();
256  shm_toc_estimate_chunk(&pcxt->estimator, combocidlen);
257  tsnaplen = EstimateSnapshotSpace(transaction_snapshot);
258  shm_toc_estimate_chunk(&pcxt->estimator, tsnaplen);
259  asnaplen = EstimateSnapshotSpace(active_snapshot);
260  shm_toc_estimate_chunk(&pcxt->estimator, asnaplen);
261  tstatelen = EstimateTransactionStateSpace();
262  shm_toc_estimate_chunk(&pcxt->estimator, tstatelen);
264  pendingsyncslen = EstimatePendingSyncsSpace();
265  shm_toc_estimate_chunk(&pcxt->estimator, pendingsyncslen);
266  reindexlen = EstimateReindexStateSpace();
267  shm_toc_estimate_chunk(&pcxt->estimator, reindexlen);
268  relmapperlen = EstimateRelationMapSpace();
269  shm_toc_estimate_chunk(&pcxt->estimator, relmapperlen);
270  enumblacklistlen = EstimateEnumBlacklistSpace();
271  shm_toc_estimate_chunk(&pcxt->estimator, enumblacklistlen);
272  /* If you add more chunks here, you probably need to bump the key count. */
273  shm_toc_estimate_keys(&pcxt->estimator, 11);
274 
275  /* Estimate space needed for error queues. */
278  "parallel error queue size not buffer-aligned");
281  pcxt->nworkers));
282  shm_toc_estimate_keys(&pcxt->estimator, 1);
283 
284  /*
 * Estimate how much we'll need for the entrypoint info: both names
 * plus one terminating NUL byte each.
 */
285  shm_toc_estimate_chunk(&pcxt->estimator, strlen(pcxt->library_name) +
286  strlen(pcxt->function_name) + 2);
287  shm_toc_estimate_keys(&pcxt->estimator, 1);
288  }
289 
290  /*
291  * Create DSM and initialize with new table of contents. But if the user
292  * didn't request any workers, then don't bother creating a dynamic shared
293  * memory segment; instead, just use backend-private memory.
294  *
295  * Also, if we can't create a dynamic shared memory segment because the
296  * maximum number of segments have already been created, then fall back to
297  * backend-private memory, and plan not to use any workers. We hope this
298  * won't happen very often, but it's better to abandon the use of
299  * parallelism than to fail outright.
300  */
301  segsize = shm_toc_estimate(&pcxt->estimator);
302  if (pcxt->nworkers > 0)
304  if (pcxt->seg != NULL)
306  dsm_segment_address(pcxt->seg),
307  segsize);
308  else
309  {
310  pcxt->nworkers = 0;
313  segsize);
314  }
315 
316  /* Initialize fixed-size state in the table of contents just created. */
317  fps = (FixedParallelState *)
318  shm_toc_allocate(pcxt->toc, sizeof(FixedParallelState));
319  fps->database_id = MyDatabaseId;
332  SpinLockInit(&fps->mutex);
333  fps->last_xlog_end = 0;
335 
336  /* We can skip the rest of this if we're not budgeting for any workers. */
337  if (pcxt->nworkers > 0)
338  {
339  char *libraryspace;
340  char *gucspace;
341  char *combocidspace;
342  char *tsnapspace;
343  char *asnapspace;
344  char *tstatespace;
345  char *pendingsyncsspace;
346  char *reindexspace;
347  char *relmapperspace;
348  char *error_queue_space;
349  char *session_dsm_handle_space;
350  char *entrypointstate;
351  char *enumblacklistspace;
352  Size lnamelen;
353 
354  /* Serialize shared libraries we have loaded. */
355  libraryspace = shm_toc_allocate(pcxt->toc, library_len);
356  SerializeLibraryState(library_len, libraryspace);
357  shm_toc_insert(pcxt->toc, PARALLEL_KEY_LIBRARY, libraryspace);
358 
359  /* Serialize GUC settings. */
360  gucspace = shm_toc_allocate(pcxt->toc, guc_len);
361  SerializeGUCState(guc_len, gucspace);
362  shm_toc_insert(pcxt->toc, PARALLEL_KEY_GUC, gucspace);
363 
364  /* Serialize combo CID state. */
365  combocidspace = shm_toc_allocate(pcxt->toc, combocidlen);
366  SerializeComboCIDState(combocidlen, combocidspace);
367  shm_toc_insert(pcxt->toc, PARALLEL_KEY_COMBO_CID, combocidspace);
368 
369  /* Serialize transaction snapshot and active snapshot. */
370  tsnapspace = shm_toc_allocate(pcxt->toc, tsnaplen);
371  SerializeSnapshot(transaction_snapshot, tsnapspace);
373  tsnapspace);
374  asnapspace = shm_toc_allocate(pcxt->toc, asnaplen);
375  SerializeSnapshot(active_snapshot, asnapspace);
376  shm_toc_insert(pcxt->toc, PARALLEL_KEY_ACTIVE_SNAPSHOT, asnapspace);
377 
378  /* Provide the handle for per-session segment. */
379  session_dsm_handle_space = shm_toc_allocate(pcxt->toc,
380  sizeof(dsm_handle));
381  *(dsm_handle *) session_dsm_handle_space = session_dsm_handle;
383  session_dsm_handle_space);
384 
385  /* Serialize transaction state. */
386  tstatespace = shm_toc_allocate(pcxt->toc, tstatelen);
387  SerializeTransactionState(tstatelen, tstatespace);
389 
390  /* Serialize pending syncs. */
391  pendingsyncsspace = shm_toc_allocate(pcxt->toc, pendingsyncslen);
392  SerializePendingSyncs(pendingsyncslen, pendingsyncsspace);
394  pendingsyncsspace);
395 
396  /* Serialize reindex state. */
397  reindexspace = shm_toc_allocate(pcxt->toc, reindexlen);
398  SerializeReindexState(reindexlen, reindexspace);
399  shm_toc_insert(pcxt->toc, PARALLEL_KEY_REINDEX_STATE, reindexspace);
400 
401  /* Serialize relmapper state. */
402  relmapperspace = shm_toc_allocate(pcxt->toc, relmapperlen);
403  SerializeRelationMap(relmapperlen, relmapperspace);
405  relmapperspace);
406 
407  /*
 * Serialize enum blacklist state. Note that this serializer is
 * called with the buffer first and the length second, unlike the
 * other Serialize* calls above.
 */
408  enumblacklistspace = shm_toc_allocate(pcxt->toc, enumblacklistlen);
409  SerializeEnumBlacklist(enumblacklistspace, enumblacklistlen);
411  enumblacklistspace);
412 
413  /* Allocate zeroed per-worker bookkeeping, one entry per budgeted worker. */
414  pcxt->worker = palloc0(sizeof(ParallelWorkerInfo) * pcxt->nworkers);
415 
416  /*
417  * Establish error queues in dynamic shared memory.
418  *
419  * These queues should be used only for transmitting ErrorResponse,
420  * NoticeResponse, and NotifyResponse protocol messages. Tuple data
421  * should be transmitted via separate (possibly larger?) queues.
 *
 * The queues are laid out back to back in a single allocation, so
 * queue i starts at offset i * PARALLEL_ERROR_QUEUE_SIZE.
422  */
423  error_queue_space =
424  shm_toc_allocate(pcxt->toc,
426  pcxt->nworkers));
427  for (i = 0; i < pcxt->nworkers; ++i)
428  {
429  char *start;
430  shm_mq *mq;
431 
432  start = error_queue_space + i * PARALLEL_ERROR_QUEUE_SIZE;
433  mq = shm_mq_create(start, PARALLEL_ERROR_QUEUE_SIZE);
435  pcxt->worker[i].error_mqh = shm_mq_attach(mq, pcxt->seg, NULL);
436  }
437  shm_toc_insert(pcxt->toc, PARALLEL_KEY_ERROR_QUEUE, error_queue_space);
438 
439  /*
440  * Serialize entrypoint information. It's unsafe to pass function
441  * pointers across processes, as the function pointer may be different
442  * in each process in EXEC_BACKEND builds, so we always pass library
443  * and function name. (We use library name "postgres" for functions
444  * in the core backend.)
 *
 * The chunk holds the library name, a NUL byte, the function name
 * and a final NUL byte, in that order.
445  */
446  lnamelen = strlen(pcxt->library_name);
447  entrypointstate = shm_toc_allocate(pcxt->toc, lnamelen +
448  strlen(pcxt->function_name) + 2);
449  strcpy(entrypointstate, pcxt->library_name);
450  strcpy(entrypointstate + lnamelen + 1, pcxt->function_name);
451  shm_toc_insert(pcxt->toc, PARALLEL_KEY_ENTRYPOINT, entrypointstate);
452  }
453 
454  /* Restore previous memory context. */
455  MemoryContextSwitchTo(oldcontext);
456 }
457 
458 /*
459  * Reinitialize the dynamic shared memory segment for a parallel context such
460  * that we could launch workers for it again.
 *
 * This resets the leader-side launch bookkeeping (nworkers_launched and the
 * known_attached_workers tracking), clears last_xlog_end in the fixed
 * parallel state, and recreates one error queue per budgeted worker.
461  */
462 void
464 {
465  FixedParallelState *fps;
466 
467  /* Wait for any old workers to exit. */
468  if (pcxt->nworkers_launched > 0)
469  {
472  pcxt->nworkers_launched = 0;
473  if (pcxt->known_attached_workers)
474  {
476  pcxt->known_attached_workers = NULL;
477  pcxt->nknown_attached_workers = 0;
478  }
479  }
480 
481  /* Reset a few bits of fixed parallel state to a clean state. */
482  fps = shm_toc_lookup(pcxt->toc, PARALLEL_KEY_FIXED, false);
483  fps->last_xlog_end = 0;
484 
485  /*
 * Recreate error queues (if they exist). The loop covers every budgeted
 * worker (nworkers), not just those launched last time, and attaches
 * this backend to each freshly created queue.
 */
486  if (pcxt->nworkers > 0)
487  {
488  char *error_queue_space;
489  int i;
490 
491  error_queue_space =
493  for (i = 0; i < pcxt->nworkers; ++i)
494  {
495  char *start;
496  shm_mq *mq;
497 
498  start = error_queue_space + i * PARALLEL_ERROR_QUEUE_SIZE;
499  mq = shm_mq_create(start, PARALLEL_ERROR_QUEUE_SIZE);
501  pcxt->worker[i].error_mqh = shm_mq_attach(mq, pcxt->seg, NULL);
502  }
503  }
504 }
505 
506 /*
507  * Reinitialize parallel workers for a parallel context such that we could
508  * launch a different number of workers. This is required for cases where
509  * we need to reuse the same DSM segment, but the number of workers can
510  * vary from run-to-run.
 *
 * Only pcxt->nworkers_to_launch is updated; pcxt->nworkers is left as is.
511  */
512 void
513 ReinitializeParallelWorkers(ParallelContext *pcxt, int nworkers_to_launch)
514 {
515  /*
516  * The number of workers to be launched must not exceed the number of
517  * workers with which the parallel context was initialized.
518  */
519  Assert(pcxt->nworkers >= nworkers_to_launch);
520  pcxt->nworkers_to_launch = nworkers_to_launch;
521 }
522 
523 /*
524  * Launch parallel workers.
 *
 * Attempts to register up to pcxt->nworkers_to_launch background workers.
 * pcxt->nworkers_launched is incremented once per successful registration;
 * for every slot that could not be registered, the slot's bgwhandle is set
 * to NULL and its error queue is detached and forgotten.
 *
 * Returns immediately if either nworkers or nworkers_to_launch is zero.
525  */
526 void
528 {
529  MemoryContext oldcontext;
530  BackgroundWorker worker;
531  int i;
532  bool any_registrations_failed = false;
533 
534  /* Skip this if we have no workers. */
535  if (pcxt->nworkers == 0 || pcxt->nworkers_to_launch == 0)
536  return;
537 
538  /* We need to be a lock group leader. */
540 
541  /* If we do have workers, we'd better have a DSM segment. */
542  Assert(pcxt->seg != NULL);
543 
544  /* We might be running in a short-lived memory context. */
546 
547  /*
 * Configure a worker. The same template is reused for every slot; only
 * bgw_extra, which carries the slot index, is rewritten inside the loop.
 */
548  memset(&worker, 0, sizeof(worker));
549  snprintf(worker.bgw_name, BGW_MAXLEN, "parallel worker for PID %d",
550  MyProcPid);
551  snprintf(worker.bgw_type, BGW_MAXLEN, "parallel worker");
552  worker.bgw_flags =
557  sprintf(worker.bgw_library_name, "postgres");
558  sprintf(worker.bgw_function_name, "ParallelWorkerMain");
560  worker.bgw_notify_pid = MyProcPid;
561 
562  /*
563  * Start workers.
564  *
565  * The caller must be able to tolerate ending up with fewer workers than
566  * expected, so there is no need to throw an error here if registration
567  * fails. It wouldn't help much anyway, because registering the worker in
568  * no way guarantees that it will start up and initialize successfully.
569  */
570  for (i = 0; i < pcxt->nworkers_to_launch; ++i)
571  {
572  memcpy(worker.bgw_extra, &i, sizeof(int));
573  if (!any_registrations_failed &&
575  &pcxt->worker[i].bgwhandle))
576  {
578  pcxt->worker[i].bgwhandle);
579  pcxt->nworkers_launched++;
580  }
581  else
582  {
583  /*
584  * If we weren't able to register the worker, then we've bumped up
585  * against the max_worker_processes limit, and future
586  * registrations will probably fail too, so arrange to skip them.
587  * But we still have to execute this code for the remaining slots
588  * to make sure that we forget about the error queues we budgeted
589  * for those workers. Otherwise, we'll wait for them to start,
590  * but they never will.
591  */
592  any_registrations_failed = true;
593  pcxt->worker[i].bgwhandle = NULL;
594  shm_mq_detach(pcxt->worker[i].error_mqh);
595  pcxt->worker[i].error_mqh = NULL;
596  }
597  }
598 
599  /*
600  * Now that nworkers_launched has taken its final value, we can initialize
601  * known_attached_workers: one zeroed flag per launched worker.
602  */
603  if (pcxt->nworkers_launched > 0)
604  {
605  pcxt->known_attached_workers =
606  palloc0(sizeof(bool) * pcxt->nworkers_launched);
607  pcxt->nknown_attached_workers = 0;
608  }
609 
610  /* Restore previous memory context. */
611  MemoryContextSwitchTo(oldcontext);
612 }
613 
614 /*
615  * Wait for all workers to attach to their error queues, and throw an error if
616  * any worker fails to do this.
617  *
618  * Callers can assume that if this function returns successfully, then the
619  * number of workers given by pcxt->nworkers_launched have initialized and
620  * attached to their error queues. Whether or not these workers are guaranteed
621  * to still be running depends on what code the caller asked them to run;
622  * this function does not guarantee that they have not exited. However, it
623  * does guarantee that any workers which exited must have done so cleanly and
624  * after successfully performing the work with which they were tasked.
625  *
626  * If this function is not called, then some of the workers that were launched
627  * may not have been started due to a fork() failure, or may have exited during
628  * early startup prior to attaching to the error queue, so nworkers_launched
629  * cannot be viewed as completely reliable. It will never be less than the
630  * number of workers which actually started, but it might be more. Any workers
631  * that failed to start will still be discovered by
632  * WaitForParallelWorkersToFinish and an error will be thrown at that time,
633  * provided that function is eventually reached.
634  *
635  * In general, the leader process should do as much work as possible before
636  * calling this function. fork() failures and other early-startup failures
637  * are very uncommon, and having the leader sit idle when it could be doing
638  * useful work is undesirable. However, if the leader needs to wait for
639  * all of its workers or for a specific worker, it may want to call this
640  * function before doing so. If not, it must make some other provision for
641  * the failure-to-start case, lest it wait forever. On the other hand, a
642  * leader which never waits for a worker that might not be started yet, or
643  * at least never does so prior to WaitForParallelWorkersToFinish(), need not
644  * call this function at all.
645  */
646 void
648 {
649  int i;
650 
651  /* Skip this if we have no launched workers. */
652  if (pcxt->nworkers_launched == 0)
653  return;
654 
655  for (;;)
656  {
657  /*
658  * This will process any parallel messages that are pending and it may
659  * also throw an error propagated from a worker.
660  */
662 
663  for (i = 0; i < pcxt->nworkers_launched; ++i)
664  {
666  shm_mq *mq;
667  int rc;
668  pid_t pid;
669 
 /* Workers already accounted for need no further checking. */
670  if (pcxt->known_attached_workers[i])
671  continue;
672 
673  /*
674  * If error_mqh is NULL, then the worker has already exited
675  * cleanly, so count it as attached.
676  */
677  if (pcxt->worker[i].error_mqh == NULL)
678  {
679  pcxt->known_attached_workers[i] = true;
680  ++pcxt->nknown_attached_workers;
681  continue;
682  }
683 
684  status = GetBackgroundWorkerPid(pcxt->worker[i].bgwhandle, &pid);
685  if (status == BGWH_STARTED)
686  {
687  /* Has the worker attached to the error queue? */
688  mq = shm_mq_get_queue(pcxt->worker[i].error_mqh);
689  if (shm_mq_get_sender(mq) != NULL)
690  {
691  /* Yes, so it is known to be attached. */
692  pcxt->known_attached_workers[i] = true;
693  ++pcxt->nknown_attached_workers;
694  }
695  }
696  else if (status == BGWH_STOPPED)
697  {
698  /*
699  * If the worker stopped without attaching to the error queue,
700  * throw an error. Otherwise it did attach before stopping, so
 * it is recorded as attached below.
701  */
702  mq = shm_mq_get_queue(pcxt->worker[i].error_mqh);
703  if (shm_mq_get_sender(mq) == NULL)
704  ereport(ERROR,
705  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
706  errmsg("parallel worker failed to initialize"),
707  errhint("More details may be available in the server log.")));
708 
709  pcxt->known_attached_workers[i] = true;
710  ++pcxt->nknown_attached_workers;
711  }
712  else
713  {
714  /*
715  * Worker not yet started, so we must wait. The postmaster
716  * will notify us if the worker's state changes. Our latch
717  * might also get set for some other reason, but if so we'll
718  * just end up waiting for the same worker again.
719  */
720  rc = WaitLatch(MyLatch,
723 
724  if (rc & WL_LATCH_SET)
726  }
727  }
728 
729  /* If all launched workers are accounted for as attached, we're done. */
730  if (pcxt->nknown_attached_workers >= pcxt->nworkers_launched)
731  {
733  break;
734  }
735  }
736 }
737 
738 /*
739  * Wait for all workers to finish computing.
740  *
741  * Even if the parallel operation seems to have completed successfully, it's
742  * important to call this function afterwards. We must not miss any errors
743  * the workers may have thrown during the parallel operation, or any that they
744  * may yet throw while shutting down.
745  *
746  * Also, we want to update our notion of XactLastRecEnd based on worker
747  * feedback.
748  */
749 void
751 {
752  for (;;)
753  {
754  bool anyone_alive = false;
755  int nfinished = 0;
756  int i;
757 
758  /*
759  * This will process any parallel messages that are pending, which may
760  * change the outcome of the loop that follows. It may also throw an
761  * error propagated from a worker.
762  */
764 
765  for (i = 0; i < pcxt->nworkers_launched; ++i)
766  {
767  /*
768  * If error_mqh is NULL, then the worker has already exited
769  * cleanly. If we have received a message through error_mqh from
770  * the worker, we know it started up cleanly, and therefore we're
771  * certain to be notified when it exits.
772  */
773  if (pcxt->worker[i].error_mqh == NULL)
774  ++nfinished;
775  else if (pcxt->known_attached_workers[i])
776  {
777  anyone_alive = true;
778  break;
779  }
780  }
781 
782  if (!anyone_alive)
783  {
784  /* If all workers are known to have finished, we're done. */
785  if (nfinished >= pcxt->nworkers_launched)
786  {
787  Assert(nfinished == pcxt->nworkers_launched);
788  break;
789  }
790 
791  /*
792  * We didn't detect any living workers, but not all workers are
793  * known to have exited cleanly. Either not all workers have
794  * launched yet, or maybe some of them failed to start or
795  * terminated abnormally.
796  */
797  for (i = 0; i < pcxt->nworkers_launched; ++i)
798  {
799  pid_t pid;
800  shm_mq *mq;
801 
802  /*
803  * If the worker is BGWH_NOT_YET_STARTED or BGWH_STARTED, we
804  * should just keep waiting. If it is BGWH_STOPPED, then
805  * further investigation is needed. Slots with no error queue
 * or no worker handle are skipped as well.
806  */
807  if (pcxt->worker[i].error_mqh == NULL ||
808  pcxt->worker[i].bgwhandle == NULL ||
810  &pid) != BGWH_STOPPED)
811  continue;
812 
813  /*
814  * Check whether the worker ended up stopped without ever
815  * attaching to the error queue. If so, the postmaster was
816  * unable to fork the worker or it exited without initializing
817  * properly. We must throw an error, since the caller may
818  * have been expecting the worker to do some work before
819  * exiting.
820  */
821  mq = shm_mq_get_queue(pcxt->worker[i].error_mqh);
822  if (shm_mq_get_sender(mq) == NULL)
823  ereport(ERROR,
824  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
825  errmsg("parallel worker failed to initialize"),
826  errhint("More details may be available in the server log.")));
827 
828  /*
829  * The worker is stopped, but is attached to the error queue.
830  * Unless there's a bug somewhere, this will only happen when
831  * the worker writes messages and terminates after the
832  * CHECK_FOR_INTERRUPTS() near the top of this function and
833  * before the call to GetBackgroundWorkerPid(). In that case,
834  * our latch should have been set as well and the right things
835  * will happen on the next pass through the loop.
836  */
837  }
838  }
839 
843  }
844 
 /* Only consult the fixed parallel state if a table of contents exists. */
845  if (pcxt->toc != NULL)
846  {
847  FixedParallelState *fps;
848 
849  fps = shm_toc_lookup(pcxt->toc, PARALLEL_KEY_FIXED, false);
850  if (fps->last_xlog_end > XactLastRecEnd)
852  }
853 }
854 
855 /*
856  * Wait for all workers to exit.
857  *
858  * This function ensures that workers have been completely shut down. The
859  * difference between WaitForParallelWorkersToFinish and this function is
860  * that the former just ensures that the last message sent by a worker backend
861  * is received by the leader backend, whereas this ensures complete shutdown.
 *
 * Each worker handle that has been waited on is freed and set to NULL, so
 * slots without a handle are simply skipped.
862  */
863 static void
865 {
866  int i;
867 
868  /* Wait until the workers actually die. */
869  for (i = 0; i < pcxt->nworkers_launched; ++i)
870  {
872 
 /* Nothing to wait for if there is no worker array or no handle. */
873  if (pcxt->worker == NULL || pcxt->worker[i].bgwhandle == NULL)
874  continue;
875 
877 
878  /*
879  * If the postmaster kicked the bucket, we have no chance of cleaning
880  * up safely -- we won't be able to tell when our workers are actually
881  * dead. This doesn't necessitate a PANIC since they will all abort
882  * eventually, but we can't safely continue this session.
883  */
884  if (status == BGWH_POSTMASTER_DIED)
885  ereport(FATAL,
886  (errcode(ERRCODE_ADMIN_SHUTDOWN),
887  errmsg("postmaster exited during a parallel transaction")));
888 
889  /* Release the handle and clear the slot so it is not waited on again. */
890  pfree(pcxt->worker[i].bgwhandle);
891  pcxt->worker[i].bgwhandle = NULL;
892  }
893 }
894 
895 /*
896  * Destroy a parallel context.
897  *
898  * If expecting a clean exit, you should use WaitForParallelWorkersToFinish()
899  * first, before calling this function. When this function is invoked, any
900  * remaining workers are forcibly killed; the dynamic shared memory segment
901  * is unmapped; and we then wait (uninterruptibly) for the workers to exit.
 *
 * The context itself, its copies of the library and function names, and the
 * worker array are all freed, so pcxt must not be used after this returns.
902  */
903 void
905 {
906  int i;
907 
908  /*
909  * Be careful about order of operations here! We remove the parallel
910  * context from the list before we do anything else; otherwise, if an
911  * error occurs during a subsequent step, we might try to nuke it again
912  * from AtEOXact_Parallel or AtEOSubXact_Parallel.
913  */
914  dlist_delete(&pcxt->node);
915 
916  /* Kill each worker in turn, and forget their error queues. */
917  if (pcxt->worker != NULL)
918  {
919  for (i = 0; i < pcxt->nworkers_launched; ++i)
920  {
921  if (pcxt->worker[i].error_mqh != NULL)
922  {
924 
925  shm_mq_detach(pcxt->worker[i].error_mqh);
926  pcxt->worker[i].error_mqh = NULL;
927  }
928  }
929  }
930 
931  /*
932  * If we have allocated a shared memory segment, detach it. This will
933  * implicitly detach the error queues, and any other shared memory queues,
934  * stored there.
935  */
936  if (pcxt->seg != NULL)
937  {
938  dsm_detach(pcxt->seg);
939  pcxt->seg = NULL;
940  }
941 
942  /*
943  * If this parallel context is actually in backend-private memory rather
944  * than shared memory, free that memory instead.
945  */
946  if (pcxt->private_memory != NULL)
947  {
948  pfree(pcxt->private_memory);
949  pcxt->private_memory = NULL;
950  }
951 
952  /*
953  * We can't finish transaction commit or abort until all of the workers
954  * have exited. This means, in particular, that we can't respond to
955  * interrupts at this stage.
956  */
957  HOLD_INTERRUPTS();
960 
961  /* Free the worker array itself. */
962  if (pcxt->worker != NULL)
963  {
964  pfree(pcxt->worker);
965  pcxt->worker = NULL;
966  }
967 
968  /* Free the name copies and finally the context itself. */
969  pfree(pcxt->library_name);
970  pfree(pcxt->function_name);
971  pfree(pcxt);
972 }
973 
974 /*
975  * Are there any parallel contexts currently active?
 *
 * Returns true whenever pcxt_list is non-empty.
976  */
977 bool
979 {
980  return !dlist_is_empty(&pcxt_list);
981 }
982 
983 /*
984  * Handle receipt of an interrupt indicating a parallel worker message.
985  *
986  * Note: this is called within a signal handler! All we can do is set
987  * a flag that will cause the next CHECK_FOR_INTERRUPTS() to invoke
988  * HandleParallelMessages(). We also set our own latch.
989  */
990 void
992 {
993  InterruptPending = true;
994  ParallelMessagePending = true;
995  SetLatch(MyLatch);
996 }
997 
998 /*
999  * Handle any queued protocol messages received from parallel workers.
 *
 * Every context on pcxt_list that has a worker array is visited, and each
 * launched worker's error queue is drained without blocking. A receive
 * result other than success or would-block is reported as a lost connection.
1000  */
1001 void
1003 {
1004  dlist_iter iter;
1005  MemoryContext oldcontext;
1006 
1007  static MemoryContext hpm_context = NULL;
1008 
1009  /*
1010  * This is invoked from ProcessInterrupts(), and since some of the
1011  * functions it calls contain CHECK_FOR_INTERRUPTS(), there is a potential
1012  * for recursive calls if more signals are received while this runs. It's
1013  * unclear that recursive entry would be safe, and it doesn't seem useful
1014  * even if it is safe, so let's block interrupts until done.
1015  */
1016  HOLD_INTERRUPTS();
1017 
1018  /*
1019  * Moreover, CurrentMemoryContext might be pointing almost anywhere. We
1020  * don't want to risk leaking data into long-lived contexts, so let's do
1021  * our work here in a private context that we can reset on each use.
1022  */
1023  if (hpm_context == NULL) /* first time through? */
1025  "HandleParallelMessages",
1027  else
1028  MemoryContextReset(hpm_context);
1029 
1030  oldcontext = MemoryContextSwitchTo(hpm_context);
1031 
1032  /* OK to process messages. Reset the flag saying there are more to do. */
1033  ParallelMessagePending = false;
1034 
1035  dlist_foreach(iter, &pcxt_list)
1036  {
1037  ParallelContext *pcxt;
1038  int i;
1039 
1040  pcxt = dlist_container(ParallelContext, node, iter.cur);
 /* A context without a worker array has no error queues to read. */
1041  if (pcxt->worker == NULL)
1042  continue;
1043 
1044  for (i = 0; i < pcxt->nworkers_launched; ++i)
1045  {
1046  /*
1047  * Read as many messages as we can from each worker, but stop when
1048  * either (1) the worker's error queue goes away, which can happen
1049  * if we receive a Terminate message from the worker; or (2) no
1050  * more messages can be read from the worker without blocking.
1051  */
1052  while (pcxt->worker[i].error_mqh != NULL)
1053  {
1054  shm_mq_result res;
1055  Size nbytes;
1056  void *data;
1057 
1058  res = shm_mq_receive(pcxt->worker[i].error_mqh, &nbytes,
1059  &data, true);
1060  if (res == SHM_MQ_WOULD_BLOCK)
1061  break;
1062  else if (res == SHM_MQ_SUCCESS)
1063  {
1064  StringInfoData msg;
1065 
 /* Copy the received bytes into a StringInfo before handing them on. */
1066  initStringInfo(&msg);
1067  appendBinaryStringInfo(&msg, data, nbytes);
1068  HandleParallelMessage(pcxt, i, &msg);
1069  pfree(msg.data);
1070  }
1071  else
1072  ereport(ERROR,
1073  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1074  errmsg("lost connection to parallel worker")));
1075  }
1076  }
1077  }
1078 
1079  MemoryContextSwitchTo(oldcontext);
1080 
1081  /* Might as well clear the context on our way out */
1082  MemoryContextReset(hpm_context);
1083 
1085 }
1086 
1087 /*
1088  * Handle a single protocol message received from a single parallel worker.
1089  */
1090 static void
1092 {
1093  char msgtype;
1094 
1095  if (pcxt->known_attached_workers != NULL &&
1096  !pcxt->known_attached_workers[i])
1097  {
1098  pcxt->known_attached_workers[i] = true;
1099  pcxt->nknown_attached_workers++;
1100  }
1101 
1102  msgtype = pq_getmsgbyte(msg);
1103 
1104  switch (msgtype)
1105  {
1106  case 'K': /* BackendKeyData */
1107  {
1108  int32 pid = pq_getmsgint(msg, 4);
1109 
1110  (void) pq_getmsgint(msg, 4); /* discard cancel key */
1111  (void) pq_getmsgend(msg);
1112  pcxt->worker[i].pid = pid;
1113  break;
1114  }
1115 
1116  case 'E': /* ErrorResponse */
1117  case 'N': /* NoticeResponse */
1118  {
1119  ErrorData edata;
1120  ErrorContextCallback *save_error_context_stack;
1121 
1122  /* Parse ErrorResponse or NoticeResponse. */
1123  pq_parse_errornotice(msg, &edata);
1124 
1125  /* Death of a worker isn't enough justification for suicide. */
1126  edata.elevel = Min(edata.elevel, ERROR);
1127 
1128  /*
1129  * If desired, add a context line to show that this is a
1130  * message propagated from a parallel worker. Otherwise, it
1131  * can sometimes be confusing to understand what actually
1132  * happened. (We don't do this in FORCE_PARALLEL_REGRESS mode
1133  * because it causes test-result instability depending on
1134  * whether a parallel worker is actually used or not.)
1135  */
1137  {
1138  if (edata.context)
1139  edata.context = psprintf("%s\n%s", edata.context,
1140  _("parallel worker"));
1141  else
1142  edata.context = pstrdup(_("parallel worker"));
1143  }
1144 
1145  /*
1146  * Context beyond that should use the error context callbacks
1147  * that were in effect when the ParallelContext was created,
1148  * not the current ones.
1149  */
1150  save_error_context_stack = error_context_stack;
1152 
1153  /* Rethrow error or print notice. */
1154  ThrowErrorData(&edata);
1155 
1156  /* Not an error, so restore previous context stack. */
1157  error_context_stack = save_error_context_stack;
1158 
1159  break;
1160  }
1161 
1162  case 'A': /* NotifyResponse */
1163  {
1164  /* Propagate NotifyResponse. */
1165  int32 pid;
1166  const char *channel;
1167  const char *payload;
1168 
1169  pid = pq_getmsgint(msg, 4);
1170  channel = pq_getmsgrawstring(msg);
1171  payload = pq_getmsgrawstring(msg);
1172  pq_endmessage(msg);
1173 
1174  NotifyMyFrontEnd(channel, payload, pid);
1175 
1176  break;
1177  }
1178 
1179  case 'X': /* Terminate, indicating clean exit */
1180  {
1181  shm_mq_detach(pcxt->worker[i].error_mqh);
1182  pcxt->worker[i].error_mqh = NULL;
1183  break;
1184  }
1185 
1186  default:
1187  {
1188  elog(ERROR, "unrecognized message type received from parallel worker: %c (message length %d bytes)",
1189  msgtype, msg->len);
1190  }
1191  }
1192 }
1193 
1194 /*
1195  * End-of-subtransaction cleanup for parallel contexts.
1196  *
1197  * Currently, it's forbidden to enter or leave a subtransaction while
1198  * parallel mode is in effect, so we could just blow away everything. But
1199  * we may want to relax that restriction in the future, so this code
1200  * contemplates that there may be multiple subtransaction IDs in pcxt_list.
1201  */
1202 void
1204 {
1205  while (!dlist_is_empty(&pcxt_list))
1206  {
1207  ParallelContext *pcxt;
1208 
1209  pcxt = dlist_head_element(ParallelContext, node, &pcxt_list);
1210  if (pcxt->subid != mySubId)
1211  break;
1212  if (isCommit)
1213  elog(WARNING, "leaked parallel context");
1214  DestroyParallelContext(pcxt);
1215  }
1216 }
1217 
1218 /*
1219  * End-of-transaction cleanup for parallel contexts.
1220  */
1221 void
1222 AtEOXact_Parallel(bool isCommit)
1223 {
1224  while (!dlist_is_empty(&pcxt_list))
1225  {
1226  ParallelContext *pcxt;
1227 
1228  pcxt = dlist_head_element(ParallelContext, node, &pcxt_list);
1229  if (isCommit)
1230  elog(WARNING, "leaked parallel context");
1231  DestroyParallelContext(pcxt);
1232  }
1233 }
1234 
1235 /*
1236  * Main entrypoint for parallel workers.
1237  */
1238 void
1240 {
1241  dsm_segment *seg;
1242  shm_toc *toc;
1243  FixedParallelState *fps;
1244  char *error_queue_space;
1245  shm_mq *mq;
1246  shm_mq_handle *mqh;
1247  char *libraryspace;
1248  char *entrypointstate;
1249  char *library_name;
1250  char *function_name;
1251  parallel_worker_main_type entrypt;
1252  char *gucspace;
1253  char *combocidspace;
1254  char *tsnapspace;
1255  char *asnapspace;
1256  char *tstatespace;
1257  char *pendingsyncsspace;
1258  char *reindexspace;
1259  char *relmapperspace;
1260  char *enumblacklistspace;
1261  StringInfoData msgbuf;
1262  char *session_dsm_handle_space;
1263 
1264  /* Set flag to indicate that we're initializing a parallel worker. */
1266 
1267  /* Establish signal handlers. */
1268  pqsignal(SIGTERM, die);
1270 
1271  /* Determine and set our parallel worker number. */
1273  memcpy(&ParallelWorkerNumber, MyBgworkerEntry->bgw_extra, sizeof(int));
1274 
1275  /* Set up a memory context to work in, just for cleanliness. */
1277  "Parallel worker",
1279 
1280  /*
1281  * Attach to the dynamic shared memory segment for the parallel query, and
1282  * find its table of contents.
1283  *
1284  * Note: at this point, we have not created any ResourceOwner in this
1285  * process. This will result in our DSM mapping surviving until process
1286  * exit, which is fine. If there were a ResourceOwner, it would acquire
1287  * ownership of the mapping, but we have no need for that.
1288  */
1289  seg = dsm_attach(DatumGetUInt32(main_arg));
1290  if (seg == NULL)
1291  ereport(ERROR,
1292  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1293  errmsg("could not map dynamic shared memory segment")));
1295  if (toc == NULL)
1296  ereport(ERROR,
1297  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1298  errmsg("invalid magic number in dynamic shared memory segment")));
1299 
1300  /* Look up fixed parallel state. */
1301  fps = shm_toc_lookup(toc, PARALLEL_KEY_FIXED, false);
1302  MyFixedParallelState = fps;
1303 
1304  /* Arrange to signal the leader if we exit. */
1308 
1309  /*
1310  * Now we can find and attach to the error queue provided for us. That's
1311  * good, because until we do that, any errors that happen here will not be
1312  * reported back to the process that requested that this worker be
1313  * launched.
1314  */
1315  error_queue_space = shm_toc_lookup(toc, PARALLEL_KEY_ERROR_QUEUE, false);
1316  mq = (shm_mq *) (error_queue_space +
1319  mqh = shm_mq_attach(mq, seg, NULL);
1320  pq_redirect_to_shm_mq(seg, mqh);
1323 
1324  /*
1325  * Send a BackendKeyData message to the process that initiated parallelism
1326  * so that it has access to our PID before it receives any other messages
1327  * from us. Our cancel key is sent, too, since that's the way the
1328  * protocol message is defined, but it won't actually be used for anything
1329  * in this case.
1330  */
1331  pq_beginmessage(&msgbuf, 'K');
1332  pq_sendint32(&msgbuf, (int32) MyProcPid);
1333  pq_sendint32(&msgbuf, (int32) MyCancelKey);
1334  pq_endmessage(&msgbuf);
1335 
1336  /*
1337  * Hooray! Primary initialization is complete. Now, we need to set up our
1338  * backend-local state to match the original backend.
1339  */
1340 
1341  /*
1342  * Join locking group. We must do this before anything that could try to
1343  * acquire a heavyweight lock, because any heavyweight locks acquired to
1344  * this point could block either directly against the parallel group
1345  * leader or against some process which in turn waits for a lock that
1346  * conflicts with the parallel group leader, causing an undetected
1347  * deadlock. (If we can't join the lock group, the leader has gone away,
1348  * so just exit quietly.)
1349  */
1351  fps->parallel_leader_pid))
1352  return;
1353 
1354  /*
1355  * Restore transaction and statement start-time timestamps. This must
1356  * happen before anything that would start a transaction, else asserts in
1357  * xact.c will fire.
1358  */
1360 
1361  /*
1362  * Identify the entry point to be called. In theory this could result in
1363  * loading an additional library, though most likely the entry point is in
1364  * the core backend or in a library we just loaded.
1365  */
1366  entrypointstate = shm_toc_lookup(toc, PARALLEL_KEY_ENTRYPOINT, false);
1367  library_name = entrypointstate;
1368  function_name = entrypointstate + strlen(library_name) + 1;
1369 
1370  entrypt = LookupParallelWorkerFunction(library_name, function_name);
1371 
1372  /* Restore database connection. */
1374  fps->authenticated_user_id,
1375  0);
1376 
1377  /*
1378  * Set the client encoding to the database encoding, since that is what
1379  * the leader will expect.
1380  */
1382 
1383  /*
1384  * Load libraries that were loaded by original backend. We want to do
1385  * this before restoring GUCs, because the libraries might define custom
1386  * variables.
1387  */
1388  libraryspace = shm_toc_lookup(toc, PARALLEL_KEY_LIBRARY, false);
1390  RestoreLibraryState(libraryspace);
1391 
1392  /* Restore GUC values from launching backend. */
1393  gucspace = shm_toc_lookup(toc, PARALLEL_KEY_GUC, false);
1394  RestoreGUCState(gucspace);
1396 
1397  /* Crank up a transaction state appropriate to a parallel worker. */
1398  tstatespace = shm_toc_lookup(toc, PARALLEL_KEY_TRANSACTION_STATE, false);
1399  StartParallelWorkerTransaction(tstatespace);
1400 
1401  /* Restore combo CID state. */
1402  combocidspace = shm_toc_lookup(toc, PARALLEL_KEY_COMBO_CID, false);
1403  RestoreComboCIDState(combocidspace);
1404 
1405  /* Attach to the per-session DSM segment and contained objects. */
1406  session_dsm_handle_space =
1408  AttachSession(*(dsm_handle *) session_dsm_handle_space);
1409 
1410  /* Restore transaction snapshot. */
1411  tsnapspace = shm_toc_lookup(toc, PARALLEL_KEY_TRANSACTION_SNAPSHOT, false);
1413  fps->parallel_leader_pgproc);
1414 
1415  /* Restore active snapshot. */
1416  asnapspace = shm_toc_lookup(toc, PARALLEL_KEY_ACTIVE_SNAPSHOT, false);
1417  PushActiveSnapshot(RestoreSnapshot(asnapspace));
1418 
1419  /*
1420  * We've changed which tuples we can see, and must therefore invalidate
1421  * system caches.
1422  */
1424 
1425  /*
1426  * Restore current role id. Skip verifying whether session user is
1427  * allowed to become this role and blindly restore the leader's state for
1428  * current role.
1429  */
1431 
1432  /* Restore user ID and security context. */
1434 
1435  /* Restore temp-namespace state to ensure search path matches leader's. */
1438 
1439  /* Restore pending syncs. */
1440  pendingsyncsspace = shm_toc_lookup(toc, PARALLEL_KEY_PENDING_SYNCS,
1441  false);
1442  RestorePendingSyncs(pendingsyncsspace);
1443 
1444  /* Restore reindex state. */
1445  reindexspace = shm_toc_lookup(toc, PARALLEL_KEY_REINDEX_STATE, false);
1446  RestoreReindexState(reindexspace);
1447 
1448  /* Restore relmapper state. */
1449  relmapperspace = shm_toc_lookup(toc, PARALLEL_KEY_RELMAPPER_STATE, false);
1450  RestoreRelationMap(relmapperspace);
1451 
1452  /* Restore enum blacklist. */
1453  enumblacklistspace = shm_toc_lookup(toc, PARALLEL_KEY_ENUMBLACKLIST,
1454  false);
1455  RestoreEnumBlacklist(enumblacklistspace);
1456 
1457  /* Attach to the leader's serializable transaction, if SERIALIZABLE. */
1459 
1460  /*
1461  * We've initialized all of our state now; nothing should change
1462  * hereafter.
1463  */
1466 
1467  /*
1468  * Time to do the real work: invoke the caller-supplied code.
1469  */
1470  entrypt(seg, toc);
1471 
1472  /* Must exit parallel mode to pop active snapshot. */
1473  ExitParallelMode();
1474 
1475  /* Must pop active snapshot so snapmgr.c doesn't complain. */
1477 
1478  /* Shut down the parallel-worker transaction. */
1480 
1481  /* Detach from the per-session DSM segment. */
1482  DetachSession();
1483 
1484  /* Report success. */
1485  pq_putmessage('X', NULL, 0);
1486 }
1487 
1488 /*
1489  * Update shared memory with the ending location of the last WAL record we
1490  * wrote, if it's greater than the value already stored there.
1491  */
1492 void
1494 {
1496 
1497  Assert(fps != NULL);
1498  SpinLockAcquire(&fps->mutex);
1499  if (fps->last_xlog_end < last_xlog_end)
1501  SpinLockRelease(&fps->mutex);
1502 }
1503 
1504 /*
1505  * Make sure the leader tries to read from our error queue one more time.
1506  * This guards against the case where we exit uncleanly without sending an
1507  * ErrorResponse to the leader, for example because some code calls proc_exit
1508  * directly.
1509  */
1510 static void
1512 {
1516 }
1517 
1518 /*
1519  * Look up (and possibly load) a parallel worker entry point function.
1520  *
1521  * For functions contained in the core code, we use library name "postgres"
1522  * and consult the InternalParallelWorkers array. External functions are
1523  * looked up, and loaded if necessary, using load_external_function().
1524  *
1525  * The point of this is to pass function names as strings across process
1526  * boundaries. We can't pass actual function addresses because of the
1527  * possibility that the function has been loaded at a different address
1528  * in a different process. This is obviously a hazard for functions in
1529  * loadable libraries, but it can happen even for functions in the core code
1530  * on platforms using EXEC_BACKEND (e.g., Windows).
1531  *
1532  * At some point it might be worthwhile to get rid of InternalParallelWorkers[]
1533  * in favor of applying load_external_function() for core functions too;
1534  * but that raises portability issues that are not worth addressing now.
1535  */
1537 LookupParallelWorkerFunction(const char *libraryname, const char *funcname)
1538 {
1539  /*
1540  * If the function is to be loaded from postgres itself, search the
1541  * InternalParallelWorkers array.
1542  */
1543  if (strcmp(libraryname, "postgres") == 0)
1544  {
1545  int i;
1546 
1547  for (i = 0; i < lengthof(InternalParallelWorkers); i++)
1548  {
1549  if (strcmp(InternalParallelWorkers[i].fn_name, funcname) == 0)
1550  return InternalParallelWorkers[i].fn_addr;
1551  }
1552 
1553  /* We can only reach this by programming error. */
1554  elog(ERROR, "internal function \"%s\" not found", funcname);
1555  }
1556 
1557  /* Otherwise load from external library. */
1558  return (parallel_worker_main_type)
1559  load_external_function(libraryname, funcname, true, NULL);
1560 }
static pid_t ParallelLeaderPid
Definition: parallel.c:127
char bgw_extra[BGW_EXTRALEN]
Definition: bgworker.h:98
#define DatumGetUInt32(X)
Definition: postgres.h:486
int slock_t
Definition: s_lock.h:934
void SerializeEnumBlacklist(void *space, Size size)
Definition: pg_enum.c:709
#define PARALLEL_ERROR_QUEUE_SIZE
Definition: parallel.c:55
#define AllocSetContextCreate
Definition: memutils.h:170
shm_toc * shm_toc_create(uint64 magic, void *address, Size nbytes)
Definition: shm_toc.c:40
int MyProcPid
Definition: globals.c:40
int errhint(const char *fmt,...)
Definition: elog.c:1149
BackendId MyBackendId
Definition: globals.c:81
Snapshot RestoreSnapshot(char *start_address)
Definition: snapmgr.c:2157
MemoryContext TopTransactionContext
Definition: mcxt.c:49
uint32 dsm_handle
Definition: dsm_impl.h:55
ParallelContext * CreateParallelContext(const char *library_name, const char *function_name, int nworkers)
Definition: parallel.c:164
void SetUserIdAndSecContext(Oid userid, int sec_context)
Definition: miscinit.c:588
static void dlist_push_head(dlist_head *head, dlist_node *node)
Definition: ilist.h:300
PGPROC * parallel_leader_pgproc
Definition: parallel.c:92
XLogRecPtr XactLastRecEnd
Definition: xlog.c:361
void AttachSerializableXact(SerializableXactHandle handle)
Definition: predicate.c:5124
void shm_mq_detach(shm_mq_handle *mqh)
Definition: shm_mq.c:817
PGPROC * MyProc
Definition: proc.c:67
int64 TimestampTz
Definition: timestamp.h:39
dsm_segment * seg
Definition: parallel.h:43
static void WaitForParallelWorkersToExit(ParallelContext *pcxt)
Definition: parallel.c:864
#define dlist_foreach(iter, lhead)
Definition: ilist.h:507
void SerializeReindexState(Size maxsize, char *start_address)
Definition: index.c:4131
char * pstrdup(const char *in)
Definition: mcxt.c:1187
void CommitTransactionCommand(void)
Definition: xact.c:2948
shm_toc_estimator estimator
Definition: parallel.h:42
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
void EndParallelWorkerTransaction(void)
Definition: xact.c:5332
#define SpinLockInit(lock)
Definition: spin.h:60
void GetTempNamespaceState(Oid *tempNamespaceId, Oid *tempToastNamespaceId)
Definition: namespace.c:3312
dsm_segment * dsm_attach(dsm_handle h)
Definition: dsm.c:631
void _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc)
Definition: nbtsort.c:1796
#define Min(x, y)
Definition: c.h:982
PGPROC * shm_mq_get_sender(shm_mq *mq)
Definition: shm_mq.c:250
Oid authenticated_user_id
Definition: parallel.c:85
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
Snapshot GetActiveSnapshot(void)
Definition: snapmgr.c:786
#define PARALLEL_KEY_ENUMBLACKLIST
Definition: parallel.c:78
dsm_handle dsm_segment_handle(dsm_segment *seg)
Definition: dsm.c:1082
int bgw_restart_time
Definition: bgworker.h:94
int errcode(int sqlerrcode)
Definition: elog.c:691
Oid temp_toast_namespace_id
Definition: parallel.c:89
#define BGWORKER_CLASS_PARALLEL
Definition: bgworker.h:67
void DetachSession(void)
Definition: session.c:201
BackgroundWorker * MyBgworkerEntry
Definition: postmaster.c:194
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:137
bool BecomeLockGroupMember(PGPROC *leader, int pid)
Definition: proc.c:1889
void PopActiveSnapshot(void)
Definition: snapmgr.c:759
int nknown_attached_workers
Definition: parallel.h:47
uint32 SubTransactionId
Definition: c.h:579
#define PARALLEL_KEY_RELMAPPER_STATE
Definition: parallel.c:77
Size shm_toc_estimate(shm_toc_estimator *e)
Definition: shm_toc.c:263
#define lengthof(array)
Definition: c.h:730
void RestoreTransactionSnapshot(Snapshot snapshot, void *source_pgproc)
Definition: snapmgr.c:2222
void SerializeTransactionState(Size maxsize, char *start_address)
Definition: xact.c:5237
parallel_worker_main_type fn_addr
Definition: parallel.c:136
const char * pq_getmsgrawstring(StringInfo msg)
Definition: pqformat.c:610
unsigned int Oid
Definition: postgres_ext.h:31
#define shm_toc_estimate_chunk(e, sz)
Definition: shm_toc.h:51
void SetLatch(Latch *latch)
Definition: latch.c:505
#define BGWORKER_SHMEM_ACCESS
Definition: bgworker.h:52
Snapshot GetTransactionSnapshot(void)
Definition: snapmgr.c:250
void InvalidateSystemCaches(void)
Definition: inval.c:646
int nworkers_to_launch
Definition: parallel.h:37
char bgw_function_name[BGW_MAXLEN]
Definition: bgworker.h:96
void ReinitializeParallelWorkers(ParallelContext *pcxt, int nworkers_to_launch)
Definition: parallel.c:513
void ResetLatch(Latch *latch)
Definition: latch.c:588
void pq_beginmessage(StringInfo buf, char msgtype)
Definition: pqformat.c:87
void RestoreComboCIDState(char *comboCIDstate)
Definition: combocid.c:343
signed int int32
Definition: c.h:417
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:390
SubTransactionId subid
Definition: parallel.h:35
Oid GetCurrentRoleId(void)
Definition: miscinit.c:835
#define RESUME_INTERRUPTS()
Definition: miscadmin.h:118
ErrorContextCallback * error_context_stack
Definition: elog.c:92
void RestorePendingSyncs(char *startAddress)
Definition: storage.c:575
SerializableXactHandle serializable_xact_handle
Definition: parallel.c:97
volatile bool ParallelMessagePending
Definition: parallel.c:115
static void pq_sendint32(StringInfo buf, uint32 i)
Definition: pqformat.h:145
#define DSM_HANDLE_INVALID
Definition: dsm.h:23
#define sprintf
Definition: port.h:217
#define StaticAssertStmt(condition, errmessage)
Definition: c.h:914
void WaitForParallelWorkersToFinish(ParallelContext *pcxt)
Definition: parallel.c:750
#define SpinLockAcquire(lock)
Definition: spin.h:62
void DestroyParallelContext(ParallelContext *pcxt)
Definition: parallel.c:904
int SendProcSignal(pid_t pid, ProcSignalReason reason, BackendId backendId)
Definition: procsignal.c:250
#define dlist_container(type, membername, ptr)
Definition: ilist.h:477
ParallelWorkerInfo * worker
Definition: parallel.h:46
Datum bgw_main_arg
Definition: bgworker.h:97
void pfree(void *pointer)
Definition: mcxt.c:1057
bool IsInParallelMode(void)
Definition: xact.c:1012
void SerializeLibraryState(Size maxsize, char *start_address)
Definition: dfmgr.c:727
#define ERROR
Definition: elog.h:43
BgwHandleStatus WaitForBackgroundWorkerShutdown(BackgroundWorkerHandle *handle)
Definition: bgworker.c:1124
void ParallelQueryMain(dsm_segment *seg, shm_toc *toc)
Oid GetAuthenticatedUserId(void)
Definition: miscinit.c:535
#define PARALLEL_KEY_TRANSACTION_SNAPSHOT
Definition: parallel.c:70
char * function_name
Definition: parallel.h:40
void SerializeSnapshot(Snapshot snapshot, char *start_address)
Definition: snapmgr.c:2098
int32 MyCancelKey
Definition: globals.c:44
void pq_parse_errornotice(StringInfo msg, ErrorData *edata)
Definition: pqmq.c:215
#define FATAL
Definition: elog.h:52
shm_mq * shm_mq_create(void *address, Size size)
Definition: shm_mq.c:170
void ExitParallelMode(void)
Definition: xact.c:992
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:192
#define PARALLEL_KEY_FIXED
Definition: parallel.c:65
void HandleParallelMessages(void)
Definition: parallel.c:1002
#define PARALLEL_KEY_ERROR_QUEUE
Definition: parallel.c:66
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:361
static const struct @20 InternalParallelWorkers[]
void SetTempNamespaceState(Oid tempNamespaceId, Oid tempToastNamespaceId)
Definition: namespace.c:3328
static void HandleParallelMessage(ParallelContext *pcxt, int i, StringInfo msg)
Definition: parallel.c:1091
#define DSM_CREATE_NULL_IF_MAXSEGMENTS
Definition: dsm.h:20
void PushActiveSnapshot(Snapshot snap)
Definition: snapmgr.c:680
void GetUserIdAndSecContext(Oid *userid, int *sec_context)
Definition: miscinit.c:581
shm_mq_handle * error_mqh
Definition: parallel.h:28
#define PARALLEL_KEY_GUC
Definition: parallel.c:68
int SetClientEncoding(int encoding)
Definition: mbutils.c:208
int ParallelWorkerNumber
Definition: parallel.c:112
BackgroundWorkerHandle * bgwhandle
Definition: parallel.h:27
void SerializeRelationMap(Size maxSize, char *startAddress)
Definition: relmapper.c:657
Size EstimateGUCStateSpace(void)
Definition: guc.c:10327
void AttachSession(dsm_handle handle)
Definition: session.c:155
#define BGW_NEVER_RESTART
Definition: bgworker.h:84
#define shm_toc_initialize_estimator(e)
Definition: shm_toc.h:49
Size EstimateComboCIDStateSpace(void)
Definition: combocid.c:298
#define UInt32GetDatum(X)
Definition: postgres.h:493
MemoryContext CurrentMemoryContext
Definition: mcxt.c:38
static void ParallelWorkerShutdown(int code, Datum arg)
Definition: parallel.c:1511
static void dlist_delete(dlist_node *node)
Definition: ilist.h:358
int nworkers_launched
Definition: parallel.h:38
XLogRecPtr last_xlog_end
Definition: parallel.c:103
BgwHandleStatus
Definition: bgworker.h:102
Size EstimateEnumBlacklistSpace(void)
Definition: pg_enum.c:695
void LaunchParallelWorkers(ParallelContext *pcxt)
Definition: parallel.c:527
Size EstimateReindexStateSpace(void)
Definition: index.c:4120
void shm_mq_set_sender(shm_mq *mq, PGPROC *proc)
Definition: shm_mq.c:217
void BecomeLockGroupLeader(void)
Definition: proc.c:1859
MemoryContext TopMemoryContext
Definition: mcxt.c:44
void * load_external_function(const char *filename, const char *funcname, bool signalNotFound, void **filehandle)
Definition: dfmgr.c:107
TimestampTz GetCurrentTransactionStartTimestamp(void)
Definition: xact.c:798
void ThrowErrorData(ErrorData *edata)
Definition: elog.c:1671
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
#define DLIST_STATIC_INIT(name)
Definition: ilist.h:248
#define WARNING
Definition: elog.h:40
void InitializeParallelDSM(ParallelContext *pcxt)
Definition: parallel.c:202
int elevel
Definition: elog.h:367
bool * known_attached_workers
Definition: parallel.h:48
bool ParallelContextActive(void)
Definition: parallel.c:978
#define SpinLockRelease(lock)
Definition: spin.h:64
#define dlist_head_element(type, membername, lhead)
Definition: ilist.h:487
Size EstimateSnapshotSpace(Snapshot snap)
Definition: snapmgr.c:2074
Size mul_size(Size s1, Size s2)
Definition: shmem.c:515
void * palloc0(Size size)
Definition: mcxt.c:981
static parallel_worker_main_type LookupParallelWorkerFunction(const char *libraryname, const char *funcname)
Definition: parallel.c:1537
void RestoreLibraryState(char *start_address)
Definition: dfmgr.c:749
uintptr_t Datum
Definition: postgres.h:367
void RestoreEnumBlacklist(void *space)
Definition: pg_enum.c:741
dsm_segment * dsm_create(Size size, int flags)
Definition: dsm.c:487
shm_toc * shm_toc_attach(uint64 magic, void *address)
Definition: shm_toc.c:64
Size EstimatePendingSyncsSpace(void)
Definition: storage.c:511
int GetDatabaseEncoding(void)
Definition: mbutils.c:1151
int BackendId
Definition: backendid.h:21
void SerializePendingSyncs(Size maxSize, char *startAddress)
Definition: storage.c:524
Oid MyDatabaseId
Definition: globals.c:85
Size EstimateLibraryStateSpace(void)
Definition: dfmgr.c:710
void ReinitializeParallelDSM(ParallelContext *pcxt)
Definition: parallel.c:463
void shm_mq_set_handle(shm_mq_handle *mqh, BackgroundWorkerHandle *handle)
Definition: shm_mq.c:311
dlist_node * cur
Definition: ilist.h:161
void BackgroundWorkerInitializeConnectionByOid(Oid dboid, Oid useroid, uint32 flags)
Definition: postmaster.c:5718
#define PARALLEL_MAGIC
Definition: parallel.c:58
void pq_redirect_to_shm_mq(dsm_segment *seg, shm_mq_handle *mqh)
Definition: pqmq.c:55
void * SerializableXactHandle
Definition: predicate.h:37
void ParallelWorkerReportLastRecEnd(XLogRecPtr last_xlog_end)
Definition: parallel.c:1493
void TerminateBackgroundWorker(BackgroundWorkerHandle *handle)
Definition: bgworker.c:1163
#define ereport(elevel,...)
Definition: elog.h:155
pqsigfunc pqsignal(int signum, pqsigfunc handler)
Definition: signal.c:170
int pq_getmsgbyte(StringInfo msg)
Definition: pqformat.c:401
shm_mq_result
Definition: shm_mq.h:36
TimestampTz xact_ts
Definition: parallel.c:95
BackendId parallel_leader_backend_id
Definition: parallel.c:94
char * library_name
Definition: parallel.h:39
BackendId ParallelLeaderBackendId
Definition: globals.c:83
int force_parallel_mode
Definition: planner.c:70
void * dsm_segment_address(dsm_segment *seg)
Definition: dsm.c:1054
uint64 XLogRecPtr
Definition: xlogdefs.h:21
char bgw_name[BGW_MAXLEN]
Definition: bgworker.h:90
#define Assert(condition)
Definition: c.h:800
void StartParallelWorkerTransaction(char *tstatespace)
Definition: xact.c:5307
#define BGWORKER_BACKEND_DATABASE_CONNECTION
Definition: bgworker.h:59
SubTransactionId GetCurrentSubTransactionId(void)
Definition: xact.c:723
Size EstimateTransactionStateSpace(void)
Definition: xact.c:5209
void StartTransactionCommand(void)
Definition: xact.c:2847
#define PARALLEL_KEY_REINDEX_STATE
Definition: parallel.c:76
const char * fn_name
Definition: parallel.c:135
static bool dlist_is_empty(dlist_head *head)
Definition: ilist.h:289
#define BGW_MAXLEN
Definition: bgworker.h:85
size_t Size
Definition: c.h:528
BgWorkerStartTime bgw_start_time
Definition: bgworker.h:93
dlist_node node
Definition: parallel.h:34
void parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
Definition: vacuumlazy.c:3504
#define shm_toc_estimate_keys(e, cnt)
Definition: shm_toc.h:53
shm_mq * shm_mq_get_queue(shm_mq_handle *mqh)
Definition: shm_mq.c:872
bool RegisterDynamicBackgroundWorker(BackgroundWorker *worker, BackgroundWorkerHandle **handle)
Definition: bgworker.c:918
void EnterParallelMode(void)
Definition: xact.c:979
volatile sig_atomic_t InterruptPending
Definition: globals.c:30
Size EstimateRelationMapSpace(void)
Definition: relmapper.c:646
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition: shm_toc.c:88
char * context
Definition: elog.h:383
shm_mq_handle * shm_mq_attach(shm_mq *mq, dsm_segment *seg, BackgroundWorkerHandle *handle)
Definition: shm_mq.c:283
ErrorContextCallback * error_context_stack
Definition: parallel.h:41
void SetParallelStartTimestamps(TimestampTz xact_ts, TimestampTz stmt_ts)
Definition: xact.c:787
#define PARALLEL_KEY_TRANSACTION_STATE
Definition: parallel.c:72
char bgw_type[BGW_MAXLEN]
Definition: bgworker.h:91
void dsm_detach(dsm_segment *seg)
Definition: dsm.c:769
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition: shm_toc.c:171
void RestoreReindexState(void *reindexstate)
Definition: index.c:4149
int errmsg(const char *fmt,...)
Definition: elog.c:902
void(* parallel_worker_main_type)(dsm_segment *seg, shm_toc *toc)
Definition: parallel.h:23
void pq_endmessage(StringInfo buf)
Definition: pqformat.c:298
void ParallelWorkerMain(Datum main_arg)
Definition: parallel.c:1239
pid_t bgw_notify_pid
Definition: bgworker.h:99
static FixedParallelState * MyFixedParallelState
Definition: parallel.c:121
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:797
void SetCurrentRoleId(Oid roleid, bool is_superuser)
Definition: miscinit.c:856
#define HOLD_INTERRUPTS()
Definition: miscadmin.h:116
#define elog(elevel,...)
Definition: elog.h:228
bool InitializingParallelWorker
Definition: parallel.c:118
int i
TimestampTz stmt_ts
Definition: parallel.c:96
Definition: shm_mq.c:71
void RestoreRelationMap(char *startAddress)
Definition: relmapper.c:674
#define PARALLEL_KEY_SESSION_DSM
Definition: parallel.c:74
#define BUFFERALIGN(LEN)
Definition: c.h:755
void pq_set_parallel_leader(pid_t pid, BackendId backend_id)
Definition: pqmq.c:80
void * arg
struct Latch * MyLatch
Definition: globals.c:54
void HandleParallelMessageInterrupt(void)
Definition: parallel.c:991
unsigned int pq_getmsgint(StringInfo msg, int b)
Definition: pqformat.c:417
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
pid_t parallel_leader_pid
Definition: parallel.c:93
void shm_mq_set_receiver(shm_mq *mq, PGPROC *proc)
Definition: shm_mq.c:199
static dlist_head pcxt_list
Definition: parallel.c:124
void pq_getmsgend(StringInfo msg)
Definition: pqformat.c:637
#define pq_putmessage(msgtype, s, len)
Definition: libpq.h:42
static void static void status(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:227
struct FixedParallelState FixedParallelState
dsm_handle GetSessionDsmHandle(void)
Definition: session.c:70
shm_mq_result shm_mq_receive(shm_mq_handle *mqh, Size *nbytesp, void **datap, bool nowait)
Definition: shm_mq.c:548
char bgw_library_name[BGW_MAXLEN]
Definition: bgworker.h:95
void WaitForParallelWorkersToAttach(ParallelContext *pcxt)
Definition: parallel.c:647
#define PARALLEL_KEY_PENDING_SYNCS
Definition: parallel.c:75
bool session_auth_is_superuser
Definition: guc.c:535
SerializableXactHandle ShareSerializableXact(void)
Definition: predicate.c:5115
void AtEOXact_Parallel(bool isCommit)
Definition: parallel.c:1222
Definition: proc.h:120
#define PARALLEL_KEY_ENTRYPOINT
Definition: parallel.c:73
#define snprintf
Definition: port.h:215
#define PARALLEL_KEY_COMBO_CID
Definition: parallel.c:69
#define WL_LATCH_SET
Definition: latch.h:124
#define _(x)
Definition: elog.c:88
void AtEOSubXact_Parallel(bool isCommit, SubTransactionId mySubId)
Definition: parallel.c:1203
void SerializeGUCState(Size maxsize, char *start_address)
Definition: guc.c:10469
void appendBinaryStringInfo(StringInfo str, const char *data, int datalen)
Definition: stringinfo.c:227
#define PARALLEL_KEY_ACTIVE_SNAPSHOT
Definition: parallel.c:71
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition: shm_toc.c:232
#define die(msg)
Definition: pg_test_fsync.c:97
void SerializeComboCIDState(Size maxsize, char *start_address)
Definition: combocid.c:317
#define PARALLEL_KEY_LIBRARY
Definition: parallel.c:67
BgwHandleStatus GetBackgroundWorkerPid(BackgroundWorkerHandle *handle, pid_t *pidp)
Definition: bgworker.c:1030
void RestoreGUCState(void *gucstate)
Definition: guc.c:10549
shm_toc * toc
Definition: parallel.h:45
#define WL_EXIT_ON_PM_DEATH
Definition: latch.h:129
void NotifyMyFrontEnd(const char *channel, const char *payload, int32 srcPid)
Definition: async.c:2300
TimestampTz GetCurrentStatementStartTimestamp(void)
Definition: xact.c:807
void * private_memory
Definition: parallel.h:44
void BackgroundWorkerUnblockSignals(void)
Definition: postmaster.c:5747