PostgreSQL Source Code  git master
parallel.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * parallel.c
4  * Infrastructure for launching parallel workers
5  *
6  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * IDENTIFICATION
10  * src/backend/access/transam/parallel.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 
15 #include "postgres.h"
16 
17 #include "access/heapam.h"
18 #include "access/nbtree.h"
19 #include "access/parallel.h"
20 #include "access/session.h"
21 #include "access/xact.h"
22 #include "access/xlog.h"
23 #include "catalog/index.h"
24 #include "catalog/namespace.h"
25 #include "catalog/pg_enum.h"
26 #include "catalog/storage.h"
27 #include "commands/async.h"
28 #include "executor/execParallel.h"
29 #include "libpq/libpq.h"
30 #include "libpq/pqformat.h"
31 #include "libpq/pqmq.h"
32 #include "miscadmin.h"
33 #include "optimizer/optimizer.h"
34 #include "pgstat.h"
35 #include "storage/ipc.h"
36 #include "storage/predicate.h"
37 #include "storage/sinval.h"
38 #include "storage/spin.h"
39 #include "tcop/tcopprot.h"
40 #include "utils/combocid.h"
41 #include "utils/guc.h"
42 #include "utils/inval.h"
43 #include "utils/memutils.h"
44 #include "utils/relmapper.h"
45 #include "utils/snapmgr.h"
46 #include "utils/typcache.h"
47 
48 /*
49  * We don't want to waste a lot of memory on an error queue which, most of
50  * the time, will process only a handful of small messages. However, it is
51  * desirable to make it large enough that a typical ErrorResponse can be sent
52  * without blocking. That way, a worker that errors out can write the whole
53  * message into the queue and terminate without waiting for the user backend.
54  */
55 #define PARALLEL_ERROR_QUEUE_SIZE 16384
56 
57 /* Magic number for parallel context TOC. */
58 #define PARALLEL_MAGIC 0x50477c7c
59 
60 /*
61  * Magic numbers for per-context parallel state sharing. Higher-level code
62  * should use smaller values, leaving these very large ones for use by this
63  * module.
64  */
65 #define PARALLEL_KEY_FIXED UINT64CONST(0xFFFFFFFFFFFF0001)
66 #define PARALLEL_KEY_ERROR_QUEUE UINT64CONST(0xFFFFFFFFFFFF0002)
67 #define PARALLEL_KEY_LIBRARY UINT64CONST(0xFFFFFFFFFFFF0003)
68 #define PARALLEL_KEY_GUC UINT64CONST(0xFFFFFFFFFFFF0004)
69 #define PARALLEL_KEY_COMBO_CID UINT64CONST(0xFFFFFFFFFFFF0005)
70 #define PARALLEL_KEY_TRANSACTION_SNAPSHOT UINT64CONST(0xFFFFFFFFFFFF0006)
71 #define PARALLEL_KEY_ACTIVE_SNAPSHOT UINT64CONST(0xFFFFFFFFFFFF0007)
72 #define PARALLEL_KEY_TRANSACTION_STATE UINT64CONST(0xFFFFFFFFFFFF0008)
73 #define PARALLEL_KEY_ENTRYPOINT UINT64CONST(0xFFFFFFFFFFFF0009)
74 #define PARALLEL_KEY_SESSION_DSM UINT64CONST(0xFFFFFFFFFFFF000A)
75 #define PARALLEL_KEY_PENDING_SYNCS UINT64CONST(0xFFFFFFFFFFFF000B)
76 #define PARALLEL_KEY_REINDEX_STATE UINT64CONST(0xFFFFFFFFFFFF000C)
77 #define PARALLEL_KEY_RELMAPPER_STATE UINT64CONST(0xFFFFFFFFFFFF000D)
78 #define PARALLEL_KEY_UNCOMMITTEDENUMS UINT64CONST(0xFFFFFFFFFFFF000E)
79 
80 /* Fixed-size parallel state. */
81 typedef struct FixedParallelState
82 {
83  /* Fixed-size state that workers must restore. */
98 
99  /* Mutex protects remaining fields. */
101 
102  /* Maximum XactLastRecEnd of any worker. */
105 
106 /*
107  * Our parallel worker number. We initialize this to -1, meaning that we are
108  * not a parallel worker. In parallel workers, it will be set to a value >= 0
109  * and < the number of workers before any user code is invoked; each parallel
110  * worker will get a different parallel worker number.
111  */
113 
114 /* Is there a parallel message pending which we need to receive? */
115 volatile bool ParallelMessagePending = false;
116 
117 /* Are we initializing a parallel worker? */
119 
120 /* Pointer to our fixed parallel state. */
122 
123 /* List of active parallel contexts. */
125 
126 /* Backend-local copy of data from FixedParallelState. */
127 static pid_t ParallelLeaderPid;
128 
129 /*
130  * List of internal parallel worker entry points. We need this for
131  * reasons explained in LookupParallelWorkerFunction(), below.
132  */
133 static const struct
134 {
135  const char *fn_name;
138 
139 {
140  {
141  "ParallelQueryMain", ParallelQueryMain
142  },
143  {
144  "_bt_parallel_build_main", _bt_parallel_build_main
145  },
146  {
147  "parallel_vacuum_main", parallel_vacuum_main
148  }
149 };
150 
151 /* Private functions. */
152 static void HandleParallelMessage(ParallelContext *pcxt, int i, StringInfo msg);
154 static parallel_worker_main_type LookupParallelWorkerFunction(const char *libraryname, const char *funcname);
155 static void ParallelWorkerShutdown(int code, Datum arg);
156 
157 
158 /*
159  * Establish a new parallel context. This should be done after entering
160  * parallel mode, and (unless there is an error) the context should be
161  * destroyed before exiting the current subtransaction.
162  */
164 CreateParallelContext(const char *library_name, const char *function_name,
165  int nworkers)
166 {
167  MemoryContext oldcontext;
168  ParallelContext *pcxt;
169 
170  /* It is unsafe to create a parallel context if not in parallel mode. */
172 
173  /* Number of workers should be non-negative. */
174  Assert(nworkers >= 0);
175 
176  /* We might be running in a short-lived memory context. */
178 
179  /* Initialize a new ParallelContext. */
180  pcxt = palloc0(sizeof(ParallelContext));
182  pcxt->nworkers = nworkers;
183  pcxt->nworkers_to_launch = nworkers;
184  pcxt->library_name = pstrdup(library_name);
185  pcxt->function_name = pstrdup(function_name);
188  dlist_push_head(&pcxt_list, &pcxt->node);
189 
190  /* Restore previous memory context. */
191  MemoryContextSwitchTo(oldcontext);
192 
193  return pcxt;
194 }
195 
196 /*
197  * Establish the dynamic shared memory segment for a parallel context and
198  * copy state and other bookkeeping information that will be needed by
199  * parallel workers into it.
200  */
201 void
203 {
204  MemoryContext oldcontext;
205  Size library_len = 0;
206  Size guc_len = 0;
207  Size combocidlen = 0;
208  Size tsnaplen = 0;
209  Size asnaplen = 0;
210  Size tstatelen = 0;
211  Size pendingsyncslen = 0;
212  Size reindexlen = 0;
213  Size relmapperlen = 0;
214  Size uncommittedenumslen = 0;
215  Size segsize = 0;
216  int i;
217  FixedParallelState *fps;
218  dsm_handle session_dsm_handle = DSM_HANDLE_INVALID;
219  Snapshot transaction_snapshot = GetTransactionSnapshot();
220  Snapshot active_snapshot = GetActiveSnapshot();
221 
222  /* We might be running in a very short-lived memory context. */
224 
225  /* Allow space to store the fixed-size parallel state. */
227  shm_toc_estimate_keys(&pcxt->estimator, 1);
228 
229  /*
230  * Normally, the user will have requested at least one worker process, but
231  * if by chance they have not, we can skip a bunch of things here.
232  */
233  if (pcxt->nworkers > 0)
234  {
235  /* Get (or create) the per-session DSM segment's handle. */
236  session_dsm_handle = GetSessionDsmHandle();
237 
238  /*
239  * If we weren't able to create a per-session DSM segment, then we can
240  * continue but we can't safely launch any workers because their
241  * record typmods would be incompatible so they couldn't exchange
242  * tuples.
243  */
244  if (session_dsm_handle == DSM_HANDLE_INVALID)
245  pcxt->nworkers = 0;
246  }
247 
248  if (pcxt->nworkers > 0)
249  {
250  /* Estimate space for various kinds of state sharing. */
251  library_len = EstimateLibraryStateSpace();
252  shm_toc_estimate_chunk(&pcxt->estimator, library_len);
253  guc_len = EstimateGUCStateSpace();
254  shm_toc_estimate_chunk(&pcxt->estimator, guc_len);
255  combocidlen = EstimateComboCIDStateSpace();
256  shm_toc_estimate_chunk(&pcxt->estimator, combocidlen);
257  tsnaplen = EstimateSnapshotSpace(transaction_snapshot);
258  shm_toc_estimate_chunk(&pcxt->estimator, tsnaplen);
259  asnaplen = EstimateSnapshotSpace(active_snapshot);
260  shm_toc_estimate_chunk(&pcxt->estimator, asnaplen);
261  tstatelen = EstimateTransactionStateSpace();
262  shm_toc_estimate_chunk(&pcxt->estimator, tstatelen);
264  pendingsyncslen = EstimatePendingSyncsSpace();
265  shm_toc_estimate_chunk(&pcxt->estimator, pendingsyncslen);
266  reindexlen = EstimateReindexStateSpace();
267  shm_toc_estimate_chunk(&pcxt->estimator, reindexlen);
268  relmapperlen = EstimateRelationMapSpace();
269  shm_toc_estimate_chunk(&pcxt->estimator, relmapperlen);
270  uncommittedenumslen = EstimateUncommittedEnumsSpace();
271  shm_toc_estimate_chunk(&pcxt->estimator, uncommittedenumslen);
272  /* If you add more chunks here, you probably need to add keys. */
273  shm_toc_estimate_keys(&pcxt->estimator, 11);
274 
275  /* Estimate space needed for error queues. */
278  "parallel error queue size not buffer-aligned");
281  pcxt->nworkers));
282  shm_toc_estimate_keys(&pcxt->estimator, 1);
283 
284  /* Estimate how much we'll need for the entrypoint info. */
285  shm_toc_estimate_chunk(&pcxt->estimator, strlen(pcxt->library_name) +
286  strlen(pcxt->function_name) + 2);
287  shm_toc_estimate_keys(&pcxt->estimator, 1);
288  }
289 
290  /*
291  * Create DSM and initialize with new table of contents. But if the user
292  * didn't request any workers, then don't bother creating a dynamic shared
293  * memory segment; instead, just use backend-private memory.
294  *
295  * Also, if we can't create a dynamic shared memory segment because the
296  * maximum number of segments have already been created, then fall back to
297  * backend-private memory, and plan not to use any workers. We hope this
298  * won't happen very often, but it's better to abandon the use of
299  * parallelism than to fail outright.
300  */
301  segsize = shm_toc_estimate(&pcxt->estimator);
302  if (pcxt->nworkers > 0)
304  if (pcxt->seg != NULL)
306  dsm_segment_address(pcxt->seg),
307  segsize);
308  else
309  {
310  pcxt->nworkers = 0;
313  segsize);
314  }
315 
316  /* Initialize fixed-size state in shared memory. */
317  fps = (FixedParallelState *)
318  shm_toc_allocate(pcxt->toc, sizeof(FixedParallelState));
319  fps->database_id = MyDatabaseId;
332  SpinLockInit(&fps->mutex);
333  fps->last_xlog_end = 0;
335 
336  /* We can skip the rest of this if we're not budgeting for any workers. */
337  if (pcxt->nworkers > 0)
338  {
339  char *libraryspace;
340  char *gucspace;
341  char *combocidspace;
342  char *tsnapspace;
343  char *asnapspace;
344  char *tstatespace;
345  char *pendingsyncsspace;
346  char *reindexspace;
347  char *relmapperspace;
348  char *error_queue_space;
349  char *session_dsm_handle_space;
350  char *entrypointstate;
351  char *uncommittedenumsspace;
352  Size lnamelen;
353 
354  /* Serialize shared libraries we have loaded. */
355  libraryspace = shm_toc_allocate(pcxt->toc, library_len);
356  SerializeLibraryState(library_len, libraryspace);
357  shm_toc_insert(pcxt->toc, PARALLEL_KEY_LIBRARY, libraryspace);
358 
359  /* Serialize GUC settings. */
360  gucspace = shm_toc_allocate(pcxt->toc, guc_len);
361  SerializeGUCState(guc_len, gucspace);
362  shm_toc_insert(pcxt->toc, PARALLEL_KEY_GUC, gucspace);
363 
364  /* Serialize combo CID state. */
365  combocidspace = shm_toc_allocate(pcxt->toc, combocidlen);
366  SerializeComboCIDState(combocidlen, combocidspace);
367  shm_toc_insert(pcxt->toc, PARALLEL_KEY_COMBO_CID, combocidspace);
368 
369  /* Serialize transaction snapshot and active snapshot. */
370  tsnapspace = shm_toc_allocate(pcxt->toc, tsnaplen);
371  SerializeSnapshot(transaction_snapshot, tsnapspace);
373  tsnapspace);
374  asnapspace = shm_toc_allocate(pcxt->toc, asnaplen);
375  SerializeSnapshot(active_snapshot, asnapspace);
376  shm_toc_insert(pcxt->toc, PARALLEL_KEY_ACTIVE_SNAPSHOT, asnapspace);
377 
378  /* Provide the handle for per-session segment. */
379  session_dsm_handle_space = shm_toc_allocate(pcxt->toc,
380  sizeof(dsm_handle));
381  *(dsm_handle *) session_dsm_handle_space = session_dsm_handle;
383  session_dsm_handle_space);
384 
385  /* Serialize transaction state. */
386  tstatespace = shm_toc_allocate(pcxt->toc, tstatelen);
387  SerializeTransactionState(tstatelen, tstatespace);
389 
390  /* Serialize pending syncs. */
391  pendingsyncsspace = shm_toc_allocate(pcxt->toc, pendingsyncslen);
392  SerializePendingSyncs(pendingsyncslen, pendingsyncsspace);
394  pendingsyncsspace);
395 
396  /* Serialize reindex state. */
397  reindexspace = shm_toc_allocate(pcxt->toc, reindexlen);
398  SerializeReindexState(reindexlen, reindexspace);
399  shm_toc_insert(pcxt->toc, PARALLEL_KEY_REINDEX_STATE, reindexspace);
400 
401  /* Serialize relmapper state. */
402  relmapperspace = shm_toc_allocate(pcxt->toc, relmapperlen);
403  SerializeRelationMap(relmapperlen, relmapperspace);
405  relmapperspace);
406 
407  /* Serialize uncommitted enum state. */
408  uncommittedenumsspace = shm_toc_allocate(pcxt->toc,
409  uncommittedenumslen);
410  SerializeUncommittedEnums(uncommittedenumsspace, uncommittedenumslen);
412  uncommittedenumsspace);
413 
414  /* Allocate space for worker information. */
415  pcxt->worker = palloc0(sizeof(ParallelWorkerInfo) * pcxt->nworkers);
416 
417  /*
418  * Establish error queues in dynamic shared memory.
419  *
420  * These queues should be used only for transmitting ErrorResponse,
421  * NoticeResponse, and NotifyResponse protocol messages. Tuple data
422  * should be transmitted via separate (possibly larger?) queues.
423  */
424  error_queue_space =
425  shm_toc_allocate(pcxt->toc,
427  pcxt->nworkers));
428  for (i = 0; i < pcxt->nworkers; ++i)
429  {
430  char *start;
431  shm_mq *mq;
432 
433  start = error_queue_space + i * PARALLEL_ERROR_QUEUE_SIZE;
434  mq = shm_mq_create(start, PARALLEL_ERROR_QUEUE_SIZE);
436  pcxt->worker[i].error_mqh = shm_mq_attach(mq, pcxt->seg, NULL);
437  }
438  shm_toc_insert(pcxt->toc, PARALLEL_KEY_ERROR_QUEUE, error_queue_space);
439 
440  /*
441  * Serialize entrypoint information. It's unsafe to pass function
442  * pointers across processes, as the function pointer may be different
443  * in each process in EXEC_BACKEND builds, so we always pass library
444  * and function name. (We use library name "postgres" for functions
445  * in the core backend.)
446  */
447  lnamelen = strlen(pcxt->library_name);
448  entrypointstate = shm_toc_allocate(pcxt->toc, lnamelen +
449  strlen(pcxt->function_name) + 2);
450  strcpy(entrypointstate, pcxt->library_name);
451  strcpy(entrypointstate + lnamelen + 1, pcxt->function_name);
452  shm_toc_insert(pcxt->toc, PARALLEL_KEY_ENTRYPOINT, entrypointstate);
453  }
454 
455  /* Restore previous memory context. */
456  MemoryContextSwitchTo(oldcontext);
457 }
458 
459 /*
460  * Reinitialize the dynamic shared memory segment for a parallel context such
461  * that we could launch workers for it again.
462  */
463 void
465 {
466  FixedParallelState *fps;
467 
468  /* Wait for any old workers to exit. */
469  if (pcxt->nworkers_launched > 0)
470  {
473  pcxt->nworkers_launched = 0;
474  if (pcxt->known_attached_workers)
475  {
477  pcxt->known_attached_workers = NULL;
478  pcxt->nknown_attached_workers = 0;
479  }
480  }
481 
482  /* Reset a few bits of fixed parallel state to a clean state. */
483  fps = shm_toc_lookup(pcxt->toc, PARALLEL_KEY_FIXED, false);
484  fps->last_xlog_end = 0;
485 
486  /* Recreate error queues (if they exist). */
487  if (pcxt->nworkers > 0)
488  {
489  char *error_queue_space;
490  int i;
491 
492  error_queue_space =
494  for (i = 0; i < pcxt->nworkers; ++i)
495  {
496  char *start;
497  shm_mq *mq;
498 
499  start = error_queue_space + i * PARALLEL_ERROR_QUEUE_SIZE;
500  mq = shm_mq_create(start, PARALLEL_ERROR_QUEUE_SIZE);
502  pcxt->worker[i].error_mqh = shm_mq_attach(mq, pcxt->seg, NULL);
503  }
504  }
505 }
506 
507 /*
508  * Reinitialize parallel workers for a parallel context such that we could
509  * launch a different number of workers. This is required for cases where
510  * we need to reuse the same DSM segment, but the number of workers can
511  * vary from run-to-run.
512  */
513 void
514 ReinitializeParallelWorkers(ParallelContext *pcxt, int nworkers_to_launch)
515 {
516  /*
517  * The number of workers to be launched must not exceed the number of
518  * workers with which the parallel context was initialized (it may equal it).
519  */
520  Assert(pcxt->nworkers >= nworkers_to_launch);
521  pcxt->nworkers_to_launch = nworkers_to_launch;
522 }
523 
524 /*
525  * Launch parallel workers.
526  */
527 void
529 {
530  MemoryContext oldcontext;
531  BackgroundWorker worker;
532  int i;
533  bool any_registrations_failed = false;
534 
535  /* Skip this if we have no workers. */
536  if (pcxt->nworkers == 0 || pcxt->nworkers_to_launch == 0)
537  return;
538 
539  /* We need to be a lock group leader. */
541 
542  /* If we do have workers, we'd better have a DSM segment. */
543  Assert(pcxt->seg != NULL);
544 
545  /* We might be running in a short-lived memory context. */
547 
548  /* Configure a worker. */
549  memset(&worker, 0, sizeof(worker));
550  snprintf(worker.bgw_name, BGW_MAXLEN, "parallel worker for PID %d",
551  MyProcPid);
552  snprintf(worker.bgw_type, BGW_MAXLEN, "parallel worker");
553  worker.bgw_flags =
558  sprintf(worker.bgw_library_name, "postgres");
559  sprintf(worker.bgw_function_name, "ParallelWorkerMain");
561  worker.bgw_notify_pid = MyProcPid;
562 
563  /*
564  * Start workers.
565  *
566  * The caller must be able to tolerate ending up with fewer workers than
567  * expected, so there is no need to throw an error here if registration
568  * fails. It wouldn't help much anyway, because registering the worker in
569  * no way guarantees that it will start up and initialize successfully.
570  */
571  for (i = 0; i < pcxt->nworkers_to_launch; ++i)
572  {
573  memcpy(worker.bgw_extra, &i, sizeof(int));
574  if (!any_registrations_failed &&
576  &pcxt->worker[i].bgwhandle))
577  {
579  pcxt->worker[i].bgwhandle);
580  pcxt->nworkers_launched++;
581  }
582  else
583  {
584  /*
585  * If we weren't able to register the worker, then we've bumped up
586  * against the max_worker_processes limit, and future
587  * registrations will probably fail too, so arrange to skip them.
588  * But we still have to execute this code for the remaining slots
589  * to make sure that we forget about the error queues we budgeted
590  * for those workers. Otherwise, we'll wait for them to start,
591  * but they never will.
592  */
593  any_registrations_failed = true;
594  pcxt->worker[i].bgwhandle = NULL;
595  shm_mq_detach(pcxt->worker[i].error_mqh);
596  pcxt->worker[i].error_mqh = NULL;
597  }
598  }
599 
600  /*
601  * Now that nworkers_launched has taken its final value, we can initialize
602  * known_attached_workers.
603  */
604  if (pcxt->nworkers_launched > 0)
605  {
606  pcxt->known_attached_workers =
607  palloc0(sizeof(bool) * pcxt->nworkers_launched);
608  pcxt->nknown_attached_workers = 0;
609  }
610 
611  /* Restore previous memory context. */
612  MemoryContextSwitchTo(oldcontext);
613 }
614 
615 /*
616  * Wait for all workers to attach to their error queues, and throw an error if
617  * any worker fails to do this.
618  *
619  * Callers can assume that if this function returns successfully, then the
620  * number of workers given by pcxt->nworkers_launched have initialized and
621  * attached to their error queues. Whether or not these workers are guaranteed
622  * to still be running depends on what code the caller asked them to run;
623  * this function does not guarantee that they have not exited. However, it
624  * does guarantee that any workers which exited must have done so cleanly and
625  * after successfully performing the work with which they were tasked.
626  *
627  * If this function is not called, then some of the workers that were launched
628  * may not have been started due to a fork() failure, or may have exited during
629  * early startup prior to attaching to the error queue, so nworkers_launched
630  * cannot be viewed as completely reliable. It will never be less than the
631  * number of workers which actually started, but it might be more. Any workers
632  * that failed to start will still be discovered by
633  * WaitForParallelWorkersToFinish and an error will be thrown at that time,
634  * provided that function is eventually reached.
635  *
636  * In general, the leader process should do as much work as possible before
637  * calling this function. fork() failures and other early-startup failures
638  * are very uncommon, and having the leader sit idle when it could be doing
639  * useful work is undesirable. However, if the leader needs to wait for
640  * all of its workers or for a specific worker, it may want to call this
641  * function before doing so. If not, it must make some other provision for
642  * the failure-to-start case, lest it wait forever. On the other hand, a
643  * leader which never waits for a worker that might not be started yet, or
644  * at least never does so prior to WaitForParallelWorkersToFinish(), need not
645  * call this function at all.
646  */
647 void
649 {
650  int i;
651 
652  /* Skip this if we have no launched workers. */
653  if (pcxt->nworkers_launched == 0)
654  return;
655 
656  for (;;)
657  {
658  /*
659  * This will process any parallel messages that are pending and it may
660  * also throw an error propagated from a worker.
661  */
663 
664  for (i = 0; i < pcxt->nworkers_launched; ++i)
665  {
667  shm_mq *mq;
668  int rc;
669  pid_t pid;
670 
671  if (pcxt->known_attached_workers[i])
672  continue;
673 
674  /*
675  * If error_mqh is NULL, then the worker has already exited
676  * cleanly.
677  */
678  if (pcxt->worker[i].error_mqh == NULL)
679  {
680  pcxt->known_attached_workers[i] = true;
681  ++pcxt->nknown_attached_workers;
682  continue;
683  }
684 
685  status = GetBackgroundWorkerPid(pcxt->worker[i].bgwhandle, &pid);
686  if (status == BGWH_STARTED)
687  {
688  /* Has the worker attached to the error queue? */
689  mq = shm_mq_get_queue(pcxt->worker[i].error_mqh);
690  if (shm_mq_get_sender(mq) != NULL)
691  {
692  /* Yes, so it is known to be attached. */
693  pcxt->known_attached_workers[i] = true;
694  ++pcxt->nknown_attached_workers;
695  }
696  }
697  else if (status == BGWH_STOPPED)
698  {
699  /*
700  * If the worker stopped without attaching to the error queue,
701  * throw an error.
702  */
703  mq = shm_mq_get_queue(pcxt->worker[i].error_mqh);
704  if (shm_mq_get_sender(mq) == NULL)
705  ereport(ERROR,
706  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
707  errmsg("parallel worker failed to initialize"),
708  errhint("More details may be available in the server log.")));
709 
710  pcxt->known_attached_workers[i] = true;
711  ++pcxt->nknown_attached_workers;
712  }
713  else
714  {
715  /*
716  * Worker not yet started, so we must wait. The postmaster
717  * will notify us if the worker's state changes. Our latch
718  * might also get set for some other reason, but if so we'll
719  * just end up waiting for the same worker again.
720  */
721  rc = WaitLatch(MyLatch,
724 
725  if (rc & WL_LATCH_SET)
727  }
728  }
729 
730  /* If all workers are known to have started, we're done. */
731  if (pcxt->nknown_attached_workers >= pcxt->nworkers_launched)
732  {
734  break;
735  }
736  }
737 }
738 
739 /*
740  * Wait for all workers to finish computing.
741  *
742  * Even if the parallel operation seems to have completed successfully, it's
743  * important to call this function afterwards. We must not miss any errors
744  * the workers may have thrown during the parallel operation, or any that they
745  * may yet throw while shutting down.
746  *
747  * Also, we want to update our notion of XactLastRecEnd based on worker
748  * feedback.
749  */
750 void
752 {
753  for (;;)
754  {
755  bool anyone_alive = false;
756  int nfinished = 0;
757  int i;
758 
759  /*
760  * This will process any parallel messages that are pending, which may
761  * change the outcome of the loop that follows. It may also throw an
762  * error propagated from a worker.
763  */
765 
766  for (i = 0; i < pcxt->nworkers_launched; ++i)
767  {
768  /*
769  * If error_mqh is NULL, then the worker has already exited
770  * cleanly. If we have received a message through error_mqh from
771  * the worker, we know it started up cleanly, and therefore we're
772  * certain to be notified when it exits.
773  */
774  if (pcxt->worker[i].error_mqh == NULL)
775  ++nfinished;
776  else if (pcxt->known_attached_workers[i])
777  {
778  anyone_alive = true;
779  break;
780  }
781  }
782 
783  if (!anyone_alive)
784  {
785  /* If all workers are known to have finished, we're done. */
786  if (nfinished >= pcxt->nworkers_launched)
787  {
788  Assert(nfinished == pcxt->nworkers_launched);
789  break;
790  }
791 
792  /*
793  * We didn't detect any living workers, but not all workers are
794  * known to have exited cleanly. Either not all workers have
795  * launched yet, or maybe some of them failed to start or
796  * terminated abnormally.
797  */
798  for (i = 0; i < pcxt->nworkers_launched; ++i)
799  {
800  pid_t pid;
801  shm_mq *mq;
802 
803  /*
804  * If the worker is BGWH_NOT_YET_STARTED or BGWH_STARTED, we
805  * should just keep waiting. If it is BGWH_STOPPED, then
806  * further investigation is needed.
807  */
808  if (pcxt->worker[i].error_mqh == NULL ||
809  pcxt->worker[i].bgwhandle == NULL ||
811  &pid) != BGWH_STOPPED)
812  continue;
813 
814  /*
815  * Check whether the worker ended up stopped without ever
816  * attaching to the error queue. If so, the postmaster was
817  * unable to fork the worker or it exited without initializing
818  * properly. We must throw an error, since the caller may
819  * have been expecting the worker to do some work before
820  * exiting.
821  */
822  mq = shm_mq_get_queue(pcxt->worker[i].error_mqh);
823  if (shm_mq_get_sender(mq) == NULL)
824  ereport(ERROR,
825  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
826  errmsg("parallel worker failed to initialize"),
827  errhint("More details may be available in the server log.")));
828 
829  /*
830  * The worker is stopped, but is attached to the error queue.
831  * Unless there's a bug somewhere, this will only happen when
832  * the worker writes messages and terminates after the
833  * CHECK_FOR_INTERRUPTS() near the top of this function and
834  * before the call to GetBackgroundWorkerPid(). In that case,
835  * our latch should have been set as well and the right things
836  * will happen on the next pass through the loop.
837  */
838  }
839  }
840 
844  }
845 
846  if (pcxt->toc != NULL)
847  {
848  FixedParallelState *fps;
849 
850  fps = shm_toc_lookup(pcxt->toc, PARALLEL_KEY_FIXED, false);
851  if (fps->last_xlog_end > XactLastRecEnd)
853  }
854 }
855 
856 /*
857  * Wait for all workers to exit.
858  *
859  * This function ensures that workers have been completely shut down. The
860  * difference between WaitForParallelWorkersToFinish and this function is
861  * that the former just ensures that the last message sent by a worker backend
862  * is received by the leader backend whereas this ensures the complete shutdown.
863  */
864 static void
866 {
867  int i;
868 
869  /* Wait until the workers actually die. */
870  for (i = 0; i < pcxt->nworkers_launched; ++i)
871  {
873 
874  if (pcxt->worker == NULL || pcxt->worker[i].bgwhandle == NULL)
875  continue;
876 
878 
879  /*
880  * If the postmaster kicked the bucket, we have no chance of cleaning
881  * up safely -- we won't be able to tell when our workers are actually
882  * dead. This doesn't necessitate a PANIC since they will all abort
883  * eventually, but we can't safely continue this session.
884  */
885  if (status == BGWH_POSTMASTER_DIED)
886  ereport(FATAL,
887  (errcode(ERRCODE_ADMIN_SHUTDOWN),
888  errmsg("postmaster exited during a parallel transaction")));
889 
890  /* Release memory. */
891  pfree(pcxt->worker[i].bgwhandle);
892  pcxt->worker[i].bgwhandle = NULL;
893  }
894 }
895 
896 /*
897  * Destroy a parallel context.
898  *
899  * If expecting a clean exit, you should use WaitForParallelWorkersToFinish()
900  * first, before calling this function. When this function is invoked, any
901  * remaining workers are forcibly killed; the dynamic shared memory segment
902  * is unmapped; and we then wait (uninterruptibly) for the workers to exit.
903  */
904 void
906 {
907  int i;
908 
909  /*
910  * Be careful about order of operations here! We remove the parallel
911  * context from the list before we do anything else; otherwise, if an
912  * error occurs during a subsequent step, we might try to nuke it again
913  * from AtEOXact_Parallel or AtEOSubXact_Parallel.
914  */
915  dlist_delete(&pcxt->node);
916 
917  /* Kill each worker in turn, and forget their error queues. */
918  if (pcxt->worker != NULL)
919  {
920  for (i = 0; i < pcxt->nworkers_launched; ++i)
921  {
922  if (pcxt->worker[i].error_mqh != NULL)
923  {
925 
926  shm_mq_detach(pcxt->worker[i].error_mqh);
927  pcxt->worker[i].error_mqh = NULL;
928  }
929  }
930  }
931 
932  /*
933  * If we have allocated a shared memory segment, detach it. This will
934  * implicitly detach the error queues, and any other shared memory queues,
935  * stored there.
936  */
937  if (pcxt->seg != NULL)
938  {
939  dsm_detach(pcxt->seg);
940  pcxt->seg = NULL;
941  }
942 
943  /*
944  * If this parallel context is actually in backend-private memory rather
945  * than shared memory, free that memory instead.
946  */
947  if (pcxt->private_memory != NULL)
948  {
949  pfree(pcxt->private_memory);
950  pcxt->private_memory = NULL;
951  }
952 
953  /*
954  * We can't finish transaction commit or abort until all of the workers
955  * have exited. This means, in particular, that we can't respond to
956  * interrupts at this stage.
957  */
958  HOLD_INTERRUPTS();
961 
962  /* Free the worker array itself. */
963  if (pcxt->worker != NULL)
964  {
965  pfree(pcxt->worker);
966  pcxt->worker = NULL;
967  }
968 
969  /* Free memory. */
970  pfree(pcxt->library_name);
971  pfree(pcxt->function_name);
972  pfree(pcxt);
973 }
974 
975 /*
976  * Are there any parallel contexts currently active?
977  */
978 bool
980 {
981  return !dlist_is_empty(&pcxt_list);
982 }
983 
984 /*
985  * Handle receipt of an interrupt indicating a parallel worker message.
986  *
987  * Note: this is called within a signal handler! All we can do is set
988  * a flag that will cause the next CHECK_FOR_INTERRUPTS() to invoke
989  * HandleParallelMessages().
990  */
991 void
993 {
994  InterruptPending = true;
995  ParallelMessagePending = true;
996  SetLatch(MyLatch);
997 }
998 
999 /*
1000  * Handle any queued protocol messages received from parallel workers.
1001  */
1002 void
1004 {
1005  dlist_iter iter;
1006  MemoryContext oldcontext;
1007 
1008  static MemoryContext hpm_context = NULL;
1009 
1010  /*
1011  * This is invoked from ProcessInterrupts(), and since some of the
1012  * functions it calls contain CHECK_FOR_INTERRUPTS(), there is a potential
1013  * for recursive calls if more signals are received while this runs. It's
1014  * unclear that recursive entry would be safe, and it doesn't seem useful
1015  * even if it is safe, so let's block interrupts until done.
1016  */
1017  HOLD_INTERRUPTS();
1018 
1019  /*
1020  * Moreover, CurrentMemoryContext might be pointing almost anywhere. We
1021  * don't want to risk leaking data into long-lived contexts, so let's do
1022  * our work here in a private context that we can reset on each use.
1023  */
1024  if (hpm_context == NULL) /* first time through? */
1026  "HandleParallelMessages",
1028  else
1029  MemoryContextReset(hpm_context);
1030 
1031  oldcontext = MemoryContextSwitchTo(hpm_context);
1032 
1033  /* OK to process messages. Reset the flag saying there are more to do. */
1034  ParallelMessagePending = false;
1035 
1036  dlist_foreach(iter, &pcxt_list)
1037  {
1038  ParallelContext *pcxt;
1039  int i;
1040 
1041  pcxt = dlist_container(ParallelContext, node, iter.cur);
1042  if (pcxt->worker == NULL)
1043  continue;
1044 
1045  for (i = 0; i < pcxt->nworkers_launched; ++i)
1046  {
1047  /*
1048  * Read as many messages as we can from each worker, but stop when
1049  * either (1) the worker's error queue goes away, which can happen
1050  * if we receive a Terminate message from the worker; or (2) no
1051  * more messages can be read from the worker without blocking.
1052  */
1053  while (pcxt->worker[i].error_mqh != NULL)
1054  {
1055  shm_mq_result res;
1056  Size nbytes;
1057  void *data;
1058 
1059  res = shm_mq_receive(pcxt->worker[i].error_mqh, &nbytes,
1060  &data, true);
1061  if (res == SHM_MQ_WOULD_BLOCK)
1062  break;
1063  else if (res == SHM_MQ_SUCCESS)
1064  {
1065  StringInfoData msg;
1066 
1067  initStringInfo(&msg);
1068  appendBinaryStringInfo(&msg, data, nbytes);
1069  HandleParallelMessage(pcxt, i, &msg);
1070  pfree(msg.data);
1071  }
1072  else
1073  ereport(ERROR,
1074  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1075  errmsg("lost connection to parallel worker")));
1076  }
1077  }
1078  }
1079 
1080  MemoryContextSwitchTo(oldcontext);
1081 
1082  /* Might as well clear the context on our way out */
1083  MemoryContextReset(hpm_context);
1084 
1086 }
1087 
1088 /*
1089  * Handle a single protocol message received from a single parallel worker.
1090  */
1091 static void
1093 {
1094  char msgtype;
1095 
1096  if (pcxt->known_attached_workers != NULL &&
1097  !pcxt->known_attached_workers[i])
1098  {
1099  pcxt->known_attached_workers[i] = true;
1100  pcxt->nknown_attached_workers++;
1101  }
1102 
1103  msgtype = pq_getmsgbyte(msg);
1104 
1105  switch (msgtype)
1106  {
1107  case 'K': /* BackendKeyData */
1108  {
1109  int32 pid = pq_getmsgint(msg, 4);
1110 
1111  (void) pq_getmsgint(msg, 4); /* discard cancel key */
1112  (void) pq_getmsgend(msg);
1113  pcxt->worker[i].pid = pid;
1114  break;
1115  }
1116 
1117  case 'E': /* ErrorResponse */
1118  case 'N': /* NoticeResponse */
1119  {
1120  ErrorData edata;
1121  ErrorContextCallback *save_error_context_stack;
1122 
1123  /* Parse ErrorResponse or NoticeResponse. */
1124  pq_parse_errornotice(msg, &edata);
1125 
1126  /* Death of a worker isn't enough justification for suicide. */
1127  edata.elevel = Min(edata.elevel, ERROR);
1128 
1129  /*
1130  * If desired, add a context line to show that this is a
1131  * message propagated from a parallel worker. Otherwise, it
1132  * can sometimes be confusing to understand what actually
1133  * happened. (We don't do this in FORCE_PARALLEL_REGRESS mode
1134  * because it causes test-result instability depending on
1135  * whether a parallel worker is actually used or not.)
1136  */
1138  {
1139  if (edata.context)
1140  edata.context = psprintf("%s\n%s", edata.context,
1141  _("parallel worker"));
1142  else
1143  edata.context = pstrdup(_("parallel worker"));
1144  }
1145 
1146  /*
1147  * Context beyond that should use the error context callbacks
1148  * that were in effect when the ParallelContext was created,
1149  * not the current ones.
1150  */
1151  save_error_context_stack = error_context_stack;
1153 
1154  /* Rethrow error or print notice. */
1155  ThrowErrorData(&edata);
1156 
1157  /* Not an error, so restore previous context stack. */
1158  error_context_stack = save_error_context_stack;
1159 
1160  break;
1161  }
1162 
1163  case 'A': /* NotifyResponse */
1164  {
1165  /* Propagate NotifyResponse. */
1166  int32 pid;
1167  const char *channel;
1168  const char *payload;
1169 
1170  pid = pq_getmsgint(msg, 4);
1171  channel = pq_getmsgrawstring(msg);
1172  payload = pq_getmsgrawstring(msg);
1173  pq_endmessage(msg);
1174 
1175  NotifyMyFrontEnd(channel, payload, pid);
1176 
1177  break;
1178  }
1179 
1180  case 'X': /* Terminate, indicating clean exit */
1181  {
1182  shm_mq_detach(pcxt->worker[i].error_mqh);
1183  pcxt->worker[i].error_mqh = NULL;
1184  break;
1185  }
1186 
1187  default:
1188  {
1189  elog(ERROR, "unrecognized message type received from parallel worker: %c (message length %d bytes)",
1190  msgtype, msg->len);
1191  }
1192  }
1193 }
1194 
1195 /*
1196  * End-of-subtransaction cleanup for parallel contexts.
1197  *
1198  * Currently, it's forbidden to enter or leave a subtransaction while
1199  * parallel mode is in effect, so we could just blow away everything. But
1200  * we may want to relax that restriction in the future, so this code
1201  * contemplates that there may be multiple subtransaction IDs in pcxt_list.
1202  */
1203 void
1205 {
1206  while (!dlist_is_empty(&pcxt_list))
1207  {
1208  ParallelContext *pcxt;
1209 
1210  pcxt = dlist_head_element(ParallelContext, node, &pcxt_list);
1211  if (pcxt->subid != mySubId)
1212  break;
1213  if (isCommit)
1214  elog(WARNING, "leaked parallel context");
1215  DestroyParallelContext(pcxt);
1216  }
1217 }
1218 
1219 /*
1220  * End-of-transaction cleanup for parallel contexts.
1221  */
1222 void
1223 AtEOXact_Parallel(bool isCommit)
1224 {
1225  while (!dlist_is_empty(&pcxt_list))
1226  {
1227  ParallelContext *pcxt;
1228 
1229  pcxt = dlist_head_element(ParallelContext, node, &pcxt_list);
1230  if (isCommit)
1231  elog(WARNING, "leaked parallel context");
1232  DestroyParallelContext(pcxt);
1233  }
1234 }
1235 
1236 /*
1237  * Main entrypoint for parallel workers.
 *
 * main_arg is converted with DatumGetUInt32() and handed to dsm_attach() to
 * map the dynamic shared memory segment for this parallel operation.  The
 * function then looks up a series of chunks in the segment's table of
 * contents, restores the corresponding backend-local state from each, calls
 * the entry point named in the PARALLEL_KEY_ENTRYPOINT chunk with (seg, toc),
 * and finally sends an 'X' message.
 *
 * NOTE(review): this copy of the function is incomplete.  The embedded
 * listing numbers skip in many places (for example 1240, 1266, 1270, 1273,
 * 1295, 1306-1308, 1318-1319, 1351), so several statements that the comments
 * below describe are not visible here, and a few multi-line calls are shown
 * without their first line.  Restore them from a pristine copy of the file
 * before relying on this text.
1238  */
1239 void
1241 {
1242  dsm_segment *seg;
1243  shm_toc *toc;
1244  FixedParallelState *fps;
1245  char *error_queue_space;
1246  shm_mq *mq;
1247  shm_mq_handle *mqh;
1248  char *libraryspace;
1249  char *entrypointstate;
1250  char *library_name;
1251  char *function_name;
1252  parallel_worker_main_type entrypt;
1253  char *gucspace;
1254  char *combocidspace;
1255  char *tsnapspace;
1256  char *asnapspace;
1257  char *tstatespace;
1258  char *pendingsyncsspace;
1259  char *reindexspace;
1260  char *relmapperspace;
1261  char *uncommittedenumsspace;
1262  StringInfoData msgbuf;
1263  char *session_dsm_handle_space;
1264 
1265  /* Set flag to indicate that we're initializing a parallel worker. */
 /* NOTE(review): the statement for the comment above (listing 1266) is absent. */
1267 
1268  /* Establish signal handlers. */
1269  pqsignal(SIGTERM, die);
1271 
1272  /* Determine and set our parallel worker number. */
 /* The worker number is copied out of the first sizeof(int) bytes of bgw_extra. */
1274  memcpy(&ParallelWorkerNumber, MyBgworkerEntry->bgw_extra, sizeof(int));
1275 
1276  /* Set up a memory context to work in, just for cleanliness. */
 /* NOTE(review): only the middle line of this call survives (1277 and 1279 absent). */
1278  "Parallel worker",
1280 
1281  /*
1282  * Attach to the dynamic shared memory segment for the parallel query, and
1283  * find its table of contents.
1284  *
1285  * Note: at this point, we have not created any ResourceOwner in this
1286  * process. This will result in our DSM mapping surviving until process
1287  * exit, which is fine. If there were a ResourceOwner, it would acquire
1288  * ownership of the mapping, but we have no need for that.
1289  */
1290  seg = dsm_attach(DatumGetUInt32(main_arg));
1291  if (seg == NULL)
1292  ereport(ERROR,
1293  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1294  errmsg("could not map dynamic shared memory segment")));
 /*
  * NOTE(review): the assignment to toc (listing 1295) is absent.  The error
  * text below suggests it validates a magic number -- TODO confirm.
  */
1296  if (toc == NULL)
1297  ereport(ERROR,
1298  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1299  errmsg("invalid magic number in dynamic shared memory segment")));
1300 
1301  /* Look up fixed parallel state. */
1302  fps = shm_toc_lookup(toc, PARALLEL_KEY_FIXED, false);
1303  MyFixedParallelState = fps;
1304 
1305  /* Arrange to signal the leader if we exit. */
 /* NOTE(review): the statements for the comment above (listing 1306-1308) are absent. */
1309 
1310  /*
1311  * Now we can find and attach to the error queue provided for us. That's
1312  * good, because until we do that, any errors that happen here will not be
1313  * reported back to the process that requested that this worker be
1314  * launched.
1315  */
1316  error_queue_space = shm_toc_lookup(toc, PARALLEL_KEY_ERROR_QUEUE, false);
 /* NOTE(review): the offset expression below is cut off (listing 1318-1319 absent). */
1317  mq = (shm_mq *) (error_queue_space +
1320  mqh = shm_mq_attach(mq, seg, NULL);
1321  pq_redirect_to_shm_mq(seg, mqh);
1324 
1325  /*
1326  * Send a BackendKeyData message to the process that initiated parallelism
1327  * so that it has access to our PID before it receives any other messages
1328  * from us. Our cancel key is sent, too, since that's the way the
1329  * protocol message is defined, but it won't actually be used for anything
1330  * in this case.
1331  */
1332  pq_beginmessage(&msgbuf, 'K');
1333  pq_sendint32(&msgbuf, (int32) MyProcPid);
1334  pq_sendint32(&msgbuf, (int32) MyCancelKey);
1335  pq_endmessage(&msgbuf);
1336 
1337  /*
1338  * Hooray! Primary initialization is complete. Now, we need to set up our
1339  * backend-local state to match the original backend.
1340  */
1341 
1342  /*
1343  * Join locking group. We must do this before anything that could try to
1344  * acquire a heavyweight lock, because any heavyweight locks acquired to
1345  * this point could block either directly against the parallel group
1346  * leader or against some process which in turn waits for a lock that
1347  * conflicts with the parallel group leader, causing an undetected
1348  * deadlock. (If we can't join the lock group, the leader has gone away,
1349  * so just exit quietly.)
1350  */
 /* NOTE(review): the head of this condition (listing 1351) is absent. */
1352  fps->parallel_leader_pid))
1353  return;
1354 
1355  /*
1356  * Restore transaction and statement start-time timestamps. This must
1357  * happen before anything that would start a transaction, else asserts in
1358  * xact.c will fire.
1359  */
1361 
1362  /*
1363  * Identify the entry point to be called. In theory this could result in
1364  * loading an additional library, though most likely the entry point is in
1365  * the core backend or in a library we just loaded.
1366  */
1367  entrypointstate = shm_toc_lookup(toc, PARALLEL_KEY_ENTRYPOINT, false);
 /*
  * The chunk is read as two consecutive NUL-terminated strings: the library
  * name first, then the function name right after its terminator.
  */
1368  library_name = entrypointstate;
1369  function_name = entrypointstate + strlen(library_name) + 1;
1370 
1371  entrypt = LookupParallelWorkerFunction(library_name, function_name);
1372 
1373  /* Restore database connection. */
 /* NOTE(review): the first line of this call (listing 1374) is absent. */
1375  fps->authenticated_user_id,
1376  0);
1377 
1378  /*
1379  * Set the client encoding to the database encoding, since that is what
1380  * the leader will expect.
1381  */
1383 
1384  /*
1385  * Load libraries that were loaded by original backend. We want to do
1386  * this before restoring GUCs, because the libraries might define custom
1387  * variables.
1388  */
1389  libraryspace = shm_toc_lookup(toc, PARALLEL_KEY_LIBRARY, false);
1391  RestoreLibraryState(libraryspace);
1392 
1393  /* Restore GUC values from launching backend. */
1394  gucspace = shm_toc_lookup(toc, PARALLEL_KEY_GUC, false);
1395  RestoreGUCState(gucspace);
1397 
1398  /* Crank up a transaction state appropriate to a parallel worker. */
1399  tstatespace = shm_toc_lookup(toc, PARALLEL_KEY_TRANSACTION_STATE, false);
1400  StartParallelWorkerTransaction(tstatespace);
1401 
1402  /* Restore combo CID state. */
1403  combocidspace = shm_toc_lookup(toc, PARALLEL_KEY_COMBO_CID, false);
1404  RestoreComboCIDState(combocidspace);
1405 
1406  /* Attach to the per-session DSM segment and contained objects. */
 /* NOTE(review): the lookup that initializes this variable (listing 1408) is absent. */
1407  session_dsm_handle_space =
 /* The looked-up chunk is read as a single dsm_handle value. */
1409  AttachSession(*(dsm_handle *) session_dsm_handle_space);
1410 
1411  /* Restore transaction snapshot. */
1412  tsnapspace = shm_toc_lookup(toc, PARALLEL_KEY_TRANSACTION_SNAPSHOT, false);
 /* NOTE(review): the first line of this call (listing 1413) is absent. */
1414  fps->parallel_leader_pgproc);
1415 
1416  /* Restore active snapshot. */
1417  asnapspace = shm_toc_lookup(toc, PARALLEL_KEY_ACTIVE_SNAPSHOT, false);
1418  PushActiveSnapshot(RestoreSnapshot(asnapspace));
1419 
1420  /*
1421  * We've changed which tuples we can see, and must therefore invalidate
1422  * system caches.
1423  */
1425 
1426  /*
1427  * Restore current role id. Skip verifying whether session user is
1428  * allowed to become this role and blindly restore the leader's state for
1429  * current role.
1430  */
1432 
1433  /* Restore user ID and security context. */
1435 
1436  /* Restore temp-namespace state to ensure search path matches leader's. */
1439 
1440  /* Restore pending syncs. */
1441  pendingsyncsspace = shm_toc_lookup(toc, PARALLEL_KEY_PENDING_SYNCS,
1442  false);
1443  RestorePendingSyncs(pendingsyncsspace);
1444 
1445  /* Restore reindex state. */
1446  reindexspace = shm_toc_lookup(toc, PARALLEL_KEY_REINDEX_STATE, false);
1447  RestoreReindexState(reindexspace);
1448 
1449  /* Restore relmapper state. */
1450  relmapperspace = shm_toc_lookup(toc, PARALLEL_KEY_RELMAPPER_STATE, false);
1451  RestoreRelationMap(relmapperspace);
1452 
1453  /* Restore uncommitted enums. */
1454  uncommittedenumsspace = shm_toc_lookup(toc, PARALLEL_KEY_UNCOMMITTEDENUMS,
1455  false);
1456  RestoreUncommittedEnums(uncommittedenumsspace);
1457 
1458  /* Attach to the leader's serializable transaction, if SERIALIZABLE. */
1460 
1461  /*
1462  * We've initialized all of our state now; nothing should change
1463  * hereafter.
1464  */
1467 
1468  /*
1469  * Time to do the real work: invoke the caller-supplied code.
1470  */
1471  entrypt(seg, toc);
1472 
1473  /* Must exit parallel mode to pop active snapshot. */
1474  ExitParallelMode();
1475 
1476  /* Must pop active snapshot so snapmgr.c doesn't complain. */
1478 
1479  /* Shut down the parallel-worker transaction. */
1481 
1482  /* Detach from the per-session DSM segment. */
1483  DetachSession();
1484 
1485  /* Report success. */
 /* HandleParallelMessage() treats 'X' as Terminate, indicating clean exit. */
1486  pq_putmessage('X', NULL, 0);
1487 }
1488 
1489 /*
1490  * Update shared memory with the ending location of the last WAL record we
1491  * wrote, if it's greater than the value already stored there.
1492  */
1493 void
1495 {
1497 
1498  Assert(fps != NULL);
1499  SpinLockAcquire(&fps->mutex);
1500  if (fps->last_xlog_end < last_xlog_end)
1502  SpinLockRelease(&fps->mutex);
1503 }
1504 
1505 /*
1506  * Make sure the leader tries to read from our error queue one more time.
1507  * This guards against the case where we exit uncleanly without sending an
1508  * ErrorResponse to the leader, for example because some code calls proc_exit
1509  * directly.
1510  */
1511 static void
1513 {
1517 }
1518 
1519 /*
1520  * Look up (and possibly load) a parallel worker entry point function.
1521  *
1522  * For functions contained in the core code, we use library name "postgres"
1523  * and consult the InternalParallelWorkers array. External functions are
1524  * looked up, and loaded if necessary, using load_external_function().
1525  *
1526  * The point of this is to pass function names as strings across process
1527  * boundaries. We can't pass actual function addresses because of the
1528  * possibility that the function has been loaded at a different address
1529  * in a different process. This is obviously a hazard for functions in
1530  * loadable libraries, but it can happen even for functions in the core code
1531  * on platforms using EXEC_BACKEND (e.g., Windows).
1532  *
1533  * At some point it might be worthwhile to get rid of InternalParallelWorkers[]
1534  * in favor of applying load_external_function() for core functions too;
1535  * but that raises portability issues that are not worth addressing now.
1536  */
1538 LookupParallelWorkerFunction(const char *libraryname, const char *funcname)
1539 {
1540  /*
1541  * If the function is to be loaded from postgres itself, search the
1542  * InternalParallelWorkers array.
1543  */
1544  if (strcmp(libraryname, "postgres") == 0)
1545  {
1546  int i;
1547 
1548  for (i = 0; i < lengthof(InternalParallelWorkers); i++)
1549  {
1550  if (strcmp(InternalParallelWorkers[i].fn_name, funcname) == 0)
1551  return InternalParallelWorkers[i].fn_addr;
1552  }
1553 
1554  /* We can only reach this by programming error. */
1555  elog(ERROR, "internal function \"%s\" not found", funcname);
1556  }
1557 
1558  /* Otherwise load from external library. */
1559  return (parallel_worker_main_type)
1560  load_external_function(libraryname, funcname, true, NULL);
1561 }
static pid_t ParallelLeaderPid
Definition: parallel.c:127
char bgw_extra[BGW_EXTRALEN]
Definition: bgworker.h:98
#define DatumGetUInt32(X)
Definition: postgres.h:530
int slock_t
Definition: s_lock.h:934
#define PARALLEL_ERROR_QUEUE_SIZE
Definition: parallel.c:55
#define AllocSetContextCreate
Definition: memutils.h:173
shm_toc * shm_toc_create(uint64 magic, void *address, Size nbytes)
Definition: shm_toc.c:40
int MyProcPid
Definition: globals.c:43
int errhint(const char *fmt,...)
Definition: elog.c:1156
BackendId MyBackendId
Definition: globals.c:84
Snapshot RestoreSnapshot(char *start_address)
Definition: snapmgr.c:2161
MemoryContext TopTransactionContext
Definition: mcxt.c:53
uint32 dsm_handle
Definition: dsm_impl.h:55
ParallelContext * CreateParallelContext(const char *library_name, const char *function_name, int nworkers)
Definition: parallel.c:164
void SerializeUncommittedEnums(void *space, Size size)
Definition: pg_enum.c:709
void SetUserIdAndSecContext(Oid userid, int sec_context)
Definition: miscinit.c:590
static void dlist_push_head(dlist_head *head, dlist_node *node)
Definition: ilist.h:300
PGPROC * parallel_leader_pgproc
Definition: parallel.c:92
XLogRecPtr XactLastRecEnd
Definition: xlog.c:365
void AttachSerializableXact(SerializableXactHandle handle)
Definition: predicate.c:5195
void shm_mq_detach(shm_mq_handle *mqh)
Definition: shm_mq.c:817
PGPROC * MyProc
Definition: proc.c:68
int64 TimestampTz
Definition: timestamp.h:39
dsm_segment * seg
Definition: parallel.h:43
static void WaitForParallelWorkersToExit(ParallelContext *pcxt)
Definition: parallel.c:865
#define dlist_foreach(iter, lhead)
Definition: ilist.h:526
void SerializeReindexState(Size maxsize, char *start_address)
Definition: index.c:4252
char * pstrdup(const char *in)
Definition: mcxt.c:1299
void CommitTransactionCommand(void)
Definition: xact.c:2939
shm_toc_estimator estimator
Definition: parallel.h:42
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
void EndParallelWorkerTransaction(void)
Definition: xact.c:5328
#define SpinLockInit(lock)
Definition: spin.h:60
void GetTempNamespaceState(Oid *tempNamespaceId, Oid *tempToastNamespaceId)
Definition: namespace.c:3312
dsm_segment * dsm_attach(dsm_handle h)
Definition: dsm.c:631
void _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc)
Definition: nbtsort.c:1795
#define Min(x, y)
Definition: c.h:986
PGPROC * shm_mq_get_sender(shm_mq *mq)
Definition: shm_mq.c:250
Oid authenticated_user_id
Definition: parallel.c:85
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
Snapshot GetActiveSnapshot(void)
Definition: snapmgr.c:786
dsm_handle dsm_segment_handle(dsm_segment *seg)
Definition: dsm.c:1087
int bgw_restart_time
Definition: bgworker.h:94
int errcode(int sqlerrcode)
Definition: elog.c:698
Oid temp_toast_namespace_id
Definition: parallel.c:89
#define BGWORKER_CLASS_PARALLEL
Definition: bgworker.h:67
void DetachSession(void)
Definition: session.c:201
BackgroundWorker * MyBgworkerEntry
Definition: postmaster.c:194
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:143
bool BecomeLockGroupMember(PGPROC *leader, int pid)
Definition: proc.c:1976
void PopActiveSnapshot(void)
Definition: snapmgr.c:759
int nknown_attached_workers
Definition: parallel.h:47
uint32 SubTransactionId
Definition: c.h:591
#define PARALLEL_KEY_RELMAPPER_STATE
Definition: parallel.c:77
Size shm_toc_estimate(shm_toc_estimator *e)
Definition: shm_toc.c:263
#define lengthof(array)
Definition: c.h:734
void RestoreTransactionSnapshot(Snapshot snapshot, void *source_pgproc)
Definition: snapmgr.c:2226
void SerializeTransactionState(Size maxsize, char *start_address)
Definition: xact.c:5233
parallel_worker_main_type fn_addr
Definition: parallel.c:136
const char * pq_getmsgrawstring(StringInfo msg)
Definition: pqformat.c:610
unsigned int Oid
Definition: postgres_ext.h:31
#define shm_toc_estimate_chunk(e, sz)
Definition: shm_toc.h:51
void SetLatch(Latch *latch)
Definition: latch.c:567
#define BGWORKER_SHMEM_ACCESS
Definition: bgworker.h:52
Snapshot GetTransactionSnapshot(void)
Definition: snapmgr.c:250
void InvalidateSystemCaches(void)
Definition: inval.c:649
int nworkers_to_launch
Definition: parallel.h:37
char bgw_function_name[BGW_MAXLEN]
Definition: bgworker.h:96
void ReinitializeParallelWorkers(ParallelContext *pcxt, int nworkers_to_launch)
Definition: parallel.c:514
void ResetLatch(Latch *latch)
Definition: latch.c:660
void pq_beginmessage(StringInfo buf, char msgtype)
Definition: pqformat.c:87
void RestoreComboCIDState(char *comboCIDstate)
Definition: combocid.c:342
signed int int32
Definition: c.h:429
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:452
SubTransactionId subid
Definition: parallel.h:35
Oid GetCurrentRoleId(void)
Definition: miscinit.c:837
#define RESUME_INTERRUPTS()
Definition: miscadmin.h:121
ErrorContextCallback * error_context_stack
Definition: elog.c:93
void RestorePendingSyncs(char *startAddress)
Definition: storage.c:575
SerializableXactHandle serializable_xact_handle
Definition: parallel.c:97
volatile bool ParallelMessagePending
Definition: parallel.c:115
static void pq_sendint32(StringInfo buf, uint32 i)
Definition: pqformat.h:145
#define DSM_HANDLE_INVALID
Definition: dsm.h:23
#define sprintf
Definition: port.h:218
#define StaticAssertStmt(condition, errmessage)
Definition: c.h:918
void WaitForParallelWorkersToFinish(ParallelContext *pcxt)
Definition: parallel.c:751
#define SpinLockAcquire(lock)
Definition: spin.h:62
void DestroyParallelContext(ParallelContext *pcxt)
Definition: parallel.c:905
int SendProcSignal(pid_t pid, ProcSignalReason reason, BackendId backendId)
Definition: procsignal.c:261
#define dlist_container(type, membername, ptr)
Definition: ilist.h:496
ParallelWorkerInfo * worker
Definition: parallel.h:46
Datum bgw_main_arg
Definition: bgworker.h:97
void pfree(void *pointer)
Definition: mcxt.c:1169
bool IsInParallelMode(void)
Definition: xact.c:1012
void SerializeLibraryState(Size maxsize, char *start_address)
Definition: dfmgr.c:726
#define ERROR
Definition: elog.h:46
BgwHandleStatus WaitForBackgroundWorkerShutdown(BackgroundWorkerHandle *handle)
Definition: bgworker.c:1174
void ParallelQueryMain(dsm_segment *seg, shm_toc *toc)
Oid GetAuthenticatedUserId(void)
Definition: miscinit.c:537
#define PARALLEL_KEY_TRANSACTION_SNAPSHOT
Definition: parallel.c:70
char * function_name
Definition: parallel.h:40
void SerializeSnapshot(Snapshot snapshot, char *start_address)
Definition: snapmgr.c:2102
int32 MyCancelKey
Definition: globals.c:47
#define PARALLEL_KEY_UNCOMMITTEDENUMS
Definition: parallel.c:78
void pq_parse_errornotice(StringInfo msg, ErrorData *edata)
Definition: pqmq.c:199
#define FATAL
Definition: elog.h:49
shm_mq * shm_mq_create(void *address, Size size)
Definition: shm_mq.c:170
void ExitParallelMode(void)
Definition: xact.c:992
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:195
#define PARALLEL_KEY_FIXED
Definition: parallel.c:65
void HandleParallelMessages(void)
Definition: parallel.c:1003
#define PARALLEL_KEY_ERROR_QUEUE
Definition: parallel.c:66
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:361
void SetTempNamespaceState(Oid tempNamespaceId, Oid tempToastNamespaceId)
Definition: namespace.c:3328
static void HandleParallelMessage(ParallelContext *pcxt, int i, StringInfo msg)
Definition: parallel.c:1092
#define DSM_CREATE_NULL_IF_MAXSEGMENTS
Definition: dsm.h:20
void PushActiveSnapshot(Snapshot snap)
Definition: snapmgr.c:680
void GetUserIdAndSecContext(Oid *userid, int *sec_context)
Definition: miscinit.c:583
shm_mq_handle * error_mqh
Definition: parallel.h:28
#define PARALLEL_KEY_GUC
Definition: parallel.c:68
void RestoreUncommittedEnums(void *space)
Definition: pg_enum.c:741
int SetClientEncoding(int encoding)
Definition: mbutils.c:208
int ParallelWorkerNumber
Definition: parallel.c:112
BackgroundWorkerHandle * bgwhandle
Definition: parallel.h:27
void SerializeRelationMap(Size maxSize, char *startAddress)
Definition: relmapper.c:657
Size EstimateGUCStateSpace(void)
Definition: guc.c:10602
void AttachSession(dsm_handle handle)
Definition: session.c:155
#define BGW_NEVER_RESTART
Definition: bgworker.h:84
#define shm_toc_initialize_estimator(e)
Definition: shm_toc.h:49
Size EstimateComboCIDStateSpace(void)
Definition: combocid.c:297
#define UInt32GetDatum(X)
Definition: postgres.h:537
MemoryContext CurrentMemoryContext
Definition: mcxt.c:42
static void ParallelWorkerShutdown(int code, Datum arg)
Definition: parallel.c:1512
static void dlist_delete(dlist_node *node)
Definition: ilist.h:358
int nworkers_launched
Definition: parallel.h:38
XLogRecPtr last_xlog_end
Definition: parallel.c:103
BgwHandleStatus
Definition: bgworker.h:102
void LaunchParallelWorkers(ParallelContext *pcxt)
Definition: parallel.c:528
Size EstimateReindexStateSpace(void)
Definition: index.c:4241
void shm_mq_set_sender(shm_mq *mq, PGPROC *proc)
Definition: shm_mq.c:217
void BecomeLockGroupLeader(void)
Definition: proc.c:1946
MemoryContext TopMemoryContext
Definition: mcxt.c:48
void * load_external_function(const char *filename, const char *funcname, bool signalNotFound, void **filehandle)
Definition: dfmgr.c:107
TimestampTz GetCurrentTransactionStartTimestamp(void)
Definition: xact.c:798
void ThrowErrorData(ErrorData *edata)
Definition: elog.c:1679
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
#define DLIST_STATIC_INIT(name)
Definition: ilist.h:248
#define WARNING
Definition: elog.h:40
void InitializeParallelDSM(ParallelContext *pcxt)
Definition: parallel.c:202
Size EstimateUncommittedEnumsSpace(void)
Definition: pg_enum.c:695
int elevel
Definition: elog.h:371
bool * known_attached_workers
Definition: parallel.h:48
bool ParallelContextActive(void)
Definition: parallel.c:979
#define SpinLockRelease(lock)
Definition: spin.h:64
#define dlist_head_element(type, membername, lhead)
Definition: ilist.h:506
Size EstimateSnapshotSpace(Snapshot snap)
Definition: snapmgr.c:2078
Size mul_size(Size s1, Size s2)
Definition: shmem.c:519
void * palloc0(Size size)
Definition: mcxt.c:1093
static parallel_worker_main_type LookupParallelWorkerFunction(const char *libraryname, const char *funcname)
Definition: parallel.c:1538
void RestoreLibraryState(char *start_address)
Definition: dfmgr.c:748
uintptr_t Datum
Definition: postgres.h:411
dsm_segment * dsm_create(Size size, int flags)
Definition: dsm.c:487
shm_toc * shm_toc_attach(uint64 magic, void *address)
Definition: shm_toc.c:64
Size EstimatePendingSyncsSpace(void)
Definition: storage.c:511
int GetDatabaseEncoding(void)
Definition: mbutils.c:1210
int BackendId
Definition: backendid.h:21
void SerializePendingSyncs(Size maxSize, char *startAddress)
Definition: storage.c:524
Oid MyDatabaseId
Definition: globals.c:88
Size EstimateLibraryStateSpace(void)
Definition: dfmgr.c:709
void ReinitializeParallelDSM(ParallelContext *pcxt)
Definition: parallel.c:464
void shm_mq_set_handle(shm_mq_handle *mqh, BackgroundWorkerHandle *handle)
Definition: shm_mq.c:311
dlist_node * cur
Definition: ilist.h:161
void BackgroundWorkerInitializeConnectionByOid(Oid dboid, Oid useroid, uint32 flags)
Definition: postmaster.c:5694
#define PARALLEL_MAGIC
Definition: parallel.c:58
void pq_redirect_to_shm_mq(dsm_segment *seg, shm_mq_handle *mqh)
Definition: pqmq.c:51
void * SerializableXactHandle
Definition: predicate.h:37
void ParallelWorkerReportLastRecEnd(XLogRecPtr last_xlog_end)
Definition: parallel.c:1494
void TerminateBackgroundWorker(BackgroundWorkerHandle *handle)
Definition: bgworker.c:1213
#define ereport(elevel,...)
Definition: elog.h:157
pqsigfunc pqsignal(int signum, pqsigfunc handler)
Definition: signal.c:170
int pq_getmsgbyte(StringInfo msg)
Definition: pqformat.c:401
shm_mq_result
Definition: shm_mq.h:36
TimestampTz xact_ts
Definition: parallel.c:95
BackendId parallel_leader_backend_id
Definition: parallel.c:94
char * library_name
Definition: parallel.h:39
BackendId ParallelLeaderBackendId
Definition: globals.c:86
int force_parallel_mode
Definition: planner.c:70
void * dsm_segment_address(dsm_segment *seg)
Definition: dsm.c:1059
uint64 XLogRecPtr
Definition: xlogdefs.h:21
char bgw_name[BGW_MAXLEN]
Definition: bgworker.h:90
#define Assert(condition)
Definition: c.h:804
void StartParallelWorkerTransaction(char *tstatespace)
Definition: xact.c:5303
#define BGWORKER_BACKEND_DATABASE_CONNECTION
Definition: bgworker.h:59
SubTransactionId GetCurrentSubTransactionId(void)
Definition: xact.c:723
Size EstimateTransactionStateSpace(void)
Definition: xact.c:5205
void StartTransactionCommand(void)
Definition: xact.c:2838
#define PARALLEL_KEY_REINDEX_STATE
Definition: parallel.c:76
const char * fn_name
Definition: parallel.c:135
static bool dlist_is_empty(dlist_head *head)
Definition: ilist.h:289
#define BGW_MAXLEN
Definition: bgworker.h:85
size_t Size
Definition: c.h:540
BgWorkerStartTime bgw_start_time
Definition: bgworker.h:93
dlist_node node
Definition: parallel.h:34
void parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
Definition: vacuumlazy.c:4130
#define shm_toc_estimate_keys(e, cnt)
Definition: shm_toc.h:53
shm_mq * shm_mq_get_queue(shm_mq_handle *mqh)
Definition: shm_mq.c:872
bool RegisterDynamicBackgroundWorker(BackgroundWorker *worker, BackgroundWorkerHandle **handle)
Definition: bgworker.c:962
void EnterParallelMode(void)
Definition: xact.c:979
volatile sig_atomic_t InterruptPending
Definition: globals.c:30
Size EstimateRelationMapSpace(void)
Definition: relmapper.c:646
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition: shm_toc.c:88
char * context
Definition: elog.h:386
shm_mq_handle * shm_mq_attach(shm_mq *mq, dsm_segment *seg, BackgroundWorkerHandle *handle)
Definition: shm_mq.c:283
ErrorContextCallback * error_context_stack
Definition: parallel.h:41
static const struct @14 InternalParallelWorkers[]
void SetParallelStartTimestamps(TimestampTz xact_ts, TimestampTz stmt_ts)
Definition: xact.c:787
#define PARALLEL_KEY_TRANSACTION_STATE
Definition: parallel.c:72
char bgw_type[BGW_MAXLEN]
Definition: bgworker.h:91
void dsm_detach(dsm_segment *seg)
Definition: dsm.c:769
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition: shm_toc.c:171
void RestoreReindexState(void *reindexstate)
Definition: index.c:4270
int errmsg(const char *fmt,...)
Definition: elog.c:909
void(* parallel_worker_main_type)(dsm_segment *seg, shm_toc *toc)
Definition: parallel.h:23
void pq_endmessage(StringInfo buf)
Definition: pqformat.c:298
void ParallelWorkerMain(Datum main_arg)
Definition: parallel.c:1240
pid_t bgw_notify_pid
Definition: bgworker.h:99
static FixedParallelState * MyFixedParallelState
Definition: parallel.c:121
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:863
void SetCurrentRoleId(Oid roleid, bool is_superuser)
Definition: miscinit.c:858
#define HOLD_INTERRUPTS()
Definition: miscadmin.h:119
#define elog(elevel,...)
Definition: elog.h:232
bool InitializingParallelWorker
Definition: parallel.c:118
int i
TimestampTz stmt_ts
Definition: parallel.c:96
Definition: shm_mq.c:71
void RestoreRelationMap(char *startAddress)
Definition: relmapper.c:674
#define PARALLEL_KEY_SESSION_DSM
Definition: parallel.c:74
#define BUFFERALIGN(LEN)
Definition: c.h:759
void pq_set_parallel_leader(pid_t pid, BackendId backend_id)
Definition: pqmq.c:76
void * arg
struct Latch * MyLatch
Definition: globals.c:57
void HandleParallelMessageInterrupt(void)
Definition: parallel.c:992
unsigned int pq_getmsgint(StringInfo msg, int b)
Definition: pqformat.c:417
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:102
pid_t parallel_leader_pid
Definition: parallel.c:93
void shm_mq_set_receiver(shm_mq *mq, PGPROC *proc)
Definition: shm_mq.c:199
static dlist_head pcxt_list
Definition: parallel.c:124
void pq_getmsgend(StringInfo msg)
Definition: pqformat.c:637
#define pq_putmessage(msgtype, s, len)
Definition: libpq.h:40
static void static void status(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:227
struct FixedParallelState FixedParallelState
dsm_handle GetSessionDsmHandle(void)
Definition: session.c:70
shm_mq_result shm_mq_receive(shm_mq_handle *mqh, Size *nbytesp, void **datap, bool nowait)
Definition: shm_mq.c:548
char bgw_library_name[BGW_MAXLEN]
Definition: bgworker.h:95
void WaitForParallelWorkersToAttach(ParallelContext *pcxt)
Definition: parallel.c:648
#define PARALLEL_KEY_PENDING_SYNCS
Definition: parallel.c:75
bool session_auth_is_superuser
Definition: guc.c:562
SerializableXactHandle ShareSerializableXact(void)
Definition: predicate.c:5186
void AtEOXact_Parallel(bool isCommit)
Definition: parallel.c:1223
Definition: proc.h:121
#define PARALLEL_KEY_ENTRYPOINT
Definition: parallel.c:73
#define snprintf
Definition: port.h:216
#define PARALLEL_KEY_COMBO_CID
Definition: parallel.c:69
#define WL_LATCH_SET
Definition: latch.h:125
#define _(x)
Definition: elog.c:89
void AtEOSubXact_Parallel(bool isCommit, SubTransactionId mySubId)
Definition: parallel.c:1204
void SerializeGUCState(Size maxsize, char *start_address)
Definition: guc.c:10745
void appendBinaryStringInfo(StringInfo str, const char *data, int datalen)
Definition: stringinfo.c:227
#define PARALLEL_KEY_ACTIVE_SNAPSHOT
Definition: parallel.c:71
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition: shm_toc.c:232
#define die(msg)
Definition: pg_test_fsync.c:97
void SerializeComboCIDState(Size maxsize, char *start_address)
Definition: combocid.c:316
#define PARALLEL_KEY_LIBRARY
Definition: parallel.c:67
BgwHandleStatus GetBackgroundWorkerPid(BackgroundWorkerHandle *handle, pid_t *pidp)
Definition: bgworker.c:1074
void RestoreGUCState(void *gucstate)
Definition: guc.c:10831
shm_toc * toc
Definition: parallel.h:45
#define WL_EXIT_ON_PM_DEATH
Definition: latch.h:130
void NotifyMyFrontEnd(const char *channel, const char *payload, int32 srcPid)
Definition: async.c:2305
TimestampTz GetCurrentStatementStartTimestamp(void)
Definition: xact.c:807
void * private_memory
Definition: parallel.h:44
void BackgroundWorkerUnblockSignals(void)
Definition: postmaster.c:5723