PostgreSQL Source Code git master
Loading...
Searching...
No Matches
parallel.c File Reference
#include "postgres_fe.h"
#include <sys/select.h>
#include <sys/wait.h>
#include <signal.h>
#include <unistd.h>
#include <fcntl.h>
#include "fe_utils/string_utils.h"
#include "parallel.h"
#include "pg_backup_utils.h"
Include dependency graph for parallel.c:

Go to the source code of this file.

Data Structures

struct  ParallelSlot
 
struct  ShutdownInformation
 
struct  DumpSignalInformation
 

Macros

#define PIPE_READ   0
 
#define PIPE_WRITE   1
 
#define NO_SLOT   (-1) /* Failure result for GetIdleWorker() */
 
#define WORKER_IS_RUNNING(workerStatus)    ((workerStatus) == WRKR_IDLE || (workerStatus) == WRKR_WORKING)
 
#define pgpipe(a)   pipe(a)
 
#define piperead(a, b, c)   read(a,b,c)
 
#define pipewrite(a, b, c)   write(a,b,c)
 
#define write_stderr(str)
 
#define messageStartsWith(msg, prefix)    (strncmp(msg, prefix, strlen(prefix)) == 0)
 

Typedefs

typedef struct ShutdownInformation ShutdownInformation
 
typedef struct DumpSignalInformation DumpSignalInformation
 

Enumerations

enum  T_WorkerStatus { WRKR_NOT_STARTED = 0 , WRKR_IDLE , WRKR_WORKING , WRKR_TERMINATED }
 

Functions

static ParallelSlot * GetMyPSlot (ParallelState *pstate)
 
static void archive_close_connection (int code, void *arg)
 
static void ShutdownWorkersHard (ParallelState *pstate)
 
static void WaitForTerminatingWorkers (ParallelState *pstate)
 
static void set_cancel_handler (void)
 
static void set_cancel_pstate (ParallelState *pstate)
 
static void set_cancel_slot_archive (ParallelSlot *slot, ArchiveHandle *AH)
 
static void RunWorker (ArchiveHandle *AH, ParallelSlot *slot)
 
static int GetIdleWorker (ParallelState *pstate)
 
static bool HasEveryWorkerTerminated (ParallelState *pstate)
 
static void lockTableForWorker (ArchiveHandle *AH, TocEntry *te)
 
static void WaitForCommands (ArchiveHandle *AH, int pipefd[2])
 
static bool ListenToWorkers (ArchiveHandle *AH, ParallelState *pstate, bool do_wait)
 
static char * getMessageFromLeader (int pipefd[2])
 
static void sendMessageToLeader (int pipefd[2], const char *str)
 
static int select_loop (int maxFd, fd_set *workerset)
 
static char * getMessageFromWorker (ParallelState *pstate, bool do_wait, int *worker)
 
static void sendMessageToWorker (ParallelState *pstate, int worker, const char *str)
 
static char * readMessageFromPipe (int fd)
 
void init_parallel_dump_utils (void)
 
void on_exit_close_archive (Archive *AHX)
 
void replace_on_exit_close_archive (Archive *AHX)
 
static void sigTermHandler (SIGNAL_ARGS)
 
void set_archive_cancel_info (ArchiveHandle *AH, PGconn *conn)
 
ParallelState * ParallelBackupStart (ArchiveHandle *AH)
 
void ParallelBackupEnd (ArchiveHandle *AH, ParallelState *pstate)
 
static void buildWorkerCommand (ArchiveHandle *AH, TocEntry *te, T_Action act, char *buf, int buflen)
 
static void parseWorkerCommand (ArchiveHandle *AH, TocEntry **te, T_Action *act, const char *msg)
 
static void buildWorkerResponse (ArchiveHandle *AH, TocEntry *te, T_Action act, int status, char *buf, int buflen)
 
static int parseWorkerResponse (ArchiveHandle *AH, TocEntry *te, const char *msg)
 
void DispatchJobForTocEntry (ArchiveHandle *AH, ParallelState *pstate, TocEntry *te, T_Action act, ParallelCompletionPtr callback, void *callback_data)
 
bool IsEveryWorkerIdle (ParallelState *pstate)
 
void WaitForWorkers (ArchiveHandle *AH, ParallelState *pstate, WFW_WaitOption mode)
 

Variables

static ShutdownInformation shutdown_info
 
static volatile DumpSignalInformation signal_info
 

Macro Definition Documentation

◆ messageStartsWith

#define messageStartsWith (   msg,
  prefix 
)     (strncmp(msg, prefix, strlen(prefix)) == 0)

Definition at line 228 of file parallel.c.

238{
239#ifdef WIN32
241 {
243 int err;
244
245 /* Prepare for threaded operation */
248
249 /* Initialize socket access */
250 err = WSAStartup(MAKEWORD(2, 2), &wsaData);
251 if (err != 0)
252 pg_fatal("%s() failed: error code %d", "WSAStartup", err);
253
254 parallel_init_done = true;
255 }
256#endif
257}
258
259/*
260 * Find the ParallelSlot for the current worker process or thread.
261 *
262 * Returns NULL if no matching slot is found (this implies we're the leader).
263 */
264static ParallelSlot *
266{
267 int i;
268
269 for (i = 0; i < pstate->numWorkers; i++)
270 {
271#ifdef WIN32
272 if (pstate->parallelSlot[i].threadId == GetCurrentThreadId())
273#else
274 if (pstate->parallelSlot[i].pid == getpid())
275#endif
276 return &(pstate->parallelSlot[i]);
277 }
278
279 return NULL;
280}
281
282/*
283 * A thread-local version of getLocalPQExpBuffer().
284 *
285 * Non-reentrant but reduces memory leakage: we'll consume one buffer per
286 * thread, which is much better than one per fmtId/fmtQualifiedId call.
287 */
288#ifdef WIN32
289static PQExpBuffer
291{
292 /*
293 * The Tls code goes awry if we use a static var, so we provide for both
294 * static and auto, and omit any use of the static var when using Tls. We
295 * rely on TlsGetValue() to return 0 if the value is not yet set.
296 */
299
302 else
304
305 if (id_return) /* first time through? */
306 {
307 /* same buffer, just wipe contents */
309 }
310 else
311 {
312 /* new buffer */
316 else
318 }
319
320 return id_return;
321}
322#endif /* WIN32 */
323
324/*
325 * pg_dump and pg_restore call this to register the cleanup handler
326 * as soon as they've created the ArchiveHandle.
327 */
328void
330{
331 shutdown_info.AHX = AHX;
333}
334
335/*
336 * Update the archive handle in the on_exit callback registered by
337 * on_exit_close_archive(). When pg_restore processes a pg_dumpall archive
338 * containing multiple databases, each database is restored from a separate
339 * archive. After closing one archive and opening the next, we update the
340 * shutdown_info to reference the new archive handle so the cleanup callback
341 * will close the correct archive on exit.
342 */
343void
345{
346 shutdown_info.AHX = AHX;
347}
348
349/*
350 * on_exit_nicely handler for shutting down database connections and
351 * worker processes cleanly.
352 */
353static void
354archive_close_connection(int code, void *arg)
355{
357
358 if (si->pstate)
359 {
360 /* In parallel mode, must figure out who we are */
361 ParallelSlot *slot = GetMyPSlot(si->pstate);
362
363 if (!slot)
364 {
365 /*
366 * We're the leader. Forcibly shut down workers, then close our
367 * own database connection, if any.
368 */
369 ShutdownWorkersHard(si->pstate);
370
371 if (si->AHX)
373 }
374 else
375 {
376 /*
377 * We're a worker. Shut down our own DB connection if any. On
378 * Windows, we also have to close our communication sockets, to
379 * emulate what will happen on Unix when the worker process exits.
380 * (Without this, if this is a premature exit, the leader would
381 * fail to detect it because there would be no EOF condition on
382 * the other end of the pipe.)
383 */
384 if (slot->AH)
385 DisconnectDatabase(&(slot->AH->public));
386
387#ifdef WIN32
390#endif
391 }
392 }
393 else
394 {
395 /* Non-parallel operation: just kill the leader DB connection */
396 if (si->AHX)
398 }
399}
400
401/*
402 * Forcibly shut down any remaining workers, waiting for them to finish.
403 *
404 * Note that we don't expect to come here during normal exit (the workers
405 * should be long gone, and the ParallelState too). We're only here in a
406 * pg_fatal() situation, so intervening to cancel active commands is
407 * appropriate.
408 */
409static void
411{
412 int i;
413
414 /*
415 * Close our write end of the sockets so that any workers waiting for
416 * commands know they can exit. (Note: some of the pipeWrite fields might
417 * still be zero, if we failed to initialize all the workers. Hence, just
418 * ignore errors here.)
419 */
420 for (i = 0; i < pstate->numWorkers; i++)
422
423 /*
424 * Force early termination of any commands currently in progress.
425 */
426#ifndef WIN32
427 /* On non-Windows, send SIGTERM to each worker process. */
428 for (i = 0; i < pstate->numWorkers; i++)
429 {
430 pid_t pid = pstate->parallelSlot[i].pid;
431
432 if (pid != 0)
433 kill(pid, SIGTERM);
434 }
435#else
436
437 /*
438 * On Windows, send query cancels directly to the workers' backends. Use
439 * a critical section to ensure worker threads don't change state.
440 */
442 for (i = 0; i < pstate->numWorkers; i++)
443 {
444 ArchiveHandle *AH = pstate->parallelSlot[i].AH;
445 char errbuf[1];
446
447 if (AH != NULL && AH->connCancel != NULL)
448 (void) PQcancel(AH->connCancel, errbuf, sizeof(errbuf));
449 }
451#endif
452
453 /* Now wait for them to terminate. */
455}
456
457/*
458 * Wait for all workers to terminate.
459 */
460static void
462{
463 while (!HasEveryWorkerTerminated(pstate))
464 {
465 ParallelSlot *slot = NULL;
466 int j;
467
468#ifndef WIN32
469 /* On non-Windows, use wait() to wait for next worker to end */
470 int status;
471 pid_t pid = wait(&status);
472
473 /* Find dead worker's slot, and clear the PID field */
474 for (j = 0; j < pstate->numWorkers; j++)
475 {
476 slot = &(pstate->parallelSlot[j]);
477 if (slot->pid == pid)
478 {
479 slot->pid = 0;
480 break;
481 }
482 }
483#else /* WIN32 */
484 /* On Windows, we must use WaitForMultipleObjects() */
486 int nrun = 0;
487 DWORD ret;
489
490 for (j = 0; j < pstate->numWorkers; j++)
491 {
493 {
494 lpHandles[nrun] = (HANDLE) pstate->parallelSlot[j].hThread;
495 nrun++;
496 }
497 }
499 Assert(ret != WAIT_FAILED);
502
503 /* Find dead worker's slot, and clear the hThread field */
504 for (j = 0; j < pstate->numWorkers; j++)
505 {
506 slot = &(pstate->parallelSlot[j]);
507 if (slot->hThread == hThread)
508 {
509 /* For cleanliness, close handles for dead threads */
510 CloseHandle((HANDLE) slot->hThread);
511 slot->hThread = (uintptr_t) INVALID_HANDLE_VALUE;
512 break;
513 }
514 }
515#endif /* WIN32 */
516
517 /* On all platforms, update workerStatus and te[] as well */
518 Assert(j < pstate->numWorkers);
520 pstate->te[j] = NULL;
521 }
522}
523
524
525/*
526 * Code for responding to cancel interrupts (SIGINT, control-C, etc)
527 *
528 * This doesn't quite belong in this module, but it needs access to the
529 * ParallelState data, so there's not really a better place either.
530 *
531 * When we get a cancel interrupt, we could just die, but in pg_restore that
532 * could leave a SQL command (e.g., CREATE INDEX on a large table) running
533 * for a long time. Instead, we try to send a cancel request and then die.
534 * pg_dump probably doesn't really need this, but we might as well use it
535 * there too. Note that sending the cancel directly from the signal handler
536 * is safe because PQcancel() is written to make it so.
537 *
538 * In parallel operation on Unix, each process is responsible for canceling
539 * its own connection (this must be so because nobody else has access to it).
540 * Furthermore, the leader process should attempt to forward its signal to
541 * each child. In simple manual use of pg_dump/pg_restore, forwarding isn't
542 * needed because typing control-C at the console would deliver SIGINT to
543 * every member of the terminal process group --- but in other scenarios it
544 * might be that only the leader gets signaled.
545 *
546 * On Windows, the cancel handler runs in a separate thread, because that's
547 * how SetConsoleCtrlHandler works. We make it stop worker threads, send
548 * cancels on all active connections, and then return FALSE, which will allow
549 * the process to die. For safety's sake, we use a critical section to
550 * protect the PGcancel structures against being changed while the signal
551 * thread runs.
552 */
553
554#ifndef WIN32
555
556/*
557 * Signal handler (Unix only)
558 */
559static void
561{
562 int i;
563 char errbuf[1];
564
565 /*
566 * Some platforms allow delivery of new signals to interrupt an active
567 * signal handler. That could muck up our attempt to send PQcancel, so
568 * disable the signals that set_cancel_handler enabled.
569 */
573
574 /*
575 * If we're in the leader, forward signal to all workers. (It seems best
576 * to do this before PQcancel; killing the leader transaction will result
577 * in invalid-snapshot errors from active workers, which maybe we can
578 * quiet by killing workers first.) Ignore any errors.
579 */
580 if (signal_info.pstate != NULL)
581 {
582 for (i = 0; i < signal_info.pstate->numWorkers; i++)
583 {
585
586 if (pid != 0)
587 kill(pid, SIGTERM);
588 }
589 }
590
591 /*
592 * Send QueryCancel if we have a connection to send to. Ignore errors,
593 * there's not much we can do about them anyway.
594 */
596 (void) PQcancel(signal_info.myAH->connCancel, errbuf, sizeof(errbuf));
597
598 /*
599 * Report we're quitting, using nothing more complicated than write(2).
600 * When in parallel operation, only the leader process should do this.
601 */
603 {
604 if (progname)
605 {
607 write_stderr(": ");
608 }
609 write_stderr("terminated by user\n");
610 }
611
612 /*
613 * And die, using _exit() not exit() because the latter will invoke atexit
614 * handlers that can fail if we interrupted related code.
615 */
616 _exit(1);
617}
618
619/*
620 * Enable cancel interrupt handler, if not already done.
621 */
622static void
624{
625 /*
626 * When forking, signal_info.handler_set will propagate into the new
627 * process, but that's fine because the signal handler state does too.
628 */
630 {
632
636 }
637}
638
639#else /* WIN32 */
640
641/*
642 * Console interrupt handler --- runs in a newly-started thread.
643 *
644 * After stopping other threads and sending cancel requests on all open
645 * connections, we return FALSE which will allow the default ExitProcess()
646 * action to be taken.
647 */
648static BOOL WINAPI
650{
651 int i;
652 char errbuf[1];
653
654 if (dwCtrlType == CTRL_C_EVENT ||
656 {
657 /* Critical section prevents changing data we look at here */
659
660 /*
661 * If in parallel mode, stop worker threads and send QueryCancel to
662 * their connected backends. The main point of stopping the worker
663 * threads is to keep them from reporting the query cancels as errors,
664 * which would clutter the user's screen. We needn't stop the leader
665 * thread since it won't be doing much anyway. Do this before
666 * canceling the main transaction, else we might get invalid-snapshot
667 * errors reported before we can stop the workers. Ignore errors,
668 * there's not much we can do about them anyway.
669 */
670 if (signal_info.pstate != NULL)
671 {
672 for (i = 0; i < signal_info.pstate->numWorkers; i++)
673 {
675 ArchiveHandle *AH = slot->AH;
676 HANDLE hThread = (HANDLE) slot->hThread;
677
678 /*
679 * Using TerminateThread here may leave some resources leaked,
680 * but it doesn't matter since we're about to end the whole
681 * process.
682 */
685
686 if (AH != NULL && AH->connCancel != NULL)
687 (void) PQcancel(AH->connCancel, errbuf, sizeof(errbuf));
688 }
689 }
690
691 /*
692 * Send QueryCancel to leader connection, if enabled. Ignore errors,
693 * there's not much we can do about them anyway.
694 */
697 errbuf, sizeof(errbuf));
698
700
701 /*
702 * Report we're quitting, using nothing more complicated than
703 * write(2). (We might be able to get away with using pg_log_*()
704 * here, but since we terminated other threads uncleanly above, it
705 * seems better to assume as little as possible.)
706 */
707 if (progname)
708 {
710 write_stderr(": ");
711 }
712 write_stderr("terminated by user\n");
713 }
714
715 /* Always return FALSE to allow signal handling to continue */
716 return FALSE;
717}
718
719/*
720 * Enable cancel interrupt handler, if not already done.
721 */
722static void
724{
726 {
728
730
732 }
733}
734
735#endif /* WIN32 */
736
737
738/*
739 * set_archive_cancel_info
740 *
741 * Fill AH->connCancel with cancellation info for the specified database
742 * connection; or clear it if conn is NULL.
743 */
744void
746{
748
749 /*
750 * Activate the interrupt handler if we didn't yet in this process. On
751 * Windows, this also initializes signal_info_lock; therefore it's
752 * important that this happen at least once before we fork off any
753 * threads.
754 */
756
757 /*
758 * On Unix, we assume that storing a pointer value is atomic with respect
759 * to any possible signal interrupt. On Windows, use a critical section.
760 */
761
762#ifdef WIN32
764#endif
765
766 /* Free the old one if we have one */
768 /* be sure interrupt handler doesn't use pointer while freeing */
769 AH->connCancel = NULL;
770
771 if (oldConnCancel != NULL)
773
774 /* Set the new one if specified */
775 if (conn)
777
778 /*
779 * On Unix, there's only ever one active ArchiveHandle per process, so we
780 * can just set signal_info.myAH unconditionally. On Windows, do that
781 * only in the main thread; worker threads have to make sure their
782 * ArchiveHandle appears in the pstate data, which is dealt with in
783 * RunWorker().
784 */
785#ifndef WIN32
786 signal_info.myAH = AH;
787#else
789 signal_info.myAH = AH;
790#endif
791
792#ifdef WIN32
794#endif
795}
796
797/*
798 * set_cancel_pstate
799 *
800 * Set signal_info.pstate to point to the specified ParallelState, if any.
801 * We need this mainly to have an interlock against Windows signal thread.
802 */
803static void
805{
806#ifdef WIN32
808#endif
809
810 signal_info.pstate = pstate;
811
812#ifdef WIN32
814#endif
815}
816
817/*
818 * set_cancel_slot_archive
819 *
820 * Set ParallelSlot's AH field to point to the specified archive, if any.
821 * We need this mainly to have an interlock against Windows signal thread.
822 */
823static void
825{
826#ifdef WIN32
828#endif
829
830 slot->AH = AH;
831
832#ifdef WIN32
834#endif
835}
836
837
838/*
839 * This function is called by both Unix and Windows variants to set up
840 * and run a worker process. Caller should exit the process (or thread)
841 * upon return.
842 */
843static void
845{
846 int pipefd[2];
847
848 /* fetch child ends of pipes */
851
852 /*
853 * Clone the archive so that we have our own state to work with, and in
854 * particular our own database connection.
855 *
856 * We clone on Unix as well as Windows, even though technically we don't
857 * need to because fork() gives us a copy in our own address space
858 * already. But CloneArchive resets the state information and also clones
859 * the database connection which both seem kinda helpful.
860 */
861 AH = CloneArchive(AH);
862
863 /* Remember cloned archive where signal handler can find it */
864 set_cancel_slot_archive(slot, AH);
865
866 /*
867 * Call the setup worker function that's defined in the ArchiveHandle.
868 */
869 (AH->SetupWorkerPtr) ((Archive *) AH);
870
871 /*
872 * Execute commands until done.
873 */
875
876 /*
877 * Disconnect from database and clean up.
878 */
881 DeCloneArchive(AH);
882}
883
884/*
885 * Thread base function for Windows
886 */
887#ifdef WIN32
888static unsigned __stdcall
890{
891 ArchiveHandle *AH = wi->AH;
892 ParallelSlot *slot = wi->slot;
893
894 /* Don't need WorkerInfo anymore */
895 free(wi);
896
897 /* Run the worker ... */
898 RunWorker(AH, slot);
899
900 /* Exit the thread */
901 _endthreadex(0);
902 return 0;
903}
904#endif /* WIN32 */
905
906/*
907 * This function starts a parallel dump or restore by spawning off the worker
908 * processes. For Windows, it creates a number of threads; on Unix the
909 * workers are created with fork().
910 */
913{
914 ParallelState *pstate;
915 int i;
916
917 Assert(AH->public.numWorkers > 0);
918
920
921 pstate->numWorkers = AH->public.numWorkers;
922 pstate->te = NULL;
923 pstate->parallelSlot = NULL;
924
925 if (AH->public.numWorkers == 1)
926 return pstate;
927
928 /* Create status arrays, being sure to initialize all fields to 0 */
929 pstate->te =
931 pstate->parallelSlot =
933
934#ifdef WIN32
935 /* Make fmtId() and fmtQualifiedId() use thread-local storage */
937#endif
938
939 /*
940 * Set the pstate in shutdown_info, to tell the exit handler that it must
941 * clean up workers as well as the main database connection. But we don't
942 * set this in signal_info yet, because we don't want child processes to
943 * inherit non-NULL signal_info.pstate.
944 */
945 shutdown_info.pstate = pstate;
946
947 /*
948 * Temporarily disable query cancellation on the leader connection. This
949 * ensures that child processes won't inherit valid AH->connCancel
950 * settings and thus won't try to issue cancels against the leader's
951 * connection. No harm is done if we fail while it's disabled, because
952 * the leader connection is idle at this point anyway.
953 */
955
956 /* Ensure stdio state is quiesced before forking */
957 fflush(NULL);
958
959 /* Create desired number of workers */
960 for (i = 0; i < pstate->numWorkers; i++)
961 {
962#ifdef WIN32
963 WorkerInfo *wi;
964 uintptr_t handle;
965#else
966 pid_t pid;
967#endif
968 ParallelSlot *slot = &(pstate->parallelSlot[i]);
969 int pipeMW[2],
970 pipeWM[2];
971
972 /* Create communication pipes for this worker */
973 if (pgpipe(pipeMW) < 0 || pgpipe(pipeWM) < 0)
974 pg_fatal("could not create communication channels: %m");
975
976 /* leader's ends of the pipes */
977 slot->pipeRead = pipeWM[PIPE_READ];
978 slot->pipeWrite = pipeMW[PIPE_WRITE];
979 /* child's ends of the pipes */
982
983#ifdef WIN32
984 /* Create transient structure to pass args to worker function */
986
987 wi->AH = AH;
988 wi->slot = slot;
989
990 handle = _beginthreadex(NULL, 0, (void *) &init_spawned_worker_win32,
991 wi, 0, &(slot->threadId));
992 slot->hThread = handle;
993 slot->workerStatus = WRKR_IDLE;
994#else /* !WIN32 */
995 pid = fork();
996 if (pid == 0)
997 {
998 /* we are the worker */
999 int j;
1000
1001 /* this is needed for GetMyPSlot() */
1002 slot->pid = getpid();
1003
1004 /* instruct signal handler that we're in a worker now */
1005 signal_info.am_worker = true;
1006
1007 /* close read end of Worker -> Leader */
1009 /* close write end of Leader -> Worker */
1011
1012 /*
1013 * Close all inherited fds for communication of the leader with
1014 * previously-forked workers.
1015 */
1016 for (j = 0; j < i; j++)
1017 {
1020 }
1021
1022 /* Run the worker ... */
1023 RunWorker(AH, slot);
1024
1025 /* We can just exit(0) when done */
1026 exit(0);
1027 }
1028 else if (pid < 0)
1029 {
1030 /* fork failed */
1031 pg_fatal("could not create worker process: %m");
1032 }
1033
1034 /* In Leader after successful fork */
1035 slot->pid = pid;
1036 slot->workerStatus = WRKR_IDLE;
1037
1038 /* close read end of Leader -> Worker */
1040 /* close write end of Worker -> Leader */
1042#endif /* WIN32 */
1043 }
1044
1045 /*
1046 * Having forked off the workers, disable SIGPIPE so that leader isn't
1047 * killed if it tries to send a command to a dead worker. We don't want
1048 * the workers to inherit this setting, though.
1049 */
1050#ifndef WIN32
1052#endif
1053
1054 /*
1055 * Re-establish query cancellation on the leader connection.
1056 */
1058
1059 /*
1060 * Tell the cancel signal handler to forward signals to worker processes,
1061 * too. (As with query cancel, we did not need this earlier because the
1062 * workers have not yet been given anything to do; if we die before this
1063 * point, any already-started workers will see EOF and quit promptly.)
1064 */
1065 set_cancel_pstate(pstate);
1066
1067 return pstate;
1068}
1069
1070/*
1071 * Close down a parallel dump or restore.
1072 */
1073void
1075{
1076 int i;
1077
1078 /* No work if non-parallel */
1079 if (pstate->numWorkers == 1)
1080 return;
1081
1082 /* There should not be any unfinished jobs */
1083 Assert(IsEveryWorkerIdle(pstate));
1084
1085 /* Close the sockets so that the workers know they can exit */
1086 for (i = 0; i < pstate->numWorkers; i++)
1087 {
1090 }
1091
1092 /* Wait for them to exit */
1094
1095 /*
1096 * Unlink pstate from shutdown_info, so the exit handler will not try to
1097 * use it; and likewise unlink from signal_info.
1098 */
1101
1102 /* Release state (mere neatnik-ism, since we're about to terminate) */
1103 free(pstate->te);
1104 free(pstate->parallelSlot);
1105 free(pstate);
1106}
1107
1108/*
1109 * These next four functions handle construction and parsing of the command
1110 * strings and response strings for parallel workers.
1111 *
1112 * Currently, these can be the same regardless of which archive format we are
1113 * processing. In future, we might want to let format modules override these
1114 * functions to add format-specific data to a command or response.
1115 */
1116
1117/*
1118 * buildWorkerCommand: format a command string to send to a worker.
1119 *
1120 * The string is built in the caller-supplied buffer of size buflen.
1121 */
1122static void
1124 char *buf, int buflen)
1125{
1126 if (act == ACT_DUMP)
1127 snprintf(buf, buflen, "DUMP %d", te->dumpId);
1128 else if (act == ACT_RESTORE)
1129 snprintf(buf, buflen, "RESTORE %d", te->dumpId);
1130 else
1131 Assert(false);
1132}
1133
1134/*
1135 * parseWorkerCommand: interpret a command string in a worker.
1136 */
1137static void
1139 const char *msg)
1140{
1141 DumpId dumpId;
1142 int nBytes;
1143
1144 if (messageStartsWith(msg, "DUMP "))
1145 {
1146 *act = ACT_DUMP;
1147 sscanf(msg, "DUMP %d%n", &dumpId, &nBytes);
1148 Assert(nBytes == strlen(msg));
1149 *te = getTocEntryByDumpId(AH, dumpId);
1150 Assert(*te != NULL);
1151 }
1152 else if (messageStartsWith(msg, "RESTORE "))
1153 {
1154 *act = ACT_RESTORE;
1155 sscanf(msg, "RESTORE %d%n", &dumpId, &nBytes);
1156 Assert(nBytes == strlen(msg));
1157 *te = getTocEntryByDumpId(AH, dumpId);
1158 Assert(*te != NULL);
1159 }
1160 else
1161 pg_fatal("unrecognized command received from leader: \"%s\"",
1162 msg);
1163}
1164
1165/*
1166 * buildWorkerResponse: format a response string to send to the leader.
1167 *
1168 * The string is built in the caller-supplied buffer of size buflen.
1169 */
1170static void
1172 char *buf, int buflen)
1173{
1174 snprintf(buf, buflen, "OK %d %d %d",
1175 te->dumpId,
1176 status,
1177 status == WORKER_IGNORED_ERRORS ? AH->public.n_errors : 0);
1178}
1179
1180/*
1181 * parseWorkerResponse: parse the status message returned by a worker.
1182 *
1183 * Returns the integer status code, and may update fields of AH and/or te.
1184 */
1185static int
1187 const char *msg)
1188{
1189 DumpId dumpId;
1190 int nBytes,
1191 n_errors;
1192 int status = 0;
1193
1194 if (messageStartsWith(msg, "OK "))
1195 {
1196 sscanf(msg, "OK %d %d %d%n", &dumpId, &status, &n_errors, &nBytes);
1197
1198 Assert(dumpId == te->dumpId);
1199 Assert(nBytes == strlen(msg));
1200
1201 AH->public.n_errors += n_errors;
1202 }
1203 else
1204 pg_fatal("invalid message received from worker: \"%s\"",
1205 msg);
1206
1207 return status;
1208}
1209
1210/*
1211 * Dispatch a job to some free worker.
1212 *
1213 * te is the TocEntry to be processed, act is the action to be taken on it.
1214 * callback is the function to call on completion of the job.
1215 *
1216 * If no worker is currently available, this will block, and previously
1217 * registered callback functions may be called.
1218 */
1219void
1221 ParallelState *pstate,
1222 TocEntry *te,
1223 T_Action act,
1225 void *callback_data)
1226{
1227 int worker;
1228 char buf[256];
1229
1230 /* Get a worker, waiting if none are idle */
1231 while ((worker = GetIdleWorker(pstate)) == NO_SLOT)
1232 WaitForWorkers(AH, pstate, WFW_ONE_IDLE);
1233
1234 /* Construct and send command string */
1235 buildWorkerCommand(AH, te, act, buf, sizeof(buf));
1236
1237 sendMessageToWorker(pstate, worker, buf);
1238
1239 /* Remember worker is busy, and which TocEntry it's working on */
1240 pstate->parallelSlot[worker].workerStatus = WRKR_WORKING;
1241 pstate->parallelSlot[worker].callback = callback;
1242 pstate->parallelSlot[worker].callback_data = callback_data;
1243 pstate->te[worker] = te;
1244}
1245
1246/*
1247 * Find an idle worker and return its slot number.
1248 * Return NO_SLOT if none are idle.
1249 */
1250static int
1252{
1253 int i;
1254
1255 for (i = 0; i < pstate->numWorkers; i++)
1256 {
1257 if (pstate->parallelSlot[i].workerStatus == WRKR_IDLE)
1258 return i;
1259 }
1260 return NO_SLOT;
1261}
1262
1263/*
1264 * Return true iff no worker is running.
1265 */
1266static bool
1268{
1269 int i;
1270
1271 for (i = 0; i < pstate->numWorkers; i++)
1272 {
1274 return false;
1275 }
1276 return true;
1277}
1278
1279/*
1280 * Return true iff every worker is in the WRKR_IDLE state.
1281 */
1282bool
1284{
1285 int i;
1286
1287 for (i = 0; i < pstate->numWorkers; i++)
1288 {
1289 if (pstate->parallelSlot[i].workerStatus != WRKR_IDLE)
1290 return false;
1291 }
1292 return true;
1293}
1294
1295/*
1296 * Acquire lock on a table to be dumped by a worker process.
1297 *
1298 * The leader process is already holding an ACCESS SHARE lock. Ordinarily
1299 * it's no problem for a worker to get one too, but if anything else besides
1300 * pg_dump is running, there's a possible deadlock:
1301 *
1302 * 1) Leader dumps the schema and locks all tables in ACCESS SHARE mode.
1303 * 2) Another process requests an ACCESS EXCLUSIVE lock (which is not granted
1304 * because the leader holds a conflicting ACCESS SHARE lock).
1305 * 3) A worker process also requests an ACCESS SHARE lock to read the table.
1306 * The worker is enqueued behind the ACCESS EXCLUSIVE lock request.
1307 * 4) Now we have a deadlock, since the leader is effectively waiting for
1308 * the worker. The server cannot detect that, however.
1309 *
1310 * To prevent an infinite wait, prior to touching a table in a worker, request
1311 * a lock in ACCESS SHARE mode but with NOWAIT. If we don't get the lock,
1312 * then we know that somebody else has requested an ACCESS EXCLUSIVE lock and
1313 * so we have a deadlock. We must fail the backup in that case.
1314 */
1315static void
1317{
1318 const char *qualId;
1319 PQExpBuffer query;
1320 PGresult *res;
1321
1322 /* Nothing to do for BLOBS */
1323 if (strcmp(te->desc, "BLOBS") == 0)
1324 return;
1325
1326 query = createPQExpBuffer();
1327
1328 qualId = fmtQualifiedId(te->namespace, te->tag);
1329
1330 appendPQExpBuffer(query, "LOCK TABLE %s IN ACCESS SHARE MODE NOWAIT",
1331 qualId);
1332
1333 res = PQexec(AH->connection, query->data);
1334
1335 if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
1336 pg_fatal("could not obtain lock on relation \"%s\"\n"
1337 "This usually means that someone requested an ACCESS EXCLUSIVE lock "
1338 "on the table after the pg_dump parent process had gotten the "
1339 "initial ACCESS SHARE lock on the table.", qualId);
1340
1341 PQclear(res);
1342 destroyPQExpBuffer(query);
1343}
1344
1345/*
1346 * WaitForCommands: main routine for a worker process.
1347 *
1348 * Read and execute commands from the leader until we see EOF on the pipe.
1349 */
1350static void
1352{
1353 char *command;
1354 TocEntry *te;
1355 T_Action act;
1356 int status = 0;
1357 char buf[256];
1358
1359 for (;;)
1360 {
1361 if (!(command = getMessageFromLeader(pipefd)))
1362 {
1363 /* EOF, so done */
1364 return;
1365 }
1366
1367 /* Decode the command */
1368 parseWorkerCommand(AH, &te, &act, command);
1369
1370 if (act == ACT_DUMP)
1371 {
1372 /* Acquire lock on this table within the worker's session */
1373 lockTableForWorker(AH, te);
1374
1375 /* Perform the dump command */
1376 status = (AH->WorkerJobDumpPtr) (AH, te);
1377 }
1378 else if (act == ACT_RESTORE)
1379 {
1380 /* Perform the restore command */
1381 status = (AH->WorkerJobRestorePtr) (AH, te);
1382 }
1383 else
1384 Assert(false);
1385
1386 /* Return status to leader */
1387 buildWorkerResponse(AH, te, act, status, buf, sizeof(buf));
1388
1390
1391 /* command was pg_malloc'd and we are responsible for free()ing it. */
1392 free(command);
1393 }
1394}
1395
1396/*
1397 * Check for status messages from workers.
1398 *
1399 * If do_wait is true, wait to get a status message; otherwise, just return
1400 * immediately if there is none available.
1401 *
1402 * When we get a status message, we pass the status code to the callback
1403 * function that was specified to DispatchJobForTocEntry, then reset the
1404 * worker status to IDLE.
1405 *
1406 * Returns true if we collected a status message, else false.
1407 *
1408 * XXX is it worth checking for more than one status message per call?
1409 * It seems somewhat unlikely that multiple workers would finish at exactly
1410 * the same time.
1411 */
1412static bool
1414{
1415 int worker;
1416 char *msg;
1417
1418 /* Try to collect a status message */
1419 msg = getMessageFromWorker(pstate, do_wait, &worker);
1420
1421 if (!msg)
1422 {
1423 /* If do_wait is true, we must have detected EOF on some socket */
1424 if (do_wait)
1425 pg_fatal("a worker process died unexpectedly");
1426 return false;
1427 }
1428
1429 /* Process it and update our idea of the worker's status */
1430 if (messageStartsWith(msg, "OK "))
1431 {
1432 ParallelSlot *slot = &pstate->parallelSlot[worker];
1433 TocEntry *te = pstate->te[worker];
1434 int status;
1435
1436 status = parseWorkerResponse(AH, te, msg);
1437 slot->callback(AH, te, status, slot->callback_data);
1438 slot->workerStatus = WRKR_IDLE;
1439 pstate->te[worker] = NULL;
1440 }
1441 else
1442 pg_fatal("invalid message received from worker: \"%s\"",
1443 msg);
1444
1445 /* Free the string returned from getMessageFromWorker */
1446 free(msg);
1447
1448 return true;
1449}
1450
1451/*
1452 * Check for status results from workers, waiting if necessary.
1453 *
1454 * Available wait modes are:
1455 * WFW_NO_WAIT: reap any available status, but don't block
1456 * WFW_GOT_STATUS: wait for at least one more worker to finish
1457 * WFW_ONE_IDLE: wait for at least one worker to be idle
1458 * WFW_ALL_IDLE: wait for all workers to be idle
1459 *
1460 * Any received results are passed to the callback specified to
1461 * DispatchJobForTocEntry.
1462 *
1463 * This function is executed in the leader process.
1464 */
1465void
1467{
1468 bool do_wait = false;
1469
1470 /*
1471 * In GOT_STATUS mode, always block waiting for a message, since we can't
1472 * return till we get something. In other modes, we don't block the first
1473 * time through the loop.
1474 */
1475 if (mode == WFW_GOT_STATUS)
1476 {
1477 /* Assert that caller knows what it's doing */
1478 Assert(!IsEveryWorkerIdle(pstate));
1479 do_wait = true;
1480 }
1481
1482 for (;;)
1483 {
1484 /*
1485 * Check for status messages, even if we don't need to block. We do
1486 * not try very hard to reap all available messages, though, since
1487 * there's unlikely to be more than one.
1488 */
1489 if (ListenToWorkers(AH, pstate, do_wait))
1490 {
1491 /*
1492 * If we got a message, we are done by definition for GOT_STATUS
1493 * mode, and we can also be certain that there's at least one idle
1494 * worker. So we're done in all but ALL_IDLE mode.
1495 */
1496 if (mode != WFW_ALL_IDLE)
1497 return;
1498 }
1499
1500 /* Check whether we must wait for new status messages */
1501 switch (mode)
1502 {
1503 case WFW_NO_WAIT:
1504 return; /* never wait */
1505 case WFW_GOT_STATUS:
1506 Assert(false); /* can't get here, because we waited */
1507 break;
1508 case WFW_ONE_IDLE:
1509 if (GetIdleWorker(pstate) != NO_SLOT)
1510 return;
1511 break;
1512 case WFW_ALL_IDLE:
1513 if (IsEveryWorkerIdle(pstate))
1514 return;
1515 break;
1516 }
1517
1518 /* Loop back, and this time wait for something to happen */
1519 do_wait = true;
1520 }
1521}
1522
1523/*
1524 * Read one command message from the leader, blocking if necessary
1525 * until one is available, and return it as a malloc'd string.
1526 * On EOF, return NULL.
1527 *
1528 * This function is executed in worker processes.
1529 */
1530static char *
1532{
1534}
1535
1536/*
1537 * Send a status message to the leader.
1538 *
1539 * This function is executed in worker processes.
1540 */
1541static void
1542sendMessageToLeader(int pipefd[2], const char *str)
1543{
1544 int len = strlen(str) + 1;
1545
1546 if (pipewrite(pipefd[PIPE_WRITE], str, len) != len)
1547 pg_fatal("could not write to the communication channel: %m");
1548}
1549
/*
 * Wait until some descriptor in "workerset" becomes readable.
 * Returns -1 on error, else the number of readable descriptors.
 *
 * maxFd is the highest descriptor number present in the set.  On return,
 * *workerset holds whatever select() left in it.  An interrupted select()
 * (EINTR) is retried transparently.
 */
static int
select_loop(int maxFd, fd_set *workerset)
{
	int			i;

	/* keep a pristine copy so each retry starts from the requested set */
	fd_set		saveSet = *workerset;

	for (;;)
	{
		*workerset = saveSet;
		i = select(maxFd + 1, workerset, NULL, NULL, NULL);

#ifndef WIN32
		if (i < 0 && errno == EINTR)
			continue;
#else
		if (i == SOCKET_ERROR && WSAGetLastError() == WSAEINTR)
			continue;
#endif
		break;
	}

	return i;
}
1577
1578
1579/*
1580 * Check for messages from worker processes.
1581 *
1582 * If a message is available, return it as a malloc'd string, and put the
1583 * index of the sending worker in *worker.
1584 *
1585 * If nothing is available, wait if "do_wait" is true, else return NULL.
1586 *
1587 * If we detect EOF on any socket, we'll return NULL. It's not great that
1588 * that's hard to distinguish from the no-data-available case, but for now
1589 * our one caller is okay with that.
1590 *
1591 * This function is executed in the leader process.
1592 */
1593static char *
1594getMessageFromWorker(ParallelState *pstate, bool do_wait, int *worker)
1595{
1596 int i;
1598 int maxFd = -1;
1599 struct timeval nowait = {0, 0};
1600
1601 /* construct bitmap of socket descriptors for select() */
1603 for (i = 0; i < pstate->numWorkers; i++)
1604 {
1606 continue;
1608 if (pstate->parallelSlot[i].pipeRead > maxFd)
1609 maxFd = pstate->parallelSlot[i].pipeRead;
1610 }
1611
1612 if (do_wait)
1613 {
1615 Assert(i != 0);
1616 }
1617 else
1618 {
1619 if ((i = select(maxFd + 1, &workerset, NULL, NULL, &nowait)) == 0)
1620 return NULL;
1621 }
1622
1623 if (i < 0)
1624 pg_fatal("%s() failed: %m", "select");
1625
1626 for (i = 0; i < pstate->numWorkers; i++)
1627 {
1628 char *msg;
1629
1631 continue;
1632 if (!FD_ISSET(pstate->parallelSlot[i].pipeRead, &workerset))
1633 continue;
1634
1635 /*
1636 * Read the message if any. If the socket is ready because of EOF,
1637 * we'll return NULL instead (and the socket will stay ready, so the
1638 * condition will persist).
1639 *
1640 * Note: because this is a blocking read, we'll wait if only part of
1641 * the message is available. Waiting a long time would be bad, but
1642 * since worker status messages are short and are always sent in one
1643 * operation, it shouldn't be a problem in practice.
1644 */
1646 *worker = i;
1647 return msg;
1648 }
1649 Assert(false);
1650 return NULL;
1651}
1652
1653/*
1654 * Send a command message to the specified worker process.
1655 *
1656 * This function is executed in the leader process.
1657 */
1658static void
1659sendMessageToWorker(ParallelState *pstate, int worker, const char *str)
1660{
1661 int len = strlen(str) + 1;
1662
1663 if (pipewrite(pstate->parallelSlot[worker].pipeWrite, str, len) != len)
1664 {
1665 pg_fatal("could not write to the communication channel: %m");
1666 }
1667}
1668
/*
 * Read one message from the specified pipe (fd), blocking if necessary
 * until one is available, and return it as a malloc'd string.
 * On EOF, return NULL.
 *
 * A read error is handled the same way as EOF: the partial buffer is
 * released and NULL is returned.
 *
 * A "message" on the channel is just a null-terminated string.
 */
static char *
readMessageFromPipe(int fd)
{
	char	   *msg;
	int			msgsize,
				bufsize;
	int			ret;

	/*
	 * In theory, if we let piperead() read multiple bytes, it might give us
	 * back fragments of multiple messages.  (That can't actually occur, since
	 * neither leader nor workers send more than one message without waiting
	 * for a reply, but we don't wish to assume that here.)  For simplicity,
	 * read a byte at a time until we get the terminating '\0'.  This method
	 * is a bit inefficient, but since this is only used for relatively short
	 * command and status strings, it shouldn't matter.
	 */
	bufsize = 64;				/* could be any number */
	msg = (char *) pg_malloc(bufsize);
	msgsize = 0;
	for (;;)
	{
		/* the buffer is grown below before it can fill up */
		Assert(msgsize < bufsize);
		ret = piperead(fd, msg + msgsize, 1);
		if (ret <= 0)
			break;				/* error or connection closure */

		Assert(ret == 1);

		if (msg[msgsize] == '\0')
			return msg;			/* collected whole message */

		msgsize++;
		if (msgsize == bufsize) /* enlarge buffer if needed */
		{
			bufsize += 16;		/* could be any number */
			msg = (char *) pg_realloc(msg, bufsize);
		}
	}

	/* Other end has closed the connection */
	pg_free(msg);
	return NULL;
}
1720
1721#ifdef WIN32
1722
1723/*
1724 * This is a replacement version of pipe(2) for Windows which allows the pipe
1725 * handles to be used in select().
1726 *
1727 * Reads and writes on the pipe must go through piperead()/pipewrite().
1728 *
1729 * For consistency with Unix we declare the returned handles as "int".
1730 * This is okay even on WIN64 because system handles are not more than
1731 * 32 bits wide, but we do have to do some casting.
1732 */
1733static int
1734pgpipe(int handles[2])
1735{
1736 pgsocket s,
1737 tmp_sock;
1738 struct sockaddr_in serv_addr;
1739 int len = sizeof(serv_addr);
1740
1741 /* We have to use the Unix socket invalid file descriptor value here. */
1742 handles[0] = handles[1] = -1;
1743
1744 /*
1745 * setup listen socket
1746 */
1747 if ((s = socket(AF_INET, SOCK_STREAM, 0)) == PGINVALID_SOCKET)
1748 {
1749 pg_log_error("pgpipe: could not create socket: error code %d",
1750 WSAGetLastError());
1751 return -1;
1752 }
1753
1754 memset(&serv_addr, 0, sizeof(serv_addr));
1755 serv_addr.sin_family = AF_INET;
1756 serv_addr.sin_port = pg_hton16(0);
1757 serv_addr.sin_addr.s_addr = pg_hton32(INADDR_LOOPBACK);
1758 if (bind(s, (SOCKADDR *) &serv_addr, len) == SOCKET_ERROR)
1759 {
1760 pg_log_error("pgpipe: could not bind: error code %d",
1761 WSAGetLastError());
1762 closesocket(s);
1763 return -1;
1764 }
1765 if (listen(s, 1) == SOCKET_ERROR)
1766 {
1767 pg_log_error("pgpipe: could not listen: error code %d",
1768 WSAGetLastError());
1769 closesocket(s);
1770 return -1;
1771 }
1772 if (getsockname(s, (SOCKADDR *) &serv_addr, &len) == SOCKET_ERROR)
1773 {
1774 pg_log_error("pgpipe: %s() failed: error code %d", "getsockname",
1775 WSAGetLastError());
1776 closesocket(s);
1777 return -1;
1778 }
1779
1780 /*
1781 * setup pipe handles
1782 */
1784 {
1785 pg_log_error("pgpipe: could not create second socket: error code %d",
1786 WSAGetLastError());
1787 closesocket(s);
1788 return -1;
1789 }
1790 handles[1] = (int) tmp_sock;
1791
1793 {
1794 pg_log_error("pgpipe: could not connect socket: error code %d",
1795 WSAGetLastError());
1796 closesocket(handles[1]);
1797 handles[1] = -1;
1798 closesocket(s);
1799 return -1;
1800 }
1801 if ((tmp_sock = accept(s, (SOCKADDR *) &serv_addr, &len)) == PGINVALID_SOCKET)
1802 {
1803 pg_log_error("pgpipe: could not accept connection: error code %d",
1804 WSAGetLastError());
1805 closesocket(handles[1]);
1806 handles[1] = -1;
1807 closesocket(s);
1808 return -1;
1809 }
1810 handles[0] = (int) tmp_sock;
1811
1812 closesocket(s);
1813 return 0;
1814}
1815
1816#endif /* WIN32 */
void ParallelBackupEnd(ArchiveHandle *AH, ParallelState *pstate)
Definition parallel.c:1075
static void sendMessageToLeader(int pipefd[2], const char *str)
Definition parallel.c:1543
static ParallelSlot * GetMyPSlot(ParallelState *pstate)
Definition parallel.c:266
static void WaitForCommands(ArchiveHandle *AH, int pipefd[2])
Definition parallel.c:1352
void WaitForWorkers(ArchiveHandle *AH, ParallelState *pstate, WFW_WaitOption mode)
Definition parallel.c:1467
@ WRKR_WORKING
Definition parallel.c:81
@ WRKR_IDLE
Definition parallel.c:80
@ WRKR_TERMINATED
Definition parallel.c:82
static bool HasEveryWorkerTerminated(ParallelState *pstate)
Definition parallel.c:1268
void replace_on_exit_close_archive(Archive *AHX)
Definition parallel.c:345
#define pgpipe(a)
Definition parallel.c:139
static bool ListenToWorkers(ArchiveHandle *AH, ParallelState *pstate, bool do_wait)
Definition parallel.c:1414
static void sigTermHandler(SIGNAL_ARGS)
Definition parallel.c:561
#define PIPE_READ
Definition parallel.c:71
ParallelState * ParallelBackupStart(ArchiveHandle *AH)
Definition parallel.c:913
static char * readMessageFromPipe(int fd)
Definition parallel.c:1678
static int select_loop(int maxFd, fd_set *workerset)
Definition parallel.c:1556
static int parseWorkerResponse(ArchiveHandle *AH, TocEntry *te, const char *msg)
Definition parallel.c:1187
static int GetIdleWorker(ParallelState *pstate)
Definition parallel.c:1252
static void set_cancel_pstate(ParallelState *pstate)
Definition parallel.c:805
static void RunWorker(ArchiveHandle *AH, ParallelSlot *slot)
Definition parallel.c:845
static void set_cancel_slot_archive(ParallelSlot *slot, ArchiveHandle *AH)
Definition parallel.c:825
static void buildWorkerCommand(ArchiveHandle *AH, TocEntry *te, T_Action act, char *buf, int buflen)
Definition parallel.c:1124
static char * getMessageFromWorker(ParallelState *pstate, bool do_wait, int *worker)
Definition parallel.c:1595
static void archive_close_connection(int code, void *arg)
Definition parallel.c:355
#define NO_SLOT
Definition parallel.c:74
static void sendMessageToWorker(ParallelState *pstate, int worker, const char *str)
Definition parallel.c:1660
#define PIPE_WRITE
Definition parallel.c:72
static ShutdownInformation shutdown_info
Definition parallel.c:154
void on_exit_close_archive(Archive *AHX)
Definition parallel.c:330
void DispatchJobForTocEntry(ArchiveHandle *AH, ParallelState *pstate, TocEntry *te, T_Action act, ParallelCompletionPtr callback, void *callback_data)
Definition parallel.c:1221
#define WORKER_IS_RUNNING(workerStatus)
Definition parallel.c:85
static char * getMessageFromLeader(int pipefd[2])
Definition parallel.c:1532
static void lockTableForWorker(ArchiveHandle *AH, TocEntry *te)
Definition parallel.c:1317
#define piperead(a, b, c)
Definition parallel.c:140
#define pipewrite(a, b, c)
Definition parallel.c:141
static void set_cancel_handler(void)
Definition parallel.c:624
static void buildWorkerResponse(ArchiveHandle *AH, TocEntry *te, T_Action act, int status, char *buf, int buflen)
Definition parallel.c:1172
static volatile DumpSignalInformation signal_info
Definition parallel.c:175
bool IsEveryWorkerIdle(ParallelState *pstate)
Definition parallel.c:1284
#define write_stderr(str)
Definition parallel.c:186
static void parseWorkerCommand(ArchiveHandle *AH, TocEntry **te, T_Action *act, const char *msg)
Definition parallel.c:1139
#define messageStartsWith(msg, prefix)
Definition parallel.c:228
static void ShutdownWorkersHard(ParallelState *pstate)
Definition parallel.c:411
static void WaitForTerminatingWorkers(ParallelState *pstate)
Definition parallel.c:462
void set_archive_cancel_info(ArchiveHandle *AH, PGconn *conn)
Definition parallel.c:746
void(* ParallelCompletionPtr)(ArchiveHandle *AH, TocEntry *te, int status, void *callback_data)
Definition parallel.h:24
WFW_WaitOption
Definition parallel.h:31
@ WFW_ALL_IDLE
Definition parallel.h:35
@ WFW_GOT_STATUS
Definition parallel.h:33
@ WFW_NO_WAIT
Definition parallel.h:32
@ WFW_ONE_IDLE
Definition parallel.h:34
#define SIGNAL_ARGS
Definition c.h:1385
#define Assert(condition)
Definition c.h:885
Datum arg
Definition elog.c:1322
void err(int eval, const char *fmt,...)
Definition err.c:43
PGcancel * PQgetCancel(PGconn *conn)
Definition fe-cancel.c:368
int PQcancel(PGcancel *cancel, char *errbuf, int errbufsize)
Definition fe-cancel.c:548
void PQfreeCancel(PGcancel *cancel)
Definition fe-cancel.c:502
PGresult * PQexec(PGconn *conn, const char *query)
Definition fe-exec.c:2279
void * pg_malloc(size_t size)
Definition fe_memutils.c:47
void pg_free(void *ptr)
void * pg_realloc(void *ptr, size_t size)
Definition fe_memutils.c:65
#define pg_malloc_array(type, count)
Definition fe_memutils.h:56
#define pg_malloc_object(type)
Definition fe_memutils.h:50
#define pg_malloc0_array(type, count)
Definition fe_memutils.h:57
const char * str
#define bufsize
int j
Definition isn.c:78
int i
Definition isn.c:77
#define PQclear
#define PQresultStatus
@ PGRES_COMMAND_OK
Definition libpq-fe.h:125
#define pg_log_error(...)
Definition logging.h:106
const char * progname
Definition main.c:44
int DumpId
Definition pg_backup.h:285
void DisconnectDatabase(Archive *AHX)
void DeCloneArchive(ArchiveHandle *AH)
ArchiveHandle * CloneArchive(ArchiveHandle *AH)
TocEntry * getTocEntryByDumpId(ArchiveHandle *AH, DumpId id)
#define WORKER_IGNORED_ERRORS
@ ACT_RESTORE
void on_exit_nicely(on_exit_nicely_callback function, void *arg)
#define pg_fatal(...)
#define pg_hton32(x)
Definition pg_bswap.h:121
#define pg_hton16(x)
Definition pg_bswap.h:120
static PgChecksumMode mode
const void size_t len
static bool do_wait
Definition pg_ctl.c:76
static char buf[DEFAULT_XLOG_SEG_SIZE]
#define pqsignal
Definition port.h:547
int pgsocket
Definition port.h:29
#define snprintf
Definition port.h:260
#define PGINVALID_SOCKET
Definition port.h:31
#define closesocket
Definition port.h:397
PQExpBuffer createPQExpBuffer(void)
Definition pqexpbuffer.c:72
void resetPQExpBuffer(PQExpBuffer str)
void appendPQExpBuffer(PQExpBuffer str, const char *fmt,...)
void destroyPQExpBuffer(PQExpBuffer str)
PQExpBufferData * PQExpBuffer
Definition pqexpbuffer.h:51
static int fd(const char *x, int i)
static int fb(int x)
#define free(a)
PGconn * conn
Definition streamutil.c:52
const char * fmtQualifiedId(const char *schema, const char *id)
PQExpBuffer(* getLocalPQExpBuffer)(void)
int n_errors
Definition pg_backup.h:253
int numWorkers
Definition pg_backup.h:240
ArchiveHandle * myAH
Definition parallel.c:167
ParallelState * pstate
Definition parallel.c:168
ParallelCompletionPtr callback
Definition parallel.c:100
ArchiveHandle * AH
Definition parallel.c:103
void * callback_data
Definition parallel.c:101
T_WorkerStatus workerStatus
Definition parallel.c:97
int pipeRevRead
Definition parallel.c:107
int pipeRevWrite
Definition parallel.c:108
TocEntry ** te
Definition parallel.h:59
ParallelSlot * parallelSlot
Definition parallel.h:60
ParallelState * pstate
Definition parallel.c:150
WorkerJobDumpPtrType WorkerJobDumpPtr
PGcancel *volatile connCancel
WorkerJobRestorePtrType WorkerJobRestorePtr
SetupWorkerPtrType SetupWorkerPtr
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
#define bind(s, addr, addrlen)
Definition win32_port.h:496
#define EINTR
Definition win32_port.h:361
#define SIGPIPE
Definition win32_port.h:163
#define SIGQUIT
Definition win32_port.h:159
#define kill(pid, sig)
Definition win32_port.h:490
#define socket(af, type, protocol)
Definition win32_port.h:495
#define accept(s, addr, addrlen)
Definition win32_port.h:498
#define connect(s, name, namelen)
Definition win32_port.h:499
#define listen(s, backlog)
Definition win32_port.h:497
#define select(n, r, w, e, timeout)
Definition win32_port.h:500

◆ NO_SLOT

#define NO_SLOT   (-1) /* Failure result for GetIdleWorker() */

Definition at line 74 of file parallel.c.

◆ pgpipe

#define pgpipe (   a)    pipe(a)

Definition at line 139 of file parallel.c.

◆ PIPE_READ

#define PIPE_READ   0

Definition at line 71 of file parallel.c.

◆ PIPE_WRITE

#define PIPE_WRITE   1

Definition at line 72 of file parallel.c.

◆ piperead

#define piperead (   a,
  b,
  c 
)    read(a,b,c)

Definition at line 140 of file parallel.c.

◆ pipewrite

#define pipewrite (   a,
  b,
  c 
)    write(a,b,c)

Definition at line 141 of file parallel.c.

◆ WORKER_IS_RUNNING

#define WORKER_IS_RUNNING (   workerStatus)     ((workerStatus) == WRKR_IDLE || (workerStatus) == WRKR_WORKING)

Definition at line 85 of file parallel.c.

◆ write_stderr

#define write_stderr (   str)
Value:
do { \
const char *str_ = (str); \
int rc_; \
rc_ = write(fileno(stderr), str_, strlen(str_)); \
(void) rc_; \
} while (0)
#define write(a, b, c)
Definition win32.h:14

Definition at line 186 of file parallel.c.

187 { \
188 const char *str_ = (str); \
189 int rc_; \
190 rc_ = write(fileno(stderr), str_, strlen(str_)); \
191 (void) rc_; \
192 } while (0)

Typedef Documentation

◆ DumpSignalInformation

◆ ShutdownInformation

Enumeration Type Documentation

◆ T_WorkerStatus

Enumerator
WRKR_NOT_STARTED 
WRKR_IDLE 
WRKR_WORKING 
WRKR_TERMINATED 

Definition at line 77 of file parallel.c.

78{
T_WorkerStatus
Definition parallel.c:78
@ WRKR_NOT_STARTED
Definition parallel.c:79

Function Documentation

◆ archive_close_connection()

static void archive_close_connection ( int  code,
void arg 
)
static

Definition at line 355 of file parallel.c.

356{
358
359 if (si->pstate)
360 {
361 /* In parallel mode, must figure out who we are */
362 ParallelSlot *slot = GetMyPSlot(si->pstate);
363
364 if (!slot)
365 {
366 /*
367 * We're the leader. Forcibly shut down workers, then close our
368 * own database connection, if any.
369 */
370 ShutdownWorkersHard(si->pstate);
371
372 if (si->AHX)
374 }
375 else
376 {
377 /*
378 * We're a worker. Shut down our own DB connection if any. On
379 * Windows, we also have to close our communication sockets, to
380 * emulate what will happen on Unix when the worker process exits.
381 * (Without this, if this is a premature exit, the leader would
382 * fail to detect it because there would be no EOF condition on
383 * the other end of the pipe.)
384 */
385 if (slot->AH)
386 DisconnectDatabase(&(slot->AH->public));
387
388#ifdef WIN32
391#endif
392 }
393 }
394 else
395 {
396 /* Non-parallel operation: just kill the leader DB connection */
397 if (si->AHX)
399 }
400}

References ParallelSlot::AH, arg, closesocket, DisconnectDatabase(), fb(), GetMyPSlot(), ParallelSlot::pipeRevRead, ParallelSlot::pipeRevWrite, _archiveHandle::public, and ShutdownWorkersHard().

Referenced by on_exit_close_archive().

◆ buildWorkerCommand()

static void buildWorkerCommand ( ArchiveHandle AH,
TocEntry te,
T_Action  act,
char buf,
int  buflen 
)
static

Definition at line 1124 of file parallel.c.

1126{
1127 if (act == ACT_DUMP)
1128 snprintf(buf, buflen, "DUMP %d", te->dumpId);
1129 else if (act == ACT_RESTORE)
1130 snprintf(buf, buflen, "RESTORE %d", te->dumpId);
1131 else
1132 Assert(false);
1133}

References ACT_DUMP, ACT_RESTORE, Assert, buf, _tocEntry::dumpId, fb(), and snprintf.

Referenced by DispatchJobForTocEntry().

◆ buildWorkerResponse()

static void buildWorkerResponse ( ArchiveHandle AH,
TocEntry te,
T_Action  act,
int  status,
char buf,
int  buflen 
)
static

Definition at line 1172 of file parallel.c.

1174{
1175 snprintf(buf, buflen, "OK %d %d %d",
1176 te->dumpId,
1177 status,
1178 status == WORKER_IGNORED_ERRORS ? AH->public.n_errors : 0);
1179}

References buf, _tocEntry::dumpId, Archive::n_errors, _archiveHandle::public, snprintf, and WORKER_IGNORED_ERRORS.

Referenced by WaitForCommands().

◆ DispatchJobForTocEntry()

void DispatchJobForTocEntry ( ArchiveHandle AH,
ParallelState pstate,
TocEntry te,
T_Action  act,
ParallelCompletionPtr  callback,
void callback_data 
)

Definition at line 1221 of file parallel.c.

1227{
1228 int worker;
1229 char buf[256];
1230
1231 /* Get a worker, waiting if none are idle */
1232 while ((worker = GetIdleWorker(pstate)) == NO_SLOT)
1233 WaitForWorkers(AH, pstate, WFW_ONE_IDLE);
1234
1235 /* Construct and send command string */
1236 buildWorkerCommand(AH, te, act, buf, sizeof(buf));
1237
1238 sendMessageToWorker(pstate, worker, buf);
1239
1240 /* Remember worker is busy, and which TocEntry it's working on */
1241 pstate->parallelSlot[worker].workerStatus = WRKR_WORKING;
1242 pstate->parallelSlot[worker].callback = callback;
1243 pstate->parallelSlot[worker].callback_data = callback_data;
1244 pstate->te[worker] = te;
1245}

References buf, buildWorkerCommand(), ParallelSlot::callback, callback(), ParallelSlot::callback_data, fb(), GetIdleWorker(), NO_SLOT, ParallelState::parallelSlot, sendMessageToWorker(), ParallelState::te, WaitForWorkers(), WFW_ONE_IDLE, ParallelSlot::workerStatus, and WRKR_WORKING.

Referenced by restore_toc_entries_parallel(), and WriteDataChunks().

◆ GetIdleWorker()

static int GetIdleWorker ( ParallelState pstate)
static

Definition at line 1252 of file parallel.c.

1253{
1254 int i;
1255
1256 for (i = 0; i < pstate->numWorkers; i++)
1257 {
1258 if (pstate->parallelSlot[i].workerStatus == WRKR_IDLE)
1259 return i;
1260 }
1261 return NO_SLOT;
1262}

References i, NO_SLOT, ParallelState::numWorkers, ParallelState::parallelSlot, ParallelSlot::workerStatus, and WRKR_IDLE.

Referenced by DispatchJobForTocEntry(), and WaitForWorkers().

◆ getMessageFromLeader()

static char * getMessageFromLeader ( int  pipefd[2])
static

Definition at line 1532 of file parallel.c.

1533{
1535}

References fb(), PIPE_READ, and readMessageFromPipe().

Referenced by WaitForCommands().

◆ getMessageFromWorker()

static char * getMessageFromWorker ( ParallelState pstate,
bool  do_wait,
int worker 
)
static

Definition at line 1595 of file parallel.c.

1596{
1597 int i;
1599 int maxFd = -1;
1600 struct timeval nowait = {0, 0};
1601
1602 /* construct bitmap of socket descriptors for select() */
1604 for (i = 0; i < pstate->numWorkers; i++)
1605 {
1607 continue;
1609 if (pstate->parallelSlot[i].pipeRead > maxFd)
1610 maxFd = pstate->parallelSlot[i].pipeRead;
1611 }
1612
1613 if (do_wait)
1614 {
1616 Assert(i != 0);
1617 }
1618 else
1619 {
1620 if ((i = select(maxFd + 1, &workerset, NULL, NULL, &nowait)) == 0)
1621 return NULL;
1622 }
1623
1624 if (i < 0)
1625 pg_fatal("%s() failed: %m", "select");
1626
1627 for (i = 0; i < pstate->numWorkers; i++)
1628 {
1629 char *msg;
1630
1632 continue;
1633 if (!FD_ISSET(pstate->parallelSlot[i].pipeRead, &workerset))
1634 continue;
1635
1636 /*
1637 * Read the message if any. If the socket is ready because of EOF,
1638 * we'll return NULL instead (and the socket will stay ready, so the
1639 * condition will persist).
1640 *
1641 * Note: because this is a blocking read, we'll wait if only part of
1642 * the message is available. Waiting a long time would be bad, but
1643 * since worker status messages are short and are always sent in one
1644 * operation, it shouldn't be a problem in practice.
1645 */
1647 *worker = i;
1648 return msg;
1649 }
1650 Assert(false);
1651 return NULL;
1652}

References Assert, do_wait, fb(), i, ParallelState::numWorkers, ParallelState::parallelSlot, pg_fatal, ParallelSlot::pipeRead, readMessageFromPipe(), select, select_loop(), WORKER_IS_RUNNING, and ParallelSlot::workerStatus.

Referenced by ListenToWorkers().

◆ GetMyPSlot()

static ParallelSlot * GetMyPSlot ( ParallelState pstate)
static

Definition at line 266 of file parallel.c.

267{
268 int i;
269
270 for (i = 0; i < pstate->numWorkers; i++)
271 {
272#ifdef WIN32
273 if (pstate->parallelSlot[i].threadId == GetCurrentThreadId())
274#else
275 if (pstate->parallelSlot[i].pid == getpid())
276#endif
277 return &(pstate->parallelSlot[i]);
278 }
279
280 return NULL;
281}

References fb(), i, ParallelState::numWorkers, ParallelState::parallelSlot, and ParallelSlot::pid.

Referenced by archive_close_connection().

◆ HasEveryWorkerTerminated()

static bool HasEveryWorkerTerminated ( ParallelState pstate)
static

Definition at line 1268 of file parallel.c.

1269{
1270 int i;
1271
1272 for (i = 0; i < pstate->numWorkers; i++)
1273 {
1275 return false;
1276 }
1277 return true;
1278}

References i, ParallelState::numWorkers, ParallelState::parallelSlot, WORKER_IS_RUNNING, and ParallelSlot::workerStatus.

Referenced by WaitForTerminatingWorkers().

◆ init_parallel_dump_utils()

void init_parallel_dump_utils ( void  )

Definition at line 238 of file parallel.c.

239{
240#ifdef WIN32
242 {
244 int err;
245
246 /* Prepare for threaded operation */
249
250 /* Initialize socket access */
251 err = WSAStartup(MAKEWORD(2, 2), &wsaData);
252 if (err != 0)
253 pg_fatal("%s() failed: error code %d", "WSAStartup", err);
254
255 parallel_init_done = true;
256 }
257#endif
258}

References err(), fb(), and pg_fatal.

Referenced by main().

◆ IsEveryWorkerIdle()

bool IsEveryWorkerIdle ( ParallelState pstate)

Definition at line 1284 of file parallel.c.

1285{
1286 int i;
1287
1288 for (i = 0; i < pstate->numWorkers; i++)
1289 {
1290 if (pstate->parallelSlot[i].workerStatus != WRKR_IDLE)
1291 return false;
1292 }
1293 return true;
1294}

References i, ParallelState::numWorkers, ParallelState::parallelSlot, ParallelSlot::workerStatus, and WRKR_IDLE.

Referenced by ParallelBackupEnd(), restore_toc_entries_parallel(), and WaitForWorkers().

◆ ListenToWorkers()

static bool ListenToWorkers ( ArchiveHandle AH,
ParallelState pstate,
bool  do_wait 
)
static

Definition at line 1414 of file parallel.c.

1415{
1416 int worker;
1417 char *msg;
1418
1419 /* Try to collect a status message */
1420 msg = getMessageFromWorker(pstate, do_wait, &worker);
1421
1422 if (!msg)
1423 {
1424 /* If do_wait is true, we must have detected EOF on some socket */
1425 if (do_wait)
1426 pg_fatal("a worker process died unexpectedly");
1427 return false;
1428 }
1429
1430 /* Process it and update our idea of the worker's status */
1431 if (messageStartsWith(msg, "OK "))
1432 {
1433 ParallelSlot *slot = &pstate->parallelSlot[worker];
1434 TocEntry *te = pstate->te[worker];
1435 int status;
1436
1437 status = parseWorkerResponse(AH, te, msg);
1438 slot->callback(AH, te, status, slot->callback_data);
1439 slot->workerStatus = WRKR_IDLE;
1440 pstate->te[worker] = NULL;
1441 }
1442 else
1443 pg_fatal("invalid message received from worker: \"%s\"",
1444 msg);
1445
1446 /* Free the string returned from getMessageFromWorker */
1447 free(msg);
1448
1449 return true;
1450}

References ParallelSlot::callback, ParallelSlot::callback_data, do_wait, fb(), free, getMessageFromWorker(), messageStartsWith, ParallelState::parallelSlot, parseWorkerResponse(), pg_fatal, ParallelState::te, ParallelSlot::workerStatus, and WRKR_IDLE.

Referenced by WaitForWorkers().

◆ lockTableForWorker()

static void lockTableForWorker ( ArchiveHandle AH,
TocEntry te 
)
static

Definition at line 1317 of file parallel.c.

1318{
1319 const char *qualId;
1320 PQExpBuffer query;
1321 PGresult *res;
1322
1323 /* Nothing to do for BLOBS */
1324 if (strcmp(te->desc, "BLOBS") == 0)
1325 return;
1326
1327 query = createPQExpBuffer();
1328
1329 qualId = fmtQualifiedId(te->namespace, te->tag);
1330
1331 appendPQExpBuffer(query, "LOCK TABLE %s IN ACCESS SHARE MODE NOWAIT",
1332 qualId);
1333
1334 res = PQexec(AH->connection, query->data);
1335
1336 if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
1337 pg_fatal("could not obtain lock on relation \"%s\"\n"
1338 "This usually means that someone requested an ACCESS EXCLUSIVE lock "
1339 "on the table after the pg_dump parent process had gotten the "
1340 "initial ACCESS SHARE lock on the table.", qualId);
1341
1342 PQclear(res);
1343 destroyPQExpBuffer(query);
1344}

References appendPQExpBuffer(), _archiveHandle::connection, createPQExpBuffer(), PQExpBufferData::data, _tocEntry::desc, destroyPQExpBuffer(), fb(), fmtQualifiedId(), pg_fatal, PGRES_COMMAND_OK, PQclear, PQexec(), PQresultStatus, and _tocEntry::tag.

Referenced by WaitForCommands().

◆ on_exit_close_archive()

◆ ParallelBackupEnd()

void ParallelBackupEnd ( ArchiveHandle * AH,
ParallelState * pstate 
)

Definition at line 1075 of file parallel.c.

1076{
1077 int i;
1078
1079 /* No work if non-parallel */
1080 if (pstate->numWorkers == 1)
1081 return;
1082
1083 /* There should not be any unfinished jobs */
1084 Assert(IsEveryWorkerIdle(pstate));
1085
1086 /* Close the sockets so that the workers know they can exit */
1087 for (i = 0; i < pstate->numWorkers; i++)
1088 {
1091 }
1092
1093 /* Wait for them to exit */
1095
1096 /*
1097 * Unlink pstate from shutdown_info, so the exit handler will not try to
1098 * use it; and likewise unlink from signal_info.
1099 */
1102
1103 /* Release state (mere neatnik-ism, since we're about to terminate) */
1104 free(pstate->te);
1105 free(pstate->parallelSlot);
1106 free(pstate);
1107}

References Assert, closesocket, fb(), free, i, IsEveryWorkerIdle(), ParallelState::numWorkers, ParallelState::parallelSlot, ParallelSlot::pipeRead, ParallelSlot::pipeWrite, ShutdownInformation::pstate, set_cancel_pstate(), shutdown_info, ParallelState::te, and WaitForTerminatingWorkers().

Referenced by _CloseArchive(), and RestoreArchive().

◆ ParallelBackupStart()

ParallelState * ParallelBackupStart ( ArchiveHandle * AH)

Definition at line 913 of file parallel.c.

914{
915 ParallelState *pstate;
916 int i;
917
918 Assert(AH->public.numWorkers > 0);
919
921
922 pstate->numWorkers = AH->public.numWorkers;
923 pstate->te = NULL;
924 pstate->parallelSlot = NULL;
925
926 if (AH->public.numWorkers == 1)
927 return pstate;
928
929 /* Create status arrays, being sure to initialize all fields to 0 */
930 pstate->te =
932 pstate->parallelSlot =
934
935#ifdef WIN32
936 /* Make fmtId() and fmtQualifiedId() use thread-local storage */
938#endif
939
940 /*
941 * Set the pstate in shutdown_info, to tell the exit handler that it must
942 * clean up workers as well as the main database connection. But we don't
943 * set this in signal_info yet, because we don't want child processes to
944 * inherit non-NULL signal_info.pstate.
945 */
946 shutdown_info.pstate = pstate;
947
948 /*
949 * Temporarily disable query cancellation on the leader connection. This
950 * ensures that child processes won't inherit valid AH->connCancel
951 * settings and thus won't try to issue cancels against the leader's
952 * connection. No harm is done if we fail while it's disabled, because
953 * the leader connection is idle at this point anyway.
954 */
956
957 /* Ensure stdio state is quiesced before forking */
958 fflush(NULL);
959
960 /* Create desired number of workers */
961 for (i = 0; i < pstate->numWorkers; i++)
962 {
963#ifdef WIN32
964 WorkerInfo *wi;
965 uintptr_t handle;
966#else
967 pid_t pid;
968#endif
969 ParallelSlot *slot = &(pstate->parallelSlot[i]);
970 int pipeMW[2],
971 pipeWM[2];
972
973 /* Create communication pipes for this worker */
974 if (pgpipe(pipeMW) < 0 || pgpipe(pipeWM) < 0)
975 pg_fatal("could not create communication channels: %m");
976
977 /* leader's ends of the pipes */
978 slot->pipeRead = pipeWM[PIPE_READ];
979 slot->pipeWrite = pipeMW[PIPE_WRITE];
980 /* child's ends of the pipes */
983
984#ifdef WIN32
985 /* Create transient structure to pass args to worker function */
987
988 wi->AH = AH;
989 wi->slot = slot;
990
991 handle = _beginthreadex(NULL, 0, (void *) &init_spawned_worker_win32,
992 wi, 0, &(slot->threadId));
993 slot->hThread = handle;
994 slot->workerStatus = WRKR_IDLE;
995#else /* !WIN32 */
996 pid = fork();
997 if (pid == 0)
998 {
999 /* we are the worker */
1000 int j;
1001
1002 /* this is needed for GetMyPSlot() */
1003 slot->pid = getpid();
1004
1005 /* instruct signal handler that we're in a worker now */
1006 signal_info.am_worker = true;
1007
1008 /* close read end of Worker -> Leader */
1010 /* close write end of Leader -> Worker */
1012
1013 /*
1014 * Close all inherited fds for communication of the leader with
1015 * previously-forked workers.
1016 */
1017 for (j = 0; j < i; j++)
1018 {
1021 }
1022
1023 /* Run the worker ... */
1024 RunWorker(AH, slot);
1025
1026 /* We can just exit(0) when done */
1027 exit(0);
1028 }
1029 else if (pid < 0)
1030 {
1031 /* fork failed */
1032 pg_fatal("could not create worker process: %m");
1033 }
1034
1035 /* In Leader after successful fork */
1036 slot->pid = pid;
1037 slot->workerStatus = WRKR_IDLE;
1038
1039 /* close read end of Leader -> Worker */
1041 /* close write end of Worker -> Leader */
1043#endif /* WIN32 */
1044 }
1045
1046 /*
1047 * Having forked off the workers, disable SIGPIPE so that leader isn't
1048 * killed if it tries to send a command to a dead worker. We don't want
1049 * the workers to inherit this setting, though.
1050 */
1051#ifndef WIN32
1053#endif
1054
1055 /*
1056 * Re-establish query cancellation on the leader connection.
1057 */
1059
1060 /*
1061 * Tell the cancel signal handler to forward signals to worker processes,
1062 * too. (As with query cancel, we did not need this earlier because the
1063 * workers have not yet been given anything to do; if we die before this
1064 * point, any already-started workers will see EOF and quit promptly.)
1065 */
1066 set_cancel_pstate(pstate);
1067
1068 return pstate;
1069}

References DumpSignalInformation::am_worker, Assert, closesocket, _archiveHandle::connection, fb(), getLocalPQExpBuffer, i, j, ParallelState::numWorkers, Archive::numWorkers, ParallelState::parallelSlot, pg_fatal, pg_malloc0_array, pg_malloc_object, pgpipe, ParallelSlot::pid, PIPE_READ, PIPE_WRITE, ParallelSlot::pipeRead, ParallelSlot::pipeRevRead, ParallelSlot::pipeRevWrite, ParallelSlot::pipeWrite, pqsignal, ShutdownInformation::pstate, _archiveHandle::public, RunWorker(), set_archive_cancel_info(), set_cancel_pstate(), shutdown_info, signal_info, SIGPIPE, ParallelState::te, ParallelSlot::workerStatus, and WRKR_IDLE.

Referenced by _CloseArchive(), and RestoreArchive().

◆ parseWorkerCommand()

static void parseWorkerCommand ( ArchiveHandle * AH,
TocEntry **  te,
T_Action * act,
const char * msg 
)
static

Definition at line 1139 of file parallel.c.

1141{
1142 DumpId dumpId;
1143 int nBytes;
1144
1145 if (messageStartsWith(msg, "DUMP "))
1146 {
1147 *act = ACT_DUMP;
1148 sscanf(msg, "DUMP %d%n", &dumpId, &nBytes);
1149 Assert(nBytes == strlen(msg));
1150 *te = getTocEntryByDumpId(AH, dumpId);
1151 Assert(*te != NULL);
1152 }
1153 else if (messageStartsWith(msg, "RESTORE "))
1154 {
1155 *act = ACT_RESTORE;
1156 sscanf(msg, "RESTORE %d%n", &dumpId, &nBytes);
1157 Assert(nBytes == strlen(msg));
1158 *te = getTocEntryByDumpId(AH, dumpId);
1159 Assert(*te != NULL);
1160 }
1161 else
1162 pg_fatal("unrecognized command received from leader: \"%s\"",
1163 msg);
1164}

References ACT_DUMP, ACT_RESTORE, Assert, fb(), getTocEntryByDumpId(), messageStartsWith, and pg_fatal.

Referenced by WaitForCommands().

◆ parseWorkerResponse()

static int parseWorkerResponse ( ArchiveHandle * AH,
TocEntry * te,
const char * msg 
)
static

Definition at line 1187 of file parallel.c.

1189{
1190 DumpId dumpId;
1191 int nBytes,
1192 n_errors;
1193 int status = 0;
1194
1195 if (messageStartsWith(msg, "OK "))
1196 {
1197 sscanf(msg, "OK %d %d %d%n", &dumpId, &status, &n_errors, &nBytes);
1198
1199 Assert(dumpId == te->dumpId);
1200 Assert(nBytes == strlen(msg));
1201
1202 AH->public.n_errors += n_errors;
1203 }
1204 else
1205 pg_fatal("invalid message received from worker: \"%s\"",
1206 msg);
1207
1208 return status;
1209}

References Assert, _tocEntry::dumpId, fb(), messageStartsWith, Archive::n_errors, pg_fatal, and _archiveHandle::public.

Referenced by ListenToWorkers().

◆ readMessageFromPipe()

static char * readMessageFromPipe ( int  fd)
static

Definition at line 1678 of file parallel.c.

/*
 * readMessageFromPipe
 *
 * Read one '\0'-terminated message from fd and return it in a buffer
 * obtained from pg_malloc(); the caller owns the buffer.  Returns NULL,
 * after freeing the partial buffer, if piperead() reports an error or
 * end-of-file before the terminator is seen.
 *
 * The message is read one byte at a time.  Reading more per call could in
 * principle pull in the start of a following message; the one-byte reads
 * avoid having to assume that cannot happen, and the strings involved are
 * short enough that the cost is not a concern.
 *
 * NOTE(review): the listing this was taken from skips source line 1699
 * (first statement inside the loop); confirm nothing material is missing.
 */
static char *
readMessageFromPipe(int fd)
{
	int			capacity = 64;	/* starting size; could be any number */
	int			used = 0;
	char	   *buffer = (char *) pg_malloc(capacity);

	for (;;)
	{
		int			nread = piperead(fd, buffer + used, 1);

		if (nread <= 0)
		{
			/* error, or the other end has closed the connection */
			pg_free(buffer);
			return NULL;
		}

		Assert(nread == 1);

		/* The terminator marks the end of a complete message */
		if (buffer[used] == '\0')
			return buffer;

		/* Keep the byte, growing the buffer when it becomes full */
		if (++used == capacity)
		{
			capacity += 16;		/* growth step; could be any number */
			buffer = (char *) pg_realloc(buffer, capacity);
		}
	}
}

References Assert, bufsize, fb(), fd(), pg_free(), pg_malloc(), pg_realloc(), and piperead.

Referenced by getMessageFromLeader(), and getMessageFromWorker().

◆ replace_on_exit_close_archive()

void replace_on_exit_close_archive ( Archive AHX)

Definition at line 345 of file parallel.c.

346{
347 shutdown_info.AHX = AHX;
348}

References ShutdownInformation::AHX, and shutdown_info.

Referenced by restore_one_database().

◆ RunWorker()

static void RunWorker ( ArchiveHandle AH,
ParallelSlot slot 
)
static

Definition at line 845 of file parallel.c.

846{
847 int pipefd[2];
848
849 /* fetch child ends of pipes */
852
853 /*
854 * Clone the archive so that we have our own state to work with, and in
855 * particular our own database connection.
856 *
857 * We clone on Unix as well as Windows, even though technically we don't
858 * need to because fork() gives us a copy in our own address space
859 * already. But CloneArchive resets the state information and also clones
860 * the database connection which both seem kinda helpful.
861 */
862 AH = CloneArchive(AH);
863
864 /* Remember cloned archive where signal handler can find it */
865 set_cancel_slot_archive(slot, AH);
866
867 /*
868 * Call the setup worker function that's defined in the ArchiveHandle.
869 */
870 (AH->SetupWorkerPtr) ((Archive *) AH);
871
872 /*
873 * Execute commands until done.
874 */
876
877 /*
878 * Disconnect from database and clean up.
879 */
882 DeCloneArchive(AH);
883}

References CloneArchive(), DeCloneArchive(), DisconnectDatabase(), fb(), PIPE_READ, PIPE_WRITE, ParallelSlot::pipeRevRead, ParallelSlot::pipeRevWrite, _archiveHandle::public, set_cancel_slot_archive(), _archiveHandle::SetupWorkerPtr, and WaitForCommands().

Referenced by ParallelBackupStart().

◆ select_loop()

static int select_loop ( int  maxFd,
fd_set * workerset 
)
static

Definition at line 1556 of file parallel.c.

/*
 * select_loop
 *
 * Block in select() until at least one descriptor in *workerset is
 * readable, retrying whenever the call is interrupted (EINTR, or WSAEINTR
 * on Windows).
 *
 * maxFd is the highest descriptor number in the set.  On return *workerset
 * holds the descriptors select() reported, and the function result is
 * select()'s own return value (negative / SOCKET_ERROR on a failure other
 * than an interrupt).
 *
 * select() overwrites the set it is given, so the caller's set is saved on
 * entry and restored before every attempt.  The declaration of that saved
 * copy is restored here; the loop cannot work without it.
 */
static int
select_loop(int maxFd, fd_set *workerset)
{
	int			i;
	fd_set		saveSet = *workerset;

	for (;;)
	{
		/* start each attempt from the caller's original set */
		*workerset = saveSet;
		i = select(maxFd + 1, workerset, NULL, NULL, NULL);

#ifndef WIN32
		if (i < 0 && errno == EINTR)
			continue;
#else
		if (i == SOCKET_ERROR && WSAGetLastError() == WSAEINTR)
			continue;
#endif
		break;
	}

	return i;
}

References EINTR, fb(), i, and select.

Referenced by getMessageFromWorker().

◆ sendMessageToLeader()

static void sendMessageToLeader ( int  pipefd[2],
const char * str 
)
static

Definition at line 1543 of file parallel.c.

1544{
1545 int len = strlen(str) + 1;
1546
1547 if (pipewrite(pipefd[PIPE_WRITE], str, len) != len)
1548 pg_fatal("could not write to the communication channel: %m");
1549}

References fb(), len, pg_fatal, PIPE_WRITE, pipewrite, and str.

Referenced by WaitForCommands().

◆ sendMessageToWorker()

static void sendMessageToWorker ( ParallelState * pstate,
int  worker,
const char * str 
)
static

Definition at line 1660 of file parallel.c.

1661{
1662 int len = strlen(str) + 1;
1663
1664 if (pipewrite(pstate->parallelSlot[worker].pipeWrite, str, len) != len)
1665 {
1666 pg_fatal("could not write to the communication channel: %m");
1667 }
1668}

References fb(), len, ParallelState::parallelSlot, pg_fatal, ParallelSlot::pipeWrite, pipewrite, and str.

Referenced by DispatchJobForTocEntry().

◆ set_archive_cancel_info()

void set_archive_cancel_info ( ArchiveHandle AH,
PGconn conn 
)

Definition at line 746 of file parallel.c.

747{
749
750 /*
751 * Activate the interrupt handler if we didn't yet in this process. On
752 * Windows, this also initializes signal_info_lock; therefore it's
753 * important that this happen at least once before we fork off any
754 * threads.
755 */
757
758 /*
759 * On Unix, we assume that storing a pointer value is atomic with respect
760 * to any possible signal interrupt. On Windows, use a critical section.
761 */
762
763#ifdef WIN32
765#endif
766
767 /* Free the old one if we have one */
769 /* be sure interrupt handler doesn't use pointer while freeing */
770 AH->connCancel = NULL;
771
772 if (oldConnCancel != NULL)
774
775 /* Set the new one if specified */
776 if (conn)
778
779 /*
780 * On Unix, there's only ever one active ArchiveHandle per process, so we
781 * can just set signal_info.myAH unconditionally. On Windows, do that
782 * only in the main thread; worker threads have to make sure their
783 * ArchiveHandle appears in the pstate data, which is dealt with in
784 * RunWorker().
785 */
786#ifndef WIN32
787 signal_info.myAH = AH;
788#else
790 signal_info.myAH = AH;
791#endif
792
793#ifdef WIN32
795#endif
796}

References conn, _archiveHandle::connCancel, fb(), DumpSignalInformation::myAH, PQfreeCancel(), PQgetCancel(), set_cancel_handler(), and signal_info.

Referenced by ConnectDatabaseAhx(), DisconnectDatabase(), and ParallelBackupStart().

◆ set_cancel_handler()

static void set_cancel_handler ( void  )
static

Definition at line 624 of file parallel.c.

625{
626 /*
627 * When forking, signal_info.handler_set will propagate into the new
628 * process, but that's fine because the signal handler state does too.
629 */
631 {
633
637 }
638}

References fb(), DumpSignalInformation::handler_set, pqsignal, signal_info, SIGQUIT, and sigTermHandler().

Referenced by set_archive_cancel_info().

◆ set_cancel_pstate()

static void set_cancel_pstate ( ParallelState * pstate)
static

Definition at line 805 of file parallel.c.

/*
 * set_cancel_pstate
 *
 * Record pstate in signal_info.pstate.  sigTermHandler() tests that field
 * for non-NULL before sending SIGTERM to each worker's pid, so storing a
 * ParallelState here is what makes the handler forward the signal, and
 * storing NULL turns that forwarding off.
 *
 * NOTE(review): source lines 808 and 814, inside the two WIN32 guards, are
 * absent from this listing; confirm their content against parallel.c.
 */
806{
807#ifdef WIN32
809#endif
810
/* publish (or clear) the state consulted by the signal handler */
811 signal_info.pstate = pstate;
812
813#ifdef WIN32
815#endif
816}

References fb(), DumpSignalInformation::pstate, and signal_info.

Referenced by ParallelBackupEnd(), and ParallelBackupStart().

◆ set_cancel_slot_archive()

static void set_cancel_slot_archive ( ParallelSlot * slot,
ArchiveHandle * AH 
)
static

Definition at line 825 of file parallel.c.

/*
 * set_cancel_slot_archive
 *
 * Record AH in slot->AH.  RunWorker() calls this with the archive it has
 * just cloned so that the archive can be found through the slot; on
 * Windows, ShutdownWorkersHard() reads the field to send a query cancel on
 * each worker's connection.
 *
 * NOTE(review): source lines 828 and 834, inside the two WIN32 guards, are
 * absent from this listing; confirm their content against parallel.c.
 */
826{
827#ifdef WIN32
829#endif
830
/* remember which archive this worker slot is using */
831 slot->AH = AH;
832
833#ifdef WIN32
835#endif
836}

References ParallelSlot::AH, and fb().

Referenced by RunWorker().

◆ ShutdownWorkersHard()

static void ShutdownWorkersHard ( ParallelState pstate)
static

Definition at line 411 of file parallel.c.

412{
413 int i;
414
415 /*
416 * Close our write end of the sockets so that any workers waiting for
417 * commands know they can exit. (Note: some of the pipeWrite fields might
418 * still be zero, if we failed to initialize all the workers. Hence, just
419 * ignore errors here.)
420 */
421 for (i = 0; i < pstate->numWorkers; i++)
423
424 /*
425 * Force early termination of any commands currently in progress.
426 */
427#ifndef WIN32
428 /* On non-Windows, send SIGTERM to each worker process. */
429 for (i = 0; i < pstate->numWorkers; i++)
430 {
431 pid_t pid = pstate->parallelSlot[i].pid;
432
433 if (pid != 0)
434 kill(pid, SIGTERM);
435 }
436#else
437
438 /*
439 * On Windows, send query cancels directly to the workers' backends. Use
440 * a critical section to ensure worker threads don't change state.
441 */
443 for (i = 0; i < pstate->numWorkers; i++)
444 {
445 ArchiveHandle *AH = pstate->parallelSlot[i].AH;
446 char errbuf[1];
447
448 if (AH != NULL && AH->connCancel != NULL)
449 (void) PQcancel(AH->connCancel, errbuf, sizeof(errbuf));
450 }
452#endif
453
454 /* Now wait for them to terminate. */
456}

References ParallelSlot::AH, closesocket, _archiveHandle::connCancel, fb(), i, kill, ParallelState::numWorkers, ParallelState::parallelSlot, ParallelSlot::pid, ParallelSlot::pipeWrite, PQcancel(), and WaitForTerminatingWorkers().

Referenced by archive_close_connection().

◆ sigTermHandler()

static void sigTermHandler ( SIGNAL_ARGS  )
static

Definition at line 561 of file parallel.c.

562{
563 int i;
564 char errbuf[1];
565
566 /*
567 * Some platforms allow delivery of new signals to interrupt an active
568 * signal handler. That could muck up our attempt to send PQcancel, so
569 * disable the signals that set_cancel_handler enabled.
570 */
574
575 /*
576 * If we're in the leader, forward signal to all workers. (It seems best
577 * to do this before PQcancel; killing the leader transaction will result
578 * in invalid-snapshot errors from active workers, which maybe we can
579 * quiet by killing workers first.) Ignore any errors.
580 */
581 if (signal_info.pstate != NULL)
582 {
583 for (i = 0; i < signal_info.pstate->numWorkers; i++)
584 {
586
587 if (pid != 0)
588 kill(pid, SIGTERM);
589 }
590 }
591
592 /*
593 * Send QueryCancel if we have a connection to send to. Ignore errors,
594 * there's not much we can do about them anyway.
595 */
597 (void) PQcancel(signal_info.myAH->connCancel, errbuf, sizeof(errbuf));
598
599 /*
600 * Report we're quitting, using nothing more complicated than write(2).
601 * When in parallel operation, only the leader process should do this.
602 */
604 {
605 if (progname)
606 {
608 write_stderr(": ");
609 }
610 write_stderr("terminated by user\n");
611 }
612
613 /*
614 * And die, using _exit() not exit() because the latter will invoke atexit
615 * handlers that can fail if we interrupted related code.
616 */
617 _exit(1);
618}

References DumpSignalInformation::am_worker, _archiveHandle::connCancel, fb(), i, kill, DumpSignalInformation::myAH, ParallelState::numWorkers, ParallelState::parallelSlot, ParallelSlot::pid, PQcancel(), pqsignal, progname, DumpSignalInformation::pstate, signal_info, SIGQUIT, and write_stderr.

Referenced by set_cancel_handler().

◆ WaitForCommands()

static void WaitForCommands ( ArchiveHandle * AH,
int  pipefd[2] 
)
static

Definition at line 1352 of file parallel.c.

1353{
1354 char *command;
1355 TocEntry *te;
1356 T_Action act;
1357 int status = 0;
1358 char buf[256];
1359
1360 for (;;)
1361 {
1362 if (!(command = getMessageFromLeader(pipefd)))
1363 {
1364 /* EOF, so done */
1365 return;
1366 }
1367
1368 /* Decode the command */
1369 parseWorkerCommand(AH, &te, &act, command);
1370
1371 if (act == ACT_DUMP)
1372 {
1373 /* Acquire lock on this table within the worker's session */
1374 lockTableForWorker(AH, te);
1375
1376 /* Perform the dump command */
1377 status = (AH->WorkerJobDumpPtr) (AH, te);
1378 }
1379 else if (act == ACT_RESTORE)
1380 {
1381 /* Perform the restore command */
1382 status = (AH->WorkerJobRestorePtr) (AH, te);
1383 }
1384 else
1385 Assert(false);
1386
1387 /* Return status to leader */
1388 buildWorkerResponse(AH, te, act, status, buf, sizeof(buf));
1389
1391
1392 /* command was pg_malloc'd and we are responsible for free()ing it. */
1393 free(command);
1394 }
1395}

References ACT_DUMP, ACT_RESTORE, Assert, buf, buildWorkerResponse(), fb(), free, getMessageFromLeader(), lockTableForWorker(), parseWorkerCommand(), sendMessageToLeader(), _archiveHandle::WorkerJobDumpPtr, and _archiveHandle::WorkerJobRestorePtr.

Referenced by RunWorker().

◆ WaitForTerminatingWorkers()

static void WaitForTerminatingWorkers ( ParallelState pstate)
static

Definition at line 462 of file parallel.c.

463{
464 while (!HasEveryWorkerTerminated(pstate))
465 {
466 ParallelSlot *slot = NULL;
467 int j;
468
469#ifndef WIN32
470 /* On non-Windows, use wait() to wait for next worker to end */
471 int status;
472 pid_t pid = wait(&status);
473
474 /* Find dead worker's slot, and clear the PID field */
475 for (j = 0; j < pstate->numWorkers; j++)
476 {
477 slot = &(pstate->parallelSlot[j]);
478 if (slot->pid == pid)
479 {
480 slot->pid = 0;
481 break;
482 }
483 }
484#else /* WIN32 */
485 /* On Windows, we must use WaitForMultipleObjects() */
487 int nrun = 0;
488 DWORD ret;
490
491 for (j = 0; j < pstate->numWorkers; j++)
492 {
494 {
495 lpHandles[nrun] = (HANDLE) pstate->parallelSlot[j].hThread;
496 nrun++;
497 }
498 }
500 Assert(ret != WAIT_FAILED);
503
504 /* Find dead worker's slot, and clear the hThread field */
505 for (j = 0; j < pstate->numWorkers; j++)
506 {
507 slot = &(pstate->parallelSlot[j]);
508 if (slot->hThread == hThread)
509 {
510 /* For cleanliness, close handles for dead threads */
511 CloseHandle((HANDLE) slot->hThread);
512 slot->hThread = (uintptr_t) INVALID_HANDLE_VALUE;
513 break;
514 }
515 }
516#endif /* WIN32 */
517
518 /* On all platforms, update workerStatus and te[] as well */
519 Assert(j < pstate->numWorkers);
521 pstate->te[j] = NULL;
522 }
523}

References Assert, fb(), free, HasEveryWorkerTerminated(), j, ParallelState::numWorkers, ParallelState::parallelSlot, pg_malloc_array, ParallelSlot::pid, ParallelState::te, WORKER_IS_RUNNING, ParallelSlot::workerStatus, and WRKR_TERMINATED.

Referenced by ParallelBackupEnd(), and ShutdownWorkersHard().

◆ WaitForWorkers()

void WaitForWorkers ( ArchiveHandle * AH,
ParallelState * pstate,
WFW_WaitOption  mode 
)

Definition at line 1467 of file parallel.c.

1468{
1469 bool do_wait = false;
1470
1471 /*
1472 * In GOT_STATUS mode, always block waiting for a message, since we can't
1473 * return till we get something. In other modes, we don't block the first
1474 * time through the loop.
1475 */
1476 if (mode == WFW_GOT_STATUS)
1477 {
1478 /* Assert that caller knows what it's doing */
1479 Assert(!IsEveryWorkerIdle(pstate));
1480 do_wait = true;
1481 }
1482
1483 for (;;)
1484 {
1485 /*
1486 * Check for status messages, even if we don't need to block. We do
1487 * not try very hard to reap all available messages, though, since
1488 * there's unlikely to be more than one.
1489 */
1490 if (ListenToWorkers(AH, pstate, do_wait))
1491 {
1492 /*
1493 * If we got a message, we are done by definition for GOT_STATUS
1494 * mode, and we can also be certain that there's at least one idle
1495 * worker. So we're done in all but ALL_IDLE mode.
1496 */
1497 if (mode != WFW_ALL_IDLE)
1498 return;
1499 }
1500
1501 /* Check whether we must wait for new status messages */
1502 switch (mode)
1503 {
1504 case WFW_NO_WAIT:
1505 return; /* never wait */
1506 case WFW_GOT_STATUS:
1507 Assert(false); /* can't get here, because we waited */
1508 break;
1509 case WFW_ONE_IDLE:
1510 if (GetIdleWorker(pstate) != NO_SLOT)
1511 return;
1512 break;
1513 case WFW_ALL_IDLE:
1514 if (IsEveryWorkerIdle(pstate))
1515 return;
1516 break;
1517 }
1518
1519 /* Loop back, and this time wait for something to happen */
1520 do_wait = true;
1521 }
1522}

References Assert, do_wait, GetIdleWorker(), IsEveryWorkerIdle(), ListenToWorkers(), mode, NO_SLOT, WFW_ALL_IDLE, WFW_GOT_STATUS, WFW_NO_WAIT, and WFW_ONE_IDLE.

Referenced by DispatchJobForTocEntry(), restore_toc_entries_parallel(), and WriteDataChunks().

Variable Documentation

◆ shutdown_info

◆ signal_info