PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
postmaster.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * postmaster.c
4  * This program acts as a clearing house for requests to the
5  * POSTGRES system. Frontend programs send a startup message
6  * to the Postmaster and the postmaster uses the info in the
7  * message to setup a backend process.
8  *
9  * The postmaster also manages system-wide operations such as
10  * startup and shutdown. The postmaster itself doesn't do those
11  * operations, mind you --- it just forks off a subprocess to do them
12  * at the right times. It also takes care of resetting the system
13  * if a backend crashes.
14  *
15  * The postmaster process creates the shared memory and semaphore
16  * pools during startup, but as a rule does not touch them itself.
17  * In particular, it is not a member of the PGPROC array of backends
18  * and so it cannot participate in lock-manager operations. Keeping
19  * the postmaster away from shared memory operations makes it simpler
20  * and more reliable. The postmaster is almost always able to recover
21  * from crashes of individual backends by resetting shared memory;
22  * if it did much with shared memory then it would be prone to crashing
23  * along with the backends.
24  *
25  * When a request message is received, we now fork() immediately.
26  * The child process performs authentication of the request, and
27  * then becomes a backend if successful. This allows the auth code
28  * to be written in a simple single-threaded style (as opposed to the
29  * crufty "poor man's multitasking" code that used to be needed).
30  * More importantly, it ensures that blockages in non-multithreaded
31  * libraries like SSL or PAM cannot cause denial of service to other
32  * clients.
33  *
34  *
35  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
36  * Portions Copyright (c) 1994, Regents of the University of California
37  *
38  *
39  * IDENTIFICATION
40  * src/backend/postmaster/postmaster.c
41  *
42  * NOTES
43  *
44  * Initialization:
45  * The Postmaster sets up shared memory data structures
46  * for the backends.
47  *
48  * Synchronization:
49  * The Postmaster shares memory with the backends but should avoid
50  * touching shared memory, so as not to become stuck if a crashing
51  * backend screws up locks or shared memory. Likewise, the Postmaster
52  * should never block on messages from frontend clients.
53  *
54  * Garbage Collection:
55  * The Postmaster cleans up after backends if they have an emergency
56  * exit and/or core dump.
57  *
58  * Error Reporting:
59  * Use write_stderr() only for reporting "interactive" errors
60  * (essentially, bogus arguments on the command line). Once the
61  * postmaster is launched, use ereport().
62  *
63  *-------------------------------------------------------------------------
64  */
65 
66 #include "postgres.h"
67 
68 #include <unistd.h>
69 #include <signal.h>
70 #include <time.h>
71 #include <sys/wait.h>
72 #include <ctype.h>
73 #include <sys/stat.h>
74 #include <sys/socket.h>
75 #include <fcntl.h>
76 #include <sys/param.h>
77 #include <netinet/in.h>
78 #include <arpa/inet.h>
79 #include <netdb.h>
80 #include <limits.h>
81 
82 #ifdef HAVE_SYS_SELECT_H
83 #include <sys/select.h>
84 #endif
85 
86 #ifdef USE_BONJOUR
87 #include <dns_sd.h>
88 #endif
89 
90 #ifdef USE_SYSTEMD
91 #include <systemd/sd-daemon.h>
92 #endif
93 
94 #ifdef HAVE_PTHREAD_IS_THREADED_NP
95 #include <pthread.h>
96 #endif
97 
98 #include "access/transam.h"
99 #include "access/xlog.h"
100 #include "bootstrap/bootstrap.h"
101 #include "catalog/pg_control.h"
102 #include "common/ip.h"
103 #include "lib/ilist.h"
104 #include "libpq/auth.h"
105 #include "libpq/libpq.h"
106 #include "libpq/pqsignal.h"
107 #include "miscadmin.h"
108 #include "pg_getopt.h"
109 #include "pgstat.h"
110 #include "postmaster/autovacuum.h"
112 #include "postmaster/fork_process.h"
113 #include "postmaster/pgarch.h"
114 #include "postmaster/postmaster.h"
115 #include "postmaster/syslogger.h"
117 #include "replication/walsender.h"
118 #include "storage/fd.h"
119 #include "storage/ipc.h"
120 #include "storage/pg_shmem.h"
121 #include "storage/pmsignal.h"
122 #include "storage/proc.h"
123 #include "tcop/tcopprot.h"
124 #include "utils/builtins.h"
125 #include "utils/datetime.h"
126 #include "utils/dynamic_loader.h"
127 #include "utils/memutils.h"
128 #include "utils/ps_status.h"
129 #include "utils/timeout.h"
130 #include "utils/varlena.h"
131 
132 #ifdef EXEC_BACKEND
133 #include "storage/spin.h"
134 #endif
135 
136 
137 /*
138  * Possible types of a backend. Beyond being the possible bkend_type values in
139  * struct bkend, these are OR-able request flag bits for SignalSomeChildren()
140  * and CountChildren().
141  */
142 #define BACKEND_TYPE_NORMAL 0x0001 /* normal backend */
143 #define BACKEND_TYPE_AUTOVAC 0x0002 /* autovacuum worker process */
144 #define BACKEND_TYPE_WALSND 0x0004 /* walsender process */
145 #define BACKEND_TYPE_BGWORKER 0x0008 /* bgworker process */
146 #define BACKEND_TYPE_ALL 0x000F /* OR of all the above */
147 
148 #define BACKEND_TYPE_WORKER (BACKEND_TYPE_AUTOVAC | BACKEND_TYPE_BGWORKER) /* mask matching either kind of worker: autovacuum worker or bgworker */
149 
150 /*
151  * List of active backends (or child processes anyway; we don't actually
152  * know whether a given child has become a backend or is still in the
153  * authorization phase). This is used mainly to keep track of how many
154  * children we have and send them appropriate signals when necessary.
155  *
156  * "Special" children such as the startup, bgwriter and autovacuum launcher
157  * tasks are not in this list. Autovacuum workers and walsenders are in it.
158  * Also, "dead_end" children are in it: these are children launched just for
159  * the purpose of sending a friendly rejection message to a would-be client.
160  * We must track them because they are attached to shared memory, but we know
161  * they will never become live backends. dead_end children are not assigned a
162  * PMChildSlot.
163  *
164  * Background workers are in this list, too.
165  */
166 typedef struct bkend
167 {
168  pid_t pid; /* process id of backend */
169  int32 cancel_key; /* cancel key for cancels for this backend */
170  int child_slot; /* PMChildSlot for this backend, if any */
171 
172  /*
173  * Flavor of backend or auxiliary process. Note that BACKEND_TYPE_WALSND
174  * backends initially announce themselves as BACKEND_TYPE_NORMAL, so if
175  * bkend_type is normal, you should check for a recent transition.
176  */
 /*
  * NOTE(review): the bkend_type member described above is not declared in
  * this listing (listing line 177 is absent) -- presumably it was dropped
  * when the listing was extracted; confirm against the original file.
  */
178  bool dead_end; /* is it going to send an error and quit? */
179  bool bgworker_notify; /* gets bgworker start/stop notifications */
180  dlist_node elem; /* list link in BackendList */
181 } Backend;
182 
184 
185 #ifdef EXEC_BACKEND
186 static Backend *ShmemBackendArray;
187 #endif
188 
190 
191 
192 
193 /* The socket number we are listening for connections on */
195 
196 /* The directory names for Unix socket(s) */
198 
199 /* The TCP listen address(es) */
201 
202 /*
203  * ReservedBackends is the number of backends reserved for superuser use.
204  * This number is taken out of the pool size given by MaxBackends so
205  * the number of backend slots available to non-superusers is
206  * (MaxBackends - ReservedBackends). Note what this really means is
207  * "if there are <= ReservedBackends connections available, only superusers
208  * can make new connections" --- pre-existing superuser connections don't
209  * count against the limit.
210  */
212 
213 /* The socket(s) we're listening to. */
214 #define MAXLISTEN 64
216 
217 /*
218  * Set by the -o option
219  */
220 static char ExtraOptions[MAXPGPATH];
221 
222 /*
223  * These globals control the behavior of the postmaster in case some
224  * backend dumps core. Normally, it kills all peers of the dead backend
225  * and reinitializes shared memory. By specifying -T or -n, we can have
226  * the postmaster stop (rather than kill) peers and not reinitialize
227  * shared data structures. (Reinit is currently dead code, though.)
228  */
229 static bool Reinit = true; /* cleared by the -n switch */
230 static int SendStop = false; /* set by the -T switch; used as a boolean flag */
231 
232 /* still more option variables */
233 bool EnableSSL = false;
234 
235 int PreAuthDelay = 0;
237 
238 bool log_hostname; /* for ps display and logging */
239 bool Log_connections = false;
240 bool Db_user_namespace = false;
241 
242 bool enable_bonjour = false;
245 
246 /* PIDs of special child processes; 0 when not running */
247 static pid_t StartupPID = 0,
256 
257 /* Startup process's status */
258 typedef enum
259 {
262  STARTUP_SIGNALED, /* we sent it a SIGQUIT or SIGKILL */
265 
267 
268 /* Startup/shutdown state */
269 #define NoShutdown 0
270 #define SmartShutdown 1
271 #define FastShutdown 2
272 #define ImmediateShutdown 3
273 
274 static int Shutdown = NoShutdown;
275 
276 static bool FatalError = false; /* T if recovering from backend crash */
277 
278 /*
279  * We use a simple state machine to control startup, shutdown, and
280  * crash recovery (which is rather like shutdown followed by startup).
281  *
282  * After doing all the postmaster initialization work, we enter PM_STARTUP
283  * state and the startup process is launched. The startup process begins by
284  * reading the control file and other preliminary initialization steps.
285  * In a normal startup, or after crash recovery, the startup process exits
286  * with exit code 0 and we switch to PM_RUN state. However, archive recovery
287  * is handled specially since it takes much longer and we would like to support
288  * hot standby during archive recovery.
289  *
290  * When the startup process is ready to start archive recovery, it signals the
291  * postmaster, and we switch to PM_RECOVERY state. The background writer and
292  * checkpointer are launched, while the startup process continues applying WAL.
293  * If Hot Standby is enabled, then, after reaching a consistent point in WAL
294  * redo, startup process signals us again, and we switch to PM_HOT_STANDBY
295  * state and begin accepting connections to perform read-only queries. When
296  * archive recovery is finished, the startup process exits with exit code 0
297  * and we switch to PM_RUN state.
298  *
299  * Normal child backends can only be launched when we are in PM_RUN or
300  * PM_HOT_STANDBY state. (We also allow launch of normal
301  * child backends in PM_WAIT_BACKUP state, but only for superusers.)
302  * In other states we handle connection requests by launching "dead_end"
303  * child processes, which will simply send the client an error message and
304  * quit. (We track these in the BackendList so that we can know when they
305  * are all gone; this is important because they're still connected to shared
306  * memory, and would interfere with an attempt to destroy the shmem segment,
307  * possibly leading to SHMALL failure when we try to make a new one.)
308  * In PM_WAIT_DEAD_END state we are waiting for all the dead_end children
309  * to drain out of the system, and therefore stop accepting connection
310  * requests at all until the last existing child has quit (which hopefully
311  * will not be very long).
312  *
313  * Notice that this state variable does not distinguish *why* we entered
314  * states later than PM_RUN --- Shutdown and FatalError must be consulted
315  * to find that out. FatalError is never true in PM_RECOVERY, PM_HOT_STANDBY or PM_RUN
316  * states, nor in PM_SHUTDOWN states (because we don't enter those states
317  * when trying to recover from a crash). It can be true in PM_STARTUP state,
318  * because we don't clear it until we've successfully started WAL redo.
319  */
/*
 * States of the postmaster state machine.
 *
 * NOTE(review): the state-machine description preceding this enum speaks of
 * "states later than PM_RUN", so the declaration order presumably matters to
 * ordinal comparisons elsewhere -- confirm before reordering or inserting
 * enumerators.
 */
320 typedef enum
321 {
322  PM_INIT, /* postmaster starting */
323  PM_STARTUP, /* waiting for startup subprocess */
324  PM_RECOVERY, /* in archive recovery mode */
325  PM_HOT_STANDBY, /* in hot standby mode */
326  PM_RUN, /* normal "database is alive" state */
327  PM_WAIT_BACKUP, /* waiting for online backup mode to end */
328  PM_WAIT_READONLY, /* waiting for read only backends to exit */
329  PM_WAIT_BACKENDS, /* waiting for live backends to exit */
330  PM_SHUTDOWN, /* waiting for checkpointer to do shutdown
331  * ckpt */
332  PM_SHUTDOWN_2, /* waiting for archiver and walsenders to
333  * finish */
334  PM_WAIT_DEAD_END, /* waiting for dead_end children to exit */
335  PM_NO_CHILDREN /* all important children have exited */
336 } PMState;
337 
339 
340 /* Start time of SIGKILL timeout during immediate shutdown or child crash */
341 /* Zero means timeout is not running */
342 static time_t AbortStartTime = 0;
343 /* Length of said timeout */
344 #define SIGKILL_CHILDREN_AFTER_SECS 5
345 
346 static bool ReachedNormalRunning = false; /* T if we've reached PM_RUN */
347 
348 bool ClientAuthInProgress = false; /* T during new-client
349  * authentication */
350 
351 bool redirection_done = false; /* stderr redirected for syslogger? */
352 
353 /* received START_AUTOVAC_LAUNCHER signal */
354 static volatile sig_atomic_t start_autovac_launcher = false;
355 
356 /* the launcher needs to be signalled to communicate some condition */
357 static volatile bool avlauncher_needs_signal = false;
358 
359 /* set when there's a worker that needs to be started up */
360 static volatile bool StartWorkerNeeded = true;
361 static volatile bool HaveCrashedWorker = false;
362 
363 #ifndef HAVE_STRONG_RANDOM
364 /*
365  * State for assigning cancel keys.
366  * Also, the global MyCancelKey passes the cancel key assigned to a given
367  * backend from the postmaster to that backend (via fork).
368  */
369 static unsigned int random_seed = 0;
370 static struct timeval random_start_time;
371 #endif
372 
373 #ifdef USE_SSL
374 /* Set when and if SSL has been initialized properly */
375 static bool LoadedSSL = false;
376 #endif
377 
378 #ifdef USE_BONJOUR
379 static DNSServiceRef bonjour_sdref = NULL;
380 #endif
381 
382 /*
383  * postmaster.c - function prototypes
384  */
385 static void CloseServerPorts(int status, Datum arg);
386 static void unlink_external_pid_file(int status, Datum arg);
387 static void getInstallationPaths(const char *argv0);
388 static void checkDataDir(void);
389 static Port *ConnCreate(int serverFd);
390 static void ConnFree(Port *port);
391 static void reset_shared(int port);
392 static void SIGHUP_handler(SIGNAL_ARGS);
393 static void pmdie(SIGNAL_ARGS);
394 static void reaper(SIGNAL_ARGS);
395 static void sigusr1_handler(SIGNAL_ARGS);
396 static void startup_die(SIGNAL_ARGS);
397 static void dummy_handler(SIGNAL_ARGS);
398 static void StartupPacketTimeoutHandler(void);
399 static void CleanupBackend(int pid, int exitstatus);
400 static bool CleanupBackgroundWorker(int pid, int exitstatus);
401 static void HandleChildCrash(int pid, int exitstatus, const char *procname);
402 static void LogChildExit(int lev, const char *procname,
403  int pid, int exitstatus);
404 static void PostmasterStateMachine(void);
405 static void BackendInitialize(Port *port);
406 static void BackendRun(Port *port) pg_attribute_noreturn();
407 static void ExitPostmaster(int status) pg_attribute_noreturn();
408 static int ServerLoop(void);
409 static int BackendStartup(Port *port);
410 static int ProcessStartupPacket(Port *port, bool SSLdone);
411 static void processCancelRequest(Port *port, void *pkt);
412 static int initMasks(fd_set *rmask);
413 static void report_fork_failure_to_client(Port *port, int errnum);
414 static CAC_state canAcceptConnections(void);
415 static bool RandomCancelKey(int32 *cancel_key);
416 static void signal_child(pid_t pid, int signal);
417 static bool SignalSomeChildren(int signal, int targets);
418 static void TerminateChildren(int signal);
419 
420 #define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL)
421 
422 static int CountChildren(int target);
423 static void maybe_start_bgworker(void);
424 static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
425 static pid_t StartChildProcess(AuxProcType type);
426 static void StartAutovacuumWorker(void);
427 static void InitPostmasterDeathWatchHandle(void);
428 
429 /*
430  * Is the archiver allowed to start up in the current postmaster state?
431  *
432  * Allowed in PM_RUN when XLogArchivingActive(); additionally allowed during
433  * recovery (PM_RECOVERY or PM_HOT_STANDBY) when XLogArchivingAlways().
434  */
435 #define PgArchStartupAllowed() \
436  ((XLogArchivingActive() && pmState == PM_RUN) || \
437  (XLogArchivingAlways() && \
438  (pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY)))
439 
440 #ifdef EXEC_BACKEND
441 
442 #ifdef WIN32
443 #define WNOHANG 0 /* ignored, so any integer value will do */
444 
445 static pid_t waitpid(pid_t pid, int *exitstatus, int options);
446 static void WINAPI pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired);
447 
448 static HANDLE win32ChildQueue;
449 
450 typedef struct
451 {
452  HANDLE waitHandle;
453  HANDLE procHandle;
454  DWORD procId;
455 } win32_deadchild_waitinfo;
456 #endif /* WIN32 */
457 
458 static pid_t backend_forkexec(Port *port);
459 static pid_t internal_forkexec(int argc, char *argv[], Port *port);
460 
461 /* Type for a socket that can be inherited to a client process */
462 #ifdef WIN32
463 typedef struct
464 {
465  SOCKET origsocket; /* Original socket value, or PGINVALID_SOCKET
466  * if not a socket */
467  WSAPROTOCOL_INFO wsainfo;
468 } InheritableSocket;
469 #else
470 typedef int InheritableSocket;
471 #endif
472 
473 /*
474  * Structure containing all variables passed to exec'ed backends
475  *
 * NOTE(review): the listing numbers have gaps inside this struct (e.g.
 * 480 -> 483, 489 -> 492, 495 -> 504, 505 -> 509), so some members are
 * presumably missing from this view; the empty "#ifndef HAVE_SPINLOCKS"
 * section is likely one such casualty. Confirm against the original file.
 */
476 typedef struct
477 {
478  Port port;
479  InheritableSocket portsocket;
480  char DataDir[MAXPGPATH];
483  int MyPMChildSlot;
484 #ifndef WIN32
485  unsigned long UsedShmemSegID;
486 #else
487  HANDLE UsedShmemSegID;
488 #endif
489  void *UsedShmemSegAddr;
492  Backend *ShmemBackendArray;
493 #ifndef HAVE_SPINLOCKS
495 #endif
504  InheritableSocket pgStatSock;
505  pid_t PostmasterPid;
509  bool redirection_done;
510  bool IsBinaryUpgrade;
511  int max_safe_fds;
512  int MaxBackends;
513 #ifdef WIN32
514  HANDLE PostmasterHandle;
515  HANDLE initial_signal_pipe;
516  HANDLE syslogPipe[2];
517 #else
518  int postmaster_alive_fds[2];
519  int syslogPipe[2];
520 #endif
521  char my_exec_path[MAXPGPATH];
522  char pkglib_path[MAXPGPATH];
523  char ExtraOptions[MAXPGPATH];
524 } BackendParameters;
525 
526 static void read_backend_variables(char *id, Port *port);
527 static void restore_backend_variables(BackendParameters *param, Port *port);
528 
529 #ifndef WIN32
530 static bool save_backend_variables(BackendParameters *param, Port *port);
531 #else
532 static bool save_backend_variables(BackendParameters *param, Port *port,
533  HANDLE childProcess, pid_t childPid);
534 #endif
535 
536 static void ShmemBackendArrayAdd(Backend *bn);
537 static void ShmemBackendArrayRemove(Backend *bn);
538 #endif /* EXEC_BACKEND */
539 
/*
 * Shorthands that launch one auxiliary process of the named type by calling
 * StartChildProcess() with the matching AuxProcType value; each expands to
 * that call and so yields its pid_t result.
 */
540 #define StartupDataBase() StartChildProcess(StartupProcess)
541 #define StartBackgroundWriter() StartChildProcess(BgWriterProcess)
542 #define StartCheckpointer() StartChildProcess(CheckpointerProcess)
543 #define StartWalWriter() StartChildProcess(WalWriterProcess)
544 #define StartWalReceiver() StartChildProcess(WalReceiverProcess)
545 
546 /*
 * Macros to check exit status of a child process.
 *
 * EXIT_STATUS_0 tests the whole status word for zero. EXIT_STATUS_1 and
 * EXIT_STATUS_3 test for a normal exit (WIFEXITED) whose exit code
 * (WEXITSTATUS) is 1 or 3 respectively.
 */
547 #define EXIT_STATUS_0(st) ((st) == 0)
548 #define EXIT_STATUS_1(st) (WIFEXITED(st) && WEXITSTATUS(st) == 1)
549 #define EXIT_STATUS_3(st) (WIFEXITED(st) && WEXITSTATUS(st) == 3)
550 
551 #ifndef WIN32
552 /*
553  * File descriptors for pipe used to monitor if postmaster is alive.
554  * First is POSTMASTER_FD_WATCH, second is POSTMASTER_FD_OWN.
555  */
556 int postmaster_alive_fds[2] = {-1, -1};
557 #else
558 /* Process handle of postmaster used for the same purpose on Windows */
559 HANDLE PostmasterHandle;
560 #endif
561 
562 /*
563  * Postmaster main entry point
564  */
565 void
566 PostmasterMain(int argc, char *argv[])
567 {
568  int opt;
569  int status;
570  char *userDoption = NULL;
571  bool listen_addr_saved = false;
572  int i;
573  char *output_config_variable = NULL;
574 
575  MyProcPid = PostmasterPid = getpid();
576 
577  MyStartTime = time(NULL);
578 
580 
581  /*
582  * for security, no dir or file created can be group or other accessible
583  */
584  umask(S_IRWXG | S_IRWXO);
585 
586  /*
587  * Initialize random(3) so we don't get the same values in every run.
588  *
589  * Note: the seed is pretty predictable from externally-visible facts such
590  * as postmaster start time, so avoid using random() for security-critical
591  * random values during postmaster startup. At the time of first
592  * connection, PostmasterRandom will select a hopefully-more-random seed.
593  */
594  srandom((unsigned int) (MyProcPid ^ MyStartTime));
595 
596  /*
597  * By default, palloc() requests in the postmaster will be allocated in
598  * the PostmasterContext, which is space that can be recycled by backends.
599  * Allocated data that needs to be available to backends should be
600  * allocated in TopMemoryContext.
601  */
603  "Postmaster",
606 
607  /* Initialize paths to installation files */
608  getInstallationPaths(argv[0]);
609 
610  /*
611  * Set up signal handlers for the postmaster process.
612  *
613  * CAUTION: when changing this list, check for side-effects on the signal
614  * handling setup of child processes. See tcop/postgres.c,
615  * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
616  * postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/pgstat.c,
617  * postmaster/syslogger.c, postmaster/bgworker.c and
618  * postmaster/checkpointer.c.
619  */
620  pqinitmask();
622 
623  pqsignal(SIGHUP, SIGHUP_handler); /* reread config file and have
624  * children do same */
625  pqsignal(SIGINT, pmdie); /* send SIGTERM and shut down */
626  pqsignal(SIGQUIT, pmdie); /* send SIGQUIT and die */
627  pqsignal(SIGTERM, pmdie); /* wait for children and shut down */
628  pqsignal(SIGALRM, SIG_IGN); /* ignored */
629  pqsignal(SIGPIPE, SIG_IGN); /* ignored */
630  pqsignal(SIGUSR1, sigusr1_handler); /* message from child process */
631  pqsignal(SIGUSR2, dummy_handler); /* unused, reserve for children */
632  pqsignal(SIGCHLD, reaper); /* handle child termination */
633  pqsignal(SIGTTIN, SIG_IGN); /* ignored */
634  pqsignal(SIGTTOU, SIG_IGN); /* ignored */
635  /* ignore SIGXFSZ, so that ulimit violations work like disk full */
636 #ifdef SIGXFSZ
637  pqsignal(SIGXFSZ, SIG_IGN); /* ignored */
638 #endif
639 
640  /*
641  * Options setup
642  */
644 
645  opterr = 1;
646 
647  /*
648  * Parse command-line options. CAUTION: keep this in sync with
649  * tcop/postgres.c (the option sets should not conflict) and with the
650  * common help() function in main/main.c.
651  */
652  while ((opt = getopt(argc, argv, "B:bc:C:D:d:EeFf:h:ijk:lN:nOo:Pp:r:S:sTt:W:-:")) != -1)
653  {
654  switch (opt)
655  {
656  case 'B':
657  SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
658  break;
659 
660  case 'b':
661  /* Undocumented flag used for binary upgrades */
662  IsBinaryUpgrade = true;
663  break;
664 
665  case 'C':
666  output_config_variable = strdup(optarg);
667  break;
668 
669  case 'D':
670  userDoption = strdup(optarg);
671  break;
672 
673  case 'd':
675  break;
676 
677  case 'E':
678  SetConfigOption("log_statement", "all", PGC_POSTMASTER, PGC_S_ARGV);
679  break;
680 
681  case 'e':
682  SetConfigOption("datestyle", "euro", PGC_POSTMASTER, PGC_S_ARGV);
683  break;
684 
685  case 'F':
686  SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
687  break;
688 
689  case 'f':
691  {
692  write_stderr("%s: invalid argument for option -f: \"%s\"\n",
693  progname, optarg);
694  ExitPostmaster(1);
695  }
696  break;
697 
698  case 'h':
699  SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
700  break;
701 
702  case 'i':
703  SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
704  break;
705 
706  case 'j':
707  /* only used by interactive backend */
708  break;
709 
710  case 'k':
711  SetConfigOption("unix_socket_directories", optarg, PGC_POSTMASTER, PGC_S_ARGV);
712  break;
713 
714  case 'l':
715  SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
716  break;
717 
718  case 'N':
719  SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
720  break;
721 
722  case 'n':
723  /* Don't reinit shared mem after abnormal exit */
724  Reinit = false;
725  break;
726 
727  case 'O':
728  SetConfigOption("allow_system_table_mods", "true", PGC_POSTMASTER, PGC_S_ARGV);
729  break;
730 
731  case 'o':
732  /* Other options to pass to the backend on the command line */
734  sizeof(ExtraOptions) - strlen(ExtraOptions),
735  " %s", optarg);
736  break;
737 
738  case 'P':
739  SetConfigOption("ignore_system_indexes", "true", PGC_POSTMASTER, PGC_S_ARGV);
740  break;
741 
742  case 'p':
744  break;
745 
746  case 'r':
747  /* only used by single-user backend */
748  break;
749 
750  case 'S':
752  break;
753 
754  case 's':
755  SetConfigOption("log_statement_stats", "true", PGC_POSTMASTER, PGC_S_ARGV);
756  break;
757 
758  case 'T':
759 
760  /*
761  * In the event that some backend dumps core, send SIGSTOP,
762  * rather than SIGQUIT, to all its peers. This lets the wily
763  * post_hacker collect core dumps from everyone.
764  */
765  SendStop = true;
766  break;
767 
768  case 't':
769  {
770  const char *tmp = get_stats_option_name(optarg);
771 
772  if (tmp)
773  {
775  }
776  else
777  {
778  write_stderr("%s: invalid argument for option -t: \"%s\"\n",
779  progname, optarg);
780  ExitPostmaster(1);
781  }
782  break;
783  }
784 
785  case 'W':
786  SetConfigOption("post_auth_delay", optarg, PGC_POSTMASTER, PGC_S_ARGV);
787  break;
788 
789  case 'c':
790  case '-':
791  {
792  char *name,
793  *value;
794 
795  ParseLongOption(optarg, &name, &value);
796  if (!value)
797  {
798  if (opt == '-')
799  ereport(ERROR,
800  (errcode(ERRCODE_SYNTAX_ERROR),
801  errmsg("--%s requires a value",
802  optarg)));
803  else
804  ereport(ERROR,
805  (errcode(ERRCODE_SYNTAX_ERROR),
806  errmsg("-c %s requires a value",
807  optarg)));
808  }
809 
811  free(name);
812  if (value)
813  free(value);
814  break;
815  }
816 
817  default:
818  write_stderr("Try \"%s --help\" for more information.\n",
819  progname);
820  ExitPostmaster(1);
821  }
822  }
823 
824  /*
825  * Postmaster accepts no non-option switch arguments.
826  */
827  if (optind < argc)
828  {
829  write_stderr("%s: invalid argument: \"%s\"\n",
830  progname, argv[optind]);
831  write_stderr("Try \"%s --help\" for more information.\n",
832  progname);
833  ExitPostmaster(1);
834  }
835 
836  /*
837  * Locate the proper configuration files and data directory, and read
838  * postgresql.conf for the first time.
839  */
840  if (!SelectConfigFiles(userDoption, progname))
841  ExitPostmaster(2);
842 
843  if (output_config_variable != NULL)
844  {
845  /*
846  * "-C guc" was specified, so print GUC's value and exit. No extra
847  * permission check is needed because the user is reading inside the
848  * data dir.
849  */
850  const char *config_val = GetConfigOption(output_config_variable,
851  false, false);
852 
853  puts(config_val ? config_val : "");
854  ExitPostmaster(0);
855  }
856 
857  /* Verify that DataDir looks reasonable */
858  checkDataDir();
859 
860  /* And switch working directory into it */
861  ChangeToDataDir();
862 
863  /*
864  * Check for invalid combinations of GUC settings.
865  */
867  {
868  write_stderr("%s: superuser_reserved_connections must be less than max_connections\n", progname);
869  ExitPostmaster(1);
870  }
872  {
873  write_stderr("%s: max_wal_senders must be less than max_connections\n", progname);
874  ExitPostmaster(1);
875  }
877  ereport(ERROR,
878  (errmsg("WAL archival cannot be enabled when wal_level is \"minimal\"")));
880  ereport(ERROR,
881  (errmsg("WAL streaming (max_wal_senders > 0) requires wal_level \"replica\" or \"logical\"")));
882 
883  /*
884  * Other one-time internal sanity checks can go here, if they are fast.
885  * (Put any slow processing further down, after postmaster.pid creation.)
886  */
887  if (!CheckDateTokenTables())
888  {
889  write_stderr("%s: invalid datetoken tables, please fix\n", progname);
890  ExitPostmaster(1);
891  }
892 
893  /*
894  * Now that we are done processing the postmaster arguments, reset
895  * getopt(3) library so that it will work correctly in subprocesses.
896  */
897  optind = 1;
898 #ifdef HAVE_INT_OPTRESET
899  optreset = 1; /* some systems need this too */
900 #endif
901 
902  /* For debugging: display postmaster environment */
903  {
904  extern char **environ;
905  char **p;
906 
907  ereport(DEBUG3,
908  (errmsg_internal("%s: PostmasterMain: initial environment dump:",
909  progname)));
910  ereport(DEBUG3,
911  (errmsg_internal("-----------------------------------------")));
912  for (p = environ; *p; ++p)
913  ereport(DEBUG3,
914  (errmsg_internal("\t%s", *p)));
915  ereport(DEBUG3,
916  (errmsg_internal("-----------------------------------------")));
917  }
918 
919  /*
920  * Create lockfile for data directory.
921  *
922  * We want to do this before we try to grab the input sockets, because the
923  * data directory interlock is more reliable than the socket-file
924  * interlock (thanks to whoever decided to put socket files in /tmp :-().
925  * For the same reason, it's best to grab the TCP socket(s) before the
926  * Unix socket(s).
927  *
928  * Also note that this internally sets up the on_proc_exit function that
929  * is responsible for removing both data directory and socket lockfiles;
930  * so it must happen before opening sockets so that at exit, the socket
931  * lockfiles go away after CloseServerPorts runs.
932  */
933  CreateDataDirLockFile(true);
934 
935  /*
936  * Initialize SSL library, if specified.
937  */
938 #ifdef USE_SSL
939  if (EnableSSL)
940  {
941  (void) secure_initialize(true);
942  LoadedSSL = true;
943  }
944 #endif
945 
946  /*
947  * Register the apply launcher. Since it registers a background worker,
948  * it needs to be called before InitializeMaxBackends(), and it's probably
949  * a good idea to call it before any modules had chance to take the
950  * background worker slots.
951  */
953 
954  /*
955  * process any libraries that should be preloaded at postmaster start
956  */
958 
959  /*
960  * Now that loadable modules have had their chance to register background
961  * workers, calculate MaxBackends.
962  */
964 
965  /*
966  * Establish input sockets.
967  *
968  * First, mark them all closed, and set up an on_proc_exit function that's
969  * charged with closing the sockets again at postmaster shutdown.
970  */
971  for (i = 0; i < MAXLISTEN; i++)
973 
975 
976  if (ListenAddresses)
977  {
978  char *rawstring;
979  List *elemlist;
980  ListCell *l;
981  int success = 0;
982 
983  /* Need a modifiable copy of ListenAddresses */
984  rawstring = pstrdup(ListenAddresses);
985 
986  /* Parse string into list of hostnames */
987  if (!SplitIdentifierString(rawstring, ',', &elemlist))
988  {
989  /* syntax error in list */
990  ereport(FATAL,
991  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
992  errmsg("invalid list syntax in parameter \"%s\"",
993  "listen_addresses")));
994  }
995 
996  foreach(l, elemlist)
997  {
998  char *curhost = (char *) lfirst(l);
999 
1000  if (strcmp(curhost, "*") == 0)
1001  status = StreamServerPort(AF_UNSPEC, NULL,
1002  (unsigned short) PostPortNumber,
1003  NULL,
1005  else
1006  status = StreamServerPort(AF_UNSPEC, curhost,
1007  (unsigned short) PostPortNumber,
1008  NULL,
1009  ListenSocket, MAXLISTEN);
1010 
1011  if (status == STATUS_OK)
1012  {
1013  success++;
1014  /* record the first successful host addr in lockfile */
1015  if (!listen_addr_saved)
1016  {
1018  listen_addr_saved = true;
1019  }
1020  }
1021  else
1022  ereport(WARNING,
1023  (errmsg("could not create listen socket for \"%s\"",
1024  curhost)));
1025  }
1026 
1027  if (!success && elemlist != NIL)
1028  ereport(FATAL,
1029  (errmsg("could not create any TCP/IP sockets")));
1030 
1031  list_free(elemlist);
1032  pfree(rawstring);
1033  }
1034 
1035 #ifdef USE_BONJOUR
1036  /* Register for Bonjour only if we opened TCP socket(s) */
1038  {
1039  DNSServiceErrorType err;
1040 
1041  /*
1042  * We pass 0 for interface_index, which will result in registering on
1043  * all "applicable" interfaces. It's not entirely clear from the
1044  * DNS-SD docs whether this would be appropriate if we have bound to
1045  * just a subset of the available network interfaces.
1046  */
1047  err = DNSServiceRegister(&bonjour_sdref,
1048  0,
1049  0,
1050  bonjour_name,
1051  "_postgresql._tcp.",
1052  NULL,
1053  NULL,
1054  htons(PostPortNumber),
1055  0,
1056  NULL,
1057  NULL,
1058  NULL);
1059  if (err != kDNSServiceErr_NoError)
1060  elog(LOG, "DNSServiceRegister() failed: error code %ld",
1061  (long) err);
1062 
1063  /*
1064  * We don't bother to read the mDNS daemon's reply, and we expect that
1065  * it will automatically terminate our registration when the socket is
1066  * closed at postmaster termination. So there's nothing more to be
1067  * done here. However, the bonjour_sdref is kept around so that
1068  * forked children can close their copies of the socket.
1069  */
1070  }
1071 #endif
1072 
1073 #ifdef HAVE_UNIX_SOCKETS
1075  {
1076  char *rawstring;
1077  List *elemlist;
1078  ListCell *l;
1079  int success = 0;
1080 
1081  /* Need a modifiable copy of Unix_socket_directories */
1082  rawstring = pstrdup(Unix_socket_directories);
1083 
1084  /* Parse string into list of directories */
1085  if (!SplitDirectoriesString(rawstring, ',', &elemlist))
1086  {
1087  /* syntax error in list */
1088  ereport(FATAL,
1089  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1090  errmsg("invalid list syntax in parameter \"%s\"",
1091  "unix_socket_directories")));
1092  }
1093 
1094  foreach(l, elemlist)
1095  {
1096  char *socketdir = (char *) lfirst(l);
1097 
1098  status = StreamServerPort(AF_UNIX, NULL,
1099  (unsigned short) PostPortNumber,
1100  socketdir,
1101  ListenSocket, MAXLISTEN);
1102 
1103  if (status == STATUS_OK)
1104  {
1105  success++;
1106  /* record the first successful Unix socket in lockfile */
1107  if (success == 1)
1109  }
1110  else
1111  ereport(WARNING,
1112  (errmsg("could not create Unix-domain socket in directory \"%s\"",
1113  socketdir)));
1114  }
1115 
1116  if (!success && elemlist != NIL)
1117  ereport(FATAL,
1118  (errmsg("could not create any Unix-domain sockets")));
1119 
1120  list_free_deep(elemlist);
1121  pfree(rawstring);
1122  }
1123 #endif
1124 
1125  /*
1126  * check that we have some socket to listen on
1127  */
1128  if (ListenSocket[0] == PGINVALID_SOCKET)
1129  ereport(FATAL,
1130  (errmsg("no socket created for listening")));
1131 
1132  /*
1133  * If no valid TCP ports, write an empty line for listen address,
1134  * indicating the Unix socket must be used. Note that this line is not
1135  * added to the lock file until there is a socket backing it.
1136  */
1137  if (!listen_addr_saved)
1139 
1140  /*
1141  * Set up shared memory and semaphores.
1142  */
1144 
1145  /*
1146  * Estimate number of openable files. This must happen after setting up
1147  * semaphores, because on some platforms semaphores count as open files.
1148  */
1149  set_max_safe_fds();
1150 
1151  /*
1152  * Set reference point for stack-depth checking.
1153  */
1154  set_stack_base();
1155 
1156  /*
1157  * Initialize pipe (or process handle on Windows) that allows children to
1158  * wake up from sleep on postmaster death.
1159  */
1161 
1162 #ifdef WIN32
1163 
1164  /*
1165  * Initialize I/O completion port used to deliver list of dead children.
1166  */
1167  win32ChildQueue = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 1);
1168  if (win32ChildQueue == NULL)
1169  ereport(FATAL,
1170  (errmsg("could not create I/O completion port for child queue")));
1171 #endif
1172 
1173  /*
1174  * Record postmaster options. We delay this till now to avoid recording
1175  * bogus options (eg, NBuffers too high for available memory).
1176  */
1177  if (!CreateOptsFile(argc, argv, my_exec_path))
1178  ExitPostmaster(1);
1179 
1180 #ifdef EXEC_BACKEND
1181  /* Write out nondefault GUC settings for child processes to use */
1182  write_nondefault_variables(PGC_POSTMASTER);
1183 #endif
1184 
1185  /*
1186  * Write the external PID file if requested
1187  */
1188  if (external_pid_file)
1189  {
1190  FILE *fpidfile = fopen(external_pid_file, "w");
1191 
1192  if (fpidfile)
1193  {
1194  fprintf(fpidfile, "%d\n", MyProcPid);
1195  fclose(fpidfile);
1196 
1197  /* Make PID file world readable */
1198  if (chmod(external_pid_file, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) != 0)
1199  write_stderr("%s: could not change permissions of external PID file \"%s\": %s\n",
1201  }
1202  else
1203  write_stderr("%s: could not write external PID file \"%s\": %s\n",
1205 
1207  }
1208 
1209  /*
1210  * Remove old temporary files. At this point there can be no other
1211  * Postgres processes running in this directory, so this should be safe.
1212  */
1214 
1215  /*
1216  * Forcibly remove the files signaling a standby promotion request.
1217  * Otherwise, the existence of those files triggers a promotion too early,
1218  * whether a user wants that or not.
1219  *
1220  * This removal of files is usually unnecessary because they can exist
1221  * only during a few moments during a standby promotion. However there is
1222  * a race condition: if pg_ctl promote is executed and creates the files
1223  * during a promotion, the files can stay around even after the server is
1224  * brought up to new master. Then, if new standby starts by using the
1225  * backup taken from that master, the files can exist at the server
1226  * startup and should be removed in order to avoid an unexpected
1227  * promotion.
1228  *
1229  * Note that promotion signal files need to be removed before the startup
1230  * process is invoked, because after that they can be used by the
1231  * postmaster's SIGUSR1 signal handler.
1232  */
1234 
1235  /*
1236  * If enabled, start up syslogger collection subprocess
1237  */
1239 
1240  /*
1241  * Reset whereToSendOutput from DestDebug (its starting state) to
1242  * DestNone. This stops ereport from sending log messages to stderr unless
1243  * Log_destination permits. We don't do this until the postmaster is
1244  * fully launched, since startup failures may as well be reported to
1245  * stderr.
1246  *
1247  * If we are in fact disabling logging to stderr, first emit a log message
1248  * saying so, to provide a breadcrumb trail for users who may not remember
1249  * that their logging is configured to go somewhere else.
1250  */
1252  ereport(LOG,
1253  (errmsg("ending log output to stderr"),
1254  errhint("Future log output will go to log destination \"%s\".",
1256 
1258 
1259  /*
1260  * Initialize stats collection subsystem (this does NOT start the
1261  * collector process!)
1262  */
1263  pgstat_init();
1264 
1265  /*
1266  * Initialize the autovacuum subsystem (again, no process start yet)
1267  */
1268  autovac_init();
1269 
1270  /*
1271  * Load configuration files for client authentication.
1272  */
1273  if (!load_hba())
1274  {
1275  /*
1276  * It makes no sense to continue if we fail to load the HBA file,
1277  * since there is no way to connect to the database in this case.
1278  */
1279  ereport(FATAL,
1280  (errmsg("could not load pg_hba.conf")));
1281  }
1282  if (!load_ident())
1283  {
1284  /*
1285  * We can start up without the IDENT file, although it means that you
1286  * cannot log in using any of the authentication methods that need a
1287  * user name mapping. load_ident() already logged the details of error
1288  * to the log.
1289  */
1290  }
1291 
1292 #ifdef HAVE_PTHREAD_IS_THREADED_NP
1293 
1294  /*
1295  * On macOS, libintl replaces setlocale() with a version that calls
1296  * CFLocaleCopyCurrent() when its second argument is "" and every relevant
1297  * environment variable is unset or empty. CFLocaleCopyCurrent() makes
1298  * the process multithreaded. The postmaster calls sigprocmask() and
1299  * calls fork() without an immediate exec(), both of which have undefined
1300  * behavior in a multithreaded program. A multithreaded postmaster is the
1301  * normal case on Windows, which offers neither fork() nor sigprocmask().
1302  */
1303  if (pthread_is_threaded_np() != 0)
1304  ereport(FATAL,
1305  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1306  errmsg("postmaster became multithreaded during startup"),
1307  errhint("Set the LC_ALL environment variable to a valid locale.")));
1308 #endif
1309 
1310  /*
1311  * Remember postmaster startup time
1312  */
1314 #ifndef HAVE_STRONG_RANDOM
1315  /* RandomCancelKey wants its own copy */
1317 #endif
1318 
1319  /*
1320  * We're ready to rock and roll...
1321  */
1323  Assert(StartupPID != 0);
1325  pmState = PM_STARTUP;
1326 
1327  /* Some workers may be scheduled to start now */
1329 
1330  status = ServerLoop();
1331 
1332  /*
1333  * ServerLoop probably shouldn't ever return, but if it does, close down.
1334  */
1335  ExitPostmaster(status != STATUS_OK);
1336 
1337  abort(); /* not reached */
1338 }
1339 
1340 
1341 /*
1342  * on_proc_exit callback to close server's listen sockets
 *
 * Visits every one of the MAXLISTEN slots of ListenSocket[] and acts on each
 * slot that does not hold PGINVALID_SOCKET.  The order of the steps matters;
 * see the comments inside the body.
 *
 * NOTE(review): this listing has lost the line carrying the function name
 * and parameter list (it belongs between "static void" and the opening
 * brace), and also listing lines 1359-1360 and 1369.  Restore them from the
 * authoritative source before relying on this text.
1343  */
1344 static void
1346 {
1347  int i;
1348 
1349  /*
1350  * First, explicitly close all the socket FDs. We used to just let this
1351  * happen implicitly at postmaster exit, but it's better to close them
1352  * before we remove the postmaster.pid lockfile; otherwise there's a race
1353  * condition if a new postmaster wants to re-use the TCP port number.
1354  */
1355  for (i = 0; i < MAXLISTEN; i++)
1356  {
1357  if (ListenSocket[i] != PGINVALID_SOCKET)
1358  {
   /*
    * NOTE(review): the body of this branch (listing lines 1359-1360) is
    * absent here; going by the comment above it presumably closes the
    * socket -- confirm against the full source.
    */
1361  }
1362  }
1363 
1364  /*
1365  * Next, remove any filesystem entries for Unix sockets. To avoid race
1366  * conditions against incoming postmasters, this must happen after closing
1367  * the sockets and before removing lock files.
1368  */
  /*
   * NOTE(review): listing line 1369, presumably the call that performs the
   * removal described above, is absent here.
   */
1370 
1371  /*
1372  * We don't do anything about socket lock files here; those will be
1373  * removed in a later on_proc_exit callback.
1374  */
1375 }
1376 
1377 /*
1378  * on_proc_exit callback to delete external_pid_file
 *
 * The only visible logic is a guard on external_pid_file being set, so
 * nothing is attempted when no external PID file was configured.
 *
 * NOTE(review): this listing has lost the line carrying the function name
 * and parameter list (listing line 1381) and the statement guarded by the
 * "if" (listing line 1384).  As shown, the block is not valid C; restore
 * both lines from the authoritative source.
1379  */
1380 static void
1382 {
1383  if (external_pid_file)
  /*
   * NOTE(review): guarded statement missing here; presumably it removes
   * the file named by external_pid_file -- confirm.
   */
1385 }
1386 
1387 
1388 /*
1389  * Compute and check the directory paths to files that are part of the
1390  * installation (as deduced from the postgres executable's own location)
 *
 * Steps visible in this listing:
 *	- call find_my_exec(argv0, my_exec_path); a negative result is FATAL.
 *	- under EXEC_BACKEND only, call find_other_exec() for "postgres" with
 *	  postgres_exec_path as the destination; a negative result is FATAL.
 *	- open pkglib_path with AllocateDir(); a NULL result raises ERROR with
 *	  a hint naming my_exec_path, otherwise the directory is released again
 *	  with FreeDir().
 *
 * NOTE(review): this listing has lost the line carrying the function name
 * and parameter list (listing line 1393; the body uses a parameter called
 * argv0), the statement that should follow the "Locate the pkglib
 * directory" comment (listing line 1414), and the first argument line of
 * the ereport() call (listing line 1426).  Restore them from the
 * authoritative source.
1391  */
1392 static void
1394 {
1395  DIR *pdir;
1396 
1397  /* Locate the postgres executable itself */
1398  if (find_my_exec(argv0, my_exec_path) < 0)
1399  elog(FATAL, "%s: could not locate my own executable path", argv0);
1400 
1401 #ifdef EXEC_BACKEND
1402  /* Locate executable backend before we change working directory */
1403  if (find_other_exec(argv0, "postgres", PG_BACKEND_VERSIONSTR,
1404  postgres_exec_path) < 0)
1405  ereport(FATAL,
1406  (errmsg("%s: could not locate matching postgres executable",
1407  argv0)));
1408 #endif
1409 
1410  /*
1411  * Locate the pkglib directory --- this has to be set early in case we try
1412  * to load any modules from it in response to postgresql.conf entries.
1413  */
  /*
   * NOTE(review): listing line 1414, presumably the statement that sets
   * pkglib_path, is absent here.
   */
1415 
1416  /*
1417  * Verify that there's a readable directory there; otherwise the Postgres
1418  * installation is incomplete or corrupt. (A typical cause of this
1419  * failure is that the postgres executable has been moved or hardlinked to
1420  * some directory that's not a sibling of the installation lib/
1421  * directory.)
1422  */
1423  pdir = AllocateDir(pkglib_path);
1424  if (pdir == NULL)
1425  ereport(ERROR,
  /* NOTE(review): listing line 1426 (start of the argument list) is absent. */
1427  errmsg("could not open directory \"%s\": %m",
1428  pkglib_path),
1429  errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
1430  my_exec_path)));
1431  FreeDir(pdir);
1432 
1433  /*
1434  * XXX is it worth similarly checking the share/ directory? If the lib/
1435  * directory is there, then share/ probably is too.
1436  */
1437 }
1438 
1439 
1440 /*
1441  * Validate the proposed data directory
 *
 * Checks performed on DataDir, in order, as visible in this listing:
 *	- stat() must succeed; ENOENT and every other failure are reported
 *	  with separate FATAL messages.
 *	- the path must be a directory (S_ISDIR), else FATAL.
 *	- except on WIN32 and Cygwin builds: the directory's owner must equal
 *	  geteuid(), and none of the group/other permission bits
 *	  (S_IRWXG | S_IRWXO) may be set; either violation is FATAL.
 *	- "<DataDir>/global/pg_control" must be openable for reading via
 *	  AllocateFile(); if not, a message is written with write_stderr() and
 *	  ExitPostmaster(2) is called.
 *
 * NOTE(review): this listing has lost the line carrying the function name
 * and parameter list (listing line 1444), the first argument line of both
 * ereport() calls in the stat() failure branch (listing lines 1456 and
 * 1461), and the statement that should follow the "Look for PG_VERSION"
 * comment (listing line 1514).  Restore them from the authoritative source.
1442  */
1443 static void
1445 {
1446  char path[MAXPGPATH];
1447  FILE *fp;
1448  struct stat stat_buf;
1449 
1450  Assert(DataDir);
1451 
1452  if (stat(DataDir, &stat_buf) != 0)
1453  {
1454  if (errno == ENOENT)
1455  ereport(FATAL,
   /* NOTE(review): listing line 1456 (start of the argument list) is absent. */
1457  errmsg("data directory \"%s\" does not exist",
1458  DataDir)));
1459  else
1460  ereport(FATAL,
   /* NOTE(review): listing line 1461 (start of the argument list) is absent. */
1462  errmsg("could not read permissions of directory \"%s\": %m",
1463  DataDir)));
1464  }
1465 
1466  /* eventual chdir would fail anyway, but let's test ... */
1467  if (!S_ISDIR(stat_buf.st_mode))
1468  ereport(FATAL,
1469  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1470  errmsg("specified data directory \"%s\" is not a directory",
1471  DataDir)));
1472 
1473  /*
1474  * Check that the directory belongs to my userid; if not, reject.
1475  *
1476  * This check is an essential part of the interlock that prevents two
1477  * postmasters from starting in the same directory (see CreateLockFile()).
1478  * Do not remove or weaken it.
1479  *
1480  * XXX can we safely enable this check on Windows?
1481  */
1482 #if !defined(WIN32) && !defined(__CYGWIN__)
1483  if (stat_buf.st_uid != geteuid())
1484  ereport(FATAL,
1485  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1486  errmsg("data directory \"%s\" has wrong ownership",
1487  DataDir),
1488  errhint("The server must be started by the user that owns the data directory.")));
1489 #endif
1490 
1491  /*
1492  * Check if the directory has group or world access. If so, reject.
1493  *
1494  * It would be possible to allow weaker constraints (for example, allow
1495  * group access) but we cannot make a general assumption that that is
1496  * okay; for example there are platforms where nearly all users
1497  * customarily belong to the same group. Perhaps this test should be
1498  * configurable.
1499  *
1500  * XXX temporarily suppress check when on Windows, because there may not
1501  * be proper support for Unix-y file permissions. Need to think of a
1502  * reasonable check to apply on Windows.
1503  */
1504 #if !defined(WIN32) && !defined(__CYGWIN__)
1505  if (stat_buf.st_mode & (S_IRWXG | S_IRWXO))
1506  ereport(FATAL,
1507  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1508  errmsg("data directory \"%s\" has group or world access",
1509  DataDir),
1510  errdetail("Permissions should be u=rwx (0700).")));
1511 #endif
1512 
1513  /* Look for PG_VERSION before looking for pg_control */
  /*
   * NOTE(review): listing line 1514, presumably the PG_VERSION check
   * announced above, is absent here.
   */
1515 
1516  snprintf(path, sizeof(path), "%s/global/pg_control", DataDir);
1517 
1518  fp = AllocateFile(path, PG_BINARY_R);
1519  if (fp == NULL)
1520  {
1521  write_stderr("%s: could not find the database system\n"
1522  "Expected to find it in the directory \"%s\",\n"
1523  "but could not open file \"%s\": %s\n",
1524  progname, DataDir, path, strerror(errno));
1525  ExitPostmaster(2);
1526  }
  /* The file was opened only to prove it exists; release it right away. */
1527  FreeFile(fp);
1528 }
1529 
1530 /*
1531  * Determine how long we should let ServerLoop sleep.
1532  *
1533  * In normal conditions we wait at most one minute, to ensure that the other
1534  * background tasks handled by ServerLoop get done even when no requests are
1535  * arriving. However, if there are background workers waiting to be started,
1536  * we don't actually sleep so that they are quickly serviced. Other exception
1537  * cases are as shown in the code.
 *
 * timeout is an output parameter: both tv_sec and tv_usec are assigned on
 * every path through the function, and the result never exceeds 60 seconds
 * on the paths visible here (the AbortStartTime path is bounded instead by
 * SIGKILL_CHILDREN_AFTER_SECS and clamped to be non-negative).
 *
 * NOTE(review): this listing has lost four lines of the body: the second
 * operand of the first "if" condition (listing line 1549), the loop header
 * that drives "siter" (1584), the first line of the condition ending in
 * "|| rw->rw_terminate)" (1594), and the first line of the call that fills
 * secs/microsecs (1613).  Restore them from the authoritative source.
1538  */
1539 static void
1540 DetermineSleepTime(struct timeval * timeout)
1541 {
  /* Earliest restart time among crashed workers; 0 means "none found". */
1542  TimestampTz next_wakeup = 0;
1543 
1544  /*
1545  * Normal case: either there are no background workers at all, or we're in
1546  * a shutdown sequence (during which we ignore bgworkers altogether).
1547  */
1548  if (Shutdown > NoShutdown ||
  /* NOTE(review): listing line 1549 (rest of the condition) is absent. */
1550  {
1551  if (AbortStartTime != 0)
1552  {
1553  /* time left to abort; clamp to 0 in case it already expired */
1554  timeout->tv_sec = SIGKILL_CHILDREN_AFTER_SECS -
1555  (time(NULL) - AbortStartTime);
1556  timeout->tv_sec = Max(timeout->tv_sec, 0);
1557  timeout->tv_usec = 0;
1558  }
1559  else
1560  {
1561  timeout->tv_sec = 60;
1562  timeout->tv_usec = 0;
1563  }
1564  return;
1565  }
1566 
  /* A worker is waiting to be started: do not sleep at all. */
1567  if (StartWorkerNeeded)
1568  {
1569  timeout->tv_sec = 0;
1570  timeout->tv_usec = 0;
1571  return;
1572  }
1573 
1574  if (HaveCrashedWorker)
1575  {
1576  slist_mutable_iter siter;
1577 
1578  /*
1579  * When there are crashed bgworkers, we sleep just long enough that
1580  * they are restarted when they request to be. Scan the list to
1581  * determine the minimum of all wakeup times according to most recent
1582  * crash time and requested restart interval.
1583  */
   /* NOTE(review): listing line 1584 (the loop header) is absent here. */
1585  {
1586  RegisteredBgWorker *rw;
1587  TimestampTz this_wakeup;
1588 
1589  rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
1590 
   /* Entries with no recorded crash time do not affect the wakeup. */
1591  if (rw->rw_crashed_at == 0)
1592  continue;
1593 
   /* NOTE(review): listing line 1594 (start of this condition) is absent. */
1595  || rw->rw_terminate)
1596  {
1597  ForgetBackgroundWorker(&siter);
1598  continue;
1599  }
1600 
   /* bgw_restart_time is scaled by 1000 to obtain milliseconds. */
1601  this_wakeup = TimestampTzPlusMilliseconds(rw->rw_crashed_at,
1602  1000L * rw->rw_worker.bgw_restart_time);
1603  if (next_wakeup == 0 || this_wakeup < next_wakeup)
1604  next_wakeup = this_wakeup;
1605  }
1606  }
1607 
1608  if (next_wakeup != 0)
1609  {
1610  long secs;
1611  int microsecs;
1612 
   /* NOTE(review): listing line 1613 (start of the call) is absent here. */
1614  &secs, &microsecs);
1615  timeout->tv_sec = secs;
1616  timeout->tv_usec = microsecs;
1617 
1618  /* Ensure we don't exceed one minute */
1619  if (timeout->tv_sec > 60)
1620  {
1621  timeout->tv_sec = 60;
1622  timeout->tv_usec = 0;
1623  }
1624  }
1625  else
1626  {
1627  timeout->tv_sec = 60;
1628  timeout->tv_usec = 0;
1629  }
1630 }
1631 
1632 /*
1633  * Main idle loop of postmaster
1634  *
1635  * NB: Needs to be called with signals blocked
 *
 * Each iteration of the endless loop waits (select() on the listen sockets,
 * or a fixed 100 msec sleep in PM_WAIT_DEAD_END state), hands every ready
 * listen socket to ConnCreate()/BackendStartup(), then checks whether
 * helper processes need to be started and runs the periodic lock-file and
 * socket-file maintenance.
 *
 * The only return visible in this listing is STATUS_ERROR, taken when
 * select() fails with an errno other than EINTR or EWOULDBLOCK.
 *
 * NOTE(review): this listing has lost the line carrying the function name
 * and parameter list (listing line 1638) and a number of statements in the
 * body (listing lines 1669, 1684, 1697, 1737, 1745, 1748, 1750, 1759, 1768,
 * 1771, 1786, 1794-1795, 1826, 1829 and 1863).  Several "if" statements
 * therefore appear to have no body or an incomplete condition.  Restore the
 * lines from the authoritative source before relying on this text.
1636  */
1637 static int
1639 {
1640  fd_set readmask;
1641  int nSockets;
1642  time_t last_lockfile_recheck_time,
1643  last_touch_time;
1644 
1645  last_lockfile_recheck_time = last_touch_time = time(NULL);
1646 
  /* Build the master mask once; it is copied into rmask each iteration. */
1647  nSockets = initMasks(&readmask);
1648 
1649  for (;;)
1650  {
1651  fd_set rmask;
1652  int selres;
1653  time_t now;
1654 
1655  /*
1656  * Wait for a connection request to arrive.
1657  *
1658  * We block all signals except while sleeping. That makes it safe for
1659  * signal handlers, which again block all signals while executing, to
1660  * do nontrivial work.
1661  *
1662  * If we are in PM_WAIT_DEAD_END state, then we don't want to accept
1663  * any new connections, so we don't call select(), and just sleep.
1664  */
1665  memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
1666 
1667  if (pmState == PM_WAIT_DEAD_END)
1668  {
   /*
    * NOTE(review): listing line 1669 is absent; per the comment above,
    * signals are presumably unblocked here before sleeping -- confirm.
    */
1670 
1671  pg_usleep(100000L); /* 100 msec seems reasonable */
1672  selres = 0;
1673 
1674  PG_SETMASK(&BlockSig);
1675  }
1676  else
1677  {
1678  /* must set timeout each time; some OSes change it! */
1679  struct timeval timeout;
1680 
1681  /* Needs to run with blocked signals! */
1682  DetermineSleepTime(&timeout);
1683 
   /* NOTE(review): listing line 1684 is absent here (see note above). */
1685 
1686  selres = select(nSockets, &rmask, NULL, NULL, &timeout);
1687 
1688  PG_SETMASK(&BlockSig);
1689  }
1690 
1691  /* Now check the select() result */
1692  if (selres < 0)
1693  {
1694  if (errno != EINTR && errno != EWOULDBLOCK)
1695  {
1696  ereport(LOG,
    /* NOTE(review): listing line 1697 (start of the argument list) is absent. */
1698  errmsg("select() failed in postmaster: %m")));
1699  return STATUS_ERROR;
1700  }
1701  }
1702 
1703  /*
1704  * New connection pending on any of our sockets? If so, fork a child
1705  * process to deal with it.
1706  */
1707  if (selres > 0)
1708  {
1709  int i;
1710 
1711  for (i = 0; i < MAXLISTEN; i++)
1712  {
    /* The first invalid slot ends the scan of ListenSocket[]. */
1713  if (ListenSocket[i] == PGINVALID_SOCKET)
1714  break;
1715  if (FD_ISSET(ListenSocket[i], &rmask))
1716  {
1717  Port *port;
1718 
1719  port = ConnCreate(ListenSocket[i]);
1720  if (port)
1721  {
1722  BackendStartup(port);
1723 
1724  /*
1725  * We no longer need the open socket or port structure
1726  * in this process
1727  */
1728  StreamClose(port->sock);
1729  ConnFree(port);
1730  }
1731  }
1732  }
1733  }
1734 
1735  /* If we have lost the log collector, try to start a new one */
1736  if (SysLoggerPID == 0 && Logging_collector)
   /* NOTE(review): listing line 1737 (the guarded statement) is absent. */
1738 
1739  /*
1740  * If no background writer process is running, and we are not in a
1741  * state that prevents it, start one. It doesn't matter if this
1742  * fails, we'll just try again later. Likewise for the checkpointer.
1743  */
1744  if (pmState == PM_RUN || pmState == PM_RECOVERY ||
   /* NOTE(review): listing line 1745 (rest of the condition) is absent. */
1746  {
1747  if (CheckpointerPID == 0)
    /* NOTE(review): listing line 1748 (the guarded statement) is absent. */
1749  if (BgWriterPID == 0)
    /* NOTE(review): listing line 1750 (the guarded statement) is absent. */
1751  }
1752 
1753  /*
1754  * Likewise, if we have lost the walwriter process, try to start a new
1755  * one. But this is needed only in normal operation (else we cannot
1756  * be writing any new WAL).
1757  */
1758  if (WalWriterPID == 0 && pmState == PM_RUN)
   /* NOTE(review): listing line 1759 (the guarded statement) is absent. */
1760 
1761  /*
1762  * If we have lost the autovacuum launcher, try to start a new one. We
1763  * don't want autovacuum to run in binary upgrade mode because
1764  * autovacuum might update relfrozenxid for empty tables before the
1765  * physical files are put in place.
1766  */
1767  if (!IsBinaryUpgrade && AutoVacPID == 0 &&
   /* NOTE(review): listing line 1768 (middle of the condition) is absent. */
1769  pmState == PM_RUN)
1770  {
    /* NOTE(review): listing line 1771 is absent; it presumably sets AutoVacPID. */
1772  if (AutoVacPID != 0)
1773  start_autovac_launcher = false; /* signal processed */
1774  }
1775 
1776  /* If we have lost the stats collector, try to start a new one */
1777  if (PgStatPID == 0 &&
1778  (pmState == PM_RUN || pmState == PM_HOT_STANDBY))
1779  PgStatPID = pgstat_start();
1780 
1781  /* If we have lost the archiver, try to start a new one. */
1782  if (PgArchPID == 0 && PgArchStartupAllowed())
1783  PgArchPID = pgarch_start();
1784 
1785  /* If we need to signal the autovacuum launcher, do so now */
   /* NOTE(review): listing line 1786 (the "if" guarding this block) is absent. */
1787  {
1788  avlauncher_needs_signal = false;
1789  if (AutoVacPID != 0)
1790  kill(AutoVacPID, SIGUSR2);
1791  }
1792 
1793  /* Get other worker processes running, if needed */
   /* NOTE(review): listing lines 1794-1795 are absent here. */
1796 
1797 #ifdef HAVE_PTHREAD_IS_THREADED_NP
1798 
1799  /*
1800  * With assertions enabled, check regularly for appearance of
1801  * additional threads. All builds check at start and exit.
1802  */
1803  Assert(pthread_is_threaded_np() == 0);
1804 #endif
1805 
1806  /*
1807  * Lastly, check to see if it's time to do some things that we don't
1808  * want to do every single time through the loop, because they're a
1809  * bit expensive. Note that there's up to a minute of slop in when
1810  * these tasks will be performed, since DetermineSleepTime() will let
1811  * us sleep at most that long; except for SIGKILL timeout which has
1812  * special-case logic there.
1813  */
1814  now = time(NULL);
1815 
1816  /*
1817  * If we already sent SIGQUIT to children and they are slow to shut
1818  * down, it's time to send them SIGKILL. This doesn't happen
1819  * normally, but under certain conditions backends can get stuck while
1820  * shutting down. This is a last measure to get them unwedged.
1821  *
1822  * Note we also do this during recovery from a process crash.
1823  */
1824  if ((Shutdown >= ImmediateShutdown || (FatalError && !SendStop)) &&
1825  AbortStartTime != 0 &&
   /* NOTE(review): listing line 1826 (rest of the condition) is absent. */
1827  {
1828  /* We were gentle with them before. Not anymore */
    /* NOTE(review): listing line 1829 (the action taken) is absent. */
1830  /* reset flag so we don't SIGKILL again */
1831  AbortStartTime = 0;
1832  }
1833 
1834  /*
1835  * Once a minute, verify that postmaster.pid hasn't been removed or
1836  * overwritten. If it has, we force a shutdown. This avoids having
1837  * postmasters and child processes hanging around after their database
1838  * is gone, and maybe causing problems if a new database cluster is
1839  * created in the same place. It also provides some protection
1840  * against a DBA foolishly removing postmaster.pid and manually
1841  * starting a new postmaster. Data corruption is likely to ensue from
1842  * that anyway, but we can minimize the damage by aborting ASAP.
1843  */
1844  if (now - last_lockfile_recheck_time >= 1 * SECS_PER_MINUTE)
1845  {
1846  if (!RecheckDataDirLockFile())
1847  {
1848  ereport(LOG,
1849  (errmsg("performing immediate shutdown because data directory lock file is invalid")));
    /* The postmaster sends SIGQUIT to itself. */
1850  kill(MyProcPid, SIGQUIT);
1851  }
1852  last_lockfile_recheck_time = now;
1853  }
1854 
1855  /*
1856  * Touch Unix socket and lock files every 58 minutes, to ensure that
1857  * they are not removed by overzealous /tmp-cleaning tasks. We assume
1858  * no one runs cleaners with cutoff times of less than an hour ...
1859  */
1860  if (now - last_touch_time >= 58 * SECS_PER_MINUTE)
1861  {
1862  TouchSocketFiles();
    /*
     * NOTE(review): listing line 1863 is absent; per the comment above it
     * presumably touches the lock files as well -- confirm.
     */
1864  last_touch_time = now;
1865  }
1866  }
1867 }
1868 
1869 /*
1870  * Initialise the masks for select() for the ports we are listening on.
1871  * Return the number of sockets to listen on.
1872  */
1873 static int
1874 initMasks(fd_set *rmask)
1875 {
1876  int maxsock = -1;
1877  int i;
1878 
1879  FD_ZERO(rmask);
1880 
1881  for (i = 0; i < MAXLISTEN; i++)
1882  {
1883  int fd = ListenSocket[i];
1884 
1885  if (fd == PGINVALID_SOCKET)
1886  break;
1887  FD_SET(fd, rmask);
1888 
1889  if (fd > maxsock)
1890  maxsock = fd;
1891  }
1892 
1893  return maxsock + 1;
1894 }
1895 
1896 
1897 /*
1898  * Read a client's startup packet and do something according to it.
1899  *
1900  * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1901  * not return at all.
1902  *
1903  * (Note that ereport(FATAL) stuff is sent to the client, so only use it
1904  * if that's what you want. Return STATUS_ERROR if you don't want to
1905  * send anything to the client, which would typically be appropriate
1906  * if we detect a communications failure.)
1907  */
1908 static int
1910 {
1911  int32 len;
1912  void *buf;
1913  ProtocolVersion proto;
1914  MemoryContext oldcontext;
1915 
1916  pq_startmsgread();
1917  if (pq_getbytes((char *) &len, 4) == EOF)
1918  {
1919  /*
1920  * EOF after SSLdone probably means the client didn't like our
1921  * response to NEGOTIATE_SSL_CODE. That's not an error condition, so
1922  * don't clutter the log with a complaint.
1923  */
1924  if (!SSLdone)
1926  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1927  errmsg("incomplete startup packet")));
1928  return STATUS_ERROR;
1929  }
1930 
1931  len = ntohl(len);
1932  len -= 4;
1933 
1934  if (len < (int32) sizeof(ProtocolVersion) ||
1936  {
1938  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1939  errmsg("invalid length of startup packet")));
1940  return STATUS_ERROR;
1941  }
1942 
1943  /*
1944  * Allocate at least the size of an old-style startup packet, plus one
1945  * extra byte, and make sure all are zeroes. This ensures we will have
1946  * null termination of all strings, in both fixed- and variable-length
1947  * packet layouts.
1948  */
1949  if (len <= (int32) sizeof(StartupPacket))
1950  buf = palloc0(sizeof(StartupPacket) + 1);
1951  else
1952  buf = palloc0(len + 1);
1953 
1954  if (pq_getbytes(buf, len) == EOF)
1955  {
1957  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1958  errmsg("incomplete startup packet")));
1959  return STATUS_ERROR;
1960  }
1961  pq_endmsgread();
1962 
1963  /*
1964  * The first field is either a protocol version number or a special
1965  * request code.
1966  */
1967  port->proto = proto = ntohl(*((ProtocolVersion *) buf));
1968 
1969  if (proto == CANCEL_REQUEST_CODE)
1970  {
1971  processCancelRequest(port, buf);
1972  /* Not really an error, but we don't want to proceed further */
1973  return STATUS_ERROR;
1974  }
1975 
1976  if (proto == NEGOTIATE_SSL_CODE && !SSLdone)
1977  {
1978  char SSLok;
1979 
1980 #ifdef USE_SSL
1981  /* No SSL when disabled or on Unix sockets */
1982  if (!LoadedSSL || IS_AF_UNIX(port->laddr.addr.ss_family))
1983  SSLok = 'N';
1984  else
1985  SSLok = 'S'; /* Support for SSL */
1986 #else
1987  SSLok = 'N'; /* No support for SSL */
1988 #endif
1989 
1990 retry1:
1991  if (send(port->sock, &SSLok, 1, 0) != 1)
1992  {
1993  if (errno == EINTR)
1994  goto retry1; /* if interrupted, just retry */
1997  errmsg("failed to send SSL negotiation response: %m")));
1998  return STATUS_ERROR; /* close the connection */
1999  }
2000 
2001 #ifdef USE_SSL
2002  if (SSLok == 'S' && secure_open_server(port) == -1)
2003  return STATUS_ERROR;
2004 #endif
2005  /* regular startup packet, cancel, etc packet should follow... */
2006  /* but not another SSL negotiation request */
2007  return ProcessStartupPacket(port, true);
2008  }
2009 
2010  /* Could add additional special packet types here */
2011 
2012  /*
2013  * Set FrontendProtocol now so that ereport() knows what format to send if
2014  * we fail during startup.
2015  */
2016  FrontendProtocol = proto;
2017 
2018  /* Check we can handle the protocol the frontend is using. */
2019 
2024  ereport(FATAL,
2025  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2026  errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
2027  PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
2031 
2032  /*
2033  * Now fetch parameters out of startup packet and save them into the Port
2034  * structure. All data structures attached to the Port struct must be
2035  * allocated in TopMemoryContext so that they will remain available in a
2036  * running backend (even after PostmasterContext is destroyed). We need
2037  * not worry about leaking this storage on failure, since we aren't in the
2038  * postmaster process anymore.
2039  */
2041 
2042  if (PG_PROTOCOL_MAJOR(proto) >= 3)
2043  {
2044  int32 offset = sizeof(ProtocolVersion);
2045 
2046  /*
2047  * Scan packet body for name/option pairs. We can assume any string
2048  * beginning within the packet body is null-terminated, thanks to
2049  * zeroing extra byte above.
2050  */
2051  port->guc_options = NIL;
2052 
2053  while (offset < len)
2054  {
2055  char *nameptr = ((char *) buf) + offset;
2056  int32 valoffset;
2057  char *valptr;
2058 
2059  if (*nameptr == '\0')
2060  break; /* found packet terminator */
2061  valoffset = offset + strlen(nameptr) + 1;
2062  if (valoffset >= len)
2063  break; /* missing value, will complain below */
2064  valptr = ((char *) buf) + valoffset;
2065 
2066  if (strcmp(nameptr, "database") == 0)
2067  port->database_name = pstrdup(valptr);
2068  else if (strcmp(nameptr, "user") == 0)
2069  port->user_name = pstrdup(valptr);
2070  else if (strcmp(nameptr, "options") == 0)
2071  port->cmdline_options = pstrdup(valptr);
2072  else if (strcmp(nameptr, "replication") == 0)
2073  {
2074  /*
2075  * Due to backward compatibility concerns the replication
2076  * parameter is a hybrid beast which allows the value to be
2077  * either boolean or the string 'database'. The latter
2078  * connects to a specific database which is e.g. required for
2079  * logical decoding.
2080  */
2081  if (strcmp(valptr, "database") == 0)
2082  {
2083  am_walsender = true;
2084  am_db_walsender = true;
2085  }
2086  else if (!parse_bool(valptr, &am_walsender))
2087  ereport(FATAL,
2088  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2089  errmsg("invalid value for parameter \"%s\": \"%s\"",
2090  "replication",
2091  valptr),
2092  errhint("Valid values are: \"false\", 0, \"true\", 1, \"database\".")));
2093  }
2094  else
2095  {
2096  /* Assume it's a generic GUC option */
2097  port->guc_options = lappend(port->guc_options,
2098  pstrdup(nameptr));
2099  port->guc_options = lappend(port->guc_options,
2100  pstrdup(valptr));
2101  }
2102  offset = valoffset + strlen(valptr) + 1;
2103  }
2104 
2105  /*
2106  * If we didn't find a packet terminator exactly at the end of the
2107  * given packet length, complain.
2108  */
2109  if (offset != len - 1)
2110  ereport(FATAL,
2111  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2112  errmsg("invalid startup packet layout: expected terminator as last byte")));
2113  }
2114  else
2115  {
2116  /*
2117  * Get the parameters from the old-style, fixed-width-fields startup
2118  * packet as C strings. The packet destination was cleared first so a
2119  * short packet has zeros silently added. We have to be prepared to
2120  * truncate the pstrdup result for oversize fields, though.
2121  */
2122  StartupPacket *packet = (StartupPacket *) buf;
2123 
2124  port->database_name = pstrdup(packet->database);
2125  if (strlen(port->database_name) > sizeof(packet->database))
2126  port->database_name[sizeof(packet->database)] = '\0';
2127  port->user_name = pstrdup(packet->user);
2128  if (strlen(port->user_name) > sizeof(packet->user))
2129  port->user_name[sizeof(packet->user)] = '\0';
2130  port->cmdline_options = pstrdup(packet->options);
2131  if (strlen(port->cmdline_options) > sizeof(packet->options))
2132  port->cmdline_options[sizeof(packet->options)] = '\0';
2133  port->guc_options = NIL;
2134  }
2135 
2136  /* Check a user name was given. */
2137  if (port->user_name == NULL || port->user_name[0] == '\0')
2138  ereport(FATAL,
2139  (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
2140  errmsg("no PostgreSQL user name specified in startup packet")));
2141 
2142  /* The database defaults to the user name. */
2143  if (port->database_name == NULL || port->database_name[0] == '\0')
2144  port->database_name = pstrdup(port->user_name);
2145 
2146  if (Db_user_namespace)
2147  {
2148  /*
2149  * If user@, it is a global user, remove '@'. We only want to do this
2150  * if there is an '@' at the end and none earlier in the user string;
2151  * otherwise they could pose as a local user of another database
2152  * attaching to this database.
2153  */
2154  if (strchr(port->user_name, '@') ==
2155  port->user_name + strlen(port->user_name) - 1)
2156  *strchr(port->user_name, '@') = '\0';
2157  else
2158  {
2159  /* Append '@' and dbname */
2160  port->user_name = psprintf("%s@%s", port->user_name, port->database_name);
2161  }
2162  }
2163 
2164  /*
2165  * Truncate given database and user names to length of a Postgres name.
2166  * This avoids lookup failures when overlength names are given.
2167  */
2168  if (strlen(port->database_name) >= NAMEDATALEN)
2169  port->database_name[NAMEDATALEN - 1] = '\0';
2170  if (strlen(port->user_name) >= NAMEDATALEN)
2171  port->user_name[NAMEDATALEN - 1] = '\0';
2172 
2173  /*
2174  * Normal walsender backends, e.g. for streaming replication, are not
2175  * connected to a particular database. But walsenders used for logical
2176  * replication need to connect to a specific database. We allow streaming
2177  * replication commands to be issued even if connected to a database as it
2178  * can make sense to first make a basebackup and then stream changes
2179  * starting from that.
2180  */
2181  if (am_walsender && !am_db_walsender)
2182  port->database_name[0] = '\0';
2183 
2184  /*
2185  * Done putting stuff in TopMemoryContext.
2186  */
2187  MemoryContextSwitchTo(oldcontext);
2188 
2189  /*
2190  * If we're going to reject the connection due to database state, say so
2191  * now instead of wasting cycles on an authentication exchange. (This also
2192  * allows a pg_ping utility to be written.)
2193  */
2194  switch (port->canAcceptConnections)
2195  {
2196  case CAC_STARTUP:
2197  ereport(FATAL,
2199  errmsg("the database system is starting up")));
2200  break;
2201  case CAC_SHUTDOWN:
2202  ereport(FATAL,
2204  errmsg("the database system is shutting down")));
2205  break;
2206  case CAC_RECOVERY:
2207  ereport(FATAL,
2209  errmsg("the database system is in recovery mode")));
2210  break;
2211  case CAC_TOOMANY:
2212  ereport(FATAL,
2213  (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
2214  errmsg("sorry, too many clients already")));
2215  break;
2216  case CAC_WAITBACKUP:
2217  /* OK for now, will check in InitPostgres */
2218  break;
2219  case CAC_OK:
2220  break;
2221  }
2222 
2223  return STATUS_OK;
2224 }
2225 
2226 
2227 /*
2228  * The client has sent a cancel request packet, not a normal
2229  * start-a-new-connection packet. Perform the necessary processing.
2230  * Nothing is sent back to the client.
2231  */
2232 static void
2234 {
2235  CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
2236  int backendPID;
2237  int32 cancelAuthCode;
2238  Backend *bp;
2239 
2240 #ifndef EXEC_BACKEND
2241  dlist_iter iter;
2242 #else
2243  int i;
2244 #endif
2245 
2246  backendPID = (int) ntohl(canc->backendPID);
2247  cancelAuthCode = (int32) ntohl(canc->cancelAuthCode);
2248 
2249  /*
2250  * See if we have a matching backend. In the EXEC_BACKEND case, we can no
2251  * longer access the postmaster's own backend list, and must rely on the
2252  * duplicate array in shared memory.
2253  */
2254 #ifndef EXEC_BACKEND
2255  dlist_foreach(iter, &BackendList)
2256  {
2257  bp = dlist_container(Backend, elem, iter.cur);
2258 #else
2259  for (i = MaxLivePostmasterChildren() - 1; i >= 0; i--)
2260  {
2261  bp = (Backend *) &ShmemBackendArray[i];
2262 #endif
2263  if (bp->pid == backendPID)
2264  {
2265  if (bp->cancel_key == cancelAuthCode)
2266  {
2267  /* Found a match; signal that backend to cancel current op */
2268  ereport(DEBUG2,
2269  (errmsg_internal("processing cancel request: sending SIGINT to process %d",
2270  backendPID)));
2271  signal_child(bp->pid, SIGINT);
2272  }
2273  else
2274  /* Right PID, wrong key: no way, Jose */
2275  ereport(LOG,
2276  (errmsg("wrong key in cancel request for process %d",
2277  backendPID)));
2278  return;
2279  }
2280  }
2281 
2282  /* No matching backend */
2283  ereport(LOG,
2284  (errmsg("PID %d in cancel request did not match any process",
2285  backendPID)));
2286 }
2287 
2288 /*
2289  * canAcceptConnections --- check to see if database state allows connections.
2290  */
2291 static CAC_state
2293 {
2294  CAC_state result = CAC_OK;
2295 
2296  /*
2297  * Can't start backends when in startup/shutdown/inconsistent recovery
2298  * state.
2299  *
2300  * In state PM_WAIT_BACKUP only superusers can connect (this must be
2301  * allowed so that a superuser can end online backup mode); we return
2302  * CAC_WAITBACKUP code to indicate that this must be checked later. Note
2303  * that neither CAC_OK nor CAC_WAITBACKUP can safely be returned until we
2304  * have checked for too many children.
2305  */
2306  if (pmState != PM_RUN)
2307  {
2308  if (pmState == PM_WAIT_BACKUP)
2309  result = CAC_WAITBACKUP; /* allow superusers only */
2310  else if (Shutdown > NoShutdown)
2311  return CAC_SHUTDOWN; /* shutdown is pending */
2312  else if (!FatalError &&
2313  (pmState == PM_STARTUP ||
2314  pmState == PM_RECOVERY))
2315  return CAC_STARTUP; /* normal startup */
2316  else if (!FatalError &&
2318  result = CAC_OK; /* connection OK during hot standby */
2319  else
2320  return CAC_RECOVERY; /* else must be crash recovery */
2321  }
2322 
2323  /*
2324  * Don't start too many children.
2325  *
2326  * We allow more connections than we can have backends here because some
2327  * might still be authenticating; they might fail auth, or some existing
2328  * backend might exit before the auth cycle is completed. The exact
2329  * MaxBackends limit is enforced when a new backend tries to join the
2330  * shared-inval backend array.
2331  *
2332  * The limit here must match the sizes of the per-child-process arrays;
2333  * see comments for MaxLivePostmasterChildren().
2334  */
2336  result = CAC_TOOMANY;
2337 
2338  return result;
2339 }
2340 
2341 
2342 /*
2343  * ConnCreate -- create a local connection data structure
2344  *
2345  * Returns NULL on failure, other than out-of-memory which is fatal.
2346  */
2347 static Port *
2348 ConnCreate(int serverFd)
2349 {
2350  Port *port;
2351 
2352  if (!(port = (Port *) calloc(1, sizeof(Port))))
2353  {
2354  ereport(LOG,
2355  (errcode(ERRCODE_OUT_OF_MEMORY),
2356  errmsg("out of memory")));
2357  ExitPostmaster(1);
2358  }
2359 
2360  if (StreamConnection(serverFd, port) != STATUS_OK)
2361  {
2362  if (port->sock != PGINVALID_SOCKET)
2363  StreamClose(port->sock);
2364  ConnFree(port);
2365  return NULL;
2366  }
2367 
2368  /*
2369  * Allocate GSSAPI specific state struct
2370  */
2371 #ifndef EXEC_BACKEND
2372 #if defined(ENABLE_GSS) || defined(ENABLE_SSPI)
2373  port->gss = (pg_gssinfo *) calloc(1, sizeof(pg_gssinfo));
2374  if (!port->gss)
2375  {
2376  ereport(LOG,
2377  (errcode(ERRCODE_OUT_OF_MEMORY),
2378  errmsg("out of memory")));
2379  ExitPostmaster(1);
2380  }
2381 #endif
2382 #endif
2383 
2384  return port;
2385 }
2386 
2387 
2388 /*
2389  * ConnFree -- free a local connection data structure
2390  */
2391 static void
2393 {
2394 #ifdef USE_SSL
2395  secure_close(conn);
2396 #endif
2397  if (conn->gss)
2398  free(conn->gss);
2399  free(conn);
2400 }
2401 
2402 
2403 /*
2404  * ClosePostmasterPorts -- close all the postmaster's open sockets
2405  *
2406  * This is called during child process startup to release file descriptors
2407  * that are not needed by that child process. The postmaster still has
2408  * them open, of course.
2409  *
2410  * Note: we pass am_syslogger as a boolean because we don't want to set
2411  * the global variable yet when this is called.
2412  */
2413 void
2415 {
2416  int i;
2417 
2418 #ifndef WIN32
2419 
2420  /*
2421  * Close the write end of postmaster death watch pipe. It's important to
2422  * do this as early as possible, so that if postmaster dies, others won't
2423  * think that it's still running because we're holding the pipe open.
2424  */
2426  ereport(FATAL,
2428  errmsg_internal("could not close postmaster death monitoring pipe in child process: %m")));
2430 #endif
2431 
2432  /* Close the listen sockets */
2433  for (i = 0; i < MAXLISTEN; i++)
2434  {
2435  if (ListenSocket[i] != PGINVALID_SOCKET)
2436  {
2439  }
2440  }
2441 
2442  /* If using syslogger, close the read side of the pipe */
2443  if (!am_syslogger)
2444  {
2445 #ifndef WIN32
2446  if (syslogPipe[0] >= 0)
2447  close(syslogPipe[0]);
2448  syslogPipe[0] = -1;
2449 #else
2450  if (syslogPipe[0])
2451  CloseHandle(syslogPipe[0]);
2452  syslogPipe[0] = 0;
2453 #endif
2454  }
2455 
2456 #ifdef USE_BONJOUR
2457  /* If using Bonjour, close the connection to the mDNS daemon */
2458  if (bonjour_sdref)
2459  close(DNSServiceRefSockFD(bonjour_sdref));
2460 #endif
2461 }
2462 
2463 
2464 /*
2465  * reset_shared -- reset shared memory and semaphores
2466  */
2467 static void
2468 reset_shared(int port)
2469 {
2470  /*
2471  * Create or re-create shared memory and semaphores.
2472  *
2473  * Note: in each "cycle of life" we will normally assign the same IPC keys
2474  * (if using SysV shmem and/or semas), since the port number is used to
2475  * determine IPC keys. This helps ensure that we will clean up dead IPC
2476  * objects if the postmaster crashes and is restarted.
2477  */
2478  CreateSharedMemoryAndSemaphores(false, port);
2479 }
2480 
2481 
2482 /*
2483  * SIGHUP -- reread config files, and tell children to do same
2484  */
2485 static void
2487 {
2488  int save_errno = errno;
2489 
2490  PG_SETMASK(&BlockSig);
2491 
2492  if (Shutdown <= SmartShutdown)
2493  {
2494  ereport(LOG,
2495  (errmsg("received SIGHUP, reloading configuration files")));
2498  if (StartupPID != 0)
2500  if (BgWriterPID != 0)
2502  if (CheckpointerPID != 0)
2504  if (WalWriterPID != 0)
2506  if (WalReceiverPID != 0)
2508  if (AutoVacPID != 0)
2510  if (PgArchPID != 0)
2512  if (SysLoggerPID != 0)
2514  if (PgStatPID != 0)
2516 
2517  /* Reload authentication config files too */
2518  if (!load_hba())
2519  ereport(LOG,
2520  (errmsg("pg_hba.conf was not reloaded")));
2521 
2522  if (!load_ident())
2523  ereport(LOG,
2524  (errmsg("pg_ident.conf was not reloaded")));
2525 
2526 #ifdef USE_SSL
2527  /* Reload SSL configuration as well */
2528  if (EnableSSL)
2529  {
2530  if (secure_initialize(false) == 0)
2531  LoadedSSL = true;
2532  else
2533  ereport(LOG,
2534  (errmsg("SSL configuration was not reloaded")));
2535  }
2536  else
2537  {
2538  secure_destroy();
2539  LoadedSSL = false;
2540  }
2541 #endif
2542 
2543 #ifdef EXEC_BACKEND
2544  /* Update the starting-point file for future children */
2545  write_nondefault_variables(PGC_SIGHUP);
2546 #endif
2547  }
2548 
2550 
2551  errno = save_errno;
2552 }
2553 
2554 
2555 /*
2556  * pmdie -- signal handler for processing various postmaster signals.
2557  */
2558 static void
2560 {
2561  int save_errno = errno;
2562 
2563  PG_SETMASK(&BlockSig);
2564 
2565  ereport(DEBUG2,
2566  (errmsg_internal("postmaster received signal %d",
2567  postgres_signal_arg)));
2568 
2569  switch (postgres_signal_arg)
2570  {
2571  case SIGTERM:
2572 
2573  /*
2574  * Smart Shutdown:
2575  *
2576  * Wait for children to end their work, then shut down.
2577  */
2578  if (Shutdown >= SmartShutdown)
2579  break;
2581  ereport(LOG,
2582  (errmsg("received smart shutdown request")));
2583 #ifdef USE_SYSTEMD
2584  sd_notify(0, "STOPPING=1");
2585 #endif
2586 
2587  if (pmState == PM_RUN || pmState == PM_RECOVERY ||
2589  {
2590  /* autovac workers are told to shut down immediately */
2591  /* and bgworkers too; does this need tweaking? */
2592  SignalSomeChildren(SIGTERM,
2594  /* and the autovac launcher too */
2595  if (AutoVacPID != 0)
2596  signal_child(AutoVacPID, SIGTERM);
2597  /* and the bgwriter too */
2598  if (BgWriterPID != 0)
2599  signal_child(BgWriterPID, SIGTERM);
2600  /* and the walwriter too */
2601  if (WalWriterPID != 0)
2602  signal_child(WalWriterPID, SIGTERM);
2603 
2604  /*
2605  * If we're in recovery, we can't kill the startup process
2606  * right away, because at present doing so does not release
2607  * its locks. We might want to change this in a future
2608  * release. For the time being, the PM_WAIT_READONLY state
2609  * indicates that we're waiting for the regular (read only)
2610  * backends to die off; once they do, we'll kill the startup
2611  * and walreceiver processes.
2612  */
2613  pmState = (pmState == PM_RUN) ?
2615  }
2616 
2617  /*
2618  * Now wait for online backup mode to end and backends to exit. If
2619  * that is already the case, PostmasterStateMachine will take the
2620  * next step.
2621  */
2623  break;
2624 
2625  case SIGINT:
2626 
2627  /*
2628  * Fast Shutdown:
2629  *
2630  * Abort all children with SIGTERM (rollback active transactions
2631  * and exit) and shut down when they are gone.
2632  */
2633  if (Shutdown >= FastShutdown)
2634  break;
2636  ereport(LOG,
2637  (errmsg("received fast shutdown request")));
2638 #ifdef USE_SYSTEMD
2639  sd_notify(0, "STOPPING=1");
2640 #endif
2641 
2642  if (StartupPID != 0)
2643  signal_child(StartupPID, SIGTERM);
2644  if (BgWriterPID != 0)
2645  signal_child(BgWriterPID, SIGTERM);
2646  if (WalReceiverPID != 0)
2647  signal_child(WalReceiverPID, SIGTERM);
2648  if (pmState == PM_RECOVERY)
2649  {
2651 
2652  /*
2653  * Only startup, bgwriter, walreceiver, possibly bgworkers,
2654  * and/or checkpointer should be active in this state; we just
2655  * signaled the first four, and we don't want to kill
2656  * checkpointer yet.
2657  */
2659  }
2660  else if (pmState == PM_RUN ||
2661  pmState == PM_WAIT_BACKUP ||
2665  {
2666  ereport(LOG,
2667  (errmsg("aborting any active transactions")));
2668  /* shut down all backends and workers */
2669  SignalSomeChildren(SIGTERM,
2672  /* and the autovac launcher too */
2673  if (AutoVacPID != 0)
2674  signal_child(AutoVacPID, SIGTERM);
2675  /* and the walwriter too */
2676  if (WalWriterPID != 0)
2677  signal_child(WalWriterPID, SIGTERM);
2679  }
2680 
2681  /*
2682  * Now wait for backends to exit. If there are none,
2683  * PostmasterStateMachine will take the next step.
2684  */
2686  break;
2687 
2688  case SIGQUIT:
2689 
2690  /*
2691  * Immediate Shutdown:
2692  *
2693  * abort all children with SIGQUIT, wait for them to exit,
2694  * terminate remaining ones with SIGKILL, then exit without
2695  * attempting to properly shut down the database system.
2696  */
2697  if (Shutdown >= ImmediateShutdown)
2698  break;
2700  ereport(LOG,
2701  (errmsg("received immediate shutdown request")));
2702 #ifdef USE_SYSTEMD
2703  sd_notify(0, "STOPPING=1");
2704 #endif
2705 
2708 
2709  /* set stopwatch for them to die */
2710  AbortStartTime = time(NULL);
2711 
2712  /*
2713  * Now wait for backends to exit. If there are none,
2714  * PostmasterStateMachine will take the next step.
2715  */
2717  break;
2718  }
2719 
2721 
2722  errno = save_errno;
2723 }
2724 
2725 /*
2726  * Reaper -- signal handler to cleanup after a child process dies.
2727  */
2728 static void
2730 {
2731  int save_errno = errno;
2732  int pid; /* process id of dead child process */
2733  int exitstatus; /* its exit status */
2734 
2735  PG_SETMASK(&BlockSig);
2736 
2737  ereport(DEBUG4,
2738  (errmsg_internal("reaping dead processes")));
2739 
2740  while ((pid = waitpid(-1, &exitstatus, WNOHANG)) > 0)
2741  {
2742  /*
2743  * Check if this child was a startup process.
2744  */
2745  if (pid == StartupPID)
2746  {
2747  StartupPID = 0;
2748 
2749  /*
2750  * Startup process exited in response to a shutdown request (or it
2751  * completed normally regardless of the shutdown request).
2752  */
2753  if (Shutdown > NoShutdown &&
2754  (EXIT_STATUS_0(exitstatus) || EXIT_STATUS_1(exitstatus)))
2755  {
2758  /* PostmasterStateMachine logic does the rest */
2759  continue;
2760  }
2761 
2762  if (EXIT_STATUS_3(exitstatus))
2763  {
2764  ereport(LOG,
2765  (errmsg("shutdown at recovery target")));
2768  TerminateChildren(SIGTERM);
2770  /* PostmasterStateMachine logic does the rest */
2771  continue;
2772  }
2773 
2774  /*
2775  * Unexpected exit of startup process (including FATAL exit)
2776  * during PM_STARTUP is treated as catastrophic. There are no
2777  * other processes running yet, so we can just exit.
2778  */
2779  if (pmState == PM_STARTUP && !EXIT_STATUS_0(exitstatus))
2780  {
2781  LogChildExit(LOG, _("startup process"),
2782  pid, exitstatus);
2783  ereport(LOG,
2784  (errmsg("aborting startup due to startup process failure")));
2785  ExitPostmaster(1);
2786  }
2787 
2788  /*
2789  * After PM_STARTUP, any unexpected exit (including FATAL exit) of
2790  * the startup process is catastrophic, so kill other children,
2791  * and set StartupStatus so we don't try to reinitialize after
2792  * they're gone. Exception: if StartupStatus is STARTUP_SIGNALED,
2793  * then we previously sent the startup process a SIGQUIT; so
2794  * that's probably the reason it died, and we do want to try to
2795  * restart in that case.
2796  */
2797  if (!EXIT_STATUS_0(exitstatus))
2798  {
2801  else
2803  HandleChildCrash(pid, exitstatus,
2804  _("startup process"));
2805  continue;
2806  }
2807 
2808  /*
2809  * Startup succeeded, commence normal operations
2810  */
2812  FatalError = false;
2813  Assert(AbortStartTime == 0);
2814  ReachedNormalRunning = true;
2815  pmState = PM_RUN;
2816 
2817  /*
2818  * Crank up the background tasks, if we didn't do that already
2819  * when we entered consistent recovery state. It doesn't matter
2820  * if this fails, we'll just try again later.
2821  */
2822  if (CheckpointerPID == 0)
2824  if (BgWriterPID == 0)
2826  if (WalWriterPID == 0)
2828 
2829  /*
2830  * Likewise, start other special children as needed. In a restart
2831  * situation, some of them may be alive already.
2832  */
2835  if (PgArchStartupAllowed() && PgArchPID == 0)
2836  PgArchPID = pgarch_start();
2837  if (PgStatPID == 0)
2838  PgStatPID = pgstat_start();
2839 
2840  /* workers may be scheduled to start now */
2842 
2843  /* at this point we are really open for business */
2844  ereport(LOG,
2845  (errmsg("database system is ready to accept connections")));
2846 
2847 #ifdef USE_SYSTEMD
2848  sd_notify(0, "READY=1");
2849 #endif
2850 
2851  continue;
2852  }
2853 
2854  /*
2855  * Was it the bgwriter? Normal exit can be ignored; we'll start a new
2856  * one at the next iteration of the postmaster's main loop, if
2857  * necessary. Any other exit condition is treated as a crash.
2858  */
2859  if (pid == BgWriterPID)
2860  {
2861  BgWriterPID = 0;
2862  if (!EXIT_STATUS_0(exitstatus))
2863  HandleChildCrash(pid, exitstatus,
2864  _("background writer process"));
2865  continue;
2866  }
2867 
2868  /*
2869  * Was it the checkpointer?
2870  */
2871  if (pid == CheckpointerPID)
2872  {
2873  CheckpointerPID = 0;
2874  if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN)
2875  {
2876  /*
2877  * OK, we saw normal exit of the checkpointer after it's been
2878  * told to shut down. We expect that it wrote a shutdown
2879  * checkpoint. (If for some reason it didn't, recovery will
2880  * occur on next postmaster start.)
2881  *
2882  * At this point we should have no normal backend children
2883  * left (else we'd not be in PM_SHUTDOWN state) but we might
2884  * have dead_end children to wait for.
2885  *
2886  * If we have an archiver subprocess, tell it to do a last
2887  * archive cycle and quit. Likewise, if we have walsender
2888  * processes, tell them to send any remaining WAL and quit.
2889  */
2891 
2892  /* Waken archiver for the last time */
2893  if (PgArchPID != 0)
2895 
2896  /*
2897  * Waken walsenders for the last time. No regular backends
2898  * should be around anymore.
2899  */
2901 
2903 
2904  /*
2905  * We can also shut down the stats collector now; there's
2906  * nothing left for it to do.
2907  */
2908  if (PgStatPID != 0)
2910  }
2911  else
2912  {
2913  /*
2914  * Any unexpected exit of the checkpointer (including FATAL
2915  * exit) is treated as a crash.
2916  */
2917  HandleChildCrash(pid, exitstatus,
2918  _("checkpointer process"));
2919  }
2920 
2921  continue;
2922  }
2923 
2924  /*
2925  * Was it the wal writer? Normal exit can be ignored; we'll start a
2926  * new one at the next iteration of the postmaster's main loop, if
2927  * necessary. Any other exit condition is treated as a crash.
2928  */
2929  if (pid == WalWriterPID)
2930  {
2931  WalWriterPID = 0;
2932  if (!EXIT_STATUS_0(exitstatus))
2933  HandleChildCrash(pid, exitstatus,
2934  _("WAL writer process"));
2935  continue;
2936  }
2937 
2938  /*
2939  * Was it the wal receiver? If exit status is zero (normal) or one
2940  * (FATAL exit), we assume everything is all right just like normal
2941  * backends.
2942  */
2943  if (pid == WalReceiverPID)
2944  {
2945  WalReceiverPID = 0;
2946  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
2947  HandleChildCrash(pid, exitstatus,
2948  _("WAL receiver process"));
2949  continue;
2950  }
2951 
2952  /*
2953  * Was it the autovacuum launcher? Normal exit can be ignored; we'll
2954  * start a new one at the next iteration of the postmaster's main
2955  * loop, if necessary. Any other exit condition is treated as a
2956  * crash.
2957  */
2958  if (pid == AutoVacPID)
2959  {
2960  AutoVacPID = 0;
2961  if (!EXIT_STATUS_0(exitstatus))
2962  HandleChildCrash(pid, exitstatus,
2963  _("autovacuum launcher process"));
2964  continue;
2965  }
2966 
2967  /*
2968  * Was it the archiver? If so, just try to start a new one; no need
2969  * to force reset of the rest of the system. (If that fails, we'll try
2970  * again in future cycles of the main loop.) However, if we were waiting
2971  * for it to shut down, we don't restart it, and
2972  * PostmasterStateMachine() will advance to the next shutdown step.
2973  */
2974  if (pid == PgArchPID)
2975  {
2976  PgArchPID = 0;
2977  if (!EXIT_STATUS_0(exitstatus))
2978  LogChildExit(LOG, _("archiver process"),
2979  pid, exitstatus);
2980  if (PgArchStartupAllowed())
2981  PgArchPID = pgarch_start();
2982  continue;
2983  }
2984 
2985  /*
2986  * Was it the statistics collector? If so, just try to start a new
2987  * one; no need to force reset of the rest of the system. (If that fails,
2988  * we'll try again in future cycles of the main loop.)
2989  */
2990  if (pid == PgStatPID)
2991  {
2992  PgStatPID = 0;
2993  if (!EXIT_STATUS_0(exitstatus))
2994  LogChildExit(LOG, _("statistics collector process"),
2995  pid, exitstatus);
2996  if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
2997  PgStatPID = pgstat_start();
2998  continue;
2999  }
3000 
3001  /* Was it the system logger? If so, try to start a new one */
3002  if (pid == SysLoggerPID)
3003  {
3004  SysLoggerPID = 0;
3005  /* for safety's sake, launch new logger *first* */
3007  if (!EXIT_STATUS_0(exitstatus))
3008  LogChildExit(LOG, _("system logger process"),
3009  pid, exitstatus);
3010  continue;
3011  }
3012 
3013  /* Was it one of our background workers? */
3014  if (CleanupBackgroundWorker(pid, exitstatus))
3015  {
3016  /* have it be restarted */
3017  HaveCrashedWorker = true;
3018  continue;
3019  }
3020 
3021  /*
3022  * Else do standard backend child cleanup.
3023  */
3024  CleanupBackend(pid, exitstatus);
3025  } /* loop over pending child-death reports */
3026 
3027  /*
3028  * After cleaning out the SIGCHLD queue, see if we have any state changes
3029  * or actions to make.
3030  */
3032 
3033  /* Done with signal handler */
3035 
3036  errno = save_errno;
3037 }
3038 
3039 /*
3040  * Scan the bgworkers list and see if the given PID (which has just stopped
3041  * or crashed) is in it. Handle its shutdown if so, and return true. If not a
3042  * bgworker, return false.
3043  *
3044  * This is heavily based on CleanupBackend. One important difference is that
3045  * we don't know yet that the dying process is a bgworker, so we must be silent
3046  * until we're sure it is.
3047  */
3048 static bool
3050  int exitstatus) /* child's exit status */
3051 {
3052  char namebuf[MAXPGPATH];
3053  slist_iter iter;
3054 
3056  {
3057  RegisteredBgWorker *rw;
3058 
3059  rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
3060 
3061  if (rw->rw_pid != pid)
3062  continue;
3063 
3064 #ifdef WIN32
3065  /* see CleanupBackend */
3066  if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3067  exitstatus = 0;
3068 #endif
3069 
3070  snprintf(namebuf, MAXPGPATH, "%s: %s", _("worker process"),
3071  rw->rw_worker.bgw_name);
3072 
3073  if (!EXIT_STATUS_0(exitstatus))
3074  {
3075  /* Record timestamp, so we know when to restart the worker. */
3077  }
3078  else
3079  {
3080  /* Zero exit status means terminate */
3081  rw->rw_crashed_at = 0;
3082  rw->rw_terminate = true;
3083  }
3084 
3085  /*
3086  * Additionally, for shared-memory-connected workers, just like a
3087  * backend, any exit status other than 0 or 1 is considered a crash
3088  * and causes a system-wide restart.
3089  */
3090  if ((rw->rw_worker.bgw_flags & BGWORKER_SHMEM_ACCESS) != 0)
3091  {
3092  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3093  {
3094  HandleChildCrash(pid, exitstatus, namebuf);
3095  return true;
3096  }
3097  }
3098 
3099  /*
3100  * We must release the postmaster child slot whether this worker is
3101  * connected to shared memory or not, but we only treat it as a crash
3102  * if it is in fact connected.
3103  */
3106  {
3107  HandleChildCrash(pid, exitstatus, namebuf);
3108  return true;
3109  }
3110 
3111  /* Get it out of the BackendList and clear out remaining data */
3112  dlist_delete(&rw->rw_backend->elem);
3113 #ifdef EXEC_BACKEND
3114  ShmemBackendArrayRemove(rw->rw_backend);
3115 #endif
3116 
3117  /*
3118  * It's possible that this background worker started some OTHER
3119  * background worker and asked to be notified when that worker started
3120  * or stopped. If so, cancel any notifications destined for the
3121  * now-dead backend.
3122  */
3123  if (rw->rw_backend->bgworker_notify)
3125  free(rw->rw_backend);
3126  rw->rw_backend = NULL;
3127  rw->rw_pid = 0;
3128  rw->rw_child_slot = 0;
3129  ReportBackgroundWorkerPID(rw); /* report child death */
3130 
3131  LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG,
3132  namebuf, pid, exitstatus);
3133 
3134  return true;
3135  }
3136 
3137  return false;
3138 }
3139 
3140 /*
3141  * CleanupBackend -- cleanup after terminated backend.
3142  *
3143  * Remove all local state associated with backend.
3144  *
3145  * If you change this, see also CleanupBackgroundWorker.
3146  */
3147 static void
3149  int exitstatus) /* child's exit status. */
3150 {
3151  dlist_mutable_iter iter;
3152 
3153  LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
3154 
3155  /*
3156  * If a backend dies in an ugly way then we must signal all other backends
3157  * to quickdie. If exit status is zero (normal) or one (FATAL exit), we
3158  * assume everything is all right and proceed to remove the backend from
3159  * the active backend list.
3160  */
3161 
3162 #ifdef WIN32
3163 
3164  /*
3165  * On win32, also treat ERROR_WAIT_NO_CHILDREN (128) as nonfatal case,
3166  * since that sometimes happens under load when the process fails to start
3167  * properly (long before it starts using shared memory). Microsoft reports
3168  * it is related to mutex failure:
3169  * http://archives.postgresql.org/pgsql-hackers/2010-09/msg00790.php
3170  */
3171  if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3172  {
3173  LogChildExit(LOG, _("server process"), pid, exitstatus);
3174  exitstatus = 0;
3175  }
3176 #endif
3177 
3178  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3179  {
3180  HandleChildCrash(pid, exitstatus, _("server process"));
3181  return;
3182  }
3183 
3184  dlist_foreach_modify(iter, &BackendList)
3185  {
3186  Backend *bp = dlist_container(Backend, elem, iter.cur);
3187 
3188  if (bp->pid == pid)
3189  {
3190  if (!bp->dead_end)
3191  {
3193  {
3194  /*
3195  * Uh-oh, the child failed to clean itself up. Treat as a
3196  * crash after all.
3197  */
3198  HandleChildCrash(pid, exitstatus, _("server process"));
3199  return;
3200  }
3201 #ifdef EXEC_BACKEND
3202  ShmemBackendArrayRemove(bp);
3203 #endif
3204  }
3205  if (bp->bgworker_notify)
3206  {
3207  /*
3208  * This backend may have been slated to receive SIGUSR1 when
3209  * some background worker started or stopped. Cancel those
3210  * notifications, as we don't want to signal PIDs that are not
3211  * PostgreSQL backends. This gets skipped in the (probably
3212  * very common) case where the backend has never requested any
3213  * such notifications.
3214  */
3216  }
3217  dlist_delete(iter.cur);
3218  free(bp);
3219  break;
3220  }
3221  }
3222 }
3223 
3224 /*
3225  * HandleChildCrash -- cleanup after failed backend, bgwriter, checkpointer,
3226  * walwriter, autovacuum, or background worker.
3227  *
3228  * The objectives here are to clean up our local state about the child
3229  * process, and to signal all other remaining children to quickdie.
3230  */
3231 static void
3232 HandleChildCrash(int pid, int exitstatus, const char *procname)
3233 {
3234  dlist_mutable_iter iter;
3235  slist_iter siter;
3236  Backend *bp;
3237  bool take_action;
3238 
 /*
  * NOTE(review): this text comes from a numbered source listing in which
  * some listing lines were dropped (3256, 3268, 3298, 3315, 3359, 3368,
  * 3459, 3472 and 3474). Every gap is flagged where it occurs below; the
  * function cannot compile until those lines are restored from the
  * upstream file.
  *
  * pid, exitstatus and procname are handed unchanged to LogChildExit();
  * pid is also compared against every tracked child so that the entry for
  * the dead process can be forgotten.
  */
3239  /*
3240  * We only log messages and send signals if this is the first process
3241  * crash and we're not doing an immediate shutdown; otherwise, we're only
3242  * here to update postmaster's idea of live processes. If we have already
3243  * signalled children, nonzero exit status is to be expected, so don't
3244  * clutter log.
3245  */
3246  take_action = !FatalError && Shutdown != ImmediateShutdown;
3247 
3248  if (take_action)
3249  {
3250  LogChildExit(LOG, procname, pid, exitstatus);
3251  ereport(LOG,
3252  (errmsg("terminating any other active server processes")));
3253  }
3254 
3255  /* Process background workers. */
 /*
  * NOTE(review): listing line 3256 is missing here. The block below reads
  * siter.cur and uses "continue", so a loop header driving siter belongs
  * on this line.
  */
3257  {
3258  RegisteredBgWorker *rw;
3259 
3260  rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
3261  if (rw->rw_pid == 0)
3262  continue; /* not running */
3263  if (rw->rw_pid == pid)
3264  {
3265  /*
3266  * Found entry for freshly-dead worker, so remove it.
3267  */
 /* NOTE(review): listing line 3268 is missing here. */
3269  dlist_delete(&rw->rw_backend->elem);
3270 #ifdef EXEC_BACKEND
3271  ShmemBackendArrayRemove(rw->rw_backend);
3272 #endif
3273  free(rw->rw_backend);
3274  rw->rw_backend = NULL;
3275  rw->rw_pid = 0;
3276  rw->rw_child_slot = 0;
3277  /* don't reset crashed_at */
3278  /* don't report child stop, either */
3279  /* Keep looping so we can signal remaining workers */
3280  }
3281  else
3282  {
3283  /*
3284  * This worker is still alive. Unless we did so already, tell it
3285  * to commit hara-kiri.
3286  *
3287  * SIGQUIT is the special signal that says exit without proc_exit
3288  * and let the user know what's going on. But if SendStop is set
3289  * (-s on command line), then we send SIGSTOP instead, so that we
3290  * can get core dumps from all backends by hand.
3291  */
3292  if (take_action)
3293  {
3294  ereport(DEBUG2,
3295  (errmsg_internal("sending %s to process %d",
3296  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3297  (int) rw->rw_pid)));
 /*
  * NOTE(review): listing line 3298 is missing here. As extracted, the
  * message above is logged but nothing is sent to the worker; the
  * regular-backend loop below calls signal_child() at the equivalent
  * spot.
  */
3299  }
3300  }
3301  }
3302 
3303  /* Process regular backends */
3304  dlist_foreach_modify(iter, &BackendList)
3305  {
3306  bp = dlist_container(Backend, elem, iter.cur);
3307 
3308  if (bp->pid == pid)
3309  {
3310  /*
3311  * Found entry for freshly-dead backend, so remove it.
3312  */
3313  if (!bp->dead_end)
3314  {
 /* NOTE(review): listing line 3315 is missing here. */
3316 #ifdef EXEC_BACKEND
3317  ShmemBackendArrayRemove(bp);
3318 #endif
3319  }
3320  dlist_delete(iter.cur);
3321  free(bp);
3322  /* Keep looping so we can signal remaining backends */
3323  }
3324  else
3325  {
3326  /*
3327  * This backend is still alive. Unless we did so already, tell it
3328  * to commit hara-kiri.
3329  *
3330  * SIGQUIT is the special signal that says exit without proc_exit
3331  * and let the user know what's going on. But if SendStop is set
3332  * (-s on command line), then we send SIGSTOP instead, so that we
3333  * can get core dumps from all backends by hand.
3334  *
3335  * We could exclude dead_end children here, but at least in the
3336  * SIGSTOP case it seems better to include them.
3337  *
3338  * Background workers were already processed above; ignore them
3339  * here.
3340  */
3341  if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
3342  continue;
3343 
3344  if (take_action)
3345  {
3346  ereport(DEBUG2,
3347  (errmsg_internal("sending %s to process %d",
3348  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3349  (int) bp->pid)));
3350  signal_child(bp->pid, (SendStop ? SIGSTOP : SIGQUIT));
3351  }
3352  }
3353  }
3354 
 /*
  * The sections for the startup process through the autovacuum launcher
  * share one shape: if the dead pid is that process, forget its PID;
  * otherwise, when take_action is set and the process exists, send it
  * SIGSTOP or SIGQUIT depending on SendStop.
  */
3355  /* Take care of the startup process too */
3356  if (pid == StartupPID)
3357  {
3358  StartupPID = 0;
 /* NOTE(review): listing line 3359 is missing here. */
3360  }
3361  else if (StartupPID != 0 && take_action)
3362  {
3363  ereport(DEBUG2,
3364  (errmsg_internal("sending %s to process %d",
3365  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3366  (int) StartupPID)));
3367  signal_child(StartupPID, (SendStop ? SIGSTOP : SIGQUIT));
 /* NOTE(review): listing line 3368 is missing here. */
3369  }
3370 
3371  /* Take care of the bgwriter too */
3372  if (pid == BgWriterPID)
3373  BgWriterPID = 0;
3374  else if (BgWriterPID != 0 && take_action)
3375  {
3376  ereport(DEBUG2,
3377  (errmsg_internal("sending %s to process %d",
3378  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3379  (int) BgWriterPID)));
3380  signal_child(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
3381  }
3382 
3383  /* Take care of the checkpointer too */
3384  if (pid == CheckpointerPID)
3385  CheckpointerPID = 0;
3386  else if (CheckpointerPID != 0 && take_action)
3387  {
3388  ereport(DEBUG2,
3389  (errmsg_internal("sending %s to process %d",
3390  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3391  (int) CheckpointerPID)));
3392  signal_child(CheckpointerPID, (SendStop ? SIGSTOP : SIGQUIT));
3393  }
3394 
3395  /* Take care of the walwriter too */
3396  if (pid == WalWriterPID)
3397  WalWriterPID = 0;
3398  else if (WalWriterPID != 0 && take_action)
3399  {
3400  ereport(DEBUG2,
3401  (errmsg_internal("sending %s to process %d",
3402  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3403  (int) WalWriterPID)));
3404  signal_child(WalWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
3405  }
3406 
3407  /* Take care of the walreceiver too */
3408  if (pid == WalReceiverPID)
3409  WalReceiverPID = 0;
3410  else if (WalReceiverPID != 0 && take_action)
3411  {
3412  ereport(DEBUG2,
3413  (errmsg_internal("sending %s to process %d",
3414  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3415  (int) WalReceiverPID)));
3416  signal_child(WalReceiverPID, (SendStop ? SIGSTOP : SIGQUIT));
3417  }
3418 
3419  /* Take care of the autovacuum launcher too */
3420  if (pid == AutoVacPID)
3421  AutoVacPID = 0;
3422  else if (AutoVacPID != 0 && take_action)
3423  {
3424  ereport(DEBUG2,
3425  (errmsg_internal("sending %s to process %d",
3426  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3427  (int) AutoVacPID)));
3428  signal_child(AutoVacPID, (SendStop ? SIGSTOP : SIGQUIT));
3429  }
3430 
3431  /*
3432  * Force a power-cycle of the pgarch process too. (This isn't absolutely
3433  * necessary, but it seems like a good idea for robustness, and it
3434  * simplifies the state-machine logic in the case where a shutdown request
3435  * arrives during crash processing.)
3436  */
 /*
  * Unlike the sections above, the archiver and stats collector are always
  * sent SIGQUIT (SendStop is not consulted), and their PID variables are
  * not cleared in this function.
  */
3437  if (PgArchPID != 0 && take_action)
3438  {
3439  ereport(DEBUG2,
3440  (errmsg_internal("sending %s to process %d",
3441  "SIGQUIT",
3442  (int) PgArchPID)));
3443  signal_child(PgArchPID, SIGQUIT);
3444  }
3445 
3446  /*
3447  * Force a power-cycle of the pgstat process too. (This isn't absolutely
3448  * necessary, but it seems like a good idea for robustness, and it
3449  * simplifies the state-machine logic in the case where a shutdown request
3450  * arrives during crash processing.)
3451  */
3452  if (PgStatPID != 0 && take_action)
3453  {
3454  ereport(DEBUG2,
3455  (errmsg_internal("sending %s to process %d",
3456  "SIGQUIT",
3457  (int) PgStatPID)));
3458  signal_child(PgStatPID, SIGQUIT);
 /* NOTE(review): listing line 3459 is missing here. */
3460  }
3461 
3462  /* We do NOT restart the syslogger */
3463 
 /*
  * Once FatalError is set, take_action evaluates to false on later calls,
  * so the log message and the signals above are issued only for the first
  * crash.
  */
3464  if (Shutdown != ImmediateShutdown)
3465  FatalError = true;
3466 
3467  /* We now transit into a state of waiting for children to die */
3468  if (pmState == PM_RECOVERY ||
3469  pmState == PM_HOT_STANDBY ||
3470  pmState == PM_RUN ||
3471  pmState == PM_WAIT_BACKUP ||
 /* NOTE(review): listing line 3472 is missing here (part of this condition). */
3473  pmState == PM_SHUTDOWN)
 /*
  * NOTE(review): listing line 3474 is missing here, i.e. the statement
  * this "if" controls (per the comment above, a pmState transition). As
  * extracted, the condition would instead govern the AbortStartTime check
  * below, which is presumably not intended.
  */
3475 
3476  /*
3477  * .. and if this doesn't happen quickly enough, now the clock is ticking
3478  * for us to kill them without mercy.
3479  */
3480  if (AbortStartTime == 0)
3481  AbortStartTime = time(NULL);
3482 }
3483 
3484 /*
3485  * Log the death of a child process.
3486  */
3487 static void
3488 LogChildExit(int lev, const char *procname, int pid, int exitstatus)
3489 {
3490  /*
3491  * size of activity_buffer is arbitrary, but set equal to default
3492  * track_activity_query_size
3493  */
3494  char activity_buffer[1024];
3495  const char *activity = NULL;
3496 
3497  if (!EXIT_STATUS_0(exitstatus))
3498  activity = pgstat_get_crashed_backend_activity(pid,
3499  activity_buffer,
3500  sizeof(activity_buffer));
3501 
3502  if (WIFEXITED(exitstatus))
3503  ereport(lev,
3504 
3505  /*------
3506  translator: %s is a noun phrase describing a child process, such as
3507  "server process" */
3508  (errmsg("%s (PID %d) exited with exit code %d",
3509  procname, pid, WEXITSTATUS(exitstatus)),
3510  activity ? errdetail("Failed process was running: %s", activity) : 0));
3511  else if (WIFSIGNALED(exitstatus))
3512 #if defined(WIN32)
3513  ereport(lev,
3514 
3515  /*------
3516  translator: %s is a noun phrase describing a child process, such as
3517  "server process" */
3518  (errmsg("%s (PID %d) was terminated by exception 0x%X",
3519  procname, pid, WTERMSIG(exitstatus)),
3520  errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
3521  activity ? errdetail("Failed process was running: %s", activity) : 0));
3522 #elif defined(HAVE_DECL_SYS_SIGLIST) && HAVE_DECL_SYS_SIGLIST
3523  ereport(lev,
3524 
3525  /*------
3526  translator: %s is a noun phrase describing a child process, such as
3527  "server process" */
3528  (errmsg("%s (PID %d) was terminated by signal %d: %s",
3529  procname, pid, WTERMSIG(exitstatus),
3530  WTERMSIG(exitstatus) < NSIG ?
3531  sys_siglist[WTERMSIG(exitstatus)] : "(unknown)"),
3532  activity ? errdetail("Failed process was running: %s", activity) : 0));
3533 #else
3534  ereport(lev,
3535 
3536  /*------
3537  translator: %s is a noun phrase describing a child process, such as
3538  "server process" */
3539  (errmsg("%s (PID %d) was terminated by signal %d",
3540  procname, pid, WTERMSIG(exitstatus)),
3541  activity ? errdetail("Failed process was running: %s", activity) : 0));
3542 #endif
3543  else
3544  ereport(lev,
3545 
3546  /*------
3547  translator: %s is a noun phrase describing a child process, such as
3548  "server process" */
3549  (errmsg("%s (PID %d) exited with unrecognized status %d",
3550  procname, pid, exitstatus),
3551  activity ? errdetail("Failed process was running: %s", activity) : 0));
3552 }
3553 
3554 /*
3555  * Advance the postmaster's state machine and take actions as appropriate
3556  *
3557  * This is common code for pmdie(), reaper() and sigusr1_handler(), which
3558  * receive the signals that might mean we need to change state.
3559  */
/*
 * NOTE(review): this text comes from a numbered source listing in which many
 * listing lines were dropped: 3561 (the declarator carrying the function name
 * and parameter list), 3569, 3582, 3588, 3610, 3615, 3619, 3625, 3640, 3643,
 * 3647, 3659, 3662, 3664, 3666, 3686, 3716, 3732, 3750, 3766, 3779, 3782,
 * 3784 and 3786. Several of them are the statements that actually change
 * pmState. Each gap is flagged inline; restore them from the upstream file
 * before relying on this text.
 */
3560 static void
/* NOTE(review): listing line 3561 (function name and parameters) is missing here. */
3562 {
3563  if (pmState == PM_WAIT_BACKUP)
3564  {
3565  /*
3566  * PM_WAIT_BACKUP state ends when online backup mode is not active.
3567  */
3568  if (!BackupInProgress())
 /* NOTE(review): listing line 3569 (the statement controlled by this if) is missing here. */
3570  }
3571 
3572  if (pmState == PM_WAIT_READONLY)
3573  {
3574  /*
3575  * PM_WAIT_READONLY state ends when we have no regular backends that
3576  * have been started during recovery. We kill the startup and
3577  * walreceiver processes and transition to PM_WAIT_BACKENDS. Ideally,
3578  * we might like to kill these processes first and then wait for
3579  * backends to die off, but that doesn't work at present because
3580  * killing the startup process doesn't release its locks.
3581  */
 /* NOTE(review): listing line 3582 (the condition guarding this block) is missing here. */
3583  {
3584  if (StartupPID != 0)
3585  signal_child(StartupPID, SIGTERM);
3586  if (WalReceiverPID != 0)
3587  signal_child(WalReceiverPID, SIGTERM);
 /* NOTE(review): listing line 3588 is missing here. */
3589  }
3590  }
3591 
3592  /*
3593  * If we are in a state-machine state that implies waiting for backends to
3594  * exit, see if they're all gone, and change state if so.
3595  */
3596  if (pmState == PM_WAIT_BACKENDS)
3597  {
3598  /*
3599  * PM_WAIT_BACKENDS state ends when we have no regular backends
3600  * (including autovac workers), no bgworkers (including unconnected
3601  * ones), and no walwriter, autovac launcher or bgwriter. If we are
3602  * doing crash recovery or an immediate shutdown then we expect the
3603  * checkpointer to exit as well, otherwise not. The archiver, stats,
3604  * and syslogger processes are disregarded since they are not
3605  * connected to shared memory; we also disregard dead_end children
3606  * here. Walsenders are also disregarded, they will be terminated
3607  * later after writing the checkpoint record, like the archiver
3608  * process.
3609  */
 /* NOTE(review): listing line 3610 (the start of this if condition) is missing here. */
3611  StartupPID == 0 &&
3612  WalReceiverPID == 0 &&
3613  BgWriterPID == 0 &&
3614  (CheckpointerPID == 0 ||
 /* NOTE(review): listing line 3615 (the rest of the parenthesized checkpointer clause) is missing here. */
3616  WalWriterPID == 0 &&
3617  AutoVacPID == 0)
3618  {
 /* NOTE(review): listing line 3619 (the condition selecting the first branch) is missing here. */
3620  {
3621  /*
3622  * Start waiting for dead_end children to die. This state
3623  * change causes ServerLoop to stop creating new ones.
3624  */
 /* NOTE(review): listing line 3625 (the state change the comment above describes) is missing here. */
3626 
3627  /*
3628  * We already SIGQUIT'd the archiver and stats processes, if
3629  * any, when we started immediate shutdown or entered
3630  * FatalError state.
3631  */
3632  }
3633  else
3634  {
3635  /*
3636  * If we get here, we are proceeding with normal shutdown. All
3637  * the regular children are gone, and it's time to tell the
3638  * checkpointer to do a shutdown checkpoint.
3639  */
 /* NOTE(review): listing line 3640 is missing here. */
3641  /* Start the checkpointer if not running */
3642  if (CheckpointerPID == 0)
 /* NOTE(review): listing line 3643 (the statement that starts the checkpointer) is missing here. */
3644  /* And tell it to shut down */
3645  if (CheckpointerPID != 0)
3646  {
 /* NOTE(review): listing line 3647 (the signal to the checkpointer) is missing here. */
3648  pmState = PM_SHUTDOWN;
3649  }
3650  else
3651  {
3652  /*
3653  * If we failed to fork a checkpointer, just shut down.
3654  * Any required cleanup will happen at next restart. We
3655  * set FatalError so that an "abnormal shutdown" message
3656  * gets logged when we exit.
3657  */
3658  FatalError = true;
 /* NOTE(review): listing line 3659 is missing here. */
3660 
3661  /* Kill the walsenders, archiver and stats collector too */
 /* NOTE(review): listing lines 3662, 3664 and 3666 (the three kill statements) are missing in this stretch. */
3663  if (PgArchPID != 0)
3665  if (PgStatPID != 0)
3667  }
3668  }
3669  }
3670  }
3671 
3672  if (pmState == PM_SHUTDOWN_2)
3673  {
3674  /*
3675  * PM_SHUTDOWN_2 state ends when there's no other children than
3676  * dead_end children left. There shouldn't be any regular backends
3677  * left by now anyway; what we're really waiting for is walsenders and
3678  * archiver.
3679  *
3680  * Walreceiver should normally be dead by now, but not when a fast
3681  * shutdown is performed during recovery.
3682  */
3683  if (PgArchPID == 0 && CountChildren(BACKEND_TYPE_ALL) == 0 &&
3684  WalReceiverPID == 0)
3685  {
 /* NOTE(review): listing line 3686 (the body of this block) is missing here. */
3687  }
3688  }
3689 
3690  if (pmState == PM_WAIT_DEAD_END)
3691  {
3692  /*
3693  * PM_WAIT_DEAD_END state ends when the BackendList is entirely empty
3694  * (ie, no dead_end children remain), and the archiver and stats
3695  * collector are gone too.
3696  *
3697  * The reason we wait for those two is to protect them against a new
3698  * postmaster starting conflicting subprocesses; this isn't an
3699  * ironclad protection, but it at least helps in the
3700  * shutdown-and-immediately-restart scenario. Note that they have
3701  * already been sent appropriate shutdown signals, either during a
3702  * normal state transition leading up to PM_WAIT_DEAD_END, or during
3703  * FatalError processing.
3704  */
3705  if (dlist_is_empty(&BackendList) &&
3706  PgArchPID == 0 && PgStatPID == 0)
3707  {
3708  /* These other guys should be dead already */
3709  Assert(StartupPID == 0);
3710  Assert(WalReceiverPID == 0);
3711  Assert(BgWriterPID == 0);
3712  Assert(CheckpointerPID == 0);
3713  Assert(WalWriterPID == 0);
3714  Assert(AutoVacPID == 0);
3715  /* syslogger is not considered here */
 /* NOTE(review): listing line 3716 is missing here. */
3717  }
3718  }
3719 
3720  /*
3721  * If we've been told to shut down, we exit as soon as there are no
3722  * remaining children. If there was a crash, cleanup will occur at the
3723  * next startup. (Before PostgreSQL 8.3, we tried to recover from the
3724  * crash before exiting, but that seems unwise if we are quitting because
3725  * we got SIGTERM from init --- there may well not be time for recovery
3726  * before init decides to SIGKILL us.)
3727  *
3728  * Note that the syslogger continues to run. It will exit when it sees
3729  * EOF on its input pipe, which happens when there are no more upstream
3730  * processes.
3731  */
 /* NOTE(review): listing line 3732 (the condition guarding this block) is missing here. */
3733  {
3734  if (FatalError)
3735  {
3736  ereport(LOG, (errmsg("abnormal database system shutdown")));
3737  ExitPostmaster(1);
3738  }
3739  else
3740  {
3741  /*
3742  * Terminate exclusive backup mode to avoid recovery after a clean
3743  * fast shutdown. Since an exclusive backup can only be taken
3744  * during normal running (and not, for example, while running
3745  * under Hot Standby) it only makes sense to do this if we reached
3746  * normal running. If we're still in recovery, the backup file is
3747  * one we're recovering *from*, and we must keep it around so that
3748  * recovery restarts from the right place.
3749  */
 /*
  * NOTE(review): listing line 3750 is missing here. The comment above
  * implies CancelBackup() is conditional, but as extracted it would run
  * unconditionally.
  */
3751  CancelBackup();
3752 
3753  /* Normal exit from the postmaster is here */
3754  ExitPostmaster(0);
3755  }
3756  }
3757 
3758  /*
3759  * If the startup process failed, or the user does not want an automatic
3760  * restart after backend crashes, wait for all non-syslogger children to
3761  * exit, and then exit postmaster. We don't try to reinitialize when the
3762  * startup process fails, because more than likely it will just fail again
3763  * and we will keep trying forever.
3764  */
3765  if (pmState == PM_NO_CHILDREN &&
 /* NOTE(review): listing line 3766 (the rest of this condition) is missing here. */
3767  ExitPostmaster(1);
3768 
3769  /*
3770  * If we need to recover from a crash, wait for all non-syslogger children
3771  * to exit, then reset shmem and StartupDataBase.
3772  */
3773  if (FatalError && pmState == PM_NO_CHILDREN)
3774  {
3775  ereport(LOG,
3776  (errmsg("all server processes terminated; reinitializing")));
3777 
3778  /* allow background workers to immediately restart */
 /* NOTE(review): listing line 3779 is missing here. */
3780 
3781  shmem_exit(1);
 /* NOTE(review): listing line 3782 is missing here. */
3783 
 /*
  * NOTE(review): listing lines 3784 and 3786 are missing around the
  * Assert below. The Assert requires StartupPID to be nonzero at this
  * point, so the first gap presumably assigns it; confirm against
  * upstream.
  */
3785  Assert(StartupPID != 0);
3787  pmState = PM_STARTUP;
3788  /* crash recovery started, reset SIGKILL flag */
3789  AbortStartTime = 0;
3790  }
3791 }
3792 
3793 
3794 /*
3795  * Send a signal to a postmaster child process
3796  *
3797  * On systems that have setsid(), each child process sets itself up as a
3798  * process group leader. For signals that are generally interpreted in the
3799  * appropriate fashion, we signal the entire process group not just the
3800  * direct child process. This allows us to, for example, SIGQUIT a blocked
3801  * archive_recovery script, or SIGINT a script being run by a backend via
3802  * system().
3803  *
3804  * There is a race condition for recently-forked children: they might not
3805  * have executed setsid() yet. So we signal the child directly as well as
3806  * the group. We assume such a child will handle the signal before trying
3807  * to spawn any grandchild processes. We also assume that signaling the
3808  * child twice will not cause any problems.
3809  */
3810 static void
3811 signal_child(pid_t pid, int signal)
3812 {
3813  if (kill(pid, signal) < 0)
3814  elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) pid, signal);
3815 #ifdef HAVE_SETSID
3816  switch (signal)
3817  {
3818  case SIGINT:
3819  case SIGTERM:
3820  case SIGQUIT:
3821  case SIGSTOP:
3822  case SIGKILL:
3823  if (kill(-pid, signal) < 0)
3824  elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) (-pid), signal);
3825  break;
3826  default:
3827  break;
3828  }
3829 #endif
3830 }
3831 
3832 /*
3833  * Send a signal to the targeted children (but NOT special children;
3834  * dead_end children are never signaled, either).
3835  */
/*
 * signal is the signal number passed on to signal_child(); target is a
 * bitmask tested against each entry's bkend_type, with BACKEND_TYPE_ALL
 * bypassing the type test. Returns true if at least one child was signaled.
 *
 * NOTE(review): listing lines 3860-3861 are missing from this numbered
 * extract; the gap is flagged inline.
 */
3836 static bool
3837 SignalSomeChildren(int signal, int target)
3838 {
3839  dlist_iter iter;
3840  bool signaled = false;
3841 
3842  dlist_foreach(iter, &BackendList)
3843  {
3844  Backend *bp = dlist_container(Backend, elem, iter.cur);
3845 
3846  if (bp->dead_end)
3847  continue;
3848 
3849  /*
3850  * Since target == BACKEND_TYPE_ALL is the most common case, we test
3851  * it first and avoid touching shared memory for every child.
3852  */
3853  if (target != BACKEND_TYPE_ALL)
3854  {
3855  /*
3856  * Assign bkend_type for any recently announced WAL Sender
3857  * processes.
3858  */
3859  if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
 /*
  * NOTE(review): listing lines 3860-3861 are missing here: the second
  * half of the condition above and the statement it controls (per the
  * comment, an assignment to bp->bkend_type).
  */
3862 
3863  if (!(target & bp->bkend_type))
3864  continue;
3865  }
3866 
3867  ereport(DEBUG4,
3868  (errmsg_internal("sending signal %d to process %d",
3869  signal, (int) bp->pid)));
3870  signal_child(bp->pid, signal);
3871  signaled = true;
3872  }
3873  return signaled;
3874 }
3875 
3876 /*
3877  * Send a termination signal to children. This considers all of our children
3878  * processes, except syslogger and dead_end backends.
3879  */
/*
 * NOTE(review): listing line 3881 (the declarator with the function name and
 * parameter list) and listing line 3888 are missing from this numbered
 * extract. The body uses a variable named "signal", which is presumably the
 * sole parameter; confirm against upstream.
 *
 * The visible body first calls SignalChildren(signal), then sends the same
 * signal to each special child whose PID variable is nonzero.
 */
3880 static void
/* NOTE(review): listing line 3881 (function name and parameters) is missing here. */
3882 {
3883  SignalChildren(signal);
3884  if (StartupPID != 0)
3885  {
3886  signal_child(StartupPID, signal);
3887  if (signal == SIGQUIT || signal == SIGKILL)
 /* NOTE(review): listing line 3888 (the statement controlled by this if) is missing here. */
3889  }
3890  if (BgWriterPID != 0)
3891  signal_child(BgWriterPID, signal);
3892  if (CheckpointerPID != 0)
3893  signal_child(CheckpointerPID, signal);
3894  if (WalWriterPID != 0)
3895  signal_child(WalWriterPID, signal);
3896  if (WalReceiverPID != 0)
3897  signal_child(WalReceiverPID, signal);
3898  if (AutoVacPID != 0)
3899  signal_child(AutoVacPID, signal);
3900  if (PgArchPID != 0)
3901  signal_child(PgArchPID, signal);
3902  if (PgStatPID != 0)
3903  signal_child(PgStatPID, signal);
3904 }
3905 
3906 /*
3907  * BackendStartup -- start backend process
3908  *
3909  * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
3910  *
3911  * Note: if you change this code, also consider StartAutovacuumWorker.
3912  */
/*
 * NOTE(review): this text comes from a numbered source listing in which
 * listing lines 3914 (the declarator), 3937, 3949, 3951, 3957, 3973 and 3992
 * were dropped. Each gap is flagged inline. The body uses "port" with ->
 * and passes it to backend_forkexec(), which takes a Port *, so the missing
 * declarator presumably reads BackendStartup(Port *port).
 */
3913 static int
/* NOTE(review): listing line 3914 (function name and parameters) is missing here. */
3915 {
3916  Backend *bn; /* for backend cleanup */
3917  pid_t pid;
3918 
3919  /*
3920  * Create backend data structure. Better before the fork() so we can
3921  * handle failure cleanly.
3922  */
3923  bn = (Backend *) malloc(sizeof(Backend));
3924  if (!bn)
3925  {
3926  ereport(LOG,
3927  (errcode(ERRCODE_OUT_OF_MEMORY),
3928  errmsg("out of memory")));
3929  return STATUS_ERROR;
3930  }
3931 
3932  /*
3933  * Compute the cancel key that will be assigned to this backend. The
3934  * backend will have its own copy in the forked-off process' value of
3935  * MyCancelKey, so that it can transmit the key to the frontend.
3936  */
 /*
  * NOTE(review): listing line 3937 (the condition that detects
  * key-generation failure) is missing here.
  */
3938  {
3939  free(bn);
3940  ereport(LOG,
3941  (errcode(ERRCODE_INTERNAL_ERROR),
3942  errmsg("could not generate random cancel key")));
3943  return STATUS_ERROR;
3944  }
3945 
3946  bn->cancel_key = MyCancelKey;
3947 
3948  /* Pass down canAcceptConnections state */
 /*
  * NOTE(review): listing lines 3949 and 3951 are missing around the next
  * line, so the dead_end expression below is incomplete (its second
  * operand and closing parenthesis are gone).
  */
3950  bn->dead_end = (port->canAcceptConnections != CAC_OK &&
3952 
3953  /*
3954  * Unless it's a dead_end child, assign it a child slot number
3955  */
3956  if (!bn->dead_end)
 /* NOTE(review): listing line 3957 (the slot assignment for the non-dead_end case) is missing here. */
3958  else
3959  bn->child_slot = 0;
3960 
3961  /* Hasn't asked to be notified about any bgworkers yet */
3962  bn->bgworker_notify = false;
3963 
3964 #ifdef EXEC_BACKEND
3965  pid = backend_forkexec(port);
3966 #else /* !EXEC_BACKEND */
3967  pid = fork_process();
3968  if (pid == 0) /* child */
3969  {
 /* The child frees its copy of bn and does not refer to it again in this branch. */
3970  free(bn);
3971 
3972  /* Detangle from postmaster */
 /* NOTE(review): listing line 3973 is missing here. */
3974 
3975  /* Close the postmaster's sockets */
3976  ClosePostmasterPorts(false);
3977 
3978  /* Perform additional initialization and collect startup packet */
3979  BackendInitialize(port);
3980 
3981  /* And run the backend */
3982  BackendRun(port);
3983  }
3984 #endif /* EXEC_BACKEND */
3985 
3986  if (pid < 0)
3987  {
3988  /* in parent, fork failed */
 /*
  * errno is saved and restored around the cleanup so that the %m in the
  * log message below still reports the fork failure.
  */
3989  int save_errno = errno;
3990 
3991  if (!bn->dead_end)
 /*
  * NOTE(review): listing line 3992 (the statement controlled by this if)
  * is missing here. As extracted, the if would govern free(bn), which is
  * presumably not intended.
  */
3993  free(bn);
3994  errno = save_errno;
3995  ereport(LOG,
3996  (errmsg("could not fork new process for connection: %m")));
3997  report_fork_failure_to_client(port, save_errno);
3998  return STATUS_ERROR;
3999  }
4000 
4001  /* in parent, successful fork */
4002  ereport(DEBUG2,
4003  (errmsg_internal("forked new backend, pid=%d socket=%d",
4004  (int) pid, (int) port->sock)));
4005 
4006  /*
4007  * Everything's been successful, it's safe to add this backend to our list
4008  * of backends.
4009  */
4010  bn->pid = pid;
4011  bn->bkend_type = BACKEND_TYPE_NORMAL; /* Can change later to WALSND */
4012  dlist_push_head(&BackendList, &bn->elem);
4013 
4014 #ifdef EXEC_BACKEND
4015  if (!bn->dead_end)
4016  ShmemBackendArrayAdd(bn);
4017 #endif
4018 
4019  return STATUS_OK;
4020 }
4021 
4022 /*
4023  * Try to report backend fork() failure to client before we close the
4024  * connection. Since we do not care to risk blocking the postmaster on
4025  * this connection, we set the connection to non-blocking and try only once.
4026  *
4027  * This is grungy special-purpose code; we cannot use backend libpq since
4028  * it's not up and running.
4029  */
4030 static void
4032 {
4033  char buffer[1000];
4034  int rc;
4035 
4036  /* Format the error message packet (always V2 protocol) */
4037  snprintf(buffer, sizeof(buffer), "E%s%s\n",
4038  _("could not fork new process for connection: "),
4039  strerror(errnum));
4040 
4041  /* Set port to non-blocking. Don't do send() if this fails */
4042  if (!pg_set_noblock(port->sock))
4043  return;
4044 
4045  /* We'll retry after EINTR, but ignore all other failures */
4046  do
4047  {
4048  rc = send(port->sock, buffer, strlen(buffer) + 1, 0);
4049  } while (rc < 0 && errno == EINTR);
4050 }
4051 
4052 
4053 /*
4054  * BackendInitialize -- initialize an interactive (postmaster-child)
4055  * backend process, and collect the client's startup packet.
4056  *
4057  * returns: nothing. Will not return at all if there's any failure.
4058  *
4059  * Note: this code does not depend on having any access to shared memory.
4060  * In the EXEC_BACKEND case, we are physically attached to shared memory
4061  * but have not yet set up most of our local pointers to shmem structures.
4062  */
/*
 * NOTE(review): this text comes from a numbered source listing in which
 * listing lines 4064 (the declarator), 4089-4090, 4119, 4121, 4193-4194 and
 * 4231 were dropped. Each gap is flagged inline. The caller invokes
 * BackendInitialize(port) and the body dereferences port, so the missing
 * declarator presumably reads BackendInitialize(Port *port).
 */
4063 static void
/* NOTE(review): listing line 4064 (function name and parameters) is missing here. */
4065 {
4066  int status;
4067  int ret;
4068  char remote_host[NI_MAXHOST];
4069  char remote_port[NI_MAXSERV];
4070  char remote_ps_data[NI_MAXHOST];
4071 
4072  /* Save port etc. for ps status */
4073  MyProcPort = port;
4074 
4075  /*
4076  * PreAuthDelay is a debugging aid for investigating problems in the
4077  * authentication cycle: it can be set in postgresql.conf to allow time to
4078  * attach to the newly-forked backend with a debugger. (See also
4079  * PostAuthDelay, which we allow clients to pass through PGOPTIONS, but it
4080  * is not honored until after authentication.)
4081  */
 /* The multiplication by 1000000L implies PreAuthDelay is in seconds and pg_usleep() takes microseconds. */
4082  if (PreAuthDelay > 0)
4083  pg_usleep(PreAuthDelay * 1000000L);
4084 
4085  /* This flag will remain set until InitPostgres finishes authentication */
4086  ClientAuthInProgress = true; /* limit visibility of log messages */
4087 
4088  /* save process start time */
 /*
  * NOTE(review): listing lines 4089-4090 (the statements that record the
  * start time) are missing here.
  */
4091 
4092  /* set these to empty in case they are needed before we set them up */
4093  port->remote_host = "";
4094  port->remote_port = "";
4095 
4096  /*
4097  * Initialize libpq and enable reporting of ereport errors to the client.
4098  * Must do this now because authentication uses libpq to send messages.
4099  */
4100  pq_init(); /* initialize libpq to talk to client */
4101  whereToSendOutput = DestRemote; /* now safe to ereport to client */
4102 
4103  /*
4104  * We arrange for a simple exit(1) if we receive SIGTERM or SIGQUIT or
4105  * timeout while trying to collect the startup packet. Otherwise the
4106  * postmaster cannot shutdown the database FAST or IMMED cleanly if a
4107  * buggy client fails to send the packet promptly. XXX it follows that
4108  * the remainder of this function must tolerate losing control at any
4109  * instant. Likewise, any pg_on_exit_callback registered before or during
4110  * this function must be prepared to execute at any instant between here
4111  * and the end of this function. Furthermore, affected callbacks execute
4112  * partially or not at all when a second exit-inducing signal arrives
4113  * after proc_exit_prepare() decrements on_proc_exit_index. (Thanks to
4114  * that mechanic, callbacks need not anticipate more than one call.) This
4115  * is fragile; it ought to instead follow the norm of handling interrupts
4116  * at selected, safe opportunities.
4117  */
4118  pqsignal(SIGTERM, startup_die);
 /*
  * NOTE(review): listing lines 4119 and 4121 are missing around the next
  * line. The comment above also mentions SIGQUIT and unblocking, neither
  * of which appears in the visible text.
  */
4120  InitializeTimeouts(); /* establishes SIGALRM handler */
4122 
4123  /*
4124  * Get the remote host name and port for logging and status display.
4125  */
4126  remote_host[0] = '\0';
4127  remote_port[0] = '\0';
4128  if ((ret = pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen,
4129  remote_host, sizeof(remote_host),
4130  remote_port, sizeof(remote_port),
4131  (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV)) != 0)
4132  ereport(WARNING,
4133  (errmsg_internal("pg_getnameinfo_all() failed: %s",
4134  gai_strerror(ret))));
 /* remote_ps_data is "host" or "host(port)"; it is later handed to init_ps_display(). */
4135  if (remote_port[0] == '\0')
4136  snprintf(remote_ps_data, sizeof(remote_ps_data), "%s", remote_host);
4137  else
4138  snprintf(remote_ps_data, sizeof(remote_ps_data), "%s(%s)", remote_host, remote_port);
4139 
4140  /*
4141  * Save remote_host and remote_port in port structure (after this, they
4142  * will appear in log_line_prefix data for log messages).
4143  */
 /* NOTE(review): the strdup() results here and below are stored without a NULL check. */
4144  port->remote_host = strdup(remote_host);
4145  port->remote_port = strdup(remote_port);
4146 
4147  /* And now we can issue the Log_connections message, if wanted */
4148  if (Log_connections)
4149  {
4150  if (remote_port[0])
4151  ereport(LOG,
4152  (errmsg("connection received: host=%s port=%s",
4153  remote_host,
4154  remote_port)));
4155  else
4156  ereport(LOG,
4157  (errmsg("connection received: host=%s",
4158  remote_host)));
4159  }
4160 
4161  /*
4162  * If we did a reverse lookup to name, we might as well save the results
4163  * rather than possibly repeating the lookup during authentication.
4164  *
4165  * Note that we don't want to specify NI_NAMEREQD above, because then we'd
4166  * get nothing useful for a client without an rDNS entry. Therefore, we
4167  * must check whether we got a numeric IPv4 or IPv6 address, and not save
4168  * it into remote_hostname if so. (This test is conservative and might
4169  * sometimes classify a hostname as numeric, but an error in that
4170  * direction is safe; it only results in a possible extra lookup.)
4171  */
4172  if (log_hostname &&
4173  ret == 0 &&
4174  strspn(remote_host, "0123456789.") < strlen(remote_host) &&
4175  strspn(remote_host, "0123456789ABCDEFabcdef:") < strlen(remote_host))
4176  port->remote_hostname = strdup(remote_host);
4177 
4178  /*
4179  * Ready to begin client interaction. We will give up and exit(1) after a
4180  * time delay, so that a broken client can't hog a connection
4181  * indefinitely. PreAuthDelay and any DNS interactions above don't count
4182  * against the time limit.
4183  *
4184  * Note: AuthenticationTimeout is applied here while waiting for the
4185  * startup packet, and then again in InitPostgres for the duration of any
4186  * authentication operations. So a hostile client could tie up the
4187  * process for nearly twice AuthenticationTimeout before we kick him off.
4188  *
4189  * Note: because PostgresMain will call InitializeTimeouts again, the
4190  * registration of STARTUP_PACKET_TIMEOUT will be lost. This is okay
4191  * since we never use it again after this function.
4192  */
 /*
  * NOTE(review): listing lines 4193-4194 (the statements that register
  * and arm the timeout described above) are missing here.
  */
4195 
4196  /*
4197  * Receive the startup packet (which might turn out to be a cancel request
4198  * packet).
4199  */
4200  status = ProcessStartupPacket(port, false);
4201 
4202  /*
4203  * Stop here if it was bad or a cancel packet. ProcessStartupPacket
4204  * already did any appropriate error reporting.
4205  */
4206  if (status != STATUS_OK)
4207  proc_exit(0);
4208 
4209  /*
4210  * Now that we have the user and database name, we can set the process
4211  * title for ps. It's good to do this as early as possible in startup.
4212  *
4213  * For a walsender, the ps display is set in the following form:
4214  *
4215  * postgres: wal sender process <user> <host> <activity>
4216  *
4217  * To achieve that, we pass "wal sender process" as username and username
4218  * as dbname to init_ps_display(). XXX: should add a new variant of
4219  * init_ps_display() to avoid abusing the parameters like this.
4220  */
4221  if (am_walsender)
4222  init_ps_display("wal sender process", port->user_name, remote_ps_data,
4223  update_process_title ? "authentication" : "");
4224  else
4225  init_ps_display(port->user_name, port->database_name, remote_ps_data,
4226  update_process_title ? "authentication" : "");
4227 
4228  /*
4229  * Disable the timeout, and prevent SIGTERM/SIGQUIT again.
4230  */
 /* NOTE(review): listing line 4231 (the statement that disables the timeout) is missing here. */
4232  PG_SETMASK(&BlockSig);
4233 }
4234 
4235 
4236 /*
4237  * BackendRun -- set up the backend's argument list and invoke PostgresMain()
4238  *
4239  * returns:
4240  * Shouldn't return at all.
4241  * NOTE(review): this header used to add "If PostgresMain() fails, return
 * status", but the function is declared void and contains no return
 * statement, so no status can be handed back to the caller.
 *
 * NOTE(review): listing lines 4244 (the declarator) and 4308 are missing
 * from this numbered extract; both gaps are flagged inline. The caller
 * invokes BackendRun(port) and the body dereferences port, so the missing
 * declarator presumably reads BackendRun(Port *port).
4242  */
4243 static void
/* NOTE(review): listing line 4244 (function name and parameters) is missing here. */
4245 {
4246  char **av;
4247  int maxac;
4248  int ac;
4249  long secs;
4250  int usecs;
4251  int i;
4252 
4253  /*
4254  * Don't want backend to be able to see the postmaster random number
4255  * generator state. We have to clobber the static random_seed *and* start
4256  * a new random sequence in the random() library function.
4257  */
4258 #ifndef HAVE_STRONG_RANDOM
4259  random_seed = 0;
4260  random_start_time.tv_usec = 0;
4261 #endif
4262  /* slightly hacky way to convert timestamptz into integers */
4263  TimestampDifference(0, port->SessionStartTime, &secs, &usecs);
 /* The new seed mixes the process ID with the session start time. */
4264  srandom((unsigned int) (MyProcPid ^ (usecs << 12) ^ secs));
4265 
4266  /*
4267  * Now, build the argv vector that will be given to PostgresMain.
4268  *
4269  * The maximum possible number of commandline arguments that could come
4270  * from ExtraOptions is (strlen(ExtraOptions) + 1) / 2; see
4271  * pg_split_opts().
4272  */
4273  maxac = 2; /* for fixed args supplied below */
4274  maxac += (strlen(ExtraOptions) + 1) / 2;
4275 
4276  av = (char **) MemoryContextAlloc(TopMemoryContext,
4277  maxac * sizeof(char *));
4278  ac = 0;
4279 
4280  av[ac++] = "postgres";
4281 
4282  /*
4283  * Pass any backend switches specified with -o on the postmaster's own
4284  * command line. We assume these are secure.
4285  */
4286  pg_split_opts(av, &ac, ExtraOptions);
4287 
4288  av[ac] = NULL;
4289 
 /*
  * The two fixed slots counted in maxac are "postgres" and the
  * terminating NULL, hence ac must stay strictly below maxac. Note that
  * the check runs after av[ac] has already been written.
  */
4290  Assert(ac < maxac);
4291 
4292  /*
4293  * Debug: print arguments being passed to backend
4294  */
4295  ereport(DEBUG3,
4296  (errmsg_internal("%s child[%d]: starting with (",
4297  progname, (int) getpid())));
4298  for (i = 0; i < ac; ++i)
4299  ereport(DEBUG3,
4300  (errmsg_internal("\t%s", av[i])));
4301  ereport(DEBUG3,
4302  (errmsg_internal(")")));
4303 
4304  /*
4305  * Make sure we aren't in PostmasterContext anymore. (We can't delete it
4306  * just yet, though, because InitPostgres will need the HBA data.)
4307  */
 /*
  * NOTE(review): listing line 4308 (the statement that leaves
  * PostmasterContext) is missing here.
  */
4309 
4310  PostgresMain(ac, av, port->database_name, port->user_name);
4311 }
4312 
4313 
4314 #ifdef EXEC_BACKEND
4315 
4316 /*
4317  * postmaster_forkexec -- fork and exec a postmaster subprocess
4318  *
4319  * The caller must have set up the argv array already, except for argv[2]
4320  * which will be filled with the name of the temp variable file.
4321  *
4322  * Returns the child process PID, or -1 on fork failure (a suitable error
4323  * message has been logged on failure).
4324  *
4325  * All uses of this routine will dispatch to SubPostmasterMain in the
4326  * child process.
4327  */
4328 pid_t
4329 postmaster_forkexec(int argc, char *argv[])
4330 {
4331  Port port;
4332 
4333  /* This entry point passes dummy values for the Port variables */
4334  memset(&port, 0, sizeof(port));
4335  return internal_forkexec(argc, argv, &port);
4336 }
4337 
4338 /*
4339  * backend_forkexec -- fork/exec off a backend process
4340  *
4341  * Some operating systems (WIN32) don't have fork() so we have to simulate
4342  * it by storing parameters that need to be passed to the child and
4343  * then create a new child process.
4344  *
4345  * returns the pid of the fork/exec'd process, or -1 on failure
4346  */
4347 static pid_t
4348 backend_forkexec(Port *port)
4349 {
4350  char *av[4];
4351  int ac = 0;
4352 
4353  av[ac++] = "postgres";
4354  av[ac++] = "--forkbackend";
4355  av[ac++] = NULL; /* filled in by internal_forkexec */
4356 
4357  av[ac] = NULL;
4358  Assert(ac < lengthof(av));
4359 
4360  return internal_forkexec(ac, av, port);
4361 }
4362 
4363 #ifndef WIN32
4364 
4365 /*
4366  * internal_forkexec non-win32 implementation
4367  *
4368  * - writes out backend variables to the parameter file
4369  * - fork():s, and then exec():s the child process
4370  */
4371 static pid_t
4372 internal_forkexec(int argc, char *argv[], Port *port)
4373 {
4374  static unsigned long tmpBackendFileNum = 0;
4375  pid_t pid;
4376  char tmpfilename[MAXPGPATH];
4377  BackendParameters param;
4378  FILE *fp;
4379 
4380  if (!save_backend_variables(&param, port))
4381  return -1; /* log made by save_backend_variables */
4382 
4383  /* Calculate name for temp file */
4384  snprintf(tmpfilename, MAXPGPATH, "%s/%s.backend_var.%d.%lu",
4386  MyProcPid, ++tmpBackendFileNum);
4387 
4388  /* Open file */
4389  fp = AllocateFile(tmpfilename, PG_BINARY_W);
4390  if (!fp)
4391  {
4392  /*
4393  * As in OpenTemporaryFileInTablespace, try to make the temp-file
4394  * directory
4395  */
4396  mkdir(PG_TEMP_FILES_DIR, S_IRWXU);
4397 
4398  fp = AllocateFile(tmpfilename, PG_BINARY_W);
4399  if (!fp)
4400  {
4401  ereport(LOG,
4403  errmsg("could not create file \"%s\": %m",
4404  tmpfilename)));
4405  return -1;
4406  }
4407  }
4408 
4409  if (fwrite(&param, sizeof(param), 1, fp) != 1)
4410  {
4411  ereport(LOG,
4413  errmsg("could not write to file \"%s\": %m", tmpfilename)));
4414  FreeFile(fp);
4415  return -1;
4416  }
4417 
4418  /* Release file */
4419  if (FreeFile(fp))
4420  {
4421  ereport(LOG,
4423  errmsg("could not write to file \"%s\": %m", tmpfilename)));
4424  return -1;
4425  }
4426 
4427  /* Make sure caller set up argv properly */
4428  Assert(argc >= 3);
4429  Assert(argv[argc] == NULL);
4430  Assert(strncmp(argv[1], "--fork", 6) == 0);
4431  Assert(argv[2] == NULL);
4432 
4433  /* Insert temp file name after --fork argument */
4434  argv[2] = tmpfilename;
4435 
4436  /* Fire off execv in child */
4437  if ((pid = fork_process()) == 0)
4438  {
4439  if (execv(postgres_exec_path, argv) < 0)
4440  {
4441  ereport(LOG,
4442  (errmsg("could not execute server process \"%s\": %m",
4443  postgres_exec_path)));
4444  /* We're already in the child process here, can't return */
4445  exit(1);
4446  }
4447  }
4448 
4449  return pid; /* Parent returns pid, or -1 on fork failure */
4450 }
4451 #else /* WIN32 */
4452 
4453 /*
4454  * internal_forkexec win32 implementation
4455  *
4456  * - starts backend using CreateProcess(), in suspended state
4457  * - writes out backend variables to the parameter file
4458  * - during this, duplicates handles and sockets required for
4459  * inheritance into the new process
4460  * - resumes execution of the new process once the backend parameter
4461  * file is complete.
4462  */
4463 static pid_t
4464 internal_forkexec(int argc, char *argv[], Port *port)
4465 {
4466  STARTUPINFO si;
4467  PROCESS_INFORMATION pi;
4468  int i;
4469  int j;
4470  char cmdLine[MAXPGPATH * 2];
4471  HANDLE paramHandle;
4472  BackendParameters *param;
4473  SECURITY_ATTRIBUTES sa;
4474  char paramHandleStr[32];
4475  win32_deadchild_waitinfo *childinfo;
4476 
4477  /* Make sure caller set up argv properly */
4478  Assert(argc >= 3);
4479  Assert(argv[argc] == NULL);
4480  Assert(strncmp(argv[1], "--fork", 6) == 0);
4481  Assert(argv[2] == NULL);
4482 
4483  /* Set up shared memory for parameter passing */
4484  ZeroMemory(&sa, sizeof(sa));
4485  sa.nLength = sizeof(sa);
4486  sa.bInheritHandle = TRUE;
4487  paramHandle = CreateFileMapping(INVALID_HANDLE_VALUE,
4488  &sa,
4489  PAGE_READWRITE,
4490  0,
4491  sizeof(BackendParameters),
4492  NULL);
4493  if (paramHandle == INVALID_HANDLE_VALUE)
4494  {
4495  elog(LOG, "could not create backend parameter file mapping: error code %lu",
4496  GetLastError());
4497  return -1;
4498  }
4499 
4500  param = MapViewOfFile(paramHandle, FILE_MAP_WRITE, 0, 0, sizeof(BackendParameters));
4501  if (!param)
4502  {
4503  elog(LOG, "could not map backend parameter memory: error code %lu",
4504  GetLastError());
4505  CloseHandle(paramHandle);
4506  return -1;
4507  }
4508 
4509  /* Insert temp file name after --fork argument */
4510 #ifdef _WIN64
4511  sprintf(paramHandleStr, "%llu", (LONG_PTR) paramHandle);
4512 #else
4513  sprintf(paramHandleStr, "%lu", (DWORD) paramHandle);
4514 #endif
4515  argv[2] = paramHandleStr;
4516 
4517  /* Format the cmd line */
4518  cmdLine[sizeof(cmdLine) - 1] = '\0';
4519  cmdLine[sizeof(cmdLine) - 2] = '\0';
4520  snprintf(cmdLine, sizeof(cmdLine) - 1, "\"%s\"", postgres_exec_path);
4521  i = 0;
4522  while (argv[++i] != NULL)
4523  {
4524  j = strlen(cmdLine);
4525  snprintf(cmdLine + j, sizeof(cmdLine) - 1 - j, " \"%s\"", argv[i]);
4526  }
4527  if (cmdLine[sizeof(cmdLine) - 2] != '\0')
4528  {
4529  elog(LOG, "subprocess command line too long");
4530  return -1;
4531  }
4532 
4533  memset(&pi, 0, sizeof(pi));
4534  memset(&si, 0, sizeof(si));
4535  si.cb = sizeof(si);
4536 
4537  /*
4538  * Create the subprocess in a suspended state. This will be resumed later,
4539  * once we have written out the parameter file.
4540  */
4541  if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, CREATE_SUSPENDED,
4542  NULL, NULL, &si, &pi))
4543  {
4544  elog(LOG, "CreateProcess call failed: %m (error code %lu)",
4545  GetLastError());
4546  return -1;
4547  }
4548 
4549  if (!save_backend_variables(param, port, pi.hProcess, pi.dwProcessId))
4550  {
4551  /*
4552  * log made by save_backend_variables, but we have to clean up the
4553  * mess with the half-started process
4554  */
4555  if (!TerminateProcess(pi.hProcess, 255))
4556  ereport(LOG,
4557  (errmsg_internal("could not terminate unstarted process: error code %lu",
4558  GetLastError())));
4559  CloseHandle(pi.hProcess);
4560  CloseHandle(pi.hThread);
4561  return -1; /* log made by save_backend_variables */
4562  }
4563 
4564  /* Drop the parameter shared memory that is now inherited to the backend */
4565  if (!UnmapViewOfFile(param))
4566  elog(LOG, "could not unmap view of backend parameter file: error code %lu",
4567  GetLastError());
4568  if (!CloseHandle(paramHandle))
4569  elog(LOG, "could not close handle to backend parameter file: error code %lu",
4570  GetLastError());
4571 
4572  /*
4573  * Reserve the memory region used by our main shared memory segment before
4574  * we resume the child process.
4575  */
4576  if (!pgwin32_ReserveSharedMemoryRegion(pi.hProcess))
4577  {
4578  /*
4579  * Failed to reserve the memory, so terminate the newly created
4580  * process and give up.
4581  */
4582  if (!TerminateProcess(pi.hProcess, 255))
4583  ereport(LOG,
4584  (errmsg_internal("could not terminate process that failed to reserve memory: error code %lu",
4585  GetLastError())));
4586  CloseHandle(pi.hProcess);
4587  CloseHandle(pi.hThread);
4588  return -1; /* logging done made by
4589  * pgwin32_ReserveSharedMemoryRegion() */
4590  }
4591 
4592  /*
4593  * Now that the backend variables are written out, we start the child
4594  * thread so it can start initializing while we set up the rest of the
4595  * parent state.
4596  */
4597  if (ResumeThread(pi.hThread) == -1)
4598  {
4599  if (!TerminateProcess(pi.hProcess, 255))
4600  {
4601  ereport(LOG,
4602  (errmsg_internal("could not terminate unstartable process: error code %lu",
4603  GetLastError())));
4604  CloseHandle(pi.hProcess);
4605  CloseHandle(pi.hThread);
4606  return -1;
4607  }
4608  CloseHandle(pi.hProcess);
4609  CloseHandle(pi.hThread);
4610  ereport(LOG,
4611  (errmsg_internal("could not resume thread of unstarted process: error code %lu",
4612  GetLastError())));
4613  return -1;
4614  }
4615 
4616  /*
4617  * Queue a waiter for to signal when this child dies. The wait will be
4618  * handled automatically by an operating system thread pool.
4619  *
4620  * Note: use malloc instead of palloc, since it needs to be thread-safe.
4621  * Struct will be free():d from the callback function that runs on a
4622  * different thread.
4623  */
4624  childinfo = malloc(sizeof(win32_deadchild_waitinfo));
4625  if (!childinfo)
4626  ereport(FATAL,
4627  (errcode(ERRCODE_OUT_OF_MEMORY),
4628  errmsg("out of memory")));
4629 
4630  childinfo->procHandle = pi.hProcess;
4631  childinfo->procId = pi.dwProcessId;
4632 
4633  if (!RegisterWaitForSingleObject(&childinfo->waitHandle,
4634  pi.hProcess,
4635  pgwin32_deadchild_callback,
4636  childinfo,
4637  INFINITE,
4638  WT_EXECUTEONLYONCE | WT_EXECUTEINWAITTHREAD))
4639  ereport(FATAL,
4640  (errmsg_internal("could not register process for wait: error code %lu",
4641  GetLastError())));
4642 
4643  /* Don't close pi.hProcess here - the wait thread needs access to it */
4644 
4645  CloseHandle(pi.hThread);
4646 
4647  return pi.dwProcessId;
4648 }
4649 #endif /* WIN32 */
4650 
4651 
4652 /*
4653  * SubPostmasterMain -- Get the fork/exec'd process into a state equivalent
4654  * to what it would be if we'd simply forked on Unix, and then
4655  * dispatch to the appropriate place.
4656  *
4657  * The first two command line arguments are expected to be "--forkFOO"
4658  * (where FOO indicates which postmaster child we are to become), and
4659  * the name of a variables file that we can read to load data that would
4660  * have been inherited by fork() on Unix. Remaining arguments go to the
4661  * subprocess FooMain() routine.
4662  */
4663 void
4664 SubPostmasterMain(int argc, char *argv[])
4665 {
4666  Port port;
4667 
4668  /* In EXEC_BACKEND case we will not have inherited these settings */
4669  IsPostmasterEnvironment = true;
4671 
4672  /* Setup as postmaster child */
4674 
4675  /* Setup essential subsystems (to ensure elog() behaves sanely) */
4677 
4678  /* Check we got appropriate args */
4679  if (argc < 3)
4680  elog(FATAL, "invalid subpostmaster invocation");
4681 
4682  /* Read in the variables file */
4683  memset(&port, 0, sizeof(Port));
4684  read_backend_variables(argv[2], &port);
4685 
4686  /* Close the postmaster's sockets (as soon as we know them) */
4687  ClosePostmasterPorts(strcmp(argv[1], "--forklog") == 0);
4688 
4689  /*
4690  * Set reference point for stack-depth checking
4691  */
4692  set_stack_base();
4693 
4694  /*
4695  * Set up memory area for GSS information. Mirrors the code in ConnCreate
4696  * for the non-exec case.
4697  */
4698 #if defined(ENABLE_GSS) || defined(ENABLE_SSPI)
4699  port.gss = (pg_gssinfo *) calloc(1, sizeof(pg_gssinfo));
4700  if (!port.gss)
4701  ereport(FATAL,
4702  (errcode(ERRCODE_OUT_OF_MEMORY),
4703  errmsg("out of memory")));
4704 #endif
4705 
4706  /*
4707  * If appropriate, physically re-attach to shared memory segment. We want
4708  * to do this before going any further to ensure that we can attach at the
4709  * same address the postmaster used. On the other hand, if we choose not
4710  * to re-attach, we may have other cleanup to do.
4711  *
4712  * If testing EXEC_BACKEND on Linux, you should run this as root before
4713  * starting the postmaster:
4714  *
4715  * echo 0 >/proc/sys/kernel/randomize_va_space
4716  *
4717  * This prevents using randomized stack and code addresses that cause the
4718  * child process's memory map to be different from the parent's, making it
4719  * sometimes impossible to attach to shared memory at the desired address.
4720  * Return the setting to its old value (usually '1' or '2') when finished.
4721  */
4722  if (strcmp(argv[1], "--forkbackend") == 0 ||
4723  strcmp(argv[1], "--forkavlauncher") == 0 ||
4724  strcmp(argv[1], "--forkavworker") == 0 ||
4725  strcmp(argv[1], "--forkboot") == 0 ||
4726  strncmp(argv[1], "--forkbgworker=", 15) == 0)
4728  else
4730 
4731  /* autovacuum needs this set before calling InitProcess */
4732  if (strcmp(argv[1], "--forkavlauncher") == 0)
4733  AutovacuumLauncherIAm();
4734  if (strcmp(argv[1], "--forkavworker") == 0)
4735  AutovacuumWorkerIAm();
4736 
4737  /*
4738  * Start our win32 signal implementation. This has to be done after we
4739  * read the backend variables, because we need to pick up the signal pipe
4740  * from the parent process.
4741  */
4742 #ifdef WIN32
4744 #endif
4745 
4746  /* In EXEC_BACKEND case we will not have inherited these settings */
4747  pqinitmask();
4748  PG_SETMASK(&BlockSig);
4749 
4750  /* Read in remaining GUC variables */
4751  read_nondefault_variables();
4752 
4753  /*
4754  * Reload any libraries that were preloaded by the postmaster. Since we
4755  * exec'd this process, those libraries didn't come along with us; but we
4756  * should load them into all child processes to be consistent with the
4757  * non-EXEC_BACKEND behavior.
4758  */
4760 
4761  /* Run backend or appropriate child */
4762  if (strcmp(argv[1], "--forkbackend") == 0)
4763  {
4764  Assert(argc == 3); /* shouldn't be any more args */
4765 
4766  /*
4767  * Need to reinitialize the SSL library in the backend, since the
4768  * context structures contain function pointers and cannot be passed
4769  * through the parameter file.
4770  *
4771  * If for some reason reload fails (maybe the user installed broken
4772  * key files), soldier on without SSL; that's better than all
4773  * connections becoming impossible.
4774  *
4775  * XXX should we do this in all child processes? For the moment it's
4776  * enough to do it in backend children.
4777  */
4778 #ifdef USE_SSL
4779  if (EnableSSL)
4780  {
4781  if (secure_initialize(false) == 0)
4782  LoadedSSL = true;
4783  else
4784  ereport(LOG,
4785  (errmsg("SSL configuration could not be loaded in child process")));
4786  }
4787 #endif
4788 
4789  /*
4790  * Perform additional initialization and collect startup packet.
4791  *
4792  * We want to do this before InitProcess() for a couple of reasons: 1.
4793  * so that we aren't eating up a PGPROC slot while waiting on the
4794  * client. 2. so that if InitProcess() fails due to being out of
4795  * PGPROC slots, we have already initialized libpq and are able to
4796  * report the error to the client.
4797  */
4798  BackendInitialize(&port);
4799 
4800  /* Restore basic shared memory pointers */
4802 
4803  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4804  InitProcess();
4805 
4806  /* Attach process to shared data structures */
4808 
4809  /* And run the backend */
4810  BackendRun(&port); /* does not return */
4811  }
4812  if (strcmp(argv[1], "--forkboot") == 0)
4813  {
4814  /* Restore basic shared memory pointers */
4816 
4817  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4819 
4820  /* Attach process to shared data structures */
4822 
4823  AuxiliaryProcessMain(argc - 2, argv + 2); /* does not return */
4824  }
4825  if (strcmp(argv[1], "--forkavlauncher") == 0)
4826  {
4827  /* Restore basic shared memory pointers */
4829 
4830  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4831  InitProcess();
4832 
4833  /* Attach process to shared data structures */
4835 
4836  AutoVacLauncherMain(argc - 2, argv + 2); /* does not return */
4837  }
4838  if (strcmp(argv[1], "--forkavworker") == 0)
4839  {
4840  /* Restore basic shared memory pointers */
4842 
4843  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4844  InitProcess();
4845 
4846  /* Attach process to shared data structures */
4848 
4849  AutoVacWorkerMain(argc - 2, argv + 2); /* does not return */
4850  }
4851  if (strncmp(argv[1], "--forkbgworker=", 15) == 0)
4852  {
4853  int shmem_slot;
4854 
4855  /* do this as early as possible; in particular, before InitProcess() */
4856  IsBackgroundWorker = true;
4857 
4858  /* Restore basic shared memory pointers */
4860 
4861  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4862  InitProcess();
4863 
4864  /* Attach process to shared data structures */
4866 
4867  /* Fetch MyBgworkerEntry from shared memory */
4868  shmem_slot = atoi(argv[1] + 15);
4869  MyBgworkerEntry = BackgroundWorkerEntry(shmem_slot);
4870 
4872  }
4873  if (strcmp(argv[1], "--forkarch") == 0)
4874  {
4875  /* Do not want to attach to shared memory */
4876 
4877  PgArchiverMain(argc, argv); /* does not return */
4878  }
4879  if (strcmp(argv[1], "--forkcol") == 0)
4880  {
4881  /* Do not want to attach to shared memory */
4882 
4883  PgstatCollectorMain(argc, argv); /* does not return */
4884  }
4885  if (strcmp(argv[1], "--forklog") == 0)
4886  {
4887  /* Do not want to attach to shared memory */
4888 
4889  SysLoggerMain(argc, argv); /* does not return */
4890  }
4891 
4892  abort(); /* shouldn't get here */
4893 }
4894 #endif /* EXEC_BACKEND */
4895 
4896 
4897 /*
4898  * ExitPostmaster -- cleanup
4899  *
4900  * Do NOT call exit() directly --- always go through here!
4901  */
4902 static void
4904 {
4905 #ifdef HAVE_PTHREAD_IS_THREADED_NP
4906 
4907  /*
4908  * There is no known cause for a postmaster to become multithreaded after
4909  * startup. Recheck to account for the possibility of unknown causes.
4910  * This message uses LOG level, because an unclean shutdown at this point
4911  * would usually not look much different from a clean shutdown.
4912  */
4913  if (pthread_is_threaded_np() != 0)
4914  ereport(LOG,
4915  (errcode(ERRCODE_INTERNAL_ERROR),
4916  errmsg_internal("postmaster became multithreaded"),
4917  errdetail("Please report this to <pgsql-bugs@postgresql.org>.")));
4918 #endif
4919 
4920  /* should cleanup shared memory and kill all backends */
4921 
4922  /*
4923  * Not sure of the semantics here. When the Postmaster dies, should the
4924  * backends all be killed? probably not.
4925  *
4926  * MUST -- vadim 05-10-1999
4927  */
4928 
4929  proc_exit(status);
4930 }
4931 
4932 /*
4933  * sigusr1_handler - handle signal conditions from child processes
4934  */
4935 static void
4937 {
4938  int save_errno = errno;
4939 
4940  PG_SETMASK(&BlockSig);
4941 
4942  /* Process background worker state change. */
4944  {
4946  StartWorkerNeeded = true;
4947  }
4948 
4949  /*
4950  * RECOVERY_STARTED and BEGIN_HOT_STANDBY signals are ignored in
4951  * unexpected states. If the startup process quickly starts up, completes
4952  * recovery, exits, we might process the death of the startup process
4953  * first. We don't want to go back to recovery in that case.
4954  */
4957  {
4958  /* WAL redo has started. We're out of reinitialization. */
4959  FatalError = false;
4960  Assert(AbortStartTime == 0);
4961 
4962  /*
4963  * Crank up the background tasks. It doesn't matter if this fails,
4964  * we'll just try again later.
4965  */
4966  Assert(CheckpointerPID == 0);
4968  Assert(BgWriterPID == 0);
4970 
4971  /*
4972  * Start the archiver if we're responsible for (re-)archiving received
4973  * files.
4974  */
4975  Assert(PgArchPID == 0);
4976  if (XLogArchivingAlways())
4977  PgArchPID = pgarch_start();
4978 
4979 #ifdef USE_SYSTEMD
4980  if (!EnableHotStandby)
4981  sd_notify(0, "READY=1");
4982 #endif
4983 
4984  pmState = PM_RECOVERY;
4985  }
4988  {
4989  /*
4990  * Likewise, start other special children as needed.
4991  */
4992  Assert(PgStatPID == 0);
4993  PgStatPID = pgstat_start();
4994 
4995  ereport(LOG,
4996  (errmsg("database system is ready to accept read only connections")));
4997 
4998 #ifdef USE_SYSTEMD
4999  sd_notify(0, "READY=1");
5000 #endif
5001 
5003  /* Some workers may be scheduled to start now */
5004  StartWorkerNeeded = true;
5005  }
5006 
5009 
5011  PgArchPID != 0)
5012  {
5013  /*
5014  * Send SIGUSR1 to archiver process, to wake it up and begin archiving
5015  * next transaction log file.
5016  */
5018  }
5019 
5021  SysLoggerPID != 0)
5022  {
5023  /* Tell syslogger to rotate logfile */
5025  }
5026 
5028  Shutdown == NoShutdown)
5029  {
5030  /*
5031  * Start one iteration of the autovacuum daemon, even if autovacuuming
5032  * is nominally not enabled. This is so we can have an active defense
5033  * against transaction ID wraparound. We set a flag for the main loop
5034  * to do it rather than trying to do it here --- this is because the
5035  * autovac process itself may send the signal, and we want to handle
5036  * that by launching another iteration as soon as the current one
5037  * completes.
5038  */
5039  start_autovac_launcher = true;
5040  }
5041 
5043  Shutdown == NoShutdown)
5044  {
5045  /* The autovacuum launcher wants us to start a worker process. */
5047  }
5048 
5050  WalReceiverPID == 0 &&
5051  (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5053  Shutdown == NoShutdown)
5054  {
5055  /* Startup Process wants us to start the walreceiver process. */
5057  }
5058 
5061  {
5062  /* Advance postmaster's state machine */
5064  }
5065 
5066  if (CheckPromoteSignal() && StartupPID != 0 &&
5067  (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5069  {
5070  /* Tell startup process to finish recovery */
5072  }
5073 
5075 
5076  errno = save_errno;
5077 }
5078 
5079 /*
5080  * SIGTERM or SIGQUIT while processing startup packet.
5081  * Clean up and exit(1).
5082  *
5083  * XXX: possible future improvement: try to send a message indicating
5084  * why we are disconnecting. Problem is to be sure we don't block while
5085  * doing so, nor mess up SSL initialization. In practice, if the client
5086  * has wedged here, it probably couldn't do anything with the message anyway.
5087  */
5088 static void
5090 {
5091  proc_exit(1);
5092 }
5093 
5094 /*
5095  * Dummy signal handler
5096  *
5097  * We use this for signals that we don't actually use in the postmaster,
5098  * but we do use in backends. If we were to SIG_IGN such signals in the
5099  * postmaster, then a newly started backend might drop a signal that arrives
5100  * before it's able to reconfigure its signal processing. (See notes in
5101  * tcop/postgres.c.)
5102  */
5103 static void
5105 {
5106 }
5107 
5108 /*
5109  * Timeout while processing startup packet.
5110  * As for startup_die(), we clean up and exit(1).
5111  */
5112 static void
5114 {
5115  proc_exit(1);
5116 }
5117 
5118 
5119 /*
5120  * Generate a random cancel key.
5121  */
5122 static bool
5124 {
5125 #ifdef HAVE_STRONG_RANDOM
5126  return pg_strong_random((char *) cancel_key, sizeof(int32));
5127 #else
5128  /*
5129  * If built with --disable-strong-random, use plain old erand48.
5130  *
5131  * We cannot use pg_backend_random() in postmaster, because it stores
5132  * its state in shared memory.
5133  */
5134  static unsigned short seed[3];
5135 
5136  /*
5137  * Select a random seed at the time of first receiving a request.
5138  */
5139  if (random_seed == 0)
5140  {
5141  struct timeval random_stop_time;
5142 
5143  gettimeofday(&random_stop_time, NULL);
5144 
5145  seed[0] = (unsigned short) random_start_time.tv_usec;
5146  seed[1] = (unsigned short) (random_stop_time.tv_usec) ^ (random_start_time.tv_usec >> 16);
5147  seed[2] = (unsigned short) (random_stop_time.tv_usec >> 16);
5148 
5149  random_seed = 1;
5150  }
5151 
5152  *cancel_key = pg_jrand48(seed);
5153 
5154  return true;
5155 #endif
5156 }
5157 
5158 /*
5159  * Count up number of child processes of specified types (dead_end children
5160  * are always excluded).
5161  */
5162 static int
5163 CountChildren(int target)
5164 {
5165  dlist_iter iter;
5166  int cnt = 0;
5167 
5168  dlist_foreach(iter, &BackendList)
5169  {
5170  Backend *bp = dlist_container(Backend, elem, iter.cur);
5171 
5172  if (bp->dead_end)
5173  continue;
5174 
5175  /*
5176  * Since target == BACKEND_TYPE_ALL is the most common case, we test
5177  * it first and avoid touching shared memory for every child.
5178  */
5179  if (target != BACKEND_TYPE_ALL)
5180  {
5181  /*
5182  * Assign bkend_type for any recently announced WAL Sender
5183  * processes.
5184  */
5185  if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
5188 
5189  if (!(target & bp->bkend_type))
5190  continue;
5191  }
5192 
5193  cnt++;
5194  }
5195  return cnt;
5196 }
5197 
5198 
5199 /*
5200  * StartChildProcess -- start an auxiliary process for the postmaster
5201  *
5202  * "type" determines what kind of child will be started. All child types
5203  * initially go to AuxiliaryProcessMain, which will handle common setup.
5204  *
5205  * Return value of StartChildProcess is subprocess' PID, or 0 if failed
5206  * to start subprocess.
5207  */
5208 static pid_t
5210 {
5211  pid_t pid;
5212  char *av[10];
5213  int ac = 0;
5214  char typebuf[32];
5215 
5216  /*
5217  * Set up command-line arguments for subprocess
5218  */
5219  av[ac++] = "postgres";
5220 
5221 #ifdef EXEC_BACKEND
5222  av[ac++] = "--forkboot";
5223  av[ac++] = NULL; /* filled in by postmaster_forkexec */
5224 #endif
5225 
5226  snprintf(typebuf, sizeof(typebuf), "-x%d", type);
5227  av[ac++] = typebuf;
5228 
5229  av[ac] = NULL;
5230  Assert(ac < lengthof(av));
5231 
5232 #ifdef EXEC_BACKEND
5233  pid = postmaster_forkexec(ac, av);
5234 #else /* !EXEC_BACKEND */
5235  pid = fork_process();
5236 
5237  if (pid == 0) /* child */
5238  {
5240 
5241  /* Close the postmaster's sockets */
5242  ClosePostmasterPorts(false);
5243 
5244  /* Release postmaster's working memory context */
5248 
5249  AuxiliaryProcessMain(ac, av);
5250  ExitPostmaster(0);
5251  }
5252 #endif /* EXEC_BACKEND */
5253 
5254  if (pid < 0)
5255  {
5256  /* in parent, fork failed */
5257  int save_errno = errno;
5258 
5259  errno = save_errno;
5260  switch (type)
5261  {
5262  case StartupProcess:
5263  ereport(LOG,
5264  (errmsg("could not fork startup process: %m")));
5265  break;
5266  case BgWriterProcess:
5267  ereport(LOG,
5268  (errmsg("could not fork background writer process: %m")));
5269  break;
5270  case CheckpointerProcess:
5271  ereport(LOG,
5272  (errmsg("could not fork checkpointer process: %m")));
5273  break;
5274  case WalWriterProcess:
5275  ereport(LOG,
5276  (errmsg("could not fork WAL writer process: %m")));
5277  break;
5278  case WalReceiverProcess:
5279  ereport(LOG,
5280  (errmsg("could not fork WAL receiver process: %m")));
5281  break;
5282  default:
5283  ereport(LOG,
5284  (errmsg("could not fork process: %m")));
5285  break;
5286  }
5287 
5288  /*
5289  * fork failure is fatal during startup, but there's no need to choke
5290  * immediately if starting other child types fails.
5291  */
5292  if (type == StartupProcess)
5293  ExitPostmaster(1);
5294  return 0;
5295  }
5296 
5297  /*
5298  * in parent, successful fork
5299  */
5300  return pid;
5301 }
5302 
5303 /*
5304  * StartAutovacuumWorker
5305  * Start an autovac worker process.
5306  *
5307  * This function is here because it enters the resulting PID into the
5308  * postmaster's private backends list.
5309  *
5310  * NB -- this code very roughly matches BackendStartup.
5311  */
5312 static void
5314 {
5315  Backend *bn;
5316 
5317  /*
5318  * If not in condition to run a process, don't try, but handle it like a
5319  * fork failure. This does not normally happen, since the signal is only
5320  * supposed to be sent by autovacuum launcher when it's OK to do it, but
5321  * we have to check to avoid race-condition problems during DB state
5322  * changes.
5323  */
5324  if (canAcceptConnections() == CAC_OK)
5325  {
5326  /*
5327  * Compute the cancel key that will be assigned to this session.
5328  * We probably don't need cancel keys for autovac workers, but
5329  * we'd better have something random in the field to prevent
5330  * unfriendly people from sending cancels to them.
5331  */
5333  {
5334  ereport(LOG,
5335  (errcode(ERRCODE_INTERNAL_ERROR),
5336  errmsg("could not generate random cancel key")));
5337  return;
5338  }
5339 
5340  bn = (Backend *) malloc(sizeof(Backend));
5341  if (bn)
5342  {
5343  bn->cancel_key = MyCancelKey;
5344 
5345  /* Autovac workers are not dead_end and need a child slot */
5346  bn->dead_end = false;
5348  bn->bgworker_notify = false;
5349 
5350  bn->pid = StartAutoVacWorker();
5351  if (bn->pid > 0)
5352  {
5354  dlist_push_head(&BackendList, &bn->elem);
5355 #ifdef EXEC_BACKEND
5356  ShmemBackendArrayAdd(bn);
5357 #endif
5358  /* all OK */
5359  return;
5360  }
5361 
5362  /*
5363  * fork failed, fall through to report -- actual error message was
5364  * logged by StartAutoVacWorker
5365  */
5367  free(bn);
5368  }
5369  else
5370  ereport(LOG,
5371  (errcode(ERRCODE_OUT_OF_MEMORY),
5372  errmsg("out of memory")));
5373  }
5374 
5375  /*
5376  * Report the failure to the launcher, if it's running. (If it's not, we
5377  * might not even be connected to shared memory, so don't try to call
5378  * AutoVacWorkerFailed.) Note that we also need to signal it so that it
5379  * responds to the condition, but we don't do that here, instead waiting
5380  * for ServerLoop to do it. This way we avoid a ping-pong signalling in
5381  * quick succession between the autovac launcher and postmaster in case
5382  * things get ugly.
5383  */
5384  if (AutoVacPID != 0)
5385  {
5387  avlauncher_needs_signal = true;
5388  }
5389 }
5390 
5391 /*
5392  * Create the opts file
5393  */
5394 static bool
5395 CreateOptsFile(int argc, char *argv[], char *fullprogname)
5396 {
5397  FILE *fp;
5398  int i;
5399 
5400 #define OPTS_FILE "postmaster.opts"
5401 
5402  if ((fp = fopen(OPTS_FILE, "w")) == NULL)
5403  {
5404  elog(LOG, "could not create file \"%s\": %m", OPTS_FILE);
5405  return false;
5406  }
5407 
5408  fprintf(fp, "%s", fullprogname);
5409  for (i = 1; i < argc; i++)
5410  fprintf(fp, " \"%s\"", argv[i]);
5411  fputs("\n", fp);
5412 
5413  if (fclose(fp))
5414  {
5415  elog(LOG, "could not write file \"%s\": %m", OPTS_FILE);
5416  return false;
5417  }
5418 
5419  return true;
5420 }
5421 
5422 
5423 /*
5424  * MaxLivePostmasterChildren
5425  *
5426  * This reports the number of entries needed in per-child-process arrays
5427  * (the PMChildFlags array, and if EXEC_BACKEND the ShmemBackendArray).
5428  * These arrays include regular backends, autovac workers, walsenders
5429  * and background workers, but not special children nor dead_end children.
5430  * This allows the arrays to have a fixed maximum size, to wit the same
5431  * too-many-children limit enforced by canAcceptConnections(). The exact value
5432  * isn't too critical as long as it's more than MaxBackends.
5433  */
5434 int
5436 {
5437  return 2 * (MaxConnections + autovacuum_max_workers + 1 +
5439 }
5440 
5441 /*
5442  * Connect background worker to a database.
 *
 * Visible steps, in order:
 *   - a FATAL report, "database connection requirement not indicated
 *     during registration";
 *   - InitPostgres() called with dbname and username, both OID arguments
 *     being InvalidOid;
 *   - an ERROR report unless IsInitProcessingMode() still returns true.
 *
 * NOTE(review): the embedded listing numbers skip 5445, 5447, 5450 and
 * 5461.  The function name and parameter list, the first statement of the
 * body, the condition that presumably guards the FATAL report, and the
 * last statement before the closing brace are therefore not shown.  As
 * printed, the FATAL report looks unconditional; confirm against the
 * upstream file.
5443  */
5444 void
5446 {
5448 
5449  /* XXX is this the right errcode? */
5451  ereport(FATAL,
5452  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5453  errmsg("database connection requirement not indicated during registration")));
5454 
5455  InitPostgres(dbname, InvalidOid, username, InvalidOid, NULL);
5456 
5457  /* the process had better still be in "init" processing mode here */
5458  if (!IsInitProcessingMode())
5459  ereport(ERROR,
5460  (errmsg("invalid processing mode in background worker")));
5462 }
5463 
5464 /*
5465  * Connect background worker to a database using OIDs.
 *
 * Visible steps, in order:
 *   - a FATAL report, "database connection requirement not indicated
 *     during registration";
 *   - InitPostgres() called with dboid and useroid, both name arguments
 *     being NULL;
 *   - an ERROR report unless IsInitProcessingMode() still returns true.
 *
 * NOTE(review): the embedded listing numbers skip 5468, 5470, 5473 and
 * 5484.  The function name and parameter list, the first statement of the
 * body, the condition that presumably guards the FATAL report, and the
 * last statement before the closing brace are therefore not shown.  As
 * printed, the FATAL report looks unconditional; confirm against the
 * upstream file.
5466  */
5467 void
5469 {
5471 
5472  /* XXX is this the right errcode? */
5474  ereport(FATAL,
5475  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5476  errmsg("database connection requirement not indicated during registration")));
5477 
5478  InitPostgres(NULL, dboid, NULL, useroid, NULL);
5479 
5480  /* the process had better still be in "init" processing mode here */
5481  if (!IsInitProcessingMode())
5482  ereport(ERROR,
5483  (errmsg("invalid processing mode in background worker")));
5485 }
5486 
5487 /*
5488  * Block/unblock signals in a background worker
 *
 * The first function of the pair: its only visible statement installs the
 * BlockSig mask through PG_SETMASK().  It returns nothing.
 *
 * NOTE(review): the embedded listing numbers skip 5491, so the line with
 * the function name and parameter list is not shown here; restore it from
 * the upstream file.
5489  */
5490 void
5492 {
5493  PG_SETMASK(&BlockSig);
5494 }
5495 
/*
 * Second function under the "Block/unblock signals in a background worker"
 * heading.
 *
 * NOTE(review): the embedded listing numbers skip 5497 and 5499, so both
 * the function name/parameter list and the single body statement are
 * missing from this text.  Presumably the body installs the unblocking
 * signal mask, mirroring the blocking function; confirm against the
 * upstream file.
 */
5496 void
5498 {
5500 }
5501 
#ifdef EXEC_BACKEND
/*
 * bgworker_forkexec
 *
 * Build the argument vector for a background worker child and hand it to
 * postmaster_forkexec().
 *
 * The vector is: "postgres", "--forkbgworker=<shmem_slot>", one NULL
 * placeholder that postmaster_forkexec fills in, and a terminating NULL.
 * The count passed along covers the first three entries only.
 *
 * Returns whatever postmaster_forkexec() returns.
 */
static pid_t
bgworker_forkexec(int shmem_slot)
{
	char		slot_arg[MAXPGPATH];
	char	   *argv[10];
	int			argc;

	snprintf(slot_arg, sizeof(slot_arg), "--forkbgworker=%d", shmem_slot);

	argc = 0;
	argv[argc] = "postgres";
	argc++;
	argv[argc] = slot_arg;
	argc++;
	argv[argc] = NULL;			/* filled in by postmaster_forkexec */
	argc++;
	argv[argc] = NULL;			/* terminator, not counted in argc */

	Assert(argc < lengthof(argv));

	return postmaster_forkexec(argc, argv);
}
#endif
5522 
5523 /*
5524  * Start a new bgworker.
5525  * Starting time conditions must have been checked already.
5526  *
5527  * This code is heavily based on autovacuum.c, q.v.
 *
 * The worker is described by rw.  A DEBUG1 message naming
 * rw->rw_worker.bgw_name is emitted, then a child is created with
 * bgworker_forkexec() under EXEC_BACKEND or fork_process() otherwise.
 * Nothing is returned; on fork failure the function logs and returns
 * without touching rw.
 *
 * NOTE(review): the embedded listing numbers skip 5530, 5552, 5562,
 * 5566-5568, 5570 and 5576.  The function name and parameter list, part
 * of the child-side setup (including the right-hand side of the
 * MyBgworkerEntry assignment and whatever the child finally runs), and one
 * parent-side statement are therefore not shown; restore them from the
 * upstream file.
5528  */
5529 static void
5531 {
5532  pid_t worker_pid;
5533 
5534  ereport(DEBUG1,
5535  (errmsg("starting background worker process \"%s\"",
5536  rw->rw_worker.bgw_name)));
5537 
5538 #ifdef EXEC_BACKEND
5539  switch ((worker_pid = bgworker_forkexec(rw->rw_shmem_slot)))
5540 #else
5541  switch ((worker_pid = fork_process()))
5542 #endif
5543  {
 /* fork failed: log it and leave rw->rw_pid unset */
5544  case -1:
5545  ereport(LOG,
5546  (errmsg("could not fork worker process: %m")));
5547  return;
5548 
5549 #ifndef EXEC_BACKEND
5550  case 0:
5551  /* in postmaster child ... */
5553 
5554  /* Close the postmaster's sockets */
5555  ClosePostmasterPorts(false);
5556 
5557  /*
5558  * Before blowing away PostmasterContext, save this bgworker's
5559  * data where it can find it.
5560  */
5561  MyBgworkerEntry = (BackgroundWorker *)
5563  memcpy(MyBgworkerEntry, &rw->rw_worker, sizeof(BackgroundWorker));
5564 
5565  /* Release postmaster's working memory context */
5569 
5571  break;
5572 #endif
 /* in postmaster: record the child's pid in rw and in its Backend entry */
5573  default:
5574  rw->rw_pid = worker_pid;
5575  rw->rw_backend->pid = rw->rw_pid;
5577  break;
5578  }
5579 }
5580 
5581 /*
5582  * Does the current postmaster state require starting a worker with the
5583  * specified start_time?
 *
 * Returns true when start_time is acceptable for the current pmState,
 * false otherwise.  The switch relies on deliberate fall-through, so each
 * later state accepts a subset of what the earlier ones accept:
 *
 *   PM_RUN                          RecoveryFinished, ConsistentState,
 *                                   PostmasterStart
 *   PM_HOT_STANDBY                  ConsistentState, PostmasterStart
 *   PM_RECOVERY/PM_STARTUP/PM_INIT  PostmasterStart
 *   the shutdown and wait states    nothing (always false)
 *
 * NOTE(review): the embedded listing numbers skip 5586, so the line with
 * the function name and parameter list is not shown here; the body uses a
 * single parameter named start_time.
5584  */
5585 static bool
5587 {
5588  switch (pmState)
5589  {
 /* states in which no worker is ever started */
5590  case PM_NO_CHILDREN:
5591  case PM_WAIT_DEAD_END:
5592  case PM_SHUTDOWN_2:
5593  case PM_SHUTDOWN:
5594  case PM_WAIT_BACKENDS:
5595  case PM_WAIT_READONLY:
5596  case PM_WAIT_BACKUP:
5597  break;
5598 
5599  case PM_RUN:
5600  if (start_time == BgWorkerStart_RecoveryFinished)
5601  return true;
5602  /* fall through */
5603 
5604  case PM_HOT_STANDBY:
5605  if (start_time == BgWorkerStart_ConsistentState)
5606  return true;
5607  /* fall through */
5608 
5609  case PM_RECOVERY:
5610  case PM_STARTUP:
5611  case PM_INIT:
5612  if (start_time == BgWorkerStart_PostmasterStart)
5613  return true;
5614  /* fall through */
5615 
5616  }
5617 
 /* no case accepted start_time */
5618  return false;
5619 }
5620 
5621 /*
5622  * Allocate the Backend struct for a connected background worker, but don't
5623  * add it to the list of backends just yet.
5624  *
5625  * Some info from the Backend is copied into the passed rw.
 *
 * Returns true on success, after storing the new Backend in
 * rw->rw_backend and copying its child_slot into rw->rw_child_slot.
 * Returns false, after a LOG message, if the cancel key cannot be
 * generated or if malloc() fails.  The Backend is obtained with plain
 * malloc(); this function does not free it.
 *
 * NOTE(review): the embedded listing numbers skip 5628, 5638, 5644, 5661,
 * 5666 and 5667.  The function name and parameter list, the condition
 * opening the cancel-key failure branch, one statement in each failure
 * branch (the out-of-memory comment suggests a crash-time field is set
 * there), and two Backend field assignments are therefore not shown.  In
 * particular the assignment of bn->child_slot, which is read below, is
 * presumably among the missing lines; confirm against the upstream file.
5626  */
5627 static bool
5629 {
5630  Backend *bn;
5631 
5632  /*
5633  * Compute the cancel key that will be assigned to this session. We
5634  * probably don't need cancel keys for background workers, but we'd better
5635  * have something random in the field to prevent unfriendly people from
5636  * sending cancels to them.
5637  */
5639  {
5640  ereport(LOG,
5641  (errcode(ERRCODE_INTERNAL_ERROR),
5642  errmsg("could not generate random cancel key")));
5643 
5645  return false;
5646  }
5647 
5648  bn = malloc(sizeof(Backend));
5649  if (bn == NULL)
5650  {
5651  ereport(LOG,
5652  (errcode(ERRCODE_OUT_OF_MEMORY),
5653  errmsg("out of memory")));
5654 
5655  /*
5656  * The worker didn't really crash, but setting this nonzero makes
5657  * postmaster wait a bit before attempting to start it again; if it
5658  * tried again right away, most likely it'd find itself under the same
5659  * memory pressure.
5660  */
5662  return false;
5663  }
5664 
5665  bn->cancel_key = MyCancelKey;
5668  bn->dead_end = false;
5669  bn->bgworker_notify = false;
5670 
 /* hand the new entry to the caller through rw; it is not yet in any list */
5671  rw->rw_backend = bn;
5672  rw->rw_child_slot = bn->child_slot;
5673 
5674  return true;
5675 }
5676 
5677 /*
5678  * If the time is right, start one background worker.
5679  *
5680  * As a side effect, the bgworker control variables are set or reset whenever
5681  * there are more workers to start after this one, and whenever the overall
5682  * system state requires it.
 *
 * Control variables as handled by the visible code:
 *   StartWorkerNeeded  cleared when FatalError is set or when no runnable
 *                      worker is found; set after one worker is started;
 *                      left unchanged when assign_backendlist_entry() fails.
 *   HaveCrashedWorker  cleared on entry; set when a crashed worker is
 *                      skipped because its restart interval has not yet
 *                      elapsed.
 *
 * At most one worker is started per call: the function returns right
 * after the first start attempt.
 *
 * NOTE(review): the embedded listing numbers skip 5685, 5699, 5725, 5734
 * and 5742.  The function name and parameter list, the loop header over
 * the registered-worker list, the "never restart" test, the first half of
 * the restart-interval test, and the condition guarding the start branch
 * are therefore not shown; restore them from the upstream file.
 *
 * NOTE(review): do_start_bgworker() as shown in this file returns void
 * and, on fork failure, returns without setting rw->rw_pid.  The code
 * below nevertheless pushes rw->rw_backend onto BackendList afterwards.
 * Verify whether that leaves a stale Backend entry when fork fails.
5683  */
5684 static void
5686 {
5687  slist_mutable_iter iter;
5688  TimestampTz now = 0;
5689 
5690  if (FatalError)
5691  {
5692  StartWorkerNeeded = false;
5693  HaveCrashedWorker = false;
5694  return; /* not yet */
5695  }
5696 
5697  HaveCrashedWorker = false;
5698 
5700  {
5701  RegisteredBgWorker *rw;
5702 
5703  rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
5704 
5705  /* already running? */
5706  if (rw->rw_pid != 0)
5707  continue;
5708 
5709  /* marked for death? */
5710  if (rw->rw_terminate)
5711  {
5712  ForgetBackgroundWorker(&iter);
5713  continue;
5714  }
5715 
5716  /*
5717  * If this worker has crashed previously, maybe it needs to be
5718  * restarted (unless on registration it specified it doesn't want to
5719  * be restarted at all). Check how long ago did a crash last happen.
5720  * If the last crash is too recent, don't start it right away; let it
5721  * be restarted once enough time has passed.
5722  */
5723  if (rw->rw_crashed_at != 0)
5724  {
5726  {
5727  ForgetBackgroundWorker(&iter);
5728  continue;
5729  }
5730 
 /* fetch the current time at most once per call */
5731  if (now == 0)
5732  now = GetCurrentTimestamp();
5733 
5735  rw->rw_worker.bgw_restart_time * 1000))
5736  {
5737  HaveCrashedWorker = true;
5738  continue;
5739  }
5740  }
5741 
5743  {
5744  /* reset crash time before calling assign_backendlist_entry */
5745  rw->rw_crashed_at = 0;
5746 
5747  /*
5748  * Allocate and assign the Backend element. Note we must do this
5749  * before forking, so that we can handle out of memory properly.
5750  */
 /* on failure, give up for this call; StartWorkerNeeded keeps its value */
5751  if (!assign_backendlist_entry(rw))
5752  return;
5753 
5754  do_start_bgworker(rw); /* sets rw->rw_pid */
5755 
5756  dlist_push_head(&BackendList, &rw->rw_backend->elem);
5757 #ifdef EXEC_BACKEND
5758  ShmemBackendArrayAdd(rw->rw_backend);
5759 #endif
5760 
5761  /*
5762  * Have ServerLoop call us again. Note that there might not
5763  * actually *be* another runnable worker, but we don't care all
5764  * that much; we will find out the next time we run.
5765  */
5766  StartWorkerNeeded = true;
5767  return;
5768  }
5769  }
5770 
5771  /* no runnable worker found */
5772  StartWorkerNeeded = false;
5773 }
5774 
5775 /*
5776  * When a backend asks to be notified about worker state changes, we
5777  * set a flag in its backend entry. The background worker machinery needs
5778  * to know when such backends exit.
 *
 * Scans BackendList for an entry whose pid equals the given pid.  On the
 * first match, sets that entry's bgworker_notify flag and returns true.
 * Returns false if no entry matches.
 *
 * NOTE(review): the embedded listing numbers skip 5781, so the line with
 * the function name and parameter list is not shown here; the body uses a
 * single parameter named pid.
5779  */
5780 bool
5782 {
5783  dlist_iter iter;
5784  Backend *bp;
5785 
5786  dlist_foreach(iter, &BackendList)
5787  {
5788  bp = dlist_container(Backend, elem, iter.cur);
5789  if (bp->pid == pid)
5790  {
5791  bp->bgworker_notify = true;
5792  return true;
5793  }
5794  }
 /* no backend with that pid is in the list */
5795  return false;
5796 }
5797 
5798 #ifdef EXEC_BACKEND
5799 
5800 /*
5801  * The following need to be available to the save/restore_backend_variables
5802  * functions. They are marked NON_EXEC_STATIC in their home modules.
5803  */
5804 extern