PostgreSQL Source Code  git master
postmaster.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * postmaster.c
4  * This program acts as a clearing house for requests to the
5  * POSTGRES system. Frontend programs send a startup message
6  * to the Postmaster and the postmaster uses the info in the
7  * message to setup a backend process.
8  *
9  * The postmaster also manages system-wide operations such as
10  * startup and shutdown. The postmaster itself doesn't do those
11  * operations, mind you --- it just forks off a subprocess to do them
12  * at the right times. It also takes care of resetting the system
13  * if a backend crashes.
14  *
15  * The postmaster process creates the shared memory and semaphore
16  * pools during startup, but as a rule does not touch them itself.
17  * In particular, it is not a member of the PGPROC array of backends
18  * and so it cannot participate in lock-manager operations. Keeping
19  * the postmaster away from shared memory operations makes it simpler
20  * and more reliable. The postmaster is almost always able to recover
21  * from crashes of individual backends by resetting shared memory;
22  * if it did much with shared memory then it would be prone to crashing
23  * along with the backends.
24  *
25  * When a request message is received, we now fork() immediately.
26  * The child process performs authentication of the request, and
27  * then becomes a backend if successful. This allows the auth code
28  * to be written in a simple single-threaded style (as opposed to the
29  * crufty "poor man's multitasking" code that used to be needed).
30  * More importantly, it ensures that blockages in non-multithreaded
31  * libraries like SSL or PAM cannot cause denial of service to other
32  * clients.
33  *
34  *
35  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
36  * Portions Copyright (c) 1994, Regents of the University of California
37  *
38  *
39  * IDENTIFICATION
40  * src/backend/postmaster/postmaster.c
41  *
42  * NOTES
43  *
44  * Initialization:
45  * The Postmaster sets up shared memory data structures
46  * for the backends.
47  *
48  * Synchronization:
49  * The Postmaster shares memory with the backends but should avoid
50  * touching shared memory, so as not to become stuck if a crashing
51  * backend screws up locks or shared memory. Likewise, the Postmaster
52  * should never block on messages from frontend clients.
53  *
54  * Garbage Collection:
55  * The Postmaster cleans up after backends if they have an emergency
56  * exit and/or core dump.
57  *
58  * Error Reporting:
59  * Use write_stderr() only for reporting "interactive" errors
60  * (essentially, bogus arguments on the command line). Once the
61  * postmaster is launched, use ereport().
62  *
63  *-------------------------------------------------------------------------
64  */
65 
66 #include "postgres.h"
67 
68 #include <unistd.h>
69 #include <signal.h>
70 #include <time.h>
71 #include <sys/wait.h>
72 #include <ctype.h>
73 #include <sys/stat.h>
74 #include <sys/socket.h>
75 #include <fcntl.h>
76 #include <sys/param.h>
77 #include <netdb.h>
78 #include <limits.h>
79 
80 #ifdef HAVE_SYS_SELECT_H
81 #include <sys/select.h>
82 #endif
83 
84 #ifdef USE_BONJOUR
85 #include <dns_sd.h>
86 #endif
87 
88 #ifdef USE_SYSTEMD
89 #include <systemd/sd-daemon.h>
90 #endif
91 
92 #ifdef HAVE_PTHREAD_IS_THREADED_NP
93 #include <pthread.h>
94 #endif
95 
96 #include "access/transam.h"
97 #include "access/xlog.h"
98 #include "bootstrap/bootstrap.h"
99 #include "catalog/pg_control.h"
100 #include "common/file_perm.h"
101 #include "common/ip.h"
102 #include "lib/ilist.h"
103 #include "libpq/auth.h"
104 #include "libpq/libpq.h"
105 #include "libpq/pqformat.h"
106 #include "libpq/pqsignal.h"
107 #include "miscadmin.h"
108 #include "pg_getopt.h"
109 #include "pgstat.h"
110 #include "port/pg_bswap.h"
111 #include "postmaster/autovacuum.h"
113 #include "postmaster/fork_process.h"
114 #include "postmaster/pgarch.h"
115 #include "postmaster/postmaster.h"
116 #include "postmaster/syslogger.h"
118 #include "replication/walsender.h"
119 #include "storage/fd.h"
120 #include "storage/ipc.h"
121 #include "storage/pg_shmem.h"
122 #include "storage/pmsignal.h"
123 #include "storage/proc.h"
124 #include "tcop/tcopprot.h"
125 #include "utils/builtins.h"
126 #include "utils/datetime.h"
127 #include "utils/dynamic_loader.h"
128 #include "utils/memutils.h"
129 #include "utils/pidfile.h"
130 #include "utils/ps_status.h"
131 #include "utils/timeout.h"
132 #include "utils/varlena.h"
133 
134 #ifdef EXEC_BACKEND
135 #include "storage/spin.h"
136 #endif
137 
138 
139 /*
140  * Possible types of a backend. Beyond being the possible bkend_type values in
141  * struct bkend, these are OR-able request flag bits for SignalSomeChildren()
142  * and CountChildren().
 *
 * Each type occupies its own bit, so any subset of types can be expressed as
 * a single mask. When adding a type, give it the next free bit and extend
 * BACKEND_TYPE_ALL so that it remains the OR of every individual type.
143  */
144 #define BACKEND_TYPE_NORMAL 0x0001 /* normal backend */
145 #define BACKEND_TYPE_AUTOVAC 0x0002 /* autovacuum worker process */
146 #define BACKEND_TYPE_WALSND 0x0004 /* walsender process */
147 #define BACKEND_TYPE_BGWORKER 0x0008 /* bgworker process */
148 #define BACKEND_TYPE_ALL 0x000F /* OR of all the above */
149 
/* Mask matching either kind of worker: autovacuum workers and bgworkers */
150 #define BACKEND_TYPE_WORKER (BACKEND_TYPE_AUTOVAC | BACKEND_TYPE_BGWORKER)
151 
152 /*
153  * List of active backends (or child processes anyway; we don't actually
154  * know whether a given child has become a backend or is still in the
155  * authorization phase). This is used mainly to keep track of how many
156  * children we have and send them appropriate signals when necessary.
157  *
158  * "Special" children such as the startup, bgwriter and autovacuum launcher
159  * tasks are not in this list. Autovacuum worker and walsender are in it.
160  * Also, "dead_end" children are in it: these are children launched just for
161  * the purpose of sending a friendly rejection message to a would-be client.
162  * We must track them because they are attached to shared memory, but we know
163  * they will never become live backends. dead_end children are not assigned a
164  * PMChildSlot.
165  *
166  * Background workers are in this list, too.
167  */
168 typedef struct bkend
169 {
170  pid_t pid; /* process id of backend */
171  int32 cancel_key; /* cancel key for cancels for this backend */
172  int child_slot; /* PMChildSlot for this backend, if any */
173 
174  /*
175  * Flavor of backend or auxiliary process. Note that BACKEND_TYPE_WALSND
176  * backends initially announce themselves as BACKEND_TYPE_NORMAL, so if
177  * bkend_type is normal, you should check for a recent transition.
178  */
180  bool dead_end; /* is it going to send an error and quit? */
181  bool bgworker_notify; /* gets bgworker start/stop notifications */
182  dlist_node elem; /* list link in BackendList */
183 } Backend;
184 
186 
187 #ifdef EXEC_BACKEND
188 static Backend *ShmemBackendArray;
189 #endif
190 
192 
193 
194 
195 /* The socket number we are listening for connections on */
197 
198 /* The directory names for Unix socket(s) */
200 
201 /* The TCP listen address(es) */
203 
204 /*
205  * ReservedBackends is the number of backends reserved for superuser use.
206  * This number is taken out of the pool size given by MaxConnections so
207  * the number of backend slots available to non-superusers is
208  * (MaxConnections - ReservedBackends). Note what this really means is
209  * "if there are <= ReservedBackends connections available, only superusers
210  * can make new connections" --- pre-existing superuser connections don't
211  * count against the limit.
212  */
214 
215 /* The socket(s) we're listening to. */
216 #define MAXLISTEN 64
218 
219 /*
220  * Set by the -o option
221  */
222 static char ExtraOptions[MAXPGPATH];
223 
224 /*
225  * These globals control the behavior of the postmaster in case some
226  * backend dumps core. Normally, it kills all peers of the dead backend
227  * and reinitializes shared memory. By specifying -T or -n, we can have
228  * the postmaster stop (rather than kill) peers and not reinitialize
229  * shared data structures. (Reinit is currently dead code, though.)
230  */
static bool Reinit = true;		/* cleared by -n: don't reinit shared memory
								 * after an abnormal exit */
static bool SendStop = false;	/* set by -T: send SIGSTOP rather than SIGQUIT
								 * to the peers of a crashed backend */
233 
234 /* still more option variables */
235 bool EnableSSL = false;
236 
237 int PreAuthDelay = 0;
239 
240 bool log_hostname; /* for ps display and logging */
241 bool Log_connections = false;
242 bool Db_user_namespace = false;
243 
244 bool enable_bonjour = false;
247 
248 /* PIDs of special child processes; 0 when not running */
249 static pid_t StartupPID = 0,
258 
259 /* Startup process's status */
260 typedef enum
261 {
264  STARTUP_SIGNALED, /* we sent it a SIGQUIT or SIGKILL */
267 
269 
270 /*
 * Startup/shutdown state.
 *
 * Shutdown is initialized to NoShutdown; the other three values presumably
 * record which kind of shutdown has been requested --- confirm against the
 * code that assigns Shutdown.
 * NOTE(review): the values look ordered by increasing urgency; verify before
 * relying on numeric comparisons between them.
 */
271 #define NoShutdown 0
272 #define SmartShutdown 1
273 #define FastShutdown 2
274 #define ImmediateShutdown 3
275 
276 static int Shutdown = NoShutdown;
277 
278 static bool FatalError = false; /* T if recovering from backend crash */
279 
280 /*
281  * We use a simple state machine to control startup, shutdown, and
282  * crash recovery (which is rather like shutdown followed by startup).
283  *
284  * After doing all the postmaster initialization work, we enter PM_STARTUP
285  * state and the startup process is launched. The startup process begins by
286  * reading the control file and other preliminary initialization steps.
287  * In a normal startup, or after crash recovery, the startup process exits
288  * with exit code 0 and we switch to PM_RUN state. However, archive recovery
289  * is handled specially since it takes much longer and we would like to support
290  * hot standby during archive recovery.
291  *
292  * When the startup process is ready to start archive recovery, it signals the
293  * postmaster, and we switch to PM_RECOVERY state. The background writer and
294  * checkpointer are launched, while the startup process continues applying WAL.
295  * If Hot Standby is enabled, then, after reaching a consistent point in WAL
296  * redo, startup process signals us again, and we switch to PM_HOT_STANDBY
297  * state and begin accepting connections to perform read-only queries. When
298  * archive recovery is finished, the startup process exits with exit code 0
299  * and we switch to PM_RUN state.
300  *
301  * Normal child backends can only be launched when we are in PM_RUN or
302  * PM_HOT_STANDBY state. (We also allow launch of normal
303  * child backends in PM_WAIT_BACKUP state, but only for superusers.)
304  * In other states we handle connection requests by launching "dead_end"
305  * child processes, which will simply send the client an error message and
306  * quit. (We track these in the BackendList so that we can know when they
307  * are all gone; this is important because they're still connected to shared
308  * memory, and would interfere with an attempt to destroy the shmem segment,
309  * possibly leading to SHMALL failure when we try to make a new one.)
310  * In PM_WAIT_DEAD_END state we are waiting for all the dead_end children
311  * to drain out of the system, and therefore stop accepting connection
312  * requests at all until the last existing child has quit (which hopefully
313  * will not be very long).
314  *
315  * Notice that this state variable does not distinguish *why* we entered
316  * states later than PM_RUN --- Shutdown and FatalError must be consulted
317  * to find that out. FatalError is never true in PM_RECOVERY, PM_HOT_STANDBY or PM_RUN
318  * states, nor in PM_SHUTDOWN states (because we don't enter those states
319  * when trying to recover from a crash). It can be true in PM_STARTUP state,
320  * because we don't clear it until we've successfully started WAL redo.
321  */
/*
 * States of the postmaster state machine described above.
 *
 * NOTE(review): the comment above speaks of "states later than PM_RUN", so
 * the declaration order presumably matters to code that compares states ---
 * confirm before inserting or reordering values.
 */
322 typedef enum
323 {
324  PM_INIT, /* postmaster starting */
325  PM_STARTUP, /* waiting for startup subprocess */
326  PM_RECOVERY, /* in archive recovery mode */
327  PM_HOT_STANDBY, /* in hot standby mode */
328  PM_RUN, /* normal "database is alive" state */
329  PM_WAIT_BACKUP, /* waiting for online backup mode to end */
330  PM_WAIT_READONLY, /* waiting for read only backends to exit */
331  PM_WAIT_BACKENDS, /* waiting for live backends to exit */
332  PM_SHUTDOWN, /* waiting for checkpointer to do shutdown
333  * ckpt */
334  PM_SHUTDOWN_2, /* waiting for archiver and walsenders to
335  * finish */
336  PM_WAIT_DEAD_END, /* waiting for dead_end children to exit */
337  PM_NO_CHILDREN /* all important children have exited */
338 } PMState;
339 
341 
342 /* Start time of SIGKILL timeout during immediate shutdown or child crash */
343 /* Zero means timeout is not running */
344 static time_t AbortStartTime = 0;
345 
346 /* Length of said timeout */
347 #define SIGKILL_CHILDREN_AFTER_SECS 5
348 
349 static bool ReachedNormalRunning = false; /* T if we've reached PM_RUN */
350 
351 bool ClientAuthInProgress = false; /* T during new-client
352  * authentication */
353 
354 bool redirection_done = false; /* stderr redirected for syslogger? */
355 
356 /* received START_AUTOVAC_LAUNCHER signal */
357 static volatile sig_atomic_t start_autovac_launcher = false;
358 
359 /* the launcher needs to be signalled to communicate some condition */
360 static volatile bool avlauncher_needs_signal = false;
361 
362 /* received START_WALRECEIVER signal */
363 static volatile sig_atomic_t WalReceiverRequested = false;
364 
365 /* set when there's a worker that needs to be started up */
366 static volatile bool StartWorkerNeeded = true;
367 static volatile bool HaveCrashedWorker = false;
368 
369 #ifndef HAVE_STRONG_RANDOM
370 /*
371  * State for assigning cancel keys.
372  * Also, the global MyCancelKey passes the cancel key assigned to a given
373  * backend from the postmaster to that backend (via fork).
374  */
375 static unsigned int random_seed = 0;
376 static struct timeval random_start_time;
377 #endif
378 
379 #ifdef USE_SSL
380 /* Set when and if SSL has been initialized properly */
381 static bool LoadedSSL = false;
382 #endif
383 
384 #ifdef USE_BONJOUR
385 static DNSServiceRef bonjour_sdref = NULL;
386 #endif
387 
388 /*
389  * postmaster.c - function prototypes
390  */
391 static void CloseServerPorts(int status, Datum arg);
392 static void unlink_external_pid_file(int status, Datum arg);
393 static void getInstallationPaths(const char *argv0);
394 static void checkControlFile(void);
395 static Port *ConnCreate(int serverFd);
396 static void ConnFree(Port *port);
397 static void reset_shared(int port);
398 static void SIGHUP_handler(SIGNAL_ARGS);
399 static void pmdie(SIGNAL_ARGS);
400 static void reaper(SIGNAL_ARGS);
401 static void sigusr1_handler(SIGNAL_ARGS);
402 static void startup_die(SIGNAL_ARGS);
403 static void dummy_handler(SIGNAL_ARGS);
404 static void StartupPacketTimeoutHandler(void);
405 static void CleanupBackend(int pid, int exitstatus);
406 static bool CleanupBackgroundWorker(int pid, int exitstatus);
407 static void HandleChildCrash(int pid, int exitstatus, const char *procname);
408 static void LogChildExit(int lev, const char *procname,
409  int pid, int exitstatus);
410 static void PostmasterStateMachine(void);
411 static void BackendInitialize(Port *port);
412 static void BackendRun(Port *port) pg_attribute_noreturn();
413 static void ExitPostmaster(int status) pg_attribute_noreturn();
414 static int ServerLoop(void);
415 static int BackendStartup(Port *port);
416 static int ProcessStartupPacket(Port *port, bool SSLdone);
417 static void SendNegotiateProtocolVersion(List *unrecognized_protocol_options);
418 static void processCancelRequest(Port *port, void *pkt);
419 static int initMasks(fd_set *rmask);
420 static void report_fork_failure_to_client(Port *port, int errnum);
421 static CAC_state canAcceptConnections(void);
422 static bool RandomCancelKey(int32 *cancel_key);
423 static void signal_child(pid_t pid, int signal);
424 static bool SignalSomeChildren(int signal, int targets);
425 static void TerminateChildren(int signal);
426 
427 #define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL)
428 
429 static int CountChildren(int target);
431 static void maybe_start_bgworkers(void);
432 static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
433 static pid_t StartChildProcess(AuxProcType type);
434 static void StartAutovacuumWorker(void);
435 static void MaybeStartWalReceiver(void);
436 static void InitPostmasterDeathWatchHandle(void);
437 
438 /*
439  * Is the archiver allowed to start up in the current postmaster state?
440  *
441  * Yes if WAL archiving is active and pmState is PM_RUN, or if WAL archiving
442  * is enabled always and pmState is PM_RECOVERY or PM_HOT_STANDBY.
443  */
444 #define PgArchStartupAllowed() \
445  ((XLogArchivingActive() && pmState == PM_RUN) || \
446  (XLogArchivingAlways() && \
447  (pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY)))
448 
449 #ifdef EXEC_BACKEND
450 
451 #ifdef WIN32
452 #define WNOHANG 0 /* ignored, so any integer value will do */
453 
454 static pid_t waitpid(pid_t pid, int *exitstatus, int options);
455 static void WINAPI pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired);
456 
457 static HANDLE win32ChildQueue;
458 
459 typedef struct
460 {
461  HANDLE waitHandle;
462  HANDLE procHandle;
463  DWORD procId;
464 } win32_deadchild_waitinfo;
465 #endif /* WIN32 */
466 
467 static pid_t backend_forkexec(Port *port);
468 static pid_t internal_forkexec(int argc, char *argv[], Port *port);
469 
470 /* Type for a socket that can be inherited to a client process */
471 #ifdef WIN32
472 typedef struct
473 {
474  SOCKET origsocket; /* Original socket value, or PGINVALID_SOCKET
475  * if not a socket */
476  WSAPROTOCOL_INFO wsainfo;
477 } InheritableSocket;
478 #else
479 typedef int InheritableSocket;
480 #endif
481 
482 /*
483  * Structure contains all variables passed to exec:ed backends
484  */
485 typedef struct
486 {
487  Port port;
488  InheritableSocket portsocket;
489  char DataDir[MAXPGPATH];
492  int MyPMChildSlot;
493 #ifndef WIN32
494  unsigned long UsedShmemSegID;
495 #else
496  HANDLE UsedShmemSegID;
497 #endif
498  void *UsedShmemSegAddr;
501  Backend *ShmemBackendArray;
502 #ifndef HAVE_SPINLOCKS
504 #endif
513  InheritableSocket pgStatSock;
514  pid_t PostmasterPid;
518  bool redirection_done;
519  bool IsBinaryUpgrade;
520  int max_safe_fds;
521  int MaxBackends;
522 #ifdef WIN32
523  HANDLE PostmasterHandle;
524  HANDLE initial_signal_pipe;
525  HANDLE syslogPipe[2];
526 #else
527  int postmaster_alive_fds[2];
528  int syslogPipe[2];
529 #endif
530  char my_exec_path[MAXPGPATH];
531  char pkglib_path[MAXPGPATH];
532  char ExtraOptions[MAXPGPATH];
533 } BackendParameters;
534 
535 static void read_backend_variables(char *id, Port *port);
536 static void restore_backend_variables(BackendParameters *param, Port *port);
537 
538 #ifndef WIN32
539 static bool save_backend_variables(BackendParameters *param, Port *port);
540 #else
541 static bool save_backend_variables(BackendParameters *param, Port *port,
542  HANDLE childProcess, pid_t childPid);
543 #endif
544 
545 static void ShmemBackendArrayAdd(Backend *bn);
546 static void ShmemBackendArrayRemove(Backend *bn);
547 #endif /* EXEC_BACKEND */
548 
549 #define StartupDataBase() StartChildProcess(StartupProcess)
550 #define StartBackgroundWriter() StartChildProcess(BgWriterProcess)
551 #define StartCheckpointer() StartChildProcess(CheckpointerProcess)
552 #define StartWalWriter() StartChildProcess(WalWriterProcess)
553 #define StartWalReceiver() StartChildProcess(WalReceiverProcess)
554 
555 /*
 * Macros to check exit status of a child process.
 *
 * "st" is a wait status of the kind WIFEXITED()/WEXITSTATUS() accept.
 * EXIT_STATUS_0 compares the whole status word against zero, whereas
 * EXIT_STATUS_1 and EXIT_STATUS_3 require a normal exit (WIFEXITED) with
 * exit code 1 or 3 respectively.
 *
 * EXIT_STATUS_1 and EXIT_STATUS_3 evaluate their argument twice, so do not
 * pass an expression with side effects.
 */
556 #define EXIT_STATUS_0(st) ((st) == 0)
557 #define EXIT_STATUS_1(st) (WIFEXITED(st) && WEXITSTATUS(st) == 1)
558 #define EXIT_STATUS_3(st) (WIFEXITED(st) && WEXITSTATUS(st) == 3)
559 
560 #ifndef WIN32
561 /*
562  * File descriptors for pipe used to monitor if postmaster is alive.
563  * First is POSTMASTER_FD_WATCH, second is POSTMASTER_FD_OWN.
564  */
565 int postmaster_alive_fds[2] = {-1, -1};
566 #else
567 /* Process handle of postmaster used for the same purpose on Windows */
568 HANDLE PostmasterHandle;
569 #endif
570 
571 /*
572  * Postmaster main entry point
573  */
574 void
575 PostmasterMain(int argc, char *argv[])
576 {
577  int opt;
578  int status;
579  char *userDoption = NULL;
580  bool listen_addr_saved = false;
581  int i;
582  char *output_config_variable = NULL;
583 
584  MyProcPid = PostmasterPid = getpid();
585 
586  MyStartTime = time(NULL);
587 
589 
590  /*
591  * We should not be creating any files or directories before we check the
592  * data directory (see checkDataDir()), but just in case set the umask to
593  * the most restrictive (owner-only) permissions.
594  *
595  * checkDataDir() will reset the umask based on the data directory
596  * permissions.
597  */
598  umask(PG_MODE_MASK_OWNER);
599 
600  /*
601  * Initialize random(3) so we don't get the same values in every run.
602  *
603  * Note: the seed is pretty predictable from externally-visible facts such
604  * as postmaster start time, so avoid using random() for security-critical
605  * random values during postmaster startup. At the time of first
606  * connection, RandomCancelKey will select a hopefully-more-random seed.
607  */
608  srandom((unsigned int) (MyProcPid ^ MyStartTime));
609 
610  /*
611  * By default, palloc() requests in the postmaster will be allocated in
612  * the PostmasterContext, which is space that can be recycled by backends.
613  * Allocated data that needs to be available to backends should be
614  * allocated in TopMemoryContext.
615  */
617  "Postmaster",
620 
621  /* Initialize paths to installation files */
622  getInstallationPaths(argv[0]);
623 
624  /*
625  * Set up signal handlers for the postmaster process.
626  *
627  * In the postmaster, we want to install non-ignored handlers *without*
628  * SA_RESTART. This is because they'll be blocked at all times except
629  * when ServerLoop is waiting for something to happen, and during that
630  * window, we want signals to exit the select(2) wait so that ServerLoop
631  * can respond if anything interesting happened. On some platforms,
632  * signals marked SA_RESTART would not cause the select() wait to end.
633  * Child processes will generally want SA_RESTART, but we expect them to
634  * set up their own handlers before unblocking signals.
635  *
636  * CAUTION: when changing this list, check for side-effects on the signal
637  * handling setup of child processes. See tcop/postgres.c,
638  * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
639  * postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/pgstat.c,
640  * postmaster/syslogger.c, postmaster/bgworker.c and
641  * postmaster/checkpointer.c.
642  */
643  pqinitmask();
645 
646  pqsignal_no_restart(SIGHUP, SIGHUP_handler); /* reread config file and
647  * have children do same */
648  pqsignal_no_restart(SIGINT, pmdie); /* send SIGTERM and shut down */
649  pqsignal_no_restart(SIGQUIT, pmdie); /* send SIGQUIT and die */
650  pqsignal_no_restart(SIGTERM, pmdie); /* wait for children and shut down */
651  pqsignal(SIGALRM, SIG_IGN); /* ignored */
652  pqsignal(SIGPIPE, SIG_IGN); /* ignored */
653  pqsignal_no_restart(SIGUSR1, sigusr1_handler); /* message from child
654  * process */
655  pqsignal_no_restart(SIGUSR2, dummy_handler); /* unused, reserve for
656  * children */
657  pqsignal_no_restart(SIGCHLD, reaper); /* handle child termination */
658  pqsignal(SIGTTIN, SIG_IGN); /* ignored */
659  pqsignal(SIGTTOU, SIG_IGN); /* ignored */
660  /* ignore SIGXFSZ, so that ulimit violations work like disk full */
661 #ifdef SIGXFSZ
662  pqsignal(SIGXFSZ, SIG_IGN); /* ignored */
663 #endif
664 
665  /*
666  * Options setup
667  */
669 
670  opterr = 1;
671 
672  /*
673  * Parse command-line options. CAUTION: keep this in sync with
674  * tcop/postgres.c (the option sets should not conflict) and with the
675  * common help() function in main/main.c.
676  */
677  while ((opt = getopt(argc, argv, "B:bc:C:D:d:EeFf:h:ijk:lN:nOo:Pp:r:S:sTt:W:-:")) != -1)
678  {
679  switch (opt)
680  {
681  case 'B':
682  SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
683  break;
684 
685  case 'b':
686  /* Undocumented flag used for binary upgrades */
687  IsBinaryUpgrade = true;
688  break;
689 
690  case 'C':
691  output_config_variable = strdup(optarg);
692  break;
693 
694  case 'D':
695  userDoption = strdup(optarg);
696  break;
697 
698  case 'd':
700  break;
701 
702  case 'E':
703  SetConfigOption("log_statement", "all", PGC_POSTMASTER, PGC_S_ARGV);
704  break;
705 
706  case 'e':
707  SetConfigOption("datestyle", "euro", PGC_POSTMASTER, PGC_S_ARGV);
708  break;
709 
710  case 'F':
711  SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
712  break;
713 
714  case 'f':
716  {
717  write_stderr("%s: invalid argument for option -f: \"%s\"\n",
718  progname, optarg);
719  ExitPostmaster(1);
720  }
721  break;
722 
723  case 'h':
724  SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
725  break;
726 
727  case 'i':
728  SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
729  break;
730 
731  case 'j':
732  /* only used by interactive backend */
733  break;
734 
735  case 'k':
736  SetConfigOption("unix_socket_directories", optarg, PGC_POSTMASTER, PGC_S_ARGV);
737  break;
738 
739  case 'l':
740  SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
741  break;
742 
743  case 'N':
744  SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
745  break;
746 
747  case 'n':
748  /* Don't reinit shared mem after abnormal exit */
749  Reinit = false;
750  break;
751 
752  case 'O':
753  SetConfigOption("allow_system_table_mods", "true", PGC_POSTMASTER, PGC_S_ARGV);
754  break;
755 
756  case 'o':
757  /* Other options to pass to the backend on the command line */
759  sizeof(ExtraOptions) - strlen(ExtraOptions),
760  " %s", optarg);
761  break;
762 
763  case 'P':
764  SetConfigOption("ignore_system_indexes", "true", PGC_POSTMASTER, PGC_S_ARGV);
765  break;
766 
767  case 'p':
769  break;
770 
771  case 'r':
772  /* only used by single-user backend */
773  break;
774 
775  case 'S':
777  break;
778 
779  case 's':
780  SetConfigOption("log_statement_stats", "true", PGC_POSTMASTER, PGC_S_ARGV);
781  break;
782 
783  case 'T':
784 
785  /*
786  * In the event that some backend dumps core, send SIGSTOP,
787  * rather than SIGQUIT, to all its peers. This lets the wily
788  * post_hacker collect core dumps from everyone.
789  */
790  SendStop = true;
791  break;
792 
793  case 't':
794  {
795  const char *tmp = get_stats_option_name(optarg);
796 
797  if (tmp)
798  {
800  }
801  else
802  {
803  write_stderr("%s: invalid argument for option -t: \"%s\"\n",
804  progname, optarg);
805  ExitPostmaster(1);
806  }
807  break;
808  }
809 
810  case 'W':
811  SetConfigOption("post_auth_delay", optarg, PGC_POSTMASTER, PGC_S_ARGV);
812  break;
813 
814  case 'c':
815  case '-':
816  {
817  char *name,
818  *value;
819 
820  ParseLongOption(optarg, &name, &value);
821  if (!value)
822  {
823  if (opt == '-')
824  ereport(ERROR,
825  (errcode(ERRCODE_SYNTAX_ERROR),
826  errmsg("--%s requires a value",
827  optarg)));
828  else
829  ereport(ERROR,
830  (errcode(ERRCODE_SYNTAX_ERROR),
831  errmsg("-c %s requires a value",
832  optarg)));
833  }
834 
836  free(name);
837  if (value)
838  free(value);
839  break;
840  }
841 
842  default:
843  write_stderr("Try \"%s --help\" for more information.\n",
844  progname);
845  ExitPostmaster(1);
846  }
847  }
848 
849  /*
850  * Postmaster accepts no non-option switch arguments.
851  */
852  if (optind < argc)
853  {
854  write_stderr("%s: invalid argument: \"%s\"\n",
855  progname, argv[optind]);
856  write_stderr("Try \"%s --help\" for more information.\n",
857  progname);
858  ExitPostmaster(1);
859  }
860 
861  /*
862  * Locate the proper configuration files and data directory, and read
863  * postgresql.conf for the first time.
864  */
865  if (!SelectConfigFiles(userDoption, progname))
866  ExitPostmaster(2);
867 
868  if (output_config_variable != NULL)
869  {
870  /*
871  * "-C guc" was specified, so print GUC's value and exit. No extra
872  * permission check is needed because the user is reading inside the
873  * data dir.
874  */
875  const char *config_val = GetConfigOption(output_config_variable,
876  false, false);
877 
878  puts(config_val ? config_val : "");
879  ExitPostmaster(0);
880  }
881 
882  /* Verify that DataDir looks reasonable */
883  checkDataDir();
884 
885  /* Check that pg_control exists */
887 
888  /* And switch working directory into it */
889  ChangeToDataDir();
890 
891  /*
892  * Check for invalid combinations of GUC settings.
893  */
895  {
896  write_stderr("%s: superuser_reserved_connections (%d) plus max_wal_senders (%d) must be less than max_connections (%d)\n",
897  progname,
899  ExitPostmaster(1);
900  }
902  ereport(ERROR,
903  (errmsg("WAL archival cannot be enabled when wal_level is \"minimal\"")));
905  ereport(ERROR,
906  (errmsg("WAL streaming (max_wal_senders > 0) requires wal_level \"replica\" or \"logical\"")));
907 
908  /*
909  * Other one-time internal sanity checks can go here, if they are fast.
910  * (Put any slow processing further down, after postmaster.pid creation.)
911  */
912  if (!CheckDateTokenTables())
913  {
914  write_stderr("%s: invalid datetoken tables, please fix\n", progname);
915  ExitPostmaster(1);
916  }
917 
918  /*
919  * Now that we are done processing the postmaster arguments, reset
920  * getopt(3) library so that it will work correctly in subprocesses.
921  */
922  optind = 1;
923 #ifdef HAVE_INT_OPTRESET
924  optreset = 1; /* some systems need this too */
925 #endif
926 
927  /* For debugging: display postmaster environment */
928  {
929  extern char **environ;
930  char **p;
931 
932  ereport(DEBUG3,
933  (errmsg_internal("%s: PostmasterMain: initial environment dump:",
934  progname)));
935  ereport(DEBUG3,
936  (errmsg_internal("-----------------------------------------")));
937  for (p = environ; *p; ++p)
938  ereport(DEBUG3,
939  (errmsg_internal("\t%s", *p)));
940  ereport(DEBUG3,
941  (errmsg_internal("-----------------------------------------")));
942  }
943 
944  /*
945  * Create lockfile for data directory.
946  *
947  * We want to do this before we try to grab the input sockets, because the
948  * data directory interlock is more reliable than the socket-file
949  * interlock (thanks to whoever decided to put socket files in /tmp :-().
950  * For the same reason, it's best to grab the TCP socket(s) before the
951  * Unix socket(s).
952  *
953  * Also note that this internally sets up the on_proc_exit function that
954  * is responsible for removing both data directory and socket lockfiles;
955  * so it must happen before opening sockets so that at exit, the socket
956  * lockfiles go away after CloseServerPorts runs.
957  */
958  CreateDataDirLockFile(true);
959 
960  /* read control file (error checking and contains config) */
962 
963  /*
964  * Initialize SSL library, if specified.
965  */
966 #ifdef USE_SSL
967  if (EnableSSL)
968  {
969  (void) secure_initialize(true);
970  LoadedSSL = true;
971  }
972 #endif
973 
974  /*
975  * Register the apply launcher. Since it registers a background worker,
976  * it needs to be called before InitializeMaxBackends(), and it's probably
977  * a good idea to call it before any modules had chance to take the
978  * background worker slots.
979  */
981 
982  /*
983  * process any libraries that should be preloaded at postmaster start
984  */
986 
987  /*
988  * Now that loadable modules have had their chance to register background
989  * workers, calculate MaxBackends.
990  */
992 
993  /*
994  * Establish input sockets.
995  *
996  * First, mark them all closed, and set up an on_proc_exit function that's
997  * charged with closing the sockets again at postmaster shutdown.
998  */
999  for (i = 0; i < MAXLISTEN; i++)
1001 
1003 
1004  if (ListenAddresses)
1005  {
1006  char *rawstring;
1007  List *elemlist;
1008  ListCell *l;
1009  int success = 0;
1010 
1011  /* Need a modifiable copy of ListenAddresses */
1012  rawstring = pstrdup(ListenAddresses);
1013 
1014  /* Parse string into list of hostnames */
1015  if (!SplitIdentifierString(rawstring, ',', &elemlist))
1016  {
1017  /* syntax error in list */
1018  ereport(FATAL,
1019  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1020  errmsg("invalid list syntax in parameter \"%s\"",
1021  "listen_addresses")));
1022  }
1023 
1024  foreach(l, elemlist)
1025  {
1026  char *curhost = (char *) lfirst(l);
1027 
1028  if (strcmp(curhost, "*") == 0)
1029  status = StreamServerPort(AF_UNSPEC, NULL,
1030  (unsigned short) PostPortNumber,
1031  NULL,
1033  else
1034  status = StreamServerPort(AF_UNSPEC, curhost,
1035  (unsigned short) PostPortNumber,
1036  NULL,
1037  ListenSocket, MAXLISTEN);
1038 
1039  if (status == STATUS_OK)
1040  {
1041  success++;
1042  /* record the first successful host addr in lockfile */
1043  if (!listen_addr_saved)
1044  {
1046  listen_addr_saved = true;
1047  }
1048  }
1049  else
1050  ereport(WARNING,
1051  (errmsg("could not create listen socket for \"%s\"",
1052  curhost)));
1053  }
1054 
1055  if (!success && elemlist != NIL)
1056  ereport(FATAL,
1057  (errmsg("could not create any TCP/IP sockets")));
1058 
1059  list_free(elemlist);
1060  pfree(rawstring);
1061  }
1062 
1063 #ifdef USE_BONJOUR
1064  /* Register for Bonjour only if we opened TCP socket(s) */
1066  {
1067  DNSServiceErrorType err;
1068 
1069  /*
1070  * We pass 0 for interface_index, which will result in registering on
1071  * all "applicable" interfaces. It's not entirely clear from the
1072  * DNS-SD docs whether this would be appropriate if we have bound to
1073  * just a subset of the available network interfaces.
1074  */
1075  err = DNSServiceRegister(&bonjour_sdref,
1076  0,
1077  0,
1078  bonjour_name,
1079  "_postgresql._tcp.",
1080  NULL,
1081  NULL,
1083  0,
1084  NULL,
1085  NULL,
1086  NULL);
1087  if (err != kDNSServiceErr_NoError)
1088  elog(LOG, "DNSServiceRegister() failed: error code %ld",
1089  (long) err);
1090 
1091  /*
1092  * We don't bother to read the mDNS daemon's reply, and we expect that
1093  * it will automatically terminate our registration when the socket is
1094  * closed at postmaster termination. So there's nothing more to be
1095  * done here. However, the bonjour_sdref is kept around so that
1096  * forked children can close their copies of the socket.
1097  */
1098  }
1099 #endif
1100 
1101 #ifdef HAVE_UNIX_SOCKETS
1103  {
1104  char *rawstring;
1105  List *elemlist;
1106  ListCell *l;
1107  int success = 0;
1108 
1109  /* Need a modifiable copy of Unix_socket_directories */
1110  rawstring = pstrdup(Unix_socket_directories);
1111 
1112  /* Parse string into list of directories */
1113  if (!SplitDirectoriesString(rawstring, ',', &elemlist))
1114  {
1115  /* syntax error in list */
1116  ereport(FATAL,
1117  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1118  errmsg("invalid list syntax in parameter \"%s\"",
1119  "unix_socket_directories")));
1120  }
1121 
1122  foreach(l, elemlist)
1123  {
1124  char *socketdir = (char *) lfirst(l);
1125 
1126  status = StreamServerPort(AF_UNIX, NULL,
1127  (unsigned short) PostPortNumber,
1128  socketdir,
1129  ListenSocket, MAXLISTEN);
1130 
1131  if (status == STATUS_OK)
1132  {
1133  success++;
1134  /* record the first successful Unix socket in lockfile */
1135  if (success == 1)
1137  }
1138  else
1139  ereport(WARNING,
1140  (errmsg("could not create Unix-domain socket in directory \"%s\"",
1141  socketdir)));
1142  }
1143 
1144  if (!success && elemlist != NIL)
1145  ereport(FATAL,
1146  (errmsg("could not create any Unix-domain sockets")));
1147 
1148  list_free_deep(elemlist);
1149  pfree(rawstring);
1150  }
1151 #endif
1152 
1153  /*
1154  * check that we have some socket to listen on
1155  */
1156  if (ListenSocket[0] == PGINVALID_SOCKET)
1157  ereport(FATAL,
1158  (errmsg("no socket created for listening")));
1159 
1160  /*
1161  * If no valid TCP ports, write an empty line for listen address,
1162  * indicating the Unix socket must be used. Note that this line is not
1163  * added to the lock file until there is a socket backing it.
1164  */
1165  if (!listen_addr_saved)
1167 
1168  /*
1169  * Set up shared memory and semaphores.
1170  */
1172 
1173  /*
1174  * Estimate number of openable files. This must happen after setting up
1175  * semaphores, because on some platforms semaphores count as open files.
1176  */
1177  set_max_safe_fds();
1178 
1179  /*
1180  * Set reference point for stack-depth checking.
1181  */
1182  set_stack_base();
1183 
1184  /*
1185  * Initialize pipe (or process handle on Windows) that allows children to
1186  * wake up from sleep on postmaster death.
1187  */
1189 
1190 #ifdef WIN32
1191 
1192  /*
1193  * Initialize I/O completion port used to deliver list of dead children.
1194  */
1195  win32ChildQueue = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 1);
1196  if (win32ChildQueue == NULL)
1197  ereport(FATAL,
1198  (errmsg("could not create I/O completion port for child queue")));
1199 #endif
1200 
1201  /*
1202  * Record postmaster options. We delay this till now to avoid recording
1203  * bogus options (eg, NBuffers too high for available memory).
1204  */
1205  if (!CreateOptsFile(argc, argv, my_exec_path))
1206  ExitPostmaster(1);
1207 
1208 #ifdef EXEC_BACKEND
1209  /* Write out nondefault GUC settings for child processes to use */
1210  write_nondefault_variables(PGC_POSTMASTER);
1211 #endif
1212 
1213  /*
1214  * Write the external PID file if requested
1215  */
1216  if (external_pid_file)
1217  {
1218  FILE *fpidfile = fopen(external_pid_file, "w");
1219 
1220  if (fpidfile)
1221  {
1222  fprintf(fpidfile, "%d\n", MyProcPid);
1223  fclose(fpidfile);
1224 
1225  /* Make PID file world readable */
1226  if (chmod(external_pid_file, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) != 0)
1227  write_stderr("%s: could not change permissions of external PID file \"%s\": %s\n",
1229  }
1230  else
1231  write_stderr("%s: could not write external PID file \"%s\": %s\n",
1233 
1235  }
1236 
1237  /*
1238  * Remove old temporary files. At this point there can be no other
1239  * Postgres processes running in this directory, so this should be safe.
1240  */
1242 
1243  /*
1244  * Forcibly remove the files signaling a standby promotion request.
1245  * Otherwise, the existence of those files triggers a promotion too early,
1246  * whether a user wants that or not.
1247  *
1248  * This removal of files is usually unnecessary because they can exist
1249  * only during a few moments during a standby promotion. However there is
1250  * a race condition: if pg_ctl promote is executed and creates the files
1251  * during a promotion, the files can stay around even after the server is
1252  * brought up to new master. Then, if new standby starts by using the
1253  * backup taken from that master, the files can exist at the server
1254  * startup and should be removed in order to avoid an unexpected
1255  * promotion.
1256  *
1257  * Note that promotion signal files need to be removed before the startup
1258  * process is invoked. Because, after that, they can be used by
1259  * postmaster's SIGUSR1 signal handler.
1260  */
1262 
1263  /* Remove any outdated file holding the current log filenames. */
1264  if (unlink(LOG_METAINFO_DATAFILE) < 0 && errno != ENOENT)
1265  ereport(LOG,
1267  errmsg("could not remove file \"%s\": %m",
1269 
1270  /*
1271  * If enabled, start up syslogger collection subprocess
1272  */
1274 
1275  /*
1276  * Reset whereToSendOutput from DestDebug (its starting state) to
1277  * DestNone. This stops ereport from sending log messages to stderr unless
1278  * Log_destination permits. We don't do this until the postmaster is
1279  * fully launched, since startup failures may as well be reported to
1280  * stderr.
1281  *
1282  * If we are in fact disabling logging to stderr, first emit a log message
1283  * saying so, to provide a breadcrumb trail for users who may not remember
1284  * that their logging is configured to go somewhere else.
1285  */
1287  ereport(LOG,
1288  (errmsg("ending log output to stderr"),
1289  errhint("Future log output will go to log destination \"%s\".",
1291 
1293 
1294  /*
1295  * Initialize stats collection subsystem (this does NOT start the
1296  * collector process!)
1297  */
1298  pgstat_init();
1299 
1300  /*
1301  * Initialize the autovacuum subsystem (again, no process start yet)
1302  */
1303  autovac_init();
1304 
1305  /*
1306  * Load configuration files for client authentication.
1307  */
1308  if (!load_hba())
1309  {
1310  /*
1311  * It makes no sense to continue if we fail to load the HBA file,
1312  * since there is no way to connect to the database in this case.
1313  */
1314  ereport(FATAL,
1315  (errmsg("could not load pg_hba.conf")));
1316  }
1317  if (!load_ident())
1318  {
1319  /*
1320  * We can start up without the IDENT file, although it means that you
1321  * cannot log in using any of the authentication methods that need a
1322  * user name mapping. load_ident() already logged the details of error
1323  * to the log.
1324  */
1325  }
1326 
1327 #ifdef HAVE_PTHREAD_IS_THREADED_NP
1328 
1329  /*
1330  * On macOS, libintl replaces setlocale() with a version that calls
1331  * CFLocaleCopyCurrent() when its second argument is "" and every relevant
1332  * environment variable is unset or empty. CFLocaleCopyCurrent() makes
1333  * the process multithreaded. The postmaster calls sigprocmask() and
1334  * calls fork() without an immediate exec(), both of which have undefined
1335  * behavior in a multithreaded program. A multithreaded postmaster is the
1336  * normal case on Windows, which offers neither fork() nor sigprocmask().
1337  */
1338  if (pthread_is_threaded_np() != 0)
1339  ereport(FATAL,
1340  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1341  errmsg("postmaster became multithreaded during startup"),
1342  errhint("Set the LC_ALL environment variable to a valid locale.")));
1343 #endif
1344 
1345  /*
1346  * Remember postmaster startup time
1347  */
1349 #ifndef HAVE_STRONG_RANDOM
1350  /* RandomCancelKey wants its own copy */
1352 #endif
1353 
1354  /*
1355  * Report postmaster status in the postmaster.pid file, to allow pg_ctl to
1356  * see what's happening.
1357  */
1359 
1360  /*
1361  * We're ready to rock and roll...
1362  */
1364  Assert(StartupPID != 0);
1366  pmState = PM_STARTUP;
1367 
1368  /* Some workers may be scheduled to start now */
1370 
1371  status = ServerLoop();
1372 
1373  /*
1374  * ServerLoop probably shouldn't ever return, but if it does, close down.
1375  */
1376  ExitPostmaster(status != STATUS_OK);
1377 
1378  abort(); /* not reached */
1379 }
1380 
1381 
1382 /*
1383  * on_proc_exit callback to close server's listen sockets
1384  */
1385 static void
1387 {
 /*
  * NOTE(review): this listing omits some source lines of this function
  * (gaps in the embedded numbering), including the declarator line and
  * the statements inside the innermost block below.  The comments added
  * here describe only what is visible.
  */
1388  int i;
1389 
1390  /*
1391  * First, explicitly close all the socket FDs. We used to just let this
1392  * happen implicitly at postmaster exit, but it's better to close them
1393  * before we remove the postmaster.pid lockfile; otherwise there's a race
1394  * condition if a new postmaster wants to re-use the TCP port number.
1395  */
 /* Each of the MAXLISTEN slots is tested; slots holding PGINVALID_SOCKET are passed over. */
1396  for (i = 0; i < MAXLISTEN; i++)
1397  {
1398  if (ListenSocket[i] != PGINVALID_SOCKET)
1399  {
1402  }
1403  }
1404 
1405  /*
1406  * Next, remove any filesystem entries for Unix sockets. To avoid race
1407  * conditions against incoming postmasters, this must happen after closing
1408  * the sockets and before removing lock files.
1409  */
1411 
1412  /*
1413  * We don't do anything about socket lock files here; those will be
1414  * removed in a later on_proc_exit callback.
1415  */
1416 }
1417 
1418 /*
1419  * on_proc_exit callback to delete external_pid_file
1420  */
1421 static void
1423 {
 /*
  * NOTE(review): the declarator line (function name and parameters) is
  * absent from this listing.
  *
  * Nothing happens unless external_pid_file is set.  The result of
  * unlink() is not examined.
  */
1424  if (external_pid_file)
1425  unlink(external_pid_file);
1426 }
1427 
1428 
1429 /*
1430  * Compute and check the directory paths to files that are part of the
1431  * installation (as deduced from the postgres executable's own location)
1432  */
1433 static void
1435 {
 /*
  * NOTE(review): this listing omits some source lines of this function
  * (gaps in the embedded numbering), including the declarator line and
  * the statement that follows the "Locate the pkglib directory" comment.
  * The comments added here describe only what is visible.
  *
  * argv0 is handed to the executable-lookup helpers and is also echoed in
  * their failure messages.
  */
1436  DIR *pdir;
1437 
1438  /* Locate the postgres executable itself */
 /* on success the result is left in my_exec_path, which the error hint further down reports */
1439  if (find_my_exec(argv0, my_exec_path) < 0)
1440  elog(FATAL, "%s: could not locate my own executable path", argv0);
1441 
1442 #ifdef EXEC_BACKEND
1443  /* Locate executable backend before we change working directory */
1444  if (find_other_exec(argv0, "postgres", PG_BACKEND_VERSIONSTR,
1445  postgres_exec_path) < 0)
1446  ereport(FATAL,
1447  (errmsg("%s: could not locate matching postgres executable",
1448  argv0)));
1449 #endif
1450 
1451  /*
1452  * Locate the pkglib directory --- this has to be set early in case we try
1453  * to load any modules from it in response to postgresql.conf entries.
1454  */
1456 
1457  /*
1458  * Verify that there's a readable directory there; otherwise the Postgres
1459  * installation is incomplete or corrupt. (A typical cause of this
1460  * failure is that the postgres executable has been moved or hardlinked to
1461  * some directory that's not a sibling of the installation lib/
1462  * directory.)
1463  */
 /* The directory is opened purely as a probe: nothing is read from it before FreeDir(). */
1464  pdir = AllocateDir(pkglib_path);
1465  if (pdir == NULL)
1466  ereport(ERROR,
1468  errmsg("could not open directory \"%s\": %m",
1469  pkglib_path),
1470  errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
1471  my_exec_path)));
1472  FreeDir(pdir);
1473 
1474  /*
1475  * XXX is it worth similarly checking the share/ directory? If the lib/
1476  * directory is there, then share/ probably is too.
1477  */
1478 }
1479 
1480 /*
1481  * Check that pg_control exists in the correct location in the data directory.
1482  *
1483  * No attempt is made to validate the contents of pg_control here. This is
1484  * just a sanity check to see if we are looking at a real data directory.
1485  */
1486 static void
1488 {
 /*
  * NOTE(review): the declarator line (function name and parameters) is
  * absent from this listing.
  */
1489  char path[MAXPGPATH];
1490  FILE *fp;
1491 
 /* The file checked is always <DataDir>/global/pg_control. */
1492  snprintf(path, sizeof(path), "%s/global/pg_control", DataDir);
1493 
 /* Opened only to prove it can be opened for reading; nothing is read before FreeFile(). */
1494  fp = AllocateFile(path, PG_BINARY_R);
1495  if (fp == NULL)
1496  {
 /* Reported with write_stderr() rather than ereport(), then exit with status 2. */
1497  write_stderr("%s: could not find the database system\n"
1498  "Expected to find it in the directory \"%s\",\n"
1499  "but could not open file \"%s\": %s\n",
1500  progname, DataDir, path, strerror(errno));
1501  ExitPostmaster(2);
1502  }
1503  FreeFile(fp);
1504 }
1505 
1506 /*
1507  * Determine how long we should let ServerLoop sleep.
1508  *
1509  * In normal conditions we wait at most one minute, to ensure that the other
1510  * background tasks handled by ServerLoop get done even when no requests are
1511  * arriving. However, if there are background workers waiting to be started,
1512  * we don't actually sleep so that they are quickly serviced. Other exception
1513  * cases are as shown in the code.
1514  */
1515 static void
1516 DetermineSleepTime(struct timeval *timeout)
1517 {
 /*
  * timeout is output-only: every visible path stores both tv_sec and
  * tv_usec before the function returns.
  *
  * NOTE(review): this listing omits a few source lines of this function
  * (gaps in the embedded numbering): the second half of the first "if"
  * condition, the loop header over the worker list, the first half of the
  * "forget this worker" condition, and the start of the call that fills
  * secs/microsecs.  The comments added here describe only what is visible.
  */
 /* 0 doubles as "no wakeup time found yet" in the tests further down */
1518  TimestampTz next_wakeup = 0;
1519 
1520  /*
1521  * Normal case: either there are no background workers at all, or we're in
1522  * a shutdown sequence (during which we ignore bgworkers altogether).
1523  */
1524  if (Shutdown > NoShutdown ||
1526  {
1527  if (AbortStartTime != 0)
1528  {
1529  /* time left to abort; clamp to 0 in case it already expired */
1530  timeout->tv_sec = SIGKILL_CHILDREN_AFTER_SECS -
1531  (time(NULL) - AbortStartTime);
1532  timeout->tv_sec = Max(timeout->tv_sec, 0);
1533  timeout->tv_usec = 0;
1534  }
1535  else
1536  {
1537  timeout->tv_sec = 60;
1538  timeout->tv_usec = 0;
1539  }
1540  return;
1541  }
1542 
 /* A zero timeout: workers are waiting to be started, so do not sleep at all. */
1543  if (StartWorkerNeeded)
1544  {
1545  timeout->tv_sec = 0;
1546  timeout->tv_usec = 0;
1547  return;
1548  }
1549 
1550  if (HaveCrashedWorker)
1551  {
1552  slist_mutable_iter siter;
1553 
1554  /*
1555  * When there are crashed bgworkers, we sleep just long enough that
1556  * they are restarted when they request to be. Scan the list to
1557  * determine the minimum of all wakeup times according to most recent
1558  * crash time and requested restart interval.
1559  */
1561  {
1562  RegisteredBgWorker *rw;
1563  TimestampTz this_wakeup;
1564 
1565  rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
1566 
 /* entries with no recorded crash time do not influence the sleep */
1567  if (rw->rw_crashed_at == 0)
1568  continue;
1569 
1571  || rw->rw_terminate)
1572  {
1573  ForgetBackgroundWorker(&siter);
1574  continue;
1575  }
1576 
 /* bgw_restart_time is scaled by 1000 to get milliseconds, so it is presumably in seconds -- TODO confirm */
1577  this_wakeup = TimestampTzPlusMilliseconds(rw->rw_crashed_at,
1578  1000L * rw->rw_worker.bgw_restart_time);
1579  if (next_wakeup == 0 || this_wakeup < next_wakeup)
1580  next_wakeup = this_wakeup;
1581  }
1582  }
1583 
 /* next_wakeup is still 0 here if no crashed worker supplied a restart time */
1584  if (next_wakeup != 0)
1585  {
1586  long secs;
1587  int microsecs;
1588 
1590  &secs, &microsecs);
1591  timeout->tv_sec = secs;
1592  timeout->tv_usec = microsecs;
1593 
1594  /* Ensure we don't exceed one minute */
1595  if (timeout->tv_sec > 60)
1596  {
1597  timeout->tv_sec = 60;
1598  timeout->tv_usec = 0;
1599  }
1600  }
1601  else
1602  {
1603  timeout->tv_sec = 60;
1604  timeout->tv_usec = 0;
1605  }
1606 }
1607 
1608 /*
1609  * Main idle loop of postmaster
1610  *
1611  * NB: Needs to be called with signals blocked
1612  */
1613 static int
1615 {
 /*
  * NOTE(review): this listing omits a number of source lines of this
  * function (gaps in the embedded numbering), including the declarator
  * line and the bodies of several of the "start a new one" tests.  The
  * comments added here describe only what is visible.
  *
  * Return value, as far as visible here: STATUS_ERROR when select() fails
  * with an errno other than EINTR or EWOULDBLOCK.  No other return
  * statement is visible inside the endless for (;;) loop.
  */
1616  fd_set readmask;
1617  int nSockets;
1618  time_t last_lockfile_recheck_time,
1619  last_touch_time;
1620 
 /*
  * Both timers start at the current time, so the first lock-file recheck
  * and the first touch come one full interval after entry.
  */
1621  last_lockfile_recheck_time = last_touch_time = time(NULL);
1622 
 /* readmask is built once here; each iteration works on a copy (rmask) */
1623  nSockets = initMasks(&readmask);
1624 
1625  for (;;)
1626  {
1627  fd_set rmask;
1628  int selres;
1629  time_t now;
1630 
1631  /*
1632  * Wait for a connection request to arrive.
1633  *
1634  * We block all signals except while sleeping. That makes it safe for
1635  * signal handlers, which again block all signals while executing, to
1636  * do nontrivial work.
1637  *
1638  * If we are in PM_WAIT_DEAD_END state, then we don't want to accept
1639  * any new connections, so we don't call select(), and just sleep.
1640  */
1641  memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
1642 
1643  if (pmState == PM_WAIT_DEAD_END)
1644  {
1646 
1647  pg_usleep(100000L); /* 100 msec seems reasonable */
 /* with selres == 0 the "selres > 0" accept branch below is skipped */
1648  selres = 0;
1649 
1650  PG_SETMASK(&BlockSig);
1651  }
1652  else
1653  {
1654  /* must set timeout each time; some OSes change it! */
1655  struct timeval timeout;
1656 
1657  /* Needs to run with blocked signals! */
1658  DetermineSleepTime(&timeout);
1659 
1661 
1662  selres = select(nSockets, &rmask, NULL, NULL, &timeout);
1663 
1664  PG_SETMASK(&BlockSig);
1665  }
1666 
1667  /* Now check the select() result */
 /*
  * EINTR and EWOULDBLOCK are tolerated: control falls through to the
  * housekeeping below.  Any other errno is logged and ends the loop.
  */
1668  if (selres < 0)
1669  {
1670  if (errno != EINTR && errno != EWOULDBLOCK)
1671  {
1672  ereport(LOG,
1674  errmsg("select() failed in postmaster: %m")));
1675  return STATUS_ERROR;
1676  }
1677  }
1678 
1679  /*
1680  * New connection pending on any of our sockets? If so, fork a child
1681  * process to deal with it.
1682  */
1683  if (selres > 0)
1684  {
1685  int i;
1686 
1687  for (i = 0; i < MAXLISTEN; i++)
1688  {
 /* the first unused slot ends the scan */
1689  if (ListenSocket[i] == PGINVALID_SOCKET)
1690  break;
1691  if (FD_ISSET(ListenSocket[i], &rmask))
1692  {
1693  Port *port;
1694 
1695  port = ConnCreate(ListenSocket[i]);
1696  if (port)
1697  {
 /* the result of BackendStartup() is not examined; cleanup below runs either way */
1698  BackendStartup(port);
1699 
1700  /*
1701  * We no longer need the open socket or port structure
1702  * in this process
1703  */
1704  StreamClose(port->sock);
1705  ConnFree(port);
1706  }
1707  }
1708  }
1709  }
1710 
1711  /* If we have lost the log collector, try to start a new one */
1712  if (SysLoggerPID == 0 && Logging_collector)
1714 
1715  /*
1716  * If no background writer process is running, and we are not in a
1717  * state that prevents it, start one. It doesn't matter if this
1718  * fails, we'll just try again later. Likewise for the checkpointer.
1719  */
1720  if (pmState == PM_RUN || pmState == PM_RECOVERY ||
1722  {
1723  if (CheckpointerPID == 0)
1725  if (BgWriterPID == 0)
1727  }
1728 
1729  /*
1730  * Likewise, if we have lost the walwriter process, try to start a new
1731  * one. But this is needed only in normal operation (else we cannot
1732  * be writing any new WAL).
1733  */
1734  if (WalWriterPID == 0 && pmState == PM_RUN)
1736 
1737  /*
1738  * If we have lost the autovacuum launcher, try to start a new one. We
1739  * don't want autovacuum to run in binary upgrade mode because
1740  * autovacuum might update relfrozenxid for empty tables before the
1741  * physical files are put in place.
1742  */
1743  if (!IsBinaryUpgrade && AutoVacPID == 0 &&
1745  pmState == PM_RUN)
1746  {
 /* the request flag is cleared only once a launcher PID has been recorded */
1748  if (AutoVacPID != 0)
1749  start_autovac_launcher = false; /* signal processed */
1750  }
1751 
1752  /* If we have lost the stats collector, try to start a new one */
1753  if (PgStatPID == 0 &&
1754  (pmState == PM_RUN || pmState == PM_HOT_STANDBY))
1755  PgStatPID = pgstat_start();
1756 
1757  /* If we have lost the archiver, try to start a new one. */
1758  if (PgArchPID == 0 && PgArchStartupAllowed())
1759  PgArchPID = pgarch_start();
1760 
1761  /* If we need to signal the autovacuum launcher, do so now */
1763  {
 /* the flag is cleared before the PID test, so it is reset even when no launcher is running */
1764  avlauncher_needs_signal = false;
1765  if (AutoVacPID != 0)
1767  }
1768 
1769  /* If we need to start a WAL receiver, try to do that now */
1772 
1773  /* Get other worker processes running, if needed */
1776 
1777 #ifdef HAVE_PTHREAD_IS_THREADED_NP
1778 
1779  /*
1780  * With assertions enabled, check regularly for appearance of
1781  * additional threads. All builds check at start and exit.
1782  */
1783  Assert(pthread_is_threaded_np() == 0);
1784 #endif
1785 
1786  /*
1787  * Lastly, check to see if it's time to do some things that we don't
1788  * want to do every single time through the loop, because they're a
1789  * bit expensive. Note that there's up to a minute of slop in when
1790  * these tasks will be performed, since DetermineSleepTime() will let
1791  * us sleep at most that long; except for SIGKILL timeout which has
1792  * special-case logic there.
1793  */
 /* one time() reading is shared by all the periodic checks below */
1794  now = time(NULL);
1795 
1796  /*
1797  * If we already sent SIGQUIT to children and they are slow to shut
1798  * down, it's time to send them SIGKILL. This doesn't happen
1799  * normally, but under certain conditions backends can get stuck while
1800  * shutting down. This is a last measure to get them unwedged.
1801  *
1802  * Note we also do this during recovery from a process crash.
1803  */
1804  if ((Shutdown >= ImmediateShutdown || (FatalError && !SendStop)) &&
1805  AbortStartTime != 0 &&
1807  {
1808  /* We were gentle with them before. Not anymore */
1810  /* reset flag so we don't SIGKILL again */
1811  AbortStartTime = 0;
1812  }
1813 
1814  /*
1815  * Once a minute, verify that postmaster.pid hasn't been removed or
1816  * overwritten. If it has, we force a shutdown. This avoids having
1817  * postmasters and child processes hanging around after their database
1818  * is gone, and maybe causing problems if a new database cluster is
1819  * created in the same place. It also provides some protection
1820  * against a DBA foolishly removing postmaster.pid and manually
1821  * starting a new postmaster. Data corruption is likely to ensue from
1822  * that anyway, but we can minimize the damage by aborting ASAP.
1823  */
1824  if (now - last_lockfile_recheck_time >= 1 * SECS_PER_MINUTE)
1825  {
1826  if (!RecheckDataDirLockFile())
1827  {
1828  ereport(LOG,
1829  (errmsg("performing immediate shutdown because data directory lock file is invalid")));
1831  }
 /* the recheck timer restarts whether or not the check passed */
1832  last_lockfile_recheck_time = now;
1833  }
1834 
1835  /*
1836  * Touch Unix socket and lock files every 58 minutes, to ensure that
1837  * they are not removed by overzealous /tmp-cleaning tasks. We assume
1838  * no one runs cleaners with cutoff times of less than an hour ...
1839  */
1840  if (now - last_touch_time >= 58 * SECS_PER_MINUTE)
1841  {
1842  TouchSocketFiles();
1844  last_touch_time = now;
1845  }
1846  }
1847 }
1848 
1849 /*
1850  * Initialise the masks for select() for the ports we are listening on.
1851  * Return the number of sockets to listen on.
1852  */
1853 static int
1854 initMasks(fd_set *rmask)
1855 {
1856  int maxsock = -1;
1857  int i;
1858 
1859  FD_ZERO(rmask);
1860 
1861  for (i = 0; i < MAXLISTEN; i++)
1862  {
1863  int fd = ListenSocket[i];
1864 
1865  if (fd == PGINVALID_SOCKET)
1866  break;
1867  FD_SET(fd, rmask);
1868 
1869  if (fd > maxsock)
1870  maxsock = fd;
1871  }
1872 
1873  return maxsock + 1;
1874 }
1875 
1876 
1877 /*
1878  * Read a client's startup packet and do something according to it.
1879  *
1880  * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1881  * not return at all.
1882  *
1883  * (Note that ereport(FATAL) stuff is sent to the client, so only use it
1884  * if that's what you want. Return STATUS_ERROR if you don't want to
1885  * send anything to the client, which would typically be appropriate
1886  * if we detect a communications failure.)
1887  */
1888 static int
1890 {
1891  int32 len;
1892  void *buf;
1893  ProtocolVersion proto;
1894  MemoryContext oldcontext;
1895 
1896  pq_startmsgread();
1897  if (pq_getbytes((char *) &len, 4) == EOF)
1898  {
1899  /*
1900  * EOF after SSLdone probably means the client didn't like our
1901  * response to NEGOTIATE_SSL_CODE. That's not an error condition, so
1902  * don't clutter the log with a complaint.
1903  */
1904  if (!SSLdone)
1906  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1907  errmsg("incomplete startup packet")));
1908  return STATUS_ERROR;
1909  }
1910 
1911  len = pg_ntoh32(len);
1912  len -= 4;
1913 
1914  if (len < (int32) sizeof(ProtocolVersion) ||
1916  {
1918  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1919  errmsg("invalid length of startup packet")));
1920  return STATUS_ERROR;
1921  }
1922 
1923  /*
1924  * Allocate at least the size of an old-style startup packet, plus one
1925  * extra byte, and make sure all are zeroes. This ensures we will have
1926  * null termination of all strings, in both fixed- and variable-length
1927  * packet layouts.
1928  */
1929  if (len <= (int32) sizeof(StartupPacket))
1930  buf = palloc0(sizeof(StartupPacket) + 1);
1931  else
1932  buf = palloc0(len + 1);
1933 
1934  if (pq_getbytes(buf, len) == EOF)
1935  {
1937  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1938  errmsg("incomplete startup packet")));
1939  return STATUS_ERROR;
1940  }
1941  pq_endmsgread();
1942 
1943  /*
1944  * The first field is either a protocol version number or a special
1945  * request code.
1946  */
1947  port->proto = proto = pg_ntoh32(*((ProtocolVersion *) buf));
1948 
1949  if (proto == CANCEL_REQUEST_CODE)
1950  {
1951  processCancelRequest(port, buf);
1952  /* Not really an error, but we don't want to proceed further */
1953  return STATUS_ERROR;
1954  }
1955 
1956  if (proto == NEGOTIATE_SSL_CODE && !SSLdone)
1957  {
1958  char SSLok;
1959 
1960 #ifdef USE_SSL
1961  /* No SSL when disabled or on Unix sockets */
1962  if (!LoadedSSL || IS_AF_UNIX(port->laddr.addr.ss_family))
1963  SSLok = 'N';
1964  else
1965  SSLok = 'S'; /* Support for SSL */
1966 #else
1967  SSLok = 'N'; /* No support for SSL */
1968 #endif
1969 
1970 retry1:
1971  if (send(port->sock, &SSLok, 1, 0) != 1)
1972  {
1973  if (errno == EINTR)
1974  goto retry1; /* if interrupted, just retry */
1977  errmsg("failed to send SSL negotiation response: %m")));
1978  return STATUS_ERROR; /* close the connection */
1979  }
1980 
1981 #ifdef USE_SSL
1982  if (SSLok == 'S' && secure_open_server(port) == -1)
1983  return STATUS_ERROR;
1984 #endif
1985  /* regular startup packet, cancel, etc packet should follow... */
1986  /* but not another SSL negotiation request */
1987  return ProcessStartupPacket(port, true);
1988  }
1989 
1990  /* Could add additional special packet types here */
1991 
1992  /*
1993  * Set FrontendProtocol now so that ereport() knows what format to send if
1994  * we fail during startup.
1995  */
1996  FrontendProtocol = proto;
1997 
1998  /* Check that the major protocol version is in range. */
2001  ereport(FATAL,
2002  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2003  errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
2004  PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
2008 
2009  /*
2010  * Now fetch parameters out of startup packet and save them into the Port
2011  * structure. All data structures attached to the Port struct must be
2012  * allocated in TopMemoryContext so that they will remain available in a
2013  * running backend (even after PostmasterContext is destroyed). We need
2014  * not worry about leaking this storage on failure, since we aren't in the
2015  * postmaster process anymore.
2016  */
2018 
2019  if (PG_PROTOCOL_MAJOR(proto) >= 3)
2020  {
2021  int32 offset = sizeof(ProtocolVersion);
2022  List *unrecognized_protocol_options = NIL;
2023 
2024  /*
2025  * Scan packet body for name/option pairs. We can assume any string
2026  * beginning within the packet body is null-terminated, thanks to
2027  * zeroing extra byte above.
2028  */
2029  port->guc_options = NIL;
2030 
2031  while (offset < len)
2032  {
2033  char *nameptr = ((char *) buf) + offset;
2034  int32 valoffset;
2035  char *valptr;
2036 
2037  if (*nameptr == '\0')
2038  break; /* found packet terminator */
2039  valoffset = offset + strlen(nameptr) + 1;
2040  if (valoffset >= len)
2041  break; /* missing value, will complain below */
2042  valptr = ((char *) buf) + valoffset;
2043 
2044  if (strcmp(nameptr, "database") == 0)
2045  port->database_name = pstrdup(valptr);
2046  else if (strcmp(nameptr, "user") == 0)
2047  port->user_name = pstrdup(valptr);
2048  else if (strcmp(nameptr, "options") == 0)
2049  port->cmdline_options = pstrdup(valptr);
2050  else if (strcmp(nameptr, "replication") == 0)
2051  {
2052  /*
2053  * Due to backward compatibility concerns the replication
2054  * parameter is a hybrid beast which allows the value to be
2055  * either boolean or the string 'database'. The latter
2056  * connects to a specific database which is e.g. required for
2057  * logical decoding.
2058  */
2059  if (strcmp(valptr, "database") == 0)
2060  {
2061  am_walsender = true;
2062  am_db_walsender = true;
2063  }
2064  else if (!parse_bool(valptr, &am_walsender))
2065  ereport(FATAL,
2066  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2067  errmsg("invalid value for parameter \"%s\": \"%s\"",
2068  "replication",
2069  valptr),
2070  errhint("Valid values are: \"false\", 0, \"true\", 1, \"database\".")));
2071  }
2072  else if (strncmp(nameptr, "_pq_.", 5) == 0)
2073  {
2074  /*
2075  * Any option beginning with _pq_. is reserved for use as a
2076  * protocol-level option, but at present no such options are
2077  * defined.
2078  */
2079  unrecognized_protocol_options =
2080  lappend(unrecognized_protocol_options, pstrdup(nameptr));
2081  }
2082  else
2083  {
2084  /* Assume it's a generic GUC option */
2085  port->guc_options = lappend(port->guc_options,
2086  pstrdup(nameptr));
2087  port->guc_options = lappend(port->guc_options,
2088  pstrdup(valptr));
2089  }
2090  offset = valoffset + strlen(valptr) + 1;
2091  }
2092 
2093  /*
2094  * If we didn't find a packet terminator exactly at the end of the
2095  * given packet length, complain.
2096  */
2097  if (offset != len - 1)
2098  ereport(FATAL,
2099  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2100  errmsg("invalid startup packet layout: expected terminator as last byte")));
2101 
2102  /*
2103  * If the client requested a newer protocol version or if the client
2104  * requested any protocol options we didn't recognize, let them know
2105  * the newest minor protocol version we do support and the names of
2106  * any unrecognized options.
2107  */
2109  unrecognized_protocol_options != NIL)
2110  SendNegotiateProtocolVersion(unrecognized_protocol_options);
2111  }
2112  else
2113  {
2114  /*
2115  * Get the parameters from the old-style, fixed-width-fields startup
2116  * packet as C strings. The packet destination was cleared first so a
2117  * short packet has zeros silently added. We have to be prepared to
2118  * truncate the pstrdup result for oversize fields, though.
2119  */
2120  StartupPacket *packet = (StartupPacket *) buf;
2121 
2122  port->database_name = pstrdup(packet->database);
2123  if (strlen(port->database_name) > sizeof(packet->database))
2124  port->database_name[sizeof(packet->database)] = '\0';
2125  port->user_name = pstrdup(packet->user);
2126  if (strlen(port->user_name) > sizeof(packet->user))
2127  port->user_name[sizeof(packet->user)] = '\0';
2128  port->cmdline_options = pstrdup(packet->options);
2129  if (strlen(port->cmdline_options) > sizeof(packet->options))
2130  port->cmdline_options[sizeof(packet->options)] = '\0';
2131  port->guc_options = NIL;
2132  }
2133 
2134  /* Check a user name was given. */
2135  if (port->user_name == NULL || port->user_name[0] == '\0')
2136  ereport(FATAL,
2137  (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
2138  errmsg("no PostgreSQL user name specified in startup packet")));
2139 
2140  /* The database defaults to the user name. */
2141  if (port->database_name == NULL || port->database_name[0] == '\0')
2142  port->database_name = pstrdup(port->user_name);
2143 
2144  if (Db_user_namespace)
2145  {
2146  /*
2147  * If user@, it is a global user, remove '@'. We only want to do this
2148  * if there is an '@' at the end and no earlier in the user string or
2149  * they may fake as a local user of another database attaching to this
2150  * database.
2151  */
2152  if (strchr(port->user_name, '@') ==
2153  port->user_name + strlen(port->user_name) - 1)
2154  *strchr(port->user_name, '@') = '\0';
2155  else
2156  {
2157  /* Append '@' and dbname */
2158  port->user_name = psprintf("%s@%s", port->user_name, port->database_name);
2159  }
2160  }
2161 
2162  /*
2163  * Truncate given database and user names to length of a Postgres name.
2164  * This avoids lookup failures when overlength names are given.
2165  */
2166  if (strlen(port->database_name) >= NAMEDATALEN)
2167  port->database_name[NAMEDATALEN - 1] = '\0';
2168  if (strlen(port->user_name) >= NAMEDATALEN)
2169  port->user_name[NAMEDATALEN - 1] = '\0';
2170 
2171  /*
2172  * Normal walsender backends, e.g. for streaming replication, are not
2173  * connected to a particular database. But walsenders used for logical
2174  * replication need to connect to a specific database. We allow streaming
2175  * replication commands to be issued even if connected to a database as it
2176  * can make sense to first make a basebackup and then stream changes
2177  * starting from that.
2178  */
2179  if (am_walsender && !am_db_walsender)
2180  port->database_name[0] = '\0';
2181 
2182  /*
2183  * Done putting stuff in TopMemoryContext.
2184  */
2185  MemoryContextSwitchTo(oldcontext);
2186 
2187  /*
2188  * If we're going to reject the connection due to database state, say so
2189  * now instead of wasting cycles on an authentication exchange. (This also
2190  * allows a pg_ping utility to be written.)
2191  */
2192  switch (port->canAcceptConnections)
2193  {
2194  case CAC_STARTUP:
2195  ereport(FATAL,
2197  errmsg("the database system is starting up")));
2198  break;
2199  case CAC_SHUTDOWN:
2200  ereport(FATAL,
2202  errmsg("the database system is shutting down")));
2203  break;
2204  case CAC_RECOVERY:
2205  ereport(FATAL,
2207  errmsg("the database system is in recovery mode")));
2208  break;
2209  case CAC_TOOMANY:
2210  ereport(FATAL,
2211  (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
2212  errmsg("sorry, too many clients already")));
2213  break;
2214  case CAC_WAITBACKUP:
2215  /* OK for now, will check in InitPostgres */
2216  break;
2217  case CAC_OK:
2218  break;
2219  }
2220 
2221  return STATUS_OK;
2222 }
2223 
2224 /*
2225  * Send a NegotiateProtocolVersion to the client. This lets the client know
2226  * that they have requested a newer minor protocol version than we are able
2227  * to speak. We'll speak the highest version we know about; the client can,
2228  * of course, abandon the connection if that's a problem.
2229  *
2230  * We also include in the response a list of protocol options we didn't
2231  * understand. This allows clients to include optional parameters that might
2232  * be present either in newer protocol versions or third-party protocol
2233  * extensions without fear of having to reconnect if those options are not
2234  * understood, while at the same time making certain that the client is aware
2235  * of which options were actually accepted.
2236  */
2237 static void
2238 SendNegotiateProtocolVersion(List *unrecognized_protocol_options)
2239 {
2241  ListCell *lc;
2242 
2243  pq_beginmessage(&buf, 'v'); /* NegotiateProtocolVersion */
2245  pq_sendint32(&buf, list_length(unrecognized_protocol_options));
2246  foreach(lc, unrecognized_protocol_options)
2247  pq_sendstring(&buf, lfirst(lc));
2248  pq_endmessage(&buf);
2249 
2250  /* no need to flush, some other message will follow */
2251 }
2252 
2253 /*
2254  * The client has sent a cancel request packet, not a normal
2255  * start-a-new-connection packet. Perform the necessary processing.
2256  * Nothing is sent back to the client.
2257  */
2258 static void
2260 {
2261  CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
2262  int backendPID;
2263  int32 cancelAuthCode;
2264  Backend *bp;
2265 
2266 #ifndef EXEC_BACKEND
2267  dlist_iter iter;
2268 #else
2269  int i;
2270 #endif
2271 
2272  backendPID = (int) pg_ntoh32(canc->backendPID);
2273  cancelAuthCode = (int32) pg_ntoh32(canc->cancelAuthCode);
2274 
2275  /*
2276  * See if we have a matching backend. In the EXEC_BACKEND case, we can no
2277  * longer access the postmaster's own backend list, and must rely on the
2278  * duplicate array in shared memory.
2279  */
2280 #ifndef EXEC_BACKEND
2281  dlist_foreach(iter, &BackendList)
2282  {
2283  bp = dlist_container(Backend, elem, iter.cur);
2284 #else
2285  for (i = MaxLivePostmasterChildren() - 1; i >= 0; i--)
2286  {
2287  bp = (Backend *) &ShmemBackendArray[i];
2288 #endif
2289  if (bp->pid == backendPID)
2290  {
2291  if (bp->cancel_key == cancelAuthCode)
2292  {
2293  /* Found a match; signal that backend to cancel current op */
2294  ereport(DEBUG2,
2295  (errmsg_internal("processing cancel request: sending SIGINT to process %d",
2296  backendPID)));
2297  signal_child(bp->pid, SIGINT);
2298  }
2299  else
2300  /* Right PID, wrong key: no way, Jose */
2301  ereport(LOG,
2302  (errmsg("wrong key in cancel request for process %d",
2303  backendPID)));
2304  return;
2305  }
2306  }
2307 
2308  /* No matching backend */
2309  ereport(LOG,
2310  (errmsg("PID %d in cancel request did not match any process",
2311  backendPID)));
2312 }
2313 
2314 /*
2315  * canAcceptConnections --- check to see if database state allows connections.
2316  */
2317 static CAC_state
2319 {
2320  CAC_state result = CAC_OK;
2321 
2322  /*
2323  * Can't start backends when in startup/shutdown/inconsistent recovery
2324  * state.
2325  *
2326  * In state PM_WAIT_BACKUP only superusers can connect (this must be
2327  * allowed so that a superuser can end online backup mode); we return
2328  * CAC_WAITBACKUP code to indicate that this must be checked later. Note
2329  * that neither CAC_OK nor CAC_WAITBACKUP can safely be returned until we
2330  * have checked for too many children.
2331  */
2332  if (pmState != PM_RUN)
2333  {
2334  if (pmState == PM_WAIT_BACKUP)
2335  result = CAC_WAITBACKUP; /* allow superusers only */
2336  else if (Shutdown > NoShutdown)
2337  return CAC_SHUTDOWN; /* shutdown is pending */
2338  else if (!FatalError &&
2339  (pmState == PM_STARTUP ||
2340  pmState == PM_RECOVERY))
2341  return CAC_STARTUP; /* normal startup */
2342  else if (!FatalError &&
2344  result = CAC_OK; /* connection OK during hot standby */
2345  else
2346  return CAC_RECOVERY; /* else must be crash recovery */
2347  }
2348 
2349  /*
2350  * Don't start too many children.
2351  *
2352  * We allow more connections than we can have backends here because some
2353  * might still be authenticating; they might fail auth, or some existing
2354  * backend might exit before the auth cycle is completed. The exact
2355  * MaxBackends limit is enforced when a new backend tries to join the
2356  * shared-inval backend array.
2357  *
2358  * The limit here must match the sizes of the per-child-process arrays;
2359  * see comments for MaxLivePostmasterChildren().
2360  */
2362  result = CAC_TOOMANY;
2363 
2364  return result;
2365 }
2366 
2367 
2368 /*
2369  * ConnCreate -- create a local connection data structure
2370  *
2371  * Returns NULL on failure, other than out-of-memory which is fatal.
2372  */
2373 static Port *
2374 ConnCreate(int serverFd)
2375 {
2376  Port *port;
2377 
2378  if (!(port = (Port *) calloc(1, sizeof(Port))))
2379  {
2380  ereport(LOG,
2381  (errcode(ERRCODE_OUT_OF_MEMORY),
2382  errmsg("out of memory")));
2383  ExitPostmaster(1);
2384  }
2385 
2386  if (StreamConnection(serverFd, port) != STATUS_OK)
2387  {
2388  if (port->sock != PGINVALID_SOCKET)
2389  StreamClose(port->sock);
2390  ConnFree(port);
2391  return NULL;
2392  }
2393 
2394  /*
2395  * Allocate GSSAPI specific state struct
2396  */
2397 #ifndef EXEC_BACKEND
2398 #if defined(ENABLE_GSS) || defined(ENABLE_SSPI)
2399  port->gss = (pg_gssinfo *) calloc(1, sizeof(pg_gssinfo));
2400  if (!port->gss)
2401  {
2402  ereport(LOG,
2403  (errcode(ERRCODE_OUT_OF_MEMORY),
2404  errmsg("out of memory")));
2405  ExitPostmaster(1);
2406  }
2407 #endif
2408 #endif
2409 
2410  return port;
2411 }
2412 
2413 
2414 /*
2415  * ConnFree -- free a local connection data structure
2416  */
2417 static void
2419 {
2420 #ifdef USE_SSL
2421  secure_close(conn);
2422 #endif
2423  if (conn->gss)
2424  free(conn->gss);
2425  free(conn);
2426 }
2427 
2428 
2429 /*
2430  * ClosePostmasterPorts -- close all the postmaster's open sockets
2431  *
2432  * This is called during child process startup to release file descriptors
2433  * that are not needed by that child process. The postmaster still has
2434  * them open, of course.
2435  *
2436  * Note: we pass am_syslogger as a boolean because we don't want to set
2437  * the global variable yet when this is called.
2438  */
2439 void
2441 {
2442  int i;
2443 
2444 #ifndef WIN32
2445 
2446  /*
2447  * Close the write end of postmaster death watch pipe. It's important to
2448  * do this as early as possible, so that if postmaster dies, others won't
2449  * think that it's still running because we're holding the pipe open.
2450  */
2452  ereport(FATAL,
2454  errmsg_internal("could not close postmaster death monitoring pipe in child process: %m")));
2456 #endif
2457 
2458  /* Close the listen sockets */
2459  for (i = 0; i < MAXLISTEN; i++)
2460  {
2461  if (ListenSocket[i] != PGINVALID_SOCKET)
2462  {
2465  }
2466  }
2467 
2468  /* If using syslogger, close the read side of the pipe */
2469  if (!am_syslogger)
2470  {
2471 #ifndef WIN32
2472  if (syslogPipe[0] >= 0)
2473  close(syslogPipe[0]);
2474  syslogPipe[0] = -1;
2475 #else
2476  if (syslogPipe[0])
2477  CloseHandle(syslogPipe[0]);
2478  syslogPipe[0] = 0;
2479 #endif
2480  }
2481 
2482 #ifdef USE_BONJOUR
2483  /* If using Bonjour, close the connection to the mDNS daemon */
2484  if (bonjour_sdref)
2485  close(DNSServiceRefSockFD(bonjour_sdref));
2486 #endif
2487 }
2488 
2489 
2490 /*
2491  * reset_shared -- reset shared memory and semaphores
2492  */
2493 static void
2494 reset_shared(int port)
2495 {
2496  /*
2497  * Create or re-create shared memory and semaphores.
2498  *
2499  * Note: in each "cycle of life" we will normally assign the same IPC keys
2500  * (if using SysV shmem and/or semas), since the port number is used to
2501  * determine IPC keys. This helps ensure that we will clean up dead IPC
2502  * objects if the postmaster crashes and is restarted.
2503  */
2504  CreateSharedMemoryAndSemaphores(false, port);
2505 }
2506 
2507 
2508 /*
2509  * SIGHUP -- reread config files, and tell children to do same
2510  */
2511 static void
2513 {
2514  int save_errno = errno;
2515 
2516  PG_SETMASK(&BlockSig);
2517 
2518  if (Shutdown <= SmartShutdown)
2519  {
2520  ereport(LOG,
2521  (errmsg("received SIGHUP, reloading configuration files")));
2524  if (StartupPID != 0)
2526  if (BgWriterPID != 0)
2528  if (CheckpointerPID != 0)
2530  if (WalWriterPID != 0)
2532  if (WalReceiverPID != 0)
2534  if (AutoVacPID != 0)
2536  if (PgArchPID != 0)
2538  if (SysLoggerPID != 0)
2540  if (PgStatPID != 0)
2542 
2543  /* Reload authentication config files too */
2544  if (!load_hba())
2545  ereport(LOG,
2546  (errmsg("pg_hba.conf was not reloaded")));
2547 
2548  if (!load_ident())
2549  ereport(LOG,
2550  (errmsg("pg_ident.conf was not reloaded")));
2551 
2552 #ifdef USE_SSL
2553  /* Reload SSL configuration as well */
2554  if (EnableSSL)
2555  {
2556  if (secure_initialize(false) == 0)
2557  LoadedSSL = true;
2558  else
2559  ereport(LOG,
2560  (errmsg("SSL configuration was not reloaded")));
2561  }
2562  else
2563  {
2564  secure_destroy();
2565  LoadedSSL = false;
2566  }
2567 #endif
2568 
2569 #ifdef EXEC_BACKEND
2570  /* Update the starting-point file for future children */
2571  write_nondefault_variables(PGC_SIGHUP);
2572 #endif
2573  }
2574 
2576 
2577  errno = save_errno;
2578 }
2579 
2580 
2581 /*
2582  * pmdie -- signal handler for processing various postmaster signals.
2583  */
2584 static void
2586 {
2587  int save_errno = errno;
2588 
2589  PG_SETMASK(&BlockSig);
2590 
2591  ereport(DEBUG2,
2592  (errmsg_internal("postmaster received signal %d",
2593  postgres_signal_arg)));
2594 
2595  switch (postgres_signal_arg)
2596  {
2597  case SIGTERM:
2598 
2599  /*
2600  * Smart Shutdown:
2601  *
2602  * Wait for children to end their work, then shut down.
2603  */
2604  if (Shutdown >= SmartShutdown)
2605  break;
2607  ereport(LOG,
2608  (errmsg("received smart shutdown request")));
2609 
2610  /* Report status */
2612 #ifdef USE_SYSTEMD
2613  sd_notify(0, "STOPPING=1");
2614 #endif
2615 
2616  if (pmState == PM_RUN || pmState == PM_RECOVERY ||
2618  {
2619  /* autovac workers are told to shut down immediately */
2620  /* and bgworkers too; does this need tweaking? */
2621  SignalSomeChildren(SIGTERM,
2623  /* and the autovac launcher too */
2624  if (AutoVacPID != 0)
2625  signal_child(AutoVacPID, SIGTERM);
2626  /* and the bgwriter too */
2627  if (BgWriterPID != 0)
2628  signal_child(BgWriterPID, SIGTERM);
2629  /* and the walwriter too */
2630  if (WalWriterPID != 0)
2631  signal_child(WalWriterPID, SIGTERM);
2632 
2633  /*
2634  * If we're in recovery, we can't kill the startup process
2635  * right away, because at present doing so does not release
2636  * its locks. We might want to change this in a future
2637  * release. For the time being, the PM_WAIT_READONLY state
2638  * indicates that we're waiting for the regular (read only)
2639  * backends to die off; once they do, we'll kill the startup
2640  * and walreceiver processes.
2641  */
2642  pmState = (pmState == PM_RUN) ?
2644  }
2645 
2646  /*
2647  * Now wait for online backup mode to end and backends to exit. If
2648  * that is already the case, PostmasterStateMachine will take the
2649  * next step.
2650  */
2652  break;
2653 
2654  case SIGINT:
2655 
2656  /*
2657  * Fast Shutdown:
2658  *
2659  * Abort all children with SIGTERM (rollback active transactions
2660  * and exit) and shut down when they are gone.
2661  */
2662  if (Shutdown >= FastShutdown)
2663  break;
2665  ereport(LOG,
2666  (errmsg("received fast shutdown request")));
2667 
2668  /* Report status */
2670 #ifdef USE_SYSTEMD
2671  sd_notify(0, "STOPPING=1");
2672 #endif
2673 
2674  if (StartupPID != 0)
2675  signal_child(StartupPID, SIGTERM);
2676  if (BgWriterPID != 0)
2677  signal_child(BgWriterPID, SIGTERM);
2678  if (WalReceiverPID != 0)
2679  signal_child(WalReceiverPID, SIGTERM);
2680  if (pmState == PM_RECOVERY)
2681  {
2683 
2684  /*
2685  * Only startup, bgwriter, walreceiver, possibly bgworkers,
2686  * and/or checkpointer should be active in this state; we just
2687  * signaled the first four, and we don't want to kill
2688  * checkpointer yet.
2689  */
2691  }
2692  else if (pmState == PM_RUN ||
2693  pmState == PM_WAIT_BACKUP ||
2697  {
2698  ereport(LOG,
2699  (errmsg("aborting any active transactions")));
2700  /* shut down all backends and workers */
2701  SignalSomeChildren(SIGTERM,
2704  /* and the autovac launcher too */
2705  if (AutoVacPID != 0)
2706  signal_child(AutoVacPID, SIGTERM);
2707  /* and the walwriter too */
2708  if (WalWriterPID != 0)
2709  signal_child(WalWriterPID, SIGTERM);
2711  }
2712 
2713  /*
2714  * Now wait for backends to exit. If there are none,
2715  * PostmasterStateMachine will take the next step.
2716  */
2718  break;
2719 
2720  case SIGQUIT:
2721 
2722  /*
2723  * Immediate Shutdown:
2724  *
2725  * abort all children with SIGQUIT, wait for them to exit,
2726  * terminate remaining ones with SIGKILL, then exit without
2727  * attempt to properly shut down the data base system.
2728  */
2729  if (Shutdown >= ImmediateShutdown)
2730  break;
2732  ereport(LOG,
2733  (errmsg("received immediate shutdown request")));
2734 
2735  /* Report status */
2737 #ifdef USE_SYSTEMD
2738  sd_notify(0, "STOPPING=1");
2739 #endif
2740 
2743 
2744  /* set stopwatch for them to die */
2745  AbortStartTime = time(NULL);
2746 
2747  /*
2748  * Now wait for backends to exit. If there are none,
2749  * PostmasterStateMachine will take the next step.
2750  */
2752  break;
2753  }
2754 
2756 
2757  errno = save_errno;
2758 }
2759 
2760 /*
2761  * Reaper -- signal handler to cleanup after a child process dies.
2762  */
2763 static void
2765 {
2766  int save_errno = errno;
2767  int pid; /* process id of dead child process */
2768  int exitstatus; /* its exit status */
2769 
2770  PG_SETMASK(&BlockSig);
2771 
2772  ereport(DEBUG4,
2773  (errmsg_internal("reaping dead processes")));
2774 
2775  while ((pid = waitpid(-1, &exitstatus, WNOHANG)) > 0)
2776  {
2777  /*
2778  * Check if this child was a startup process.
2779  */
2780  if (pid == StartupPID)
2781  {
2782  StartupPID = 0;
2783 
2784  /*
2785  * Startup process exited in response to a shutdown request (or it
2786  * completed normally regardless of the shutdown request).
2787  */
2788  if (Shutdown > NoShutdown &&
2789  (EXIT_STATUS_0(exitstatus) || EXIT_STATUS_1(exitstatus)))
2790  {
2793  /* PostmasterStateMachine logic does the rest */
2794  continue;
2795  }
2796 
2797  if (EXIT_STATUS_3(exitstatus))
2798  {
2799  ereport(LOG,
2800  (errmsg("shutdown at recovery target")));
2803  TerminateChildren(SIGTERM);
2805  /* PostmasterStateMachine logic does the rest */
2806  continue;
2807  }
2808 
2809  /*
2810  * Unexpected exit of startup process (including FATAL exit)
2811  * during PM_STARTUP is treated as catastrophic. There are no
2812  * other processes running yet, so we can just exit.
2813  */
2814  if (pmState == PM_STARTUP && !EXIT_STATUS_0(exitstatus))
2815  {
2816  LogChildExit(LOG, _("startup process"),
2817  pid, exitstatus);
2818  ereport(LOG,
2819  (errmsg("aborting startup due to startup process failure")));
2820  ExitPostmaster(1);
2821  }
2822 
2823  /*
2824  * After PM_STARTUP, any unexpected exit (including FATAL exit) of
2825  * the startup process is catastrophic, so kill other children,
2826  * and set StartupStatus so we don't try to reinitialize after
2827  * they're gone. Exception: if StartupStatus is STARTUP_SIGNALED,
2828  * then we previously sent the startup process a SIGQUIT; so
2829  * that's probably the reason it died, and we do want to try to
2830  * restart in that case.
2831  */
2832  if (!EXIT_STATUS_0(exitstatus))
2833  {
2836  else
2838  HandleChildCrash(pid, exitstatus,
2839  _("startup process"));
2840  continue;
2841  }
2842 
2843  /*
2844  * Startup succeeded, commence normal operations
2845  */
2847  FatalError = false;
2848  Assert(AbortStartTime == 0);
2849  ReachedNormalRunning = true;
2850  pmState = PM_RUN;
2851 
2852  /*
2853  * Crank up the background tasks, if we didn't do that already
2854  * when we entered consistent recovery state. It doesn't matter
2855  * if this fails, we'll just try again later.
2856  */
2857  if (CheckpointerPID == 0)
2859  if (BgWriterPID == 0)
2861  if (WalWriterPID == 0)
2863 
2864  /*
2865  * Likewise, start other special children as needed. In a restart
2866  * situation, some of them may be alive already.
2867  */
2870  if (PgArchStartupAllowed() && PgArchPID == 0)
2871  PgArchPID = pgarch_start();
2872  if (PgStatPID == 0)
2873  PgStatPID = pgstat_start();
2874 
2875  /* workers may be scheduled to start now */
2877 
2878  /* at this point we are really open for business */
2879  ereport(LOG,
2880  (errmsg("database system is ready to accept connections")));
2881 
2882  /* Report status */
2884 #ifdef USE_SYSTEMD
2885  sd_notify(0, "READY=1");
2886 #endif
2887 
2888  continue;
2889  }
2890 
2891  /*
2892  * Was it the bgwriter? Normal exit can be ignored; we'll start a new
2893  * one at the next iteration of the postmaster's main loop, if
2894  * necessary. Any other exit condition is treated as a crash.
2895  */
2896  if (pid == BgWriterPID)
2897  {
2898  BgWriterPID = 0;
2899  if (!EXIT_STATUS_0(exitstatus))
2900  HandleChildCrash(pid, exitstatus,
2901  _("background writer process"));
2902  continue;
2903  }
2904 
2905  /*
2906  * Was it the checkpointer?
2907  */
2908  if (pid == CheckpointerPID)
2909  {
2910  CheckpointerPID = 0;
2911  if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN)
2912  {
2913  /*
2914  * OK, we saw normal exit of the checkpointer after it's been
2915  * told to shut down. We expect that it wrote a shutdown
2916  * checkpoint. (If for some reason it didn't, recovery will
2917  * occur on next postmaster start.)
2918  *
2919  * At this point we should have no normal backend children
2920  * left (else we'd not be in PM_SHUTDOWN state) but we might
2921  * have dead_end children to wait for.
2922  *
2923  * If we have an archiver subprocess, tell it to do a last
2924  * archive cycle and quit. Likewise, if we have walsender
2925  * processes, tell them to send any remaining WAL and quit.
2926  */
2928 
2929  /* Waken archiver for the last time */
2930  if (PgArchPID != 0)
2932 
2933  /*
2934  * Waken walsenders for the last time. No regular backends
2935  * should be around anymore.
2936  */
2938 
2940 
2941  /*
2942  * We can also shut down the stats collector now; there's
2943  * nothing left for it to do.
2944  */
2945  if (PgStatPID != 0)
2947  }
2948  else
2949  {
2950  /*
2951  * Any unexpected exit of the checkpointer (including FATAL
2952  * exit) is treated as a crash.
2953  */
2954  HandleChildCrash(pid, exitstatus,
2955  _("checkpointer process"));
2956  }
2957 
2958  continue;
2959  }
2960 
2961  /*
2962  * Was it the wal writer? Normal exit can be ignored; we'll start a
2963  * new one at the next iteration of the postmaster's main loop, if
2964  * necessary. Any other exit condition is treated as a crash.
2965  */
2966  if (pid == WalWriterPID)
2967  {
2968  WalWriterPID = 0;
2969  if (!EXIT_STATUS_0(exitstatus))
2970  HandleChildCrash(pid, exitstatus,
2971  _("WAL writer process"));
2972  continue;
2973  }
2974 
2975  /*
2976  * Was it the wal receiver? If exit status is zero (normal) or one
2977  * (FATAL exit), we assume everything is all right just like normal
2978  * backends. (If we need a new wal receiver, we'll start one at the
2979  * next iteration of the postmaster's main loop.)
2980  */
2981  if (pid == WalReceiverPID)
2982  {
2983  WalReceiverPID = 0;
2984  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
2985  HandleChildCrash(pid, exitstatus,
2986  _("WAL receiver process"));
2987  continue;
2988  }
2989 
2990  /*
2991  * Was it the autovacuum launcher? Normal exit can be ignored; we'll
2992  * start a new one at the next iteration of the postmaster's main
2993  * loop, if necessary. Any other exit condition is treated as a
2994  * crash.
2995  */
2996  if (pid == AutoVacPID)
2997  {
2998  AutoVacPID = 0;
2999  if (!EXIT_STATUS_0(exitstatus))
3000  HandleChildCrash(pid, exitstatus,
3001  _("autovacuum launcher process"));
3002  continue;
3003  }
3004 
3005  /*
3006  * Was it the archiver? If so, just try to start a new one; no need
3007  * to force reset of the rest of the system. (If fail, we'll try
3008  * again in future cycles of the main loop.). Unless we were waiting
3009  * for it to shut down; don't restart it in that case, and
3010  * PostmasterStateMachine() will advance to the next shutdown step.
3011  */
3012  if (pid == PgArchPID)
3013  {
3014  PgArchPID = 0;
3015  if (!EXIT_STATUS_0(exitstatus))
3016  LogChildExit(LOG, _("archiver process"),
3017  pid, exitstatus);
3018  if (PgArchStartupAllowed())
3019  PgArchPID = pgarch_start();
3020  continue;
3021  }
3022 
3023  /*
3024  * Was it the statistics collector? If so, just try to start a new
3025  * one; no need to force reset of the rest of the system. (If fail,
3026  * we'll try again in future cycles of the main loop.)
3027  */
3028  if (pid == PgStatPID)
3029  {
3030  PgStatPID = 0;
3031  if (!EXIT_STATUS_0(exitstatus))
3032  LogChildExit(LOG, _("statistics collector process"),
3033  pid, exitstatus);
3034  if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
3035  PgStatPID = pgstat_start();
3036  continue;
3037  }
3038 
3039  /* Was it the system logger? If so, try to start a new one */
3040  if (pid == SysLoggerPID)
3041  {
3042  SysLoggerPID = 0;
3043  /* for safety's sake, launch new logger *first* */
3045  if (!EXIT_STATUS_0(exitstatus))
3046  LogChildExit(LOG, _("system logger process"),
3047  pid, exitstatus);
3048  continue;
3049  }
3050 
3051  /* Was it one of our background workers? */
3052  if (CleanupBackgroundWorker(pid, exitstatus))
3053  {
3054  /* have it be restarted */
3055  HaveCrashedWorker = true;
3056  continue;
3057  }
3058 
3059  /*
3060  * Else do standard backend child cleanup.
3061  */
3062  CleanupBackend(pid, exitstatus);
3063  } /* loop over pending child-death reports */
3064 
3065  /*
3066  * After cleaning out the SIGCHLD queue, see if we have any state changes
3067  * or actions to make.
3068  */
3070 
3071  /* Done with signal handler */
3073 
3074  errno = save_errno;
3075 }
3076 
3077 /*
3078  * Scan the bgworkers list and see if the given PID (which has just stopped
3079  * or crashed) is in it. Handle its shutdown if so, and return true. If not a
3080  * bgworker, return false.
3081  *
3082  * This is heavily based on CleanupBackend. One important difference is that
3083  * we don't know yet that the dying process is a bgworker, so we must be silent
3084  * until we're sure it is.
3085  */
3086 static bool
3088  int exitstatus) /* child's exit status */
3089 {
3090  char namebuf[MAXPGPATH];
3091  slist_mutable_iter iter;
3092 
3094  {
3095  RegisteredBgWorker *rw;
3096 
3097  rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
3098 
3099  if (rw->rw_pid != pid)
3100  continue;
3101 
3102 #ifdef WIN32
3103  /* see CleanupBackend */
3104  if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3105  exitstatus = 0;
3106 #endif
3107 
3108  snprintf(namebuf, MAXPGPATH, _("background worker \"%s\""),
3109  rw->rw_worker.bgw_type);
3110 
3111 
3112  if (!EXIT_STATUS_0(exitstatus))
3113  {
3114  /* Record timestamp, so we know when to restart the worker. */
3116  }
3117  else
3118  {
3119  /* Zero exit status means terminate */
3120  rw->rw_crashed_at = 0;
3121  rw->rw_terminate = true;
3122  }
3123 
3124  /*
3125  * Additionally, for shared-memory-connected workers, just like a
3126  * backend, any exit status other than 0 or 1 is considered a crash
3127  * and causes a system-wide restart.
3128  */
3129  if ((rw->rw_worker.bgw_flags & BGWORKER_SHMEM_ACCESS) != 0)
3130  {
3131  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3132  {
3133  HandleChildCrash(pid, exitstatus, namebuf);
3134  return true;
3135  }
3136  }
3137 
3138  /*
3139  * We must release the postmaster child slot whether this worker is
3140  * connected to shared memory or not, but we only treat it as a crash
3141  * if it is in fact connected.
3142  */
3145  {
3146  HandleChildCrash(pid, exitstatus, namebuf);
3147  return true;
3148  }
3149 
3150  /* Get it out of the BackendList and clear out remaining data */
3151  dlist_delete(&rw->rw_backend->elem);
3152 #ifdef EXEC_BACKEND
3153  ShmemBackendArrayRemove(rw->rw_backend);
3154 #endif
3155 
3156  /*
3157  * It's possible that this background worker started some OTHER
3158  * background worker and asked to be notified when that worker started
3159  * or stopped. If so, cancel any notifications destined for the
3160  * now-dead backend.
3161  */
3162  if (rw->rw_backend->bgworker_notify)
3164  free(rw->rw_backend);
3165  rw->rw_backend = NULL;
3166  rw->rw_pid = 0;
3167  rw->rw_child_slot = 0;
3168  ReportBackgroundWorkerExit(&iter); /* report child death */
3169 
3170  LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG,
3171  namebuf, pid, exitstatus);
3172 
3173  return true;
3174  }
3175 
3176  return false;
3177 }
3178 
3179 /*
3180  * CleanupBackend -- cleanup after terminated backend.
3181  *
3182  * Remove all local state associated with backend.
3183  *
3184  * If you change this, see also CleanupBackgroundWorker.
3185  */
3186 static void
3188  int exitstatus) /* child's exit status. */
3189 {
3190  dlist_mutable_iter iter;
3191 
3192  LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
3193 
3194  /*
3195  * If a backend dies in an ugly way then we must signal all other backends
3196  * to quickdie. If exit status is zero (normal) or one (FATAL exit), we
3197  * assume everything is all right and proceed to remove the backend from
3198  * the active backend list.
3199  */
3200 
3201 #ifdef WIN32
3202 
3203  /*
3204  * On win32, also treat ERROR_WAIT_NO_CHILDREN (128) as nonfatal case,
3205  * since that sometimes happens under load when the process fails to start
3206  * properly (long before it starts using shared memory). Microsoft reports
3207  * it is related to mutex failure:
3208  * http://archives.postgresql.org/pgsql-hackers/2010-09/msg00790.php
3209  */
3210  if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3211  {
3212  LogChildExit(LOG, _("server process"), pid, exitstatus);
3213  exitstatus = 0;
3214  }
3215 #endif
3216 
3217  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3218  {
3219  HandleChildCrash(pid, exitstatus, _("server process"));
3220  return;
3221  }
3222 
3223  dlist_foreach_modify(iter, &BackendList)
3224  {
3225  Backend *bp = dlist_container(Backend, elem, iter.cur);
3226 
3227  if (bp->pid == pid)
3228  {
3229  if (!bp->dead_end)
3230  {
3232  {
3233  /*
3234  * Uh-oh, the child failed to clean itself up. Treat as a
3235  * crash after all.
3236  */
3237  HandleChildCrash(pid, exitstatus, _("server process"));
3238  return;
3239  }
3240 #ifdef EXEC_BACKEND
3241  ShmemBackendArrayRemove(bp);
3242 #endif
3243  }
3244  if (bp->bgworker_notify)
3245  {
3246  /*
3247  * This backend may have been slated to receive SIGUSR1 when
3248  * some background worker started or stopped. Cancel those
3249  * notifications, as we don't want to signal PIDs that are not
3250  * PostgreSQL backends. This gets skipped in the (probably
3251  * very common) case where the backend has never requested any
3252  * such notifications.
3253  */
3255  }
3256  dlist_delete(iter.cur);
3257  free(bp);
3258  break;
3259  }
3260  }
3261 }
3262 
3263 /*
3264  * HandleChildCrash -- cleanup after failed backend, bgwriter, checkpointer,
3265  * walwriter, autovacuum, or background worker.
3266  *
3267  * The objectives here are to clean up our local state about the child
3268  * process, and to signal all other remaining children to quickdie.
3269  */
3270 static void
3271 HandleChildCrash(int pid, int exitstatus, const char *procname)
3272 {
 /*
  * pid is the PID of the child that just died; it is compared against every
  * tracked child below so that the matching entry can be cleared.
  * exitstatus and procname are only used for the log message (they are
  * handed to LogChildExit).
  */
3273  dlist_mutable_iter iter;
3274  slist_iter siter;
3275  Backend *bp;
3276  bool take_action;
3277 
3278  /*
3279  * We only log messages and send signals if this is the first process
3280  * crash and we're not doing an immediate shutdown; otherwise, we're only
3281  * here to update postmaster's idea of live processes. If we have already
3282  * signalled children, nonzero exit status is to be expected, so don't
3283  * clutter log.
3284  */
3285  take_action = !FatalError && Shutdown != ImmediateShutdown;
3286 
3287  if (take_action)
3288  {
3289  LogChildExit(LOG, procname, pid, exitstatus);
3290  ereport(LOG,
3291  (errmsg("terminating any other active server processes")));
3292  }
3293 
3294  /* Process background workers. */
 /*
  * NOTE(review): the loop header that should introduce the block below is
  * not present in this listing. The body reads siter.cur and uses
  * "continue", so it is presumably an slist iteration over the registered
  * background workers -- confirm against the complete source.
  */
3296  {
3297  RegisteredBgWorker *rw;
3298 
3299  rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
3300  if (rw->rw_pid == 0)
3301  continue; /* not running */
3302  if (rw->rw_pid == pid)
3303  {
3304  /*
3305  * Found entry for freshly-dead worker, so remove it.
3306  */
3308  dlist_delete(&rw->rw_backend->elem);
3309 #ifdef EXEC_BACKEND
3310  ShmemBackendArrayRemove(rw->rw_backend);
3311 #endif
3312  free(rw->rw_backend);
3313  rw->rw_backend = NULL;
3314  rw->rw_pid = 0;
3315  rw->rw_child_slot = 0;
3316  /* don't reset crashed_at */
3317  /* don't report child stop, either */
3318  /* Keep looping so we can signal remaining workers */
3319  }
3320  else
3321  {
3322  /*
3323  * This worker is still alive. Unless we did so already, tell it
3324  * to commit hara-kiri.
3325  *
3326  * SIGQUIT is the special signal that says exit without proc_exit
3327  * and let the user know what's going on. But if SendStop is set
3328  * (-s on command line), then we send SIGSTOP instead, so that we
3329  * can get core dumps from all backends by hand.
3330  */
3331  if (take_action)
3332  {
3333  ereport(DEBUG2,
3334  (errmsg_internal("sending %s to process %d",
3335  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3336  (int) rw->rw_pid)));
 /*
  * NOTE(review): no signal_child() call is visible here even
  * though the message above announces one; the regular-backend
  * loop below does call signal_child() at the equivalent spot.
  * The call is presumably missing from this listing -- confirm.
  */
3338  }
3339  }
3340  }
3341 
3342  /* Process regular backends */
3343  dlist_foreach_modify(iter, &BackendList)
3344  {
3345  bp = dlist_container(Backend, elem, iter.cur);
3346 
3347  if (bp->pid == pid)
3348  {
3349  /*
3350  * Found entry for freshly-dead backend, so remove it.
3351  */
3352  if (!bp->dead_end)
3353  {
3355 #ifdef EXEC_BACKEND
3356  ShmemBackendArrayRemove(bp);
3357 #endif
3358  }
3359  dlist_delete(iter.cur);
3360  free(bp);
3361  /* Keep looping so we can signal remaining backends */
3362  }
3363  else
3364  {
3365  /*
3366  * This backend is still alive. Unless we did so already, tell it
3367  * to commit hara-kiri.
3368  *
3369  * SIGQUIT is the special signal that says exit without proc_exit
3370  * and let the user know what's going on. But if SendStop is set
3371  * (-s on command line), then we send SIGSTOP instead, so that we
3372  * can get core dumps from all backends by hand.
3373  *
3374  * We could exclude dead_end children here, but at least in the
3375  * SIGSTOP case it seems better to include them.
3376  *
3377  * Background workers were already processed above; ignore them
3378  * here.
3379  */
3380  if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
3381  continue;
3382 
3383  if (take_action)
3384  {
3385  ereport(DEBUG2,
3386  (errmsg_internal("sending %s to process %d",
3387  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3388  (int) bp->pid)));
3389  signal_child(bp->pid, (SendStop ? SIGSTOP : SIGQUIT));
3390  }
3391  }
3392  }
3393 
 /*
  * The special children below all follow one pattern: if the dead PID is
  * this child, forget its PID; otherwise, if the child is running and
  * take_action is set, signal it.
  */
3394  /* Take care of the startup process too */
3395  if (pid == StartupPID)
3396  {
3397  StartupPID = 0;
3399  }
3400  else if (StartupPID != 0 && take_action)
3401  {
3402  ereport(DEBUG2,
3403  (errmsg_internal("sending %s to process %d",
3404  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3405  (int) StartupPID)));
3406  signal_child(StartupPID, (SendStop ? SIGSTOP : SIGQUIT));
3408  }
3409 
3410  /* Take care of the bgwriter too */
3411  if (pid == BgWriterPID)
3412  BgWriterPID = 0;
3413  else if (BgWriterPID != 0 && take_action)
3414  {
3415  ereport(DEBUG2,
3416  (errmsg_internal("sending %s to process %d",
3417  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3418  (int) BgWriterPID)));
3419  signal_child(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
3420  }
3421 
3422  /* Take care of the checkpointer too */
3423  if (pid == CheckpointerPID)
3424  CheckpointerPID = 0;
3425  else if (CheckpointerPID != 0 && take_action)
3426  {
3427  ereport(DEBUG2,
3428  (errmsg_internal("sending %s to process %d",
3429  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3430  (int) CheckpointerPID)));
3431  signal_child(CheckpointerPID, (SendStop ? SIGSTOP : SIGQUIT));
3432  }
3433 
3434  /* Take care of the walwriter too */
3435  if (pid == WalWriterPID)
3436  WalWriterPID = 0;
3437  else if (WalWriterPID != 0 && take_action)
3438  {
3439  ereport(DEBUG2,
3440  (errmsg_internal("sending %s to process %d",
3441  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3442  (int) WalWriterPID)));
3443  signal_child(WalWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
3444  }
3445 
3446  /* Take care of the walreceiver too */
3447  if (pid == WalReceiverPID)
3448  WalReceiverPID = 0;
3449  else if (WalReceiverPID != 0 && take_action)
3450  {
3451  ereport(DEBUG2,
3452  (errmsg_internal("sending %s to process %d",
3453  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3454  (int) WalReceiverPID)));
3455  signal_child(WalReceiverPID, (SendStop ? SIGSTOP : SIGQUIT));
3456  }
3457 
3458  /* Take care of the autovacuum launcher too */
3459  if (pid == AutoVacPID)
3460  AutoVacPID = 0;
3461  else if (AutoVacPID != 0 && take_action)
3462  {
3463  ereport(DEBUG2,
3464  (errmsg_internal("sending %s to process %d",
3465  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3466  (int) AutoVacPID)));
3467  signal_child(AutoVacPID, (SendStop ? SIGSTOP : SIGQUIT));
3468  }
3469 
3470  /*
3471  * Force a power-cycle of the pgarch process too. (This isn't absolutely
3472  * necessary, but it seems like a good idea for robustness, and it
3473  * simplifies the state-machine logic in the case where a shutdown request
3474  * arrives during crash processing.)
3475  *
 * Unlike the children handled above, SendStop is not consulted here:
 * the archiver is always sent SIGQUIT.
3475  */
3476  if (PgArchPID != 0 && take_action)
3477  {
3478  ereport(DEBUG2,
3479  (errmsg_internal("sending %s to process %d",
3480  "SIGQUIT",
3481  (int) PgArchPID)));
3482  signal_child(PgArchPID, SIGQUIT);
3483  }
3484 
3485  /*
3486  * Force a power-cycle of the pgstat process too. (This isn't absolutely
3487  * necessary, but it seems like a good idea for robustness, and it
3488  * simplifies the state-machine logic in the case where a shutdown request
3489  * arrives during crash processing.)
3490  *
 * As with the archiver, this is always SIGQUIT regardless of SendStop.
3490  */
3491  if (PgStatPID != 0 && take_action)
3492  {
3493  ereport(DEBUG2,
3494  (errmsg_internal("sending %s to process %d",
3495  "SIGQUIT",
3496  (int) PgStatPID)));
3497  signal_child(PgStatPID, SIGQUIT);
3499  }
3500 
3501  /* We do NOT restart the syslogger */
3502 
 /*
  * Record that a crash happened. Because take_action is computed from
  * !FatalError, setting this also makes later calls skip the logging and
  * signalling above.
  */
3503  if (Shutdown != ImmediateShutdown)
3504  FatalError = true;
3505 
3506  /* We now transit into a state of waiting for children to die */
 /*
  * NOTE(review): as listed, the condition below has no statement of its
  * own, so syntactically it would govern the AbortStartTime test further
  * down. The state assignment it is meant to guard (and possibly one more
  * state in the condition) is presumably missing from this listing --
  * confirm against the complete source.
  */
3507  if (pmState == PM_RECOVERY ||
3508  pmState == PM_HOT_STANDBY ||
3509  pmState == PM_RUN ||
3510  pmState == PM_WAIT_BACKUP ||
3512  pmState == PM_SHUTDOWN)
3514 
3515  /*
3516  * .. and if this doesn't happen quickly enough, now the clock is ticking
3517  * for us to kill them without mercy.
3518  */
3519  if (AbortStartTime == 0)
3520  AbortStartTime = time(NULL);
3521 }
3522 
3523 /*
3524  * Log the death of a child process.
3525  */
3526 static void
3527 LogChildExit(int lev, const char *procname, int pid, int exitstatus)
3528 {
3529  /*
3530  * size of activity_buffer is arbitrary, but set equal to default
3531  * track_activity_query_size
3532  */
3533  char activity_buffer[1024];
3534  const char *activity = NULL;
3535 
3536  if (!EXIT_STATUS_0(exitstatus))
3537  activity = pgstat_get_crashed_backend_activity(pid,
3538  activity_buffer,
3539  sizeof(activity_buffer));
3540 
3541  if (WIFEXITED(exitstatus))
3542  ereport(lev,
3543 
3544  /*------
3545  translator: %s is a noun phrase describing a child process, such as
3546  "server process" */
3547  (errmsg("%s (PID %d) exited with exit code %d",
3548  procname, pid, WEXITSTATUS(exitstatus)),
3549  activity ? errdetail("Failed process was running: %s", activity) : 0));
3550  else if (WIFSIGNALED(exitstatus))
3551 #if defined(WIN32)
3552  ereport(lev,
3553 
3554  /*------
3555  translator: %s is a noun phrase describing a child process, such as
3556  "server process" */
3557  (errmsg("%s (PID %d) was terminated by exception 0x%X",
3558  procname, pid, WTERMSIG(exitstatus)),
3559  errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
3560  activity ? errdetail("Failed process was running: %s", activity) : 0));
3561 #elif defined(HAVE_DECL_SYS_SIGLIST) && HAVE_DECL_SYS_SIGLIST
3562  ereport(lev,
3563 
3564  /*------
3565  translator: %s is a noun phrase describing a child process, such as
3566  "server process" */
3567  (errmsg("%s (PID %d) was terminated by signal %d: %s",
3568  procname, pid, WTERMSIG(exitstatus),
3569  WTERMSIG(exitstatus) < NSIG ?
3570  sys_siglist[WTERMSIG(exitstatus)] : "(unknown)"),
3571  activity ? errdetail("Failed process was running: %s", activity) : 0));
3572 #else
3573  ereport(lev,
3574 
3575  /*------
3576  translator: %s is a noun phrase describing a child process, such as
3577  "server process" */
3578  (errmsg("%s (PID %d) was terminated by signal %d",
3579  procname, pid, WTERMSIG(exitstatus)),
3580  activity ? errdetail("Failed process was running: %s", activity) : 0));
3581 #endif
3582  else
3583  ereport(lev,
3584 
3585  /*------
3586  translator: %s is a noun phrase describing a child process, such as
3587  "server process" */
3588  (errmsg("%s (PID %d) exited with unrecognized status %d",
3589  procname, pid, exitstatus),
3590  activity ? errdetail("Failed process was running: %s", activity) : 0));
3591 }
3592 
3593 /*
3594  * Advance the postmaster's state machine and take actions as appropriate
3595  *
3596  * This is common code for pmdie(), reaper() and sigusr1_handler(), which
3597  * receive the signals that might mean we need to change state.
3598  */
3599 static void
 /*
  * NOTE(review): the line carrying this function's name and parameter list
  * is absent from this listing, and so are a number of conditions and
  * pmState assignments inside the body (several "if" statements and braces
  * below have lost the line that introduced them). The comments describe
  * the intended transitions; verify every step against the complete source
  * before relying on the code as shown.
  */
3601 {
3602  if (pmState == PM_WAIT_BACKUP)
3603  {
3604  /*
3605  * PM_WAIT_BACKUP state ends when online backup mode is not active.
3606  */
3607  if (!BackupInProgress())
3609  }
3610 
3611  if (pmState == PM_WAIT_READONLY)
3612  {
3613  /*
3614  * PM_WAIT_READONLY state ends when we have no regular backends that
3615  * have been started during recovery. We kill the startup and
3616  * walreceiver processes and transition to PM_WAIT_BACKENDS. Ideally,
3617  * we might like to kill these processes first and then wait for
3618  * backends to die off, but that doesn't work at present because
3619  * killing the startup process doesn't release its locks.
3620  */
3622  {
3623  if (StartupPID != 0)
3624  signal_child(StartupPID, SIGTERM);
3625  if (WalReceiverPID != 0)
3626  signal_child(WalReceiverPID, SIGTERM);
3628  }
3629  }
3630 
3631  /*
3632  * If we are in a state-machine state that implies waiting for backends to
3633  * exit, see if they're all gone, and change state if so.
3634  */
3635  if (pmState == PM_WAIT_BACKENDS)
3636  {
3637  /*
3638  * PM_WAIT_BACKENDS state ends when we have no regular backends
3639  * (including autovac workers), no bgworkers (including unconnected
3640  * ones), and no walwriter, autovac launcher or bgwriter. If we are
3641  * doing crash recovery or an immediate shutdown then we expect the
3642  * checkpointer to exit as well, otherwise not. The archiver, stats,
3643  * and syslogger processes are disregarded since they are not
3644  * connected to shared memory; we also disregard dead_end children
3645  * here. Walsenders are also disregarded, they will be terminated
3646  * later after writing the checkpoint record, like the archiver
3647  * process.
3648  */
3650  StartupPID == 0 &&
3651  WalReceiverPID == 0 &&
3652  BgWriterPID == 0 &&
3653  (CheckpointerPID == 0 ||
3655  WalWriterPID == 0 &&
3656  AutoVacPID == 0)
3657  {
3659  {
3660  /*
3661  * Start waiting for dead_end children to die. This state
3662  * change causes ServerLoop to stop creating new ones.
3663  */
3665 
3666  /*
3667  * We already SIGQUIT'd the archiver and stats processes, if
3668  * any, when we started immediate shutdown or entered
3669  * FatalError state.
3670  */
3671  }
3672  else
3673  {
3674  /*
3675  * If we get here, we are proceeding with normal shutdown. All
3676  * the regular children are gone, and it's time to tell the
3677  * checkpointer to do a shutdown checkpoint.
3678  */
3680  /* Start the checkpointer if not running */
3681  if (CheckpointerPID == 0)
3683  /* And tell it to shut down */
3684  if (CheckpointerPID != 0)
3685  {
3687  pmState = PM_SHUTDOWN;
3688  }
3689  else
3690  {
3691  /*
3692  * If we failed to fork a checkpointer, just shut down.
3693  * Any required cleanup will happen at next restart. We
3694  * set FatalError so that an "abnormal shutdown" message
3695  * gets logged when we exit.
3696  */
3697  FatalError = true;
3699 
3700  /* Kill the walsenders, archiver and stats collector too */
3702  if (PgArchPID != 0)
3704  if (PgStatPID != 0)
3706  }
3707  }
3708  }
3709  }
3710 
3711  if (pmState == PM_SHUTDOWN_2)
3712  {
3713  /*
3714  * PM_SHUTDOWN_2 state ends when there are no children left other than
3715  * dead_end children. There shouldn't be any regular backends
3716  * left by now anyway; what we're really waiting for is walsenders and
3717  * archiver.
3718  *
3719  * Walreceiver should normally be dead by now, but not when a fast
3720  * shutdown is performed during recovery.
3721  */
3722  if (PgArchPID == 0 && CountChildren(BACKEND_TYPE_ALL) == 0 &&
3723  WalReceiverPID == 0)
3724  {
3726  }
3727  }
3728 
3729  if (pmState == PM_WAIT_DEAD_END)
3730  {
3731  /*
3732  * PM_WAIT_DEAD_END state ends when the BackendList is entirely empty
3733  * (ie, no dead_end children remain), and the archiver and stats
3734  * collector are gone too.
3735  *
3736  * The reason we wait for those two is to protect them against a new
3737  * postmaster starting conflicting subprocesses; this isn't an
3738  * ironclad protection, but it at least helps in the
3739  * shutdown-and-immediately-restart scenario. Note that they have
3740  * already been sent appropriate shutdown signals, either during a
3741  * normal state transition leading up to PM_WAIT_DEAD_END, or during
3742  * FatalError processing.
3743  */
3744  if (dlist_is_empty(&BackendList) &&
3745  PgArchPID == 0 && PgStatPID == 0)
3746  {
3747  /* These other guys should be dead already */
3748  Assert(StartupPID == 0);
3749  Assert(WalReceiverPID == 0);
3750  Assert(BgWriterPID == 0);
3751  Assert(CheckpointerPID == 0);
3752  Assert(WalWriterPID == 0);
3753  Assert(AutoVacPID == 0);
3754  /* syslogger is not considered here */
3756  }
3757  }
3758 
3759  /*
3760  * If we've been told to shut down, we exit as soon as there are no
3761  * remaining children. If there was a crash, cleanup will occur at the
3762  * next startup. (Before PostgreSQL 8.3, we tried to recover from the
3763  * crash before exiting, but that seems unwise if we are quitting because
3764  * we got SIGTERM from init --- there may well not be time for recovery
3765  * before init decides to SIGKILL us.)
3766  *
3767  * Note that the syslogger continues to run. It will exit when it sees
3768  * EOF on its input pipe, which happens when there are no more upstream
3769  * processes.
3770  */
3772  {
3773  if (FatalError)
3774  {
3775  ereport(LOG, (errmsg("abnormal database system shutdown")));
3776  ExitPostmaster(1);
3777  }
3778  else
3779  {
3780  /*
3781  * Terminate exclusive backup mode to avoid recovery after a clean
3782  * fast shutdown. Since an exclusive backup can only be taken
3783  * during normal running (and not, for example, while running
3784  * under Hot Standby) it only makes sense to do this if we reached
3785  * normal running. If we're still in recovery, the backup file is
3786  * one we're recovering *from*, and we must keep it around so that
3787  * recovery restarts from the right place.
3788  */
3790  CancelBackup();
3791 
3792  /* Normal exit from the postmaster is here */
3793  ExitPostmaster(0);
3794  }
3795  }
3796 
3797  /*
3798  * If the startup process failed, or the user does not want an automatic
3799  * restart after backend crashes, wait for all non-syslogger children to
3800  * exit, and then exit postmaster. We don't try to reinitialize when the
3801  * startup process fails, because more than likely it will just fail again
3802  * and we will keep trying forever.
3803  */
3804  if (pmState == PM_NO_CHILDREN &&
3806  ExitPostmaster(1);
3807 
3808  /*
3809  * If we need to recover from a crash, wait for all non-syslogger children
3810  * to exit, then reset shmem and StartupDataBase.
3811  */
3812  if (FatalError && pmState == PM_NO_CHILDREN)
3813  {
3814  ereport(LOG,
3815  (errmsg("all server processes terminated; reinitializing")));
3816 
3817  /* allow background workers to immediately restart */
3819 
3820  shmem_exit(1);
3821 
3822  /* re-read control file into local memory */
3824 
3826 
3828  Assert(StartupPID != 0);
3830  pmState = PM_STARTUP;
3831  /* crash recovery started, reset SIGKILL flag */
3832  AbortStartTime = 0;
3833  }
3834 }
3835 
3836 
3837 /*
3838  * Send a signal to a postmaster child process
3839  *
3840  * On systems that have setsid(), each child process sets itself up as a
3841  * process group leader. For signals that are generally interpreted in the
3842  * appropriate fashion, we signal the entire process group not just the
3843  * direct child process. This allows us to, for example, SIGQUIT a blocked
3844  * archive_recovery script, or SIGINT a script being run by a backend via
3845  * system().
3846  *
3847  * There is a race condition for recently-forked children: they might not
3848  * have executed setsid() yet. So we signal the child directly as well as
3849  * the group. We assume such a child will handle the signal before trying
3850  * to spawn any grandchild processes. We also assume that signaling the
3851  * child twice will not cause any problems.
3852  */
3853 static void
3854 signal_child(pid_t pid, int signal)
3855 {
3856  if (kill(pid, signal) < 0)
3857  elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) pid, signal);
3858 #ifdef HAVE_SETSID
3859  switch (signal)
3860  {
3861  case SIGINT:
3862  case SIGTERM:
3863  case SIGQUIT:
3864  case SIGSTOP:
3865  case SIGKILL:
3866  if (kill(-pid, signal) < 0)
3867  elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) (-pid), signal);
3868  break;
3869  default:
3870  break;
3871  }
3872 #endif
3873 }
3874 
3875 /*
3876  * Send a signal to the targeted children (but NOT special children;
3877  * dead_end children are never signaled, either).
3878  */
3879 static bool
3880 SignalSomeChildren(int signal, int target)
3881 {
 /*
  * signal is the signal number handed to signal_child() for each selected
  * child. target is either BACKEND_TYPE_ALL or a bitmask that is ANDed
  * with each entry's bkend_type to decide whether it is selected.
  *
  * Returns true if at least one child was signaled, false otherwise.
  */
3882  dlist_iter iter;
3883  bool signaled = false;
3884 
3885  dlist_foreach(iter, &BackendList)
3886  {
3887  Backend *bp = dlist_container(Backend, elem, iter.cur);
3888 
 /* dead_end children are skipped unconditionally */
3889  if (bp->dead_end)
3890  continue;
3891 
3892  /*
3893  * Since target == BACKEND_TYPE_ALL is the most common case, we test
3894  * it first and avoid touching shared memory for every child.
3895  */
3896  if (target != BACKEND_TYPE_ALL)
3897  {
3898  /*
3899  * Assign bkend_type for any recently announced WAL Sender
3900  * processes.
3901  *
 * NOTE(review): the rest of the condition below and the statement
 * it controls are absent from this listing (the "if" ends at
 * "&&"). Presumably it reclassifies the entry as a walsender --
 * confirm against the complete source.
3901  */
3902  if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
3905 
3906  if (!(target & bp->bkend_type))
3907  continue;
3908  }
3909 
3910  ereport(DEBUG4,
3911  (errmsg_internal("sending signal %d to process %d",
3912  signal, (int) bp->pid)));
3913  signal_child(bp->pid, signal);
3914  signaled = true;
3915  }
3916  return signaled;
3917 }
3918 
3919 /*
3920  * Send a termination signal to children. This considers all of our children
3921  * processes, except syslogger and dead_end backends.
3922  */
3923 static void
 /*
  * NOTE(review): the line carrying this function's name and parameter list
  * is absent from this listing; the body uses a single value named
  * "signal".
  */
3925 {
 /* First the children reached through SignalChildren() ... */
3926  SignalChildren(signal);
 /* ... then each special child that is currently running (PID != 0). */
3927  if (StartupPID != 0)
3928  {
3929  signal_child(StartupPID, signal);
 /*
  * NOTE(review): the statement controlled by this test is absent from
  * this listing; as shown, the "if" has no body.
  */
3930  if (signal == SIGQUIT || signal == SIGKILL)
3932  }
3933  if (BgWriterPID != 0)
3934  signal_child(BgWriterPID, signal);
3935  if (CheckpointerPID != 0)
3936  signal_child(CheckpointerPID, signal);
3937  if (WalWriterPID != 0)
3938  signal_child(WalWriterPID, signal);
3939  if (WalReceiverPID != 0)
3940  signal_child(WalReceiverPID, signal);
3941  if (AutoVacPID != 0)
3942  signal_child(AutoVacPID, signal);
3943  if (PgArchPID != 0)
3944  signal_child(PgArchPID, signal);
3945  if (PgStatPID != 0)
3946  signal_child(PgStatPID, signal);
3947 }
3948 
3949 /*
3950  * BackendStartup -- start backend process
3951  *
3952  * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
3953  *
3954  * Note: if you change this code, also consider StartAutovacuumWorker.
3955  */
3956 static int
 /*
  * NOTE(review): the line carrying this function's name and parameter list
  * is absent from this listing; the body uses a connection object named
  * "port" (port->sock, port->canAcceptConnections).
  */
3958 {
3959  Backend *bn; /* for backend cleanup */
3960  pid_t pid;
3961 
3962  /*
3963  * Create backend data structure. Better before the fork() so we can
3964  * handle failure cleanly.
3965  */
3966  bn = (Backend *) malloc(sizeof(Backend));
3967  if (!bn)
3968  {
3969  ereport(LOG,
3970  (errcode(ERRCODE_OUT_OF_MEMORY),
3971  errmsg("out of memory")));
3972  return STATUS_ERROR;
3973  }
3974 
3975  /*
3976  * Compute the cancel key that will be assigned to this backend. The
3977  * backend will have its own copy in the forked-off process' value of
3978  * MyCancelKey, so that it can transmit the key to the frontend.
3979  *
 * NOTE(review): the test that guards the failure block below is absent
 * from this listing; judging by the message it is the cancel-key
 * generation call -- confirm against the complete source.
3979  */
3981  {
3982  free(bn);
3983  ereport(LOG,
3984  (errcode(ERRCODE_INTERNAL_ERROR),
3985  errmsg("could not generate random cancel key")));
3986  return STATUS_ERROR;
3987  }
3988 
3989  bn->cancel_key = MyCancelKey;
3990 
3991  /* Pass down canAcceptConnections state */
 /*
  * NOTE(review): the assignment to port->canAcceptConnections and the
  * second half of the dead_end expression are absent from this listing.
  */
3993  bn->dead_end = (port->canAcceptConnections != CAC_OK &&
3995 
3996  /*
3997  * Unless it's a dead_end child, assign it a child slot number
3998  *
 * NOTE(review): the slot-assigning statement for the non-dead_end case
 * is absent from this listing.
3998  */
3999  if (!bn->dead_end)
4001  else
4002  bn->child_slot = 0;
4003 
4004  /* Hasn't asked to be notified about any bgworkers yet */
4005  bn->bgworker_notify = false;
4006 
4007 #ifdef EXEC_BACKEND
4008  pid = backend_forkexec(port);
4009 #else /* !EXEC_BACKEND */
4010  pid = fork_process();
4011  if (pid == 0) /* child */
4012  {
 /* the child does not need the postmaster's bookkeeping entry */
4013  free(bn);
4014 
4015  /* Detangle from postmaster */
4017 
4018  /* Close the postmaster's sockets */
4019  ClosePostmasterPorts(false);
4020 
4021  /* Perform additional initialization and collect startup packet */
4022  BackendInitialize(port);
4023 
4024  /* And run the backend */
4025  BackendRun(port);
4026  }
4027 #endif /* EXEC_BACKEND */
4028 
4029  if (pid < 0)
4030  {
4031  /* in parent, fork failed */
 /* save errno now so the cleanup below cannot clobber it before %m */
4032  int save_errno = errno;
4033 
4034  if (!bn->dead_end)
4036  free(bn);
4037  errno = save_errno;
4038  ereport(LOG,
4039  (errmsg("could not fork new process for connection: %m")));
4040  report_fork_failure_to_client(port, save_errno);
4041  return STATUS_ERROR;
4042  }
4043 
4044  /* in parent, successful fork */
4045  ereport(DEBUG2,
4046  (errmsg_internal("forked new backend, pid=%d socket=%d",
4047  (int) pid, (int) port->sock)));
4048 
4049  /*
4050  * Everything's been successful, it's safe to add this backend to our list
4051  * of backends.
4052  */
4053  bn->pid = pid;
4054  bn->bkend_type = BACKEND_TYPE_NORMAL; /* Can change later to WALSND */
4055  dlist_push_head(&BackendList, &bn->elem);
4056 
4057 #ifdef EXEC_BACKEND
4058  if (!bn->dead_end)
4059  ShmemBackendArrayAdd(bn);
4060 #endif
4061 
4062  return STATUS_OK;
4063 }
4064 
4065 /*
4066  * Try to report backend fork() failure to client before we close the
4067  * connection. Since we do not care to risk blocking the postmaster on
4068  * this connection, we set the connection to non-blocking and try only once.
4069  *
4070  * This is grungy special-purpose code; we cannot use backend libpq since
4071  * it's not up and running.
4072  */
4073 static void
 /*
  * NOTE(review): the line carrying this function's name and parameter list
  * is absent from this listing; the body uses "port" (for port->sock) and
  * "errnum" (passed to strerror).
  */
4075 {
4076  char buffer[1000];
4077  int rc;
4078 
4079  /* Format the error message packet (always V2 protocol) */
 /* snprintf bounds the write to the buffer; a long message is truncated */
4080  snprintf(buffer, sizeof(buffer), "E%s%s\n",
4081  _("could not fork new process for connection: "),
4082  strerror(errnum));
4083 
4084  /* Set port to non-blocking. Don't do send() if this fails */
4085  if (!pg_set_noblock(port->sock))
4086  return;
4087 
4088  /* We'll retry after EINTR, but ignore all other failures */
 /* the +1 sends the terminating NUL byte along with the text */
4089  do
4090  {
4091  rc = send(port->sock, buffer, strlen(buffer) + 1, 0);
4092  } while (rc < 0 && errno == EINTR);
4093 }
4094 
4095 
4096 /*
4097  * BackendInitialize -- initialize an interactive (postmaster-child)
4098  * backend process, and collect the client's startup packet.
4099  *
4100  * returns: nothing. Will not return at all if there's any failure.
4101  *
4102  * Note: this code does not depend on having any access to shared memory.
4103  * In the EXEC_BACKEND case, we are physically attached to shared memory
4104  * but have not yet set up most of our local pointers to shmem structures.
4105  */
4106 static void
 /*
  * NOTE(review): the line carrying this function's name and parameter list
  * is absent from this listing; the body uses a connection object named
  * "port". Several other statements are absent as well and are flagged
  * below where the gap is visible.
  */
4108 {
4109  int status;
4110  int ret;
4111  char remote_host[NI_MAXHOST];
4112  char remote_port[NI_MAXSERV];
4113  char remote_ps_data[NI_MAXHOST];
4114 
4115  /* Save port etc. for ps status */
4116  MyProcPort = port;
4117 
4118  /*
4119  * PreAuthDelay is a debugging aid for investigating problems in the
4120  * authentication cycle: it can be set in postgresql.conf to allow time to
4121  * attach to the newly-forked backend with a debugger. (See also
4122  * PostAuthDelay, which we allow clients to pass through PGOPTIONS, but it
4123  * is not honored until after authentication.)
4124  */
4125  if (PreAuthDelay > 0)
4126  pg_usleep(PreAuthDelay * 1000000L);
4127 
4128  /* This flag will remain set until InitPostgres finishes authentication */
4129  ClientAuthInProgress = true; /* limit visibility of log messages */
4130 
4131  /* save process start time */
 /* NOTE(review): the statements that do this are absent from this listing. */
4134 
4135  /* set these to empty in case they are needed before we set them up */
4136  port->remote_host = "";
4137  port->remote_port = "";
4138 
4139  /*
4140  * Initialize libpq and enable reporting of ereport errors to the client.
4141  * Must do this now because authentication uses libpq to send messages.
4142  */
4143  pq_init(); /* initialize libpq to talk to client */
4144  whereToSendOutput = DestRemote; /* now safe to ereport to client */
4145 
4146  /*
4147  * We arrange for a simple exit(1) if we receive SIGTERM or SIGQUIT or
4148  * timeout while trying to collect the startup packet. Otherwise the
4149  * postmaster cannot shutdown the database FAST or IMMED cleanly if a
4150  * buggy client fails to send the packet promptly. XXX it follows that
4151  * the remainder of this function must tolerate losing control at any
4152  * instant. Likewise, any pg_on_exit_callback registered before or during
4153  * this function must be prepared to execute at any instant between here
4154  * and the end of this function. Furthermore, affected callbacks execute
4155  * partially or not at all when a second exit-inducing signal arrives
4156  * after proc_exit_prepare() decrements on_proc_exit_index. (Thanks to
4157  * that mechanic, callbacks need not anticipate more than one call.) This
4158  * is fragile; it ought to instead follow the norm of handling interrupts
4159  * at selected, safe opportunities.
4160  *
 * NOTE(review): only the SIGTERM handler installation is visible below;
 * the SIGQUIT counterpart and the signal-mask change that the comment
 * implies are absent from this listing.
4160  */
4161  pqsignal(SIGTERM, startup_die);
4163  InitializeTimeouts(); /* establishes SIGALRM handler */
4165 
4166  /*
4167  * Get the remote host name and port for logging and status display.
4168  */
 /* start with empty strings so a failed lookup still leaves valid text */
4169  remote_host[0] = '\0';
4170  remote_port[0] = '\0';
4171  if ((ret = pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen,
4172  remote_host, sizeof(remote_host),
4173  remote_port, sizeof(remote_port),
4174  (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV)) != 0)
4175  ereport(WARNING,
4176  (errmsg_internal("pg_getnameinfo_all() failed: %s",
4177  gai_strerror(ret))));
 /* ps display string: "host" alone, or "host(port)" when a port is known */
4178  if (remote_port[0] == '\0')
4179  snprintf(remote_ps_data, sizeof(remote_ps_data), "%s", remote_host);
4180  else
4181  snprintf(remote_ps_data, sizeof(remote_ps_data), "%s(%s)", remote_host, remote_port);
4182 
4183  /*
4184  * Save remote_host and remote_port in port structure (after this, they
4185  * will appear in log_line_prefix data for log messages).
4186  */
4187  port->remote_host = strdup(remote_host);
4188  port->remote_port = strdup(remote_port);
4189 
4190  /* And now we can issue the Log_connections message, if wanted */
4191  if (Log_connections)
4192  {
4193  if (remote_port[0])
4194  ereport(LOG,
4195  (errmsg("connection received: host=%s port=%s",
4196  remote_host,
4197  remote_port)));
4198  else
4199  ereport(LOG,
4200  (errmsg("connection received: host=%s",
4201  remote_host)));
4202  }
4203 
4204  /*
4205  * If we did a reverse lookup to name, we might as well save the results
4206  * rather than possibly repeating the lookup during authentication.
4207  *
4208  * Note that we don't want to specify NI_NAMEREQD above, because then we'd
4209  * get nothing useful for a client without an rDNS entry. Therefore, we
4210  * must check whether we got a numeric IPv4 or IPv6 address, and not save
4211  * it into remote_hostname if so. (This test is conservative and might
4212  * sometimes classify a hostname as numeric, but an error in that
4213  * direction is safe; it only results in a possible extra lookup.)
4214  *
 * The two strspn() tests below implement that check: the name is kept
 * only if it contains a character outside "0123456789." and also one
 * outside the hex-digits-and-colon set.
4214  */
4215  if (log_hostname &&
4216  ret == 0 &&
4217  strspn(remote_host, "0123456789.") < strlen(remote_host) &&
4218  strspn(remote_host, "0123456789ABCDEFabcdef:") < strlen(remote_host))
4219  port->remote_hostname = strdup(remote_host);
4220 
4221  /*
4222  * Ready to begin client interaction. We will give up and exit(1) after a
4223  * time delay, so that a broken client can't hog a connection
4224  * indefinitely. PreAuthDelay and any DNS interactions above don't count
4225  * against the time limit.
4226  *
4227  * Note: AuthenticationTimeout is applied here while waiting for the
4228  * startup packet, and then again in InitPostgres for the duration of any
4229  * authentication operations. So a hostile client could tie up the
4230  * process for nearly twice AuthenticationTimeout before we kick him off.
4231  *
4232  * Note: because PostgresMain will call InitializeTimeouts again, the
4233  * registration of STARTUP_PACKET_TIMEOUT will be lost. This is okay
4234  * since we never use it again after this function.
4235  *
 * NOTE(review): the statements that register and arm this timeout are
 * absent from this listing.
4235  */
4238 
4239  /*
4240  * Receive the startup packet (which might turn out to be a cancel request
4241  * packet).
4242  */
4243  status = ProcessStartupPacket(port, false);
4244 
4245  /*
4246  * Stop here if it was bad or a cancel packet. ProcessStartupPacket
4247  * already did any appropriate error reporting.
4248  */
4249  if (status != STATUS_OK)
4250  proc_exit(0);
4251 
4252  /*
4253  * Now that we have the user and database name, we can set the process
4254  * title for ps. It's good to do this as early as possible in startup.
4255  *
4256  * For a walsender, the ps display is set in the following form:
4257  *
4258  * postgres: walsender <user> <host> <activity>
4259  *
4260  * To achieve that, we pass "walsender" as username and username as dbname
4261  * to init_ps_display(). XXX: should add a new variant of
4262  * init_ps_display() to avoid abusing the parameters like this.
4263  *
 * NOTE(review): the first line of the walsender call (the function name
 * and leading arguments) is absent from this listing; only its last
 * argument line is visible below.
4263  */
4264  if (am_walsender)
4266  update_process_title ? "authentication" : "");
4267  else
4268  init_ps_display(port->user_name, port->database_name, remote_ps_data,
4269  update_process_title ? "authentication" : "");
4270 
4271  /*
4272  * Disable the timeout, and prevent SIGTERM/SIGQUIT again.
4273  *
 * NOTE(review): the timeout-disabling statement is absent from this
 * listing; only the signal-mask change is visible.
4273  */
4275  PG_SETMASK(&BlockSig);
4276 }
4277 
4278 
4279 /*
4280  * BackendRun -- set up the backend's argument list and invoke PostgresMain()
4281  *
4282  * returns:
4283  * Shouldn't return at all. The function is declared void, so no
4284  * status is passed back to the caller even if PostgresMain() returns.
4285  */
4286 static void
 /*
  * NOTE(review): the line carrying this function's name and parameter list
  * is absent from this listing; the body uses a connection object named
  * "port" (SessionStartTime, database_name, user_name).
  */
4288 {
4289  char **av;
4290  int maxac;
4291  int ac;
4292  long secs;
4293  int usecs;
4294  int i;
4295 
4296  /*
4297  * Don't want backend to be able to see the postmaster random number
4298  * generator state. We have to clobber the static random_seed *and* start
4299  * a new random sequence in the random() library function.
4300  */
4301 #ifndef HAVE_STRONG_RANDOM
4302  random_seed = 0;
4303  random_start_time.tv_usec = 0;
4304 #endif
4305  /* slightly hacky way to convert timestamptz into integers */
4306  TimestampDifference(0, port->SessionStartTime, &secs, &usecs);
 /* reseed from the PID mixed with the session start time */
4307  srandom((unsigned int) (MyProcPid ^ (usecs << 12) ^ secs));
4308 
4309  /*
4310  * Now, build the argv vector that will be given to PostgresMain.
4311  *
4312  * The maximum possible number of commandline arguments that could come
4313  * from ExtraOptions is (strlen(ExtraOptions) + 1) / 2; see
4314  * pg_split_opts().
4315  */
4316  maxac = 2; /* for fixed args supplied below */
4317  maxac += (strlen(ExtraOptions) + 1) / 2;
4318 
4319  av = (char **) MemoryContextAlloc(TopMemoryContext,
4320  maxac * sizeof(char *));
4321  ac = 0;
4322 
4323  av[ac++] = "postgres";
4324 
4325  /*
4326  * Pass any backend switches specified with -o on the postmaster's own
4327  * command line. We assume these are secure.
4328  */
4329  pg_split_opts(av, &ac, ExtraOptions);
4330 
 /* the two fixed slots counted in maxac are av[0] and this terminator */
4331  av[ac] = NULL;
4332 
 /* note: this sanity check runs after the terminator was already stored */
4333  Assert(ac < maxac);
4334 
4335  /*
4336  * Debug: print arguments being passed to backend
4337  */
4338  ereport(DEBUG3,
4339  (errmsg_internal("%s child[%d]: starting with (",
4340  progname, (int) getpid())));
4341  for (i = 0; i < ac; ++i)
4342  ereport(DEBUG3,
4343  (errmsg_internal("\t%s", av[i])));
4344  ereport(DEBUG3,
4345  (errmsg_internal(")")));
4346 
4347  /*
4348  * Make sure we aren't in PostmasterContext anymore. (We can't delete it
4349  * just yet, though, because InitPostgres will need the HBA data.)
4350  *
 * NOTE(review): the statement that performs this context switch is
 * absent from this listing.
4350  */
4352 
4353  PostgresMain(ac, av, port->database_name, port->user_name);
4354 }
4355 
4356 
4357 #ifdef EXEC_BACKEND
4358 
4359 /*
4360  * postmaster_forkexec -- fork and exec a postmaster subprocess
4361  *
4362  * The caller must have set up the argv array already, except for argv[2]
4363  * which will be filled with the name of the temp variable file.
4364  *
4365  * Returns the child process PID, or -1 on fork failure (a suitable error
4366  * message has been logged on failure).
4367  *
4368  * All uses of this routine will dispatch to SubPostmasterMain in the
4369  * child process.
4370  */
4371 pid_t
4372 postmaster_forkexec(int argc, char *argv[])
4373 {
4374  Port port;
4375 
4376  /* This entry point passes dummy values for the Port variables */
4377  memset(&port, 0, sizeof(port));
4378  return internal_forkexec(argc, argv, &port);
4379 }
4380 
4381 /*
4382  * backend_forkexec -- fork/exec off a backend process
4383  *
4384  * Some operating systems (WIN32) don't have fork() so we have to simulate
4385  * it by storing parameters that need to be passed to the child and
4386  * then create a new child process.
4387  *
4388  * returns the pid of the fork/exec'd process, or -1 on failure
4389  */
4390 static pid_t
4391 backend_forkexec(Port *port)
4392 {
4393  char *av[4];
4394  int ac = 0;
4395 
4396  av[ac++] = "postgres";
4397  av[ac++] = "--forkbackend";
4398  av[ac++] = NULL; /* filled in by internal_forkexec */
4399 
4400  av[ac] = NULL;
4401  Assert(ac < lengthof(av));
4402 
4403  return internal_forkexec(ac, av, port);
4404 }
4405 
4406 #ifndef WIN32
4407 
4408 /*
4409  * internal_forkexec non-win32 implementation
4410  *
4411  * - writes out backend variables to the parameter file
4412  * - fork():s, and then exec():s the child process
4413  */
4414 static pid_t
4415 internal_forkexec(int argc, char *argv[], Port *port)
4416 {
4417  static unsigned long tmpBackendFileNum = 0;
4418  pid_t pid;
4419  char tmpfilename[MAXPGPATH];
4420  BackendParameters param;
4421  FILE *fp;
4422 
4423  if (!save_backend_variables(&param, port))
4424  return -1; /* log made by save_backend_variables */
4425 
4426  /* Calculate name for temp file */
4427  snprintf(tmpfilename, MAXPGPATH, "%s/%s.backend_var.%d.%lu",
4429  MyProcPid, ++tmpBackendFileNum);
4430 
4431  /* Open file */
4432  fp = AllocateFile(tmpfilename, PG_BINARY_W);
4433  if (!fp)
4434  {
4435  /*
4436  * As in OpenTemporaryFileInTablespace, try to make the temp-file
4437  * directory, ignoring errors.
4438  */
4440 
4441  fp = AllocateFile(tmpfilename, PG_BINARY_W);
4442  if (!fp)
4443  {
4444  ereport(LOG,
4446  errmsg("could not create file \"%s\": %m",
4447  tmpfilename)));
4448  return -1;
4449  }
4450  }
4451 
4452  if (fwrite(&param, sizeof(param), 1, fp) != 1)
4453  {
4454  ereport(LOG,
4456  errmsg("could not write to file \"%s\": %m", tmpfilename)));
4457  FreeFile(fp);
4458  return -1;
4459  }
4460 
4461  /* Release file */
4462  if (FreeFile(fp))
4463  {
4464  ereport(LOG,
4466  errmsg("could not write to file \"%s\": %m", tmpfilename)));
4467  return -1;
4468  }
4469 
4470  /* Make sure caller set up argv properly */
4471  Assert(argc >= 3);
4472  Assert(argv[argc] == NULL);
4473  Assert(strncmp(argv[1], "--fork", 6) == 0);
4474  Assert(argv[2] == NULL);
4475 
4476  /* Insert temp file name after --fork argument */
4477  argv[2] = tmpfilename;
4478 
4479  /* Fire off execv in child */
4480  if ((pid = fork_process()) == 0)
4481  {
4482  if (execv(postgres_exec_path, argv) < 0)
4483  {
4484  ereport(LOG,
4485  (errmsg("could not execute server process \"%s\": %m",
4486  postgres_exec_path)));
4487  /* We're already in the child process here, can't return */
4488  exit(1);
4489  }
4490  }
4491 
4492  return pid; /* Parent returns pid, or -1 on fork failure */
4493 }
4494 #else /* WIN32 */
4495 
4496 /*
4497  * internal_forkexec win32 implementation
4498  *
4499  * - starts backend using CreateProcess(), in suspended state
4500  * - writes out backend variables to the parameter file
4501  * - during this, duplicates handles and sockets required for
4502  * inheritance into the new process
4503  * - resumes execution of the new process once the backend parameter
4504  * file is complete.
4505  */
4506 static pid_t
4507 internal_forkexec(int argc, char *argv[], Port *port)
4508 {
4509  int retry_count = 0;
4510  STARTUPINFO si;
4511  PROCESS_INFORMATION pi;
4512  int i;
4513  int j;
4514  char cmdLine[MAXPGPATH * 2];
4515  HANDLE paramHandle;
4516  BackendParameters *param;
4517  SECURITY_ATTRIBUTES sa;
4518  char paramHandleStr[32];
4519  win32_deadchild_waitinfo *childinfo;
4520 
4521  /* Make sure caller set up argv properly */
4522  Assert(argc >= 3);
4523  Assert(argv[argc] == NULL);
4524  Assert(strncmp(argv[1], "--fork", 6) == 0);
4525  Assert(argv[2] == NULL);
4526 
4527  /* Resume here if we need to retry */
4528 retry:
4529 
4530  /* Set up shared memory for parameter passing */
4531  ZeroMemory(&sa, sizeof(sa));
4532  sa.nLength = sizeof(sa);
4533  sa.bInheritHandle = TRUE;
4534  paramHandle = CreateFileMapping(INVALID_HANDLE_VALUE,
4535  &sa,
4536  PAGE_READWRITE,
4537  0,
4538  sizeof(BackendParameters),
4539  NULL);
4540  if (paramHandle == INVALID_HANDLE_VALUE)
4541  {
4542  elog(LOG, "could not create backend parameter file mapping: error code %lu",
4543  GetLastError());
4544  return -1;
4545  }
4546 
4547  param = MapViewOfFile(paramHandle, FILE_MAP_WRITE, 0, 0, sizeof(BackendParameters));
4548  if (!param)
4549  {
4550  elog(LOG, "could not map backend parameter memory: error code %lu",
4551  GetLastError());
4552  CloseHandle(paramHandle);
4553  return -1;
4554  }
4555 
4556  /* Insert temp file name after --fork argument */
4557 #ifdef _WIN64
4558  sprintf(paramHandleStr, "%llu", (LONG_PTR) paramHandle);
4559 #else
4560  sprintf(paramHandleStr, "%lu", (DWORD) paramHandle);
4561 #endif
4562  argv[2] = paramHandleStr;
4563 
4564  /* Format the cmd line */
4565  cmdLine[sizeof(cmdLine) - 1] = '\0';
4566  cmdLine[sizeof(cmdLine) - 2] = '\0';
4567  snprintf(cmdLine, sizeof(cmdLine) - 1, "\"%s\"", postgres_exec_path);
4568  i = 0;
4569  while (argv[++i] != NULL)
4570  {
4571  j = strlen(cmdLine);
4572  snprintf(cmdLine + j, sizeof(cmdLine) - 1 - j, " \"%s\"", argv[i]);
4573  }
4574  if (cmdLine[sizeof(cmdLine) - 2] != '\0')
4575  {
4576  elog(LOG, "subprocess command line too long");
4577  return -1;
4578  }
4579 
4580  memset(&pi, 0, sizeof(pi));
4581  memset(&si, 0, sizeof(si));
4582  si.cb = sizeof(si);
4583 
4584  /*
4585  * Create the subprocess in a suspended state. This will be resumed later,
4586  * once we have written out the parameter file.
4587  */
4588  if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, CREATE_SUSPENDED,
4589  NULL, NULL, &si, &pi))
4590  {
4591  elog(LOG, "CreateProcess call failed: %m (error code %lu)",
4592  GetLastError());
4593  return -1;
4594  }
4595 
4596  if (!save_backend_variables(param, port, pi.hProcess, pi.dwProcessId))
4597  {
4598  /*
4599  * log made by save_backend_variables, but we have to clean up the
4600  * mess with the half-started process
4601  */
4602  if (!TerminateProcess(pi.hProcess, 255))
4603  ereport(LOG,
4604  (errmsg_internal("could not terminate unstarted process: error code %lu",
4605  GetLastError())));
4606  CloseHandle(pi.hProcess);
4607  CloseHandle(pi.hThread);
4608  return -1; /* log made by save_backend_variables */
4609  }
4610 
4611  /* Drop the parameter shared memory that is now inherited to the backend */
4612  if (!UnmapViewOfFile(param))
4613  elog(LOG, "could not unmap view of backend parameter file: error code %lu",
4614  GetLastError());
4615  if (!CloseHandle(paramHandle))
4616  elog(LOG, "could not close handle to backend parameter file: error code %lu",
4617  GetLastError());
4618 
4619  /*
4620  * Reserve the memory region used by our main shared memory segment before
4621  * we resume the child process. Normally this should succeed, but if ASLR
4622  * is active then it might sometimes fail due to the stack or heap having
4623  * gotten mapped into that range. In that case, just terminate the
4624  * process and retry.
4625  */
4626  if (!pgwin32_ReserveSharedMemoryRegion(pi.hProcess))
4627  {
4628  /* pgwin32_ReserveSharedMemoryRegion already made a log entry */
4629  if (!TerminateProcess(pi.hProcess, 255))
4630  ereport(LOG,
4631  (errmsg_internal("could not terminate process that failed to reserve memory: error code %lu",
4632  GetLastError())));
4633  CloseHandle(pi.hProcess);
4634  CloseHandle(pi.hThread);
4635  if (++retry_count < 100)
4636  goto retry;
4637  ereport(LOG,
4638  (errmsg("giving up after too many tries to reserve shared memory"),
4639  errhint("This might be caused by ASLR or antivirus software.")));
4640  return -1;
4641  }
4642 
4643  /*
4644  * Now that the backend variables are written out, we start the child
4645  * thread so it can start initializing while we set up the rest of the
4646  * parent state.
4647  */
4648  if (ResumeThread(pi.hThread) == -1)
4649  {
4650  if (!TerminateProcess(pi.hProcess, 255))
4651  {
4652  ereport(LOG,
4653  (errmsg_internal("could not terminate unstartable process: error code %lu",
4654  GetLastError())));
4655  CloseHandle(pi.hProcess);
4656  CloseHandle(pi.hThread);
4657  return -1;
4658  }
4659  CloseHandle(pi.hProcess);
4660  CloseHandle(pi.hThread);
4661  ereport(LOG,
4662  (errmsg_internal("could not resume thread of unstarted process: error code %lu",
4663  GetLastError())));
4664  return -1;
4665  }
4666 
4667  /*
4668  * Queue a waiter for to signal when this child dies. The wait will be
4669  * handled automatically by an operating system thread pool.
4670  *
4671  * Note: use malloc instead of palloc, since it needs to be thread-safe.
4672  * Struct will be free():d from the callback function that runs on a
4673  * different thread.
4674  */
4675  childinfo = malloc(sizeof(win32_deadchild_waitinfo));
4676  if (!childinfo)
4677  ereport(FATAL,
4678  (errcode(ERRCODE_OUT_OF_MEMORY),
4679  errmsg("out of memory")));
4680 
4681  childinfo->procHandle = pi.hProcess;
4682  childinfo->procId = pi.dwProcessId;
4683 
4684  if (!RegisterWaitForSingleObject(&childinfo->waitHandle,
4685  pi.hProcess,
4686  pgwin32_deadchild_callback,
4687  childinfo,
4688  INFINITE,
4689  WT_EXECUTEONLYONCE | WT_EXECUTEINWAITTHREAD))
4690  ereport(FATAL,
4691  (errmsg_internal("could not register process for wait: error code %lu",
4692  GetLastError())));
4693 
4694  /* Don't close pi.hProcess here - the wait thread needs access to it */
4695 
4696  CloseHandle(pi.hThread);
4697 
4698  return pi.dwProcessId;
4699 }
4700 #endif /* WIN32 */
4701 
4702 
4703 /*
4704  * SubPostmasterMain -- Get the fork/exec'd process into a state equivalent
4705  * to what it would be if we'd simply forked on Unix, and then
4706  * dispatch to the appropriate place.
4707  *
4708  * The first two command line arguments are expected to be "--forkFOO"
4709  * (where FOO indicates which postmaster child we are to become), and
4710  * the name of a variables file that we can read to load data that would
4711  * have been inherited by fork() on Unix. Remaining arguments go to the
4712  * subprocess FooMain() routine.
4713  */
4714 void
4715 SubPostmasterMain(int argc, char *argv[])
4716 {
4717  Port port;
4718 
4719  /* In EXEC_BACKEND case we will not have inherited these settings */
4720  IsPostmasterEnvironment = true;
4722 
4723  /* Setup as postmaster child */
4725 
4726  /* Setup essential subsystems (to ensure elog() behaves sanely) */
4728 
4729  /* Check we got appropriate args */
4730  if (argc < 3)
4731  elog(FATAL, "invalid subpostmaster invocation");
4732 
4733  /* Read in the variables file */
4734  memset(&port, 0, sizeof(Port));
4735  read_backend_variables(argv[2], &port);
4736 
4737  /* Close the postmaster's sockets (as soon as we know them) */
4738  ClosePostmasterPorts(strcmp(argv[1], "--forklog") == 0);
4739 
4740  /*
4741  * Set reference point for stack-depth checking
4742  */
4743  set_stack_base();
4744 
4745  /*
4746  * Set up memory area for GSS information. Mirrors the code in ConnCreate
4747  * for the non-exec case.
4748  */
4749 #if defined(ENABLE_GSS) || defined(ENABLE_SSPI)
4750  port.gss = (pg_gssinfo *) calloc(1, sizeof(pg_gssinfo));
4751  if (!port.gss)
4752  ereport(FATAL,
4753  (errcode(ERRCODE_OUT_OF_MEMORY),
4754  errmsg("out of memory")));
4755 #endif
4756 
4757  /*
4758  * If appropriate, physically re-attach to shared memory segment. We want
4759  * to do this before going any further to ensure that we can attach at the
4760  * same address the postmaster used. On the other hand, if we choose not
4761  * to re-attach, we may have other cleanup to do.
4762  *
4763  * If testing EXEC_BACKEND on Linux, you should run this as root before
4764  * starting the postmaster:
4765  *
4766  * echo 0 >/proc/sys/kernel/randomize_va_space
4767  *
4768  * This prevents using randomized stack and code addresses that cause the
4769  * child process's memory map to be different from the parent's, making it
4770  * sometimes impossible to attach to shared memory at the desired address.
4771  * Return the setting to its old value (usually '1' or '2') when finished.
4772  */
4773  if (strcmp(argv[1], "--forkbackend") == 0 ||
4774  strcmp(argv[1], "--forkavlauncher") == 0 ||
4775  strcmp(argv[1], "--forkavworker") == 0 ||
4776  strcmp(argv[1], "--forkboot") == 0 ||
4777  strncmp(argv[1], "--forkbgworker=", 15) == 0)
4779  else
4781 
4782  /* autovacuum needs this set before calling InitProcess */
4783  if (strcmp(argv[1], "--forkavlauncher") == 0)
4784  AutovacuumLauncherIAm();
4785  if (strcmp(argv[1], "--forkavworker") == 0)
4786  AutovacuumWorkerIAm();
4787 
4788  /*
4789  * Start our win32 signal implementation. This has to be done after we
4790  * read the backend variables, because we need to pick up the signal pipe
4791  * from the parent process.
4792  */
4793 #ifdef WIN32
4795 #endif
4796 
4797  /* In EXEC_BACKEND case we will not have inherited these settings */
4798  pqinitmask();
4799  PG_SETMASK(&BlockSig);
4800 
4801  /* Read in remaining GUC variables */
4802  read_nondefault_variables();
4803 
4804  /*
4805  * Check that the data directory looks valid, which will also check the
4806  * privileges on the data directory and update our umask and file/group
4807  * variables for creating files later. Note: this should really be done
4808  * before we create any files or directories.
4809  */
4810  checkDataDir();
4811 
4812  /*
4813  * (re-)read control file, as it contains config. The postmaster will
4814  * already have read this, but this process doesn't know about that.
4815  */
4816  LocalProcessControlFile(false);
4817 
4818  /*
4819  * Reload any libraries that were preloaded by the postmaster. Since we
4820  * exec'd this process, those libraries didn't come along with us; but we
4821  * should load them into all child processes to be consistent with the
4822  * non-EXEC_BACKEND behavior.
4823  */
4825 
4826  /* Run backend or appropriate child */
4827  if (strcmp(argv[1], "--forkbackend") == 0)
4828  {
4829  Assert(argc == 3); /* shouldn't be any more args */
4830 
4831  /*
4832  * Need to reinitialize the SSL library in the backend, since the
4833  * context structures contain function pointers and cannot be passed
4834  * through the parameter file.
4835  *
4836  * If for some reason reload fails (maybe the user installed broken
4837  * key files), soldier on without SSL; that's better than all
4838  * connections becoming impossible.
4839  *
4840  * XXX should we do this in all child processes? For the moment it's
4841  * enough to do it in backend children.
4842  */
4843 #ifdef USE_SSL
4844  if (EnableSSL)
4845  {
4846  if (secure_initialize(false) == 0)
4847  LoadedSSL = true;
4848  else
4849  ereport(LOG,
4850  (errmsg("SSL configuration could not be loaded in child process")));
4851  }
4852 #endif
4853 
4854  /*
4855  * Perform additional initialization and collect startup packet.
4856  *
4857  * We want to do this before InitProcess() for a couple of reasons: 1.
4858  * so that we aren't eating up a PGPROC slot while waiting on the
4859  * client. 2. so that if InitProcess() fails due to being out of
4860  * PGPROC slots, we have already initialized libpq and are able to
4861  * report the error to the client.
4862  */
4863  BackendInitialize(&port);
4864 
4865  /* Restore basic shared memory pointers */
4867 
4868  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4869  InitProcess();
4870 
4871  /* Attach process to shared data structures */
4873 
4874  /* And run the backend */
4875  BackendRun(&port); /* does not return */
4876  }
4877  if (strcmp(argv[1], "--forkboot") == 0)
4878  {
4879  /* Restore basic shared memory pointers */
4881 
4882  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4884 
4885  /* Attach process to shared data structures */
4887 
4888  AuxiliaryProcessMain(argc - 2, argv + 2); /* does not return */
4889  }
4890  if (strcmp(argv[1], "--forkavlauncher") == 0)
4891  {
4892  /* Restore basic shared memory pointers */
4894 
4895  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4896  InitProcess();
4897 
4898  /* Attach process to shared data structures */
4900 
4901  AutoVacLauncherMain(argc - 2, argv + 2); /* does not return */
4902  }
4903  if (strcmp(argv[1], "--forkavworker") == 0)
4904  {
4905  /* Restore basic shared memory pointers */
4907 
4908  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4909  InitProcess();
4910 
4911  /* Attach process to shared data structures */
4913 
4914  AutoVacWorkerMain(argc - 2, argv + 2); /* does not return */
4915  }
4916  if (strncmp(argv[1], "--forkbgworker=", 15) == 0)
4917  {
4918  int shmem_slot;
4919 
4920  /* do this as early as possible; in particular, before InitProcess() */
4921  IsBackgroundWorker = true;
4922 
4923  /* Restore basic shared memory pointers */
4925 
4926  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4927  InitProcess();
4928 
4929  /* Attach process to shared data structures */
4931 
4932  /* Fetch MyBgworkerEntry from shared memory */
4933  shmem_slot = atoi(argv[1] + 15);
4934  MyBgworkerEntry = BackgroundWorkerEntry(shmem_slot);
4935 
4937  }
4938  if (strcmp(argv[1], "--forkarch") == 0)
4939  {
4940  /* Do not want to attach to shared memory */
4941 
4942  PgArchiverMain(argc, argv); /* does not return */
4943  }
4944  if (strcmp(argv[1], "--forkcol") == 0)
4945  {
4946  /* Do not want to attach to shared memory */
4947 
4948  PgstatCollectorMain(argc, argv); /* does not return */
4949  }
4950  if (strcmp(argv[1], "--forklog") == 0)
4951  {
4952  /* Do not want to attach to shared memory */
4953 
4954  SysLoggerMain(argc, argv); /* does not return */
4955  }
4956 
4957  abort(); /* shouldn't get here */
4958 }
4959 #endif /* EXEC_BACKEND */
4960 
4961 
4962 /*
4963  * ExitPostmaster -- cleanup
4964  *
4965  * Do NOT call exit() directly --- always go through here!
4966  */
4967 static void
4969 {
4970 #ifdef HAVE_PTHREAD_IS_THREADED_NP
4971 
4972  /*
4973  * There is no known cause for a postmaster to become multithreaded after
4974  * startup. Recheck to account for the possibility of unknown causes.
4975  * This message uses LOG level, because an unclean shutdown at this point
4976  * would usually not look much different from a clean shutdown.
4977  */
4978  if (pthread_is_threaded_np() != 0)
4979  ereport(LOG,
4980  (errcode(ERRCODE_INTERNAL_ERROR),
4981  errmsg_internal("postmaster became multithreaded"),
4982  errdetail("Please report this to <pgsql-bugs@postgresql.org>.")));
4983 #endif
4984 
4985  /* should cleanup shared memory and kill all backends */
4986 
4987  /*
4988  * Not sure of the semantics here. When the Postmaster dies, should the
4989  * backends all be killed? probably not.
4990  *
4991  * MUST -- vadim 05-10-1999
4992  */
4993 
4994  proc_exit(status);
4995 }
4996 
4997 /*
4998  * sigusr1_handler - handle signal conditions from child processes
4999  */
5000 static void
5002 {
5003  int save_errno = errno;
5004 
5005  PG_SETMASK(&BlockSig);
5006 
5007  /* Process background worker state change. */
5009  {
5011  StartWorkerNeeded = true;
5012  }
5013 
5014  /*
5015  * RECOVERY_STARTED and BEGIN_HOT_STANDBY signals are ignored in
5016  * unexpected states. If the startup process quickly starts up, completes
5017  * recovery, exits, we might process the death of the startup process
5018  * first. We don't want to go back to recovery in that case.
5019  */
5022  {
5023  /* WAL redo has started. We're out of reinitialization. */
5024  FatalError = false;
5025  Assert(AbortStartTime == 0);
5026 
5027  /*
5028  * Crank up the background tasks. It doesn't matter if this fails,
5029  * we'll just try again later.
5030  */
5031  Assert(CheckpointerPID == 0);
5033  Assert(BgWriterPID == 0);
5035 
5036  /*
5037  * Start the archiver if we're responsible for (re-)archiving received
5038  * files.
5039  */
5040  Assert(PgArchPID == 0);
5041  if (XLogArchivingAlways())
5042  PgArchPID = pgarch_start();
5043 
5044  /*
5045  * If we aren't planning to enter hot standby mode later, treat
5046  * RECOVERY_STARTED as meaning we're out of startup, and report status
5047  * accordingly.
5048  */
5049  if (!EnableHotStandby)
5050  {
5052 #ifdef USE_SYSTEMD
5053  sd_notify(0, "READY=1");
5054 #endif
5055  }
5056 
5057  pmState = PM_RECOVERY;
5058  }
5061  {
5062  /*
5063  * Likewise, start other special children as needed.
5064  */
5065  Assert(PgStatPID == 0);
5066  PgStatPID = pgstat_start();
5067 
5068  ereport(LOG,
5069  (errmsg("database system is ready to accept read only connections")));
5070 
5071  /* Report status */
5073 #ifdef USE_SYSTEMD
5074  sd_notify(0, "READY=1");
5075 #endif
5076 
5078  /* Some workers may be scheduled to start now */
5079  StartWorkerNeeded = true;
5080  }
5081 
5084 
5086  PgArchPID != 0)
5087  {
5088  /*
5089  * Send SIGUSR1 to archiver process, to wake it up and begin archiving
5090  * next WAL file.
5091  */
5093  }
5094 
5096  SysLoggerPID != 0)
5097  {
5098  /* Tell syslogger to rotate logfile */
5100  }
5101 
5103  Shutdown == NoShutdown)
5104  {
5105  /*
5106  * Start one iteration of the autovacuum daemon, even if autovacuuming
5107  * is nominally not enabled. This is so we can have an active defense
5108  * against transaction ID wraparound. We set a flag for the main loop
5109  * to do it rather than trying to do it here --- this is because the
5110  * autovac process itself may send the signal, and we want to handle
5111  * that by launching another iteration as soon as the current one
5112  * completes.
5113  */
5114  start_autovac_launcher = true;
5115  }
5116 
5118  Shutdown == NoShutdown)
5119  {
5120  /* The autovacuum launcher wants us to start a worker process. */
5122  }
5123 
5125  {
5126  /* Startup Process wants us to start the walreceiver process. */
5127  /* Start immediately if possible, else remember request for later. */
5128  WalReceiverRequested = true;
5130  }
5131 
5134  {
5135  /* Advance postmaster's state machine */
5137  }
5138 
5139  if (CheckPromoteSignal() && StartupPID != 0 &&
5140  (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5142  {
5143  /* Tell startup process to finish recovery */
5145  }
5146 
5148 
5149  errno = save_errno;
5150 }
5151 
5152 /*
5153  * SIGTERM or SIGQUIT while processing startup packet.
5154  * Clean up and exit(1).
5155  *
5156  * XXX: possible future improvement: try to send a message indicating
5157  * why we are disconnecting. Problem is to be sure we don't block while
5158  * doing so, nor mess up SSL initialization. In practice, if the client
5159  * has wedged here, it probably couldn't do anything with the message anyway.
5160  */
5161 static void
5163 {
5164  proc_exit(1);
5165 }
5166 
5167 /*
5168  * Dummy signal handler
5169  *
5170  * We use this for signals that we don't actually use in the postmaster,
5171  * but we do use in backends. If we were to SIG_IGN such signals in the
5172  * postmaster, then a newly started backend might drop a signal that arrives
5173  * before it's able to reconfigure its signal processing. (See notes in
5174  * tcop/postgres.c.)
5175  */
5176 static void
5178 {
5179 }
5180 
5181 /*
5182  * Timeout while processing startup packet.
5183  * As for startup_die(), we clean up and exit(1).
5184  */
5185 static void
5187 {
5188  proc_exit(1);
5189 }
5190 
5191 
5192 /*
5193  * Generate a random cancel key.
5194  */
5195 static bool
5197 {
5198 #ifdef HAVE_STRONG_RANDOM
5199  return pg_strong_random((char *) cancel_key, sizeof(int32));
5200 #else
5201 
5202  /*
5203  * If built with --disable-strong-random, use plain old erand48.
5204  *
5205  * We cannot use pg_backend_random() in postmaster, because it stores its
5206  * state in shared memory.
5207  */
5208  static unsigned short seed[3];
5209 
5210  /*
5211  * Select a random seed at the time of first receiving a request.
5212  */
5213  if (random_seed == 0)
5214  {
5215  struct timeval random_stop_time;
5216 
5217  gettimeofday(&random_stop_time, NULL);
5218 
5219  seed[0] = (unsigned short) random_start_time.tv_usec;
5220  seed[1] = (unsigned short) (random_stop_time.tv_usec) ^ (random_start_time.tv_usec >> 16);
5221  seed[2] = (unsigned short) (random_stop_time.tv_usec >> 16);
5222 
5223  random_seed = 1;
5224  }
5225 
5226  *cancel_key = pg_jrand48(seed);
5227 
5228  return true;
5229 #endif
5230 }
5231 
5232 /*
5233  * Count up number of child processes of specified types (dead_end children
5234  * are always excluded).
5235  */
5236 static int
5237 CountChildren(int target)
5238 {
5239  dlist_iter iter;
5240  int cnt = 0;
5241 
5242  dlist_foreach(iter, &BackendList)
5243  {
5244  Backend *bp = dlist_container(Backend, elem, iter.cur);
5245 
5246  if (bp->dead_end)
5247  continue;
5248 
5249  /*
5250  * Since target == BACKEND_TYPE_ALL is the most common case, we test
5251  * it first and avoid touching shared memory for every child.
5252  */
5253  if (target != BACKEND_TYPE_ALL)
5254  {
5255  /*
5256  * Assign bkend_type for any recently announced WAL Sender
5257  * processes.
5258  */
5259  if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
5262 
5263  if (!(target & bp->bkend_type))
5264  continue;
5265  }
5266 
5267  cnt++;
5268  }
5269  return cnt;
5270 }
5271 
5272 
5273 /*
5274  * StartChildProcess -- start an auxiliary process for the postmaster
5275  *
5276  * "type" determines what kind of child will be started. All child types
5277  * initially go to AuxiliaryProcessMain, which will handle common setup.
5278  *
5279  * Return value of StartChildProcess is subprocess' PID, or 0 if failed
5280  * to start subprocess.
5281  */
5282 static pid_t
5284 {
5285  pid_t pid;
5286  char *av[10];
5287  int ac = 0;
5288  char typebuf[32];
5289 
5290  /*
5291  * Set up command-line arguments for subprocess
5292  */
5293  av[ac++] = "postgres";
5294 
5295 #ifdef EXEC_BACKEND
5296  av[ac++] = "--forkboot";
5297  av[ac++] = NULL; /* filled in by postmaster_forkexec */
5298 #endif
5299 
5300  snprintf(typebuf, sizeof(typebuf), "-x%d", type);
5301  av[ac++] = typebuf;
5302 
5303  av[ac] = NULL;
5304  Assert(ac < lengthof(av));
5305 
5306 #ifdef EXEC_BACKEND
5307  pid = postmaster_forkexec(ac, av);
5308 #else /* !EXEC_BACKEND */
5309  pid = fork_process();
5310 
5311  if (pid == 0) /* child */
5312  {
5314 
5315  /* Close the postmaster's sockets */
5316  ClosePostmasterPorts(false);
5317 
5318  /* Release postmaster's working memory context */
5321  PostmasterContext = NULL;
5322 
5323  AuxiliaryProcessMain(ac, av);
5324  ExitPostmaster(0);
5325  }
5326 #endif /* EXEC_BACKEND */
5327 
5328  if (pid < 0)
5329  {
5330  /* in parent, fork failed */
5331  int save_errno = errno;
5332 
5333  errno = save_errno;
5334  switch (type)
5335  {
5336  case StartupProcess:
5337  ereport(LOG,
5338  (errmsg("could not fork startup process: %m")));
5339  break;
5340  case BgWriterProcess:
5341  ereport(LOG,
5342  (errmsg("could not fork background writer process: %m")));
5343  break;
5344  case CheckpointerProcess:
5345  ereport(LOG,
5346  (errmsg("could not fork checkpointer process: %m")));
5347  break;
5348  case WalWriterProcess:
5349  ereport(LOG,
5350  (errmsg("could not fork WAL writer process: %m")));
5351  break;
5352  case WalReceiverProcess:
5353  ereport(LOG,
5354  (errmsg("could not fork WAL receiver process: %m")));
5355  break;
5356  default:
5357  ereport(LOG,
5358  (errmsg("could not fork process: %m")));
5359  break;
5360  }
5361 
5362  /*
5363  * fork failure is fatal during startup, but there's no need to choke
5364  * immediately if starting other child types fails.
5365  */
5366  if (type == StartupProcess)
5367  ExitPostmaster(1);
5368  return 0;
5369  }
5370 
5371  /*
5372  * in parent, successful fork
5373  */
5374  return pid;
5375 }
5376 
5377 /*
5378  * StartAutovacuumWorker
5379  * Start an autovac worker process.
5380  *
5381  * This function is here because it enters the resulting PID into the
5382  * postmaster's private backends list.
5383  *
5384  * NB -- this code very roughly matches BackendStartup.
5385  */
/*
 * Flow visible in this listing: if canAcceptConnections() returns CAC_OK, a
 * Backend struct is malloc'd and filled in, then StartAutoVacWorker() is
 * called.  A positive PID is success: the struct is pushed onto BackendList
 * (and handed to ShmemBackendArrayAdd under EXEC_BACKEND) and the function
 * returns.  A non-positive PID frees the struct; malloc failure is logged.
 * Both of those, and the "cannot accept connections" case, fall through to
 * the launcher-notification code at the bottom.
 *
 * NOTE(review): the embedded listing numbers skip 5387, 5406, 5421, 5427,
 * 5440 and 5460, so this copy is missing statements (see the markers
 * below).  Restore them from the upstream file before relying on this text.
 */
5386 static void
/* NOTE(review): listing line 5387 (the declarator; per the header comment presumably StartAutovacuumWorker) is missing. */
5388 {
5389  Backend *bn;
5390 
5391  /*
5392  * If not in condition to run a process, don't try, but handle it like a
5393  * fork failure. This does not normally happen, since the signal is only
5394  * supposed to be sent by autovacuum launcher when it's OK to do it, but
5395  * we have to check to avoid race-condition problems during DB state
5396  * changes.
5397  */
5398  if (canAcceptConnections() == CAC_OK)
5399  {
5400  /*
5401  * Compute the cancel key that will be assigned to this session. We
5402  * probably don't need cancel keys for autovac workers, but we'd
5403  * better have something random in the field to prevent unfriendly
5404  * people from sending cancels to them.
5405  */
/* NOTE(review): listing line 5406 is missing; presumably the `if` that tests cancel-key generation, given the error logged in the block below -- TODO confirm. */
5407  {
5408  ereport(LOG,
5409  (errcode(ERRCODE_INTERNAL_ERROR),
5410  errmsg("could not generate random cancel key")));
5411  return;
5412  }
5413 
5414  bn = (Backend *) malloc(sizeof(Backend));
5415  if (bn)
5416  {
5417  bn->cancel_key = MyCancelKey;
5418 
5419  /* Autovac workers are not dead_end and need a child slot */
5420  bn->dead_end = false;
/* NOTE(review): listing line 5421 is missing; the comment above suggests it assigns the child slot -- TODO confirm. */
5422  bn->bgworker_notify = false;
5423 
5424  bn->pid = StartAutoVacWorker();
5425  if (bn->pid > 0)
5426  {
/* NOTE(review): listing line 5427 is missing (a statement executed before the list insertion). */
5428  dlist_push_head(&BackendList, &bn->elem);
5429 #ifdef EXEC_BACKEND
5430  ShmemBackendArrayAdd(bn);
5431 #endif
5432  /* all OK */
5433  return;
5434  }
5435 
5436  /*
5437  * fork failed, fall through to report -- actual error message was
5438  * logged by StartAutoVacWorker
5439  */
/* NOTE(review): listing line 5440 is missing (a cleanup statement executed before the free below). */
5441  free(bn);
5442  }
5443  else
5444  ereport(LOG,
5445  (errcode(ERRCODE_OUT_OF_MEMORY),
5446  errmsg("out of memory")));
5447  }
5448 
5449  /*
5450  * Report the failure to the launcher, if it's running. (If it's not, we
5451  * might not even be connected to shared memory, so don't try to call
5452  * AutoVacWorkerFailed.) Note that we also need to signal it so that it
5453  * responds to the condition, but we don't do that here, instead waiting
5454  * for ServerLoop to do it. This way we avoid a ping-pong signalling in
5455  * quick succession between the autovac launcher and postmaster in case
5456  * things get ugly.
5457  */
5458  if (AutoVacPID != 0)
5459  {
/* NOTE(review): listing line 5460 is missing; the comment above implies the failure report to the launcher (it names AutoVacWorkerFailed) -- TODO confirm. */
5461  avlauncher_needs_signal = true;
5462  }
5463 }
5464 
5465 /*
5466  * MaybeStartWalReceiver
5467  * Start the WAL receiver process, if not running and our state allows.
5468  */
/*
 * Visible conditions: WalReceiverPID must be 0, pmState must be one of the
 * listed states (PM_STARTUP and PM_RECOVERY are visible; the list continues
 * on a missing line), and Shutdown must equal NoShutdown.  When they hold,
 * WalReceiverRequested is reset to false.
 *
 * NOTE(review): the embedded listing numbers skip 5470, 5474 and 5477, so
 * the declarator, the tail of the pmState test and one statement in the
 * body are missing from this copy.  Restore them from the upstream file.
 */
5469 static void
/* NOTE(review): listing line 5470 (the declarator) is missing. */
5471 {
5472  if (WalReceiverPID == 0 &&
5473  (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
/* NOTE(review): listing line 5474 is missing; it must finish the parenthesized pmState test opened above. */
5475  Shutdown == NoShutdown)
5476  {
/* NOTE(review): listing line 5477 is missing; presumably the statement that actually starts the WAL receiver -- TODO confirm. */
5478  WalReceiverRequested = false;
5479  }
5480 }
5481 
5482 
5483 /*
5484  * Create the opts file
5485  */
5486 static bool
5487 CreateOptsFile(int argc, char *argv[], char *fullprogname)
5488 {
5489  FILE *fp;
5490  int i;
5491 
5492 #define OPTS_FILE "postmaster.opts"
5493 
5494  if ((fp = fopen(OPTS_FILE, "w")) == NULL)
5495  {
5496  elog(LOG, "could not create file \"%s\": %m", OPTS_FILE);
5497  return false;
5498  }
5499 
5500  fprintf(fp, "%s", fullprogname);
5501  for (i = 1; i < argc; i++)
5502  fprintf(fp, " \"%s\"", argv[i]);
5503  fputs("\n", fp);
5504 
5505  if (fclose(fp))
5506  {
5507  elog(LOG, "could not write file \"%s\": %m", OPTS_FILE);
5508  return false;
5509  }
5510 
5511  return true;
5512 }
5513 
5514 
5515 /*
5516  * MaxLivePostmasterChildren
5517  *
5518  * This reports the number of entries needed in per-child-process arrays
5519  * (the PMChildFlags array, and if EXEC_BACKEND the ShmemBackendArray).
5520  * These arrays include regular backends, autovac workers, walsenders
5521  * and background workers, but not special children nor dead_end children.
5522  * This allows the arrays to have a fixed maximum size, to wit the same
5523  * too-many-children limit enforced by canAcceptConnections(). The exact value
5524  * isn't too critical as long as it's more than MaxBackends.
5525  */
/*
 * The visible part of the expression is twice the sum of MaxConnections,
 * autovacuum_max_workers, 1, and at least one further term on a missing
 * line.
 *
 * NOTE(review): the embedded listing numbers skip 5527 (the declarator) and
 * 5530 (the rest of the sum and its closing parenthesis).  Restore both
 * from the upstream file.
 */
5526 int
/* NOTE(review): listing line 5527 (the declarator) is missing. */
5528 {
5529  return 2 * (MaxConnections + autovacuum_max_workers + 1 +
/* NOTE(review): listing line 5530 is missing; it must supply the remaining term(s) and close the expression. */
5531 }
5532 
5533 /*
5534  * Connect background worker to a database.
5535  */
/*
 * Visible behaviour: reports FATAL "database connection requirement not
 * indicated during registration" (under a condition that is on a missing
 * line), then calls InitPostgres() with dbname and username, InvalidOid for
 * both OID arguments, and a final boolean that is true when
 * BGWORKER_BYPASS_ALLOWCONN is set in flags.  Afterwards it raises ERROR if
 * IsInitProcessingMode() is no longer true.
 *
 * NOTE(review): the embedded listing numbers skip 5537, 5539, 5542 and
 * 5553, so the declarator, a local declaration, the guard in front of the
 * FATAL report and the final statement are missing from this copy.  The
 * body uses the names dbname, username and flags, which are presumably the
 * parameters -- TODO confirm against the upstream file.
 */
5536 void
/* NOTE(review): listing line 5537 (the declarator) is missing. */
5538 {
/* NOTE(review): listing line 5539 is missing (presumably a local declaration). */
5540 
5541  /* XXX is this the right errcode? */
/* NOTE(review): listing line 5542 is missing; presumably the `if` guarding the FATAL report below -- TODO confirm. */
5543  ereport(FATAL,
5544  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5545  errmsg("database connection requirement not indicated during registration")));
5546 
5547  InitPostgres(dbname, InvalidOid, username, InvalidOid, NULL, (flags & BGWORKER_BYPASS_ALLOWCONN) != 0);
5548 
5549  /* it had better not gotten out of "init" mode yet */
5550  if (!IsInitProcessingMode())
5551  ereport(ERROR,
5552  (errmsg("invalid processing mode in background worker")));
/* NOTE(review): listing line 5553 is missing (the last statement of the function). */
5554 }
5555 
5556 /*
5557  * Connect background worker to a database using OIDs.
5558  */
/*
 * Visible behaviour: reports FATAL "database connection requirement not
 * indicated during registration" (under a condition that is on a missing
 * line), then calls InitPostgres() with NULL for both name arguments, dboid
 * and useroid for the OID arguments, and a final boolean that is true when
 * BGWORKER_BYPASS_ALLOWCONN is set in flags.  Afterwards it raises ERROR if
 * IsInitProcessingMode() is no longer true.
 *
 * NOTE(review): the embedded listing numbers skip 5560, 5562, 5565 and
 * 5576, so the declarator, a local declaration, the guard in front of the
 * FATAL report and the final statement are missing from this copy.  The
 * body uses the names dboid, useroid and flags, which are presumably the
 * parameters -- TODO confirm against the upstream file.
 */
5559 void
/* NOTE(review): listing line 5560 (the declarator) is missing. */
5561 {
/* NOTE(review): listing line 5562 is missing (presumably a local declaration). */
5563 
5564  /* XXX is this the right errcode? */
/* NOTE(review): listing line 5565 is missing; presumably the `if` guarding the FATAL report below -- TODO confirm. */
5566  ereport(FATAL,
5567  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5568  errmsg("database connection requirement not indicated during registration")));
5569 
5570  InitPostgres(NULL, dboid, NULL, useroid, NULL, (flags & BGWORKER_BYPASS_ALLOWCONN) != 0);
5571 
5572  /* it had better not gotten out of "init" mode yet */
5573  if (!IsInitProcessingMode())
5574  ereport(ERROR,
5575  (errmsg("invalid processing mode in background worker")));
/* NOTE(review): listing line 5576 is missing (the last statement of the function). */
5577 }
5578 
5579 /*
5580  * Block/unblock signals in a background worker
5581  */
/*
 * This is the blocking half: it installs BlockSig as the signal mask via
 * PG_SETMASK.
 *
 * NOTE(review): the embedded listing numbers skip 5583, so the declarator
 * line is missing from this copy.  Restore it from the upstream file.
 */
5582 void
/* NOTE(review): listing line 5583 (the declarator) is missing. */
5584 {
5585  PG_SETMASK(&BlockSig);
5586 }
5587 
/*
 * NOTE(review): only the return type and braces of this function survive in
 * this copy; the embedded listing numbers skip 5589 (the declarator) and
 * 5591 (the sole body statement).  Given the "Block/unblock signals" header
 * on the preceding function this is presumably the unblocking counterpart --
 * TODO confirm and restore both lines from the upstream file.
 */
5588 void
5590 {
5592 }
5593 
#ifdef EXEC_BACKEND
/*
 * bgworker_forkexec
 *
 * Build the argument vector for a background worker child and hand it to
 * postmaster_forkexec().  The vector is "postgres", "--forkbgworker=<slot>",
 * then one placeholder entry that postmaster_forkexec fills in, followed by
 * the terminating NULL.
 *
 * Returns whatever postmaster_forkexec() returns.
 */
static pid_t
bgworker_forkexec(int shmem_slot)
{
	char		slot_arg[MAXPGPATH];
	char	   *child_argv[10];
	int			child_argc;

	snprintf(slot_arg, MAXPGPATH, "--forkbgworker=%d", shmem_slot);

	child_argv[0] = "postgres";
	child_argv[1] = slot_arg;
	child_argv[2] = NULL;		/* filled in by postmaster_forkexec */
	child_argc = 3;
	child_argv[child_argc] = NULL;

	Assert(child_argc < lengthof(child_argv));

	return postmaster_forkexec(child_argc, child_argv);
}
#endif
5614 
5615 /*
5616  * Start a new bgworker.
5617  * Starting time conditions must have been checked already.
5618  *
5619  * Returns true on success, false on failure.
5620  * In either case, update the RegisteredBgWorker's state appropriately.
5621  *
5622  * This code is heavily based on autovacuum.c, q.v.
5623  */
/*
 * Flow visible in this listing: assign_backendlist_entry(rw) is called
 * first and its failure returns false.  The process is then created with
 * bgworker_forkexec() under EXEC_BACKEND or fork_process() otherwise, and
 * the switch handles three outcomes: -1 (failure: log, release
 * rw->rw_backend, fall out to `return false`), 0 (child, non-EXEC_BACKEND
 * only), and any other value (parent: record the PID in rw and
 * rw->rw_backend, push the Backend onto BackendList, return true).
 *
 * NOTE(review): the embedded listing numbers skip 5625, 5642, 5661, 5666,
 * 5672, 5682, 5686, 5687, 5690 and 5699, so this copy is missing the
 * declarator and several statements (see the markers below).  Restore them
 * from the upstream file before relying on this text.
 */
5624 static bool
/* NOTE(review): listing line 5625 (the declarator) is missing; the body uses a parameter named rw. */
5626 {
5627  pid_t worker_pid;
5628 
5629  Assert(rw->rw_pid == 0);
5630 
5631  /*
5632  * Allocate and assign the Backend element. Note we must do this before
5633  * forking, so that we can handle out of memory properly.
5634  *
5635  * Treat failure as though the worker had crashed. That way, the
5636  * postmaster will wait a bit before attempting to start it again; if it
5637  * tried again right away, most likely it'd find itself repeating the
5638  * out-of-memory or fork failure condition.
5639  */
5640  if (!assign_backendlist_entry(rw))
5641  {
/* NOTE(review): listing line 5642 is missing; per the comment above presumably the statement that marks the worker as crashed -- TODO confirm. */
5643  return false;
5644  }
5645 
5646  ereport(DEBUG1,
5647  (errmsg("starting background worker process \"%s\"",
5648  rw->rw_worker.bgw_name)));
5649 
5650 #ifdef EXEC_BACKEND
5651  switch ((worker_pid = bgworker_forkexec(rw->rw_shmem_slot)))
5652 #else
5653  switch ((worker_pid = fork_process()))
5654 #endif
5655  {
5656  case -1:
5657  /* in postmaster, fork failed ... */
5658  ereport(LOG,
5659  (errmsg("could not fork worker process: %m")));
5660  /* undo what assign_backendlist_entry did */
/* NOTE(review): listing line 5661 is missing (first step of the undo sequence). */
5662  rw->rw_child_slot = 0;
5663  free(rw->rw_backend);
5664  rw->rw_backend = NULL;
5665  /* mark entry as crashed, so we'll try again later */
/* NOTE(review): listing line 5666 is missing; it is the statement the comment above describes. */
5667  break;
5668 
5669 #ifndef EXEC_BACKEND
5670  case 0:
5671  /* in postmaster child ... */
/* NOTE(review): listing line 5672 is missing (first statement executed in the child). */
5673 
5674  /* Close the postmaster's sockets */
5675  ClosePostmasterPorts(false);
5676 
5677  /*
5678  * Before blowing away PostmasterContext, save this bgworker's
5679  * data where it can find it.
5680  */
5681  MyBgworkerEntry = (BackgroundWorker *)
/* NOTE(review): listing line 5682 is missing; it must supply the allocation expression that completes the assignment above. */
5683  memcpy(MyBgworkerEntry, &rw->rw_worker, sizeof(BackgroundWorker));
5684 
5685  /* Release postmaster's working memory context */
/* NOTE(review): listing lines 5686-5687 are missing (the release described by the comment above). */
5688  PostmasterContext = NULL;
5689 
/* NOTE(review): listing line 5690 is missing; presumably the call that runs the worker, since the exit below is marked unreachable -- TODO confirm. */
5691 
5692  exit(1); /* should not get here */
5693  break;
5694 #endif
5695  default:
5696  /* in postmaster, fork successful ... */
5697  rw->rw_pid = worker_pid;
5698  rw->rw_backend->pid = rw->rw_pid;
/* NOTE(review): listing line 5699 is missing (a statement executed before the list insertion). */
5700  /* add new worker to lists of backends */
5701  dlist_push_head(&BackendList, &rw->rw_backend->elem);
5702 #ifdef EXEC_BACKEND
5703  ShmemBackendArrayAdd(rw->rw_backend);
5704 #endif
5705  return true;
5706  }
5707 
5708  return false;
5709 }
5710 
5711 /*
5712  * Does the current postmaster state require starting a worker with the
5713  * specified start_time?
5714  */
/*
 * The switch on pmState is a deliberate fall-through cascade, so later
 * postmaster states accept every start_time that earlier ones do:
 *   - PM_RUN accepts BgWorkerStart_RecoveryFinished,
 *     BgWorkerStart_ConsistentState and BgWorkerStart_PostmasterStart;
 *   - PM_HOT_STANDBY accepts BgWorkerStart_ConsistentState and
 *     BgWorkerStart_PostmasterStart;
 *   - PM_RECOVERY, PM_STARTUP and PM_INIT accept only
 *     BgWorkerStart_PostmasterStart;
 *   - the shutdown/wait states listed first accept nothing.
 * Anything not accepted yields false.
 *
 * NOTE(review): the embedded listing numbers skip 5716, so the declarator
 * line is missing from this copy; the body uses a parameter named
 * start_time.  Restore the line from the upstream file.
 */
5715 static bool
/* NOTE(review): listing line 5716 (the declarator) is missing. */
5717 {
5718  switch (pmState)
5719  {
5720  case PM_NO_CHILDREN:
5721  case PM_WAIT_DEAD_END:
5722  case PM_SHUTDOWN_2:
5723  case PM_SHUTDOWN:
5724  case PM_WAIT_BACKENDS:
5725  case PM_WAIT_READONLY:
5726  case PM_WAIT_BACKUP:
5727  break;
5728 
5729  case PM_RUN:
5730  if (start_time == BgWorkerStart_RecoveryFinished)
5731  return true;
5732  /* fall through */
5733 
5734  case PM_HOT_STANDBY:
5735  if (start_time == BgWorkerStart_ConsistentState)
5736  return true;
5737  /* fall through */
5738 
5739  case PM_RECOVERY:
5740  case PM_STARTUP:
5741  case PM_INIT:
5742  if (start_time == BgWorkerStart_PostmasterStart)
5743  return true;
5744  /* fall through */
5745 
5746  }
5747 
5748  return false;
5749 }
5750 
5751 /*
5752  * Allocate the Backend struct for a connected background worker, but don't
5753  * add it to the list of backends just yet.
5754  *
5755  * On failure, return false without changing any worker state.
5756  *
5757  * Some info from the Backend is copied into the passed rw.
5758  */
/*
 * Visible behaviour: a Backend is malloc'd (failure is logged as "out of
 * memory" and returns false), its cancel_key is set from MyCancelKey,
 * dead_end and bgworker_notify are cleared, and the struct is stored in
 * rw->rw_backend with bn->child_slot copied to rw->rw_child_slot.
 *
 * NOTE(review): the embedded listing numbers skip 5760, 5770, 5788 and
 * 5789, so the declarator, the cancel-key test and two field assignments
 * are missing from this copy.  bn->child_slot is read at the end but never
 * assigned in the visible lines, so one of the missing assignments
 * presumably sets it -- TODO confirm against the upstream file.
 */
5759 static bool
/* NOTE(review): listing line 5760 (the declarator) is missing; the body uses a parameter named rw. */
5761 {
5762  Backend *bn;
5763 
5764  /*
5765  * Compute the cancel key that will be assigned to this session. We
5766  * probably don't need cancel keys for background workers, but we'd better
5767  * have something random in the field to prevent unfriendly people from
5768  * sending cancels to them.
5769  */
/* NOTE(review): listing line 5770 is missing; presumably the `if` that tests cancel-key generation, given the error logged in the block below -- TODO confirm. */
5771  {
5772  ereport(LOG,
5773  (errcode(ERRCODE_INTERNAL_ERROR),
5774  errmsg("could not generate random cancel key")));
5775  return false;
5776  }
5777 
5778  bn = malloc(sizeof(Backend));
5779  if (bn == NULL)
5780  {
5781  ereport(LOG,
5782  (errcode(ERRCODE_OUT_OF_MEMORY),
5783  errmsg("out of memory")));
5784  return false;
5785  }
5786 
5787  bn->cancel_key = MyCancelKey;
/* NOTE(review): listing lines 5788-5789 are missing (two further field assignments on bn). */
5790  bn->dead_end = false;
5791  bn->bgworker_notify = false;
5792 
5793  rw->rw_backend = bn;
5794  rw->rw_child_slot = bn->child_slot;
5795 
5796  return true;
5797 }
5798 
5799 /*
5800  * If the time is right, start background worker(s).
5801  *
5802  * As a side effect, the bgworker control variables are set or reset
5803  * depending on whether more workers may need to be started.
5804  *
5805  * We limit the number of workers started per call, to avoid consuming the
5806  * postmaster's attention for too long when many such requests are pending.
5807  * As long as StartWorkerNeeded is true, ServerLoop will not block and will
5808  * call this function again after dealing with any other issues.
5809  */
5810 static void
5812 {
5813 #define MAX_BGWORKERS_TO_LAUNCH 100
5814  int num_launched = 0;
5815  TimestampTz now = 0;
5816  slist_mutable_iter iter;
5817 
5818  /*
5819  * During crash recovery, we have no need to be called until the state
5820  * transition out of recovery.
5821  */
5822  if (FatalError)
5823  {
5824  StartWorkerNeeded = false;
5825  HaveCrashedWorker = false;
5826  return;
5827  }
5828 
5829  /* Don't need to be called again unless we find a reason for it below */
5830  StartWorkerNeeded = false;
5831  HaveCrashedWorker = false;
5832 
5834  {
5835  RegisteredBgWorker *rw;
5836 
5837  rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
5838 
5839  /* ignore if already running */
5840  if (rw->rw_pid != 0)
5841  continue;
5842 
5843  /* if marked for death, clean up and remove from list */
5844  if (rw->rw_terminate)
5845  {
5846  ForgetBackgroundWorker(&iter);
5847  continue;
5848  }