PostgreSQL Source Code  git master
postmaster.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * postmaster.c
4  * This program acts as a clearing house for requests to the
5  * POSTGRES system. Frontend programs send a startup message
6  * to the Postmaster and the postmaster uses the info in the
7  * message to setup a backend process.
8  *
9  * The postmaster also manages system-wide operations such as
10  * startup and shutdown. The postmaster itself doesn't do those
11  * operations, mind you --- it just forks off a subprocess to do them
12  * at the right times. It also takes care of resetting the system
13  * if a backend crashes.
14  *
15  * The postmaster process creates the shared memory and semaphore
16  * pools during startup, but as a rule does not touch them itself.
17  * In particular, it is not a member of the PGPROC array of backends
18  * and so it cannot participate in lock-manager operations. Keeping
19  * the postmaster away from shared memory operations makes it simpler
20  * and more reliable. The postmaster is almost always able to recover
21  * from crashes of individual backends by resetting shared memory;
22  * if it did much with shared memory then it would be prone to crashing
23  * along with the backends.
24  *
25  * When a request message is received, we now fork() immediately.
26  * The child process performs authentication of the request, and
27  * then becomes a backend if successful. This allows the auth code
28  * to be written in a simple single-threaded style (as opposed to the
29  * crufty "poor man's multitasking" code that used to be needed).
30  * More importantly, it ensures that blockages in non-multithreaded
31  * libraries like SSL or PAM cannot cause denial of service to other
32  * clients.
33  *
34  *
35  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
36  * Portions Copyright (c) 1994, Regents of the University of California
37  *
38  *
39  * IDENTIFICATION
40  * src/backend/postmaster/postmaster.c
41  *
42  * NOTES
43  *
44  * Initialization:
45  * The Postmaster sets up shared memory data structures
46  * for the backends.
47  *
48  * Synchronization:
49  * The Postmaster shares memory with the backends but should avoid
50  * touching shared memory, so as not to become stuck if a crashing
51  * backend screws up locks or shared memory. Likewise, the Postmaster
52  * should never block on messages from frontend clients.
53  *
54  * Garbage Collection:
55  * The Postmaster cleans up after backends if they have an emergency
56  * exit and/or core dump.
57  *
58  * Error Reporting:
59  * Use write_stderr() only for reporting "interactive" errors
60  * (essentially, bogus arguments on the command line). Once the
61  * postmaster is launched, use ereport().
62  *
63  *-------------------------------------------------------------------------
64  */
65 
66 #include "postgres.h"
67 
68 #include <unistd.h>
69 #include <signal.h>
70 #include <time.h>
71 #include <sys/wait.h>
72 #include <ctype.h>
73 #include <sys/stat.h>
74 #include <sys/socket.h>
75 #include <fcntl.h>
76 #include <sys/param.h>
77 #include <netdb.h>
78 #include <limits.h>
79 
80 #ifdef HAVE_SYS_SELECT_H
81 #include <sys/select.h>
82 #endif
83 
84 #ifdef USE_BONJOUR
85 #include <dns_sd.h>
86 #endif
87 
88 #ifdef USE_SYSTEMD
89 #include <systemd/sd-daemon.h>
90 #endif
91 
92 #ifdef HAVE_PTHREAD_IS_THREADED_NP
93 #include <pthread.h>
94 #endif
95 
96 #include "access/transam.h"
97 #include "access/xlog.h"
98 #include "catalog/pg_control.h"
99 #include "common/file_perm.h"
100 #include "common/ip.h"
101 #include "common/string.h"
102 #include "lib/ilist.h"
103 #include "libpq/auth.h"
104 #include "libpq/libpq.h"
105 #include "libpq/pqformat.h"
106 #include "libpq/pqsignal.h"
107 #include "pg_getopt.h"
108 #include "pgstat.h"
109 #include "port/pg_bswap.h"
110 #include "postmaster/autovacuum.h"
111 #include "postmaster/auxprocess.h"
113 #include "postmaster/fork_process.h"
114 #include "postmaster/interrupt.h"
115 #include "postmaster/pgarch.h"
116 #include "postmaster/postmaster.h"
117 #include "postmaster/syslogger.h"
119 #include "replication/walsender.h"
120 #include "storage/fd.h"
121 #include "storage/ipc.h"
122 #include "storage/pg_shmem.h"
123 #include "storage/pmsignal.h"
124 #include "storage/proc.h"
125 #include "tcop/tcopprot.h"
126 #include "utils/builtins.h"
127 #include "utils/datetime.h"
128 #include "utils/memutils.h"
129 #include "utils/pidfile.h"
130 #include "utils/ps_status.h"
131 #include "utils/queryjumble.h"
132 #include "utils/timeout.h"
133 #include "utils/timestamp.h"
134 #include "utils/varlena.h"
135 
136 #ifdef EXEC_BACKEND
137 #include "storage/spin.h"
138 #endif
139 
140 
141 /*
142  * Possible types of a backend. Beyond being the possible bkend_type values in
143  * struct bkend, these are OR-able request flag bits for SignalSomeChildren()
144  * and CountChildren(); each type occupies its own bit.
145  */
146 #define BACKEND_TYPE_NORMAL 0x0001 /* normal backend; also dead_end children and walsenders not yet relabeled */
147 #define BACKEND_TYPE_AUTOVAC 0x0002 /* autovacuum worker process */
148 #define BACKEND_TYPE_WALSND 0x0004 /* walsender process (launched as NORMAL, relabeled later) */
149 #define BACKEND_TYPE_BGWORKER 0x0008 /* bgworker process */
150 #define BACKEND_TYPE_ALL 0x000F /* OR of all the above */
151 
152 /*
153  * List of active backends (or child processes anyway; we don't actually
154  * know whether a given child has become a backend or is still in the
155  * authorization phase). This is used mainly to keep track of how many
156  * children we have and send them appropriate signals when necessary.
157  *
158  * As shown in the above set of backend types, this list includes not only
159  * "normal" client sessions, but also autovacuum workers, walsenders, and
160  * background workers. (Note that at the time of launch, walsenders are
161  * labeled BACKEND_TYPE_NORMAL; we relabel them to BACKEND_TYPE_WALSND
162  * upon noticing they've changed their PMChildFlags entry. Hence that check
163  * must be done before any operation that needs to distinguish walsenders
164  * from normal backends.)
165  *
166  * Also, "dead_end" children are in it: these are children launched just for
167  * the purpose of sending a friendly rejection message to a would-be client.
168  * We must track them because they are attached to shared memory, but we know
169  * they will never become live backends. dead_end children are not assigned a
170  * PMChildSlot. dead_end children have bkend_type NORMAL.
171  *
172  * "Special" children such as the startup, bgwriter and autovacuum launcher
173  * tasks are not in this list. They are tracked via StartupPID and other
174  * pid_t variables below. (Thus, there can't be more than one of any given
175  * "special" child process type. We use BackendList entries for any child
176  * process there can be more than one of.)
177  */
178 typedef struct bkend
179 {
180  pid_t pid; /* process id of backend */
181  int32 cancel_key; /* key associated with cancel requests for this backend */
182  int child_slot; /* PMChildSlot for this backend, if any (dead_end children get none) */
183  int bkend_type; /* child process flavor: one of the BACKEND_TYPE_* values */
184  bool dead_end; /* will it just send an error message and quit? */
185  bool bgworker_notify; /* gets bgworker start/stop notifications */
186  dlist_node elem; /* list link in BackendList */
187 } Backend;
188 
190 
191 #ifdef EXEC_BACKEND
192 static Backend *ShmemBackendArray;
193 #endif
194 
196 
197 
198 
199 /* The socket number we are listening for connections on */
201 
202 /* The directory names for Unix socket(s) */
204 
205 /* The TCP listen address(es) */
207 
208 /*
209  * ReservedBackends is the number of backends reserved for superuser use.
210  * This number is taken out of the pool size given by MaxConnections so
211  * the number of backend slots available to non-superusers is
212  * (MaxConnections - ReservedBackends). Note what this really means is
213  * "if there are <= ReservedBackends connections available, only superusers
214  * can make new connections" --- pre-existing superuser connections don't
215  * count against the limit.
216  */
218 
219 /* The socket(s) we're listening to. */
220 #define MAXLISTEN 64
222 
223 /*
224  * These globals control the behavior of the postmaster in case some
225  * backend dumps core. Normally, it kills all peers of the dead backend
226  * and reinitializes shared memory. By specifying -T or -n, we can have
227  * the postmaster stop (rather than kill) peers and not reinitialize
228  * shared data structures. (Reinit is currently dead code, though.)
229  */
230 static bool Reinit = true;
231 static int SendStop = false;
232 
233 /* still more option variables */
234 bool EnableSSL = false;
235 
236 int PreAuthDelay = 0;
238 
239 bool log_hostname; /* for ps display and logging */
240 bool Log_connections = false;
241 bool Db_user_namespace = false;
242 
243 bool enable_bonjour = false;
247 
248 /* PIDs of special child processes; 0 when not running */
249 static pid_t StartupPID = 0,
258 
259 /* Startup process's status */
260 typedef enum
261 {
264  STARTUP_SIGNALED, /* we sent it a SIGQUIT or SIGKILL */
267 
269 
270 /* Startup/shutdown state */
271 #define NoShutdown 0
272 #define SmartShutdown 1
273 #define FastShutdown 2
274 #define ImmediateShutdown 3
275 
276 static int Shutdown = NoShutdown;
277 
278 static bool FatalError = false; /* T if recovering from backend crash */
279 
280 /*
281  * We use a simple state machine to control startup, shutdown, and
282  * crash recovery (which is rather like shutdown followed by startup).
283  *
284  * After doing all the postmaster initialization work, we enter PM_STARTUP
285  * state and the startup process is launched. The startup process begins by
286  * reading the control file and other preliminary initialization steps.
287  * In a normal startup, or after crash recovery, the startup process exits
288  * with exit code 0 and we switch to PM_RUN state. However, archive recovery
289  * is handled specially since it takes much longer and we would like to support
290  * hot standby during archive recovery.
291  *
292  * When the startup process is ready to start archive recovery, it signals the
293  * postmaster, and we switch to PM_RECOVERY state. The background writer and
294  * checkpointer are launched, while the startup process continues applying WAL.
295  * If Hot Standby is enabled, then, after reaching a consistent point in WAL
296  * redo, startup process signals us again, and we switch to PM_HOT_STANDBY
297  * state and begin accepting connections to perform read-only queries. When
298  * archive recovery is finished, the startup process exits with exit code 0
299  * and we switch to PM_RUN state.
300  *
301  * Normal child backends can only be launched when we are in PM_RUN or
302  * PM_HOT_STANDBY state. (connsAllowed can also restrict launching.)
303  * In other states we handle connection requests by launching "dead_end"
304  * child processes, which will simply send the client an error message and
305  * quit. (We track these in the BackendList so that we can know when they
306  * are all gone; this is important because they're still connected to shared
307  * memory, and would interfere with an attempt to destroy the shmem segment,
308  * possibly leading to SHMALL failure when we try to make a new one.)
309  * In PM_WAIT_DEAD_END state we are waiting for all the dead_end children
310  * to drain out of the system, and therefore stop accepting connection
311  * requests at all until the last existing child has quit (which hopefully
312  * will not be very long).
313  *
314  * Notice that this state variable does not distinguish *why* we entered
315  * states later than PM_RUN --- Shutdown and FatalError must be consulted
316  * to find that out. FatalError is never true in PM_RECOVERY, PM_HOT_STANDBY,
317  * or PM_RUN states, nor in PM_SHUTDOWN states (because we don't enter those
318  * states when trying to recover from a crash). It can be true in PM_STARTUP
319  * state, because we don't clear it until we've successfully started WAL redo.
320  */
321 typedef enum
322 { /* listed in progression order; NOTE(review): code may compare states by order -- confirm before reordering */
323  PM_INIT, /* postmaster starting */
324  PM_STARTUP, /* startup subprocess launched; waiting for it */
325  PM_RECOVERY, /* in archive recovery mode; startup process is applying WAL */
326  PM_HOT_STANDBY, /* in hot standby mode; read-only connections are accepted */
327  PM_RUN, /* normal "database is alive" state */
328  PM_STOP_BACKENDS, /* need to stop remaining backends */
329  PM_WAIT_BACKENDS, /* waiting for live backends to exit */
330  PM_SHUTDOWN, /* waiting for checkpointer to do shutdown
331  * checkpoint */
332  PM_SHUTDOWN_2, /* waiting for archiver and walsenders to
333  * finish */
334  PM_WAIT_DEAD_END, /* waiting for dead_end children to exit; no new connection requests accepted */
335  PM_NO_CHILDREN /* all important children have exited */
336 } PMState;
337 
339 
340 /*
341  * While performing a "smart shutdown", we restrict new connections but stay
342  * in PM_RUN or PM_HOT_STANDBY state until all the client backends are gone.
343  * connsAllowed is a sub-state indicator showing the active restriction.
344  * It is of no interest unless pmState is PM_RUN or PM_HOT_STANDBY.
345  */
346 typedef enum
347 {
348  ALLOW_ALL_CONNS, /* normal not-shutting-down state */
349  ALLOW_SUPERUSER_CONNS, /* only superusers can connect */
350  ALLOW_NO_CONNS /* no new connections allowed, period */
352 
354 
355 /* Start time of SIGKILL timeout during immediate shutdown or child crash */
356 /* Zero means timeout is not running */
357 static time_t AbortStartTime = 0;
358 
359 /* Length of said timeout */
360 #define SIGKILL_CHILDREN_AFTER_SECS 5
361 
362 static bool ReachedNormalRunning = false; /* T if we've reached PM_RUN */
363 
364 bool ClientAuthInProgress = false; /* T during new-client
365  * authentication */
366 
367 bool redirection_done = false; /* stderr redirected for syslogger? */
368 
369 /* received START_AUTOVAC_LAUNCHER signal */
370 static volatile sig_atomic_t start_autovac_launcher = false;
371 
372 /* the launcher needs to be signaled to communicate some condition */
373 static volatile bool avlauncher_needs_signal = false;
374 
375 /* received START_WALRECEIVER signal */
376 static volatile sig_atomic_t WalReceiverRequested = false;
377 
378 /* set when there's a worker that needs to be started up */
379 static volatile bool StartWorkerNeeded = true;
380 static volatile bool HaveCrashedWorker = false;
381 
382 #ifdef USE_SSL
383 /* Set when and if SSL has been initialized properly */
384 static bool LoadedSSL = false;
385 #endif
386 
387 #ifdef USE_BONJOUR
388 static DNSServiceRef bonjour_sdref = NULL;
389 #endif
390 
391 /*
392  * postmaster.c - function prototypes
393  */
394 static void CloseServerPorts(int status, Datum arg);
395 static void unlink_external_pid_file(int status, Datum arg);
396 static void getInstallationPaths(const char *argv0);
397 static void checkControlFile(void);
398 static Port *ConnCreate(int serverFd);
399 static void ConnFree(Port *port);
400 static void reset_shared(void);
401 static void SIGHUP_handler(SIGNAL_ARGS);
402 static void pmdie(SIGNAL_ARGS);
403 static void reaper(SIGNAL_ARGS);
404 static void sigusr1_handler(SIGNAL_ARGS);
406 static void dummy_handler(SIGNAL_ARGS);
407 static void StartupPacketTimeoutHandler(void);
408 static void CleanupBackend(int pid, int exitstatus);
409 static bool CleanupBackgroundWorker(int pid, int exitstatus);
410 static void HandleChildCrash(int pid, int exitstatus, const char *procname);
411 static void LogChildExit(int lev, const char *procname,
412  int pid, int exitstatus);
413 static void PostmasterStateMachine(void);
414 static void BackendInitialize(Port *port);
415 static void BackendRun(Port *port) pg_attribute_noreturn();
416 static void ExitPostmaster(int status) pg_attribute_noreturn();
417 static int ServerLoop(void);
418 static int BackendStartup(Port *port);
419 static int ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done);
420 static void SendNegotiateProtocolVersion(List *unrecognized_protocol_options);
421 static void processCancelRequest(Port *port, void *pkt);
422 static int initMasks(fd_set *rmask);
423 static void report_fork_failure_to_client(Port *port, int errnum);
424 static CAC_state canAcceptConnections(int backend_type);
425 static bool RandomCancelKey(int32 *cancel_key);
426 static void signal_child(pid_t pid, int signal);
427 static bool SignalSomeChildren(int signal, int targets);
428 static void TerminateChildren(int signal);
429 
430 #define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL)
431 
432 static int CountChildren(int target);
434 static void maybe_start_bgworkers(void);
435 static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
436 static pid_t StartChildProcess(AuxProcType type);
437 static void StartAutovacuumWorker(void);
438 static void MaybeStartWalReceiver(void);
439 static void InitPostmasterDeathWatchHandle(void);
440 
441 /*
442  * Is the archiver allowed to start up in the current postmaster state?
443  *
444  * Yes in PM_RUN if XLogArchivingActive(), or in PM_RECOVERY/PM_HOT_STANDBY
445  * if XLogArchivingAlways(); in either case PgArchCanRestart() must also hold.
446  */
447 #define PgArchStartupAllowed() \
448  (((XLogArchivingActive() && pmState == PM_RUN) || \
449  (XLogArchivingAlways() && \
450  (pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY))) && \
451  PgArchCanRestart())
452 
453 #ifdef EXEC_BACKEND
454 
455 #ifdef WIN32
456 #define WNOHANG 0 /* ignored, so any integer value will do */
457 
458 static pid_t waitpid(pid_t pid, int *exitstatus, int options);
459 static void WINAPI pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired);
460 
461 static HANDLE win32ChildQueue;
462 
463 typedef struct
464 {
465  HANDLE waitHandle; /* NOTE(review): presumably the registered wait for the child -- confirm */
466  HANDLE procHandle; /* NOTE(review): presumably the child's process handle -- confirm */
467  DWORD procId; /* NOTE(review): presumably the child's process id -- confirm */
468 } win32_deadchild_waitinfo;
469 #endif /* WIN32 */
470 
471 static pid_t backend_forkexec(Port *port);
472 static pid_t internal_forkexec(int argc, char *argv[], Port *port);
473 
474 /* Type for a socket that can be inherited by a child process (a plain int unless WIN32) */
475 #ifdef WIN32
476 typedef struct
477 {
478  SOCKET origsocket; /* Original socket value, or PGINVALID_SOCKET
479  * if not a socket */
480  WSAPROTOCOL_INFO wsainfo; /* NOTE(review): presumably lets the child recreate the socket -- confirm */
481 } InheritableSocket;
482 #else
483 typedef int InheritableSocket;
484 #endif
485 
486 /*
487  * Structure containing all the variables passed to exec'ed backends
488  */
489 typedef struct
490 {
491  Port port;
492  InheritableSocket portsocket;
493  char DataDir[MAXPGPATH];
496  int MyPMChildSlot;
497 #ifndef WIN32
498  unsigned long UsedShmemSegID;
499 #else
500  void *ShmemProtectiveRegion;
501  HANDLE UsedShmemSegID;
502 #endif
503  void *UsedShmemSegAddr;
506  Backend *ShmemBackendArray;
507 #ifndef HAVE_SPINLOCKS
509 #endif
518  InheritableSocket pgStatSock;
519  pid_t PostmasterPid;
523  bool redirection_done;
524  bool IsBinaryUpgrade;
525  bool query_id_enabled;
526  int max_safe_fds;
527  int MaxBackends;
528 #ifdef WIN32
529  HANDLE PostmasterHandle;
530  HANDLE initial_signal_pipe;
531  HANDLE syslogPipe[2];
532 #else
533  int postmaster_alive_fds[2];
534  int syslogPipe[2];
535 #endif
536  char my_exec_path[MAXPGPATH];
537  char pkglib_path[MAXPGPATH];
538 } BackendParameters;
539 
540 static void read_backend_variables(char *id, Port *port);
541 static void restore_backend_variables(BackendParameters *param, Port *port);
542 
543 #ifndef WIN32
544 static bool save_backend_variables(BackendParameters *param, Port *port);
545 #else
546 static bool save_backend_variables(BackendParameters *param, Port *port,
547  HANDLE childProcess, pid_t childPid);
548 #endif
549 
550 static void ShmemBackendArrayAdd(Backend *bn);
551 static void ShmemBackendArrayRemove(Backend *bn);
552 #endif /* EXEC_BACKEND */
553 
554 #define StartupDataBase() StartChildProcess(StartupProcess)
555 #define StartArchiver() StartChildProcess(ArchiverProcess)
556 #define StartBackgroundWriter() StartChildProcess(BgWriterProcess)
557 #define StartCheckpointer() StartChildProcess(CheckpointerProcess)
558 #define StartWalWriter() StartChildProcess(WalWriterProcess)
559 #define StartWalReceiver() StartChildProcess(WalReceiverProcess)
560 
561 /* Macros to check the exit status of a child process; st is the wait status word */
562 #define EXIT_STATUS_0(st) ((st) == 0) /* whole status word is zero */
563 #define EXIT_STATUS_1(st) (WIFEXITED(st) && WEXITSTATUS(st) == 1) /* exited normally with code 1 */
564 #define EXIT_STATUS_3(st) (WIFEXITED(st) && WEXITSTATUS(st) == 3) /* exited normally with code 3 */
565 
566 #ifndef WIN32
567 /*
568  * File descriptors for pipe used to monitor if postmaster is alive.
569  * First is POSTMASTER_FD_WATCH, second is POSTMASTER_FD_OWN.
570  */
571 int postmaster_alive_fds[2] = {-1, -1};
572 #else
573 /* Process handle of postmaster used for the same purpose on Windows */
574 HANDLE PostmasterHandle;
575 #endif
576 
577 /*
578  * Postmaster main entry point
579  */
580 void
581 PostmasterMain(int argc, char *argv[])
582 {
583  int opt;
584  int status;
585  char *userDoption = NULL;
586  bool listen_addr_saved = false;
587  int i;
588  char *output_config_variable = NULL;
589 
591 
593 
595 
596  /*
597  * Start our win32 signal implementation
598  */
599 #ifdef WIN32
601 #endif
602 
603  /*
604  * We should not be creating any files or directories before we check the
605  * data directory (see checkDataDir()), but just in case set the umask to
606  * the most restrictive (owner-only) permissions.
607  *
608  * checkDataDir() will reset the umask based on the data directory
609  * permissions.
610  */
611  umask(PG_MODE_MASK_OWNER);
612 
613  /*
614  * By default, palloc() requests in the postmaster will be allocated in
615  * the PostmasterContext, which is space that can be recycled by backends.
616  * Allocated data that needs to be available to backends should be
617  * allocated in TopMemoryContext.
618  */
620  "Postmaster",
623 
624  /* Initialize paths to installation files */
625  getInstallationPaths(argv[0]);
626 
627  /*
628  * Set up signal handlers for the postmaster process.
629  *
630  * In the postmaster, we use pqsignal_pm() rather than pqsignal() (which
631  * is used by all child processes and client processes). That has a
632  * couple of special behaviors:
633  *
634  * 1. Except on Windows, we tell sigaction() to block all signals for the
635  * duration of the signal handler. This is faster than our old approach
636  * of blocking/unblocking explicitly in the signal handler, and it should
637  * also prevent excessive stack consumption if signals arrive quickly.
638  *
639  * 2. We do not set the SA_RESTART flag. This is because signals will be
640  * blocked at all times except when ServerLoop is waiting for something to
641  * happen, and during that window, we want signals to exit the select(2)
642  * wait so that ServerLoop can respond if anything interesting happened.
643  * On some platforms, signals marked SA_RESTART would not cause the
644  * select() wait to end.
645  *
646  * Child processes will generally want SA_RESTART, so pqsignal() sets that
647  * flag. We expect children to set up their own handlers before
648  * unblocking signals.
649  *
650  * CAUTION: when changing this list, check for side-effects on the signal
651  * handling setup of child processes. See tcop/postgres.c,
652  * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
653  * postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/pgstat.c,
654  * postmaster/syslogger.c, postmaster/bgworker.c and
655  * postmaster/checkpointer.c.
656  */
657  pqinitmask();
659 
660  pqsignal_pm(SIGHUP, SIGHUP_handler); /* reread config file and have
661  * children do same */
662  pqsignal_pm(SIGINT, pmdie); /* send SIGTERM and shut down */
663  pqsignal_pm(SIGQUIT, pmdie); /* send SIGQUIT and die */
664  pqsignal_pm(SIGTERM, pmdie); /* wait for children and shut down */
665  pqsignal_pm(SIGALRM, SIG_IGN); /* ignored */
666  pqsignal_pm(SIGPIPE, SIG_IGN); /* ignored */
667  pqsignal_pm(SIGUSR1, sigusr1_handler); /* message from child process */
668  pqsignal_pm(SIGUSR2, dummy_handler); /* unused, reserve for children */
669  pqsignal_pm(SIGCHLD, reaper); /* handle child termination */
670 
671 #ifdef SIGURG
672 
673  /*
674  * Ignore SIGURG for now. Child processes may change this (see
675  * InitializeLatchSupport), but they will not receive any such signals
676  * until they wait on a latch.
677  */
678  pqsignal_pm(SIGURG, SIG_IGN); /* ignored */
679 #endif
680 
681  /*
682  * No other place in Postgres should touch SIGTTIN/SIGTTOU handling. We
683  * ignore those signals in a postmaster environment, so that there is no
684  * risk of a child process freezing up due to writing to stderr. But for
685  * a standalone backend, their default handling is reasonable. Hence, all
686  * child processes should just allow the inherited settings to stand.
687  */
688 #ifdef SIGTTIN
689  pqsignal_pm(SIGTTIN, SIG_IGN); /* ignored */
690 #endif
691 #ifdef SIGTTOU
692  pqsignal_pm(SIGTTOU, SIG_IGN); /* ignored */
693 #endif
694 
695  /* ignore SIGXFSZ, so that ulimit violations work like disk full */
696 #ifdef SIGXFSZ
697  pqsignal_pm(SIGXFSZ, SIG_IGN); /* ignored */
698 #endif
699 
700  /*
701  * Options setup
702  */
704 
705  opterr = 1;
706 
707  /*
708  * Parse command-line options. CAUTION: keep this in sync with
709  * tcop/postgres.c (the option sets should not conflict) and with the
710  * common help() function in main/main.c.
711  */
712  while ((opt = getopt(argc, argv, "B:bc:C:D:d:EeFf:h:ijk:lN:nOPp:r:S:sTt:W:-:")) != -1)
713  {
714  switch (opt)
715  {
716  case 'B':
717  SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
718  break;
719 
720  case 'b':
721  /* Undocumented flag used for binary upgrades */
722  IsBinaryUpgrade = true;
723  break;
724 
725  case 'C':
726  output_config_variable = strdup(optarg);
727  break;
728 
729  case 'D':
730  userDoption = strdup(optarg);
731  break;
732 
733  case 'd':
735  break;
736 
737  case 'E':
738  SetConfigOption("log_statement", "all", PGC_POSTMASTER, PGC_S_ARGV);
739  break;
740 
741  case 'e':
742  SetConfigOption("datestyle", "euro", PGC_POSTMASTER, PGC_S_ARGV);
743  break;
744 
745  case 'F':
746  SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
747  break;
748 
749  case 'f':
751  {
752  write_stderr("%s: invalid argument for option -f: \"%s\"\n",
753  progname, optarg);
754  ExitPostmaster(1);
755  }
756  break;
757 
758  case 'h':
759  SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
760  break;
761 
762  case 'i':
763  SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
764  break;
765 
766  case 'j':
767  /* only used by interactive backend */
768  break;
769 
770  case 'k':
771  SetConfigOption("unix_socket_directories", optarg, PGC_POSTMASTER, PGC_S_ARGV);
772  break;
773 
774  case 'l':
775  SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
776  break;
777 
778  case 'N':
779  SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
780  break;
781 
782  case 'n':
783  /* Don't reinit shared mem after abnormal exit */
784  Reinit = false;
785  break;
786 
787  case 'O':
788  SetConfigOption("allow_system_table_mods", "true", PGC_POSTMASTER, PGC_S_ARGV);
789  break;
790 
791  case 'P':
792  SetConfigOption("ignore_system_indexes", "true", PGC_POSTMASTER, PGC_S_ARGV);
793  break;
794 
795  case 'p':
797  break;
798 
799  case 'r':
800  /* only used by single-user backend */
801  break;
802 
803  case 'S':
805  break;
806 
807  case 's':
808  SetConfigOption("log_statement_stats", "true", PGC_POSTMASTER, PGC_S_ARGV);
809  break;
810 
811  case 'T':
812 
813  /*
814  * In the event that some backend dumps core, send SIGSTOP,
815  * rather than SIGQUIT, to all its peers. This lets the wily
816  * post_hacker collect core dumps from everyone.
817  */
818  SendStop = true;
819  break;
820 
821  case 't':
822  {
823  const char *tmp = get_stats_option_name(optarg);
824 
825  if (tmp)
826  {
828  }
829  else
830  {
831  write_stderr("%s: invalid argument for option -t: \"%s\"\n",
832  progname, optarg);
833  ExitPostmaster(1);
834  }
835  break;
836  }
837 
838  case 'W':
839  SetConfigOption("post_auth_delay", optarg, PGC_POSTMASTER, PGC_S_ARGV);
840  break;
841 
842  case 'c':
843  case '-':
844  {
845  char *name,
846  *value;
847 
848  ParseLongOption(optarg, &name, &value);
849  if (!value)
850  {
851  if (opt == '-')
852  ereport(ERROR,
853  (errcode(ERRCODE_SYNTAX_ERROR),
854  errmsg("--%s requires a value",
855  optarg)));
856  else
857  ereport(ERROR,
858  (errcode(ERRCODE_SYNTAX_ERROR),
859  errmsg("-c %s requires a value",
860  optarg)));
861  }
862 
864  free(name);
865  if (value)
866  free(value);
867  break;
868  }
869 
870  default:
871  write_stderr("Try \"%s --help\" for more information.\n",
872  progname);
873  ExitPostmaster(1);
874  }
875  }
876 
877  /*
878  * Postmaster accepts no non-option switch arguments.
879  */
880  if (optind < argc)
881  {
882  write_stderr("%s: invalid argument: \"%s\"\n",
883  progname, argv[optind]);
884  write_stderr("Try \"%s --help\" for more information.\n",
885  progname);
886  ExitPostmaster(1);
887  }
888 
889  /*
890  * Locate the proper configuration files and data directory, and read
891  * postgresql.conf for the first time.
892  */
893  if (!SelectConfigFiles(userDoption, progname))
894  ExitPostmaster(2);
895 
896  if (output_config_variable != NULL)
897  {
898  /*
899  * If this is a runtime-computed GUC, it hasn't yet been initialized,
900  * and the present value is not useful. However, this is a convenient
901  * place to print the value for most GUCs because it is safe to run
902  * postmaster startup to this point even if the server is already
903  * running. For the handful of runtime-computed GUCs that we cannot
904  * provide meaningful values for yet, we wait until later in
905  * postmaster startup to print the value. We won't be able to use -C
906  * on running servers for those GUCs, but using this option now would
907  * lead to incorrect results for them.
908  */
909  int flags = GetConfigOptionFlags(output_config_variable, true);
910 
911  if ((flags & GUC_RUNTIME_COMPUTED) == 0)
912  {
913  /*
914  * "-C guc" was specified, so print GUC's value and exit. No
915  * extra permission check is needed because the user is reading
916  * inside the data dir.
917  */
918  const char *config_val = GetConfigOption(output_config_variable,
919  false, false);
920 
921  puts(config_val ? config_val : "");
922  ExitPostmaster(0);
923  }
924  }
925 
926  /* Verify that DataDir looks reasonable */
927  checkDataDir();
928 
929  /* Check that pg_control exists */
931 
932  /* And switch working directory into it */
933  ChangeToDataDir();
934 
935  /*
936  * Check for invalid combinations of GUC settings.
937  */
939  {
940  write_stderr("%s: superuser_reserved_connections (%d) must be less than max_connections (%d)\n",
941  progname,
943  ExitPostmaster(1);
944  }
946  ereport(ERROR,
947  (errmsg("WAL archival cannot be enabled when wal_level is \"minimal\"")));
949  ereport(ERROR,
950  (errmsg("WAL streaming (max_wal_senders > 0) requires wal_level \"replica\" or \"logical\"")));
951 
952  /*
953  * Other one-time internal sanity checks can go here, if they are fast.
954  * (Put any slow processing further down, after postmaster.pid creation.)
955  */
956  if (!CheckDateTokenTables())
957  {
958  write_stderr("%s: invalid datetoken tables, please fix\n", progname);
959  ExitPostmaster(1);
960  }
961 
962  /*
963  * Now that we are done processing the postmaster arguments, reset
964  * getopt(3) library so that it will work correctly in subprocesses.
965  */
966  optind = 1;
967 #ifdef HAVE_INT_OPTRESET
968  optreset = 1; /* some systems need this too */
969 #endif
970 
971  /* For debugging: display postmaster environment */
972  {
973  extern char **environ;
974  char **p;
975 
976  ereport(DEBUG3,
977  (errmsg_internal("%s: PostmasterMain: initial environment dump:",
978  progname)));
979  ereport(DEBUG3,
980  (errmsg_internal("-----------------------------------------")));
981  for (p = environ; *p; ++p)
982  ereport(DEBUG3,
983  (errmsg_internal("\t%s", *p)));
984  ereport(DEBUG3,
985  (errmsg_internal("-----------------------------------------")));
986  }
987 
988  /*
989  * Create lockfile for data directory.
990  *
991  * We want to do this before we try to grab the input sockets, because the
992  * data directory interlock is more reliable than the socket-file
993  * interlock (thanks to whoever decided to put socket files in /tmp :-().
994  * For the same reason, it's best to grab the TCP socket(s) before the
995  * Unix socket(s).
996  *
997  * Also note that this internally sets up the on_proc_exit function that
998  * is responsible for removing both data directory and socket lockfiles;
999  * so it must happen before opening sockets so that at exit, the socket
1000  * lockfiles go away after CloseServerPorts runs.
1001  */
1002  CreateDataDirLockFile(true);
1003 
1004  /*
1005  * Read the control file (for error checking and config info).
1006  *
1007  * Since we verify the control file's CRC, this has a useful side effect
1008  * on machines where we need a run-time test for CRC support instructions.
1009  * The postmaster will do the test once at startup, and then its child
1010  * processes will inherit the correct function pointer and not need to
1011  * repeat the test.
1012  */
1013  LocalProcessControlFile(false);
1014 
1015  /*
1016  * Register the apply launcher. Since it registers a background worker,
1017  * it needs to be called before InitializeMaxBackends(), and it's probably
1018  * a good idea to call it before any modules had chance to take the
1019  * background worker slots.
1020  */
1022 
1023  /*
1024  * process any libraries that should be preloaded at postmaster start
1025  */
1027 
1028  /*
1029  * Initialize SSL library, if specified.
1030  */
1031 #ifdef USE_SSL
1032  if (EnableSSL)
1033  {
1034  (void) secure_initialize(true);
1035  LoadedSSL = true;
1036  }
1037 #endif
1038 
1039  /*
1040  * Now that loadable modules have had their chance to register background
1041  * workers, calculate MaxBackends.
1042  */
1044 
1045  /*
1046  * Now that loadable modules have had their chance to request additional
1047  * shared memory, determine the value of any runtime-computed GUCs that
1048  * depend on the amount of shared memory required.
1049  */
1051 
1052  /*
1053  * If -C was specified with a runtime-computed GUC, we held off printing
1054  * the value earlier, as the GUC was not yet initialized. We handle -C
1055  * for most GUCs before we lock the data directory so that the option may
1056  * be used on a running server. However, a handful of GUCs are runtime-
1057  * computed and do not have meaningful values until after locking the data
1058  * directory, and we cannot safely calculate their values earlier on a
1059  * running server. At this point, such GUCs should be properly
1060  * initialized, and we haven't yet set up shared memory, so this is a good
1061  * time to handle the -C option for these special GUCs.
1062  */
1063  if (output_config_variable != NULL)
1064  {
1065  const char *config_val = GetConfigOption(output_config_variable,
1066  false, false);
1067 
1068  puts(config_val ? config_val : "");
1069  ExitPostmaster(0);
1070  }
1071 
1072  /*
1073  * Set up shared memory and semaphores.
1074  */
1075  reset_shared();
1076 
1077  /*
1078  * Estimate number of openable files. This must happen after setting up
1079  * semaphores, because on some platforms semaphores count as open files.
1080  */
1081  set_max_safe_fds();
1082 
1083  /*
1084  * Set reference point for stack-depth checking.
1085  */
1086  set_stack_base();
1087 
1088  /*
1089  * Initialize pipe (or process handle on Windows) that allows children to
1090  * wake up from sleep on postmaster death.
1091  */
1093 
1094 #ifdef WIN32
1095 
1096  /*
1097  * Initialize I/O completion port used to deliver list of dead children.
1098  */
1099  win32ChildQueue = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 1);
1100  if (win32ChildQueue == NULL)
1101  ereport(FATAL,
1102  (errmsg("could not create I/O completion port for child queue")));
1103 #endif
1104 
1105 #ifdef EXEC_BACKEND
1106  /* Write out nondefault GUC settings for child processes to use */
1107  write_nondefault_variables(PGC_POSTMASTER);
1108 
1109  /*
1110  * Clean out the temp directory used to transmit parameters to child
1111  * processes (see internal_forkexec, below). We must do this before
1112  * launching any child processes, else we have a race condition: we could
1113  * remove a parameter file before the child can read it. It should be
1114  * safe to do so now, because we verified earlier that there are no
1115  * conflicting Postgres processes in this data directory.
1116  */
1118 #endif
1119 
1120  /*
1121  * Forcibly remove the files signaling a standby promotion request.
1122  * Otherwise, the existence of those files triggers a promotion too early,
1123  * whether a user wants that or not.
1124  *
1125  * This removal of files is usually unnecessary because they can exist
1126  * only during a few moments during a standby promotion. However there is
1127  * a race condition: if pg_ctl promote is executed and creates the files
1128  * during a promotion, the files can stay around even after the server is
1129  * brought up to be the primary. Then, if a new standby starts by using
1130  * the backup taken from the new primary, the files can exist at server
1131  * startup and must be removed in order to avoid an unexpected promotion.
1132  *
1133  * Note that promotion signal files need to be removed before the startup
1134  * process is invoked. Because, after that, they can be used by
1135  * postmaster's SIGUSR1 signal handler.
1136  */
1138 
1139  /* Do the same for logrotate signal file */
1141 
1142  /* Remove any outdated file holding the current log filenames. */
1143  if (unlink(LOG_METAINFO_DATAFILE) < 0 && errno != ENOENT)
1144  ereport(LOG,
1146  errmsg("could not remove file \"%s\": %m",
1148 
1149  /*
1150  * If enabled, start up syslogger collection subprocess
1151  */
1153 
1154  /*
1155  * Reset whereToSendOutput from DestDebug (its starting state) to
1156  * DestNone. This stops ereport from sending log messages to stderr unless
1157  * Log_destination permits. We don't do this until the postmaster is
1158  * fully launched, since startup failures may as well be reported to
1159  * stderr.
1160  *
1161  * If we are in fact disabling logging to stderr, first emit a log message
1162  * saying so, to provide a breadcrumb trail for users who may not remember
1163  * that their logging is configured to go somewhere else.
1164  */
1166  ereport(LOG,
1167  (errmsg("ending log output to stderr"),
1168  errhint("Future log output will go to log destination \"%s\".",
1170 
1172 
1173  /*
1174  * Report server startup in log. While we could emit this much earlier,
1175  * it seems best to do so after starting the log collector, if we intend
1176  * to use one.
1177  */
1178  ereport(LOG,
1179  (errmsg("starting %s", PG_VERSION_STR)));
1180 
1181  /*
1182  * Establish input sockets.
1183  *
1184  * First, mark them all closed, and set up an on_proc_exit function that's
1185  * charged with closing the sockets again at postmaster shutdown.
1186  */
1187  for (i = 0; i < MAXLISTEN; i++)
1189 
1191 
1192  if (ListenAddresses)
1193  {
1194  char *rawstring;
1195  List *elemlist;
1196  ListCell *l;
1197  int success = 0;
1198 
1199  /* Need a modifiable copy of ListenAddresses */
1200  rawstring = pstrdup(ListenAddresses);
1201 
1202  /* Parse string into list of hostnames */
1203  if (!SplitGUCList(rawstring, ',', &elemlist))
1204  {
1205  /* syntax error in list */
1206  ereport(FATAL,
1207  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1208  errmsg("invalid list syntax in parameter \"%s\"",
1209  "listen_addresses")));
1210  }
1211 
1212  foreach(l, elemlist)
1213  {
1214  char *curhost = (char *) lfirst(l);
1215 
1216  if (strcmp(curhost, "*") == 0)
1217  status = StreamServerPort(AF_UNSPEC, NULL,
1218  (unsigned short) PostPortNumber,
1219  NULL,
1221  else
1222  status = StreamServerPort(AF_UNSPEC, curhost,
1223  (unsigned short) PostPortNumber,
1224  NULL,
1225  ListenSocket, MAXLISTEN);
1226 
1227  if (status == STATUS_OK)
1228  {
1229  success++;
1230  /* record the first successful host addr in lockfile */
1231  if (!listen_addr_saved)
1232  {
1234  listen_addr_saved = true;
1235  }
1236  }
1237  else
1238  ereport(WARNING,
1239  (errmsg("could not create listen socket for \"%s\"",
1240  curhost)));
1241  }
1242 
1243  if (!success && elemlist != NIL)
1244  ereport(FATAL,
1245  (errmsg("could not create any TCP/IP sockets")));
1246 
1247  list_free(elemlist);
1248  pfree(rawstring);
1249  }
1250 
1251 #ifdef USE_BONJOUR
1252  /* Register for Bonjour only if we opened TCP socket(s) */
1254  {
1255  DNSServiceErrorType err;
1256 
1257  /*
1258  * We pass 0 for interface_index, which will result in registering on
1259  * all "applicable" interfaces. It's not entirely clear from the
1260  * DNS-SD docs whether this would be appropriate if we have bound to
1261  * just a subset of the available network interfaces.
1262  */
1263  err = DNSServiceRegister(&bonjour_sdref,
1264  0,
1265  0,
1266  bonjour_name,
1267  "_postgresql._tcp.",
1268  NULL,
1269  NULL,
1271  0,
1272  NULL,
1273  NULL,
1274  NULL);
1275  if (err != kDNSServiceErr_NoError)
1276  ereport(LOG,
1277  (errmsg("DNSServiceRegister() failed: error code %ld",
1278  (long) err)));
1279 
1280  /*
1281  * We don't bother to read the mDNS daemon's reply, and we expect that
1282  * it will automatically terminate our registration when the socket is
1283  * closed at postmaster termination. So there's nothing more to be
1284  * done here. However, the bonjour_sdref is kept around so that
1285  * forked children can close their copies of the socket.
1286  */
1287  }
1288 #endif
1289 
1290 #ifdef HAVE_UNIX_SOCKETS
1292  {
1293  char *rawstring;
1294  List *elemlist;
1295  ListCell *l;
1296  int success = 0;
1297 
1298  /* Need a modifiable copy of Unix_socket_directories */
1299  rawstring = pstrdup(Unix_socket_directories);
1300 
1301  /* Parse string into list of directories */
1302  if (!SplitDirectoriesString(rawstring, ',', &elemlist))
1303  {
1304  /* syntax error in list */
1305  ereport(FATAL,
1306  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1307  errmsg("invalid list syntax in parameter \"%s\"",
1308  "unix_socket_directories")));
1309  }
1310 
1311  foreach(l, elemlist)
1312  {
1313  char *socketdir = (char *) lfirst(l);
1314 
1315  status = StreamServerPort(AF_UNIX, NULL,
1316  (unsigned short) PostPortNumber,
1317  socketdir,
1318  ListenSocket, MAXLISTEN);
1319 
1320  if (status == STATUS_OK)
1321  {
1322  success++;
1323  /* record the first successful Unix socket in lockfile */
1324  if (success == 1)
1326  }
1327  else
1328  ereport(WARNING,
1329  (errmsg("could not create Unix-domain socket in directory \"%s\"",
1330  socketdir)));
1331  }
1332 
1333  if (!success && elemlist != NIL)
1334  ereport(FATAL,
1335  (errmsg("could not create any Unix-domain sockets")));
1336 
1337  list_free_deep(elemlist);
1338  pfree(rawstring);
1339  }
1340 #endif
1341 
1342  /*
1343  * check that we have some socket to listen on
1344  */
1345  if (ListenSocket[0] == PGINVALID_SOCKET)
1346  ereport(FATAL,
1347  (errmsg("no socket created for listening")));
1348 
1349  /*
1350  * If no valid TCP ports, write an empty line for listen address,
1351  * indicating the Unix socket must be used. Note that this line is not
1352  * added to the lock file until there is a socket backing it.
1353  */
1354  if (!listen_addr_saved)
1356 
1357  /*
1358  * Record postmaster options. We delay this till now to avoid recording
1359  * bogus options (eg, unusable port number).
1360  */
1361  if (!CreateOptsFile(argc, argv, my_exec_path))
1362  ExitPostmaster(1);
1363 
1364  /*
1365  * Write the external PID file if requested
1366  */
1367  if (external_pid_file)
1368  {
1369  FILE *fpidfile = fopen(external_pid_file, "w");
1370 
1371  if (fpidfile)
1372  {
1373  fprintf(fpidfile, "%d\n", MyProcPid);
1374  fclose(fpidfile);
1375 
1376  /* Make PID file world readable */
1377  if (chmod(external_pid_file, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) != 0)
1378  write_stderr("%s: could not change permissions of external PID file \"%s\": %s\n",
1380  }
1381  else
1382  write_stderr("%s: could not write external PID file \"%s\": %s\n",
1384 
1386  }
1387 
1388  /*
1389  * Remove old temporary files. At this point there can be no other
1390  * Postgres processes running in this directory, so this should be safe.
1391  */
1393 
1394  /*
1395  * Initialize stats collection subsystem (this does NOT start the
1396  * collector process!)
1397  */
1398  pgstat_init();
1399 
1400  /*
1401  * Initialize the autovacuum subsystem (again, no process start yet)
1402  */
1403  autovac_init();
1404 
1405  /*
1406  * Load configuration files for client authentication.
1407  */
1408  if (!load_hba())
1409  {
1410  /*
1411  * It makes no sense to continue if we fail to load the HBA file,
1412  * since there is no way to connect to the database in this case.
1413  */
1414  ereport(FATAL,
1415  (errmsg("could not load pg_hba.conf")));
1416  }
1417  if (!load_ident())
1418  {
1419  /*
1420  * We can start up without the IDENT file, although it means that you
1421  * cannot log in using any of the authentication methods that need a
1422  * user name mapping. load_ident() already logged the details of error
1423  * to the log.
1424  */
1425  }
1426 
1427 #ifdef HAVE_PTHREAD_IS_THREADED_NP
1428 
1429  /*
1430  * On macOS, libintl replaces setlocale() with a version that calls
1431  * CFLocaleCopyCurrent() when its second argument is "" and every relevant
1432  * environment variable is unset or empty. CFLocaleCopyCurrent() makes
1433  * the process multithreaded. The postmaster calls sigprocmask() and
1434  * calls fork() without an immediate exec(), both of which have undefined
1435  * behavior in a multithreaded program. A multithreaded postmaster is the
1436  * normal case on Windows, which offers neither fork() nor sigprocmask().
1437  */
1438  if (pthread_is_threaded_np() != 0)
1439  ereport(FATAL,
1440  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1441  errmsg("postmaster became multithreaded during startup"),
1442  errhint("Set the LC_ALL environment variable to a valid locale.")));
1443 #endif
1444 
1445  /*
1446  * Remember postmaster startup time
1447  */
1449 
1450  /*
1451  * Report postmaster status in the postmaster.pid file, to allow pg_ctl to
1452  * see what's happening.
1453  */
1455 
1456  /* Start bgwriter and checkpointer so they can help with recovery */
1457  if (CheckpointerPID == 0)
1459  if (BgWriterPID == 0)
1461 
1462  /*
1463  * We're ready to rock and roll...
1464  */
1466  Assert(StartupPID != 0);
1468  pmState = PM_STARTUP;
1469 
1470  /* Some workers may be scheduled to start now */
1472 
1473  status = ServerLoop();
1474 
1475  /*
1476  * ServerLoop probably shouldn't ever return, but if it does, close down.
1477  */
1478  ExitPostmaster(status != STATUS_OK);
1479 
1480  abort(); /* not reached */
1481 }
1482 
1483 
1484 /*
1485  * on_proc_exit callback to close server's listen sockets
 *
 * Visits all MAXLISTEN slots of ListenSocket[] and acts on each entry that
 * is not PGINVALID_SOCKET.  According to the comments below, it then removes
 * the filesystem entries of Unix sockets; socket lock files are deliberately
 * left for a later on_proc_exit callback.
 *
 * NOTE(review): this listing is missing the declarator line (function name
 * and parameters), the statements inside the per-socket branch, and the
 * call that removes the Unix socket files (the embedded numbering skips
 * those lines).  Restore them from the upstream file before relying on
 * this copy.
1486  */
1487 static void
1489 {
1490  int i;
1491 
1492  /*
1493  * First, explicitly close all the socket FDs. We used to just let this
1494  * happen implicitly at postmaster exit, but it's better to close them
1495  * before we remove the postmaster.pid lockfile; otherwise there's a race
1496  * condition if a new postmaster wants to re-use the TCP port number.
1497  */
1498  for (i = 0; i < MAXLISTEN; i++)
1499  {
1500  if (ListenSocket[i] != PGINVALID_SOCKET)
1501  {
 /* NOTE(review): the close statements for this slot are absent here */
1504  }
1505  }
1506 
1507  /*
1508  * Next, remove any filesystem entries for Unix sockets. To avoid race
1509  * conditions against incoming postmasters, this must happen after closing
1510  * the sockets and before removing lock files.
1511  */
1513 
1514  /*
1515  * We don't do anything about socket lock files here; those will be
1516  * removed in a later on_proc_exit callback.
1517  */
1518 }
1519 
1520 /*
1521  * on_proc_exit callback to delete external_pid_file
 *
 * Does nothing when external_pid_file is NULL.  The result of unlink() is
 * not checked, so a failure to remove the file is silently ignored.
 *
 * NOTE(review): the declarator line (function name and parameters) is
 * absent from this listing.
1522  */
1523 static void
1525 {
1526  if (external_pid_file)
1527  unlink(external_pid_file);
1528 }
1529 
1530 
1531 /*
1532  * Compute and check the directory paths to files that are part of the
1533  * installation (as deduced from the postgres executable's own location)
 *
 * Uses argv0 to fill my_exec_path via find_my_exec() and, in EXEC_BACKEND
 * builds, postgres_exec_path via find_other_exec(); either lookup failing
 * is reported with ereport(FATAL).  It then verifies that pkglib_path can
 * be opened as a directory, reporting ereport(ERROR) if it cannot.
 *
 * NOTE(review): this listing is missing the declarator line (function name
 * and parameters), the statement that computes pkglib_path, and one
 * argument line of the final ereport() call.
1534  */
1535 static void
1537 {
1538  DIR *pdir;
1539 
1540  /* Locate the postgres executable itself */
1541  if (find_my_exec(argv0, my_exec_path) < 0)
1542  ereport(FATAL,
1543  (errmsg("%s: could not locate my own executable path", argv0)));
1544 
1545 #ifdef EXEC_BACKEND
1546  /* Locate executable backend before we change working directory */
1547  if (find_other_exec(argv0, "postgres", PG_BACKEND_VERSIONSTR,
1548  postgres_exec_path) < 0)
1549  ereport(FATAL,
1550  (errmsg("%s: could not locate matching postgres executable",
1551  argv0)));
1552 #endif
1553 
1554  /*
1555  * Locate the pkglib directory --- this has to be set early in case we try
1556  * to load any modules from it in response to postgresql.conf entries.
1557  */
1559 
1560  /*
1561  * Verify that there's a readable directory there; otherwise the Postgres
1562  * installation is incomplete or corrupt. (A typical cause of this
1563  * failure is that the postgres executable has been moved or hardlinked to
1564  * some directory that's not a sibling of the installation lib/
1565  * directory.)
1566  */
1567  pdir = AllocateDir(pkglib_path);
1568  if (pdir == NULL)
1569  ereport(ERROR,
1571  errmsg("could not open directory \"%s\": %m",
1572  pkglib_path),
1573  errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
1574  my_exec_path)));
 /* The directory is opened only as an existence/readability probe. */
1575  FreeDir(pdir);
1576 
1577  /*
1578  * XXX is it worth similarly checking the share/ directory? If the lib/
1579  * directory is there, then share/ probably is too.
1580  */
1581 }
1582 
1583 /*
1584  * Check that pg_control exists in the correct location in the data directory.
1585  *
1586  * No attempt is made to validate the contents of pg_control here. This is
1587  * just a sanity check to see if we are looking at a real data directory.
 *
 * The file "<DataDir>/global/pg_control" is opened for reading and closed
 * again immediately.  If it cannot be opened, a message including
 * strerror(errno) is written with write_stderr() and ExitPostmaster(2) is
 * called.
 *
 * NOTE(review): the declarator line (function name and parameters) is
 * absent from this listing.
1588  */
1589 static void
1591 {
1592  char path[MAXPGPATH];
1593  FILE *fp;
1594 
1595  snprintf(path, sizeof(path), "%s/global/pg_control", DataDir);
1596 
1597  fp = AllocateFile(path, PG_BINARY_R);
1598  if (fp == NULL)
1599  {
1600  write_stderr("%s: could not find the database system\n"
1601  "Expected to find it in the directory \"%s\",\n"
1602  "but could not open file \"%s\": %s\n",
1603  progname, DataDir, path, strerror(errno));
1604  ExitPostmaster(2);
1605  }
1606  FreeFile(fp);
1607 }
1608 
1609 /*
1610  * Determine how long we should let ServerLoop sleep.
1611  *
1612  * In normal conditions we wait at most one minute, to ensure that the other
1613  * background tasks handled by ServerLoop get done even when no requests are
1614  * arriving. However, if there are background workers waiting to be started,
1615  * we don't actually sleep so that they are quickly serviced. Other exception
1616  * cases are as shown in the code.
 *
 * The result is stored in *timeout (tv_sec and tv_usec are both always
 * set); nothing is returned.  tv_sec never exceeds 60 on any path, unless
 * SIGKILL_CHILDREN_AFTER_SECS itself is larger than that.
 *
 * NOTE(review): this listing is missing four interior lines: the second
 * operand of the first "if" condition, the header of the loop over the
 * background worker list, the first half of the condition that leads to
 * ForgetBackgroundWorker(), and the call that fills secs/microsecs.
 * Restore them from the upstream file before relying on this copy.
1617  */
1618 static void
1619 DetermineSleepTime(struct timeval *timeout)
1620 {
1621  TimestampTz next_wakeup = 0;
1622 
1623  /*
1624  * Normal case: either there are no background workers at all, or we're in
1625  * a shutdown sequence (during which we ignore bgworkers altogether).
1626  */
1627  if (Shutdown > NoShutdown ||
1629  {
1630  if (AbortStartTime != 0)
1631  {
1632  /* time left to abort; clamp to 0 in case it already expired */
1633  timeout->tv_sec = SIGKILL_CHILDREN_AFTER_SECS -
1634  (time(NULL) - AbortStartTime);
1635  timeout->tv_sec = Max(timeout->tv_sec, 0);
1636  timeout->tv_usec = 0;
1637  }
1638  else
1639  {
1640  timeout->tv_sec = 60;
1641  timeout->tv_usec = 0;
1642  }
1643  return;
1644  }
1645 
 /* A worker is waiting to be started: request a zero-length sleep. */
1646  if (StartWorkerNeeded)
1647  {
1648  timeout->tv_sec = 0;
1649  timeout->tv_usec = 0;
1650  return;
1651  }
1652 
1653  if (HaveCrashedWorker)
1654  {
1655  slist_mutable_iter siter;
1656 
1657  /*
1658  * When there are crashed bgworkers, we sleep just long enough that
1659  * they are restarted when they request to be. Scan the list to
1660  * determine the minimum of all wakeup times according to most recent
1661  * crash time and requested restart interval.
1662  */
1664  {
1665  RegisteredBgWorker *rw;
1666  TimestampTz this_wakeup;
1667 
1668  rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
1669 
 /* Workers with no recorded crash time do not affect the wakeup. */
1670  if (rw->rw_crashed_at == 0)
1671  continue;
1672 
1674  || rw->rw_terminate)
1675  {
1676  ForgetBackgroundWorker(&siter);
1677  continue;
1678  }
1679 
 /* bgw_restart_time is scaled by 1000 to get milliseconds. */
1680  this_wakeup = TimestampTzPlusMilliseconds(rw->rw_crashed_at,
1681  1000L * rw->rw_worker.bgw_restart_time);
1682  if (next_wakeup == 0 || this_wakeup < next_wakeup)
1683  next_wakeup = this_wakeup;
1684  }
1685  }
1686 
 /* next_wakeup == 0 means no crashed worker produced a wakeup time. */
1687  if (next_wakeup != 0)
1688  {
1689  long secs;
1690  int microsecs;
1691 
1693  &secs, &microsecs);
1694  timeout->tv_sec = secs;
1695  timeout->tv_usec = microsecs;
1696 
1697  /* Ensure we don't exceed one minute */
1698  if (timeout->tv_sec > 60)
1699  {
1700  timeout->tv_sec = 60;
1701  timeout->tv_usec = 0;
1702  }
1703  }
1704  else
1705  {
1706  timeout->tv_sec = 60;
1707  timeout->tv_usec = 0;
1708  }
1709 }
1710 
1711 /*
1712  * Main idle loop of postmaster
1713  *
1714  * NB: Needs to be called with signals blocked
 *
 * Each iteration waits for activity on the listen sockets (or just sleeps
 * in PM_WAIT_DEAD_END state), hands every ready socket to ConnCreate() and
 * BackendStartup(), looks after missing helper processes, and finally runs
 * the time-based housekeeping at the bottom of the loop.
 *
 * The only return visible here is STATUS_ERROR, taken when select() fails
 * with an errno other than EINTR or EWOULDBLOCK; otherwise the for(;;)
 * loop never terminates.
 *
 * NOTE(review): this listing is missing the declarator line (function name
 * and parameters) and a number of interior statements, including the
 * calls that unblock signals before sleeping and most of the statements
 * that start or signal child processes.  The conditions guarding them are
 * still present but several have no visible body.  Restore the missing
 * lines from the upstream file before relying on this copy.
1715  */
1716 static int
1718 {
1719  fd_set readmask;
1720  int nSockets;
1721  time_t last_lockfile_recheck_time,
1722  last_touch_time;
1723 
1724  last_lockfile_recheck_time = last_touch_time = time(NULL);
1725 
 /* The mask is built once; each iteration works on a copy (rmask). */
1726  nSockets = initMasks(&readmask);
1727 
1728  for (;;)
1729  {
1730  fd_set rmask;
1731  int selres;
1732  time_t now;
1733 
1734  /*
1735  * Wait for a connection request to arrive.
1736  *
1737  * We block all signals except while sleeping. That makes it safe for
1738  * signal handlers, which again block all signals while executing, to
1739  * do nontrivial work.
1740  *
1741  * If we are in PM_WAIT_DEAD_END state, then we don't want to accept
1742  * any new connections, so we don't call select(), and just sleep.
1743  */
1744  memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
1745 
1746  if (pmState == PM_WAIT_DEAD_END)
1747  {
1749 
1750  pg_usleep(100000L); /* 100 msec seems reasonable */
 /* pretend select() timed out so no socket is examined below */
1751  selres = 0;
1752 
1753  PG_SETMASK(&BlockSig);
1754  }
1755  else
1756  {
1757  /* must set timeout each time; some OSes change it! */
1758  struct timeval timeout;
1759 
1760  /* Needs to run with blocked signals! */
1761  DetermineSleepTime(&timeout);
1762 
1764 
1765  selres = select(nSockets, &rmask, NULL, NULL, &timeout);
1766 
1767  PG_SETMASK(&BlockSig);
1768  }
1769 
1770  /* Now check the select() result */
1771  if (selres < 0)
1772  {
 /* EINTR and EWOULDBLOCK are tolerated; the loop simply carries on */
1773  if (errno != EINTR && errno != EWOULDBLOCK)
1774  {
1775  ereport(LOG,
1777  errmsg("select() failed in postmaster: %m")));
1778  return STATUS_ERROR;
1779  }
1780  }
1781 
1782  /*
1783  * New connection pending on any of our sockets? If so, fork a child
1784  * process to deal with it.
1785  */
1786  if (selres > 0)
1787  {
1788  int i;
1789 
1790  for (i = 0; i < MAXLISTEN; i++)
1791  {
 /* the first invalid slot ends the scan, as in initMasks() */
1792  if (ListenSocket[i] == PGINVALID_SOCKET)
1793  break;
1794  if (FD_ISSET(ListenSocket[i], &rmask))
1795  {
1796  Port *port;
1797 
1798  port = ConnCreate(ListenSocket[i]);
1799  if (port)
1800  {
1801  BackendStartup(port);
1802 
1803  /*
1804  * We no longer need the open socket or port structure
1805  * in this process
1806  */
1807  StreamClose(port->sock);
1808  ConnFree(port);
1809  }
1810  }
1811  }
1812  }
1813 
1814  /* If we have lost the log collector, try to start a new one */
1815  if (SysLoggerPID == 0 && Logging_collector)
1817 
1818  /*
1819  * If no background writer process is running, and we are not in a
1820  * state that prevents it, start one. It doesn't matter if this
1821  * fails, we'll just try again later. Likewise for the checkpointer.
1822  */
1823  if (pmState == PM_RUN || pmState == PM_RECOVERY ||
1825  {
1826  if (CheckpointerPID == 0)
1828  if (BgWriterPID == 0)
1830  }
1831 
1832  /*
1833  * Likewise, if we have lost the walwriter process, try to start a new
1834  * one. But this is needed only in normal operation (else we cannot
1835  * be writing any new WAL).
1836  */
1837  if (WalWriterPID == 0 && pmState == PM_RUN)
1839 
1840  /*
1841  * If we have lost the autovacuum launcher, try to start a new one. We
1842  * don't want autovacuum to run in binary upgrade mode because
1843  * autovacuum might update relfrozenxid for empty tables before the
1844  * physical files are put in place.
1845  */
1846  if (!IsBinaryUpgrade && AutoVacPID == 0 &&
1848  pmState == PM_RUN)
1849  {
1851  if (AutoVacPID != 0)
1852  start_autovac_launcher = false; /* signal processed */
1853  }
1854 
1855  /* If we have lost the stats collector, try to start a new one */
1856  if (PgStatPID == 0 &&
1857  (pmState == PM_RUN || pmState == PM_HOT_STANDBY))
1858  PgStatPID = pgstat_start();
1859 
1860  /* If we have lost the archiver, try to start a new one. */
1861  if (PgArchPID == 0 && PgArchStartupAllowed())
1863 
1864  /* If we need to signal the autovacuum launcher, do so now */
1866  {
1867  avlauncher_needs_signal = false;
1868  if (AutoVacPID != 0)
1870  }
1871 
1872  /* If we need to start a WAL receiver, try to do that now */
1875 
1876  /* Get other worker processes running, if needed */
1879 
1880 #ifdef HAVE_PTHREAD_IS_THREADED_NP
1881 
1882  /*
1883  * With assertions enabled, check regularly for appearance of
1884  * additional threads. All builds check at start and exit.
1885  */
1886  Assert(pthread_is_threaded_np() == 0);
1887 #endif
1888 
1889  /*
1890  * Lastly, check to see if it's time to do some things that we don't
1891  * want to do every single time through the loop, because they're a
1892  * bit expensive. Note that there's up to a minute of slop in when
1893  * these tasks will be performed, since DetermineSleepTime() will let
1894  * us sleep at most that long; except for SIGKILL timeout which has
1895  * special-case logic there.
1896  */
1897  now = time(NULL);
1898 
1899  /*
1900  * If we already sent SIGQUIT to children and they are slow to shut
1901  * down, it's time to send them SIGKILL. This doesn't happen
1902  * normally, but under certain conditions backends can get stuck while
1903  * shutting down. This is a last measure to get them unwedged.
1904  *
1905  * Note we also do this during recovery from a process crash.
1906  */
1907  if ((Shutdown >= ImmediateShutdown || (FatalError && !SendStop)) &&
1908  AbortStartTime != 0 &&
1910  {
1911  /* We were gentle with them before. Not anymore */
1912  ereport(LOG,
1913  (errmsg("issuing SIGKILL to recalcitrant children")));
1915  /* reset flag so we don't SIGKILL again */
1916  AbortStartTime = 0;
1917  }
1918 
1919  /*
1920  * Once a minute, verify that postmaster.pid hasn't been removed or
1921  * overwritten. If it has, we force a shutdown. This avoids having
1922  * postmasters and child processes hanging around after their database
1923  * is gone, and maybe causing problems if a new database cluster is
1924  * created in the same place. It also provides some protection
1925  * against a DBA foolishly removing postmaster.pid and manually
1926  * starting a new postmaster. Data corruption is likely to ensue from
1927  * that anyway, but we can minimize the damage by aborting ASAP.
1928  */
1929  if (now - last_lockfile_recheck_time >= 1 * SECS_PER_MINUTE)
1930  {
1931  if (!RecheckDataDirLockFile())
1932  {
1933  ereport(LOG,
1934  (errmsg("performing immediate shutdown because data directory lock file is invalid")));
1936  }
 /* the timestamp is refreshed whether or not the recheck succeeded */
1937  last_lockfile_recheck_time = now;
1938  }
1939 
1940  /*
1941  * Touch Unix socket and lock files every 58 minutes, to ensure that
1942  * they are not removed by overzealous /tmp-cleaning tasks. We assume
1943  * no one runs cleaners with cutoff times of less than an hour ...
1944  */
1945  if (now - last_touch_time >= 58 * SECS_PER_MINUTE)
1946  {
1947  TouchSocketFiles();
1949  last_touch_time = now;
1950  }
1951  }
1952 }
1953 
1954 /*
1955  * Initialise the masks for select() for the ports we are listening on.
1956  * Return the number of sockets to listen on.
1957  */
1958 static int
1959 initMasks(fd_set *rmask)
1960 {
1961  int maxsock = -1;
1962  int i;
1963 
1964  FD_ZERO(rmask);
1965 
1966  for (i = 0; i < MAXLISTEN; i++)
1967  {
1968  int fd = ListenSocket[i];
1969 
1970  if (fd == PGINVALID_SOCKET)
1971  break;
1972  FD_SET(fd, rmask);
1973 
1974  if (fd > maxsock)
1975  maxsock = fd;
1976  }
1977 
1978  return maxsock + 1;
1979 }
1980 
1981 
1982 /*
1983  * Read a client's startup packet and do something according to it.
1984  *
1985  * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1986  * not return at all.
1987  *
1988  * (Note that ereport(FATAL) stuff is sent to the client, so only use it
1989  * if that's what you want. Return STATUS_ERROR if you don't want to
1990  * send anything to the client, which would typically be appropriate
1991  * if we detect a communications failure.)
1992  *
1993  * Set ssl_done and/or gss_done when negotiation of an encrypted layer
1994  * (currently, TLS or GSSAPI) is completed. A successful negotiation of either
1995  * encryption layer sets both flags, but a rejected negotiation sets only the
1996  * flag for that layer, since the client may wish to try the other one. We
1997  * should make no assumption here about the order in which the client may make
1998  * requests.
1999  */
2000 static int
2001 ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done)
2002 {
2003  int32 len;
2004  char *buf;
2005  ProtocolVersion proto;
2006  MemoryContext oldcontext;
2007 
2008  pq_startmsgread();
2009 
2010  /*
2011  * Grab the first byte of the length word separately, so that we can tell
2012  * whether we have no data at all or an incomplete packet. (This might
2013  * sound inefficient, but it's not really, because of buffering in
2014  * pqcomm.c.)
2015  */
2016  if (pq_getbytes((char *) &len, 1) == EOF)
2017  {
2018  /*
2019  * If we get no data at all, don't clutter the log with a complaint;
2020  * such cases often occur for legitimate reasons. An example is that
2021  * we might be here after responding to NEGOTIATE_SSL_CODE, and if the
2022  * client didn't like our response, it'll probably just drop the
2023  * connection. Service-monitoring software also often just opens and
2024  * closes a connection without sending anything. (So do port
2025  * scanners, which may be less benign, but it's not really our job to
2026  * notice those.)
2027  */
2028  return STATUS_ERROR;
2029  }
2030 
2031  if (pq_getbytes(((char *) &len) + 1, 3) == EOF)
2032  {
2033  /* Got a partial length word, so bleat about that */
2034  if (!ssl_done && !gss_done)
2036  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2037  errmsg("incomplete startup packet")));
2038  return STATUS_ERROR;
2039  }
2040 
2041  len = pg_ntoh32(len);
2042  len -= 4;
2043 
2044  if (len < (int32) sizeof(ProtocolVersion) ||
2046  {
2048  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2049  errmsg("invalid length of startup packet")));
2050  return STATUS_ERROR;
2051  }
2052 
2053  /*
2054  * Allocate space to hold the startup packet, plus one extra byte that's
2055  * initialized to be zero. This ensures we will have null termination of
2056  * all strings inside the packet.
2057  */
2058  buf = palloc(len + 1);
2059  buf[len] = '\0';
2060 
2061  if (pq_getbytes(buf, len) == EOF)
2062  {
2064  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2065  errmsg("incomplete startup packet")));
2066  return STATUS_ERROR;
2067  }
2068  pq_endmsgread();
2069 
2070  /*
2071  * The first field is either a protocol version number or a special
2072  * request code.
2073  */
2074  port->proto = proto = pg_ntoh32(*((ProtocolVersion *) buf));
2075 
2076  if (proto == CANCEL_REQUEST_CODE)
2077  {
2078  processCancelRequest(port, buf);
2079  /* Not really an error, but we don't want to proceed further */
2080  return STATUS_ERROR;
2081  }
2082 
2083  if (proto == NEGOTIATE_SSL_CODE && !ssl_done)
2084  {
2085  char SSLok;
2086 
2087 #ifdef USE_SSL
2088  /* No SSL when disabled or on Unix sockets */
2089  if (!LoadedSSL || IS_AF_UNIX(port->laddr.addr.ss_family))
2090  SSLok = 'N';
2091  else
2092  SSLok = 'S'; /* Support for SSL */
2093 #else
2094  SSLok = 'N'; /* No support for SSL */
2095 #endif
2096 
2097 retry1:
2098  if (send(port->sock, &SSLok, 1, 0) != 1)
2099  {
2100  if (errno == EINTR)
2101  goto retry1; /* if interrupted, just retry */
2104  errmsg("failed to send SSL negotiation response: %m")));
2105  return STATUS_ERROR; /* close the connection */
2106  }
2107 
2108 #ifdef USE_SSL
2109  if (SSLok == 'S' && secure_open_server(port) == -1)
2110  return STATUS_ERROR;
2111 #endif
2112 
2113  /*
2114  * regular startup packet, cancel, etc packet should follow, but not
2115  * another SSL negotiation request, and a GSS request should only
2116  * follow if SSL was rejected (client may negotiate in either order)
2117  */
2118  return ProcessStartupPacket(port, true, SSLok == 'S');
2119  }
2120  else if (proto == NEGOTIATE_GSS_CODE && !gss_done)
2121  {
2122  char GSSok = 'N';
2123 
2124 #ifdef ENABLE_GSS
2125  /* No GSSAPI encryption when on Unix socket */
2126  if (!IS_AF_UNIX(port->laddr.addr.ss_family))
2127  GSSok = 'G';
2128 #endif
2129 
2130  while (send(port->sock, &GSSok, 1, 0) != 1)
2131  {
2132  if (errno == EINTR)
2133  continue;
2136  errmsg("failed to send GSSAPI negotiation response: %m")));
2137  return STATUS_ERROR; /* close the connection */
2138  }
2139 
2140 #ifdef ENABLE_GSS
2141  if (GSSok == 'G' && secure_open_gssapi(port) == -1)
2142  return STATUS_ERROR;
2143 #endif
2144 
2145  /*
2146  * regular startup packet, cancel, etc packet should follow, but not
2147  * another GSS negotiation request, and an SSL request should only
2148  * follow if GSS was rejected (client may negotiate in either order)
2149  */
2150  return ProcessStartupPacket(port, GSSok == 'G', true);
2151  }
2152 
2153  /* Could add additional special packet types here */
2154 
2155  /*
2156  * Set FrontendProtocol now so that ereport() knows what format to send if
2157  * we fail during startup.
2158  */
2159  FrontendProtocol = proto;
2160 
2161  /* Check that the major protocol version is in range. */
2164  ereport(FATAL,
2165  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2166  errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
2167  PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
2171 
2172  /*
2173  * Now fetch parameters out of startup packet and save them into the Port
2174  * structure. All data structures attached to the Port struct must be
2175  * allocated in TopMemoryContext so that they will remain available in a
2176  * running backend (even after PostmasterContext is destroyed). We need
2177  * not worry about leaking this storage on failure, since we aren't in the
2178  * postmaster process anymore.
2179  */
2181 
2182  /* Handle protocol version 3 startup packet */
2183  {
2184  int32 offset = sizeof(ProtocolVersion);
2185  List *unrecognized_protocol_options = NIL;
2186 
2187  /*
2188  * Scan packet body for name/option pairs. We can assume any string
2189  * beginning within the packet body is null-terminated, thanks to
2190  * zeroing extra byte above.
2191  */
2192  port->guc_options = NIL;
2193 
2194  while (offset < len)
2195  {
2196  char *nameptr = buf + offset;
2197  int32 valoffset;
2198  char *valptr;
2199 
2200  if (*nameptr == '\0')
2201  break; /* found packet terminator */
2202  valoffset = offset + strlen(nameptr) + 1;
2203  if (valoffset >= len)
2204  break; /* missing value, will complain below */
2205  valptr = buf + valoffset;
2206 
2207  if (strcmp(nameptr, "database") == 0)
2208  port->database_name = pstrdup(valptr);
2209  else if (strcmp(nameptr, "user") == 0)
2210  port->user_name = pstrdup(valptr);
2211  else if (strcmp(nameptr, "options") == 0)
2212  port->cmdline_options = pstrdup(valptr);
2213  else if (strcmp(nameptr, "replication") == 0)
2214  {
2215  /*
2216  * Due to backward compatibility concerns the replication
2217  * parameter is a hybrid beast which allows the value to be
2218  * either boolean or the string 'database'. The latter
2219  * connects to a specific database which is e.g. required for
2220  * logical decoding.
2221  */
2222  if (strcmp(valptr, "database") == 0)
2223  {
2224  am_walsender = true;
2225  am_db_walsender = true;
2226  }
2227  else if (!parse_bool(valptr, &am_walsender))
2228  ereport(FATAL,
2229  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2230  errmsg("invalid value for parameter \"%s\": \"%s\"",
2231  "replication",
2232  valptr),
2233  errhint("Valid values are: \"false\", 0, \"true\", 1, \"database\".")));
2234  }
2235  else if (strncmp(nameptr, "_pq_.", 5) == 0)
2236  {
2237  /*
2238  * Any option beginning with _pq_. is reserved for use as a
2239  * protocol-level option, but at present no such options are
2240  * defined.
2241  */
2242  unrecognized_protocol_options =
2243  lappend(unrecognized_protocol_options, pstrdup(nameptr));
2244  }
2245  else
2246  {
2247  /* Assume it's a generic GUC option */
2248  port->guc_options = lappend(port->guc_options,
2249  pstrdup(nameptr));
2250  port->guc_options = lappend(port->guc_options,
2251  pstrdup(valptr));
2252 
2253  /*
2254  * Copy application_name to port if we come across it. This
2255  * is done so we can log the application_name in the
2256  * connection authorization message. Note that the GUC would
2257  * be used but we haven't gone through GUC setup yet.
2258  */
2259  if (strcmp(nameptr, "application_name") == 0)
2260  {
2261  char *tmp_app_name = pstrdup(valptr);
2262 
2263  pg_clean_ascii(tmp_app_name);
2264 
2265  port->application_name = tmp_app_name;
2266  }
2267  }
2268  offset = valoffset + strlen(valptr) + 1;
2269  }
2270 
2271  /*
2272  * If we didn't find a packet terminator exactly at the end of the
2273  * given packet length, complain.
2274  */
2275  if (offset != len - 1)
2276  ereport(FATAL,
2277  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2278  errmsg("invalid startup packet layout: expected terminator as last byte")));
2279 
2280  /*
2281  * If the client requested a newer protocol version or if the client
2282  * requested any protocol options we didn't recognize, let them know
2283  * the newest minor protocol version we do support and the names of
2284  * any unrecognized options.
2285  */
2287  unrecognized_protocol_options != NIL)
2288  SendNegotiateProtocolVersion(unrecognized_protocol_options);
2289  }
2290 
2291  /* Check a user name was given. */
2292  if (port->user_name == NULL || port->user_name[0] == '\0')
2293  ereport(FATAL,
2294  (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
2295  errmsg("no PostgreSQL user name specified in startup packet")));
2296 
2297  /* The database defaults to the user name. */
2298  if (port->database_name == NULL || port->database_name[0] == '\0')
2299  port->database_name = pstrdup(port->user_name);
2300 
2301  if (Db_user_namespace)
2302  {
2303  /*
2304  * If user@, it is a global user, remove '@'. We only want to do this
2305  * if there is an '@' at the end and no earlier in the user string or
2306  * they may fake as a local user of another database attaching to this
2307  * database.
2308  */
2309  if (strchr(port->user_name, '@') ==
2310  port->user_name + strlen(port->user_name) - 1)
2311  *strchr(port->user_name, '@') = '\0';
2312  else
2313  {
2314  /* Append '@' and dbname */
2315  port->user_name = psprintf("%s@%s", port->user_name, port->database_name);
2316  }
2317  }
2318 
2319  /*
2320  * Truncate given database and user names to length of a Postgres name.
2321  * This avoids lookup failures when overlength names are given.
2322  */
2323  if (strlen(port->database_name) >= NAMEDATALEN)
2324  port->database_name[NAMEDATALEN - 1] = '\0';
2325  if (strlen(port->user_name) >= NAMEDATALEN)
2326  port->user_name[NAMEDATALEN - 1] = '\0';
2327 
2328  if (am_walsender)
2330  else
2332 
2333  /*
2334  * Normal walsender backends, e.g. for streaming replication, are not
2335  * connected to a particular database. But walsenders used for logical
2336  * replication need to connect to a specific database. We allow streaming
2337  * replication commands to be issued even if connected to a database as it
2338  * can make sense to first make a basebackup and then stream changes
2339  * starting from that.
2340  */
2341  if (am_walsender && !am_db_walsender)
2342  port->database_name[0] = '\0';
2343 
2344  /*
2345  * Done putting stuff in TopMemoryContext.
2346  */
2347  MemoryContextSwitchTo(oldcontext);
2348 
2349  /*
2350  * If we're going to reject the connection due to database state, say so
2351  * now instead of wasting cycles on an authentication exchange. (This also
2352  * allows a pg_ping utility to be written.)
2353  */
2354  switch (port->canAcceptConnections)
2355  {
2356  case CAC_STARTUP:
2357  ereport(FATAL,
2359  errmsg("the database system is starting up")));
2360  break;
2361  case CAC_NOTCONSISTENT:
2362  if (EnableHotStandby)
2363  ereport(FATAL,
2365  errmsg("the database system is not yet accepting connections"),
2366  errdetail("Consistent recovery state has not been yet reached.")));
2367  else
2368  ereport(FATAL,
2370  errmsg("the database system is not accepting connections"),
2371  errdetail("Hot standby mode is disabled.")));
2372  break;
2373  case CAC_SHUTDOWN:
2374  ereport(FATAL,
2376  errmsg("the database system is shutting down")));
2377  break;
2378  case CAC_RECOVERY:
2379  ereport(FATAL,
2381  errmsg("the database system is in recovery mode")));
2382  break;
2383  case CAC_TOOMANY:
2384  ereport(FATAL,
2385  (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
2386  errmsg("sorry, too many clients already")));
2387  break;
2388  case CAC_SUPERUSER:
2389  /* OK for now, will check in InitPostgres */
2390  break;
2391  case CAC_OK:
2392  break;
2393  }
2394 
2395  return STATUS_OK;
2396 }
2397 
2398 /*
2399  * Send a NegotiateProtocolVersion to the client. This lets the client know
2400  * that they have requested a newer minor protocol version than we are able
2401  * to speak. We'll speak the highest version we know about; the client can,
2402  * of course, abandon the connection if that's a problem.
2403  *
2404  * We also include in the response a list of protocol options we didn't
2405  * understand. This allows clients to include optional parameters that might
2406  * be present either in newer protocol versions or third-party protocol
2407  * extensions without fear of having to reconnect if those options are not
2408  * understood, while at the same time making certain that the client is aware
2409  * of which options were actually accepted.
      *
      * unrecognized_protocol_options: list whose elements are handed to
      * pq_sendstring() one by one (presumably the option names as C strings --
      * confirm against the caller). The message is started with type byte 'v'
      * and is ended but not flushed here.
      *
      * NOTE(review): listing lines 2414 and 2418 are absent from this extract
      * (the numbering skips them). They presumably declare "buf" and send the
      * protocol version mentioned above -- confirm against the full source.
2410  */
2411 static void
2412 SendNegotiateProtocolVersion(List *unrecognized_protocol_options)
2413 {
2415  ListCell *lc;
2416 
2417  pq_beginmessage(&buf, 'v'); /* NegotiateProtocolVersion */
      /* Number of unrecognized options, followed by one string per option. */
2419  pq_sendint32(&buf, list_length(unrecognized_protocol_options));
2420  foreach(lc, unrecognized_protocol_options)
2421  pq_sendstring(&buf, lfirst(lc));
2422  pq_endmessage(&buf);
2423 
2424  /* no need to flush, some other message will follow */
2425 }
2426 
2427 /*
2428  * The client has sent a cancel request packet, not a normal
2429  * start-a-new-connection packet. Perform the necessary processing.
2430  * Nothing is sent back to the client.
      *
      * The packet is read as a CancelRequestPacket; its backendPID and
      * cancelAuthCode fields are converted with pg_ntoh32(). The backend list
      * (or, under EXEC_BACKEND, ShmemBackendArray) is scanned for an entry with
      * the same pid. If that entry's cancel_key equals the supplied code the
      * process is sent SIGINT via signal_child(); otherwise the mismatch is
      * logged. Either way the search stops at the first pid match. If no entry
      * matches, that is logged too.
      *
      * NOTE(review): listing line 2433 (the line carrying the function name
      * and parameter list) is absent from this extract. The body refers to a
      * parameter named "pkt"; the startup-packet code calls this routine as
      * processCancelRequest(port, buf).
2431  */
2432 static void
2434 {
2435  CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
2436  int backendPID;
2437  int32 cancelAuthCode;
2438  Backend *bp;
2439 
2440 #ifndef EXEC_BACKEND
2441  dlist_iter iter;
2442 #else
2443  int i;
2444 #endif
2445 
      /* Decode the two packet fields into host-order local values. */
2446  backendPID = (int) pg_ntoh32(canc->backendPID);
2447  cancelAuthCode = (int32) pg_ntoh32(canc->cancelAuthCode);
2448 
2449  /*
2450  * See if we have a matching backend. In the EXEC_BACKEND case, we can no
2451  * longer access the postmaster's own backend list, and must rely on the
2452  * duplicate array in shared memory.
2453  */
2454 #ifndef EXEC_BACKEND
2455  dlist_foreach(iter, &BackendList)
2456  {
2457  bp = dlist_container(Backend, elem, iter.cur);
2458 #else
2459  for (i = MaxLivePostmasterChildren() - 1; i >= 0; i--)
2460  {
2461  bp = (Backend *) &ShmemBackendArray[i];
2462 #endif
2463  if (bp->pid == backendPID)
2464  {
2465  if (bp->cancel_key == cancelAuthCode)
2466  {
2467  /* Found a match; signal that backend to cancel current op */
2468  ereport(DEBUG2,
2469  (errmsg_internal("processing cancel request: sending SIGINT to process %d",
2470  backendPID)));
2471  signal_child(bp->pid, SIGINT);
2472  }
2473  else
2474  /* Right PID, wrong key: no way, Jose */
2475  ereport(LOG,
2476  (errmsg("wrong key in cancel request for process %d",
2477  backendPID)));
      /* A pid match ends the search whether or not the key matched. */
2478  return;
2479  }
2480 #ifndef EXEC_BACKEND /* make GNU Emacs 26.1 see brace balance */
2481  }
2482 #else
2483  }
2484 #endif
2485 
2486  /* No matching backend */
2487  ereport(LOG,
2488  (errmsg("PID %d in cancel request did not match any process",
2489  backendPID)));
2490 }
2491 
2492 /*
2493  * canAcceptConnections --- check to see if database state allows connections
2494  * of the specified type. backend_type can be BACKEND_TYPE_NORMAL,
2495  * BACKEND_TYPE_AUTOVAC, or BACKEND_TYPE_BGWORKER. (Note that we don't yet
2496  * know whether a NORMAL connection might turn into a walsender.)
      *
      * Returns a CAC_state. The visible code can produce CAC_OK (the initial
      * value), CAC_SHUTDOWN, CAC_STARTUP, CAC_NOTCONSISTENT, CAC_RECOVERY,
      * CAC_SUPERUSER and CAC_TOOMANY. The state checks return immediately;
      * CAC_SUPERUSER is only stored in "result" so that the later child-count
      * check can still replace it with CAC_TOOMANY.
      *
      * NOTE(review): listing lines 2533 and 2551 are absent from this extract
      * (the numbering skips them). Each is presumably the "if" condition
      * guarding the assignment that follows it -- confirm against the full
      * source before relying on the exact conditions.
2497  */
2498 static CAC_state
2499 canAcceptConnections(int backend_type)
2500 {
2501  CAC_state result = CAC_OK;
2502 
2503  /*
2504  * Can't start backends when in startup/shutdown/inconsistent recovery
2505  * state. We treat autovac workers the same as user backends for this
2506  * purpose. However, bgworkers are excluded from this test; we expect
2507  * bgworker_should_start_now() decided whether the DB state allows them.
2508  */
2509  if (pmState != PM_RUN && pmState != PM_HOT_STANDBY &&
2510  backend_type != BACKEND_TYPE_BGWORKER)
2511  {
2512  if (Shutdown > NoShutdown)
2513  return CAC_SHUTDOWN; /* shutdown is pending */
2514  else if (!FatalError && pmState == PM_STARTUP)
2515  return CAC_STARTUP; /* normal startup */
2516  else if (!FatalError && pmState == PM_RECOVERY)
2517  return CAC_NOTCONSISTENT; /* not yet at consistent recovery
2518  * state */
2519  else
2520  return CAC_RECOVERY; /* else must be crash recovery */
2521  }
2522 
2523  /*
2524  * "Smart shutdown" restrictions are applied only to normal connections,
2525  * not to autovac workers or bgworkers. When only superusers can connect,
2526  * we return CAC_SUPERUSER to indicate that superuserness must be checked
2527  * later. Note that neither CAC_OK nor CAC_SUPERUSER can safely be
2528  * returned until we have checked for too many children.
2529  */
2530  if (connsAllowed != ALLOW_ALL_CONNS &&
2531  backend_type == BACKEND_TYPE_NORMAL)
2532  {
      /* NOTE(review): the condition on listing line 2533 is missing here. */
2534  result = CAC_SUPERUSER; /* allow superusers only */
2535  else
2536  return CAC_SHUTDOWN; /* shutdown is pending */
2537  }
2538 
2539  /*
2540  * Don't start too many children.
2541  *
2542  * We allow more connections here than we can have backends because some
2543  * might still be authenticating; they might fail auth, or some existing
2544  * backend might exit before the auth cycle is completed. The exact
2545  * MaxBackends limit is enforced when a new backend tries to join the
2546  * shared-inval backend array.
2547  *
2548  * The limit here must match the sizes of the per-child-process arrays;
2549  * see comments for MaxLivePostmasterChildren().
2550  */
      /* NOTE(review): the condition on listing line 2551 is missing here. */
2552  result = CAC_TOOMANY;
2553 
2554  return result;
2555 }
2556 
2557 
2558 /*
2559  * ConnCreate -- create a local connection data structure
2560  *
2561  * Returns NULL on failure, other than out-of-memory which is fatal.
      *
      * serverFd is passed straight to StreamConnection() together with the new
      * Port. The Port comes from calloc(), so it starts out zero-filled.
      *
      * If the allocation fails, the error is logged and ExitPostmaster(1) is
      * called. If StreamConnection() does not return STATUS_OK, the socket is
      * closed when port->sock is valid, the Port is released with ConnFree(),
      * and NULL is returned. On success the Port is returned to the caller.
2562  */
2563 static Port *
2564 ConnCreate(int serverFd)
2565 {
2566  Port *port;
2567 
2568  if (!(port = (Port *) calloc(1, sizeof(Port))))
2569  {
2570  ereport(LOG,
2571  (errcode(ERRCODE_OUT_OF_MEMORY),
2572  errmsg("out of memory")));
2573  ExitPostmaster(1);
2574  }
2575 
2576  if (StreamConnection(serverFd, port) != STATUS_OK)
2577  {
      /* Undo whatever was set up: close the socket if any, then free. */
2578  if (port->sock != PGINVALID_SOCKET)
2579  StreamClose(port->sock);
2580  ConnFree(port);
2581  return NULL;
2582  }
2583 
2584  return port;
2585 }
2586 
2587 
2588 /*
2589  * ConnFree -- free a local connection data structure
2590  *
2591  * Caller has already closed the socket if any, so there's not much
2592  * to do here.
      *
      * The structure is released with free(), which pairs with the calloc()
      * in ConnCreate().
      *
      * NOTE(review): listing line 2595 (the line carrying the function name
      * and parameter list) is absent from this extract; the body refers to a
      * parameter named "conn".
2593  */
2594 static void
2596 {
2597  free(conn);
2598 }
2599 
2600 
2601 /*
2602  * ClosePostmasterPorts -- close all the postmaster's open sockets
2603  *
2604  * This is called during child process startup to release file descriptors
2605  * that are not needed by that child process. The postmaster still has
2606  * them open, of course.
2607  *
2608  * Note: we pass am_syslogger as a boolean because we don't want to set
2609  * the global variable yet when this is called.
      *
      * am_syslogger: when true, the read side of the syslogger pipe is left
      * open; otherwise it is closed and its slot is reset (-1 on non-Windows,
      * 0 on Windows).
      *
      * NOTE(review): several listing lines are absent from this extract
      * (2623, 2625, 2627, 2629, 2640, 2641). Going by the surrounding
      * comments they presumably close the death-watch pipe, report that to
      * fd.c, and close each listen socket -- confirm against the full source.
2610  */
2611 void
2612 ClosePostmasterPorts(bool am_syslogger)
2613 {
2614  int i;
2615 
2616 #ifndef WIN32
2617 
2618  /*
2619  * Close the write end of postmaster death watch pipe. It's important to
2620  * do this as early as possible, so that if postmaster dies, others won't
2621  * think that it's still running because we're holding the pipe open.
2622  */
      /* A failure here is reported at FATAL level. */
2624  ereport(FATAL,
2626  errmsg_internal("could not close postmaster death monitoring pipe in child process: %m")));
2628  /* Notify fd.c that we released one pipe FD. */
2630 #endif
2631 
2632  /*
2633  * Close the postmaster's listen sockets. These aren't tracked by fd.c,
2634  * so we don't call ReleaseExternalFD() here.
2635  */
2636  for (i = 0; i < MAXLISTEN; i++)
2637  {
2638  if (ListenSocket[i] != PGINVALID_SOCKET)
2639  {
      /* NOTE(review): the loop body (listing lines 2640-2641) is missing. */
2642  }
2643  }
2644 
2645  /*
2646  * If using syslogger, close the read side of the pipe. We don't bother
2647  * tracking this in fd.c, either.
2648  */
2649  if (!am_syslogger)
2650  {
2651 #ifndef WIN32
2652  if (syslogPipe[0] >= 0)
2653  close(syslogPipe[0]);
2654  syslogPipe[0] = -1;
2655 #else
2656  if (syslogPipe[0])
2657  CloseHandle(syslogPipe[0]);
2658  syslogPipe[0] = 0;
2659 #endif
2660  }
2661 
2662 #ifdef USE_BONJOUR
2663  /* If using Bonjour, close the connection to the mDNS daemon */
      /* Only the descriptor from DNSServiceRefSockFD() is closed here. */
2664  if (bonjour_sdref)
2665  close(DNSServiceRefSockFD(bonjour_sdref));
2666 #endif
2667 }
2668 
2669 
2670 /*
2671  * InitProcessGlobals -- set MyProcPid, MyStartTime[stamp], random seeds
2672  *
2673  * Called early in the postmaster and every backend.
      *
      * The visible code stores getpid() in MyProcPid and seeds random() with
      * srandom(). The seed comes from pg_strong_random() when that succeeds,
      * and otherwise from a mix of MyProcPid and MyStartTimestamp.
      *
      * NOTE(review): listing lines 2676 (the line carrying the function name
      * and parameter list) and 2681-2682 are absent from this extract. The
      * latter presumably set MyStartTimestamp and MyStartTime, which the
      * header promises and the fallback seed reads -- confirm against the
      * full source.
2674  */
2675 void
2677 {
2678  unsigned int rseed;
2679 
2680  MyProcPid = getpid();
2683 
2684  /*
2685  * Set a different seed for random() in every process. We want something
2686  * unpredictable, so if possible, use high-quality random bits for the
2687  * seed. Otherwise, fall back to a seed based on timestamp and PID.
2688  */
2689  if (!pg_strong_random(&rseed, sizeof(rseed)))
2690  {
2691  /*
2692  * Since PIDs and timestamps tend to change more frequently in their
2693  * least significant bits, shift the timestamp left to allow a larger
2694  * total number of seeds in a given time period. Since that would
2695  * leave only 20 bits of the timestamp that cycle every ~1 second,
2696  * also mix in some higher bits.
2697  */
      /*
      * The XOR is computed in uint64 and then assigned to the unsigned
      * int rseed, so only the low-order bits that fit are kept.
      */
2698  rseed = ((uint64) MyProcPid) ^
2699  ((uint64) MyStartTimestamp << 12) ^
2700  ((uint64) MyStartTimestamp >> 20);
2701  }
2702  srandom(rseed);
2703 }
2704 
2705 
2706 /*
2707  * reset_shared -- reset shared memory and semaphores
      *
      * NOTE(review): listing lines 2710 (the line carrying the function name
      * and parameter list) and 2719 (the only statement of the body) are
      * absent from this extract, so the call that actually performs the reset
      * cannot be seen here -- consult the full source.
2708  */
2709 static void
2711 {
2712  /*
2713  * Create or re-create shared memory and semaphores.
2714  *
2715  * Note: in each "cycle of life" we will normally assign the same IPC keys
2716  * (if using SysV shmem and/or semas). This helps ensure that we will
2717  * clean up dead IPC objects if the postmaster crashes and is restarted.
2718  */
2720 }
2721 
2722 
2723 /*
2724  * SIGHUP -- reread config files, and tell children to do same
      *
      * Signal handler. errno is saved on entry and restored on exit so the
      * interrupted code does not see it change. Nothing is reloaded unless
      * Shutdown <= SmartShutdown. Within that branch the visible code reloads
      * pg_hba.conf and pg_ident.conf (logging when either fails), and under
      * USE_SSL either re-initializes SSL (setting LoadedSSL on success) or
      * tears it down when EnableSSL is off. Under EXEC_BACKEND it also
      * rewrites the non-default-variables file for future children.
      *
      * NOTE(review): many listing lines are absent from this extract: 2727
      * (the line carrying the function name and parameter list), 2743-2744,
      * the statement after each "PID != 0" test (2746 ... 2762), and 2798.
      * Going by the header comment they presumably reread the main
      * configuration and forward SIGHUP to each live child -- confirm
      * against the full source.
2725  */
2726 static void
2728 {
2729  int save_errno = errno;
2730 
2731  /*
2732  * We rely on the signal mechanism to have blocked all signals ... except
2733  * on Windows, which lacks sigaction(), so we have to do it manually.
2734  */
2735 #ifdef WIN32
2736  PG_SETMASK(&BlockSig);
2737 #endif
2738 
2739  if (Shutdown <= SmartShutdown)
2740  {
2741  ereport(LOG,
2742  (errmsg("received SIGHUP, reloading configuration files")));
      /* NOTE(review): the body of each PID test below is missing here. */
2745  if (StartupPID != 0)
2747  if (BgWriterPID != 0)
2749  if (CheckpointerPID != 0)
2751  if (WalWriterPID != 0)
2753  if (WalReceiverPID != 0)
2755  if (AutoVacPID != 0)
2757  if (PgArchPID != 0)
2759  if (SysLoggerPID != 0)
2761  if (PgStatPID != 0)
2763 
2764  /* Reload authentication config files too */
2765  if (!load_hba())
2766  ereport(LOG,
2767  /* translator: %s is a configuration file */
2768  (errmsg("%s was not reloaded", "pg_hba.conf")));
2769 
2770  if (!load_ident())
2771  ereport(LOG,
2772  (errmsg("%s was not reloaded", "pg_ident.conf")));
2773 
2774 #ifdef USE_SSL
2775  /* Reload SSL configuration as well */
2776  if (EnableSSL)
2777  {
      /* On failure LoadedSSL is left as it was; only a log entry is made. */
2778  if (secure_initialize(false) == 0)
2779  LoadedSSL = true;
2780  else
2781  ereport(LOG,
2782  (errmsg("SSL configuration was not reloaded")));
2783  }
2784  else
2785  {
2786  secure_destroy();
2787  LoadedSSL = false;
2788  }
2789 #endif
2790 
2791 #ifdef EXEC_BACKEND
2792  /* Update the starting-point file for future children */
2793  write_nondefault_variables(PGC_SIGHUP);
2794 #endif
2795  }
2796 
2797 #ifdef WIN32
2799 #endif
2800 
2801  errno = save_errno;
2802 }
2803 
2804 
2805 /*
2806  * pmdie -- signal handler for processing various postmaster signals.
      *
      * Dispatches on postgres_signal_arg: SIGTERM requests a smart shutdown,
      * SIGINT a fast shutdown and SIGQUIT an immediate shutdown. Each case
      * does nothing if Shutdown is already at or beyond the requested level.
      * Otherwise it logs the request and, under USE_SYSTEMD, sends
      * "STOPPING=1" via sd_notify(). errno is saved on entry and restored
      * on exit.
      *
      * NOTE(review): this extract is missing many listing lines, including
      * 2809 (the line carrying the function name and parameter list) and,
      * in every case, the statements that record the new Shutdown level,
      * change pmState, signal children and invoke the state machine (for
      * example 2836, 2841, 2855, 2857, 2861, 2869, 2882, 2895, 2903, 2910,
      * 2924, 2935-2937, 2946). The comments describe the intent, but the
      * exact actions must be read from the full source.
2807  */
2808 static void
2810 {
2811  int save_errno = errno;
2812 
2813  /*
2814  * We rely on the signal mechanism to have blocked all signals ... except
2815  * on Windows, which lacks sigaction(), so we have to do it manually.
2816  */
2817 #ifdef WIN32
2818  PG_SETMASK(&BlockSig);
2819 #endif
2820 
2821  ereport(DEBUG2,
2822  (errmsg_internal("postmaster received signal %d",
2823  postgres_signal_arg)));
2824 
2825  switch (postgres_signal_arg)
2826  {
2827  case SIGTERM:
2828 
2829  /*
2830  * Smart Shutdown:
2831  *
2832  * Wait for children to end their work, then shut down.
2833  */
      /* Ignore the request if an equal or stronger shutdown is pending. */
2834  if (Shutdown >= SmartShutdown)
2835  break;
2837  ereport(LOG,
2838  (errmsg("received smart shutdown request")));
2839 
2840  /* Report status */
2842 #ifdef USE_SYSTEMD
2843  sd_notify(0, "STOPPING=1");
2844 #endif
2845 
2846  /*
2847  * If we reached normal running, we have to wait for any online
2848  * backup mode to end; otherwise go straight to waiting for client
2849  * backends to exit. (The difference is that in the former state,
2850  * we'll still let in new superuser clients, so that somebody can
2851  * end the online backup mode.) If already in PM_STOP_BACKENDS or
2852  * a later state, do not change it.
2853  */
2854  if (pmState == PM_RUN)
2856  else if (pmState == PM_HOT_STANDBY)
2858  else if (pmState == PM_STARTUP || pmState == PM_RECOVERY)
2859  {
2860  /* There should be no clients, so proceed to stop children */
2862  }
2863 
2864  /*
2865  * Now wait for online backup mode to end and backends to exit. If
2866  * that is already the case, PostmasterStateMachine will take the
2867  * next step.
2868  */
2870  break;
2871 
2872  case SIGINT:
2873 
2874  /*
2875  * Fast Shutdown:
2876  *
2877  * Abort all children with SIGTERM (rollback active transactions
2878  * and exit) and shut down when they are gone.
2879  */
      /* Ignore the request if an equal or stronger shutdown is pending. */
2880  if (Shutdown >= FastShutdown)
2881  break;
2883  ereport(LOG,
2884  (errmsg("received fast shutdown request")));
2885 
2886  /* Report status */
2888 #ifdef USE_SYSTEMD
2889  sd_notify(0, "STOPPING=1");
2890 #endif
2891 
2892  if (pmState == PM_STARTUP || pmState == PM_RECOVERY)
2893  {
2894  /* Just shut down background processes silently */
2896  }
2897  else if (pmState == PM_RUN ||
2899  {
2900  /* Report that we're about to zap live client sessions */
2901  ereport(LOG,
2902  (errmsg("aborting any active transactions")));
2904  }
2905 
2906  /*
2907  * PostmasterStateMachine will issue any necessary signals, or
2908  * take the next step if no child processes need to be killed.
2909  */
2911  break;
2912 
2913  case SIGQUIT:
2914 
2915  /*
2916  * Immediate Shutdown:
2917  *
2918  * abort all children with SIGQUIT, wait for them to exit,
2919  * terminate remaining ones with SIGKILL, then exit without
2920  * attempt to properly shut down the database system.
2921  */
      /* Ignore the request if an immediate shutdown is already pending. */
2922  if (Shutdown >= ImmediateShutdown)
2923  break;
2925  ereport(LOG,
2926  (errmsg("received immediate shutdown request")));
2927 
2928  /* Report status */
2930 #ifdef USE_SYSTEMD
2931  sd_notify(0, "STOPPING=1");
2932 #endif
2933 
2934  /* tell children to shut down ASAP */
2938 
2939  /* set stopwatch for them to die */
2940  AbortStartTime = time(NULL);
2941 
2942  /*
2943  * Now wait for backends to exit. If there are none,
2944  * PostmasterStateMachine will take the next step.
2945  */
2947  break;
2948  }
2949 
2950 #ifdef WIN32
2952 #endif
2953 
2954  errno = save_errno;
2955 }
2956 
2957 /*
2958  * Reaper -- signal handler to cleanup after a child process dies.
2959  */
2960 static void
2962 {
2963  int save_errno = errno;
2964  int pid; /* process id of dead child process */
2965  int exitstatus; /* its exit status */
2966 
2967  /*
2968  * We rely on the signal mechanism to have blocked all signals ... except
2969  * on Windows, which lacks sigaction(), so we have to do it manually.
2970  */
2971 #ifdef WIN32
2972  PG_SETMASK(&BlockSig);
2973 #endif
2974 
2975  ereport(DEBUG4,
2976  (errmsg_internal("reaping dead processes")));
2977 
2978  while ((pid = waitpid(-1, &exitstatus, WNOHANG)) > 0)
2979  {
2980  /*
2981  * Check if this child was a startup process.
2982  */
2983  if (pid == StartupPID)
2984  {
2985  StartupPID = 0;
2986 
2987  /*
2988  * Startup process exited in response to a shutdown request (or it
2989  * completed normally regardless of the shutdown request).
2990  */
2991  if (Shutdown > NoShutdown &&
2992  (EXIT_STATUS_0(exitstatus) || EXIT_STATUS_1(exitstatus)))
2993  {
2996  /* PostmasterStateMachine logic does the rest */
2997  continue;
2998  }
2999 
3000  if (EXIT_STATUS_3(exitstatus))
3001  {
3002  ereport(LOG,
3003  (errmsg("shutdown at recovery target")));
3006  TerminateChildren(SIGTERM);
3008  /* PostmasterStateMachine logic does the rest */
3009  continue;
3010  }
3011 
3012  /*
3013  * Unexpected exit of startup process (including FATAL exit)
3014  * during PM_STARTUP is treated as catastrophic. There are no
3015  * other processes running yet, so we can just exit.
3016  */
3017  if (pmState == PM_STARTUP &&
3019  !EXIT_STATUS_0(exitstatus))
3020  {
3021  LogChildExit(LOG, _("startup process"),
3022  pid, exitstatus);
3023  ereport(LOG,
3024  (errmsg("aborting startup due to startup process failure")));
3025  ExitPostmaster(1);
3026  }
3027 
3028  /*
3029  * After PM_STARTUP, any unexpected exit (including FATAL exit) of
3030  * the startup process is catastrophic, so kill other children,
3031  * and set StartupStatus so we don't try to reinitialize after
3032  * they're gone. Exception: if StartupStatus is STARTUP_SIGNALED,
3033  * then we previously sent the startup process a SIGQUIT; so
3034  * that's probably the reason it died, and we do want to try to
3035  * restart in that case.
3036  *
3037  * This stanza also handles the case where we sent a SIGQUIT
3038  * during PM_STARTUP due to some dead_end child crashing: in that
3039  * situation, if the startup process dies on the SIGQUIT, we need
3040  * to transition to PM_WAIT_BACKENDS state which will allow
3041  * PostmasterStateMachine to restart the startup process. (On the
3042  * other hand, the startup process might complete normally, if we
3043  * were too late with the SIGQUIT. In that case we'll fall
3044  * through and commence normal operations.)
3045  */
3046  if (!EXIT_STATUS_0(exitstatus))
3047  {
3049  {
3051  if (pmState == PM_STARTUP)
3053  }
3054  else
3056  HandleChildCrash(pid, exitstatus,
3057  _("startup process"));
3058  continue;
3059  }
3060 
3061  /*
3062  * Startup succeeded, commence normal operations
3063  */
3065  FatalError = false;
3066  AbortStartTime = 0;
3067  ReachedNormalRunning = true;
3068  pmState = PM_RUN;
3070 
3071  /*
3072  * Crank up the background tasks, if we didn't do that already
3073  * when we entered consistent recovery state. It doesn't matter
3074  * if this fails, we'll just try again later.
3075  */
3076  if (CheckpointerPID == 0)
3078  if (BgWriterPID == 0)
3080  if (WalWriterPID == 0)
3082 
3083  /*
3084  * Likewise, start other special children as needed. In a restart
3085  * situation, some of them may be alive already.
3086  */
3089  if (PgArchStartupAllowed() && PgArchPID == 0)
3091  if (PgStatPID == 0)
3092  PgStatPID = pgstat_start();
3093 
3094  /* workers may be scheduled to start now */
3096 
3097  /* at this point we are really open for business */
3098  ereport(LOG,
3099  (errmsg("database system is ready to accept connections")));
3100 
3101  /* Report status */
3103 #ifdef USE_SYSTEMD
3104  sd_notify(0, "READY=1");
3105 #endif
3106 
3107  continue;
3108  }
3109 
3110  /*
3111  * Was it the bgwriter? Normal exit can be ignored; we'll start a new
3112  * one at the next iteration of the postmaster's main loop, if
3113  * necessary. Any other exit condition is treated as a crash.
3114  */
3115  if (pid == BgWriterPID)
3116  {
3117  BgWriterPID = 0;
3118  if (!EXIT_STATUS_0(exitstatus))
3119  HandleChildCrash(pid, exitstatus,
3120  _("background writer process"));
3121  continue;
3122  }
3123 
3124  /*
3125  * Was it the checkpointer?
3126  */
3127  if (pid == CheckpointerPID)
3128  {
3129  CheckpointerPID = 0;
3130  if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN)
3131  {
3132  /*
3133  * OK, we saw normal exit of the checkpointer after it's been
3134  * told to shut down. We expect that it wrote a shutdown
3135  * checkpoint. (If for some reason it didn't, recovery will
3136  * occur on next postmaster start.)
3137  *
3138  * At this point we should have no normal backend children
3139  * left (else we'd not be in PM_SHUTDOWN state) but we might
3140  * have dead_end children to wait for.
3141  *
3142  * If we have an archiver subprocess, tell it to do a last
3143  * archive cycle and quit. Likewise, if we have walsender
3144  * processes, tell them to send any remaining WAL and quit.
3145  */
3147 
3148  /* Waken archiver for the last time */
3149  if (PgArchPID != 0)
3151 
3152  /*
3153  * Waken walsenders for the last time. No regular backends
3154  * should be around anymore.
3155  */
3157 
3159 
3160  /*
3161  * We can also shut down the stats collector now; there's
3162  * nothing left for it to do.
3163  */
3164  if (PgStatPID != 0)
3166  }
3167  else
3168  {
3169  /*
3170  * Any unexpected exit of the checkpointer (including FATAL
3171  * exit) is treated as a crash.
3172  */
3173  HandleChildCrash(pid, exitstatus,
3174  _("checkpointer process"));
3175  }
3176 
3177  continue;
3178  }
3179 
3180  /*
3181  * Was it the wal writer? Normal exit can be ignored; we'll start a
3182  * new one at the next iteration of the postmaster's main loop, if
3183  * necessary. Any other exit condition is treated as a crash.
3184  */
3185  if (pid == WalWriterPID)
3186  {
3187  WalWriterPID = 0;
3188  if (!EXIT_STATUS_0(exitstatus))
3189  HandleChildCrash(pid, exitstatus,
3190  _("WAL writer process"));
3191  continue;
3192  }
3193 
3194  /*
3195  * Was it the wal receiver? If exit status is zero (normal) or one
3196  * (FATAL exit), we assume everything is all right just like normal
3197  * backends. (If we need a new wal receiver, we'll start one at the
3198  * next iteration of the postmaster's main loop.)
3199  */
3200  if (pid == WalReceiverPID)
3201  {
3202  WalReceiverPID = 0;
3203  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3204  HandleChildCrash(pid, exitstatus,
3205  _("WAL receiver process"));
3206  continue;
3207  }
3208 
3209  /*
3210  * Was it the autovacuum launcher? Normal exit can be ignored; we'll
3211  * start a new one at the next iteration of the postmaster's main
3212  * loop, if necessary. Any other exit condition is treated as a
3213  * crash.
3214  */
3215  if (pid == AutoVacPID)
3216  {
3217  AutoVacPID = 0;
3218  if (!EXIT_STATUS_0(exitstatus))
3219  HandleChildCrash(pid, exitstatus,
3220  _("autovacuum launcher process"));
3221  continue;
3222  }
3223 
3224  /*
3225  * Was it the archiver? If exit status is zero (normal) or one (FATAL
3226  * exit), we assume everything is all right just like normal backends
3227  * and just try to restart a new one so that we immediately retry
3228  * archiving remaining files. (If fail, we'll try again in future
3229  * cycles of the postmaster's main loop.) Unless we were waiting for
3230  * it to shut down; don't restart it in that case, and
3231  * PostmasterStateMachine() will advance to the next shutdown step.
3232  */
3233  if (pid == PgArchPID)
3234  {
3235  PgArchPID = 0;
3236  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3237  HandleChildCrash(pid, exitstatus,
3238  _("archiver process"));
3239  if (PgArchStartupAllowed())
3241  continue;
3242  }
3243 
3244  /*
3245  * Was it the statistics collector? If so, just try to start a new
3246  * one; no need to force reset of the rest of the system. (If fail,
3247  * we'll try again in future cycles of the main loop.)
3248  */
3249  if (pid == PgStatPID)
3250  {
3251  PgStatPID = 0;
3252  if (!EXIT_STATUS_0(exitstatus))
3253  LogChildExit(LOG, _("statistics collector process"),
3254  pid, exitstatus);
3255  if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
3256  PgStatPID = pgstat_start();
3257  continue;
3258  }
3259 
3260  /* Was it the system logger? If so, try to start a new one */
3261  if (pid == SysLoggerPID)
3262  {
3263  SysLoggerPID = 0;
3264  /* for safety's sake, launch new logger *first* */
3266  if (!EXIT_STATUS_0(exitstatus))
3267  LogChildExit(LOG, _("system logger process"),
3268  pid, exitstatus);
3269  continue;
3270  }
3271 
3272  /* Was it one of our background workers? */
3273  if (CleanupBackgroundWorker(pid, exitstatus))
3274  {
3275  /* have it be restarted */
3276  HaveCrashedWorker = true;
3277  continue;
3278  }
3279 
3280  /*
3281  * Else do standard backend child cleanup.
3282  */
3283  CleanupBackend(pid, exitstatus);
3284  } /* loop over pending child-death reports */
3285 
3286  /*
3287  * After cleaning out the SIGCHLD queue, see if we have any state changes
3288  * or actions to make.
3289  */
3291 
3292  /* Done with signal handler */
3293 #ifdef WIN32
3295 #endif
3296 
3297  errno = save_errno;
3298 }
3299 
3300 /*
3301  * Scan the bgworkers list and see if the given PID (which has just stopped
3302  * or crashed) is in it. Handle its shutdown if so, and return true. If not a
3303  * bgworker, return false.
3304  *
3305  * This is heavily based on CleanupBackend. One important difference is that
3306  * we don't know yet that the dying process is a bgworker, so we must be silent
3307  * until we're sure it is.
 *
 * Exit-status handling visible below, once a matching entry is found:
 *   - zero: the entry's rw_crashed_at is cleared and rw_terminate is set;
 *   - anything other than 0 or 1: HandleChildCrash() is called and we
 *     return true at once, without the per-entry cleanup further down;
 *   - otherwise the entry's Backend record is unlinked and freed, its
 *     pid and child-slot fields are zeroed, and the exit is logged
 *     (DEBUG1 for a zero status, LOG for anything else).
 *
 * NOTE(review): this listing has lost several lines (the embedded numbering
 * skips 3310, 3316, 3338, 3362 and 3381), including the line that carries
 * the function name and first parameter. Each gap is marked below; restore
 * the lines from the upstream file before relying on this text.
3308  */
3309 static bool
3311  int exitstatus) /* child's exit status */
3312 {
3313  char namebuf[MAXPGPATH];
3314  slist_mutable_iter iter;
3315 
 /* NOTE(review): loop header missing here; presumably it iterates the
  * registered-worker list using "iter" -- confirm against upstream. */
3317  {
3318  RegisteredBgWorker *rw;
3319 
3320  rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
3321 
 /* Not the entry for the dead PID: keep scanning without logging. */
3322  if (rw->rw_pid != pid)
3323  continue;
3324 
3325 #ifdef WIN32
3326  /* see CleanupBackend */
3327  if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3328  exitstatus = 0;
3329 #endif
3330 
 /* Build the process description used in the log/crash messages below. */
3331  snprintf(namebuf, MAXPGPATH, _("background worker \"%s\""),
3332  rw->rw_worker.bgw_type);
3333 
3334 
3335  if (!EXIT_STATUS_0(exitstatus))
3336  {
3337  /* Record timestamp, so we know when to restart the worker. */
 /* NOTE(review): the statement that records the timestamp is missing. */
3339  }
3340  else
3341  {
3342  /* Zero exit status means terminate */
3343  rw->rw_crashed_at = 0;
3344  rw->rw_terminate = true;
3345  }
3346 
3347  /*
3348  * Additionally, just like a backend, any exit status other than 0 or
3349  * 1 is considered a crash and causes a system-wide restart.
3350  */
3351  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3352  {
3353  HandleChildCrash(pid, exitstatus, namebuf);
3354  return true;
3355  }
3356 
3357  /*
3358  * We must release the postmaster child slot. If the worker failed to
3359  * do so, it did not clean up after itself, requiring a crash-restart
3360  * cycle.
3361  */
 /* NOTE(review): the condition guarding this block (the slot release
  * check) is missing from the listing. */
3363  {
3364  HandleChildCrash(pid, exitstatus, namebuf);
3365  return true;
3366  }
3367 
3368  /* Get it out of the BackendList and clear out remaining data */
3369  dlist_delete(&rw->rw_backend->elem);
3370 #ifdef EXEC_BACKEND
3371  ShmemBackendArrayRemove(rw->rw_backend);
3372 #endif
3373 
3374  /*
3375  * It's possible that this background worker started some OTHER
3376  * background worker and asked to be notified when that worker started
3377  * or stopped. If so, cancel any notifications destined for the
3378  * now-dead backend.
3379  */
3380  if (rw->rw_backend->bgworker_notify)
 /* NOTE(review): the body of the "if" above is missing; as listed, the
  * free() below would wrongly become the conditional statement. */
3382  free(rw->rw_backend);
3383  rw->rw_backend = NULL;
3384  rw->rw_pid = 0;
3385  rw->rw_child_slot = 0;
3386  ReportBackgroundWorkerExit(&iter); /* report child death */
3387 
 /* A clean exit is only of debugging interest; anything else goes to LOG. */
3388  LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG,
3389  namebuf, pid, exitstatus);
3390 
3391  return true;
3392  }
3393 
 /* No registered worker had this PID. */
3394  return false;
3395 }
3396 
3397 /*
3398  * CleanupBackend -- cleanup after terminated backend.
3399  *
3400  * Remove all local state associated with backend.
3401  *
3402  * If you change this, see also CleanupBackgroundWorker.
 *
 * Flow visible below: the exit is logged at DEBUG2; an exit status other
 * than 0 or 1 is handed to HandleChildCrash() and we return. Otherwise
 * BackendList is searched for the entry whose pid matches, and that entry
 * is unlinked and freed. Nothing is done if no entry matches.
 *
 * NOTE(review): the line with the function name and first parameter
 * (embedded number 3405) is missing from this listing, as are 3449 and
 * 3472; see the notes at each gap.
3403  */
3404 static void
3406  int exitstatus) /* child's exit status. */
3407 {
3408  dlist_mutable_iter iter;
3409 
3410  LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
3411 
3412  /*
3413  * If a backend dies in an ugly way then we must signal all other backends
3414  * to quickdie. If exit status is zero (normal) or one (FATAL exit), we
3415  * assume everything is all right and proceed to remove the backend from
3416  * the active backend list.
3417  */
3418 
3419 #ifdef WIN32
3420 
3421  /*
3422  * On win32, also treat ERROR_WAIT_NO_CHILDREN (128) as nonfatal case,
3423  * since that sometimes happens under load when the process fails to start
3424  * properly (long before it starts using shared memory). Microsoft reports
3425  * it is related to mutex failure:
3426  * http://archives.postgresql.org/pgsql-hackers/2010-09/msg00790.php
3427  */
3428  if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3429  {
 /* Log at LOG so the event stays visible, then treat it as a clean exit. */
3430  LogChildExit(LOG, _("server process"), pid, exitstatus);
3431  exitstatus = 0;
3432  }
3433 #endif
3434 
3435  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3436  {
3437  HandleChildCrash(pid, exitstatus, _("server process"));
3438  return;
3439  }
3440 
 /* The _modify iterator is used because the matching node is deleted
  * inside the loop. */
3441  dlist_foreach_modify(iter, &BackendList)
3442  {
3443  Backend *bp = dlist_container(Backend, elem, iter.cur);
3444 
3445  if (bp->pid == pid)
3446  {
3447  if (!bp->dead_end)
3448  {
 /* NOTE(review): the condition guarding the block below is missing
  * from the listing; per the comment inside, it detects a child that
  * failed to clean up after itself. */
3450  {
3451  /*
3452  * Uh-oh, the child failed to clean itself up. Treat as a
3453  * crash after all.
3454  */
3455  HandleChildCrash(pid, exitstatus, _("server process"));
3456  return;
3457  }
3458 #ifdef EXEC_BACKEND
3459  ShmemBackendArrayRemove(bp);
3460 #endif
3461  }
3462  if (bp->bgworker_notify)
3463  {
3464  /*
3465  * This backend may have been slated to receive SIGUSR1 when
3466  * some background worker started or stopped. Cancel those
3467  * notifications, as we don't want to signal PIDs that are not
3468  * PostgreSQL backends. This gets skipped in the (probably
3469  * very common) case where the backend has never requested any
3470  * such notifications.
3471  */
 /* NOTE(review): the call that cancels the notifications is missing. */
3473  }
3474  dlist_delete(iter.cur);
3475  free(bp);
 /* Only the first entry with this PID is handled; stop searching. */
3476  break;
3477  }
3478  }
3479 }
3480 
3481 /*
3482  * HandleChildCrash -- cleanup after failed backend, bgwriter, checkpointer,
3483  * walwriter, autovacuum, archiver or background worker.
3484  *
3485  * The objectives here are to clean up our local state about the child
3486  * process, and to signal all other remaining children to quickdie.
 *
 * pid is the PID of the child that died: list entries carrying it are
 * removed, and whichever of the per-process PID variables equals it is
 * reset to zero. exitstatus and procname are only used for the log message
 * produced through LogChildExit().
 *
 * Logging and signaling happen only when take_action is true (first crash,
 * and not an immediate shutdown); the bookkeeping is done regardless.
 *
 * NOTE(review): this listing has lost lines wherever the embedded numbering
 * skips (3510, 3514, 3526, 3556, 3573, 3626, 3714, 3726, 3728). The gaps
 * that change how the remaining text reads are marked below.
3487  */
3488 static void
3489 HandleChildCrash(int pid, int exitstatus, const char *procname)
3490 {
3491  dlist_mutable_iter iter;
3492  slist_iter siter;
3493  Backend *bp;
3494  bool take_action;
3495 
3496  /*
3497  * We only log messages and send signals if this is the first process
3498  * crash and we're not doing an immediate shutdown; otherwise, we're only
3499  * here to update postmaster's idea of live processes. If we have already
3500  * signaled children, nonzero exit status is to be expected, so don't
3501  * clutter log.
3502  */
3503  take_action = !FatalError && Shutdown != ImmediateShutdown;
3504 
3505  if (take_action)
3506  {
3507  LogChildExit(LOG, procname, pid, exitstatus);
3508  ereport(LOG,
3509  (errmsg("terminating any other active server processes")));
 /* NOTE(review): one statement is missing here. */
3511  }
3512 
3513  /* Process background workers. */
 /* NOTE(review): the loop header over the worker list (using "siter")
  * is missing from the listing. */
3515  {
3516  RegisteredBgWorker *rw;
3517 
3518  rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
3519  if (rw->rw_pid == 0)
3520  continue; /* not running */
3521  if (rw->rw_pid == pid)
3522  {
3523  /*
3524  * Found entry for freshly-dead worker, so remove it.
3525  */
 /* NOTE(review): one statement is missing here. */
3527  dlist_delete(&rw->rw_backend->elem);
3528 #ifdef EXEC_BACKEND
3529  ShmemBackendArrayRemove(rw->rw_backend);
3530 #endif
3531  free(rw->rw_backend);
3532  rw->rw_backend = NULL;
3533  rw->rw_pid = 0;
3534  rw->rw_child_slot = 0;
3535  /* don't reset crashed_at */
3536  /* don't report child stop, either */
3537  /* Keep looping so we can signal remaining workers */
3538  }
3539  else
3540  {
3541  /*
3542  * This worker is still alive. Unless we did so already, tell it
3543  * to commit hara-kiri.
3544  *
3545  * SIGQUIT is the special signal that says exit without proc_exit
3546  * and let the user know what's going on. But if SendStop is set
3547  * (-T on command line), then we send SIGSTOP instead, so that we
3548  * can get core dumps from all backends by hand.
3549  */
3550  if (take_action)
3551  {
3552  ereport(DEBUG2,
3553  (errmsg_internal("sending %s to process %d",
3554  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3555  (int) rw->rw_pid)));
 /* NOTE(review): the statement that actually sends the signal is
  * missing; presumably a signal_child() call like the ones for
  * regular backends below -- confirm against upstream. */
3557  }
3558  }
3559  }
3560 
3561  /* Process regular backends */
3562  dlist_foreach_modify(iter, &BackendList)
3563  {
3564  bp = dlist_container(Backend, elem, iter.cur);
3565 
3566  if (bp->pid == pid)
3567  {
3568  /*
3569  * Found entry for freshly-dead backend, so remove it.
3570  */
3571  if (!bp->dead_end)
3572  {
 /* NOTE(review): one statement is missing here. */
3574 #ifdef EXEC_BACKEND
3575  ShmemBackendArrayRemove(bp);
3576 #endif
3577  }
3578  dlist_delete(iter.cur);
3579  free(bp);
3580  /* Keep looping so we can signal remaining backends */
3581  }
3582  else
3583  {
3584  /*
3585  * This backend is still alive. Unless we did so already, tell it
3586  * to commit hara-kiri.
3587  *
3588  * SIGQUIT is the special signal that says exit without proc_exit
3589  * and let the user know what's going on. But if SendStop is set
3590  * (-T on command line), then we send SIGSTOP instead, so that we
3591  * can get core dumps from all backends by hand.
3592  *
3593  * We could exclude dead_end children here, but at least in the
3594  * SIGSTOP case it seems better to include them.
3595  *
3596  * Background workers were already processed above; ignore them
3597  * here.
3598  */
3599  if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
3600  continue;
3601 
3602  if (take_action)
3603  {
3604  ereport(DEBUG2,
3605  (errmsg_internal("sending %s to process %d",
3606  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3607  (int) bp->pid)));
3608  signal_child(bp->pid, (SendStop ? SIGSTOP : SIGQUIT));
3609  }
3610  }
3611  }
3612 
 /*
  * Each auxiliary process below follows the same pattern: if it is the one
  * that died, forget its PID; otherwise, if it is running and take_action
  * is set, signal it.
  */
3613  /* Take care of the startup process too */
3614  if (pid == StartupPID)
3615  {
3616  StartupPID = 0;
3617  /* Caller adjusts StartupStatus, so don't touch it here */
3618  }
3619  else if (StartupPID != 0 && take_action)
3620  {
3621  ereport(DEBUG2,
3622  (errmsg_internal("sending %s to process %d",
3623  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3624  (int) StartupPID)));
3625  signal_child(StartupPID, (SendStop ? SIGSTOP : SIGQUIT));
 /* NOTE(review): one statement is missing here. */
3627  }
3628 
3629  /* Take care of the bgwriter too */
3630  if (pid == BgWriterPID)
3631  BgWriterPID = 0;
3632  else if (BgWriterPID != 0 && take_action)
3633  {
3634  ereport(DEBUG2,
3635  (errmsg_internal("sending %s to process %d",
3636  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3637  (int) BgWriterPID)));
3638  signal_child(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
3639  }
3640 
3641  /* Take care of the checkpointer too */
3642  if (pid == CheckpointerPID)
3643  CheckpointerPID = 0;
3644  else if (CheckpointerPID != 0 && take_action)
3645  {
3646  ereport(DEBUG2,
3647  (errmsg_internal("sending %s to process %d",
3648  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3649  (int) CheckpointerPID)));
3650  signal_child(CheckpointerPID, (SendStop ? SIGSTOP : SIGQUIT));
3651  }
3652 
3653  /* Take care of the walwriter too */
3654  if (pid == WalWriterPID)
3655  WalWriterPID = 0;
3656  else if (WalWriterPID != 0 && take_action)
3657  {
3658  ereport(DEBUG2,
3659  (errmsg_internal("sending %s to process %d",
3660  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3661  (int) WalWriterPID)));
3662  signal_child(WalWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
3663  }
3664 
3665  /* Take care of the walreceiver too */
3666  if (pid == WalReceiverPID)
3667  WalReceiverPID = 0;
3668  else if (WalReceiverPID != 0 && take_action)
3669  {
3670  ereport(DEBUG2,
3671  (errmsg_internal("sending %s to process %d",
3672  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3673  (int) WalReceiverPID)));
3674  signal_child(WalReceiverPID, (SendStop ? SIGSTOP : SIGQUIT));
3675  }
3676 
3677  /* Take care of the autovacuum launcher too */
3678  if (pid == AutoVacPID)
3679  AutoVacPID = 0;
3680  else if (AutoVacPID != 0 && take_action)
3681  {
3682  ereport(DEBUG2,
3683  (errmsg_internal("sending %s to process %d",
3684  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3685  (int) AutoVacPID)));
3686  signal_child(AutoVacPID, (SendStop ? SIGSTOP : SIGQUIT));
3687  }
3688 
3689  /* Take care of the archiver too */
3690  if (pid == PgArchPID)
3691  PgArchPID = 0;
3692  else if (PgArchPID != 0 && take_action)
3693  {
3694  ereport(DEBUG2,
3695  (errmsg_internal("sending %s to process %d",
3696  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3697  (int) PgArchPID)));
3698  signal_child(PgArchPID, (SendStop ? SIGSTOP : SIGQUIT));
3699  }
3700 
3701  /*
3702  * Force a power-cycle of the pgstat process too. (This isn't absolutely
3703  * necessary, but it seems like a good idea for robustness, and it
3704  * simplifies the state-machine logic in the case where a shutdown request
3705  * arrives during crash processing.)
 *
 * Unlike the processes above, SIGQUIT is used here even when SendStop is
 * set, and PgStatPID is not compared against the dead pid.
3706  */
3707  if (PgStatPID != 0 && take_action)
3708  {
3709  ereport(DEBUG2,
3710  (errmsg_internal("sending %s to process %d",
3711  "SIGQUIT",
3712  (int) PgStatPID)));
3713  signal_child(PgStatPID, SIGQUIT);
 /* NOTE(review): one statement is missing here. */
3715  }
3716 
3717  /* We do NOT restart the syslogger */
3718 
 /* During an immediate shutdown FatalError is left as it was. */
3719  if (Shutdown != ImmediateShutdown)
3720  FatalError = true;
3721 
3722  /* We now transit into a state of waiting for children to die */
3723  if (pmState == PM_RECOVERY ||
3724  pmState == PM_HOT_STANDBY ||
3725  pmState == PM_RUN ||
3727  pmState == PM_SHUTDOWN)
3729 
 /* NOTE(review): one term of the condition above and the statement it
  * guards (the state change the comment describes) are missing; as listed,
  * the "if" below would wrongly become its body. */
3730  /*
3731  * .. and if this doesn't happen quickly enough, now the clock is ticking
3732  * for us to kill them without mercy.
3733  */
 /* Start the timer only once; a later crash must not restart it. */
3734  if (AbortStartTime == 0)
3735  AbortStartTime = time(NULL);
3736 }
3737 
3738 /*
3739  * Log the death of a child process.
3740  */
3741 static void
3742 LogChildExit(int lev, const char *procname, int pid, int exitstatus)
3743 {
3744  /*
3745  * size of activity_buffer is arbitrary, but set equal to default
3746  * track_activity_query_size
3747  */
3748  char activity_buffer[1024];
3749  const char *activity = NULL;
3750 
3751  if (!EXIT_STATUS_0(exitstatus))
3752  activity = pgstat_get_crashed_backend_activity(pid,
3753  activity_buffer,
3754  sizeof(activity_buffer));
3755 
3756  if (WIFEXITED(exitstatus))
3757  ereport(lev,
3758 
3759  /*------
3760  translator: %s is a noun phrase describing a child process, such as
3761  "server process" */
3762  (errmsg("%s (PID %d) exited with exit code %d",
3763  procname, pid, WEXITSTATUS(exitstatus)),
3764  activity ? errdetail("Failed process was running: %s", activity) : 0));
3765  else if (WIFSIGNALED(exitstatus))
3766  {
3767 #if defined(WIN32)
3768  ereport(lev,
3769 
3770  /*------
3771  translator: %s is a noun phrase describing a child process, such as
3772  "server process" */
3773  (errmsg("%s (PID %d) was terminated by exception 0x%X",
3774  procname, pid, WTERMSIG(exitstatus)),
3775  errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
3776  activity ? errdetail("Failed process was running: %s", activity) : 0));
3777 #else
3778  ereport(lev,
3779 
3780  /*------
3781  translator: %s is a noun phrase describing a child process, such as
3782  "server process" */
3783  (errmsg("%s (PID %d) was terminated by signal %d: %s",
3784  procname, pid, WTERMSIG(exitstatus),
3785  pg_strsignal(WTERMSIG(exitstatus))),
3786  activity ? errdetail("Failed process was running: %s", activity) : 0));
3787 #endif
3788  }
3789  else
3790  ereport(lev,
3791 
3792  /*------
3793  translator: %s is a noun phrase describing a child process, such as
3794  "server process" */
3795  (errmsg("%s (PID %d) exited with unrecognized status %d",
3796  procname, pid, exitstatus),
3797  activity ? errdetail("Failed process was running: %s", activity) : 0));
3798 }
3799 
3800 /*
3801  * Advance the postmaster's state machine and take actions as appropriate
3802  *
3803  * This is common code for pmdie(), reaper() and sigusr1_handler(), which
3804  * receive the signals that might mean we need to change state.
 *
 * The body is a sequence of independent "if (pmState == ...)" checks
 * rather than a switch, so a transition made by one check can be acted on
 * by a later check in the same call.
 *
 * NOTE(review): this listing has lost many lines (wherever the embedded
 * numbering skips), including the line with the function name and most of
 * the statements that assign pmState. Several "if" statements below have
 * therefore lost their conditions or bodies; the worst cases are marked.
 * Restore the lines from the upstream file before relying on this text.
3805  */
3806 static void
3808 {
3809  /* If we're doing a smart shutdown, try to advance that state. */
3810  if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
3811  {
 /* NOTE(review): condition missing (ALLOW_SUPERUSER_CONNS case). */
3813  {
3814  /*
3815  * ALLOW_SUPERUSER_CONNS state ends as soon as online backup mode
3816  * is not active.
3817  */
3818  if (!BackupInProgress())
 /* NOTE(review): body of the "if" above missing. */
3820  }
3821 
 /* NOTE(review): condition missing (ALLOW_NO_CONNS case). */
3823  {
3824  /*
3825  * ALLOW_NO_CONNS state ends when we have no normal client
3826  * backends running. Then we're ready to stop other children.
3827  */
 /* NOTE(review): the check and state change are missing. */
3830  }
3831  }
3832 
3833  /*
3834  * If we're ready to do so, signal child processes to shut down. (This
3835  * isn't a persistent state, but treating it as a distinct pmState allows
3836  * us to share this code across multiple shutdown code paths.)
3837  */
3838  if (pmState == PM_STOP_BACKENDS)
3839  {
3840  /*
3841  * Forget any pending requests for background workers, since we're no
3842  * longer willing to launch any new workers. (If additional requests
3843  * arrive, BackgroundWorkerStateChange will reject them.)
3844  */
 /* NOTE(review): the statement doing this is missing. */
3846 
3847  /* Signal all backend children except walsenders */
3848  SignalSomeChildren(SIGTERM,
 /* NOTE(review): the target argument and closing of the call above are
  * missing. */
3850  /* and the autovac launcher too */
3851  if (AutoVacPID != 0)
3852  signal_child(AutoVacPID, SIGTERM);
3853  /* and the bgwriter too */
3854  if (BgWriterPID != 0)
3855  signal_child(BgWriterPID, SIGTERM);
3856  /* and the walwriter too */
3857  if (WalWriterPID != 0)
3858  signal_child(WalWriterPID, SIGTERM);
3859  /* If we're in recovery, also stop startup and walreceiver procs */
3860  if (StartupPID != 0)
3861  signal_child(StartupPID, SIGTERM);
3862  if (WalReceiverPID != 0)
3863  signal_child(WalReceiverPID, SIGTERM);
3864  /* checkpointer, archiver, stats, and syslogger may continue for now */
3865 
3866  /* Now transition to PM_WAIT_BACKENDS state to wait for them to die */
 /* NOTE(review): the assignment making this transition is missing. */
3868  }
3869 
3870  /*
3871  * If we are in a state-machine state that implies waiting for backends to
3872  * exit, see if they're all gone, and change state if so.
3873  */
3874  if (pmState == PM_WAIT_BACKENDS)
3875  {
3876  /*
3877  * PM_WAIT_BACKENDS state ends when we have no regular backends
3878  * (including autovac workers), no bgworkers (including unconnected
3879  * ones), and no walwriter, autovac launcher or bgwriter. If we are
3880  * doing crash recovery or an immediate shutdown then we expect the
3881  * checkpointer to exit as well, otherwise not. The stats and
3882  * syslogger processes are disregarded since they are not connected to
3883  * shared memory; we also disregard dead_end children here. Walsenders
3884  * and archiver are also disregarded, they will be terminated later
3885  * after writing the checkpoint record.
3886  */
 /* NOTE(review): the opening of this condition (3887) and one of its
  * terms (3892) are missing. */
3888  StartupPID == 0 &&
3889  WalReceiverPID == 0 &&
3890  BgWriterPID == 0 &&
3891  (CheckpointerPID == 0 ||
3893  WalWriterPID == 0 &&
3894  AutoVacPID == 0)
3895  {
 /* NOTE(review): condition missing; per the comments in the two
  * branches, it separates the crash/immediate-shutdown path from the
  * normal shutdown path. */
3897  {
3898  /*
3899  * Start waiting for dead_end children to die. This state
3900  * change causes ServerLoop to stop creating new ones.
3901  */
 /* NOTE(review): the state assignment is missing. */
3903 
3904  /*
3905  * We already SIGQUIT'd the archiver and stats processes, if
3906  * any, when we started immediate shutdown or entered
3907  * FatalError state.
3908  */
3909  }
3910  else
3911  {
3912  /*
3913  * If we get here, we are proceeding with normal shutdown. All
3914  * the regular children are gone, and it's time to tell the
3915  * checkpointer to do a shutdown checkpoint.
3916  */
3918  /* Start the checkpointer if not running */
3919  if (CheckpointerPID == 0)
 /* NOTE(review): body of the "if" above missing. */
3921  /* And tell it to shut down */
3922  if (CheckpointerPID != 0)
3923  {
 /* NOTE(review): the statement signaling the checkpointer is missing. */
3925  pmState = PM_SHUTDOWN;
3926  }
3927  else
3928  {
3929  /*
3930  * If we failed to fork a checkpointer, just shut down.
3931  * Any required cleanup will happen at next restart. We
3932  * set FatalError so that an "abnormal shutdown" message
3933  * gets logged when we exit.
3934  */
3935  FatalError = true;
 /* NOTE(review): one statement is missing here. */
3937 
3938  /* Kill the walsenders, archiver and stats collector too */
 /* NOTE(review): the signaling statements (3939, 3941, 3943) are
  * missing, leaving the two "if" lines below without bodies. */
3940  if (PgArchPID != 0)
3942  if (PgStatPID != 0)
3944  }
3945  }
3946  }
3947  }
3948 
3949  if (pmState == PM_SHUTDOWN_2)
3950  {
3951  /*
3952  * PM_SHUTDOWN_2 state ends when there's no other children than
3953  * dead_end children left. There shouldn't be any regular backends
3954  * left by now anyway; what we're really waiting for is walsenders and
3955  * archiver.
3956  */
3957  if (PgArchPID == 0 && CountChildren(BACKEND_TYPE_ALL) == 0)
3958  {
 /* NOTE(review): the state assignment is missing. */
3960  }
3961  }
3962 
3963  if (pmState == PM_WAIT_DEAD_END)
3964  {
3965  /*
3966  * PM_WAIT_DEAD_END state ends when the BackendList is entirely empty
3967  * (ie, no dead_end children remain), and the archiver and stats
3968  * collector are gone too.
3969  *
3970  * The reason we wait for those two is to protect them against a new
3971  * postmaster starting conflicting subprocesses; this isn't an
3972  * ironclad protection, but it at least helps in the
3973  * shutdown-and-immediately-restart scenario. Note that they have
3974  * already been sent appropriate shutdown signals, either during a
3975  * normal state transition leading up to PM_WAIT_DEAD_END, or during
3976  * FatalError processing.
3977  */
3978  if (dlist_is_empty(&BackendList) &&
3979  PgArchPID == 0 && PgStatPID == 0)
3980  {
3981  /* These other guys should be dead already */
3982  Assert(StartupPID == 0);
3983  Assert(WalReceiverPID == 0);
3984  Assert(BgWriterPID == 0);
3985  Assert(CheckpointerPID == 0);
3986  Assert(WalWriterPID == 0);
3987  Assert(AutoVacPID == 0);
3988  /* syslogger is not considered here */
 /* NOTE(review): the state assignment is missing. */
3990  }
3991  }
3992 
3993  /*
3994  * If we've been told to shut down, we exit as soon as there are no
3995  * remaining children. If there was a crash, cleanup will occur at the
3996  * next startup. (Before PostgreSQL 8.3, we tried to recover from the
3997  * crash before exiting, but that seems unwise if we are quitting because
3998  * we got SIGTERM from init --- there may well not be time for recovery
3999  * before init decides to SIGKILL us.)
4000  *
4001  * Note that the syslogger continues to run. It will exit when it sees
4002  * EOF on its input pipe, which happens when there are no more upstream
4003  * processes.
4004  */
 /* NOTE(review): the condition guarding this block is missing. */
4006  {
 /* Exit code 1 marks an abnormal shutdown, 0 a clean one. */
4007  if (FatalError)
4008  {
4009  ereport(LOG, (errmsg("abnormal database system shutdown")));
4010  ExitPostmaster(1);
4011  }
4012  else
4013  {
4014  /*
4015  * Terminate exclusive backup mode to avoid recovery after a clean
4016  * fast shutdown. Since an exclusive backup can only be taken
4017  * during normal running (and not, for example, while running
4018  * under Hot Standby) it only makes sense to do this if we reached
4019  * normal running. If we're still in recovery, the backup file is
4020  * one we're recovering *from*, and we must keep it around so that
4021  * recovery restarts from the right place.
4022  */
 /* NOTE(review): the condition guarding CancelBackup() is missing; as
  * listed the call would be unconditional. */
4024  CancelBackup();
4025 
4026  /*
4027  * Normal exit from the postmaster is here. We don't need to log
4028  * anything here, since the UnlinkLockFiles proc_exit callback
4029  * will do so, and that should be the last user-visible action.
4030  */
4031  ExitPostmaster(0);
4032  }
4033  }
4034 
4035  /*
4036  * If the startup process failed, or the user does not want an automatic
4037  * restart after backend crashes, wait for all non-syslogger children to
4038  * exit, and then exit postmaster. We don't try to reinitialize when the
4039  * startup process fails, because more than likely it will just fail again
4040  * and we will keep trying forever.
4041  */
4042  if (pmState == PM_NO_CHILDREN)
4043  {
 /* NOTE(review): condition missing (startup-process failure check). */
4045  {
4046  ereport(LOG,
4047  (errmsg("shutting down due to startup process failure")));
4048  ExitPostmaster(1);
4049  }
4050  if (!restart_after_crash)
4051  {
4052  ereport(LOG,
4053  (errmsg("shutting down because restart_after_crash is off")));
4054  ExitPostmaster(1);
4055  }
4056  }
4057 
4058  /*
4059  * If we need to recover from a crash, wait for all non-syslogger children
4060  * to exit, then reset shmem and StartupDataBase.
4061  */
4062  if (FatalError && pmState == PM_NO_CHILDREN)
4063  {
4064  ereport(LOG,
4065  (errmsg("all server processes terminated; reinitializing")));
4066 
4067  /* remove leftover temporary files after a crash */
 /* NOTE(review): the statements for this step are missing. */
4070 
4071  /* allow background workers to immediately restart */
 /* NOTE(review): the statement for this step is missing. */
4073 
4074  shmem_exit(1);
4075 
4076  /* re-read control file into local memory */
 /* NOTE(review): the statement for this step is missing. */
4078 
4079  reset_shared();
4080 
 /* NOTE(review): the statement that launches the startup process (which
  * the Assert below checks) is missing, as is line 4083. */
4082  Assert(StartupPID != 0);
4084  pmState = PM_STARTUP;
4085  /* crash recovery started, reset SIGKILL flag */
4086  AbortStartTime = 0;
4087  }
4088 }
4089 
4090 
4091 /*
4092  * Send a signal to a postmaster child process
4093  *
4094  * On systems that have setsid(), each child process sets itself up as a
4095  * process group leader. For signals that are generally interpreted in the
4096  * appropriate fashion, we signal the entire process group not just the
4097  * direct child process. This allows us to, for example, SIGQUIT a blocked
4098  * archive_recovery script, or SIGINT a script being run by a backend via
4099  * system().
4100  *
4101  * There is a race condition for recently-forked children: they might not
4102  * have executed setsid() yet. So we signal the child directly as well as
4103  * the group. We assume such a child will handle the signal before trying
4104  * to spawn any grandchild processes. We also assume that signaling the
4105  * child twice will not cause any problems.
4106  */
4107 static void
4108 signal_child(pid_t pid, int signal)
4109 {
4110  if (kill(pid, signal) < 0)
4111  elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) pid, signal);
4112 #ifdef HAVE_SETSID
4113  switch (signal)
4114  {
4115  case SIGINT:
4116  case SIGTERM:
4117  case SIGQUIT:
4118  case SIGSTOP:
4119  case SIGKILL:
4120  if (kill(-pid, signal) < 0)
4121  elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) (-pid), signal);
4122  break;
4123  default:
4124  break;
4125  }
4126 #endif
4127 }
4128 
4129 /*
4130  * Send a signal to the targeted children (but NOT special children;
4131  * dead_end children are never signaled, either).
 *
 * signal is the signal number handed to signal_child(). target selects
 * which BackendList entries receive it: BACKEND_TYPE_ALL matches every
 * non-dead_end entry, any other value is tested as a bit mask against the
 * entry's bkend_type.
 *
 * Returns true if at least one child was signaled, false otherwise.
 *
 * NOTE(review): two lines (embedded numbers 4157-4158) are missing from
 * this listing; see the note at the gap.
4132  */
4133 static bool
4134 SignalSomeChildren(int signal, int target)
4135 {
4136  dlist_iter iter;
4137  bool signaled = false;
4138 
4139  dlist_foreach(iter, &BackendList)
4140  {
4141  Backend *bp = dlist_container(Backend, elem, iter.cur);
4142 
4143  if (bp->dead_end)
4144  continue;
4145 
4146  /*
4147  * Since target == BACKEND_TYPE_ALL is the most common case, we test
4148  * it first and avoid touching shared memory for every child.
4149  */
4150  if (target != BACKEND_TYPE_ALL)
4151  {
4152  /*
4153  * Assign bkend_type for any recently announced WAL Sender
4154  * processes.
4155  */
4156  if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
 /* NOTE(review): the rest of this condition and the statement it
  * guards are missing; as listed, the "if" above is cut off
  * mid-expression. */
4159 
 /* Skip children whose type is not selected by the mask. */
4160  if (!(target & bp->bkend_type))
4161  continue;
4162  }
4163 
4164  ereport(DEBUG4,
4165  (errmsg_internal("sending signal %d to process %d",
4166  signal, (int) bp->pid)));
4167  signal_child(bp->pid, signal);
4168  signaled = true;
4169  }
4170  return signaled;
4171 }
4172 
4173 /*
4174  * Send a termination signal to children. This considers all of our children
4175  * processes, except syslogger and dead_end backends.
 *
 * The signal is first passed to SignalChildren(), and then sent
 * individually to each auxiliary process whose PID variable is nonzero:
 * startup, bgwriter, checkpointer, walwriter, walreceiver, autovacuum
 * launcher, archiver and stats collector.
 *
 * NOTE(review): the line with the function name and parameter list
 * (embedded number 4178) is missing from this listing, as is line 4185.
 * The body uses a variable named "signal", so that is presumably the
 * parameter -- confirm against upstream.
4176  */
4177 static void
4179 {
4180  SignalChildren(signal);
4181  if (StartupPID != 0)
4182  {
4183  signal_child(StartupPID, signal);
4184  if (signal == SIGQUIT || signal == SIGKILL)
 /* NOTE(review): the body of the "if" above is missing; only the startup
  * process gets this extra step for SIGQUIT/SIGKILL. */
4186  }
4187  if (BgWriterPID != 0)
4188  signal_child(BgWriterPID, signal);
4189  if (CheckpointerPID != 0)
4190  signal_child(CheckpointerPID, signal);
4191  if (WalWriterPID != 0)
4192  signal_child(WalWriterPID, signal);
4193  if (WalReceiverPID != 0)
4194  signal_child(WalReceiverPID, signal);
4195  if (AutoVacPID != 0)
4196  signal_child(AutoVacPID, signal);
4197  if (PgArchPID != 0)
4198  signal_child(PgArchPID, signal);
4199  if (PgStatPID != 0)
4200  signal_child(PgStatPID, signal);
4201 }
4202 
4203 /*
4204  * BackendStartup -- start backend process
4205  *
4206  * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
 * (As coded below, STATUS_ERROR is also returned when the Backend struct
 * cannot be allocated or the cancel key cannot be generated; in those two
 * cases no fork is attempted.)
4207  *
4208  * Note: if you change this code, also consider StartAutovacuumWorker.
 *
 * On success the new Backend entry is pushed onto BackendList with type
 * BACKEND_TYPE_NORMAL. The body refers to the connection through a
 * variable named "port".
 *
 * NOTE(review): the line with the function name and parameter list
 * (embedded number 4211) is missing from this listing, along with 4234,
 * 4246, 4248, 4254, 4270 and 4298; see the notes at each gap.
4209  */
4210 static int
4212 {
4213  Backend *bn; /* for backend cleanup */
4214  pid_t pid;
4215 
4216  /*
4217  * Create backend data structure. Better before the fork() so we can
4218  * handle failure cleanly.
4219  */
4220  bn = (Backend *) malloc(sizeof(Backend));
4221  if (!bn)
4222  {
4223  ereport(LOG,
4224  (errcode(ERRCODE_OUT_OF_MEMORY),
4225  errmsg("out of memory")));
4226  return STATUS_ERROR;
4227  }
4228 
4229  /*
4230  * Compute the cancel key that will be assigned to this backend. The
4231  * backend will have its own copy in the forked-off process' value of
4232  * MyCancelKey, so that it can transmit the key to the frontend.
4233  */
 /* NOTE(review): the condition that generates the key and detects
  * failure is missing from the listing. */
4235  {
4236  free(bn);
4237  ereport(LOG,
4238  (errcode(ERRCODE_INTERNAL_ERROR),
4239  errmsg("could not generate random cancel key")));
4240  return STATUS_ERROR;
4241  }
4242 
4243  bn->cancel_key = MyCancelKey;
4244 
4245  /* Pass down canAcceptConnections state */
 /* NOTE(review): the statement setting port->canAcceptConnections (4246)
  * and the second half of the expression below (4248) are missing. */
4247  bn->dead_end = (port->canAcceptConnections != CAC_OK &&
4249 
4250  /*
4251  * Unless it's a dead_end child, assign it a child slot number
4252  */
4253  if (!bn->dead_end)
 /* NOTE(review): the slot-assignment statement is missing, leaving the
  * "if" above without a body before its "else". */
4255  else
4256  bn->child_slot = 0;
4257 
4258  /* Hasn't asked to be notified about any bgworkers yet */
4259  bn->bgworker_notify = false;
4260 
4261 #ifdef EXEC_BACKEND
4262  pid = backend_forkexec(port);
4263 #else /* !EXEC_BACKEND */
4264  pid = fork_process();
4265  if (pid == 0) /* child */
4266  {
 /* The child has no use for the postmaster's bookkeeping struct. */
4267  free(bn);
4268 
4269  /* Detangle from postmaster */
 /* NOTE(review): the statement for this step is missing. */
4271 
4272  /* Close the postmaster's sockets */
4273  ClosePostmasterPorts(false);
4274 
4275  /* Perform additional initialization and collect startup packet */
4276  BackendInitialize(port);
4277 
4278  /*
4279  * Create a per-backend PGPROC struct in shared memory. We must do
4280  * this before we can use LWLocks. In the !EXEC_BACKEND case (here)
4281  * this could be delayed a bit further, but EXEC_BACKEND needs to do
4282  * stuff with LWLocks before PostgresMain(), so we do it here as well
4283  * for symmetry.
4284  */
4285  InitProcess();
4286 
4287  /* And run the backend */
 /* NOTE(review): presumably BackendRun() never returns, since the code
  * after this block uses bn, which the child has freed -- confirm. */
4288  BackendRun(port);
4289  }
4290 #endif /* EXEC_BACKEND */
4291 
4292  if (pid < 0)
4293  {
4294  /* in parent, fork failed */
 /* Keep errno for the %m in the log message below: the calls in between
  * may overwrite it. */
4295  int save_errno = errno;
4296 
4297  if (!bn->dead_end)
 /* NOTE(review): the body of the "if" above is missing; as listed,
  * free(bn) would wrongly become conditional. */
4299  free(bn);
4300  errno = save_errno;
4301  ereport(LOG,
4302  (errmsg("could not fork new process for connection: %m")));
4303  report_fork_failure_to_client(port, save_errno);
4304  return STATUS_ERROR;
4305  }
4306 
4307  /* in parent, successful fork */
4308  ereport(DEBUG2,
4309  (errmsg_internal("forked new backend, pid=%d socket=%d",
4310  (int) pid, (int) port->sock)));
4311 
4312  /*
4313  * Everything's been successful, it's safe to add this backend to our list
4314  * of backends.
4315  */
4316  bn->pid = pid;
4317  bn->bkend_type = BACKEND_TYPE_NORMAL; /* Can change later to WALSND */
4318  dlist_push_head(&BackendList, &bn->elem);
4319 
4320 #ifdef EXEC_BACKEND
4321  if (!bn->dead_end)
4322  ShmemBackendArrayAdd(bn);
4323 #endif
4324 
4325  return STATUS_OK;
4326 }
4327 
4328 /*
4329  * Try to report backend fork() failure to client before we close the
4330  * connection. Since we do not care to risk blocking the postmaster on
4331  * this connection, we set the connection to non-blocking and try only once.
4332  *
4333  * This is grungy special-purpose code; we cannot use backend libpq since
4334  * it's not up and running.
 *
 * The message is written to port->sock; errnum is the errno value of the
 * failed fork and is rendered with strerror().
4335  */
4336 static void
4338 {
4339  char buffer[1000]; /* snprintf below truncates the message to fit */
4340  int rc;
4341 
4342  /* Format the error message packet (always V2 protocol) */
4343  snprintf(buffer, sizeof(buffer), "E%s%s\n",
4344  _("could not fork new process for connection: "),
4345  strerror(errnum));
4346 
4347  /* Set port to non-blocking. Don't do send() if this fails */
4348  if (!pg_set_noblock(port->sock))
4349  return;
4350 
4351  /*
 * We'll retry after EINTR, but ignore all other failures. The length
 * passed to send() is strlen + 1, so the terminating NUL goes out as part
 * of the packet.
 */
4352  do
4353  {
4354  rc = send(port->sock, buffer, strlen(buffer) + 1, 0);
4355  } while (rc < 0 && errno == EINTR);
4356 }
4357 
4358 
4359 /*
4360  * BackendInitialize -- initialize an interactive (postmaster-child)
4361  * backend process, and collect the client's startup packet.
4362  *
4363  * returns: nothing. Will not return at all if there's any failure.
4364  *
4365  * Note: this code does not depend on having any access to shared memory.
4366  * Indeed, our approach to SIGTERM/timeout handling *requires* that
4367  * shared memory not have been touched yet; see comments within.
4368  * In the EXEC_BACKEND case, we are physically attached to shared memory
4369  * but have not yet set up most of our local pointers to shmem structures.
4370  */
4371 static void
4373 {
4374  int status; /* result of ProcessStartupPacket() */
4375  int ret; /* result of pg_getnameinfo_all() */
4376  char remote_host[NI_MAXHOST];
4377  char remote_port[NI_MAXSERV];
4378  StringInfoData ps_data;
4379 
4380  /* Save port etc. for ps status */
4381  MyProcPort = port;
4382 
4383  /* Tell fd.c about the long-lived FD associated with the port */
4385 
4386  /*
4387  * PreAuthDelay is a debugging aid for investigating problems in the
4388  * authentication cycle: it can be set in postgresql.conf to allow time to
4389  * attach to the newly-forked backend with a debugger. (See also
4390  * PostAuthDelay, which we allow clients to pass through PGOPTIONS, but it
4391  * is not honored until after authentication.)
4392  */
4393  if (PreAuthDelay > 0)
4394  pg_usleep(PreAuthDelay * 1000000L);
4395 
4396  /* This flag will remain set until InitPostgres finishes authentication */
4397  ClientAuthInProgress = true; /* limit visibility of log messages */
4398 
4399  /* set these to empty in case they are needed before we set them up */
4400  port->remote_host = "";
4401  port->remote_port = "";
4402 
4403  /*
4404  * Initialize libpq and enable reporting of ereport errors to the client.
4405  * Must do this now because authentication uses libpq to send messages.
4406  */
4407  pq_init(); /* initialize libpq to talk to client */
4408  whereToSendOutput = DestRemote; /* now safe to ereport to client */
4409 
4410  /*
4411  * We arrange to do _exit(1) if we receive SIGTERM or timeout while trying
4412  * to collect the startup packet; while SIGQUIT results in _exit(2).
4413  * Otherwise the postmaster cannot shutdown the database FAST or IMMED
4414  * cleanly if a buggy client fails to send the packet promptly.
4415  *
4416  * Exiting with _exit(1) is only possible because we have not yet touched
4417  * shared memory; therefore no outside-the-process state needs to get
4418  * cleaned up.
4419  */
4421  /* SIGQUIT handler was already set up by InitPostmasterChild */
4422  InitializeTimeouts(); /* establishes SIGALRM handler */
4424 
4425  /*
4426  * Get the remote host name and port for logging and status display.
 * A lookup failure is only reported as a WARNING; the buffers then keep
 * whatever the call left in them (they start out empty).
4427  */
4428  remote_host[0] = '\0';
4429  remote_port[0] = '\0';
4430  if ((ret = pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen,
4431  remote_host, sizeof(remote_host),
4432  remote_port, sizeof(remote_port),
4433  (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV)) != 0)
4434  ereport(WARNING,
4435  (errmsg_internal("pg_getnameinfo_all() failed: %s",
4436  gai_strerror(ret))));
4437 
4438  /*
4439  * Save remote_host and remote_port in port structure (after this, they
4440  * will appear in log_line_prefix data for log messages).
 *
 * NOTE(review): the strdup() results are stored without a NULL check.
4441  */
4442  port->remote_host = strdup(remote_host);
4443  port->remote_port = strdup(remote_port);
4444 
4445  /* And now we can issue the Log_connections message, if wanted */
4446  if (Log_connections)
4447  {
4448  if (remote_port[0])
4449  ereport(LOG,
4450  (errmsg("connection received: host=%s port=%s",
4451  remote_host,
4452  remote_port)));
4453  else
4454  ereport(LOG,
4455  (errmsg("connection received: host=%s",
4456  remote_host)));
4457  }
4458 
4459  /*
4460  * If we did a reverse lookup to name, we might as well save the results
4461  * rather than possibly repeating the lookup during authentication.
4462  *
4463  * Note that we don't want to specify NI_NAMEREQD above, because then we'd
4464  * get nothing useful for a client without an rDNS entry. Therefore, we
4465  * must check whether we got a numeric IPv4 or IPv6 address, and not save
4466  * it into remote_hostname if so. (This test is conservative and might
4467  * sometimes classify a hostname as numeric, but an error in that
4468  * direction is safe; it only results in a possible extra lookup.)
4469  */
4470  if (log_hostname &&
4471  ret == 0 &&
4472  strspn(remote_host, "0123456789.") < strlen(remote_host) &&
4473  strspn(remote_host, "0123456789ABCDEFabcdef:") < strlen(remote_host))
4474  port->remote_hostname = strdup(remote_host);
4475 
4476  /*
4477  * Ready to begin client interaction. We will give up and _exit(1) after
4478  * a time delay, so that a broken client can't hog a connection
4479  * indefinitely. PreAuthDelay and any DNS interactions above don't count
4480  * against the time limit.
4481  *
4482  * Note: AuthenticationTimeout is applied here while waiting for the
4483  * startup packet, and then again in InitPostgres for the duration of any
4484  * authentication operations. So a hostile client could tie up the
4485  * process for nearly twice AuthenticationTimeout before we kick him off.
4486  *
4487  * Note: because PostgresMain will call InitializeTimeouts again, the
4488  * registration of STARTUP_PACKET_TIMEOUT will be lost. This is okay
4489  * since we never use it again after this function.
4490  */
4493 
4494  /*
4495  * Receive the startup packet (which might turn out to be a cancel request
4496  * packet).
4497  */
4498  status = ProcessStartupPacket(port, false, false);
4499 
4500  /*
4501  * Disable the timeout, and prevent SIGTERM again.
4502  */
4504  PG_SETMASK(&BlockSig);
4505 
4506  /*
4507  * As a safety check that nothing in startup has yet performed
4508  * shared-memory modifications that would need to be undone if we had
4509  * exited through SIGTERM or timeout above, check that no on_shmem_exit
4510  * handlers have been registered yet. (This isn't terribly bulletproof,
4511  * since someone might misuse an on_proc_exit handler for shmem cleanup,
4512  * but it's a cheap and helpful check. We cannot disallow on_proc_exit
4513  * handlers unfortunately, since pq_init() already registered one.)
4514  */
4516 
4517  /*
4518  * Stop here if it was bad or a cancel packet. ProcessStartupPacket
4519  * already did any appropriate error reporting.
4520  */
4521  if (status != STATUS_OK)
4522  proc_exit(0);
4523 
4524  /*
4525  * Now that we have the user and database name, we can set the process
4526  * title for ps. It's good to do this as early as possible in startup.
4527  */
4528  initStringInfo(&ps_data);
4529  if (am_walsender)
4531  appendStringInfo(&ps_data, "%s ", port->user_name);
4532  if (!am_walsender)
4533  appendStringInfo(&ps_data, "%s ", port->database_name);
4534  appendStringInfo(&ps_data, "%s", port->remote_host);
4535  if (port->remote_port[0] != '\0')
4536  appendStringInfo(&ps_data, "(%s)", port->remote_port);
4537 
4538  init_ps_display(ps_data.data);
4539  pfree(ps_data.data); /* the title buffer is released right after init_ps_display() */
4540 
4541  set_ps_display("initializing");
4542 }
4543 
4544 
4545 /*
4546  * BackendRun -- set up the backend's argument list and invoke PostgresMain()
4547  *
4548  * returns:
4549  * Doesn't return at all.
4550  */
4551 static void
4553 {
4554  char *av[2];
4555  const int ac = 1; /* counts av[0] only; av[1] is the NULL terminator */
4556 
4557  av[0] = "postgres";
4558  av[1] = NULL;
4559 
4560  /*
4561  * Make sure we aren't in PostmasterContext anymore. (We can't delete it
4562  * just yet, though, because InitPostgres will need the HBA data.)
4563  */
4565 
4566  PostgresMain(ac, av, port->database_name, port->user_name);
4567 }
4568 
4569 
4570 #ifdef EXEC_BACKEND
4571 
4572 /*
4573  * postmaster_forkexec -- fork and exec a postmaster subprocess
4574  *
4575  * The caller must have set up the argv array already, except for argv[2]
4576  * which will be filled with the name of the temp variable file.
4577  *
4578  * Returns the child process PID, or -1 on fork failure (a suitable error
4579  * message has been logged on failure).
4580  *
4581  * All uses of this routine will dispatch to SubPostmasterMain in the
4582  * child process.
4583  */
4584 pid_t
4585 postmaster_forkexec(int argc, char *argv[])
4586 {
4587  Port port;
4588 
4589  /* This entry point passes dummy values for the Port variables */
4590  memset(&port, 0, sizeof(port));
4591  return internal_forkexec(argc, argv, &port);
4592 }
4593 
4594 /*
4595  * backend_forkexec -- fork/exec off a backend process
4596  *
4597  * Some operating systems (WIN32) don't have fork() so we have to simulate
4598  * it by storing parameters that need to be passed to the child and
4599  * then create a new child process.
4600  *
4601  * returns the pid of the fork/exec'd process, or -1 on failure
4602  */
4603 static pid_t
4604 backend_forkexec(Port *port)
4605 {
4606  char *av[4];
4607  int ac = 0;
4608 
4609  av[ac++] = "postgres";
4610  av[ac++] = "--forkbackend";
4611  av[ac++] = NULL; /* filled in by internal_forkexec */
4612 
4613  av[ac] = NULL;
4614  Assert(ac < lengthof(av));
4615 
4616  return internal_forkexec(ac, av, port);
4617 }
4618 
4619 #ifndef WIN32
4620 
4621 /*
4622  * internal_forkexec non-win32 implementation
4623  *
4624  * - writes out backend variables to the parameter file
4625  * - fork():s, and then exec():s the child process
 *
 * argv[2] must be NULL on entry; it is set to the temp file's name before
 * the fork. Returns the fork_process() result in the parent, or -1 if the
 * parameter file could not be written (a LOG message is emitted in that
 * case).
4626  */
4627 static pid_t
4628 internal_forkexec(int argc, char *argv[], Port *port)
4629 {
4630  static unsigned long tmpBackendFileNum = 0; /* incremented for every temp file name built below */
4631  pid_t pid;
4632  char tmpfilename[MAXPGPATH];
4633  BackendParameters param;
4634  FILE *fp;
4635 
4636  if (!save_backend_variables(&param, port))
4637  return -1; /* log made by save_backend_variables */
4638 
4639  /* Calculate name for temp file */
4640  snprintf(tmpfilename, MAXPGPATH, "%s/%s.backend_var.%d.%lu",
4642  MyProcPid, ++tmpBackendFileNum);
4643 
4644  /* Open file */
4645  fp = AllocateFile(tmpfilename, PG_BINARY_W);
4646  if (!fp)
4647  {
4648  /*
4649  * As in OpenTemporaryFileInTablespace, try to make the temp-file
4650  * directory, ignoring errors.
4651  */
4653 
4654  fp = AllocateFile(tmpfilename, PG_BINARY_W);
4655  if (!fp)
4656  {
4657  ereport(LOG,
4659  errmsg("could not create file \"%s\": %m",
4660  tmpfilename)));
4661  return -1;
4662  }
4663  }
4664 
4665  if (fwrite(&param, sizeof(param), 1, fp) != 1)
4666  {
4667  ereport(LOG,
4669  errmsg("could not write to file \"%s\": %m", tmpfilename)));
4670  FreeFile(fp);
4671  return -1;
4672  }
4673 
4674  /* Release file; a failure here is reported as a write failure too */
4675  if (FreeFile(fp))
4676  {
4677  ereport(LOG,
4679  errmsg("could not write to file \"%s\": %m", tmpfilename)));
4680  return -1;
4681  }
4682 
4683  /* Make sure caller set up argv properly */
4684  Assert(argc >= 3);
4685  Assert(argv[argc] == NULL);
4686  Assert(strncmp(argv[1], "--fork", 6) == 0);
4687  Assert(argv[2] == NULL);
4688 
4689  /* Insert temp file name after --fork argument */
4690  argv[2] = tmpfilename;
4691 
4692  /* Fire off execv in child */
4693  if ((pid = fork_process()) == 0)
4694  {
4695  if (execv(postgres_exec_path, argv) < 0)
4696  {
4697  ereport(LOG,
4698  (errmsg("could not execute server process \"%s\": %m",
4699  postgres_exec_path)));
4700  /* We're already in the child process here, can't return */
4701  exit(1);
4702  }
4703  }
4704 
4705  return pid; /* Parent returns pid, or -1 on fork failure */
4706 }
4707 #else /* WIN32 */
4708 
4709 /*
4710  * internal_forkexec win32 implementation
4711  *
4712  * - starts backend using CreateProcess(), in suspended state
4713  * - writes out backend variables to the parameter file
4714  * - during this, duplicates handles and sockets required for
4715  * inheritance into the new process
4716  * - resumes execution of the new process once the backend parameter
4717  * file is complete.
4718  */
4719 static pid_t
4720 internal_forkexec(int argc, char *argv[], Port *port)
4721 {
4722  int retry_count = 0;
4723  STARTUPINFO si;
4724  PROCESS_INFORMATION pi;
4725  int i;
4726  int j;
4727  char cmdLine[MAXPGPATH * 2];
4728  HANDLE paramHandle;
4729  BackendParameters *param;
4730  SECURITY_ATTRIBUTES sa;
4731  char paramHandleStr[32];
4732  win32_deadchild_waitinfo *childinfo;
4733 
4734  /* Make sure caller set up argv properly */
4735  Assert(argc >= 3);
4736  Assert(argv[argc] == NULL);
4737  Assert(strncmp(argv[1], "--fork", 6) == 0);
4738  Assert(argv[2] == NULL);
4739 
4740  /* Resume here if we need to retry */
4741 retry:
4742 
4743  /* Set up shared memory for parameter passing */
4744  ZeroMemory(&sa, sizeof(sa));
4745  sa.nLength = sizeof(sa);
4746  sa.bInheritHandle = TRUE;
4747  paramHandle = CreateFileMapping(INVALID_HANDLE_VALUE,
4748  &sa,
4749  PAGE_READWRITE,
4750  0,
4751  sizeof(BackendParameters),
4752  NULL);
4753  if (paramHandle == INVALID_HANDLE_VALUE)
4754  {
4755  ereport(LOG,
4756  (errmsg("could not create backend parameter file mapping: error code %lu",
4757  GetLastError())));
4758  return -1;
4759  }
4760 
4761  param = MapViewOfFile(paramHandle, FILE_MAP_WRITE, 0, 0, sizeof(BackendParameters));
4762  if (!param)
4763  {
4764  ereport(LOG,
4765  (errmsg("could not map backend parameter memory: error code %lu",
4766  GetLastError())));
4767  CloseHandle(paramHandle);
4768  return -1;
4769  }
4770 
4771  /* Insert temp file name after --fork argument */
4772 #ifdef _WIN64
4773  sprintf(paramHandleStr, "%llu", (LONG_PTR) paramHandle);
4774 #else
4775  sprintf(paramHandleStr, "%lu", (DWORD) paramHandle);
4776 #endif
4777  argv[2] = paramHandleStr;
4778 
4779  /* Format the cmd line */
4780  cmdLine[sizeof(cmdLine) - 1] = '\0';
4781  cmdLine[sizeof(cmdLine) - 2] = '\0';
4782  snprintf(cmdLine, sizeof(cmdLine) - 1, "\"%s\"", postgres_exec_path);
4783  i = 0;
4784  while (argv[++i] != NULL)
4785  {
4786  j = strlen(cmdLine);
4787  snprintf(cmdLine + j, sizeof(cmdLine) - 1 - j, " \"%s\"", argv[i]);
4788  }
4789  if (cmdLine[sizeof(cmdLine) - 2] != '\0')
4790  {
4791  ereport(LOG,
4792  (errmsg("subprocess command line too long")));
4793  UnmapViewOfFile(param);
4794  CloseHandle(paramHandle);
4795  return -1;
4796  }
4797 
4798  memset(&pi, 0, sizeof(pi));
4799  memset(&si, 0, sizeof(si));
4800  si.cb = sizeof(si);
4801 
4802  /*
4803  * Create the subprocess in a suspended state. This will be resumed later,
4804  * once we have written out the parameter file.
4805  */
4806  if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, CREATE_SUSPENDED,
4807  NULL, NULL, &si, &pi))
4808  {
4809  ereport(LOG,
4810  (errmsg("CreateProcess() call failed: %m (error code %lu)",
4811  GetLastError())));
4812  UnmapViewOfFile(param);
4813  CloseHandle(paramHandle);
4814  return -1;
4815  }
4816 
4817  if (!save_backend_variables(param, port, pi.hProcess, pi.dwProcessId))
4818  {
4819  /*
4820  * log made by save_backend_variables, but we have to clean up the
4821  * mess with the half-started process
4822  */
4823  if (!TerminateProcess(pi.hProcess, 255))
4824  ereport(LOG,
4825  (errmsg_internal("could not terminate unstarted process: error code %lu",
4826  GetLastError())));
4827  CloseHandle(pi.hProcess);
4828  CloseHandle(pi.hThread);
4829  UnmapViewOfFile(param);
4830  CloseHandle(paramHandle);
4831  return -1; /* log made by save_backend_variables */
4832  }
4833 
4834  /* Drop the parameter shared memory that is now inherited to the backend */
4835  if (!UnmapViewOfFile(param))
4836  ereport(LOG,
4837  (errmsg("could not unmap view of backend parameter file: error code %lu",
4838  GetLastError())));
4839  if (!CloseHandle(paramHandle))
4840  ereport(LOG,
4841  (errmsg("could not close handle to backend parameter file: error code %lu",
4842  GetLastError())));
4843 
4844  /*
4845  * Reserve the memory region used by our main shared memory segment before
4846  * we resume the child process. Normally this should succeed, but if ASLR
4847  * is active then it might sometimes fail due to the stack or heap having
4848  * gotten mapped into that range. In that case, just terminate the
4849  * process and retry.
4850  */
4851  if (!pgwin32_ReserveSharedMemoryRegion(pi.hProcess))
4852  {
4853  /* pgwin32_ReserveSharedMemoryRegion already made a log entry */
4854  if (!TerminateProcess(pi.hProcess, 255))
4855  ereport(LOG,
4856  (errmsg_internal("could not terminate process that failed to reserve memory: error code %lu",
4857  GetLastError())));
4858  CloseHandle(pi.hProcess);
4859  CloseHandle(pi.hThread);
4860  if (++retry_count < 100)
4861  goto retry;
4862  ereport(LOG,
4863  (errmsg("giving up after too many tries to reserve shared memory"),
4864  errhint("This might be caused by ASLR or antivirus software.")));
4865  return -1;
4866  }
4867 
4868  /*
4869  * Now that the backend variables are written out, we start the child
4870  * thread so it can start initializing while we set up the rest of the
4871  * parent state.
4872  */
4873  if (ResumeThread(pi.hThread) == -1)
4874  {
4875  if (!TerminateProcess(pi.hProcess, 255))
4876  {
4877  ereport(LOG,
4878  (errmsg_internal("could not terminate unstartable process: error code %lu",
4879  GetLastError())));
4880  CloseHandle(pi.hProcess);
4881  CloseHandle(pi.hThread);
4882  return -1;
4883  }
4884  CloseHandle(pi.hProcess);
4885  CloseHandle(pi.hThread);
4886  ereport(LOG,
4887  (errmsg_internal("could not resume thread of unstarted process: error code %lu",
4888  GetLastError())));
4889  return -1;
4890  }
4891 
4892  /*
4893  * Queue a waiter to signal when this child dies. The wait will be handled
4894  * automatically by an operating system thread pool.
4895  *
4896  * Note: use malloc instead of palloc, since it needs to be thread-safe.
4897  * Struct will be free():d from the callback function that runs on a
4898  * different thread.
4899  */
4900  childinfo = malloc(sizeof(win32_deadchild_waitinfo));
4901  if (!childinfo)
4902  ereport(FATAL,
4903  (errcode(ERRCODE_OUT_OF_MEMORY),
4904  errmsg("out of memory")));
4905 
4906  childinfo->procHandle = pi.hProcess;
4907  childinfo->procId = pi.dwProcessId;
4908 
4909  if (!RegisterWaitForSingleObject(&childinfo->waitHandle,
4910  pi.hProcess,
4911  pgwin32_deadchild_callback,
4912  childinfo,
4913  INFINITE,
4914  WT_EXECUTEONLYONCE | WT_EXECUTEINWAITTHREAD))
4915  ereport(FATAL,
4916  (errmsg_internal("could not register process for wait: error code %lu",
4917  GetLastError())));
4918 
4919  /* Don't close pi.hProcess here - the wait thread needs access to it */
4920 
4921  CloseHandle(pi.hThread);
4922 
4923  return pi.dwProcessId;
4924 }
4925 #endif /* WIN32 */
4926 
4927 
4928 /*
4929  * SubPostmasterMain -- Get the fork/exec'd process into a state equivalent
4930  * to what it would be if we'd simply forked on Unix, and then
4931  * dispatch to the appropriate place.
4932  *
4933  * The first two command line arguments are expected to be "--forkFOO"
4934  * (where FOO indicates which postmaster child we are to become), and
4935  * the name of a variables file that we can read to load data that would
4936  * have been inherited by fork() on Unix. Remaining arguments go to the
4937  * subprocess FooMain() routine.
 *
 * Note: argv[2] is whatever internal_forkexec stored there. In the WIN32
 * implementation that is the decimal value of a file-mapping handle rather
 * than a file name.
 *
 * Every recognized --fork switch ends in a call marked "does not return";
 * reaching the end of this function means the switch was not recognized,
 * and we abort().
4938  */
4939 void
4940 SubPostmasterMain(int argc, char *argv[])
4941 {
4942  Port port;
4943 
4944  /* In EXEC_BACKEND case we will not have inherited these settings */
4945  IsPostmasterEnvironment = true;
4947 
4948  /* Setup essential subsystems (to ensure elog() behaves sanely) */
4950 
4951  /* Check we got appropriate args */
4952  if (argc < 3)
4953  elog(FATAL, "invalid subpostmaster invocation");
4954 
4955  /* Read in the variables file */
4956  memset(&port, 0, sizeof(Port));
4957  read_backend_variables(argv[2], &port);
4958 
4959  /* Close the postmaster's sockets (as soon as we know them) */
4960  ClosePostmasterPorts(strcmp(argv[1], "--forklog") == 0);
4961 
4962  /* Setup as postmaster child */
4964 
4965  /*
4966  * If appropriate, physically re-attach to shared memory segment. We want
4967  * to do this before going any further to ensure that we can attach at the
4968  * same address the postmaster used. On the other hand, if we choose not
4969  * to re-attach, we may have other cleanup to do.
4970  *
4971  * If testing EXEC_BACKEND on Linux, you should run this as root before
4972  * starting the postmaster:
4973  *
4974  * echo 0 >/proc/sys/kernel/randomize_va_space
4975  *
4976  * This prevents using randomized stack and code addresses that cause the
4977  * child process's memory map to be different from the parent's, making it
4978  * sometimes impossible to attach to shared memory at the desired address.
4979  * Return the setting to its old value (usually '1' or '2') when finished.
4980  */
4981  if (strcmp(argv[1], "--forkbackend") == 0 ||
4982  strcmp(argv[1], "--forkavlauncher") == 0 ||
4983  strcmp(argv[1], "--forkavworker") == 0 ||
4984  strcmp(argv[1], "--forkaux") == 0 ||
4985  strncmp(argv[1], "--forkbgworker=", 15) == 0)
4987  else
4989 
4990  /* autovacuum needs this set before calling InitProcess */
4991  if (strcmp(argv[1], "--forkavlauncher") == 0)
4992  AutovacuumLauncherIAm();
4993  if (strcmp(argv[1], "--forkavworker") == 0)
4994  AutovacuumWorkerIAm();
4995 
4996  /* Read in remaining GUC variables */
4997  read_nondefault_variables();
4998 
4999  /*
5000  * Check that the data directory looks valid, which will also check the
5001  * privileges on the data directory and update our umask and file/group
5002  * variables for creating files later. Note: this should really be done
5003  * before we create any files or directories.
5004  */
5005  checkDataDir();
5006 
5007  /*
5008  * (re-)read control file, as it contains config. The postmaster will
5009  * already have read this, but this process doesn't know about that.
5010  */
5011  LocalProcessControlFile(false);
5012 
5013  /*
5014  * Reload any libraries that were preloaded by the postmaster. Since we
5015  * exec'd this process, those libraries didn't come along with us; but we
5016  * should load them into all child processes to be consistent with the
5017  * non-EXEC_BACKEND behavior.
5018  */
5020 
5021  /* Run backend or appropriate child */
5022  if (strcmp(argv[1], "--forkbackend") == 0)
5023  {
5024  Assert(argc == 3); /* shouldn't be any more args */
5025 
5026  /*
5027  * Need to reinitialize the SSL library in the backend, since the
5028  * context structures contain function pointers and cannot be passed
5029  * through the parameter file.
5030  *
5031  * If for some reason reload fails (maybe the user installed broken
5032  * key files), soldier on without SSL; that's better than all
5033  * connections becoming impossible.
5034  *
5035  * XXX should we do this in all child processes? For the moment it's
5036  * enough to do it in backend children.
5037  */
5038 #ifdef USE_SSL
5039  if (EnableSSL)
5040  {
5041  if (secure_initialize(false) == 0)
5042  LoadedSSL = true;
5043  else
5044  ereport(LOG,
5045  (errmsg("SSL configuration could not be loaded in child process")));
5046  }
5047 #endif
5048 
5049  /*
5050  * Perform additional initialization and collect startup packet.
5051  *
5052  * We want to do this before InitProcess() for a couple of reasons: 1.
5053  * so that we aren't eating up a PGPROC slot while waiting on the
5054  * client. 2. so that if InitProcess() fails due to being out of
5055  * PGPROC slots, we have already initialized libpq and are able to
5056  * report the error to the client.
5057  */
5058  BackendInitialize(&port);
5059 
5060  /* Restore basic shared memory pointers */
5062 
5063  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5064  InitProcess();
5065 
5066  /* Attach process to shared data structures */
5068 
5069  /* And run the backend */
5070  BackendRun(&port); /* does not return */
5071  }
5072  if (strcmp(argv[1], "--forkaux") == 0)
5073  {
5074  AuxProcType auxtype;
5075 
5076  Assert(argc == 4);
5077 
5078  /* Restore basic shared memory pointers */
5080 
5081  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5083 
5084  /* Attach process to shared data structures */
5086 
5087  auxtype = atoi(argv[3]); /* NOTE(review): atoi() result is used without validation */
5088  AuxiliaryProcessMain(auxtype); /* does not return */
5089  }
5090  if (strcmp(argv[1], "--forkavlauncher") == 0)
5091  {
5092  /* Restore basic shared memory pointers */
5094 
5095  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5096  InitProcess();
5097 
5098  /* Attach process to shared data structures */
5100 
5101  AutoVacLauncherMain(argc - 2, argv + 2); /* does not return */
5102  }
5103  if (strcmp(argv[1], "--forkavworker") == 0)
5104  {
5105  /* Restore basic shared memory pointers */
5107 
5108  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5109  InitProcess();
5110 
5111  /* Attach process to shared data structures */
5113 
5114  AutoVacWorkerMain(argc - 2, argv + 2); /* does not return */
5115  }
5116  if (strncmp(argv[1], "--forkbgworker=", 15) == 0)
5117  {
5118  int shmem_slot;
5119 
5120  /* do this as early as possible; in particular, before InitProcess() */
5121  IsBackgroundWorker = true;
5122 
5123  /* Restore basic shared memory pointers */
5125 
5126  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5127  InitProcess();
5128 
5129  /* Attach process to shared data structures */
5131 
5132  /* Fetch MyBgworkerEntry from shared memory; 15 skips the "--forkbgworker=" prefix */
5133  shmem_slot = atoi(argv[1] + 15);
5134  MyBgworkerEntry = BackgroundWorkerEntry(shmem_slot);
5135 
5137  }
5138  if (strcmp(argv[1], "--forkcol") == 0)
5139  {
5140  /* Do not want to attach to shared memory */
5141 
5142  PgstatCollectorMain(argc, argv); /* does not return */
5143  }
5144  if (strcmp(argv[1], "--forklog") == 0)
5145  {
5146  /* Do not want to attach to shared memory */
5147 
5148  SysLoggerMain(argc, argv); /* does not return */
5149  }
5150 
5151  abort(); /* shouldn't get here */
5152 }
5153 #endif /* EXEC_BACKEND */
5154 
5155 
5156 /*
5157  * ExitPostmaster -- cleanup
5158  *
5159  * Do NOT call exit() directly --- always go through here!
 *
 * The status argument is handed unchanged to proc_exit(), which is the
 * last thing this function calls.
5160  */
5161 static void
5163 {
5164 #ifdef HAVE_PTHREAD_IS_THREADED_NP
5165 
5166  /*
5167  * There is no known cause for a postmaster to become multithreaded after
5168  * startup. Recheck to account for the possibility of unknown causes.
5169  * This message uses LOG level, because an unclean shutdown at this point
5170  * would usually not look much different from a clean shutdown.
5171  */
5172  if (pthread_is_threaded_np() != 0)
5173  ereport(LOG,
5174  (errcode(ERRCODE_INTERNAL_ERROR),
5175  errmsg_internal("postmaster became multithreaded"),
5176  errdetail("Please report this to <%s>.", PACKAGE_BUGREPORT)));
5177 #endif
5178 
5179  /* should cleanup shared memory and kill all backends */
5180 
5181  /*
5182  * Not sure of the semantics here. When the Postmaster dies, should the
5183  * backends all be killed? probably not.
5184  *
5185  * MUST -- vadim 05-10-1999
5186  */
5187 
5188  proc_exit(status);
5189 }
5190 
5191 /*
5192  * sigusr1_handler - handle signal conditions from child processes
 *
 * errno is saved on entry and restored just before returning.
5193  */
5194 static void
5196 {
5197  int save_errno = errno; /* put back at the end of the handler */
5198 
5199  /*
5200  * We rely on the signal mechanism to have blocked all signals ... except
5201  * on Windows, which lacks sigaction(), so we have to do it manually.
5202  */
5203 #ifdef WIN32
5204  PG_SETMASK(&BlockSig);
5205 #endif
5206 
5207  /*
5208  * RECOVERY_STARTED and BEGIN_HOT_STANDBY signals are ignored in
5209  * unexpected states. If the startup process quickly starts up, completes
5210  * recovery, exits, we might process the death of the startup process
5211  * first. We don't want to go back to recovery in that case.
5212  */
5215  {
5216  /* WAL redo has started. We're out of reinitialization. */
5217  FatalError = false;
5218  AbortStartTime = 0;
5219 
5220  /*
5221  * Start the archiver if we're responsible for (re-)archiving received
5222  * files.
5223  */
5224  Assert(PgArchPID == 0);
5225  if (XLogArchivingAlways())
5227 
5228  /*
5229  * If we aren't planning to enter hot standby mode later, treat
5230  * RECOVERY_STARTED as meaning we're out of startup, and report status
5231  * accordingly.
5232  */
5233  if (!EnableHotStandby)
5234  {
5236 #ifdef USE_SYSTEMD
5237  sd_notify(0, "READY=1");
5238 #endif
5239  }
5240 
5241  pmState = PM_RECOVERY;
5242  }
5243 
5246  {
5247  /*
5248  * Likewise, start other special children as needed.
5249  */
5250  Assert(PgStatPID == 0);
5251  PgStatPID = pgstat_start();
5252 
5253  ereport(LOG,
5254  (errmsg("database system is ready to accept read-only connections")));
5255 
5256  /* Report status */
5258 #ifdef USE_SYSTEMD
5259  sd_notify(0, "READY=1");
5260 #endif
5261 
5264 
5265  /* Some workers may be scheduled to start now */
5266  StartWorkerNeeded = true;
5267  }
5268 
5269  /* Process background worker state changes. */
5271  {
5272  /* Accept new worker requests only if not stopping. */
5274  StartWorkerNeeded = true;
5275  }
5276 
5279 
5280  /* Tell syslogger to rotate logfile if requested */
5281  if (SysLoggerPID != 0)
5282  {
5283  if (CheckLogrotateSignal())
5284  {
5287  }
5289  {
5291  }
5292  }
5293 
5296  {
5297  /*
5298  * Start one iteration of the autovacuum daemon, even if autovacuuming
5299  * is nominally not enabled. This is so we can have an active defense
5300  * against transaction ID wraparound. We set a flag for the main loop
5301  * to do it rather than trying to do it here --- this is because the
5302  * autovac process itself may send the signal, and we want to handle
5303  * that by launching another iteration as soon as the current one
5304  * completes.
5305  */
5306  start_autovac_launcher = true;
5307  }
5308 
5311  {
5312  /* The autovacuum launcher wants us to start a worker process. */
5314  }
5315 
5317  {
5318  /* Startup Process wants us to start the walreceiver process. */
5319  /* Start immediately if possible, else remember request for later. */
5320  WalReceiverRequested = true;
5322  }
5323 
5324  /*
5325  * Try to advance postmaster's state machine, if a child requests it.
5326  *
5327  * Be careful about the order of this action relative to sigusr1_handler's
5328  * other actions. Generally, this should be after other actions, in case
5329  * they have effects PostmasterStateMachine would need to know about.
5330  * However, we should do it before the CheckPromoteSignal step, which
5331  * cannot have any (immediate) effect on the state machine, but does
5332  * depend on what state we're in now.
5333  */
5335  {
5337  }
5338 
5339  if (StartupPID != 0 &&
5340  (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5341  pmState == PM_HOT_STANDBY) &&
5343  {
5344  /*
5345  * Tell startup process to finish recovery.
5346  *
5347  * Leave the promote signal file in place and let the Startup process
5348  * do the unlink.
5349  */
5351  }
5352 
5353 #ifdef WIN32
5355 #endif
5356 
5357  errno = save_errno;
5358 }
5359 
5360 /*
5361  * SIGTERM while processing startup packet.
5362  *
5363  * Running proc_exit() from a signal handler would be quite unsafe.
5364  * However, since we have not yet touched shared memory, we can just
5365  * pull the plug and exit without running any atexit handlers.
5366  *
5367  * One might be tempted to try to send a message, or log one, indicating
5368  * why we are disconnecting. However, that would be quite unsafe in itself.
5369  * Also, it seems undesirable to provide clues about the database's state
5370  * to a client that has not yet completed authentication, or even sent us
5371  * a startup packet.
5372  */
/*
 * NOTE(review): the declarator line carrying this handler's name and
 * parameter list is absent from this listing (the embedded numbering skips
 * from 5373 to 5375).  Presumably this is process_startup_packet_die(), the
 * name a later comment in this file refers to -- confirm against the real
 * file.
 */
5373 static void
5375 {
 /* The whole body: leave the process immediately with status 1. */
5376  _exit(1);
5377 }
5378 
5379 /*
5380  * Dummy signal handler
5381  *
5382  * We use this for signals that we don't actually use in the postmaster,
5383  * but we do use in backends. If we were to SIG_IGN such signals in the
5384  * postmaster, then a newly started backend might drop a signal that arrives
5385  * before it's able to reconfigure its signal processing. (See notes in
5386  * tcop/postgres.c.)
5387  */
/*
 * NOTE(review): the declarator line (name and parameter list) is absent
 * from this listing -- the embedded numbering skips from 5388 to 5390.
 * The body is intentionally empty: the handler does nothing when invoked.
 */
5388 static void
5390 {
5391 }
5392 
5393 /*
5394  * Timeout while processing startup packet.
5395  * As for process_startup_packet_die(), we exit via _exit(1).
5396  */
/*
 * NOTE(review): the declarator line (name and parameter list) is absent
 * from this listing -- the embedded numbering skips from 5397 to 5399.
 */
5397 static void
5399 {
 /* Same exit path as the SIGTERM case: leave at once with status 1. */
5400  _exit(1);
5401 }
5402 
5403 
5404 /*
5405  * Generate a random cancel key.
5406  */
/*
 * NOTE(review): the declarator line is absent from this listing (the
 * embedded numbering skips from 5407 to 5409), so the declaration of
 * "cancel_key" is not visible here; presumably it is the caller-supplied
 * destination for the key -- confirm against the real file.
 *
 * Returns whatever pg_strong_random() returns for that request.
 */
5407 static bool
5409 {
 /* Ask for exactly sizeof(int32) random bytes, written at cancel_key. */
5410  return pg_strong_random(cancel_key, sizeof(int32));
5411 }
5412 
5413 /*
5414  * Count up number of child processes of specified types (dead_end children
5415  * are always excluded).
 *
 * "target" is either BACKEND_TYPE_ALL, in which case every non-dead_end
 * entry of BackendList is counted, or a mask that is ANDed with each
 * entry's bkend_type to decide whether that entry is counted.
 *
 * Returns the number of entries that matched.
5416  */
5417 static int
5418 CountChildren(int target)
5419 {
5420  dlist_iter iter;
5421  int cnt = 0;
5422 
5423  dlist_foreach(iter, &BackendList)
5424  {
5425  Backend *bp = dlist_container(Backend, elem, iter.cur);
5426 
5427  if (bp->dead_end)
5428  continue;
5429 
5430  /*
5431  * Since target == BACKEND_TYPE_ALL is the most common case, we test
5432  * it first and avoid touching shared memory for every child.
5433  */
5434  if (target != BACKEND_TYPE_ALL)
5435  {
5436  /*
5437  * Assign bkend_type for any recently announced WAL Sender
5438  * processes.
5439  */
5440  if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
 /*
  * NOTE(review): the remainder of this condition and the statement it
  * guards are absent from this listing (the embedded numbering skips
  * 5441-5442).  Restore them from the real file before relying on
  * this block.
  */
5443 
 /* Skip entries whose type has no bit in common with the mask. */
5444  if (!(target & bp->bkend_type))
5445  continue;
5446  }
5447 
5448  cnt++;
5449  }
5450  return cnt;
5451 }
5452 
5453 
5454 /*
5455  * StartChildProcess -- start an auxiliary process for the postmaster
5456  *
5457  * "type" determines what kind of child will be started. All child types
5458  * initially go to AuxiliaryProcessMain, which will handle common setup.
5459  *
5460  * Return value of StartChildProcess is subprocess' PID, or 0 if failed
5461  * to start subprocess.
5462  */
/*
 * NOTE(review): the declarator line (function name and the declaration of
 * "type") is absent from this listing -- the embedded numbering skips from
 * 5463 to 5465.  The name StartChildProcess comes from the header comment
 * above.
 */
5463 static pid_t
5465 {
5466  pid_t pid;
5467 
5468 #ifdef EXEC_BACKEND
5469  {
5470  char *av[10];
5471  int ac = 0;
5472  char typebuf[32];
5473 
5474  /*
5475  * Set up command-line arguments for subprocess
5476  */
5477  av[ac++] = "postgres";
5478  av[ac++] = "--forkaux";
5479  av[ac++] = NULL; /* filled in by postmaster_forkexec */
5480 
 /* The child type travels to the subprocess as a decimal string. */
5481  snprintf(typebuf, sizeof(typebuf), "%d", type);
5482  av[ac++] = typebuf;
5483 
5484  av[ac] = NULL;
5485  Assert(ac < lengthof(av));
5486 
5487  pid = postmaster_forkexec(ac, av);
5488  }
5489 #else /* !EXEC_BACKEND */
5490  pid = fork_process();
5491 
5492  if (pid == 0) /* child */
5493  {
 /*
  * NOTE(review): one line is absent from this listing here (the
  * embedded numbering skips 5494).
  */
5495 
5496  /* Close the postmaster's sockets */
5497  ClosePostmasterPorts(false);
5498 
5499  /* Release postmaster's working memory context */
 /*
  * NOTE(review): two lines are absent from this listing here (the
  * embedded numbering skips 5500-5501); only the pointer reset below
  * survives.
  */
5502  PostmasterContext = NULL;
5503 
5504  AuxiliaryProcessMain(type); /* does not return */
5505  }
5506 #endif /* EXEC_BACKEND */
5507 
5508  if (pid < 0)
5509  {
5510  /* in parent, fork failed */
5511  int save_errno = errno;
5512 
 /*
  * NOTE(review): errno is saved and written straight back with nothing
  * in between (the numbering 5511-5513 is contiguous), so this pair is
  * currently a no-op.  Presumably it is kept so the %m in the messages
  * below still reports the fork failure -- confirm before removing.
  */
5513  errno = save_errno;
 /* Pick the log message that names the child type we failed to start. */
5514  switch (type)
5515  {
5516  case StartupProcess:
5517  ereport(LOG,
5518  (errmsg("could not fork startup process: %m")));
5519  break;
5520  case ArchiverProcess:
5521  ereport(LOG,
5522  (errmsg("could not fork archiver process: %m")));
5523  break;
5524  case BgWriterProcess:
5525  ereport(LOG,
5526  (errmsg("could not fork background writer process: %m")));
5527  break;
5528  case CheckpointerProcess:
5529  ereport(LOG,
5530  (errmsg("could not fork checkpointer process: %m")));
5531  break;
5532  case WalWriterProcess:
5533  ereport(LOG,
5534  (errmsg("could not fork WAL writer process: %m")));
5535  break;
5536  case WalReceiverProcess:
5537  ereport(LOG,
5538  (errmsg("could not fork WAL receiver process: %m")));
5539  break;
5540  default:
5541  ereport(LOG,
5542  (errmsg("could not fork process: %m")));
5543  break;
5544  }
5545 
5546  /*
5547  * fork failure is fatal during startup, but there's no need to choke
5548  * immediately if starting other child types fails.
5549  */
5550  if (type == StartupProcess)
5551  ExitPostmaster(1);
5552  return 0;
5553  }
5554 
5555  /*
5556  * in parent, successful fork
5557  */
5558  return pid;
5559 }
5560 
5561 /*
5562  * StartAutovacuumWorker
5563  * Start an autovac worker process.
5564  *
5565  * This function is here because it enters the resulting PID into the
5566  * postmaster's private backends list.
5567  *
5568  * NB -- this code very roughly matches BackendStartup.
5569  */
/*
 * NOTE(review): the declarator line is absent from this listing (the
 * embedded numbering skips from 5570 to 5572).  The name
 * StartAutovacuumWorker comes from the header comment above.
 */
5570 static void
5572 {
5573  Backend *bn;
5574 
5575  /*
5576  * If not in condition to run a process, don't try, but handle it like a
5577  * fork failure. This does not normally happen, since the signal is only
5578  * supposed to be sent by autovacuum launcher when it's OK to do it, but
5579  * we have to check to avoid race-condition problems during DB state
5580  * changes.
5581  */
 /*
  * NOTE(review): the "if" condition that opens the following block is
  * absent from this listing (the embedded numbering skips 5582).
  */
5583  {
5584  /*
5585  * Compute the cancel key that will be assigned to this session. We
5586  * probably don't need cancel keys for autovac workers, but we'd
5587  * better have something random in the field to prevent unfriendly
5588  * people from sending cancels to them.
5589  */
 /*
  * NOTE(review): the condition guarding the following block is absent
  * from this listing (the embedded numbering skips 5590).  Also note
  * that the "return" inside it leaves the function without reaching
  * the launcher-failure report at the bottom -- confirm that this is
  * intended.
  */
5591  {
5592  ereport(LOG,
5593  (errcode(ERRCODE_INTERNAL_ERROR),
5594  errmsg("could not generate random cancel key")));
5595  return;
5596  }
5597 
5598  bn = (Backend *) malloc(sizeof(Backend));
5599  if (bn)
5600  {
5601  bn->cancel_key = MyCancelKey;
5602 
5603  /* Autovac workers are not dead_end and need a child slot */
5604  bn->dead_end = false;
 /*
  * NOTE(review): one line is absent from this listing here (the
  * embedded numbering skips 5605).
  */
5606  bn->bgworker_notify = false;
5607 
5608  bn->pid = StartAutoVacWorker();
5609  if (bn->pid > 0)
5610  {
 /*
  * NOTE(review): one line is absent from this listing here (the
  * embedded numbering skips 5611).
  */
5612  dlist_push_head(&BackendList, &bn->elem);
5613 #ifdef EXEC_BACKEND
5614  ShmemBackendArrayAdd(bn);
5615 #endif
5616  /* all OK */
5617  return;
5618  }
5619 
5620  /*
5621  * fork failed, fall through to report -- actual error message was
5622  * logged by StartAutoVacWorker
5623  */
 /*
  * NOTE(review): one line is absent from this listing here (the
  * embedded numbering skips 5624), just before the entry is freed.
  */
5625  free(bn);
5626  }
5627  else
5628  ereport(LOG,
5629  (errcode(ERRCODE_OUT_OF_MEMORY),
5630  errmsg("out of memory")));
5631  }
5632 
5633  /*
5634  * Report the failure to the launcher, if it's running. (If it's not, we
5635  * might not even be connected to shared memory, so don't try to call
5636  * AutoVacWorkerFailed.) Note that we also need to signal it so that it
5637  * responds to the condition, but we don't do that here, instead waiting
5638  * for ServerLoop to do it. This way we avoid a ping-pong signaling in
5639  * quick succession between the autovac launcher and postmaster in case
5640  * things get ugly.
5641  */
5642  if (AutoVacPID != 0)
5643  {
 /*
  * NOTE(review): one line is absent from this listing here (the
  * embedded numbering skips 5644); only the deferred-signal flag
  * assignment survives.
  */
5645  avlauncher_needs_signal = true;
5646  }
5647 }
5648 
5649 /*
5650  * MaybeStartWalReceiver
5651  * Start the WAL receiver process, if not running and our state allows.
5652  *
5653  * Note: if WalReceiverPID is already nonzero, it might seem that we should
5654  * clear WalReceiverRequested. However, there's a race condition if the
5655  * walreceiver terminates and the startup process immediately requests a new
5656  * one: it's quite possible to get the signal for the request before reaping
5657  * the dead walreceiver process. Better to risk launching an extra
5658  * walreceiver than to miss launching one we need. (The walreceiver code
5659  * has logic to recognize that it should go away if not needed.)
5660  */
/*
 * NOTE(review): the declarator line is absent from this listing (the
 * embedded numbering skips from 5661 to 5663).  The name
 * MaybeStartWalReceiver comes from the header comment above.
 */
5661 static void
5663 {
 /* Act only when no walreceiver PID is recorded and pmState is one of
  * PM_STARTUP, PM_RECOVERY or PM_HOT_STANDBY. */
5664  if (WalReceiverPID == 0 &&
5665  (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5666  pmState == PM_HOT_STANDBY) &&
 /*
  * NOTE(review): the final conjunct of this condition is absent from this
  * listing (the embedded numbering skips 5667).
  */
5668  {
 /*
  * NOTE(review): one line is absent from this listing here (the
  * embedded numbering skips 5669); presumably it is the statement that
  * sets WalReceiverPID, since that variable is re-tested right below.
  */
5670  if (WalReceiverPID != 0)
5671  WalReceiverRequested = false;
5672  /* else leave the flag set, so we'll try again later */
5673  }
5674 }
5675 
5676 
5677 /*
5678  * Create the opts file
5679  */
/*
 * Writes one line to OPTS_FILE: fullprogname, followed by argv[1] through
 * argv[argc - 1], each preceded by a space and wrapped in double quotes,
 * followed by a newline.  argv[0] is not written.
 *
 * Returns true on success; returns false after logging at LOG level if the
 * file cannot be opened or if fclose() reports failure.
 */
5680 static bool
5681 CreateOptsFile(int argc, char *argv[], char *fullprogname)
5682 {
5683  FILE *fp;
5684  int i;
5685 
5686 #define OPTS_FILE "postmaster.opts"
5687 
5688  if ((fp = fopen(OPTS_FILE, "w")) == NULL)
5689  {
5690  ereport(LOG,
 /*
  * NOTE(review): one line of this ereport() call is absent from this
  * listing (the embedded numbering skips 5691), which leaves the
  * parentheses unbalanced as shown.
  */
5692  errmsg("could not create file \"%s\": %m", OPTS_FILE)));
5693  return false;
5694  }
5695 
 /* Return values of these writes are not checked here; only fclose()
  * below is tested for failure. */
5696  fprintf(fp, "%s", fullprogname);
5697  for (i = 1; i < argc; i++)
5698  fprintf(fp, " \"%s\"", argv[i]);
5699  fputs("\n", fp);
5700 
5701  if (fclose(fp))
5702  {
5703  ereport(LOG,
 /*
  * NOTE(review): one line of this ereport() call is absent from this
  * listing (the embedded numbering skips 5704), as in the fopen()
  * failure path.
  */
5705  errmsg("could not write file \"%s\": %m", OPTS_FILE)));
5706  return false;
5707  }
5708 
5709  return true;
5710 }
5711 
5712 
5713 /*
5714  * MaxLivePostmasterChildren
5715  *
5716  * This reports the number of entries needed in per-child-process arrays
5717  * (the PMChildFlags array, and if EXEC_BACKEND the ShmemBackendArray).
5718  * These arrays include regular backends, autovac workers, walsenders
5719  * and background workers, but not special children nor dead_end children.
5720  * This allows the arrays to have a fixed maximum size, to wit the same
5721  * too-many-children limit enforced by canAcceptConnections(). The exact value
5722  * isn't too critical as long as it's more than MaxBackends.
5723  */
/*
 * NOTE(review): the declarator line is absent from this listing (the
 * embedded numbering skips from 5724 to 5726).  The name
 * MaxLivePostmasterChildren comes from the header comment above.
 */
5724 int
5726 {
 /* Twice the sum of the configured limits listed in the expression. */
5727  return 2 * (MaxConnections + autovacuum_max_workers + 1 +
 /*
  * NOTE(review): the rest of this expression is absent from this listing
  * (the embedded numbering skips 5728), so the remaining terms of the sum
  * cannot be read from here.
  */
5729 }
5730 
5731 /*
5732  * Connect background worker to a database.
5733  */
/*
 * NOTE(review): the declarator line is absent from this listing (the
 * embedded numbering skips from 5734 to 5736), so the declarations of
 * "dbname", "username" and "flags" used below are not visible here.
 */
5734 void
5736 {
 /*
  * NOTE(review): one line is absent from this listing here (the embedded
  * numbering skips 5737).
  */
5738 
5739  /* XXX is this the right errcode? */
 /*
  * NOTE(review): the condition guarding this FATAL report is absent from
  * this listing (the embedded numbering skips 5740).
  */
5741  ereport(FATAL,
5742  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5743  errmsg("database connection requirement not indicated during registration")));
5744 
 /* Database and user are given by name; the last argument is true when
  * BGWORKER_BYPASS_ALLOWCONN is set in flags. */
5745  InitPostgres(dbname, InvalidOid, username, InvalidOid, NULL, (flags & BGWORKER_BYPASS_ALLOWCONN) != 0);
5746 
5747  /* it had better not have gotten out of "init" mode yet */
5748  if (!IsInitProcessingMode())
5749  ereport(ERROR,
5750  (errmsg("invalid processing mode in background worker")));
 /*
  * NOTE(review): one line is absent from this listing here (the embedded
  * numbering skips 5751), just before the function ends.
  */
5752 }
5753 
5754 /*
5755  * Connect background worker to a database using OIDs.
5756  */
/*
 * NOTE(review): the declarator line is absent from this listing (the
 * embedded numbering skips from 5757 to 5759), so the declarations of
 * "dboid", "useroid" and "flags" used below are not visible here.
 */
5757 void
5759 {
 /*
  * NOTE(review): one line is absent from this listing here (the embedded
  * numbering skips 5760).
  */
5761 
5762  /* XXX is this the right errcode? */
 /*
  * NOTE(review): the condition guarding this FATAL report is absent from
  * this listing (the embedded numbering skips 5763).
  */
5764  ereport(FATAL,
5765  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5766  errmsg("database connection requirement not indicated during registration")));
5767 
 /* Database and user are given by OID, so both name arguments are NULL;
  * the last argument is true when BGWORKER_BYPASS_ALLOWCONN is set in
  * flags. */
5768  InitPostgres(NULL, dboid, NULL, useroid, NULL, (flags & BGWORKER_BYPASS_ALLOWCONN) != 0);
5769 
5770  /* it had better not have gotten out of "init" mode yet */
5771  if (!IsInitProcessingMode())
5772  ereport(ERROR,
5773  (errmsg("invalid processing mode in background worker")));
 /*
  * NOTE(review): one line is absent from this listing here (the embedded
  * numbering skips 5774), just before the function ends.
  */
5775 }
5776 
5777 /*
5778  * Block/unblock signals in a background worker
5779  */
/*
 * NOTE(review): the declarator line is absent from this listing (the
 * embedded numbering skips from 5780 to 5782).  Per the comment above,
 * this is the "block" half of the block/unblock pair.
 */
5780 void
5782 {
 /* Install BlockSig as the signal mask. */
5783  PG_SETMASK(&BlockSig);
5784 }
5785 
/*
 * NOTE(review): both the declarator line and the only body line are absent
 * from this listing (the embedded numbering skips 5787 and 5789), so the
 * body appears empty here.  Presumably this is the "unblock" counterpart of
 * the function above -- confirm against the real file.
 */
5786 void
5788 {
5790 }
5791 
5792 #ifdef EXEC_BACKEND
5793 static pid_t
5794 bgworker_forkexec(int shmem_slot)
5795 {
5796  char *av[10];
5797  int ac = 0;
5798  char forkav[MAXPGPATH];
5799 
5800  snprintf(forkav, MAXPGPATH, "--forkbgworker=%d", shmem_slot);
5801 
5802  av[ac++] = "postgres";
5803  av[ac++] = forkav;
5804  av[ac++] = NULL; /* filled in by postmaster_forkexec */
5805  av[ac] = NULL;
5806 
5807  Assert(ac < lengthof(av));
5808 
5809  return postmaster_forkexec(ac, av);
5810 }
5811 #endif
5812 
5813 /*
5814  * Start a new bgworker.
5815  * Starting time conditions must have been checked already.
5816  *
5817  * Returns true on success, false on failure.
5818  * In either case, update the RegisteredBgWorker's state appropriately.
5819  *
5820  * This code is heavily based on autovacuum.c, q.v.
5821  */
5822 static bool
5824 {
5825  pid_t worker_pid;
5826 
5827  Assert(rw->rw_pid == 0);
5828 
5829  /*
5830  * Allocate and assign the Backend element. Note we must do this before
5831  * forking, so that we can handle failures (out of memory or child-process
5832  * slots) cleanly.
5833  *
5834  * Treat failure as though the worker had crashed. That way, the
5835  * postmaster will wait a bit before attempting to start it again; if we
5836  * tried again right away, most likely we'd find ourselves hitting the
5837  * same resource-exhaustion condition.
5838  */
5839  if (!assign_backendlist_entry(rw))
5840  {
5842  return false;
5843  }
5844 
5845  ereport(DEBUG1,
5846  (errmsg_internal("starting background worker process \"%s\"",
5847  rw->rw_worker.bgw_name)));
5848 
5849 #ifdef EXEC_BACKEND
5850  switch ((worker_pid = bgworker_forkexec(rw->rw_shmem_slot)))
5851 #else
5852  switch ((worker_pid = fork_process()))
5853 #endif
5854  {
5855  case -1:
5856  /* in postmaster, fork failed ... */
5857  ereport(LOG,
5858  (errmsg("could not fork worker process: %m")));
5859  /* undo what assign_backendlist_entry did */
5861  rw->rw_child_slot = 0;
5862  free(rw->rw_backend);
5863  rw->rw_backend = NULL;
5864  /* mark entry as crashed, so we'll try again later */
5866  break;
5867 
5868 #ifndef EXEC_BACKEND
5869  case 0:
5870  /* in postmaster child ... */
5872