PostgreSQL Source Code  git master
postmaster.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * postmaster.c
4  * This program acts as a clearing house for requests to the
5  * POSTGRES system. Frontend programs send a startup message
6  * to the Postmaster and the postmaster uses the info in the
7  * message to setup a backend process.
8  *
9  * The postmaster also manages system-wide operations such as
10  * startup and shutdown. The postmaster itself doesn't do those
11  * operations, mind you --- it just forks off a subprocess to do them
12  * at the right times. It also takes care of resetting the system
13  * if a backend crashes.
14  *
15  * The postmaster process creates the shared memory and semaphore
16  * pools during startup, but as a rule does not touch them itself.
17  * In particular, it is not a member of the PGPROC array of backends
18  * and so it cannot participate in lock-manager operations. Keeping
19  * the postmaster away from shared memory operations makes it simpler
20  * and more reliable. The postmaster is almost always able to recover
21  * from crashes of individual backends by resetting shared memory;
22  * if it did much with shared memory then it would be prone to crashing
23  * along with the backends.
24  *
25  * When a request message is received, we now fork() immediately.
26  * The child process performs authentication of the request, and
27  * then becomes a backend if successful. This allows the auth code
28  * to be written in a simple single-threaded style (as opposed to the
29  * crufty "poor man's multitasking" code that used to be needed).
30  * More importantly, it ensures that blockages in non-multithreaded
31  * libraries like SSL or PAM cannot cause denial of service to other
32  * clients.
33  *
34  *
35  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
36  * Portions Copyright (c) 1994, Regents of the University of California
37  *
38  *
39  * IDENTIFICATION
40  * src/backend/postmaster/postmaster.c
41  *
42  * NOTES
43  *
44  * Initialization:
45  * The Postmaster sets up shared memory data structures
46  * for the backends.
47  *
48  * Synchronization:
49  * The Postmaster shares memory with the backends but should avoid
50  * touching shared memory, so as not to become stuck if a crashing
51  * backend screws up locks or shared memory. Likewise, the Postmaster
52  * should never block on messages from frontend clients.
53  *
54  * Garbage Collection:
55  * The Postmaster cleans up after backends if they have an emergency
56  * exit and/or core dump.
57  *
58  * Error Reporting:
59  * Use write_stderr() only for reporting "interactive" errors
60  * (essentially, bogus arguments on the command line). Once the
61  * postmaster is launched, use ereport().
62  *
63  *-------------------------------------------------------------------------
64  */
65 
66 #include "postgres.h"
67 
68 #include <unistd.h>
69 #include <signal.h>
70 #include <time.h>
71 #include <sys/wait.h>
72 #include <ctype.h>
73 #include <sys/stat.h>
74 #include <sys/socket.h>
75 #include <fcntl.h>
76 #include <sys/param.h>
77 #include <netdb.h>
78 #include <limits.h>
79 
80 #ifdef HAVE_SYS_SELECT_H
81 #include <sys/select.h>
82 #endif
83 
84 #ifdef USE_BONJOUR
85 #include <dns_sd.h>
86 #endif
87 
88 #ifdef USE_SYSTEMD
89 #include <systemd/sd-daemon.h>
90 #endif
91 
92 #ifdef HAVE_PTHREAD_IS_THREADED_NP
93 #include <pthread.h>
94 #endif
95 
96 #include "access/transam.h"
97 #include "access/xlog.h"
98 #include "bootstrap/bootstrap.h"
99 #include "catalog/pg_control.h"
100 #include "common/file_perm.h"
101 #include "common/ip.h"
102 #include "common/string.h"
103 #include "lib/ilist.h"
104 #include "libpq/auth.h"
105 #include "libpq/libpq.h"
106 #include "libpq/pqformat.h"
107 #include "libpq/pqsignal.h"
108 #include "pg_getopt.h"
109 #include "pgstat.h"
110 #include "port/pg_bswap.h"
111 #include "postmaster/autovacuum.h"
113 #include "postmaster/fork_process.h"
114 #include "postmaster/interrupt.h"
115 #include "postmaster/pgarch.h"
116 #include "postmaster/postmaster.h"
117 #include "postmaster/syslogger.h"
119 #include "replication/walsender.h"
120 #include "storage/fd.h"
121 #include "storage/ipc.h"
122 #include "storage/pg_shmem.h"
123 #include "storage/pmsignal.h"
124 #include "storage/proc.h"
125 #include "tcop/tcopprot.h"
126 #include "utils/builtins.h"
127 #include "utils/datetime.h"
128 #include "utils/memutils.h"
129 #include "utils/pidfile.h"
130 #include "utils/ps_status.h"
131 #include "utils/timeout.h"
132 #include "utils/timestamp.h"
133 #include "utils/varlena.h"
134 
135 #ifdef EXEC_BACKEND
136 #include "storage/spin.h"
137 #endif
138 
139 
140 /*
141  * Possible types of a backend. Beyond being the possible bkend_type values in
142  * struct bkend, these are OR-able request flag bits for SignalSomeChildren()
143  * and CountChildren().
144  */
/* Each type is a distinct bit, so several can be ORed into one request mask. */
145 #define BACKEND_TYPE_NORMAL 0x0001 /* normal backend; also dead_end children and freshly launched walsenders */
146 #define BACKEND_TYPE_AUTOVAC 0x0002 /* autovacuum worker process */
147 #define BACKEND_TYPE_WALSND 0x0004 /* walsender process (relabeled from NORMAL after launch) */
148 #define BACKEND_TYPE_BGWORKER 0x0008 /* bgworker process */
149 #define BACKEND_TYPE_ALL 0x000F /* OR of all the above; update if a type is added */
150 
151 /*
152  * List of active backends (or child processes anyway; we don't actually
153  * know whether a given child has become a backend or is still in the
154  * authorization phase). This is used mainly to keep track of how many
155  * children we have and send them appropriate signals when necessary.
156  *
157  * As shown in the above set of backend types, this list includes not only
158  * "normal" client sessions, but also autovacuum workers, walsenders, and
159  * background workers. (Note that at the time of launch, walsenders are
160  * labeled BACKEND_TYPE_NORMAL; we relabel them to BACKEND_TYPE_WALSND
161  * upon noticing they've changed their PMChildFlags entry. Hence that check
162  * must be done before any operation that needs to distinguish walsenders
163  * from normal backends.)
164  *
165  * Also, "dead_end" children are in it: these are children launched just for
166  * the purpose of sending a friendly rejection message to a would-be client.
167  * We must track them because they are attached to shared memory, but we know
168  * they will never become live backends. dead_end children are not assigned a
169  * PMChildSlot. dead_end children have bkend_type NORMAL.
170  *
171  * "Special" children such as the startup, bgwriter and autovacuum launcher
172  * tasks are not in this list. They are tracked via StartupPID and other
173  * pid_t variables below. (Thus, there can't be more than one of any given
174  * "special" child process type. We use BackendList entries for any child
175  * process there can be more than one of.)
176  */
/*
 * Per-child bookkeeping entry kept by the postmaster; entries are chained
 * into BackendList through "elem".
 */
177 typedef struct bkend
178 {
179  pid_t pid; /* process id of backend */
180  int32 cancel_key; /* cancel key for cancels for this backend */
181  int child_slot; /* PMChildSlot for this backend, if any; dead_end
 * children are not assigned one */
182  int bkend_type; /* child process flavor: one of the
 * BACKEND_TYPE_* values */
183  bool dead_end; /* is it going to send an error and quit? */
184  bool bgworker_notify; /* gets bgworker start/stop notifications */
185  dlist_node elem; /* list link in BackendList */
186 } Backend;
187 
189 
/*
 * Array of Backend entries, declared only in EXEC_BACKEND builds. The
 * pointer is also handed to exec'ed children through BackendParameters.
 * NOTE(review): presumably this array lives in shared memory, as the name
 * suggests -- its allocation is not visible in this listing; confirm.
 */
190 #ifdef EXEC_BACKEND
191 static Backend *ShmemBackendArray;
192 #endif
193 
195 
196 
197 
198 /* The socket number we are listening for connections on */
200 
201 /* The directory names for Unix socket(s) */
203 
204 /* The TCP listen address(es) */
206 
207 /*
208  * ReservedBackends is the number of backends reserved for superuser use.
209  * This number is taken out of the pool size given by MaxConnections, so the
210  * number of backend slots available to non-superusers is
211  * (MaxConnections - ReservedBackends). Note what this really means is
212  * "if there are <= ReservedBackends connections available, only superusers
213  * can make new connections" --- pre-existing superuser connections don't
214  * count against the limit.
215  */
217 
218 /* The socket(s) we're listening to. */
/*
 * NOTE(review): MAXLISTEN is presumably the capacity of the listen-socket
 * array; that declaration is not visible in this listing -- confirm.
 */
219 #define MAXLISTEN 64
221 
/*
 * These globals control the behavior of the postmaster in case some
 * backend dumps core. Normally, it kills all peers of the dead backend
 * and reinitializes shared memory. By specifying -T or -n, we can have
 * the postmaster stop (rather than kill) peers and not reinitialize
 * shared data structures. (Reinit is currently dead code, though.)
 *
 * Reinit is cleared by the -n switch. SendStop is set by the -T switch and
 * asks for SIGSTOP, rather than SIGQUIT, to be sent to the peers of a
 * crashed backend. (The -s switch is unrelated: it sets
 * log_statement_stats.) Both variables are plain on/off flags, so both are
 * declared bool.
 */
static bool Reinit = true;
static bool SendStop = false;
231 
232 /* still more option variables */
233 bool EnableSSL = false; /* when set, PostmasterMain initializes the SSL
 * library at startup (USE_SSL builds only) */
234 
235 int PreAuthDelay = 0;
237 
238 bool log_hostname; /* for ps display and logging */
239 bool Log_connections = false;
240 bool Db_user_namespace = false;
241 
242 bool enable_bonjour = false; /* NOTE(review): presumably gates the
 * USE_BONJOUR code -- confirm */
246 
247 /* PIDs of special child processes; 0 when not running */
248 static pid_t StartupPID = 0,
257 
258 /* Startup process's status */
259 typedef enum
260 {
263  STARTUP_SIGNALED, /* we sent it a SIGQUIT or SIGKILL */
266 
268 
269 /* Startup/shutdown state */
/*
 * Shutdown modes, stored in the Shutdown variable below. The values are
 * consecutive integers starting from NoShutdown = 0.
 */
270 #define NoShutdown 0 /* no shutdown requested */
271 #define SmartShutdown 1 /* new connections are restricted while we wait
 * for the client backends to go away */
272 #define FastShutdown 2 /* NOTE(review): presumably terminates
 * backends instead of waiting -- confirm */
273 #define ImmediateShutdown 3 /* a SIGKILL timeout applies in this mode;
 * see AbortStartTime */
274 
275 static int Shutdown = NoShutdown; /* current mode, one of the above */
276 
277 static bool FatalError = false; /* T if recovering from backend crash;
 * consulted together with Shutdown to
 * tell why pmState is past PM_RUN */
278 
279 /*
280  * We use a simple state machine to control startup, shutdown, and
281  * crash recovery (which is rather like shutdown followed by startup).
282  *
283  * After doing all the postmaster initialization work, we enter PM_STARTUP
284  * state and the startup process is launched. The startup process begins by
285  * reading the control file and other preliminary initialization steps.
286  * In a normal startup, or after crash recovery, the startup process exits
287  * with exit code 0 and we switch to PM_RUN state. However, archive recovery
288  * is handled specially since it takes much longer and we would like to support
289  * hot standby during archive recovery.
290  *
291  * When the startup process is ready to start archive recovery, it signals the
292  * postmaster, and we switch to PM_RECOVERY state. The background writer and
293  * checkpointer are launched, while the startup process continues applying WAL.
294  * If Hot Standby is enabled, then, after reaching a consistent point in WAL
295  * redo, startup process signals us again, and we switch to PM_HOT_STANDBY
296  * state and begin accepting connections to perform read-only queries. When
297  * archive recovery is finished, the startup process exits with exit code 0
298  * and we switch to PM_RUN state.
299  *
300  * Normal child backends can only be launched when we are in PM_RUN or
301  * PM_HOT_STANDBY state. (connsAllowed can also restrict launching.)
302  * In other states we handle connection requests by launching "dead_end"
303  * child processes, which will simply send the client an error message and
304  * quit. (We track these in the BackendList so that we can know when they
305  * are all gone; this is important because they're still connected to shared
306  * memory, and would interfere with an attempt to destroy the shmem segment,
307  * possibly leading to SHMALL failure when we try to make a new one.)
308  * In PM_WAIT_DEAD_END state we are waiting for all the dead_end children
309  * to drain out of the system, and therefore stop accepting connection
310  * requests at all until the last existing child has quit (which hopefully
311  * will not be very long).
312  *
313  * Notice that this state variable does not distinguish *why* we entered
314  * states later than PM_RUN --- Shutdown and FatalError must be consulted
315  * to find that out. FatalError is never true in PM_RECOVERY, PM_HOT_STANDBY,
316  * or PM_RUN states, nor in PM_SHUTDOWN states (because we don't enter those
317  * states when trying to recover from a crash). It can be true in PM_STARTUP
318  * state, because we don't clear it until we've successfully started WAL redo.
319  */
/*
 * Postmaster state-machine states. The declaration order is meaningful:
 * the commentary in this file speaks of states "later than PM_RUN".
 */
320 typedef enum
321 {
322  PM_INIT, /* postmaster starting */
323  PM_STARTUP, /* startup process launched; waiting for it */
324  PM_RECOVERY, /* in archive recovery mode; bgwriter and
 * checkpointer are launched */
325  PM_HOT_STANDBY, /* in hot standby mode; read-only connections
 * are accepted */
326  PM_RUN, /* normal "database is alive" state */
327  PM_STOP_BACKENDS, /* need to stop remaining backends */
328  PM_WAIT_BACKENDS, /* waiting for live backends to exit */
329  PM_SHUTDOWN, /* waiting for checkpointer to do shutdown
 * ckpt */
331  PM_SHUTDOWN_2, /* waiting for archiver and walsenders to
 * finish */
333  PM_WAIT_DEAD_END, /* waiting for dead_end children to exit; no
 * connection requests are accepted meanwhile */
334  PM_NO_CHILDREN /* all important children have exited */
335 } PMState;
336 
338 
339 /*
340  * While performing a "smart shutdown", we restrict new connections but stay
341  * in PM_RUN or PM_HOT_STANDBY state until all the client backends are gone.
342  * connsAllowed is a sub-state indicator showing the active restriction.
343  * It is of no interest unless pmState is PM_RUN or PM_HOT_STANDBY.
344  */
345 typedef enum
346 {
347  ALLOW_ALL_CONNS, /* normal not-shutting-down state */
348  ALLOW_SUPERUSER_CONNS, /* only superusers can connect */
349  ALLOW_NO_CONNS /* no new connections allowed, period */
351 
353 
354 /* Start time of SIGKILL timeout during immediate shutdown or child crash */
355 /* Zero means timeout is not running */
356 static time_t AbortStartTime = 0;
357 
358 /* Length of said timeout, in seconds (as the macro name indicates) */
359 #define SIGKILL_CHILDREN_AFTER_SECS 5
360 
361 static bool ReachedNormalRunning = false; /* T if we've reached PM_RUN */
362 
363 bool ClientAuthInProgress = false; /* T during new-client
364  * authentication */
365 
366 bool redirection_done = false; /* stderr redirected for syslogger? */
367 
368 /* received START_AUTOVAC_LAUNCHER signal */
369 static volatile sig_atomic_t start_autovac_launcher = false;
370 
371 /* the launcher needs to be signaled to communicate some condition */
/*
 * NOTE(review): this flag and the two worker flags below are "volatile bool",
 * whereas start_autovac_launcher and WalReceiverRequested are
 * "volatile sig_atomic_t". If any of them is written from a signal handler,
 * sig_atomic_t would be the consistent choice -- the writers are not visible
 * in this listing; confirm.
 */
372 static volatile bool avlauncher_needs_signal = false;
373 
374 /* received START_WALRECEIVER signal */
375 static volatile sig_atomic_t WalReceiverRequested = false;
376 
377 /* set when there's a worker that needs to be started up */
/* (starts out true, unlike the other flags here) */
378 static volatile bool StartWorkerNeeded = true;
379 static volatile bool HaveCrashedWorker = false;
380 
381 #ifdef USE_SSL
382 /* Set when and if SSL has been initialized properly */
/*
 * NOTE(review): in PostmasterMain this is set to true right after
 * secure_initialize(true), whose return value is cast to void. Confirm that
 * secure_initialize() cannot return on failure in that mode; otherwise the
 * flag would not mean "initialized properly".
 */
383 static bool LoadedSSL = false;
384 #endif
385 
386 #ifdef USE_BONJOUR
/*
 * NULL initially. NOTE(review): presumably holds the DNS-SD registration
 * handle once Bonjour advertising is set up -- confirm.
 */
387 static DNSServiceRef bonjour_sdref = NULL;
388 #endif
389 
390 /*
391  * postmaster.c - function prototypes
392  */
393 static void CloseServerPorts(int status, Datum arg);
394 static void unlink_external_pid_file(int status, Datum arg);
395 static void getInstallationPaths(const char *argv0);
396 static void checkControlFile(void);
397 static Port *ConnCreate(int serverFd);
398 static void ConnFree(Port *port);
399 static void reset_shared(void);
400 static void SIGHUP_handler(SIGNAL_ARGS);
401 static void pmdie(SIGNAL_ARGS);
402 static void reaper(SIGNAL_ARGS);
403 static void sigusr1_handler(SIGNAL_ARGS);
405 static void dummy_handler(SIGNAL_ARGS);
406 static void StartupPacketTimeoutHandler(void);
407 static void CleanupBackend(int pid, int exitstatus);
408 static bool CleanupBackgroundWorker(int pid, int exitstatus);
409 static void HandleChildCrash(int pid, int exitstatus, const char *procname);
410 static void LogChildExit(int lev, const char *procname,
411  int pid, int exitstatus);
412 static void PostmasterStateMachine(void);
413 static void BackendInitialize(Port *port);
414 static void BackendRun(Port *port) pg_attribute_noreturn();
415 static void ExitPostmaster(int status) pg_attribute_noreturn();
416 static int ServerLoop(void);
417 static int BackendStartup(Port *port);
418 static int ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done);
419 static void SendNegotiateProtocolVersion(List *unrecognized_protocol_options);
420 static void processCancelRequest(Port *port, void *pkt);
421 static int initMasks(fd_set *rmask);
422 static void report_fork_failure_to_client(Port *port, int errnum);
423 static CAC_state canAcceptConnections(int backend_type);
424 static bool RandomCancelKey(int32 *cancel_key);
425 static void signal_child(pid_t pid, int signal);
426 static bool SignalSomeChildren(int signal, int targets);
427 static void TerminateChildren(int signal);
428 
/* Shorthand for SignalSomeChildren() targeting every BACKEND_TYPE_* flavor. */
429 #define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL)
430 
431 static int CountChildren(int target);
433 static void maybe_start_bgworkers(void);
434 static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
435 static pid_t StartChildProcess(AuxProcType type);
436 static void StartAutovacuumWorker(void);
437 static void MaybeStartWalReceiver(void);
438 static void InitPostmasterDeathWatchHandle(void);
439 
440 /*
 * Is the archiver allowed to start up in the current postmaster state?
 *
 * Evaluates to true when PgArchCanRestart() holds and either
 *   - XLogArchivingActive() and pmState is PM_RUN, or
 *   - XLogArchivingAlways() and pmState is PM_RECOVERY or PM_HOT_STANDBY
 *     (if WAL archiving is enabled "always", the archiver may be started
 *     even during recovery).
 */
446 #define PgArchStartupAllowed() \
447  (((XLogArchivingActive() && pmState == PM_RUN) || \
448  (XLogArchivingAlways() && \
449  (pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY))) && \
450  PgArchCanRestart())
451 
452 #ifdef EXEC_BACKEND
453 
454 #ifdef WIN32
455 #define WNOHANG 0 /* ignored, so any integer value will do */
456 
457 static pid_t waitpid(pid_t pid, int *exitstatus, int options);
458 static void WINAPI pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired);
459 
460 static HANDLE win32ChildQueue;
461 
/*
 * Windows-only bookkeeping for one child process.
 * NOTE(review): presumably an instance is passed as lpParameter to
 * pgwin32_deadchild_callback() -- the code using it is not visible in this
 * listing; confirm.
 */
462 typedef struct
463 {
464  HANDLE waitHandle;
465  HANDLE procHandle;
466  DWORD procId;
467 } win32_deadchild_waitinfo;
468 #endif /* WIN32 */
469 
470 static pid_t backend_forkexec(Port *port);
471 static pid_t internal_forkexec(int argc, char *argv[], Port *port);
472 
473 /* Type for a socket that can be inherited to a client process */
/*
 * On Windows this pairs the original SOCKET with a WSAPROTOCOL_INFO;
 * elsewhere a plain int is used.
 */
474 #ifdef WIN32
475 typedef struct
476 {
477  SOCKET origsocket; /* Original socket value, or PGINVALID_SOCKET
478  * if not a socket */
479  WSAPROTOCOL_INFO wsainfo;
480 } InheritableSocket;
481 #else
482 typedef int InheritableSocket;
483 #endif
484 
485 /*
 * Structure containing all variables passed to exec'ed backends
 * (EXEC_BACKEND builds only).
 *
 * save_backend_variables(), restore_backend_variables() and
 * read_backend_variables(), declared below, are the routines that deal
 * with this struct. Several members exist in a Windows and a non-Windows
 * variant, selected by WIN32.
 */
488 typedef struct
489 {
490  Port port;
491  InheritableSocket portsocket;
492  char DataDir[MAXPGPATH];
495  int MyPMChildSlot;
496 #ifndef WIN32
497  unsigned long UsedShmemSegID;
498 #else
499  void *ShmemProtectiveRegion;
500  HANDLE UsedShmemSegID;
501 #endif
502  void *UsedShmemSegAddr;
505  Backend *ShmemBackendArray;
506 #ifndef HAVE_SPINLOCKS
508 #endif
517  InheritableSocket pgStatSock;
518  pid_t PostmasterPid;
522  bool redirection_done;
523  bool IsBinaryUpgrade;
524  int max_safe_fds;
525  int MaxBackends;
526 #ifdef WIN32
527  HANDLE PostmasterHandle; /* postmaster process handle (Windows) */
528  HANDLE initial_signal_pipe;
529  HANDLE syslogPipe[2];
530 #else
531  int postmaster_alive_fds[2]; /* postmaster-alive pipe (non-Windows) */
532  int syslogPipe[2];
533 #endif
534  char my_exec_path[MAXPGPATH];
535  char pkglib_path[MAXPGPATH];
536 } BackendParameters;
537 
538 static void read_backend_variables(char *id, Port *port);
539 static void restore_backend_variables(BackendParameters *param, Port *port);
540 
541 #ifndef WIN32
542 static bool save_backend_variables(BackendParameters *param, Port *port);
543 #else
544 static bool save_backend_variables(BackendParameters *param, Port *port,
545  HANDLE childProcess, pid_t childPid);
546 #endif
547 
548 static void ShmemBackendArrayAdd(Backend *bn);
549 static void ShmemBackendArrayRemove(Backend *bn);
550 #endif /* EXEC_BACKEND */
551 
/*
 * Convenience wrappers around StartChildProcess(), one per auxiliary
 * process type. Each evaluates to the pid_t that StartChildProcess() returns.
 */
552 #define StartupDataBase() StartChildProcess(StartupProcess)
553 #define StartArchiver() StartChildProcess(ArchiverProcess)
554 #define StartBackgroundWriter() StartChildProcess(BgWriterProcess)
555 #define StartCheckpointer() StartChildProcess(CheckpointerProcess)
556 #define StartWalWriter() StartChildProcess(WalWriterProcess)
557 #define StartWalReceiver() StartChildProcess(WalReceiverProcess)
558 
559 /* Macros to check exit status of a child process */
/*
 * "st" is a wait status. EXIT_STATUS_0 compares the raw status against
 * zero, whereas EXIT_STATUS_1 and EXIT_STATUS_3 first require WIFEXITED()
 * and then test WEXITSTATUS() for exit codes 1 and 3 respectively.
 */
560 #define EXIT_STATUS_0(st) ((st) == 0)
561 #define EXIT_STATUS_1(st) (WIFEXITED(st) && WEXITSTATUS(st) == 1)
562 #define EXIT_STATUS_3(st) (WIFEXITED(st) && WEXITSTATUS(st) == 3)
563 
564 #ifndef WIN32
565 /*
566  * File descriptors for pipe used to monitor if postmaster is alive.
567  * First is POSTMASTER_FD_WATCH, second is POSTMASTER_FD_OWN.
 * Both start out as -1.
 * NOTE(review): presumably filled in by InitPostmasterDeathWatchHandle() --
 * its body is not visible in this listing; confirm.
568  */
569 int postmaster_alive_fds[2] = {-1, -1};
570 #else
571 /* Process handle of postmaster used for the same purpose on Windows */
572 HANDLE PostmasterHandle;
573 #endif
574 
575 /*
576  * Postmaster main entry point
577  */
578 void
579 PostmasterMain(int argc, char *argv[])
580 {
581  int opt;
582  int status;
583  char *userDoption = NULL;
584  bool listen_addr_saved = false;
585  int i;
586  char *output_config_variable = NULL;
587 
589 
591 
593 
594  /*
595  * We should not be creating any files or directories before we check the
596  * data directory (see checkDataDir()), but just in case set the umask to
597  * the most restrictive (owner-only) permissions.
598  *
599  * checkDataDir() will reset the umask based on the data directory
600  * permissions.
601  */
602  umask(PG_MODE_MASK_OWNER);
603 
604  /*
605  * By default, palloc() requests in the postmaster will be allocated in
606  * the PostmasterContext, which is space that can be recycled by backends.
607  * Allocated data that needs to be available to backends should be
608  * allocated in TopMemoryContext.
609  */
611  "Postmaster",
614 
615  /* Initialize paths to installation files */
616  getInstallationPaths(argv[0]);
617 
618  /*
619  * Set up signal handlers for the postmaster process.
620  *
621  * In the postmaster, we use pqsignal_pm() rather than pqsignal() (which
622  * is used by all child processes and client processes). That has a
623  * couple of special behaviors:
624  *
625  * 1. Except on Windows, we tell sigaction() to block all signals for the
626  * duration of the signal handler. This is faster than our old approach
627  * of blocking/unblocking explicitly in the signal handler, and it should
628  * also prevent excessive stack consumption if signals arrive quickly.
629  *
630  * 2. We do not set the SA_RESTART flag. This is because signals will be
631  * blocked at all times except when ServerLoop is waiting for something to
632  * happen, and during that window, we want signals to exit the select(2)
633  * wait so that ServerLoop can respond if anything interesting happened.
634  * On some platforms, signals marked SA_RESTART would not cause the
635  * select() wait to end.
636  *
637  * Child processes will generally want SA_RESTART, so pqsignal() sets that
638  * flag. We expect children to set up their own handlers before
639  * unblocking signals.
640  *
641  * CAUTION: when changing this list, check for side-effects on the signal
642  * handling setup of child processes. See tcop/postgres.c,
643  * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
644  * postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/pgstat.c,
645  * postmaster/syslogger.c, postmaster/bgworker.c and
646  * postmaster/checkpointer.c.
647  */
648  pqinitmask();
650 
651  pqsignal_pm(SIGHUP, SIGHUP_handler); /* reread config file and have
652  * children do same */
653  pqsignal_pm(SIGINT, pmdie); /* send SIGTERM and shut down */
654  pqsignal_pm(SIGQUIT, pmdie); /* send SIGQUIT and die */
655  pqsignal_pm(SIGTERM, pmdie); /* wait for children and shut down */
656  pqsignal_pm(SIGALRM, SIG_IGN); /* ignored */
657  pqsignal_pm(SIGPIPE, SIG_IGN); /* ignored */
658  pqsignal_pm(SIGUSR1, sigusr1_handler); /* message from child process */
659  pqsignal_pm(SIGUSR2, dummy_handler); /* unused, reserve for children */
660  pqsignal_pm(SIGCHLD, reaper); /* handle child termination */
661 
662 #ifdef SIGURG
663  /*
664  * Ignore SIGURG for now. Child processes may change this (see
665  * InitializeLatchSupport), but they will not receive any such signals
666  * until they wait on a latch.
667  */
668  pqsignal_pm(SIGURG, SIG_IGN); /* ignored */
669 #endif
670 
671  /*
672  * No other place in Postgres should touch SIGTTIN/SIGTTOU handling. We
673  * ignore those signals in a postmaster environment, so that there is no
674  * risk of a child process freezing up due to writing to stderr. But for
675  * a standalone backend, their default handling is reasonable. Hence, all
676  * child processes should just allow the inherited settings to stand.
677  */
678 #ifdef SIGTTIN
679  pqsignal_pm(SIGTTIN, SIG_IGN); /* ignored */
680 #endif
681 #ifdef SIGTTOU
682  pqsignal_pm(SIGTTOU, SIG_IGN); /* ignored */
683 #endif
684 
685  /* ignore SIGXFSZ, so that ulimit violations work like disk full */
686 #ifdef SIGXFSZ
687  pqsignal_pm(SIGXFSZ, SIG_IGN); /* ignored */
688 #endif
689 
690  /*
691  * Options setup
692  */
694 
695  opterr = 1;
696 
697  /*
698  * Parse command-line options. CAUTION: keep this in sync with
699  * tcop/postgres.c (the option sets should not conflict) and with the
700  * common help() function in main/main.c.
701  */
702  while ((opt = getopt(argc, argv, "B:bc:C:D:d:EeFf:h:ijk:lN:nOPp:r:S:sTt:W:-:")) != -1)
703  {
704  switch (opt)
705  {
706  case 'B':
707  SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
708  break;
709 
710  case 'b':
711  /* Undocumented flag used for binary upgrades */
712  IsBinaryUpgrade = true;
713  break;
714 
715  case 'C':
716  output_config_variable = strdup(optarg);
717  break;
718 
719  case 'D':
720  userDoption = strdup(optarg);
721  break;
722 
723  case 'd':
725  break;
726 
727  case 'E':
728  SetConfigOption("log_statement", "all", PGC_POSTMASTER, PGC_S_ARGV);
729  break;
730 
731  case 'e':
732  SetConfigOption("datestyle", "euro", PGC_POSTMASTER, PGC_S_ARGV);
733  break;
734 
735  case 'F':
736  SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
737  break;
738 
739  case 'f':
741  {
742  write_stderr("%s: invalid argument for option -f: \"%s\"\n",
743  progname, optarg);
744  ExitPostmaster(1);
745  }
746  break;
747 
748  case 'h':
749  SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
750  break;
751 
752  case 'i':
753  SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
754  break;
755 
756  case 'j':
757  /* only used by interactive backend */
758  break;
759 
760  case 'k':
761  SetConfigOption("unix_socket_directories", optarg, PGC_POSTMASTER, PGC_S_ARGV);
762  break;
763 
764  case 'l':
765  SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
766  break;
767 
768  case 'N':
769  SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
770  break;
771 
772  case 'n':
773  /* Don't reinit shared mem after abnormal exit */
774  Reinit = false;
775  break;
776 
777  case 'O':
778  SetConfigOption("allow_system_table_mods", "true", PGC_POSTMASTER, PGC_S_ARGV);
779  break;
780 
781  case 'P':
782  SetConfigOption("ignore_system_indexes", "true", PGC_POSTMASTER, PGC_S_ARGV);
783  break;
784 
785  case 'p':
787  break;
788 
789  case 'r':
790  /* only used by single-user backend */
791  break;
792 
793  case 'S':
795  break;
796 
797  case 's':
798  SetConfigOption("log_statement_stats", "true", PGC_POSTMASTER, PGC_S_ARGV);
799  break;
800 
801  case 'T':
802 
803  /*
804  * In the event that some backend dumps core, send SIGSTOP,
805  * rather than SIGQUIT, to all its peers. This lets the wily
806  * post_hacker collect core dumps from everyone.
807  */
808  SendStop = true;
809  break;
810 
811  case 't':
812  {
813  const char *tmp = get_stats_option_name(optarg);
814 
815  if (tmp)
816  {
818  }
819  else
820  {
821  write_stderr("%s: invalid argument for option -t: \"%s\"\n",
822  progname, optarg);
823  ExitPostmaster(1);
824  }
825  break;
826  }
827 
828  case 'W':
829  SetConfigOption("post_auth_delay", optarg, PGC_POSTMASTER, PGC_S_ARGV);
830  break;
831 
832  case 'c':
833  case '-':
834  {
835  char *name,
836  *value;
837 
838  ParseLongOption(optarg, &name, &value);
839  if (!value)
840  {
841  if (opt == '-')
842  ereport(ERROR,
843  (errcode(ERRCODE_SYNTAX_ERROR),
844  errmsg("--%s requires a value",
845  optarg)));
846  else
847  ereport(ERROR,
848  (errcode(ERRCODE_SYNTAX_ERROR),
849  errmsg("-c %s requires a value",
850  optarg)));
851  }
852 
854  free(name);
855  if (value)
856  free(value);
857  break;
858  }
859 
860  default:
861  write_stderr("Try \"%s --help\" for more information.\n",
862  progname);
863  ExitPostmaster(1);
864  }
865  }
866 
867  /*
868  * Postmaster accepts no non-option switch arguments.
869  */
870  if (optind < argc)
871  {
872  write_stderr("%s: invalid argument: \"%s\"\n",
873  progname, argv[optind]);
874  write_stderr("Try \"%s --help\" for more information.\n",
875  progname);
876  ExitPostmaster(1);
877  }
878 
879  /*
880  * Locate the proper configuration files and data directory, and read
881  * postgresql.conf for the first time.
882  */
883  if (!SelectConfigFiles(userDoption, progname))
884  ExitPostmaster(2);
885 
886  if (output_config_variable != NULL)
887  {
888  /*
889  * "-C guc" was specified, so print GUC's value and exit. No extra
890  * permission check is needed because the user is reading inside the
891  * data dir.
892  */
893  const char *config_val = GetConfigOption(output_config_variable,
894  false, false);
895 
896  puts(config_val ? config_val : "");
897  ExitPostmaster(0);
898  }
899 
900  /* Verify that DataDir looks reasonable */
901  checkDataDir();
902 
903  /* Check that pg_control exists */
905 
906  /* And switch working directory into it */
907  ChangeToDataDir();
908 
909  /*
910  * Check for invalid combinations of GUC settings.
911  */
913  {
914  write_stderr("%s: superuser_reserved_connections (%d) must be less than max_connections (%d)\n",
915  progname,
917  ExitPostmaster(1);
918  }
920  ereport(ERROR,
921  (errmsg("WAL archival cannot be enabled when wal_level is \"minimal\"")));
923  ereport(ERROR,
924  (errmsg("WAL streaming (max_wal_senders > 0) requires wal_level \"replica\" or \"logical\"")));
925 
926  /*
927  * Other one-time internal sanity checks can go here, if they are fast.
928  * (Put any slow processing further down, after postmaster.pid creation.)
929  */
930  if (!CheckDateTokenTables())
931  {
932  write_stderr("%s: invalid datetoken tables, please fix\n", progname);
933  ExitPostmaster(1);
934  }
935 
936  /*
937  * Now that we are done processing the postmaster arguments, reset
938  * getopt(3) library so that it will work correctly in subprocesses.
939  */
940  optind = 1;
941 #ifdef HAVE_INT_OPTRESET
942  optreset = 1; /* some systems need this too */
943 #endif
944 
945  /* For debugging: display postmaster environment */
946  {
947  extern char **environ;
948  char **p;
949 
950  ereport(DEBUG3,
951  (errmsg_internal("%s: PostmasterMain: initial environment dump:",
952  progname)));
953  ereport(DEBUG3,
954  (errmsg_internal("-----------------------------------------")));
955  for (p = environ; *p; ++p)
956  ereport(DEBUG3,
957  (errmsg_internal("\t%s", *p)));
958  ereport(DEBUG3,
959  (errmsg_internal("-----------------------------------------")));
960  }
961 
962  /*
963  * Create lockfile for data directory.
964  *
965  * We want to do this before we try to grab the input sockets, because the
966  * data directory interlock is more reliable than the socket-file
967  * interlock (thanks to whoever decided to put socket files in /tmp :-().
968  * For the same reason, it's best to grab the TCP socket(s) before the
969  * Unix socket(s).
970  *
971  * Also note that this internally sets up the on_proc_exit function that
972  * is responsible for removing both data directory and socket lockfiles;
973  * so it must happen before opening sockets so that at exit, the socket
974  * lockfiles go away after CloseServerPorts runs.
975  */
976  CreateDataDirLockFile(true);
977 
978  /*
979  * Read the control file (for error checking and config info).
980  *
981  * Since we verify the control file's CRC, this has a useful side effect
982  * on machines where we need a run-time test for CRC support instructions.
983  * The postmaster will do the test once at startup, and then its child
984  * processes will inherit the correct function pointer and not need to
985  * repeat the test.
986  */
988 
989  /*
990  * Register the apply launcher. Since it registers a background worker,
991  * it needs to be called before InitializeMaxBackends(), and it's probably
992  * a good idea to call it before any modules had chance to take the
993  * background worker slots.
994  */
996 
997  /*
998  * process any libraries that should be preloaded at postmaster start
999  */
1001 
1002  /*
1003  * Initialize SSL library, if specified.
1004  */
1005 #ifdef USE_SSL
1006  if (EnableSSL)
1007  {
1008  (void) secure_initialize(true);
1009  LoadedSSL = true;
1010  }
1011 #endif
1012 
1013  /*
1014  * Now that loadable modules have had their chance to register background
1015  * workers, calculate MaxBackends.
1016  */
1018 
1019  /*
1020  * Set up shared memory and semaphores.
1021  */
1022  reset_shared();
1023 
1024  /*
1025  * Estimate number of openable files. This must happen after setting up
1026  * semaphores, because on some platforms semaphores count as open files.
1027  */
1028  set_max_safe_fds();
1029 
1030  /*
1031  * Set reference point for stack-depth checking.
1032  */
1033  set_stack_base();
1034 
1035  /*
1036  * Initialize pipe (or process handle on Windows) that allows children to
1037  * wake up from sleep on postmaster death.
1038  */
1040 
1041 #ifdef WIN32
1042 
1043  /*
1044  * Initialize I/O completion port used to deliver list of dead children.
1045  */
1046  win32ChildQueue = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 1);
1047  if (win32ChildQueue == NULL)
1048  ereport(FATAL,
1049  (errmsg("could not create I/O completion port for child queue")));
1050 #endif
1051 
1052 #ifdef EXEC_BACKEND
1053  /* Write out nondefault GUC settings for child processes to use */
1054  write_nondefault_variables(PGC_POSTMASTER);
1055 
1056  /*
1057  * Clean out the temp directory used to transmit parameters to child
1058  * processes (see internal_forkexec, below). We must do this before
1059  * launching any child processes, else we have a race condition: we could
1060  * remove a parameter file before the child can read it. It should be
1061  * safe to do so now, because we verified earlier that there are no
1062  * conflicting Postgres processes in this data directory.
1063  */
1065 #endif
1066 
1067  /*
1068  * Forcibly remove the files signaling a standby promotion request.
1069  * Otherwise, the existence of those files triggers a promotion too early,
1070  * whether a user wants that or not.
1071  *
1072  * This removal of files is usually unnecessary because they can exist
1073  * only during a few moments during a standby promotion. However there is
1074  * a race condition: if pg_ctl promote is executed and creates the files
1075  * during a promotion, the files can stay around even after the server is
1076  * brought up to be the primary. Then, if a new standby starts by using
1077  * the backup taken from the new primary, the files can exist at server
1078  * startup and must be removed in order to avoid an unexpected promotion.
1079  *
1080  * Note that promotion signal files need to be removed before the startup
1081  * process is invoked. Because, after that, they can be used by
1082  * postmaster's SIGUSR1 signal handler.
1083  */
1085 
1086  /* Do the same for logrotate signal file */
1088 
1089  /* Remove any outdated file holding the current log filenames. */
1090  if (unlink(LOG_METAINFO_DATAFILE) < 0 && errno != ENOENT)
1091  ereport(LOG,
1093  errmsg("could not remove file \"%s\": %m",
1095 
1096  /*
1097  * If enabled, start up syslogger collection subprocess
1098  */
1100 
1101  /*
1102  * Reset whereToSendOutput from DestDebug (its starting state) to
1103  * DestNone. This stops ereport from sending log messages to stderr unless
1104  * Log_destination permits. We don't do this until the postmaster is
1105  * fully launched, since startup failures may as well be reported to
1106  * stderr.
1107  *
1108  * If we are in fact disabling logging to stderr, first emit a log message
1109  * saying so, to provide a breadcrumb trail for users who may not remember
1110  * that their logging is configured to go somewhere else.
1111  */
1113  ereport(LOG,
1114  (errmsg("ending log output to stderr"),
1115  errhint("Future log output will go to log destination \"%s\".",
1117 
1119 
1120  /*
1121  * Report server startup in log. While we could emit this much earlier,
1122  * it seems best to do so after starting the log collector, if we intend
1123  * to use one.
1124  */
1125  ereport(LOG,
1126  (errmsg("starting %s", PG_VERSION_STR)));
1127 
1128  /*
1129  * Establish input sockets.
1130  *
1131  * First, mark them all closed, and set up an on_proc_exit function that's
1132  * charged with closing the sockets again at postmaster shutdown.
1133  */
1134  for (i = 0; i < MAXLISTEN; i++)
1136 
1138 
1139  if (ListenAddresses)
1140  {
1141  char *rawstring;
1142  List *elemlist;
1143  ListCell *l;
1144  int success = 0;
1145 
1146  /* Need a modifiable copy of ListenAddresses */
1147  rawstring = pstrdup(ListenAddresses);
1148 
1149  /* Parse string into list of hostnames */
1150  if (!SplitGUCList(rawstring, ',', &elemlist))
1151  {
1152  /* syntax error in list */
1153  ereport(FATAL,
1154  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1155  errmsg("invalid list syntax in parameter \"%s\"",
1156  "listen_addresses")));
1157  }
1158 
1159  foreach(l, elemlist)
1160  {
1161  char *curhost = (char *) lfirst(l);
1162 
1163  if (strcmp(curhost, "*") == 0)
1164  status = StreamServerPort(AF_UNSPEC, NULL,
1165  (unsigned short) PostPortNumber,
1166  NULL,
1168  else
1169  status = StreamServerPort(AF_UNSPEC, curhost,
1170  (unsigned short) PostPortNumber,
1171  NULL,
1172  ListenSocket, MAXLISTEN);
1173 
1174  if (status == STATUS_OK)
1175  {
1176  success++;
1177  /* record the first successful host addr in lockfile */
1178  if (!listen_addr_saved)
1179  {
1181  listen_addr_saved = true;
1182  }
1183  }
1184  else
1185  ereport(WARNING,
1186  (errmsg("could not create listen socket for \"%s\"",
1187  curhost)));
1188  }
1189 
1190  if (!success && elemlist != NIL)
1191  ereport(FATAL,
1192  (errmsg("could not create any TCP/IP sockets")));
1193 
1194  list_free(elemlist);
1195  pfree(rawstring);
1196  }
1197 
1198 #ifdef USE_BONJOUR
1199  /* Register for Bonjour only if we opened TCP socket(s) */
1201  {
1202  DNSServiceErrorType err;
1203 
1204  /*
1205  * We pass 0 for interface_index, which will result in registering on
1206  * all "applicable" interfaces. It's not entirely clear from the
1207  * DNS-SD docs whether this would be appropriate if we have bound to
1208  * just a subset of the available network interfaces.
1209  */
1210  err = DNSServiceRegister(&bonjour_sdref,
1211  0,
1212  0,
1213  bonjour_name,
1214  "_postgresql._tcp.",
1215  NULL,
1216  NULL,
1218  0,
1219  NULL,
1220  NULL,
1221  NULL);
1222  if (err != kDNSServiceErr_NoError)
1223  ereport(LOG,
1224  (errmsg("DNSServiceRegister() failed: error code %ld",
1225  (long) err)));
1226 
1227  /*
1228  * We don't bother to read the mDNS daemon's reply, and we expect that
1229  * it will automatically terminate our registration when the socket is
1230  * closed at postmaster termination. So there's nothing more to be
1231  * done here. However, the bonjour_sdref is kept around so that
1232  * forked children can close their copies of the socket.
1233  */
1234  }
1235 #endif
1236 
1237 #ifdef HAVE_UNIX_SOCKETS
1239  {
1240  char *rawstring;
1241  List *elemlist;
1242  ListCell *l;
1243  int success = 0;
1244 
1245  /* Need a modifiable copy of Unix_socket_directories */
1246  rawstring = pstrdup(Unix_socket_directories);
1247 
1248  /* Parse string into list of directories */
1249  if (!SplitDirectoriesString(rawstring, ',', &elemlist))
1250  {
1251  /* syntax error in list */
1252  ereport(FATAL,
1253  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1254  errmsg("invalid list syntax in parameter \"%s\"",
1255  "unix_socket_directories")));
1256  }
1257 
1258  foreach(l, elemlist)
1259  {
1260  char *socketdir = (char *) lfirst(l);
1261 
1262  status = StreamServerPort(AF_UNIX, NULL,
1263  (unsigned short) PostPortNumber,
1264  socketdir,
1265  ListenSocket, MAXLISTEN);
1266 
1267  if (status == STATUS_OK)
1268  {
1269  success++;
1270  /* record the first successful Unix socket in lockfile */
1271  if (success == 1)
1273  }
1274  else
1275  ereport(WARNING,
1276  (errmsg("could not create Unix-domain socket in directory \"%s\"",
1277  socketdir)));
1278  }
1279 
1280  if (!success && elemlist != NIL)
1281  ereport(FATAL,
1282  (errmsg("could not create any Unix-domain sockets")));
1283 
1284  list_free_deep(elemlist);
1285  pfree(rawstring);
1286  }
1287 #endif
1288 
1289  /*
1290  * check that we have some socket to listen on
1291  */
1292  if (ListenSocket[0] == PGINVALID_SOCKET)
1293  ereport(FATAL,
1294  (errmsg("no socket created for listening")));
1295 
1296  /*
1297  * If no valid TCP ports, write an empty line for listen address,
1298  * indicating the Unix socket must be used. Note that this line is not
1299  * added to the lock file until there is a socket backing it.
1300  */
1301  if (!listen_addr_saved)
1303 
1304  /*
1305  * Record postmaster options. We delay this till now to avoid recording
1306  * bogus options (eg, unusable port number).
1307  */
1308  if (!CreateOptsFile(argc, argv, my_exec_path))
1309  ExitPostmaster(1);
1310 
1311  /*
1312  * Write the external PID file if requested
1313  */
1314  if (external_pid_file)
1315  {
1316  FILE *fpidfile = fopen(external_pid_file, "w");
1317 
1318  if (fpidfile)
1319  {
1320  fprintf(fpidfile, "%d\n", MyProcPid);
1321  fclose(fpidfile);
1322 
1323  /* Make PID file world readable */
1324  if (chmod(external_pid_file, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) != 0)
1325  write_stderr("%s: could not change permissions of external PID file \"%s\": %s\n",
1327  }
1328  else
1329  write_stderr("%s: could not write external PID file \"%s\": %s\n",
1331 
1333  }
1334 
1335  /*
1336  * Remove old temporary files. At this point there can be no other
1337  * Postgres processes running in this directory, so this should be safe.
1338  */
1340 
1341  /*
1342  * Initialize stats collection subsystem (this does NOT start the
1343  * collector process!)
1344  */
1345  pgstat_init();
1346 
1347  /*
1348  * Initialize the autovacuum subsystem (again, no process start yet)
1349  */
1350  autovac_init();
1351 
1352  /*
1353  * Load configuration files for client authentication.
1354  */
1355  if (!load_hba())
1356  {
1357  /*
1358  * It makes no sense to continue if we fail to load the HBA file,
1359  * since there is no way to connect to the database in this case.
1360  */
1361  ereport(FATAL,
1362  (errmsg("could not load pg_hba.conf")));
1363  }
1364  if (!load_ident())
1365  {
1366  /*
1367  * We can start up without the IDENT file, although it means that you
1368  * cannot log in using any of the authentication methods that need a
1369  * user name mapping. load_ident() already logged the details of error
1370  * to the log.
1371  */
1372  }
1373 
1374 #ifdef HAVE_PTHREAD_IS_THREADED_NP
1375 
1376  /*
1377  * On macOS, libintl replaces setlocale() with a version that calls
1378  * CFLocaleCopyCurrent() when its second argument is "" and every relevant
1379  * environment variable is unset or empty. CFLocaleCopyCurrent() makes
1380  * the process multithreaded. The postmaster calls sigprocmask() and
1381  * calls fork() without an immediate exec(), both of which have undefined
1382  * behavior in a multithreaded program. A multithreaded postmaster is the
1383  * normal case on Windows, which offers neither fork() nor sigprocmask().
1384  */
1385  if (pthread_is_threaded_np() != 0)
1386  ereport(FATAL,
1387  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1388  errmsg("postmaster became multithreaded during startup"),
1389  errhint("Set the LC_ALL environment variable to a valid locale.")));
1390 #endif
1391 
1392  /*
1393  * Remember postmaster startup time
1394  */
1396 
1397  /*
1398  * Report postmaster status in the postmaster.pid file, to allow pg_ctl to
1399  * see what's happening.
1400  */
1402 
1403  /*
1404  * We're ready to rock and roll...
1405  */
1407  Assert(StartupPID != 0);
1409  pmState = PM_STARTUP;
1410 
1411  /* Some workers may be scheduled to start now */
1413 
1414  status = ServerLoop();
1415 
1416  /*
1417  * ServerLoop probably shouldn't ever return, but if it does, close down.
1418  */
1419  ExitPostmaster(status != STATUS_OK);
1420 
1421  abort(); /* not reached */
1422 }
1423 
1424 
1425 /*
1426  * on_proc_exit callback to close server's listen sockets
 *
 * Walks every ListenSocket[] slot from 0 to MAXLISTEN-1 and acts on each
 * slot whose value is not PGINVALID_SOCKET.  Per the in-body comments, the
 * sockets are closed first, the Unix-socket filesystem entries are removed
 * next, and socket lock files are deliberately left to a later callback.
 *
 * NOTE(review): this listing has lost source lines (its embedded numbering
 * skips them): 1429, where the function name and parameter list belong;
 * 1443-1444, the body of the per-socket "if"; and 1453, the statement that
 * the "Next, remove ..." comment describes.  Presumably this is the
 * CloseServerPorts routine referred to by the data-directory lockfile
 * comment in PostmasterMain -- confirm against the upstream file and
 * restore the missing lines before compiling.
1427  */
1428 static void
1430 {
1431  int i;
1432
1433  /*
1434  * First, explicitly close all the socket FDs. We used to just let this
1435  * happen implicitly at postmaster exit, but it's better to close them
1436  * before we remove the postmaster.pid lockfile; otherwise there's a race
1437  * condition if a new postmaster wants to re-use the TCP port number.
1438  */
1439  for (i = 0; i < MAXLISTEN; i++)
1440  {
1441  if (ListenSocket[i] != PGINVALID_SOCKET)
1442  {
 /* NOTE(review): lines 1443-1444 are absent here; as shown the block is empty. */
1445  }
1446  }
1447
1448  /*
1449  * Next, remove any filesystem entries for Unix sockets. To avoid race
1450  * conditions against incoming postmasters, this must happen after closing
1451  * the sockets and before removing lock files.
1452  */
 /* NOTE(review): line 1453 (the removal statement itself) is absent here. */
1454
1455  /*
1456  * We don't do anything about socket lock files here; those will be
1457  * removed in a later on_proc_exit callback.
1458  */
1459 }
1460 
1461 /*
1462  * on_proc_exit callback to delete external_pid_file
 *
 * Does nothing when external_pid_file is NULL; otherwise unlinks that path.
 * The result of unlink() is not checked, so a failure to remove the file
 * goes unreported.
 *
 * NOTE(review): line 1465 of the listing (the function name and parameter
 * list) is missing; restore it from the upstream file before compiling.
1463  */
1464 static void
1466 {
1467  if (external_pid_file)
1468  unlink(external_pid_file);
1469 }
1470 
1471 
1472 /*
1473  * Compute and check the directory paths to files that are part of the
1474  * installation (as deduced from the postgres executable's own location)
 *
 * Steps visible here:
 *   - find_my_exec(argv0, my_exec_path); a negative result is reported at
 *     FATAL level.
 *   - under EXEC_BACKEND only, find_other_exec() looks for a "postgres"
 *     binary matching PG_BACKEND_VERSIONSTR and stores its location in
 *     postgres_exec_path; a negative result is likewise FATAL.
 *   - pkglib_path is opened with AllocateDir() purely as an existence and
 *     readability probe and released again with FreeDir().
 *
 * argv0 is used both as the lookup key and in the error messages.
 *
 * NOTE(review): the listing has lost lines 1477 (function name and
 * parameter list), 1499 (the statement that fills in pkglib_path) and
 * 1511 (the first auxiliary call inside the ereport); restore them from the
 * upstream file before compiling.
1475  */
1476 static void
1478 {
1479  DIR *pdir;
1480
1481  /* Locate the postgres executable itself */
1482  if (find_my_exec(argv0, my_exec_path) < 0)
1483  ereport(FATAL,
1484  (errmsg("%s: could not locate my own executable path", argv0)));
1485
1486 #ifdef EXEC_BACKEND
1487  /* Locate executable backend before we change working directory */
1488  if (find_other_exec(argv0, "postgres", PG_BACKEND_VERSIONSTR,
1489  postgres_exec_path) < 0)
1490  ereport(FATAL,
1491  (errmsg("%s: could not locate matching postgres executable",
1492  argv0)));
1493 #endif
1494
1495  /*
1496  * Locate the pkglib directory --- this has to be set early in case we try
1497  * to load any modules from it in response to postgresql.conf entries.
1498  */
 /* NOTE(review): line 1499, which should set pkglib_path, is absent here. */
1500
1501  /*
1502  * Verify that there's a readable directory there; otherwise the Postgres
1503  * installation is incomplete or corrupt. (A typical cause of this
1504  * failure is that the postgres executable has been moved or hardlinked to
1505  * some directory that's not a sibling of the installation lib/
1506  * directory.)
1507  */
1508  pdir = AllocateDir(pkglib_path);
1509  if (pdir == NULL)
1510  ereport(ERROR,
 /* NOTE(review): line 1511 is absent; the parentheses below do not balance without it. */
1512  errmsg("could not open directory \"%s\": %m",
1513  pkglib_path),
1514  errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
1515  my_exec_path)));
1516  FreeDir(pdir);
1517
1518  /*
1519  * XXX is it worth similarly checking the share/ directory? If the lib/
1520  * directory is there, then share/ probably is too.
1521  */
1522 }
1523 
1524 /*
1525  * Check that pg_control exists in the correct location in the data directory.
1526  *
1527  * No attempt is made to validate the contents of pg_control here. This is
1528  * just a sanity check to see if we are looking at a real data directory.
 *
 * The path probed is "<DataDir>/global/pg_control", built in a MAXPGPATH
 * buffer.  If AllocateFile() cannot open it, an explanation including
 * strerror(errno) is written to stderr and ExitPostmaster(2) is called;
 * otherwise the file is released again with FreeFile() without being read.
 *
 * NOTE(review): line 1531 of the listing (the function name and parameter
 * list) is missing; restore it from the upstream file before compiling.
1529  */
1530 static void
1532 {
1533  char path[MAXPGPATH];
1534  FILE *fp;
1535
1536  snprintf(path, sizeof(path), "%s/global/pg_control", DataDir);
1537
1538  fp = AllocateFile(path, PG_BINARY_R);
1539  if (fp == NULL)
1540  {
1541  write_stderr("%s: could not find the database system\n"
1542  "Expected to find it in the directory \"%s\",\n"
1543  "but could not open file \"%s\": %s\n",
1544  progname, DataDir, path, strerror(errno));
1545  ExitPostmaster(2);
1546  }
1547  FreeFile(fp);
1548 }
1549 
1550 /*
1551  * Determine how long we should let ServerLoop sleep.
1552  *
1553  * In normal conditions we wait at most one minute, to ensure that the other
1554  * background tasks handled by ServerLoop get done even when no requests are
1555  * arriving. However, if there are background workers waiting to be started,
1556  * we don't actually sleep so that they are quickly serviced. Other exception
1557  * cases are as shown in the code.
 *
 * timeout is an output parameter: every path through the function stores
 * both tv_sec and tv_usec.  The value is one of
 *   - the seconds remaining until SIGKILL_CHILDREN_AFTER_SECS have elapsed
 *     since AbortStartTime, floored at 0 (first branch, AbortStartTime set);
 *   - zero (StartWorkerNeeded);
 *   - the time until the earliest restart among crashed workers, capped at
 *     60 seconds;
 *   - 60 seconds otherwise.
 *
 * Side effect: while scanning crashed workers, entries matching the
 * (partially missing) condition ending in "|| rw->rw_terminate" are handed
 * to ForgetBackgroundWorker().
 *
 * NOTE(review): this listing has lost lines 1569, 1604, 1614 and 1633 (its
 * embedded numbering skips them); each spot is marked below.  Restore them
 * from the upstream file before compiling.
1558  */
1559 static void
1560 DetermineSleepTime(struct timeval *timeout)
1561 {
1562  TimestampTz next_wakeup = 0;
1563
1564  /*
1565  * Normal case: either there are no background workers at all, or we're in
1566  * a shutdown sequence (during which we ignore bgworkers altogether).
1567  */
1568  if (Shutdown > NoShutdown ||
 /* NOTE(review): line 1569, the remainder of this condition, is absent. */
1570  {
1571  if (AbortStartTime != 0)
1572  {
1573  /* time left to abort; clamp to 0 in case it already expired */
1574  timeout->tv_sec = SIGKILL_CHILDREN_AFTER_SECS -
1575  (time(NULL) - AbortStartTime);
1576  timeout->tv_sec = Max(timeout->tv_sec, 0);
1577  timeout->tv_usec = 0;
1578  }
1579  else
1580  {
1581  timeout->tv_sec = 60;
1582  timeout->tv_usec = 0;
1583  }
1584  return;
1585  }
1586
1587  if (StartWorkerNeeded)
1588  {
1589  timeout->tv_sec = 0;
1590  timeout->tv_usec = 0;
1591  return;
1592  }
1593
1594  if (HaveCrashedWorker)
1595  {
1596  slist_mutable_iter siter;
1597
1598  /*
1599  * When there are crashed bgworkers, we sleep just long enough that
1600  * they are restarted when they request to be. Scan the list to
1601  * determine the minimum of all wakeup times according to most recent
1602  * crash time and requested restart interval.
1603  */
 /* NOTE(review): line 1604, the loop header that drives siter, is absent. */
1605  {
1606  RegisteredBgWorker *rw;
1607  TimestampTz this_wakeup;
1608
1609  rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
1610
 /* Workers with no recorded crash time do not influence the wakeup. */
1611  if (rw->rw_crashed_at == 0)
1612  continue;
1613
 /* NOTE(review): line 1614, the start of this "if" condition, is absent. */
1615  || rw->rw_terminate)
1616  {
1617  ForgetBackgroundWorker(&siter);
1618  continue;
1619  }
1620
 /* bgw_restart_time is multiplied by 1000 to obtain milliseconds. */
1621  this_wakeup = TimestampTzPlusMilliseconds(rw->rw_crashed_at,
1622  1000L * rw->rw_worker.bgw_restart_time);
 /* next_wakeup == 0 means "nothing chosen yet"; otherwise keep the earliest. */
1623  if (next_wakeup == 0 || this_wakeup < next_wakeup)
1624  next_wakeup = this_wakeup;
1625  }
1626  }
1627
1628  if (next_wakeup != 0)
1629  {
1630  long secs;
1631  int microsecs;
1632
 /* NOTE(review): line 1633, the start of the call that fills secs/microsecs, is absent. */
1634  &secs, &microsecs);
1635  timeout->tv_sec = secs;
1636  timeout->tv_usec = microsecs;
1637
1638  /* Ensure we don't exceed one minute */
1639  if (timeout->tv_sec > 60)
1640  {
1641  timeout->tv_sec = 60;
1642  timeout->tv_usec = 0;
1643  }
1644  }
1645  else
1646  {
1647  timeout->tv_sec = 60;
1648  timeout->tv_usec = 0;
1649  }
1650 }
1651 
1652 /*
1653  * Main idle loop of postmaster
1654  *
1655  * NB: Needs to be called with signals blocked
 *
 * Each iteration:
 *   1. sleeps -- a fixed 100 msec pg_usleep() in PM_WAIT_DEAD_END state,
 *      otherwise select() on the listen sockets with the timeout chosen by
 *      DetermineSleepTime();
 *   2. for every listen socket reported readable, creates a Port with
 *      ConnCreate(), passes it to BackendStartup(), then closes the
 *      postmaster's copy of the socket and frees the Port;
 *   3. checks whether various helper processes are missing (PID == 0) and
 *      should be started again;
 *   4. runs time-based housekeeping: the SIGKILL escalation, the once-a-
 *      minute postmaster.pid recheck, and the 58-minute socket-file touch.
 *
 * Returns STATUS_ERROR when select() fails with an errno other than EINTR
 * or EWOULDBLOCK.  No other exit from the for(;;) loop is visible here.
 *
 * NOTE(review): this listing has lost many source lines (its embedded
 * numbering skips 1658, 1689, 1704, 1717, 1757, 1765, 1768, 1770, 1779,
 * 1788, 1791, 1803, 1806, 1810, 1814-1815, 1818-1819, 1850, 1855, 1876 and
 * 1889).  Most of them are the statements that actually start or signal a
 * process, so the code below does not compile or read correctly as shown.
 * Each gap is marked; restore the lines from the upstream file.
1656  */
1657 static int
 /* NOTE(review): line 1658 (function name and parameter list) is absent. */
1659 {
1660  fd_set readmask;
1661  int nSockets;
1662  time_t last_lockfile_recheck_time,
1663  last_touch_time;
1664
1665  last_lockfile_recheck_time = last_touch_time = time(NULL);
1666
 /* Built once, before the loop; each iteration works on a copy (rmask). */
1667  nSockets = initMasks(&readmask);
1668
1669  for (;;)
1670  {
1671  fd_set rmask;
1672  int selres;
1673  time_t now;
1674
1675  /*
1676  * Wait for a connection request to arrive.
1677  *
1678  * We block all signals except while sleeping. That makes it safe for
1679  * signal handlers, which again block all signals while executing, to
1680  * do nontrivial work.
1681  *
1682  * If we are in PM_WAIT_DEAD_END state, then we don't want to accept
1683  * any new connections, so we don't call select(), and just sleep.
1684  */
1685  memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
1686
1687  if (pmState == PM_WAIT_DEAD_END)
1688  {
 /* NOTE(review): line 1689 is absent; presumably the unblock paired with PG_SETMASK(&BlockSig) below -- confirm. */
1690
1691  pg_usleep(100000L); /* 100 msec seems reasonable */
 /* Pretend nothing is readable so the accept step below is skipped. */
1692  selres = 0;
1693
1694  PG_SETMASK(&BlockSig);
1695  }
1696  else
1697  {
1698  /* must set timeout each time; some OSes change it! */
1699  struct timeval timeout;
1700
1701  /* Needs to run with blocked signals! */
1702  DetermineSleepTime(&timeout);
1703
 /* NOTE(review): line 1704 is absent; presumably the unblock paired with PG_SETMASK(&BlockSig) below -- confirm. */
1705
1706  selres = select(nSockets, &rmask, NULL, NULL, &timeout);
1707
1708  PG_SETMASK(&BlockSig);
1709  }
1710
1711  /* Now check the select() result */
1712  if (selres < 0)
1713  {
 /* EINTR and EWOULDBLOCK are tolerated: fall through to the housekeeping below. */
1714  if (errno != EINTR && errno != EWOULDBLOCK)
1715  {
1716  ereport(LOG,
 /* NOTE(review): line 1717 is absent; the parentheses below do not balance without it. */
1718  errmsg("select() failed in postmaster: %m")));
1719  return STATUS_ERROR;
1720  }
1721  }
1722
1723  /*
1724  * New connection pending on any of our sockets? If so, fork a child
1725  * process to deal with it.
1726  */
1727  if (selres > 0)
1728  {
1729  int i;
1730
1731  for (i = 0; i < MAXLISTEN; i++)
1732  {
 /* The scan ends at the first unused slot, the same rule initMasks() applies. */
1733  if (ListenSocket[i] == PGINVALID_SOCKET)
1734  break;
1735  if (FD_ISSET(ListenSocket[i], &rmask))
1736  {
1737  Port *port;
1738
1739  port = ConnCreate(ListenSocket[i]);
1740  if (port)
1741  {
 /* The result of BackendStartup() is not examined; cleanup below runs either way. */
1742  BackendStartup(port);
1743
1744  /*
1745  * We no longer need the open socket or port structure
1746  * in this process
1747  */
1748  StreamClose(port->sock);
1749  ConnFree(port);
1750  }
1751  }
1752  }
1753  }
1754
1755  /* If we have lost the log collector, try to start a new one */
1756  if (SysLoggerPID == 0 && Logging_collector)
 /* NOTE(review): line 1757, the statement controlled by this "if", is absent. */
1758
1759  /*
1760  * If no background writer process is running, and we are not in a
1761  * state that prevents it, start one. It doesn't matter if this
1762  * fails, we'll just try again later. Likewise for the checkpointer.
1763  */
1764  if (pmState == PM_RUN || pmState == PM_RECOVERY ||
 /* NOTE(review): line 1765, the remainder of this condition, is absent. */
1766  {
1767  if (CheckpointerPID == 0)
 /* NOTE(review): line 1768, the statement controlled by the "if" above, is absent. */
1769  if (BgWriterPID == 0)
 /* NOTE(review): line 1770, the statement controlled by the "if" above, is absent. */
1771  }
1772
1773  /*
1774  * Likewise, if we have lost the walwriter process, try to start a new
1775  * one. But this is needed only in normal operation (else we cannot
1776  * be writing any new WAL).
1777  */
1778  if (WalWriterPID == 0 && pmState == PM_RUN)
 /* NOTE(review): line 1779, the statement controlled by this "if", is absent. */
1780
1781  /*
1782  * If we have lost the autovacuum launcher, try to start a new one. We
1783  * don't want autovacuum to run in binary upgrade mode because
1784  * autovacuum might update relfrozenxid for empty tables before the
1785  * physical files are put in place.
1786  */
1787  if (!IsBinaryUpgrade && AutoVacPID == 0 &&
 /* NOTE(review): line 1788, one term of this condition, is absent. */
1789  pmState == PM_RUN)
1790  {
 /* NOTE(review): line 1791 is absent; presumably it assigns AutoVacPID, which is tested next -- confirm. */
1792  if (AutoVacPID != 0)
1793  start_autovac_launcher = false; /* signal processed */
1794  }
1795
1796  /* If we have lost the stats collector, try to start a new one */
1797  if (PgStatPID == 0 &&
1798  (pmState == PM_RUN || pmState == PM_HOT_STANDBY))
1799  PgStatPID = pgstat_start();
1800
1801  /* If we have lost the archiver, try to start a new one. */
1802  if (PgArchPID == 0 && PgArchStartupAllowed())
 /* NOTE(review): line 1803, the statement controlled by this "if", is absent. */
1804
1805  /* If we need to signal the autovacuum launcher, do so now */
 /* NOTE(review): line 1806, the "if" that opens this block, is absent. */
1807  {
1808  avlauncher_needs_signal = false;
1809  if (AutoVacPID != 0)
 /* NOTE(review): line 1810, the statement controlled by the "if" above, is absent. */
1811  }
1812
1813  /* If we need to start a WAL receiver, try to do that now */
 /* NOTE(review): lines 1814-1815 are absent. */
1816
1817  /* Get other worker processes running, if needed */
 /* NOTE(review): lines 1818-1819 are absent. */
1820
1821 #ifdef HAVE_PTHREAD_IS_THREADED_NP
1822
1823  /*
1824  * With assertions enabled, check regularly for appearance of
1825  * additional threads. All builds check at start and exit.
1826  */
1827  Assert(pthread_is_threaded_np() == 0);
1828 #endif
1829
1830  /*
1831  * Lastly, check to see if it's time to do some things that we don't
1832  * want to do every single time through the loop, because they're a
1833  * bit expensive. Note that there's up to a minute of slop in when
1834  * these tasks will be performed, since DetermineSleepTime() will let
1835  * us sleep at most that long; except for SIGKILL timeout which has
1836  * special-case logic there.
1837  */
1838  now = time(NULL);
1839
1840  /*
1841  * If we already sent SIGQUIT to children and they are slow to shut
1842  * down, it's time to send them SIGKILL. This doesn't happen
1843  * normally, but under certain conditions backends can get stuck while
1844  * shutting down. This is a last measure to get them unwedged.
1845  *
1846  * Note we also do this during recovery from a process crash.
1847  */
1848  if ((Shutdown >= ImmediateShutdown || (FatalError && !SendStop)) &&
1849  AbortStartTime != 0 &&
 /* NOTE(review): line 1850, the final term of this condition, is absent. */
1851  {
1852  /* We were gentle with them before. Not anymore */
1853  ereport(LOG,
1854  (errmsg("issuing SIGKILL to recalcitrant children")));
 /* NOTE(review): line 1855 is absent; nothing visible here actually sends the signal. */
1856  /* reset flag so we don't SIGKILL again */
1857  AbortStartTime = 0;
1858  }
1859
1860  /*
1861  * Once a minute, verify that postmaster.pid hasn't been removed or
1862  * overwritten. If it has, we force a shutdown. This avoids having
1863  * postmasters and child processes hanging around after their database
1864  * is gone, and maybe causing problems if a new database cluster is
1865  * created in the same place. It also provides some protection
1866  * against a DBA foolishly removing postmaster.pid and manually
1867  * starting a new postmaster. Data corruption is likely to ensue from
1868  * that anyway, but we can minimize the damage by aborting ASAP.
1869  */
1870  if (now - last_lockfile_recheck_time >= 1 * SECS_PER_MINUTE)
1871  {
1872  if (!RecheckDataDirLockFile())
1873  {
1874  ereport(LOG,
1875  (errmsg("performing immediate shutdown because data directory lock file is invalid")));
 /* NOTE(review): line 1876 is absent; nothing visible here actually triggers the shutdown. */
1877  }
1878  last_lockfile_recheck_time = now;
1879  }
1880
1881  /*
1882  * Touch Unix socket and lock files every 58 minutes, to ensure that
1883  * they are not removed by overzealous /tmp-cleaning tasks. We assume
1884  * no one runs cleaners with cutoff times of less than an hour ...
1885  */
1886  if (now - last_touch_time >= 58 * SECS_PER_MINUTE)
1887  {
1888  TouchSocketFiles();
 /* NOTE(review): line 1889 is absent; the comment above also mentions lock files. */
1890  last_touch_time = now;
1891  }
1892  }
1893 }
1894 
1895 /*
1896  * Initialise the masks for select() for the ports we are listening on.
1897  * Return the number of sockets to listen on.
1898  */
1899 static int
1900 initMasks(fd_set *rmask)
1901 {
1902  int maxsock = -1;
1903  int i;
1904 
1905  FD_ZERO(rmask);
1906 
1907  for (i = 0; i < MAXLISTEN; i++)
1908  {
1909  int fd = ListenSocket[i];
1910 
1911  if (fd == PGINVALID_SOCKET)
1912  break;
1913  FD_SET(fd, rmask);
1914 
1915  if (fd > maxsock)
1916  maxsock = fd;
1917  }
1918 
1919  return maxsock + 1;
1920 }
1921 
1922 
1923 /*
1924  * Read a client's startup packet and do something according to it.
1925  *
1926  * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1927  * not return at all.
1928  *
1929  * (Note that ereport(FATAL) stuff is sent to the client, so only use it
1930  * if that's what you want. Return STATUS_ERROR if you don't want to
1931  * send anything to the client, which would typically be appropriate
1932  * if we detect a communications failure.)
1933  *
1934  * Set ssl_done and/or gss_done when negotiation of an encrypted layer
1935  * (currently, TLS or GSSAPI) is completed. A successful negotiation of either
1936  * encryption layer sets both flags, but a rejected negotiation sets only the
1937  * flag for that layer, since the client may wish to try the other one. We
1938  * should make no assumption here about the order in which the client may make
1939  * requests.
1940  */
1941 static int
1942 ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done)
1943 {
1944  int32 len;
1945  char *buf;
1946  ProtocolVersion proto;
1947  MemoryContext oldcontext;
1948 
1949  pq_startmsgread();
1950 
1951  /*
1952  * Grab the first byte of the length word separately, so that we can tell
1953  * whether we have no data at all or an incomplete packet. (This might
1954  * sound inefficient, but it's not really, because of buffering in
1955  * pqcomm.c.)
1956  */
1957  if (pq_getbytes((char *) &len, 1) == EOF)
1958  {
1959  /*
1960  * If we get no data at all, don't clutter the log with a complaint;
1961  * such cases often occur for legitimate reasons. An example is that
1962  * we might be here after responding to NEGOTIATE_SSL_CODE, and if the
1963  * client didn't like our response, it'll probably just drop the
1964  * connection. Service-monitoring software also often just opens and
1965  * closes a connection without sending anything. (So do port
1966  * scanners, which may be less benign, but it's not really our job to
1967  * notice those.)
1968  */
1969  return STATUS_ERROR;
1970  }
1971 
1972  if (pq_getbytes(((char *) &len) + 1, 3) == EOF)
1973  {
1974  /* Got a partial length word, so bleat about that */
1975  if (!ssl_done && !gss_done)
1977  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1978  errmsg("incomplete startup packet")));
1979  return STATUS_ERROR;
1980  }
1981 
1982  len = pg_ntoh32(len);
1983  len -= 4;
1984 
1985  if (len < (int32) sizeof(ProtocolVersion) ||
1987  {
1989  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1990  errmsg("invalid length of startup packet")));
1991  return STATUS_ERROR;
1992  }
1993 
1994  /*
1995  * Allocate space to hold the startup packet, plus one extra byte that's
1996  * initialized to be zero. This ensures we will have null termination of
1997  * all strings inside the packet.
1998  */
1999  buf = palloc(len + 1);
2000  buf[len] = '\0';
2001 
2002  if (pq_getbytes(buf, len) == EOF)
2003  {
2005  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2006  errmsg("incomplete startup packet")));
2007  return STATUS_ERROR;
2008  }
2009  pq_endmsgread();
2010 
2011  /*
2012  * The first field is either a protocol version number or a special
2013  * request code.
2014  */
2015  port->proto = proto = pg_ntoh32(*((ProtocolVersion *) buf));
2016 
2017  if (proto == CANCEL_REQUEST_CODE)
2018  {
2019  processCancelRequest(port, buf);
2020  /* Not really an error, but we don't want to proceed further */
2021  return STATUS_ERROR;
2022  }
2023 
2024  if (proto == NEGOTIATE_SSL_CODE && !ssl_done)
2025  {
2026  char SSLok;
2027 
2028 #ifdef USE_SSL
2029  /* No SSL when disabled or on Unix sockets */
2030  if (!LoadedSSL || IS_AF_UNIX(port->laddr.addr.ss_family))
2031  SSLok = 'N';
2032  else
2033  SSLok = 'S'; /* Support for SSL */
2034 #else
2035  SSLok = 'N'; /* No support for SSL */
2036 #endif
2037 
2038 retry1:
2039  if (send(port->sock, &SSLok, 1, 0) != 1)
2040  {
2041  if (errno == EINTR)
2042  goto retry1; /* if interrupted, just retry */
2045  errmsg("failed to send SSL negotiation response: %m")));
2046  return STATUS_ERROR; /* close the connection */
2047  }
2048 
2049 #ifdef USE_SSL
2050  if (SSLok == 'S' && secure_open_server(port) == -1)
2051  return STATUS_ERROR;
2052 #endif
2053 
2054  /*
2055  * regular startup packet, cancel, etc packet should follow, but not
2056  * another SSL negotiation request, and a GSS request should only
2057  * follow if SSL was rejected (client may negotiate in either order)
2058  */
2059  return ProcessStartupPacket(port, true, SSLok == 'S');
2060  }
2061  else if (proto == NEGOTIATE_GSS_CODE && !gss_done)
2062  {
2063  char GSSok = 'N';
2064 
2065 #ifdef ENABLE_GSS
2066  /* No GSSAPI encryption when on Unix socket */
2067  if (!IS_AF_UNIX(port->laddr.addr.ss_family))
2068  GSSok = 'G';
2069 #endif
2070 
2071  while (send(port->sock, &GSSok, 1, 0) != 1)
2072  {
2073  if (errno == EINTR)
2074  continue;
2077  errmsg("failed to send GSSAPI negotiation response: %m")));
2078  return STATUS_ERROR; /* close the connection */
2079  }
2080 
2081 #ifdef ENABLE_GSS
2082  if (GSSok == 'G' && secure_open_gssapi(port) == -1)
2083  return STATUS_ERROR;
2084 #endif
2085 
2086  /*
2087  * regular startup packet, cancel, etc packet should follow, but not
2088  * another GSS negotiation request, and an SSL request should only
2089  * follow if GSS was rejected (client may negotiate in either order)
2090  */
2091  return ProcessStartupPacket(port, GSSok == 'G', true);
2092  }
2093 
2094  /* Could add additional special packet types here */
2095 
2096  /*
2097  * Set FrontendProtocol now so that ereport() knows what format to send if
2098  * we fail during startup.
2099  */
2100  FrontendProtocol = proto;
2101 
2102  /* Check that the major protocol version is in range. */
2105  ereport(FATAL,
2106  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2107  errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
2108  PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
2112 
2113  /*
2114  * Now fetch parameters out of startup packet and save them into the Port
2115  * structure. All data structures attached to the Port struct must be
2116  * allocated in TopMemoryContext so that they will remain available in a
2117  * running backend (even after PostmasterContext is destroyed). We need
2118  * not worry about leaking this storage on failure, since we aren't in the
2119  * postmaster process anymore.
2120  */
2122 
2123  /* Handle protocol version 3 startup packet */
2124  {
2125  int32 offset = sizeof(ProtocolVersion);
2126  List *unrecognized_protocol_options = NIL;
2127 
2128  /*
2129  * Scan packet body for name/option pairs. We can assume any string
2130  * beginning within the packet body is null-terminated, thanks to
2131  * zeroing extra byte above.
2132  */
2133  port->guc_options = NIL;
2134 
2135  while (offset < len)
2136  {
2137  char *nameptr = buf + offset;
2138  int32 valoffset;
2139  char *valptr;
2140 
2141  if (*nameptr == '\0')
2142  break; /* found packet terminator */
2143  valoffset = offset + strlen(nameptr) + 1;
2144  if (valoffset >= len)
2145  break; /* missing value, will complain below */
2146  valptr = buf + valoffset;
2147 
2148  if (strcmp(nameptr, "database") == 0)
2149  port->database_name = pstrdup(valptr);
2150  else if (strcmp(nameptr, "user") == 0)
2151  port->user_name = pstrdup(valptr);
2152  else if (strcmp(nameptr, "options") == 0)
2153  port->cmdline_options = pstrdup(valptr);
2154  else if (strcmp(nameptr, "replication") == 0)
2155  {
2156  /*
2157  * Due to backward compatibility concerns the replication
2158  * parameter is a hybrid beast which allows the value to be
2159  * either boolean or the string 'database'. The latter
2160  * connects to a specific database which is e.g. required for
2161  * logical decoding.
2162  */
2163  if (strcmp(valptr, "database") == 0)
2164  {
2165  am_walsender = true;
2166  am_db_walsender = true;
2167  }
2168  else if (!parse_bool(valptr, &am_walsender))
2169  ereport(FATAL,
2170  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2171  errmsg("invalid value for parameter \"%s\": \"%s\"",
2172  "replication",
2173  valptr),
2174  errhint("Valid values are: \"false\", 0, \"true\", 1, \"database\".")));
2175  }
2176  else if (strncmp(nameptr, "_pq_.", 5) == 0)
2177  {
2178  /*
2179  * Any option beginning with _pq_. is reserved for use as a
2180  * protocol-level option, but at present no such options are
2181  * defined.
2182  */
2183  unrecognized_protocol_options =
2184  lappend(unrecognized_protocol_options, pstrdup(nameptr));
2185  }
2186  else
2187  {
2188  /* Assume it's a generic GUC option */
2189  port->guc_options = lappend(port->guc_options,
2190  pstrdup(nameptr));
2191  port->guc_options = lappend(port->guc_options,
2192  pstrdup(valptr));
2193 
2194  /*
2195  * Copy application_name to port if we come across it. This
2196  * is done so we can log the application_name in the
2197  * connection authorization message. Note that the GUC would
2198  * be used but we haven't gone through GUC setup yet.
2199  */
2200  if (strcmp(nameptr, "application_name") == 0)
2201  {
2202  char *tmp_app_name = pstrdup(valptr);
2203 
2204  pg_clean_ascii(tmp_app_name);
2205 
2206  port->application_name = tmp_app_name;
2207  }
2208  }
2209  offset = valoffset + strlen(valptr) + 1;
2210  }
2211 
2212  /*
2213  * If we didn't find a packet terminator exactly at the end of the
2214  * given packet length, complain.
2215  */
2216  if (offset != len - 1)
2217  ereport(FATAL,
2218  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2219  errmsg("invalid startup packet layout: expected terminator as last byte")));
2220 
2221  /*
2222  * If the client requested a newer protocol version or if the client
2223  * requested any protocol options we didn't recognize, let them know
2224  * the newest minor protocol version we do support and the names of
2225  * any unrecognized options.
2226  */
2228  unrecognized_protocol_options != NIL)
2229  SendNegotiateProtocolVersion(unrecognized_protocol_options);
2230  }
2231 
2232  /* Check a user name was given. */
2233  if (port->user_name == NULL || port->user_name[0] == '\0')
2234  ereport(FATAL,
2235  (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
2236  errmsg("no PostgreSQL user name specified in startup packet")));
2237 
2238  /* The database defaults to the user name. */
2239  if (port->database_name == NULL || port->database_name[0] == '\0')
2240  port->database_name = pstrdup(port->user_name);
2241 
2242  if (Db_user_namespace)
2243  {
2244  /*
2245  * If user@, it is a global user, remove '@'. We only want to do this
2246  * if there is an '@' at the end and no earlier in the user string or
2247  * they may fake as a local user of another database attaching to this
2248  * database.
2249  */
2250  if (strchr(port->user_name, '@') ==
2251  port->user_name + strlen(port->user_name) - 1)
2252  *strchr(port->user_name, '@') = '\0';
2253  else
2254  {
2255  /* Append '@' and dbname */
2256  port->user_name = psprintf("%s@%s", port->user_name, port->database_name);
2257  }
2258  }
2259 
2260  /*
2261  * Truncate given database and user names to length of a Postgres name.
2262  * This avoids lookup failures when overlength names are given.
2263  */
2264  if (strlen(port->database_name) >= NAMEDATALEN)
2265  port->database_name[NAMEDATALEN - 1] = '\0';
2266  if (strlen(port->user_name) >= NAMEDATALEN)
2267  port->user_name[NAMEDATALEN - 1] = '\0';
2268 
2269  if (am_walsender)
2271  else
2273 
2274  /*
2275  * Normal walsender backends, e.g. for streaming replication, are not
2276  * connected to a particular database. But walsenders used for logical
2277  * replication need to connect to a specific database. We allow streaming
2278  * replication commands to be issued even if connected to a database as it
2279  * can make sense to first make a basebackup and then stream changes
2280  * starting from that.
2281  */
2282  if (am_walsender && !am_db_walsender)
2283  port->database_name[0] = '\0';
2284 
2285  /*
2286  * Done putting stuff in TopMemoryContext.
2287  */
2288  MemoryContextSwitchTo(oldcontext);
2289 
2290  /*
2291  * If we're going to reject the connection due to database state, say so
2292  * now instead of wasting cycles on an authentication exchange. (This also
2293  * allows a pg_ping utility to be written.)
2294  */
2295  switch (port->canAcceptConnections)
2296  {
2297  case CAC_STARTUP:
2298  ereport(FATAL,
2300  errmsg("the database system is starting up")));
2301  break;
2302  case CAC_NOTCONSISTENT:
2303  if (EnableHotStandby)
2304  ereport(FATAL,
2306  errmsg("the database system is not yet accepting connections"),
2307  errdetail("Consistent recovery state has not been yet reached.")));
2308  else
2309  ereport(FATAL,
2311  errmsg("the database system is not accepting connections"),
2312  errdetail("Hot standby mode is disabled.")));
2313  break;
2314  case CAC_SHUTDOWN:
2315  ereport(FATAL,
2317  errmsg("the database system is shutting down")));
2318  break;
2319  case CAC_RECOVERY:
2320  ereport(FATAL,
2322  errmsg("the database system is in recovery mode")));
2323  break;
2324  case CAC_TOOMANY:
2325  ereport(FATAL,
2326  (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
2327  errmsg("sorry, too many clients already")));
2328  break;
2329  case CAC_SUPERUSER:
2330  /* OK for now, will check in InitPostgres */
2331  break;
2332  case CAC_OK:
2333  break;
2334  }
2335 
2336  return STATUS_OK;
2337 }
2338 
2339 /*
2340  * Send a NegotiateProtocolVersion to the client. This lets the client know
2341  * that they have requested a newer minor protocol version than we are able
2342  * to speak. We'll speak the highest version we know about; the client can,
2343  * of course, abandon the connection if that's a problem.
2344  *
2345  * We also include in the response a list of protocol options we didn't
2346  * understand. This allows clients to include optional parameters that might
2347  * be present either in newer protocol versions or third-party protocol
2348  * extensions without fear of having to reconnect if those options are not
2349  * understood, while at the same time making certain that the client is aware
2350  * of which options were actually accepted.
2351  */
2352 static void
2353 SendNegotiateProtocolVersion(List *unrecognized_protocol_options)
2354 {
     /*
      * NOTE(review): listing line 2355 is missing from this extract.  `buf` is
      * used below but its declaration is not visible here; presumably it sat
      * on the dropped line -- confirm against the upstream file.
      */
2356  ListCell *lc;
2357 
2358  pq_beginmessage(&buf, 'v'); /* NegotiateProtocolVersion */
     /*
      * NOTE(review): listing line 2359 is missing here, so one statement
      * between opening the message and writing the option count is not shown.
      */
     /* Write the number of unrecognized options, then each option name. */
2360  pq_sendint32(&buf, list_length(unrecognized_protocol_options));
2361  foreach(lc, unrecognized_protocol_options)
2362  pq_sendstring(&buf, lfirst(lc));
2363  pq_endmessage(&buf);
2364 
2365  /* no need to flush, some other message will follow */
2366 }
2367 
2368 /*
2369  * The client has sent a cancel request packet, not a normal
2370  * start-a-new-connection packet. Perform the necessary processing.
2371  * Nothing is sent back to the client.
2372  */
2373 static void
     /*
      * NOTE(review): listing line 2374 (the function name and parameter list)
      * is missing from this extract.  The body reads `pkt`, so that is
      * presumably one of the parameters -- confirm against the upstream file.
      */
2375 {
2376  CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
2377  int backendPID;
2378  int32 cancelAuthCode;
2379  Backend *bp;
2380 
     /* The loop variable differs because the two builds scan different containers. */
2381 #ifndef EXEC_BACKEND
2382  dlist_iter iter;
2383 #else
2384  int i;
2385 #endif
2386 
     /* Decode the two 32-bit fields of the request with pg_ntoh32(). */
2387  backendPID = (int) pg_ntoh32(canc->backendPID);
2388  cancelAuthCode = (int32) pg_ntoh32(canc->cancelAuthCode);
2389 
2390  /*
2391  * See if we have a matching backend. In the EXEC_BACKEND case, we can no
2392  * longer access the postmaster's own backend list, and must rely on the
2393  * duplicate array in shared memory.
2394  */
2395 #ifndef EXEC_BACKEND
2396  dlist_foreach(iter, &BackendList)
2397  {
2398  bp = dlist_container(Backend, elem, iter.cur);
2399 #else
2400  for (i = MaxLivePostmasterChildren() - 1; i >= 0; i--)
2401  {
2402  bp = (Backend *) &ShmemBackendArray[i];
2403 #endif
2404  if (bp->pid == backendPID)
2405  {
2406  if (bp->cancel_key == cancelAuthCode)
2407  {
2408  /* Found a match; signal that backend to cancel current op */
2409  ereport(DEBUG2,
2410  (errmsg_internal("processing cancel request: sending SIGINT to process %d",
2411  backendPID)));
2412  signal_child(bp->pid, SIGINT);
2413  }
2414  else
2415  /* Right PID, wrong key: no way, Jose */
2416  ereport(LOG,
2417  (errmsg("wrong key in cancel request for process %d",
2418  backendPID)));
     /* The PID matched, so stop scanning whether or not the key was right. */
2419  return;
2420  }
2421 #ifndef EXEC_BACKEND /* make GNU Emacs 26.1 see brace balance */
2422  }
2423 #else
2424  }
2425 #endif
2426 
2427  /* No matching backend */
2428  ereport(LOG,
2429  (errmsg("PID %d in cancel request did not match any process",
2430  backendPID)));
2431 }
2432 
2433 /*
2434  * canAcceptConnections --- check to see if database state allows connections
2435  * of the specified type. backend_type can be BACKEND_TYPE_NORMAL,
2436  * BACKEND_TYPE_AUTOVAC, or BACKEND_TYPE_BGWORKER. (Note that we don't yet
2437  * know whether a NORMAL connection might turn into a walsender.)
2438  */
2439 static CAC_state
2440 canAcceptConnections(int backend_type)
2441 {
     /* Start from CAC_OK; the checks below either return early or downgrade it. */
2442  CAC_state result = CAC_OK;
2443 
2444  /*
2445  * Can't start backends when in startup/shutdown/inconsistent recovery
2446  * state. We treat autovac workers the same as user backends for this
2447  * purpose. However, bgworkers are excluded from this test; we expect
2448  * bgworker_should_start_now() to have decided whether the DB state allows them.
2449  */
2450  if (pmState != PM_RUN && pmState != PM_HOT_STANDBY &&
2451  backend_type != BACKEND_TYPE_BGWORKER)
2452  {
2453  if (Shutdown > NoShutdown)
2454  return CAC_SHUTDOWN; /* shutdown is pending */
2455  else if (!FatalError && pmState == PM_STARTUP)
2456  return CAC_STARTUP; /* normal startup */
2457  else if (!FatalError && pmState == PM_RECOVERY)
2458  return CAC_NOTCONSISTENT; /* not yet at consistent recovery
2459  * state */
2460  else
2461  return CAC_RECOVERY; /* else must be crash recovery */
2462  }
2463 
2464  /*
2465  * "Smart shutdown" restrictions are applied only to normal connections,
2466  * not to autovac workers or bgworkers. When only superusers can connect,
2467  * we return CAC_SUPERUSER to indicate that superuserness must be checked
2468  * later. Note that neither CAC_OK nor CAC_SUPERUSER can safely be
2469  * returned until we have checked for too many children.
2470  */
2471  if (connsAllowed != ALLOW_ALL_CONNS &&
2472  backend_type == BACKEND_TYPE_NORMAL)
2473  {
     /*
      * NOTE(review): listing line 2474 is missing from this extract; it held
      * the `if` condition that selects the superuser-only branch below.
      */
2475  result = CAC_SUPERUSER; /* allow superusers only */
2476  else
2477  return CAC_SHUTDOWN; /* shutdown is pending */
2478  }
2479 
2480  /*
2481  * Don't start too many children.
2482  *
2483  * We allow more connections here than we can have backends because some
2484  * might still be authenticating; they might fail auth, or some existing
2485  * backend might exit before the auth cycle is completed. The exact
2486  * MaxBackends limit is enforced when a new backend tries to join the
2487  * shared-inval backend array.
2488  *
2489  * The limit here must match the sizes of the per-child-process arrays;
2490  * see comments for MaxLivePostmasterChildren().
2491  */
     /*
      * NOTE(review): listing line 2492 is missing from this extract; it held
      * the condition guarding the assignment below.  As shown, the assignment
      * looks unconditional, which it presumably is not -- confirm upstream.
      */
2493  result = CAC_TOOMANY;
2494 
2495  return result;
2496 }
2497 
2498 
2499 /*
2500  * ConnCreate -- create a local connection data structure
2501  *
2502  * Returns NULL on failure, other than out-of-memory which is fatal.
2503  */
2504 static Port *
2505 ConnCreate(int serverFd)
2506 {
2507  Port *port;
2508 
     /* calloc() is used, so the new Port starts out zero-filled. */
2509  if (!(port = (Port *) calloc(1, sizeof(Port))))
2510  {
     /* Out of memory is treated as fatal: log it and exit with status 1. */
2511  ereport(LOG,
2512  (errcode(ERRCODE_OUT_OF_MEMORY),
2513  errmsg("out of memory")));
2514  ExitPostmaster(1);
2515  }
2516 
     /*
      * If StreamConnection() fails, close the socket (when one was set in the
      * Port) before freeing the Port, and report failure by returning NULL.
      */
2517  if (StreamConnection(serverFd, port) != STATUS_OK)
2518  {
2519  if (port->sock != PGINVALID_SOCKET)
2520  StreamClose(port->sock);
2521  ConnFree(port);
2522  return NULL;
2523  }
2524 
2525  return port;
2526 }
2527 
2528 
2529 /*
2530  * ConnFree -- free a local connection data structure
2531  *
2532  * Caller has already closed the socket if any, so there's not much
2533  * to do here.
2534  */
2535 static void
     /*
      * NOTE(review): listing line 2536 (the function name and parameter list)
      * is missing from this extract.  The body frees `conn`, so that is
      * presumably the parameter -- confirm against the upstream file.
      */
2537 {
     /* Only the structure itself is released here; nothing else is touched. */
2538  free(conn);
2539 }
2540 
2541 
2542 /*
2543  * ClosePostmasterPorts -- close all the postmaster's open sockets
2544  *
2545  * This is called during child process startup to release file descriptors
2546  * that are not needed by that child process. The postmaster still has
2547  * them open, of course.
2548  *
2549  * Note: we pass am_syslogger as a boolean because we don't want to set
2550  * the global variable yet when this is called.
2551  */
2552 void
2553 ClosePostmasterPorts(bool am_syslogger)
2554 {
2555  int i;
2556 
2557 #ifndef WIN32
2558 
2559  /*
2560  * Close the write end of postmaster death watch pipe. It's important to
2561  * do this as early as possible, so that if postmaster dies, others won't
2562  * think that it's still running because we're holding the pipe open.
2563  */
     /*
      * NOTE(review): listing lines 2564, 2566, 2568 and 2570 are missing from
      * this extract.  The close call and its failure test, the error code
      * argument, and the statements after the ereport() are not visible.
      */
2565  ereport(FATAL,
2567  errmsg_internal("could not close postmaster death monitoring pipe in child process: %m")));
2569  /* Notify fd.c that we released one pipe FD. */
2571 #endif
2572 
2573  /*
2574  * Close the postmaster's listen sockets. These aren't tracked by fd.c,
2575  * so we don't call ReleaseExternalFD() here.
2576  */
2577  for (i = 0; i < MAXLISTEN; i++)
2578  {
2579  if (ListenSocket[i] != PGINVALID_SOCKET)
2580  {
     /*
      * NOTE(review): listing lines 2581-2582 are missing from this extract, so
      * the statements executed for each open listen socket are not visible.
      */
2583  }
2584  }
2585 
2586  /*
2587  * If using syslogger, close the read side of the pipe. We don't bother
2588  * tracking this in fd.c, either.
2589  */
2590  if (!am_syslogger)
2591  {
     /*
      * The "not open" marker differs by platform: a negative descriptor on
      * non-Windows builds, a zero handle on Windows.  The slot is reset to
      * that marker in either case.
      */
2592 #ifndef WIN32
2593  if (syslogPipe[0] >= 0)
2594  close(syslogPipe[0]);
2595  syslogPipe[0] = -1;
2596 #else
2597  if (syslogPipe[0])
2598  CloseHandle(syslogPipe[0]);
2599  syslogPipe[0] = 0;
2600 #endif
2601  }
2602 
2603 #ifdef USE_BONJOUR
2604  /* If using Bonjour, close the connection to the mDNS daemon */
2605  if (bonjour_sdref)
2606  close(DNSServiceRefSockFD(bonjour_sdref));
2607 #endif
2608 }
2609 
2610 
2611 /*
2612  * InitProcessGlobals -- set MyProcPid, MyStartTime[stamp], random seeds
2613  *
2614  * Called early in the postmaster and every backend.
2615  */
2616 void
     /*
      * NOTE(review): listing line 2617 (the function name and parameter list)
      * is missing from this extract.
      */
2618 {
2619  unsigned int rseed;
2620 
2621  MyProcPid = getpid();
     /*
      * NOTE(review): listing lines 2622-2623 are missing from this extract.
      * MyStartTimestamp is read in the fallback seed below, so it is
      * presumably assigned on those lines -- confirm against upstream.
      */
2624 
2625  /*
2626  * Set a different seed for random() in every process. We want something
2627  * unpredictable, so if possible, use high-quality random bits for the
2628  * seed. Otherwise, fall back to a seed based on timestamp and PID.
2629  */
2630  if (!pg_strong_random(&rseed, sizeof(rseed)))
2631  {
2632  /*
2633  * Since PIDs and timestamps tend to change more frequently in their
2634  * least significant bits, shift the timestamp left to allow a larger
2635  * total number of seeds in a given time period. Since that would
2636  * leave only 20 bits of the timestamp that cycle every ~1 second,
2637  * also mix in some higher bits.
2638  */
     /*
      * The XOR is evaluated in uint64 and then narrowed to unsigned int by
      * the assignment to rseed.
      */
2639  rseed = ((uint64) MyProcPid) ^
2640  ((uint64) MyStartTimestamp << 12) ^
2641  ((uint64) MyStartTimestamp >> 20);
2642  }
2643  srandom(rseed);
2644 }
2645 
2646 
2647 /*
2648  * reset_shared -- reset shared memory and semaphores
2649  */
2650 static void
     /*
      * NOTE(review): listing line 2651 (the function name and parameter list)
      * is missing from this extract.
      */
2652 {
2653  /*
2654  * Create or re-create shared memory and semaphores.
2655  *
2656  * Note: in each "cycle of life" we will normally assign the same IPC keys
2657  * (if using SysV shmem and/or semas). This helps ensure that we will
2658  * clean up dead IPC objects if the postmaster crashes and is restarted.
2659  */
     /*
      * NOTE(review): listing line 2660 is missing from this extract; it held
      * the only statement of this function, so the call that does the work
      * described above is not visible here.
      */
2661 }
2662 
2663 
2664 /*
2665  * SIGHUP -- reread config files, and tell children to do same
2666  */
2667 static void
     /*
      * NOTE(review): listing line 2668 (the function name and parameter list)
      * is missing from this extract.
      */
2669 {
     /* errno is saved here and restored on exit. */
2670  int save_errno = errno;
2671 
2672  /*
2673  * We rely on the signal mechanism to have blocked all signals ... except
2674  * on Windows, which lacks sigaction(), so we have to do it manually.
2675  */
2676 #ifdef WIN32
2677  PG_SETMASK(&BlockSig);
2678 #endif
2679 
     /* The reload is skipped once a shutdown stronger than "smart" is pending. */
2680  if (Shutdown <= SmartShutdown)
2681  {
2682  ereport(LOG,
2683  (errmsg("received SIGHUP, reloading configuration files")));
     /*
      * NOTE(review): listing lines 2684-2685 are missing from this extract,
      * and so is the statement after each `if (...PID != 0)` test below (the
      * odd-numbered lines 2687-2703).  The header comment says children are
      * told to reread their config, so each dropped line presumably signals
      * the corresponding child -- confirm against upstream.
      */
2686  if (StartupPID != 0)
2688  if (BgWriterPID != 0)
2690  if (CheckpointerPID != 0)
2692  if (WalWriterPID != 0)
2694  if (WalReceiverPID != 0)
2696  if (AutoVacPID != 0)
2698  if (PgArchPID != 0)
2700  if (SysLoggerPID != 0)
2702  if (PgStatPID != 0)
2704 
2705  /* Reload authentication config files too */
2706  if (!load_hba())
2707  ereport(LOG,
2708  /* translator: %s is a configuration file */
2709  (errmsg("%s was not reloaded", "pg_hba.conf")));
2710 
2711  if (!load_ident())
2712  ereport(LOG,
2713  (errmsg("%s was not reloaded", "pg_ident.conf")));
2714 
2715 #ifdef USE_SSL
2716  /* Reload SSL configuration as well */
     /*
      * If secure_initialize() fails, LoadedSSL is left at its previous value
      * and only a LOG message is emitted.  If SSL is disabled, the SSL state
      * is destroyed and LoadedSSL is cleared.
      */
2717  if (EnableSSL)
2718  {
2719  if (secure_initialize(false) == 0)
2720  LoadedSSL = true;
2721  else
2722  ereport(LOG,
2723  (errmsg("SSL configuration was not reloaded")));
2724  }
2725  else
2726  {
2727  secure_destroy();
2728  LoadedSSL = false;
2729  }
2730 #endif
2731 
2732 #ifdef EXEC_BACKEND
2733  /* Update the starting-point file for future children */
2734  write_nondefault_variables(PGC_SIGHUP);
2735 #endif
2736  }
2737 
2738 #ifdef WIN32
     /*
      * NOTE(review): listing line 2739 is missing from this extract; it is the
      * WIN32-only statement paired with the PG_SETMASK(&BlockSig) call above.
      */
2740 #endif
2741 
2742  errno = save_errno;
2743 }
2744 
2745 
2746 /*
2747  * pmdie -- signal handler for processing various postmaster signals.
2748  */
2749 static void
     /*
      * NOTE(review): listing line 2750 (the function name and parameter list)
      * is missing from this extract.  The body reads `postgres_signal_arg`,
      * so that is presumably the parameter -- confirm against upstream.
      *
      * Other listing lines missing from this function: 2777, 2782, 2796,
      * 2798, 2802, 2810, 2823, 2828, 2836, 2839, 2844, 2851, 2865, 2870,
      * 2876-2878, 2887 and 2892.  Wherever an `if`, `else if` or comment
      * below appears to have no statement after it, the statement was on one
      * of those lines.
      */
2751 {
     /* errno is saved here and restored on exit. */
2752  int save_errno = errno;
2753 
2754  /*
2755  * We rely on the signal mechanism to have blocked all signals ... except
2756  * on Windows, which lacks sigaction(), so we have to do it manually.
2757  */
2758 #ifdef WIN32
2759  PG_SETMASK(&BlockSig);
2760 #endif
2761 
2762  ereport(DEBUG2,
2763  (errmsg_internal("postmaster received signal %d",
2764  postgres_signal_arg)));
2765 
     /*
      * Each case begins by breaking out if a shutdown of the same or a
      * stronger mode is already in progress, so a request is never downgraded.
      */
2766  switch (postgres_signal_arg)
2767  {
2768  case SIGTERM:
2769 
2770  /*
2771  * Smart Shutdown:
2772  *
2773  * Wait for children to end their work, then shut down.
2774  */
2775  if (Shutdown >= SmartShutdown)
2776  break;
     /* NOTE(review): listing line 2777 is missing here. */
2778  ereport(LOG,
2779  (errmsg("received smart shutdown request")));
2780 
2781  /* Report status */
     /* NOTE(review): listing line 2782 is missing here. */
2783 #ifdef USE_SYSTEMD
2784  sd_notify(0, "STOPPING=1");
2785 #endif
2786 
2787  /*
2788  * If we reached normal running, we have to wait for any online
2789  * backup mode to end; otherwise go straight to waiting for client
2790  * backends to exit. (The difference is that in the former state,
2791  * we'll still let in new superuser clients, so that somebody can
2792  * end the online backup mode.) If already in PM_STOP_BACKENDS or
2793  * a later state, do not change it.
2794  */
     /*
      * NOTE(review): the statement for each branch (listing lines 2796, 2798
      * and 2802) is missing from this extract.
      */
2795  if (pmState == PM_RUN)
2797  else if (pmState == PM_HOT_STANDBY)
2799  else if (pmState == PM_STARTUP || pmState == PM_RECOVERY)
2800  {
2801  /* There should be no clients, so proceed to stop children */
2803  }
2804 
2805  /*
2806  * Now wait for online backup mode to end and backends to exit. If
2807  * that is already the case, PostmasterStateMachine will take the
2808  * next step.
2809  */
     /* NOTE(review): listing line 2810 is missing here. */
2811  break;
2812 
2813  case SIGINT:
2814 
2815  /*
2816  * Fast Shutdown:
2817  *
2818  * Abort all children with SIGTERM (rollback active transactions
2819  * and exit) and shut down when they are gone.
2820  */
2821  if (Shutdown >= FastShutdown)
2822  break;
     /* NOTE(review): listing line 2823 is missing here. */
2824  ereport(LOG,
2825  (errmsg("received fast shutdown request")));
2826 
2827  /* Report status */
     /* NOTE(review): listing line 2828 is missing here. */
2829 #ifdef USE_SYSTEMD
2830  sd_notify(0, "STOPPING=1");
2831 #endif
2832 
2833  if (pmState == PM_STARTUP || pmState == PM_RECOVERY)
2834  {
2835  /* Just shut down background processes silently */
     /* NOTE(review): listing line 2836 is missing here. */
2837  }
     /*
      * NOTE(review): listing line 2839, the rest of this condition, is
      * missing from this extract.
      */
2838  else if (pmState == PM_RUN ||
2840  {
2841  /* Report that we're about to zap live client sessions */
2842  ereport(LOG,
2843  (errmsg("aborting any active transactions")));
     /* NOTE(review): listing line 2844 is missing here. */
2845  }
2846 
2847  /*
2848  * PostmasterStateMachine will issue any necessary signals, or
2849  * take the next step if no child processes need to be killed.
2850  */
     /* NOTE(review): listing line 2851 is missing here. */
2852  break;
2853 
2854  case SIGQUIT:
2855 
2856  /*
2857  * Immediate Shutdown:
2858  *
2859  * abort all children with SIGQUIT, wait for them to exit,
2860  * terminate remaining ones with SIGKILL, then exit without
2861  * attempt to properly shut down the data base system.
2862  */
2863  if (Shutdown >= ImmediateShutdown)
2864  break;
     /* NOTE(review): listing line 2865 is missing here. */
2866  ereport(LOG,
2867  (errmsg("received immediate shutdown request")));
2868 
2869  /* Report status */
     /* NOTE(review): listing line 2870 is missing here. */
2871 #ifdef USE_SYSTEMD
2872  sd_notify(0, "STOPPING=1");
2873 #endif
2874 
2875  /* tell children to shut down ASAP */
     /* NOTE(review): listing lines 2876-2878 are missing here. */
2879 
2880  /* set stopwatch for them to die */
2881  AbortStartTime = time(NULL);
2882 
2883  /*
2884  * Now wait for backends to exit. If there are none,
2885  * PostmasterStateMachine will take the next step.
2886  */
     /* NOTE(review): listing line 2887 is missing here. */
2888  break;
2889  }
2890 
2891 #ifdef WIN32
     /*
      * NOTE(review): listing line 2892 is missing here; it is the WIN32-only
      * statement paired with the PG_SETMASK(&BlockSig) call above.
      */
2893 #endif
2894 
2895  errno = save_errno;
2896 }
2897 
2898 /*
2899  * Reaper -- signal handler to cleanup after a child process dies.
2900  */
2901 static void
     /*
      * NOTE(review): listing line 2902 (the function name and parameter list)
      * is missing from this extract.
      *
      * Other listing lines missing from this function: 2935-2936, 2945-2946,
      * 2948, 2959, 2989, 2991, 2993, 2996, 3005, 3010, 3018, 3020, 3022,
      * 3028-3029, 3031, 3036, 3043, 3087, 3091, 3097, 3099, 3106, 3181, 3206,
      * 3231 and 3235.  Wherever an `if` or comment below appears to have no
      * statement after it, the statement was on one of those lines.
      *
      * Overall shape of the visible code: collect every exited child with a
      * non-blocking waitpid(), work out which child it was by comparing its
      * PID with the recorded PIDs, clear that PID, and react.  Each branch
      * ends in `continue`, so one exited child is handled by exactly one
      * branch.
      */
2903 {
     /* errno is saved here and restored on exit. */
2904  int save_errno = errno;
2905  int pid; /* process id of dead child process */
2906  int exitstatus; /* its exit status */
2907 
2908  /*
2909  * We rely on the signal mechanism to have blocked all signals ... except
2910  * on Windows, which lacks sigaction(), so we have to do it manually.
2911  */
2912 #ifdef WIN32
2913  PG_SETMASK(&BlockSig);
2914 #endif
2915 
2916  ereport(DEBUG4,
2917  (errmsg_internal("reaping dead processes")));
2918 
     /* WNOHANG: keep collecting exited children without ever blocking. */
2919  while ((pid = waitpid(-1, &exitstatus, WNOHANG)) > 0)
2920  {
2921  /*
2922  * Check if this child was a startup process.
2923  */
2924  if (pid == StartupPID)
2925  {
2926  StartupPID = 0;
2927 
2928  /*
2929  * Startup process exited in response to a shutdown request (or it
2930  * completed normally regardless of the shutdown request).
2931  */
2932  if (Shutdown > NoShutdown &&
2933  (EXIT_STATUS_0(exitstatus) || EXIT_STATUS_1(exitstatus)))
2934  {
     /* NOTE(review): listing lines 2935-2936 are missing here. */
2937  /* PostmasterStateMachine logic does the rest */
2938  continue;
2939  }
2940 
2941  if (EXIT_STATUS_3(exitstatus))
2942  {
2943  ereport(LOG,
2944  (errmsg("shutdown at recovery target")));
     /* NOTE(review): listing lines 2945-2946 and 2948 are missing around here. */
2947  TerminateChildren(SIGTERM);
2949  /* PostmasterStateMachine logic does the rest */
2950  continue;
2951  }
2952 
2953  /*
2954  * Unexpected exit of startup process (including FATAL exit)
2955  * during PM_STARTUP is treated as catastrophic. There are no
2956  * other processes running yet, so we can just exit.
2957  */
     /*
      * NOTE(review): listing line 2959, the middle term of this condition, is
      * missing from this extract.
      */
2958  if (pmState == PM_STARTUP &&
2960  !EXIT_STATUS_0(exitstatus))
2961  {
2962  LogChildExit(LOG, _("startup process"),
2963  pid, exitstatus);
2964  ereport(LOG,
2965  (errmsg("aborting startup due to startup process failure")));
2966  ExitPostmaster(1);
2967  }
2968 
2969  /*
2970  * After PM_STARTUP, any unexpected exit (including FATAL exit) of
2971  * the startup process is catastrophic, so kill other children,
2972  * and set StartupStatus so we don't try to reinitialize after
2973  * they're gone. Exception: if StartupStatus is STARTUP_SIGNALED,
2974  * then we previously sent the startup process a SIGQUIT; so
2975  * that's probably the reason it died, and we do want to try to
2976  * restart in that case.
2977  *
2978  * This stanza also handles the case where we sent a SIGQUIT
2979  * during PM_STARTUP due to some dead_end child crashing: in that
2980  * situation, if the startup process dies on the SIGQUIT, we need
2981  * to transition to PM_WAIT_BACKENDS state which will allow
2982  * PostmasterStateMachine to restart the startup process. (On the
2983  * other hand, the startup process might complete normally, if we
2984  * were too late with the SIGQUIT. In that case we'll fall
2985  * through and commence normal operations.)
2986  */
2987  if (!EXIT_STATUS_0(exitstatus))
2988  {
     /*
      * NOTE(review): listing lines 2989, 2991, 2993 and 2996 are missing from
      * this extract, including the inner `if` condition and the statements
      * of both branches other than the HandleChildCrash() call.
      */
2990  {
2992  if (pmState == PM_STARTUP)
2994  }
2995  else
2997  HandleChildCrash(pid, exitstatus,
2998  _("startup process"));
2999  continue;
3000  }
3001 
3002  /*
3003  * Startup succeeded, commence normal operations
3004  */
     /* NOTE(review): listing lines 3005 and 3010 are missing around here. */
3006  FatalError = false;
3007  AbortStartTime = 0;
3008  ReachedNormalRunning = true;
3009  pmState = PM_RUN;
3011 
3012  /*
3013  * Crank up the background tasks, if we didn't do that already
3014  * when we entered consistent recovery state. It doesn't matter
3015  * if this fails, we'll just try again later.
3016  */
     /*
      * NOTE(review): the statement after each of these three tests (listing
      * lines 3018, 3020 and 3022) is missing from this extract.
      */
3017  if (CheckpointerPID == 0)
3019  if (BgWriterPID == 0)
3021  if (WalWriterPID == 0)
3023 
3024  /*
3025  * Likewise, start other special children as needed. In a restart
3026  * situation, some of them may be alive already.
3027  */
     /* NOTE(review): listing lines 3028-3029 and 3031 are missing around here. */
3030  if (PgArchStartupAllowed() && PgArchPID == 0)
3032  if (PgStatPID == 0)
3033  PgStatPID = pgstat_start();
3034 
3035  /* workers may be scheduled to start now */
     /* NOTE(review): listing line 3036 is missing here. */
3037 
3038  /* at this point we are really open for business */
3039  ereport(LOG,
3040  (errmsg("database system is ready to accept connections")));
3041 
3042  /* Report status */
     /* NOTE(review): listing line 3043 is missing here. */
3044 #ifdef USE_SYSTEMD
3045  sd_notify(0, "READY=1");
3046 #endif
3047 
3048  continue;
3049  }
3050 
3051  /*
3052  * Was it the bgwriter? Normal exit can be ignored; we'll start a new
3053  * one at the next iteration of the postmaster's main loop, if
3054  * necessary. Any other exit condition is treated as a crash.
3055  */
3056  if (pid == BgWriterPID)
3057  {
3058  BgWriterPID = 0;
3059  if (!EXIT_STATUS_0(exitstatus))
3060  HandleChildCrash(pid, exitstatus,
3061  _("background writer process"));
3062  continue;
3063  }
3064 
3065  /*
3066  * Was it the checkpointer?
3067  */
3068  if (pid == CheckpointerPID)
3069  {
3070  CheckpointerPID = 0;
3071  if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN)
3072  {
3073  /*
3074  * OK, we saw normal exit of the checkpointer after it's been
3075  * told to shut down. We expect that it wrote a shutdown
3076  * checkpoint. (If for some reason it didn't, recovery will
3077  * occur on next postmaster start.)
3078  *
3079  * At this point we should have no normal backend children
3080  * left (else we'd not be in PM_SHUTDOWN state) but we might
3081  * have dead_end children to wait for.
3082  *
3083  * If we have an archiver subprocess, tell it to do a last
3084  * archive cycle and quit. Likewise, if we have walsender
3085  * processes, tell them to send any remaining WAL and quit.
3086  */
     /*
      * NOTE(review): listing lines 3087, 3091, 3097, 3099 and 3106 are missing
      * from this extract, so the statements in this branch are not visible.
      */
3088 
3089  /* Waken archiver for the last time */
3090  if (PgArchPID != 0)
3092 
3093  /*
3094  * Waken walsenders for the last time. No regular backends
3095  * should be around anymore.
3096  */
3098 
3100 
3101  /*
3102  * We can also shut down the stats collector now; there's
3103  * nothing left for it to do.
3104  */
3105  if (PgStatPID != 0)
3107  }
3108  else
3109  {
3110  /*
3111  * Any unexpected exit of the checkpointer (including FATAL
3112  * exit) is treated as a crash.
3113  */
3114  HandleChildCrash(pid, exitstatus,
3115  _("checkpointer process"));
3116  }
3117 
3118  continue;
3119  }
3120 
3121  /*
3122  * Was it the wal writer? Normal exit can be ignored; we'll start a
3123  * new one at the next iteration of the postmaster's main loop, if
3124  * necessary. Any other exit condition is treated as a crash.
3125  */
3126  if (pid == WalWriterPID)
3127  {
3128  WalWriterPID = 0;
3129  if (!EXIT_STATUS_0(exitstatus))
3130  HandleChildCrash(pid, exitstatus,
3131  _("WAL writer process"));
3132  continue;
3133  }
3134 
3135  /*
3136  * Was it the wal receiver? If exit status is zero (normal) or one
3137  * (FATAL exit), we assume everything is all right just like normal
3138  * backends. (If we need a new wal receiver, we'll start one at the
3139  * next iteration of the postmaster's main loop.)
3140  */
3141  if (pid == WalReceiverPID)
3142  {
3143  WalReceiverPID = 0;
3144  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3145  HandleChildCrash(pid, exitstatus,
3146  _("WAL receiver process"));
3147  continue;
3148  }
3149 
3150  /*
3151  * Was it the autovacuum launcher? Normal exit can be ignored; we'll
3152  * start a new one at the next iteration of the postmaster's main
3153  * loop, if necessary. Any other exit condition is treated as a
3154  * crash.
3155  */
3156  if (pid == AutoVacPID)
3157  {
3158  AutoVacPID = 0;
3159  if (!EXIT_STATUS_0(exitstatus))
3160  HandleChildCrash(pid, exitstatus,
3161  _("autovacuum launcher process"));
3162  continue;
3163  }
3164 
3165  /*
3166  * Was it the archiver? If exit status is zero (normal) or one (FATAL
3167  * exit), we assume everything is all right just like normal backends
3168  * and just try to restart a new one so that we immediately retry
3169  * archiving remaining files. (If fail, we'll try again in future
3170  * cycles of the postmaster's main loop.) Unless we were waiting for
3171  * it to shut down; don't restart it in that case, and
3172  * PostmasterStateMachine() will advance to the next shutdown step.
3173  */
3174  if (pid == PgArchPID)
3175  {
3176  PgArchPID = 0;
3177  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3178  HandleChildCrash(pid, exitstatus,
3179  _("archiver process"));
     /*
      * NOTE(review): listing line 3181, the statement guarded by this test, is
      * missing from this extract.
      */
3180  if (PgArchStartupAllowed())
3182  continue;
3183  }
3184 
3185  /*
3186  * Was it the statistics collector? If so, just try to start a new
3187  * one; no need to force reset of the rest of the system. (If fail,
3188  * we'll try again in future cycles of the main loop.)
3189  */
3190  if (pid == PgStatPID)
3191  {
3192  PgStatPID = 0;
     /* A nonzero exit is only logged here; HandleChildCrash() is not called. */
3193  if (!EXIT_STATUS_0(exitstatus))
3194  LogChildExit(LOG, _("statistics collector process"),
3195  pid, exitstatus);
3196  if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
3197  PgStatPID = pgstat_start();
3198  continue;
3199  }
3200 
3201  /* Was it the system logger? If so, try to start a new one */
3202  if (pid == SysLoggerPID)
3203  {
3204  SysLoggerPID = 0;
3205  /* for safety's sake, launch new logger *first* */
     /* NOTE(review): listing line 3206 is missing here. */
3207  if (!EXIT_STATUS_0(exitstatus))
3208  LogChildExit(LOG, _("system logger process"),
3209  pid, exitstatus);
3210  continue;
3211  }
3212 
3213  /* Was it one of our background workers? */
3214  if (CleanupBackgroundWorker(pid, exitstatus))
3215  {
3216  /* have it be restarted */
3217  HaveCrashedWorker = true;
3218  continue;
3219  }
3220 
3221  /*
3222  * Else do standard backend child cleanup.
3223  */
3224  CleanupBackend(pid, exitstatus);
3225  } /* loop over pending child-death reports */
3226 
3227  /*
3228  * After cleaning out the SIGCHLD queue, see if we have any state changes
3229  * or actions to make.
3230  */
     /* NOTE(review): listing line 3231 is missing here. */
3232 
3233  /* Done with signal handler */
3234 #ifdef WIN32
     /*
      * NOTE(review): listing line 3235 is missing here; it is the WIN32-only
      * statement paired with the PG_SETMASK(&BlockSig) call above.
      */
3236 #endif
3237 
3238  errno = save_errno;
3239 }
3240 
3241 /*
3242  * Scan the bgworkers list and see if the given PID (which has just stopped
3243  * or crashed) is in it. Handle its shutdown if so, and return true. If not a
3244  * bgworker, return false.
3245  *
3246  * This is heavily based on CleanupBackend. One important difference is that
3247  * we don't know yet that the dying process is a bgworker, so we must be silent
3248  * until we're sure it is.
3249  */
3250 static bool
3252  int exitstatus) /* child's exit status */
3253 {
3254  char namebuf[MAXPGPATH];
3255  slist_mutable_iter iter;
3256 
3258  {
3259  RegisteredBgWorker *rw;
3260 
3261  rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
3262 
3263  if (rw->rw_pid != pid)
3264  continue;
3265 
3266 #ifdef WIN32
3267  /* see CleanupBackend */
3268  if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3269  exitstatus = 0;
3270 #endif
3271 
3272  snprintf(namebuf, MAXPGPATH, _("background worker \"%s\""),
3273  rw->rw_worker.bgw_type);
3274 
3275 
3276  if (!EXIT_STATUS_0(exitstatus))
3277  {
3278  /* Record timestamp, so we know when to restart the worker. */
3280  }
3281  else
3282  {
3283  /* Zero exit status means terminate */
3284  rw->rw_crashed_at = 0;
3285  rw->rw_terminate = true;
3286  }
3287 
3288  /*
3289  * Additionally, for shared-memory-connected workers, just like a
3290  * backend, any exit status other than 0 or 1 is considered a crash
3291  * and causes a system-wide restart.
3292  */
3293  if ((rw->rw_worker.bgw_flags & BGWORKER_SHMEM_ACCESS) != 0)
3294  {
3295  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3296  {
3297  HandleChildCrash(pid, exitstatus, namebuf);
3298  return true;
3299  }
3300  }
3301 
3302  /*
3303  * We must release the postmaster child slot whether this worker is
3304  * connected to shared memory or not, but we only treat it as a crash
3305  * if it is in fact connected.
3306  */
3309  {
3310  HandleChildCrash(pid, exitstatus, namebuf);
3311  return true;
3312  }
3313 
3314  /* Get it out of the BackendList and clear out remaining data */
3315  dlist_delete(&rw->rw_backend->elem);
3316 #ifdef EXEC_BACKEND
3317  ShmemBackendArrayRemove(rw->rw_backend);
3318 #endif
3319 
3320  /*
3321  * It's possible that this background worker started some OTHER
3322  * background worker and asked to be notified when that worker started
3323  * or stopped. If so, cancel any notifications destined for the
3324  * now-dead backend.
3325  */
3326  if (rw->rw_backend->bgworker_notify)
3328  free(rw->rw_backend);
3329  rw->rw_backend = NULL;
3330  rw->rw_pid = 0;
3331  rw->rw_child_slot = 0;
3332  ReportBackgroundWorkerExit(&iter); /* report child death */
3333 
3334  LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG,
3335  namebuf, pid, exitstatus);
3336 
3337  return true;
3338  }
3339 
3340  return false;
3341 }
3342 
3343 /*
3344  * CleanupBackend -- cleanup after terminated backend.
3345  *
3346  * Remove all local state associated with backend.
3347  *
3348  * If you change this, see also CleanupBackgroundWorker.
3349  */
3350 static void
3352  int exitstatus) /* child's exit status. */
3353 {
3354  dlist_mutable_iter iter;
3355 
3356  LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
3357 
3358  /*
3359  * If a backend dies in an ugly way then we must signal all other backends
3360  * to quickdie. If exit status is zero (normal) or one (FATAL exit), we
3361  * assume everything is all right and proceed to remove the backend from
3362  * the active backend list.
3363  */
3364 
3365 #ifdef WIN32
3366 
3367  /*
3368  * On win32, also treat ERROR_WAIT_NO_CHILDREN (128) as nonfatal case,
3369  * since that sometimes happens under load when the process fails to start
3370  * properly (long before it starts using shared memory). Microsoft reports
3371  * it is related to mutex failure:
3372  * http://archives.postgresql.org/pgsql-hackers/2010-09/msg00790.php
3373  */
3374  if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3375  {
3376  LogChildExit(LOG, _("server process"), pid, exitstatus);
3377  exitstatus = 0;
3378  }
3379 #endif
3380 
3381  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3382  {
3383  HandleChildCrash(pid, exitstatus, _("server process"));
3384  return;
3385  }
3386 
3387  dlist_foreach_modify(iter, &BackendList)
3388  {
3389  Backend *bp = dlist_container(Backend, elem, iter.cur);
3390 
3391  if (bp->pid == pid)
3392  {
3393  if (!bp->dead_end)
3394  {
3396  {
3397  /*
3398  * Uh-oh, the child failed to clean itself up. Treat as a
3399  * crash after all.
3400  */
3401  HandleChildCrash(pid, exitstatus, _("server process"));
3402  return;
3403  }
3404 #ifdef EXEC_BACKEND
3405  ShmemBackendArrayRemove(bp);
3406 #endif
3407  }
3408  if (bp->bgworker_notify)
3409  {
3410  /*
3411  * This backend may have been slated to receive SIGUSR1 when
3412  * some background worker started or stopped. Cancel those
3413  * notifications, as we don't want to signal PIDs that are not
3414  * PostgreSQL backends. This gets skipped in the (probably
3415  * very common) case where the backend has never requested any
3416  * such notifications.
3417  */
3419  }
3420  dlist_delete(iter.cur);
3421  free(bp);
3422  break;
3423  }
3424  }
3425 }
3426 
3427 /*
3428  * HandleChildCrash -- cleanup after failed backend, bgworker, startup process,
3429  * bgwriter, checkpointer, walwriter, walreceiver, autovacuum or archiver.
3430  *
3431  * The objectives here are to clean up our local state about the child
3432  * process, and to signal all other remaining children to quickdie.
3433  */
3434 static void
3435 HandleChildCrash(int pid, int exitstatus, const char *procname)
3436 {
3437  dlist_mutable_iter iter;
3438  slist_iter siter;
3439  Backend *bp;
3440  bool take_action;
3441 
3442  /*
3443  * We only log messages and send signals if this is the first process
3444  * crash and we're not doing an immediate shutdown; otherwise, we're only
3445  * here to update postmaster's idea of live processes. If we have already
3446  * signaled children, nonzero exit status is to be expected, so don't
3447  * clutter log.
3448  */
3449  take_action = !FatalError && Shutdown != ImmediateShutdown;
3450 
3451  if (take_action)
3452  {
3453  LogChildExit(LOG, procname, pid, exitstatus);
3454  ereport(LOG,
3455  (errmsg("terminating any other active server processes")));
3457  }
3458 
3459  /* Process background workers. */
3461  {
3462  RegisteredBgWorker *rw;
3463 
3464  rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
3465  if (rw->rw_pid == 0)
3466  continue; /* not running */
3467  if (rw->rw_pid == pid)
3468  {
3469  /*
3470  * Found entry for freshly-dead worker, so remove it.
3471  */
3473  dlist_delete(&rw->rw_backend->elem);
3474 #ifdef EXEC_BACKEND
3475  ShmemBackendArrayRemove(rw->rw_backend);
3476 #endif
3477  free(rw->rw_backend);
3478  rw->rw_backend = NULL;
3479  rw->rw_pid = 0;
3480  rw->rw_child_slot = 0;
3481  /* don't reset crashed_at */
3482  /* don't report child stop, either */
3483  /* Keep looping so we can signal remaining workers */
3484  }
3485  else
3486  {
3487  /*
3488  * This worker is still alive. Unless we did so already, tell it
3489  * to commit hara-kiri.
3490  *
3491  * SIGQUIT is the special signal that says exit without proc_exit
3492  * and let the user know what's going on. But if SendStop is set
3493  * (-T on command line), then we send SIGSTOP instead, so that we
3494  * can get core dumps from all backends by hand.
3495  */
3496  if (take_action)
3497  {
3498  ereport(DEBUG2,
3499  (errmsg_internal("sending %s to process %d",
3500  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3501  (int) rw->rw_pid)));
3503  }
3504  }
3505  }
3506 
3507  /* Process regular backends */
3508  dlist_foreach_modify(iter, &BackendList)
3509  {
3510  bp = dlist_container(Backend, elem, iter.cur);
3511 
3512  if (bp->pid == pid)
3513  {
3514  /*
3515  * Found entry for freshly-dead backend, so remove it.
3516  */
3517  if (!bp->dead_end)
3518  {
3520 #ifdef EXEC_BACKEND
3521  ShmemBackendArrayRemove(bp);
3522 #endif
3523  }
3524  dlist_delete(iter.cur);
3525  free(bp);
3526  /* Keep looping so we can signal remaining backends */
3527  }
3528  else
3529  {
3530  /*
3531  * This backend is still alive. Unless we did so already, tell it
3532  * to commit hara-kiri.
3533  *
3534  * SIGQUIT is the special signal that says exit without proc_exit
3535  * and let the user know what's going on. But if SendStop is set
3536  * (-T on command line), then we send SIGSTOP instead, so that we
3537  * can get core dumps from all backends by hand.
3538  *
3539  * We could exclude dead_end children here, but at least in the
3540  * SIGSTOP case it seems better to include them.
3541  *
3542  * Background workers were already processed above; ignore them
3543  * here.
3544  */
3545  if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
3546  continue;
3547 
3548  if (take_action)
3549  {
3550  ereport(DEBUG2,
3551  (errmsg_internal("sending %s to process %d",
3552  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3553  (int) bp->pid)));
3554  signal_child(bp->pid, (SendStop ? SIGSTOP : SIGQUIT));
3555  }
3556  }
3557  }
3558 
3559  /* Take care of the startup process too */
3560  if (pid == StartupPID)
3561  {
3562  StartupPID = 0;
3563  /* Caller adjusts StartupStatus, so don't touch it here */
3564  }
3565  else if (StartupPID != 0 && take_action)
3566  {
3567  ereport(DEBUG2,
3568  (errmsg_internal("sending %s to process %d",
3569  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3570  (int) StartupPID)));
3571  signal_child(StartupPID, (SendStop ? SIGSTOP : SIGQUIT));
3573  }
3574 
3575  /* Take care of the bgwriter too */
3576  if (pid == BgWriterPID)
3577  BgWriterPID = 0;
3578  else if (BgWriterPID != 0 && take_action)
3579  {
3580  ereport(DEBUG2,
3581  (errmsg_internal("sending %s to process %d",
3582  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3583  (int) BgWriterPID)));
3584  signal_child(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
3585  }
3586 
3587  /* Take care of the checkpointer too */
3588  if (pid == CheckpointerPID)
3589  CheckpointerPID = 0;
3590  else if (CheckpointerPID != 0 && take_action)
3591  {
3592  ereport(DEBUG2,
3593  (errmsg_internal("sending %s to process %d",
3594  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3595  (int) CheckpointerPID)));
3596  signal_child(CheckpointerPID, (SendStop ? SIGSTOP : SIGQUIT));
3597  }
3598 
3599  /* Take care of the walwriter too */
3600  if (pid == WalWriterPID)
3601  WalWriterPID = 0;
3602  else if (WalWriterPID != 0 && take_action)
3603  {
3604  ereport(DEBUG2,
3605  (errmsg_internal("sending %s to process %d",
3606  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3607  (int) WalWriterPID)));
3608  signal_child(WalWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
3609  }
3610 
3611  /* Take care of the walreceiver too */
3612  if (pid == WalReceiverPID)
3613  WalReceiverPID = 0;
3614  else if (WalReceiverPID != 0 && take_action)
3615  {
3616  ereport(DEBUG2,
3617  (errmsg_internal("sending %s to process %d",
3618  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3619  (int) WalReceiverPID)));
3620  signal_child(WalReceiverPID, (SendStop ? SIGSTOP : SIGQUIT));
3621  }
3622 
3623  /* Take care of the autovacuum launcher too */
3624  if (pid == AutoVacPID)
3625  AutoVacPID = 0;
3626  else if (AutoVacPID != 0 && take_action)
3627  {
3628  ereport(DEBUG2,
3629  (errmsg_internal("sending %s to process %d",
3630  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3631  (int) AutoVacPID)));
3632  signal_child(AutoVacPID, (SendStop ? SIGSTOP : SIGQUIT));
3633  }
3634 
3635  /* Take care of the archiver too */
3636  if (pid == PgArchPID)
3637  PgArchPID = 0;
3638  else if (PgArchPID != 0 && take_action)
3639  {
3640  ereport(DEBUG2,
3641  (errmsg_internal("sending %s to process %d",
3642  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3643  (int) PgArchPID)));
3644  signal_child(PgArchPID, (SendStop ? SIGSTOP : SIGQUIT));
3645  }
3646 
3647  /*
3648  * Force a power-cycle of the pgstat process too. (This isn't absolutely
3649  * necessary, but it seems like a good idea for robustness, and it
3650  * simplifies the state-machine logic in the case where a shutdown request
3651  * arrives during crash processing.)
3652  */
3653  if (PgStatPID != 0 && take_action)
3654  {
3655  ereport(DEBUG2,
3656  (errmsg_internal("sending %s to process %d",
3657  "SIGQUIT",
3658  (int) PgStatPID)));
3659  signal_child(PgStatPID, SIGQUIT);
3661  }
3662 
3663  /* We do NOT restart the syslogger */
3664 
3665  if (Shutdown != ImmediateShutdown)
3666  FatalError = true;
3667 
3668  /* We now transit into a state of waiting for children to die */
3669  if (pmState == PM_RECOVERY ||
3670  pmState == PM_HOT_STANDBY ||
3671  pmState == PM_RUN ||
3673  pmState == PM_SHUTDOWN)
3675 
3676  /*
3677  * .. and if this doesn't happen quickly enough, now the clock is ticking
3678  * for us to kill them without mercy.
3679  */
3680  if (AbortStartTime == 0)
3681  AbortStartTime = time(NULL);
3682 }
3683 
/*
 * LogChildExit -- write a log entry describing how a child process ended.
 *
 * lev is the ereport() severity to use, procname is a noun phrase naming
 * the kind of child (the callers visible in this file pass translated
 * strings), pid is the child's process ID, and exitstatus is its wait
 * status.
 *
 * The wait status is decoded into one of three messages: normal exit with an
 * exit code, termination by a signal (reported as an exception code on
 * Windows), or a status we cannot interpret.  When the status is nonzero we
 * also ask pgstat for the activity string of the dead process and, if one is
 * returned, attach it to the message as an errdetail.
 */
static void
LogChildExit(int lev, const char *procname, int pid, int exitstatus)
{
	/*
	 * The buffer length is arbitrary; 1024 matches the default value of
	 * track_activity_query_size.
	 */
	char		query_buf[1024];
	const char *running;

	/* Only look up the activity string when the exit status is nonzero. */
	running = EXIT_STATUS_0(exitstatus) ? NULL :
		pgstat_get_crashed_backend_activity(pid, query_buf, sizeof(query_buf));

	/* Case 1: the child called exit(); report its exit code. */
	if (WIFEXITED(exitstatus))
	{
		ereport(lev,

		/*------
		  translator: %s is a noun phrase describing a child process, such as
		  "server process" */
				(errmsg("%s (PID %d) exited with exit code %d",
						procname, pid, WEXITSTATUS(exitstatus)),
				 running ? errdetail("Failed process was running: %s", running) : 0));
		return;
	}

	/* Case 2: neither exited nor signaled; just print the raw status. */
	if (!WIFSIGNALED(exitstatus))
	{
		ereport(lev,

		/*------
		  translator: %s is a noun phrase describing a child process, such as
		  "server process" */
				(errmsg("%s (PID %d) exited with unrecognized status %d",
						procname, pid, exitstatus),
				 running ? errdetail("Failed process was running: %s", running) : 0));
		return;
	}

	/* Case 3: the child was killed by a signal (an exception on Windows). */
#ifdef WIN32
	ereport(lev,

	/*------
	  translator: %s is a noun phrase describing a child process, such as
	  "server process" */
			(errmsg("%s (PID %d) was terminated by exception 0x%X",
					procname, pid, WTERMSIG(exitstatus)),
			 errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
			 running ? errdetail("Failed process was running: %s", running) : 0));
#else
	ereport(lev,

	/*------
	  translator: %s is a noun phrase describing a child process, such as
	  "server process" */
			(errmsg("%s (PID %d) was terminated by signal %d: %s",
					procname, pid, WTERMSIG(exitstatus),
					pg_strsignal(WTERMSIG(exitstatus))),
			 running ? errdetail("Failed process was running: %s", running) : 0));
#endif
}
3745 
3746 /*
3747  * Advance the postmaster's state machine and take actions as appropriate
3748  *
3749  * This is common code for pmdie(), reaper() and sigusr1_handler(), which
3750  * receive the signals that might mean we need to change state.
3751  */
3752 static void
3754 {
3755  /* If we're doing a smart shutdown, try to advance that state. */
3756  if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
3757  {
3759  {
3760  /*
3761  * ALLOW_SUPERUSER_CONNS state ends as soon as online backup mode
3762  * is not active.
3763  */
3764  if (!BackupInProgress())
3766  }
3767 
3769  {
3770  /*
3771  * ALLOW_NO_CONNS state ends when we have no normal client
3772  * backends running. Then we're ready to stop other children.
3773  */
3776  }
3777  }
3778 
3779  /*
3780  * If we're ready to do so, signal child processes to shut down. (This
3781  * isn't a persistent state, but treating it as a distinct pmState allows
3782  * us to share this code across multiple shutdown code paths.)
3783  */
3784  if (pmState == PM_STOP_BACKENDS)
3785  {
3786  /*
3787  * Forget any pending requests for background workers, since we're no
3788  * longer willing to launch any new workers. (If additional requests
3789  * arrive, BackgroundWorkerStateChange will reject them.)
3790  */
3792 
3793  /* Signal all backend children except walsenders */
3794  SignalSomeChildren(SIGTERM,
3796  /* and the autovac launcher too */
3797  if (AutoVacPID != 0)
3798  signal_child(AutoVacPID, SIGTERM);
3799  /* and the bgwriter too */
3800  if (BgWriterPID != 0)
3801  signal_child(BgWriterPID, SIGTERM);
3802  /* and the walwriter too */
3803  if (WalWriterPID != 0)
3804  signal_child(WalWriterPID, SIGTERM);
3805  /* If we're in recovery, also stop startup and walreceiver procs */
3806  if (StartupPID != 0)
3807  signal_child(StartupPID, SIGTERM);
3808  if (WalReceiverPID != 0)
3809  signal_child(WalReceiverPID, SIGTERM);
3810  /* checkpointer, archiver, stats, and syslogger may continue for now */
3811 
3812  /* Now transition to PM_WAIT_BACKENDS state to wait for them to die */
3814  }
3815 
3816  /*
3817  * If we are in a state-machine state that implies waiting for backends to
3818  * exit, see if they're all gone, and change state if so.
3819  */
3820  if (pmState == PM_WAIT_BACKENDS)
3821  {
3822  /*
3823  * PM_WAIT_BACKENDS state ends when we have no regular backends
3824  * (including autovac workers), no bgworkers (including unconnected
3825  * ones), and no walwriter, autovac launcher or bgwriter. If we are
3826  * doing crash recovery or an immediate shutdown then we expect the
3827  * checkpointer to exit as well, otherwise not. The stats and
3828  * syslogger processes are disregarded since they are not connected to
3829  * shared memory; we also disregard dead_end children here. Walsenders
3830  * and archiver are also disregarded, they will be terminated later
3831  * after writing the checkpoint record.
3832  */
3834  StartupPID == 0 &&
3835  WalReceiverPID == 0 &&
3836  BgWriterPID == 0 &&
3837  (CheckpointerPID == 0 ||
3839  WalWriterPID == 0 &&
3840  AutoVacPID == 0)
3841  {
3843  {
3844  /*
3845  * Start waiting for dead_end children to die. This state
3846  * change causes ServerLoop to stop creating new ones.
3847  */
3849 
3850  /*
3851  * We already SIGQUIT'd the archiver and stats processes, if
3852  * any, when we started immediate shutdown or entered
3853  * FatalError state.
3854  */
3855  }
3856  else
3857  {
3858  /*
3859  * If we get here, we are proceeding with normal shutdown. All
3860  * the regular children are gone, and it's time to tell the
3861  * checkpointer to do a shutdown checkpoint.
3862  */
3864  /* Start the checkpointer if not running */
3865  if (CheckpointerPID == 0)
3867  /* And tell it to shut down */
3868  if (CheckpointerPID != 0)
3869  {
3871  pmState = PM_SHUTDOWN;
3872  }
3873  else
3874  {
3875  /*
3876  * If we failed to fork a checkpointer, just shut down.
3877  * Any required cleanup will happen at next restart. We
3878  * set FatalError so that an "abnormal shutdown" message
3879  * gets logged when we exit.
3880  */
3881  FatalError = true;
3883 
3884  /* Kill the walsenders, archiver and stats collector too */
3886  if (PgArchPID != 0)
3888  if (PgStatPID != 0)
3890  }
3891  }
3892  }
3893  }
3894 
3895  if (pmState == PM_SHUTDOWN_2)
3896  {
3897  /*
3898  * PM_SHUTDOWN_2 state ends when there are no children left other than
3899  * dead_end children. There shouldn't be any regular backends
3900  * left by now anyway; what we're really waiting for is walsenders and
3901  * archiver.
3902  */
3903  if (PgArchPID == 0 && CountChildren(BACKEND_TYPE_ALL) == 0)
3904  {
3906  }
3907  }
3908 
3909  if (pmState == PM_WAIT_DEAD_END)
3910  {
3911  /*
3912  * PM_WAIT_DEAD_END state ends when the BackendList is entirely empty
3913  * (ie, no dead_end children remain), and the archiver and stats
3914  * collector are gone too.
3915  *
3916  * The reason we wait for those two is to protect them against a new
3917  * postmaster starting conflicting subprocesses; this isn't an
3918  * ironclad protection, but it at least helps in the
3919  * shutdown-and-immediately-restart scenario. Note that they have
3920  * already been sent appropriate shutdown signals, either during a
3921  * normal state transition leading up to PM_WAIT_DEAD_END, or during
3922  * FatalError processing.
3923  */
3924  if (dlist_is_empty(&BackendList) &&
3925  PgArchPID == 0 && PgStatPID == 0)
3926  {
3927  /* These other guys should be dead already */
3928  Assert(StartupPID == 0);
3929  Assert(WalReceiverPID == 0);
3930  Assert(BgWriterPID == 0);
3931  Assert(CheckpointerPID == 0);
3932  Assert(WalWriterPID == 0);
3933  Assert(AutoVacPID == 0);
3934  Assert(PgArchPID == 0);
3935  /* syslogger is not considered here */
3937  }
3938  }
3939 
3940  /*
3941  * If we've been told to shut down, we exit as soon as there are no
3942  * remaining children. If there was a crash, cleanup will occur at the
3943  * next startup. (Before PostgreSQL 8.3, we tried to recover from the
3944  * crash before exiting, but that seems unwise if we are quitting because
3945  * we got SIGTERM from init --- there may well not be time for recovery
3946  * before init decides to SIGKILL us.)
3947  *
3948  * Note that the syslogger continues to run. It will exit when it sees
3949  * EOF on its input pipe, which happens when there are no more upstream
3950  * processes.
3951  */
3953  {
3954  if (FatalError)
3955  {
3956  ereport(LOG, (errmsg("abnormal database system shutdown")));
3957  ExitPostmaster(1);
3958  }
3959  else
3960  {
3961  /*
3962  * Terminate exclusive backup mode to avoid recovery after a clean
3963  * fast shutdown. Since an exclusive backup can only be taken
3964  * during normal running (and not, for example, while running
3965  * under Hot Standby) it only makes sense to do this if we reached
3966  * normal running. If we're still in recovery, the backup file is
3967  * one we're recovering *from*, and we must keep it around so that
3968  * recovery restarts from the right place.
3969  */
3971  CancelBackup();
3972 
3973  /* Normal exit from the postmaster is here */
3974  ExitPostmaster(0);
3975  }
3976  }
3977 
3978  /*
3979  * If the startup process failed, or the user does not want an automatic
3980  * restart after backend crashes, wait for all non-syslogger children to
3981  * exit, and then exit postmaster. We don't try to reinitialize when the
3982  * startup process fails, because more than likely it will just fail again
3983  * and we will keep trying forever.
3984  */
3985  if (pmState == PM_NO_CHILDREN &&
3987  ExitPostmaster(1);
3988 
3989  /*
3990  * If we need to recover from a crash, wait for all non-syslogger children
3991  * to exit, then reset shmem and StartupDataBase.
3992  */
3993  if (FatalError && pmState == PM_NO_CHILDREN)
3994  {
3995  ereport(LOG,
3996  (errmsg("all server processes terminated; reinitializing")));
3997 
3998  /* remove leftover temporary files after a crash */
4001 
4002  /* allow background workers to immediately restart */
4004 
4005  shmem_exit(1);
4006 
4007  /* re-read control file into local memory */
4009 
4010  reset_shared();
4011 
4013  Assert(StartupPID != 0);
4015  pmState = PM_STARTUP;
4016  /* crash recovery started, reset SIGKILL flag */
4017  AbortStartTime = 0;
4018  }
4019 }
4020 
4021 
4022 /*
4023  * Send a signal to a postmaster child process
4024  *
4025  * On systems that have setsid(), each child process sets itself up as a
4026  * process group leader. For signals that are generally interpreted in the
4027  * appropriate fashion, we signal the entire process group not just the
4028  * direct child process. This allows us to, for example, SIGQUIT a blocked
4029  * archive_recovery script, or SIGINT a script being run by a backend via
4030  * system().
4031  *
4032  * There is a race condition for recently-forked children: they might not
4033  * have executed setsid() yet. So we signal the child directly as well as
4034  * the group. We assume such a child will handle the signal before trying
4035  * to spawn any grandchild processes. We also assume that signaling the
4036  * child twice will not cause any problems.
4037  */
4038 static void
4039 signal_child(pid_t pid, int signal)
4040 {
4041  if (kill(pid, signal) < 0)
4042  elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) pid, signal);
4043 #ifdef HAVE_SETSID
4044  switch (signal)
4045  {
4046  case SIGINT:
4047  case SIGTERM:
4048  case SIGQUIT:
4049  case SIGSTOP:
4050  case SIGKILL:
4051  if (kill(-pid, signal) < 0)
4052  elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) (-pid), signal);
4053  break;
4054  default:
4055  break;
4056  }
4057 #endif
4058 }
4059 
4060 /*
4061  * Send a signal to the targeted children (but NOT special children;
4062  * dead_end children are never signaled, either).
4063  */
4064 static bool
4065 SignalSomeChildren(int signal, int target)
4066 {
4067  dlist_iter iter;
4068  bool signaled = false;
4069 
4070  dlist_foreach(iter, &BackendList)
4071  {
4072  Backend *bp = dlist_container(Backend, elem, iter.cur);
4073 
4074  if (bp->dead_end)
4075  continue;
4076 
4077  /*
4078  * Since target == BACKEND_TYPE_ALL is the most common case, we test
4079  * it first and avoid touching shared memory for every child.
4080  */
4081  if (target != BACKEND_TYPE_ALL)
4082  {
4083  /*
4084  * Assign bkend_type for any recently announced WAL Sender
4085  * processes.
4086  */
4087  if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
4090 
4091  if (!(target & bp->bkend_type))
4092  continue;
4093  }
4094 
4095  ereport(DEBUG4,
4096  (errmsg_internal("sending signal %d to process %d",
4097  signal, (int) bp->pid)));
4098  signal_child(bp->pid, signal);
4099  signaled = true;
4100  }
4101  return signaled;
4102 }
4103 
4104 /*
4105  * Send a termination signal to children. This considers all of our child
4106  * processes, except syslogger and dead_end backends.
4107  */
4108 static void
4110 {
4111  SignalChildren(signal);
4112  if (StartupPID != 0)
4113  {
4114  signal_child(StartupPID, signal);
4115  if (signal == SIGQUIT || signal == SIGKILL)
4117  }
4118  if (BgWriterPID != 0)
4119  signal_child(BgWriterPID, signal);
4120  if (CheckpointerPID != 0)
4121  signal_child(CheckpointerPID, signal);
4122  if (WalWriterPID != 0)
4123  signal_child(WalWriterPID, signal);
4124  if (WalReceiverPID != 0)
4125  signal_child(WalReceiverPID, signal);
4126  if (AutoVacPID != 0)
4127  signal_child(AutoVacPID, signal);
4128  if (PgArchPID != 0)
4129  signal_child(PgArchPID, signal);
4130  if (PgStatPID != 0)
4131  signal_child(PgStatPID, signal);
4132 }
4133 
4134 /*
4135  * BackendStartup -- start backend process
4136  *
4137  * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
4138  *
4139  * Note: if you change this code, also consider StartAutovacuumWorker.
4140  */
4141 static int
4143 {
4144  Backend *bn; /* for backend cleanup */
4145  pid_t pid;
4146 
4147  /*
4148  * Create backend data structure. Better before the fork() so we can
4149  * handle failure cleanly.
4150  */
4151  bn = (Backend *) malloc(sizeof(Backend));
4152  if (!bn)
4153  {
4154  ereport(LOG,
4155  (errcode(ERRCODE_OUT_OF_MEMORY),
4156  errmsg("out of memory")));
4157  return STATUS_ERROR;
4158  }
4159 
4160  /*
4161  * Compute the cancel key that will be assigned to this backend. The
4162  * backend will have its own copy in the forked-off process' value of
4163  * MyCancelKey, so that it can transmit the key to the frontend.
4164  */
4166  {
4167  free(bn);
4168  ereport(LOG,
4169  (errcode(ERRCODE_INTERNAL_ERROR),
4170  errmsg("could not generate random cancel key")));
4171  return STATUS_ERROR;
4172  }
4173 
4174  bn->cancel_key = MyCancelKey;
4175 
4176  /* Pass down canAcceptConnections state */
4178  bn->dead_end = (port->canAcceptConnections != CAC_OK &&
4180 
4181  /*
4182  * Unless it's a dead_end child, assign it a child slot number
4183  */
4184  if (!bn->dead_end)
4186  else
4187  bn->child_slot = 0;
4188 
4189  /* Hasn't asked to be notified about any bgworkers yet */
4190  bn->bgworker_notify = false;
4191 
4192 #ifdef EXEC_BACKEND
4193  pid = backend_forkexec(port);
4194 #else /* !EXEC_BACKEND */
4195  pid = fork_process();
4196  if (pid == 0) /* child */
4197  {
4198  free(bn);
4199 
4200  /* Detangle from postmaster */
4202 
4203  /* Close the postmaster's sockets */
4204  ClosePostmasterPorts(false);
4205 
4206  /* Perform additional initialization and collect startup packet */
4207  BackendInitialize(port);
4208 
4209  /* And run the backend */
4210  BackendRun(port);
4211  }
4212 #endif /* EXEC_BACKEND */
4213 
4214  if (pid < 0)
4215  {
4216  /* in parent, fork failed */
4217  int save_errno = errno;
4218 
4219  if (!bn->dead_end)
4221  free(bn);
4222  errno = save_errno;
4223  ereport(LOG,
4224  (errmsg("could not fork new process for connection: %m")));
4225  report_fork_failure_to_client(port, save_errno);
4226  return STATUS_ERROR;
4227  }
4228 
4229  /* in parent, successful fork */
4230  ereport(DEBUG2,
4231  (errmsg_internal("forked new backend, pid=%d socket=%d",
4232  (int) pid, (int) port->sock)));
4233 
4234  /*
4235  * Everything's been successful, it's safe to add this backend to our list
4236  * of backends.
4237  */
4238  bn->pid = pid;
4239  bn->bkend_type = BACKEND_TYPE_NORMAL; /* Can change later to WALSND */
4240  dlist_push_head(&BackendList, &bn->elem);
4241 
4242 #ifdef EXEC_BACKEND
4243  if (!bn->dead_end)
4244  ShmemBackendArrayAdd(bn);
4245 #endif
4246 
4247  return STATUS_OK;
4248 }
4249 
4250 /*
4251  * Try to report backend fork() failure to client before we close the
4252  * connection. Since we do not care to risk blocking the postmaster on
4253  * this connection, we set the connection to non-blocking and try only once.
4254  *
4255  * This is grungy special-purpose code; we cannot use backend libpq since
4256  * it's not up and running.
4257  */
4258 static void
4260 {
4261  char buffer[1000];
4262  int rc;
4263 
4264  /* Format the error message packet (always V2 protocol) */
4265  snprintf(buffer, sizeof(buffer), "E%s%s\n",
4266  _("could not fork new process for connection: "),
4267  strerror(errnum));
4268 
4269  /* Set port to non-blocking. Don't do send() if this fails */
4270  if (!pg_set_noblock(port->sock))
4271  return;
4272 
4273  /* We'll retry after EINTR, but ignore all other failures */
4274  do
4275  {
4276  rc = send(port->sock, buffer, strlen(buffer) + 1, 0);
4277  } while (rc < 0 && errno == EINTR);
4278 }
4279 
4280 
4281 /*
4282  * BackendInitialize -- initialize an interactive (postmaster-child)
4283  * backend process, and collect the client's startup packet.
4284  *
4285  * returns: nothing. Will not return at all if there's any failure.
4286  *
4287  * Note: this code does not depend on having any access to shared memory.
4288  * Indeed, our approach to SIGTERM/timeout handling *requires* that
4289  * shared memory not have been touched yet; see comments within.
4290  * In the EXEC_BACKEND case, we are physically attached to shared memory
4291  * but have not yet set up most of our local pointers to shmem structures.
4292  */
4293 static void
4295 {
4296  int status;
4297  int ret;
4298  char remote_host[NI_MAXHOST];
4299  char remote_port[NI_MAXSERV];
4300  StringInfoData ps_data;
4301 
4302  /* Save port etc. for ps status */
4303  MyProcPort = port;
4304 
4305  /* Tell fd.c about the long-lived FD associated with the port */
4307 
4308  /*
4309  * PreAuthDelay is a debugging aid for investigating problems in the
4310  * authentication cycle: it can be set in postgresql.conf to allow time to
4311  * attach to the newly-forked backend with a debugger. (See also
4312  * PostAuthDelay, which we allow clients to pass through PGOPTIONS, but it
4313  * is not honored until after authentication.)
4314  */
4315  if (PreAuthDelay > 0)
4316  pg_usleep(PreAuthDelay * 1000000L);
4317 
4318  /* This flag will remain set until InitPostgres finishes authentication */
4319  ClientAuthInProgress = true; /* limit visibility of log messages */
4320 
4321  /* set these to empty in case they are needed before we set them up */
4322  port->remote_host = "";
4323  port->remote_port = "";
4324 
4325  /*
4326  * Initialize libpq and enable reporting of ereport errors to the client.
4327  * Must do this now because authentication uses libpq to send messages.
4328  */
4329  pq_init(); /* initialize libpq to talk to client */
4330  whereToSendOutput = DestRemote; /* now safe to ereport to client */
4331 
4332  /*
4333  * We arrange to do _exit(1) if we receive SIGTERM or timeout while trying
4334  * to collect the startup packet; while SIGQUIT results in _exit(2).
4335  * Otherwise the postmaster cannot shutdown the database FAST or IMMED
4336  * cleanly if a buggy client fails to send the packet promptly.
4337  *
4338  * Exiting with _exit(1) is only possible because we have not yet touched
4339  * shared memory; therefore no outside-the-process state needs to get
4340  * cleaned up.
4341  */
4343  /* SIGQUIT handler was already set up by InitPostmasterChild */
4344  InitializeTimeouts(); /* establishes SIGALRM handler */
4346 
4347  /*
4348  * Get the remote host name and port for logging and status display.
4349  */
4350  remote_host[0] = '\0';
4351  remote_port[0] = '\0';
4352  if ((ret = pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen,
4353  remote_host, sizeof(remote_host),
4354  remote_port, sizeof(remote_port),
4355  (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV)) != 0)
4356  ereport(WARNING,
4357  (errmsg_internal("pg_getnameinfo_all() failed: %s",
4358  gai_strerror(ret))));
4359 
4360  /*
4361  * Save remote_host and remote_port in port structure (after this, they
4362  * will appear in log_line_prefix data for log messages).
4363  */
4364  port->remote_host = strdup(remote_host);
4365  port->remote_port = strdup(remote_port);
4366 
4367  /* And now we can issue the Log_connections message, if wanted */
4368  if (Log_connections)
4369  {
4370  if (remote_port[0])
4371  ereport(LOG,
4372  (errmsg("connection received: host=%s port=%s",
4373  remote_host,
4374  remote_port)));
4375  else
4376  ereport(LOG,
4377  (errmsg("connection received: host=%s",
4378  remote_host)));
4379  }
4380 
4381  /*
4382  * If we did a reverse lookup to name, we might as well save the results
4383  * rather than possibly repeating the lookup during authentication.
4384  *
4385  * Note that we don't want to specify NI_NAMEREQD above, because then we'd
4386  * get nothing useful for a client without an rDNS entry. Therefore, we
4387  * must check whether we got a numeric IPv4 or IPv6 address, and not save
4388  * it into remote_hostname if so. (This test is conservative and might
4389  * sometimes classify a hostname as numeric, but an error in that
4390  * direction is safe; it only results in a possible extra lookup.)
4391  */
4392  if (log_hostname &&
4393  ret == 0 &&
4394  strspn(remote_host, "0123456789.") < strlen(remote_host) &&
4395  strspn(remote_host, "0123456789ABCDEFabcdef:") < strlen(remote_host))
4396  port->remote_hostname = strdup(remote_host);
4397 
4398  /*
4399  * Ready to begin client interaction. We will give up and _exit(1) after
4400  * a time delay, so that a broken client can't hog a connection
4401  * indefinitely. PreAuthDelay and any DNS interactions above don't count
4402  * against the time limit.
4403  *
4404  * Note: AuthenticationTimeout is applied here while waiting for the
4405  * startup packet, and then again in InitPostgres for the duration of any
4406  * authentication operations. So a hostile client could tie up the
4407  * process for nearly twice AuthenticationTimeout before we kick him off.
4408  *
4409  * Note: because PostgresMain will call InitializeTimeouts again, the
4410  * registration of STARTUP_PACKET_TIMEOUT will be lost. This is okay
4411  * since we never use it again after this function.
4412  */
4415 
4416  /*
4417  * Receive the startup packet (which might turn out to be a cancel request
4418  * packet).
4419  */
4420  status = ProcessStartupPacket(port, false, false);
4421 
4422  /*
4423  * Disable the timeout, and prevent SIGTERM again.
4424  */
4426  PG_SETMASK(&BlockSig);
4427 
4428  /*
4429  * As a safety check that nothing in startup has yet performed
4430  * shared-memory modifications that would need to be undone if we had
4431  * exited through SIGTERM or timeout above, check that no on_shmem_exit
4432  * handlers have been registered yet. (This isn't terribly bulletproof,
4433  * since someone might misuse an on_proc_exit handler for shmem cleanup,
4434  * but it's a cheap and helpful check. We cannot disallow on_proc_exit
4435  * handlers unfortunately, since pq_init() already registered one.)
4436  */
4438 
4439  /*
4440  * Stop here if it was bad or a cancel packet. ProcessStartupPacket
4441  * already did any appropriate error reporting.
4442  */
4443  if (status != STATUS_OK)
4444  proc_exit(0);
4445 
4446  /*
4447  * Now that we have the user and database name, we can set the process
4448  * title for ps. It's good to do this as early as possible in startup.
4449  */
4450  initStringInfo(&ps_data);
4451  if (am_walsender)
4453  appendStringInfo(&ps_data, "%s ", port->user_name);
4454  if (!am_walsender)
4455  appendStringInfo(&ps_data, "%s ", port->database_name);
4456  appendStringInfo(&ps_data, "%s", port->remote_host);
4457  if (port->remote_port[0] != '\0')
4458  appendStringInfo(&ps_data, "(%s)", port->remote_port);
4459 
4460  init_ps_display(ps_data.data);
4461  pfree(ps_data.data);
4462 
4463  set_ps_display("initializing");
4464 }
4465 
4466 
4467 /*
4468  * BackendRun -- set up the backend's argument list and invoke PostgresMain()
4469  *
4470  * returns:
4471  * Doesn't return at all.
4472  */
4473 static void
4475 {
4476  char *av[2];
4477  const int ac = 1;
4478 
4479  av[0] = "postgres";
4480  av[1] = NULL;
4481 
4482  /*
4483  * Make sure we aren't in PostmasterContext anymore. (We can't delete it
4484  * just yet, though, because InitPostgres will need the HBA data.)
4485  */
4487 
4488  PostgresMain(ac, av, port->database_name, port->user_name);
4489 }
4490 
4491 
4492 #ifdef EXEC_BACKEND
4493 
4494 /*
4495  * postmaster_forkexec -- fork and exec a postmaster subprocess
4496  *
4497  * The caller must have set up the argv array already, except for argv[2]
4498  * which will be filled with the name of the temp variable file.
4499  *
4500  * Returns the child process PID, or -1 on fork failure (a suitable error
4501  * message has been logged on failure).
4502  *
4503  * All uses of this routine will dispatch to SubPostmasterMain in the
4504  * child process.
4505  */
4506 pid_t
4507 postmaster_forkexec(int argc, char *argv[])
4508 {
4509  Port port;
4510 
4511  /* This entry point passes dummy values for the Port variables */
4512  memset(&port, 0, sizeof(port));
4513  return internal_forkexec(argc, argv, &port);
4514 }
4515 
4516 /*
4517  * backend_forkexec -- fork/exec off a backend process
4518  *
4519  * Some operating systems (WIN32) don't have fork() so we have to simulate
4520  * it by storing parameters that need to be passed to the child and
4521  * then create a new child process.
4522  *
4523  * returns the pid of the fork/exec'd process, or -1 on failure
4524  */
4525 static pid_t
4526 backend_forkexec(Port *port)
4527 {
4528  char *av[4];
4529  int ac = 0;
4530 
4531  av[ac++] = "postgres";
4532  av[ac++] = "--forkbackend";
4533  av[ac++] = NULL; /* filled in by internal_forkexec */
4534 
4535  av[ac] = NULL;
4536  Assert(ac < lengthof(av));
4537 
4538  return internal_forkexec(ac, av, port);
4539 }
4540 
4541 #ifndef WIN32
4542 
4543 /*
4544  * internal_forkexec non-win32 implementation
4545  *
4546  * - writes out backend variables to the parameter file
4547  * - fork():s, and then exec():s the child process
4548  */
4549 static pid_t
4550 internal_forkexec(int argc, char *argv[], Port *port)
4551 {
4552  static unsigned long tmpBackendFileNum = 0;
4553  pid_t pid;
4554  char tmpfilename[MAXPGPATH];
4555  BackendParameters param;
4556  FILE *fp;
4557 
4558  if (!save_backend_variables(&param, port))
4559  return -1; /* log made by save_backend_variables */
4560 
4561  /* Calculate name for temp file */
4562  snprintf(tmpfilename, MAXPGPATH, "%s/%s.backend_var.%d.%lu",
4564  MyProcPid, ++tmpBackendFileNum);
4565 
4566  /* Open file */
4567  fp = AllocateFile(tmpfilename, PG_BINARY_W);
4568  if (!fp)
4569  {
4570  /*
4571  * As in OpenTemporaryFileInTablespace, try to make the temp-file
4572  * directory, ignoring errors.
4573  */
4575 
4576  fp = AllocateFile(tmpfilename, PG_BINARY_W);
4577  if (!fp)
4578  {
4579  ereport(LOG,
4581  errmsg("could not create file \"%s\": %m",
4582  tmpfilename)));
4583  return -1;
4584  }
4585  }
4586 
4587  if (fwrite(&param, sizeof(param), 1, fp) != 1)
4588  {
4589  ereport(LOG,
4591  errmsg("could not write to file \"%s\": %m", tmpfilename)));
4592  FreeFile(fp);
4593  return -1;
4594  }
4595 
4596  /* Release file */
4597  if (FreeFile(fp))
4598  {
4599  ereport(LOG,
4601  errmsg("could not write to file \"%s\": %m", tmpfilename)));
4602  return -1;
4603  }
4604 
4605  /* Make sure caller set up argv properly */
4606  Assert(argc >= 3);
4607  Assert(argv[argc] == NULL);
4608  Assert(strncmp(argv[1], "--fork", 6) == 0);
4609  Assert(argv[2] == NULL);
4610 
4611  /* Insert temp file name after --fork argument */
4612  argv[2] = tmpfilename;
4613 
4614  /* Fire off execv in child */
4615  if ((pid = fork_process()) == 0)
4616  {
4617  if (execv(postgres_exec_path, argv) < 0)
4618  {
4619  ereport(LOG,
4620  (errmsg("could not execute server process \"%s\": %m",
4621  postgres_exec_path)));
4622  /* We're already in the child process here, can't return */
4623  exit(1);
4624  }
4625  }
4626 
4627  return pid; /* Parent returns pid, or -1 on fork failure */
4628 }
4629 #else /* WIN32 */
4630 
4631 /*
4632  * internal_forkexec win32 implementation
4633  *
4634  * - starts backend using CreateProcess(), in suspended state
4635  * - writes out backend variables to the parameter file
4636  * - during this, duplicates handles and sockets required for
4637  * inheritance into the new process
4638  * - resumes execution of the new process once the backend parameter
4639  * file is complete.
4640  */
4641 static pid_t
4642 internal_forkexec(int argc, char *argv[], Port *port)
4643 {
4644  int retry_count = 0;
4645  STARTUPINFO si;
4646  PROCESS_INFORMATION pi;
4647  int i;
4648  int j;
4649  char cmdLine[MAXPGPATH * 2];
4650  HANDLE paramHandle;
4651  BackendParameters *param;
4652  SECURITY_ATTRIBUTES sa;
4653  char paramHandleStr[32];
4654  win32_deadchild_waitinfo *childinfo;
4655 
4656  /* Make sure caller set up argv properly */
4657  Assert(argc >= 3);
4658  Assert(argv[argc] == NULL);
4659  Assert(strncmp(argv[1], "--fork", 6) == 0);
4660  Assert(argv[2] == NULL);
4661 
4662  /* Resume here if we need to retry */
4663 retry:
4664 
4665  /* Set up shared memory for parameter passing */
4666  ZeroMemory(&sa, sizeof(sa));
4667  sa.nLength = sizeof(sa);
4668  sa.bInheritHandle = TRUE;
4669  paramHandle = CreateFileMapping(INVALID_HANDLE_VALUE,
4670  &sa,
4671  PAGE_READWRITE,
4672  0,
4673  sizeof(BackendParameters),
4674  NULL);
4675  if (paramHandle == INVALID_HANDLE_VALUE)
4676  {
4677  ereport(LOG,
4678  (errmsg("could not create backend parameter file mapping: error code %lu",
4679  GetLastError())));
4680  return -1;
4681  }
4682 
4683  param = MapViewOfFile(paramHandle, FILE_MAP_WRITE, 0, 0, sizeof(BackendParameters));
4684  if (!param)
4685  {
4686  ereport(LOG,
4687  (errmsg("could not map backend parameter memory: error code %lu",
4688  GetLastError())));
4689  CloseHandle(paramHandle);
4690  return -1;
4691  }
4692 
4693  /* Insert temp file name after --fork argument */
4694 #ifdef _WIN64
4695  sprintf(paramHandleStr, "%llu", (LONG_PTR) paramHandle);
4696 #else
4697  sprintf(paramHandleStr, "%lu", (DWORD) paramHandle);
4698 #endif
4699  argv[2] = paramHandleStr;
4700 
4701  /* Format the cmd line */
4702  cmdLine[sizeof(cmdLine) - 1] = '\0';
4703  cmdLine[sizeof(cmdLine) - 2] = '\0';
4704  snprintf(cmdLine, sizeof(cmdLine) - 1, "\"%s\"", postgres_exec_path);
4705  i = 0;
4706  while (argv[++i] != NULL)
4707  {
4708  j = strlen(cmdLine);
4709  snprintf(cmdLine + j, sizeof(cmdLine) - 1 - j, " \"%s\"", argv[i]);
4710  }
4711  if (cmdLine[sizeof(cmdLine) - 2] != '\0')
4712  {
4713  ereport(LOG,
4714  (errmsg("subprocess command line too long")));
4715  UnmapViewOfFile(param);
4716  CloseHandle(paramHandle);
4717  return -1;
4718  }
4719 
4720  memset(&pi, 0, sizeof(pi));
4721  memset(&si, 0, sizeof(si));
4722  si.cb = sizeof(si);
4723 
4724  /*
4725  * Create the subprocess in a suspended state. This will be resumed later,
4726  * once we have written out the parameter file.
4727  */
4728  if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, CREATE_SUSPENDED,
4729  NULL, NULL, &si, &pi))
4730  {
4731  ereport(LOG,
4732  (errmsg("CreateProcess() call failed: %m (error code %lu)",
4733  GetLastError())));
4734  UnmapViewOfFile(param);
4735  CloseHandle(paramHandle);
4736  return -1;
4737  }
4738 
4739  if (!save_backend_variables(param, port, pi.hProcess, pi.dwProcessId))
4740  {
4741  /*
4742  * log made by save_backend_variables, but we have to clean up the
4743  * mess with the half-started process
4744  */
4745  if (!TerminateProcess(pi.hProcess, 255))
4746  ereport(LOG,
4747  (errmsg_internal("could not terminate unstarted process: error code %lu",
4748  GetLastError())));
4749  CloseHandle(pi.hProcess);
4750  CloseHandle(pi.hThread);
4751  UnmapViewOfFile(param);
4752  CloseHandle(paramHandle);
4753  return -1; /* log made by save_backend_variables */
4754  }
4755 
4756  /* Drop the parameter shared memory that is now inherited to the backend */
4757  if (!UnmapViewOfFile(param))
4758  ereport(LOG,
4759  (errmsg("could not unmap view of backend parameter file: error code %lu",
4760  GetLastError())));
4761  if (!CloseHandle(paramHandle))
4762  ereport(LOG,
4763  (errmsg("could not close handle to backend parameter file: error code %lu",
4764  GetLastError())));
4765 
4766  /*
4767  * Reserve the memory region used by our main shared memory segment before
4768  * we resume the child process. Normally this should succeed, but if ASLR
4769  * is active then it might sometimes fail due to the stack or heap having
4770  * gotten mapped into that range. In that case, just terminate the
4771  * process and retry.
4772  */
4773  if (!pgwin32_ReserveSharedMemoryRegion(pi.hProcess))
4774  {
4775  /* pgwin32_ReserveSharedMemoryRegion already made a log entry */
4776  if (!TerminateProcess(pi.hProcess, 255))
4777  ereport(LOG,
4778  (errmsg_internal("could not terminate process that failed to reserve memory: error code %lu",
4779  GetLastError())));
4780  CloseHandle(pi.hProcess);
4781  CloseHandle(pi.hThread);
4782  if (++retry_count < 100)
4783  goto retry;
4784  ereport(LOG,
4785  (errmsg("giving up after too many tries to reserve shared memory"),
4786  errhint("This might be caused by ASLR or antivirus software.")));
4787  return -1;
4788  }
4789 
4790  /*
4791  * Now that the backend variables are written out, we start the child
4792  * thread so it can start initializing while we set up the rest of the
4793  * parent state.
4794  */
4795  if (ResumeThread(pi.hThread) == -1)
4796  {
4797  if (!TerminateProcess(pi.hProcess, 255))
4798  {
4799  ereport(LOG,
4800  (errmsg_internal("could not terminate unstartable process: error code %lu",
4801  GetLastError())));
4802  CloseHandle(pi.hProcess);
4803  CloseHandle(pi.hThread);
4804  return -1;
4805  }
4806  CloseHandle(pi.hProcess);
4807  CloseHandle(pi.hThread);
4808  ereport(LOG,
4809  (errmsg_internal("could not resume thread of unstarted process: error code %lu",
4810  GetLastError())));
4811  return -1;
4812  }
4813 
4814  /*
4815  * Queue a waiter to signal when this child dies. The wait will be handled
4816  * automatically by an operating system thread pool.
4817  *
4818  * Note: use malloc instead of palloc, since it needs to be thread-safe.
4819  * Struct will be free():d from the callback function that runs on a
4820  * different thread.
4821  */
4822  childinfo = malloc(sizeof(win32_deadchild_waitinfo));
4823  if (!childinfo)
4824  ereport(FATAL,
4825  (errcode(ERRCODE_OUT_OF_MEMORY),
4826  errmsg("out of memory")));
4827 
4828  childinfo->procHandle = pi.hProcess;
4829  childinfo->procId = pi.dwProcessId;
4830 
4831  if (!RegisterWaitForSingleObject(&childinfo->waitHandle,
4832  pi.hProcess,
4833  pgwin32_deadchild_callback,
4834  childinfo,
4835  INFINITE,
4836  WT_EXECUTEONLYONCE | WT_EXECUTEINWAITTHREAD))
4837  ereport(FATAL,
4838  (errmsg_internal("could not register process for wait: error code %lu",
4839  GetLastError())));
4840 
4841  /* Don't close pi.hProcess here - the wait thread needs access to it */
4842 
4843  CloseHandle(pi.hThread);
4844 
4845  return pi.dwProcessId;
4846 }
4847 #endif /* WIN32 */
4848 
4849 
4850 /*
4851  * SubPostmasterMain -- Get the fork/exec'd process into a state equivalent
4852  * to what it would be if we'd simply forked on Unix, and then
4853  * dispatch to the appropriate place.
4854  *
4855  * The first two command line arguments are expected to be "--forkFOO"
4856  * (where FOO indicates which postmaster child we are to become), and
4857  * the name of a variables file that we can read to load data that would
4858  * have been inherited by fork() on Unix. Remaining arguments go to the
4859  * subprocess FooMain() routine.
4860  */
4861 void
4862 SubPostmasterMain(int argc, char *argv[])
4863 {
4864  Port port;
4865 
4866  /* In EXEC_BACKEND case we will not have inherited these settings */
4867  IsPostmasterEnvironment = true;
4869 
4870  /* Setup essential subsystems (to ensure elog() behaves sanely) */
4872 
4873  /* Check we got appropriate args */
4874  if (argc < 3)
4875  elog(FATAL, "invalid subpostmaster invocation");
4876 
4877  /* Read in the variables file */
4878  memset(&port, 0, sizeof(Port));
4879  read_backend_variables(argv[2], &port);
4880 
4881  /* Close the postmaster's sockets (as soon as we know them) */
4882  ClosePostmasterPorts(strcmp(argv[1], "--forklog") == 0);
4883 
4884  /*
4885  * Start our win32 signal implementation. This has to be done after we
4886  * read the backend variables, because we need to pick up the signal pipe
4887  * from the parent process.
4888  */
4889 #ifdef WIN32
4891 #endif
4892 
4893  /* Setup as postmaster child */
4895 
4896  /*
4897  * If appropriate, physically re-attach to shared memory segment. We want
4898  * to do this before going any further to ensure that we can attach at the
4899  * same address the postmaster used. On the other hand, if we choose not
4900  * to re-attach, we may have other cleanup to do.
4901  *
4902  * If testing EXEC_BACKEND on Linux, you should run this as root before
4903  * starting the postmaster:
4904  *
4905  * echo 0 >/proc/sys/kernel/randomize_va_space
4906  *
4907  * This prevents using randomized stack and code addresses that cause the
4908  * child process's memory map to be different from the parent's, making it
4909  * sometimes impossible to attach to shared memory at the desired address.
4910  * Return the setting to its old value (usually '1' or '2') when finished.
4911  */
4912  if (strcmp(argv[1], "--forkbackend") == 0 ||
4913  strcmp(argv[1], "--forkavlauncher") == 0 ||
4914  strcmp(argv[1], "--forkavworker") == 0 ||
4915  strcmp(argv[1], "--forkboot") == 0 ||
4916  strncmp(argv[1], "--forkbgworker=", 15) == 0)
4918  else
4920 
4921  /* autovacuum needs this set before calling InitProcess */
4922  if (strcmp(argv[1], "--forkavlauncher") == 0)
4923  AutovacuumLauncherIAm();
4924  if (strcmp(argv[1], "--forkavworker") == 0)
4925  AutovacuumWorkerIAm();
4926 
4927  /* Read in remaining GUC variables */
4928  read_nondefault_variables();
4929 
4930  /*
4931  * Check that the data directory looks valid, which will also check the
4932  * privileges on the data directory and update our umask and file/group
4933  * variables for creating files later. Note: this should really be done
4934  * before we create any files or directories.
4935  */
4936  checkDataDir();
4937 
4938  /*
4939  * (re-)read control file, as it contains config. The postmaster will
4940  * already have read this, but this process doesn't know about that.
4941  */
4942  LocalProcessControlFile(false);
4943 
4944  /*
4945  * Reload any libraries that were preloaded by the postmaster. Since we
4946  * exec'd this process, those libraries didn't come along with us; but we
4947  * should load them into all child processes to be consistent with the
4948  * non-EXEC_BACKEND behavior.
4949  */
4951 
4952  /* Run backend or appropriate child */
4953  if (strcmp(argv[1], "--forkbackend") == 0)
4954  {
4955  Assert(argc == 3); /* shouldn't be any more args */
4956 
4957  /*
4958  * Need to reinitialize the SSL library in the backend, since the
4959  * context structures contain function pointers and cannot be passed
4960  * through the parameter file.
4961  *
4962  * If for some reason reload fails (maybe the user installed broken
4963  * key files), soldier on without SSL; that's better than all
4964  * connections becoming impossible.
4965  *
4966  * XXX should we do this in all child processes? For the moment it's
4967  * enough to do it in backend children.
4968  */
4969 #ifdef USE_SSL
4970  if (EnableSSL)
4971  {
4972  if (secure_initialize(false) == 0)
4973  LoadedSSL = true;
4974  else
4975  ereport(LOG,
4976  (errmsg("SSL configuration could not be loaded in child process")));
4977  }
4978 #endif
4979 
4980  /*
4981  * Perform additional initialization and collect startup packet.
4982  *
4983  * We want to do this before InitProcess() for a couple of reasons: 1.
4984  * so that we aren't eating up a PGPROC slot while waiting on the
4985  * client. 2. so that if InitProcess() fails due to being out of
4986  * PGPROC slots, we have already initialized libpq and are able to
4987  * report the error to the client.
4988  */
4989  BackendInitialize(&port);
4990 
4991  /* Restore basic shared memory pointers */
4993 
4994  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4995  InitProcess();
4996 
4997  /* Attach process to shared data structures */
4999 
5000  /* And run the backend */
5001  BackendRun(&port); /* does not return */
5002  }
5003  if (strcmp(argv[1], "--forkboot") == 0)
5004  {
5005  /* Restore basic shared memory pointers */
5007 
5008  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5010 
5011  /* Attach process to shared data structures */
5013 
5014  AuxiliaryProcessMain(argc - 2, argv + 2); /* does not return */
5015  }
5016  if (strcmp(argv[1], "--forkavlauncher") == 0)
5017  {
5018  /* Restore basic shared memory pointers */
5020 
5021  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5022  InitProcess();
5023 
5024  /* Attach process to shared data structures */
5026 
5027  AutoVacLauncherMain(argc - 2, argv + 2); /* does not return */
5028  }
5029  if (strcmp(argv[1], "--forkavworker") == 0)
5030  {
5031  /* Restore basic shared memory pointers */
5033 
5034  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5035  InitProcess();
5036 
5037  /* Attach process to shared data structures */
5039 
5040  AutoVacWorkerMain(argc - 2, argv + 2); /* does not return */
5041  }
5042  if (strncmp(argv[1], "--forkbgworker=", 15) == 0)
5043  {
5044  int shmem_slot;
5045 
5046  /* do this as early as possible; in particular, before InitProcess() */
5047  IsBackgroundWorker = true;
5048 
5049  /* Restore basic shared memory pointers */
5051 
5052  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5053  InitProcess();
5054 
5055  /* Attach process to shared data structures */
5057 
5058  /* Fetch MyBgworkerEntry from shared memory */
5059  shmem_slot = atoi(argv[1] + 15);
5060  MyBgworkerEntry = BackgroundWorkerEntry(shmem_slot);
5061 
5063  }
5064  if (strcmp(argv[1], "--forkcol") == 0)
5065  {
5066  /* Do not want to attach to shared memory */
5067 
5068  PgstatCollectorMain(argc, argv); /* does not return */
5069  }
5070  if (strcmp(argv[1], "--forklog") == 0)
5071  {
5072  /* Do not want to attach to shared memory */
5073 
5074  SysLoggerMain(argc, argv); /* does not return */
5075  }
5076 
5077  abort(); /* shouldn't get here */
5078 }
5079 #endif /* EXEC_BACKEND */
5080 
5081 
5082 /*
5083  * ExitPostmaster -- cleanup
5084  *
5085  * Do NOT call exit() directly --- always go through here!
5086  */
5087 static void
5089 {
5090 #ifdef HAVE_PTHREAD_IS_THREADED_NP
5091 
5092  /*
5093  * There is no known cause for a postmaster to become multithreaded after
5094  * startup. Recheck to account for the possibility of unknown causes.
5095  * This message uses LOG level, because an unclean shutdown at this point
5096  * would usually not look much different from a clean shutdown.
5097  */
5098  if (pthread_is_threaded_np() != 0)
5099  ereport(LOG,
5100  (errcode(ERRCODE_INTERNAL_ERROR),
5101  errmsg_internal("postmaster became multithreaded"),
5102  errdetail("Please report this to <%s>.", PACKAGE_BUGREPORT)));
5103 #endif
5104 
5105  /* should cleanup shared memory and kill all backends */
5106 
5107  /*
5108  * Not sure of the semantics here. When the Postmaster dies, should the
5109  * backends all be killed? probably not.
5110  *
5111  * MUST -- vadim 05-10-1999
5112  */
5113 
5114  proc_exit(status);
5115 }
5116 
5117 /*
5118  * sigusr1_handler - handle signal conditions from child processes
5119  */
5120 static void
5122 {
5123  int save_errno = errno;
5124 
5125  /*
5126  * We rely on the signal mechanism to have blocked all signals ... except
5127  * on Windows, which lacks sigaction(), so we have to do it manually.
5128  */
5129 #ifdef WIN32
5130  PG_SETMASK(&BlockSig);
5131 #endif
5132 
5133  /*
5134  * RECOVERY_STARTED and BEGIN_HOT_STANDBY signals are ignored in
5135  * unexpected states. If the startup process quickly starts up, completes
5136  * recovery, exits, we might process the death of the startup process
5137  * first. We don't want to go back to recovery in that case.
5138  */
5141  {
5142  /* WAL redo has started. We're out of reinitialization. */
5143  FatalError = false;
5144  AbortStartTime = 0;
5145 
5146  /*
5147  * Crank up the background tasks. It doesn't matter if this fails,
5148  * we'll just try again later.
5149  */
5150  Assert(CheckpointerPID == 0);
5152  Assert(BgWriterPID == 0);
5154 
5155  /*
5156  * Start the archiver if we're responsible for (re-)archiving received
5157  * files.
5158  */
5159  Assert(PgArchPID == 0);
5160  if (XLogArchivingAlways())
5162 
5163  /*
5164  * If we aren't planning to enter hot standby mode later, treat
5165  * RECOVERY_STARTED as meaning we're out of startup, and report status
5166  * accordingly.
5167  */
5168  if (!EnableHotStandby)
5169  {
5171 #ifdef USE_SYSTEMD
5172  sd_notify(0, "READY=1");
5173 #endif
5174  }
5175 
5176  pmState = PM_RECOVERY;
5177  }
5178 
5181  {
5182  /*
5183  * Likewise, start other special children as needed.
5184  */
5185  Assert(PgStatPID == 0);
5186  PgStatPID = pgstat_start();
5187 
5188  ereport(LOG,
5189  (errmsg("database system is ready to accept read only connections")));
5190 
5191  /* Report status */
5193 #ifdef USE_SYSTEMD
5194  sd_notify(0, "READY=1");
5195 #endif
5196 
5199 
5200  /* Some workers may be scheduled to start now */
5201  StartWorkerNeeded = true;
5202  }
5203 
5204  /* Process background worker state changes. */
5206  {
5207  /* Accept new worker requests only if not stopping. */
5209  StartWorkerNeeded = true;
5210  }
5211 
5214 
5215  /* Tell syslogger to rotate logfile if requested */
5216  if (SysLoggerPID != 0)
5217  {
5218  if (CheckLogrotateSignal())
5219  {
5222  }
5224  {
5226  }
5227  }
5228 
5231  {
5232  /*
5233  * Start one iteration of the autovacuum daemon, even if autovacuuming
5234  * is nominally not enabled. This is so we can have an active defense
5235  * against transaction ID wraparound. We set a flag for the main loop
5236  * to do it rather than trying to do it here --- this is because the
5237  * autovac process itself may send the signal, and we want to handle
5238  * that by launching another iteration as soon as the current one
5239  * completes.
5240  */
5241  start_autovac_launcher = true;
5242  }
5243 
5246  {
5247  /* The autovacuum launcher wants us to start a worker process. */
5249  }
5250 
5252  {
5253  /* Startup Process wants us to start the walreceiver process. */
5254  /* Start immediately if possible, else remember request for later. */
5255  WalReceiverRequested = true;
5257  }
5258 
5259  /*
5260  * Try to advance postmaster's state machine, if a child requests it.
5261  *
5262  * Be careful about the order of this action relative to sigusr1_handler's
5263  * other actions. Generally, this should be after other actions, in case
5264  * they have effects PostmasterStateMachine would need to know about.
5265  * However, we should do it before the CheckPromoteSignal step, which
5266  * cannot have any (immediate) effect on the state machine, but does
5267  * depend on what state we're in now.
5268  */
5270  {
5272  }
5273 
5274  if (StartupPID != 0 &&
5275  (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5276  pmState == PM_HOT_STANDBY) &&
5278  {
5279  /*
5280  * Tell startup process to finish recovery.
5281  *
5282  * Leave the promote signal file in place and let the Startup process
5283  * do the unlink.
5284  */
5286  }
5287 
5288 #ifdef WIN32
5290 #endif
5291 
5292  errno = save_errno;
5293 }
5294 
5295 /*
5296  * SIGTERM while processing startup packet.
5297  *
5298  * Running proc_exit() from a signal handler would be quite unsafe.
5299  * However, since we have not yet touched shared memory, we can just
5300  * pull the plug and exit without running any atexit handlers.
5301  *
5302  * One might be tempted to try to send a message, or log one, indicating
5303  * why we are disconnecting. However, that would be quite unsafe in itself.
5304  * Also, it seems undesirable to provide clues about the database's state
5305  * to a client that has not yet completed authentication, or even sent us
5306  * a startup packet.
5307  */
5308 static void
5310 {
5311  _exit(1);
5312 }
5313 
5314 /*
5315  * Dummy signal handler
5316  *
5317  * We use this for signals that we don't actually use in the postmaster,
5318  * but we do use in backends. If we were to SIG_IGN such signals in the
5319  * postmaster, then a newly started backend might drop a signal that arrives
5320  * before it's able to reconfigure its signal processing. (See notes in
5321  * tcop/postgres.c.)
5322  */
5323 static void
5325 {
5326 }
5327 
5328 /*
5329  * Timeout while processing startup packet.
5330  * As for process_startup_packet_die(), we exit via _exit(1).
5331  */
5332 static void
5334 {
5335  _exit(1);
5336 }
5337 
5338 
5339 /*
5340  * Generate a random cancel key.
5341  */
5342 static bool
5344 {
5345  return pg_strong_random(cancel_key, sizeof(int32));
5346 }
5347 
5348 /*
5349  * Count up number of child processes of specified types (dead_end children
5350  * are always excluded).
 *
 * "target" is a bit mask that is tested against each entry's bkend_type.
 * Passing BACKEND_TYPE_ALL counts every entry of BackendList that is not
 * dead_end, without looking at bkend_type at all.
 *
 * Returns the number of matching entries.
 *
 * NOTE(review): listing lines 5376-5377 are absent from this copy, so the
 * "if (bp->bkend_type == BACKEND_TYPE_NORMAL &&" test below is shown
 * without the rest of its condition and without its body. Restore them
 * from the upstream file before compiling.
5351  */
5352 static int
5353 CountChildren(int target)
5354 {
5355  dlist_iter iter;
5356  int cnt = 0;
5357 
5358  dlist_foreach(iter, &BackendList)
5359  {
5360  Backend *bp = dlist_container(Backend, elem, iter.cur);
5361 
 /* dead_end children never contribute to the count */
5362  if (bp->dead_end)
5363  continue;
5364 
5365  /*
5366  * Since target == BACKEND_TYPE_ALL is the most common case, we test
5367  * it first and avoid touching shared memory for every child.
5368  */
5369  if (target != BACKEND_TYPE_ALL)
5370  {
5371  /*
5372  * Assign bkend_type for any recently announced WAL Sender
5373  * processes.
5374  */
5375  if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
5378 
 /* skip entries whose type shares no bit with the requested mask */
5379  if (!(target & bp->bkend_type))
5380  continue;
5381  }
5382 
5383  cnt++;
5384  }
5385  return cnt;
5386 }
5387 
5388 
5389 /*
5390  * StartChildProcess -- start an auxiliary process for the postmaster
5391  *
5392  * "type" determines what kind of child will be started. All child types
5393  * initially go to AuxiliaryProcessMain, which will handle common setup.
5394  *
5395  * Return value of StartChildProcess is subprocess' PID, or 0 if failed
5396  * to start subprocess.
 *
 * The child's argument vector is "postgres", then (EXEC_BACKEND builds
 * only) "--forkboot" plus a slot that postmaster_forkexec fills in, then
 * "-x<type>" where <type> is the numeric value of "type".
 *
 * A failed fork is logged at LOG level with a message chosen by "type".
 * If "type" is StartupProcess the postmaster then calls ExitPostmaster(1)
 * instead of returning 0.
 *
 * NOTE(review): listing lines 5399 (the function name and parameter list),
 * 5429 and 5435-5436 (inside the forked-child branch) are absent from this
 * copy; restore them from the upstream file before relying on this text.
5397  */
5398 static pid_t
5400 {
5401  pid_t pid;
5402  char *av[10];
5403  int ac = 0;
5404  char typebuf[32];
5405 
5406  /*
5407  * Set up command-line arguments for subprocess
5408  */
5409  av[ac++] = "postgres";
5410 
5411 #ifdef EXEC_BACKEND
5412  av[ac++] = "--forkboot";
5413  av[ac++] = NULL; /* filled in by postmaster_forkexec */
5414 #endif
5415 
5416  snprintf(typebuf, sizeof(typebuf), "-x%d", type);
5417  av[ac++] = typebuf;
5418 
 /* NULL-terminate the vector; ac stays the count of real arguments */
5419  av[ac] = NULL;
5420  Assert(ac < lengthof(av));
5421 
5422 #ifdef EXEC_BACKEND
5423  pid = postmaster_forkexec(ac, av);
5424 #else /* !EXEC_BACKEND */
5425  pid = fork_process();
5426 
5427  if (pid == 0) /* child */
5428  {
5430 
5431  /* Close the postmaster's sockets */
5432  ClosePostmasterPorts(false);
5433 
5434  /* Release postmaster's working memory context */
5437  PostmasterContext = NULL;
5438 
5439  AuxiliaryProcessMain(ac, av);
 /* NOTE(review): presumably reached only if AuxiliaryProcessMain returns */
5440  ExitPostmaster(0);
5441  }
5442 #endif /* EXEC_BACKEND */
5443 
5444  if (pid < 0)
5445  {
5446  /* in parent, fork failed */
5447  int save_errno = errno;
5448 
 /*
  * As shown here nothing runs between saving and restoring errno, so
  * the %m in the messages below reports the fork failure.
  */
5449  errno = save_errno;
5450  switch (type)
5451  {
5452  case StartupProcess:
5453  ereport(LOG,
5454  (errmsg("could not fork startup process: %m")));
5455  break;
5456  case ArchiverProcess:
5457  ereport(LOG,
5458  (errmsg("could not fork archiver process: %m")));
5459  break;
5460  case BgWriterProcess:
5461  ereport(LOG,
5462  (errmsg("could not fork background writer process: %m")));
5463  break;
5464  case CheckpointerProcess:
5465  ereport(LOG,
5466  (errmsg("could not fork checkpointer process: %m")));
5467  break;
5468  case WalWriterProcess:
5469  ereport(LOG,
5470  (errmsg("could not fork WAL writer process: %m")));
5471  break;
5472  case WalReceiverProcess:
5473  ereport(LOG,
5474  (errmsg("could not fork WAL receiver process: %m")));
5475  break;
5476  default:
5477  ereport(LOG,
5478  (errmsg("could not fork process: %m")));
5479  break;
5480  }
5481 
5482  /*
5483  * fork failure is fatal during startup, but there's no need to choke
5484  * immediately if starting other child types fails.
5485  */
5486  if (type == StartupProcess)
5487  ExitPostmaster(1);
5488  return 0;
5489  }
5490 
5491  /*
5492  * in parent, successful fork
5493  */
5494  return pid;
5495 }
5496 
5497 /*
5498  * StartAutovacuumWorker
5499  * Start an autovac worker process.
5500  *
5501  * This function is here because it enters the resulting PID into the
5502  * postmaster's private backends list.
5503  *
5504  * NB -- this code very roughly matches BackendStartup.
 *
 * Returns nothing. On success the new Backend entry is pushed onto
 * BackendList. If the cancel key cannot be generated the function logs and
 * returns at once. On the other failure paths visible here (malloc
 * returning NULL, or StartAutoVacWorker returning a non-positive pid)
 * control reaches the bottom of the function, where
 * avlauncher_needs_signal is set if AutoVacPID is nonzero.
 *
 * NOTE(review): listing lines 5507 (function name and parameter list),
 * 5518 (the condition guarding the main block), 5526 (the test that leads
 * to the cancel-key error), 5541, 5547, 5560 and 5580 are absent from this
 * copy; restore them from the upstream file before relying on this text.
5505  */
5506 static void
5508 {
5509  Backend *bn;
5510 
5511  /*
5512  * If not in condition to run a process, don't try, but handle it like a
5513  * fork failure. This does not normally happen, since the signal is only
5514  * supposed to be sent by autovacuum launcher when it's OK to do it, but
5515  * we have to check to avoid race-condition problems during DB state
5516  * changes.
5517  */
5519  {
5520  /*
5521  * Compute the cancel key that will be assigned to this session. We
5522  * probably don't need cancel keys for autovac workers, but we'd
5523  * better have something random in the field to prevent unfriendly
5524  * people from sending cancels to them.
5525  */
5527  {
5528  ereport(LOG,
5529  (errcode(ERRCODE_INTERNAL_ERROR),
5530  errmsg("could not generate random cancel key")));
 /* note: this path returns without notifying the launcher below */
5531  return;
5532  }
5533 
 /* plain malloc, so a NULL result is handled here rather than thrown */
5534  bn = (Backend *) malloc(sizeof(Backend));
5535  if (bn)
5536  {
5537  bn->cancel_key = MyCancelKey;
5538 
5539  /* Autovac workers are not dead_end and need a child slot */
5540  bn->dead_end = false;
5542  bn->bgworker_notify = false;
5543 
5544  bn->pid = StartAutoVacWorker();
5545  if (bn->pid > 0)
5546  {
5548  dlist_push_head(&BackendList, &bn->elem);
5549 #ifdef EXEC_BACKEND
5550  ShmemBackendArrayAdd(bn);
5551 #endif
5552  /* all OK */
5553  return;
5554  }
5555 
5556  /*
5557  * fork failed, fall through to report -- actual error message was
5558  * logged by StartAutoVacWorker
5559  */
5561  free(bn);
5562  }
5563  else
5564  ereport(LOG,
5565  (errcode(ERRCODE_OUT_OF_MEMORY),
5566  errmsg("out of memory")));
5567  }
5568 
5569  /*
5570  * Report the failure to the launcher, if it's running. (If it's not, we
5571  * might not even be connected to shared memory, so don't try to call
5572  * AutoVacWorkerFailed.) Note that we also need to signal it so that it
5573  * responds to the condition, but we don't do that here, instead waiting
5574  * for ServerLoop to do it. This way we avoid a ping-pong signaling in
5575  * quick succession between the autovac launcher and postmaster in case
5576  * things get ugly.
5577  */
5578  if (AutoVacPID != 0)
5579  {
5581  avlauncher_needs_signal = true;
5582  }
5583 }
5584 
5585 /*
5586  * MaybeStartWalReceiver
5587  * Start the WAL receiver process, if not running and our state allows.
5588  *
5589  * Note: if WalReceiverPID is already nonzero, it might seem that we should
5590  * clear WalReceiverRequested. However, there's a race condition if the
5591  * walreceiver terminates and the startup process immediately requests a new
5592  * one: it's quite possible to get the signal for the request before reaping
5593  * the dead walreceiver process. Better to risk launching an extra
5594  * walreceiver than to miss launching one we need. (The walreceiver code
5595  * has logic to recognize that it should go away if not needed.)
 *
 * The visible part of the guard requires WalReceiverPID == 0 and pmState
 * to be PM_STARTUP, PM_RECOVERY or PM_HOT_STANDBY. WalReceiverRequested is
 * cleared only when WalReceiverPID is nonzero afterwards.
 *
 * NOTE(review): listing lines 5598 (function name and parameter list),
 * 5603 (the last term of the condition) and 5605 (presumably the statement
 * that launches the process and sets WalReceiverPID) are absent from this
 * copy; restore them from the upstream file.
5596  */
5597 static void
5599 {
5600  if (WalReceiverPID == 0 &&
5601  (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5602  pmState == PM_HOT_STANDBY) &&
5604  {
5606  if (WalReceiverPID != 0)
5607  WalReceiverRequested = false;
5608  /* else leave the flag set, so we'll try again later */
5609  }
5610 }
5611 
5612 
5613 /*
5614  * Create the opts file
 *
 * Opens OPTS_FILE ("postmaster.opts", a relative path) for writing and
 * emits one line: fullprogname, then each of argv[1] .. argv[argc - 1]
 * preceded by a space and wrapped in double quotes, then a newline.
 * Quote characters inside an argument are written as-is, not escaped.
 *
 * Returns true on success. Returns false, after logging at LOG level, if
 * the file cannot be opened or if fclose() reports an error. The results
 * of the individual fprintf/fputs calls are not checked; a write failure
 * is only noticed if fclose() reports it.
 *
 * NOTE(review): listing lines 5627 and 5640 are absent from this copy, so
 * both ereport() calls below are shown without their first argument line.
 * Restore them from the upstream file before compiling.
5615  */
5616 static bool
5617 CreateOptsFile(int argc, char *argv[], char *fullprogname)
5618 {
5619  FILE *fp;
5620  int i;
5621 
5622 #define OPTS_FILE "postmaster.opts"
5623 
5624  if ((fp = fopen(OPTS_FILE, "w")) == NULL)
5625  {
5626  ereport(LOG,
5628  errmsg("could not create file \"%s\": %m", OPTS_FILE)));
5629  return false;
5630  }
5631 
5632  fprintf(fp, "%s", fullprogname);
 /* argv[0] is skipped; fullprogname stands in for it */
5633  for (i = 1; i < argc; i++)
5634  fprintf(fp, " \"%s\"", argv[i]);
5635  fputs("\n", fp);
5636 
 /* nonzero from fclose() means the close (and its flush) failed */
5637  if (fclose(fp))
5638  {
5639  ereport(LOG,
5641  errmsg("could not write file \"%s\": %m", OPTS_FILE)));
5642  return false;
5643  }
5644 
5645  return true;
5646 }
5647 
5648 
5649 /*
5650  * MaxLivePostmasterChildren
5651  *
5652  * This reports the number of entries needed in per-child-process arrays
5653  * (the PMChildFlags array, and if EXEC_BACKEND the ShmemBackendArray).
5654  * These arrays include regular backends, autovac workers, walsenders
5655  * and background workers, but not special children nor dead_end children.
5656  * This allows the arrays to have a fixed maximum size, to wit the same
5657  * too-many-children limit enforced by canAcceptConnections(). The exact value
5658  * isn't too critical as long as it's more than MaxBackends.
 *
 * The result is twice a sum that starts with
 * MaxConnections + autovacuum_max_workers + 1.
 *
 * NOTE(review): listing lines 5661 (function name and parameter list) and
 * 5664 (the remaining terms of the sum and the end of the return
 * statement) are absent from this copy; restore them from the upstream
 * file before compiling.
5659  */
5660 int
5662 {
5663  return 2 * (MaxConnections + autovacuum_max_workers + 1 +
5665 }
5666 
5667 /*
5668  * Connect background worker to a database.
 *
 * The database and user are identified by name: "dbname" and "username"
 * are handed to InitPostgres() together with InvalidOid for the two OID
 * arguments. The BGWORKER_BYPASS_ALLOWCONN bit of "flags" is turned into
 * the boolean passed as InitPostgres()'s last argument.
 *
 * Reports FATAL if the (not shown) registration check fails, and ERROR if
 * the process is no longer in init processing mode after InitPostgres().
 *
 * NOTE(review): listing lines 5671 (function name and parameter list),
 * 5673, 5676 (the condition guarding the FATAL report) and 5687 (the last
 * statement before the closing brace) are absent from this copy; restore
 * them from the upstream file before compiling.
5669  */
5670 void
5672 {
5674 
5675  /* XXX is this the right errcode? */
5677  ereport(FATAL,
5678  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5679  errmsg("database connection requirement not indicated during registration")));
5680 
5681  InitPostgres(dbname, InvalidOid, username, InvalidOid, NULL, (flags & BGWORKER_BYPASS_ALLOWCONN) != 0);
5682 
5683  /* it had better not have gotten out of "init" mode yet */
5684  if (!IsInitProcessingMode())
5685  ereport(ERROR,
5686  (errmsg("invalid processing mode in background worker")));
5688 }
5689 
5690 /*
5691  * Connect background worker to a database using OIDs.
 *
 * The database and user are identified by OID: "dboid" and "useroid" are
 * handed to InitPostgres() together with NULL for the two name arguments.
 * The BGWORKER_BYPASS_ALLOWCONN bit of "flags" is turned into the boolean
 * passed as InitPostgres()'s last argument.
 *
 * Reports FATAL if the (not shown) registration check fails, and ERROR if
 * the process is no longer in init processing mode after InitPostgres().
 *
 * NOTE(review): listing lines 5694 (function name and parameter list),
 * 5696, 5699 (the condition guarding the FATAL report) and 5710 (the last
 * statement before the closing brace) are absent from this copy; restore
 * them from the upstream file before compiling.
5692  */
5693 void
5695 {
5697 
5698  /* XXX is this the right errcode? */
5700  ereport(FATAL,
5701  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5702  errmsg("database connection requirement not indicated during registration")));
5703 
5704  InitPostgres(NULL, dboid, NULL, useroid, NULL, (flags & BGWORKER_BYPASS_ALLOWCONN) != 0);
5705 
5706  /* it had better not have gotten out of "init" mode yet */
5707  if (!IsInitProcessingMode())
5708  ereport(ERROR,
5709  (errmsg("invalid processing mode in background worker")));
5711 }
5712 
5713 /*
5714  * Block/unblock signals in a background worker
 *
 * The blocking routine installs the BlockSig signal mask via PG_SETMASK.
 *
 * NOTE(review): listing line 5717, which should carry the blocking
 * routine's name and parameter list, is absent from this copy.
5715  */
5716 void
5718 {
5719  PG_SETMASK(&BlockSig);
5720 }
5721 
/*
 * Counterpart of the signal-blocking routine directly above.
 *
 * NOTE(review): listing lines 5723 (the function name and parameter list)
 * and 5725 (the only statement of the body) are absent from this copy, so
 * the body appears empty here. Presumably the missing statement installs
 * an unblocking signal mask -- restore it from the upstream file.
 */
5722 void
5724 {
5726 }
5727 
5728 #ifdef EXEC_BACKEND
5729 static pid_t
5730 bgworker_forkexec(int shmem_slot)
5731 {
5732  char *av[10];
5733  int ac = 0;
5734  char forkav[MAXPGPATH];
5735 
5736  snprintf(forkav, MAXPGPATH, "--forkbgworker=%d", shmem_slot);
5737 
5738  av[ac++] = "postgres";
5739  av[ac++] = forkav;
5740  av[ac++] = NULL; /* filled in by postmaster_forkexec */
5741  av[ac] = NULL;
5742 
5743  Assert(ac < lengthof(av));
5744 
5745  return postmaster_forkexec(ac, av);
5746 }
5747 #endif
5748 
5749 /*
5750  * Start a new bgworker.
5751  * Starting time conditions must have been checked already.
5752  *
5753  * Returns true on success, false on failure.
5754  * In either case, update the RegisteredBgWorker's state appropriately.
5755  *
5756  * This code is heavily based on autovacuum.c, q.v.
 *
 * "rw" is the registered worker to launch; its rw_pid must be 0 on entry.
 * On a successful fork rw->rw_pid and rw->rw_backend->pid are set to the
 * child's pid and the Backend entry is pushed onto BackendList. On fork
 * failure the Backend entry obtained from assign_backendlist_entry() is
 * freed and rw->rw_child_slot is reset to 0.
 *
 * NOTE(review): listing lines 5759 (function name and parameter list),
 * 5777, 5796, 5801, 5807, 5817, 5821-5822, 5825 and 5834 are absent from
 * this copy. Among other things the "MyBgworkerEntry = (BackgroundWorker *)"
 * assignment below is shown without its right-hand side. Restore them from
 * the upstream file before compiling.
5757  */
5758 static bool
5760 {
5761  pid_t worker_pid;
5762 
5763  Assert(rw->rw_pid == 0);
5764 
5765  /*
5766  * Allocate and assign the Backend element. Note we must do this before
5767  * forking, so that we can handle failures (out of memory or child-process
5768  * slots) cleanly.
5769  *
5770  * Treat failure as though the worker had crashed. That way, the
5771  * postmaster will wait a bit before attempting to start it again; if we
5772  * tried again right away, most likely we'd find ourselves hitting the
5773  * same resource-exhaustion condition.
5774  */
5775  if (!assign_backendlist_entry(rw))
5776  {
5778  return false;
5779  }
5780 
5781  ereport(DEBUG1,
5782  (errmsg_internal("starting background worker process \"%s\"",
5783  rw->rw_worker.bgw_name)));
5784 
 /* the fork primitive depends on the build; the switch handles its result */
5785 #ifdef EXEC_BACKEND
5786  switch ((worker_pid = bgworker_forkexec(rw->rw_shmem_slot)))
5787 #else
5788  switch ((worker_pid = fork_process()))
5789 #endif
5790  {
5791  case -1:
5792  /* in postmaster, fork failed ... */
5793  ereport(LOG,
5794  (errmsg("could not fork worker process: %m")));
5795  /* undo what assign_backendlist_entry did */
5797  rw->rw_child_slot = 0;
5798  free(rw->rw_backend);
5799  rw->rw_backend = NULL;
5800  /* mark entry as crashed, so we'll try again later */
 /* leaves the switch; the function then returns false */
5802  break;
5803 
5804 #ifndef EXEC_BACKEND
5805  case 0:
5806  /* in postmaster child ... */
5808 
5809  /* Close the postmaster's sockets */
5810  ClosePostmasterPorts(false);
5811 
5812  /*
5813  * Before blowing away PostmasterContext, save this bgworker's
5814  * data where it can find it.
5815  */
5816  MyBgworkerEntry = (BackgroundWorker *)
5818  memcpy(MyBgworkerEntry, &rw->rw_worker, sizeof(BackgroundWorker));
5819 
5820  /* Release postmaster's working memory context */
5823  PostmasterContext = NULL;
5824 
5826 
5827  exit(1); /* should not get here */
5828  break;
5829 #endif
5830  default:
5831  /* in postmaster, fork successful ... */
5832  rw->rw_pid = worker_pid;
5833  rw->rw_backend->pid = rw->rw_pid;
5835  /* add new worker to lists of backends */
5836  dlist_push_head(&BackendList, &rw->rw_backend->elem);
5837 #ifdef EXEC_BACKEND
5838  ShmemBackendArrayAdd(rw->rw_backend);
5839 #endif
5840  return true;
5841  }
5842 
 /* reached only from the fork-failure case above */
5843  return false;
5844 }
5845 
5846 /*
5847  * Does the current postmaster state require starting a worker with the
5848  * specified start_time?
5849  */
5850 static bool
5852 {
5853  switch (pmState)
5854  {
5855  case PM_NO_CHILDREN:
5856  case PM_WAIT_DEAD_END:
5857  case PM_SHUTDOWN_2:
5858  case PM_SHUTDOWN:
5859  case PM_WAIT_BACKENDS:
5860  case PM_STOP_BACKENDS:
5861  break;