PostgreSQL Source Code  git master
postmaster.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * postmaster.c
4  * This program acts as a clearing house for requests to the
5  * POSTGRES system. Frontend programs send a startup message
6  * to the Postmaster and the postmaster uses the info in the
7  * message to setup a backend process.
8  *
9  * The postmaster also manages system-wide operations such as
10  * startup and shutdown. The postmaster itself doesn't do those
11  * operations, mind you --- it just forks off a subprocess to do them
12  * at the right times. It also takes care of resetting the system
13  * if a backend crashes.
14  *
15  * The postmaster process creates the shared memory and semaphore
16  * pools during startup, but as a rule does not touch them itself.
17  * In particular, it is not a member of the PGPROC array of backends
18  * and so it cannot participate in lock-manager operations. Keeping
19  * the postmaster away from shared memory operations makes it simpler
20  * and more reliable. The postmaster is almost always able to recover
21  * from crashes of individual backends by resetting shared memory;
22  * if it did much with shared memory then it would be prone to crashing
23  * along with the backends.
24  *
25  * When a request message is received, we now fork() immediately.
26  * The child process performs authentication of the request, and
27  * then becomes a backend if successful. This allows the auth code
28  * to be written in a simple single-threaded style (as opposed to the
29  * crufty "poor man's multitasking" code that used to be needed).
30  * More importantly, it ensures that blockages in non-multithreaded
31  * libraries like SSL or PAM cannot cause denial of service to other
32  * clients.
33  *
34  *
35  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
36  * Portions Copyright (c) 1994, Regents of the University of California
37  *
38  *
39  * IDENTIFICATION
40  * src/backend/postmaster/postmaster.c
41  *
42  * NOTES
43  *
44  * Initialization:
45  * The Postmaster sets up shared memory data structures
46  * for the backends.
47  *
48  * Synchronization:
49  * The Postmaster shares memory with the backends but should avoid
50  * touching shared memory, so as not to become stuck if a crashing
51  * backend screws up locks or shared memory. Likewise, the Postmaster
52  * should never block on messages from frontend clients.
53  *
54  * Garbage Collection:
55  * The Postmaster cleans up after backends if they have an emergency
56  * exit and/or core dump.
57  *
58  * Error Reporting:
59  * Use write_stderr() only for reporting "interactive" errors
60  * (essentially, bogus arguments on the command line). Once the
61  * postmaster is launched, use ereport().
62  *
63  *-------------------------------------------------------------------------
64  */
65 
66 #include "postgres.h"
67 
68 #include <unistd.h>
69 #include <signal.h>
70 #include <time.h>
71 #include <sys/wait.h>
72 #include <ctype.h>
73 #include <sys/stat.h>
74 #include <sys/socket.h>
75 #include <fcntl.h>
76 #include <sys/param.h>
77 #include <netdb.h>
78 #include <limits.h>
79 
80 #ifdef HAVE_SYS_SELECT_H
81 #include <sys/select.h>
82 #endif
83 
84 #ifdef USE_BONJOUR
85 #include <dns_sd.h>
86 #endif
87 
88 #ifdef USE_SYSTEMD
89 #include <systemd/sd-daemon.h>
90 #endif
91 
92 #ifdef HAVE_PTHREAD_IS_THREADED_NP
93 #include <pthread.h>
94 #endif
95 
96 #include "access/transam.h"
97 #include "access/xlog.h"
98 #include "access/xlogrecovery.h"
99 #include "catalog/pg_control.h"
100 #include "common/file_perm.h"
101 #include "common/ip.h"
102 #include "common/pg_prng.h"
103 #include "common/string.h"
104 #include "lib/ilist.h"
105 #include "libpq/auth.h"
106 #include "libpq/libpq.h"
107 #include "libpq/pqformat.h"
108 #include "libpq/pqsignal.h"
109 #include "pg_getopt.h"
110 #include "pgstat.h"
111 #include "port/pg_bswap.h"
112 #include "postmaster/autovacuum.h"
113 #include "postmaster/auxprocess.h"
115 #include "postmaster/fork_process.h"
116 #include "postmaster/interrupt.h"
117 #include "postmaster/pgarch.h"
118 #include "postmaster/postmaster.h"
119 #include "postmaster/syslogger.h"
121 #include "replication/walsender.h"
122 #include "storage/fd.h"
123 #include "storage/ipc.h"
124 #include "storage/pg_shmem.h"
125 #include "storage/pmsignal.h"
126 #include "storage/proc.h"
127 #include "tcop/tcopprot.h"
128 #include "utils/builtins.h"
129 #include "utils/datetime.h"
130 #include "utils/memutils.h"
131 #include "utils/pidfile.h"
132 #include "utils/ps_status.h"
133 #include "utils/queryjumble.h"
134 #include "utils/timeout.h"
135 #include "utils/timestamp.h"
136 #include "utils/varlena.h"
137 
138 #ifdef EXEC_BACKEND
139 #include "storage/spin.h"
140 #endif
141 
142 
143 /*
144  * Possible types of a backend. Beyond being the possible bkend_type values in
145  * struct bkend, these are OR-able request flag bits for SignalSomeChildren()
146  * and CountChildren().
147  */
148 #define BACKEND_TYPE_NORMAL 0x0001 /* normal backend */
149 #define BACKEND_TYPE_AUTOVAC 0x0002 /* autovacuum worker process */
150 #define BACKEND_TYPE_WALSND 0x0004 /* walsender process */
151 #define BACKEND_TYPE_BGWORKER 0x0008 /* bgworker process */
152 #define BACKEND_TYPE_ALL 0x000F /* OR of all the above */
153 
154 /*
155  * List of active backends (or child processes anyway; we don't actually
156  * know whether a given child has become a backend or is still in the
157  * authorization phase). This is used mainly to keep track of how many
158  * children we have and send them appropriate signals when necessary.
159  *
160  * As shown in the above set of backend types, this list includes not only
161  * "normal" client sessions, but also autovacuum workers, walsenders, and
162  * background workers. (Note that at the time of launch, walsenders are
163  * labeled BACKEND_TYPE_NORMAL; we relabel them to BACKEND_TYPE_WALSND
164  * upon noticing they've changed their PMChildFlags entry. Hence that check
165  * must be done before any operation that needs to distinguish walsenders
166  * from normal backends.)
167  *
168  * Also, "dead_end" children are in it: these are children launched just for
169  * the purpose of sending a friendly rejection message to a would-be client.
170  * We must track them because they are attached to shared memory, but we know
171  * they will never become live backends. dead_end children are not assigned a
172  * PMChildSlot. dead_end children have bkend_type NORMAL.
173  *
174  * "Special" children such as the startup, bgwriter and autovacuum launcher
175  * tasks are not in this list. They are tracked via StartupPID and other
176  * pid_t variables below. (Thus, there can't be more than one of any given
177  * "special" child process type. We use BackendList entries for any child
178  * process there can be more than one of.)
179  */
180 typedef struct bkend
181 {
182  pid_t pid; /* process id of backend */
183  int32 cancel_key; /* cancel key for cancels for this backend */
184  int child_slot; /* PMChildSlot for this backend, if any */
185  int bkend_type; /* child process flavor, see above */
186  bool dead_end; /* is it going to send an error and quit? */
187  bool bgworker_notify; /* gets bgworker start/stop notifications */
188  dlist_node elem; /* list link in BackendList */
190 
192 
193 #ifdef EXEC_BACKEND
194 static Backend *ShmemBackendArray;
195 #endif
196 
198 
199 
200 
201 /* The socket number we are listening for connections on */
203 
204 /* The directory names for Unix socket(s) */
206 
207 /* The TCP listen address(es) */
209 
210 /*
211  * ReservedBackends is the number of backends reserved for superuser use.
212  * This number is taken out of the pool size given by MaxConnections, so the
213  * number of backend slots available to non-superusers is
214  * (MaxConnections - ReservedBackends). Note what this really means is
215  * "if there are <= ReservedBackends connections available, only superusers
216  * can make new connections" --- pre-existing superuser connections don't
217  * count against the limit.
218  */
220 
221 /* The socket(s) we're listening to. */
222 #define MAXLISTEN 64
224 
225 /*
226  * These globals control the behavior of the postmaster in case some
227  * backend dumps core. Normally, it kills all peers of the dead backend
228  * and reinitializes shared memory. By specifying -T or -n, we can have
229  * the postmaster stop (rather than kill) peers and not reinitialize
230  * shared data structures. (Reinit is currently dead code, though.)
231  */
232 static bool Reinit = true;
233 static int SendStop = false;
234 
235 /* still more option variables */
236 bool EnableSSL = false;
237 
238 int PreAuthDelay = 0;
240 
241 bool log_hostname; /* for ps display and logging */
242 bool Log_connections = false;
243 bool Db_user_namespace = false;
244 
245 bool enable_bonjour = false;
249 
250 /* PIDs of special child processes; 0 when not running */
251 static pid_t StartupPID = 0,
259 
260 /* Startup process's status */
261 typedef enum
262 {
265  STARTUP_SIGNALED, /* we sent it a SIGQUIT or SIGKILL */
268 
270 
271 /* Startup/shutdown state */
272 #define NoShutdown 0
273 #define SmartShutdown 1
274 #define FastShutdown 2
275 #define ImmediateShutdown 3
276 
277 static int Shutdown = NoShutdown;
278 
279 static bool FatalError = false; /* T if recovering from backend crash */
280 
281 /*
282  * We use a simple state machine to control startup, shutdown, and
283  * crash recovery (which is rather like shutdown followed by startup).
284  *
285  * After doing all the postmaster initialization work, we enter PM_STARTUP
286  * state and the startup process is launched. The startup process begins by
287  * reading the control file and other preliminary initialization steps.
288  * In a normal startup, or after crash recovery, the startup process exits
289  * with exit code 0 and we switch to PM_RUN state. However, archive recovery
290  * is handled specially since it takes much longer and we would like to support
291  * hot standby during archive recovery.
292  *
293  * When the startup process is ready to start archive recovery, it signals the
294  * postmaster, and we switch to PM_RECOVERY state. The background writer and
295  * checkpointer are launched, while the startup process continues applying WAL.
296  * If Hot Standby is enabled, then, after reaching a consistent point in WAL
297  * redo, startup process signals us again, and we switch to PM_HOT_STANDBY
298  * state and begin accepting connections to perform read-only queries. When
299  * archive recovery is finished, the startup process exits with exit code 0
300  * and we switch to PM_RUN state.
301  *
302  * Normal child backends can only be launched when we are in PM_RUN or
303  * PM_HOT_STANDBY state. (connsAllowed can also restrict launching.)
304  * In other states we handle connection requests by launching "dead_end"
305  * child processes, which will simply send the client an error message and
306  * quit. (We track these in the BackendList so that we can know when they
307  * are all gone; this is important because they're still connected to shared
308  * memory, and would interfere with an attempt to destroy the shmem segment,
309  * possibly leading to SHMALL failure when we try to make a new one.)
310  * In PM_WAIT_DEAD_END state we are waiting for all the dead_end children
311  * to drain out of the system, and therefore stop accepting connection
312  * requests at all until the last existing child has quit (which hopefully
313  * will not be very long).
314  *
315  * Notice that this state variable does not distinguish *why* we entered
316  * states later than PM_RUN --- Shutdown and FatalError must be consulted
317  * to find that out. FatalError is never true in PM_RECOVERY, PM_HOT_STANDBY,
318  * or PM_RUN states, nor in PM_SHUTDOWN states (because we don't enter those
319  * states when trying to recover from a crash). It can be true in PM_STARTUP
320  * state, because we don't clear it until we've successfully started WAL redo.
321  */
322 typedef enum
323 {
324  PM_INIT, /* postmaster starting */
325  PM_STARTUP, /* waiting for startup subprocess */
326  PM_RECOVERY, /* in archive recovery mode */
327  PM_HOT_STANDBY, /* in hot standby mode */
328  PM_RUN, /* normal "database is alive" state */
329  PM_STOP_BACKENDS, /* need to stop remaining backends */
330  PM_WAIT_BACKENDS, /* waiting for live backends to exit */
331  PM_SHUTDOWN, /* waiting for checkpointer to do shutdown
332  * ckpt */
333  PM_SHUTDOWN_2, /* waiting for archiver and walsenders to
334  * finish */
335  PM_WAIT_DEAD_END, /* waiting for dead_end children to exit */
336  PM_NO_CHILDREN /* all important children have exited */
338 
340 
341 /*
342  * While performing a "smart shutdown", we restrict new connections but stay
343  * in PM_RUN or PM_HOT_STANDBY state until all the client backends are gone.
344  * connsAllowed is a sub-state indicator showing the active restriction.
345  * It is of no interest unless pmState is PM_RUN or PM_HOT_STANDBY.
346  */
347 static bool connsAllowed = true;
348 
349 /* Start time of SIGKILL timeout during immediate shutdown or child crash */
350 /* Zero means timeout is not running */
351 static time_t AbortStartTime = 0;
352 
353 /* Length of said timeout */
354 #define SIGKILL_CHILDREN_AFTER_SECS 5
355 
356 static bool ReachedNormalRunning = false; /* T if we've reached PM_RUN */
357 
358 bool ClientAuthInProgress = false; /* T during new-client
359  * authentication */
360 
361 bool redirection_done = false; /* stderr redirected for syslogger? */
362 
363 /* received START_AUTOVAC_LAUNCHER signal */
364 static volatile sig_atomic_t start_autovac_launcher = false;
365 
366 /* the launcher needs to be signaled to communicate some condition */
367 static volatile bool avlauncher_needs_signal = false;
368 
369 /* received START_WALRECEIVER signal */
370 static volatile sig_atomic_t WalReceiverRequested = false;
371 
372 /* set when there's a worker that needs to be started up */
373 static volatile bool StartWorkerNeeded = true;
374 static volatile bool HaveCrashedWorker = false;
375 
376 #ifdef USE_SSL
377 /* Set when and if SSL has been initialized properly */
378 static bool LoadedSSL = false;
379 #endif
380 
381 #ifdef USE_BONJOUR
382 static DNSServiceRef bonjour_sdref = NULL;
383 #endif
384 
385 /*
386  * postmaster.c - function prototypes
387  */
388 static void CloseServerPorts(int status, Datum arg);
389 static void unlink_external_pid_file(int status, Datum arg);
390 static void getInstallationPaths(const char *argv0);
391 static void checkControlFile(void);
392 static Port *ConnCreate(int serverFd);
393 static void ConnFree(Port *port);
394 static void reset_shared(void);
395 static void SIGHUP_handler(SIGNAL_ARGS);
396 static void pmdie(SIGNAL_ARGS);
397 static void reaper(SIGNAL_ARGS);
398 static void sigusr1_handler(SIGNAL_ARGS);
400 static void dummy_handler(SIGNAL_ARGS);
401 static void StartupPacketTimeoutHandler(void);
402 static void CleanupBackend(int pid, int exitstatus);
403 static bool CleanupBackgroundWorker(int pid, int exitstatus);
404 static void HandleChildCrash(int pid, int exitstatus, const char *procname);
405 static void LogChildExit(int lev, const char *procname,
406  int pid, int exitstatus);
407 static void PostmasterStateMachine(void);
408 static void BackendInitialize(Port *port);
409 static void BackendRun(Port *port) pg_attribute_noreturn();
410 static void ExitPostmaster(int status) pg_attribute_noreturn();
411 static int ServerLoop(void);
412 static int BackendStartup(Port *port);
413 static int ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done);
414 static void SendNegotiateProtocolVersion(List *unrecognized_protocol_options);
415 static void processCancelRequest(Port *port, void *pkt);
416 static int initMasks(fd_set *rmask);
417 static void report_fork_failure_to_client(Port *port, int errnum);
418 static CAC_state canAcceptConnections(int backend_type);
419 static bool RandomCancelKey(int32 *cancel_key);
420 static void signal_child(pid_t pid, int signal);
421 static bool SignalSomeChildren(int signal, int targets);
422 static void TerminateChildren(int signal);
423 
424 #define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL)
425 
426 static int CountChildren(int target);
428 static void maybe_start_bgworkers(void);
429 static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
430 static pid_t StartChildProcess(AuxProcType type);
431 static void StartAutovacuumWorker(void);
432 static void MaybeStartWalReceiver(void);
433 static void InitPostmasterDeathWatchHandle(void);
434 
435 /*
436  * Is the archiver allowed to start up in the current postmaster state?
437  *
438  * True in PM_RUN when XLogArchivingActive(); also in PM_RECOVERY or
439  * PM_HOT_STANDBY when XLogArchivingAlways(). PgArchCanRestart() must hold too.
440  */
441 #define PgArchStartupAllowed() \
442  (((XLogArchivingActive() && pmState == PM_RUN) || \
443  (XLogArchivingAlways() && \
444  (pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY))) && \
445  PgArchCanRestart())
446 
447 #ifdef EXEC_BACKEND
448 
449 #ifdef WIN32
450 #define WNOHANG 0 /* ignored, so any integer value will do */
451 
452 static pid_t waitpid(pid_t pid, int *exitstatus, int options);
453 static void WINAPI pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired);
454 
455 static HANDLE win32ChildQueue;
456 
457 typedef struct
458 {
459  HANDLE waitHandle; /* NOTE(review): presumably the wait-registration handle -- confirm */
460  HANDLE procHandle; /* NOTE(review): presumably the child's process handle -- confirm */
461  DWORD procId; /* NOTE(review): presumably the child's process id -- confirm */
462 } win32_deadchild_waitinfo;
463 #endif /* WIN32 */
464 
465 static pid_t backend_forkexec(Port *port);
466 static pid_t internal_forkexec(int argc, char *argv[], Port *port);
467 
468 /* Type for a socket that can be inherited by a client process: a struct on WIN32, a plain int otherwise */
469 #ifdef WIN32
470 typedef struct
471 {
472  SOCKET origsocket; /* Original socket value, or PGINVALID_SOCKET
473  * if not a socket */
474  WSAPROTOCOL_INFO wsainfo;
475 } InheritableSocket;
476 #else
477 typedef int InheritableSocket; /* non-WIN32 builds: the type is just an int */
478 #endif
479 
480 /*
481  * Structure contains all variables passed to exec:ed backends
482  */
483 typedef struct
484 {
485  Port port;
486  InheritableSocket portsocket;
487  char DataDir[MAXPGPATH];
490  int MyPMChildSlot;
491 #ifndef WIN32
492  unsigned long UsedShmemSegID;
493 #else
494  void *ShmemProtectiveRegion;
495  HANDLE UsedShmemSegID;
496 #endif
497  void *UsedShmemSegAddr;
500  Backend *ShmemBackendArray;
501 #ifndef HAVE_SPINLOCKS
503 #endif
512  pid_t PostmasterPid;
516  bool redirection_done;
517  bool IsBinaryUpgrade;
518  bool query_id_enabled;
519  int max_safe_fds;
520  int MaxBackends;
521 #ifdef WIN32
522  HANDLE PostmasterHandle;
523  HANDLE initial_signal_pipe;
524  HANDLE syslogPipe[2];
525 #else
526  int postmaster_alive_fds[2];
527  int syslogPipe[2];
528 #endif
529  char my_exec_path[MAXPGPATH];
530  char pkglib_path[MAXPGPATH];
531 } BackendParameters;
532 
533 static void read_backend_variables(char *id, Port *port);
534 static void restore_backend_variables(BackendParameters *param, Port *port);
535 
536 #ifndef WIN32
537 static bool save_backend_variables(BackendParameters *param, Port *port);
538 #else
539 static bool save_backend_variables(BackendParameters *param, Port *port,
540  HANDLE childProcess, pid_t childPid);
541 #endif
542 
543 static void ShmemBackendArrayAdd(Backend *bn);
544 static void ShmemBackendArrayRemove(Backend *bn);
545 #endif /* EXEC_BACKEND */
546 
/*
 * Shorthand launchers: each macro expands to a StartChildProcess() call with
 * the matching AuxProcType constant, so the expression has that function's
 * declared result type (pid_t).
 */
547 #define StartupDataBase() StartChildProcess(StartupProcess)
548 #define StartArchiver() StartChildProcess(ArchiverProcess)
549 #define StartBackgroundWriter() StartChildProcess(BgWriterProcess)
550 #define StartCheckpointer() StartChildProcess(CheckpointerProcess)
551 #define StartWalWriter() StartChildProcess(WalWriterProcess)
552 #define StartWalReceiver() StartChildProcess(WalReceiverProcess)
553 
554 /* Macros to check the exit status (a wait-style status word) of a child process */
555 #define EXIT_STATUS_0(st) ((st) == 0) /* compares the raw status word to 0; no WIFEXITED decode */
556 #define EXIT_STATUS_1(st) (WIFEXITED(st) && WEXITSTATUS(st) == 1) /* WIFEXITED true and exit code 1 */
557 #define EXIT_STATUS_3(st) (WIFEXITED(st) && WEXITSTATUS(st) == 3) /* WIFEXITED true and exit code 3 */
558 
559 #ifndef WIN32
560 /*
561  * File descriptors for pipe used to monitor if postmaster is alive.
562  * First is POSTMASTER_FD_WATCH, second is POSTMASTER_FD_OWN.
563  */
564 int postmaster_alive_fds[2] = {-1, -1};
565 #else
566 /* Process handle of postmaster used for the same purpose on Windows */
567 HANDLE PostmasterHandle;
568 #endif
569 
570 /*
571  * Postmaster main entry point
572  */
573 void
574 PostmasterMain(int argc, char *argv[])
575 {
576  int opt;
577  int status;
578  char *userDoption = NULL;
579  bool listen_addr_saved = false;
580  int i;
581  char *output_config_variable = NULL;
582 
584 
586 
588 
589  /*
590  * Start our win32 signal implementation
591  */
592 #ifdef WIN32
594 #endif
595 
596  /*
597  * We should not be creating any files or directories before we check the
598  * data directory (see checkDataDir()), but just in case set the umask to
599  * the most restrictive (owner-only) permissions.
600  *
601  * checkDataDir() will reset the umask based on the data directory
602  * permissions.
603  */
604  umask(PG_MODE_MASK_OWNER);
605 
606  /*
607  * By default, palloc() requests in the postmaster will be allocated in
608  * the PostmasterContext, which is space that can be recycled by backends.
609  * Allocated data that needs to be available to backends should be
610  * allocated in TopMemoryContext.
611  */
613  "Postmaster",
616 
617  /* Initialize paths to installation files */
618  getInstallationPaths(argv[0]);
619 
620  /*
621  * Set up signal handlers for the postmaster process.
622  *
623  * In the postmaster, we use pqsignal_pm() rather than pqsignal() (which
624  * is used by all child processes and client processes). That has a
625  * couple of special behaviors:
626  *
627  * 1. Except on Windows, we tell sigaction() to block all signals for the
628  * duration of the signal handler. This is faster than our old approach
629  * of blocking/unblocking explicitly in the signal handler, and it should
630  * also prevent excessive stack consumption if signals arrive quickly.
631  *
632  * 2. We do not set the SA_RESTART flag. This is because signals will be
633  * blocked at all times except when ServerLoop is waiting for something to
634  * happen, and during that window, we want signals to exit the select(2)
635  * wait so that ServerLoop can respond if anything interesting happened.
636  * On some platforms, signals marked SA_RESTART would not cause the
637  * select() wait to end.
638  *
639  * Child processes will generally want SA_RESTART, so pqsignal() sets that
640  * flag. We expect children to set up their own handlers before
641  * unblocking signals.
642  *
643  * CAUTION: when changing this list, check for side-effects on the signal
644  * handling setup of child processes. See tcop/postgres.c,
645  * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
646  * postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/syslogger.c,
647  * postmaster/bgworker.c and postmaster/checkpointer.c.
648  */
649  pqinitmask();
651 
652  pqsignal_pm(SIGHUP, SIGHUP_handler); /* reread config file and have
653  * children do same */
654  pqsignal_pm(SIGINT, pmdie); /* send SIGTERM and shut down */
655  pqsignal_pm(SIGQUIT, pmdie); /* send SIGQUIT and die */
656  pqsignal_pm(SIGTERM, pmdie); /* wait for children and shut down */
657  pqsignal_pm(SIGALRM, SIG_IGN); /* ignored */
658  pqsignal_pm(SIGPIPE, SIG_IGN); /* ignored */
659  pqsignal_pm(SIGUSR1, sigusr1_handler); /* message from child process */
660  pqsignal_pm(SIGUSR2, dummy_handler); /* unused, reserve for children */
661  pqsignal_pm(SIGCHLD, reaper); /* handle child termination */
662 
663 #ifdef SIGURG
664 
665  /*
666  * Ignore SIGURG for now. Child processes may change this (see
667  * InitializeLatchSupport), but they will not receive any such signals
668  * until they wait on a latch.
669  */
670  pqsignal_pm(SIGURG, SIG_IGN); /* ignored */
671 #endif
672 
673  /*
674  * No other place in Postgres should touch SIGTTIN/SIGTTOU handling. We
675  * ignore those signals in a postmaster environment, so that there is no
676  * risk of a child process freezing up due to writing to stderr. But for
677  * a standalone backend, their default handling is reasonable. Hence, all
678  * child processes should just allow the inherited settings to stand.
679  */
680 #ifdef SIGTTIN
681  pqsignal_pm(SIGTTIN, SIG_IGN); /* ignored */
682 #endif
683 #ifdef SIGTTOU
684  pqsignal_pm(SIGTTOU, SIG_IGN); /* ignored */
685 #endif
686 
687  /* ignore SIGXFSZ, so that ulimit violations work like disk full */
688 #ifdef SIGXFSZ
689  pqsignal_pm(SIGXFSZ, SIG_IGN); /* ignored */
690 #endif
691 
692  /*
693  * Options setup
694  */
696 
697  opterr = 1;
698 
699  /*
700  * Parse command-line options. CAUTION: keep this in sync with
701  * tcop/postgres.c (the option sets should not conflict) and with the
702  * common help() function in main/main.c.
703  */
704  while ((opt = getopt(argc, argv, "B:bc:C:D:d:EeFf:h:ijk:lN:nOPp:r:S:sTt:W:-:")) != -1)
705  {
706  switch (opt)
707  {
708  case 'B':
709  SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
710  break;
711 
712  case 'b':
713  /* Undocumented flag used for binary upgrades */
714  IsBinaryUpgrade = true;
715  break;
716 
717  case 'C':
718  output_config_variable = strdup(optarg);
719  break;
720 
721  case 'D':
722  userDoption = strdup(optarg);
723  break;
724 
725  case 'd':
727  break;
728 
729  case 'E':
730  SetConfigOption("log_statement", "all", PGC_POSTMASTER, PGC_S_ARGV);
731  break;
732 
733  case 'e':
734  SetConfigOption("datestyle", "euro", PGC_POSTMASTER, PGC_S_ARGV);
735  break;
736 
737  case 'F':
738  SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
739  break;
740 
741  case 'f':
743  {
744  write_stderr("%s: invalid argument for option -f: \"%s\"\n",
745  progname, optarg);
746  ExitPostmaster(1);
747  }
748  break;
749 
750  case 'h':
751  SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
752  break;
753 
754  case 'i':
755  SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
756  break;
757 
758  case 'j':
759  /* only used by interactive backend */
760  break;
761 
762  case 'k':
763  SetConfigOption("unix_socket_directories", optarg, PGC_POSTMASTER, PGC_S_ARGV);
764  break;
765 
766  case 'l':
767  SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
768  break;
769 
770  case 'N':
771  SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
772  break;
773 
774  case 'n':
775  /* Don't reinit shared mem after abnormal exit */
776  Reinit = false;
777  break;
778 
779  case 'O':
780  SetConfigOption("allow_system_table_mods", "true", PGC_POSTMASTER, PGC_S_ARGV);
781  break;
782 
783  case 'P':
784  SetConfigOption("ignore_system_indexes", "true", PGC_POSTMASTER, PGC_S_ARGV);
785  break;
786 
787  case 'p':
789  break;
790 
791  case 'r':
792  /* only used by single-user backend */
793  break;
794 
795  case 'S':
797  break;
798 
799  case 's':
800  SetConfigOption("log_statement_stats", "true", PGC_POSTMASTER, PGC_S_ARGV);
801  break;
802 
803  case 'T':
804 
805  /*
806  * In the event that some backend dumps core, send SIGSTOP,
807  * rather than SIGQUIT, to all its peers. This lets the wily
808  * post_hacker collect core dumps from everyone.
809  */
810  SendStop = true;
811  break;
812 
813  case 't':
814  {
815  const char *tmp = get_stats_option_name(optarg);
816 
817  if (tmp)
818  {
820  }
821  else
822  {
823  write_stderr("%s: invalid argument for option -t: \"%s\"\n",
824  progname, optarg);
825  ExitPostmaster(1);
826  }
827  break;
828  }
829 
830  case 'W':
831  SetConfigOption("post_auth_delay", optarg, PGC_POSTMASTER, PGC_S_ARGV);
832  break;
833 
834  case 'c':
835  case '-':
836  {
837  char *name,
838  *value;
839 
841  if (!value)
842  {
843  if (opt == '-')
844  ereport(ERROR,
845  (errcode(ERRCODE_SYNTAX_ERROR),
846  errmsg("--%s requires a value",
847  optarg)));
848  else
849  ereport(ERROR,
850  (errcode(ERRCODE_SYNTAX_ERROR),
851  errmsg("-c %s requires a value",
852  optarg)));
853  }
854 
856  free(name);
857  if (value)
858  free(value);
859  break;
860  }
861 
862  default:
863  write_stderr("Try \"%s --help\" for more information.\n",
864  progname);
865  ExitPostmaster(1);
866  }
867  }
868 
869  /*
870  * Postmaster accepts no non-option switch arguments.
871  */
872  if (optind < argc)
873  {
874  write_stderr("%s: invalid argument: \"%s\"\n",
875  progname, argv[optind]);
876  write_stderr("Try \"%s --help\" for more information.\n",
877  progname);
878  ExitPostmaster(1);
879  }
880 
881  /*
882  * Locate the proper configuration files and data directory, and read
883  * postgresql.conf for the first time.
884  */
886  ExitPostmaster(2);
887 
888  if (output_config_variable != NULL)
889  {
890  /*
891  * If this is a runtime-computed GUC, it hasn't yet been initialized,
892  * and the present value is not useful. However, this is a convenient
893  * place to print the value for most GUCs because it is safe to run
894  * postmaster startup to this point even if the server is already
895  * running. For the handful of runtime-computed GUCs that we cannot
896  * provide meaningful values for yet, we wait until later in
897  * postmaster startup to print the value. We won't be able to use -C
898  * on running servers for those GUCs, but using this option now would
899  * lead to incorrect results for them.
900  */
901  int flags = GetConfigOptionFlags(output_config_variable, true);
902 
903  if ((flags & GUC_RUNTIME_COMPUTED) == 0)
904  {
905  /*
906  * "-C guc" was specified, so print GUC's value and exit. No
907  * extra permission check is needed because the user is reading
908  * inside the data dir.
909  */
910  const char *config_val = GetConfigOption(output_config_variable,
911  false, false);
912 
913  puts(config_val ? config_val : "");
914  ExitPostmaster(0);
915  }
916 
917  /*
918  * A runtime-computed GUC will be printed later on. As we initialize
919  * a server startup sequence, silence any log messages that may show
920  * up in the output generated. FATAL and more severe messages are
921  * useful to show, even if one would only expect at least PANIC. LOG
922  * entries are hidden.
923  */
924  SetConfigOption("log_min_messages", "FATAL", PGC_INTERNAL,
926  }
927 
928  /* Verify that DataDir looks reasonable */
929  checkDataDir();
930 
931  /* Check that pg_control exists */
933 
934  /* And switch working directory into it */
935  ChangeToDataDir();
936 
937  /*
938  * Check for invalid combinations of GUC settings.
939  */
941  {
942  write_stderr("%s: superuser_reserved_connections (%d) must be less than max_connections (%d)\n",
943  progname,
945  ExitPostmaster(1);
946  }
948  ereport(ERROR,
949  (errmsg("WAL archival cannot be enabled when wal_level is \"minimal\"")));
951  ereport(ERROR,
952  (errmsg("WAL streaming (max_wal_senders > 0) requires wal_level \"replica\" or \"logical\"")));
953 
954  /*
955  * Other one-time internal sanity checks can go here, if they are fast.
956  * (Put any slow processing further down, after postmaster.pid creation.)
957  */
958  if (!CheckDateTokenTables())
959  {
960  write_stderr("%s: invalid datetoken tables, please fix\n", progname);
961  ExitPostmaster(1);
962  }
963 
964  /*
965  * Now that we are done processing the postmaster arguments, reset
966  * getopt(3) library so that it will work correctly in subprocesses.
967  */
968  optind = 1;
969 #ifdef HAVE_INT_OPTRESET
970  optreset = 1; /* some systems need this too */
971 #endif
972 
973  /* For debugging: display postmaster environment */
974  {
975  extern char **environ;
976  char **p;
977 
978  ereport(DEBUG3,
979  (errmsg_internal("%s: PostmasterMain: initial environment dump:",
980  progname)));
981  ereport(DEBUG3,
982  (errmsg_internal("-----------------------------------------")));
983  for (p = environ; *p; ++p)
984  ereport(DEBUG3,
985  (errmsg_internal("\t%s", *p)));
986  ereport(DEBUG3,
987  (errmsg_internal("-----------------------------------------")));
988  }
989 
990  /*
991  * Create lockfile for data directory.
992  *
993  * We want to do this before we try to grab the input sockets, because the
994  * data directory interlock is more reliable than the socket-file
995  * interlock (thanks to whoever decided to put socket files in /tmp :-().
996  * For the same reason, it's best to grab the TCP socket(s) before the
997  * Unix socket(s).
998  *
999  * Also note that this internally sets up the on_proc_exit function that
1000  * is responsible for removing both data directory and socket lockfiles;
1001  * so it must happen before opening sockets so that at exit, the socket
1002  * lockfiles go away after CloseServerPorts runs.
1003  */
1004  CreateDataDirLockFile(true);
1005 
1006  /*
1007  * Read the control file (for error checking and config info).
1008  *
1009  * Since we verify the control file's CRC, this has a useful side effect
1010  * on machines where we need a run-time test for CRC support instructions.
1011  * The postmaster will do the test once at startup, and then its child
1012  * processes will inherit the correct function pointer and not need to
1013  * repeat the test.
1014  */
1015  LocalProcessControlFile(false);
1016 
1017  /*
1018  * Register the apply launcher. It's probably a good idea to call this
1019  * before any modules had a chance to take the background worker slots.
1020  */
1022 
1023  /*
1024  * process any libraries that should be preloaded at postmaster start
1025  */
1027 
1028  /*
1029  * Initialize SSL library, if specified.
1030  */
1031 #ifdef USE_SSL
1032  if (EnableSSL)
1033  {
1034  (void) secure_initialize(true);
1035  LoadedSSL = true;
1036  }
1037 #endif
1038 
1039  /*
1040  * Now that loadable modules have had their chance to alter any GUCs,
1041  * calculate MaxBackends.
1042  */
1044 
1045  /*
1046  * Give preloaded libraries a chance to request additional shared memory.
1047  */
1049 
1050  /*
1051  * Now that loadable modules have had their chance to request additional
1052  * shared memory, determine the value of any runtime-computed GUCs that
1053  * depend on the amount of shared memory required.
1054  */
1056 
1057  /*
1058  * Now that modules have been loaded, we can process any custom resource
1059  * managers specified in the wal_consistency_checking GUC.
1060  */
1062 
1063  /*
1064  * If -C was specified with a runtime-computed GUC, we held off printing
1065  * the value earlier, as the GUC was not yet initialized. We handle -C
1066  * for most GUCs before we lock the data directory so that the option may
1067  * be used on a running server. However, a handful of GUCs are runtime-
1068  * computed and do not have meaningful values until after locking the data
1069  * directory, and we cannot safely calculate their values earlier on a
1070  * running server. At this point, such GUCs should be properly
1071  * initialized, and we haven't yet set up shared memory, so this is a good
1072  * time to handle the -C option for these special GUCs.
1073  */
1074  if (output_config_variable != NULL)
1075  {
1076  const char *config_val = GetConfigOption(output_config_variable,
1077  false, false);
1078 
1079  puts(config_val ? config_val : "");
1080  ExitPostmaster(0);
1081  }
1082 
1083  /*
1084  * Set up shared memory and semaphores.
1085  */
1086  reset_shared();
1087 
1088  /*
1089  * Estimate number of openable files. This must happen after setting up
1090  * semaphores, because on some platforms semaphores count as open files.
1091  */
1092  set_max_safe_fds();
1093 
1094  /*
1095  * Set reference point for stack-depth checking.
1096  */
1097  (void) set_stack_base();
1098 
1099  /*
1100  * Initialize pipe (or process handle on Windows) that allows children to
1101  * wake up from sleep on postmaster death.
1102  */
1104 
1105 #ifdef WIN32
1106 
1107  /*
1108  * Initialize I/O completion port used to deliver list of dead children.
1109  */
1110  win32ChildQueue = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 1);
1111  if (win32ChildQueue == NULL)
1112  ereport(FATAL,
1113  (errmsg("could not create I/O completion port for child queue")));
1114 #endif
1115 
1116 #ifdef EXEC_BACKEND
1117  /* Write out nondefault GUC settings for child processes to use */
1118  write_nondefault_variables(PGC_POSTMASTER);
1119 
1120  /*
1121  * Clean out the temp directory used to transmit parameters to child
1122  * processes (see internal_forkexec, below). We must do this before
1123  * launching any child processes, else we have a race condition: we could
1124  * remove a parameter file before the child can read it. It should be
1125  * safe to do so now, because we verified earlier that there are no
1126  * conflicting Postgres processes in this data directory.
1127  */
1129 #endif
1130 
1131  /*
1132  * Forcibly remove the files signaling a standby promotion request.
1133  * Otherwise, the existence of those files triggers a promotion too early,
1134  * whether a user wants that or not.
1135  *
1136  * This removal of files is usually unnecessary because they can exist
1137  * only during a few moments during a standby promotion. However there is
1138  * a race condition: if pg_ctl promote is executed and creates the files
1139  * during a promotion, the files can stay around even after the server is
1140  * brought up to be the primary. Then, if a new standby starts by using
1141  * the backup taken from the new primary, the files can exist at server
1142  * startup and must be removed in order to avoid an unexpected promotion.
1143  *
1144  * Note that promotion signal files need to be removed before the startup
1145  * process is invoked. Because, after that, they can be used by
1146  * postmaster's SIGUSR1 signal handler.
1147  */
1149 
1150  /* Do the same for logrotate signal file */
1152 
1153  /* Remove any outdated file holding the current log filenames. */
1154  if (unlink(LOG_METAINFO_DATAFILE) < 0 && errno != ENOENT)
1155  ereport(LOG,
1157  errmsg("could not remove file \"%s\": %m",
1159 
1160  /*
1161  * If enabled, start up syslogger collection subprocess
1162  */
1164 
1165  /*
1166  * Reset whereToSendOutput from DestDebug (its starting state) to
1167  * DestNone. This stops ereport from sending log messages to stderr unless
1168  * Log_destination permits. We don't do this until the postmaster is
1169  * fully launched, since startup failures may as well be reported to
1170  * stderr.
1171  *
1172  * If we are in fact disabling logging to stderr, first emit a log message
1173  * saying so, to provide a breadcrumb trail for users who may not remember
1174  * that their logging is configured to go somewhere else.
1175  */
1177  ereport(LOG,
1178  (errmsg("ending log output to stderr"),
1179  errhint("Future log output will go to log destination \"%s\".",
1181 
1183 
1184  /*
1185  * Report server startup in log. While we could emit this much earlier,
1186  * it seems best to do so after starting the log collector, if we intend
1187  * to use one.
1188  */
1189  ereport(LOG,
1190  (errmsg("starting %s", PG_VERSION_STR)));
1191 
1192  /*
1193  * Establish input sockets.
1194  *
1195  * First, mark them all closed, and set up an on_proc_exit function that's
1196  * charged with closing the sockets again at postmaster shutdown.
1197  */
1198  for (i = 0; i < MAXLISTEN; i++)
1200 
1202 
1203  if (ListenAddresses)
1204  {
1205  char *rawstring;
1206  List *elemlist;
1207  ListCell *l;
1208  int success = 0;
1209 
1210  /* Need a modifiable copy of ListenAddresses */
1211  rawstring = pstrdup(ListenAddresses);
1212 
1213  /* Parse string into list of hostnames */
1214  if (!SplitGUCList(rawstring, ',', &elemlist))
1215  {
1216  /* syntax error in list */
1217  ereport(FATAL,
1218  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1219  errmsg("invalid list syntax in parameter \"%s\"",
1220  "listen_addresses")));
1221  }
1222 
1223  foreach(l, elemlist)
1224  {
1225  char *curhost = (char *) lfirst(l);
1226 
1227  if (strcmp(curhost, "*") == 0)
1228  status = StreamServerPort(AF_UNSPEC, NULL,
1229  (unsigned short) PostPortNumber,
1230  NULL,
1232  else
1233  status = StreamServerPort(AF_UNSPEC, curhost,
1234  (unsigned short) PostPortNumber,
1235  NULL,
1237 
1238  if (status == STATUS_OK)
1239  {
1240  success++;
1241  /* record the first successful host addr in lockfile */
1242  if (!listen_addr_saved)
1243  {
1245  listen_addr_saved = true;
1246  }
1247  }
1248  else
1249  ereport(WARNING,
1250  (errmsg("could not create listen socket for \"%s\"",
1251  curhost)));
1252  }
1253 
1254  if (!success && elemlist != NIL)
1255  ereport(FATAL,
1256  (errmsg("could not create any TCP/IP sockets")));
1257 
1258  list_free(elemlist);
1259  pfree(rawstring);
1260  }
1261 
1262 #ifdef USE_BONJOUR
1263  /* Register for Bonjour only if we opened TCP socket(s) */
1265  {
1266  DNSServiceErrorType err;
1267 
1268  /*
1269  * We pass 0 for interface_index, which will result in registering on
1270  * all "applicable" interfaces. It's not entirely clear from the
1271  * DNS-SD docs whether this would be appropriate if we have bound to
1272  * just a subset of the available network interfaces.
1273  */
1274  err = DNSServiceRegister(&bonjour_sdref,
1275  0,
1276  0,
1277  bonjour_name,
1278  "_postgresql._tcp.",
1279  NULL,
1280  NULL,
1282  0,
1283  NULL,
1284  NULL,
1285  NULL);
1286  if (err != kDNSServiceErr_NoError)
1287  ereport(LOG,
1288  (errmsg("DNSServiceRegister() failed: error code %ld",
1289  (long) err)));
1290 
1291  /*
1292  * We don't bother to read the mDNS daemon's reply, and we expect that
1293  * it will automatically terminate our registration when the socket is
1294  * closed at postmaster termination. So there's nothing more to be
1295  * done here. However, the bonjour_sdref is kept around so that
1296  * forked children can close their copies of the socket.
1297  */
1298  }
1299 #endif
1300 
1301 #ifdef HAVE_UNIX_SOCKETS
1303  {
1304  char *rawstring;
1305  List *elemlist;
1306  ListCell *l;
1307  int success = 0;
1308 
1309  /* Need a modifiable copy of Unix_socket_directories */
1310  rawstring = pstrdup(Unix_socket_directories);
1311 
1312  /* Parse string into list of directories */
1313  if (!SplitDirectoriesString(rawstring, ',', &elemlist))
1314  {
1315  /* syntax error in list */
1316  ereport(FATAL,
1317  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1318  errmsg("invalid list syntax in parameter \"%s\"",
1319  "unix_socket_directories")));
1320  }
1321 
1322  foreach(l, elemlist)
1323  {
1324  char *socketdir = (char *) lfirst(l);
1325 
1326  status = StreamServerPort(AF_UNIX, NULL,
1327  (unsigned short) PostPortNumber,
1328  socketdir,
1330 
1331  if (status == STATUS_OK)
1332  {
1333  success++;
1334  /* record the first successful Unix socket in lockfile */
1335  if (success == 1)
1337  }
1338  else
1339  ereport(WARNING,
1340  (errmsg("could not create Unix-domain socket in directory \"%s\"",
1341  socketdir)));
1342  }
1343 
1344  if (!success && elemlist != NIL)
1345  ereport(FATAL,
1346  (errmsg("could not create any Unix-domain sockets")));
1347 
1348  list_free_deep(elemlist);
1349  pfree(rawstring);
1350  }
1351 #endif
1352 
1353  /*
1354  * check that we have some socket to listen on
1355  */
1356  if (ListenSocket[0] == PGINVALID_SOCKET)
1357  ereport(FATAL,
1358  (errmsg("no socket created for listening")));
1359 
1360  /*
1361  * If no valid TCP ports, write an empty line for listen address,
1362  * indicating the Unix socket must be used. Note that this line is not
1363  * added to the lock file until there is a socket backing it.
1364  */
1365  if (!listen_addr_saved)
1367 
1368  /*
1369  * Record postmaster options. We delay this till now to avoid recording
1370  * bogus options (eg, unusable port number).
1371  */
1372  if (!CreateOptsFile(argc, argv, my_exec_path))
1373  ExitPostmaster(1);
1374 
1375  /*
1376  * Write the external PID file if requested
1377  */
1378  if (external_pid_file)
1379  {
1380  FILE *fpidfile = fopen(external_pid_file, "w");
1381 
1382  if (fpidfile)
1383  {
1384  fprintf(fpidfile, "%d\n", MyProcPid);
1385  fclose(fpidfile);
1386 
1387  /* Make PID file world readable */
1388  if (chmod(external_pid_file, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) != 0)
1389  write_stderr("%s: could not change permissions of external PID file \"%s\": %s\n",
1391  }
1392  else
1393  write_stderr("%s: could not write external PID file \"%s\": %s\n",
1395 
1397  }
1398 
1399  /*
1400  * Remove old temporary files. At this point there can be no other
1401  * Postgres processes running in this directory, so this should be safe.
1402  */
1404 
1405  /*
1406  * Initialize the autovacuum subsystem (again, no process start yet)
1407  */
1408  autovac_init();
1409 
1410  /*
1411  * Load configuration files for client authentication.
1412  */
1413  if (!load_hba())
1414  {
1415  /*
1416  * It makes no sense to continue if we fail to load the HBA file,
1417  * since there is no way to connect to the database in this case.
1418  */
1419  ereport(FATAL,
1420  (errmsg("could not load pg_hba.conf")));
1421  }
1422  if (!load_ident())
1423  {
1424  /*
1425  * We can start up without the IDENT file, although it means that you
1426  * cannot log in using any of the authentication methods that need a
1427  * user name mapping. load_ident() already logged the details of error
1428  * to the log.
1429  */
1430  }
1431 
1432 #ifdef HAVE_PTHREAD_IS_THREADED_NP
1433 
1434  /*
1435  * On macOS, libintl replaces setlocale() with a version that calls
1436  * CFLocaleCopyCurrent() when its second argument is "" and every relevant
1437  * environment variable is unset or empty. CFLocaleCopyCurrent() makes
1438  * the process multithreaded. The postmaster calls sigprocmask() and
1439  * calls fork() without an immediate exec(), both of which have undefined
1440  * behavior in a multithreaded program. A multithreaded postmaster is the
1441  * normal case on Windows, which offers neither fork() nor sigprocmask().
1442  */
1443  if (pthread_is_threaded_np() != 0)
1444  ereport(FATAL,
1445  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1446  errmsg("postmaster became multithreaded during startup"),
1447  errhint("Set the LC_ALL environment variable to a valid locale.")));
1448 #endif
1449 
1450  /*
1451  * Remember postmaster startup time
1452  */
1454 
1455  /*
1456  * Report postmaster status in the postmaster.pid file, to allow pg_ctl to
1457  * see what's happening.
1458  */
1460 
1461  /* Start bgwriter and checkpointer so they can help with recovery */
1462  if (CheckpointerPID == 0)
1464  if (BgWriterPID == 0)
1466 
1467  /*
1468  * We're ready to rock and roll...
1469  */
1471  Assert(StartupPID != 0);
1473  pmState = PM_STARTUP;
1474 
1475  /* Some workers may be scheduled to start now */
1477 
1478  status = ServerLoop();
1479 
1480  /*
1481  * ServerLoop probably shouldn't ever return, but if it does, close down.
1482  */
1484 
1485  abort(); /* not reached */
1486 }
1487 
1488 
1489 /*
1490  * on_proc_exit callback to close server's listen sockets
 *
 * The body is laid out as three commented steps: a pass over all MAXLISTEN
 * listen-socket slots, removal of the Unix-socket filesystem entries, and a
 * deliberate no-op for the socket lock files (left to a later callback).
 *
 * NOTE(review): this copy of the function is incomplete.  The embedded
 * listing numbers skip 1493 (the line carrying the function name and
 * parameter list), 1505 and 1507-1508 (the test and the statements inside
 * the loop), and 1517 (the statement performing the Unix-socket cleanup).
 * Restore those lines from the upstream file before compiling or
 * restructuring this block.
1491  */
1492 static void
1494 {
1495  int i;
1496 
1497  /*
1498  * First, explicitly close all the socket FDs. We used to just let this
1499  * happen implicitly at postmaster exit, but it's better to close them
1500  * before we remove the postmaster.pid lockfile; otherwise there's a race
1501  * condition if a new postmaster wants to re-use the TCP port number.
1502  */
1503  for (i = 0; i < MAXLISTEN; i++)
1504  {
1506  {
1509  }
1510  }
1511 
1512  /*
1513  * Next, remove any filesystem entries for Unix sockets. To avoid race
1514  * conditions against incoming postmasters, this must happen after closing
1515  * the sockets and before removing lock files.
1516  */
1518 
1519  /*
1520  * We don't do anything about socket lock files here; those will be
1521  * removed in a later on_proc_exit callback.
1522  */
1523 }
1524 
1525 /*
1526  * on_proc_exit callback to delete external_pid_file
 *
 * Does nothing when external_pid_file is not set.  The result of unlink()
 * is not checked, so a failure to remove the file goes unreported.
 *
 * NOTE(review): listing line 1529, which carries the function name and
 * parameter list, is absent from this copy and must be restored from the
 * upstream file.
1527  */
1528 static void
1530 {
1531  if (external_pid_file)
1532  unlink(external_pid_file);
1533 }
1534 
1535 
1536 /*
1537  * Compute and check the directory paths to files that are part of the
1538  * installation (as deduced from the postgres executable's own location)
 *
 * Steps visible here:
 *   - find_my_exec() resolves argv0 into my_exec_path; failure is FATAL.
 *   - Under EXEC_BACKEND only, find_other_exec() resolves the matching
 *     "postgres" binary into postgres_exec_path; failure is FATAL.
 *   - pkglib_path is opened with AllocateDir() as a readability check and
 *     released again with FreeDir(); a NULL result is reported at ERROR
 *     level with a hint naming my_exec_path.
 *
 * NOTE(review): this copy is incomplete.  Listing line 1541 (function name
 * and parameter list, which is where argv0 is presumably declared), line
 * 1563 (the statement under the "Locate the pkglib directory" comment) and
 * line 1575 (the first argument inside the ereport(ERROR, ...) call) are
 * absent and must be restored from the upstream file.
1539  */
1540 static void
1542 {
1543  DIR *pdir;
1544 
1545  /* Locate the postgres executable itself */
1546  if (find_my_exec(argv0, my_exec_path) < 0)
1547  ereport(FATAL,
1548  (errmsg("%s: could not locate my own executable path", argv0)));
1549 
1550 #ifdef EXEC_BACKEND
1551  /* Locate executable backend before we change working directory */
1552  if (find_other_exec(argv0, "postgres", PG_BACKEND_VERSIONSTR,
1553  postgres_exec_path) < 0)
1554  ereport(FATAL,
1555  (errmsg("%s: could not locate matching postgres executable",
1556  argv0)));
1557 #endif
1558 
1559  /*
1560  * Locate the pkglib directory --- this has to be set early in case we try
1561  * to load any modules from it in response to postgresql.conf entries.
1562  */
1564 
1565  /*
1566  * Verify that there's a readable directory there; otherwise the Postgres
1567  * installation is incomplete or corrupt. (A typical cause of this
1568  * failure is that the postgres executable has been moved or hardlinked to
1569  * some directory that's not a sibling of the installation lib/
1570  * directory.)
1571  */
1572  pdir = AllocateDir(pkglib_path);
1573  if (pdir == NULL)
1574  ereport(ERROR,
1576  errmsg("could not open directory \"%s\": %m",
1577  pkglib_path),
1578  errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
1579  my_exec_path)));
1580  FreeDir(pdir);
1581 
1582  /*
1583  * XXX is it worth similarly checking the share/ directory? If the lib/
1584  * directory is there, then share/ probably is too.
1585  */
1586 }
1587 
1588 /*
1589  * Check that pg_control exists in the correct location in the data directory.
1590  *
1591  * No attempt is made to validate the contents of pg_control here. This is
1592  * just a sanity check to see if we are looking at a real data directory.
 *
 * The file probed is "<DataDir>/global/pg_control", opened for binary
 * reading.  If it cannot be opened, a three-line explanation (including
 * strerror(errno)) is written via write_stderr() and ExitPostmaster(2) is
 * called; otherwise the file is closed again immediately.
 *
 * NOTE(review): listing line 1595, which carries the function name and
 * parameter list, is absent from this copy and must be restored from the
 * upstream file.
1593  */
1594 static void
1596 {
1597  char path[MAXPGPATH];
1598  FILE *fp;
1599 
1600  snprintf(path, sizeof(path), "%s/global/pg_control", DataDir);
1601 
1602  fp = AllocateFile(path, PG_BINARY_R);
1603  if (fp == NULL)
1604  {
1605  write_stderr("%s: could not find the database system\n"
1606  "Expected to find it in the directory \"%s\",\n"
1607  "but could not open file \"%s\": %s\n",
1608  progname, DataDir, path, strerror(errno));
1609  ExitPostmaster(2);
1610  }
1611  FreeFile(fp);
1612 }
1613 
1614 /*
1615  * Determine how long we should let ServerLoop sleep.
1616  *
1617  * In normal conditions we wait at most one minute, to ensure that the other
1618  * background tasks handled by ServerLoop get done even when no requests are
1619  * arriving. However, if there are background workers waiting to be started,
1620  * we don't actually sleep so that they are quickly serviced. Other exception
1621  * cases are as shown in the code.
 *
 * timeout is an output parameter: tv_sec and tv_usec are both assigned on
 * every return path visible here.  The abort-timeout branch clamps tv_sec
 * to be non-negative; the remaining branches use 60 seconds both as the
 * default and as the ceiling applied to tv_sec.
 *
 * NOTE(review): this copy is incomplete.  The embedded listing numbers skip
 * 1633 (the rest of the first "if" condition), 1668 (the loop header that
 * drives "siter"), 1678 (the start of the condition that ends in
 * "|| rw->rw_terminate") and 1697 (the start of the call that fills in
 * "secs" and "microsecs").  As shown, secs and microsecs would be read
 * uninitialized; restore the missing lines from the upstream file before
 * compiling or restructuring this block.
1622  */
1623 static void
1624 DetermineSleepTime(struct timeval *timeout)
1625 {
1626  TimestampTz next_wakeup = 0;
1627 
1628  /*
1629  * Normal case: either there are no background workers at all, or we're in
1630  * a shutdown sequence (during which we ignore bgworkers altogether).
1631  */
1632  if (Shutdown > NoShutdown ||
1634  {
1635  if (AbortStartTime != 0)
1636  {
1637  /* time left to abort; clamp to 0 in case it already expired */
1638  timeout->tv_sec = SIGKILL_CHILDREN_AFTER_SECS -
1639  (time(NULL) - AbortStartTime);
1640  timeout->tv_sec = Max(timeout->tv_sec, 0);
1641  timeout->tv_usec = 0;
1642  }
1643  else
1644  {
1645  timeout->tv_sec = 60;
1646  timeout->tv_usec = 0;
1647  }
1648  return;
1649  }
1650 
1651  if (StartWorkerNeeded)
1652  {
1653  timeout->tv_sec = 0;
1654  timeout->tv_usec = 0;
1655  return;
1656  }
1657 
1658  if (HaveCrashedWorker)
1659  {
1660  slist_mutable_iter siter;
1661 
1662  /*
1663  * When there are crashed bgworkers, we sleep just long enough that
1664  * they are restarted when they request to be. Scan the list to
1665  * determine the minimum of all wakeup times according to most recent
1666  * crash time and requested restart interval.
1667  */
1669  {
1670  RegisteredBgWorker *rw;
1671  TimestampTz this_wakeup;
1672 
1673  rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
1674 
1675  if (rw->rw_crashed_at == 0)
1676  continue;
1677 
1679  || rw->rw_terminate)
1680  {
1681  ForgetBackgroundWorker(&siter);
1682  continue;
1683  }
1684 
1685  this_wakeup = TimestampTzPlusMilliseconds(rw->rw_crashed_at,
1686  1000L * rw->rw_worker.bgw_restart_time);
1687  if (next_wakeup == 0 || this_wakeup < next_wakeup)
1688  next_wakeup = this_wakeup;
1689  }
1690  }
1691 
1692  if (next_wakeup != 0)
1693  {
1694  long secs;
1695  int microsecs;
1696 
1698  &secs, &microsecs);
1699  timeout->tv_sec = secs;
1700  timeout->tv_usec = microsecs;
1701 
1702  /* Ensure we don't exceed one minute */
1703  if (timeout->tv_sec > 60)
1704  {
1705  timeout->tv_sec = 60;
1706  timeout->tv_usec = 0;
1707  }
1708  }
1709  else
1710  {
1711  timeout->tv_sec = 60;
1712  timeout->tv_usec = 0;
1713  }
1714 }
1715 
1716 /*
1717  * Main idle loop of postmaster
1718  *
1719  * NB: Needs to be called with signals blocked
 *
 * Each pass sleeps (in select(), or in a fixed 100 msec pg_usleep() while
 * pmState is PM_WAIT_DEAD_END, where selres is forced to 0 so that no
 * connection is accepted), services any listen sockets reported ready, and
 * then works through the housekeeping checks that make up the rest of the
 * loop body.  The lock-file recheck and the socket-file touch are rate
 * limited using last_lockfile_recheck_time and last_touch_time.
 *
 * Returns STATUS_ERROR if select() fails with an errno other than EINTR or
 * EWOULDBLOCK.  That is the only return visible in this copy; otherwise the
 * for (;;) loop does not terminate.
 *
 * NOTE(review): this copy is incomplete.  The embedded listing numbers skip
 * 1722 (function name and parameter list), 1753, 1768, 1781, 1797, 1803,
 * 1806, 1821, 1829, 1832, 1834, 1843, 1852, 1855, 1862, 1865, 1869,
 * 1873-1874, 1877-1878, 1909, 1914, 1935 and 1948.  Several "if" statements
 * below therefore appear without a body or with a truncated condition.
 * Restore the missing lines from the upstream file before compiling or
 * restructuring this block.
1720  */
1721 static int
1723 {
1724  fd_set readmask;
1725  int nSockets;
1726  time_t last_lockfile_recheck_time,
1727  last_touch_time;
1728 
1729  last_lockfile_recheck_time = last_touch_time = time(NULL);
1730 
1731  nSockets = initMasks(&readmask);
1732 
1733  for (;;)
1734  {
1735  fd_set rmask;
1736  int selres;
1737  time_t now;
1738 
1739  /*
1740  * Wait for a connection request to arrive.
1741  *
1742  * We block all signals except while sleeping. That makes it safe for
1743  * signal handlers, which again block all signals while executing, to
1744  * do nontrivial work.
1745  *
1746  * If we are in PM_WAIT_DEAD_END state, then we don't want to accept
1747  * any new connections, so we don't call select(), and just sleep.
1748  */
1749  memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
1750 
1751  if (pmState == PM_WAIT_DEAD_END)
1752  {
1754 
1755  pg_usleep(100000L); /* 100 msec seems reasonable */
1756  selres = 0;
1757 
1758  PG_SETMASK(&BlockSig);
1759  }
1760  else
1761  {
1762  /* must set timeout each time; some OSes change it! */
1763  struct timeval timeout;
1764 
1765  /* Needs to run with blocked signals! */
1766  DetermineSleepTime(&timeout);
1767 
1769 
1770  selres = select(nSockets, &rmask, NULL, NULL, &timeout);
1771 
1772  PG_SETMASK(&BlockSig);
1773  }
1774 
1775  /* Now check the select() result */
1776  if (selres < 0)
1777  {
1778  if (errno != EINTR && errno != EWOULDBLOCK)
1779  {
1780  ereport(LOG,
1782  errmsg("select() failed in postmaster: %m")));
1783  return STATUS_ERROR;
1784  }
1785  }
1786 
1787  /*
1788  * New connection pending on any of our sockets? If so, fork a child
1789  * process to deal with it.
1790  */
1791  if (selres > 0)
1792  {
1793  int i;
1794 
1795  for (i = 0; i < MAXLISTEN; i++)
1796  {
1798  break;
1799  if (FD_ISSET(ListenSocket[i], &rmask))
1800  {
1801  Port *port;
1802 
1804  if (port)
1805  {
1807 
1808  /*
1809  * We no longer need the open socket or port structure
1810  * in this process
1811  */
1812  StreamClose(port->sock);
1813  ConnFree(port);
1814  }
1815  }
1816  }
1817  }
1818 
1819  /* If we have lost the log collector, try to start a new one */
1820  if (SysLoggerPID == 0 && Logging_collector)
1822 
1823  /*
1824  * If no background writer process is running, and we are not in a
1825  * state that prevents it, start one. It doesn't matter if this
1826  * fails, we'll just try again later. Likewise for the checkpointer.
1827  */
1828  if (pmState == PM_RUN || pmState == PM_RECOVERY ||
1830  {
1831  if (CheckpointerPID == 0)
1833  if (BgWriterPID == 0)
1835  }
1836 
1837  /*
1838  * Likewise, if we have lost the walwriter process, try to start a new
1839  * one. But this is needed only in normal operation (else we cannot
1840  * be writing any new WAL).
1841  */
1842  if (WalWriterPID == 0 && pmState == PM_RUN)
1844 
1845  /*
1846  * If we have lost the autovacuum launcher, try to start a new one. We
1847  * don't want autovacuum to run in binary upgrade mode because
1848  * autovacuum might update relfrozenxid for empty tables before the
1849  * physical files are put in place.
1850  */
1851  if (!IsBinaryUpgrade && AutoVacPID == 0 &&
1853  pmState == PM_RUN)
1854  {
1856  if (AutoVacPID != 0)
1857  start_autovac_launcher = false; /* signal processed */
1858  }
1859 
1860  /* If we have lost the archiver, try to start a new one. */
1861  if (PgArchPID == 0 && PgArchStartupAllowed())
1863 
1864  /* If we need to signal the autovacuum launcher, do so now */
1866  {
1867  avlauncher_needs_signal = false;
1868  if (AutoVacPID != 0)
1870  }
1871 
1872  /* If we need to start a WAL receiver, try to do that now */
1875 
1876  /* Get other worker processes running, if needed */
1879 
1880 #ifdef HAVE_PTHREAD_IS_THREADED_NP
1881 
1882  /*
1883  * With assertions enabled, check regularly for appearance of
1884  * additional threads. All builds check at start and exit.
1885  */
1886  Assert(pthread_is_threaded_np() == 0);
1887 #endif
1888 
1889  /*
1890  * Lastly, check to see if it's time to do some things that we don't
1891  * want to do every single time through the loop, because they're a
1892  * bit expensive. Note that there's up to a minute of slop in when
1893  * these tasks will be performed, since DetermineSleepTime() will let
1894  * us sleep at most that long; except for SIGKILL timeout which has
1895  * special-case logic there.
1896  */
1897  now = time(NULL);
1898 
1899  /*
1900  * If we already sent SIGQUIT to children and they are slow to shut
1901  * down, it's time to send them SIGKILL. This doesn't happen
1902  * normally, but under certain conditions backends can get stuck while
1903  * shutting down. This is a last measure to get them unwedged.
1904  *
1905  * Note we also do this during recovery from a process crash.
1906  */
1907  if ((Shutdown >= ImmediateShutdown || (FatalError && !SendStop)) &&
1908  AbortStartTime != 0 &&
1910  {
1911  /* We were gentle with them before. Not anymore */
1912  ereport(LOG,
1913  (errmsg("issuing SIGKILL to recalcitrant children")));
1915  /* reset flag so we don't SIGKILL again */
1916  AbortStartTime = 0;
1917  }
1918 
1919  /*
1920  * Once a minute, verify that postmaster.pid hasn't been removed or
1921  * overwritten. If it has, we force a shutdown. This avoids having
1922  * postmasters and child processes hanging around after their database
1923  * is gone, and maybe causing problems if a new database cluster is
1924  * created in the same place. It also provides some protection
1925  * against a DBA foolishly removing postmaster.pid and manually
1926  * starting a new postmaster. Data corruption is likely to ensue from
1927  * that anyway, but we can minimize the damage by aborting ASAP.
1928  */
1929  if (now - last_lockfile_recheck_time >= 1 * SECS_PER_MINUTE)
1930  {
1931  if (!RecheckDataDirLockFile())
1932  {
1933  ereport(LOG,
1934  (errmsg("performing immediate shutdown because data directory lock file is invalid")));
1936  }
1937  last_lockfile_recheck_time = now;
1938  }
1939 
1940  /*
1941  * Touch Unix socket and lock files every 58 minutes, to ensure that
1942  * they are not removed by overzealous /tmp-cleaning tasks. We assume
1943  * no one runs cleaners with cutoff times of less than an hour ...
1944  */
1945  if (now - last_touch_time >= 58 * SECS_PER_MINUTE)
1946  {
1947  TouchSocketFiles();
1949  last_touch_time = now;
1950  }
1951  }
1952 }
1953 
1954 /*
1955  * Initialise the masks for select() for the ports we are listening on.
1956  * Return the number of sockets to listen on.
1957  */
1958 static int
1959 initMasks(fd_set *rmask)
1960 {
1961  int maxsock = -1;
1962  int i;
1963 
1964  FD_ZERO(rmask);
1965 
1966  for (i = 0; i < MAXLISTEN; i++)
1967  {
1968  int fd = ListenSocket[i];
1969 
1970  if (fd == PGINVALID_SOCKET)
1971  break;
1972  FD_SET(fd, rmask);
1973 
1974  if (fd > maxsock)
1975  maxsock = fd;
1976  }
1977 
1978  return maxsock + 1;
1979 }
1980 
1981 
1982 /*
1983  * Read a client's startup packet and do something according to it.
1984  *
1985  * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1986  * not return at all.
1987  *
1988  * (Note that ereport(FATAL) stuff is sent to the client, so only use it
1989  * if that's what you want. Return STATUS_ERROR if you don't want to
1990  * send anything to the client, which would typically be appropriate
1991  * if we detect a communications failure.)
1992  *
1993  * Set ssl_done and/or gss_done when negotiation of an encrypted layer
1994  * (currently, TLS or GSSAPI) is completed. A successful negotiation of either
1995  * encryption layer sets both flags, but a rejected negotiation sets only the
1996  * flag for that layer, since the client may wish to try the other one. We
1997  * should make no assumption here about the order in which the client may make
1998  * requests.
1999  */
2000 static int
2001 ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done)
2002 {
2003  int32 len;
2004  char *buf;
2005  ProtocolVersion proto;
2006  MemoryContext oldcontext;
2007 
2008  pq_startmsgread();
2009 
2010  /*
2011  * Grab the first byte of the length word separately, so that we can tell
2012  * whether we have no data at all or an incomplete packet. (This might
2013  * sound inefficient, but it's not really, because of buffering in
2014  * pqcomm.c.)
2015  */
2016  if (pq_getbytes((char *) &len, 1) == EOF)
2017  {
2018  /*
2019  * If we get no data at all, don't clutter the log with a complaint;
2020  * such cases often occur for legitimate reasons. An example is that
2021  * we might be here after responding to NEGOTIATE_SSL_CODE, and if the
2022  * client didn't like our response, it'll probably just drop the
2023  * connection. Service-monitoring software also often just opens and
2024  * closes a connection without sending anything. (So do port
2025  * scanners, which may be less benign, but it's not really our job to
2026  * notice those.)
2027  */
2028  return STATUS_ERROR;
2029  }
2030 
2031  if (pq_getbytes(((char *) &len) + 1, 3) == EOF)
2032  {
2033  /* Got a partial length word, so bleat about that */
2034  if (!ssl_done && !gss_done)
2036  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2037  errmsg("incomplete startup packet")));
2038  return STATUS_ERROR;
2039  }
2040 
2041  len = pg_ntoh32(len);
2042  len -= 4;
2043 
2044  if (len < (int32) sizeof(ProtocolVersion) ||
2046  {
2048  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2049  errmsg("invalid length of startup packet")));
2050  return STATUS_ERROR;
2051  }
2052 
2053  /*
2054  * Allocate space to hold the startup packet, plus one extra byte that's
2055  * initialized to be zero. This ensures we will have null termination of
2056  * all strings inside the packet.
2057  */
2058  buf = palloc(len + 1);
2059  buf[len] = '\0';
2060 
2061  if (pq_getbytes(buf, len) == EOF)
2062  {
2064  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2065  errmsg("incomplete startup packet")));
2066  return STATUS_ERROR;
2067  }
2068  pq_endmsgread();
2069 
2070  /*
2071  * The first field is either a protocol version number or a special
2072  * request code.
2073  */
2074  port->proto = proto = pg_ntoh32(*((ProtocolVersion *) buf));
2075 
2076  if (proto == CANCEL_REQUEST_CODE)
2077  {
2079  /* Not really an error, but we don't want to proceed further */
2080  return STATUS_ERROR;
2081  }
2082 
2083  if (proto == NEGOTIATE_SSL_CODE && !ssl_done)
2084  {
2085  char SSLok;
2086 
2087 #ifdef USE_SSL
2088  /* No SSL when disabled or on Unix sockets */
2089  if (!LoadedSSL || port->laddr.addr.ss_family == AF_UNIX)
2090  SSLok = 'N';
2091  else
2092  SSLok = 'S'; /* Support for SSL */
2093 #else
2094  SSLok = 'N'; /* No support for SSL */
2095 #endif
2096 
2097 retry1:
2098  if (send(port->sock, &SSLok, 1, 0) != 1)
2099  {
2100  if (errno == EINTR)
2101  goto retry1; /* if interrupted, just retry */
2104  errmsg("failed to send SSL negotiation response: %m")));
2105  return STATUS_ERROR; /* close the connection */
2106  }
2107 
2108 #ifdef USE_SSL
2109  if (SSLok == 'S' && secure_open_server(port) == -1)
2110  return STATUS_ERROR;
2111 #endif
2112 
2113  /*
2114  * At this point we should have no data already buffered. If we do,
2115  * it was received before we performed the SSL handshake, so it wasn't
2116  * encrypted and indeed may have been injected by a man-in-the-middle.
2117  * We report this case to the client.
2118  */
2119  if (pq_buffer_has_data())
2120  ereport(FATAL,
2121  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2122  errmsg("received unencrypted data after SSL request"),
2123  errdetail("This could be either a client-software bug or evidence of an attempted man-in-the-middle attack.")));
2124 
2125  /*
2126  * regular startup packet, cancel, etc packet should follow, but not
2127  * another SSL negotiation request, and a GSS request should only
2128  * follow if SSL was rejected (client may negotiate in either order)
2129  */
2130  return ProcessStartupPacket(port, true, SSLok == 'S');
2131  }
2132  else if (proto == NEGOTIATE_GSS_CODE && !gss_done)
2133  {
2134  char GSSok = 'N';
2135 
2136 #ifdef ENABLE_GSS
2137  /* No GSSAPI encryption when on Unix socket */
2138  if (port->laddr.addr.ss_family != AF_UNIX)
2139  GSSok = 'G';
2140 #endif
2141 
2142  while (send(port->sock, &GSSok, 1, 0) != 1)
2143  {
2144  if (errno == EINTR)
2145  continue;
2148  errmsg("failed to send GSSAPI negotiation response: %m")));
2149  return STATUS_ERROR; /* close the connection */
2150  }
2151 
2152 #ifdef ENABLE_GSS
2153  if (GSSok == 'G' && secure_open_gssapi(port) == -1)
2154  return STATUS_ERROR;
2155 #endif
2156 
2157  /*
2158  * At this point we should have no data already buffered. If we do,
2159  * it was received before we performed the GSS handshake, so it wasn't
2160  * encrypted and indeed may have been injected by a man-in-the-middle.
2161  * We report this case to the client.
2162  */
2163  if (pq_buffer_has_data())
2164  ereport(FATAL,
2165  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2166  errmsg("received unencrypted data after GSSAPI encryption request"),
2167  errdetail("This could be either a client-software bug or evidence of an attempted man-in-the-middle attack.")));
2168 
2169  /*
2170  * regular startup packet, cancel, etc packet should follow, but not
2171  * another GSS negotiation request, and an SSL request should only
2172  * follow if GSS was rejected (client may negotiate in either order)
2173  */
2174  return ProcessStartupPacket(port, GSSok == 'G', true);
2175  }
2176 
2177  /* Could add additional special packet types here */
2178 
2179  /*
2180  * Set FrontendProtocol now so that ereport() knows what format to send if
2181  * we fail during startup.
2182  */
2183  FrontendProtocol = proto;
2184 
2185  /* Check that the major protocol version is in range. */
2188  ereport(FATAL,
2189  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2190  errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
2191  PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
2195 
2196  /*
2197  * Now fetch parameters out of startup packet and save them into the Port
2198  * structure. All data structures attached to the Port struct must be
2199  * allocated in TopMemoryContext so that they will remain available in a
2200  * running backend (even after PostmasterContext is destroyed). We need
2201  * not worry about leaking this storage on failure, since we aren't in the
2202  * postmaster process anymore.
2203  */
2205 
2206  /* Handle protocol version 3 startup packet */
2207  {
2208  int32 offset = sizeof(ProtocolVersion);
2209  List *unrecognized_protocol_options = NIL;
2210 
2211  /*
2212  * Scan packet body for name/option pairs. We can assume any string
2213  * beginning within the packet body is null-terminated, thanks to
2214  * zeroing extra byte above.
2215  */
2216  port->guc_options = NIL;
2217 
2218  while (offset < len)
2219  {
2220  char *nameptr = buf + offset;
2221  int32 valoffset;
2222  char *valptr;
2223 
2224  if (*nameptr == '\0')
2225  break; /* found packet terminator */
2226  valoffset = offset + strlen(nameptr) + 1;
2227  if (valoffset >= len)
2228  break; /* missing value, will complain below */
2229  valptr = buf + valoffset;
2230 
2231  if (strcmp(nameptr, "database") == 0)
2232  port->database_name = pstrdup(valptr);
2233  else if (strcmp(nameptr, "user") == 0)
2234  port->user_name = pstrdup(valptr);
2235  else if (strcmp(nameptr, "options") == 0)
2236  port->cmdline_options = pstrdup(valptr);
2237  else if (strcmp(nameptr, "replication") == 0)
2238  {
2239  /*
2240  * Due to backward compatibility concerns the replication
2241  * parameter is a hybrid beast which allows the value to be
2242  * either boolean or the string 'database'. The latter
2243  * connects to a specific database which is e.g. required for
2244  * logical decoding.
2245  */
2246  if (strcmp(valptr, "database") == 0)
2247  {
2248  am_walsender = true;
2249  am_db_walsender = true;
2250  }
2251  else if (!parse_bool(valptr, &am_walsender))
2252  ereport(FATAL,
2253  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2254  errmsg("invalid value for parameter \"%s\": \"%s\"",
2255  "replication",
2256  valptr),
2257  errhint("Valid values are: \"false\", 0, \"true\", 1, \"database\".")));
2258  }
2259  else if (strncmp(nameptr, "_pq_.", 5) == 0)
2260  {
2261  /*
2262  * Any option beginning with _pq_. is reserved for use as a
2263  * protocol-level option, but at present no such options are
2264  * defined.
2265  */
2266  unrecognized_protocol_options =
2267  lappend(unrecognized_protocol_options, pstrdup(nameptr));
2268  }
2269  else
2270  {
2271  /* Assume it's a generic GUC option */
2272  port->guc_options = lappend(port->guc_options,
2273  pstrdup(nameptr));
2274  port->guc_options = lappend(port->guc_options,
2275  pstrdup(valptr));
2276 
2277  /*
2278  * Copy application_name to port if we come across it. This
2279  * is done so we can log the application_name in the
2280  * connection authorization message. Note that the GUC would
2281  * be used but we haven't gone through GUC setup yet.
2282  */
2283  if (strcmp(nameptr, "application_name") == 0)
2284  {
2285  char *tmp_app_name = pstrdup(valptr);
2286 
2287  pg_clean_ascii(tmp_app_name);
2288 
2289  port->application_name = tmp_app_name;
2290  }
2291  }
2292  offset = valoffset + strlen(valptr) + 1;
2293  }
2294 
2295  /*
2296  * If we didn't find a packet terminator exactly at the end of the
2297  * given packet length, complain.
2298  */
2299  if (offset != len - 1)
2300  ereport(FATAL,
2301  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2302  errmsg("invalid startup packet layout: expected terminator as last byte")));
2303 
2304  /*
2305  * If the client requested a newer protocol version or if the client
2306  * requested any protocol options we didn't recognize, let them know
2307  * the newest minor protocol version we do support and the names of
2308  * any unrecognized options.
2309  */
2311  unrecognized_protocol_options != NIL)
2312  SendNegotiateProtocolVersion(unrecognized_protocol_options);
2313  }
2314 
2315  /* Check a user name was given. */
2316  if (port->user_name == NULL || port->user_name[0] == '\0')
2317  ereport(FATAL,
2318  (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
2319  errmsg("no PostgreSQL user name specified in startup packet")));
2320 
2321  /* The database defaults to the user name. */
2322  if (port->database_name == NULL || port->database_name[0] == '\0')
2323  port->database_name = pstrdup(port->user_name);
2324 
2325  if (Db_user_namespace)
2326  {
2327  /*
2328  * If user@, it is a global user, remove '@'. We only want to do this
2329  * if there is an '@' at the end and no earlier in the user string or
2330  * they may fake as a local user of another database attaching to this
2331  * database.
2332  */
2333  if (strchr(port->user_name, '@') ==
2334  port->user_name + strlen(port->user_name) - 1)
2335  *strchr(port->user_name, '@') = '\0';
2336  else
2337  {
2338  /* Append '@' and dbname */
2339  port->user_name = psprintf("%s@%s", port->user_name, port->database_name);
2340  }
2341  }
2342 
2343  /*
2344  * Truncate given database and user names to length of a Postgres name.
2345  * This avoids lookup failures when overlength names are given.
2346  */
2347  if (strlen(port->database_name) >= NAMEDATALEN)
2348  port->database_name[NAMEDATALEN - 1] = '\0';
2349  if (strlen(port->user_name) >= NAMEDATALEN)
2350  port->user_name[NAMEDATALEN - 1] = '\0';
2351 
2352  if (am_walsender)
2354  else
2356 
2357  /*
2358  * Normal walsender backends, e.g. for streaming replication, are not
2359  * connected to a particular database. But walsenders used for logical
2360  * replication need to connect to a specific database. We allow streaming
2361  * replication commands to be issued even if connected to a database as it
2362  * can make sense to first make a basebackup and then stream changes
2363  * starting from that.
2364  */
2365  if (am_walsender && !am_db_walsender)
2366  port->database_name[0] = '\0';
2367 
2368  /*
2369  * Done putting stuff in TopMemoryContext.
2370  */
2371  MemoryContextSwitchTo(oldcontext);
2372 
2373  /*
2374  * If we're going to reject the connection due to database state, say so
2375  * now instead of wasting cycles on an authentication exchange. (This also
2376  * allows a pg_ping utility to be written.)
2377  */
2378  switch (port->canAcceptConnections)
2379  {
2380  case CAC_STARTUP:
2381  ereport(FATAL,
2383  errmsg("the database system is starting up")));
2384  break;
2385  case CAC_NOTCONSISTENT:
2386  if (EnableHotStandby)
2387  ereport(FATAL,
2389  errmsg("the database system is not yet accepting connections"),
2390  errdetail("Consistent recovery state has not been yet reached.")));
2391  else
2392  ereport(FATAL,
2394  errmsg("the database system is not accepting connections"),
2395  errdetail("Hot standby mode is disabled.")));
2396  break;
2397  case CAC_SHUTDOWN:
2398  ereport(FATAL,
2400  errmsg("the database system is shutting down")));
2401  break;
2402  case CAC_RECOVERY:
2403  ereport(FATAL,
2405  errmsg("the database system is in recovery mode")));
2406  break;
2407  case CAC_TOOMANY:
2408  ereport(FATAL,
2409  (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
2410  errmsg("sorry, too many clients already")));
2411  break;
2412  case CAC_OK:
2413  break;
2414  }
2415 
2416  return STATUS_OK;
2417 }
2418 
2419 /*
2420  * Send a NegotiateProtocolVersion to the client. This lets the client know
2421  * that they have requested a newer minor protocol version than we are able
2422  * to speak. We'll speak the highest version we know about; the client can,
2423  * of course, abandon the connection if that's a problem.
2424  *
2425  * We also include in the response a list of protocol options we didn't
2426  * understand. This allows clients to include optional parameters that might
2427  * be present either in newer protocol versions or third-party protocol
2428  * extensions without fear of having to reconnect if those options are not
2429  * understood, while at the same time making certain that the client is aware
2430  * of which options were actually accepted.
2431  */
2432 static void
2433 SendNegotiateProtocolVersion(List *unrecognized_protocol_options)
2434 {
2436  ListCell *lc;
2437 
2438  pq_beginmessage(&buf, 'v'); /* NegotiateProtocolVersion */
2440  pq_sendint32(&buf, list_length(unrecognized_protocol_options));
2441  foreach(lc, unrecognized_protocol_options)
2442  pq_sendstring(&buf, lfirst(lc));
2443  pq_endmessage(&buf);
2444 
2445  /* no need to flush, some other message will follow */
2446 }
2447 
2448 /*
2449  * The client has sent a cancel request packet, not a normal
2450  * start-a-new-connection packet. Perform the necessary processing.
2451  * Nothing is sent back to the client.
2452  */
2453 static void
2455 {
2456  CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
2457  int backendPID;
2458  int32 cancelAuthCode;
2459  Backend *bp;
2460 
2461 #ifndef EXEC_BACKEND
2462  dlist_iter iter;
2463 #else
2464  int i;
2465 #endif
2466 
2467  backendPID = (int) pg_ntoh32(canc->backendPID);
2468  cancelAuthCode = (int32) pg_ntoh32(canc->cancelAuthCode);
2469 
2470  /*
2471  * See if we have a matching backend. In the EXEC_BACKEND case, we can no
2472  * longer access the postmaster's own backend list, and must rely on the
2473  * duplicate array in shared memory.
2474  */
2475 #ifndef EXEC_BACKEND
2476  dlist_foreach(iter, &BackendList)
2477  {
2478  bp = dlist_container(Backend, elem, iter.cur);
2479 #else
2480  for (i = MaxLivePostmasterChildren() - 1; i >= 0; i--)
2481  {
2482  bp = (Backend *) &ShmemBackendArray[i];
2483 #endif
2484  if (bp->pid == backendPID)
2485  {
2486  if (bp->cancel_key == cancelAuthCode)
2487  {
2488  /* Found a match; signal that backend to cancel current op */
2489  ereport(DEBUG2,
2490  (errmsg_internal("processing cancel request: sending SIGINT to process %d",
2491  backendPID)));
2492  signal_child(bp->pid, SIGINT);
2493  }
2494  else
2495  /* Right PID, wrong key: no way, Jose */
2496  ereport(LOG,
2497  (errmsg("wrong key in cancel request for process %d",
2498  backendPID)));
2499  return;
2500  }
2501 #ifndef EXEC_BACKEND /* make GNU Emacs 26.1 see brace balance */
2502  }
2503 #else
2504  }
2505 #endif
2506 
2507  /* No matching backend */
2508  ereport(LOG,
2509  (errmsg("PID %d in cancel request did not match any process",
2510  backendPID)));
2511 }
2512 
2513 /*
2514  * canAcceptConnections --- check to see if database state allows connections
2515  * of the specified type. backend_type can be BACKEND_TYPE_NORMAL,
2516  * BACKEND_TYPE_AUTOVAC, or BACKEND_TYPE_BGWORKER. (Note that we don't yet
2517  * know whether a NORMAL connection might turn into a walsender.)
2518  */
2519 static CAC_state
2520 canAcceptConnections(int backend_type)
2521 {
2522  CAC_state result = CAC_OK;
2523 
2524  /*
2525  * Can't start backends when in startup/shutdown/inconsistent recovery
2526  * state. We treat autovac workers the same as user backends for this
2527  * purpose. However, bgworkers are excluded from this test; we expect
2528  * bgworker_should_start_now() decided whether the DB state allows them.
2529  */
2530  if (pmState != PM_RUN && pmState != PM_HOT_STANDBY &&
2531  backend_type != BACKEND_TYPE_BGWORKER)
2532  {
2533  if (Shutdown > NoShutdown)
2534  return CAC_SHUTDOWN; /* shutdown is pending */
2535  else if (!FatalError && pmState == PM_STARTUP)
2536  return CAC_STARTUP; /* normal startup */
2537  else if (!FatalError && pmState == PM_RECOVERY)
2538  return CAC_NOTCONSISTENT; /* not yet at consistent recovery
2539  * state */
2540  else
2541  return CAC_RECOVERY; /* else must be crash recovery */
2542  }
2543 
2544  /*
2545  * "Smart shutdown" restrictions are applied only to normal connections,
2546  * not to autovac workers or bgworkers.
2547  */
2548  if (!connsAllowed && backend_type == BACKEND_TYPE_NORMAL)
2549  return CAC_SHUTDOWN; /* shutdown is pending */
2550 
2551  /*
2552  * Don't start too many children.
2553  *
2554  * We allow more connections here than we can have backends because some
2555  * might still be authenticating; they might fail auth, or some existing
2556  * backend might exit before the auth cycle is completed. The exact
2557  * MaxBackends limit is enforced when a new backend tries to join the
2558  * shared-inval backend array.
2559  *
2560  * The limit here must match the sizes of the per-child-process arrays;
2561  * see comments for MaxLivePostmasterChildren().
2562  */
2564  result = CAC_TOOMANY;
2565 
2566  return result;
2567 }
2568 
2569 
2570 /*
2571  * ConnCreate -- create a local connection data structure
2572  *
2573  * Returns NULL on failure, other than out-of-memory which is fatal.
2574  */
2575 static Port *
2576 ConnCreate(int serverFd)
2577 {
2578  Port *port;
2579 
2580  if (!(port = (Port *) calloc(1, sizeof(Port))))
2581  {
2582  ereport(LOG,
2583  (errcode(ERRCODE_OUT_OF_MEMORY),
2584  errmsg("out of memory")));
2585  ExitPostmaster(1);
2586  }
2587 
2588  if (StreamConnection(serverFd, port) != STATUS_OK)
2589  {
2590  if (port->sock != PGINVALID_SOCKET)
2591  StreamClose(port->sock);
2592  ConnFree(port);
2593  return NULL;
2594  }
2595 
2596  return port;
2597 }
2598 
2599 
2600 /*
2601  * ConnFree -- free a local connection data structure
2602  *
2603  * Caller has already closed the socket if any, so there's not much
2604  * to do here.
2605  */
2606 static void
2608 {
2609  free(conn);
2610 }
2611 
2612 
2613 /*
2614  * ClosePostmasterPorts -- close all the postmaster's open sockets
2615  *
2616  * This is called during child process startup to release file descriptors
2617  * that are not needed by that child process. The postmaster still has
2618  * them open, of course.
2619  *
2620  * Note: we pass am_syslogger as a boolean because we don't want to set
2621  * the global variable yet when this is called.
2622  */
2623 void
2624 ClosePostmasterPorts(bool am_syslogger)
2625 {
2626  int i;
2627 
2628 #ifndef WIN32
2629 
2630  /*
2631  * Close the write end of postmaster death watch pipe. It's important to
2632  * do this as early as possible, so that if postmaster dies, others won't
2633  * think that it's still running because we're holding the pipe open.
2634  */
2636  ereport(FATAL,
2638  errmsg_internal("could not close postmaster death monitoring pipe in child process: %m")));
2640  /* Notify fd.c that we released one pipe FD. */
2642 #endif
2643 
2644  /*
2645  * Close the postmaster's listen sockets. These aren't tracked by fd.c,
2646  * so we don't call ReleaseExternalFD() here.
2647  */
2648  for (i = 0; i < MAXLISTEN; i++)
2649  {
2651  {
2654  }
2655  }
2656 
2657  /*
2658  * If using syslogger, close the read side of the pipe. We don't bother
2659  * tracking this in fd.c, either.
2660  */
2661  if (!am_syslogger)
2662  {
2663 #ifndef WIN32
2664  if (syslogPipe[0] >= 0)
2665  close(syslogPipe[0]);
2666  syslogPipe[0] = -1;
2667 #else
2668  if (syslogPipe[0])
2669  CloseHandle(syslogPipe[0]);
2670  syslogPipe[0] = 0;
2671 #endif
2672  }
2673 
2674 #ifdef USE_BONJOUR
2675  /* If using Bonjour, close the connection to the mDNS daemon */
2676  if (bonjour_sdref)
2677  close(DNSServiceRefSockFD(bonjour_sdref));
2678 #endif
2679 }
2680 
2681 
2682 /*
2683  * InitProcessGlobals -- set MyProcPid, MyStartTime[stamp], random seeds
2684  *
2685  * Called early in the postmaster and every backend.
2686  */
2687 void
2689 {
2690  MyProcPid = getpid();
2693 
2694  /*
2695  * Set a different global seed in every process. We want something
2696  * unpredictable, so if possible, use high-quality random bits for the
2697  * seed. Otherwise, fall back to a seed based on timestamp and PID.
2698  */
2700  {
2701  uint64 rseed;
2702 
2703  /*
2704  * Since PIDs and timestamps tend to change more frequently in their
2705  * least significant bits, shift the timestamp left to allow a larger
2706  * total number of seeds in a given time period. Since that would
2707  * leave only 20 bits of the timestamp that cycle every ~1 second,
2708  * also mix in some higher bits.
2709  */
2710  rseed = ((uint64) MyProcPid) ^
2711  ((uint64) MyStartTimestamp << 12) ^
2712  ((uint64) MyStartTimestamp >> 20);
2713 
2715  }
2716 
2717  /*
2718  * Also make sure that we've set a good seed for random(3). Use of that
2719  * is deprecated in core Postgres, but extensions might use it.
2720  */
2721 #ifndef WIN32
2723 #endif
2724 }
2725 
2726 
/*
 * reset_shared -- reset shared memory and semaphores
 */
static void
reset_shared(void)
{
	/*
	 * Create or re-create shared memory and semaphores.
	 *
	 * Note: in each "cycle of life" we will normally assign the same IPC keys
	 * (if using SysV shmem and/or semas).  This helps ensure that we will
	 * clean up dead IPC objects if the postmaster crashes and is restarted.
	 */
	CreateSharedMemoryAndSemaphores();
}
2742 
2743 
2744 /*
2745  * SIGHUP -- reread config files, and tell children to do same
2746  */
2747 static void
2749 {
2750  int save_errno = errno;
2751 
2752  /*
2753  * We rely on the signal mechanism to have blocked all signals ... except
2754  * on Windows, which lacks sigaction(), so we have to do it manually.
2755  */
2756 #ifdef WIN32
2757  PG_SETMASK(&BlockSig);
2758 #endif
2759 
2760  if (Shutdown <= SmartShutdown)
2761  {
2762  ereport(LOG,
2763  (errmsg("received SIGHUP, reloading configuration files")));
2766  if (StartupPID != 0)
2768  if (BgWriterPID != 0)
2770  if (CheckpointerPID != 0)
2772  if (WalWriterPID != 0)
2774  if (WalReceiverPID != 0)
2776  if (AutoVacPID != 0)
2778  if (PgArchPID != 0)
2780  if (SysLoggerPID != 0)
2782 
2783  /* Reload authentication config files too */
2784  if (!load_hba())
2785  ereport(LOG,
2786  /* translator: %s is a configuration file */
2787  (errmsg("%s was not reloaded", "pg_hba.conf")));
2788 
2789  if (!load_ident())
2790  ereport(LOG,
2791  (errmsg("%s was not reloaded", "pg_ident.conf")));
2792 
2793 #ifdef USE_SSL
2794  /* Reload SSL configuration as well */
2795  if (EnableSSL)
2796  {
2797  if (secure_initialize(false) == 0)
2798  LoadedSSL = true;
2799  else
2800  ereport(LOG,
2801  (errmsg("SSL configuration was not reloaded")));
2802  }
2803  else
2804  {
2805  secure_destroy();
2806  LoadedSSL = false;
2807  }
2808 #endif
2809 
2810 #ifdef EXEC_BACKEND
2811  /* Update the starting-point file for future children */
2812  write_nondefault_variables(PGC_SIGHUP);
2813 #endif
2814  }
2815 
2816 #ifdef WIN32
2818 #endif
2819 
2820  errno = save_errno;
2821 }
2822 
2823 
2824 /*
2825  * pmdie -- signal handler for processing various postmaster signals.
2826  */
2827 static void
2829 {
2830  int save_errno = errno;
2831 
2832  /*
2833  * We rely on the signal mechanism to have blocked all signals ... except
2834  * on Windows, which lacks sigaction(), so we have to do it manually.
2835  */
2836 #ifdef WIN32
2837  PG_SETMASK(&BlockSig);
2838 #endif
2839 
2840  ereport(DEBUG2,
2841  (errmsg_internal("postmaster received signal %d",
2842  postgres_signal_arg)));
2843 
2844  switch (postgres_signal_arg)
2845  {
2846  case SIGTERM:
2847 
2848  /*
2849  * Smart Shutdown:
2850  *
2851  * Wait for children to end their work, then shut down.
2852  */
2853  if (Shutdown >= SmartShutdown)
2854  break;
2856  ereport(LOG,
2857  (errmsg("received smart shutdown request")));
2858 
2859  /* Report status */
2861 #ifdef USE_SYSTEMD
2862  sd_notify(0, "STOPPING=1");
2863 #endif
2864 
2865  /*
2866  * If we reached normal running, we go straight to waiting for
2867  * client backends to exit. If already in PM_STOP_BACKENDS or a
2868  * later state, do not change it.
2869  */
2870  if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
2871  connsAllowed = false;
2872  else if (pmState == PM_STARTUP || pmState == PM_RECOVERY)
2873  {
2874  /* There should be no clients, so proceed to stop children */
2876  }
2877 
2878  /*
2879  * Now wait for online backup mode to end and backends to exit. If
2880  * that is already the case, PostmasterStateMachine will take the
2881  * next step.
2882  */
2884  break;
2885 
2886  case SIGINT:
2887 
2888  /*
2889  * Fast Shutdown:
2890  *
2891  * Abort all children with SIGTERM (rollback active transactions
2892  * and exit) and shut down when they are gone.
2893  */
2894  if (Shutdown >= FastShutdown)
2895  break;
2897  ereport(LOG,
2898  (errmsg("received fast shutdown request")));
2899 
2900  /* Report status */
2902 #ifdef USE_SYSTEMD
2903  sd_notify(0, "STOPPING=1");
2904 #endif
2905 
2906  if (pmState == PM_STARTUP || pmState == PM_RECOVERY)
2907  {
2908  /* Just shut down background processes silently */
2910  }
2911  else if (pmState == PM_RUN ||
2913  {
2914  /* Report that we're about to zap live client sessions */
2915  ereport(LOG,
2916  (errmsg("aborting any active transactions")));
2918  }
2919 
2920  /*
2921  * PostmasterStateMachine will issue any necessary signals, or
2922  * take the next step if no child processes need to be killed.
2923  */
2925  break;
2926 
2927  case SIGQUIT:
2928 
2929  /*
2930  * Immediate Shutdown:
2931  *
2932  * abort all children with SIGQUIT, wait for them to exit,
2933  * terminate remaining ones with SIGKILL, then exit without
2934  * attempt to properly shut down the data base system.
2935  */
2936  if (Shutdown >= ImmediateShutdown)
2937  break;
2939  ereport(LOG,
2940  (errmsg("received immediate shutdown request")));
2941 
2942  /* Report status */
2944 #ifdef USE_SYSTEMD
2945  sd_notify(0, "STOPPING=1");
2946 #endif
2947 
2948  /* tell children to shut down ASAP */
2952 
2953  /* set stopwatch for them to die */
2954  AbortStartTime = time(NULL);
2955 
2956  /*
2957  * Now wait for backends to exit. If there are none,
2958  * PostmasterStateMachine will take the next step.
2959  */
2961  break;
2962  }
2963 
2964 #ifdef WIN32
2966 #endif
2967 
2968  errno = save_errno;
2969 }
2970 
2971 /*
2972  * Reaper -- signal handler to cleanup after a child process dies.
2973  */
2974 static void
2976 {
2977  int save_errno = errno;
2978  int pid; /* process id of dead child process */
2979  int exitstatus; /* its exit status */
2980 
2981  /*
2982  * We rely on the signal mechanism to have blocked all signals ... except
2983  * on Windows, which lacks sigaction(), so we have to do it manually.
2984  */
2985 #ifdef WIN32
2986  PG_SETMASK(&BlockSig);
2987 #endif
2988 
2989  ereport(DEBUG4,
2990  (errmsg_internal("reaping dead processes")));
2991 
2992  while ((pid = waitpid(-1, &exitstatus, WNOHANG)) > 0)
2993  {
2994  /*
2995  * Check if this child was a startup process.
2996  */
2997  if (pid == StartupPID)
2998  {
2999  StartupPID = 0;
3000 
3001  /*
3002  * Startup process exited in response to a shutdown request (or it
3003  * completed normally regardless of the shutdown request).
3004  */
3005  if (Shutdown > NoShutdown &&
3006  (EXIT_STATUS_0(exitstatus) || EXIT_STATUS_1(exitstatus)))
3007  {
3010  /* PostmasterStateMachine logic does the rest */
3011  continue;
3012  }
3013 
3014  if (EXIT_STATUS_3(exitstatus))
3015  {
3016  ereport(LOG,
3017  (errmsg("shutdown at recovery target")));
3020  TerminateChildren(SIGTERM);
3022  /* PostmasterStateMachine logic does the rest */
3023  continue;
3024  }
3025 
3026  /*
3027  * Unexpected exit of startup process (including FATAL exit)
3028  * during PM_STARTUP is treated as catastrophic. There are no
3029  * other processes running yet, so we can just exit.
3030  */
3031  if (pmState == PM_STARTUP &&
3033  !EXIT_STATUS_0(exitstatus))
3034  {
3035  LogChildExit(LOG, _("startup process"),
3036  pid, exitstatus);
3037  ereport(LOG,
3038  (errmsg("aborting startup due to startup process failure")));
3039  ExitPostmaster(1);
3040  }
3041 
3042  /*
3043  * After PM_STARTUP, any unexpected exit (including FATAL exit) of
3044  * the startup process is catastrophic, so kill other children,
3045  * and set StartupStatus so we don't try to reinitialize after
3046  * they're gone. Exception: if StartupStatus is STARTUP_SIGNALED,
3047  * then we previously sent the startup process a SIGQUIT; so
3048  * that's probably the reason it died, and we do want to try to
3049  * restart in that case.
3050  *
3051  * This stanza also handles the case where we sent a SIGQUIT
3052  * during PM_STARTUP due to some dead_end child crashing: in that
3053  * situation, if the startup process dies on the SIGQUIT, we need
3054  * to transition to PM_WAIT_BACKENDS state which will allow
3055  * PostmasterStateMachine to restart the startup process. (On the
3056  * other hand, the startup process might complete normally, if we
3057  * were too late with the SIGQUIT. In that case we'll fall
3058  * through and commence normal operations.)
3059  */
3060  if (!EXIT_STATUS_0(exitstatus))
3061  {
3063  {
3065  if (pmState == PM_STARTUP)
3067  }
3068  else
3070  HandleChildCrash(pid, exitstatus,
3071  _("startup process"));
3072  continue;
3073  }
3074 
3075  /*
3076  * Startup succeeded, commence normal operations
3077  */
3079  FatalError = false;
3080  AbortStartTime = 0;
3081  ReachedNormalRunning = true;
3082  pmState = PM_RUN;
3083  connsAllowed = true;
3084 
3085  /*
3086  * Crank up the background tasks, if we didn't do that already
3087  * when we entered consistent recovery state. It doesn't matter
3088  * if this fails, we'll just try again later.
3089  */
3090  if (CheckpointerPID == 0)
3092  if (BgWriterPID == 0)
3094  if (WalWriterPID == 0)
3096 
3097  /*
3098  * Likewise, start other special children as needed. In a restart
3099  * situation, some of them may be alive already.
3100  */
3103  if (PgArchStartupAllowed() && PgArchPID == 0)
3105 
3106  /* workers may be scheduled to start now */
3108 
3109  /* at this point we are really open for business */
3110  ereport(LOG,
3111  (errmsg("database system is ready to accept connections")));
3112 
3113  /* Report status */
3115 #ifdef USE_SYSTEMD
3116  sd_notify(0, "READY=1");
3117 #endif
3118 
3119  continue;
3120  }
3121 
3122  /*
3123  * Was it the bgwriter? Normal exit can be ignored; we'll start a new
3124  * one at the next iteration of the postmaster's main loop, if
3125  * necessary. Any other exit condition is treated as a crash.
3126  */
3127  if (pid == BgWriterPID)
3128  {
3129  BgWriterPID = 0;
3130  if (!EXIT_STATUS_0(exitstatus))
3131  HandleChildCrash(pid, exitstatus,
3132  _("background writer process"));
3133  continue;
3134  }
3135 
3136  /*
3137  * Was it the checkpointer?
3138  */
3139  if (pid == CheckpointerPID)
3140  {
3141  CheckpointerPID = 0;
3142  if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN)
3143  {
3144  /*
3145  * OK, we saw normal exit of the checkpointer after it's been
3146  * told to shut down. We expect that it wrote a shutdown
3147  * checkpoint. (If for some reason it didn't, recovery will
3148  * occur on next postmaster start.)
3149  *
3150  * At this point we should have no normal backend children
3151  * left (else we'd not be in PM_SHUTDOWN state) but we might
3152  * have dead_end children to wait for.
3153  *
3154  * If we have an archiver subprocess, tell it to do a last
3155  * archive cycle and quit. Likewise, if we have walsender
3156  * processes, tell them to send any remaining WAL and quit.
3157  */
3159 
3160  /* Waken archiver for the last time */
3161  if (PgArchPID != 0)
3163 
3164  /*
3165  * Waken walsenders for the last time. No regular backends
3166  * should be around anymore.
3167  */
3169 
3171  }
3172  else
3173  {
3174  /*
3175  * Any unexpected exit of the checkpointer (including FATAL
3176  * exit) is treated as a crash.
3177  */
3178  HandleChildCrash(pid, exitstatus,
3179  _("checkpointer process"));
3180  }
3181 
3182  continue;
3183  }
3184 
3185  /*
3186  * Was it the wal writer? Normal exit can be ignored; we'll start a
3187  * new one at the next iteration of the postmaster's main loop, if
3188  * necessary. Any other exit condition is treated as a crash.
3189  */
3190  if (pid == WalWriterPID)
3191  {
3192  WalWriterPID = 0;
3193  if (!EXIT_STATUS_0(exitstatus))
3194  HandleChildCrash(pid, exitstatus,
3195  _("WAL writer process"));
3196  continue;
3197  }
3198 
3199  /*
3200  * Was it the wal receiver? If exit status is zero (normal) or one
3201  * (FATAL exit), we assume everything is all right just like normal
3202  * backends. (If we need a new wal receiver, we'll start one at the
3203  * next iteration of the postmaster's main loop.)
3204  */
3205  if (pid == WalReceiverPID)
3206  {
3207  WalReceiverPID = 0;
3208  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3209  HandleChildCrash(pid, exitstatus,
3210  _("WAL receiver process"));
3211  continue;
3212  }
3213 
3214  /*
3215  * Was it the autovacuum launcher? Normal exit can be ignored; we'll
3216  * start a new one at the next iteration of the postmaster's main
3217  * loop, if necessary. Any other exit condition is treated as a
3218  * crash.
3219  */
3220  if (pid == AutoVacPID)
3221  {
3222  AutoVacPID = 0;
3223  if (!EXIT_STATUS_0(exitstatus))
3224  HandleChildCrash(pid, exitstatus,
3225  _("autovacuum launcher process"));
3226  continue;
3227  }
3228 
3229  /*
3230  * Was it the archiver? If exit status is zero (normal) or one (FATAL
3231  * exit), we assume everything is all right just like normal backends
3232  * and just try to restart a new one so that we immediately retry
3233  * archiving remaining files. (If fail, we'll try again in future
3234  * cycles of the postmaster's main loop.) Unless we were waiting for
3235  * it to shut down; don't restart it in that case, and
3236  * PostmasterStateMachine() will advance to the next shutdown step.
3237  */
3238  if (pid == PgArchPID)
3239  {
3240  PgArchPID = 0;
3241  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3242  HandleChildCrash(pid, exitstatus,
3243  _("archiver process"));
3244  if (PgArchStartupAllowed())
3246  continue;
3247  }
3248 
3249  /* Was it the system logger? If so, try to start a new one */
3250  if (pid == SysLoggerPID)
3251  {
3252  SysLoggerPID = 0;
3253  /* for safety's sake, launch new logger *first* */
3255  if (!EXIT_STATUS_0(exitstatus))
3256  LogChildExit(LOG, _("system logger process"),
3257  pid, exitstatus);
3258  continue;
3259  }
3260 
3261  /* Was it one of our background workers? */
3262  if (CleanupBackgroundWorker(pid, exitstatus))
3263  {
3264  /* have it be restarted */
3265  HaveCrashedWorker = true;
3266  continue;
3267  }
3268 
3269  /*
3270  * Else do standard backend child cleanup.
3271  */
3272  CleanupBackend(pid, exitstatus);
3273  } /* loop over pending child-death reports */
3274 
3275  /*
3276  * After cleaning out the SIGCHLD queue, see if we have any state changes
3277  * or actions to make.
3278  */
3280 
3281  /* Done with signal handler */
3282 #ifdef WIN32
3284 #endif
3285 
3286  errno = save_errno;
3287 }
3288 
3289 /*
3290  * Scan the bgworkers list and see if the given PID (which has just stopped
3291  * or crashed) is in it. Handle its shutdown if so, and return true. If not a
3292  * bgworker, return false.
3293  *
3294  * This is heavily based on CleanupBackend. One important difference is that
3295  * we don't know yet that the dying process is a bgworker, so we must be silent
3296  * until we're sure it is.
3297  */
3298 static bool
3300  int exitstatus) /* child's exit status */
3301 {
3302  char namebuf[MAXPGPATH];
3303  slist_mutable_iter iter;
3304 
3306  {
3307  RegisteredBgWorker *rw;
3308 
3309  rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
3310 
3311  if (rw->rw_pid != pid)
3312  continue;
3313 
3314 #ifdef WIN32
3315  /* see CleanupBackend */
3316  if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3317  exitstatus = 0;
3318 #endif
3319 
3320  snprintf(namebuf, MAXPGPATH, _("background worker \"%s\""),
3321  rw->rw_worker.bgw_type);
3322 
3323 
3324  if (!EXIT_STATUS_0(exitstatus))
3325  {
3326  /* Record timestamp, so we know when to restart the worker. */
3328  }
3329  else
3330  {
3331  /* Zero exit status means terminate */
3332  rw->rw_crashed_at = 0;
3333  rw->rw_terminate = true;
3334  }
3335 
3336  /*
3337  * Additionally, just like a backend, any exit status other than 0 or
3338  * 1 is considered a crash and causes a system-wide restart.
3339  */
3340  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3341  {
3342  HandleChildCrash(pid, exitstatus, namebuf);
3343  return true;
3344  }
3345 
3346  /*
3347  * We must release the postmaster child slot. If the worker failed to
3348  * do so, it did not clean up after itself, requiring a crash-restart
3349  * cycle.
3350  */
3352  {
3353  HandleChildCrash(pid, exitstatus, namebuf);
3354  return true;
3355  }
3356 
3357  /* Get it out of the BackendList and clear out remaining data */
3358  dlist_delete(&rw->rw_backend->elem);
3359 #ifdef EXEC_BACKEND
3360  ShmemBackendArrayRemove(rw->rw_backend);
3361 #endif
3362 
3363  /*
3364  * It's possible that this background worker started some OTHER
3365  * background worker and asked to be notified when that worker started
3366  * or stopped. If so, cancel any notifications destined for the
3367  * now-dead backend.
3368  */
3369  if (rw->rw_backend->bgworker_notify)
3371  free(rw->rw_backend);
3372  rw->rw_backend = NULL;
3373  rw->rw_pid = 0;
3374  rw->rw_child_slot = 0;
3375  ReportBackgroundWorkerExit(&iter); /* report child death */
3376 
3377  LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG,
3378  namebuf, pid, exitstatus);
3379 
3380  return true;
3381  }
3382 
3383  return false;
3384 }
3385 
3386 /*
3387  * CleanupBackend -- cleanup after terminated backend.
3388  *
3389  * Remove all local state associated with backend.
3390  *
3391  * If you change this, see also CleanupBackgroundWorker.
3392  */
3393 static void
3395  int exitstatus) /* child's exit status. */
3396 {
3397  dlist_mutable_iter iter;
3398 
3399  LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
3400 
3401  /*
3402  * If a backend dies in an ugly way then we must signal all other backends
3403  * to quickdie. If exit status is zero (normal) or one (FATAL exit), we
3404  * assume everything is all right and proceed to remove the backend from
3405  * the active backend list.
3406  */
3407 
3408 #ifdef WIN32
3409 
3410  /*
3411  * On win32, also treat ERROR_WAIT_NO_CHILDREN (128) as nonfatal case,
3412  * since that sometimes happens under load when the process fails to start
3413  * properly (long before it starts using shared memory). Microsoft reports
3414  * it is related to mutex failure:
3415  * http://archives.postgresql.org/pgsql-hackers/2010-09/msg00790.php
3416  */
3417  if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3418  {
3419  LogChildExit(LOG, _("server process"), pid, exitstatus);
3420  exitstatus = 0;
3421  }
3422 #endif
3423 
3424  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3425  {
3426  HandleChildCrash(pid, exitstatus, _("server process"));
3427  return;
3428  }
3429 
3431  {
3432  Backend *bp = dlist_container(Backend, elem, iter.cur);
3433 
3434  if (bp->pid == pid)
3435  {
3436  if (!bp->dead_end)
3437  {
3439  {
3440  /*
3441  * Uh-oh, the child failed to clean itself up. Treat as a
3442  * crash after all.
3443  */
3444  HandleChildCrash(pid, exitstatus, _("server process"));
3445  return;
3446  }
3447 #ifdef EXEC_BACKEND
3448  ShmemBackendArrayRemove(bp);
3449 #endif
3450  }
3451  if (bp->bgworker_notify)
3452  {
3453  /*
3454  * This backend may have been slated to receive SIGUSR1 when
3455  * some background worker started or stopped. Cancel those
3456  * notifications, as we don't want to signal PIDs that are not
3457  * PostgreSQL backends. This gets skipped in the (probably
3458  * very common) case where the backend has never requested any
3459  * such notifications.
3460  */
3462  }
3463  dlist_delete(iter.cur);
3464  free(bp);
3465  break;
3466  }
3467  }
3468 }
3469 
3470 /*
3471  * HandleChildCrash -- cleanup after failed backend, bgwriter, checkpointer,
3472  * walwriter, autovacuum, archiver or background worker.
3473  *
3474  * The objectives here are to clean up our local state about the child
3475  * process, and to signal all other remaining children to quickdie.
3476  */
3477 static void
3478 HandleChildCrash(int pid, int exitstatus, const char *procname)
3479 {
3480  dlist_mutable_iter iter;
3481  slist_iter siter;
3482  Backend *bp;
3483  bool take_action;
3484 
3485  /*
3486  * We only log messages and send signals if this is the first process
3487  * crash and we're not doing an immediate shutdown; otherwise, we're only
3488  * here to update postmaster's idea of live processes. If we have already
3489  * signaled children, nonzero exit status is to be expected, so don't
3490  * clutter log.
3491  */
3492  take_action = !FatalError && Shutdown != ImmediateShutdown;
3493 
3494  if (take_action)
3495  {
3496  LogChildExit(LOG, procname, pid, exitstatus);
3497  ereport(LOG,
3498  (errmsg("terminating any other active server processes")));
3500  }
3501 
3502  /* Process background workers. */
3504  {
3505  RegisteredBgWorker *rw;
3506 
3507  rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
3508  if (rw->rw_pid == 0)
3509  continue; /* not running */
3510  if (rw->rw_pid == pid)
3511  {
3512  /*
3513  * Found entry for freshly-dead worker, so remove it.
3514  */
3516  dlist_delete(&rw->rw_backend->elem);
3517 #ifdef EXEC_BACKEND
3518  ShmemBackendArrayRemove(rw->rw_backend);
3519 #endif
3520  free(rw->rw_backend);
3521  rw->rw_backend = NULL;
3522  rw->rw_pid = 0;
3523  rw->rw_child_slot = 0;
3524  /* don't reset crashed_at */
3525  /* don't report child stop, either */
3526  /* Keep looping so we can signal remaining workers */
3527  }
3528  else
3529  {
3530  /*
3531  * This worker is still alive. Unless we did so already, tell it
3532  * to commit hara-kiri.
3533  *
3534  * SIGQUIT is the special signal that says exit without proc_exit
3535  * and let the user know what's going on. But if SendStop is set
3536  * (-T on command line), then we send SIGSTOP instead, so that we
3537  * can get core dumps from all backends by hand.
3538  */
3539  if (take_action)
3540  {
3541  ereport(DEBUG2,
3542  (errmsg_internal("sending %s to process %d",
3543  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3544  (int) rw->rw_pid)));
3546  }
3547  }
3548  }
3549 
3550  /* Process regular backends */
3552  {
3553  bp = dlist_container(Backend, elem, iter.cur);
3554 
3555  if (bp->pid == pid)
3556  {
3557  /*
3558  * Found entry for freshly-dead backend, so remove it.
3559  */
3560  if (!bp->dead_end)
3561  {
3563 #ifdef EXEC_BACKEND
3564  ShmemBackendArrayRemove(bp);
3565 #endif
3566  }
3567  dlist_delete(iter.cur);
3568  free(bp);
3569  /* Keep looping so we can signal remaining backends */
3570  }
3571  else
3572  {
3573  /*
3574  * This backend is still alive. Unless we did so already, tell it
3575  * to commit hara-kiri.
3576  *
3577  * SIGQUIT is the special signal that says exit without proc_exit
3578  * and let the user know what's going on. But if SendStop is set
3579  * (-T on command line), then we send SIGSTOP instead, so that we
3580  * can get core dumps from all backends by hand.
3581  *
3582  * We could exclude dead_end children here, but at least in the
3583  * SIGSTOP case it seems better to include them.
3584  *
3585  * Background workers were already processed above; ignore them
3586  * here.
3587  */
3588  if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
3589  continue;
3590 
3591  if (take_action)
3592  {
3593  ereport(DEBUG2,
3594  (errmsg_internal("sending %s to process %d",
3595  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3596  (int) bp->pid)));
3597  signal_child(bp->pid, (SendStop ? SIGSTOP : SIGQUIT));
3598  }
3599  }
3600  }
3601 
3602  /* Take care of the startup process too */
3603  if (pid == StartupPID)
3604  {
3605  StartupPID = 0;
3606  /* Caller adjusts StartupStatus, so don't touch it here */
3607  }
3608  else if (StartupPID != 0 && take_action)
3609  {
3610  ereport(DEBUG2,
3611  (errmsg_internal("sending %s to process %d",
3612  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3613  (int) StartupPID)));
3616  }
3617 
3618  /* Take care of the bgwriter too */
3619  if (pid == BgWriterPID)
3620  BgWriterPID = 0;
3621  else if (BgWriterPID != 0 && take_action)
3622  {
3623  ereport(DEBUG2,
3624  (errmsg_internal("sending %s to process %d",
3625  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3626  (int) BgWriterPID)));
3628  }
3629 
3630  /* Take care of the checkpointer too */
3631  if (pid == CheckpointerPID)
3632  CheckpointerPID = 0;
3633  else if (CheckpointerPID != 0 && take_action)
3634  {
3635  ereport(DEBUG2,
3636  (errmsg_internal("sending %s to process %d",
3637  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3638  (int) CheckpointerPID)));
3640  }
3641 
3642  /* Take care of the walwriter too */
3643  if (pid == WalWriterPID)
3644  WalWriterPID = 0;
3645  else if (WalWriterPID != 0 && take_action)
3646  {
3647  ereport(DEBUG2,
3648  (errmsg_internal("sending %s to process %d",
3649  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3650  (int) WalWriterPID)));
3652  }
3653 
3654  /* Take care of the walreceiver too */
3655  if (pid == WalReceiverPID)
3656  WalReceiverPID = 0;
3657  else if (WalReceiverPID != 0 && take_action)
3658  {
3659  ereport(DEBUG2,
3660  (errmsg_internal("sending %s to process %d",
3661  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3662  (int) WalReceiverPID)));
3664  }
3665 
3666  /* Take care of the autovacuum launcher too */
3667  if (pid == AutoVacPID)
3668  AutoVacPID = 0;
3669  else if (AutoVacPID != 0 && take_action)
3670  {
3671  ereport(DEBUG2,
3672  (errmsg_internal("sending %s to process %d",
3673  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3674  (int) AutoVacPID)));
3676  }
3677 
3678  /* Take care of the archiver too */
3679  if (pid == PgArchPID)
3680  PgArchPID = 0;
3681  else if (PgArchPID != 0 && take_action)
3682  {
3683  ereport(DEBUG2,
3684  (errmsg_internal("sending %s to process %d",
3685  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3686  (int) PgArchPID)));
3688  }
3689 
3690  /* We do NOT restart the syslogger */
3691 
3692  if (Shutdown != ImmediateShutdown)
3693  FatalError = true;
3694 
3695  /* We now transit into a state of waiting for children to die */
3696  if (pmState == PM_RECOVERY ||
3697  pmState == PM_HOT_STANDBY ||
3698  pmState == PM_RUN ||
3700  pmState == PM_SHUTDOWN)
3702 
3703  /*
3704  * .. and if this doesn't happen quickly enough, now the clock is ticking
3705  * for us to kill them without mercy.
3706  */
3707  if (AbortStartTime == 0)
3708  AbortStartTime = time(NULL);
3709 }
3710 
/*
 * LogChildExit -- report how a child process ended.
 *
 *	lev: elog level at which to emit the message
 *	procname: translated noun phrase describing the child
 *	pid: PID of the child
 *	exitstatus: the child's exit status
 *
 * For a nonzero exit status, the activity string obtained from
 * pgstat_get_crashed_backend_activity() is attached as an error detail when
 * one is returned.
 */
static void
LogChildExit(int lev, const char *procname, int pid, int exitstatus)
{
	/*
	 * The buffer size is arbitrary, but chosen to equal the default
	 * track_activity_query_size.
	 */
	char		cmdbuf[1024];
	const char *running = NULL;

	if (!EXIT_STATUS_0(exitstatus))
		running = pgstat_get_crashed_backend_activity(pid,
													  cmdbuf,
													  sizeof(cmdbuf));

	/* Ordinary exit(): report the exit code */
	if (WIFEXITED(exitstatus))
	{
		ereport(lev,

		/*------
		  translator: %s is a noun phrase describing a child process, such as
		  "server process" */
				(errmsg("%s (PID %d) exited with exit code %d",
						procname, pid, WEXITSTATUS(exitstatus)),
				 running ? errdetail("Failed process was running: %s", running) : 0));
		return;
	}

	/* Neither exited nor signaled: all we can do is print the raw status */
	if (!WIFSIGNALED(exitstatus))
	{
		ereport(lev,

		/*------
		  translator: %s is a noun phrase describing a child process, such as
		  "server process" */
				(errmsg("%s (PID %d) exited with unrecognized status %d",
						procname, pid, exitstatus),
				 running ? errdetail("Failed process was running: %s", running) : 0));
		return;
	}

	/* Killed by a signal (or, on Windows, by an exception) */
#if defined(WIN32)
	ereport(lev,

	/*------
	  translator: %s is a noun phrase describing a child process, such as
	  "server process" */
			(errmsg("%s (PID %d) was terminated by exception 0x%X",
					procname, pid, WTERMSIG(exitstatus)),
			 errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
			 running ? errdetail("Failed process was running: %s", running) : 0));
#else
	ereport(lev,

	/*------
	  translator: %s is a noun phrase describing a child process, such as
	  "server process" */
			(errmsg("%s (PID %d) was terminated by signal %d: %s",
					procname, pid, WTERMSIG(exitstatus),
					pg_strsignal(WTERMSIG(exitstatus))),
			 running ? errdetail("Failed process was running: %s", running) : 0));
#endif
}
3772 
3773 /*
3774  * Advance the postmaster's state machine and take actions as appropriate
3775  *
3776  * This is common code for pmdie(), reaper() and sigusr1_handler(), which
3777  * receive the signals that might mean we need to change state.
3778  */
3779 static void
3781 {
3782  /* If we're doing a smart shutdown, try to advance that state. */
3783  if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
3784  {
3785  if (!connsAllowed)
3786  {
3787  /*
3788  * This state ends when we have no normal client backends running.
3789  * Then we're ready to stop other children.
3790  */
3793  }
3794  }
3795 
3796  /*
3797  * If we're ready to do so, signal child processes to shut down. (This
3798  * isn't a persistent state, but treating it as a distinct pmState allows
3799  * us to share this code across multiple shutdown code paths.)
3800  */
3801  if (pmState == PM_STOP_BACKENDS)
3802  {
3803  /*
3804  * Forget any pending requests for background workers, since we're no
3805  * longer willing to launch any new workers. (If additional requests
3806  * arrive, BackgroundWorkerStateChange will reject them.)
3807  */
3809 
3810  /* Signal all backend children except walsenders */
3811  SignalSomeChildren(SIGTERM,
3813  /* and the autovac launcher too */
3814  if (AutoVacPID != 0)
3815  signal_child(AutoVacPID, SIGTERM);
3816  /* and the bgwriter too */
3817  if (BgWriterPID != 0)
3818  signal_child(BgWriterPID, SIGTERM);
3819  /* and the walwriter too */
3820  if (WalWriterPID != 0)
3821  signal_child(WalWriterPID, SIGTERM);
3822  /* If we're in recovery, also stop startup and walreceiver procs */
3823  if (StartupPID != 0)
3824  signal_child(StartupPID, SIGTERM);
3825  if (WalReceiverPID != 0)
3826  signal_child(WalReceiverPID, SIGTERM);
3827  /* checkpointer, archiver, stats, and syslogger may continue for now */
3828 
3829  /* Now transition to PM_WAIT_BACKENDS state to wait for them to die */
3831  }
3832 
3833  /*
3834  * If we are in a state-machine state that implies waiting for backends to
3835  * exit, see if they're all gone, and change state if so.
3836  */
3837  if (pmState == PM_WAIT_BACKENDS)
3838  {
3839  /*
3840  * PM_WAIT_BACKENDS state ends when we have no regular backends
3841  * (including autovac workers), no bgworkers (including unconnected
3842  * ones), and no walwriter, autovac launcher or bgwriter. If we are
3843  * doing crash recovery or an immediate shutdown then we expect the
3844  * checkpointer to exit as well, otherwise not. The stats and
3845  * syslogger processes are disregarded since they are not connected to
3846  * shared memory; we also disregard dead_end children here. Walsenders
3847  * and archiver are also disregarded, they will be terminated later
3848  * after writing the checkpoint record.
3849  */
3851  StartupPID == 0 &&
3852  WalReceiverPID == 0 &&
3853  BgWriterPID == 0 &&
3854  (CheckpointerPID == 0 ||
3856  WalWriterPID == 0 &&
3857  AutoVacPID == 0)
3858  {
3860  {
3861  /*
3862  * Start waiting for dead_end children to die. This state
3863  * change causes ServerLoop to stop creating new ones.
3864  */
3866 
3867  /*
3868  * We already SIGQUIT'd the archiver and stats processes, if
3869  * any, when we started immediate shutdown or entered
3870  * FatalError state.
3871  */
3872  }
3873  else
3874  {
3875  /*
3876  * If we get here, we are proceeding with normal shutdown. All
3877  * the regular children are gone, and it's time to tell the
3878  * checkpointer to do a shutdown checkpoint.
3879  */
3881  /* Start the checkpointer if not running */
3882  if (CheckpointerPID == 0)
3884  /* And tell it to shut down */
3885  if (CheckpointerPID != 0)
3886  {
3888  pmState = PM_SHUTDOWN;
3889  }
3890  else
3891  {
3892  /*
3893  * If we failed to fork a checkpointer, just shut down.
3894  * Any required cleanup will happen at next restart. We
3895  * set FatalError so that an "abnormal shutdown" message
3896  * gets logged when we exit.
3897  */
3898  FatalError = true;
3900 
3901  /* Kill the walsenders and archiver too */
3903  if (PgArchPID != 0)
3905  }
3906  }
3907  }
3908  }
3909 
3910  if (pmState == PM_SHUTDOWN_2)
3911  {
3912  /*
3913  * PM_SHUTDOWN_2 state ends when there's no other children than
3914  * dead_end children left. There shouldn't be any regular backends
3915  * left by now anyway; what we're really waiting for is walsenders and
3916  * archiver.
3917  */
3918  if (PgArchPID == 0 && CountChildren(BACKEND_TYPE_ALL) == 0)
3919  {
3921  }
3922  }
3923 
3924  if (pmState == PM_WAIT_DEAD_END)
3925  {
3926  /*
3927  * PM_WAIT_DEAD_END state ends when the BackendList is entirely empty
3928  * (ie, no dead_end children remain), and the archiver is gone too.
3929  *
3930  * The reason we wait for those two is to protect them against a new
3931  * postmaster starting conflicting subprocesses; this isn't an
3932  * ironclad protection, but it at least helps in the
3933  * shutdown-and-immediately-restart scenario. Note that they have
3934  * already been sent appropriate shutdown signals, either during a
3935  * normal state transition leading up to PM_WAIT_DEAD_END, or during
3936  * FatalError processing.
3937  */
3938  if (dlist_is_empty(&BackendList) && PgArchPID == 0)
3939  {
3940  /* These other guys should be dead already */
3941  Assert(StartupPID == 0);
3942  Assert(WalReceiverPID == 0);
3943  Assert(BgWriterPID == 0);
3944  Assert(CheckpointerPID == 0);
3945  Assert(WalWriterPID == 0);
3946  Assert(AutoVacPID == 0);
3947  /* syslogger is not considered here */
3949  }
3950  }
3951 
3952  /*
3953  * If we've been told to shut down, we exit as soon as there are no
3954  * remaining children. If there was a crash, cleanup will occur at the
3955  * next startup. (Before PostgreSQL 8.3, we tried to recover from the
3956  * crash before exiting, but that seems unwise if we are quitting because
3957  * we got SIGTERM from init --- there may well not be time for recovery
3958  * before init decides to SIGKILL us.)
3959  *
3960  * Note that the syslogger continues to run. It will exit when it sees
3961  * EOF on its input pipe, which happens when there are no more upstream
3962  * processes.
3963  */
3965  {
3966  if (FatalError)
3967  {
3968  ereport(LOG, (errmsg("abnormal database system shutdown")));
3969  ExitPostmaster(1);
3970  }
3971  else
3972  {
3973  /*
3974  * Normal exit from the postmaster is here. We don't need to log
3975  * anything here, since the UnlinkLockFiles proc_exit callback
3976  * will do so, and that should be the last user-visible action.
3977  */
3978  ExitPostmaster(0);
3979  }
3980  }
3981 
3982  /*
3983  * If the startup process failed, or the user does not want an automatic
3984  * restart after backend crashes, wait for all non-syslogger children to
3985  * exit, and then exit postmaster. We don't try to reinitialize when the
3986  * startup process fails, because more than likely it will just fail again
3987  * and we will keep trying forever.
3988  */
3989  if (pmState == PM_NO_CHILDREN)
3990  {
3992  {
3993  ereport(LOG,
3994  (errmsg("shutting down due to startup process failure")));
3995  ExitPostmaster(1);
3996  }
3997  if (!restart_after_crash)
3998  {
3999  ereport(LOG,
4000  (errmsg("shutting down because restart_after_crash is off")));
4001  ExitPostmaster(1);
4002  }
4003  }
4004 
4005  /*
4006  * If we need to recover from a crash, wait for all non-syslogger children
4007  * to exit, then reset shmem and StartupDataBase.
4008  */
4009  if (FatalError && pmState == PM_NO_CHILDREN)
4010  {
4011  ereport(LOG,
4012  (errmsg("all server processes terminated; reinitializing")));
4013 
4014  /* remove leftover temporary files after a crash */
4017 
4018  /* allow background workers to immediately restart */
4020 
4021  shmem_exit(1);
4022 
4023  /* re-read control file into local memory */
4025 
4026  reset_shared();
4027 
4029  Assert(StartupPID != 0);
4031  pmState = PM_STARTUP;
4032  /* crash recovery started, reset SIGKILL flag */
4033  AbortStartTime = 0;
4034  }
4035 }
4036 
4037 
4038 /*
4039  * Send a signal to a postmaster child process
4040  *
4041  * On systems that have setsid(), each child process sets itself up as a
4042  * process group leader. For signals that are generally interpreted in the
4043  * appropriate fashion, we signal the entire process group not just the
4044  * direct child process. This allows us to, for example, SIGQUIT a blocked
4045  * archive_recovery script, or SIGINT a script being run by a backend via
4046  * system().
4047  *
4048  * There is a race condition for recently-forked children: they might not
4049  * have executed setsid() yet. So we signal the child directly as well as
4050  * the group. We assume such a child will handle the signal before trying
4051  * to spawn any grandchild processes. We also assume that signaling the
4052  * child twice will not cause any problems.
4053  */
4054 static void
4055 signal_child(pid_t pid, int signal)
4056 {
4057  if (kill(pid, signal) < 0)
4058  elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) pid, signal);
4059 #ifdef HAVE_SETSID
4060  switch (signal)
4061  {
4062  case SIGINT:
4063  case SIGTERM:
4064  case SIGQUIT:
4065  case SIGSTOP:
4066  case SIGKILL:
4067  if (kill(-pid, signal) < 0)
4068  elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) (-pid), signal);
4069  break;
4070  default:
4071  break;
4072  }
4073 #endif
4074 }
4075 
4076 /*
4077  * Send a signal to the targeted children (but NOT special children;
4078  * dead_end children are never signaled, either).
4079  */
4080 static bool
4081 SignalSomeChildren(int signal, int target)
4082 {
4083  dlist_iter iter;
4084  bool signaled = false;
4085 
4086  dlist_foreach(iter, &BackendList)
4087  {
4088  Backend *bp = dlist_container(Backend, elem, iter.cur);
4089 
4090  if (bp->dead_end)
4091  continue;
4092 
4093  /*
4094  * Since target == BACKEND_TYPE_ALL is the most common case, we test
4095  * it first and avoid touching shared memory for every child.
4096  */
4097  if (target != BACKEND_TYPE_ALL)
4098  {
4099  /*
4100  * Assign bkend_type for any recently announced WAL Sender
4101  * processes.
4102  */
4103  if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
4106 
4107  if (!(target & bp->bkend_type))
4108  continue;
4109  }
4110 
4111  ereport(DEBUG4,
4112  (errmsg_internal("sending signal %d to process %d",
4113  signal, (int) bp->pid)));
4114  signal_child(bp->pid, signal);
4115  signaled = true;
4116  }
4117  return signaled;
4118 }
4119 
4120 /*
4121  * Send a termination signal to children. This considers all of our children
4122  * processes, except syslogger and dead_end backends.
4123  */
4124 static void
4126 {
4127  SignalChildren(signal);
4128  if (StartupPID != 0)
4129  {
4130  signal_child(StartupPID, signal);
4131  if (signal == SIGQUIT || signal == SIGKILL)
4133  }
4134  if (BgWriterPID != 0)
4135  signal_child(BgWriterPID, signal);
4136  if (CheckpointerPID != 0)
4137  signal_child(CheckpointerPID, signal);
4138  if (WalWriterPID != 0)
4139  signal_child(WalWriterPID, signal);
4140  if (WalReceiverPID != 0)
4141  signal_child(WalReceiverPID, signal);
4142  if (AutoVacPID != 0)
4143  signal_child(AutoVacPID, signal);
4144  if (PgArchPID != 0)
4145  signal_child(PgArchPID, signal);
4146 }
4147 
4148 /*
4149  * BackendStartup -- start backend process
4150  *
4151  * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
4152  *
4153  * Note: if you change this code, also consider StartAutovacuumWorker.
4154  */
4155 static int
4157 {
4158  Backend *bn; /* for backend cleanup */
4159  pid_t pid;
4160 
4161  /*
4162  * Create backend data structure. Better before the fork() so we can
4163  * handle failure cleanly.
4164  */
4165  bn = (Backend *) malloc(sizeof(Backend));
4166  if (!bn)
4167  {
4168  ereport(LOG,
4169  (errcode(ERRCODE_OUT_OF_MEMORY),
4170  errmsg("out of memory")));
4171  return STATUS_ERROR;
4172  }
4173 
4174  /*
4175  * Compute the cancel key that will be assigned to this backend. The
4176  * backend will have its own copy in the forked-off process' value of
4177  * MyCancelKey, so that it can transmit the key to the frontend.
4178  */
4180  {
4181  free(bn);
4182  ereport(LOG,
4183  (errcode(ERRCODE_INTERNAL_ERROR),
4184  errmsg("could not generate random cancel key")));
4185  return STATUS_ERROR;
4186  }
4187 
4188  bn->cancel_key = MyCancelKey;
4189 
4190  /* Pass down canAcceptConnections state */
4191  port->canAcceptConnections = canAcceptConnections(BACKEND_TYPE_NORMAL);
4192  bn->dead_end = (port->canAcceptConnections != CAC_OK);
4193 
4194  /*
4195  * Unless it's a dead_end child, assign it a child slot number
4196  */
4197  if (!bn->dead_end)
4199  else
4200  bn->child_slot = 0;
4201 
4202  /* Hasn't asked to be notified about any bgworkers yet */
4203  bn->bgworker_notify = false;
4204 
4205 #ifdef EXEC_BACKEND
4206  pid = backend_forkexec(port);
4207 #else /* !EXEC_BACKEND */
4208  pid = fork_process();
4209  if (pid == 0) /* child */
4210  {
4211  free(bn);
4212 
4213  /* Detangle from postmaster */
4215 
4216  /* Close the postmaster's sockets */
4217  ClosePostmasterPorts(false);
4218 
4219  /* Perform additional initialization and collect startup packet */
4221 
4222  /*
4223  * Create a per-backend PGPROC struct in shared memory. We must do
4224  * this before we can use LWLocks. In the !EXEC_BACKEND case (here)
4225  * this could be delayed a bit further, but EXEC_BACKEND needs to do
4226  * stuff with LWLocks before PostgresMain(), so we do it here as well
4227  * for symmetry.
4228  */
4229  InitProcess();
4230 
4231  /* And run the backend */
4232  BackendRun(port);
4233  }
4234 #endif /* EXEC_BACKEND */
4235 
4236  if (pid < 0)
4237  {
4238  /* in parent, fork failed */
4239  int save_errno = errno;
4240 
4241  if (!bn->dead_end)
4243  free(bn);
4244  errno = save_errno;
4245  ereport(LOG,
4246  (errmsg("could not fork new process for connection: %m")));
4247  report_fork_failure_to_client(port, save_errno);
4248  return STATUS_ERROR;
4249  }
4250 
4251  /* in parent, successful fork */
4252  ereport(DEBUG2,
4253  (errmsg_internal("forked new backend, pid=%d socket=%d",
4254  (int) pid, (int) port->sock)));
4255 
4256  /*
4257  * Everything's been successful, it's safe to add this backend to our list
4258  * of backends.
4259  */
4260  bn->pid = pid;
4261  bn->bkend_type = BACKEND_TYPE_NORMAL; /* Can change later to WALSND */
4263 
4264 #ifdef EXEC_BACKEND
4265  if (!bn->dead_end)
4266  ShmemBackendArrayAdd(bn);
4267 #endif
4268 
4269  return STATUS_OK;
4270 }
4271 
4272 /*
4273  * Try to report backend fork() failure to client before we close the
4274  * connection. Since we do not care to risk blocking the postmaster on
4275  * this connection, we set the connection to non-blocking and try only once.
4276  *
4277  * This is grungy special-purpose code; we cannot use backend libpq since
4278  * it's not up and running.
4279  */
4280 static void
4282 {
4283  char buffer[1000];
4284  int rc;
4285 
4286  /* Format the error message packet (always V2 protocol) */
4287  snprintf(buffer, sizeof(buffer), "E%s%s\n",
4288  _("could not fork new process for connection: "),
4289  strerror(errnum));
4290 
4291  /* Set port to non-blocking. Don't do send() if this fails */
4292  if (!pg_set_noblock(port->sock))
4293  return;
4294 
4295  /* We'll retry after EINTR, but ignore all other failures */
4296  do
4297  {
4298  rc = send(port->sock, buffer, strlen(buffer) + 1, 0);
4299  } while (rc < 0 && errno == EINTR);
4300 }
4301 
4302 
4303 /*
4304  * BackendInitialize -- initialize an interactive (postmaster-child)
4305  * backend process, and collect the client's startup packet.
4306  *
4307  * returns: nothing. Will not return at all if there's any failure.
4308  *
4309  * Note: this code does not depend on having any access to shared memory.
4310  * Indeed, our approach to SIGTERM/timeout handling *requires* that
4311  * shared memory not have been touched yet; see comments within.
4312  * In the EXEC_BACKEND case, we are physically attached to shared memory
4313  * but have not yet set up most of our local pointers to shmem structures.
4314  */
4315 static void
4317 {
4318  int status;
4319  int ret;
4320  char remote_host[NI_MAXHOST];
4321  char remote_port[NI_MAXSERV];
4322  StringInfoData ps_data;
4323 
4324  /* Save port etc. for ps status */
4325  MyProcPort = port;
4326 
4327  /* Tell fd.c about the long-lived FD associated with the port */
4329 
4330  /*
4331  * PreAuthDelay is a debugging aid for investigating problems in the
4332  * authentication cycle: it can be set in postgresql.conf to allow time to
4333  * attach to the newly-forked backend with a debugger. (See also
4334  * PostAuthDelay, which we allow clients to pass through PGOPTIONS, but it
4335  * is not honored until after authentication.)
4336  */
4337  if (PreAuthDelay > 0)
4338  pg_usleep(PreAuthDelay * 1000000L);
4339 
4340  /* This flag will remain set until InitPostgres finishes authentication */
4341  ClientAuthInProgress = true; /* limit visibility of log messages */
4342 
4343  /* set these to empty in case they are needed before we set them up */
4344  port->remote_host = "";
4345  port->remote_port = "";
4346 
4347  /*
4348  * Initialize libpq and enable reporting of ereport errors to the client.
4349  * Must do this now because authentication uses libpq to send messages.
4350  */
4351  pq_init(); /* initialize libpq to talk to client */
4352  whereToSendOutput = DestRemote; /* now safe to ereport to client */
4353 
4354  /*
4355  * We arrange to do _exit(1) if we receive SIGTERM or timeout while trying
4356  * to collect the startup packet; while SIGQUIT results in _exit(2).
4357  * Otherwise the postmaster cannot shutdown the database FAST or IMMED
4358  * cleanly if a buggy client fails to send the packet promptly.
4359  *
4360  * Exiting with _exit(1) is only possible because we have not yet touched
4361  * shared memory; therefore no outside-the-process state needs to get
4362  * cleaned up.
4363  */
4365  /* SIGQUIT handler was already set up by InitPostmasterChild */
4366  InitializeTimeouts(); /* establishes SIGALRM handler */
4368 
4369  /*
4370  * Get the remote host name and port for logging and status display.
4371  */
4372  remote_host[0] = '\0';
4373  remote_port[0] = '\0';
4374  if ((ret = pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen,
4375  remote_host, sizeof(remote_host),
4376  remote_port, sizeof(remote_port),
4377  (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV)) != 0)
4378  ereport(WARNING,
4379  (errmsg_internal("pg_getnameinfo_all() failed: %s",
4380  gai_strerror(ret))));
4381 
4382  /*
4383  * Save remote_host and remote_port in port structure (after this, they
4384  * will appear in log_line_prefix data for log messages).
4385  */
4386  port->remote_host = strdup(remote_host);
4387  port->remote_port = strdup(remote_port);
4388 
4389  /* And now we can issue the Log_connections message, if wanted */
4390  if (Log_connections)
4391  {
4392  if (remote_port[0])
4393  ereport(LOG,
4394  (errmsg("connection received: host=%s port=%s",
4395  remote_host,
4396  remote_port)));
4397  else
4398  ereport(LOG,
4399  (errmsg("connection received: host=%s",
4400  remote_host)));
4401  }
4402 
4403  /*
4404  * If we did a reverse lookup to name, we might as well save the results
4405  * rather than possibly repeating the lookup during authentication.
4406  *
4407  * Note that we don't want to specify NI_NAMEREQD above, because then we'd
4408  * get nothing useful for a client without an rDNS entry. Therefore, we
4409  * must check whether we got a numeric IPv4 or IPv6 address, and not save
4410  * it into remote_hostname if so. (This test is conservative and might
4411  * sometimes classify a hostname as numeric, but an error in that
4412  * direction is safe; it only results in a possible extra lookup.)
4413  */
4414  if (log_hostname &&
4415  ret == 0 &&
4416  strspn(remote_host, "0123456789.") < strlen(remote_host) &&
4417  strspn(remote_host, "0123456789ABCDEFabcdef:") < strlen(remote_host))
4418  port->remote_hostname = strdup(remote_host);
4419 
4420  /*
4421  * Ready to begin client interaction. We will give up and _exit(1) after
4422  * a time delay, so that a broken client can't hog a connection
4423  * indefinitely. PreAuthDelay and any DNS interactions above don't count
4424  * against the time limit.
4425  *
4426  * Note: AuthenticationTimeout is applied here while waiting for the
4427  * startup packet, and then again in InitPostgres for the duration of any
4428  * authentication operations. So a hostile client could tie up the
4429  * process for nearly twice AuthenticationTimeout before we kick him off.
4430  *
4431  * Note: because PostgresMain will call InitializeTimeouts again, the
4432  * registration of STARTUP_PACKET_TIMEOUT will be lost. This is okay
4433  * since we never use it again after this function.
4434  */
4437 
4438  /*
4439  * Receive the startup packet (which might turn out to be a cancel request
4440  * packet).
4441  */
4442  status = ProcessStartupPacket(port, false, false);
4443 
4444  /*
4445  * Disable the timeout, and prevent SIGTERM again.
4446  */
4448  PG_SETMASK(&BlockSig);
4449 
4450  /*
4451  * As a safety check that nothing in startup has yet performed
4452  * shared-memory modifications that would need to be undone if we had
4453  * exited through SIGTERM or timeout above, check that no on_shmem_exit
4454  * handlers have been registered yet. (This isn't terribly bulletproof,
4455  * since someone might misuse an on_proc_exit handler for shmem cleanup,
4456  * but it's a cheap and helpful check. We cannot disallow on_proc_exit
4457  * handlers unfortunately, since pq_init() already registered one.)
4458  */
4460 
4461  /*
4462  * Stop here if it was bad or a cancel packet. ProcessStartupPacket
4463  * already did any appropriate error reporting.
4464  */
4465  if (status != STATUS_OK)
4466  proc_exit(0);
4467 
4468  /*
4469  * Now that we have the user and database name, we can set the process
4470  * title for ps. It's good to do this as early as possible in startup.
4471  */
4472  initStringInfo(&ps_data);
4473  if (am_walsender)
4475  appendStringInfo(&ps_data, "%s ", port->user_name);
4476  if (!am_walsender)
4477  appendStringInfo(&ps_data, "%s ", port->database_name);
4478  appendStringInfo(&ps_data, "%s", port->remote_host);
4479  if (port->remote_port[0] != '\0')
4480  appendStringInfo(&ps_data, "(%s)", port->remote_port);
4481 
4482  init_ps_display(ps_data.data);
4483  pfree(ps_data.data);
4484 
4485  set_ps_display("initializing");
4486 }
4487 
4488 
4489 /*
4490  * BackendRun -- set up the backend's argument list and invoke PostgresMain()
4491  *
4492  * returns:
4493  * Doesn't return at all.
4494  */
4495 static void
4497 {
4498  /*
4499  * Make sure we aren't in PostmasterContext anymore. (We can't delete it
4500  * just yet, though, because InitPostgres will need the HBA data.)
4501  */
4503 
4504  PostgresMain(port->database_name, port->user_name);
4505 }
4506 
4507 
4508 #ifdef EXEC_BACKEND
4509 
4510 /*
4511  * postmaster_forkexec -- fork and exec a postmaster subprocess
4512  *
4513  * The caller must have set up the argv array already, except for argv[2]
4514  * which will be filled with the name of the temp variable file.
4515  *
4516  * Returns the child process PID, or -1 on fork failure (a suitable error
4517  * message has been logged on failure).
4518  *
4519  * All uses of this routine will dispatch to SubPostmasterMain in the
4520  * child process.
4521  */
4522 pid_t
4523 postmaster_forkexec(int argc, char *argv[])
4524 {
4525  Port port;
4526 
4527  /* This entry point passes dummy values for the Port variables */
4528  memset(&port, 0, sizeof(port));
4529  return internal_forkexec(argc, argv, &port);
4530 }
4531 
4532 /*
4533  * backend_forkexec -- fork/exec off a backend process
4534  *
4535  * Some operating systems (WIN32) don't have fork() so we have to simulate
4536  * it by storing parameters that need to be passed to the child and
4537  * then create a new child process.
4538  *
4539  * returns the pid of the fork/exec'd process, or -1 on failure
4540  */
4541 static pid_t
4542 backend_forkexec(Port *port)
4543 {
4544  char *av[4];
4545  int ac = 0;
4546 
4547  av[ac++] = "postgres";
4548  av[ac++] = "--forkbackend";
4549  av[ac++] = NULL; /* filled in by internal_forkexec */
4550 
4551  av[ac] = NULL;
4552  Assert(ac < lengthof(av));
4553 
4554  return internal_forkexec(ac, av, port);
4555 }
4556 
4557 #ifndef WIN32
4558 
4559 /*
4560  * internal_forkexec non-win32 implementation
4561  *
4562  * - writes out backend variables to the parameter file
4563  * - fork():s, and then exec():s the child process
4564  */
4565 static pid_t
4566 internal_forkexec(int argc, char *argv[], Port *port)
4567 {
4568  static unsigned long tmpBackendFileNum = 0;
4569  pid_t pid;
4570  char tmpfilename[MAXPGPATH];
4571  BackendParameters param;
4572  FILE *fp;
4573 
4574  if (!save_backend_variables(&param, port))
4575  return -1; /* log made by save_backend_variables */
4576 
4577  /* Calculate name for temp file */
4578  snprintf(tmpfilename, MAXPGPATH, "%s/%s.backend_var.%d.%lu",
4580  MyProcPid, ++tmpBackendFileNum);
4581 
4582  /* Open file */
4583  fp = AllocateFile(tmpfilename, PG_BINARY_W);
4584  if (!fp)
4585  {
4586  /*
4587  * As in OpenTemporaryFileInTablespace, try to make the temp-file
4588  * directory, ignoring errors.
4589  */
4591 
4592  fp = AllocateFile(tmpfilename, PG_BINARY_W);
4593  if (!fp)
4594  {
4595  ereport(LOG,
4597  errmsg("could not create file \"%s\": %m",
4598  tmpfilename)));
4599  return -1;
4600  }
4601  }
4602 
4603  if (fwrite(&param, sizeof(param), 1, fp) != 1)
4604  {
4605  ereport(LOG,
4607  errmsg("could not write to file \"%s\": %m", tmpfilename)));
4608  FreeFile(fp);
4609  return -1;
4610  }
4611 
4612  /* Release file */
4613  if (FreeFile(fp))
4614  {
4615  ereport(LOG,
4617  errmsg("could not write to file \"%s\": %m", tmpfilename)));
4618  return -1;
4619  }
4620 
4621  /* Make sure caller set up argv properly */
4622  Assert(argc >= 3);
4623  Assert(argv[argc] == NULL);
4624  Assert(strncmp(argv[1], "--fork", 6) == 0);
4625  Assert(argv[2] == NULL);
4626 
4627  /* Insert temp file name after --fork argument */
4628  argv[2] = tmpfilename;
4629 
4630  /* Fire off execv in child */
4631  if ((pid = fork_process()) == 0)
4632  {
4633  if (execv(postgres_exec_path, argv) < 0)
4634  {
4635  ereport(LOG,
4636  (errmsg("could not execute server process \"%s\": %m",
4637  postgres_exec_path)));
4638  /* We're already in the child process here, can't return */
4639  exit(1);
4640  }
4641  }
4642 
4643  return pid; /* Parent returns pid, or -1 on fork failure */
4644 }
4645 #else /* WIN32 */
4646 
4647 /*
4648  * internal_forkexec win32 implementation
4649  *
4650  * - starts backend using CreateProcess(), in suspended state
4651  * - writes out backend variables to the parameter file
4652  * - during this, duplicates handles and sockets required for
4653  * inheritance into the new process
4654  * - resumes execution of the new process once the backend parameter
4655  * file is complete.
4656  */
4657 static pid_t
4658 internal_forkexec(int argc, char *argv[], Port *port)
4659 {
4660  int retry_count = 0;
4661  STARTUPINFO si;
4662  PROCESS_INFORMATION pi;
4663  int i;
4664  int j;
4665  char cmdLine[MAXPGPATH * 2];
4666  HANDLE paramHandle;
4667  BackendParameters *param;
4668  SECURITY_ATTRIBUTES sa;
4669  char paramHandleStr[32];
4670  win32_deadchild_waitinfo *childinfo;
4671 
4672  /* Make sure caller set up argv properly */
4673  Assert(argc >= 3);
4674  Assert(argv[argc] == NULL);
4675  Assert(strncmp(argv[1], "--fork", 6) == 0);
4676  Assert(argv[2] == NULL);
4677 
4678  /* Resume here if we need to retry */
4679 retry:
4680 
4681  /* Set up shared memory for parameter passing */
4682  ZeroMemory(&sa, sizeof(sa));
4683  sa.nLength = sizeof(sa);
4684  sa.bInheritHandle = TRUE;
4685  paramHandle = CreateFileMapping(INVALID_HANDLE_VALUE,
4686  &sa,
4687  PAGE_READWRITE,
4688  0,
4689  sizeof(BackendParameters),
4690  NULL);
4691  if (paramHandle == INVALID_HANDLE_VALUE)
4692  {
4693  ereport(LOG,
4694  (errmsg("could not create backend parameter file mapping: error code %lu",
4695  GetLastError())));
4696  return -1;
4697  }
4698 
4699  param = MapViewOfFile(paramHandle, FILE_MAP_WRITE, 0, 0, sizeof(BackendParameters));
4700  if (!param)
4701  {
4702  ereport(LOG,
4703  (errmsg("could not map backend parameter memory: error code %lu",
4704  GetLastError())));
4705  CloseHandle(paramHandle);
4706  return -1;
4707  }
4708 
4709  /* Insert temp file name after --fork argument */
4710 #ifdef _WIN64
4711  sprintf(paramHandleStr, "%llu", (LONG_PTR) paramHandle);
4712 #else
4713  sprintf(paramHandleStr, "%lu", (DWORD) paramHandle);
4714 #endif
4715  argv[2] = paramHandleStr;
4716 
4717  /* Format the cmd line */
4718  cmdLine[sizeof(cmdLine) - 1] = '\0';
4719  cmdLine[sizeof(cmdLine) - 2] = '\0';
4720  snprintf(cmdLine, sizeof(cmdLine) - 1, "\"%s\"", postgres_exec_path);
4721  i = 0;
4722  while (argv[++i] != NULL)
4723  {
4724  j = strlen(cmdLine);
4725  snprintf(cmdLine + j, sizeof(cmdLine) - 1 - j, " \"%s\"", argv[i]);
4726  }
4727  if (cmdLine[sizeof(cmdLine) - 2] != '\0')
4728  {
4729  ereport(LOG,
4730  (errmsg("subprocess command line too long")));
4731  UnmapViewOfFile(param);
4732  CloseHandle(paramHandle);
4733  return -1;
4734  }
4735 
4736  memset(&pi, 0, sizeof(pi));
4737  memset(&si, 0, sizeof(si));
4738  si.cb = sizeof(si);
4739 
4740  /*
4741  * Create the subprocess in a suspended state. This will be resumed later,
4742  * once we have written out the parameter file.
4743  */
4744  if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, CREATE_SUSPENDED,
4745  NULL, NULL, &si, &pi))
4746  {
4747  ereport(LOG,
4748  (errmsg("CreateProcess() call failed: %m (error code %lu)",
4749  GetLastError())));
4750  UnmapViewOfFile(param);
4751  CloseHandle(paramHandle);
4752  return -1;
4753  }
4754 
4755  if (!save_backend_variables(param, port, pi.hProcess, pi.dwProcessId))
4756  {
4757  /*
4758  * log made by save_backend_variables, but we have to clean up the
4759  * mess with the half-started process
4760  */
4761  if (!TerminateProcess(pi.hProcess, 255))
4762  ereport(LOG,
4763  (errmsg_internal("could not terminate unstarted process: error code %lu",
4764  GetLastError())));
4765  CloseHandle(pi.hProcess);
4766  CloseHandle(pi.hThread);
4767  UnmapViewOfFile(param);
4768  CloseHandle(paramHandle);
4769  return -1; /* log made by save_backend_variables */
4770  }
4771 
4772  /* Drop the parameter shared memory that is now inherited to the backend */
4773  if (!UnmapViewOfFile(param))
4774  ereport(LOG,
4775  (errmsg("could not unmap view of backend parameter file: error code %lu",
4776  GetLastError())));
4777  if (!CloseHandle(paramHandle))
4778  ereport(LOG,
4779  (errmsg("could not close handle to backend parameter file: error code %lu",
4780  GetLastError())));
4781 
4782  /*
4783  * Reserve the memory region used by our main shared memory segment before
4784  * we resume the child process. Normally this should succeed, but if ASLR
4785  * is active then it might sometimes fail due to the stack or heap having
4786  * gotten mapped into that range. In that case, just terminate the
4787  * process and retry.
4788  */
4789  if (!pgwin32_ReserveSharedMemoryRegion(pi.hProcess))
4790  {
4791  /* pgwin32_ReserveSharedMemoryRegion already made a log entry */
4792  if (!TerminateProcess(pi.hProcess, 255))
4793  ereport(LOG,
4794  (errmsg_internal("could not terminate process that failed to reserve memory: error code %lu",
4795  GetLastError())));
4796  CloseHandle(pi.hProcess);
4797  CloseHandle(pi.hThread);
4798  if (++retry_count < 100)
4799  goto retry;
4800  ereport(LOG,
4801  (errmsg("giving up after too many tries to reserve shared memory"),
4802  errhint("This might be caused by ASLR or antivirus software.")));
4803  return -1;
4804  }
4805 
4806  /*
4807  * Now that the backend variables are written out, we start the child
4808  * thread so it can start initializing while we set up the rest of the
4809  * parent state.
4810  */
4811  if (ResumeThread(pi.hThread) == -1)
4812  {
4813  if (!TerminateProcess(pi.hProcess, 255))
4814  {
4815  ereport(LOG,
4816  (errmsg_internal("could not terminate unstartable process: error code %lu",
4817  GetLastError())));
4818  CloseHandle(pi.hProcess);
4819  CloseHandle(pi.hThread);
4820  return -1;
4821  }
4822  CloseHandle(pi.hProcess);
4823  CloseHandle(pi.hThread);
4824  ereport(LOG,
4825  (errmsg_internal("could not resume thread of unstarted process: error code %lu",
4826  GetLastError())));
4827  return -1;
4828  }
4829 
4830  /*
4831  * Queue a waiter to signal when this child dies. The wait will be handled
4832  * automatically by an operating system thread pool.
4833  *
4834  * Note: use malloc instead of palloc, since it needs to be thread-safe.
4835  * Struct will be free():d from the callback function that runs on a
4836  * different thread.
4837  */
4838  childinfo = malloc(sizeof(win32_deadchild_waitinfo));
4839  if (!childinfo)
4840  ereport(FATAL,
4841  (errcode(ERRCODE_OUT_OF_MEMORY),
4842  errmsg("out of memory")));
4843 
4844  childinfo->procHandle = pi.hProcess;
4845  childinfo->procId = pi.dwProcessId;
4846 
4847  if (!RegisterWaitForSingleObject(&childinfo->waitHandle,
4848  pi.hProcess,
4849  pgwin32_deadchild_callback,
4850  childinfo,
4851  INFINITE,
4852  WT_EXECUTEONLYONCE | WT_EXECUTEINWAITTHREAD))
4853  ereport(FATAL,
4854  (errmsg_internal("could not register process for wait: error code %lu",
4855  GetLastError())));
4856 
4857  /* Don't close pi.hProcess here - the wait thread needs access to it */
4858 
4859  CloseHandle(pi.hThread);
4860 
4861  return pi.dwProcessId;
4862 }
4863 #endif /* WIN32 */
4864 
4865 
4866 /*
4867  * SubPostmasterMain -- Get the fork/exec'd process into a state equivalent
4868  * to what it would be if we'd simply forked on Unix, and then
4869  * dispatch to the appropriate place.
4870  *
4871  * The first two command line arguments are expected to be "--forkFOO"
4872  * (where FOO indicates which postmaster child we are to become), and
4873  * the name of a variables file that we can read to load data that would
4874  * have been inherited by fork() on Unix. Remaining arguments go to the
4875  * subprocess FooMain() routine.
4876  */
4877 void
4878 SubPostmasterMain(int argc, char *argv[])
4879 {
4880  Port port;
4881 
4882  /* In EXEC_BACKEND case we will not have inherited these settings */
4883  IsPostmasterEnvironment = true;
4885 
4886  /* Setup essential subsystems (to ensure elog() behaves sanely) */
4888 
4889  /* Check we got appropriate args */
4890  if (argc < 3)
4891  elog(FATAL, "invalid subpostmaster invocation");
4892 
4893  /* Read in the variables file */
4894  memset(&port, 0, sizeof(Port));
4895  read_backend_variables(argv[2], &port);
4896 
4897  /* Close the postmaster's sockets (as soon as we know them) */
4898  ClosePostmasterPorts(strcmp(argv[1], "--forklog") == 0);
4899 
4900  /* Setup as postmaster child */
4902 
4903  /*
4904  * If appropriate, physically re-attach to shared memory segment. We want
4905  * to do this before going any further to ensure that we can attach at the
4906  * same address the postmaster used. On the other hand, if we choose not
4907  * to re-attach, we may have other cleanup to do.
4908  *
4909  * If testing EXEC_BACKEND on Linux, you should run this as root before
4910  * starting the postmaster:
4911  *
4912  * echo 0 >/proc/sys/kernel/randomize_va_space
4913  *
4914  * This prevents using randomized stack and code addresses that cause the
4915  * child process's memory map to be different from the parent's, making it
4916  * sometimes impossible to attach to shared memory at the desired address.
4917  * Return the setting to its old value (usually '1' or '2') when finished.
4918  */
4919  if (strcmp(argv[1], "--forkbackend") == 0 ||
4920  strcmp(argv[1], "--forkavlauncher") == 0 ||
4921  strcmp(argv[1], "--forkavworker") == 0 ||
4922  strcmp(argv[1], "--forkaux") == 0 ||
4923  strncmp(argv[1], "--forkbgworker=", 15) == 0)
4925  else
4927 
4928  /* autovacuum needs this set before calling InitProcess */
4929  if (strcmp(argv[1], "--forkavlauncher") == 0)
4930  AutovacuumLauncherIAm();
4931  if (strcmp(argv[1], "--forkavworker") == 0)
4932  AutovacuumWorkerIAm();
4933 
4934  /* Read in remaining GUC variables */
4935  read_nondefault_variables();
4936 
4937  /*
4938  * Check that the data directory looks valid, which will also check the
4939  * privileges on the data directory and update our umask and file/group
4940  * variables for creating files later. Note: this should really be done
4941  * before we create any files or directories.
4942  */
4943  checkDataDir();
4944 
4945  /*
4946  * (re-)read control file, as it contains config. The postmaster will
4947  * already have read this, but this process doesn't know about that.
4948  */
4949  LocalProcessControlFile(false);
4950 
4951  /*
4952  * Reload any libraries that were preloaded by the postmaster. Since we
4953  * exec'd this process, those libraries didn't come along with us; but we
4954  * should load them into all child processes to be consistent with the
4955  * non-EXEC_BACKEND behavior.
4956  */
4958 
4959  /* Run backend or appropriate child */
4960  if (strcmp(argv[1], "--forkbackend") == 0)
4961  {
4962  Assert(argc == 3); /* shouldn't be any more args */
4963 
4964  /*
4965  * Need to reinitialize the SSL library in the backend, since the
4966  * context structures contain function pointers and cannot be passed
4967  * through the parameter file.
4968  *
4969  * If for some reason reload fails (maybe the user installed broken
4970  * key files), soldier on without SSL; that's better than all
4971  * connections becoming impossible.
4972  *
4973  * XXX should we do this in all child processes? For the moment it's
4974  * enough to do it in backend children.
4975  */
4976 #ifdef USE_SSL
4977  if (EnableSSL)
4978  {
4979  if (secure_initialize(false) == 0)
4980  LoadedSSL = true;
4981  else
4982  ereport(LOG,
4983  (errmsg("SSL configuration could not be loaded in child process")));
4984  }
4985 #endif
4986 
4987  /*
4988  * Perform additional initialization and collect startup packet.
4989  *
4990  * We want to do this before InitProcess() for a couple of reasons: 1.
4991  * so that we aren't eating up a PGPROC slot while waiting on the
4992  * client. 2. so that if InitProcess() fails due to being out of
4993  * PGPROC slots, we have already initialized libpq and are able to
4994  * report the error to the client.
4995  */
4997 
4998  /* Restore basic shared memory pointers */
5000 
5001  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5002  InitProcess();
5003 
5004  /* Attach process to shared data structures */
5006 
5007  /* And run the backend */
5008  BackendRun(&port); /* does not return */
5009  }
5010  if (strcmp(argv[1], "--forkaux") == 0)
5011  {
5012  AuxProcType auxtype;
5013 
5014  Assert(argc == 4);
5015 
5016  /* Restore basic shared memory pointers */
5018 
5019  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5021 
5022  /* Attach process to shared data structures */
5024 
5025  auxtype = atoi(argv[3]);
5026  AuxiliaryProcessMain(auxtype); /* does not return */
5027  }
5028  if (strcmp(argv[1], "--forkavlauncher") == 0)
5029  {
5030  /* Restore basic shared memory pointers */
5032 
5033  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5034  InitProcess();
5035 
5036  /* Attach process to shared data structures */
5038 
5039  AutoVacLauncherMain(argc - 2, argv + 2); /* does not return */
5040  }
5041  if (strcmp(argv[1], "--forkavworker") == 0)
5042  {
5043  /* Restore basic shared memory pointers */
5045 
5046  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5047  InitProcess();
5048 
5049  /* Attach process to shared data structures */
5051 
5052  AutoVacWorkerMain(argc - 2, argv + 2); /* does not return */
5053  }
5054  if (strncmp(argv[1], "--forkbgworker=", 15) == 0)
5055  {
5056  int shmem_slot;
5057 
5058  /* do this as early as possible; in particular, before InitProcess() */
5059  IsBackgroundWorker = true;
5060 
5061  /* Restore basic shared memory pointers */
5063 
5064  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5065  InitProcess();
5066 
5067  /* Attach process to shared data structures */
5069 
5070  /* Fetch MyBgworkerEntry from shared memory */
5071  shmem_slot = atoi(argv[1] + 15);
5072  MyBgworkerEntry = BackgroundWorkerEntry(shmem_slot);
5073 
5075  }
5076  if (strcmp(argv[1], "--forklog") == 0)
5077  {
5078  /* Do not want to attach to shared memory */
5079 
5080  SysLoggerMain(argc, argv); /* does not return */
5081  }
5082 
5083  abort(); /* shouldn't get here */
5084 }
5085 #endif /* EXEC_BACKEND */
5086 
5087 
/*
 * ExitPostmaster -- cleanup
 *
 * Do NOT call exit() directly --- always go through here!
 *
 * status: exit code, handed unchanged to proc_exit()
 */
static void
ExitPostmaster(int status)
{
#ifdef HAVE_PTHREAD_IS_THREADED_NP

	/*
	 * There is no known cause for a postmaster to become multithreaded after
	 * startup.  Recheck to account for the possibility of unknown causes.
	 * This message uses LOG level, because an unclean shutdown at this point
	 * would usually not look much different from a clean shutdown.
	 */
	if (pthread_is_threaded_np() != 0)
		ereport(LOG,
				(errcode(ERRCODE_INTERNAL_ERROR),
				 errmsg_internal("postmaster became multithreaded"),
				 errdetail("Please report this to <%s>.", PACKAGE_BUGREPORT)));
#endif

	/* should cleanup shared memory and kill all backends */

	/*
	 * Not sure of the semantics here.  When the Postmaster dies, should the
	 * backends all be killed? probably not.
	 *
	 * MUST		-- vadim 05-10-1999
	 */

	proc_exit(status);
}
5122 
5123 /*
5124  * sigusr1_handler - handle signal conditions from child processes
5125  */
5126 static void
5128 {
5129  int save_errno = errno;
5130 
5131  /*
5132  * We rely on the signal mechanism to have blocked all signals ... except
5133  * on Windows, which lacks sigaction(), so we have to do it manually.
5134  */
5135 #ifdef WIN32
5136  PG_SETMASK(&BlockSig);
5137 #endif
5138 
5139  /*
5140  * RECOVERY_STARTED and BEGIN_HOT_STANDBY signals are ignored in
5141  * unexpected states. If the startup process quickly starts up, completes
5142  * recovery, exits, we might process the death of the startup process
5143  * first. We don't want to go back to recovery in that case.
5144  */
5147  {
5148  /* WAL redo has started. We're out of reinitialization. */
5149  FatalError = false;
5150  AbortStartTime = 0;
5151 
5152  /*
5153  * Start the archiver if we're responsible for (re-)archiving received
5154  * files.
5155  */
5156  Assert(PgArchPID == 0);
5157  if (XLogArchivingAlways())
5159 
5160  /*
5161  * If we aren't planning to enter hot standby mode later, treat
5162  * RECOVERY_STARTED as meaning we're out of startup, and report status
5163  * accordingly.
5164  */
5165  if (!EnableHotStandby)
5166  {
5168 #ifdef USE_SYSTEMD
5169  sd_notify(0, "READY=1");
5170 #endif
5171  }
5172 
5173  pmState = PM_RECOVERY;
5174  }
5175 
5178  {
5179  ereport(LOG,
5180  (errmsg("database system is ready to accept read-only connections")));
5181 
5182  /* Report status */
5184 #ifdef USE_SYSTEMD
5185  sd_notify(0, "READY=1");
5186 #endif
5187 
5189  connsAllowed = true;
5190 
5191  /* Some workers may be scheduled to start now */
5192  StartWorkerNeeded = true;
5193  }
5194 
5195  /* Process background worker state changes. */
5197  {
5198  /* Accept new worker requests only if not stopping. */
5200  StartWorkerNeeded = true;
5201  }
5202 
5205 
5206  /* Tell syslogger to rotate logfile if requested */
5207  if (SysLoggerPID != 0)
5208  {
5209  if (CheckLogrotateSignal())
5210  {
5213  }
5215  {
5217  }
5218  }
5219 
5222  {
5223  /*
5224  * Start one iteration of the autovacuum daemon, even if autovacuuming
5225  * is nominally not enabled. This is so we can have an active defense
5226  * against transaction ID wraparound. We set a flag for the main loop
5227  * to do it rather than trying to do it here --- this is because the
5228  * autovac process itself may send the signal, and we want to handle
5229  * that by launching another iteration as soon as the current one
5230  * completes.
5231  */
5232  start_autovac_launcher = true;
5233  }
5234 
5237  {
5238  /* The autovacuum launcher wants us to start a worker process. */
5240  }
5241 
5243  {
5244  /* Startup Process wants us to start the walreceiver process. */
5245  /* Start immediately if possible, else remember request for later. */
5246  WalReceiverRequested = true;
5248  }
5249 
5250  /*
5251  * Try to advance postmaster's state machine, if a child requests it.
5252  *
5253  * Be careful about the order of this action relative to sigusr1_handler's
5254  * other actions. Generally, this should be after other actions, in case
5255  * they have effects PostmasterStateMachine would need to know about.
5256  * However, we should do it before the CheckPromoteSignal step, which
5257  * cannot have any (immediate) effect on the state machine, but does
5258  * depend on what state we're in now.
5259  */
5261  {
5263  }
5264 
5265  if (StartupPID != 0 &&
5266  (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5267  pmState == PM_HOT_STANDBY) &&
5269  {
5270  /*
5271  * Tell startup process to finish recovery.
5272  *
5273  * Leave the promote signal file in place and let the Startup process
5274  * do the unlink.
5275  */
5277  }
5278 
5279 #ifdef WIN32
5281 #endif
5282 
5283  errno = save_errno;
5284 }
5285 
5286 /*
5287  * SIGTERM while processing startup packet.
5288  *
5289  * Running proc_exit() from a signal handler would be quite unsafe.
5290  * However, since we have not yet touched shared memory, we can just
5291  * pull the plug and exit without running any atexit handlers.
5292  *
5293  * One might be tempted to try to send a message, or log one, indicating
5294  * why we are disconnecting. However, that would be quite unsafe in itself.
5295  * Also, it seems undesirable to provide clues about the database's state
5296  * to a client that has not yet completed authentication, or even sent us
5297  * a startup packet.
5298  */
5299 static void
5301 {
5302  _exit(1);
5303 }
5304 
5305 /*
5306  * Dummy signal handler
5307  *
5308  * We use this for signals that we don't actually use in the postmaster,
5309  * but we do use in backends. If we were to SIG_IGN such signals in the
5310  * postmaster, then a newly started backend might drop a signal that arrives
5311  * before it's able to reconfigure its signal processing. (See notes in
5312  * tcop/postgres.c.)
5313  */
5314 static void
5316 {
5317 }
5318 
5319 /*
5320  * Timeout while processing startup packet.
5321  * As for process_startup_packet_die(), we exit via _exit(1).
5322  */
5323 static void
5325 {
5326  _exit(1);
5327 }
5328 
5329 
5330 /*
5331  * Generate a random cancel key.
5332  */
5333 static bool
5335 {
5336  return pg_strong_random(cancel_key, sizeof(int32));
5337 }
5338 
5339 /*
5340  * Count up number of child processes of specified types (dead_end children
5341  * are always excluded).
 *
 * "target" is either BACKEND_TYPE_ALL, in which case every non-dead_end
 * entry of BackendList is counted without looking at its type, or a bitmask
 * that is ANDed with each entry's bkend_type to decide whether the entry is
 * counted.
 *
 * Returns the number of matching entries.
5342  */
5343 static int
5344 CountChildren(int target)
5345 {
5346  dlist_iter iter;
5347  int cnt = 0;
5348 
5349  dlist_foreach(iter, &BackendList)
5350  {
5351  Backend *bp = dlist_container(Backend, elem, iter.cur);
5352 
5353  if (bp->dead_end)
5354  continue;
5355 
5356  /*
5357  * Since target == BACKEND_TYPE_ALL is the most common case, we test
5358  * it first and avoid touching shared memory for every child.
5359  */
5360  if (target != BACKEND_TYPE_ALL)
5361  {
5362  /*
5363  * Assign bkend_type for any recently announced WAL Sender
5364  * processes.
 *
 * NOTE(review): the remainder of the condition below and the
 * statement it guards are absent from this listing.
5365  */
5366  if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
5369 
5370  if (!(target & bp->bkend_type)) /* type not requested: skip */
5371  continue;
5372  }
5373 
5374  cnt++;
5375  }
5376  return cnt;
5377 }
5378 
5379 
5380 /*
5381  * StartChildProcess -- start an auxiliary process for the postmaster
5382  *
5383  * "type" determines what kind of child will be started. All child types
5384  * initially go to AuxiliaryProcessMain, which will handle common setup.
5385  *
5386  * Return value of StartChildProcess is subprocess' PID, or 0 if failed
5387  * to start subprocess.
 *
 * Under EXEC_BACKEND the child is launched through postmaster_forkexec()
 * with the arguments "postgres --forkaux <placeholder> <type>"; otherwise
 * fork_process() is used and the child calls AuxiliaryProcessMain(type)
 * directly.
 *
 * On failure a LOG message naming the process type is emitted. If the
 * failed child was the startup process, the postmaster exits through
 * ExitPostmaster(1) instead of returning.
 *
 * NOTE(review): the line holding this function's name and parameter list is
 * absent from this listing, as are a few statements in the child branch.
5388  */
5389 static pid_t
5391 {
5392  pid_t pid;
5393 
5394 #ifdef EXEC_BACKEND
5395  {
5396  char *av[10];
5397  int ac = 0;
5398  char typebuf[32];
5399 
5400  /*
5401  * Set up command-line arguments for subprocess
5402  */
5403  av[ac++] = "postgres";
5404  av[ac++] = "--forkaux";
5405  av[ac++] = NULL; /* filled in by postmaster_forkexec */
5406 
5407  snprintf(typebuf, sizeof(typebuf), "%d", type); /* type passed as decimal text */
5408  av[ac++] = typebuf;
5409 
5410  av[ac] = NULL; /* terminator; not counted in ac */
5411  Assert(ac < lengthof(av));
5412 
5413  pid = postmaster_forkexec(ac, av);
5414  }
5415 #else /* !EXEC_BACKEND */
5416  pid = fork_process();
5417 
5418  if (pid == 0) /* child */
5419  {
 /* NOTE(review): a statement is absent from this listing here */
5421 
5422  /* Close the postmaster's sockets */
5423  ClosePostmasterPorts(false);
5424 
5425  /* Release postmaster's working memory context */
 /* NOTE(review): the statements doing the release are absent from this listing */
5428  PostmasterContext = NULL;
5429 
5430  AuxiliaryProcessMain(type); /* does not return */
5431  }
5432 #endif /* EXEC_BACKEND */
5433 
5434  if (pid < 0)
5435  {
5436  /* in parent, fork failed */
 /*
  * NOTE(review): errno is saved and then restored on the very next
  * statement with nothing in between, so as written the pair has no
  * effect.
  */
5437  int save_errno = errno;
5438 
5439  errno = save_errno;
 /* One message per known type so each log line names the process */
5440  switch (type)
5441  {
5442  case StartupProcess:
5443  ereport(LOG,
5444  (errmsg("could not fork startup process: %m")));
5445  break;
5446  case ArchiverProcess:
5447  ereport(LOG,
5448  (errmsg("could not fork archiver process: %m")));
5449  break;
5450  case BgWriterProcess:
5451  ereport(LOG,
5452  (errmsg("could not fork background writer process: %m")));
5453  break;
5454  case CheckpointerProcess:
5455  ereport(LOG,
5456  (errmsg("could not fork checkpointer process: %m")));
5457  break;
5458  case WalWriterProcess:
5459  ereport(LOG,
5460  (errmsg("could not fork WAL writer process: %m")));
5461  break;
5462  case WalReceiverProcess:
5463  ereport(LOG,
5464  (errmsg("could not fork WAL receiver process: %m")));
5465  break;
5466  default:
5467  ereport(LOG,
5468  (errmsg("could not fork process: %m")));
5469  break;
5470  }
5471 
5472  /*
5473  * fork failure is fatal during startup, but there's no need to choke
5474  * immediately if starting other child types fails.
5475  */
5476  if (type == StartupProcess)
5477  ExitPostmaster(1);
5478  return 0; /* 0 tells the caller that no child was started */
5479  }
5480 
5481  /*
5482  * in parent, successful fork
5483  */
5484  return pid;
5485 }
5486 
5487 /*
5488  * StartAutovacuumWorker
5489  * Start an autovac worker process.
5490  *
5491  * This function is here because it enters the resulting PID into the
5492  * postmaster's private backends list.
5493  *
5494  * NB -- this code very roughly matches BackendStartup.
 *
 * A Backend struct is malloc'ed for the worker; it is kept when
 * StartAutoVacWorker() returns a positive PID and freed otherwise. Nothing
 * is returned: the failure paths that fall through to the bottom of the
 * function set avlauncher_needs_signal when AutoVacPID is nonzero.
 *
 * NOTE(review): this listing is missing the line holding the function's name
 * and parameter list, both guarding "if" lines, and several statements (see
 * the markers below).
5495  */
5496 static void
5498 {
5499  Backend *bn;
5500 
5501  /*
5502  * If not in condition to run a process, don't try, but handle it like a
5503  * fork failure. This does not normally happen, since the signal is only
5504  * supposed to be sent by autovacuum launcher when it's OK to do it, but
5505  * we have to check to avoid race-condition problems during DB state
5506  * changes.
 *
 * NOTE(review): the "if" line performing that check is absent from this
 * listing.
5507  */
5509  {
5510  /*
5511  * Compute the cancel key that will be assigned to this session. We
5512  * probably don't need cancel keys for autovac workers, but we'd
5513  * better have something random in the field to prevent unfriendly
5514  * people from sending cancels to them.
 *
 * NOTE(review): the "if" line that generates the key is absent from
 * this listing. The failure branch below returns directly, so the
 * launcher-notification code at the end of the function is not
 * reached on that path.
5515  */
5517  {
5518  ereport(LOG,
5519  (errcode(ERRCODE_INTERNAL_ERROR),
5520  errmsg("could not generate random cancel key")));
5521  return;
5522  }
5523 
5524  bn = (Backend *) malloc(sizeof(Backend));
5525  if (bn)
5526  {
5527  bn->cancel_key = MyCancelKey;
5528 
5529  /* Autovac workers are not dead_end and need a child slot */
5530  bn->dead_end = false;
 /* NOTE(review): a statement is absent from this listing here */
5532  bn->bgworker_notify = false;
5533 
5534  bn->pid = StartAutoVacWorker();
5535  if (bn->pid > 0)
5536  {
 /* NOTE(review): statements are absent from this listing here */
5539 #ifdef EXEC_BACKEND
5540  ShmemBackendArrayAdd(bn);
5541 #endif
5542  /* all OK */
5543  return;
5544  }
5545 
5546  /*
5547  * fork failed, fall through to report -- actual error message was
5548  * logged by StartAutoVacWorker
 *
 * NOTE(review): a statement preceding the free() is absent from
 * this listing.
5549  */
5551  free(bn);
5552  }
5553  else
5554  ereport(LOG,
5555  (errcode(ERRCODE_OUT_OF_MEMORY),
5556  errmsg("out of memory")));
5557  }
5558 
5559  /*
5560  * Report the failure to the launcher, if it's running. (If it's not, we
5561  * might not even be connected to shared memory, so don't try to call
5562  * AutoVacWorkerFailed.) Note that we also need to signal it so that it
5563  * responds to the condition, but we don't do that here, instead waiting
5564  * for ServerLoop to do it. This way we avoid a ping-pong signaling in
5565  * quick succession between the autovac launcher and postmaster in case
5566  * things get ugly.
5567  */
5568  if (AutoVacPID != 0)
5569  {
 /* NOTE(review): a statement is absent from this listing here */
5571  avlauncher_needs_signal = true;
5572  }
5573 }
5574 
5575 /*
5576  * MaybeStartWalReceiver
5577  * Start the WAL receiver process, if not running and our state allows.
5578  *
5579  * Note: if WalReceiverPID is already nonzero, it might seem that we should
5580  * clear WalReceiverRequested. However, there's a race condition if the
5581  * walreceiver terminates and the startup process immediately requests a new
5582  * one: it's quite possible to get the signal for the request before reaping
5583  * the dead walreceiver process. Better to risk launching an extra
5584  * walreceiver than to miss launching one we need. (The walreceiver code
5585  * has logic to recognize that it should go away if not needed.)
 *
 * The visible conditions require WalReceiverPID == 0 and pmState to be one
 * of PM_STARTUP, PM_RECOVERY or PM_HOT_STANDBY. WalReceiverRequested is
 * cleared only when WalReceiverPID is nonzero afterwards.
 *
 * NOTE(review): this listing is missing the line holding the function's name
 * and parameter list, the final term of the condition, and the statement
 * that launches the process.
5586  */
5587 static void
5589 {
5590  if (WalReceiverPID == 0 &&
5591  (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5592  pmState == PM_HOT_STANDBY) &&
5594  {
5596  if (WalReceiverPID != 0)
5597  WalReceiverRequested = false;
5598  /* else leave the flag set, so we'll try again later */
5599  }
5600 }
5601 
5602 
5603 /*
5604  * Create the opts file
 *
 * Writes a single line to "postmaster.opts": fullprogname, then each of
 * argv[1] .. argv[argc-1] preceded by a space and wrapped in double quotes,
 * then a newline. The file name is relative and the file is opened in "w"
 * mode. Arguments are written verbatim; no escaping is applied to quotes
 * they may contain.
 *
 * Returns true on success. Returns false, after a LOG message, if the file
 * cannot be opened or if fclose() reports failure. The return values of the
 * individual fprintf()/fputs() calls are not checked.
5605  */
5606 static bool
5607 CreateOptsFile(int argc, char *argv[], char *fullprogname)
5608 {
5609  FILE *fp;
5610  int i;
5611 
5612 #define OPTS_FILE "postmaster.opts"
5613 
5614  if ((fp = fopen(OPTS_FILE, "w")) == NULL)
5615  {
 /* NOTE(review): the first argument line of this ereport is absent from this listing */
5616  ereport(LOG,
5618  errmsg("could not create file \"%s\": %m", OPTS_FILE)));
5619  return false;
5620  }
5621 
5622  fprintf(fp, "%s", fullprogname);
5623  for (i = 1; i < argc; i++) /* argv[0] is replaced by fullprogname above */
5624  fprintf(fp, " \"%s\"", argv[i]);
5625  fputs("\n", fp);
5626 
5627  if (fclose(fp)) /* nonzero result means the close failed */
5628  {
 /* NOTE(review): the first argument line of this ereport is absent from this listing */
5629  ereport(LOG,
5631  errmsg("could not write file \"%s\": %m", OPTS_FILE)));
5632  return false;
5633  }
5634 
5635  return true;
5636 }
5637 
5638 
5639 /*
5640  * MaxLivePostmasterChildren
5641  *
5642  * This reports the number of entries needed in per-child-process arrays
5643  * (the PMChildFlags array, and if EXEC_BACKEND the ShmemBackendArray).
5644  * These arrays include regular backends, autovac workers, walsenders
5645  * and background workers, but not special children nor dead_end children.
5646  * This allows the arrays to have a fixed maximum size, to wit the same
5647  * too-many-children limit enforced by canAcceptConnections(). The exact value
5648  * isn't too critical as long as it's more than MaxBackends.
 *
 * The result is twice a sum that starts with
 * MaxConnections + autovacuum_max_workers + 1.
 *
 * NOTE(review): this listing is missing the line holding the function's name
 * and parameter list and the remaining terms of the sum.
5649  */
5650 int
5652 {
5653  return 2 * (MaxConnections + autovacuum_max_workers + 1 +
5655 }
5656 
5657 /*
5658  * Connect background worker to a database.
 *
 * Raises FATAL "database connection requirement not indicated during
 * registration", and raises ERROR if the process is no longer in "init"
 * processing mode once the connection step is done.
 *
 * NOTE(review): this listing is missing the line holding the function's name
 * and parameter list, the condition guarding the FATAL report, the
 * statement that performs the connection, and the final statement.
5659  */
5660 void
5662 {
5664 
5665  /* XXX is this the right errcode? */
5667  ereport(FATAL,
5668  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5669  errmsg("database connection requirement not indicated during registration")));
5670 
5672 
5673  /* it had better not have gotten out of "init" mode yet */
5674  if (!IsInitProcessingMode())
5675  ereport(ERROR,
5676  (errmsg("invalid processing mode in background worker")));
5678 }
5679 
5680 /*
5681  * Connect background worker to a database using OIDs.
 *
 * The connection is made by InitPostgres() with dboid and useroid; the
 * name arguments are passed as NULL. The last argument to InitPostgres()
 * is true exactly when BGWORKER_BYPASS_ALLOWCONN is set in flags.
 *
 * Raises FATAL "database connection requirement not indicated during
 * registration", and raises ERROR if the process is no longer in "init"
 * processing mode after InitPostgres() returns.
 *
 * NOTE(review): this listing is missing the line holding the function's name
 * and parameter list, the condition guarding the FATAL report, and the
 * final statement.
5682  */
5683 void
5685 {
5687 
5688  /* XXX is this the right errcode? */
5690  ereport(FATAL,
5691  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5692  errmsg("database connection requirement not indicated during registration")));
5693 
5694  InitPostgres(NULL, dboid, NULL, useroid, NULL, (flags & BGWORKER_BYPASS_ALLOWCONN) != 0);
5695 
5696  /* it had better not have gotten out of "init" mode yet */
5697  if (!IsInitProcessingMode())
5698  ereport(ERROR,
5699  (errmsg("invalid processing mode in background worker")));
5701 }
5702 
5703 /*
5704  * Block/unblock signals in a background worker
 *
 * The first function installs the BlockSig signal mask via PG_SETMASK().
 *
 * NOTE(review): this listing is missing the lines holding both functions'
 * names and parameter lists, and the whole body of the second function.
 * Presumably the second function installs the unblocking mask in the same
 * way -- confirm against the real source.
5705  */
5706 void
5708 {
5709  PG_SETMASK(&BlockSig);
5710 }
5711 
5712 void
5714 {
5716 }
5717 
5718 #ifdef EXEC_BACKEND
5719 static pid_t
5720 bgworker_forkexec(int shmem_slot)
5721 {
5722  char *av[10];
5723  int ac = 0;
5724  char forkav[MAXPGPATH];
5725 
5726  snprintf(forkav, MAXPGPATH, "--forkbgworker=%d", shmem_slot);
5727 
5728  av[ac++] = "postgres";
5729  av[ac++] = forkav;
5730  av[ac++] = NULL; /* filled in by postmaster_forkexec */
5731  av[ac] = NULL;
5732 
5733  Assert(ac < lengthof(av));
5734 
5735  return postmaster_forkexec(ac, av);
5736 }
5737 #endif
5738 
5739 /*
5740  * Start a new bgworker.
5741  * Starting time conditions must have been checked already.
5742  *
5743  * Returns true on success, false on failure.
5744  * In either case, update the RegisteredBgWorker's state appropriately.
5745  *
5746  * This code is heavily based on autovacuum.c, q.v.
5747  */
5748 static bool
5750 {
5751  pid_t worker_pid;
5752 
5753  Assert(rw->rw_pid == 0);
5754 
5755  /*
5756  * Allocate and assign the Backend element. Note we must do this before
5757  * forking, so that we can handle failures (out of memory or child-process
5758  * slots) cleanly.
5759  *
5760  * Treat failure as though the worker had crashed. That way, the
5761  * postmaster will wait a bit before attempting to start it again; if we
5762  * tried again right away, most likely we'd find ourselves hitting the
5763  * same resource-exhaustion condition.
5764  */
5765  if (!assign_backendlist_entry(rw))
5766  {
5768  return false;
5769  }
5770 
5771  ereport(DEBUG1,
5772  (errmsg_internal("starting background worker process \"%s\"",
5773  rw->rw_worker.bgw_name)));
5774 
5775 #ifdef EXEC_BACKEND
5776  switch ((worker_pid = bgworker_forkexec(rw->rw_shmem_slot)))
5777 #else
5778  switch ((worker_pid =