PostgreSQL Source Code  git master
postmaster.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * postmaster.c
4  * This program acts as a clearing house for requests to the
5  * POSTGRES system. Frontend programs send a startup message
6  * to the Postmaster and the postmaster uses the info in the
7  * message to set up a backend process.
8  *
9  * The postmaster also manages system-wide operations such as
10  * startup and shutdown. The postmaster itself doesn't do those
11  * operations, mind you --- it just forks off a subprocess to do them
12  * at the right times. It also takes care of resetting the system
13  * if a backend crashes.
14  *
15  * The postmaster process creates the shared memory and semaphore
16  * pools during startup, but as a rule does not touch them itself.
17  * In particular, it is not a member of the PGPROC array of backends
18  * and so it cannot participate in lock-manager operations. Keeping
19  * the postmaster away from shared memory operations makes it simpler
20  * and more reliable. The postmaster is almost always able to recover
21  * from crashes of individual backends by resetting shared memory;
22  * if it did much with shared memory then it would be prone to crashing
23  * along with the backends.
24  *
25  * When a request message is received, we now fork() immediately.
26  * The child process performs authentication of the request, and
27  * then becomes a backend if successful. This allows the auth code
28  * to be written in a simple single-threaded style (as opposed to the
29  * crufty "poor man's multitasking" code that used to be needed).
30  * More importantly, it ensures that blockages in non-multithreaded
31  * libraries like SSL or PAM cannot cause denial of service to other
32  * clients.
33  *
34  *
35  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
36  * Portions Copyright (c) 1994, Regents of the University of California
37  *
38  *
39  * IDENTIFICATION
40  * src/backend/postmaster/postmaster.c
41  *
42  * NOTES
43  *
44  * Initialization:
45  * The Postmaster sets up shared memory data structures
46  * for the backends.
47  *
48  * Synchronization:
49  * The Postmaster shares memory with the backends but should avoid
50  * touching shared memory, so as not to become stuck if a crashing
51  * backend screws up locks or shared memory. Likewise, the Postmaster
52  * should never block on messages from frontend clients.
53  *
54  * Garbage Collection:
55  * The Postmaster cleans up after backends if they have an emergency
56  * exit and/or core dump.
57  *
58  * Error Reporting:
59  * Use write_stderr() only for reporting "interactive" errors
60  * (essentially, bogus arguments on the command line). Once the
61  * postmaster is launched, use ereport().
62  *
63  *-------------------------------------------------------------------------
64  */
65 
66 #include "postgres.h"
67 
68 #include <unistd.h>
69 #include <signal.h>
70 #include <time.h>
71 #include <sys/wait.h>
72 #include <ctype.h>
73 #include <sys/stat.h>
74 #include <sys/socket.h>
75 #include <fcntl.h>
76 #include <sys/param.h>
77 #include <netdb.h>
78 #include <limits.h>
79 
80 #ifdef HAVE_SYS_SELECT_H
81 #include <sys/select.h>
82 #endif
83 
84 #ifdef USE_BONJOUR
85 #include <dns_sd.h>
86 #endif
87 
88 #ifdef USE_SYSTEMD
89 #include <systemd/sd-daemon.h>
90 #endif
91 
92 #ifdef HAVE_PTHREAD_IS_THREADED_NP
93 #include <pthread.h>
94 #endif
95 
96 #include "access/transam.h"
97 #include "access/xlog.h"
98 #include "bootstrap/bootstrap.h"
99 #include "catalog/pg_control.h"
100 #include "common/file_perm.h"
101 #include "common/ip.h"
102 #include "common/string.h"
103 #include "lib/ilist.h"
104 #include "libpq/auth.h"
105 #include "libpq/libpq.h"
106 #include "libpq/pqformat.h"
107 #include "libpq/pqsignal.h"
108 #include "miscadmin.h"
109 #include "pg_getopt.h"
110 #include "pgstat.h"
111 #include "port/pg_bswap.h"
112 #include "postmaster/autovacuum.h"
114 #include "postmaster/fork_process.h"
115 #include "postmaster/interrupt.h"
116 #include "postmaster/pgarch.h"
117 #include "postmaster/postmaster.h"
118 #include "postmaster/syslogger.h"
120 #include "replication/walsender.h"
121 #include "storage/fd.h"
122 #include "storage/ipc.h"
123 #include "storage/pg_shmem.h"
124 #include "storage/pmsignal.h"
125 #include "storage/proc.h"
126 #include "tcop/tcopprot.h"
127 #include "utils/builtins.h"
128 #include "utils/datetime.h"
129 #include "utils/memutils.h"
130 #include "utils/pidfile.h"
131 #include "utils/ps_status.h"
132 #include "utils/timeout.h"
133 #include "utils/timestamp.h"
134 #include "utils/varlena.h"
135 
136 #ifdef EXEC_BACKEND
137 #include "storage/spin.h"
138 #endif
139 
140 
141 /*
142  * Possible types of a backend. Beyond being the possible bkend_type values in
143  * struct bkend, these are OR-able request flag bits for SignalSomeChildren()
144  * and CountChildren(). Each type has its own bit, so a set of types fits in one int.
145  */
146 #define BACKEND_TYPE_NORMAL 0x0001 /* normal backend; also dead_end children, and walsenders at launch */
147 #define BACKEND_TYPE_AUTOVAC 0x0002 /* autovacuum worker process */
148 #define BACKEND_TYPE_WALSND 0x0004 /* walsender process */
149 #define BACKEND_TYPE_BGWORKER 0x0008 /* bgworker process */
150 #define BACKEND_TYPE_ALL 0x000F /* OR of all four bits above */
151 
152 /*
153  * List of active backends (or child processes anyway; we don't actually
154  * know whether a given child has become a backend or is still in the
155  * authorization phase). This is used mainly to keep track of how many
156  * children we have and send them appropriate signals when necessary.
157  *
158  * As shown in the above set of backend types, this list includes not only
159  * "normal" client sessions, but also autovacuum workers, walsenders, and
160  * background workers. (Note that at the time of launch, walsenders are
161  * labeled BACKEND_TYPE_NORMAL; we relabel them to BACKEND_TYPE_WALSND
162  * upon noticing they've changed their PMChildFlags entry. Hence that check
163  * must be done before any operation that needs to distinguish walsenders
164  * from normal backends.)
165  *
166  * Also, "dead_end" children are in it: these are children launched just for
167  * the purpose of sending a friendly rejection message to a would-be client.
168  * We must track them because they are attached to shared memory, but we know
169  * they will never become live backends. dead_end children are not assigned a
170  * PMChildSlot. dead_end children have bkend_type NORMAL.
171  *
172  * "Special" children such as the startup, bgwriter and autovacuum launcher
173  * tasks are not in this list. They are tracked via StartupPID and other
174  * pid_t variables below. (Thus, there can't be more than one of any given
175  * "special" child process type. We use BackendList entries for any child
176  * process there can be more than one of.)
177  */
178 typedef struct bkend
179 {
180  pid_t pid; /* process ID of the child process */
181  int32 cancel_key; /* cancel key for cancel requests aimed at this backend */
182  int child_slot; /* PMChildSlot for this backend, if any (dead_end children get none) */
183  int bkend_type; /* child process flavor: one of the BACKEND_TYPE_* values above */
184  bool dead_end; /* true if it is only going to send an error and quit */
185  bool bgworker_notify; /* true if it gets bgworker start/stop notifications */
186  dlist_node elem; /* list link in BackendList */
187 } Backend;
188 
190 
191 #ifdef EXEC_BACKEND
192 static Backend *ShmemBackendArray;
193 #endif
194 
196 
197 
198 
199 /* The socket number we are listening for connections on */
201 
202 /* The directory names for Unix socket(s) */
204 
205 /* The TCP listen address(es) */
207 
208 /*
209  * ReservedBackends is the number of backends reserved for superuser use.
210  * This number is taken out of the pool size given by MaxConnections, so
211  * the number of backend slots available to non-superusers is
212  * (MaxConnections - ReservedBackends). Note what this really means is
213  * "if there are <= ReservedBackends connections available, only superusers
214  * can make new connections" --- pre-existing superuser connections don't
215  * count against the limit.
216  */
218 
219 /* The socket(s) we're listening to. */
220 #define MAXLISTEN 64
222 
223 /*
224  * Set by the -o option
225  */
226 static char ExtraOptions[MAXPGPATH];
227 
228 /*
229  * These globals control the behavior of the postmaster in case some
230  * backend dumps core. Normally, it kills all peers of the dead backend
231  * and reinitializes shared memory. By specifying -T or -n, we can have
232  * the postmaster stop (rather than kill) peers and not reinitialize
233  * shared data structures. (Reinit is currently dead code, though.)
234  */
235 static bool Reinit = true; /* cleared by the -n switch */
236 static int SendStop = false; /* set by the -T switch; NOTE(review): declared int but only assigned true/false in the visible code */
237 
238 /* still more option variables */
239 bool EnableSSL = false;
240 
241 int PreAuthDelay = 0;
243 
244 bool log_hostname; /* for ps display and logging */
245 bool Log_connections = false;
246 bool Db_user_namespace = false;
247 
248 bool enable_bonjour = false;
251 
252 /* PIDs of special child processes; 0 when not running */
253 static pid_t StartupPID = 0,
262 
263 /* Startup process's status */
264 typedef enum
265 {
268  STARTUP_SIGNALED, /* we sent it a SIGQUIT or SIGKILL */
271 
273 
274 /* Startup/shutdown state */
275 #define NoShutdown 0
276 #define SmartShutdown 1
277 #define FastShutdown 2
278 #define ImmediateShutdown 3
279 
280 static int Shutdown = NoShutdown;
281 
282 static bool FatalError = false; /* T if recovering from backend crash */
283 
284 /*
285  * We use a simple state machine to control startup, shutdown, and
286  * crash recovery (which is rather like shutdown followed by startup).
287  *
288  * After doing all the postmaster initialization work, we enter PM_STARTUP
289  * state and the startup process is launched. The startup process begins by
290  * reading the control file and other preliminary initialization steps.
291  * In a normal startup, or after crash recovery, the startup process exits
292  * with exit code 0 and we switch to PM_RUN state. However, archive recovery
293  * is handled specially since it takes much longer and we would like to support
294  * hot standby during archive recovery.
295  *
296  * When the startup process is ready to start archive recovery, it signals the
297  * postmaster, and we switch to PM_RECOVERY state. The background writer and
298  * checkpointer are launched, while the startup process continues applying WAL.
299  * If Hot Standby is enabled, then, after reaching a consistent point in WAL
300  * redo, startup process signals us again, and we switch to PM_HOT_STANDBY
301  * state and begin accepting connections to perform read-only queries. When
302  * archive recovery is finished, the startup process exits with exit code 0
303  * and we switch to PM_RUN state.
304  *
305  * Normal child backends can only be launched when we are in PM_RUN or
306  * PM_HOT_STANDBY state. (connsAllowed can also restrict launching.)
307  * In other states we handle connection requests by launching "dead_end"
308  * child processes, which will simply send the client an error message and
309  * quit. (We track these in the BackendList so that we can know when they
310  * are all gone; this is important because they're still connected to shared
311  * memory, and would interfere with an attempt to destroy the shmem segment,
312  * possibly leading to SHMALL failure when we try to make a new one.)
313  * In PM_WAIT_DEAD_END state we are waiting for all the dead_end children
314  * to drain out of the system, and therefore stop accepting connection
315  * requests at all until the last existing child has quit (which hopefully
316  * will not be very long).
317  *
318  * Notice that this state variable does not distinguish *why* we entered
319  * states later than PM_RUN --- Shutdown and FatalError must be consulted
320  * to find that out. FatalError is never true in PM_RECOVERY, PM_HOT_STANDBY,
321  * or PM_RUN states, nor in PM_SHUTDOWN states (because we don't enter those
322  * states when trying to recover from a crash). It can be true in PM_STARTUP
323  * state, because we don't clear it until we've successfully started WAL redo.
324  */
325 typedef enum
326 {
327  PM_INIT, /* postmaster starting; initialization work still in progress */
328  PM_STARTUP, /* startup subprocess launched; waiting for it */
329  PM_RECOVERY, /* in archive recovery mode; startup process still applying WAL */
330  PM_HOT_STANDBY, /* in hot standby mode; read-only connections are accepted */
331  PM_RUN, /* normal "database is alive" state */
332  PM_STOP_BACKENDS, /* need to stop remaining backends */
333  PM_WAIT_BACKENDS, /* waiting for live backends to exit */
334  PM_SHUTDOWN, /* waiting for checkpointer to do shutdown
335  * checkpoint */
336  PM_SHUTDOWN_2, /* waiting for archiver and walsenders to
337  * finish */
338  PM_WAIT_DEAD_END, /* waiting for dead_end children to exit; no connection requests accepted */
339  PM_NO_CHILDREN /* all important children have exited */
340 } PMState;
341 
343 
344 /*
345  * While performing a "smart shutdown", we restrict new connections but stay
346  * in PM_RUN or PM_HOT_STANDBY state until all the client backends are gone.
347  * connsAllowed is a sub-state indicator showing the active restriction.
348  * It is of no interest unless pmState is PM_RUN or PM_HOT_STANDBY.
349  */
350 typedef enum
351 {
352  ALLOW_ALL_CONNS, /* normal not-shutting-down state */
353  ALLOW_SUPERUSER_CONNS, /* only superusers can connect */
354  ALLOW_NO_CONNS /* no new connections allowed, period */
356 
358 
359 /* Start time of SIGKILL timeout during immediate shutdown or child crash */
360 /* Zero means timeout is not running */
361 static time_t AbortStartTime = 0;
362 
363 /* Length of said timeout */
364 #define SIGKILL_CHILDREN_AFTER_SECS 5
365 
366 static bool ReachedNormalRunning = false; /* T if we've reached PM_RUN */
367 
368 bool ClientAuthInProgress = false; /* T during new-client
369  * authentication */
370 
371 bool redirection_done = false; /* stderr redirected for syslogger? */
372 
373 /* received START_AUTOVAC_LAUNCHER signal */
374 static volatile sig_atomic_t start_autovac_launcher = false;
375 
376 /* the launcher needs to be signaled to communicate some condition */
377 static volatile bool avlauncher_needs_signal = false;
378 
379 /* received START_WALRECEIVER signal */
380 static volatile sig_atomic_t WalReceiverRequested = false;
381 
382 /* set when there's a worker that needs to be started up */
383 static volatile bool StartWorkerNeeded = true;
384 static volatile bool HaveCrashedWorker = false;
385 
386 #ifdef USE_SSL
387 /* Set when and if SSL has been initialized properly */
388 static bool LoadedSSL = false;
389 #endif
390 
391 #ifdef USE_BONJOUR
392 static DNSServiceRef bonjour_sdref = NULL;
393 #endif
394 
395 /*
396  * postmaster.c - function prototypes
397  */
398 static void CloseServerPorts(int status, Datum arg);
399 static void unlink_external_pid_file(int status, Datum arg);
400 static void getInstallationPaths(const char *argv0);
401 static void checkControlFile(void);
402 static Port *ConnCreate(int serverFd);
403 static void ConnFree(Port *port);
404 static void reset_shared(void);
405 static void SIGHUP_handler(SIGNAL_ARGS);
406 static void pmdie(SIGNAL_ARGS);
407 static void reaper(SIGNAL_ARGS);
408 static void sigusr1_handler(SIGNAL_ARGS);
410 static void dummy_handler(SIGNAL_ARGS);
411 static void StartupPacketTimeoutHandler(void);
412 static void CleanupBackend(int pid, int exitstatus);
413 static bool CleanupBackgroundWorker(int pid, int exitstatus);
414 static void HandleChildCrash(int pid, int exitstatus, const char *procname);
415 static void LogChildExit(int lev, const char *procname,
416  int pid, int exitstatus);
417 static void PostmasterStateMachine(void);
418 static void BackendInitialize(Port *port);
419 static void BackendRun(Port *port) pg_attribute_noreturn();
420 static void ExitPostmaster(int status) pg_attribute_noreturn();
421 static int ServerLoop(void);
422 static int BackendStartup(Port *port);
423 static int ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done);
424 static void SendNegotiateProtocolVersion(List *unrecognized_protocol_options);
425 static void processCancelRequest(Port *port, void *pkt);
426 static int initMasks(fd_set *rmask);
427 static void report_fork_failure_to_client(Port *port, int errnum);
428 static CAC_state canAcceptConnections(int backend_type);
429 static bool RandomCancelKey(int32 *cancel_key);
430 static void signal_child(pid_t pid, int signal);
431 static bool SignalSomeChildren(int signal, int targets);
432 static void TerminateChildren(int signal);
433 
434 #define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL)
435 
436 static int CountChildren(int target);
438 static void maybe_start_bgworkers(void);
439 static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
440 static pid_t StartChildProcess(AuxProcType type);
441 static void StartAutovacuumWorker(void);
442 static void MaybeStartWalReceiver(void);
443 static void InitPostmasterDeathWatchHandle(void);
444 
445 /*
446  * Is the archiver allowed to start up in the current postmaster state?
447  *
448  * It may start in PM_RUN if XLogArchivingActive(). If WAL archiving is enabled
449  * always (XLogArchivingAlways()), it may also start in PM_RECOVERY or PM_HOT_STANDBY.
450  */
451 #define PgArchStartupAllowed() \
452  ((XLogArchivingActive() && pmState == PM_RUN) || \
453  (XLogArchivingAlways() && \
454  (pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY)))
455 
456 #ifdef EXEC_BACKEND
457 
458 #ifdef WIN32
459 #define WNOHANG 0 /* ignored, so any integer value will do */
460 
461 static pid_t waitpid(pid_t pid, int *exitstatus, int options);
462 static void WINAPI pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired);
463 
464 static HANDLE win32ChildQueue;
465 
466 typedef struct
467 {
468  HANDLE waitHandle;
469  HANDLE procHandle;
470  DWORD procId;
471 } win32_deadchild_waitinfo;
472 #endif /* WIN32 */
473 
474 static pid_t backend_forkexec(Port *port);
475 static pid_t internal_forkexec(int argc, char *argv[], Port *port);
476 
477 /* Type for a socket that can be inherited to a client process */
478 #ifdef WIN32
479 typedef struct
480 {
481  SOCKET origsocket; /* Original socket value, or PGINVALID_SOCKET
482  * if not a socket */
483  WSAPROTOCOL_INFO wsainfo;
484 } InheritableSocket;
485 #else
486 typedef int InheritableSocket;
487 #endif
488 
489 /*
490  * Structure contains all variables passed to exec:ed backends
491  */
492 typedef struct
493 {
494  Port port;
495  InheritableSocket portsocket;
496  char DataDir[MAXPGPATH];
499  int MyPMChildSlot;
500 #ifndef WIN32
501  unsigned long UsedShmemSegID;
502 #else
503  void *ShmemProtectiveRegion;
504  HANDLE UsedShmemSegID;
505 #endif
506  void *UsedShmemSegAddr;
509  Backend *ShmemBackendArray;
510 #ifndef HAVE_SPINLOCKS
512 #endif
521  InheritableSocket pgStatSock;
522  pid_t PostmasterPid;
526  bool redirection_done;
527  bool IsBinaryUpgrade;
528  int max_safe_fds;
529  int MaxBackends;
530 #ifdef WIN32
531  HANDLE PostmasterHandle;
532  HANDLE initial_signal_pipe;
533  HANDLE syslogPipe[2];
534 #else
535  int postmaster_alive_fds[2];
536  int syslogPipe[2];
537 #endif
538  char my_exec_path[MAXPGPATH];
539  char pkglib_path[MAXPGPATH];
540  char ExtraOptions[MAXPGPATH];
541 } BackendParameters;
542 
543 static void read_backend_variables(char *id, Port *port);
544 static void restore_backend_variables(BackendParameters *param, Port *port);
545 
546 #ifndef WIN32
547 static bool save_backend_variables(BackendParameters *param, Port *port);
548 #else
549 static bool save_backend_variables(BackendParameters *param, Port *port,
550  HANDLE childProcess, pid_t childPid);
551 #endif
552 
553 static void ShmemBackendArrayAdd(Backend *bn);
554 static void ShmemBackendArrayRemove(Backend *bn);
555 #endif /* EXEC_BACKEND */
556 
/* Shorthands: each expands to StartChildProcess() with the matching AuxProcType value */
557 #define StartupDataBase() StartChildProcess(StartupProcess)
558 #define StartBackgroundWriter() StartChildProcess(BgWriterProcess)
559 #define StartCheckpointer() StartChildProcess(CheckpointerProcess)
560 #define StartWalWriter() StartChildProcess(WalWriterProcess)
561 #define StartWalReceiver() StartChildProcess(WalReceiverProcess)
562 
563 /* Macros to check the exit status of a child process; st is a wait status usable with WIFEXITED() */
564 #define EXIT_STATUS_0(st) ((st) == 0) /* raw status word is zero (no WIFEXITED test) */
565 #define EXIT_STATUS_1(st) (WIFEXITED(st) && WEXITSTATUS(st) == 1) /* normal exit with code 1 */
566 #define EXIT_STATUS_3(st) (WIFEXITED(st) && WEXITSTATUS(st) == 3) /* normal exit with code 3 */
567 
568 #ifndef WIN32
569 /*
570  * File descriptors for pipe used to monitor if postmaster is alive.
571  * First is POSTMASTER_FD_WATCH, second is POSTMASTER_FD_OWN.
572  */
573 int postmaster_alive_fds[2] = {-1, -1};
574 #else
575 /* Process handle of postmaster used for the same purpose on Windows */
576 HANDLE PostmasterHandle;
577 #endif
578 
579 /*
580  * Postmaster main entry point
581  */
582 void
583 PostmasterMain(int argc, char *argv[])
584 {
585  int opt;
586  int status;
587  char *userDoption = NULL;
588  bool listen_addr_saved = false;
589  int i;
590  char *output_config_variable = NULL;
591 
593 
595 
597 
598  /*
599  * We should not be creating any files or directories before we check the
600  * data directory (see checkDataDir()), but just in case set the umask to
601  * the most restrictive (owner-only) permissions.
602  *
603  * checkDataDir() will reset the umask based on the data directory
604  * permissions.
605  */
606  umask(PG_MODE_MASK_OWNER);
607 
608  /*
609  * By default, palloc() requests in the postmaster will be allocated in
610  * the PostmasterContext, which is space that can be recycled by backends.
611  * Allocated data that needs to be available to backends should be
612  * allocated in TopMemoryContext.
613  */
615  "Postmaster",
618 
619  /* Initialize paths to installation files */
620  getInstallationPaths(argv[0]);
621 
622  /*
623  * Set up signal handlers for the postmaster process.
624  *
625  * In the postmaster, we use pqsignal_pm() rather than pqsignal() (which
626  * is used by all child processes and client processes). That has a
627  * couple of special behaviors:
628  *
629  * 1. Except on Windows, we tell sigaction() to block all signals for the
630  * duration of the signal handler. This is faster than our old approach
631  * of blocking/unblocking explicitly in the signal handler, and it should
632  * also prevent excessive stack consumption if signals arrive quickly.
633  *
634  * 2. We do not set the SA_RESTART flag. This is because signals will be
635  * blocked at all times except when ServerLoop is waiting for something to
636  * happen, and during that window, we want signals to exit the select(2)
637  * wait so that ServerLoop can respond if anything interesting happened.
638  * On some platforms, signals marked SA_RESTART would not cause the
639  * select() wait to end.
640  *
641  * Child processes will generally want SA_RESTART, so pqsignal() sets that
642  * flag. We expect children to set up their own handlers before
643  * unblocking signals.
644  *
645  * CAUTION: when changing this list, check for side-effects on the signal
646  * handling setup of child processes. See tcop/postgres.c,
647  * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
648  * postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/pgstat.c,
649  * postmaster/syslogger.c, postmaster/bgworker.c and
650  * postmaster/checkpointer.c.
651  */
652  pqinitmask();
654 
655  pqsignal_pm(SIGHUP, SIGHUP_handler); /* reread config file and have
656  * children do same */
657  pqsignal_pm(SIGINT, pmdie); /* send SIGTERM and shut down */
658  pqsignal_pm(SIGQUIT, pmdie); /* send SIGQUIT and die */
659  pqsignal_pm(SIGTERM, pmdie); /* wait for children and shut down */
660  pqsignal_pm(SIGALRM, SIG_IGN); /* ignored */
661  pqsignal_pm(SIGPIPE, SIG_IGN); /* ignored */
662  pqsignal_pm(SIGUSR1, sigusr1_handler); /* message from child process */
663  pqsignal_pm(SIGUSR2, dummy_handler); /* unused, reserve for children */
664  pqsignal_pm(SIGCHLD, reaper); /* handle child termination */
665 
666  /*
667  * No other place in Postgres should touch SIGTTIN/SIGTTOU handling. We
668  * ignore those signals in a postmaster environment, so that there is no
669  * risk of a child process freezing up due to writing to stderr. But for
670  * a standalone backend, their default handling is reasonable. Hence, all
671  * child processes should just allow the inherited settings to stand.
672  */
673 #ifdef SIGTTIN
674  pqsignal_pm(SIGTTIN, SIG_IGN); /* ignored */
675 #endif
676 #ifdef SIGTTOU
677  pqsignal_pm(SIGTTOU, SIG_IGN); /* ignored */
678 #endif
679 
680  /* ignore SIGXFSZ, so that ulimit violations work like disk full */
681 #ifdef SIGXFSZ
682  pqsignal_pm(SIGXFSZ, SIG_IGN); /* ignored */
683 #endif
684 
685  /*
686  * Options setup
687  */
689 
690  opterr = 1;
691 
692  /*
693  * Parse command-line options. CAUTION: keep this in sync with
694  * tcop/postgres.c (the option sets should not conflict) and with the
695  * common help() function in main/main.c.
696  */
697  while ((opt = getopt(argc, argv, "B:bc:C:D:d:EeFf:h:ijk:lN:nOo:Pp:r:S:sTt:W:-:")) != -1)
698  {
699  switch (opt)
700  {
701  case 'B':
702  SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
703  break;
704 
705  case 'b':
706  /* Undocumented flag used for binary upgrades */
707  IsBinaryUpgrade = true;
708  break;
709 
710  case 'C':
711  output_config_variable = strdup(optarg);
712  break;
713 
714  case 'D':
715  userDoption = strdup(optarg);
716  break;
717 
718  case 'd':
720  break;
721 
722  case 'E':
723  SetConfigOption("log_statement", "all", PGC_POSTMASTER, PGC_S_ARGV);
724  break;
725 
726  case 'e':
727  SetConfigOption("datestyle", "euro", PGC_POSTMASTER, PGC_S_ARGV);
728  break;
729 
730  case 'F':
731  SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
732  break;
733 
734  case 'f':
736  {
737  write_stderr("%s: invalid argument for option -f: \"%s\"\n",
738  progname, optarg);
739  ExitPostmaster(1);
740  }
741  break;
742 
743  case 'h':
744  SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
745  break;
746 
747  case 'i':
748  SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
749  break;
750 
751  case 'j':
752  /* only used by interactive backend */
753  break;
754 
755  case 'k':
756  SetConfigOption("unix_socket_directories", optarg, PGC_POSTMASTER, PGC_S_ARGV);
757  break;
758 
759  case 'l':
760  SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
761  break;
762 
763  case 'N':
764  SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
765  break;
766 
767  case 'n':
768  /* Don't reinit shared mem after abnormal exit */
769  Reinit = false;
770  break;
771 
772  case 'O':
773  SetConfigOption("allow_system_table_mods", "true", PGC_POSTMASTER, PGC_S_ARGV);
774  break;
775 
776  case 'o':
777  /* Other options to pass to the backend on the command line */
779  sizeof(ExtraOptions) - strlen(ExtraOptions),
780  " %s", optarg);
781  break;
782 
783  case 'P':
784  SetConfigOption("ignore_system_indexes", "true", PGC_POSTMASTER, PGC_S_ARGV);
785  break;
786 
787  case 'p':
789  break;
790 
791  case 'r':
792  /* only used by single-user backend */
793  break;
794 
795  case 'S':
797  break;
798 
799  case 's':
800  SetConfigOption("log_statement_stats", "true", PGC_POSTMASTER, PGC_S_ARGV);
801  break;
802 
803  case 'T':
804 
805  /*
806  * In the event that some backend dumps core, send SIGSTOP,
807  * rather than SIGQUIT, to all its peers. This lets the wily
808  * post_hacker collect core dumps from everyone.
809  */
810  SendStop = true;
811  break;
812 
813  case 't':
814  {
815  const char *tmp = get_stats_option_name(optarg);
816 
817  if (tmp)
818  {
820  }
821  else
822  {
823  write_stderr("%s: invalid argument for option -t: \"%s\"\n",
824  progname, optarg);
825  ExitPostmaster(1);
826  }
827  break;
828  }
829 
830  case 'W':
831  SetConfigOption("post_auth_delay", optarg, PGC_POSTMASTER, PGC_S_ARGV);
832  break;
833 
834  case 'c':
835  case '-':
836  {
837  char *name,
838  *value;
839 
840  ParseLongOption(optarg, &name, &value);
841  if (!value)
842  {
843  if (opt == '-')
844  ereport(ERROR,
845  (errcode(ERRCODE_SYNTAX_ERROR),
846  errmsg("--%s requires a value",
847  optarg)));
848  else
849  ereport(ERROR,
850  (errcode(ERRCODE_SYNTAX_ERROR),
851  errmsg("-c %s requires a value",
852  optarg)));
853  }
854 
856  free(name);
857  if (value)
858  free(value);
859  break;
860  }
861 
862  default:
863  write_stderr("Try \"%s --help\" for more information.\n",
864  progname);
865  ExitPostmaster(1);
866  }
867  }
868 
869  /*
870  * Postmaster accepts no non-option switch arguments.
871  */
872  if (optind < argc)
873  {
874  write_stderr("%s: invalid argument: \"%s\"\n",
875  progname, argv[optind]);
876  write_stderr("Try \"%s --help\" for more information.\n",
877  progname);
878  ExitPostmaster(1);
879  }
880 
881  /*
882  * Locate the proper configuration files and data directory, and read
883  * postgresql.conf for the first time.
884  */
885  if (!SelectConfigFiles(userDoption, progname))
886  ExitPostmaster(2);
887 
888  if (output_config_variable != NULL)
889  {
890  /*
891  * "-C guc" was specified, so print GUC's value and exit. No extra
892  * permission check is needed because the user is reading inside the
893  * data dir.
894  */
895  const char *config_val = GetConfigOption(output_config_variable,
896  false, false);
897 
898  puts(config_val ? config_val : "");
899  ExitPostmaster(0);
900  }
901 
902  /* Verify that DataDir looks reasonable */
903  checkDataDir();
904 
905  /* Check that pg_control exists */
907 
908  /* And switch working directory into it */
909  ChangeToDataDir();
910 
911  /*
912  * Check for invalid combinations of GUC settings.
913  */
915  {
916  write_stderr("%s: superuser_reserved_connections (%d) must be less than max_connections (%d)\n",
917  progname,
919  ExitPostmaster(1);
920  }
922  ereport(ERROR,
923  (errmsg("WAL archival cannot be enabled when wal_level is \"minimal\"")));
925  ereport(ERROR,
926  (errmsg("WAL streaming (max_wal_senders > 0) requires wal_level \"replica\" or \"logical\"")));
927 
928  /*
929  * Other one-time internal sanity checks can go here, if they are fast.
930  * (Put any slow processing further down, after postmaster.pid creation.)
931  */
932  if (!CheckDateTokenTables())
933  {
934  write_stderr("%s: invalid datetoken tables, please fix\n", progname);
935  ExitPostmaster(1);
936  }
937 
938  /*
939  * Now that we are done processing the postmaster arguments, reset
940  * getopt(3) library so that it will work correctly in subprocesses.
941  */
942  optind = 1;
943 #ifdef HAVE_INT_OPTRESET
944  optreset = 1; /* some systems need this too */
945 #endif
946 
947  /* For debugging: display postmaster environment */
948  {
949  extern char **environ;
950  char **p;
951 
952  ereport(DEBUG3,
953  (errmsg_internal("%s: PostmasterMain: initial environment dump:",
954  progname)));
955  ereport(DEBUG3,
956  (errmsg_internal("-----------------------------------------")));
957  for (p = environ; *p; ++p)
958  ereport(DEBUG3,
959  (errmsg_internal("\t%s", *p)));
960  ereport(DEBUG3,
961  (errmsg_internal("-----------------------------------------")));
962  }
963 
964  /*
965  * Create lockfile for data directory.
966  *
967  * We want to do this before we try to grab the input sockets, because the
968  * data directory interlock is more reliable than the socket-file
969  * interlock (thanks to whoever decided to put socket files in /tmp :-().
970  * For the same reason, it's best to grab the TCP socket(s) before the
971  * Unix socket(s).
972  *
973  * Also note that this internally sets up the on_proc_exit function that
974  * is responsible for removing both data directory and socket lockfiles;
975  * so it must happen before opening sockets so that at exit, the socket
976  * lockfiles go away after CloseServerPorts runs.
977  */
978  CreateDataDirLockFile(true);
979 
980  /*
981  * Read the control file (for error checking and config info).
982  *
983  * Since we verify the control file's CRC, this has a useful side effect
984  * on machines where we need a run-time test for CRC support instructions.
985  * The postmaster will do the test once at startup, and then its child
986  * processes will inherit the correct function pointer and not need to
987  * repeat the test.
988  */
990 
991  /*
992  * Register the apply launcher. Since it registers a background worker,
993  * it needs to be called before InitializeMaxBackends(), and it's probably
994  * a good idea to call it before any modules had a chance to take the
995  * background worker slots.
996  */
998 
999  /*
1000  * process any libraries that should be preloaded at postmaster start
1001  */
1003 
1004  /*
1005  * Initialize SSL library, if specified.
1006  */
1007 #ifdef USE_SSL
1008  if (EnableSSL)
1009  {
1010  (void) secure_initialize(true);
1011  LoadedSSL = true;
1012  }
1013 #endif
1014 
1015  /*
1016  * Now that loadable modules have had their chance to register background
1017  * workers, calculate MaxBackends.
1018  */
1020 
1021  /*
1022  * Set up shared memory and semaphores.
1023  */
1024  reset_shared();
1025 
1026  /*
1027  * Estimate number of openable files. This must happen after setting up
1028  * semaphores, because on some platforms semaphores count as open files.
1029  */
1030  set_max_safe_fds();
1031 
1032  /*
1033  * Set reference point for stack-depth checking.
1034  */
1035  set_stack_base();
1036 
1037  /*
1038  * Initialize pipe (or process handle on Windows) that allows children to
1039  * wake up from sleep on postmaster death.
1040  */
1042 
1043 #ifdef WIN32
1044 
1045  /*
1046  * Initialize I/O completion port used to deliver list of dead children.
1047  */
1048  win32ChildQueue = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 1);
1049  if (win32ChildQueue == NULL)
1050  ereport(FATAL,
1051  (errmsg("could not create I/O completion port for child queue")));
1052 #endif
1053 
1054 #ifdef EXEC_BACKEND
1055  /* Write out nondefault GUC settings for child processes to use */
1056  write_nondefault_variables(PGC_POSTMASTER);
1057 
1058  /*
1059  * Clean out the temp directory used to transmit parameters to child
1060  * processes (see internal_forkexec, below). We must do this before
1061  * launching any child processes, else we have a race condition: we could
1062  * remove a parameter file before the child can read it. It should be
1063  * safe to do so now, because we verified earlier that there are no
1064  * conflicting Postgres processes in this data directory.
1065  */
1067 #endif
1068 
1069  /*
1070  * Forcibly remove the files signaling a standby promotion request.
1071  * Otherwise, the existence of those files triggers a promotion too early,
1072  * whether a user wants that or not.
1073  *
1074  * This removal of files is usually unnecessary because they can exist
1075  * only during a few moments during a standby promotion. However there is
1076  * a race condition: if pg_ctl promote is executed and creates the files
1077  * during a promotion, the files can stay around even after the server is
1078  * brought up to be the primary. Then, if a new standby starts by using
1079  * the backup taken from the new primary, the files can exist at server
1080  * startup and must be removed in order to avoid an unexpected promotion.
1081  *
1082  * Note that promotion signal files need to be removed before the startup
1083  * process is invoked. Because, after that, they can be used by
1084  * postmaster's SIGUSR1 signal handler.
1085  */
1087 
1088  /* Do the same for logrotate signal file */
1090 
1091  /* Remove any outdated file holding the current log filenames. */
1092  if (unlink(LOG_METAINFO_DATAFILE) < 0 && errno != ENOENT)
1093  ereport(LOG,
1095  errmsg("could not remove file \"%s\": %m",
1097 
1098  /*
1099  * If enabled, start up syslogger collection subprocess
1100  */
1102 
1103  /*
1104  * Reset whereToSendOutput from DestDebug (its starting state) to
1105  * DestNone. This stops ereport from sending log messages to stderr unless
1106  * Log_destination permits. We don't do this until the postmaster is
1107  * fully launched, since startup failures may as well be reported to
1108  * stderr.
1109  *
1110  * If we are in fact disabling logging to stderr, first emit a log message
1111  * saying so, to provide a breadcrumb trail for users who may not remember
1112  * that their logging is configured to go somewhere else.
1113  */
1115  ereport(LOG,
1116  (errmsg("ending log output to stderr"),
1117  errhint("Future log output will go to log destination \"%s\".",
1119 
1121 
1122  /*
1123  * Report server startup in log. While we could emit this much earlier,
1124  * it seems best to do so after starting the log collector, if we intend
1125  * to use one.
1126  */
1127  ereport(LOG,
1128  (errmsg("starting %s", PG_VERSION_STR)));
1129 
1130  /*
1131  * Establish input sockets.
1132  *
1133  * First, mark them all closed, and set up an on_proc_exit function that's
1134  * charged with closing the sockets again at postmaster shutdown.
1135  */
1136  for (i = 0; i < MAXLISTEN; i++)
1138 
1140 
1141  if (ListenAddresses)
1142  {
1143  char *rawstring;
1144  List *elemlist;
1145  ListCell *l;
1146  int success = 0;
1147 
1148  /* Need a modifiable copy of ListenAddresses */
1149  rawstring = pstrdup(ListenAddresses);
1150 
1151  /* Parse string into list of hostnames */
1152  if (!SplitGUCList(rawstring, ',', &elemlist))
1153  {
1154  /* syntax error in list */
1155  ereport(FATAL,
1156  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1157  errmsg("invalid list syntax in parameter \"%s\"",
1158  "listen_addresses")));
1159  }
1160 
1161  foreach(l, elemlist)
1162  {
1163  char *curhost = (char *) lfirst(l);
1164 
1165  if (strcmp(curhost, "*") == 0)
1166  status = StreamServerPort(AF_UNSPEC, NULL,
1167  (unsigned short) PostPortNumber,
1168  NULL,
1170  else
1171  status = StreamServerPort(AF_UNSPEC, curhost,
1172  (unsigned short) PostPortNumber,
1173  NULL,
1174  ListenSocket, MAXLISTEN);
1175 
1176  if (status == STATUS_OK)
1177  {
1178  success++;
1179  /* record the first successful host addr in lockfile */
1180  if (!listen_addr_saved)
1181  {
1183  listen_addr_saved = true;
1184  }
1185  }
1186  else
1187  ereport(WARNING,
1188  (errmsg("could not create listen socket for \"%s\"",
1189  curhost)));
1190  }
1191 
1192  if (!success && elemlist != NIL)
1193  ereport(FATAL,
1194  (errmsg("could not create any TCP/IP sockets")));
1195 
1196  list_free(elemlist);
1197  pfree(rawstring);
1198  }
1199 
1200 #ifdef USE_BONJOUR
1201  /* Register for Bonjour only if we opened TCP socket(s) */
1203  {
1204  DNSServiceErrorType err;
1205 
1206  /*
1207  * We pass 0 for interface_index, which will result in registering on
1208  * all "applicable" interfaces. It's not entirely clear from the
1209  * DNS-SD docs whether this would be appropriate if we have bound to
1210  * just a subset of the available network interfaces.
1211  */
1212  err = DNSServiceRegister(&bonjour_sdref,
1213  0,
1214  0,
1215  bonjour_name,
1216  "_postgresql._tcp.",
1217  NULL,
1218  NULL,
1220  0,
1221  NULL,
1222  NULL,
1223  NULL);
1224  if (err != kDNSServiceErr_NoError)
1225  elog(LOG, "DNSServiceRegister() failed: error code %ld",
1226  (long) err);
1227 
1228  /*
1229  * We don't bother to read the mDNS daemon's reply, and we expect that
1230  * it will automatically terminate our registration when the socket is
1231  * closed at postmaster termination. So there's nothing more to be
1232  * done here. However, the bonjour_sdref is kept around so that
1233  * forked children can close their copies of the socket.
1234  */
1235  }
1236 #endif
1237 
1238 #ifdef HAVE_UNIX_SOCKETS
1240  {
1241  char *rawstring;
1242  List *elemlist;
1243  ListCell *l;
1244  int success = 0;
1245 
1246  /* Need a modifiable copy of Unix_socket_directories */
1247  rawstring = pstrdup(Unix_socket_directories);
1248 
1249  /* Parse string into list of directories */
1250  if (!SplitDirectoriesString(rawstring, ',', &elemlist))
1251  {
1252  /* syntax error in list */
1253  ereport(FATAL,
1254  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1255  errmsg("invalid list syntax in parameter \"%s\"",
1256  "unix_socket_directories")));
1257  }
1258 
1259  foreach(l, elemlist)
1260  {
1261  char *socketdir = (char *) lfirst(l);
1262 
1263  status = StreamServerPort(AF_UNIX, NULL,
1264  (unsigned short) PostPortNumber,
1265  socketdir,
1266  ListenSocket, MAXLISTEN);
1267 
1268  if (status == STATUS_OK)
1269  {
1270  success++;
1271  /* record the first successful Unix socket in lockfile */
1272  if (success == 1)
1274  }
1275  else
1276  ereport(WARNING,
1277  (errmsg("could not create Unix-domain socket in directory \"%s\"",
1278  socketdir)));
1279  }
1280 
1281  if (!success && elemlist != NIL)
1282  ereport(FATAL,
1283  (errmsg("could not create any Unix-domain sockets")));
1284 
1285  list_free_deep(elemlist);
1286  pfree(rawstring);
1287  }
1288 #endif
1289 
1290  /*
1291  * check that we have some socket to listen on
1292  */
1293  if (ListenSocket[0] == PGINVALID_SOCKET)
1294  ereport(FATAL,
1295  (errmsg("no socket created for listening")));
1296 
1297  /*
1298  * If no valid TCP ports, write an empty line for listen address,
1299  * indicating the Unix socket must be used. Note that this line is not
1300  * added to the lock file until there is a socket backing it.
1301  */
1302  if (!listen_addr_saved)
1304 
1305  /*
1306  * Record postmaster options. We delay this till now to avoid recording
1307  * bogus options (eg, unusable port number).
1308  */
1309  if (!CreateOptsFile(argc, argv, my_exec_path))
1310  ExitPostmaster(1);
1311 
1312  /*
1313  * Write the external PID file if requested
1314  */
1315  if (external_pid_file)
1316  {
1317  FILE *fpidfile = fopen(external_pid_file, "w");
1318 
1319  if (fpidfile)
1320  {
1321  fprintf(fpidfile, "%d\n", MyProcPid);
1322  fclose(fpidfile);
1323 
1324  /* Make PID file world readable */
1325  if (chmod(external_pid_file, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) != 0)
1326  write_stderr("%s: could not change permissions of external PID file \"%s\": %s\n",
1328  }
1329  else
1330  write_stderr("%s: could not write external PID file \"%s\": %s\n",
1332 
1334  }
1335 
1336  /*
1337  * Remove old temporary files. At this point there can be no other
1338  * Postgres processes running in this directory, so this should be safe.
1339  */
1341 
1342  /*
1343  * Initialize stats collection subsystem (this does NOT start the
1344  * collector process!)
1345  */
1346  pgstat_init();
1347 
1348  /*
1349  * Initialize the autovacuum subsystem (again, no process start yet)
1350  */
1351  autovac_init();
1352 
1353  /*
1354  * Load configuration files for client authentication.
1355  */
1356  if (!load_hba())
1357  {
1358  /*
1359  * It makes no sense to continue if we fail to load the HBA file,
1360  * since there is no way to connect to the database in this case.
1361  */
1362  ereport(FATAL,
1363  (errmsg("could not load pg_hba.conf")));
1364  }
1365  if (!load_ident())
1366  {
1367  /*
1368  * We can start up without the IDENT file, although it means that you
1369  * cannot log in using any of the authentication methods that need a
1370  * user name mapping. load_ident() already logged the details of error
1371  * to the log.
1372  */
1373  }
1374 
1375 #ifdef HAVE_PTHREAD_IS_THREADED_NP
1376 
1377  /*
1378  * On macOS, libintl replaces setlocale() with a version that calls
1379  * CFLocaleCopyCurrent() when its second argument is "" and every relevant
1380  * environment variable is unset or empty. CFLocaleCopyCurrent() makes
1381  * the process multithreaded. The postmaster calls sigprocmask() and
1382  * calls fork() without an immediate exec(), both of which have undefined
1383  * behavior in a multithreaded program. A multithreaded postmaster is the
1384  * normal case on Windows, which offers neither fork() nor sigprocmask().
1385  */
1386  if (pthread_is_threaded_np() != 0)
1387  ereport(FATAL,
1388  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1389  errmsg("postmaster became multithreaded during startup"),
1390  errhint("Set the LC_ALL environment variable to a valid locale.")));
1391 #endif
1392 
1393  /*
1394  * Remember postmaster startup time
1395  */
1397 
1398  /*
1399  * Report postmaster status in the postmaster.pid file, to allow pg_ctl to
1400  * see what's happening.
1401  */
1403 
1404  /*
1405  * We're ready to rock and roll...
1406  */
1408  Assert(StartupPID != 0);
1410  pmState = PM_STARTUP;
1411 
1412  /* Some workers may be scheduled to start now */
1414 
1415  status = ServerLoop();
1416 
1417  /*
1418  * ServerLoop probably shouldn't ever return, but if it does, close down.
1419  */
1420  ExitPostmaster(status != STATUS_OK);
1421 
1422  abort(); /* not reached */
1423 }
1424 
1425 
1426 /*
1427  * on_proc_exit callback to close server's listen sockets
      *
      * Scans ListenSocket[0 .. MAXLISTEN-1] and acts on every entry that is not
      * PGINVALID_SOCKET, then deals with the Unix-socket filesystem entries.
      * Socket lock files are deliberately left alone here.
      *
      * NOTE(review): this listing omits source lines 1430 (the function
      * name/signature), 1444-1445 (the body of the inner "if") and 1454 (the
      * statement that follows the Unix-socket comment); confirm those against
      * the complete file before relying on this description.
1428  */
1429 static void
1431 {
1432  int i;
1433 
1434  /*
1435  * First, explicitly close all the socket FDs. We used to just let this
1436  * happen implicitly at postmaster exit, but it's better to close them
1437  * before we remove the postmaster.pid lockfile; otherwise there's a race
1438  * condition if a new postmaster wants to re-use the TCP port number.
1439  */
1440  for (i = 0; i < MAXLISTEN; i++)
1441  {
      /* unlike the scans elsewhere, this loop does not stop at the first unused slot */
1442  if (ListenSocket[i] != PGINVALID_SOCKET)
1443  {
      /* NOTE(review): lines 1444-1445 (per-socket close actions) not shown */
1446  }
1447  }
1448 
1449  /*
1450  * Next, remove any filesystem entries for Unix sockets. To avoid race
1451  * conditions against incoming postmasters, this must happen after closing
1452  * the sockets and before removing lock files.
1453  */
      /* NOTE(review): line 1454 (the removal call itself) not shown */
1455 
1456  /*
1457  * We don't do anything about socket lock files here; those will be
1458  * removed in a later on_proc_exit callback.
1459  */
1460 }
1461 
1462 /*
1463  * on_proc_exit callback to delete external_pid_file
      *
      * Does nothing when external_pid_file is not set. The result of unlink()
      * is intentionally not examined, so a failure to remove the file is
      * silent.
      *
      * NOTE(review): the signature line (listing line 1466) is not shown here.
1464  */
1465 static void
1467 {
1468  if (external_pid_file)
1469  unlink(external_pid_file);
1470 }
1471 
1472 
1473 /*
1474  * Compute and check the directory paths to files that are part of the
1475  * installation (as deduced from the postgres executable's own location)
      *
      * Steps visible here:
      *  - resolve our own executable into my_exec_path from argv0 (FATAL on
      *    failure);
      *  - under EXEC_BACKEND, also resolve the matching "postgres" executable
      *    into postgres_exec_path (FATAL on failure);
      *  - open and immediately release the directory named by pkglib_path,
      *    purely to prove it is readable (ERROR on failure).
      *
      * NOTE(review): this listing omits source lines 1478 (the function
      * name/signature, which presumably declares argv0), 1499 (the statement
      * that fills in pkglib_path) and 1511 (the first argument line of the
      * ereport call); confirm against the complete file.
1476  */
1477 static void
1479 {
1480  DIR *pdir;
1481 
1482  /* Locate the postgres executable itself */
1483  if (find_my_exec(argv0, my_exec_path) < 0)
1484  elog(FATAL, "%s: could not locate my own executable path", argv0);
1485 
1486 #ifdef EXEC_BACKEND
1487  /* Locate executable backend before we change working directory */
1488  if (find_other_exec(argv0, "postgres", PG_BACKEND_VERSIONSTR,
1489  postgres_exec_path) < 0)
1490  ereport(FATAL,
1491  (errmsg("%s: could not locate matching postgres executable",
1492  argv0)));
1493 #endif
1494 
1495  /*
1496  * Locate the pkglib directory --- this has to be set early in case we try
1497  * to load any modules from it in response to postgresql.conf entries.
1498  */
      /* NOTE(review): line 1499 (the call that computes pkglib_path) not shown */
1500 
1501  /*
1502  * Verify that there's a readable directory there; otherwise the Postgres
1503  * installation is incomplete or corrupt. (A typical cause of this
1504  * failure is that the postgres executable has been moved or hardlinked to
1505  * some directory that's not a sibling of the installation lib/
1506  * directory.)
1507  */
1508  pdir = AllocateDir(pkglib_path);
1509  if (pdir == NULL)
1510  ereport(ERROR,
1512  errmsg("could not open directory \"%s\": %m",
1513  pkglib_path),
1514  errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
1515  my_exec_path)));
      /* the handle was only needed for the existence/readability probe */
1516  FreeDir(pdir);
1517 
1518  /*
1519  * XXX is it worth similarly checking the share/ directory? If the lib/
1520  * directory is there, then share/ probably is too.
1521  */
1522 }
1523 
1524 /*
1525  * Check that pg_control exists in the correct location in the data directory.
1526  *
1527  * No attempt is made to validate the contents of pg_control here. This is
1528  * just a sanity check to see if we are looking at a real data directory.
      *
      * The file probed is "<DataDir>/global/pg_control". If it cannot be
      * opened, a message including strerror(errno) is written to stderr and
      * the postmaster exits via ExitPostmaster(2); otherwise the file is
      * closed again and the function returns normally.
      *
      * NOTE(review): the signature line (listing line 1531) is not shown here.
1529  */
1530 static void
1532 {
1533  char path[MAXPGPATH];
1534  FILE *fp;
1535 
      /* snprintf bounds the write to path[]; an over-long DataDir is truncated */
1536  snprintf(path, sizeof(path), "%s/global/pg_control", DataDir);
1537 
1538  fp = AllocateFile(path, PG_BINARY_R);
1539  if (fp == NULL)
1540  {
1541  write_stderr("%s: could not find the database system\n"
1542  "Expected to find it in the directory \"%s\",\n"
1543  "but could not open file \"%s\": %s\n",
1544  progname, DataDir, path, strerror(errno));
1545  ExitPostmaster(2);
1546  }
      /* opening succeeded; the contents are not read at all */
1547  FreeFile(fp);
1548 }
1549 
1550 /*
1551  * Determine how long we should let ServerLoop sleep.
1552  *
1553  * In normal conditions we wait at most one minute, to ensure that the other
1554  * background tasks handled by ServerLoop get done even when no requests are
1555  * arriving. However, if there are background workers waiting to be started,
1556  * we don't actually sleep so that they are quickly serviced. Other exception
1557  * cases are as shown in the code.
      *
      * timeout: output only; tv_sec and tv_usec are overwritten on every path.
      *
      * Results, in the order they are tested:
      *  - shutdown in progress (or the second, unlisted condition holds):
      *    the time remaining until SIGKILL_CHILDREN_AFTER_SECS has elapsed
      *    since AbortStartTime, clamped at zero, or 60 seconds if
      *    AbortStartTime is unset;
      *  - StartWorkerNeeded: zero, i.e. do not sleep;
      *  - HaveCrashedWorker: time until the earliest pending worker restart,
      *    capped at 60 seconds;
      *  - otherwise: 60 seconds.
      *
      * NOTE(review): this listing omits source lines 1569 (second half of the
      * first "if" condition), 1604 (the loop header over the worker list),
      * 1614 (first half of the "forget this worker" condition) and 1633 (the
      * start of the call that fills secs/microsecs); confirm against the
      * complete file.
1558  */
1559 static void
1560 DetermineSleepTime(struct timeval *timeout)
1561 {
      /* 0 doubles as "no wakeup time computed yet" */
1562  TimestampTz next_wakeup = 0;
1563 
1564  /*
1565  * Normal case: either there are no background workers at all, or we're in
1566  * a shutdown sequence (during which we ignore bgworkers altogether).
1567  */
1568  if (Shutdown > NoShutdown ||
1570  {
1571  if (AbortStartTime != 0)
1572  {
1573  /* time left to abort; clamp to 0 in case it already expired */
1574  timeout->tv_sec = SIGKILL_CHILDREN_AFTER_SECS -
1575  (time(NULL) - AbortStartTime);
1576  timeout->tv_sec = Max(timeout->tv_sec, 0);
1577  timeout->tv_usec = 0;
1578  }
1579  else
1580  {
1581  timeout->tv_sec = 60;
1582  timeout->tv_usec = 0;
1583  }
1584  return;
1585  }
1586 
      /* a worker is waiting to be launched: return a zero timeout */
1587  if (StartWorkerNeeded)
1588  {
1589  timeout->tv_sec = 0;
1590  timeout->tv_usec = 0;
1591  return;
1592  }
1593 
1594  if (HaveCrashedWorker)
1595  {
1596  slist_mutable_iter siter;
1597 
1598  /*
1599  * When there are crashed bgworkers, we sleep just long enough that
1600  * they are restarted when they request to be. Scan the list to
1601  * determine the minimum of all wakeup times according to most recent
1602  * crash time and requested restart interval.
1603  */
1605  {
1606  RegisteredBgWorker *rw;
1607  TimestampTz this_wakeup;
1608 
1609  rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
1610 
      /* entries that have not crashed do not influence the wakeup time */
1611  if (rw->rw_crashed_at == 0)
1612  continue;
1613 
1615  || rw->rw_terminate)
1616  {
      /* the list is edited while being walked, hence the mutable iterator */
1617  ForgetBackgroundWorker(&siter);
1618  continue;
1619  }
1620 
      /* presumably bgw_restart_time is in seconds, scaled here to msec -- TODO confirm */
1621  this_wakeup = TimestampTzPlusMilliseconds(rw->rw_crashed_at,
1622  1000L * rw->rw_worker.bgw_restart_time);
1623  if (next_wakeup == 0 || this_wakeup < next_wakeup)
1624  next_wakeup = this_wakeup;
1625  }
1626  }
1627 
1628  if (next_wakeup != 0)
1629  {
1630  long secs;
1631  int microsecs;
1632 
1634  &secs, &microsecs);
1635  timeout->tv_sec = secs;
1636  timeout->tv_usec = microsecs;
1637 
1638  /* Ensure we don't exceed one minute */
1639  if (timeout->tv_sec > 60)
1640  {
1641  timeout->tv_sec = 60;
1642  timeout->tv_usec = 0;
1643  }
1644  }
1645  else
1646  {
      /* no restart is pending: fall back to the one-minute default */
1647  timeout->tv_sec = 60;
1648  timeout->tv_usec = 0;
1649  }
1650 }
1651 
1652 /*
1653  * Main idle loop of postmaster
1654  *
1655  * NB: Needs to be called with signals blocked
      *
      * Each iteration: sleep (select() on the listen sockets, or a plain
      * 100 msec nap in PM_WAIT_DEAD_END), hand every pending connection to
      * BackendStartup(), restart any missing auxiliary processes that the
      * current pmState allows, and finally run the time-based housekeeping
      * (SIGKILL escalation, lock file recheck once a minute, socket/lock file
      * touch every 58 minutes).
      *
      * The only return visible in this listing is STATUS_ERROR, taken when
      * select() fails with an errno other than EINTR or EWOULDBLOCK.
      *
      * NOTE(review): this listing omits source lines 1658 (the function
      * name/signature), 1689, 1704, 1717, 1757, 1765, 1768, 1770, 1779, 1788,
      * 1791, 1806, 1810, 1814-1815, 1818-1819, 1850, 1855, 1876 and 1889.
      * Several "if" statements below therefore appear without their condition
      * tail or controlled statement; confirm against the complete file.
1656  */
1657 static int
1659 {
1660  fd_set readmask;
1661  int nSockets;
1662  time_t last_lockfile_recheck_time,
1663  last_touch_time;
1664 
1665  last_lockfile_recheck_time = last_touch_time = time(NULL);
1666 
      /* the master mask and select() width are computed once, before the loop */
1667  nSockets = initMasks(&readmask);
1668 
1669  for (;;)
1670  {
1671  fd_set rmask;
1672  int selres;
1673  time_t now;
1674 
1675  /*
1676  * Wait for a connection request to arrive.
1677  *
1678  * We block all signals except while sleeping. That makes it safe for
1679  * signal handlers, which again block all signals while executing, to
1680  * do nontrivial work.
1681  *
1682  * If we are in PM_WAIT_DEAD_END state, then we don't want to accept
1683  * any new connections, so we don't call select(), and just sleep.
1684  */
      /* select() overwrites its mask, so work on a fresh copy every time */
1685  memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
1686 
1687  if (pmState == PM_WAIT_DEAD_END)
1688  {
      /* NOTE(review): line 1689 not shown; presumably unblocks signals -- confirm */
1690 
1691  pg_usleep(100000L); /* 100 msec seems reasonable */
      /* pretend select() timed out so no sockets are examined below */
1692  selres = 0;
1693 
1694  PG_SETMASK(&BlockSig);
1695  }
1696  else
1697  {
1698  /* must set timeout each time; some OSes change it! */
1699  struct timeval timeout;
1700 
1701  /* Needs to run with blocked signals! */
1702  DetermineSleepTime(&timeout);
1703 
      /* NOTE(review): line 1704 not shown; presumably unblocks signals -- confirm */
1705 
1706  selres = select(nSockets, &rmask, NULL, NULL, &timeout);
1707 
1708  PG_SETMASK(&BlockSig);
1709  }
1710 
1711  /* Now check the select() result */
1712  if (selres < 0)
1713  {
      /* EINTR and EWOULDBLOCK are tolerated: fall through and keep looping */
1714  if (errno != EINTR && errno != EWOULDBLOCK)
1715  {
1716  ereport(LOG,
1718  errmsg("select() failed in postmaster: %m")));
1719  return STATUS_ERROR;
1720  }
1721  }
1722 
1723  /*
1724  * New connection pending on any of our sockets? If so, fork a child
1725  * process to deal with it.
1726  */
1727  if (selres > 0)
1728  {
1729  int i;
1730 
1731  for (i = 0; i < MAXLISTEN; i++)
1732  {
      /* the scan stops at the first unused slot */
1733  if (ListenSocket[i] == PGINVALID_SOCKET)
1734  break;
1735  if (FD_ISSET(ListenSocket[i], &rmask))
1736  {
1737  Port *port;
1738 
1739  port = ConnCreate(ListenSocket[i]);
      /* a NULL port is skipped without further action here */
1740  if (port)
1741  {
1742  BackendStartup(port);
1743 
1744  /*
1745  * We no longer need the open socket or port structure
1746  * in this process
1747  */
1748  StreamClose(port->sock);
1749  ConnFree(port);
1750  }
1751  }
1752  }
1753  }
1754 
1755  /* If we have lost the log collector, try to start a new one */
1756  if (SysLoggerPID == 0 && Logging_collector)
      /* NOTE(review): line 1757 (the controlled statement) not shown */
1758 
1759  /*
1760  * If no background writer process is running, and we are not in a
1761  * state that prevents it, start one. It doesn't matter if this
1762  * fails, we'll just try again later. Likewise for the checkpointer.
1763  */
1764  if (pmState == PM_RUN || pmState == PM_RECOVERY ||
1766  {
1767  if (CheckpointerPID == 0)
1769  if (BgWriterPID == 0)
1771  }
1772 
1773  /*
1774  * Likewise, if we have lost the walwriter process, try to start a new
1775  * one. But this is needed only in normal operation (else we cannot
1776  * be writing any new WAL).
1777  */
1778  if (WalWriterPID == 0 && pmState == PM_RUN)
      /* NOTE(review): line 1779 (the controlled statement) not shown */
1780 
1781  /*
1782  * If we have lost the autovacuum launcher, try to start a new one. We
1783  * don't want autovacuum to run in binary upgrade mode because
1784  * autovacuum might update relfrozenxid for empty tables before the
1785  * physical files are put in place.
1786  */
1787  if (!IsBinaryUpgrade && AutoVacPID == 0 &&
1789  pmState == PM_RUN)
1790  {
1792  if (AutoVacPID != 0)
1793  start_autovac_launcher = false; /* signal processed */
1794  }
1795 
1796  /* If we have lost the stats collector, try to start a new one */
1797  if (PgStatPID == 0 &&
1798  (pmState == PM_RUN || pmState == PM_HOT_STANDBY))
1799  PgStatPID = pgstat_start();
1800 
1801  /* If we have lost the archiver, try to start a new one. */
1802  if (PgArchPID == 0 && PgArchStartupAllowed())
1803  PgArchPID = pgarch_start();
1804 
1805  /* If we need to signal the autovacuum launcher, do so now */
1807  {
      /* the request flag is cleared even if no launcher exists to be signalled */
1808  avlauncher_needs_signal = false;
1809  if (AutoVacPID != 0)
1811  }
1812 
1813  /* If we need to start a WAL receiver, try to do that now */
      /* NOTE(review): lines 1814-1815 not shown */
1816 
1817  /* Get other worker processes running, if needed */
      /* NOTE(review): lines 1818-1819 not shown */
1820 
1821 #ifdef HAVE_PTHREAD_IS_THREADED_NP
1822 
1823  /*
1824  * With assertions enabled, check regularly for appearance of
1825  * additional threads. All builds check at start and exit.
1826  */
1827  Assert(pthread_is_threaded_np() == 0);
1828 #endif
1829 
1830  /*
1831  * Lastly, check to see if it's time to do some things that we don't
1832  * want to do every single time through the loop, because they're a
1833  * bit expensive. Note that there's up to a minute of slop in when
1834  * these tasks will be performed, since DetermineSleepTime() will let
1835  * us sleep at most that long; except for SIGKILL timeout which has
1836  * special-case logic there.
1837  */
1838  now = time(NULL);
1839 
1840  /*
1841  * If we already sent SIGQUIT to children and they are slow to shut
1842  * down, it's time to send them SIGKILL. This doesn't happen
1843  * normally, but under certain conditions backends can get stuck while
1844  * shutting down. This is a last measure to get them unwedged.
1845  *
1846  * Note we also do this during recovery from a process crash.
1847  */
1848  if ((Shutdown >= ImmediateShutdown || (FatalError && !SendStop)) &&
1849  AbortStartTime != 0 &&
1851  {
1852  /* We were gentle with them before. Not anymore */
1853  ereport(LOG,
1854  (errmsg("issuing SIGKILL to recalcitrant children")));
      /* NOTE(review): line 1855 (the action that delivers SIGKILL) not shown */
1856  /* reset flag so we don't SIGKILL again */
1857  AbortStartTime = 0;
1858  }
1859 
1860  /*
1861  * Once a minute, verify that postmaster.pid hasn't been removed or
1862  * overwritten. If it has, we force a shutdown. This avoids having
1863  * postmasters and child processes hanging around after their database
1864  * is gone, and maybe causing problems if a new database cluster is
1865  * created in the same place. It also provides some protection
1866  * against a DBA foolishly removing postmaster.pid and manually
1867  * starting a new postmaster. Data corruption is likely to ensue from
1868  * that anyway, but we can minimize the damage by aborting ASAP.
1869  */
1870  if (now - last_lockfile_recheck_time >= 1 * SECS_PER_MINUTE)
1871  {
1872  if (!RecheckDataDirLockFile())
1873  {
1874  ereport(LOG,
1875  (errmsg("performing immediate shutdown because data directory lock file is invalid")));
      /* NOTE(review): line 1876 (the action that triggers the shutdown) not shown */
1877  }
      /* the timestamp advances whether or not the recheck succeeded */
1878  last_lockfile_recheck_time = now;
1879  }
1880 
1881  /*
1882  * Touch Unix socket and lock files every 58 minutes, to ensure that
1883  * they are not removed by overzealous /tmp-cleaning tasks. We assume
1884  * no one runs cleaners with cutoff times of less than an hour ...
1885  */
1886  if (now - last_touch_time >= 58 * SECS_PER_MINUTE)
1887  {
1888  TouchSocketFiles();
      /* NOTE(review): line 1889 (presumably the lock-file touch) not shown */
1890  last_touch_time = now;
1891  }
1892  }
1893 }
1894 
1895 /*
1896  * Initialise the masks for select() for the ports we are listening on.
1897  * Return the number of sockets to listen on.
1898  */
1899 static int
1900 initMasks(fd_set *rmask)
1901 {
1902  int maxsock = -1;
1903  int i;
1904 
1905  FD_ZERO(rmask);
1906 
1907  for (i = 0; i < MAXLISTEN; i++)
1908  {
1909  int fd = ListenSocket[i];
1910 
1911  if (fd == PGINVALID_SOCKET)
1912  break;
1913  FD_SET(fd, rmask);
1914 
1915  if (fd > maxsock)
1916  maxsock = fd;
1917  }
1918 
1919  return maxsock + 1;
1920 }
1921 
1922 
1923 /*
1924  * Read a client's startup packet and do something according to it.
1925  *
1926  * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1927  * not return at all.
1928  *
1929  * (Note that ereport(FATAL) stuff is sent to the client, so only use it
1930  * if that's what you want. Return STATUS_ERROR if you don't want to
1931  * send anything to the client, which would typically be appropriate
1932  * if we detect a communications failure.)
1933  *
1934  * Set ssl_done and/or gss_done when negotiation of an encrypted layer
1935  * (currently, TLS or GSSAPI) is completed. A successful negotiation of either
1936  * encryption layer sets both flags, but a rejected negotiation sets only the
1937  * flag for that layer, since the client may wish to try the other one. We
1938  * should make no assumption here about the order in which the client may make
1939  * requests.
1940  */
1941 static int
1942 ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done)
1943 {
1944  int32 len;
1945  void *buf;
1946  ProtocolVersion proto;
1947  MemoryContext oldcontext;
1948 
1949  pq_startmsgread();
1950 
1951  /*
1952  * Grab the first byte of the length word separately, so that we can tell
1953  * whether we have no data at all or an incomplete packet. (This might
1954  * sound inefficient, but it's not really, because of buffering in
1955  * pqcomm.c.)
1956  */
1957  if (pq_getbytes((char *) &len, 1) == EOF)
1958  {
1959  /*
1960  * If we get no data at all, don't clutter the log with a complaint;
1961  * such cases often occur for legitimate reasons. An example is that
1962  * we might be here after responding to NEGOTIATE_SSL_CODE, and if the
1963  * client didn't like our response, it'll probably just drop the
1964  * connection. Service-monitoring software also often just opens and
1965  * closes a connection without sending anything. (So do port
1966  * scanners, which may be less benign, but it's not really our job to
1967  * notice those.)
1968  */
1969  return STATUS_ERROR;
1970  }
1971 
1972  if (pq_getbytes(((char *) &len) + 1, 3) == EOF)
1973  {
1974  /* Got a partial length word, so bleat about that */
1975  if (!ssl_done && !gss_done)
1977  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1978  errmsg("incomplete startup packet")));
1979  return STATUS_ERROR;
1980  }
1981 
1982  len = pg_ntoh32(len);
1983  len -= 4;
1984 
1985  if (len < (int32) sizeof(ProtocolVersion) ||
1987  {
1989  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1990  errmsg("invalid length of startup packet")));
1991  return STATUS_ERROR;
1992  }
1993 
1994  /*
1995  * Allocate at least the size of an old-style startup packet, plus one
1996  * extra byte, and make sure all are zeroes. This ensures we will have
1997  * null termination of all strings, in both fixed- and variable-length
1998  * packet layouts.
1999  */
2000  if (len <= (int32) sizeof(StartupPacket))
2001  buf = palloc0(sizeof(StartupPacket) + 1);
2002  else
2003  buf = palloc0(len + 1);
2004 
2005  if (pq_getbytes(buf, len) == EOF)
2006  {
2008  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2009  errmsg("incomplete startup packet")));
2010  return STATUS_ERROR;
2011  }
2012  pq_endmsgread();
2013 
2014  /*
2015  * The first field is either a protocol version number or a special
2016  * request code.
2017  */
2018  port->proto = proto = pg_ntoh32(*((ProtocolVersion *) buf));
2019 
2020  if (proto == CANCEL_REQUEST_CODE)
2021  {
2022  processCancelRequest(port, buf);
2023  /* Not really an error, but we don't want to proceed further */
2024  return STATUS_ERROR;
2025  }
2026 
2027  if (proto == NEGOTIATE_SSL_CODE && !ssl_done)
2028  {
2029  char SSLok;
2030 
2031 #ifdef USE_SSL
2032  /* No SSL when disabled or on Unix sockets */
2033  if (!LoadedSSL || IS_AF_UNIX(port->laddr.addr.ss_family))
2034  SSLok = 'N';
2035  else
2036  SSLok = 'S'; /* Support for SSL */
2037 #else
2038  SSLok = 'N'; /* No support for SSL */
2039 #endif
2040 
2041 retry1:
2042  if (send(port->sock, &SSLok, 1, 0) != 1)
2043  {
2044  if (errno == EINTR)
2045  goto retry1; /* if interrupted, just retry */
2048  errmsg("failed to send SSL negotiation response: %m")));
2049  return STATUS_ERROR; /* close the connection */
2050  }
2051 
2052 #ifdef USE_SSL
2053  if (SSLok == 'S' && secure_open_server(port) == -1)
2054  return STATUS_ERROR;
2055 #endif
2056 
2057  /*
2058  * regular startup packet, cancel, etc packet should follow, but not
2059  * another SSL negotiation request, and a GSS request should only
2060  * follow if SSL was rejected (client may negotiate in either order)
2061  */
2062  return ProcessStartupPacket(port, true, SSLok == 'S');
2063  }
2064  else if (proto == NEGOTIATE_GSS_CODE && !gss_done)
2065  {
2066  char GSSok = 'N';
2067 #ifdef ENABLE_GSS
2068  /* No GSSAPI encryption when on Unix socket */
2069  if (!IS_AF_UNIX(port->laddr.addr.ss_family))
2070  GSSok = 'G';
2071 #endif
2072 
2073  while (send(port->sock, &GSSok, 1, 0) != 1)
2074  {
2075  if (errno == EINTR)
2076  continue;
2079  errmsg("failed to send GSSAPI negotiation response: %m")));
2080  return STATUS_ERROR; /* close the connection */
2081  }
2082 
2083 #ifdef ENABLE_GSS
2084  if (GSSok == 'G' && secure_open_gssapi(port) == -1)
2085  return STATUS_ERROR;
2086 #endif
2087 
2088  /*
2089  * regular startup packet, cancel, etc packet should follow, but not
2090  * another GSS negotiation request, and an SSL request should only
2091  * follow if GSS was rejected (client may negotiate in either order)
2092  */
2093  return ProcessStartupPacket(port, GSSok == 'G', true);
2094  }
2095 
2096  /* Could add additional special packet types here */
2097 
2098  /*
2099  * Set FrontendProtocol now so that ereport() knows what format to send if
2100  * we fail during startup.
2101  */
2102  FrontendProtocol = proto;
2103 
2104  /* Check that the major protocol version is in range. */
2107  ereport(FATAL,
2108  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2109  errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
2110  PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
2114 
2115  /*
2116  * Now fetch parameters out of startup packet and save them into the Port
2117  * structure. All data structures attached to the Port struct must be
2118  * allocated in TopMemoryContext so that they will remain available in a
2119  * running backend (even after PostmasterContext is destroyed). We need
2120  * not worry about leaking this storage on failure, since we aren't in the
2121  * postmaster process anymore.
2122  */
2124 
2125  if (PG_PROTOCOL_MAJOR(proto) >= 3)
2126  {
2127  int32 offset = sizeof(ProtocolVersion);
2128  List *unrecognized_protocol_options = NIL;
2129 
2130  /*
2131  * Scan packet body for name/option pairs. We can assume any string
2132  * beginning within the packet body is null-terminated, thanks to
2133  * zeroing extra byte above.
2134  */
2135  port->guc_options = NIL;
2136 
2137  while (offset < len)
2138  {
2139  char *nameptr = ((char *) buf) + offset;
2140  int32 valoffset;
2141  char *valptr;
2142 
2143  if (*nameptr == '\0')
2144  break; /* found packet terminator */
2145  valoffset = offset + strlen(nameptr) + 1;
2146  if (valoffset >= len)
2147  break; /* missing value, will complain below */
2148  valptr = ((char *) buf) + valoffset;
2149 
2150  if (strcmp(nameptr, "database") == 0)
2151  port->database_name = pstrdup(valptr);
2152  else if (strcmp(nameptr, "user") == 0)
2153  port->user_name = pstrdup(valptr);
2154  else if (strcmp(nameptr, "options") == 0)
2155  port->cmdline_options = pstrdup(valptr);
2156  else if (strcmp(nameptr, "replication") == 0)
2157  {
2158  /*
2159  * Due to backward compatibility concerns the replication
2160  * parameter is a hybrid beast which allows the value to be
2161  * either boolean or the string 'database'. The latter
2162  * connects to a specific database which is e.g. required for
2163  * logical decoding while.
2164  */
2165  if (strcmp(valptr, "database") == 0)
2166  {
2167  am_walsender = true;
2168  am_db_walsender = true;
2169  }
2170  else if (!parse_bool(valptr, &am_walsender))
2171  ereport(FATAL,
2172  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2173  errmsg("invalid value for parameter \"%s\": \"%s\"",
2174  "replication",
2175  valptr),
2176  errhint("Valid values are: \"false\", 0, \"true\", 1, \"database\".")));
2177  }
2178  else if (strncmp(nameptr, "_pq_.", 5) == 0)
2179  {
2180  /*
2181  * Any option beginning with _pq_. is reserved for use as a
2182  * protocol-level option, but at present no such options are
2183  * defined.
2184  */
2185  unrecognized_protocol_options =
2186  lappend(unrecognized_protocol_options, pstrdup(nameptr));
2187  }
2188  else
2189  {
2190  /* Assume it's a generic GUC option */
2191  port->guc_options = lappend(port->guc_options,
2192  pstrdup(nameptr));
2193  port->guc_options = lappend(port->guc_options,
2194  pstrdup(valptr));
2195 
2196  /*
2197  * Copy application_name to port if we come across it. This
2198  * is done so we can log the application_name in the
2199  * connection authorization message. Note that the GUC would
2200  * be used but we haven't gone through GUC setup yet.
2201  */
2202  if (strcmp(nameptr, "application_name") == 0)
2203  {
2204  char *tmp_app_name = pstrdup(valptr);
2205 
2206  pg_clean_ascii(tmp_app_name);
2207 
2208  port->application_name = tmp_app_name;
2209  }
2210  }
2211  offset = valoffset + strlen(valptr) + 1;
2212  }
2213 
2214  /*
2215  * If we didn't find a packet terminator exactly at the end of the
2216  * given packet length, complain.
2217  */
2218  if (offset != len - 1)
2219  ereport(FATAL,
2220  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2221  errmsg("invalid startup packet layout: expected terminator as last byte")));
2222 
2223  /*
2224  * If the client requested a newer protocol version or if the client
2225  * requested any protocol options we didn't recognize, let them know
2226  * the newest minor protocol version we do support and the names of
2227  * any unrecognized options.
2228  */
2230  unrecognized_protocol_options != NIL)
2231  SendNegotiateProtocolVersion(unrecognized_protocol_options);
2232  }
2233  else
2234  {
2235  /*
2236  * Get the parameters from the old-style, fixed-width-fields startup
2237  * packet as C strings. The packet destination was cleared first so a
2238  * short packet has zeros silently added. We have to be prepared to
2239  * truncate the pstrdup result for oversize fields, though.
2240  */
2241  StartupPacket *packet = (StartupPacket *) buf;
2242 
2243  port->database_name = pstrdup(packet->database);
2244  if (strlen(port->database_name) > sizeof(packet->database))
2245  port->database_name[sizeof(packet->database)] = '\0';
2246  port->user_name = pstrdup(packet->user);
2247  if (strlen(port->user_name) > sizeof(packet->user))
2248  port->user_name[sizeof(packet->user)] = '\0';
2249  port->cmdline_options = pstrdup(packet->options);
2250  if (strlen(port->cmdline_options) > sizeof(packet->options))
2251  port->cmdline_options[sizeof(packet->options)] = '\0';
2252  port->guc_options = NIL;
2253  }
2254 
2255  /* Check a user name was given. */
2256  if (port->user_name == NULL || port->user_name[0] == '\0')
2257  ereport(FATAL,
2258  (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
2259  errmsg("no PostgreSQL user name specified in startup packet")));
2260 
2261  /* The database defaults to the user name. */
2262  if (port->database_name == NULL || port->database_name[0] == '\0')
2263  port->database_name = pstrdup(port->user_name);
2264 
2265  if (Db_user_namespace)
2266  {
2267  /*
2268  * If user@, it is a global user, remove '@'. We only want to do this
2269  * if there is an '@' at the end and no earlier in the user string or
2270  * they may fake as a local user of another database attaching to this
2271  * database.
2272  */
2273  if (strchr(port->user_name, '@') ==
2274  port->user_name + strlen(port->user_name) - 1)
2275  *strchr(port->user_name, '@') = '\0';
2276  else
2277  {
2278  /* Append '@' and dbname */
2279  port->user_name = psprintf("%s@%s", port->user_name, port->database_name);
2280  }
2281  }
2282 
2283  /*
2284  * Truncate given database and user names to length of a Postgres name.
2285  * This avoids lookup failures when overlength names are given.
2286  */
2287  if (strlen(port->database_name) >= NAMEDATALEN)
2288  port->database_name[NAMEDATALEN - 1] = '\0';
2289  if (strlen(port->user_name) >= NAMEDATALEN)
2290  port->user_name[NAMEDATALEN - 1] = '\0';
2291 
2292  if (am_walsender)
2294  else
2296 
2297  /*
2298  * Normal walsender backends, e.g. for streaming replication, are not
2299  * connected to a particular database. But walsenders used for logical
2300  * replication need to connect to a specific database. We allow streaming
2301  * replication commands to be issued even if connected to a database as it
2302  * can make sense to first make a basebackup and then stream changes
2303  * starting from that.
2304  */
2305  if (am_walsender && !am_db_walsender)
2306  port->database_name[0] = '\0';
2307 
2308  /*
2309  * Done putting stuff in TopMemoryContext.
2310  */
2311  MemoryContextSwitchTo(oldcontext);
2312 
2313  /*
2314  * If we're going to reject the connection due to database state, say so
2315  * now instead of wasting cycles on an authentication exchange. (This also
2316  * allows a pg_ping utility to be written.)
2317  */
2318  switch (port->canAcceptConnections)
2319  {
2320  case CAC_STARTUP:
2321  ereport(FATAL,
2323  errmsg("the database system is starting up")));
2324  break;
2325  case CAC_SHUTDOWN:
2326  ereport(FATAL,
2328  errmsg("the database system is shutting down")));
2329  break;
2330  case CAC_RECOVERY:
2331  ereport(FATAL,
2333  errmsg("the database system is in recovery mode")));
2334  break;
2335  case CAC_TOOMANY:
2336  ereport(FATAL,
2337  (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
2338  errmsg("sorry, too many clients already")));
2339  break;
2340  case CAC_SUPERUSER:
2341  /* OK for now, will check in InitPostgres */
2342  break;
2343  case CAC_OK:
2344  break;
2345  }
2346 
2347  return STATUS_OK;
2348 }
2349 
2350 /*
2351  * Send a NegotiateProtocolVersion to the client. This lets the client know
2352  * that they have requested a newer minor protocol version than we are able
2353  * to speak. We'll speak the highest version we know about; the client can,
2354  * of course, abandon the connection if that's a problem.
2355  *
2356  * We also include in the response a list of protocol options we didn't
2357  * understand. This allows clients to include optional parameters that might
2358  * be present either in newer protocol versions or third-party protocol
2359  * extensions without fear of having to reconnect if those options are not
2360  * understood, while at the same time making certain that the client is aware
2361  * of which options were actually accepted.
2362  */
2363 static void
2364 SendNegotiateProtocolVersion(List *unrecognized_protocol_options)
2365 {
2367  ListCell *lc;
2368 
2369  pq_beginmessage(&buf, 'v'); /* NegotiateProtocolVersion */
2371  pq_sendint32(&buf, list_length(unrecognized_protocol_options));
2372  foreach(lc, unrecognized_protocol_options)
2373  pq_sendstring(&buf, lfirst(lc));
2374  pq_endmessage(&buf);
2375 
2376  /* no need to flush, some other message will follow */
2377 }
2378 
2379 /*
2380  * The client has sent a cancel request packet, not a normal
2381  * start-a-new-connection packet. Perform the necessary processing.
2382  * Nothing is sent back to the client.
2383  */
2384 static void
2386 {
2387  CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
2388  int backendPID;
2389  int32 cancelAuthCode;
2390  Backend *bp;
2391 
2392 #ifndef EXEC_BACKEND
2393  dlist_iter iter;
2394 #else
2395  int i;
2396 #endif
2397 
2398  backendPID = (int) pg_ntoh32(canc->backendPID);
2399  cancelAuthCode = (int32) pg_ntoh32(canc->cancelAuthCode);
2400 
2401  /*
2402  * See if we have a matching backend. In the EXEC_BACKEND case, we can no
2403  * longer access the postmaster's own backend list, and must rely on the
2404  * duplicate array in shared memory.
2405  */
2406 #ifndef EXEC_BACKEND
2407  dlist_foreach(iter, &BackendList)
2408  {
2409  bp = dlist_container(Backend, elem, iter.cur);
2410 #else
2411  for (i = MaxLivePostmasterChildren() - 1; i >= 0; i--)
2412  {
2413  bp = (Backend *) &ShmemBackendArray[i];
2414 #endif
2415  if (bp->pid == backendPID)
2416  {
2417  if (bp->cancel_key == cancelAuthCode)
2418  {
2419  /* Found a match; signal that backend to cancel current op */
2420  ereport(DEBUG2,
2421  (errmsg_internal("processing cancel request: sending SIGINT to process %d",
2422  backendPID)));
2423  signal_child(bp->pid, SIGINT);
2424  }
2425  else
2426  /* Right PID, wrong key: no way, Jose */
2427  ereport(LOG,
2428  (errmsg("wrong key in cancel request for process %d",
2429  backendPID)));
2430  return;
2431  }
2432 #ifndef EXEC_BACKEND /* make GNU Emacs 26.1 see brace balance */
2433  }
2434 #else
2435  }
2436 #endif
2437 
2438  /* No matching backend */
2439  ereport(LOG,
2440  (errmsg("PID %d in cancel request did not match any process",
2441  backendPID)));
2442 }
2443 
2444 /*
2445  * canAcceptConnections --- check to see if database state allows connections
2446  * of the specified type. backend_type can be BACKEND_TYPE_NORMAL,
2447  * BACKEND_TYPE_AUTOVAC, or BACKEND_TYPE_BGWORKER. (Note that we don't yet
2448  * know whether a NORMAL connection might turn into a walsender.)
2449  */
2450 static CAC_state
2451 canAcceptConnections(int backend_type)
2452 {
2453  CAC_state result = CAC_OK;
2454 
2455  /*
2456  * Can't start backends when in startup/shutdown/inconsistent recovery
2457  * state. We treat autovac workers the same as user backends for this
2458  * purpose. However, bgworkers are excluded from this test; we expect
2459  * bgworker_should_start_now() decided whether the DB state allows them.
2460  */
2461  if (pmState != PM_RUN && pmState != PM_HOT_STANDBY &&
2462  backend_type != BACKEND_TYPE_BGWORKER)
2463  {
2464  if (Shutdown > NoShutdown)
2465  return CAC_SHUTDOWN; /* shutdown is pending */
2466  else if (!FatalError &&
2467  (pmState == PM_STARTUP ||
2468  pmState == PM_RECOVERY))
2469  return CAC_STARTUP; /* normal startup */
2470  else
2471  return CAC_RECOVERY; /* else must be crash recovery */
2472  }
2473 
2474  /*
2475  * "Smart shutdown" restrictions are applied only to normal connections,
2476  * not to autovac workers or bgworkers. When only superusers can connect,
2477  * we return CAC_SUPERUSER to indicate that superuserness must be checked
2478  * later. Note that neither CAC_OK nor CAC_SUPERUSER can safely be
2479  * returned until we have checked for too many children.
2480  */
2481  if (connsAllowed != ALLOW_ALL_CONNS &&
2482  backend_type == BACKEND_TYPE_NORMAL)
2483  {
2485  result = CAC_SUPERUSER; /* allow superusers only */
2486  else
2487  return CAC_SHUTDOWN; /* shutdown is pending */
2488  }
2489 
2490  /*
2491  * Don't start too many children.
2492  *
2493  * We allow more connections here than we can have backends because some
2494  * might still be authenticating; they might fail auth, or some existing
2495  * backend might exit before the auth cycle is completed. The exact
2496  * MaxBackends limit is enforced when a new backend tries to join the
2497  * shared-inval backend array.
2498  *
2499  * The limit here must match the sizes of the per-child-process arrays;
2500  * see comments for MaxLivePostmasterChildren().
2501  */
2503  result = CAC_TOOMANY;
2504 
2505  return result;
2506 }
2507 
2508 
2509 /*
2510  * ConnCreate -- create a local connection data structure
2511  *
2512  * Returns NULL on failure, other than out-of-memory which is fatal.
2513  */
2514 static Port *
2515 ConnCreate(int serverFd)
2516 {
2517  Port *port;
2518 
2519  if (!(port = (Port *) calloc(1, sizeof(Port))))
2520  {
2521  ereport(LOG,
2522  (errcode(ERRCODE_OUT_OF_MEMORY),
2523  errmsg("out of memory")));
2524  ExitPostmaster(1);
2525  }
2526 
2527  if (StreamConnection(serverFd, port) != STATUS_OK)
2528  {
2529  if (port->sock != PGINVALID_SOCKET)
2530  StreamClose(port->sock);
2531  ConnFree(port);
2532  return NULL;
2533  }
2534 
2535  /*
2536  * Allocate GSSAPI specific state struct
2537  */
2538 #ifndef EXEC_BACKEND
2539 #if defined(ENABLE_GSS) || defined(ENABLE_SSPI)
2540  port->gss = (pg_gssinfo *) calloc(1, sizeof(pg_gssinfo));
2541  if (!port->gss)
2542  {
2543  ereport(LOG,
2544  (errcode(ERRCODE_OUT_OF_MEMORY),
2545  errmsg("out of memory")));
2546  ExitPostmaster(1);
2547  }
2548 #endif
2549 #endif
2550 
2551  return port;
2552 }
2553 
2554 
2555 /*
2556  * ConnFree -- free a local connection data structure
2557  */
2558 static void
2560 {
2561 #ifdef USE_SSL
2562  secure_close(conn);
2563 #endif
2564  if (conn->gss)
2565  free(conn->gss);
2566  free(conn);
2567 }
2568 
2569 
2570 /*
2571  * ClosePostmasterPorts -- close all the postmaster's open sockets
2572  *
2573  * This is called during child process startup to release file descriptors
2574  * that are not needed by that child process. The postmaster still has
2575  * them open, of course.
2576  *
2577  * Note: we pass am_syslogger as a boolean because we don't want to set
2578  * the global variable yet when this is called.
2579  */
2580 void
2581 ClosePostmasterPorts(bool am_syslogger)
2582 {
2583  int i;
2584 
2585 #ifndef WIN32
2586 
2587  /*
2588  * Close the write end of postmaster death watch pipe. It's important to
2589  * do this as early as possible, so that if postmaster dies, others won't
2590  * think that it's still running because we're holding the pipe open.
2591  */
2593  ereport(FATAL,
2595  errmsg_internal("could not close postmaster death monitoring pipe in child process: %m")));
2597  /* Notify fd.c that we released one pipe FD. */
2599 #endif
2600 
2601  /*
2602  * Close the postmaster's listen sockets. These aren't tracked by fd.c,
2603  * so we don't call ReleaseExternalFD() here.
2604  */
2605  for (i = 0; i < MAXLISTEN; i++)
2606  {
2607  if (ListenSocket[i] != PGINVALID_SOCKET)
2608  {
2611  }
2612  }
2613 
2614  /*
2615  * If using syslogger, close the read side of the pipe. We don't bother
2616  * tracking this in fd.c, either.
2617  */
2618  if (!am_syslogger)
2619  {
2620 #ifndef WIN32
2621  if (syslogPipe[0] >= 0)
2622  close(syslogPipe[0]);
2623  syslogPipe[0] = -1;
2624 #else
2625  if (syslogPipe[0])
2626  CloseHandle(syslogPipe[0]);
2627  syslogPipe[0] = 0;
2628 #endif
2629  }
2630 
2631 #ifdef USE_BONJOUR
2632  /* If using Bonjour, close the connection to the mDNS daemon */
2633  if (bonjour_sdref)
2634  close(DNSServiceRefSockFD(bonjour_sdref));
2635 #endif
2636 }
2637 
2638 
2639 /*
2640  * InitProcessGlobals -- set MyProcPid, MyStartTime[stamp], random seeds
2641  *
2642  * Called early in the postmaster and every backend.
2643  */
2644 void
2646 {
2647  unsigned int rseed;
2648 
2649  MyProcPid = getpid();
2652 
2653  /*
2654  * Set a different seed for random() in every process. We want something
2655  * unpredictable, so if possible, use high-quality random bits for the
2656  * seed. Otherwise, fall back to a seed based on timestamp and PID.
2657  */
2658  if (!pg_strong_random(&rseed, sizeof(rseed)))
2659  {
2660  /*
2661  * Since PIDs and timestamps tend to change more frequently in their
2662  * least significant bits, shift the timestamp left to allow a larger
2663  * total number of seeds in a given time period. Since that would
2664  * leave only 20 bits of the timestamp that cycle every ~1 second,
2665  * also mix in some higher bits.
2666  */
2667  rseed = ((uint64) MyProcPid) ^
2668  ((uint64) MyStartTimestamp << 12) ^
2669  ((uint64) MyStartTimestamp >> 20);
2670  }
2671  srandom(rseed);
2672 }
2673 
2674 
/*
 * reset_shared -- reset shared memory and semaphores
 */
static void
reset_shared(void)
{
	/*
	 * Create or re-create shared memory and semaphores.
	 *
	 * Note: in each "cycle of life" we will normally assign the same IPC keys
	 * (if using SysV shmem and/or semas).  This helps ensure that we will
	 * clean up dead IPC objects if the postmaster crashes and is restarted.
	 */
	CreateSharedMemoryAndSemaphores();
}
2690 
2691 
2692 /*
2693  * SIGHUP -- reread config files, and tell children to do same
2694  */
2695 static void
2697 {
2698  int save_errno = errno;
2699 
2700  /*
2701  * We rely on the signal mechanism to have blocked all signals ... except
2702  * on Windows, which lacks sigaction(), so we have to do it manually.
2703  */
2704 #ifdef WIN32
2705  PG_SETMASK(&BlockSig);
2706 #endif
2707 
2708  if (Shutdown <= SmartShutdown)
2709  {
2710  ereport(LOG,
2711  (errmsg("received SIGHUP, reloading configuration files")));
2714  if (StartupPID != 0)
2716  if (BgWriterPID != 0)
2718  if (CheckpointerPID != 0)
2720  if (WalWriterPID != 0)
2722  if (WalReceiverPID != 0)
2724  if (AutoVacPID != 0)
2726  if (PgArchPID != 0)
2728  if (SysLoggerPID != 0)
2730  if (PgStatPID != 0)
2732 
2733  /* Reload authentication config files too */
2734  if (!load_hba())
2735  ereport(LOG,
2736  /* translator: %s is a configuration file */
2737  (errmsg("%s was not reloaded", "pg_hba.conf")));
2738 
2739  if (!load_ident())
2740  ereport(LOG,
2741  (errmsg("%s was not reloaded", "pg_ident.conf")));
2742 
2743 #ifdef USE_SSL
2744  /* Reload SSL configuration as well */
2745  if (EnableSSL)
2746  {
2747  if (secure_initialize(false) == 0)
2748  LoadedSSL = true;
2749  else
2750  ereport(LOG,
2751  (errmsg("SSL configuration was not reloaded")));
2752  }
2753  else
2754  {
2755  secure_destroy();
2756  LoadedSSL = false;
2757  }
2758 #endif
2759 
2760 #ifdef EXEC_BACKEND
2761  /* Update the starting-point file for future children */
2762  write_nondefault_variables(PGC_SIGHUP);
2763 #endif
2764  }
2765 
2766 #ifdef WIN32
2768 #endif
2769 
2770  errno = save_errno;
2771 }
2772 
2773 
2774 /*
2775  * pmdie -- signal handler for processing various postmaster signals.
2776  */
2777 static void
2779 {
2780  int save_errno = errno;
2781 
2782  /*
2783  * We rely on the signal mechanism to have blocked all signals ... except
2784  * on Windows, which lacks sigaction(), so we have to do it manually.
2785  */
2786 #ifdef WIN32
2787  PG_SETMASK(&BlockSig);
2788 #endif
2789 
2790  ereport(DEBUG2,
2791  (errmsg_internal("postmaster received signal %d",
2792  postgres_signal_arg)));
2793 
2794  switch (postgres_signal_arg)
2795  {
2796  case SIGTERM:
2797 
2798  /*
2799  * Smart Shutdown:
2800  *
2801  * Wait for children to end their work, then shut down.
2802  */
2803  if (Shutdown >= SmartShutdown)
2804  break;
2806  ereport(LOG,
2807  (errmsg("received smart shutdown request")));
2808 
2809  /* Report status */
2811 #ifdef USE_SYSTEMD
2812  sd_notify(0, "STOPPING=1");
2813 #endif
2814 
2815  /*
2816  * If we reached normal running, we have to wait for any online
2817  * backup mode to end; otherwise go straight to waiting for client
2818  * backends to exit. (The difference is that in the former state,
2819  * we'll still let in new superuser clients, so that somebody can
2820  * end the online backup mode.) If already in PM_STOP_BACKENDS or
2821  * a later state, do not change it.
2822  */
2823  if (pmState == PM_RUN)
2825  else if (pmState == PM_HOT_STANDBY)
2827  else if (pmState == PM_STARTUP || pmState == PM_RECOVERY)
2828  {
2829  /* There should be no clients, so proceed to stop children */
2831  }
2832 
2833  /*
2834  * Now wait for online backup mode to end and backends to exit. If
2835  * that is already the case, PostmasterStateMachine will take the
2836  * next step.
2837  */
2839  break;
2840 
2841  case SIGINT:
2842 
2843  /*
2844  * Fast Shutdown:
2845  *
2846  * Abort all children with SIGTERM (rollback active transactions
2847  * and exit) and shut down when they are gone.
2848  */
2849  if (Shutdown >= FastShutdown)
2850  break;
2852  ereport(LOG,
2853  (errmsg("received fast shutdown request")));
2854 
2855  /* Report status */
2857 #ifdef USE_SYSTEMD
2858  sd_notify(0, "STOPPING=1");
2859 #endif
2860 
2861  if (pmState == PM_STARTUP || pmState == PM_RECOVERY)
2862  {
2863  /* Just shut down background processes silently */
2865  }
2866  else if (pmState == PM_RUN ||
2868  {
2869  /* Report that we're about to zap live client sessions */
2870  ereport(LOG,
2871  (errmsg("aborting any active transactions")));
2873  }
2874 
2875  /*
2876  * PostmasterStateMachine will issue any necessary signals, or
2877  * take the next step if no child processes need to be killed.
2878  */
2880  break;
2881 
2882  case SIGQUIT:
2883 
2884  /*
2885  * Immediate Shutdown:
2886  *
2887  * abort all children with SIGQUIT, wait for them to exit,
2888  * terminate remaining ones with SIGKILL, then exit without
2889  * attempt to properly shut down the data base system.
2890  */
2891  if (Shutdown >= ImmediateShutdown)
2892  break;
2894  ereport(LOG,
2895  (errmsg("received immediate shutdown request")));
2896 
2897  /* Report status */
2899 #ifdef USE_SYSTEMD
2900  sd_notify(0, "STOPPING=1");
2901 #endif
2902 
2905 
2906  /* set stopwatch for them to die */
2907  AbortStartTime = time(NULL);
2908 
2909  /*
2910  * Now wait for backends to exit. If there are none,
2911  * PostmasterStateMachine will take the next step.
2912  */
2914  break;
2915  }
2916 
2917 #ifdef WIN32
2919 #endif
2920 
2921  errno = save_errno;
2922 }
2923 
2924 /*
2925  * Reaper -- signal handler to cleanup after a child process dies.
2926  */
2927 static void
2929 {
2930  int save_errno = errno;
2931  int pid; /* process id of dead child process */
2932  int exitstatus; /* its exit status */
2933 
2934  /*
2935  * We rely on the signal mechanism to have blocked all signals ... except
2936  * on Windows, which lacks sigaction(), so we have to do it manually.
2937  */
2938 #ifdef WIN32
2939  PG_SETMASK(&BlockSig);
2940 #endif
2941 
2942  ereport(DEBUG4,
2943  (errmsg_internal("reaping dead processes")));
2944 
2945  while ((pid = waitpid(-1, &exitstatus, WNOHANG)) > 0)
2946  {
2947  /*
2948  * Check if this child was a startup process.
2949  */
2950  if (pid == StartupPID)
2951  {
2952  StartupPID = 0;
2953 
2954  /*
2955  * Startup process exited in response to a shutdown request (or it
2956  * completed normally regardless of the shutdown request).
2957  */
2958  if (Shutdown > NoShutdown &&
2959  (EXIT_STATUS_0(exitstatus) || EXIT_STATUS_1(exitstatus)))
2960  {
2963  /* PostmasterStateMachine logic does the rest */
2964  continue;
2965  }
2966 
2967  if (EXIT_STATUS_3(exitstatus))
2968  {
2969  ereport(LOG,
2970  (errmsg("shutdown at recovery target")));
2973  TerminateChildren(SIGTERM);
2975  /* PostmasterStateMachine logic does the rest */
2976  continue;
2977  }
2978 
2979  /*
2980  * Unexpected exit of startup process (including FATAL exit)
2981  * during PM_STARTUP is treated as catastrophic. There are no
2982  * other processes running yet, so we can just exit.
2983  */
2984  if (pmState == PM_STARTUP &&
2986  !EXIT_STATUS_0(exitstatus))
2987  {
2988  LogChildExit(LOG, _("startup process"),
2989  pid, exitstatus);
2990  ereport(LOG,
2991  (errmsg("aborting startup due to startup process failure")));
2992  ExitPostmaster(1);
2993  }
2994 
2995  /*
2996  * After PM_STARTUP, any unexpected exit (including FATAL exit) of
2997  * the startup process is catastrophic, so kill other children,
2998  * and set StartupStatus so we don't try to reinitialize after
2999  * they're gone. Exception: if StartupStatus is STARTUP_SIGNALED,
3000  * then we previously sent the startup process a SIGQUIT; so
3001  * that's probably the reason it died, and we do want to try to
3002  * restart in that case.
3003  *
3004  * This stanza also handles the case where we sent a SIGQUIT
3005  * during PM_STARTUP due to some dead_end child crashing: in that
3006  * situation, if the startup process dies on the SIGQUIT, we need
3007  * to transition to PM_WAIT_BACKENDS state which will allow
3008  * PostmasterStateMachine to restart the startup process. (On the
3009  * other hand, the startup process might complete normally, if we
3010  * were too late with the SIGQUIT. In that case we'll fall
3011  * through and commence normal operations.)
3012  */
3013  if (!EXIT_STATUS_0(exitstatus))
3014  {
3016  {
3018  if (pmState == PM_STARTUP)
3020  }
3021  else
3023  HandleChildCrash(pid, exitstatus,
3024  _("startup process"));
3025  continue;
3026  }
3027 
3028  /*
3029  * Startup succeeded, commence normal operations
3030  */
3032  FatalError = false;
3033  AbortStartTime = 0;
3034  ReachedNormalRunning = true;
3035  pmState = PM_RUN;
3037 
3038  /*
3039  * Crank up the background tasks, if we didn't do that already
3040  * when we entered consistent recovery state. It doesn't matter
3041  * if this fails, we'll just try again later.
3042  */
3043  if (CheckpointerPID == 0)
3045  if (BgWriterPID == 0)
3047  if (WalWriterPID == 0)
3049 
3050  /*
3051  * Likewise, start other special children as needed. In a restart
3052  * situation, some of them may be alive already.
3053  */
3056  if (PgArchStartupAllowed() && PgArchPID == 0)
3057  PgArchPID = pgarch_start();
3058  if (PgStatPID == 0)
3059  PgStatPID = pgstat_start();
3060 
3061  /* workers may be scheduled to start now */
3063 
3064  /* at this point we are really open for business */
3065  ereport(LOG,
3066  (errmsg("database system is ready to accept connections")));
3067 
3068  /* Report status */
3070 #ifdef USE_SYSTEMD
3071  sd_notify(0, "READY=1");
3072 #endif
3073 
3074  continue;
3075  }
3076 
3077  /*
3078  * Was it the bgwriter? Normal exit can be ignored; we'll start a new
3079  * one at the next iteration of the postmaster's main loop, if
3080  * necessary. Any other exit condition is treated as a crash.
3081  */
3082  if (pid == BgWriterPID)
3083  {
3084  BgWriterPID = 0;
3085  if (!EXIT_STATUS_0(exitstatus))
3086  HandleChildCrash(pid, exitstatus,
3087  _("background writer process"));
3088  continue;
3089  }
3090 
3091  /*
3092  * Was it the checkpointer?
3093  */
3094  if (pid == CheckpointerPID)
3095  {
3096  CheckpointerPID = 0;
3097  if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN)
3098  {
3099  /*
3100  * OK, we saw normal exit of the checkpointer after it's been
3101  * told to shut down. We expect that it wrote a shutdown
3102  * checkpoint. (If for some reason it didn't, recovery will
3103  * occur on next postmaster start.)
3104  *
3105  * At this point we should have no normal backend children
3106  * left (else we'd not be in PM_SHUTDOWN state) but we might
3107  * have dead_end children to wait for.
3108  *
3109  * If we have an archiver subprocess, tell it to do a last
3110  * archive cycle and quit. Likewise, if we have walsender
3111  * processes, tell them to send any remaining WAL and quit.
3112  */
3114 
3115  /* Waken archiver for the last time */
3116  if (PgArchPID != 0)
3118 
3119  /*
3120  * Waken walsenders for the last time. No regular backends
3121  * should be around anymore.
3122  */
3124 
3126 
3127  /*
3128  * We can also shut down the stats collector now; there's
3129  * nothing left for it to do.
3130  */
3131  if (PgStatPID != 0)
3133  }
3134  else
3135  {
3136  /*
3137  * Any unexpected exit of the checkpointer (including FATAL
3138  * exit) is treated as a crash.
3139  */
3140  HandleChildCrash(pid, exitstatus,
3141  _("checkpointer process"));
3142  }
3143 
3144  continue;
3145  }
3146 
3147  /*
3148  * Was it the wal writer? Normal exit can be ignored; we'll start a
3149  * new one at the next iteration of the postmaster's main loop, if
3150  * necessary. Any other exit condition is treated as a crash.
3151  */
3152  if (pid == WalWriterPID)
3153  {
3154  WalWriterPID = 0;
3155  if (!EXIT_STATUS_0(exitstatus))
3156  HandleChildCrash(pid, exitstatus,
3157  _("WAL writer process"));
3158  continue;
3159  }
3160 
3161  /*
3162  * Was it the wal receiver? If exit status is zero (normal) or one
3163  * (FATAL exit), we assume everything is all right just like normal
3164  * backends. (If we need a new wal receiver, we'll start one at the
3165  * next iteration of the postmaster's main loop.)
3166  */
3167  if (pid == WalReceiverPID)
3168  {
3169  WalReceiverPID = 0;
3170  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3171  HandleChildCrash(pid, exitstatus,
3172  _("WAL receiver process"));
3173  continue;
3174  }
3175 
3176  /*
3177  * Was it the autovacuum launcher? Normal exit can be ignored; we'll
3178  * start a new one at the next iteration of the postmaster's main
3179  * loop, if necessary. Any other exit condition is treated as a
3180  * crash.
3181  */
3182  if (pid == AutoVacPID)
3183  {
3184  AutoVacPID = 0;
3185  if (!EXIT_STATUS_0(exitstatus))
3186  HandleChildCrash(pid, exitstatus,
3187  _("autovacuum launcher process"));
3188  continue;
3189  }
3190 
3191  /*
3192  * Was it the archiver? If so, just try to start a new one; no need
3193  * to force reset of the rest of the system. (If fail, we'll try
3194  * again in future cycles of the main loop.). Unless we were waiting
3195  * for it to shut down; don't restart it in that case, and
3196  * PostmasterStateMachine() will advance to the next shutdown step.
3197  */
3198  if (pid == PgArchPID)
3199  {
3200  PgArchPID = 0;
3201  if (!EXIT_STATUS_0(exitstatus))
3202  LogChildExit(LOG, _("archiver process"),
3203  pid, exitstatus);
3204  if (PgArchStartupAllowed())
3205  PgArchPID = pgarch_start();
3206  continue;
3207  }
3208 
3209  /*
3210  * Was it the statistics collector? If so, just try to start a new
3211  * one; no need to force reset of the rest of the system. (If fail,
3212  * we'll try again in future cycles of the main loop.)
3213  */
3214  if (pid == PgStatPID)
3215  {
3216  PgStatPID = 0;
3217  if (!EXIT_STATUS_0(exitstatus))
3218  LogChildExit(LOG, _("statistics collector process"),
3219  pid, exitstatus);
3220  if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
3221  PgStatPID = pgstat_start();
3222  continue;
3223  }
3224 
3225  /* Was it the system logger? If so, try to start a new one */
3226  if (pid == SysLoggerPID)
3227  {
3228  SysLoggerPID = 0;
3229  /* for safety's sake, launch new logger *first* */
3231  if (!EXIT_STATUS_0(exitstatus))
3232  LogChildExit(LOG, _("system logger process"),
3233  pid, exitstatus);
3234  continue;
3235  }
3236 
3237  /* Was it one of our background workers? */
3238  if (CleanupBackgroundWorker(pid, exitstatus))
3239  {
3240  /* have it be restarted */
3241  HaveCrashedWorker = true;
3242  continue;
3243  }
3244 
3245  /*
3246  * Else do standard backend child cleanup.
3247  */
3248  CleanupBackend(pid, exitstatus);
3249  } /* loop over pending child-death reports */
3250 
3251  /*
3252  * After cleaning out the SIGCHLD queue, see if we have any state changes
3253  * or actions to make.
3254  */
3256 
3257  /* Done with signal handler */
3258 #ifdef WIN32
3260 #endif
3261 
3262  errno = save_errno;
3263 }
3264 
3265 /*
3266  * Scan the bgworkers list and see if the given PID (which has just stopped
3267  * or crashed) is in it. Handle its shutdown if so, and return true. If not a
3268  * bgworker, return false.
3269  *
3270  * This is heavily based on CleanupBackend. One important difference is that
3271  * we don't know yet that the dying process is a bgworker, so we must be silent
3272  * until we're sure it is.
 *
 * The PID is matched against each list entry's rw_pid; exitstatus is the
 * child's exit status. A zero status marks the worker for termination
 * (rw_terminate is set and rw_crashed_at cleared). For workers flagged
 * BGWORKER_SHMEM_ACCESS, any status other than 0 or 1 is handed to
 * HandleChildCrash instead of going through the normal cleanup below.
 *
 * NOTE(review): this listing skips its own lines 3275 (the line carrying the
 * function name and first parameter), 3281, 3303, 3331-3332 and 3351, so the
 * text below is not the complete function. Each gap is marked where it
 * occurs; confirm the missing text against the upstream file.
3273  */
3274 static bool
3276  int exitstatus) /* child's exit status */
3277 {
3278  char namebuf[MAXPGPATH];
3279  slist_mutable_iter iter;
3280 
 /*
  * NOTE(review): listing line 3281, the loop header that drives "iter", is
  * missing here; presumably a mutable slist iteration over the registered
  * worker list.
  */
3282  {
3283  RegisteredBgWorker *rw;
3284 
3285  rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
3286 
 /* Not the process we are looking for: move on to the next entry. */
3287  if (rw->rw_pid != pid)
3288  continue;
3289 
3290 #ifdef WIN32
3291  /* see CleanupBackend */
3292  if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3293  exitstatus = 0;
3294 #endif
3295 
 /* Build the process description used in the crash/exit reports below. */
3296  snprintf(namebuf, MAXPGPATH, _("background worker \"%s\""),
3297  rw->rw_worker.bgw_type);
3298 
3299 
3300  if (!EXIT_STATUS_0(exitstatus))
3301  {
3302  /* Record timestamp, so we know when to restart the worker. */
 /*
  * NOTE(review): listing line 3303, the statement that records the
  * timestamp, is missing here.
  */
3304  }
3305  else
3306  {
3307  /* Zero exit status means terminate */
3308  rw->rw_crashed_at = 0;
3309  rw->rw_terminate = true;
3310  }
3311 
3312  /*
3313  * Additionally, for shared-memory-connected workers, just like a
3314  * backend, any exit status other than 0 or 1 is considered a crash
3315  * and causes a system-wide restart.
3316  */
3317  if ((rw->rw_worker.bgw_flags & BGWORKER_SHMEM_ACCESS) != 0)
3318  {
3319  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3320  {
3321  HandleChildCrash(pid, exitstatus, namebuf);
3322  return true;
3323  }
3324  }
3325 
3326  /*
3327  * We must release the postmaster child slot whether this worker is
3328  * connected to shared memory or not, but we only treat it as a crash
3329  * if it is in fact connected.
3330  */
 /*
  * NOTE(review): listing lines 3331-3332, the condition guarding the block
  * below, are missing. Per the comment above, presumably it releases the
  * child slot and tests both the result and the shmem-access flag.
  */
3333  {
3334  HandleChildCrash(pid, exitstatus, namebuf);
3335  return true;
3336  }
3337 
3338  /* Get it out of the BackendList and clear out remaining data */
3339  dlist_delete(&rw->rw_backend->elem);
3340 #ifdef EXEC_BACKEND
3341  ShmemBackendArrayRemove(rw->rw_backend);
3342 #endif
3343 
3344  /*
3345  * It's possible that this background worker started some OTHER
3346  * background worker and asked to be notified when that worker started
3347  * or stopped. If so, cancel any notifications destined for the
3348  * now-dead backend.
3349  */
3350  if (rw->rw_backend->bgworker_notify)
 /*
  * NOTE(review): listing line 3351, the statement controlled by the "if"
  * above, is missing. As printed, the free() below looks conditional;
  * presumably it is unconditional in the real source.
  */
3352  free(rw->rw_backend);
3353  rw->rw_backend = NULL;
3354  rw->rw_pid = 0;
3355  rw->rw_child_slot = 0;
3356  ReportBackgroundWorkerExit(&iter); /* report child death */
3357 
 /* A clean exit is logged only at DEBUG1; anything else at LOG. */
3358  LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG,
3359  namebuf, pid, exitstatus);
3360 
3361  return true;
3362  }
3363 
 /* No list entry had this PID, so it was not a background worker. */
3364  return false;
3365 }
3366 
3367 /*
3368  * CleanupBackend -- cleanup after terminated backend.
3369  *
3370  * Remove all local state associated with backend.
3371  *
3372  * If you change this, see also CleanupBackgroundWorker.
 *
 * The exit is always logged at DEBUG2. An exit status other than 0 or 1 is
 * passed to HandleChildCrash and nothing else is done here; otherwise the
 * BackendList entry whose pid matches is unlinked and freed.
 *
 * NOTE(review): this listing skips its own lines 3375 (the line carrying the
 * function name and first parameter), 3419 and 3442; each gap is marked
 * below. Confirm the missing text against the upstream file.
3373  */
3374 static void
3376  int exitstatus) /* child's exit status. */
3377 {
3378  dlist_mutable_iter iter;
3379 
3380  LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
3381 
3382  /*
3383  * If a backend dies in an ugly way then we must signal all other backends
3384  * to quickdie. If exit status is zero (normal) or one (FATAL exit), we
3385  * assume everything is all right and proceed to remove the backend from
3386  * the active backend list.
3387  */
3388 
3389 #ifdef WIN32
3390 
3391  /*
3392  * On win32, also treat ERROR_WAIT_NO_CHILDREN (128) as nonfatal case,
3393  * since that sometimes happens under load when the process fails to start
3394  * properly (long before it starts using shared memory). Microsoft reports
3395  * it is related to mutex failure:
3396  * http://archives.postgresql.org/pgsql-hackers/2010-09/msg00790.php
3397  */
3398  if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3399  {
3400  LogChildExit(LOG, _("server process"), pid, exitstatus);
3401  exitstatus = 0;
3402  }
3403 #endif
3404 
3405  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3406  {
3407  HandleChildCrash(pid, exitstatus, _("server process"));
3408  return;
3409  }
3410 
 /* Find this PID's entry; at most one entry is processed (see the break). */
3411  dlist_foreach_modify(iter, &BackendList)
3412  {
3413  Backend *bp = dlist_container(Backend, elem, iter.cur);
3414 
3415  if (bp->pid == pid)
3416  {
3417  if (!bp->dead_end)
3418  {
 /*
  * NOTE(review): listing line 3419, the condition guarding the block
  * below, is missing; presumably a failed attempt to release the
  * backend's child slot.
  */
3420  {
3421  /*
3422  * Uh-oh, the child failed to clean itself up. Treat as a
3423  * crash after all.
3424  */
3425  HandleChildCrash(pid, exitstatus, _("server process"));
3426  return;
3427  }
3428 #ifdef EXEC_BACKEND
3429  ShmemBackendArrayRemove(bp);
3430 #endif
3431  }
3432  if (bp->bgworker_notify)
3433  {
3434  /*
3435  * This backend may have been slated to receive SIGUSR1 when
3436  * some background worker started or stopped. Cancel those
3437  * notifications, as we don't want to signal PIDs that are not
3438  * PostgreSQL backends. This gets skipped in the (probably
3439  * very common) case where the backend has never requested any
3440  * such notifications.
3441  */
 /*
  * NOTE(review): listing line 3442, the call that cancels the
  * notifications, is missing here.
  */
3443  }
3444  dlist_delete(iter.cur);
3445  free(bp);
3446  break;
3447  }
3448  }
3449 }
3450 
3451 /*
3452  * HandleChildCrash -- cleanup after failed backend, bgwriter, checkpointer,
3453  * walwriter, autovacuum, or background worker.
3454  *
3455  * The objectives here are to clean up our local state about the child
3456  * process, and to signal all other remaining children to quickdie.
 *
 * pid and exitstatus identify the dead child; procname is the description
 * passed to LogChildExit when the crash is logged. The entry matching pid
 * is removed from the background-worker list or BackendList, and whichever
 * of StartupPID, BgWriterPID, CheckpointerPID, WalWriterPID, WalReceiverPID
 * or AutoVacPID equals pid is zeroed. All other live children are signaled
 * only when take_action is true. PgArchPID and PgStatPID are never zeroed
 * here and are always sent SIGQUIT, regardless of SendStop.
 *
 * NOTE(review): this listing skips its own lines 3483, 3495, 3525, 3542,
 * 3595, 3686, 3698 and 3700; each gap is marked below. Confirm the missing
 * text against the upstream file.
3457  */
3458 static void
3459 HandleChildCrash(int pid, int exitstatus, const char *procname)
3460 {
3461  dlist_mutable_iter iter;
3462  slist_iter siter;
3463  Backend *bp;
3464  bool take_action;
3465 
3466  /*
3467  * We only log messages and send signals if this is the first process
3468  * crash and we're not doing an immediate shutdown; otherwise, we're only
3469  * here to update postmaster's idea of live processes. If we have already
3470  * signaled children, nonzero exit status is to be expected, so don't
3471  * clutter log.
3472  */
3473  take_action = !FatalError && Shutdown != ImmediateShutdown;
3474 
3475  if (take_action)
3476  {
3477  LogChildExit(LOG, procname, pid, exitstatus);
3478  ereport(LOG,
3479  (errmsg("terminating any other active server processes")));
3480  }
3481 
3482  /* Process background workers. */
 /*
  * NOTE(review): listing line 3483, the loop header that drives "siter", is
  * missing here; presumably a read-only slist iteration over the registered
  * worker list.
  */
3484  {
3485  RegisteredBgWorker *rw;
3486 
3487  rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
3488  if (rw->rw_pid == 0)
3489  continue; /* not running */
3490  if (rw->rw_pid == pid)
3491  {
3492  /*
3493  * Found entry for freshly-dead worker, so remove it.
3494  */
 /*
  * NOTE(review): listing line 3495 is missing here; presumably it releases
  * the worker's child slot (rw_child_slot is zeroed below).
  */
3496  dlist_delete(&rw->rw_backend->elem);
3497 #ifdef EXEC_BACKEND
3498  ShmemBackendArrayRemove(rw->rw_backend);
3499 #endif
3500  free(rw->rw_backend);
3501  rw->rw_backend = NULL;
3502  rw->rw_pid = 0;
3503  rw->rw_child_slot = 0;
3504  /* don't reset crashed_at */
3505  /* don't report child stop, either */
3506  /* Keep looping so we can signal remaining workers */
3507  }
3508  else
3509  {
3510  /*
3511  * This worker is still alive. Unless we did so already, tell it
3512  * to commit hara-kiri.
3513  *
3514  * SIGQUIT is the special signal that says exit without proc_exit
3515  * and let the user know what's going on. But if SendStop is set
3516  * (-s on command line), then we send SIGSTOP instead, so that we
3517  * can get core dumps from all backends by hand.
3518  */
3519  if (take_action)
3520  {
3521  ereport(DEBUG2,
3522  (errmsg_internal("sending %s to process %d",
3523  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3524  (int) rw->rw_pid)));
 /*
  * NOTE(review): listing line 3525 is missing here; presumably the
  * signal_child() call that actually delivers the signal announced above.
  */
3526  }
3527  }
3528  }
3529 
3530  /* Process regular backends */
3531  dlist_foreach_modify(iter, &BackendList)
3532  {
3533  bp = dlist_container(Backend, elem, iter.cur);
3534 
3535  if (bp->pid == pid)
3536  {
3537  /*
3538  * Found entry for freshly-dead backend, so remove it.
3539  */
3540  if (!bp->dead_end)
3541  {
 /*
  * NOTE(review): listing line 3542 is missing here; presumably it releases
  * the backend's child slot.
  */
3543 #ifdef EXEC_BACKEND
3544  ShmemBackendArrayRemove(bp);
3545 #endif
3546  }
3547  dlist_delete(iter.cur);
3548  free(bp);
3549  /* Keep looping so we can signal remaining backends */
3550  }
3551  else
3552  {
3553  /*
3554  * This backend is still alive. Unless we did so already, tell it
3555  * to commit hara-kiri.
3556  *
3557  * SIGQUIT is the special signal that says exit without proc_exit
3558  * and let the user know what's going on. But if SendStop is set
3559  * (-s on command line), then we send SIGSTOP instead, so that we
3560  * can get core dumps from all backends by hand.
3561  *
3562  * We could exclude dead_end children here, but at least in the
3563  * SIGSTOP case it seems better to include them.
3564  *
3565  * Background workers were already processed above; ignore them
3566  * here.
3567  */
3568  if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
3569  continue;
3570 
3571  if (take_action)
3572  {
3573  ereport(DEBUG2,
3574  (errmsg_internal("sending %s to process %d",
3575  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3576  (int) bp->pid)));
3577  signal_child(bp->pid, (SendStop ? SIGSTOP : SIGQUIT));
3578  }
3579  }
3580  }
3581 
3582  /* Take care of the startup process too */
3583  if (pid == StartupPID)
3584  {
3585  StartupPID = 0;
3586  /* Caller adjusts StartupStatus, so don't touch it here */
3587  }
3588  else if (StartupPID != 0 && take_action)
3589  {
3590  ereport(DEBUG2,
3591  (errmsg_internal("sending %s to process %d",
3592  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3593  (int) StartupPID)));
3594  signal_child(StartupPID, (SendStop ? SIGSTOP : SIGQUIT));
 /*
  * NOTE(review): listing line 3595 is missing here; presumably it records in
  * StartupStatus that the startup process was signaled.
  */
3596  }
3597 
3598  /* Take care of the bgwriter too */
3599  if (pid == BgWriterPID)
3600  BgWriterPID = 0;
3601  else if (BgWriterPID != 0 && take_action)
3602  {
3603  ereport(DEBUG2,
3604  (errmsg_internal("sending %s to process %d",
3605  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3606  (int) BgWriterPID)));
3607  signal_child(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
3608  }
3609 
3610  /* Take care of the checkpointer too */
3611  if (pid == CheckpointerPID)
3612  CheckpointerPID = 0;
3613  else if (CheckpointerPID != 0 && take_action)
3614  {
3615  ereport(DEBUG2,
3616  (errmsg_internal("sending %s to process %d",
3617  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3618  (int) CheckpointerPID)));
3619  signal_child(CheckpointerPID, (SendStop ? SIGSTOP : SIGQUIT));
3620  }
3621 
3622  /* Take care of the walwriter too */
3623  if (pid == WalWriterPID)
3624  WalWriterPID = 0;
3625  else if (WalWriterPID != 0 && take_action)
3626  {
3627  ereport(DEBUG2,
3628  (errmsg_internal("sending %s to process %d",
3629  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3630  (int) WalWriterPID)));
3631  signal_child(WalWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
3632  }
3633 
3634  /* Take care of the walreceiver too */
3635  if (pid == WalReceiverPID)
3636  WalReceiverPID = 0;
3637  else if (WalReceiverPID != 0 && take_action)
3638  {
3639  ereport(DEBUG2,
3640  (errmsg_internal("sending %s to process %d",
3641  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3642  (int) WalReceiverPID)));
3643  signal_child(WalReceiverPID, (SendStop ? SIGSTOP : SIGQUIT));
3644  }
3645 
3646  /* Take care of the autovacuum launcher too */
3647  if (pid == AutoVacPID)
3648  AutoVacPID = 0;
3649  else if (AutoVacPID != 0 && take_action)
3650  {
3651  ereport(DEBUG2,
3652  (errmsg_internal("sending %s to process %d",
3653  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3654  (int) AutoVacPID)));
3655  signal_child(AutoVacPID, (SendStop ? SIGSTOP : SIGQUIT));
3656  }
3657 
3658  /*
3659  * Force a power-cycle of the pgarch process too. (This isn't absolutely
3660  * necessary, but it seems like a good idea for robustness, and it
3661  * simplifies the state-machine logic in the case where a shutdown request
3662  * arrives during crash processing.)
3663  */
3664  if (PgArchPID != 0 && take_action)
3665  {
3666  ereport(DEBUG2,
3667  (errmsg_internal("sending %s to process %d",
3668  "SIGQUIT",
3669  (int) PgArchPID)));
3670  signal_child(PgArchPID, SIGQUIT);
3671  }
3672 
3673  /*
3674  * Force a power-cycle of the pgstat process too. (This isn't absolutely
3675  * necessary, but it seems like a good idea for robustness, and it
3676  * simplifies the state-machine logic in the case where a shutdown request
3677  * arrives during crash processing.)
3678  */
3679  if (PgStatPID != 0 && take_action)
3680  {
3681  ereport(DEBUG2,
3682  (errmsg_internal("sending %s to process %d",
3683  "SIGQUIT",
3684  (int) PgStatPID)));
3685  signal_child(PgStatPID, SIGQUIT);
 /*
  * NOTE(review): listing line 3686 is missing here; one more statement
  * follows the signal_child() call in this block.
  */
3687  }
3688 
3689  /* We do NOT restart the syslogger */
3690 
3691  if (Shutdown != ImmediateShutdown)
3692  FatalError = true;
3693 
3694  /* We now transit into a state of waiting for children to die */
3695  if (pmState == PM_RECOVERY ||
3696  pmState == PM_HOT_STANDBY ||
3697  pmState == PM_RUN ||
 /*
  * NOTE(review): listing line 3698 is missing here; presumably one more
  * pmState alternative in this condition.
  */
3699  pmState == PM_SHUTDOWN)
 /*
  * NOTE(review): listing line 3700, the statement controlled by the "if"
  * above, is missing; per the comment it presumably moves pmState to a
  * wait-for-children state. As printed, the next "if" looks like the body,
  * which it is not.
  */
3701 
3702  /*
3703  * .. and if this doesn't happen quickly enough, now the clock is ticking
3704  * for us to kill them without mercy.
3705  */
3706  if (AbortStartTime == 0)
3707  AbortStartTime = time(NULL);
3708 }
3709 
3710 /*
3711  * Log the death of a child process.
3712  */
3713 static void
3714 LogChildExit(int lev, const char *procname, int pid, int exitstatus)
3715 {
3716  /*
3717  * size of activity_buffer is arbitrary, but set equal to default
3718  * track_activity_query_size
3719  */
3720  char activity_buffer[1024];
3721  const char *activity = NULL;
3722 
3723  if (!EXIT_STATUS_0(exitstatus))
3724  activity = pgstat_get_crashed_backend_activity(pid,
3725  activity_buffer,
3726  sizeof(activity_buffer));
3727 
3728  if (WIFEXITED(exitstatus))
3729  ereport(lev,
3730 
3731  /*------
3732  translator: %s is a noun phrase describing a child process, such as
3733  "server process" */
3734  (errmsg("%s (PID %d) exited with exit code %d",
3735  procname, pid, WEXITSTATUS(exitstatus)),
3736  activity ? errdetail("Failed process was running: %s", activity) : 0));
3737  else if (WIFSIGNALED(exitstatus))
3738  {
3739 #if defined(WIN32)
3740  ereport(lev,
3741 
3742  /*------
3743  translator: %s is a noun phrase describing a child process, such as
3744  "server process" */
3745  (errmsg("%s (PID %d) was terminated by exception 0x%X",
3746  procname, pid, WTERMSIG(exitstatus)),
3747  errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
3748  activity ? errdetail("Failed process was running: %s", activity) : 0));
3749 #else
3750  ereport(lev,
3751 
3752  /*------
3753  translator: %s is a noun phrase describing a child process, such as
3754  "server process" */
3755  (errmsg("%s (PID %d) was terminated by signal %d: %s",
3756  procname, pid, WTERMSIG(exitstatus),
3757  pg_strsignal(WTERMSIG(exitstatus))),
3758  activity ? errdetail("Failed process was running: %s", activity) : 0));
3759 #endif
3760  }
3761  else
3762  ereport(lev,
3763 
3764  /*------
3765  translator: %s is a noun phrase describing a child process, such as
3766  "server process" */
3767  (errmsg("%s (PID %d) exited with unrecognized status %d",
3768  procname, pid, exitstatus),
3769  activity ? errdetail("Failed process was running: %s", activity) : 0));
3770 }
3771 
3772 /*
3773  * Advance the postmaster's state machine and take actions as appropriate
3774  *
3775  * This is common code for pmdie(), reaper() and sigusr1_handler(), which
3776  * receive the signals that might mean we need to change state.
 *
 * The checks below run in a fixed order within a single call, so a state
 * assigned by an earlier check can be acted on by a later one (for example
 * PM_STOP_BACKENDS is documented below as a non-persistent state).
 *
 * NOTE(review): this listing skips its own lines 3779 (the line carrying the
 * function name), 3784, 3791, 3794, 3800-3801, 3814, 3832, 3853, 3858, 3862,
 * 3868, 3883, 3886, 3890, 3902, 3905, 3907, 3909, 3925, 3955, 3971, 3989,
 * 4005, 4018, 4023, 4027 and 4029. Most of the missing lines are conditions
 * or the statements that assign pmState, so the text below must not be read
 * as complete. Each gap is marked; confirm against the upstream file.
3777  */
3778 static void
3780 {
3781  /* If we're doing a smart shutdown, try to advance that state. */
3782  if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
3783  {
 /* NOTE(review): listing line 3784 (condition for this block) is missing. */
3785  {
3786  /*
3787  * ALLOW_SUPERUSER_CONNS state ends as soon as online backup mode
3788  * is not active.
3789  */
3790  if (!BackupInProgress())
 /* NOTE(review): listing line 3791 (the statement controlled by this "if") is missing. */
3792  }
3793 
 /* NOTE(review): listing line 3794 (condition for this block) is missing. */
3795  {
3796  /*
3797  * ALLOW_NO_CONNS state ends when we have no normal client
3798  * backends running. Then we're ready to stop other children.
3799  */
 /* NOTE(review): listing lines 3800-3801 (the test and resulting action) are missing. */
3802  }
3803  }
3804 
3805  /*
3806  * If we're ready to do so, signal child processes to shut down. (This
3807  * isn't a persistent state, but treating it as a distinct pmState allows
3808  * us to share this code across multiple shutdown code paths.)
3809  */
3810  if (pmState == PM_STOP_BACKENDS)
3811  {
3812  /* Signal all backend children except walsenders */
3813  SignalSomeChildren(SIGTERM,
 /* NOTE(review): listing line 3814 (the target argument closing this call) is missing. */
3815  /* and the autovac launcher too */
3816  if (AutoVacPID != 0)
3817  signal_child(AutoVacPID, SIGTERM);
3818  /* and the bgwriter too */
3819  if (BgWriterPID != 0)
3820  signal_child(BgWriterPID, SIGTERM);
3821  /* and the walwriter too */
3822  if (WalWriterPID != 0)
3823  signal_child(WalWriterPID, SIGTERM);
3824  /* If we're in recovery, also stop startup and walreceiver procs */
3825  if (StartupPID != 0)
3826  signal_child(StartupPID, SIGTERM);
3827  if (WalReceiverPID != 0)
3828  signal_child(WalReceiverPID, SIGTERM);
3829  /* checkpointer, archiver, stats, and syslogger may continue for now */
3830 
3831  /* Now transition to PM_WAIT_BACKENDS state to wait for them to die */
 /* NOTE(review): listing line 3832 (the pmState assignment described above) is missing. */
3833  }
3834 
3835  /*
3836  * If we are in a state-machine state that implies waiting for backends to
3837  * exit, see if they're all gone, and change state if so.
3838  */
3839  if (pmState == PM_WAIT_BACKENDS)
3840  {
3841  /*
3842  * PM_WAIT_BACKENDS state ends when we have no regular backends
3843  * (including autovac workers), no bgworkers (including unconnected
3844  * ones), and no walwriter, autovac launcher or bgwriter. If we are
3845  * doing crash recovery or an immediate shutdown then we expect the
3846  * checkpointer to exit as well, otherwise not. The archiver, stats,
3847  * and syslogger processes are disregarded since they are not
3848  * connected to shared memory; we also disregard dead_end children
3849  * here. Walsenders are also disregarded, they will be terminated
3850  * later after writing the checkpoint record, like the archiver
3851  * process.
3852  */
 /* NOTE(review): listing line 3853 (the "if (" and first operand of this condition) is missing. */
3854  StartupPID == 0 &&
3855  WalReceiverPID == 0 &&
3856  BgWriterPID == 0 &&
3857  (CheckpointerPID == 0 ||
 /* NOTE(review): listing line 3858 (second half of the checkpointer sub-condition) is missing. */
3859  WalWriterPID == 0 &&
3860  AutoVacPID == 0)
3861  {
 /* NOTE(review): listing line 3862 (condition choosing this branch over the "else" below) is missing. */
3863  {
3864  /*
3865  * Start waiting for dead_end children to die. This state
3866  * change causes ServerLoop to stop creating new ones.
3867  */
 /* NOTE(review): listing line 3868 (the pmState assignment described above) is missing. */
3869 
3870  /*
3871  * We already SIGQUIT'd the archiver and stats processes, if
3872  * any, when we started immediate shutdown or entered
3873  * FatalError state.
3874  */
3875  }
3876  else
3877  {
3878  /*
3879  * If we get here, we are proceeding with normal shutdown. All
3880  * the regular children are gone, and it's time to tell the
3881  * checkpointer to do a shutdown checkpoint.
3882  */
 /* NOTE(review): listing line 3883 is missing here. */
3884  /* Start the checkpointer if not running */
3885  if (CheckpointerPID == 0)
 /* NOTE(review): listing line 3886 (statement that starts the checkpointer) is missing. */
3887  /* And tell it to shut down */
3888  if (CheckpointerPID != 0)
3889  {
 /* NOTE(review): listing line 3890 (statement that signals the checkpointer) is missing. */
3891  pmState = PM_SHUTDOWN;
3892  }
3893  else
3894  {
3895  /*
3896  * If we failed to fork a checkpointer, just shut down.
3897  * Any required cleanup will happen at next restart. We
3898  * set FatalError so that an "abnormal shutdown" message
3899  * gets logged when we exit.
3900  */
3901  FatalError = true;
 /* NOTE(review): listing line 3902 is missing here; presumably a pmState assignment. */
3903 
3904  /* Kill the walsenders, archiver and stats collector too */
 /*
  * NOTE(review): listing lines 3905, 3907 and 3909 (the three signaling
  * statements) are missing; only the two PID guards survive below.
  */
3906  if (PgArchPID != 0)
3908  if (PgStatPID != 0)
3910  }
3911  }
3912  }
3913  }
3914 
3915  if (pmState == PM_SHUTDOWN_2)
3916  {
3917  /*
3918  * PM_SHUTDOWN_2 state ends when there's no other children than
3919  * dead_end children left. There shouldn't be any regular backends
3920  * left by now anyway; what we're really waiting for is walsenders and
3921  * archiver.
3922  */
3923  if (PgArchPID == 0 && CountChildren(BACKEND_TYPE_ALL) == 0)
3924  {
 /* NOTE(review): listing line 3925 (the resulting pmState assignment) is missing. */
3926  }
3927  }
3928 
3929  if (pmState == PM_WAIT_DEAD_END)
3930  {
3931  /*
3932  * PM_WAIT_DEAD_END state ends when the BackendList is entirely empty
3933  * (ie, no dead_end children remain), and the archiver and stats
3934  * collector are gone too.
3935  *
3936  * The reason we wait for those two is to protect them against a new
3937  * postmaster starting conflicting subprocesses; this isn't an
3938  * ironclad protection, but it at least helps in the
3939  * shutdown-and-immediately-restart scenario. Note that they have
3940  * already been sent appropriate shutdown signals, either during a
3941  * normal state transition leading up to PM_WAIT_DEAD_END, or during
3942  * FatalError processing.
3943  */
3944  if (dlist_is_empty(&BackendList) &&
3945  PgArchPID == 0 && PgStatPID == 0)
3946  {
3947  /* These other guys should be dead already */
3948  Assert(StartupPID == 0);
3949  Assert(WalReceiverPID == 0);
3950  Assert(BgWriterPID == 0);
3951  Assert(CheckpointerPID == 0);
3952  Assert(WalWriterPID == 0);
3953  Assert(AutoVacPID == 0);
3954  /* syslogger is not considered here */
 /* NOTE(review): listing line 3955 (the resulting pmState assignment) is missing. */
3956  }
3957  }
3958 
3959  /*
3960  * If we've been told to shut down, we exit as soon as there are no
3961  * remaining children. If there was a crash, cleanup will occur at the
3962  * next startup. (Before PostgreSQL 8.3, we tried to recover from the
3963  * crash before exiting, but that seems unwise if we are quitting because
3964  * we got SIGTERM from init --- there may well not be time for recovery
3965  * before init decides to SIGKILL us.)
3966  *
3967  * Note that the syslogger continues to run. It will exit when it sees
3968  * EOF on its input pipe, which happens when there are no more upstream
3969  * processes.
3970  */
 /* NOTE(review): listing line 3971 (condition for this block) is missing. */
3972  {
3973  if (FatalError)
3974  {
3975  ereport(LOG, (errmsg("abnormal database system shutdown")));
3976  ExitPostmaster(1);
3977  }
3978  else
3979  {
3980  /*
3981  * Terminate exclusive backup mode to avoid recovery after a clean
3982  * fast shutdown. Since an exclusive backup can only be taken
3983  * during normal running (and not, for example, while running
3984  * under Hot Standby) it only makes sense to do this if we reached
3985  * normal running. If we're still in recovery, the backup file is
3986  * one we're recovering *from*, and we must keep it around so that
3987  * recovery restarts from the right place.
3988  */
 /* NOTE(review): listing line 3989 (condition guarding CancelBackup) is missing. */
3990  CancelBackup();
3991 
3992  /* Normal exit from the postmaster is here */
3993  ExitPostmaster(0);
3994  }
3995  }
3996 
3997  /*
3998  * If the startup process failed, or the user does not want an automatic
3999  * restart after backend crashes, wait for all non-syslogger children to
4000  * exit, and then exit postmaster. We don't try to reinitialize when the
4001  * startup process fails, because more than likely it will just fail again
4002  * and we will keep trying forever.
4003  */
4004  if (pmState == PM_NO_CHILDREN &&
 /* NOTE(review): listing line 4005 (remainder of this condition) is missing. */
4006  ExitPostmaster(1);
4007 
4008  /*
4009  * If we need to recover from a crash, wait for all non-syslogger children
4010  * to exit, then reset shmem and StartupDataBase.
4011  */
4012  if (FatalError && pmState == PM_NO_CHILDREN)
4013  {
4014  ereport(LOG,
4015  (errmsg("all server processes terminated; reinitializing")));
4016 
4017  /* allow background workers to immediately restart */
 /* NOTE(review): listing line 4018 (the call described above) is missing. */
4019 
4020  shmem_exit(1);
4021 
4022  /* re-read control file into local memory */
 /* NOTE(review): listing line 4023 (the call described above) is missing. */
4024 
4025  reset_shared();
4026 
 /*
  * NOTE(review): listing lines 4027 and 4029 are missing around the Assert
  * below; presumably 4027 launches the startup process (the Assert checks
  * StartupPID != 0).
  */
4028  Assert(StartupPID != 0);
4030  pmState = PM_STARTUP;
4031  /* crash recovery started, reset SIGKILL flag */
4032  AbortStartTime = 0;
4033  }
4034 }
4035 
4036 
4037 /*
4038  * Send a signal to a postmaster child process
4039  *
4040  * On systems that have setsid(), each child process sets itself up as a
4041  * process group leader. For signals that are generally interpreted in the
4042  * appropriate fashion, we signal the entire process group not just the
4043  * direct child process. This allows us to, for example, SIGQUIT a blocked
4044  * archive_recovery script, or SIGINT a script being run by a backend via
4045  * system().
4046  *
4047  * There is a race condition for recently-forked children: they might not
4048  * have executed setsid() yet. So we signal the child directly as well as
4049  * the group. We assume such a child will handle the signal before trying
4050  * to spawn any grandchild processes. We also assume that signaling the
4051  * child twice will not cause any problems.
4052  */
4053 static void
4054 signal_child(pid_t pid, int signal)
4055 {
4056  if (kill(pid, signal) < 0)
4057  elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) pid, signal);
4058 #ifdef HAVE_SETSID
4059  switch (signal)
4060  {
4061  case SIGINT:
4062  case SIGTERM:
4063  case SIGQUIT:
4064  case SIGSTOP:
4065  case SIGKILL:
4066  if (kill(-pid, signal) < 0)
4067  elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) (-pid), signal);
4068  break;
4069  default:
4070  break;
4071  }
4072 #endif
4073 }
4074 
4075 /*
4076  * Send a signal to the targeted children (but NOT special children;
4077  * dead_end children are never signaled, either).
 *
 * signal is passed unchanged to signal_child() for each selected entry of
 * BackendList. target is either BACKEND_TYPE_ALL or a bit mask that is
 * tested against each entry's bkend_type. Returns true if at least one
 * child was signaled, false otherwise.
 *
 * NOTE(review): this listing skips its own lines 4103-4104; the gap is
 * marked below. Confirm the missing text against the upstream file.
4078  */
4079 static bool
4080 SignalSomeChildren(int signal, int target)
4081 {
4082  dlist_iter iter;
4083  bool signaled = false;
4084 
4085  dlist_foreach(iter, &BackendList)
4086  {
4087  Backend *bp = dlist_container(Backend, elem, iter.cur);
4088 
4089  if (bp->dead_end)
4090  continue;
4091 
4092  /*
4093  * Since target == BACKEND_TYPE_ALL is the most common case, we test
4094  * it first and avoid touching shared memory for every child.
4095  */
4096  if (target != BACKEND_TYPE_ALL)
4097  {
4098  /*
4099  * Assign bkend_type for any recently announced WAL Sender
4100  * processes.
4101  */
4102  if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
 /*
  * NOTE(review): listing lines 4103-4104, the rest of this condition and
  * the statement it controls, are missing; per the comment above it
  * presumably reclassifies the entry as a WAL sender.
  */
4105 
 /* Skip children whose type is not selected by the mask. */
4106  if (!(target & bp->bkend_type))
4107  continue;
4108  }
4109 
4110  ereport(DEBUG4,
4111  (errmsg_internal("sending signal %d to process %d",
4112  signal, (int) bp->pid)));
4113  signal_child(bp->pid, signal);
4114  signaled = true;
4115  }
4116  return signaled;
4117 }
4118 
4119 /*
4120  * Send a termination signal to children. This considers all of our children
4121  * processes, except syslogger and dead_end backends.
 *
 * The signal is first passed to SignalChildren() and then sent with
 * signal_child() to each of the startup, bgwriter, checkpointer, walwriter,
 * walreceiver, autovacuum launcher, archiver and stats collector processes
 * whose PID variable is nonzero.
 *
 * NOTE(review): this listing skips its own lines 4124 (the line carrying the
 * function name and parameter list) and 4131; the second gap is marked
 * below. Confirm the missing text against the upstream file.
4122  */
4123 static void
4125 {
4126  SignalChildren(signal);
4127  if (StartupPID != 0)
4128  {
4129  signal_child(StartupPID, signal);
4130  if (signal == SIGQUIT || signal == SIGKILL)
 /*
  * NOTE(review): listing line 4131, the statement controlled by the "if"
  * above, is missing; presumably it records that the startup process was
  * signaled.
  */
4132  }
4133  if (BgWriterPID != 0)
4134  signal_child(BgWriterPID, signal);
4135  if (CheckpointerPID != 0)
4136  signal_child(CheckpointerPID, signal);
4137  if (WalWriterPID != 0)
4138  signal_child(WalWriterPID, signal);
4139  if (WalReceiverPID != 0)
4140  signal_child(WalReceiverPID, signal);
4141  if (AutoVacPID != 0)
4142  signal_child(AutoVacPID, signal);
4143  if (PgArchPID != 0)
4144  signal_child(PgArchPID, signal);
4145  if (PgStatPID != 0)
4146  signal_child(PgStatPID, signal);
4147 }
4148 
4149 /*
4150  * BackendStartup -- start backend process
4151  *
4152  * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
4153  *
4154  * Note: if you change this code, also consider StartAutovacuumWorker.
 *
 * As written below, STATUS_ERROR is also returned, before any fork is
 * attempted, when the Backend struct cannot be allocated or the cancel key
 * cannot be generated. On success the new Backend entry is pushed onto
 * BackendList with type BACKEND_TYPE_NORMAL.
 *
 * NOTE(review): this listing skips its own lines 4157 (the line carrying the
 * function name and the declaration of "port"), 4180, 4192, 4194, 4200, 4216
 * and 4235; each gap is marked below. Confirm the missing text against the
 * upstream file.
4155  */
4156 static int
4158 {
4159  Backend *bn; /* for backend cleanup */
4160  pid_t pid;
4161 
4162  /*
4163  * Create backend data structure. Better before the fork() so we can
4164  * handle failure cleanly.
4165  */
4166  bn = (Backend *) malloc(sizeof(Backend));
4167  if (!bn)
4168  {
4169  ereport(LOG,
4170  (errcode(ERRCODE_OUT_OF_MEMORY),
4171  errmsg("out of memory")));
4172  return STATUS_ERROR;
4173  }
4174 
4175  /*
4176  * Compute the cancel key that will be assigned to this backend. The
4177  * backend will have its own copy in the forked-off process' value of
4178  * MyCancelKey, so that it can transmit the key to the frontend.
4179  */
 /*
  * NOTE(review): listing line 4180, the call that generates the key and
  * whose failure guards the block below, is missing.
  */
4181  {
4182  free(bn);
4183  ereport(LOG,
4184  (errcode(ERRCODE_INTERNAL_ERROR),
4185  errmsg("could not generate random cancel key")));
4186  return STATUS_ERROR;
4187  }
4188 
4189  bn->cancel_key = MyCancelKey;
4190 
4191  /* Pass down canAcceptConnections state */
 /*
  * NOTE(review): listing line 4192 (presumably the assignment of
  * port->canAcceptConnections) and line 4194 (the rest of the dead_end
  * expression below) are missing.
  */
4193  bn->dead_end = (port->canAcceptConnections != CAC_OK &&
4195 
4196  /*
4197  * Unless it's a dead_end child, assign it a child slot number
4198  */
4199  if (!bn->dead_end)
 /*
  * NOTE(review): listing line 4200, the statement that assigns the child
  * slot, is missing here.
  */
4201  else
4202  bn->child_slot = 0;
4203 
4204  /* Hasn't asked to be notified about any bgworkers yet */
4205  bn->bgworker_notify = false;
4206 
4207 #ifdef EXEC_BACKEND
4208  pid = backend_forkexec(port);
4209 #else /* !EXEC_BACKEND */
4210  pid = fork_process();
4211  if (pid == 0) /* child */
4212  {
 /* The child has no use for the postmaster's bookkeeping struct. */
4213  free(bn);
4214 
4215  /* Detangle from postmaster */
 /* NOTE(review): listing line 4216 (the call described above) is missing. */
4217 
4218  /* Close the postmaster's sockets */
4219  ClosePostmasterPorts(false);
4220 
4221  /* Perform additional initialization and collect startup packet */
4222  BackendInitialize(port);
4223 
4224  /* And run the backend */
4225  BackendRun(port);
4226  }
4227 #endif /* EXEC_BACKEND */
4228 
4229  if (pid < 0)
4230  {
4231  /* in parent, fork failed */
 /* Saved so the cleanup calls below cannot clobber the value used by %m. */
4232  int save_errno = errno;
4233 
4234  if (!bn->dead_end)
 /*
  * NOTE(review): listing line 4235, the statement controlled by the "if"
  * above, is missing; presumably it releases the child slot. As printed,
  * free(bn) looks conditional, which it presumably is not.
  */
4236  free(bn);
4237  errno = save_errno;
4238  ereport(LOG,
4239  (errmsg("could not fork new process for connection: %m")));
4240  report_fork_failure_to_client(port, save_errno);
4241  return STATUS_ERROR;
4242  }
4243 
4244  /* in parent, successful fork */
4245  ereport(DEBUG2,
4246  (errmsg_internal("forked new backend, pid=%d socket=%d",
4247  (int) pid, (int) port->sock)));
4248 
4249  /*
4250  * Everything's been successful, it's safe to add this backend to our list
4251  * of backends.
4252  */
4253  bn->pid = pid;
4254  bn->bkend_type = BACKEND_TYPE_NORMAL; /* Can change later to WALSND */
4255  dlist_push_head(&BackendList, &bn->elem);
4256 
4257 #ifdef EXEC_BACKEND
4258  if (!bn->dead_end)
4259  ShmemBackendArrayAdd(bn);
4260 #endif
4261 
4262  return STATUS_OK;
4263 }
4264 
4265 /*
4266  * Try to report backend fork() failure to client before we close the
4267  * connection. Since we do not care to risk blocking the postmaster on
4268  * this connection, we set the connection to non-blocking and try only once.
4269  *
4270  * This is grungy special-purpose code; we cannot use backend libpq since
4271  * it's not up and running.
 *
 * The body uses port->sock as the socket to write to and errnum as the
 * error number whose strerror() text is appended to the message. Nothing
 * is returned, and a failed send() is not reported.
 *
 * NOTE(review): this listing skips its own line 4274, which carries the
 * function name and parameter list; the names "port" and "errnum" are taken
 * from their uses below. Confirm against the upstream file.
4272  */
4273 static void
4275 {
4276  char buffer[1000];
4277  int rc;
4278 
4279  /* Format the error message packet (always V2 protocol) */
4280  snprintf(buffer, sizeof(buffer), "E%s%s\n",
4281  _("could not fork new process for connection: "),
4282  strerror(errnum));
4283 
4284  /* Set port to non-blocking. Don't do send() if this fails */
4285  if (!pg_set_noblock(port->sock))
4286  return;
4287 
4288  /* We'll retry after EINTR, but ignore all other failures */
 /* The "+ 1" sends the terminating NUL byte along with the text. */
4289  do
4290  {
4291  rc = send(port->sock, buffer, strlen(buffer) + 1, 0);
4292  } while (rc < 0 && errno == EINTR);
4293 }
4294 
4295 
4296 /*
4297  * BackendInitialize -- initialize an interactive (postmaster-child)
4298  * backend process, and collect the client's startup packet.
4299  *
4300  * returns: nothing. Will not return at all if there's any failure.
4301  *
4302  * Note: this code does not depend on having any access to shared memory.
4303  * Indeed, our approach to SIGTERM/timeout handling *requires* that
4304  * shared memory not have been touched yet; see comments within.
4305  * In the EXEC_BACKEND case, we are physically attached to shared memory
4306  * but have not yet set up most of our local pointers to shmem structures.
4307  */
/*
 * The body reads and fills fields of "port": remote_host, remote_port and
 * (when a reverse lookup produced a name) remote_hostname are set here;
 * user_name and database_name are read for the ps title after the startup
 * packet has been processed.
 *
 * NOTE(review): this text is a numbered listing and the numbers skip 4309,
 * 4321, 4357, 4360, 4428-4429, 4440, 4452 and 4467 inside this function.
 * The declarator (function name and parameter list) and the statements that
 * the neighbouring comments describe are therefore absent from this copy
 * and must be restored from the upstream file before this can compile.
 */
4308 static void
4310 {
4311  int status;
4312  int ret;
4313  char remote_host[NI_MAXHOST];
4314  char remote_port[NI_MAXSERV];
4315  StringInfoData ps_data;
4316 
4317  /* Save port etc. for ps status */
4318  MyProcPort = port;
4319 
4320  /* Tell fd.c about the long-lived FD associated with the port */
 /* NOTE(review): the statement for the comment above (listing 4321) is missing. */
4322 
4323  /*
4324  * PreAuthDelay is a debugging aid for investigating problems in the
4325  * authentication cycle: it can be set in postgresql.conf to allow time to
4326  * attach to the newly-forked backend with a debugger. (See also
4327  * PostAuthDelay, which we allow clients to pass through PGOPTIONS, but it
4328  * is not honored until after authentication.)
4329  */
4330  if (PreAuthDelay > 0)
4331  pg_usleep(PreAuthDelay * 1000000L);
4332 
4333  /* This flag will remain set until InitPostgres finishes authentication */
4334  ClientAuthInProgress = true; /* limit visibility of log messages */
4335 
4336  /* set these to empty in case they are needed before we set them up */
4337  port->remote_host = "";
4338  port->remote_port = "";
4339 
4340  /*
4341  * Initialize libpq and enable reporting of ereport errors to the client.
4342  * Must do this now because authentication uses libpq to send messages.
4343  */
4344  pq_init(); /* initialize libpq to talk to client */
4345  whereToSendOutput = DestRemote; /* now safe to ereport to client */
4346 
4347  /*
4348  * We arrange to do _exit(1) if we receive SIGTERM or timeout while trying
4349  * to collect the startup packet; while SIGQUIT results in _exit(2).
4350  * Otherwise the postmaster cannot shutdown the database FAST or IMMED
4351  * cleanly if a buggy client fails to send the packet promptly.
4352  *
4353  * Exiting with _exit(1) is only possible because we have not yet touched
4354  * shared memory; therefore no outside-the-process state needs to get
4355  * cleaned up.
4356  */
 /*
  * NOTE(review): listing lines 4357 and 4360 are missing here; presumably
  * they install the SIGTERM handler and adjust the signal mask -- confirm
  * against upstream.
  */
4358  /* SIGQUIT handler was already set up by InitPostmasterChild */
4359  InitializeTimeouts(); /* establishes SIGALRM handler */
4361 
4362  /*
4363  * Get the remote host name and port for logging and status display.
4364  */
4365  remote_host[0] = '\0';
4366  remote_port[0] = '\0';
 /* A lookup failure is only warned about; the buffers then stay empty or partial. */
4367  if ((ret = pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen,
4368  remote_host, sizeof(remote_host),
4369  remote_port, sizeof(remote_port),
4370  (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV)) != 0)
4371  ereport(WARNING,
4372  (errmsg_internal("pg_getnameinfo_all() failed: %s",
4373  gai_strerror(ret))));
4374 
4375  /*
4376  * Save remote_host and remote_port in port structure (after this, they
4377  * will appear in log_line_prefix data for log messages).
4378  */
 /* NOTE(review): the strdup() results are stored without a NULL check. */
4379  port->remote_host = strdup(remote_host);
4380  port->remote_port = strdup(remote_port);
4381 
4382  /* And now we can issue the Log_connections message, if wanted */
4383  if (Log_connections)
4384  {
4385  if (remote_port[0])
4386  ereport(LOG,
4387  (errmsg("connection received: host=%s port=%s",
4388  remote_host,
4389  remote_port)));
4390  else
4391  ereport(LOG,
4392  (errmsg("connection received: host=%s",
4393  remote_host)));
4394  }
4395 
4396  /*
4397  * If we did a reverse lookup to name, we might as well save the results
4398  * rather than possibly repeating the lookup during authentication.
4399  *
4400  * Note that we don't want to specify NI_NAMEREQD above, because then we'd
4401  * get nothing useful for a client without an rDNS entry. Therefore, we
4402  * must check whether we got a numeric IPv4 or IPv6 address, and not save
4403  * it into remote_hostname if so. (This test is conservative and might
4404  * sometimes classify a hostname as numeric, but an error in that
4405  * direction is safe; it only results in a possible extra lookup.)
4406  */
 /*
  * Each strspn() test is true when remote_host contains at least one
  * character outside the given set, i.e. it is neither all digits-and-dots
  * nor all hex-digits-and-colons.
  */
4407  if (log_hostname &&
4408  ret == 0 &&
4409  strspn(remote_host, "0123456789.") < strlen(remote_host) &&
4410  strspn(remote_host, "0123456789ABCDEFabcdef:") < strlen(remote_host))
4411  port->remote_hostname = strdup(remote_host);
4412 
4413  /*
4414  * Ready to begin client interaction. We will give up and _exit(1) after
4415  * a time delay, so that a broken client can't hog a connection
4416  * indefinitely. PreAuthDelay and any DNS interactions above don't count
4417  * against the time limit.
4418  *
4419  * Note: AuthenticationTimeout is applied here while waiting for the
4420  * startup packet, and then again in InitPostgres for the duration of any
4421  * authentication operations. So a hostile client could tie up the
4422  * process for nearly twice AuthenticationTimeout before we kick him off.
4423  *
4424  * Note: because PostgresMain will call InitializeTimeouts again, the
4425  * registration of STARTUP_PACKET_TIMEOUT will be lost. This is okay
4426  * since we never use it again after this function.
4427  */
 /*
  * NOTE(review): listing lines 4428-4429 (the statements that register and
  * arm the timeout described above) are missing from this copy.
  */
4430 
4431  /*
4432  * Receive the startup packet (which might turn out to be a cancel request
4433  * packet).
4434  */
4435  status = ProcessStartupPacket(port, false, false);
4436 
4437  /*
4438  * Disable the timeout, and prevent SIGTERM again.
4439  */
 /* NOTE(review): listing line 4440 (the timeout-disabling statement) is missing. */
4441  PG_SETMASK(&BlockSig);
4442 
4443  /*
4444  * As a safety check that nothing in startup has yet performed
4445  * shared-memory modifications that would need to be undone if we had
4446  * exited through SIGTERM or timeout above, check that no on_shmem_exit
4447  * handlers have been registered yet. (This isn't terribly bulletproof,
4448  * since someone might misuse an on_proc_exit handler for shmem cleanup,
4449  * but it's a cheap and helpful check. We cannot disallow on_proc_exit
4450  * handlers unfortunately, since pq_init() already registered one.)
4451  */
 /* NOTE(review): listing line 4452 (the check itself) is missing. */
4453 
4454  /*
4455  * Stop here if it was bad or a cancel packet. ProcessStartupPacket
4456  * already did any appropriate error reporting.
4457  */
4458  if (status != STATUS_OK)
4459  proc_exit(0);
4460 
4461  /*
4462  * Now that we have the user and database name, we can set the process
4463  * title for ps. It's good to do this as early as possible in startup.
4464  */
4465  initStringInfo(&ps_data);
4466  if (am_walsender)
 /*
  * NOTE(review): listing line 4467, presumably the real body of the "if"
  * above, is missing.  As this copy stands the "if" appears to guard the
  * user_name append below, which is probably not the intended logic --
  * confirm against upstream.
  */
4468  appendStringInfo(&ps_data, "%s ", port->user_name);
4469  if (!am_walsender)
4470  appendStringInfo(&ps_data, "%s ", port->database_name);
4471  appendStringInfo(&ps_data, "%s", port->remote_host);
4472  if (port->remote_port[0] != '\0')
4473  appendStringInfo(&ps_data, "(%s)", port->remote_port);
4474 
 /* The buffer is released right after init_ps_display() has been given it. */
4475  init_ps_display(ps_data.data);
4476  pfree(ps_data.data);
4477 
4478  set_ps_display("initializing");
4479 }
4480 
4481 
4482 /*
4483  * BackendRun -- set up the backend's argument list and invoke PostgresMain()
4484  *
4485  * returns:
4486  * Nothing; the function is declared void. It is not expected to return
4487  * at all, since it ends by handing control to PostgresMain().
4488  */
/*
 * The argument vector built here is "postgres" followed by whatever
 * pg_split_opts() extracts from ExtraOptions, NULL-terminated.  The
 * database and user names are passed to PostgresMain() separately, taken
 * from port->database_name and port->user_name.
 *
 * NOTE(review): this text is a numbered listing and the numbers skip 4490
 * (the declarator: function name and parameter list) and 4539 (the
 * statement belonging to the PostmasterContext comment).  Both must be
 * restored from the upstream file before this can compile.
 */
4489 static void
4491 {
4492  char **av;
4493  int maxac;
4494  int ac;
4495  int i;
4496 
4497  /*
4498  * Now, build the argv vector that will be given to PostgresMain.
4499  *
4500  * The maximum possible number of commandline arguments that could come
4501  * from ExtraOptions is (strlen(ExtraOptions) + 1) / 2; see
4502  * pg_split_opts().
4503  */
4504  maxac = 2; /* for fixed args supplied below */
4505  maxac += (strlen(ExtraOptions) + 1) / 2;
4506 
4507  av = (char **) MemoryContextAlloc(TopMemoryContext,
4508  maxac * sizeof(char *));
4509  ac = 0;
4510 
4511  av[ac++] = "postgres";
4512 
4513  /*
4514  * Pass any backend switches specified with -o on the postmaster's own
4515  * command line. We assume these are secure.
4516  */
4517  pg_split_opts(av, &ac, ExtraOptions);
4518 
4519  av[ac] = NULL;
4520 
 /* The terminating NULL at av[ac] must still fit inside the maxac slots. */
4521  Assert(ac < maxac);
4522 
4523  /*
4524  * Debug: print arguments being passed to backend
4525  */
4526  ereport(DEBUG3,
4527  (errmsg_internal("%s child[%d]: starting with (",
4528  progname, (int) getpid())));
4529  for (i = 0; i < ac; ++i)
4530  ereport(DEBUG3,
4531  (errmsg_internal("\t%s", av[i])));
4532  ereport(DEBUG3,
4533  (errmsg_internal(")")));
4534 
4535  /*
4536  * Make sure we aren't in PostmasterContext anymore. (We can't delete it
4537  * just yet, though, because InitPostgres will need the HBA data.)
4538  */
 /* NOTE(review): the statement for the comment above (listing 4539) is missing. */
4540 
4541  PostgresMain(ac, av, port->database_name, port->user_name);
4542 }
4543 
4544 
4545 #ifdef EXEC_BACKEND
4546 
4547 /*
4548  * postmaster_forkexec -- fork and exec a postmaster subprocess
4549  *
4550  * The caller must have set up the argv array already, except for argv[2]
4551  * which will be filled with the name of the temp variable file.
4552  *
4553  * Returns the child process PID, or -1 on fork failure (a suitable error
4554  * message has been logged on failure).
4555  *
4556  * All uses of this routine will dispatch to SubPostmasterMain in the
4557  * child process.
4558  */
4559 pid_t
4560 postmaster_forkexec(int argc, char *argv[])
4561 {
4562  Port port;
4563 
4564  /* This entry point passes dummy values for the Port variables */
4565  memset(&port, 0, sizeof(port));
4566  return internal_forkexec(argc, argv, &port);
4567 }
4568 
4569 /*
4570  * backend_forkexec -- fork/exec off a backend process
4571  *
4572  * Some operating systems (WIN32) don't have fork() so we have to simulate
4573  * it by storing parameters that need to be passed to the child and
4574  * then create a new child process.
4575  *
4576  * returns the pid of the fork/exec'd process, or -1 on failure
4577  */
4578 static pid_t
4579 backend_forkexec(Port *port)
4580 {
4581  char *av[4];
4582  int ac = 0;
4583 
4584  av[ac++] = "postgres";
4585  av[ac++] = "--forkbackend";
4586  av[ac++] = NULL; /* filled in by internal_forkexec */
4587 
4588  av[ac] = NULL;
4589  Assert(ac < lengthof(av));
4590 
4591  return internal_forkexec(ac, av, port);
4592 }
4593 
4594 #ifndef WIN32
4595 
4596 /*
4597  * internal_forkexec non-win32 implementation
4598  *
4599  * - writes out backend variables to the parameter file
4600  * - fork():s, and then exec():s the child process
4601  */
/*
 * argc/argv: argument vector prepared by the caller; argv[1] must start
 *            with "--fork", argv[2] must be NULL on entry (it is pointed
 *            at the temp file name here) and argv[argc] must be NULL.
 * port:      passed to save_backend_variables() together with the
 *            parameter block that is then written to the temp file.
 *
 * Returns the value of fork_process() in the parent (child pid, or -1 on
 * fork failure), or -1 if the parameter file could not be produced.
 *
 * NOTE(review): this text is a numbered listing and the numbers skip 4616
 * (part of the snprintf argument list), 4627 (the statement that creates
 * the temp-file directory) and 4633, 4643, 4653 (one line in each of the
 * three ereport calls, which leaves their parentheses unbalanced here).
 * Restore those lines from the upstream file before compiling.
 */
4602 static pid_t
4603 internal_forkexec(int argc, char *argv[], Port *port)
4604 {
 /* Counter that keeps temp file names unique across calls in this process. */
4605  static unsigned long tmpBackendFileNum = 0;
4606  pid_t pid;
4607  char tmpfilename[MAXPGPATH];
4608  BackendParameters param;
4609  FILE *fp;
4610 
4611  if (!save_backend_variables(&param, port))
4612  return -1; /* log made by save_backend_variables */
4613 
4614  /* Calculate name for temp file */
4615  snprintf(tmpfilename, MAXPGPATH, "%s/%s.backend_var.%d.%lu",
4617  MyProcPid, ++tmpBackendFileNum);
4618 
4619  /* Open file */
4620  fp = AllocateFile(tmpfilename, PG_BINARY_W);
4621  if (!fp)
4622  {
4623  /*
4624  * As in OpenTemporaryFileInTablespace, try to make the temp-file
4625  * directory, ignoring errors.
4626  */
4628 
 /* Second and last attempt; only this failure is reported. */
4629  fp = AllocateFile(tmpfilename, PG_BINARY_W);
4630  if (!fp)
4631  {
4632  ereport(LOG,
4634  errmsg("could not create file \"%s\": %m",
4635  tmpfilename)));
4636  return -1;
4637  }
4638  }
4639 
 /* The whole parameter struct is written as a single binary record. */
4640  if (fwrite(&param, sizeof(param), 1, fp) != 1)
4641  {
4642  ereport(LOG,
4644  errmsg("could not write to file \"%s\": %m", tmpfilename)));
4645  FreeFile(fp);
4646  return -1;
4647  }
4648 
4649  /* Release file */
 /* A nonzero result from FreeFile() is treated as a write failure too. */
4650  if (FreeFile(fp))
4651  {
4652  ereport(LOG,
4654  errmsg("could not write to file \"%s\": %m", tmpfilename)));
4655  return -1;
4656  }
4657 
4658  /* Make sure caller set up argv properly */
4659  Assert(argc >= 3);
4660  Assert(argv[argc] == NULL);
4661  Assert(strncmp(argv[1], "--fork", 6) == 0);
4662  Assert(argv[2] == NULL);
4663 
4664  /* Insert temp file name after --fork argument */
 /*
  * tmpfilename is a local array, so argv[2] is only meaningful until this
  * function returns; callers must not read it afterwards.
  */
4665  argv[2] = tmpfilename;
4666 
4667  /* Fire off execv in child */
4668  if ((pid = fork_process()) == 0)
4669  {
4670  if (execv(postgres_exec_path, argv) < 0)
4671  {
4672  ereport(LOG,
4673  (errmsg("could not execute server process \"%s\": %m",
4674  postgres_exec_path)));
4675  /* We're already in the child process here, can't return */
4676  exit(1);
4677  }
4678  }
4679 
4680  return pid; /* Parent returns pid, or -1 on fork failure */
4681 }
4682 #else /* WIN32 */
4683 
4684 /*
4685  * internal_forkexec win32 implementation
4686  *
4687  * - starts backend using CreateProcess(), in suspended state
4688  * - writes out backend variables to the parameter file
4689  * - during this, duplicates handles and sockets required for
4690  * inheritance into the new process
4691  * - resumes execution of the new process once the backend parameter
4692  * file is complete.
4693  */
4694 static pid_t
4695 internal_forkexec(int argc, char *argv[], Port *port)
4696 {
4697  int retry_count = 0;
4698  STARTUPINFO si;
4699  PROCESS_INFORMATION pi;
4700  int i;
4701  int j;
4702  char cmdLine[MAXPGPATH * 2];
4703  HANDLE paramHandle;
4704  BackendParameters *param;
4705  SECURITY_ATTRIBUTES sa;
4706  char paramHandleStr[32];
4707  win32_deadchild_waitinfo *childinfo;
4708 
4709  /* Make sure caller set up argv properly */
4710  Assert(argc >= 3);
4711  Assert(argv[argc] == NULL);
4712  Assert(strncmp(argv[1], "--fork", 6) == 0);
4713  Assert(argv[2] == NULL);
4714 
4715  /* Resume here if we need to retry */
4716 retry:
4717 
4718  /* Set up shared memory for parameter passing */
4719  ZeroMemory(&sa, sizeof(sa));
4720  sa.nLength = sizeof(sa);
4721  sa.bInheritHandle = TRUE;
4722  paramHandle = CreateFileMapping(INVALID_HANDLE_VALUE,
4723  &sa,
4724  PAGE_READWRITE,
4725  0,
4726  sizeof(BackendParameters),
4727  NULL);
4728  if (paramHandle == INVALID_HANDLE_VALUE)
4729  {
4730  elog(LOG, "could not create backend parameter file mapping: error code %lu",
4731  GetLastError());
4732  return -1;
4733  }
4734 
4735  param = MapViewOfFile(paramHandle, FILE_MAP_WRITE, 0, 0, sizeof(BackendParameters));
4736  if (!param)
4737  {
4738  elog(LOG, "could not map backend parameter memory: error code %lu",
4739  GetLastError());
4740  CloseHandle(paramHandle);
4741  return -1;
4742  }
4743 
4744  /* Insert temp file name after --fork argument */
4745 #ifdef _WIN64
4746  sprintf(paramHandleStr, "%llu", (LONG_PTR) paramHandle);
4747 #else
4748  sprintf(paramHandleStr, "%lu", (DWORD) paramHandle);
4749 #endif
4750  argv[2] = paramHandleStr;
4751 
4752  /* Format the cmd line */
4753  cmdLine[sizeof(cmdLine) - 1] = '\0';
4754  cmdLine[sizeof(cmdLine) - 2] = '\0';
4755  snprintf(cmdLine, sizeof(cmdLine) - 1, "\"%s\"", postgres_exec_path);
4756  i = 0;
4757  while (argv[++i] != NULL)
4758  {
4759  j = strlen(cmdLine);
4760  snprintf(cmdLine + j, sizeof(cmdLine) - 1 - j, " \"%s\"", argv[i]);
4761  }
4762  if (cmdLine[sizeof(cmdLine) - 2] != '\0')
4763  {
4764  elog(LOG, "subprocess command line too long");
4765  UnmapViewOfFile(param);
4766  CloseHandle(paramHandle);
4767  return -1;
4768  }
4769 
4770  memset(&pi, 0, sizeof(pi));
4771  memset(&si, 0, sizeof(si));
4772  si.cb = sizeof(si);
4773 
4774  /*
4775  * Create the subprocess in a suspended state. This will be resumed later,
4776  * once we have written out the parameter file.
4777  */
4778  if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, CREATE_SUSPENDED,
4779  NULL, NULL, &si, &pi))
4780  {
4781  elog(LOG, "CreateProcess call failed: %m (error code %lu)",
4782  GetLastError());
4783  UnmapViewOfFile(param);
4784  CloseHandle(paramHandle);
4785  return -1;
4786  }
4787 
4788  if (!save_backend_variables(param, port, pi.hProcess, pi.dwProcessId))
4789  {
4790  /*
4791  * log made by save_backend_variables, but we have to clean up the
4792  * mess with the half-started process
4793  */
4794  if (!TerminateProcess(pi.hProcess, 255))
4795  ereport(LOG,
4796  (errmsg_internal("could not terminate unstarted process: error code %lu",
4797  GetLastError())));
4798  CloseHandle(pi.hProcess);
4799  CloseHandle(pi.hThread);
4800  UnmapViewOfFile(param);
4801  CloseHandle(paramHandle);
4802  return -1; /* log made by save_backend_variables */
4803  }
4804 
4805  /* Drop the parameter shared memory that is now inherited to the backend */
4806  if (!UnmapViewOfFile(param))
4807  elog(LOG, "could not unmap view of backend parameter file: error code %lu",
4808  GetLastError());
4809  if (!CloseHandle(paramHandle))
4810  elog(LOG, "could not close handle to backend parameter file: error code %lu",
4811  GetLastError());
4812 
4813  /*
4814  * Reserve the memory region used by our main shared memory segment before
4815  * we resume the child process. Normally this should succeed, but if ASLR
4816  * is active then it might sometimes fail due to the stack or heap having
4817  * gotten mapped into that range. In that case, just terminate the
4818  * process and retry.
4819  */
4820  if (!pgwin32_ReserveSharedMemoryRegion(pi.hProcess))
4821  {
4822  /* pgwin32_ReserveSharedMemoryRegion already made a log entry */
4823  if (!TerminateProcess(pi.hProcess, 255))
4824  ereport(LOG,
4825  (errmsg_internal("could not terminate process that failed to reserve memory: error code %lu",
4826  GetLastError())));
4827  CloseHandle(pi.hProcess);
4828  CloseHandle(pi.hThread);
4829  if (++retry_count < 100)
4830  goto retry;
4831  ereport(LOG,
4832  (errmsg("giving up after too many tries to reserve shared memory"),
4833  errhint("This might be caused by ASLR or antivirus software.")));
4834  return -1;
4835  }
4836 
4837  /*
4838  * Now that the backend variables are written out, we start the child
4839  * thread so it can start initializing while we set up the rest of the
4840  * parent state.
4841  */
4842  if (ResumeThread(pi.hThread) == -1)
4843  {
4844  if (!TerminateProcess(pi.hProcess, 255))
4845  {
4846  ereport(LOG,
4847  (errmsg_internal("could not terminate unstartable process: error code %lu",
4848  GetLastError())));
4849  CloseHandle(pi.hProcess);
4850  CloseHandle(pi.hThread);
4851  return -1;
4852  }
4853  CloseHandle(pi.hProcess);
4854  CloseHandle(pi.hThread);
4855  ereport(LOG,
4856  (errmsg_internal("could not resume thread of unstarted process: error code %lu",
4857  GetLastError())));
4858  return -1;
4859  }
4860 
4861  /*
4862  * Queue a waiter to signal when this child dies. The wait will be handled
4863  * automatically by an operating system thread pool.
4864  *
4865  * Note: use malloc instead of palloc, since it needs to be thread-safe.
4866  * Struct will be free():d from the callback function that runs on a
4867  * different thread.
4868  */
4869  childinfo = malloc(sizeof(win32_deadchild_waitinfo));
4870  if (!childinfo)
4871  ereport(FATAL,
4872  (errcode(ERRCODE_OUT_OF_MEMORY),
4873  errmsg("out of memory")));
4874 
4875  childinfo->procHandle = pi.hProcess;
4876  childinfo->procId = pi.dwProcessId;
4877 
4878  if (!RegisterWaitForSingleObject(&childinfo->waitHandle,
4879  pi.hProcess,
4880  pgwin32_deadchild_callback,
4881  childinfo,
4882  INFINITE,
4883  WT_EXECUTEONLYONCE | WT_EXECUTEINWAITTHREAD))
4884  ereport(FATAL,
4885  (errmsg_internal("could not register process for wait: error code %lu",
4886  GetLastError())));
4887 
4888  /* Don't close pi.hProcess here - the wait thread needs access to it */
4889 
4890  CloseHandle(pi.hThread);
4891 
4892  return pi.dwProcessId;
4893 }
4894 #endif /* WIN32 */
4895 
4896 
4897 /*
4898  * SubPostmasterMain -- Get the fork/exec'd process into a state equivalent
4899  * to what it would be if we'd simply forked on Unix, and then
4900  * dispatch to the appropriate place.
4901  *
4902  * The first two command line arguments are expected to be "--forkFOO"
4903  * (where FOO indicates which postmaster child we are to become), and
4904  * the name of a variables file that we can read to load data that would
4905  * have been inherited by fork() on Unix. Remaining arguments go to the
4906  * subprocess FooMain() routine.
4907  */
/*
 * argv[1] selects the child type and argv[2] is handed to
 * read_backend_variables().  Every dispatch branch below ends in a call
 * annotated "does not return", so reaching the final abort() means argv[1]
 * matched none of the known "--fork..." values.
 *
 * NOTE(review): this text is a numbered listing and the numbers skip 4915,
 * 4918, 4937, 4941, 4976, 4978, 5009, 5051, 5057, 5065, 5068, 5071, 5078,
 * 5084, 5091, 5097, 5109, 5115 and 5121 inside this function.  In most
 * places the describing comment survived but its statement did not.
 * Restore the statements from the upstream file before compiling.
 */
4908 void
4909 SubPostmasterMain(int argc, char *argv[])
4910 {
4911  Port port;
4912 
4913  /* In EXEC_BACKEND case we will not have inherited these settings */
4914  IsPostmasterEnvironment = true;
4916 
4917  /* Setup essential subsystems (to ensure elog() behaves sanely) */
4919 
4920  /* Check we got appropriate args */
4921  if (argc < 3)
4922  elog(FATAL, "invalid subpostmaster invocation");
4923 
4924  /* Read in the variables file */
4925  memset(&port, 0, sizeof(Port));
4926  read_backend_variables(argv[2], &port);
4927 
4928  /* Close the postmaster's sockets (as soon as we know them) */
 /* The argument is true only when this child was started as "--forklog". */
4929  ClosePostmasterPorts(strcmp(argv[1], "--forklog") == 0);
4930 
4931  /*
4932  * Start our win32 signal implementation. This has to be done after we
4933  * read the backend variables, because we need to pick up the signal pipe
4934  * from the parent process.
4935  */
4936 #ifdef WIN32
4938 #endif
4939 
4940  /* Setup as postmaster child */
4942 
4943  /*
4944  * Set up memory area for GSS information. Mirrors the code in ConnCreate
4945  * for the non-exec case.
4946  */
4947 #if defined(ENABLE_GSS) || defined(ENABLE_SSPI)
4948  port.gss = (pg_gssinfo *) calloc(1, sizeof(pg_gssinfo));
4949  if (!port.gss)
4950  ereport(FATAL,
4951  (errcode(ERRCODE_OUT_OF_MEMORY),
4952  errmsg("out of memory")));
4953 #endif
4954 
4955  /*
4956  * If appropriate, physically re-attach to shared memory segment. We want
4957  * to do this before going any further to ensure that we can attach at the
4958  * same address the postmaster used. On the other hand, if we choose not
4959  * to re-attach, we may have other cleanup to do.
4960  *
4961  * If testing EXEC_BACKEND on Linux, you should run this as root before
4962  * starting the postmaster:
4963  *
4964  * echo 0 >/proc/sys/kernel/randomize_va_space
4965  *
4966  * This prevents using randomized stack and code addresses that cause the
4967  * child process's memory map to be different from the parent's, making it
4968  * sometimes impossible to attach to shared memory at the desired address.
4969  * Return the setting to its old value (usually '1' or '2') when finished.
4970  */
 /*
  * NOTE(review): both branch bodies of the if/else below (listing 4976 and
  * 4978) are missing from this copy.
  */
4971  if (strcmp(argv[1], "--forkbackend") == 0 ||
4972  strcmp(argv[1], "--forkavlauncher") == 0 ||
4973  strcmp(argv[1], "--forkavworker") == 0 ||
4974  strcmp(argv[1], "--forkboot") == 0 ||
4975  strncmp(argv[1], "--forkbgworker=", 15) == 0)
4977  else
4979 
4980  /* autovacuum needs this set before calling InitProcess */
4981  if (strcmp(argv[1], "--forkavlauncher") == 0)
4982  AutovacuumLauncherIAm();
4983  if (strcmp(argv[1], "--forkavworker") == 0)
4984  AutovacuumWorkerIAm();
4985 
4986  /* Read in remaining GUC variables */
4987  read_nondefault_variables();
4988 
4989  /*
4990  * Check that the data directory looks valid, which will also check the
4991  * privileges on the data directory and update our umask and file/group
4992  * variables for creating files later. Note: this should really be done
4993  * before we create any files or directories.
4994  */
4995  checkDataDir();
4996 
4997  /*
4998  * (re-)read control file, as it contains config. The postmaster will
4999  * already have read this, but this process doesn't know about that.
5000  */
5001  LocalProcessControlFile(false);
5002 
5003  /*
5004  * Reload any libraries that were preloaded by the postmaster. Since we
5005  * exec'd this process, those libraries didn't come along with us; but we
5006  * should load them into all child processes to be consistent with the
5007  * non-EXEC_BACKEND behavior.
5008  */
5010 
5011  /* Run backend or appropriate child */
5012  if (strcmp(argv[1], "--forkbackend") == 0)
5013  {
5014  Assert(argc == 3); /* shouldn't be any more args */
5015 
5016  /*
5017  * Need to reinitialize the SSL library in the backend, since the
5018  * context structures contain function pointers and cannot be passed
5019  * through the parameter file.
5020  *
5021  * If for some reason reload fails (maybe the user installed broken
5022  * key files), soldier on without SSL; that's better than all
5023  * connections becoming impossible.
5024  *
5025  * XXX should we do this in all child processes? For the moment it's
5026  * enough to do it in backend children.
5027  */
5028 #ifdef USE_SSL
5029  if (EnableSSL)
5030  {
5031  if (secure_initialize(false) == 0)
5032  LoadedSSL = true;
5033  else
5034  ereport(LOG,
5035  (errmsg("SSL configuration could not be loaded in child process")));
5036  }
5037 #endif
5038 
5039  /*
5040  * Perform additional initialization and collect startup packet.
5041  *
5042  * We want to do this before InitProcess() for a couple of reasons: 1.
5043  * so that we aren't eating up a PGPROC slot while waiting on the
5044  * client. 2. so that if InitProcess() fails due to being out of
5045  * PGPROC slots, we have already initialized libpq and are able to
5046  * report the error to the client.
5047  */
5048  BackendInitialize(&port);
5049 
5050  /* Restore basic shared memory pointers */
5052 
5053  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5054  InitProcess();
5055 
5056  /* Attach process to shared data structures */
5058 
5059  /* And run the backend */
5060  BackendRun(&port); /* does not return */
5061  }
5062  if (strcmp(argv[1], "--forkboot") == 0)
5063  {
5064  /* Restore basic shared memory pointers */
5066 
5067  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5069 
5070  /* Attach process to shared data structures */
5072 
 /* The first two arguments ("postgres", "--forkboot") are stripped off. */
5073  AuxiliaryProcessMain(argc - 2, argv + 2); /* does not return */
5074  }
5075  if (strcmp(argv[1], "--forkavlauncher") == 0)
5076  {
5077  /* Restore basic shared memory pointers */
5079 
5080  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5081  InitProcess();
5082 
5083  /* Attach process to shared data structures */
5085 
5086  AutoVacLauncherMain(argc - 2, argv + 2); /* does not return */
5087  }
5088  if (strcmp(argv[1], "--forkavworker") == 0)
5089  {
5090  /* Restore basic shared memory pointers */
5092 
5093  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5094  InitProcess();
5095 
5096  /* Attach process to shared data structures */
5098 
5099  AutoVacWorkerMain(argc - 2, argv + 2); /* does not return */
5100  }
5101  if (strncmp(argv[1], "--forkbgworker=", 15) == 0)
5102  {
5103  int shmem_slot;
5104 
5105  /* do this as early as possible; in particular, before InitProcess() */
5106  IsBackgroundWorker = true;
5107 
5108  /* Restore basic shared memory pointers */
5110 
5111  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5112  InitProcess();
5113 
5114  /* Attach process to shared data structures */
5116 
5117  /* Fetch MyBgworkerEntry from shared memory */
 /*
  * 15 is the length of "--forkbgworker=", so this parses the text that
  * follows the '='.  atoi() gives no indication of malformed input.
  */
5118  shmem_slot = atoi(argv[1] + 15);
5119  MyBgworkerEntry = BackgroundWorkerEntry(shmem_slot);
5120 
 /*
  * NOTE(review): listing line 5121 is missing; presumably it is the call
  * that runs the worker -- confirm against upstream.
  */
5122  }
5123  if (strcmp(argv[1], "--forkarch") == 0)
5124  {
5125  /* Do not want to attach to shared memory */
5126 
 /* Unlike the branches above, these children get the full argc/argv. */
5127  PgArchiverMain(argc, argv); /* does not return */
5128  }
5129  if (strcmp(argv[1], "--forkcol") == 0)
5130  {
5131  /* Do not want to attach to shared memory */
5132 
5133  PgstatCollectorMain(argc, argv); /* does not return */
5134  }
5135  if (strcmp(argv[1], "--forklog") == 0)
5136  {
5137  /* Do not want to attach to shared memory */
5138 
5139  SysLoggerMain(argc, argv); /* does not return */
5140  }
5141 
5142  abort(); /* shouldn't get here */
5143 }
5144 #endif /* EXEC_BACKEND */
5145 
5146 
/*
 * ExitPostmaster -- cleanup
 *
 * Do NOT call exit() directly --- always go through here!
 *
 * status: exit status, passed unchanged to proc_exit().
 *
 * Ends by calling proc_exit(status).
 */
static void
ExitPostmaster(int status)
{
#ifdef HAVE_PTHREAD_IS_THREADED_NP

	/*
	 * There is no known cause for a postmaster to become multithreaded after
	 * startup.  Recheck to account for the possibility of unknown causes.
	 * This message uses LOG level, because an unclean shutdown at this point
	 * would usually not look much different from a clean shutdown.
	 */
	if (pthread_is_threaded_np() != 0)
		ereport(LOG,
				(errcode(ERRCODE_INTERNAL_ERROR),
				 errmsg_internal("postmaster became multithreaded"),
				 errdetail("Please report this to <%s>.", PACKAGE_BUGREPORT)));
#endif

	/* should cleanup shared memory and kill all backends */

	/*
	 * Not sure of the semantics here.  When the Postmaster dies, should the
	 * backends all be killed? probably not.
	 *
	 * MUST -- vadim 05-10-1999
	 */

	proc_exit(status);
}
5181 
5182 /*
5183  * sigusr1_handler - handle signal conditions from child processes
5184  */
/*
 * errno is saved on entry and restored on exit, so the work done here does
 * not disturb the code that was interrupted by the signal.
 *
 * NOTE(review): this text is a numbered listing and the numbers skip 5186,
 * 5199, 5201, 5211-5212, 5223, 5225, 5242, 5250-5251, 5263, 5268-5269,
 * 5275-5276, 5278, 5285, 5293-5294, 5296, 5298, 5302-5303, 5317-5318, 5321,
 * 5324, 5329, 5342, 5344, 5350, 5358 and 5362 inside this function.  The
 * declarator, most of the conditions that open the brace blocks below and
 * several of the actions inside them are therefore absent from this copy;
 * what remains cannot be compiled or reasoned about branch by branch.
 * Restore the missing lines from the upstream file.
 */
5185 static void
5187 {
5188  int save_errno = errno;
5189 
5190  /*
5191  * We rely on the signal mechanism to have blocked all signals ... except
5192  * on Windows, which lacks sigaction(), so we have to do it manually.
5193  */
5194 #ifdef WIN32
5195  PG_SETMASK(&BlockSig);
5196 #endif
5197 
5198  /* Process background worker state change. */
 /* NOTE(review): the guarding condition (listing 5199) is missing. */
5200  {
5202  StartWorkerNeeded = true;
5203  }
5204 
5205  /*
5206  * RECOVERY_STARTED and BEGIN_HOT_STANDBY signals are ignored in
5207  * unexpected states. If the startup process quickly starts up, completes
5208  * recovery, exits, we might process the death of the startup process
5209  * first. We don't want to go back to recovery in that case.
5210  */
 /* NOTE(review): the guarding condition (listing 5211-5212) is missing. */
5213  {
5214  /* WAL redo has started. We're out of reinitialization. */
5215  FatalError = false;
5216  AbortStartTime = 0;
5217 
5218  /*
5219  * Crank up the background tasks. It doesn't matter if this fails,
5220  * we'll just try again later.
5221  */
 /*
  * NOTE(review): the statement after each of the next two Asserts
  * (listing 5223 and 5225) is missing.
  */
5222  Assert(CheckpointerPID == 0);
5224  Assert(BgWriterPID == 0);
5226 
5227  /*
5228  * Start the archiver if we're responsible for (re-)archiving received
5229  * files.
5230  */
5231  Assert(PgArchPID == 0);
5232  if (XLogArchivingAlways())
5233  PgArchPID = pgarch_start();
5234 
5235  /*
5236  * If we aren't planning to enter hot standby mode later, treat
5237  * RECOVERY_STARTED as meaning we're out of startup, and report status
5238  * accordingly.
5239  */
5240  if (!EnableHotStandby)
5241  {
5243 #ifdef USE_SYSTEMD
5244  sd_notify(0, "READY=1");
5245 #endif
5246  }
5247 
5248  pmState = PM_RECOVERY;
5249  }
 /* NOTE(review): the guarding condition (listing 5250-5251) is missing. */
5252  {
5253  /*
5254  * Likewise, start other special children as needed.
5255  */
5256  Assert(PgStatPID == 0);
5257  PgStatPID = pgstat_start();
5258 
5259  ereport(LOG,
5260  (errmsg("database system is ready to accept read only connections")));
5261 
5262  /* Report status */
5264 #ifdef USE_SYSTEMD
5265  sd_notify(0, "READY=1");
5266 #endif
5267 
5270 
5271  /* Some workers may be scheduled to start now */
5272  StartWorkerNeeded = true;
5273  }
5274 
5277 
 /* NOTE(review): the start of this condition (listing 5278) is missing. */
5279  PgArchPID != 0)
5280  {
5281  /*
5282  * Send SIGUSR1 to archiver process, to wake it up and begin archiving
5283  * next WAL file.
5284  */
5286  }
5287 
5288  /* Tell syslogger to rotate logfile if requested */
5289  if (SysLoggerPID != 0)
5290  {
5291  if (CheckLogrotateSignal())
5292  {
5295  }
5297  {
5299  }
5300  }
5301 
 /* NOTE(review): the guarding condition (listing 5302-5303) is missing. */
5304  {
5305  /*
5306  * Start one iteration of the autovacuum daemon, even if autovacuuming
5307  * is nominally not enabled. This is so we can have an active defense
5308  * against transaction ID wraparound. We set a flag for the main loop
5309  * to do it rather than trying to do it here --- this is because the
5310  * autovac process itself may send the signal, and we want to handle
5311  * that by launching another iteration as soon as the current one
5312  * completes.
5313  */
5314  start_autovac_launcher = true;
5315  }
5316 
5319  {
5320  /* The autovacuum launcher wants us to start a worker process. */
5322  }
5323 
5325  {
5326  /* Startup Process wants us to start the walreceiver process. */
5327  /* Start immediately if possible, else remember request for later. */
5328  WalReceiverRequested = true;
5330  }
5331 
5332  /*
5333  * Try to advance postmaster's state machine, if a child requests it.
5334  *
5335  * Be careful about the order of this action relative to sigusr1_handler's
5336  * other actions. Generally, this should be after other actions, in case
5337  * they have effects PostmasterStateMachine would need to know about.
5338  * However, we should do it before the CheckPromoteSignal step, which
5339  * cannot have any (immediate) effect on the state machine, but does
5340  * depend on what state we're in now.
5341  */
5343  {
5345  }
5346 
 /*
  * NOTE(review): the last clause of this condition (listing 5350) and the
  * action in its body (listing 5358) are missing.
  */
5347  if (StartupPID != 0 &&
5348  (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5349  pmState == PM_HOT_STANDBY) &&
5351  {
5352  /*
5353  * Tell startup process to finish recovery.
5354  *
5355  * Leave the promote signal file in place and let the Startup process
5356  * do the unlink.
5357  */
5359  }
5360 
5361 #ifdef WIN32
5363 #endif
5364 
5365  errno = save_errno;
5366 }
5367 
5368 /*
5369  * SIGTERM while processing startup packet.
5370  *
5371  * Running proc_exit() from a signal handler would be quite unsafe.
5372  * However, since we have not yet touched shared memory, we can just
5373  * pull the plug and exit without running any atexit handlers.
5374  *
5375  * One might be tempted to try to send a message, or log one, indicating
5376  * why we are disconnecting. However, that would be quite unsafe in itself.
5377  * Also, it seems undesirable to provide clues about the database's state
5378  * to a client that has not yet completed authentication, or even sent us
5379  * a startup packet.
5380  */
5381 static void
5383 {
5384  _exit(1);
5385 }
5386 
5387 /*
5388  * Dummy signal handler
5389  *
5390  * We use this for signals that we don't actually use in the postmaster,
5391  * but we do use in backends. If we were to SIG_IGN such signals in the
5392  * postmaster, then a newly started backend might drop a signal that arrives
5393  * before it's able to reconfigure its signal processing. (See notes in
5394  * tcop/postgres.c.)
5395  */
5396 static void
5398 {
5399 }
5400 
5401 /*
5402  * Timeout while processing startup packet.
5403  * As for process_startup_packet_die(), we exit via _exit(1).
5404  */
5405 static void
5407 {
5408  _exit(1);
5409 }
5410 
5411 
5412 /*
5413  * Generate a random cancel key.
5414  */
5415 static bool
5417 {
5418  return pg_strong_random(cancel_key, sizeof(int32));
5419 }
5420 
5421 /*
5422  * Count up number of child processes of specified types (dead_end children
5423  * are always excluded).
 *
 * "target" is compared against each entry's bkend_type with a bitwise AND,
 * so an entry is counted when it shares at least one bit with target.
 * Passing BACKEND_TYPE_ALL bypasses the per-entry type test entirely.
 *
 * Returns the number of matching entries found while walking BackendList.
5424  */
5425 static int
5426 CountChildren(int target)
5427 {
5428  dlist_iter iter;
5429  int cnt = 0;
5430 
5431  dlist_foreach(iter, &BackendList)
5432  {
5433  Backend *bp = dlist_container(Backend, elem, iter.cur);
5434 
 /* dead_end entries never contribute to the count, whatever target is */
5435  if (bp->dead_end)
5436  continue;
5437 
5438  /*
5439  * Since target == BACKEND_TYPE_ALL is the most common case, we test
5440  * it first and avoid touching shared memory for every child.
5441  */
5442  if (target != BACKEND_TYPE_ALL)
5443  {
5444  /*
5445  * Assign bkend_type for any recently announced WAL Sender
5446  * processes.
 *
 * NOTE(review): in this listing the condition that starts on the
 * next line is cut off -- its continuation and the statement it
 * guards are not shown.  Restore them from the authoritative
 * source before relying on this text.
5447  */
5448  if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
5451 
 /* skip entries whose type shares no bit with the requested mask */
5452  if (!(target & bp->bkend_type))
5453  continue;
5454  }
5455 
5456  cnt++;
5457  }
5458  return cnt;
5459 }
5460 
5461 
5462 /*
5463  * StartChildProcess -- start an auxiliary process for the postmaster
5464  *
5465  * "type" determines what kind of child will be started. All child types
5466  * initially go to AuxiliaryProcessMain, which will handle common setup.
5467  *
5468  * Return value of StartChildProcess is subprocess' PID, or 0 if failed
5469  * to start subprocess.
5470  */
5471 static pid_t
5473 {
5474  pid_t pid;
5475  char *av[10];
5476  int ac = 0;
5477  char typebuf[32];
5478 
5479  /*
5480  * Set up command-line arguments for subprocess
5481  */
5482  av[ac++] = "postgres";
5483 
5484 #ifdef EXEC_BACKEND
5485  av[ac++] = "--forkboot";
5486  av[ac++] = NULL; /* filled in by postmaster_forkexec */
5487 #endif
5488 
5489  snprintf(typebuf, sizeof(typebuf), "-x%d", type);
5490  av[ac++] = typebuf;
5491 
5492  av[ac] = NULL;
5493  Assert(ac < lengthof(av));
5494 
5495 #ifdef EXEC_BACKEND
5496  pid = postmaster_forkexec(ac, av);
5497 #else /* !EXEC_BACKEND */
5498  pid = fork_process();
5499 
5500  if (pid == 0) /* child */
5501  {
5503 
5504  /* Close the postmaster's sockets */
5505  ClosePostmasterPorts(false);
5506 
5507  /* Release postmaster's working memory context */
5510  PostmasterContext = NULL;
5511 
5512  AuxiliaryProcessMain(ac, av);
5513  ExitPostmaster(0);
5514  }
5515 #endif /* EXEC_BACKEND */
5516 
5517  if (pid < 0)
5518  {
5519  /* in parent, fork failed */
5520  int save_errno = errno;
5521 
5522  errno = save_errno;
5523  switch (type)
5524  {
5525  case StartupProcess:
5526  ereport(LOG,
5527  (errmsg("could not fork startup process: %m")));
5528  break;
5529  case BgWriterProcess:
5530  ereport(LOG,
5531  (errmsg("could not fork background writer process: %m")));
5532  break;
5533  case CheckpointerProcess:
5534  ereport(LOG,
5535  (errmsg("could not fork checkpointer process: %m")));
5536  break;
5537  case WalWriterProcess:
5538  ereport(LOG,
5539  (errmsg("could not fork WAL writer process: %m")));
5540  break;
5541  case WalReceiverProcess:
5542  ereport(LOG,
5543  (errmsg("could not fork WAL receiver process: %m")));
5544  break;
5545  default:
5546  ereport(LOG,
5547  (errmsg("could not fork process: %m")));
5548  break;
5549  }
5550 
5551  /*
5552  * fork failure is fatal during startup, but there's no need to choke
5553  * immediately if starting other child types fails.
5554  */
5555  if (type == StartupProcess)
5556  ExitPostmaster(1);
5557  return 0;
5558  }
5559 
5560  /*
5561  * in parent, successful fork
5562  */
5563  return pid;
5564 }
5565 
5566 /*
5567  * StartAutovacuumWorker
5568  * Start an autovac worker process.
5569  *
5570  * This function is here because it enters the resulting PID into the
5571  * postmaster's private backends list.
5572  *
5573  * NB -- this code very roughly matches BackendStartup.
5574  */
5575 static void
5577 {
5578  Backend *bn;
5579 
5580  /*
5581  * If not in condition to run a process, don't try, but handle it like a
5582  * fork failure. This does not normally happen, since the signal is only
5583  * supposed to be sent by autovacuum launcher when it's OK to do it, but
5584  * we have to check to avoid race-condition problems during DB state
5585  * changes.
5586  */
5588  {
5589  /*
5590  * Compute the cancel key that will be assigned to this session. We
5591  * probably don't need cancel keys for autovac workers, but we'd
5592  * better have something random in the field to prevent unfriendly
5593  * people from sending cancels to them.
5594  */
5596  {
5597  ereport(LOG,
5598  (errcode(ERRCODE_INTERNAL_ERROR),
5599  errmsg("could not generate random cancel key")));
5600  return;
5601  }
5602 
5603  bn = (Backend *) malloc(sizeof(Backend));
5604  if (bn)
5605  {
5606  bn->cancel_key = MyCancelKey;
5607 
5608  /* Autovac workers are not dead_end and need a child slot */
5609  bn->dead_end = false;
5611  bn->bgworker_notify = false;
5612 
5613  bn->pid = StartAutoVacWorker();
5614  if (bn->pid > 0)
5615  {
5617  dlist_push_head(&BackendList, &bn->elem);
5618 #ifdef EXEC_BACKEND
5619  ShmemBackendArrayAdd(bn);
5620 #endif
5621  /* all OK */
5622  return;
5623  }
5624 
5625  /*
5626  * fork failed, fall through to report -- actual error message was
5627  * logged by StartAutoVacWorker
5628  */
5630  free(bn);
5631  }
5632  else
5633  ereport(LOG,
5634  (errcode(ERRCODE_OUT_OF_MEMORY),
5635  errmsg("out of memory")));
5636  }
5637 
5638  /*
5639  * Report the failure to the launcher, if it's running. (If it's not, we
5640  * might not even be connected to shared memory, so don't try to call
5641  * AutoVacWorkerFailed.) Note that we also need to signal it so that it
5642  * responds to the condition, but we don't do that here, instead waiting
5643  * for ServerLoop to do it. This way we avoid a ping-pong signaling in
5644  * quick succession between the autovac launcher and postmaster in case
5645  * things get ugly.
5646  */
5647  if (AutoVacPID != 0)
5648  {
5650  avlauncher_needs_signal = true;
5651  }
5652 }
5653 
5654 /*
5655  * MaybeStartWalReceiver
5656  * Start the WAL receiver process, if not running and our state allows.
5657  *
5658  * Note: if WalReceiverPID is already nonzero, it might seem that we should
5659  * clear WalReceiverRequested. However, there's a race condition if the
5660  * walreceiver terminates and the startup process immediately requests a new
5661  * one: it's quite possible to get the signal for the request before reaping
5662  * the dead walreceiver process. Better to risk launching an extra
5663  * walreceiver than to miss launching one we need. (The walreceiver code
5664  * has logic to recognize that it should go away if not needed.)
5665  */
5666 static void
5668 {
5669  if (WalReceiverPID == 0 &&
5670  (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5671  pmState == PM_HOT_STANDBY) &&
5673  {
5675  if (WalReceiverPID != 0)
5676  WalReceiverRequested = false;
5677  /* else leave the flag set, so we'll try again later */
5678  }
5679 }
5680 
5681 
5682 /*
5683  * Create the opts file
5684  */
5685 static bool
5686 CreateOptsFile(int argc, char *argv[], char *fullprogname)
5687 {
5688  FILE *fp;
5689  int i;
5690 
5691 #define OPTS_FILE "postmaster.opts"
5692 
5693  if ((fp = fopen(OPTS_FILE, "w")) == NULL)
5694  {
5695  elog(LOG, "could not create file \"%s\": %m", OPTS_FILE);
5696  return false;
5697  }
5698 
5699  fprintf(fp, "%s", fullprogname);
5700  for (i = 1; i < argc; i++)
5701  fprintf(fp, " \"%s\"", argv[i]);
5702  fputs("\n", fp);
5703 
5704  if (fclose(fp))
5705  {
5706  elog(LOG, "could not write file \"%s\": %m", OPTS_FILE);
5707  return false;
5708  }
5709 
5710  return true;
5711 }
5712 
5713 
5714 /*
5715  * MaxLivePostmasterChildren
5716  *
5717  * This reports the number of entries needed in per-child-process arrays
5718  * (the PMChildFlags array, and if EXEC_BACKEND the ShmemBackendArray).
5719  * These arrays include regular backends, autovac workers, walsenders
5720  * and background workers, but not special children nor dead_end children.
5721  * This allows the arrays to have a fixed maximum size, to wit the same
5722  * too-many-children limit enforced by canAcceptConnections(). The exact value
5723  * isn't too critical as long as it's more than MaxBackends.
5724  */
5725 int
5727 {
5728  return 2 * (MaxConnections + autovacuum_max_workers + 1 +
5730 }
5731 
5732 /*
5733  * Connect background worker to a database.
5734  */
5735 void
5737 {
5739 
5740  /* XXX is this the right errcode? */
5742  ereport(FATAL,
5743  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5744  errmsg("database connection requirement not indicated during registration")));
5745 
5746  InitPostgres(dbname, InvalidOid, username, InvalidOid, NULL, (flags & BGWORKER_BYPASS_ALLOWCONN) != 0);
5747 
5748  /* it had better not have gotten out of "init" mode yet */
5749  if (!IsInitProcessingMode())
5750  ereport(ERROR,
5751  (errmsg("invalid processing mode in background worker")));
5753 }
5754 
5755 /*
5756  * Connect background worker to a database using OIDs.
5757  */
5758 void
5760 {
5762 
5763  /* XXX is this the right errcode? */
5765  ereport(FATAL,
5766  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5767  errmsg("database connection requirement not indicated during registration")));
5768 
5769  InitPostgres(NULL, dboid, NULL, useroid, NULL, (flags & BGWORKER_BYPASS_ALLOWCONN) != 0);
5770 
5771  /* it had better not have gotten out of "init" mode yet */
5772  if (!IsInitProcessingMode())
5773  ereport(ERROR,
5774  (errmsg("invalid processing mode in background worker")));
5776 }
5777 
5778 /*
5779  * Block/unblock signals in a background worker
5780  */
5781 void
5783 {
5784  PG_SETMASK(&BlockSig);
5785 }
5786 
5787 void
5789 {
5791 }
5792 
5793 #ifdef EXEC_BACKEND
5794 static pid_t
5795 bgworker_forkexec(int shmem_slot)
5796 {
5797  char *av[10];
5798  int ac = 0;
5799  char forkav[MAXPGPATH];
5800 
5801  snprintf(forkav, MAXPGPATH, "--forkbgworker=%d", shmem_slot);
5802 
5803  av[ac++] = "postgres";
5804  av[ac++] = forkav;
5805  av[ac++] = NULL; /* filled in by postmaster_forkexec */
5806  av[ac] = NULL;
5807 
5808  Assert(ac < lengthof(av));
5809 
5810  return postmaster_forkexec(ac, av);
5811 }
5812 #endif
5813 
5814 /*
5815  * Start a new bgworker.
5816  * Starting time conditions must have been checked already.
5817  *
5818  * Returns true on success, false on failure.
5819  * In either case, update the RegisteredBgWorker's state appropriately.
5820  *
5821  * This code is heavily based on autovacuum.c, q.v.
5822  */
5823 static bool
5825 {
5826  pid_t worker_pid;
5827 
5828  Assert(rw->rw_pid == 0);
5829 
5830  /*
5831  * Allocate and assign the Backend element. Note we must do this before
5832  * forking, so that we can handle failures (out of memory or child-process
5833  * slots) cleanly.
5834  *
5835  * Treat failure as though the worker had crashed. That way, the
5836  * postmaster will wait a bit before attempting to start it again; if we
5837  * tried again right away, most likely we'd find ourselves hitting the
5838  * same resource-exhaustion condition.
5839  */
5840  if (!assign_backendlist_entry(rw))
5841  {
5843  return false;
5844  }
5845 
5846  ereport(DEBUG1,
5847  (errmsg("starting background worker process \"%s\"",
5848  rw->rw_worker.bgw_name)));
5849 
5850 #ifdef EXEC_BACKEND
5851  switch ((worker_pid = bgworker_forkexec(rw->rw_shmem_slot)))
5852 #else
5853  switch ((worker_pid = fork_process()))
5854 #endif
5855  {
5856  case -1:
5857  /* in postmaster, fork failed ... */
5858  ereport(LOG,
5859  (errmsg("could not fork worker process: %m")));
5860  /* undo what assign_backendlist_entry did */
5862  rw->rw_child_slot = 0;
5863  free(rw->rw_backend);
5864  rw->rw_backend = NULL;
5865  /* mark entry as crashed, so we'll try again later */
5866