PostgreSQL Source Code  git master
postmaster.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * postmaster.c
4  * This program acts as a clearing house for requests to the
5  * POSTGRES system. Frontend programs send a startup message
6  * to the Postmaster and the postmaster uses the info in the
7  * message to set up a backend process.
8  *
9  * The postmaster also manages system-wide operations such as
10  * startup and shutdown. The postmaster itself doesn't do those
11  * operations, mind you --- it just forks off a subprocess to do them
12  * at the right times. It also takes care of resetting the system
13  * if a backend crashes.
14  *
15  * The postmaster process creates the shared memory and semaphore
16  * pools during startup, but as a rule does not touch them itself.
17  * In particular, it is not a member of the PGPROC array of backends
18  * and so it cannot participate in lock-manager operations. Keeping
19  * the postmaster away from shared memory operations makes it simpler
20  * and more reliable. The postmaster is almost always able to recover
21  * from crashes of individual backends by resetting shared memory;
22  * if it did much with shared memory then it would be prone to crashing
23  * along with the backends.
24  *
25  * When a request message is received, we now fork() immediately.
26  * The child process performs authentication of the request, and
27  * then becomes a backend if successful. This allows the auth code
28  * to be written in a simple single-threaded style (as opposed to the
29  * crufty "poor man's multitasking" code that used to be needed).
30  * More importantly, it ensures that blockages in non-multithreaded
31  * libraries like SSL or PAM cannot cause denial of service to other
32  * clients.
33  *
34  *
35  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
36  * Portions Copyright (c) 1994, Regents of the University of California
37  *
38  *
39  * IDENTIFICATION
40  * src/backend/postmaster/postmaster.c
41  *
42  * NOTES
43  *
44  * Initialization:
45  * The Postmaster sets up shared memory data structures
46  * for the backends.
47  *
48  * Synchronization:
49  * The Postmaster shares memory with the backends but should avoid
50  * touching shared memory, so as not to become stuck if a crashing
51  * backend screws up locks or shared memory. Likewise, the Postmaster
52  * should never block on messages from frontend clients.
53  *
54  * Garbage Collection:
55  * The Postmaster cleans up after backends if they have an emergency
56  * exit and/or core dump.
57  *
58  * Error Reporting:
59  * Use write_stderr() only for reporting "interactive" errors
60  * (essentially, bogus arguments on the command line). Once the
61  * postmaster is launched, use ereport().
62  *
63  *-------------------------------------------------------------------------
64  */
65 
66 #include "postgres.h"
67 
68 #include <unistd.h>
69 #include <signal.h>
70 #include <time.h>
71 #include <sys/wait.h>
72 #include <ctype.h>
73 #include <sys/stat.h>
74 #include <sys/socket.h>
75 #include <fcntl.h>
76 #include <sys/param.h>
77 #include <netdb.h>
78 #include <limits.h>
79 
80 #ifdef HAVE_SYS_SELECT_H
81 #include <sys/select.h>
82 #endif
83 
84 #ifdef USE_BONJOUR
85 #include <dns_sd.h>
86 #endif
87 
88 #ifdef USE_SYSTEMD
89 #include <systemd/sd-daemon.h>
90 #endif
91 
92 #ifdef HAVE_PTHREAD_IS_THREADED_NP
93 #include <pthread.h>
94 #endif
95 
96 #include "access/transam.h"
97 #include "access/xlog.h"
98 #include "bootstrap/bootstrap.h"
99 #include "catalog/pg_control.h"
100 #include "common/file_perm.h"
101 #include "common/ip.h"
102 #include "common/string.h"
103 #include "lib/ilist.h"
104 #include "libpq/auth.h"
105 #include "libpq/libpq.h"
106 #include "libpq/pqformat.h"
107 #include "libpq/pqsignal.h"
108 #include "pg_getopt.h"
109 #include "pgstat.h"
110 #include "port/pg_bswap.h"
111 #include "postmaster/autovacuum.h"
113 #include "postmaster/fork_process.h"
114 #include "postmaster/interrupt.h"
115 #include "postmaster/pgarch.h"
116 #include "postmaster/postmaster.h"
117 #include "postmaster/syslogger.h"
119 #include "replication/walsender.h"
120 #include "storage/fd.h"
121 #include "storage/ipc.h"
122 #include "storage/pg_shmem.h"
123 #include "storage/pmsignal.h"
124 #include "storage/proc.h"
125 #include "tcop/tcopprot.h"
126 #include "utils/builtins.h"
127 #include "utils/datetime.h"
128 #include "utils/memutils.h"
129 #include "utils/pidfile.h"
130 #include "utils/ps_status.h"
131 #include "utils/timeout.h"
132 #include "utils/timestamp.h"
133 #include "utils/varlena.h"
134 
135 #ifdef EXEC_BACKEND
136 #include "storage/spin.h"
137 #endif
138 
139 
140 /*
141  * Possible types of a backend. Beyond being the possible bkend_type values in
142  * struct bkend, these are OR-able request flag bits for SignalSomeChildren()
143  * and CountChildren().
144  */
145 #define BACKEND_TYPE_NORMAL 0x0001 /* normal backend */
146 #define BACKEND_TYPE_AUTOVAC 0x0002 /* autovacuum worker process */
147 #define BACKEND_TYPE_WALSND 0x0004 /* walsender process */
148 #define BACKEND_TYPE_BGWORKER 0x0008 /* bgworker process */
149 #define BACKEND_TYPE_ALL 0x000F /* OR of all the above */
150 
151 /*
152  * List of active backends (or child processes anyway; we don't actually
153  * know whether a given child has become a backend or is still in the
154  * authorization phase). This is used mainly to keep track of how many
155  * children we have and send them appropriate signals when necessary.
156  *
157  * As shown in the above set of backend types, this list includes not only
158  * "normal" client sessions, but also autovacuum workers, walsenders, and
159  * background workers. (Note that at the time of launch, walsenders are
160  * labeled BACKEND_TYPE_NORMAL; we relabel them to BACKEND_TYPE_WALSND
161  * upon noticing they've changed their PMChildFlags entry. Hence that check
162  * must be done before any operation that needs to distinguish walsenders
163  * from normal backends.)
164  *
165  * Also, "dead_end" children are in it: these are children launched just for
166  * the purpose of sending a friendly rejection message to a would-be client.
167  * We must track them because they are attached to shared memory, but we know
168  * they will never become live backends. dead_end children are not assigned a
169  * PMChildSlot. dead_end children have bkend_type NORMAL.
170  *
171  * "Special" children such as the startup, bgwriter and autovacuum launcher
172  * tasks are not in this list. They are tracked via StartupPID and other
173  * pid_t variables below. (Thus, there can't be more than one of any given
174  * "special" child process type. We use BackendList entries for any child
175  * process there can be more than one of.)
176  */
177 typedef struct bkend
178 {
179  pid_t pid; /* process id of this child */
180  int32 cancel_key; /* cancel key for cancel requests aimed at this backend */
181  int child_slot; /* PMChildSlot for this backend, if any (dead_end children are not assigned one) */
182  int bkend_type; /* child process flavor: one of the BACKEND_TYPE_* values above */
183  bool dead_end; /* is it going to send an error and quit? (such children have bkend_type NORMAL) */
184  bool bgworker_notify; /* gets bgworker start/stop notifications */
185  dlist_node elem; /* list link in BackendList */
186 } Backend;
187 
189 
190 #ifdef EXEC_BACKEND
191 static Backend *ShmemBackendArray;
192 #endif
193 
195 
196 
197 
198 /* The socket number we are listening for connections on */
200 
201 /* The directory names for Unix socket(s) */
203 
204 /* The TCP listen address(es) */
206 
207 /*
208  * ReservedBackends is the number of backends reserved for superuser use.
209  * This number is taken out of the pool size given by MaxConnections, so the
210  * number of backend slots available to non-superusers is
211  * (MaxConnections - ReservedBackends). Note what this really means is
212  * "if there are <= ReservedBackends connections available, only superusers
213  * can make new connections" --- pre-existing superuser connections don't
214  * count against the limit.
215  */
217 
218 /* The socket(s) we're listening to. */
219 #define MAXLISTEN 64
221 
222 /*
223  * These globals control the behavior of the postmaster in case some
224  * backend dumps core. Normally, it kills all peers of the dead backend
225  * and reinitializes shared memory. By specifying -T or -n, we can have
226  * the postmaster stop (rather than kill) peers and not reinitialize
227  * shared data structures. (Reinit is currently dead code, though.)
228  */
229 static bool Reinit = true; /* reinitialize shared memory after a crash; cleared by -n */
230 static bool SendStop = false; /* send SIGSTOP rather than SIGQUIT to peers of a crashed backend; set by -T */
231 
232 /* still more option variables */
233 bool EnableSSL = false;
234 
235 int PreAuthDelay = 0;
237 
238 bool log_hostname; /* for ps display and logging */
239 bool Log_connections = false;
240 bool Db_user_namespace = false;
241 
242 bool enable_bonjour = false;
245 
246 /* PIDs of special child processes; 0 when not running */
247 static pid_t StartupPID = 0,
256 
257 /* Startup process's status */
258 typedef enum
259 {
262  STARTUP_SIGNALED, /* we sent it a SIGQUIT or SIGKILL */
265 
267 
268 /* Startup/shutdown state */
269 #define NoShutdown 0
270 #define SmartShutdown 1
271 #define FastShutdown 2
272 #define ImmediateShutdown 3
273 
274 static int Shutdown = NoShutdown;
275 
276 static bool FatalError = false; /* T if recovering from backend crash */
277 
278 /*
279  * We use a simple state machine to control startup, shutdown, and
280  * crash recovery (which is rather like shutdown followed by startup).
281  *
282  * After doing all the postmaster initialization work, we enter PM_STARTUP
283  * state and the startup process is launched. The startup process begins by
284  * reading the control file and other preliminary initialization steps.
285  * In a normal startup, or after crash recovery, the startup process exits
286  * with exit code 0 and we switch to PM_RUN state. However, archive recovery
287  * is handled specially since it takes much longer and we would like to support
288  * hot standby during archive recovery.
289  *
290  * When the startup process is ready to start archive recovery, it signals the
291  * postmaster, and we switch to PM_RECOVERY state. The background writer and
292  * checkpointer are launched, while the startup process continues applying WAL.
293  * If Hot Standby is enabled, then, after reaching a consistent point in WAL
294  * redo, startup process signals us again, and we switch to PM_HOT_STANDBY
295  * state and begin accepting connections to perform read-only queries. When
296  * archive recovery is finished, the startup process exits with exit code 0
297  * and we switch to PM_RUN state.
298  *
299  * Normal child backends can only be launched when we are in PM_RUN or
300  * PM_HOT_STANDBY state. (connsAllowed can also restrict launching.)
301  * In other states we handle connection requests by launching "dead_end"
302  * child processes, which will simply send the client an error message and
303  * quit. (We track these in the BackendList so that we can know when they
304  * are all gone; this is important because they're still connected to shared
305  * memory, and would interfere with an attempt to destroy the shmem segment,
306  * possibly leading to SHMALL failure when we try to make a new one.)
307  * In PM_WAIT_DEAD_END state we are waiting for all the dead_end children
308  * to drain out of the system, and therefore stop accepting connection
309  * requests at all until the last existing child has quit (which hopefully
310  * will not be very long).
311  *
312  * Notice that this state variable does not distinguish *why* we entered
313  * states later than PM_RUN --- Shutdown and FatalError must be consulted
314  * to find that out. FatalError is never true in PM_RECOVERY, PM_HOT_STANDBY,
315  * or PM_RUN states, nor in PM_SHUTDOWN states (because we don't enter those
316  * states when trying to recover from a crash). It can be true in PM_STARTUP
317  * state, because we don't clear it until we've successfully started WAL redo.
318  */
319 typedef enum
320 {
321  PM_INIT, /* postmaster starting */
322  PM_STARTUP, /* waiting for startup subprocess */
323  PM_RECOVERY, /* in archive recovery mode */
324  PM_HOT_STANDBY, /* in hot standby mode */
325  PM_RUN, /* normal "database is alive" state */
326  PM_STOP_BACKENDS, /* need to stop remaining backends */
327  PM_WAIT_BACKENDS, /* waiting for live backends to exit */
328  PM_SHUTDOWN, /* waiting for checkpointer to do shutdown
329  * ckpt */
330  PM_SHUTDOWN_2, /* waiting for archiver and walsenders to
331  * finish */
332  PM_WAIT_DEAD_END, /* waiting for dead_end children to exit */
333  PM_NO_CHILDREN /* all important children have exited */
334 } PMState;
335 
337 
338 /*
339  * While performing a "smart shutdown", we restrict new connections but stay
340  * in PM_RUN or PM_HOT_STANDBY state until all the client backends are gone.
341  * connsAllowed is a sub-state indicator showing the active restriction.
342  * It is of no interest unless pmState is PM_RUN or PM_HOT_STANDBY.
343  */
344 typedef enum
345 {
346  ALLOW_ALL_CONNS, /* normal not-shutting-down state */
347  ALLOW_SUPERUSER_CONNS, /* only superusers can connect */
348  ALLOW_NO_CONNS /* no new connections allowed, period */
350 
352 
353 /* Start time of SIGKILL timeout during immediate shutdown or child crash */
354 /* Zero means timeout is not running */
355 static time_t AbortStartTime = 0;
356 
357 /* Length of said timeout */
358 #define SIGKILL_CHILDREN_AFTER_SECS 5
359 
360 static bool ReachedNormalRunning = false; /* T if we've reached PM_RUN */
361 
362 bool ClientAuthInProgress = false; /* T during new-client
363  * authentication */
364 
365 bool redirection_done = false; /* stderr redirected for syslogger? */
366 
367 /* received START_AUTOVAC_LAUNCHER signal */
368 static volatile sig_atomic_t start_autovac_launcher = false;
369 
370 /* the launcher needs to be signaled to communicate some condition */
371 static volatile bool avlauncher_needs_signal = false;
372 
373 /* received START_WALRECEIVER signal */
374 static volatile sig_atomic_t WalReceiverRequested = false;
375 
376 /* set when there's a worker that needs to be started up */
377 static volatile bool StartWorkerNeeded = true;
378 static volatile bool HaveCrashedWorker = false;
379 
380 #ifdef USE_SSL
381 /* Set when and if SSL has been initialized properly */
382 static bool LoadedSSL = false;
383 #endif
384 
385 #ifdef USE_BONJOUR
386 static DNSServiceRef bonjour_sdref = NULL;
387 #endif
388 
389 /*
390  * postmaster.c - function prototypes
391  */
392 static void CloseServerPorts(int status, Datum arg);
393 static void unlink_external_pid_file(int status, Datum arg);
394 static void getInstallationPaths(const char *argv0);
395 static void checkControlFile(void);
396 static Port *ConnCreate(int serverFd);
397 static void ConnFree(Port *port);
398 static void reset_shared(void);
399 static void SIGHUP_handler(SIGNAL_ARGS);
400 static void pmdie(SIGNAL_ARGS);
401 static void reaper(SIGNAL_ARGS);
402 static void sigusr1_handler(SIGNAL_ARGS);
404 static void dummy_handler(SIGNAL_ARGS);
405 static void StartupPacketTimeoutHandler(void);
406 static void CleanupBackend(int pid, int exitstatus);
407 static bool CleanupBackgroundWorker(int pid, int exitstatus);
408 static void HandleChildCrash(int pid, int exitstatus, const char *procname);
409 static void LogChildExit(int lev, const char *procname,
410  int pid, int exitstatus);
411 static void PostmasterStateMachine(void);
412 static void BackendInitialize(Port *port);
413 static void BackendRun(Port *port) pg_attribute_noreturn();
414 static void ExitPostmaster(int status) pg_attribute_noreturn();
415 static int ServerLoop(void);
416 static int BackendStartup(Port *port);
417 static int ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done);
418 static void SendNegotiateProtocolVersion(List *unrecognized_protocol_options);
419 static void processCancelRequest(Port *port, void *pkt);
420 static int initMasks(fd_set *rmask);
421 static void report_fork_failure_to_client(Port *port, int errnum);
422 static CAC_state canAcceptConnections(int backend_type);
423 static bool RandomCancelKey(int32 *cancel_key);
424 static void signal_child(pid_t pid, int signal);
425 static bool SignalSomeChildren(int signal, int targets);
426 static void TerminateChildren(int signal);
427 
428 #define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL)
429 
430 static int CountChildren(int target);
432 static void maybe_start_bgworkers(void);
433 static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
434 static pid_t StartChildProcess(AuxProcType type);
435 static void StartAutovacuumWorker(void);
436 static void MaybeStartWalReceiver(void);
437 static void InitPostmasterDeathWatchHandle(void);
438 
439 /*
440  * Is the archiver allowed to start up in the current postmaster state?
441  *
442  * It may start in PM_RUN if XLogArchivingActive(), and also during recovery
443  * (PM_RECOVERY or PM_HOT_STANDBY) if XLogArchivingAlways().
444  */
445 #define PgArchStartupAllowed() \
446  ((XLogArchivingActive() && pmState == PM_RUN) || \
447  (XLogArchivingAlways() && \
448  (pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY)))
449 
450 #ifdef EXEC_BACKEND
451 
452 #ifdef WIN32
453 #define WNOHANG 0 /* ignored, so any integer value will do */
454 
455 static pid_t waitpid(pid_t pid, int *exitstatus, int options);
456 static void WINAPI pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired);
457 
458 static HANDLE win32ChildQueue;
459 
460 typedef struct
461 {
462  HANDLE waitHandle;
463  HANDLE procHandle;
464  DWORD procId;
465 } win32_deadchild_waitinfo;
466 #endif /* WIN32 */
467 
468 static pid_t backend_forkexec(Port *port);
469 static pid_t internal_forkexec(int argc, char *argv[], Port *port);
470 
471 /* Type for a socket that can be inherited to a client process */
472 #ifdef WIN32
473 typedef struct
474 {
475  SOCKET origsocket; /* Original socket value, or PGINVALID_SOCKET
476  * if not a socket */
477  WSAPROTOCOL_INFO wsainfo;
478 } InheritableSocket;
479 #else
480 typedef int InheritableSocket;
481 #endif
482 
483 /*
484  * Structure containing all variables passed to exec'ed backends
485  */
486 typedef struct
487 {
488  Port port;
489  InheritableSocket portsocket;
490  char DataDir[MAXPGPATH];
493  int MyPMChildSlot;
494 #ifndef WIN32
495  unsigned long UsedShmemSegID;
496 #else
497  void *ShmemProtectiveRegion;
498  HANDLE UsedShmemSegID;
499 #endif
500  void *UsedShmemSegAddr;
503  Backend *ShmemBackendArray;
504 #ifndef HAVE_SPINLOCKS
506 #endif
515  InheritableSocket pgStatSock;
516  pid_t PostmasterPid;
520  bool redirection_done;
521  bool IsBinaryUpgrade;
522  int max_safe_fds;
523  int MaxBackends;
524 #ifdef WIN32
525  HANDLE PostmasterHandle;
526  HANDLE initial_signal_pipe;
527  HANDLE syslogPipe[2];
528 #else
529  int postmaster_alive_fds[2];
530  int syslogPipe[2];
531 #endif
532  char my_exec_path[MAXPGPATH];
533  char pkglib_path[MAXPGPATH];
534 } BackendParameters;
535 
536 static void read_backend_variables(char *id, Port *port);
537 static void restore_backend_variables(BackendParameters *param, Port *port);
538 
539 #ifndef WIN32
540 static bool save_backend_variables(BackendParameters *param, Port *port);
541 #else
542 static bool save_backend_variables(BackendParameters *param, Port *port,
543  HANDLE childProcess, pid_t childPid);
544 #endif
545 
546 static void ShmemBackendArrayAdd(Backend *bn);
547 static void ShmemBackendArrayRemove(Backend *bn);
548 #endif /* EXEC_BACKEND */
549 
550 #define StartupDataBase() StartChildProcess(StartupProcess)
551 #define StartBackgroundWriter() StartChildProcess(BgWriterProcess)
552 #define StartCheckpointer() StartChildProcess(CheckpointerProcess)
553 #define StartWalWriter() StartChildProcess(WalWriterProcess)
554 #define StartWalReceiver() StartChildProcess(WalReceiverProcess)
555 
556 /* Macros to check the wait status (st) of a child process */
557 #define EXIT_STATUS_0(st) ((st) == 0) /* whole wait status is zero */
558 #define EXIT_STATUS_1(st) (WIFEXITED(st) && WEXITSTATUS(st) == 1) /* exited normally with code 1 */
559 #define EXIT_STATUS_3(st) (WIFEXITED(st) && WEXITSTATUS(st) == 3) /* exited normally with code 3 */
560 
561 #ifndef WIN32
562 /*
563  * File descriptors for pipe used to monitor if postmaster is alive.
564  * First is POSTMASTER_FD_WATCH, second is POSTMASTER_FD_OWN.
565  */
566 int postmaster_alive_fds[2] = {-1, -1};
567 #else
568 /* Process handle of postmaster used for the same purpose on Windows */
569 HANDLE PostmasterHandle;
570 #endif
571 
572 /*
573  * Postmaster main entry point
574  */
575 void
576 PostmasterMain(int argc, char *argv[])
577 {
578  int opt;
579  int status;
580  char *userDoption = NULL;
581  bool listen_addr_saved = false;
582  int i;
583  char *output_config_variable = NULL;
584 
586 
588 
590 
591  /*
592  * We should not be creating any files or directories before we check the
593  * data directory (see checkDataDir()), but just in case set the umask to
594  * the most restrictive (owner-only) permissions.
595  *
596  * checkDataDir() will reset the umask based on the data directory
597  * permissions.
598  */
599  umask(PG_MODE_MASK_OWNER);
600 
601  /*
602  * By default, palloc() requests in the postmaster will be allocated in
603  * the PostmasterContext, which is space that can be recycled by backends.
604  * Allocated data that needs to be available to backends should be
605  * allocated in TopMemoryContext.
606  */
608  "Postmaster",
611 
612  /* Initialize paths to installation files */
613  getInstallationPaths(argv[0]);
614 
615  /*
616  * Set up signal handlers for the postmaster process.
617  *
618  * In the postmaster, we use pqsignal_pm() rather than pqsignal() (which
619  * is used by all child processes and client processes). That has a
620  * couple of special behaviors:
621  *
622  * 1. Except on Windows, we tell sigaction() to block all signals for the
623  * duration of the signal handler. This is faster than our old approach
624  * of blocking/unblocking explicitly in the signal handler, and it should
625  * also prevent excessive stack consumption if signals arrive quickly.
626  *
627  * 2. We do not set the SA_RESTART flag. This is because signals will be
628  * blocked at all times except when ServerLoop is waiting for something to
629  * happen, and during that window, we want signals to exit the select(2)
630  * wait so that ServerLoop can respond if anything interesting happened.
631  * On some platforms, signals marked SA_RESTART would not cause the
632  * select() wait to end.
633  *
634  * Child processes will generally want SA_RESTART, so pqsignal() sets that
635  * flag. We expect children to set up their own handlers before
636  * unblocking signals.
637  *
638  * CAUTION: when changing this list, check for side-effects on the signal
639  * handling setup of child processes. See tcop/postgres.c,
640  * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
641  * postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/pgstat.c,
642  * postmaster/syslogger.c, postmaster/bgworker.c and
643  * postmaster/checkpointer.c.
644  */
645  pqinitmask();
647 
648  pqsignal_pm(SIGHUP, SIGHUP_handler); /* reread config file and have
649  * children do same */
650  pqsignal_pm(SIGINT, pmdie); /* send SIGTERM and shut down */
651  pqsignal_pm(SIGQUIT, pmdie); /* send SIGQUIT and die */
652  pqsignal_pm(SIGTERM, pmdie); /* wait for children and shut down */
653  pqsignal_pm(SIGALRM, SIG_IGN); /* ignored */
654  pqsignal_pm(SIGPIPE, SIG_IGN); /* ignored */
655  pqsignal_pm(SIGUSR1, sigusr1_handler); /* message from child process */
656  pqsignal_pm(SIGUSR2, dummy_handler); /* unused, reserve for children */
657  pqsignal_pm(SIGCHLD, reaper); /* handle child termination */
658 
659  /*
660  * No other place in Postgres should touch SIGTTIN/SIGTTOU handling. We
661  * ignore those signals in a postmaster environment, so that there is no
662  * risk of a child process freezing up due to writing to stderr. But for
663  * a standalone backend, their default handling is reasonable. Hence, all
664  * child processes should just allow the inherited settings to stand.
665  */
666 #ifdef SIGTTIN
667  pqsignal_pm(SIGTTIN, SIG_IGN); /* ignored */
668 #endif
669 #ifdef SIGTTOU
670  pqsignal_pm(SIGTTOU, SIG_IGN); /* ignored */
671 #endif
672 
673  /* ignore SIGXFSZ, so that ulimit violations work like disk full */
674 #ifdef SIGXFSZ
675  pqsignal_pm(SIGXFSZ, SIG_IGN); /* ignored */
676 #endif
677 
678  /*
679  * Options setup
680  */
682 
683  opterr = 1;
684 
685  /*
686  * Parse command-line options. CAUTION: keep this in sync with
687  * tcop/postgres.c (the option sets should not conflict) and with the
688  * common help() function in main/main.c.
689  */
690  while ((opt = getopt(argc, argv, "B:bc:C:D:d:EeFf:h:ijk:lN:nOPp:r:S:sTt:W:-:")) != -1)
691  {
692  switch (opt)
693  {
694  case 'B':
695  SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
696  break;
697 
698  case 'b':
699  /* Undocumented flag used for binary upgrades */
700  IsBinaryUpgrade = true;
701  break;
702 
703  case 'C':
704  output_config_variable = strdup(optarg);
705  break;
706 
707  case 'D':
708  userDoption = strdup(optarg);
709  break;
710 
711  case 'd':
713  break;
714 
715  case 'E':
716  SetConfigOption("log_statement", "all", PGC_POSTMASTER, PGC_S_ARGV);
717  break;
718 
719  case 'e':
720  SetConfigOption("datestyle", "euro", PGC_POSTMASTER, PGC_S_ARGV);
721  break;
722 
723  case 'F':
724  SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
725  break;
726 
727  case 'f':
729  {
730  write_stderr("%s: invalid argument for option -f: \"%s\"\n",
731  progname, optarg);
732  ExitPostmaster(1);
733  }
734  break;
735 
736  case 'h':
737  SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
738  break;
739 
740  case 'i':
741  SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
742  break;
743 
744  case 'j':
745  /* only used by interactive backend */
746  break;
747 
748  case 'k':
749  SetConfigOption("unix_socket_directories", optarg, PGC_POSTMASTER, PGC_S_ARGV);
750  break;
751 
752  case 'l':
753  SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
754  break;
755 
756  case 'N':
757  SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
758  break;
759 
760  case 'n':
761  /* Don't reinit shared mem after abnormal exit */
762  Reinit = false;
763  break;
764 
765  case 'O':
766  SetConfigOption("allow_system_table_mods", "true", PGC_POSTMASTER, PGC_S_ARGV);
767  break;
768 
769  case 'P':
770  SetConfigOption("ignore_system_indexes", "true", PGC_POSTMASTER, PGC_S_ARGV);
771  break;
772 
773  case 'p':
775  break;
776 
777  case 'r':
778  /* only used by single-user backend */
779  break;
780 
781  case 'S':
783  break;
784 
785  case 's':
786  SetConfigOption("log_statement_stats", "true", PGC_POSTMASTER, PGC_S_ARGV);
787  break;
788 
789  case 'T':
790 
791  /*
792  * In the event that some backend dumps core, send SIGSTOP,
793  * rather than SIGQUIT, to all its peers. This lets the wily
794  * post_hacker collect core dumps from everyone.
795  */
796  SendStop = true;
797  break;
798 
799  case 't':
800  {
801  const char *tmp = get_stats_option_name(optarg);
802 
803  if (tmp)
804  {
806  }
807  else
808  {
809  write_stderr("%s: invalid argument for option -t: \"%s\"\n",
810  progname, optarg);
811  ExitPostmaster(1);
812  }
813  break;
814  }
815 
816  case 'W':
817  SetConfigOption("post_auth_delay", optarg, PGC_POSTMASTER, PGC_S_ARGV);
818  break;
819 
820  case 'c':
821  case '-':
822  {
823  char *name,
824  *value;
825 
826  ParseLongOption(optarg, &name, &value);
827  if (!value)
828  {
829  if (opt == '-')
830  ereport(ERROR,
831  (errcode(ERRCODE_SYNTAX_ERROR),
832  errmsg("--%s requires a value",
833  optarg)));
834  else
835  ereport(ERROR,
836  (errcode(ERRCODE_SYNTAX_ERROR),
837  errmsg("-c %s requires a value",
838  optarg)));
839  }
840 
842  free(name);
843  if (value)
844  free(value);
845  break;
846  }
847 
848  default:
849  write_stderr("Try \"%s --help\" for more information.\n",
850  progname);
851  ExitPostmaster(1);
852  }
853  }
854 
855  /*
856  * Postmaster accepts no non-option switch arguments.
857  */
858  if (optind < argc)
859  {
860  write_stderr("%s: invalid argument: \"%s\"\n",
861  progname, argv[optind]);
862  write_stderr("Try \"%s --help\" for more information.\n",
863  progname);
864  ExitPostmaster(1);
865  }
866 
867  /*
868  * Locate the proper configuration files and data directory, and read
869  * postgresql.conf for the first time.
870  */
871  if (!SelectConfigFiles(userDoption, progname))
872  ExitPostmaster(2);
873 
874  if (output_config_variable != NULL)
875  {
876  /*
877  * "-C guc" was specified, so print GUC's value and exit. No extra
878  * permission check is needed because the user is reading inside the
879  * data dir.
880  */
881  const char *config_val = GetConfigOption(output_config_variable,
882  false, false);
883 
884  puts(config_val ? config_val : "");
885  ExitPostmaster(0);
886  }
887 
888  /* Verify that DataDir looks reasonable */
889  checkDataDir();
890 
891  /* Check that pg_control exists */
893 
894  /* And switch working directory into it */
895  ChangeToDataDir();
896 
897  /*
898  * Check for invalid combinations of GUC settings.
899  */
901  {
902  write_stderr("%s: superuser_reserved_connections (%d) must be less than max_connections (%d)\n",
903  progname,
905  ExitPostmaster(1);
906  }
908  ereport(ERROR,
909  (errmsg("WAL archival cannot be enabled when wal_level is \"minimal\"")));
911  ereport(ERROR,
912  (errmsg("WAL streaming (max_wal_senders > 0) requires wal_level \"replica\" or \"logical\"")));
913 
914  /*
915  * Other one-time internal sanity checks can go here, if they are fast.
916  * (Put any slow processing further down, after postmaster.pid creation.)
917  */
918  if (!CheckDateTokenTables())
919  {
920  write_stderr("%s: invalid datetoken tables, please fix\n", progname);
921  ExitPostmaster(1);
922  }
923 
924  /*
925  * Now that we are done processing the postmaster arguments, reset
926  * getopt(3) library so that it will work correctly in subprocesses.
927  */
928  optind = 1;
929 #ifdef HAVE_INT_OPTRESET
930  optreset = 1; /* some systems need this too */
931 #endif
932 
933  /* For debugging: display postmaster environment */
934  {
935  extern char **environ;
936  char **p;
937 
938  ereport(DEBUG3,
939  (errmsg_internal("%s: PostmasterMain: initial environment dump:",
940  progname)));
941  ereport(DEBUG3,
942  (errmsg_internal("-----------------------------------------")));
943  for (p = environ; *p; ++p)
944  ereport(DEBUG3,
945  (errmsg_internal("\t%s", *p)));
946  ereport(DEBUG3,
947  (errmsg_internal("-----------------------------------------")));
948  }
949 
950  /*
951  * Create lockfile for data directory.
952  *
953  * We want to do this before we try to grab the input sockets, because the
954  * data directory interlock is more reliable than the socket-file
955  * interlock (thanks to whoever decided to put socket files in /tmp :-().
956  * For the same reason, it's best to grab the TCP socket(s) before the
957  * Unix socket(s).
958  *
959  * Also note that this internally sets up the on_proc_exit function that
960  * is responsible for removing both data directory and socket lockfiles;
961  * so it must happen before opening sockets so that at exit, the socket
962  * lockfiles go away after CloseServerPorts runs.
963  */
964  CreateDataDirLockFile(true);
965 
966  /*
967  * Read the control file (for error checking and config info).
968  *
969  * Since we verify the control file's CRC, this has a useful side effect
970  * on machines where we need a run-time test for CRC support instructions.
971  * The postmaster will do the test once at startup, and then its child
972  * processes will inherit the correct function pointer and not need to
973  * repeat the test.
974  */
976 
977  /*
978  * Register the apply launcher. Since it registers a background worker,
979  * it needs to be called before InitializeMaxBackends(), and it's probably
980  * a good idea to call it before any modules have had a chance to take the
981  * background worker slots.
982  */
984 
985  /*
986  * process any libraries that should be preloaded at postmaster start
987  */
989 
990  /*
991  * Initialize SSL library, if specified.
992  */
993 #ifdef USE_SSL
994  if (EnableSSL)
995  {
996  (void) secure_initialize(true);
997  LoadedSSL = true;
998  }
999 #endif
1000 
1001  /*
1002  * Now that loadable modules have had their chance to register background
1003  * workers, calculate MaxBackends.
1004  */
1006 
1007  /*
1008  * Set up shared memory and semaphores.
1009  */
1010  reset_shared();
1011 
1012  /*
1013  * Estimate number of openable files. This must happen after setting up
1014  * semaphores, because on some platforms semaphores count as open files.
1015  */
1016  set_max_safe_fds();
1017 
1018  /*
1019  * Set reference point for stack-depth checking.
1020  */
1021  set_stack_base();
1022 
1023  /*
1024  * Initialize pipe (or process handle on Windows) that allows children to
1025  * wake up from sleep on postmaster death.
1026  */
1028 
1029 #ifdef WIN32
1030 
1031  /*
1032  * Initialize I/O completion port used to deliver list of dead children.
1033  */
1034  win32ChildQueue = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 1);
1035  if (win32ChildQueue == NULL)
1036  ereport(FATAL,
1037  (errmsg("could not create I/O completion port for child queue")));
1038 #endif
1039 
1040 #ifdef EXEC_BACKEND
1041  /* Write out nondefault GUC settings for child processes to use */
1042  write_nondefault_variables(PGC_POSTMASTER);
1043 
1044  /*
1045  * Clean out the temp directory used to transmit parameters to child
1046  * processes (see internal_forkexec, below). We must do this before
1047  * launching any child processes, else we have a race condition: we could
1048  * remove a parameter file before the child can read it. It should be
1049  * safe to do so now, because we verified earlier that there are no
1050  * conflicting Postgres processes in this data directory.
1051  */
1053 #endif
1054 
1055  /*
1056  * Forcibly remove the files signaling a standby promotion request.
1057  * Otherwise, the existence of those files triggers a promotion too early,
1058  * whether a user wants that or not.
1059  *
1060  * This removal of files is usually unnecessary because they can exist
1061  * only during a few moments during a standby promotion. However there is
1062  * a race condition: if pg_ctl promote is executed and creates the files
1063  * during a promotion, the files can stay around even after the server is
1064  * brought up to be the primary. Then, if a new standby starts by using
1065  * the backup taken from the new primary, the files can exist at server
1066  * startup and must be removed in order to avoid an unexpected promotion.
1067  *
1068  * Note that promotion signal files need to be removed before the startup
1069  * process is invoked, because after that they can be used by the
1070  * postmaster's SIGUSR1 signal handler.
1071  */
1073 
1074  /* Do the same for logrotate signal file */
1076 
1077  /* Remove any outdated file holding the current log filenames. */
1078  if (unlink(LOG_METAINFO_DATAFILE) < 0 && errno != ENOENT)
1079  ereport(LOG,
1081  errmsg("could not remove file \"%s\": %m",
1083 
1084  /*
1085  * If enabled, start up syslogger collection subprocess
1086  */
1088 
1089  /*
1090  * Reset whereToSendOutput from DestDebug (its starting state) to
1091  * DestNone. This stops ereport from sending log messages to stderr unless
1092  * Log_destination permits. We don't do this until the postmaster is
1093  * fully launched, since startup failures may as well be reported to
1094  * stderr.
1095  *
1096  * If we are in fact disabling logging to stderr, first emit a log message
1097  * saying so, to provide a breadcrumb trail for users who may not remember
1098  * that their logging is configured to go somewhere else.
1099  */
1101  ereport(LOG,
1102  (errmsg("ending log output to stderr"),
1103  errhint("Future log output will go to log destination \"%s\".",
1105 
1107 
1108  /*
1109  * Report server startup in log. While we could emit this much earlier,
1110  * it seems best to do so after starting the log collector, if we intend
1111  * to use one.
1112  */
1113  ereport(LOG,
1114  (errmsg("starting %s", PG_VERSION_STR)));
1115 
1116  /*
1117  * Establish input sockets.
1118  *
1119  * First, mark them all closed, and set up an on_proc_exit function that's
1120  * charged with closing the sockets again at postmaster shutdown.
1121  */
1122  for (i = 0; i < MAXLISTEN; i++)
1124 
1126 
1127  if (ListenAddresses)
1128  {
1129  char *rawstring;
1130  List *elemlist;
1131  ListCell *l;
1132  int success = 0;
1133 
1134  /* Need a modifiable copy of ListenAddresses */
1135  rawstring = pstrdup(ListenAddresses);
1136 
1137  /* Parse string into list of hostnames */
1138  if (!SplitGUCList(rawstring, ',', &elemlist))
1139  {
1140  /* syntax error in list */
1141  ereport(FATAL,
1142  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1143  errmsg("invalid list syntax in parameter \"%s\"",
1144  "listen_addresses")));
1145  }
1146 
1147  foreach(l, elemlist)
1148  {
1149  char *curhost = (char *) lfirst(l);
1150 
1151  if (strcmp(curhost, "*") == 0)
1152  status = StreamServerPort(AF_UNSPEC, NULL,
1153  (unsigned short) PostPortNumber,
1154  NULL,
1156  else
1157  status = StreamServerPort(AF_UNSPEC, curhost,
1158  (unsigned short) PostPortNumber,
1159  NULL,
1160  ListenSocket, MAXLISTEN);
1161 
1162  if (status == STATUS_OK)
1163  {
1164  success++;
1165  /* record the first successful host addr in lockfile */
1166  if (!listen_addr_saved)
1167  {
1169  listen_addr_saved = true;
1170  }
1171  }
1172  else
1173  ereport(WARNING,
1174  (errmsg("could not create listen socket for \"%s\"",
1175  curhost)));
1176  }
1177 
1178  if (!success && elemlist != NIL)
1179  ereport(FATAL,
1180  (errmsg("could not create any TCP/IP sockets")));
1181 
1182  list_free(elemlist);
1183  pfree(rawstring);
1184  }
1185 
1186 #ifdef USE_BONJOUR
1187  /* Register for Bonjour only if we opened TCP socket(s) */
1189  {
1190  DNSServiceErrorType err;
1191 
1192  /*
1193  * We pass 0 for interface_index, which will result in registering on
1194  * all "applicable" interfaces. It's not entirely clear from the
1195  * DNS-SD docs whether this would be appropriate if we have bound to
1196  * just a subset of the available network interfaces.
1197  */
1198  err = DNSServiceRegister(&bonjour_sdref,
1199  0,
1200  0,
1201  bonjour_name,
1202  "_postgresql._tcp.",
1203  NULL,
1204  NULL,
1206  0,
1207  NULL,
1208  NULL,
1209  NULL);
1210  if (err != kDNSServiceErr_NoError)
1211  ereport(LOG,
1212  (errmsg("DNSServiceRegister() failed: error code %ld",
1213  (long) err)));
1214 
1215  /*
1216  * We don't bother to read the mDNS daemon's reply, and we expect that
1217  * it will automatically terminate our registration when the socket is
1218  * closed at postmaster termination. So there's nothing more to be
1219  * done here. However, the bonjour_sdref is kept around so that
1220  * forked children can close their copies of the socket.
1221  */
1222  }
1223 #endif
1224 
1225 #ifdef HAVE_UNIX_SOCKETS
1227  {
1228  char *rawstring;
1229  List *elemlist;
1230  ListCell *l;
1231  int success = 0;
1232 
1233  /* Need a modifiable copy of Unix_socket_directories */
1234  rawstring = pstrdup(Unix_socket_directories);
1235 
1236  /* Parse string into list of directories */
1237  if (!SplitDirectoriesString(rawstring, ',', &elemlist))
1238  {
1239  /* syntax error in list */
1240  ereport(FATAL,
1241  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1242  errmsg("invalid list syntax in parameter \"%s\"",
1243  "unix_socket_directories")));
1244  }
1245 
1246  foreach(l, elemlist)
1247  {
1248  char *socketdir = (char *) lfirst(l);
1249 
1250  status = StreamServerPort(AF_UNIX, NULL,
1251  (unsigned short) PostPortNumber,
1252  socketdir,
1253  ListenSocket, MAXLISTEN);
1254 
1255  if (status == STATUS_OK)
1256  {
1257  success++;
1258  /* record the first successful Unix socket in lockfile */
1259  if (success == 1)
1261  }
1262  else
1263  ereport(WARNING,
1264  (errmsg("could not create Unix-domain socket in directory \"%s\"",
1265  socketdir)));
1266  }
1267 
1268  if (!success && elemlist != NIL)
1269  ereport(FATAL,
1270  (errmsg("could not create any Unix-domain sockets")));
1271 
1272  list_free_deep(elemlist);
1273  pfree(rawstring);
1274  }
1275 #endif
1276 
1277  /*
1278  * check that we have some socket to listen on
1279  */
1280  if (ListenSocket[0] == PGINVALID_SOCKET)
1281  ereport(FATAL,
1282  (errmsg("no socket created for listening")));
1283 
1284  /*
1285  * If no valid TCP ports, write an empty line for listen address,
1286  * indicating the Unix socket must be used. Note that this line is not
1287  * added to the lock file until there is a socket backing it.
1288  */
1289  if (!listen_addr_saved)
1291 
1292  /*
1293  * Record postmaster options. We delay this till now to avoid recording
1294  * bogus options (eg, unusable port number).
1295  */
1296  if (!CreateOptsFile(argc, argv, my_exec_path))
1297  ExitPostmaster(1);
1298 
1299  /*
1300  * Write the external PID file if requested
1301  */
1302  if (external_pid_file)
1303  {
1304  FILE *fpidfile = fopen(external_pid_file, "w");
1305 
1306  if (fpidfile)
1307  {
1308  fprintf(fpidfile, "%d\n", MyProcPid);
1309  fclose(fpidfile);
1310 
1311  /* Make PID file world readable */
1312  if (chmod(external_pid_file, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) != 0)
1313  write_stderr("%s: could not change permissions of external PID file \"%s\": %s\n",
1315  }
1316  else
1317  write_stderr("%s: could not write external PID file \"%s\": %s\n",
1319 
1321  }
1322 
1323  /*
1324  * Remove old temporary files. At this point there can be no other
1325  * Postgres processes running in this directory, so this should be safe.
1326  */
1328 
1329  /*
1330  * Initialize stats collection subsystem (this does NOT start the
1331  * collector process!)
1332  */
1333  pgstat_init();
1334 
1335  /*
1336  * Initialize the autovacuum subsystem (again, no process start yet)
1337  */
1338  autovac_init();
1339 
1340  /*
1341  * Load configuration files for client authentication.
1342  */
1343  if (!load_hba())
1344  {
1345  /*
1346  * It makes no sense to continue if we fail to load the HBA file,
1347  * since there is no way to connect to the database in this case.
1348  */
1349  ereport(FATAL,
1350  (errmsg("could not load pg_hba.conf")));
1351  }
1352  if (!load_ident())
1353  {
1354  /*
1355  * We can start up without the IDENT file, although it means that you
1356  * cannot log in using any of the authentication methods that need a
1357  * user name mapping. load_ident() already logged the details of error
1358  * to the log.
1359  */
1360  }
1361 
1362 #ifdef HAVE_PTHREAD_IS_THREADED_NP
1363 
1364  /*
1365  * On macOS, libintl replaces setlocale() with a version that calls
1366  * CFLocaleCopyCurrent() when its second argument is "" and every relevant
1367  * environment variable is unset or empty. CFLocaleCopyCurrent() makes
1368  * the process multithreaded. The postmaster calls sigprocmask() and
1369  * calls fork() without an immediate exec(), both of which have undefined
1370  * behavior in a multithreaded program. A multithreaded postmaster is the
1371  * normal case on Windows, which offers neither fork() nor sigprocmask().
1372  */
1373  if (pthread_is_threaded_np() != 0)
1374  ereport(FATAL,
1375  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1376  errmsg("postmaster became multithreaded during startup"),
1377  errhint("Set the LC_ALL environment variable to a valid locale.")));
1378 #endif
1379 
1380  /*
1381  * Remember postmaster startup time
1382  */
1384 
1385  /*
1386  * Report postmaster status in the postmaster.pid file, to allow pg_ctl to
1387  * see what's happening.
1388  */
1390 
1391  /*
1392  * We're ready to rock and roll...
1393  */
1395  Assert(StartupPID != 0);
1397  pmState = PM_STARTUP;
1398 
1399  /* Some workers may be scheduled to start now */
1401 
1402  status = ServerLoop();
1403 
1404  /*
1405  * ServerLoop probably shouldn't ever return, but if it does, close down.
1406  */
1407  ExitPostmaster(status != STATUS_OK);
1408 
1409  abort(); /* not reached */
1410 }
1411 
1412 
1413 /*
1414  * on_proc_exit callback to close server's listen sockets
 *
 * Visits every ListenSocket[] slot from 0 to MAXLISTEN-1 and acts on the
 * slots that are not PGINVALID_SOCKET; afterwards the Unix-socket filesystem
 * entries are dealt with.  Socket lock files are intentionally not touched
 * here (see the closing comment in the body).
 *
 * NOTE(review): the embedded listing numbers skip 1417, 1431-1432 and 1441,
 * so the signature line, the body of the per-socket "if", and the statement
 * that removes the Unix socket files are not present in this copy.  The
 * comment in PostmasterMain above CreateDataDirLockFile() refers to this
 * callback as CloseServerPorts --- restore the missing lines from the
 * upstream file before compiling.
1415  */
1416 static void
1418 {
1419  int i;
1420 
1421  /*
1422  * First, explicitly close all the socket FDs. We used to just let this
1423  * happen implicitly at postmaster exit, but it's better to close them
1424  * before we remove the postmaster.pid lockfile; otherwise there's a race
1425  * condition if a new postmaster wants to re-use the TCP port number.
1426  */
1427  for (i = 0; i < MAXLISTEN; i++)
1428  {
1429  if (ListenSocket[i] != PGINVALID_SOCKET)
1430  {
 /* NOTE(review): per-socket close statements (listing 1431-1432) missing */
1433  }
1434  }
1435 
1436  /*
1437  * Next, remove any filesystem entries for Unix sockets. To avoid race
1438  * conditions against incoming postmasters, this must happen after closing
1439  * the sockets and before removing lock files.
1440  */
 /* NOTE(review): the removal call itself (listing 1441) is missing here */
1442 
1443  /*
1444  * We don't do anything about socket lock files here; those will be
1445  * removed in a later on_proc_exit callback.
1446  */
1447 }
1448 
1449 /*
1450  * on_proc_exit callback to delete external_pid_file
 *
 * Does nothing when external_pid_file is a null pointer.  Otherwise the file
 * is unlink()ed; the result of unlink() is not checked, so removal is
 * best-effort.
 *
 * NOTE(review): the embedded listing numbers skip 1453, i.e. the line that
 * carries the function name and parameter list is missing from this copy.
1451  */
1452 static void
1454 {
1455  if (external_pid_file)
1456  unlink(external_pid_file);
1457 }
1458 
1459 
1460 /*
1461  * Compute and check the directory paths to files that are part of the
1462  * installation (as deduced from the postgres executable's own location)
 *
 * argv0 is handed to find_my_exec() to fill in my_exec_path; failure to
 * locate the executable is reported with ereport(FATAL).  In EXEC_BACKEND
 * builds, find_other_exec() must additionally locate a "postgres" binary
 * matching PG_BACKEND_VERSIONSTR, storing it in postgres_exec_path.  Finally
 * the directory named by pkglib_path must be openable with AllocateDir();
 * if it is not, ereport(ERROR) is raised with a hint naming my_exec_path.
 *
 * NOTE(review): the embedded listing numbers skip 1465, 1487 and 1499, so the
 * signature line, the statement that fills in pkglib_path, and the first
 * argument of the ereport(ERROR, ...) call are missing from this copy.
1463  */
1464 static void
1466 {
1467  DIR *pdir;
1468 
1469  /* Locate the postgres executable itself */
1470  if (find_my_exec(argv0, my_exec_path) < 0)
1471  ereport(FATAL,
1472  (errmsg("%s: could not locate my own executable path", argv0)));
1473 
1474 #ifdef EXEC_BACKEND
1475  /* Locate executable backend before we change working directory */
1476  if (find_other_exec(argv0, "postgres", PG_BACKEND_VERSIONSTR,
1477  postgres_exec_path) < 0)
1478  ereport(FATAL,
1479  (errmsg("%s: could not locate matching postgres executable",
1480  argv0)));
1481 #endif
1482 
1483  /*
1484  * Locate the pkglib directory --- this has to be set early in case we try
1485  * to load any modules from it in response to postgresql.conf entries.
1486  */
 /* NOTE(review): the statement computing pkglib_path (listing 1487) is missing */
1488 
1489  /*
1490  * Verify that there's a readable directory there; otherwise the Postgres
1491  * installation is incomplete or corrupt. (A typical cause of this
1492  * failure is that the postgres executable has been moved or hardlinked to
1493  * some directory that's not a sibling of the installation lib/
1494  * directory.)
1495  */
1496  pdir = AllocateDir(pkglib_path);
1497  if (pdir == NULL)
1498  ereport(ERROR,
 /* NOTE(review): opening "(" and first error field (listing 1499) missing */
1500  errmsg("could not open directory \"%s\": %m",
1501  pkglib_path),
1502  errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
1503  my_exec_path)));
 /* The directory was opened only as an existence/readability probe. */
1504  FreeDir(pdir);
1505 
1506  /*
1507  * XXX is it worth similarly checking the share/ directory? If the lib/
1508  * directory is there, then share/ probably is too.
1509  */
1510 }
1511 
1512 /*
1513  * Check that pg_control exists in the correct location in the data directory.
1514  *
1515  * No attempt is made to validate the contents of pg_control here. This is
1516  * just a sanity check to see if we are looking at a real data directory.
 *
 * The file probed is "<DataDir>/global/pg_control", opened via AllocateFile()
 * in PG_BINARY_R mode.  If it cannot be opened, a message including
 * strerror(errno) is written with write_stderr() and ExitPostmaster(2) is
 * called; otherwise the handle is released again with FreeFile().
 *
 * NOTE(review): the embedded listing numbers skip 1519, i.e. the line that
 * carries the function name and parameter list is missing from this copy.
1517  */
1518 static void
1520 {
1521  char path[MAXPGPATH];
1522  FILE *fp;
1523 
1524  snprintf(path, sizeof(path), "%s/global/pg_control", DataDir);
1525 
1526  fp = AllocateFile(path, PG_BINARY_R);
1527  if (fp == NULL)
1528  {
1529  write_stderr("%s: could not find the database system\n"
1530  "Expected to find it in the directory \"%s\",\n"
1531  "but could not open file \"%s\": %s\n",
1532  progname, DataDir, path, strerror(errno));
1533  ExitPostmaster(2);
1534  }
1535  FreeFile(fp);
1536 }
1537 
1538 /*
1539  * Determine how long we should let ServerLoop sleep.
1540  *
1541  * In normal conditions we wait at most one minute, to ensure that the other
1542  * background tasks handled by ServerLoop get done even when no requests are
1543  * arriving. However, if there are background workers waiting to be started,
1544  * we don't actually sleep so that they are quickly serviced. Other exception
1545  * cases are as shown in the code.
 *
 * timeout is an output parameter: both tv_sec and tv_usec are assigned on
 * every path through the function.  The possible results visible here are:
 *	- time remaining until SIGKILL_CHILDREN_AFTER_SECS has elapsed since
 *	  AbortStartTime, floored at 0 (first branch, AbortStartTime != 0);
 *	- zero, when StartWorkerNeeded is set;
 *	- the time until the earliest crashed-worker restart, capped at 60 s;
 *	- 60 s otherwise.
 *
 * NOTE(review): the embedded listing numbers skip 1557, 1592, 1602 and 1621,
 * so the second half of the first "if" condition, the list-iteration header,
 * the first half of the "forget this worker" condition, and the call that
 * computes secs/microsecs are missing from this copy.
1546  */
1547 static void
1548 DetermineSleepTime(struct timeval *timeout)
1549 {
 /* 0 doubles as "no crashed worker needs a wakeup" (tested further down) */
1550  TimestampTz next_wakeup = 0;
1551 
1552  /*
1553  * Normal case: either there are no background workers at all, or we're in
1554  * a shutdown sequence (during which we ignore bgworkers altogether).
1555  */
1556  if (Shutdown > NoShutdown ||
 /* NOTE(review): remainder of this condition (listing 1557) is missing */
1558  {
1559  if (AbortStartTime != 0)
1560  {
1561  /* time left to abort; clamp to 0 in case it already expired */
1562  timeout->tv_sec = SIGKILL_CHILDREN_AFTER_SECS -
1563  (time(NULL) - AbortStartTime);
1564  timeout->tv_sec = Max(timeout->tv_sec, 0);
1565  timeout->tv_usec = 0;
1566  }
1567  else
1568  {
1569  timeout->tv_sec = 60;
1570  timeout->tv_usec = 0;
1571  }
1572  return;
1573  }
1574 
 /* A worker is waiting to be started: do not sleep at all. */
1575  if (StartWorkerNeeded)
1576  {
1577  timeout->tv_sec = 0;
1578  timeout->tv_usec = 0;
1579  return;
1580  }
1581 
1582  if (HaveCrashedWorker)
1583  {
1584  slist_mutable_iter siter;
1585 
1586  /*
1587  * When there are crashed bgworkers, we sleep just long enough that
1588  * they are restarted when they request to be. Scan the list to
1589  * determine the minimum of all wakeup times according to most recent
1590  * crash time and requested restart interval.
1591  */
 /* NOTE(review): the loop header using siter (listing 1592) is missing */
1593  {
1594  RegisteredBgWorker *rw;
1595  TimestampTz this_wakeup;
1596 
1597  rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
1598 
 /* rw_crashed_at == 0: this worker has no crash recorded, skip it */
1599  if (rw->rw_crashed_at == 0)
1600  continue;
1601 
 /* NOTE(review): start of this "if" condition (listing 1602) is missing */
1603  || rw->rw_terminate)
1604  {
1605  ForgetBackgroundWorker(&siter);
1606  continue;
1607  }
1608 
 /* bgw_restart_time is multiplied by 1000 to get milliseconds */
1609  this_wakeup = TimestampTzPlusMilliseconds(rw->rw_crashed_at,
1610  1000L * rw->rw_worker.bgw_restart_time);
 /* keep the earliest wakeup seen so far */
1611  if (next_wakeup == 0 || this_wakeup < next_wakeup)
1612  next_wakeup = this_wakeup;
1613  }
1614  }
1615 
1616  if (next_wakeup != 0)
1617  {
1618  long secs;
1619  int microsecs;
1620 
 /* NOTE(review): first line of the call filling secs/microsecs (1621) missing */
1622  &secs, &microsecs);
1623  timeout->tv_sec = secs;
1624  timeout->tv_usec = microsecs;
1625 
1626  /* Ensure we don't exceed one minute */
1627  if (timeout->tv_sec > 60)
1628  {
1629  timeout->tv_sec = 60;
1630  timeout->tv_usec = 0;
1631  }
1632  }
1633  else
1634  {
1635  timeout->tv_sec = 60;
1636  timeout->tv_usec = 0;
1637  }
1638 }
1639 
1640 /*
1641  * Main idle loop of postmaster
1642  *
1643  * NB: Needs to be called with signals blocked
 *
 * Each iteration waits for activity on the listen sockets (or just sleeps
 * 100 ms in PM_WAIT_DEAD_END state), hands every ready socket to
 * ConnCreate()/BackendStartup(), then checks whether auxiliary processes
 * need to be (re)started and performs the periodic lock-file and
 * socket-file maintenance.
 *
 * The only return visible in this function is STATUS_ERROR, taken when
 * select() fails with an errno other than EINTR or EWOULDBLOCK; otherwise
 * the for (;;) loop does not terminate.
 *
 * NOTE(review): this copy is missing the function-name line (listing 1646)
 * and roughly twenty interior lines (1677, 1692, 1705, 1745, 1753, 1756,
 * 1758, 1767, 1776, 1779, 1794, 1798, 1802-1803, 1806-1807, 1838, 1843,
 * 1864, 1877); several "if" statements below therefore have no visible body
 * or only part of their condition.  PostmasterMain calls this function as
 * ServerLoop().  Restore the gaps from the upstream file before compiling.
1644  */
1645 static int
1647 {
1648  fd_set readmask;
1649  int nSockets;
1650  time_t last_lockfile_recheck_time,
1651  last_touch_time;
1652 
1653  last_lockfile_recheck_time = last_touch_time = time(NULL);
1654 
 /* readmask is built once; nSockets is the nfds value passed to select() */
1655  nSockets = initMasks(&readmask);
1656 
1657  for (;;)
1658  {
1659  fd_set rmask;
1660  int selres;
1661  time_t now;
1662 
1663  /*
1664  * Wait for a connection request to arrive.
1665  *
1666  * We block all signals except while sleeping. That makes it safe for
1667  * signal handlers, which again block all signals while executing, to
1668  * do nontrivial work.
1669  *
1670  * If we are in PM_WAIT_DEAD_END state, then we don't want to accept
1671  * any new connections, so we don't call select(), and just sleep.
1672  */
 /* work on a fresh copy each time; rmask is what select() gets to modify */
1673  memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
1674 
1675  if (pmState == PM_WAIT_DEAD_END)
1676  {
 /* NOTE(review): listing 1677 missing; presumably signals are unblocked here */
1678 
1679  pg_usleep(100000L); /* 100 msec seems reasonable */
1680  selres = 0;
1681 
1682  PG_SETMASK(&BlockSig);
1683  }
1684  else
1685  {
1686  /* must set timeout each time; some OSes change it! */
1687  struct timeval timeout;
1688 
1689  /* Needs to run with blocked signals! */
1690  DetermineSleepTime(&timeout);
1691 
 /* NOTE(review): listing 1692 missing; presumably signals are unblocked here */
1693 
1694  selres = select(nSockets, &rmask, NULL, NULL, &timeout);
1695 
1696  PG_SETMASK(&BlockSig);
1697  }
1698 
1699  /* Now check the select() result */
1700  if (selres < 0)
1701  {
 /* EINTR / EWOULDBLOCK are tolerated: fall through and loop again */
1702  if (errno != EINTR && errno != EWOULDBLOCK)
1703  {
1704  ereport(LOG,
1706  errmsg("select() failed in postmaster: %m")));
1707  return STATUS_ERROR;
1708  }
1709  }
1710 
1711  /*
1712  * New connection pending on any of our sockets? If so, fork a child
1713  * process to deal with it.
1714  */
1715  if (selres > 0)
1716  {
1717  int i;
1718 
1719  for (i = 0; i < MAXLISTEN; i++)
1720  {
 /* the scan stops at the first unused slot, as in initMasks() */
1721  if (ListenSocket[i] == PGINVALID_SOCKET)
1722  break;
1723  if (FD_ISSET(ListenSocket[i], &rmask))
1724  {
1725  Port *port;
1726 
1727  port = ConnCreate(ListenSocket[i]);
 /* a NULL port is silently skipped here */
1728  if (port)
1729  {
1730  BackendStartup(port);
1731 
1732  /*
1733  * We no longer need the open socket or port structure
1734  * in this process
1735  */
1736  StreamClose(port->sock);
1737  ConnFree(port);
1738  }
1739  }
1740  }
1741  }
1742 
1743  /* If we have lost the log collector, try to start a new one */
1744  if (SysLoggerPID == 0 && Logging_collector)
1746 
1747  /*
1748  * If no background writer process is running, and we are not in a
1749  * state that prevents it, start one. It doesn't matter if this
1750  * fails, we'll just try again later. Likewise for the checkpointer.
1751  */
1752  if (pmState == PM_RUN || pmState == PM_RECOVERY ||
1754  {
1755  if (CheckpointerPID == 0)
1757  if (BgWriterPID == 0)
1759  }
1760 
1761  /*
1762  * Likewise, if we have lost the walwriter process, try to start a new
1763  * one. But this is needed only in normal operation (else we cannot
1764  * be writing any new WAL).
1765  */
1766  if (WalWriterPID == 0 && pmState == PM_RUN)
1768 
1769  /*
1770  * If we have lost the autovacuum launcher, try to start a new one. We
1771  * don't want autovacuum to run in binary upgrade mode because
1772  * autovacuum might update relfrozenxid for empty tables before the
1773  * physical files are put in place.
1774  */
1775  if (!IsBinaryUpgrade && AutoVacPID == 0 &&
1777  pmState == PM_RUN)
1778  {
1780  if (AutoVacPID != 0)
1781  start_autovac_launcher = false; /* signal processed */
1782  }
1783 
1784  /* If we have lost the stats collector, try to start a new one */
1785  if (PgStatPID == 0 &&
1786  (pmState == PM_RUN || pmState == PM_HOT_STANDBY))
1787  PgStatPID = pgstat_start();
1788 
1789  /* If we have lost the archiver, try to start a new one. */
1790  if (PgArchPID == 0 && PgArchStartupAllowed())
1791  PgArchPID = pgarch_start();
1792 
1793  /* If we need to signal the autovacuum launcher, do so now */
1795  {
1796  avlauncher_needs_signal = false;
1797  if (AutoVacPID != 0)
1799  }
1800 
1801  /* If we need to start a WAL receiver, try to do that now */
1804 
1805  /* Get other worker processes running, if needed */
1808 
1809 #ifdef HAVE_PTHREAD_IS_THREADED_NP
1810 
1811  /*
1812  * With assertions enabled, check regularly for appearance of
1813  * additional threads. All builds check at start and exit.
1814  */
1815  Assert(pthread_is_threaded_np() == 0);
1816 #endif
1817 
1818  /*
1819  * Lastly, check to see if it's time to do some things that we don't
1820  * want to do every single time through the loop, because they're a
1821  * bit expensive. Note that there's up to a minute of slop in when
1822  * these tasks will be performed, since DetermineSleepTime() will let
1823  * us sleep at most that long; except for SIGKILL timeout which has
1824  * special-case logic there.
1825  */
1826  now = time(NULL);
1827 
1828  /*
1829  * If we already sent SIGQUIT to children and they are slow to shut
1830  * down, it's time to send them SIGKILL. This doesn't happen
1831  * normally, but under certain conditions backends can get stuck while
1832  * shutting down. This is a last measure to get them unwedged.
1833  *
1834  * Note we also do this during recovery from a process crash.
1835  */
1836  if ((Shutdown >= ImmediateShutdown || (FatalError && !SendStop)) &&
1837  AbortStartTime != 0 &&
1839  {
1840  /* We were gentle with them before. Not anymore */
1841  ereport(LOG,
1842  (errmsg("issuing SIGKILL to recalcitrant children")));
1844  /* reset flag so we don't SIGKILL again */
1845  AbortStartTime = 0;
1846  }
1847 
1848  /*
1849  * Once a minute, verify that postmaster.pid hasn't been removed or
1850  * overwritten. If it has, we force a shutdown. This avoids having
1851  * postmasters and child processes hanging around after their database
1852  * is gone, and maybe causing problems if a new database cluster is
1853  * created in the same place. It also provides some protection
1854  * against a DBA foolishly removing postmaster.pid and manually
1855  * starting a new postmaster. Data corruption is likely to ensue from
1856  * that anyway, but we can minimize the damage by aborting ASAP.
1857  */
1858  if (now - last_lockfile_recheck_time >= 1 * SECS_PER_MINUTE)
1859  {
1860  if (!RecheckDataDirLockFile())
1861  {
1862  ereport(LOG,
1863  (errmsg("performing immediate shutdown because data directory lock file is invalid")));
 /* NOTE(review): the action that forces the shutdown (listing 1864) is missing */
1865  }
 /* the timer is reset whether or not the recheck succeeded */
1866  last_lockfile_recheck_time = now;
1867  }
1868 
1869  /*
1870  * Touch Unix socket and lock files every 58 minutes, to ensure that
1871  * they are not removed by overzealous /tmp-cleaning tasks. We assume
1872  * no one runs cleaners with cutoff times of less than an hour ...
1873  */
1874  if (now - last_touch_time >= 58 * SECS_PER_MINUTE)
1875  {
1876  TouchSocketFiles();
1878  last_touch_time = now;
1879  }
1880  }
1881 }
1882 
1883 /*
1884  * Initialise the masks for select() for the ports we are listening on.
1885  * Return the number of sockets to listen on.
1886  */
1887 static int
1888 initMasks(fd_set *rmask)
1889 {
1890  int maxsock = -1;
1891  int i;
1892 
1893  FD_ZERO(rmask);
1894 
1895  for (i = 0; i < MAXLISTEN; i++)
1896  {
1897  int fd = ListenSocket[i];
1898 
1899  if (fd == PGINVALID_SOCKET)
1900  break;
1901  FD_SET(fd, rmask);
1902 
1903  if (fd > maxsock)
1904  maxsock = fd;
1905  }
1906 
1907  return maxsock + 1;
1908 }
1909 
1910 
1911 /*
1912  * Read a client's startup packet and do something according to it.
1913  *
1914  * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1915  * not return at all.
1916  *
1917  * (Note that ereport(FATAL) stuff is sent to the client, so only use it
1918  * if that's what you want. Return STATUS_ERROR if you don't want to
1919  * send anything to the client, which would typically be appropriate
1920  * if we detect a communications failure.)
1921  *
1922  * Set ssl_done and/or gss_done when negotiation of an encrypted layer
1923  * (currently, TLS or GSSAPI) is completed. A successful negotiation of either
1924  * encryption layer sets both flags, but a rejected negotiation sets only the
1925  * flag for that layer, since the client may wish to try the other one. We
1926  * should make no assumption here about the order in which the client may make
1927  * requests.
1928  */
1929 static int
1930 ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done)
1931 {
1932  int32 len;
1933  void *buf;
1934  ProtocolVersion proto;
1935  MemoryContext oldcontext;
1936 
1937  pq_startmsgread();
1938 
1939  /*
1940  * Grab the first byte of the length word separately, so that we can tell
1941  * whether we have no data at all or an incomplete packet. (This might
1942  * sound inefficient, but it's not really, because of buffering in
1943  * pqcomm.c.)
1944  */
1945  if (pq_getbytes((char *) &len, 1) == EOF)
1946  {
1947  /*
1948  * If we get no data at all, don't clutter the log with a complaint;
1949  * such cases often occur for legitimate reasons. An example is that
1950  * we might be here after responding to NEGOTIATE_SSL_CODE, and if the
1951  * client didn't like our response, it'll probably just drop the
1952  * connection. Service-monitoring software also often just opens and
1953  * closes a connection without sending anything. (So do port
1954  * scanners, which may be less benign, but it's not really our job to
1955  * notice those.)
1956  */
1957  return STATUS_ERROR;
1958  }
1959 
1960  if (pq_getbytes(((char *) &len) + 1, 3) == EOF)
1961  {
1962  /* Got a partial length word, so bleat about that */
1963  if (!ssl_done && !gss_done)
1965  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1966  errmsg("incomplete startup packet")));
1967  return STATUS_ERROR;
1968  }
1969 
1970  len = pg_ntoh32(len);
1971  len -= 4;
1972 
1973  if (len < (int32) sizeof(ProtocolVersion) ||
1975  {
1977  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1978  errmsg("invalid length of startup packet")));
1979  return STATUS_ERROR;
1980  }
1981 
1982  /*
1983  * Allocate at least the size of an old-style startup packet, plus one
1984  * extra byte, and make sure all are zeroes. This ensures we will have
1985  * null termination of all strings, in both fixed- and variable-length
1986  * packet layouts.
1987  */
1988  if (len <= (int32) sizeof(StartupPacket))
1989  buf = palloc0(sizeof(StartupPacket) + 1);
1990  else
1991  buf = palloc0(len + 1);
1992 
1993  if (pq_getbytes(buf, len) == EOF)
1994  {
1996  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1997  errmsg("incomplete startup packet")));
1998  return STATUS_ERROR;
1999  }
2000  pq_endmsgread();
2001 
2002  /*
2003  * The first field is either a protocol version number or a special
2004  * request code.
2005  */
2006  port->proto = proto = pg_ntoh32(*((ProtocolVersion *) buf));
2007 
2008  if (proto == CANCEL_REQUEST_CODE)
2009  {
2010  processCancelRequest(port, buf);
2011  /* Not really an error, but we don't want to proceed further */
2012  return STATUS_ERROR;
2013  }
2014 
2015  if (proto == NEGOTIATE_SSL_CODE && !ssl_done)
2016  {
2017  char SSLok;
2018 
2019 #ifdef USE_SSL
2020  /* No SSL when disabled or on Unix sockets */
2021  if (!LoadedSSL || IS_AF_UNIX(port->laddr.addr.ss_family))
2022  SSLok = 'N';
2023  else
2024  SSLok = 'S'; /* Support for SSL */
2025 #else
2026  SSLok = 'N'; /* No support for SSL */
2027 #endif
2028 
2029 retry1:
2030  if (send(port->sock, &SSLok, 1, 0) != 1)
2031  {
2032  if (errno == EINTR)
2033  goto retry1; /* if interrupted, just retry */
2036  errmsg("failed to send SSL negotiation response: %m")));
2037  return STATUS_ERROR; /* close the connection */
2038  }
2039 
2040 #ifdef USE_SSL
2041  if (SSLok == 'S' && secure_open_server(port) == -1)
2042  return STATUS_ERROR;
2043 #endif
2044 
2045  /*
2046  * regular startup packet, cancel, etc packet should follow, but not
2047  * another SSL negotiation request, and a GSS request should only
2048  * follow if SSL was rejected (client may negotiate in either order)
2049  */
2050  return ProcessStartupPacket(port, true, SSLok == 'S');
2051  }
2052  else if (proto == NEGOTIATE_GSS_CODE && !gss_done)
2053  {
2054  char GSSok = 'N';
2055 
2056 #ifdef ENABLE_GSS
2057  /* No GSSAPI encryption when on Unix socket */
2058  if (!IS_AF_UNIX(port->laddr.addr.ss_family))
2059  GSSok = 'G';
2060 #endif
2061 
2062  while (send(port->sock, &GSSok, 1, 0) != 1)
2063  {
2064  if (errno == EINTR)
2065  continue;
2068  errmsg("failed to send GSSAPI negotiation response: %m")));
2069  return STATUS_ERROR; /* close the connection */
2070  }
2071 
2072 #ifdef ENABLE_GSS
2073  if (GSSok == 'G' && secure_open_gssapi(port) == -1)
2074  return STATUS_ERROR;
2075 #endif
2076 
2077  /*
2078  * regular startup packet, cancel, etc packet should follow, but not
2079  * another GSS negotiation request, and an SSL request should only
2080  * follow if GSS was rejected (client may negotiate in either order)
2081  */
2082  return ProcessStartupPacket(port, GSSok == 'G', true);
2083  }
2084 
2085  /* Could add additional special packet types here */
2086 
2087  /*
2088  * Set FrontendProtocol now so that ereport() knows what format to send if
2089  * we fail during startup.
2090  */
2091  FrontendProtocol = proto;
2092 
2093  /* Check that the major protocol version is in range. */
2096  ereport(FATAL,
2097  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2098  errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
2099  PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
2103 
2104  /*
2105  * Now fetch parameters out of startup packet and save them into the Port
2106  * structure. All data structures attached to the Port struct must be
2107  * allocated in TopMemoryContext so that they will remain available in a
2108  * running backend (even after PostmasterContext is destroyed). We need
2109  * not worry about leaking this storage on failure, since we aren't in the
2110  * postmaster process anymore.
2111  */
2113 
2114  if (PG_PROTOCOL_MAJOR(proto) >= 3)
2115  {
2116  int32 offset = sizeof(ProtocolVersion);
2117  List *unrecognized_protocol_options = NIL;
2118 
2119  /*
2120  * Scan packet body for name/option pairs. We can assume any string
2121  * beginning within the packet body is null-terminated, thanks to
2122  * zeroing extra byte above.
2123  */
2124  port->guc_options = NIL;
2125 
2126  while (offset < len)
2127  {
2128  char *nameptr = ((char *) buf) + offset;
2129  int32 valoffset;
2130  char *valptr;
2131 
2132  if (*nameptr == '\0')
2133  break; /* found packet terminator */
2134  valoffset = offset + strlen(nameptr) + 1;
2135  if (valoffset >= len)
2136  break; /* missing value, will complain below */
2137  valptr = ((char *) buf) + valoffset;
2138 
2139  if (strcmp(nameptr, "database") == 0)
2140  port->database_name = pstrdup(valptr);
2141  else if (strcmp(nameptr, "user") == 0)
2142  port->user_name = pstrdup(valptr);
2143  else if (strcmp(nameptr, "options") == 0)
2144  port->cmdline_options = pstrdup(valptr);
2145  else if (strcmp(nameptr, "replication") == 0)
2146  {
2147  /*
2148  * Due to backward compatibility concerns the replication
2149  * parameter is a hybrid beast which allows the value to be
2150  * either boolean or the string 'database'. The latter
2151  * connects to a specific database which is e.g. required for
2152  * logical decoding while.
2153  */
2154  if (strcmp(valptr, "database") == 0)
2155  {
2156  am_walsender = true;
2157  am_db_walsender = true;
2158  }
2159  else if (!parse_bool(valptr, &am_walsender))
2160  ereport(FATAL,
2161  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2162  errmsg("invalid value for parameter \"%s\": \"%s\"",
2163  "replication",
2164  valptr),
2165  errhint("Valid values are: \"false\", 0, \"true\", 1, \"database\".")));
2166  }
2167  else if (strncmp(nameptr, "_pq_.", 5) == 0)
2168  {
2169  /*
2170  * Any option beginning with _pq_. is reserved for use as a
2171  * protocol-level option, but at present no such options are
2172  * defined.
2173  */
2174  unrecognized_protocol_options =
2175  lappend(unrecognized_protocol_options, pstrdup(nameptr));
2176  }
2177  else
2178  {
2179  /* Assume it's a generic GUC option */
2180  port->guc_options = lappend(port->guc_options,
2181  pstrdup(nameptr));
2182  port->guc_options = lappend(port->guc_options,
2183  pstrdup(valptr));
2184 
2185  /*
2186  * Copy application_name to port if we come across it. This
2187  * is done so we can log the application_name in the
2188  * connection authorization message. Note that the GUC would
2189  * be used but we haven't gone through GUC setup yet.
2190  */
2191  if (strcmp(nameptr, "application_name") == 0)
2192  {
2193  char *tmp_app_name = pstrdup(valptr);
2194 
2195  pg_clean_ascii(tmp_app_name);
2196 
2197  port->application_name = tmp_app_name;
2198  }
2199  }
2200  offset = valoffset + strlen(valptr) + 1;
2201  }
2202 
2203  /*
2204  * If we didn't find a packet terminator exactly at the end of the
2205  * given packet length, complain.
2206  */
2207  if (offset != len - 1)
2208  ereport(FATAL,
2209  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2210  errmsg("invalid startup packet layout: expected terminator as last byte")));
2211 
2212  /*
2213  * If the client requested a newer protocol version or if the client
2214  * requested any protocol options we didn't recognize, let them know
2215  * the newest minor protocol version we do support and the names of
2216  * any unrecognized options.
2217  */
2219  unrecognized_protocol_options != NIL)
2220  SendNegotiateProtocolVersion(unrecognized_protocol_options);
2221  }
2222  else
2223  {
2224  /*
2225  * Get the parameters from the old-style, fixed-width-fields startup
2226  * packet as C strings. The packet destination was cleared first so a
2227  * short packet has zeros silently added. We have to be prepared to
2228  * truncate the pstrdup result for oversize fields, though.
2229  */
2230  StartupPacket *packet = (StartupPacket *) buf;
2231 
2232  port->database_name = pstrdup(packet->database);
2233  if (strlen(port->database_name) > sizeof(packet->database))
2234  port->database_name[sizeof(packet->database)] = '\0';
2235  port->user_name = pstrdup(packet->user);
2236  if (strlen(port->user_name) > sizeof(packet->user))
2237  port->user_name[sizeof(packet->user)] = '\0';
2238  port->cmdline_options = pstrdup(packet->options);
2239  if (strlen(port->cmdline_options) > sizeof(packet->options))
2240  port->cmdline_options[sizeof(packet->options)] = '\0';
2241  port->guc_options = NIL;
2242  }
2243 
2244  /* Check a user name was given. */
2245  if (port->user_name == NULL || port->user_name[0] == '\0')
2246  ereport(FATAL,
2247  (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
2248  errmsg("no PostgreSQL user name specified in startup packet")));
2249 
2250  /* The database defaults to the user name. */
2251  if (port->database_name == NULL || port->database_name[0] == '\0')
2252  port->database_name = pstrdup(port->user_name);
2253 
2254  if (Db_user_namespace)
2255  {
2256  /*
2257  * If user@, it is a global user, remove '@'. We only want to do this
2258  * if there is an '@' at the end and no earlier in the user string or
2259  * they may fake as a local user of another database attaching to this
2260  * database.
2261  */
2262  if (strchr(port->user_name, '@') ==
2263  port->user_name + strlen(port->user_name) - 1)
2264  *strchr(port->user_name, '@') = '\0';
2265  else
2266  {
2267  /* Append '@' and dbname */
2268  port->user_name = psprintf("%s@%s", port->user_name, port->database_name);
2269  }
2270  }
2271 
2272  /*
2273  * Truncate given database and user names to length of a Postgres name.
2274  * This avoids lookup failures when overlength names are given.
2275  */
2276  if (strlen(port->database_name) >= NAMEDATALEN)
2277  port->database_name[NAMEDATALEN - 1] = '\0';
2278  if (strlen(port->user_name) >= NAMEDATALEN)
2279  port->user_name[NAMEDATALEN - 1] = '\0';
2280 
2281  if (am_walsender)
2283  else
2285 
2286  /*
2287  * Normal walsender backends, e.g. for streaming replication, are not
2288  * connected to a particular database. But walsenders used for logical
2289  * replication need to connect to a specific database. We allow streaming
2290  * replication commands to be issued even if connected to a database as it
2291  * can make sense to first make a basebackup and then stream changes
2292  * starting from that.
2293  */
2294  if (am_walsender && !am_db_walsender)
2295  port->database_name[0] = '\0';
2296 
2297  /*
2298  * Done putting stuff in TopMemoryContext.
2299  */
2300  MemoryContextSwitchTo(oldcontext);
2301 
2302  /*
2303  * If we're going to reject the connection due to database state, say so
2304  * now instead of wasting cycles on an authentication exchange. (This also
2305  * allows a pg_ping utility to be written.)
2306  */
2307  switch (port->canAcceptConnections)
2308  {
2309  case CAC_STARTUP:
2310  ereport(FATAL,
2312  errmsg("the database system is starting up")));
2313  break;
2314  case CAC_SHUTDOWN:
2315  ereport(FATAL,
2317  errmsg("the database system is shutting down")));
2318  break;
2319  case CAC_RECOVERY:
2320  ereport(FATAL,
2322  errmsg("the database system is in recovery mode")));
2323  break;
2324  case CAC_TOOMANY:
2325  ereport(FATAL,
2326  (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
2327  errmsg("sorry, too many clients already")));
2328  break;
2329  case CAC_SUPERUSER:
2330  /* OK for now, will check in InitPostgres */
2331  break;
2332  case CAC_OK:
2333  break;
2334  }
2335 
2336  return STATUS_OK;
2337 }
2338 
2339 /*
2340  * Send a NegotiateProtocolVersion to the client. This lets the client know
2341  * that they have requested a newer minor protocol version than we are able
2342  * to speak. We'll speak the highest version we know about; the client can,
2343  * of course, abandon the connection if that's a problem.
2344  *
2345  * We also include in the response a list of protocol options we didn't
2346  * understand. This allows clients to include optional parameters that might
2347  * be present either in newer protocol versions or third-party protocol
2348  * extensions without fear of having to reconnect if those options are not
2349  * understood, while at the same time making certain that the client is aware
2350  * of which options were actually accepted.
2351  */
2352 static void
2353 SendNegotiateProtocolVersion(List *unrecognized_protocol_options)
2354 {
2356  ListCell *lc;
2357 
2358  pq_beginmessage(&buf, 'v'); /* NegotiateProtocolVersion */
2360  pq_sendint32(&buf, list_length(unrecognized_protocol_options));
2361  foreach(lc, unrecognized_protocol_options)
2362  pq_sendstring(&buf, lfirst(lc));
2363  pq_endmessage(&buf);
2364 
2365  /* no need to flush, some other message will follow */
2366 }
2367 
2368 /*
2369  * The client has sent a cancel request packet, not a normal
2370  * start-a-new-connection packet. Perform the necessary processing.
2371  * Nothing is sent back to the client.
2372  */
2373 static void
2375 {
2376  CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
2377  int backendPID;
2378  int32 cancelAuthCode;
2379  Backend *bp;
2380 
2381 #ifndef EXEC_BACKEND
2382  dlist_iter iter;
2383 #else
2384  int i;
2385 #endif
2386 
2387  backendPID = (int) pg_ntoh32(canc->backendPID);
2388  cancelAuthCode = (int32) pg_ntoh32(canc->cancelAuthCode);
2389 
2390  /*
2391  * See if we have a matching backend. In the EXEC_BACKEND case, we can no
2392  * longer access the postmaster's own backend list, and must rely on the
2393  * duplicate array in shared memory.
2394  */
2395 #ifndef EXEC_BACKEND
2396  dlist_foreach(iter, &BackendList)
2397  {
2398  bp = dlist_container(Backend, elem, iter.cur);
2399 #else
2400  for (i = MaxLivePostmasterChildren() - 1; i >= 0; i--)
2401  {
2402  bp = (Backend *) &ShmemBackendArray[i];
2403 #endif
2404  if (bp->pid == backendPID)
2405  {
2406  if (bp->cancel_key == cancelAuthCode)
2407  {
2408  /* Found a match; signal that backend to cancel current op */
2409  ereport(DEBUG2,
2410  (errmsg_internal("processing cancel request: sending SIGINT to process %d",
2411  backendPID)));
2412  signal_child(bp->pid, SIGINT);
2413  }
2414  else
2415  /* Right PID, wrong key: no way, Jose */
2416  ereport(LOG,
2417  (errmsg("wrong key in cancel request for process %d",
2418  backendPID)));
2419  return;
2420  }
2421 #ifndef EXEC_BACKEND /* make GNU Emacs 26.1 see brace balance */
2422  }
2423 #else
2424  }
2425 #endif
2426 
2427  /* No matching backend */
2428  ereport(LOG,
2429  (errmsg("PID %d in cancel request did not match any process",
2430  backendPID)));
2431 }
2432 
2433 /*
2434  * canAcceptConnections --- check to see if database state allows connections
2435  * of the specified type. backend_type can be BACKEND_TYPE_NORMAL,
2436  * BACKEND_TYPE_AUTOVAC, or BACKEND_TYPE_BGWORKER. (Note that we don't yet
2437  * know whether a NORMAL connection might turn into a walsender.)
2438  */
2439 static CAC_state
2440 canAcceptConnections(int backend_type)
2441 {
2442  CAC_state result = CAC_OK;
2443 
2444  /*
2445  * Can't start backends when in startup/shutdown/inconsistent recovery
2446  * state. We treat autovac workers the same as user backends for this
2447  * purpose. However, bgworkers are excluded from this test; we expect
2448  * bgworker_should_start_now() decided whether the DB state allows them.
2449  */
2450  if (pmState != PM_RUN && pmState != PM_HOT_STANDBY &&
2451  backend_type != BACKEND_TYPE_BGWORKER)
2452  {
2453  if (Shutdown > NoShutdown)
2454  return CAC_SHUTDOWN; /* shutdown is pending */
2455  else if (!FatalError &&
2456  (pmState == PM_STARTUP ||
2457  pmState == PM_RECOVERY))
2458  return CAC_STARTUP; /* normal startup */
2459  else
2460  return CAC_RECOVERY; /* else must be crash recovery */
2461  }
2462 
2463  /*
2464  * "Smart shutdown" restrictions are applied only to normal connections,
2465  * not to autovac workers or bgworkers. When only superusers can connect,
2466  * we return CAC_SUPERUSER to indicate that superuserness must be checked
2467  * later. Note that neither CAC_OK nor CAC_SUPERUSER can safely be
2468  * returned until we have checked for too many children.
2469  */
2470  if (connsAllowed != ALLOW_ALL_CONNS &&
2471  backend_type == BACKEND_TYPE_NORMAL)
2472  {
2474  result = CAC_SUPERUSER; /* allow superusers only */
2475  else
2476  return CAC_SHUTDOWN; /* shutdown is pending */
2477  }
2478 
2479  /*
2480  * Don't start too many children.
2481  *
2482  * We allow more connections here than we can have backends because some
2483  * might still be authenticating; they might fail auth, or some existing
2484  * backend might exit before the auth cycle is completed. The exact
2485  * MaxBackends limit is enforced when a new backend tries to join the
2486  * shared-inval backend array.
2487  *
2488  * The limit here must match the sizes of the per-child-process arrays;
2489  * see comments for MaxLivePostmasterChildren().
2490  */
2492  result = CAC_TOOMANY;
2493 
2494  return result;
2495 }
2496 
2497 
2498 /*
2499  * ConnCreate -- create a local connection data structure
2500  *
2501  * Returns NULL on failure, other than out-of-memory which is fatal.
2502  */
2503 static Port *
2504 ConnCreate(int serverFd)
2505 {
2506  Port *port;
2507 
2508  if (!(port = (Port *) calloc(1, sizeof(Port))))
2509  {
2510  ereport(LOG,
2511  (errcode(ERRCODE_OUT_OF_MEMORY),
2512  errmsg("out of memory")));
2513  ExitPostmaster(1);
2514  }
2515 
2516  if (StreamConnection(serverFd, port) != STATUS_OK)
2517  {
2518  if (port->sock != PGINVALID_SOCKET)
2519  StreamClose(port->sock);
2520  ConnFree(port);
2521  return NULL;
2522  }
2523 
2524  return port;
2525 }
2526 
2527 
2528 /*
2529  * ConnFree -- free a local connection data structure
2530  *
2531  * Caller has already closed the socket if any, so there's not much
2532  * to do here.
2533  */
2534 static void
2536 {
2537  free(conn);
2538 }
2539 
2540 
2541 /*
2542  * ClosePostmasterPorts -- close all the postmaster's open sockets
2543  *
2544  * This is called during child process startup to release file descriptors
2545  * that are not needed by that child process. The postmaster still has
2546  * them open, of course.
2547  *
2548  * Note: we pass am_syslogger as a boolean because we don't want to set
2549  * the global variable yet when this is called.
2550  */
2551 void
2552 ClosePostmasterPorts(bool am_syslogger)
2553 {
2554  int i;
2555 
2556 #ifndef WIN32
2557 
2558  /*
2559  * Close the write end of postmaster death watch pipe. It's important to
2560  * do this as early as possible, so that if postmaster dies, others won't
2561  * think that it's still running because we're holding the pipe open.
2562  */
2564  ereport(FATAL,
2566  errmsg_internal("could not close postmaster death monitoring pipe in child process: %m")));
2568  /* Notify fd.c that we released one pipe FD. */
2570 #endif
2571 
2572  /*
2573  * Close the postmaster's listen sockets. These aren't tracked by fd.c,
2574  * so we don't call ReleaseExternalFD() here.
2575  */
2576  for (i = 0; i < MAXLISTEN; i++)
2577  {
2578  if (ListenSocket[i] != PGINVALID_SOCKET)
2579  {
2582  }
2583  }
2584 
2585  /*
2586  * If using syslogger, close the read side of the pipe. We don't bother
2587  * tracking this in fd.c, either.
2588  */
2589  if (!am_syslogger)
2590  {
2591 #ifndef WIN32
2592  if (syslogPipe[0] >= 0)
2593  close(syslogPipe[0]);
2594  syslogPipe[0] = -1;
2595 #else
2596  if (syslogPipe[0])
2597  CloseHandle(syslogPipe[0]);
2598  syslogPipe[0] = 0;
2599 #endif
2600  }
2601 
2602 #ifdef USE_BONJOUR
2603  /* If using Bonjour, close the connection to the mDNS daemon */
2604  if (bonjour_sdref)
2605  close(DNSServiceRefSockFD(bonjour_sdref));
2606 #endif
2607 }
2608 
2609 
2610 /*
2611  * InitProcessGlobals -- set MyProcPid, MyStartTime[stamp], random seeds
2612  *
2613  * Called early in the postmaster and every backend.
2614  */
2615 void
2617 {
2618  unsigned int rseed;
2619 
2620  MyProcPid = getpid();
2623 
2624  /*
2625  * Set a different seed for random() in every process. We want something
2626  * unpredictable, so if possible, use high-quality random bits for the
2627  * seed. Otherwise, fall back to a seed based on timestamp and PID.
2628  */
2629  if (!pg_strong_random(&rseed, sizeof(rseed)))
2630  {
2631  /*
2632  * Since PIDs and timestamps tend to change more frequently in their
2633  * least significant bits, shift the timestamp left to allow a larger
2634  * total number of seeds in a given time period. Since that would
2635  * leave only 20 bits of the timestamp that cycle every ~1 second,
2636  * also mix in some higher bits.
2637  */
2638  rseed = ((uint64) MyProcPid) ^
2639  ((uint64) MyStartTimestamp << 12) ^
2640  ((uint64) MyStartTimestamp >> 20);
2641  }
2642  srandom(rseed);
2643 }
2644 
2645 
/*
 * reset_shared -- reset shared memory and semaphores
 *
 * Takes no arguments and returns nothing; all the work is delegated to
 * CreateSharedMemoryAndSemaphores().
 */
static void
reset_shared(void)
{
	/*
	 * Create or re-create shared memory and semaphores.
	 *
	 * Note: in each "cycle of life" we will normally assign the same IPC keys
	 * (if using SysV shmem and/or semas).  This helps ensure that we will
	 * clean up dead IPC objects if the postmaster crashes and is restarted.
	 */
	CreateSharedMemoryAndSemaphores();
}
2661 
2662 
2663 /*
2664  * SIGHUP -- reread config files, and tell children to do same
2665  */
2666 static void
2668 {
2669  int save_errno = errno;
2670 
2671  /*
2672  * We rely on the signal mechanism to have blocked all signals ... except
2673  * on Windows, which lacks sigaction(), so we have to do it manually.
2674  */
2675 #ifdef WIN32
2676  PG_SETMASK(&BlockSig);
2677 #endif
2678 
2679  if (Shutdown <= SmartShutdown)
2680  {
2681  ereport(LOG,
2682  (errmsg("received SIGHUP, reloading configuration files")));
2685  if (StartupPID != 0)
2687  if (BgWriterPID != 0)
2689  if (CheckpointerPID != 0)
2691  if (WalWriterPID != 0)
2693  if (WalReceiverPID != 0)
2695  if (AutoVacPID != 0)
2697  if (PgArchPID != 0)
2699  if (SysLoggerPID != 0)
2701  if (PgStatPID != 0)
2703 
2704  /* Reload authentication config files too */
2705  if (!load_hba())
2706  ereport(LOG,
2707  /* translator: %s is a configuration file */
2708  (errmsg("%s was not reloaded", "pg_hba.conf")));
2709 
2710  if (!load_ident())
2711  ereport(LOG,
2712  (errmsg("%s was not reloaded", "pg_ident.conf")));
2713 
2714 #ifdef USE_SSL
2715  /* Reload SSL configuration as well */
2716  if (EnableSSL)
2717  {
2718  if (secure_initialize(false) == 0)
2719  LoadedSSL = true;
2720  else
2721  ereport(LOG,
2722  (errmsg("SSL configuration was not reloaded")));
2723  }
2724  else
2725  {
2726  secure_destroy();
2727  LoadedSSL = false;
2728  }
2729 #endif
2730 
2731 #ifdef EXEC_BACKEND
2732  /* Update the starting-point file for future children */
2733  write_nondefault_variables(PGC_SIGHUP);
2734 #endif
2735  }
2736 
2737 #ifdef WIN32
2739 #endif
2740 
2741  errno = save_errno;
2742 }
2743 
2744 
2745 /*
2746  * pmdie -- signal handler for processing various postmaster signals.
2747  */
2748 static void
2750 {
2751  int save_errno = errno;
2752 
2753  /*
2754  * We rely on the signal mechanism to have blocked all signals ... except
2755  * on Windows, which lacks sigaction(), so we have to do it manually.
2756  */
2757 #ifdef WIN32
2758  PG_SETMASK(&BlockSig);
2759 #endif
2760 
2761  ereport(DEBUG2,
2762  (errmsg_internal("postmaster received signal %d",
2763  postgres_signal_arg)));
2764 
2765  switch (postgres_signal_arg)
2766  {
2767  case SIGTERM:
2768 
2769  /*
2770  * Smart Shutdown:
2771  *
2772  * Wait for children to end their work, then shut down.
2773  */
2774  if (Shutdown >= SmartShutdown)
2775  break;
2777  ereport(LOG,
2778  (errmsg("received smart shutdown request")));
2779 
2780  /* Report status */
2782 #ifdef USE_SYSTEMD
2783  sd_notify(0, "STOPPING=1");
2784 #endif
2785 
2786  /*
2787  * If we reached normal running, we have to wait for any online
2788  * backup mode to end; otherwise go straight to waiting for client
2789  * backends to exit. (The difference is that in the former state,
2790  * we'll still let in new superuser clients, so that somebody can
2791  * end the online backup mode.) If already in PM_STOP_BACKENDS or
2792  * a later state, do not change it.
2793  */
2794  if (pmState == PM_RUN)
2796  else if (pmState == PM_HOT_STANDBY)
2798  else if (pmState == PM_STARTUP || pmState == PM_RECOVERY)
2799  {
2800  /* There should be no clients, so proceed to stop children */
2802  }
2803 
2804  /*
2805  * Now wait for online backup mode to end and backends to exit. If
2806  * that is already the case, PostmasterStateMachine will take the
2807  * next step.
2808  */
2810  break;
2811 
2812  case SIGINT:
2813 
2814  /*
2815  * Fast Shutdown:
2816  *
2817  * Abort all children with SIGTERM (rollback active transactions
2818  * and exit) and shut down when they are gone.
2819  */
2820  if (Shutdown >= FastShutdown)
2821  break;
2823  ereport(LOG,
2824  (errmsg("received fast shutdown request")));
2825 
2826  /* Report status */
2828 #ifdef USE_SYSTEMD
2829  sd_notify(0, "STOPPING=1");
2830 #endif
2831 
2832  if (pmState == PM_STARTUP || pmState == PM_RECOVERY)
2833  {
2834  /* Just shut down background processes silently */
2836  }
2837  else if (pmState == PM_RUN ||
2839  {
2840  /* Report that we're about to zap live client sessions */
2841  ereport(LOG,
2842  (errmsg("aborting any active transactions")));
2844  }
2845 
2846  /*
2847  * PostmasterStateMachine will issue any necessary signals, or
2848  * take the next step if no child processes need to be killed.
2849  */
2851  break;
2852 
2853  case SIGQUIT:
2854 
2855  /*
2856  * Immediate Shutdown:
2857  *
2858  * abort all children with SIGQUIT, wait for them to exit,
2859  * terminate remaining ones with SIGKILL, then exit without
2860  * attempt to properly shut down the data base system.
2861  */
2862  if (Shutdown >= ImmediateShutdown)
2863  break;
2865  ereport(LOG,
2866  (errmsg("received immediate shutdown request")));
2867 
2868  /* Report status */
2870 #ifdef USE_SYSTEMD
2871  sd_notify(0, "STOPPING=1");
2872 #endif
2873 
2874  /* tell children to shut down ASAP */
2878 
2879  /* set stopwatch for them to die */
2880  AbortStartTime = time(NULL);
2881 
2882  /*
2883  * Now wait for backends to exit. If there are none,
2884  * PostmasterStateMachine will take the next step.
2885  */
2887  break;
2888  }
2889 
2890 #ifdef WIN32
2892 #endif
2893 
2894  errno = save_errno;
2895 }
2896 
2897 /*
2898  * Reaper -- signal handler to cleanup after a child process dies.
2899  */
2900 static void
2902 {
2903  int save_errno = errno;
2904  int pid; /* process id of dead child process */
2905  int exitstatus; /* its exit status */
2906 
2907  /*
2908  * We rely on the signal mechanism to have blocked all signals ... except
2909  * on Windows, which lacks sigaction(), so we have to do it manually.
2910  */
2911 #ifdef WIN32
2912  PG_SETMASK(&BlockSig);
2913 #endif
2914 
2915  ereport(DEBUG4,
2916  (errmsg_internal("reaping dead processes")));
2917 
2918  while ((pid = waitpid(-1, &exitstatus, WNOHANG)) > 0)
2919  {
2920  /*
2921  * Check if this child was a startup process.
2922  */
2923  if (pid == StartupPID)
2924  {
2925  StartupPID = 0;
2926 
2927  /*
2928  * Startup process exited in response to a shutdown request (or it
2929  * completed normally regardless of the shutdown request).
2930  */
2931  if (Shutdown > NoShutdown &&
2932  (EXIT_STATUS_0(exitstatus) || EXIT_STATUS_1(exitstatus)))
2933  {
2936  /* PostmasterStateMachine logic does the rest */
2937  continue;
2938  }
2939 
2940  if (EXIT_STATUS_3(exitstatus))
2941  {
2942  ereport(LOG,
2943  (errmsg("shutdown at recovery target")));
2946  TerminateChildren(SIGTERM);
2948  /* PostmasterStateMachine logic does the rest */
2949  continue;
2950  }
2951 
2952  /*
2953  * Unexpected exit of startup process (including FATAL exit)
2954  * during PM_STARTUP is treated as catastrophic. There are no
2955  * other processes running yet, so we can just exit.
2956  */
2957  if (pmState == PM_STARTUP &&
2959  !EXIT_STATUS_0(exitstatus))
2960  {
2961  LogChildExit(LOG, _("startup process"),
2962  pid, exitstatus);
2963  ereport(LOG,
2964  (errmsg("aborting startup due to startup process failure")));
2965  ExitPostmaster(1);
2966  }
2967 
2968  /*
2969  * After PM_STARTUP, any unexpected exit (including FATAL exit) of
2970  * the startup process is catastrophic, so kill other children,
2971  * and set StartupStatus so we don't try to reinitialize after
2972  * they're gone. Exception: if StartupStatus is STARTUP_SIGNALED,
2973  * then we previously sent the startup process a SIGQUIT; so
2974  * that's probably the reason it died, and we do want to try to
2975  * restart in that case.
2976  *
2977  * This stanza also handles the case where we sent a SIGQUIT
2978  * during PM_STARTUP due to some dead_end child crashing: in that
2979  * situation, if the startup process dies on the SIGQUIT, we need
2980  * to transition to PM_WAIT_BACKENDS state which will allow
2981  * PostmasterStateMachine to restart the startup process. (On the
2982  * other hand, the startup process might complete normally, if we
2983  * were too late with the SIGQUIT. In that case we'll fall
2984  * through and commence normal operations.)
2985  */
2986  if (!EXIT_STATUS_0(exitstatus))
2987  {
2989  {
2991  if (pmState == PM_STARTUP)
2993  }
2994  else
2996  HandleChildCrash(pid, exitstatus,
2997  _("startup process"));
2998  continue;
2999  }
3000 
3001  /*
3002  * Startup succeeded, commence normal operations
3003  */
3005  FatalError = false;
3006  AbortStartTime = 0;
3007  ReachedNormalRunning = true;
3008  pmState = PM_RUN;
3010 
3011  /*
3012  * Crank up the background tasks, if we didn't do that already
3013  * when we entered consistent recovery state. It doesn't matter
3014  * if this fails, we'll just try again later.
3015  */
3016  if (CheckpointerPID == 0)
3018  if (BgWriterPID == 0)
3020  if (WalWriterPID == 0)
3022 
3023  /*
3024  * Likewise, start other special children as needed. In a restart
3025  * situation, some of them may be alive already.
3026  */
3029  if (PgArchStartupAllowed() && PgArchPID == 0)
3030  PgArchPID = pgarch_start();
3031  if (PgStatPID == 0)
3032  PgStatPID = pgstat_start();
3033 
3034  /* workers may be scheduled to start now */
3036 
3037  /* at this point we are really open for business */
3038  ereport(LOG,
3039  (errmsg("database system is ready to accept connections")));
3040 
3041  /* Report status */
3043 #ifdef USE_SYSTEMD
3044  sd_notify(0, "READY=1");
3045 #endif
3046 
3047  continue;
3048  }
3049 
3050  /*
3051  * Was it the bgwriter? Normal exit can be ignored; we'll start a new
3052  * one at the next iteration of the postmaster's main loop, if
3053  * necessary. Any other exit condition is treated as a crash.
3054  */
3055  if (pid == BgWriterPID)
3056  {
3057  BgWriterPID = 0;
3058  if (!EXIT_STATUS_0(exitstatus))
3059  HandleChildCrash(pid, exitstatus,
3060  _("background writer process"));
3061  continue;
3062  }
3063 
3064  /*
3065  * Was it the checkpointer?
3066  */
3067  if (pid == CheckpointerPID)
3068  {
3069  CheckpointerPID = 0;
3070  if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN)
3071  {
3072  /*
3073  * OK, we saw normal exit of the checkpointer after it's been
3074  * told to shut down. We expect that it wrote a shutdown
3075  * checkpoint. (If for some reason it didn't, recovery will
3076  * occur on next postmaster start.)
3077  *
3078  * At this point we should have no normal backend children
3079  * left (else we'd not be in PM_SHUTDOWN state) but we might
3080  * have dead_end children to wait for.
3081  *
3082  * If we have an archiver subprocess, tell it to do a last
3083  * archive cycle and quit. Likewise, if we have walsender
3084  * processes, tell them to send any remaining WAL and quit.
3085  */
3087 
3088  /* Waken archiver for the last time */
3089  if (PgArchPID != 0)
3091 
3092  /*
3093  * Waken walsenders for the last time. No regular backends
3094  * should be around anymore.
3095  */
3097 
3099 
3100  /*
3101  * We can also shut down the stats collector now; there's
3102  * nothing left for it to do.
3103  */
3104  if (PgStatPID != 0)
3106  }
3107  else
3108  {
3109  /*
3110  * Any unexpected exit of the checkpointer (including FATAL
3111  * exit) is treated as a crash.
3112  */
3113  HandleChildCrash(pid, exitstatus,
3114  _("checkpointer process"));
3115  }
3116 
3117  continue;
3118  }
3119 
3120  /*
3121  * Was it the wal writer? Normal exit can be ignored; we'll start a
3122  * new one at the next iteration of the postmaster's main loop, if
3123  * necessary. Any other exit condition is treated as a crash.
3124  */
3125  if (pid == WalWriterPID)
3126  {
3127  WalWriterPID = 0;
3128  if (!EXIT_STATUS_0(exitstatus))
3129  HandleChildCrash(pid, exitstatus,
3130  _("WAL writer process"));
3131  continue;
3132  }
3133 
3134  /*
3135  * Was it the wal receiver? If exit status is zero (normal) or one
3136  * (FATAL exit), we assume everything is all right just like normal
3137  * backends. (If we need a new wal receiver, we'll start one at the
3138  * next iteration of the postmaster's main loop.)
3139  */
3140  if (pid == WalReceiverPID)
3141  {
3142  WalReceiverPID = 0;
3143  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3144  HandleChildCrash(pid, exitstatus,
3145  _("WAL receiver process"));
3146  continue;
3147  }
3148 
3149  /*
3150  * Was it the autovacuum launcher? Normal exit can be ignored; we'll
3151  * start a new one at the next iteration of the postmaster's main
3152  * loop, if necessary. Any other exit condition is treated as a
3153  * crash.
3154  */
3155  if (pid == AutoVacPID)
3156  {
3157  AutoVacPID = 0;
3158  if (!EXIT_STATUS_0(exitstatus))
3159  HandleChildCrash(pid, exitstatus,
3160  _("autovacuum launcher process"));
3161  continue;
3162  }
3163 
3164  /*
3165  * Was it the archiver? If so, just try to start a new one; no need
3166  * to force reset of the rest of the system. (If fail, we'll try
3167  * again in future cycles of the main loop.). Unless we were waiting
3168  * for it to shut down; don't restart it in that case, and
3169  * PostmasterStateMachine() will advance to the next shutdown step.
3170  */
3171  if (pid == PgArchPID)
3172  {
3173  PgArchPID = 0;
3174  if (!EXIT_STATUS_0(exitstatus))
3175  LogChildExit(LOG, _("archiver process"),
3176  pid, exitstatus);
3177  if (PgArchStartupAllowed())
3178  PgArchPID = pgarch_start();
3179  continue;
3180  }
3181 
3182  /*
3183  * Was it the statistics collector? If so, just try to start a new
3184  * one; no need to force reset of the rest of the system. (If fail,
3185  * we'll try again in future cycles of the main loop.)
3186  */
3187  if (pid == PgStatPID)
3188  {
3189  PgStatPID = 0;
3190  if (!EXIT_STATUS_0(exitstatus))
3191  LogChildExit(LOG, _("statistics collector process"),
3192  pid, exitstatus);
3193  if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
3194  PgStatPID = pgstat_start();
3195  continue;
3196  }
3197 
3198  /* Was it the system logger? If so, try to start a new one */
3199  if (pid == SysLoggerPID)
3200  {
3201  SysLoggerPID = 0;
3202  /* for safety's sake, launch new logger *first* */
3204  if (!EXIT_STATUS_0(exitstatus))
3205  LogChildExit(LOG, _("system logger process"),
3206  pid, exitstatus);
3207  continue;
3208  }
3209 
3210  /* Was it one of our background workers? */
3211  if (CleanupBackgroundWorker(pid, exitstatus))
3212  {
3213  /* have it be restarted */
3214  HaveCrashedWorker = true;
3215  continue;
3216  }
3217 
3218  /*
3219  * Else do standard backend child cleanup.
3220  */
3221  CleanupBackend(pid, exitstatus);
3222  } /* loop over pending child-death reports */
3223 
3224  /*
3225  * After cleaning out the SIGCHLD queue, see if we have any state changes
3226  * or actions to make.
3227  */
3229 
3230  /* Done with signal handler */
3231 #ifdef WIN32
3233 #endif
3234 
3235  errno = save_errno;
3236 }
3237 
3238 /*
3239  * Scan the bgworkers list and see if the given PID (which has just stopped
3240  * or crashed) is in it. Handle its shutdown if so, and return true. If not a
3241  * bgworker, return false.
3242  *
3243  * This is heavily based on CleanupBackend. One important difference is that
3244  * we don't know yet that the dying process is a bgworker, so we must be silent
3245  * until we're sure it is.
      *
      * exitstatus is the child's exit status. True is also returned on the paths
      * that hand the exit to HandleChildCrash(); those paths return before the
      * worker's backend entry is freed, and before ReportBackgroundWorkerExit()
      * and LogChildExit() are reached in this function.
      *
      * NOTE(review): the numbering of this listing skips 3248, 3254, 3276,
      * 3304-3305 and 3324, so the statements on those lines (including the
      * function-name line) are not visible here.
3246  */
3247 static bool
3249  int exitstatus) /* child's exit status */
3250 {
3251  char namebuf[MAXPGPATH];
3252  slist_mutable_iter iter;
3253 
3255  {
3256  RegisteredBgWorker *rw;
3257 
3258  rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
3259 
      /* Skip every registered worker whose PID is not the one that exited. */
3260  if (rw->rw_pid != pid)
3261  continue;
3262 
3263 #ifdef WIN32
3264  /* see CleanupBackend */
3265  if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3266  exitstatus = 0;
3267 #endif
3268 
      /* Name used in the crash report or exit log message further down. */
3269  snprintf(namebuf, MAXPGPATH, _("background worker \"%s\""),
3270  rw->rw_worker.bgw_type);
3271 
3272 
3273  if (!EXIT_STATUS_0(exitstatus))
3274  {
3275  /* Record timestamp, so we know when to restart the worker. */
3277  }
3278  else
3279  {
3280  /* Zero exit status means terminate */
3281  rw->rw_crashed_at = 0;
3282  rw->rw_terminate = true;
3283  }
3284 
3285  /*
3286  * Additionally, for shared-memory-connected workers, just like a
3287  * backend, any exit status other than 0 or 1 is considered a crash
3288  * and causes a system-wide restart.
3289  */
3290  if ((rw->rw_worker.bgw_flags & BGWORKER_SHMEM_ACCESS) != 0)
3291  {
3292  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3293  {
3294  HandleChildCrash(pid, exitstatus, namebuf);
3295  return true;
3296  }
3297  }
3298 
3299  /*
3300  * We must release the postmaster child slot whether this worker is
3301  * connected to shared memory or not, but we only treat it as a crash
3302  * if it is in fact connected.
3303  */
3306  {
3307  HandleChildCrash(pid, exitstatus, namebuf);
3308  return true;
3309  }
3310 
3311  /* Get it out of the BackendList and clear out remaining data */
3312  dlist_delete(&rw->rw_backend->elem);
3313 #ifdef EXEC_BACKEND
3314  ShmemBackendArrayRemove(rw->rw_backend);
3315 #endif
3316 
3317  /*
3318  * It's possible that this background worker started some OTHER
3319  * background worker and asked to be notified when that worker started
3320  * or stopped. If so, cancel any notifications destined for the
3321  * now-dead backend.
3322  */
3323  if (rw->rw_backend->bgworker_notify)
3325  free(rw->rw_backend);
3326  rw->rw_backend = NULL;
3327  rw->rw_pid = 0;
3328  rw->rw_child_slot = 0;
3329  ReportBackgroundWorkerExit(&iter); /* report child death */
3330 
      /* A zero exit status is logged at DEBUG1 only; anything else at LOG. */
3331  LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG,
3332  namebuf, pid, exitstatus);
3333 
3334  return true;
3335  }
3336 
      /* No registered worker has this PID. */
3337  return false;
3338 }
3339 
3340 /*
3341  * CleanupBackend -- cleanup after terminated backend.
3342  *
3343  * Remove all local state associated with backend.
3344  *
3345  * If you change this, see also CleanupBackgroundWorker.
      *
      * exitstatus is the child's exit status. The exit is always logged at
      * DEBUG2 first. An exit status other than 0 or 1 is then passed to
      * HandleChildCrash() and the function returns; otherwise the BackendList
      * entry whose pid matches is looked up, unlinked and freed.
      *
      * NOTE(review): the numbering of this listing skips 3348, 3392 and 3415,
      * so the statements on those lines (including the function-name line) are
      * not visible here.
3346  */
3347 static void
3349  int exitstatus) /* child's exit status. */
3350 {
3351  dlist_mutable_iter iter;
3352 
3353  LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
3354 
3355  /*
3356  * If a backend dies in an ugly way then we must signal all other backends
3357  * to quickdie. If exit status is zero (normal) or one (FATAL exit), we
3358  * assume everything is all right and proceed to remove the backend from
3359  * the active backend list.
3360  */
3361 
3362 #ifdef WIN32
3363 
3364  /*
3365  * On win32, also treat ERROR_WAIT_NO_CHILDREN (128) as nonfatal case,
3366  * since that sometimes happens under load when the process fails to start
3367  * properly (long before it starts using shared memory). Microsoft reports
3368  * it is related to mutex failure:
3369  * http://archives.postgresql.org/pgsql-hackers/2010-09/msg00790.php
3370  */
3371  if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3372  {
      /* Log the original status at LOG before it is replaced by zero. */
3373  LogChildExit(LOG, _("server process"), pid, exitstatus);
3374  exitstatus = 0;
3375  }
3376 #endif
3377 
3378  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3379  {
3380  HandleChildCrash(pid, exitstatus, _("server process"));
3381  return;
3382  }
3383 
      /* Find the entry for this PID; the loop stops at the first match. */
3384  dlist_foreach_modify(iter, &BackendList)
3385  {
3386  Backend *bp = dlist_container(Backend, elem, iter.cur);
3387 
3388  if (bp->pid == pid)
3389  {
3390  if (!bp->dead_end)
3391  {
3393  {
3394  /*
3395  * Uh-oh, the child failed to clean itself up. Treat as a
3396  * crash after all.
3397  */
3398  HandleChildCrash(pid, exitstatus, _("server process"));
3399  return;
3400  }
3401 #ifdef EXEC_BACKEND
3402  ShmemBackendArrayRemove(bp);
3403 #endif
3404  }
3405  if (bp->bgworker_notify)
3406  {
3407  /*
3408  * This backend may have been slated to receive SIGUSR1 when
3409  * some background worker started or stopped. Cancel those
3410  * notifications, as we don't want to signal PIDs that are not
3411  * PostgreSQL backends. This gets skipped in the (probably
3412  * very common) case where the backend has never requested any
3413  * such notifications.
3414  */
3416  }
3417  dlist_delete(iter.cur);
3418  free(bp);
3419  break;
3420  }
3421  }
3422 }
3423 
3424 /*
3425  * HandleChildCrash -- cleanup after failed backend, bgwriter, checkpointer,
3426  * walwriter, autovacuum, or background worker.
3427  *
3428  * The objectives here are to clean up our local state about the child
3429  * process, and to signal all other remaining children to quickdie.
      *
      * pid and exitstatus describe the dead child, and procname is the noun
      * phrase passed to LogChildExit() for it. Signals and log messages are only
      * issued when take_action is true (see below); the local bookkeeping for
      * the dead child itself is updated either way.
      *
      * NOTE(review): the numbering of this listing skips 3453, 3457, 3469, 3499,
      * 3516, 3569, 3660, 3672 and 3674, so the statements on those lines are not
      * visible here.
3430  */
3431 static void
3432 HandleChildCrash(int pid, int exitstatus, const char *procname)
3433 {
3434  dlist_mutable_iter iter;
3435  slist_iter siter;
3436  Backend *bp;
3437  bool take_action;
3438 
3439  /*
3440  * We only log messages and send signals if this is the first process
3441  * crash and we're not doing an immediate shutdown; otherwise, we're only
3442  * here to update postmaster's idea of live processes. If we have already
3443  * signaled children, nonzero exit status is to be expected, so don't
3444  * clutter log.
3445  */
3446  take_action = !FatalError && Shutdown != ImmediateShutdown;
3447 
3448  if (take_action)
3449  {
3450  LogChildExit(LOG, procname, pid, exitstatus);
3451  ereport(LOG,
3452  (errmsg("terminating any other active server processes")));
3454  }
3455 
3456  /* Process background workers. */
3458  {
3459  RegisteredBgWorker *rw;
3460 
3461  rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
3462  if (rw->rw_pid == 0)
3463  continue; /* not running */
3464  if (rw->rw_pid == pid)
3465  {
3466  /*
3467  * Found entry for freshly-dead worker, so remove it.
3468  */
3470  dlist_delete(&rw->rw_backend->elem);
3471 #ifdef EXEC_BACKEND
3472  ShmemBackendArrayRemove(rw->rw_backend);
3473 #endif
3474  free(rw->rw_backend);
3475  rw->rw_backend = NULL;
3476  rw->rw_pid = 0;
3477  rw->rw_child_slot = 0;
3478  /* don't reset crashed_at */
3479  /* don't report child stop, either */
3480  /* Keep looping so we can signal remaining workers */
3481  }
3482  else
3483  {
3484  /*
3485  * This worker is still alive. Unless we did so already, tell it
3486  * to commit hara-kiri.
3487  *
3488  * SIGQUIT is the special signal that says exit without proc_exit
3489  * and let the user know what's going on. But if SendStop is set
3490  * (-s on command line), then we send SIGSTOP instead, so that we
3491  * can get core dumps from all backends by hand.
3492  */
3493  if (take_action)
3494  {
3495  ereport(DEBUG2,
3496  (errmsg_internal("sending %s to process %d",
3497  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3498  (int) rw->rw_pid)));
3500  }
3501  }
3502  }
3503 
3504  /* Process regular backends */
3505  dlist_foreach_modify(iter, &BackendList)
3506  {
3507  bp = dlist_container(Backend, elem, iter.cur);
3508 
3509  if (bp->pid == pid)
3510  {
3511  /*
3512  * Found entry for freshly-dead backend, so remove it.
3513  */
3514  if (!bp->dead_end)
3515  {
3517 #ifdef EXEC_BACKEND
3518  ShmemBackendArrayRemove(bp);
3519 #endif
3520  }
3521  dlist_delete(iter.cur);
3522  free(bp);
3523  /* Keep looping so we can signal remaining backends */
3524  }
3525  else
3526  {
3527  /*
3528  * This backend is still alive. Unless we did so already, tell it
3529  * to commit hara-kiri.
3530  *
3531  * SIGQUIT is the special signal that says exit without proc_exit
3532  * and let the user know what's going on. But if SendStop is set
3533  * (-s on command line), then we send SIGSTOP instead, so that we
3534  * can get core dumps from all backends by hand.
3535  *
3536  * We could exclude dead_end children here, but at least in the
3537  * SIGSTOP case it seems better to include them.
3538  *
3539  * Background workers were already processed above; ignore them
3540  * here.
3541  */
3542  if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
3543  continue;
3544 
3545  if (take_action)
3546  {
3547  ereport(DEBUG2,
3548  (errmsg_internal("sending %s to process %d",
3549  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3550  (int) bp->pid)));
3551  signal_child(bp->pid, (SendStop ? SIGSTOP : SIGQUIT));
3552  }
3553  }
3554  }
3555 
      /*
       * Each special child below follows the same pattern: if it is the one
       * that died, forget its PID; otherwise, if it is running and take_action
       * is set, signal it.
       */
3556  /* Take care of the startup process too */
3557  if (pid == StartupPID)
3558  {
3559  StartupPID = 0;
3560  /* Caller adjusts StartupStatus, so don't touch it here */
3561  }
3562  else if (StartupPID != 0 && take_action)
3563  {
3564  ereport(DEBUG2,
3565  (errmsg_internal("sending %s to process %d",
3566  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3567  (int) StartupPID)));
3568  signal_child(StartupPID, (SendStop ? SIGSTOP : SIGQUIT));
3570  }
3571 
3572  /* Take care of the bgwriter too */
3573  if (pid == BgWriterPID)
3574  BgWriterPID = 0;
3575  else if (BgWriterPID != 0 && take_action)
3576  {
3577  ereport(DEBUG2,
3578  (errmsg_internal("sending %s to process %d",
3579  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3580  (int) BgWriterPID)));
3581  signal_child(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
3582  }
3583 
3584  /* Take care of the checkpointer too */
3585  if (pid == CheckpointerPID)
3586  CheckpointerPID = 0;
3587  else if (CheckpointerPID != 0 && take_action)
3588  {
3589  ereport(DEBUG2,
3590  (errmsg_internal("sending %s to process %d",
3591  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3592  (int) CheckpointerPID)));
3593  signal_child(CheckpointerPID, (SendStop ? SIGSTOP : SIGQUIT));
3594  }
3595 
3596  /* Take care of the walwriter too */
3597  if (pid == WalWriterPID)
3598  WalWriterPID = 0;
3599  else if (WalWriterPID != 0 && take_action)
3600  {
3601  ereport(DEBUG2,
3602  (errmsg_internal("sending %s to process %d",
3603  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3604  (int) WalWriterPID)));
3605  signal_child(WalWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
3606  }
3607 
3608  /* Take care of the walreceiver too */
3609  if (pid == WalReceiverPID)
3610  WalReceiverPID = 0;
3611  else if (WalReceiverPID != 0 && take_action)
3612  {
3613  ereport(DEBUG2,
3614  (errmsg_internal("sending %s to process %d",
3615  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3616  (int) WalReceiverPID)));
3617  signal_child(WalReceiverPID, (SendStop ? SIGSTOP : SIGQUIT));
3618  }
3619 
3620  /* Take care of the autovacuum launcher too */
3621  if (pid == AutoVacPID)
3622  AutoVacPID = 0;
3623  else if (AutoVacPID != 0 && take_action)
3624  {
3625  ereport(DEBUG2,
3626  (errmsg_internal("sending %s to process %d",
3627  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3628  (int) AutoVacPID)));
3629  signal_child(AutoVacPID, (SendStop ? SIGSTOP : SIGQUIT));
3630  }
3631 
3632  /*
3633  * Force a power-cycle of the pgarch process too. (This isn't absolutely
3634  * necessary, but it seems like a good idea for robustness, and it
3635  * simplifies the state-machine logic in the case where a shutdown request
3636  * arrives during crash processing.)
3637  */
3638  if (PgArchPID != 0 && take_action)
3639  {
      /* SendStop is not consulted here: the archiver always gets SIGQUIT. */
3640  ereport(DEBUG2,
3641  (errmsg_internal("sending %s to process %d",
3642  "SIGQUIT",
3643  (int) PgArchPID)));
3644  signal_child(PgArchPID, SIGQUIT);
3645  }
3646 
3647  /*
3648  * Force a power-cycle of the pgstat process too. (This isn't absolutely
3649  * necessary, but it seems like a good idea for robustness, and it
3650  * simplifies the state-machine logic in the case where a shutdown request
3651  * arrives during crash processing.)
3652  */
3653  if (PgStatPID != 0 && take_action)
3654  {
      /* As for the archiver, SIGQUIT is sent regardless of SendStop. */
3655  ereport(DEBUG2,
3656  (errmsg_internal("sending %s to process %d",
3657  "SIGQUIT",
3658  (int) PgStatPID)));
3659  signal_child(PgStatPID, SIGQUIT);
3661  }
3662 
3663  /* We do NOT restart the syslogger */
3664 
      /*
       * Once FatalError is set, take_action evaluates to false on later calls,
       * so further child deaths only update the bookkeeping above.
       */
3665  if (Shutdown != ImmediateShutdown)
3666  FatalError = true;
3667 
3668  /* We now transit into a state of waiting for children to die */
3669  if (pmState == PM_RECOVERY ||
3670  pmState == PM_HOT_STANDBY ||
3671  pmState == PM_RUN ||
3673  pmState == PM_SHUTDOWN)
3675 
3676  /*
3677  * .. and if this doesn't happen quickly enough, now the clock is ticking
3678  * for us to kill them without mercy.
3679  */
      /* Only the first crash starts the clock; a nonzero value is kept. */
3680  if (AbortStartTime == 0)
3681  AbortStartTime = time(NULL);
3682 }
3683 
/*
 * LogChildExit -- report how a child process ended.
 *
 * lev is the message level to report at, procname is a noun phrase naming
 * the child, pid is its process ID and exitstatus is the status collected
 * for it.  The wording of the message depends on whether the status says the
 * child exited normally, was terminated by a signal (an exception on WIN32),
 * or is neither of those.
 *
 * For any status other than a clean zero exit we ask
 * pgstat_get_crashed_backend_activity() for the child's activity string and,
 * if one is returned, attach it to the message as the error detail.
 */
static void
LogChildExit(int lev, const char *procname, int pid, int exitstatus)
{
	/*
	 * The buffer size is arbitrary, but is set equal to the default
	 * track_activity_query_size.
	 */
	char		querybuf[1024];
	const char *running = NULL;

	if (!EXIT_STATUS_0(exitstatus))
		running = pgstat_get_crashed_backend_activity(pid,
													   querybuf,
													   sizeof(querybuf));

	/* Case 1: the child called exit(); report its exit code. */
	if (WIFEXITED(exitstatus))
	{
		ereport(lev,

		/*------
		  translator: %s is a noun phrase describing a child process, such as
		  "server process" */
				(errmsg("%s (PID %d) exited with exit code %d",
						procname, pid, WEXITSTATUS(exitstatus)),
				 running ? errdetail("Failed process was running: %s", running) : 0));
		return;
	}

	/* Case 2: the child was killed; report the signal (or exception). */
	if (WIFSIGNALED(exitstatus))
	{
#if defined(WIN32)
		ereport(lev,

		/*------
		  translator: %s is a noun phrase describing a child process, such as
		  "server process" */
				(errmsg("%s (PID %d) was terminated by exception 0x%X",
						procname, pid, WTERMSIG(exitstatus)),
				 errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
				 running ? errdetail("Failed process was running: %s", running) : 0));
#else
		ereport(lev,

		/*------
		  translator: %s is a noun phrase describing a child process, such as
		  "server process" */
				(errmsg("%s (PID %d) was terminated by signal %d: %s",
						procname, pid, WTERMSIG(exitstatus),
						pg_strsignal(WTERMSIG(exitstatus))),
				 running ? errdetail("Failed process was running: %s", running) : 0));
#endif
		return;
	}

	/* Case 3: neither of the above; report the raw status value. */
	ereport(lev,

	/*------
	  translator: %s is a noun phrase describing a child process, such as
	  "server process" */
			(errmsg("%s (PID %d) exited with unrecognized status %d",
					procname, pid, exitstatus),
			 running ? errdetail("Failed process was running: %s", running) : 0));
}
3745 
3746 /*
3747  * Advance the postmaster's state machine and take actions as appropriate
3748  *
3749  * This is common code for pmdie(), reaper() and sigusr1_handler(), which
3750  * receive the signals that might mean we need to change state.
      *
      * The visible checks run in sequence on pmState: PM_RUN/PM_HOT_STANDBY,
      * PM_STOP_BACKENDS, PM_WAIT_BACKENDS, PM_SHUTDOWN_2, PM_WAIT_DEAD_END and
      * finally PM_NO_CHILDREN. They are separate if statements rather than one
      * switch, so a later check also sees any pmState assigned by an earlier
      * one in the same call.
      *
      * NOTE(review): the numbering of this listing has many gaps inside this
      * function (the first ones are 3753, 3758, 3765 and 3768), so a number of
      * conditions, calls and state assignments are not visible here.
3751  */
3752 static void
3754 {
3755  /* If we're doing a smart shutdown, try to advance that state. */
3756  if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
3757  {
3759  {
3760  /*
3761  * ALLOW_SUPERUSER_CONNS state ends as soon as online backup mode
3762  * is not active.
3763  */
3764  if (!BackupInProgress())
3766  }
3767 
3769  {
3770  /*
3771  * ALLOW_NO_CONNS state ends when we have no normal client
3772  * backends running. Then we're ready to stop other children.
3773  */
3776  }
3777  }
3778 
3779  /*
3780  * If we're ready to do so, signal child processes to shut down. (This
3781  * isn't a persistent state, but treating it as a distinct pmState allows
3782  * us to share this code across multiple shutdown code paths.)
3783  */
3784  if (pmState == PM_STOP_BACKENDS)
3785  {
3786  /*
3787  * Forget any pending requests for background workers, since we're no
3788  * longer willing to launch any new workers. (If additional requests
3789  * arrive, BackgroundWorkerStateChange will reject them.)
3790  */
3792 
3793  /* Signal all backend children except walsenders */
3794  SignalSomeChildren(SIGTERM,
3796  /* and the autovac launcher too */
3797  if (AutoVacPID != 0)
3798  signal_child(AutoVacPID, SIGTERM);
3799  /* and the bgwriter too */
3800  if (BgWriterPID != 0)
3801  signal_child(BgWriterPID, SIGTERM);
3802  /* and the walwriter too */
3803  if (WalWriterPID != 0)
3804  signal_child(WalWriterPID, SIGTERM);
3805  /* If we're in recovery, also stop startup and walreceiver procs */
3806  if (StartupPID != 0)
3807  signal_child(StartupPID, SIGTERM);
3808  if (WalReceiverPID != 0)
3809  signal_child(WalReceiverPID, SIGTERM);
3810  /* checkpointer, archiver, stats, and syslogger may continue for now */
3811 
3812  /* Now transition to PM_WAIT_BACKENDS state to wait for them to die */
3814  }
3815 
3816  /*
3817  * If we are in a state-machine state that implies waiting for backends to
3818  * exit, see if they're all gone, and change state if so.
3819  */
3820  if (pmState == PM_WAIT_BACKENDS)
3821  {
3822  /*
3823  * PM_WAIT_BACKENDS state ends when we have no regular backends
3824  * (including autovac workers), no bgworkers (including unconnected
3825  * ones), and no walwriter, autovac launcher or bgwriter. If we are
3826  * doing crash recovery or an immediate shutdown then we expect the
3827  * checkpointer to exit as well, otherwise not. The archiver, stats,
3828  * and syslogger processes are disregarded since they are not
3829  * connected to shared memory; we also disregard dead_end children
3830  * here. Walsenders are also disregarded, they will be terminated
3831  * later after writing the checkpoint record, like the archiver
3832  * process.
3833  */
3835  StartupPID == 0 &&
3836  WalReceiverPID == 0 &&
3837  BgWriterPID == 0 &&
3838  (CheckpointerPID == 0 ||
3840  WalWriterPID == 0 &&
3841  AutoVacPID == 0)
3842  {
3844  {
3845  /*
3846  * Start waiting for dead_end children to die. This state
3847  * change causes ServerLoop to stop creating new ones.
3848  */
3850 
3851  /*
3852  * We already SIGQUIT'd the archiver and stats processes, if
3853  * any, when we started immediate shutdown or entered
3854  * FatalError state.
3855  */
3856  }
3857  else
3858  {
3859  /*
3860  * If we get here, we are proceeding with normal shutdown. All
3861  * the regular children are gone, and it's time to tell the
3862  * checkpointer to do a shutdown checkpoint.
3863  */
3865  /* Start the checkpointer if not running */
3866  if (CheckpointerPID == 0)
3868  /* And tell it to shut down */
3869  if (CheckpointerPID != 0)
3870  {
3872  pmState = PM_SHUTDOWN;
3873  }
3874  else
3875  {
3876  /*
3877  * If we failed to fork a checkpointer, just shut down.
3878  * Any required cleanup will happen at next restart. We
3879  * set FatalError so that an "abnormal shutdown" message
3880  * gets logged when we exit.
3881  */
3882  FatalError = true;
3884 
3885  /* Kill the walsenders, archiver and stats collector too */
3887  if (PgArchPID != 0)
3889  if (PgStatPID != 0)
3891  }
3892  }
3893  }
3894  }
3895 
3896  if (pmState == PM_SHUTDOWN_2)
3897  {
3898  /*
3899  * PM_SHUTDOWN_2 state ends when there's no other children than
3900  * dead_end children left. There shouldn't be any regular backends
3901  * left by now anyway; what we're really waiting for is walsenders and
3902  * archiver.
3903  */
3904  if (PgArchPID == 0 && CountChildren(BACKEND_TYPE_ALL) == 0)
3905  {
3907  }
3908  }
3909 
3910  if (pmState == PM_WAIT_DEAD_END)
3911  {
3912  /*
3913  * PM_WAIT_DEAD_END state ends when the BackendList is entirely empty
3914  * (ie, no dead_end children remain), and the archiver and stats
3915  * collector are gone too.
3916  *
3917  * The reason we wait for those two is to protect them against a new
3918  * postmaster starting conflicting subprocesses; this isn't an
3919  * ironclad protection, but it at least helps in the
3920  * shutdown-and-immediately-restart scenario. Note that they have
3921  * already been sent appropriate shutdown signals, either during a
3922  * normal state transition leading up to PM_WAIT_DEAD_END, or during
3923  * FatalError processing.
3924  */
3925  if (dlist_is_empty(&BackendList) &&
3926  PgArchPID == 0 && PgStatPID == 0)
3927  {
3928  /* These other guys should be dead already */
3929  Assert(StartupPID == 0);
3930  Assert(WalReceiverPID == 0);
3931  Assert(BgWriterPID == 0);
3932  Assert(CheckpointerPID == 0);
3933  Assert(WalWriterPID == 0);
3934  Assert(AutoVacPID == 0);
3935  /* syslogger is not considered here */
3937  }
3938  }
3939 
3940  /*
3941  * If we've been told to shut down, we exit as soon as there are no
3942  * remaining children. If there was a crash, cleanup will occur at the
3943  * next startup. (Before PostgreSQL 8.3, we tried to recover from the
3944  * crash before exiting, but that seems unwise if we are quitting because
3945  * we got SIGTERM from init --- there may well not be time for recovery
3946  * before init decides to SIGKILL us.)
3947  *
3948  * Note that the syslogger continues to run. It will exit when it sees
3949  * EOF on its input pipe, which happens when there are no more upstream
3950  * processes.
3951  */
3953  {
      /* FatalError selects exit code 1 plus the log line; otherwise exit code 0. */
3954  if (FatalError)
3955  {
3956  ereport(LOG, (errmsg("abnormal database system shutdown")));
3957  ExitPostmaster(1);
3958  }
3959  else
3960  {
3961  /*
3962  * Terminate exclusive backup mode to avoid recovery after a clean
3963  * fast shutdown. Since an exclusive backup can only be taken
3964  * during normal running (and not, for example, while running
3965  * under Hot Standby) it only makes sense to do this if we reached
3966  * normal running. If we're still in recovery, the backup file is
3967  * one we're recovering *from*, and we must keep it around so that
3968  * recovery restarts from the right place.
3969  */
3971  CancelBackup();
3972 
3973  /* Normal exit from the postmaster is here */
3974  ExitPostmaster(0);
3975  }
3976  }
3977 
3978  /*
3979  * If the startup process failed, or the user does not want an automatic
3980  * restart after backend crashes, wait for all non-syslogger children to
3981  * exit, and then exit postmaster. We don't try to reinitialize when the
3982  * startup process fails, because more than likely it will just fail again
3983  * and we will keep trying forever.
3984  */
3985  if (pmState == PM_NO_CHILDREN &&
3987  ExitPostmaster(1);
3988 
3989  /*
3990  * If we need to recover from a crash, wait for all non-syslogger children
3991  * to exit, then reset shmem and StartupDataBase.
3992  */
3993  if (FatalError && pmState == PM_NO_CHILDREN)
3994  {
3995  ereport(LOG,
3996  (errmsg("all server processes terminated; reinitializing")));
3997 
3998  /* allow background workers to immediately restart */
4000 
4001  shmem_exit(1);
4002 
4003  /* re-read control file into local memory */
4005 
4006  reset_shared();
4007 
4009  Assert(StartupPID != 0);
4011  pmState = PM_STARTUP;
4012  /* crash recovery started, reset SIGKILL flag */
4013  AbortStartTime = 0;
4014  }
4015 }
4016 
4017 
4018 /*
4019  * Send a signal to a postmaster child process
4020  *
4021  * On systems that have setsid(), each child process sets itself up as a
4022  * process group leader. For signals that are generally interpreted in the
4023  * appropriate fashion, we signal the entire process group not just the
4024  * direct child process. This allows us to, for example, SIGQUIT a blocked
4025  * archive_recovery script, or SIGINT a script being run by a backend via
4026  * system().
4027  *
4028  * There is a race condition for recently-forked children: they might not
4029  * have executed setsid() yet. So we signal the child directly as well as
4030  * the group. We assume such a child will handle the signal before trying
4031  * to spawn any grandchild processes. We also assume that signaling the
4032  * child twice will not cause any problems.
4033  */
4034 static void
4035 signal_child(pid_t pid, int signal)
4036 {
4037  if (kill(pid, signal) < 0)
4038  elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) pid, signal);
4039 #ifdef HAVE_SETSID
4040  switch (signal)
4041  {
4042  case SIGINT:
4043  case SIGTERM:
4044  case SIGQUIT:
4045  case SIGSTOP:
4046  case SIGKILL:
4047  if (kill(-pid, signal) < 0)
4048  elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) (-pid), signal);
4049  break;
4050  default:
4051  break;
4052  }
4053 #endif
4054 }
4055 
4056 /*
4057  * Send a signal to the targeted children (but NOT special children;
4058  * dead_end children are never signaled, either).
      *
      * signal is the signal number to send. target is a mask of BACKEND_TYPE_*
      * bits that is tested against each child's bkend_type, except that
      * BACKEND_TYPE_ALL skips the type test entirely. Returns true if at least
      * one child was signaled.
      *
      * NOTE(review): the numbering of this listing skips 4084-4085, so the rest
      * of the bkend_type condition that begins at 4083 is not visible here.
4059  */
4060 static bool
4061 SignalSomeChildren(int signal, int target)
4062 {
4063  dlist_iter iter;
4064  bool signaled = false;
4065 
4066  dlist_foreach(iter, &BackendList)
4067  {
4068  Backend *bp = dlist_container(Backend, elem, iter.cur);
4069 
4070  if (bp->dead_end)
4071  continue;
4072 
4073  /*
4074  * Since target == BACKEND_TYPE_ALL is the most common case, we test
4075  * it first and avoid touching shared memory for every child.
4076  */
4077  if (target != BACKEND_TYPE_ALL)
4078  {
4079  /*
4080  * Assign bkend_type for any recently announced WAL Sender
4081  * processes.
4082  */
4083  if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
4086 
      /* Skip children whose type is not among the requested bits. */
4087  if (!(target & bp->bkend_type))
4088  continue;
4089  }
4090 
4091  ereport(DEBUG4,
4092  (errmsg_internal("sending signal %d to process %d",
4093  signal, (int) bp->pid)));
4094  signal_child(bp->pid, signal);
4095  signaled = true;
4096  }
4097  return signaled;
4098 }
4099 
4100 /*
4101  * Send a termination signal to children. This considers all of our children
4102  * processes, except syslogger and dead_end backends.
      *
      * SignalChildren(signal) is called first; after that, each special child
      * listed below is signaled individually if its PID variable is nonzero.
      *
      * NOTE(review): the numbering of this listing skips 4105 and 4112, so the
      * function-name line and the statement executed for SIGQUIT/SIGKILL on the
      * startup process are not visible here.
4103  */
4104 static void
4106 {
4107  SignalChildren(signal);
4108  if (StartupPID != 0)
4109  {
4110  signal_child(StartupPID, signal);
4111  if (signal == SIGQUIT || signal == SIGKILL)
4113  }
4114  if (BgWriterPID != 0)
4115  signal_child(BgWriterPID, signal);
4116  if (CheckpointerPID != 0)
4117  signal_child(CheckpointerPID, signal);
4118  if (WalWriterPID != 0)
4119  signal_child(WalWriterPID, signal);
4120  if (WalReceiverPID != 0)
4121  signal_child(WalReceiverPID, signal);
4122  if (AutoVacPID != 0)
4123  signal_child(AutoVacPID, signal);
4124  if (PgArchPID != 0)
4125  signal_child(PgArchPID, signal);
4126  if (PgStatPID != 0)
4127  signal_child(PgStatPID, signal);
4128 }
4129 
4130 /*
4131  * BackendStartup -- start backend process
4132  *
4133  * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
4134  *
4135  * Note: if you change this code, also consider StartAutovacuumWorker.
      *
      * STATUS_ERROR is also returned, before any fork is attempted, when the
      * Backend struct cannot be allocated or the cancel key cannot be
      * generated. On success the new Backend entry is pushed onto BackendList.
      *
      * NOTE(review): the numbering of this listing skips 4138, 4161, 4173, 4175,
      * 4181, 4197 and 4216, so the statements on those lines (including the
      * function-name line) are not visible here.
4136  */
4137 static int
4139 {
4140  Backend *bn; /* for backend cleanup */
4141  pid_t pid;
4142 
4143  /*
4144  * Create backend data structure. Better before the fork() so we can
4145  * handle failure cleanly.
4146  */
4147  bn = (Backend *) malloc(sizeof(Backend));
4148  if (!bn)
4149  {
4150  ereport(LOG,
4151  (errcode(ERRCODE_OUT_OF_MEMORY),
4152  errmsg("out of memory")));
4153  return STATUS_ERROR;
4154  }
4155 
4156  /*
4157  * Compute the cancel key that will be assigned to this backend. The
4158  * backend will have its own copy in the forked-off process' value of
4159  * MyCancelKey, so that it can transmit the key to the frontend.
4160  */
4162  {
4163  free(bn);
4164  ereport(LOG,
4165  (errcode(ERRCODE_INTERNAL_ERROR),
4166  errmsg("could not generate random cancel key")));
4167  return STATUS_ERROR;
4168  }
4169 
4170  bn->cancel_key = MyCancelKey;
4171 
4172  /* Pass down canAcceptConnections state */
4174  bn->dead_end = (port->canAcceptConnections != CAC_OK &&
4176 
4177  /*
4178  * Unless it's a dead_end child, assign it a child slot number
4179  */
4180  if (!bn->dead_end)
4182  else
4183  bn->child_slot = 0;
4184 
4185  /* Hasn't asked to be notified about any bgworkers yet */
4186  bn->bgworker_notify = false;
4187 
4188 #ifdef EXEC_BACKEND
4189  pid = backend_forkexec(port);
4190 #else /* !EXEC_BACKEND */
4191  pid = fork_process();
4192  if (pid == 0) /* child */
4193  {
      /* The child releases bn before going on to run as a backend. */
4194  free(bn);
4195 
4196  /* Detangle from postmaster */
4198 
4199  /* Close the postmaster's sockets */
4200  ClosePostmasterPorts(false);
4201 
4202  /* Perform additional initialization and collect startup packet */
4203  BackendInitialize(port);
4204 
4205  /* And run the backend */
4206  BackendRun(port);
4207  }
4208 #endif /* EXEC_BACKEND */
4209 
4210  if (pid < 0)
4211  {
4212  /* in parent, fork failed */
      /* Saved so that errno can be restored for the %m in the message below. */
4213  int save_errno = errno;
4214 
4215  if (!bn->dead_end)
4217  free(bn);
4218  errno = save_errno;
4219  ereport(LOG,
4220  (errmsg("could not fork new process for connection: %m")));
4221  report_fork_failure_to_client(port, save_errno);
4222  return STATUS_ERROR;
4223  }
4224 
4225  /* in parent, successful fork */
4226  ereport(DEBUG2,
4227  (errmsg_internal("forked new backend, pid=%d socket=%d",
4228  (int) pid, (int) port->sock)));
4229 
4230  /*
4231  * Everything's been successful, it's safe to add this backend to our list
4232  * of backends.
4233  */
4234  bn->pid = pid;
4235  bn->bkend_type = BACKEND_TYPE_NORMAL; /* Can change later to WALSND */
4236  dlist_push_head(&BackendList, &bn->elem);
4237 
4238 #ifdef EXEC_BACKEND
4239  if (!bn->dead_end)
4240  ShmemBackendArrayAdd(bn);
4241 #endif
4242 
4243  return STATUS_OK;
4244 }
4245 
4246 /*
4247  * Try to report backend fork() failure to client before we close the
4248  * connection. Since we do not care to risk blocking the postmaster on
4249  * this connection, we set the connection to non-blocking and try only once.
4250  *
4251  * This is grungy special-purpose code; we cannot use backend libpq since
4252  * it's not up and running.
      *
      * The message is written to port->sock, and errnum is turned into text
      * with strerror(). Nothing is reported back to the caller: a failure to
      * switch the socket to non-blocking mode, or a failed send() other than
      * EINTR, simply ends the attempt.
      *
      * NOTE(review): the numbering of this listing skips 4255, so the line
      * carrying the function name and parameter list is not visible here.
4253  */
4254 static void
4256 {
4257  char buffer[1000];
4258  int rc;
4259 
4260  /* Format the error message packet (always V2 protocol) */
4261  snprintf(buffer, sizeof(buffer), "E%s%s\n",
4262  _("could not fork new process for connection: "),
4263  strerror(errnum));
4264 
4265  /* Set port to non-blocking. Don't do send() if this fails */
4266  if (!pg_set_noblock(port->sock))
4267  return;
4268 
4269  /* We'll retry after EINTR, but ignore all other failures */
4270  do
4271  {
      /* strlen() + 1: the terminating NUL byte is sent along with the text. */
4272  rc = send(port->sock, buffer, strlen(buffer) + 1, 0);
4273  } while (rc < 0 && errno == EINTR);
4274 }
4275 
4276 
4277 /*
4278  * BackendInitialize -- initialize an interactive (postmaster-child)
4279  * backend process, and collect the client's startup packet.
4280  *
4281  * returns: nothing. Will not return at all if there's any failure.
4282  *
4283  * Note: this code does not depend on having any access to shared memory.
4284  * Indeed, our approach to SIGTERM/timeout handling *requires* that
4285  * shared memory not have been touched yet; see comments within.
4286  * In the EXEC_BACKEND case, we are physically attached to shared memory
4287  * but have not yet set up most of our local pointers to shmem structures.
 *
 * Visible sequence of work: remember the Port in MyProcPort, optionally
 * sleep for PreAuthDelay, initialize libpq, resolve and store the client's
 * host/port strings, read the startup packet via ProcessStartupPacket(),
 * and finally build the ps display string from the user, database and
 * remote address.
 *
 * NOTE(review): several lines of this function (the declarator, the signal
 * and timeout setup calls, and the shmem-exit sanity check) are absent from
 * this listing. Comments that appear to have no statement after them are
 * describing one of those missing lines.
4288  */
4289 static void
4291 {
4292  int status;
4293  int ret;
4294  char remote_host[NI_MAXHOST];
4295  char remote_port[NI_MAXSERV];
4296  StringInfoData ps_data;
4297 
4298  /* Save port etc. for ps status */
4299  MyProcPort = port;
4300 
4301  /* Tell fd.c about the long-lived FD associated with the port */
4303 
4304  /*
4305  * PreAuthDelay is a debugging aid for investigating problems in the
4306  * authentication cycle: it can be set in postgresql.conf to allow time to
4307  * attach to the newly-forked backend with a debugger. (See also
4308  * PostAuthDelay, which we allow clients to pass through PGOPTIONS, but it
4309  * is not honored until after authentication.)
4310  */
 /* PreAuthDelay is scaled by 1000000 here, i.e. treated as seconds */
4311  if (PreAuthDelay > 0)
4312  pg_usleep(PreAuthDelay * 1000000L);
4313 
4314  /* This flag will remain set until InitPostgres finishes authentication */
4315  ClientAuthInProgress = true; /* limit visibility of log messages */
4316 
4317  /* set these to empty in case they are needed before we set them up */
4318  port->remote_host = "";
4319  port->remote_port = "";
4320 
4321  /*
4322  * Initialize libpq and enable reporting of ereport errors to the client.
4323  * Must do this now because authentication uses libpq to send messages.
4324  */
4325  pq_init(); /* initialize libpq to talk to client */
4326  whereToSendOutput = DestRemote; /* now safe to ereport to client */
4327 
4328  /*
4329  * We arrange to do _exit(1) if we receive SIGTERM or timeout while trying
4330  * to collect the startup packet; while SIGQUIT results in _exit(2).
4331  * Otherwise the postmaster cannot shutdown the database FAST or IMMED
4332  * cleanly if a buggy client fails to send the packet promptly.
4333  *
4334  * Exiting with _exit(1) is only possible because we have not yet touched
4335  * shared memory; therefore no outside-the-process state needs to get
4336  * cleaned up.
4337  */
4339  /* SIGQUIT handler was already set up by InitPostmasterChild */
4340  InitializeTimeouts(); /* establishes SIGALRM handler */
4342 
4343  /*
4344  * Get the remote host name and port for logging and status display.
4345  */
 /*
  * The buffers are pre-cleared so they hold valid empty strings even if
  * the lookup fails; failure is only a WARNING and processing continues.
  * A reverse name lookup is requested only when log_hostname is set.
  */
4346  remote_host[0] = '\0';
4347  remote_port[0] = '\0';
4348  if ((ret = pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen,
4349  remote_host, sizeof(remote_host),
4350  remote_port, sizeof(remote_port),
4351  (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV)) != 0)
4352  ereport(WARNING,
4353  (errmsg_internal("pg_getnameinfo_all() failed: %s",
4354  gai_strerror(ret))));
4355 
4356  /*
4357  * Save remote_host and remote_port in port structure (after this, they
4358  * will appear in log_line_prefix data for log messages).
4359  */
 /* NOTE(review): the strdup() results are not checked for NULL here */
4360  port->remote_host = strdup(remote_host);
4361  port->remote_port = strdup(remote_port);
4362 
4363  /* And now we can issue the Log_connections message, if wanted */
4364  if (Log_connections)
4365  {
4366  if (remote_port[0])
4367  ereport(LOG,
4368  (errmsg("connection received: host=%s port=%s",
4369  remote_host,
4370  remote_port)));
4371  else
4372  ereport(LOG,
4373  (errmsg("connection received: host=%s",
4374  remote_host)));
4375  }
4376 
4377  /*
4378  * If we did a reverse lookup to name, we might as well save the results
4379  * rather than possibly repeating the lookup during authentication.
4380  *
4381  * Note that we don't want to specify NI_NAMEREQD above, because then we'd
4382  * get nothing useful for a client without an rDNS entry. Therefore, we
4383  * must check whether we got a numeric IPv4 or IPv6 address, and not save
4384  * it into remote_hostname if so. (This test is conservative and might
4385  * sometimes classify a hostname as numeric, but an error in that
4386  * direction is safe; it only results in a possible extra lookup.)
4387  */
 /*
  * The two strspn() tests require at least one character outside the
  * IPv4 alphabet (digits and '.') and at least one outside the IPv6
  * alphabet (hex digits and ':').
  */
4388  if (log_hostname &&
4389  ret == 0 &&
4390  strspn(remote_host, "0123456789.") < strlen(remote_host) &&
4391  strspn(remote_host, "0123456789ABCDEFabcdef:") < strlen(remote_host))
4392  port->remote_hostname = strdup(remote_host);
4393 
4394  /*
4395  * Ready to begin client interaction. We will give up and _exit(1) after
4396  * a time delay, so that a broken client can't hog a connection
4397  * indefinitely. PreAuthDelay and any DNS interactions above don't count
4398  * against the time limit.
4399  *
4400  * Note: AuthenticationTimeout is applied here while waiting for the
4401  * startup packet, and then again in InitPostgres for the duration of any
4402  * authentication operations. So a hostile client could tie up the
4403  * process for nearly twice AuthenticationTimeout before we kick him off.
4404  *
4405  * Note: because PostgresMain will call InitializeTimeouts again, the
4406  * registration of STARTUP_PACKET_TIMEOUT will be lost. This is okay
4407  * since we never use it again after this function.
4408  */
4411 
4412  /*
4413  * Receive the startup packet (which might turn out to be a cancel request
4414  * packet).
4415  */
4416  status = ProcessStartupPacket(port, false, false);
4417 
4418  /*
4419  * Disable the timeout, and prevent SIGTERM again.
4420  */
4422  PG_SETMASK(&BlockSig);
4423 
4424  /*
4425  * As a safety check that nothing in startup has yet performed
4426  * shared-memory modifications that would need to be undone if we had
4427  * exited through SIGTERM or timeout above, check that no on_shmem_exit
4428  * handlers have been registered yet. (This isn't terribly bulletproof,
4429  * since someone might misuse an on_proc_exit handler for shmem cleanup,
4430  * but it's a cheap and helpful check. We cannot disallow on_proc_exit
4431  * handlers unfortunately, since pq_init() already registered one.)
4432  */
4434 
4435  /*
4436  * Stop here if it was bad or a cancel packet. ProcessStartupPacket
4437  * already did any appropriate error reporting.
4438  */
 /* Exit status 0: from this process's viewpoint nothing went wrong */
4439  if (status != STATUS_OK)
4440  proc_exit(0);
4441 
4442  /*
4443  * Now that we have the user and database name, we can set the process
4444  * title for ps. It's good to do this as early as possible in startup.
4445  */
4446  initStringInfo(&ps_data);
4447  if (am_walsender)
 /*
  * NOTE(review): the statement governed by the "if" above is absent from
  * this listing. Do not read the user_name append below as the body of
  * that "if" without checking the upstream file.
  */
4449  appendStringInfo(&ps_data, "%s ", port->user_name);
4450  if (!am_walsender)
4451  appendStringInfo(&ps_data, "%s ", port->database_name);
4452  appendStringInfo(&ps_data, "%s", port->remote_host);
4453  if (port->remote_port[0] != '\0')
4454  appendStringInfo(&ps_data, "(%s)", port->remote_port);
4455 
 /* The assembled string is only needed for the call; free it right after */
4456  init_ps_display(ps_data.data);
4457  pfree(ps_data.data);
4458 
4459  set_ps_display("initializing");
4460 }
4461 
4462 
4463 /*
4464  * BackendRun -- set up the backend's argument list and invoke PostgresMain()
4465  *
 * The argument vector handed to PostgresMain() is fixed: just the program
 * name "postgres" followed by a NULL terminator. The database and user
 * names are taken from port->database_name and port->user_name.
 *
4466  * returns:
4467  * Doesn't return at all.
 *
 * NOTE(review): the declarator line and the statement that follows the
 * "PostmasterContext" comment below are absent from this listing.
4468  */
4469 static void
4471 {
4472  char *av[2];
4473  const int ac = 1;
4474 
4475  av[0] = "postgres";
4476  av[1] = NULL;
4477 
4478  /*
4479  * Make sure we aren't in PostmasterContext anymore. (We can't delete it
4480  * just yet, though, because InitPostgres will need the HBA data.)
4481  */
4483 
4484  PostgresMain(ac, av, port->database_name, port->user_name);
4485 }
4486 
4487 
4488 #ifdef EXEC_BACKEND
4489 
4490 /*
4491  * postmaster_forkexec -- fork and exec a postmaster subprocess
4492  *
4493  * The caller must have set up the argv array already, except for argv[2]
4494  * which will be filled with the name of the temp variable file.
4495  *
4496  * Returns the child process PID, or -1 on fork failure (a suitable error
4497  * message has been logged on failure).
4498  *
4499  * All uses of this routine will dispatch to SubPostmasterMain in the
4500  * child process.
4501  */
4502 pid_t
4503 postmaster_forkexec(int argc, char *argv[])
4504 {
4505  Port port;
4506 
4507  /* This entry point passes dummy values for the Port variables */
4508  memset(&port, 0, sizeof(port));
4509  return internal_forkexec(argc, argv, &port);
4510 }
4511 
4512 /*
4513  * backend_forkexec -- fork/exec off a backend process
4514  *
4515  * Some operating systems (WIN32) don't have fork() so we have to simulate
4516  * it by storing parameters that need to be passed to the child and
4517  * then create a new child process.
4518  *
4519  * returns the pid of the fork/exec'd process, or -1 on failure
4520  */
4521 static pid_t
4522 backend_forkexec(Port *port)
4523 {
4524  char *av[4];
4525  int ac = 0;
4526 
4527  av[ac++] = "postgres";
4528  av[ac++] = "--forkbackend";
4529  av[ac++] = NULL; /* filled in by internal_forkexec */
4530 
4531  av[ac] = NULL;
4532  Assert(ac < lengthof(av));
4533 
4534  return internal_forkexec(ac, av, port);
4535 }
4536 
4537 #ifndef WIN32
4538 
4539 /*
4540  * internal_forkexec non-win32 implementation
4541  *
4542  * - writes out backend variables to the parameter file
4543  * - fork():s, and then exec():s the child process
 *
 * argc/argv: the child's command line. The caller must leave argv[2] NULL
 * (asserted below); it is overwritten here with the parameter file's name.
 * port: passed to save_backend_variables() to be captured in the file.
 *
 * Returns whatever fork_process() returned to the parent (the child's pid,
 * or -1 on fork failure), or -1 if the parameter file could not be written.
 * Failures are reported at LOG level.
 *
 * NOTE(review): some argument lines of the snprintf()/ereport() calls and
 * the directory-creation statement are absent from this listing.
 * NOTE(review): no code visible here removes the parameter file on the
 * failure paths; presumably cleanup happens elsewhere, confirm.
4544  */
4545 static pid_t
4546 internal_forkexec(int argc, char *argv[], Port *port)
4547 {
 /* Per-process counter; together with MyProcPid it makes each name unique */
4548  static unsigned long tmpBackendFileNum = 0;
4549  pid_t pid;
4550  char tmpfilename[MAXPGPATH];
4551  BackendParameters param;
4552  FILE *fp;
4553 
4554  if (!save_backend_variables(&param, port))
4555  return -1; /* log made by save_backend_variables */
4556 
4557  /* Calculate name for temp file */
4558  snprintf(tmpfilename, MAXPGPATH, "%s/%s.backend_var.%d.%lu",
4560  MyProcPid, ++tmpBackendFileNum);
4561 
4562  /* Open file */
4563  fp = AllocateFile(tmpfilename, PG_BINARY_W);
4564  if (!fp)
4565  {
4566  /*
4567  * As in OpenTemporaryFileInTablespace, try to make the temp-file
4568  * directory, ignoring errors.
4569  */
4571 
 /* Second and final attempt; give up with a LOG message if it fails too */
4572  fp = AllocateFile(tmpfilename, PG_BINARY_W);
4573  if (!fp)
4574  {
4575  ereport(LOG,
4577  errmsg("could not create file \"%s\": %m",
4578  tmpfilename)));
4579  return -1;
4580  }
4581  }
4582 
 /* The whole struct is written as one raw binary record */
4583  if (fwrite(&param, sizeof(param), 1, fp) != 1)
4584  {
4585  ereport(LOG,
4587  errmsg("could not write to file \"%s\": %m", tmpfilename)));
4588  FreeFile(fp);
4589  return -1;
4590  }
4591 
4592  /* Release file */
 /* A failure here is reported as a write failure, like the fwrite() above */
4593  if (FreeFile(fp))
4594  {
4595  ereport(LOG,
4597  errmsg("could not write to file \"%s\": %m", tmpfilename)));
4598  return -1;
4599  }
4600 
4601  /* Make sure caller set up argv properly */
4602  Assert(argc >= 3);
4603  Assert(argv[argc] == NULL);
4604  Assert(strncmp(argv[1], "--fork", 6) == 0);
4605  Assert(argv[2] == NULL);
4606 
4607  /* Insert temp file name after --fork argument */
 /*
  * tmpfilename is a local array, so argv[2] is only valid until this
  * function returns; callers must not use it afterwards.
  */
4608  argv[2] = tmpfilename;
4609 
4610  /* Fire off execv in child */
4611  if ((pid = fork_process()) == 0)
4612  {
4613  if (execv(postgres_exec_path, argv) < 0)
4614  {
4615  ereport(LOG,
4616  (errmsg("could not execute server process \"%s\": %m",
4617  postgres_exec_path)));
4618  /* We're already in the child process here, can't return */
4619  exit(1);
4620  }
4621  }
4622 
4623  return pid; /* Parent returns pid, or -1 on fork failure */
4624 }
4625 #else /* WIN32 */
4626 
4627 /*
4628  * internal_forkexec win32 implementation
4629  *
4630  * - starts backend using CreateProcess(), in suspended state
4631  * - writes out backend variables to the parameter file
4632  * - during this, duplicates handles and sockets required for
4633  * inheritance into the new process
4634  * - resumes execution of the new process once the backend parameter
4635  * file is complete.
4636  */
4637 static pid_t
4638 internal_forkexec(int argc, char *argv[], Port *port)
4639 {
4640  int retry_count = 0;
4641  STARTUPINFO si;
4642  PROCESS_INFORMATION pi;
4643  int i;
4644  int j;
4645  char cmdLine[MAXPGPATH * 2];
4646  HANDLE paramHandle;
4647  BackendParameters *param;
4648  SECURITY_ATTRIBUTES sa;
4649  char paramHandleStr[32];
4650  win32_deadchild_waitinfo *childinfo;
4651 
4652  /* Make sure caller set up argv properly */
4653  Assert(argc >= 3);
4654  Assert(argv[argc] == NULL);
4655  Assert(strncmp(argv[1], "--fork", 6) == 0);
4656  Assert(argv[2] == NULL);
4657 
4658  /* Resume here if we need to retry */
4659 retry:
4660 
4661  /* Set up shared memory for parameter passing */
4662  ZeroMemory(&sa, sizeof(sa));
4663  sa.nLength = sizeof(sa);
4664  sa.bInheritHandle = TRUE;
4665  paramHandle = CreateFileMapping(INVALID_HANDLE_VALUE,
4666  &sa,
4667  PAGE_READWRITE,
4668  0,
4669  sizeof(BackendParameters),
4670  NULL);
4671  if (paramHandle == INVALID_HANDLE_VALUE)
4672  {
4673  ereport(LOG,
4674  (errmsg("could not create backend parameter file mapping: error code %lu",
4675  GetLastError())));
4676  return -1;
4677  }
4678 
4679  param = MapViewOfFile(paramHandle, FILE_MAP_WRITE, 0, 0, sizeof(BackendParameters));
4680  if (!param)
4681  {
4682  ereport(LOG,
4683  (errmsg("could not map backend parameter memory: error code %lu",
4684  GetLastError())));
4685  CloseHandle(paramHandle);
4686  return -1;
4687  }
4688 
4689  /* Insert temp file name after --fork argument */
4690 #ifdef _WIN64
4691  sprintf(paramHandleStr, "%llu", (LONG_PTR) paramHandle);
4692 #else
4693  sprintf(paramHandleStr, "%lu", (DWORD) paramHandle);
4694 #endif
4695  argv[2] = paramHandleStr;
4696 
4697  /* Format the cmd line */
4698  cmdLine[sizeof(cmdLine) - 1] = '\0';
4699  cmdLine[sizeof(cmdLine) - 2] = '\0';
4700  snprintf(cmdLine, sizeof(cmdLine) - 1, "\"%s\"", postgres_exec_path);
4701  i = 0;
4702  while (argv[++i] != NULL)
4703  {
4704  j = strlen(cmdLine);
4705  snprintf(cmdLine + j, sizeof(cmdLine) - 1 - j, " \"%s\"", argv[i]);
4706  }
4707  if (cmdLine[sizeof(cmdLine) - 2] != '\0')
4708  {
4709  ereport(LOG,
4710  (errmsg("subprocess command line too long")));
4711  UnmapViewOfFile(param);
4712  CloseHandle(paramHandle);
4713  return -1;
4714  }
4715 
4716  memset(&pi, 0, sizeof(pi));
4717  memset(&si, 0, sizeof(si));
4718  si.cb = sizeof(si);
4719 
4720  /*
4721  * Create the subprocess in a suspended state. This will be resumed later,
4722  * once we have written out the parameter file.
4723  */
4724  if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, CREATE_SUSPENDED,
4725  NULL, NULL, &si, &pi))
4726  {
4727  ereport(LOG,
4728  (errmsg("CreateProcess() call failed: %m (error code %lu)",
4729  GetLastError())));
4730  UnmapViewOfFile(param);
4731  CloseHandle(paramHandle);
4732  return -1;
4733  }
4734 
4735  if (!save_backend_variables(param, port, pi.hProcess, pi.dwProcessId))
4736  {
4737  /*
4738  * log made by save_backend_variables, but we have to clean up the
4739  * mess with the half-started process
4740  */
4741  if (!TerminateProcess(pi.hProcess, 255))
4742  ereport(LOG,
4743  (errmsg_internal("could not terminate unstarted process: error code %lu",
4744  GetLastError())));
4745  CloseHandle(pi.hProcess);
4746  CloseHandle(pi.hThread);
4747  UnmapViewOfFile(param);
4748  CloseHandle(paramHandle);
4749  return -1; /* log made by save_backend_variables */
4750  }
4751 
4752  /* Drop the parameter shared memory that is now inherited to the backend */
4753  if (!UnmapViewOfFile(param))
4754  ereport(LOG,
4755  (errmsg("could not unmap view of backend parameter file: error code %lu",
4756  GetLastError())));
4757  if (!CloseHandle(paramHandle))
4758  ereport(LOG,
4759  (errmsg("could not close handle to backend parameter file: error code %lu",
4760  GetLastError())));
4761 
4762  /*
4763  * Reserve the memory region used by our main shared memory segment before
4764  * we resume the child process. Normally this should succeed, but if ASLR
4765  * is active then it might sometimes fail due to the stack or heap having
4766  * gotten mapped into that range. In that case, just terminate the
4767  * process and retry.
4768  */
4769  if (!pgwin32_ReserveSharedMemoryRegion(pi.hProcess))
4770  {
4771  /* pgwin32_ReserveSharedMemoryRegion already made a log entry */
4772  if (!TerminateProcess(pi.hProcess, 255))
4773  ereport(LOG,
4774  (errmsg_internal("could not terminate process that failed to reserve memory: error code %lu",
4775  GetLastError())));
4776  CloseHandle(pi.hProcess);
4777  CloseHandle(pi.hThread);
4778  if (++retry_count < 100)
4779  goto retry;
4780  ereport(LOG,
4781  (errmsg("giving up after too many tries to reserve shared memory"),
4782  errhint("This might be caused by ASLR or antivirus software.")));
4783  return -1;
4784  }
4785 
4786  /*
4787  * Now that the backend variables are written out, we start the child
4788  * thread so it can start initializing while we set up the rest of the
4789  * parent state.
4790  */
4791  if (ResumeThread(pi.hThread) == -1)
4792  {
4793  if (!TerminateProcess(pi.hProcess, 255))
4794  {
4795  ereport(LOG,
4796  (errmsg_internal("could not terminate unstartable process: error code %lu",
4797  GetLastError())));
4798  CloseHandle(pi.hProcess);
4799  CloseHandle(pi.hThread);
4800  return -1;
4801  }
4802  CloseHandle(pi.hProcess);
4803  CloseHandle(pi.hThread);
4804  ereport(LOG,
4805  (errmsg_internal("could not resume thread of unstarted process: error code %lu",
4806  GetLastError())));
4807  return -1;
4808  }
4809 
4810  /*
4811  * Queue a waiter to signal when this child dies. The wait will be handled
4812  * automatically by an operating system thread pool.
4813  *
4814  * Note: use malloc instead of palloc, since it needs to be thread-safe.
4815  * Struct will be free():d from the callback function that runs on a
4816  * different thread.
4817  */
4818  childinfo = malloc(sizeof(win32_deadchild_waitinfo));
4819  if (!childinfo)
4820  ereport(FATAL,
4821  (errcode(ERRCODE_OUT_OF_MEMORY),
4822  errmsg("out of memory")));
4823 
4824  childinfo->procHandle = pi.hProcess;
4825  childinfo->procId = pi.dwProcessId;
4826 
4827  if (!RegisterWaitForSingleObject(&childinfo->waitHandle,
4828  pi.hProcess,
4829  pgwin32_deadchild_callback,
4830  childinfo,
4831  INFINITE,
4832  WT_EXECUTEONLYONCE | WT_EXECUTEINWAITTHREAD))
4833  ereport(FATAL,
4834  (errmsg_internal("could not register process for wait: error code %lu",
4835  GetLastError())));
4836 
4837  /* Don't close pi.hProcess here - the wait thread needs access to it */
4838 
4839  CloseHandle(pi.hThread);
4840 
4841  return pi.dwProcessId;
4842 }
4843 #endif /* WIN32 */
4844 
4845 
4846 /*
4847  * SubPostmasterMain -- Get the fork/exec'd process into a state equivalent
4848  * to what it would be if we'd simply forked on Unix, and then
4849  * dispatch to the appropriate place.
4850  *
4851  * The first two command line arguments are expected to be "--forkFOO"
4852  * (where FOO indicates which postmaster child we are to become), and
4853  * the name of a variables file that we can read to load data that would
4854  * have been inherited by fork() on Unix. Remaining arguments go to the
4855  * subprocess FooMain() routine.
 *
 * In terms of argv indexes: argv[1] is the "--forkFOO" switch and argv[2]
 * names the variables file. Fewer than three arguments is a FATAL error.
 * Every dispatch target below is marked as not returning; reaching the end
 * of the function therefore means argv[1] matched no known switch, and the
 * process abort()s.
 *
 * NOTE(review): many statements of this function (early initialization
 * calls, the shared-memory re-attach calls, and the per-child "restore
 * pointers" / "attach" calls) are absent from this listing. A comment with
 * no statement after it is describing one of those missing lines.
4856  */
4857 void
4858 SubPostmasterMain(int argc, char *argv[])
4859 {
4860  Port port;
4861 
4862  /* In EXEC_BACKEND case we will not have inherited these settings */
4863  IsPostmasterEnvironment = true;
4865 
4866  /* Setup essential subsystems (to ensure elog() behaves sanely) */
4868 
4869  /* Check we got appropriate args */
4870  if (argc < 3)
4871  elog(FATAL, "invalid subpostmaster invocation");
4872 
4873  /* Read in the variables file */
4874  memset(&port, 0, sizeof(Port));
4875  read_backend_variables(argv[2], &port);
4876 
4877  /* Close the postmaster's sockets (as soon as we know them) */
 /* The boolean argument is true only when this process is the syslogger */
4878  ClosePostmasterPorts(strcmp(argv[1], "--forklog") == 0);
4879 
4880  /*
4881  * Start our win32 signal implementation. This has to be done after we
4882  * read the backend variables, because we need to pick up the signal pipe
4883  * from the parent process.
4884  */
4885 #ifdef WIN32
4887 #endif
4888 
4889  /* Setup as postmaster child */
4891 
4892  /*
4893  * If appropriate, physically re-attach to shared memory segment. We want
4894  * to do this before going any further to ensure that we can attach at the
4895  * same address the postmaster used. On the other hand, if we choose not
4896  * to re-attach, we may have other cleanup to do.
4897  *
4898  * If testing EXEC_BACKEND on Linux, you should run this as root before
4899  * starting the postmaster:
4900  *
4901  * echo 0 >/proc/sys/kernel/randomize_va_space
4902  *
4903  * This prevents using randomized stack and code addresses that cause the
4904  * child process's memory map to be different from the parent's, making it
4905  * sometimes impossible to attach to shared memory at the desired address.
4906  * Return the setting to its old value (usually '1' or '2') when finished.
4907  */
 /*
  * NOTE(review): the statements for both arms of this if/else are absent
  * from this listing.
  */
4908  if (strcmp(argv[1], "--forkbackend") == 0 ||
4909  strcmp(argv[1], "--forkavlauncher") == 0 ||
4910  strcmp(argv[1], "--forkavworker") == 0 ||
4911  strcmp(argv[1], "--forkboot") == 0 ||
4912  strncmp(argv[1], "--forkbgworker=", 15) == 0)
4914  else
4916 
4917  /* autovacuum needs this set before calling InitProcess */
4918  if (strcmp(argv[1], "--forkavlauncher") == 0)
4919  AutovacuumLauncherIAm();
4920  if (strcmp(argv[1], "--forkavworker") == 0)
4921  AutovacuumWorkerIAm();
4922 
4923  /* Read in remaining GUC variables */
4924  read_nondefault_variables();
4925 
4926  /*
4927  * Check that the data directory looks valid, which will also check the
4928  * privileges on the data directory and update our umask and file/group
4929  * variables for creating files later. Note: this should really be done
4930  * before we create any files or directories.
4931  */
4932  checkDataDir();
4933 
4934  /*
4935  * (re-)read control file, as it contains config. The postmaster will
4936  * already have read this, but this process doesn't know about that.
4937  */
4938  LocalProcessControlFile(false);
4939 
4940  /*
4941  * Reload any libraries that were preloaded by the postmaster. Since we
4942  * exec'd this process, those libraries didn't come along with us; but we
4943  * should load them into all child processes to be consistent with the
4944  * non-EXEC_BACKEND behavior.
4945  */
4947 
4948  /* Run backend or appropriate child */
4949  if (strcmp(argv[1], "--forkbackend") == 0)
4950  {
4951  Assert(argc == 3); /* shouldn't be any more args */
4952 
4953  /*
4954  * Need to reinitialize the SSL library in the backend, since the
4955  * context structures contain function pointers and cannot be passed
4956  * through the parameter file.
4957  *
4958  * If for some reason reload fails (maybe the user installed broken
4959  * key files), soldier on without SSL; that's better than all
4960  * connections becoming impossible.
4961  *
4962  * XXX should we do this in all child processes? For the moment it's
4963  * enough to do it in backend children.
4964  */
4965 #ifdef USE_SSL
4966  if (EnableSSL)
4967  {
4968  if (secure_initialize(false) == 0)
4969  LoadedSSL = true;
4970  else
4971  ereport(LOG,
4972  (errmsg("SSL configuration could not be loaded in child process")));
4973  }
4974 #endif
4975 
4976  /*
4977  * Perform additional initialization and collect startup packet.
4978  *
4979  * We want to do this before InitProcess() for a couple of reasons: 1.
4980  * so that we aren't eating up a PGPROC slot while waiting on the
4981  * client. 2. so that if InitProcess() fails due to being out of
4982  * PGPROC slots, we have already initialized libpq and are able to
4983  * report the error to the client.
4984  */
4985  BackendInitialize(&port);
4986 
4987  /* Restore basic shared memory pointers */
4989 
4990  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4991  InitProcess();
4992 
4993  /* Attach process to shared data structures */
4995 
4996  /* And run the backend */
4997  BackendRun(&port); /* does not return */
4998  }
4999  if (strcmp(argv[1], "--forkboot") == 0)
5000  {
5001  /* Restore basic shared memory pointers */
5003 
5004  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5006 
5007  /* Attach process to shared data structures */
5009 
 /* The child sees argv with the first two entries stripped off */
5010  AuxiliaryProcessMain(argc - 2, argv + 2); /* does not return */
5011  }
5012  if (strcmp(argv[1], "--forkavlauncher") == 0)
5013  {
5014  /* Restore basic shared memory pointers */
5016 
5017  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5018  InitProcess();
5019 
5020  /* Attach process to shared data structures */
5022 
5023  AutoVacLauncherMain(argc - 2, argv + 2); /* does not return */
5024  }
5025  if (strcmp(argv[1], "--forkavworker") == 0)
5026  {
5027  /* Restore basic shared memory pointers */
5029 
5030  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5031  InitProcess();
5032 
5033  /* Attach process to shared data structures */
5035 
5036  AutoVacWorkerMain(argc - 2, argv + 2); /* does not return */
5037  }
5038  if (strncmp(argv[1], "--forkbgworker=", 15) == 0)
5039  {
5040  int shmem_slot;
5041 
5042  /* do this as early as possible; in particular, before InitProcess() */
5043  IsBackgroundWorker = true;
5044 
5045  /* Restore basic shared memory pointers */
5047 
5048  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5049  InitProcess();
5050 
5051  /* Attach process to shared data structures */
5053 
5054  /* Fetch MyBgworkerEntry from shared memory */
 /*
  * 15 is the length of the "--forkbgworker=" prefix matched above, so
  * argv[1] + 15 is the slot number text. atoi() does no error checking.
  */
5055  shmem_slot = atoi(argv[1] + 15);
5056  MyBgworkerEntry = BackgroundWorkerEntry(shmem_slot);
5057 
 /* NOTE(review): the call that starts the worker is absent from this listing */
5059  }
 /* The remaining children receive the full, unshifted argc/argv */
5060  if (strcmp(argv[1], "--forkarch") == 0)
5061  {
5062  /* Do not want to attach to shared memory */
5063 
5064  PgArchiverMain(argc, argv); /* does not return */
5065  }
5066  if (strcmp(argv[1], "--forkcol") == 0)
5067  {
5068  /* Do not want to attach to shared memory */
5069 
5070  PgstatCollectorMain(argc, argv); /* does not return */
5071  }
5072  if (strcmp(argv[1], "--forklog") == 0)
5073  {
5074  /* Do not want to attach to shared memory */
5075 
5076  SysLoggerMain(argc, argv); /* does not return */
5077  }
5078 
5079  abort(); /* shouldn't get here */
5080 }
5081 #endif /* EXEC_BACKEND */
5082 
5083 
5084 /*
5085  * ExitPostmaster -- cleanup
5086  *
5087  * Do NOT call exit() directly --- always go through here!
 *
 * The body hands "status" to proc_exit() as the process exit code. Where
 * pthread_is_threaded_np() is available, it first logs (at LOG level only)
 * if the postmaster has become multithreaded.
 *
 * NOTE(review): the declarator line is absent from this listing, and so is
 * whatever statement originally followed the "should cleanup shared memory"
 * comment below.
5088  */
5089 static void
5091 {
5092 #ifdef HAVE_PTHREAD_IS_THREADED_NP
5093 
5094  /*
5095  * There is no known cause for a postmaster to become multithreaded after
5096  * startup. Recheck to account for the possibility of unknown causes.
5097  * This message uses LOG level, because an unclean shutdown at this point
5098  * would usually not look much different from a clean shutdown.
5099  */
5100  if (pthread_is_threaded_np() != 0)
5101  ereport(LOG,
5102  (errcode(ERRCODE_INTERNAL_ERROR),
5103  errmsg_internal("postmaster became multithreaded"),
5104  errdetail("Please report this to <%s>.", PACKAGE_BUGREPORT)));
5105 #endif
5106 
5107  /* should cleanup shared memory and kill all backends */
5108 
5109  /*
5110  * Not sure of the semantics here. When the Postmaster dies, should the
5111  * backends all be killed? probably not.
5112  *
5113  * MUST -- vadim 05-10-1999
5114  */
5115 
5116  proc_exit(status);
5117 }
5118 
5119 /*
5120  * sigusr1_handler - handle signal conditions from child processes
 *
 * The handler saves errno on entry and restores it on exit, so the
 * interrupted code does not see errno changes made here. In between it
 * tests a series of independent conditions; each one that holds triggers
 * its own action (starting helper processes, updating pmState, setting
 * flags for the main loop, and so on).
 *
 * NOTE(review): the declarator line and most of the "if" conditions that
 * guard the brace blocks below are absent from this listing, as are several
 * statements inside those blocks. A brace block with no visible condition
 * is NOT unconditional; consult the upstream file for the guard.
5121  */
5122 static void
5124 {
5125  int save_errno = errno;
5126 
5127  /*
5128  * We rely on the signal mechanism to have blocked all signals ... except
5129  * on Windows, which lacks sigaction(), so we have to do it manually.
5130  */
5131 #ifdef WIN32
5132  PG_SETMASK(&BlockSig);
5133 #endif
5134 
5135  /*
5136  * RECOVERY_STARTED and BEGIN_HOT_STANDBY signals are ignored in
5137  * unexpected states. If the startup process quickly starts up, completes
5138  * recovery, exits, we might process the death of the startup process
5139  * first. We don't want to go back to recovery in that case.
5140  */
5143  {
5144  /* WAL redo has started. We're out of reinitialization. */
5145  FatalError = false;
5146  AbortStartTime = 0;
5147 
5148  /*
5149  * Crank up the background tasks. It doesn't matter if this fails,
5150  * we'll just try again later.
5151  */
5152  Assert(CheckpointerPID == 0);
5154  Assert(BgWriterPID == 0);
5156 
5157  /*
5158  * Start the archiver if we're responsible for (re-)archiving received
5159  * files.
5160  */
5161  Assert(PgArchPID == 0);
5162  if (XLogArchivingAlways())
5163  PgArchPID = pgarch_start();
5164 
5165  /*
5166  * If we aren't planning to enter hot standby mode later, treat
5167  * RECOVERY_STARTED as meaning we're out of startup, and report status
5168  * accordingly.
5169  */
5170  if (!EnableHotStandby)
5171  {
5173 #ifdef USE_SYSTEMD
5174  sd_notify(0, "READY=1");
5175 #endif
5176  }
5177 
5178  pmState = PM_RECOVERY;
5179  }
5180 
5183  {
5184  /*
5185  * Likewise, start other special children as needed.
5186  */
5187  Assert(PgStatPID == 0);
5188  PgStatPID = pgstat_start();
5189 
5190  ereport(LOG,
5191  (errmsg("database system is ready to accept read only connections")));
5192 
5193  /* Report status */
5195 #ifdef USE_SYSTEMD
5196  sd_notify(0, "READY=1");
5197 #endif
5198 
5201 
5202  /* Some workers may be scheduled to start now */
5203  StartWorkerNeeded = true;
5204  }
5205 
5206  /* Process background worker state changes. */
5208  {
5209  /* Accept new worker requests only if not stopping. */
5211  StartWorkerNeeded = true;
5212  }
5213 
5216 
5218  PgArchPID != 0)
5219  {
5220  /*
5221  * Send SIGUSR1 to archiver process, to wake it up and begin archiving
5222  * next WAL file.
5223  */
5225  }
5226 
5227  /* Tell syslogger to rotate logfile if requested */
5228  if (SysLoggerPID != 0)
5229  {
5230  if (CheckLogrotateSignal())
5231  {
5234  }
5236  {
5238  }
5239  }
5240 
5243  {
5244  /*
5245  * Start one iteration of the autovacuum daemon, even if autovacuuming
5246  * is nominally not enabled. This is so we can have an active defense
5247  * against transaction ID wraparound. We set a flag for the main loop
5248  * to do it rather than trying to do it here --- this is because the
5249  * autovac process itself may send the signal, and we want to handle
5250  * that by launching another iteration as soon as the current one
5251  * completes.
5252  */
5253  start_autovac_launcher = true;
5254  }
5255 
5258  {
5259  /* The autovacuum launcher wants us to start a worker process. */
5261  }
5262 
5264  {
5265  /* Startup Process wants us to start the walreceiver process. */
5266  /* Start immediately if possible, else remember request for later. */
5267  WalReceiverRequested = true;
5269  }
5270 
5271  /*
5272  * Try to advance postmaster's state machine, if a child requests it.
5273  *
5274  * Be careful about the order of this action relative to sigusr1_handler's
5275  * other actions. Generally, this should be after other actions, in case
5276  * they have effects PostmasterStateMachine would need to know about.
5277  * However, we should do it before the CheckPromoteSignal step, which
5278  * cannot have any (immediate) effect on the state machine, but does
5279  * depend on what state we're in now.
5280  */
5282  {
5284  }
5285 
 /*
  * Promotion is considered only while a startup process exists and the
  * postmaster is in one of the three listed states. The last clause of
  * this condition is absent from this listing.
  */
5286  if (StartupPID != 0 &&
5287  (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5288  pmState == PM_HOT_STANDBY) &&
5290  {
5291  /*
5292  * Tell startup process to finish recovery.
5293  *
5294  * Leave the promote signal file in place and let the Startup process
5295  * do the unlink.
5296  */
5298  }
5299 
5300 #ifdef WIN32
5302 #endif
5303 
 /* Put back the errno value that was current when the signal arrived */
5304  errno = save_errno;
5305 }
5306 
5307 /*
5308  * SIGTERM while processing startup packet.
5309  *
5310  * Running proc_exit() from a signal handler would be quite unsafe.
5311  * However, since we have not yet touched shared memory, we can just
5312  * pull the plug and exit without running any atexit handlers.
5313  *
5314  * One might be tempted to try to send a message, or log one, indicating
5315  * why we are disconnecting. However, that would be quite unsafe in itself.
5316  * Also, it seems undesirable to provide clues about the database's state
5317  * to a client that has not yet completed authentication, or even sent us
5318  * a startup packet.
 *
 * The process terminates with exit status 1.
 *
 * NOTE(review): the declarator line (function name and parameter list) is
 * absent from this listing.
5319  */
5320 static void
5322 {
 /* _exit(), not exit(): skip atexit callbacks, per the comment above */
5323  _exit(1);
5324 }
5325 
5326 /*
5327  * Dummy signal handler
5328  *
5329  * We use this for signals that we don't actually use in the postmaster,
5330  * but we do use in backends. If we were to SIG_IGN such signals in the
5331  * postmaster, then a newly started backend might drop a signal that arrives
5332  * before it's able to reconfigure its signal processing. (See notes in
5333  * tcop/postgres.c.)
 *
 * NOTE(review): the declarator line (function name and parameter list) is
 * absent from this listing.
5334  */
5335 static void
5337 {
 /* Intentionally empty: being installed as a handler is the whole point */
5338 }
5339 
5340 /*
5341  * Timeout while processing startup packet.
5342  * As for process_startup_packet_die(), we exit via _exit(1).
5343  */
5344 static void
5346 {
5347  _exit(1);
5348 }
5349 
5350 
5351 /*
5352  * Generate a random cancel key.
5353  */
5354 static bool
5356 {
5357  return pg_strong_random(cancel_key, sizeof(int32));
5358 }
5359 
5360 /*
5361  * Count up number of child processes of specified types (dead_end children
5362  * are always excluded).
5363  */
5364 static int
5365 CountChildren(int target)
5366 {
5367  dlist_iter iter;
5368  int cnt = 0;
5369 
5370  dlist_foreach(iter, &BackendList)
5371  {
5372  Backend *bp = dlist_container(Backend, elem, iter.cur);
5373 
5374  if (bp->dead_end)
5375  continue;
5376 
5377  /*
5378  * Since target == BACKEND_TYPE_ALL is the most common case, we test
5379  * it first and avoid touching shared memory for every child.
5380  */
5381  if (target != BACKEND_TYPE_ALL)
5382  {
5383  /*
5384  * Assign bkend_type for any recently announced WAL Sender
5385  * processes.
5386  */
5387  if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
5390 
5391  if (!(target & bp->bkend_type))
5392  continue;
5393  }
5394 
5395  cnt++;
5396  }
5397  return cnt;
5398 }
5399 
5400 
5401 /*
5402  * StartChildProcess -- start an auxiliary process for the postmaster
5403  *
5404  * "type" determines what kind of child will be started. All child types
5405  * initially go to AuxiliaryProcessMain, which will handle common setup.
5406  *
5407  * Return value of StartChildProcess is subprocess' PID, or 0 if failed
5408  * to start subprocess.
5409  */
5410 static pid_t
5412 {
5413  pid_t pid;
5414  char *av[10];
5415  int ac = 0;
5416  char typebuf[32];
5417 
5418  /*
5419  * Set up command-line arguments for subprocess
5420  */
5421  av[ac++] = "postgres";
5422 
5423 #ifdef EXEC_BACKEND
5424  av[ac++] = "--forkboot";
5425  av[ac++] = NULL; /* filled in by postmaster_forkexec */
5426 #endif
5427 
5428  snprintf(typebuf, sizeof(typebuf), "-x%d", type);
5429  av[ac++] = typebuf;
5430 
5431  av[ac] = NULL;
5432  Assert(ac < lengthof(av));
5433 
5434 #ifdef EXEC_BACKEND
5435  pid = postmaster_forkexec(ac, av);
5436 #else /* !EXEC_BACKEND */
5437  pid = fork_process();
5438 
5439  if (pid == 0) /* child */
5440  {
5442 
5443  /* Close the postmaster's sockets */
5444  ClosePostmasterPorts(false);
5445 
5446  /* Release postmaster's working memory context */
5449  PostmasterContext = NULL;
5450 
5451  AuxiliaryProcessMain(ac, av);
5452  ExitPostmaster(0);
5453  }
5454 #endif /* EXEC_BACKEND */
5455 
5456  if (pid < 0)
5457  {
5458  /* in parent, fork failed */
5459  int save_errno = errno;
5460 
5461  errno = save_errno;
5462  switch (type)
5463  {
5464  case StartupProcess:
5465  ereport(LOG,
5466  (errmsg("could not fork startup process: %m")));
5467  break;
5468  case BgWriterProcess:
5469  ereport(LOG,
5470  (errmsg("could not fork background writer process: %m")));
5471  break;
5472  case CheckpointerProcess:
5473  ereport(LOG,
5474  (errmsg("could not fork checkpointer process: %m")));
5475  break;
5476  case WalWriterProcess:
5477  ereport(LOG,
5478  (errmsg("could not fork WAL writer process: %m")));
5479  break;
5480  case WalReceiverProcess:
5481  ereport(LOG,
5482  (errmsg("could not fork WAL receiver process: %m")));
5483  break;
5484  default:
5485  ereport(LOG,
5486  (errmsg("could not fork process: %m")));
5487  break;
5488  }
5489 
5490  /*
5491  * fork failure is fatal during startup, but there's no need to choke
5492  * immediately if starting other child types fails.
5493  */
5494  if (type == StartupProcess)
5495  ExitPostmaster(1);
5496  return 0;
5497  }
5498 
5499  /*
5500  * in parent, successful fork
5501  */
5502  return pid;
5503 }
5504 
5505 /*
5506  * StartAutovacuumWorker
5507  * Start an autovac worker process.
5508  *
5509  * This function is here because it enters the resulting PID into the
5510  * postmaster's private backends list.
5511  *
5512  * NB -- this code very roughly matches BackendStartup.
5513  */
5514 static void
5516 {
5517  Backend *bn;
5518 
5519  /*
5520  * If not in condition to run a process, don't try, but handle it like a
5521  * fork failure. This does not normally happen, since the signal is only
5522  * supposed to be sent by autovacuum launcher when it's OK to do it, but
5523  * we have to check to avoid race-condition problems during DB state
5524  * changes.
5525  */
5527  {
5528  /*
5529  * Compute the cancel key that will be assigned to this session. We
5530  * probably don't need cancel keys for autovac workers, but we'd
5531  * better have something random in the field to prevent unfriendly
5532  * people from sending cancels to them.
5533  */
5535  {
5536  ereport(LOG,
5537  (errcode(ERRCODE_INTERNAL_ERROR),
5538  errmsg("could not generate random cancel key")));
5539  return;
5540  }
5541 
5542  bn = (Backend *) malloc(sizeof(Backend));
5543  if (bn)
5544  {
5545  bn->cancel_key = MyCancelKey;
5546 
5547  /* Autovac workers are not dead_end and need a child slot */
5548  bn->dead_end = false;
5550  bn->bgworker_notify = false;
5551 
5552  bn->pid = StartAutoVacWorker();
5553  if (bn->pid > 0)
5554  {
5556  dlist_push_head(&BackendList, &bn->elem);
5557 #ifdef EXEC_BACKEND
5558  ShmemBackendArrayAdd(bn);
5559 #endif
5560  /* all OK */
5561  return;
5562  }
5563 
5564  /*
5565  * fork failed, fall through to report -- actual error message was
5566  * logged by StartAutoVacWorker
5567  */
5569  free(bn);
5570  }
5571  else
5572  ereport(LOG,
5573  (errcode(ERRCODE_OUT_OF_MEMORY),
5574  errmsg("out of memory")));
5575  }
5576 
5577  /*
5578  * Report the failure to the launcher, if it's running. (If it's not, we
5579  * might not even be connected to shared memory, so don't try to call
5580  * AutoVacWorkerFailed.) Note that we also need to signal it so that it
5581  * responds to the condition, but we don't do that here, instead waiting
5582  * for ServerLoop to do it. This way we avoid a ping-pong signaling in
5583  * quick succession between the autovac launcher and postmaster in case
5584  * things get ugly.
5585  */
5586  if (AutoVacPID != 0)
5587  {
5589  avlauncher_needs_signal = true;
5590  }
5591 }
5592 
5593 /*
5594  * MaybeStartWalReceiver
5595  * Start the WAL receiver process, if not running and our state allows.
5596  *
5597  * Note: if WalReceiverPID is already nonzero, it might seem that we should
5598  * clear WalReceiverRequested. However, there's a race condition if the
5599  * walreceiver terminates and the startup process immediately requests a new
5600  * one: it's quite possible to get the signal for the request before reaping
5601  * the dead walreceiver process. Better to risk launching an extra
5602  * walreceiver than to miss launching one we need. (The walreceiver code
5603  * has logic to recognize that it should go away if not needed.)
5604  */
5605 static void
5607 {
5608  if (WalReceiverPID == 0 &&
5609  (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5610  pmState == PM_HOT_STANDBY) &&
5612  {
5614  if (WalReceiverPID != 0)
5615  WalReceiverRequested = false;
5616  /* else leave the flag set, so we'll try again later */
5617  }
5618 }
5619 
5620 
5621 /*
5622  * Create the opts file
5623  */
5624 static bool
5625 CreateOptsFile(int argc, char *argv[], char *fullprogname)
5626 {
5627  FILE *fp;
5628  int i;
5629 
5630 #define OPTS_FILE "postmaster.opts"
5631 
5632  if ((fp = fopen(OPTS_FILE, "w")) == NULL)
5633  {
5634  ereport(LOG,
5636  errmsg("could not create file \"%s\": %m", OPTS_FILE)));
5637  return false;
5638  }
5639 
5640  fprintf(fp, "%s", fullprogname);
5641  for (i = 1; i < argc; i++)
5642  fprintf(fp, " \"%s\"", argv[i]);
5643  fputs("\n", fp);
5644 
5645  if (fclose(fp))
5646  {
5647  ereport(LOG,
5649  errmsg("could not write file \"%s\": %m", OPTS_FILE)));
5650  return false;
5651  }
5652 
5653  return true;
5654 }
5655 
5656 
5657 /*
5658  * MaxLivePostmasterChildren
5659  *
5660  * This reports the number of entries needed in per-child-process arrays
5661  * (the PMChildFlags array, and if EXEC_BACKEND the ShmemBackendArray).
5662  * These arrays include regular backends, autovac workers, walsenders
5663  * and background workers, but not special children nor dead_end children.
5664  * This allows the arrays to have a fixed maximum size, to wit the same
5665  * too-many-children limit enforced by canAcceptConnections(). The exact value
5666  * isn't too critical as long as it's more than MaxBackends.
5667  */
5668 int
5670 {
5671  return 2 * (MaxConnections + autovacuum_max_workers + 1 +
5673 }
5674 
5675 /*
5676  * Connect background worker to a database.
5677  */
5678 void
5680 {
5682 
5683  /* XXX is this the right errcode? */
5685  ereport(FATAL,
5686  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5687  errmsg("database connection requirement not indicated during registration")));
5688 
5689  InitPostgres(dbname, InvalidOid, username, InvalidOid, NULL, (flags & BGWORKER_BYPASS_ALLOWCONN) != 0);
5690 
5691  /* it had better not have gotten out of "init" mode yet */
5692  if (!IsInitProcessingMode())
5693  ereport(ERROR,
5694  (errmsg("invalid processing mode in background worker")));
5696 }
5697 
5698 /*
5699  * Connect background worker to a database using OIDs.
5700  */
5701 void
5703 {
5705 
5706  /* XXX is this the right errcode? */
5708  ereport(FATAL,
5709  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5710  errmsg("database connection requirement not indicated during registration")));
5711 
5712  InitPostgres(NULL, dboid, NULL, useroid, NULL, (flags & BGWORKER_BYPASS_ALLOWCONN) != 0);
5713 
5714  /* it had better not have gotten out of "init" mode yet */
5715  if (!IsInitProcessingMode())
5716  ereport(ERROR,
5717  (errmsg("invalid processing mode in background worker")));
5719 }
5720 
5721 /*
5722  * Block/unblock signals in a background worker
5723  */
5724 void
5726 {
5727  PG_SETMASK(&BlockSig);
5728 }
5729 
5730 void
5732 {
5734 }
5735 
5736 #ifdef EXEC_BACKEND
5737 static pid_t
5738 bgworker_forkexec(int shmem_slot)
5739 {
5740  char *av[10];
5741  int ac = 0;
5742  char forkav[MAXPGPATH];
5743 
5744  snprintf(forkav, MAXPGPATH, "--forkbgworker=%d", shmem_slot);
5745 
5746  av[ac++] = "postgres";
5747  av[ac++] = forkav;
5748  av[ac++] = NULL; /* filled in by postmaster_forkexec */
5749  av[ac] = NULL;
5750 
5751  Assert(ac < lengthof(av));
5752 
5753  return postmaster_forkexec(ac, av);
5754 }
5755 #endif
5756 
5757 /*
5758  * Start a new bgworker.
5759  * Starting time conditions must have been checked already.
5760  *
5761  * Returns true on success, false on failure.
5762  * In either case, update the RegisteredBgWorker's state appropriately.
5763  *
5764  * This code is heavily based on autovacuum.c, q.v.
5765  */
5766 static bool
5768 {
5769  pid_t worker_pid;
5770 
5771  Assert(rw->rw_pid == 0);
5772 
5773  /*
5774  * Allocate and assign the Backend element. Note we must do this before
5775  * forking, so that we can handle failures (out of memory or child-process
5776  * slots) cleanly.
5777  *
5778  * Treat failure as though the worker had crashed. That way, the
5779  * postmaster will wait a bit before attempting to start it again; if we
5780  * tried again right away, most likely we'd find ourselves hitting the
5781  * same resource-exhaustion condition.
5782  */
5783  if (!assign_backendlist_entry(rw))
5784  {
5786  return false;
5787  }
5788 
5789  ereport(DEBUG1,
5790  (errmsg("starting background worker process \"%s\"",
5791  rw->rw_worker.bgw_name)));
5792 
5793 #ifdef EXEC_BACKEND
5794  switch ((worker_pid = bgworker_forkexec(rw->rw_shmem_slot)))
5795 #else
5796  switch ((worker_pid = fork_process()))
5797 #endif
5798  {
5799  case -1:
5800  /* in postmaster, fork failed ... */
5801  ereport(LOG,
5802  (errmsg("could not fork worker process: %m")));
5803  /* undo what assign_backendlist_entry did */
5805  rw->rw_child_slot = 0;
5806  free(rw->rw_backend);
5807  rw->rw_backend = NULL;
5808  /* mark entry as crashed, so we'll try again later */
5810  break;
5811 
5812 #ifndef EXEC_BACKEND
5813  case 0:
5814  /* in postmaster child ... */
5816 
5817  /* Close the postmaster's sockets */
5818  ClosePostmasterPorts(false);
5819 
5820  /*
5821  * Before blowing away PostmasterContext, save this bgworker's
5822  * data where it can find it.
5823  */
5824  MyBgworkerEntry = (BackgroundWorker *)
5826  memcpy(MyBgworkerEntry, &rw->rw_worker, sizeof(BackgroundWorker));
5827 
5828  /* Release postmaster's working memory context */
5831  PostmasterContext = NULL;
5832 
5834 
5835  exit(1); /* should not get here */
5836  break;
5837 #endif
5838  default:
5839  /* in postmaster, fork successful ... */
5840  rw->rw_pid = worker_pid;
5841  rw->rw_backend->pid = rw->rw_pid;
5843  /* add new worker to lists of backends */
5844  dlist_push_head(&BackendList, &rw->rw_backend->elem);
5845 #ifdef EXEC_BACKEND
5846  ShmemBackendArrayAdd(rw->rw_backend);
5847 #endif
5848  return true;
5849  }
5850 
5851  return false;
5852 }
5853 
5854 /*
5855  * Does the current postmaster state require starting a worker with the
5856  * specified start_time?
5857  */
5858 static bool
5860 {
5861  switch (pmState)
5862  {
5863  case PM_NO_CHILDREN:
5864  case PM_WAIT_DEAD_END:
5865