PostgreSQL Source Code  git master
postmaster.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * postmaster.c
4  * This program acts as a clearing house for requests to the
5  * POSTGRES system. Frontend programs send a startup message
6  * to the Postmaster and the postmaster uses the info in the
7  * message to setup a backend process.
8  *
9  * The postmaster also manages system-wide operations such as
10  * startup and shutdown. The postmaster itself doesn't do those
11  * operations, mind you --- it just forks off a subprocess to do them
12  * at the right times. It also takes care of resetting the system
13  * if a backend crashes.
14  *
15  * The postmaster process creates the shared memory and semaphore
16  * pools during startup, but as a rule does not touch them itself.
17  * In particular, it is not a member of the PGPROC array of backends
18  * and so it cannot participate in lock-manager operations. Keeping
19  * the postmaster away from shared memory operations makes it simpler
20  * and more reliable. The postmaster is almost always able to recover
21  * from crashes of individual backends by resetting shared memory;
22  * if it did much with shared memory then it would be prone to crashing
23  * along with the backends.
24  *
25  * When a request message is received, we now fork() immediately.
26  * The child process performs authentication of the request, and
27  * then becomes a backend if successful. This allows the auth code
28  * to be written in a simple single-threaded style (as opposed to the
29  * crufty "poor man's multitasking" code that used to be needed).
30  * More importantly, it ensures that blockages in non-multithreaded
31  * libraries like SSL or PAM cannot cause denial of service to other
32  * clients.
33  *
34  *
35  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
36  * Portions Copyright (c) 1994, Regents of the University of California
37  *
38  *
39  * IDENTIFICATION
40  * src/backend/postmaster/postmaster.c
41  *
42  * NOTES
43  *
44  * Initialization:
45  * The Postmaster sets up shared memory data structures
46  * for the backends.
47  *
48  * Synchronization:
49  * The Postmaster shares memory with the backends but should avoid
50  * touching shared memory, so as not to become stuck if a crashing
51  * backend screws up locks or shared memory. Likewise, the Postmaster
52  * should never block on messages from frontend clients.
53  *
54  * Garbage Collection:
55  * The Postmaster cleans up after backends if they have an emergency
56  * exit and/or core dump.
57  *
58  * Error Reporting:
59  * Use write_stderr() only for reporting "interactive" errors
60  * (essentially, bogus arguments on the command line). Once the
61  * postmaster is launched, use ereport().
62  *
63  *-------------------------------------------------------------------------
64  */
65 
66 #include "postgres.h"
67 
68 #include <unistd.h>
69 #include <signal.h>
70 #include <time.h>
71 #include <sys/wait.h>
72 #include <ctype.h>
73 #include <sys/stat.h>
74 #include <sys/socket.h>
75 #include <fcntl.h>
76 #include <sys/param.h>
77 #include <netdb.h>
78 #include <limits.h>
79 
80 #ifdef HAVE_SYS_SELECT_H
81 #include <sys/select.h>
82 #endif
83 
84 #ifdef USE_BONJOUR
85 #include <dns_sd.h>
86 #endif
87 
88 #ifdef USE_SYSTEMD
89 #include <systemd/sd-daemon.h>
90 #endif
91 
92 #ifdef HAVE_PTHREAD_IS_THREADED_NP
93 #include <pthread.h>
94 #endif
95 
96 #include "access/transam.h"
97 #include "access/xlog.h"
98 #include "bootstrap/bootstrap.h"
99 #include "catalog/pg_control.h"
100 #include "common/file_perm.h"
101 #include "common/ip.h"
102 #include "common/string.h"
103 #include "lib/ilist.h"
104 #include "libpq/auth.h"
105 #include "libpq/libpq.h"
106 #include "libpq/pqformat.h"
107 #include "libpq/pqsignal.h"
108 #include "miscadmin.h"
109 #include "pg_getopt.h"
110 #include "pgstat.h"
111 #include "port/pg_bswap.h"
112 #include "postmaster/autovacuum.h"
114 #include "postmaster/fork_process.h"
115 #include "postmaster/pgarch.h"
116 #include "postmaster/postmaster.h"
117 #include "postmaster/syslogger.h"
119 #include "replication/walsender.h"
120 #include "storage/fd.h"
121 #include "storage/ipc.h"
122 #include "storage/pg_shmem.h"
123 #include "storage/pmsignal.h"
124 #include "storage/proc.h"
125 #include "tcop/tcopprot.h"
126 #include "utils/builtins.h"
127 #include "utils/datetime.h"
128 #include "utils/memutils.h"
129 #include "utils/pidfile.h"
130 #include "utils/ps_status.h"
131 #include "utils/timeout.h"
132 #include "utils/timestamp.h"
133 #include "utils/varlena.h"
134 
135 #ifdef EXEC_BACKEND
136 #include "storage/spin.h"
137 #endif
138 
139 
140 /*
141  * Possible types of a backend. Beyond being the possible bkend_type values in
142  * struct bkend, these are OR-able request flag bits for SignalSomeChildren()
143  * and CountChildren(). Each type occupies its own bit so masks can be combined.
144  */
145 #define BACKEND_TYPE_NORMAL 0x0001 /* normal backend */
146 #define BACKEND_TYPE_AUTOVAC 0x0002 /* autovacuum worker process */
147 #define BACKEND_TYPE_WALSND 0x0004 /* walsender process */
148 #define BACKEND_TYPE_BGWORKER 0x0008 /* bgworker process */
149 #define BACKEND_TYPE_ALL 0x000F /* OR of all the above */
150 /* Mask matching either flavor of worker: autovacuum worker or bgworker */
151 #define BACKEND_TYPE_WORKER (BACKEND_TYPE_AUTOVAC | BACKEND_TYPE_BGWORKER)
152 
153 /*
154  * List of active backends (or child processes anyway; we don't actually
155  * know whether a given child has become a backend or is still in the
156  * authorization phase). This is used mainly to keep track of how many
157  * children we have and send them appropriate signals when necessary.
158  *
159  * "Special" children such as the startup, bgwriter and autovacuum launcher
160  * tasks are not in this list. Autovacuum worker and walsender are in it.
161  * Also, "dead_end" children are in it: these are children launched just for
162  * the purpose of sending a friendly rejection message to a would-be client.
163  * We must track them because they are attached to shared memory, but we know
164  * they will never become live backends. dead_end children are not assigned a
165  * PMChildSlot.
166  *
167  * Background workers are in this list, too.
168  */
169 typedef struct bkend
170 {
171  pid_t pid; /* process id of backend */
172  int32 cancel_key; /* key used to validate cancel requests for this backend */
173  int child_slot; /* PMChildSlot for this backend, if any */
174 
175  /*
176  * Flavor of backend or auxiliary process. Note that BACKEND_TYPE_WALSND
177  * backends initially announce themselves as BACKEND_TYPE_NORMAL, so if
178  * bkend_type is normal, you should check for a recent transition.
179  * NOTE(review): no bkend_type member is visible below in this listing -- confirm. */
181  bool dead_end; /* is it going to send an error and quit? */
182  bool bgworker_notify; /* gets bgworker start/stop notifications */
183  dlist_node elem; /* list link in BackendList */
184 } Backend;
185 
187 
188 #ifdef EXEC_BACKEND
189 static Backend *ShmemBackendArray;
190 #endif
191 
193 
194 
195 
196 /* The socket number we are listening for connections on */
198 
199 /* The directory names for Unix socket(s) */
201 
202 /* The TCP listen address(es) */
204 
205 /*
206  * ReservedBackends is the number of backends reserved for superuser use.
207  * This number is taken out of the pool size given by MaxConnections, so the
208  * number of backend slots available to non-superusers is
209  * (MaxConnections - ReservedBackends). Note what this really means is
210  * "if there are <= ReservedBackends connections available, only superusers
211  * can make new connections" --- pre-existing superuser connections don't
212  * count against the limit.
213  */
215 
216 /* The socket(s) we're listening to. */
217 #define MAXLISTEN 64
219 
220 /*
221  * Set by the -o option
222  */
223 static char ExtraOptions[MAXPGPATH];
224 
225 /*
226  * These globals control the behavior of the postmaster in case some
227  * backend dumps core. Normally, it kills all peers of the dead backend
228  * and reinitializes shared memory. By specifying -T or -n, we can have
229  * the postmaster stop (rather than kill) peers and not reinitialize
230  * shared data structures. (Reinit is currently dead code, though.)
231  */
232 static bool Reinit = true; /* cleared by the -n switch */
233 static int SendStop = false; /* set by the -T switch */
234 
235 /* still more option variables */
236 bool EnableSSL = false;
237 
238 int PreAuthDelay = 0;
240 
241 bool log_hostname; /* for ps display and logging */
242 bool Log_connections = false;
243 bool Db_user_namespace = false;
244 
245 bool enable_bonjour = false;
248 
249 /* PIDs of special child processes; 0 when not running */
250 static pid_t StartupPID = 0,
259 
260 /* Startup process's status */
261 typedef enum
262 {
265  STARTUP_SIGNALED, /* we sent it a SIGQUIT or SIGKILL */
268 
270 
271 /* Startup/shutdown state */
272 #define NoShutdown 0
273 #define SmartShutdown 1
274 #define FastShutdown 2
275 #define ImmediateShutdown 3
276 
277 static int Shutdown = NoShutdown;
278 
279 static bool FatalError = false; /* T if recovering from backend crash */
280 
281 /*
282  * We use a simple state machine to control startup, shutdown, and
283  * crash recovery (which is rather like shutdown followed by startup).
284  *
285  * After doing all the postmaster initialization work, we enter PM_STARTUP
286  * state and the startup process is launched. The startup process begins by
287  * reading the control file and other preliminary initialization steps.
288  * In a normal startup, or after crash recovery, the startup process exits
289  * with exit code 0 and we switch to PM_RUN state. However, archive recovery
290  * is handled specially since it takes much longer and we would like to support
291  * hot standby during archive recovery.
292  *
293  * When the startup process is ready to start archive recovery, it signals the
294  * postmaster, and we switch to PM_RECOVERY state. The background writer and
295  * checkpointer are launched, while the startup process continues applying WAL.
296  * If Hot Standby is enabled, then, after reaching a consistent point in WAL
297  * redo, startup process signals us again, and we switch to PM_HOT_STANDBY
298  * state and begin accepting connections to perform read-only queries. When
299  * archive recovery is finished, the startup process exits with exit code 0
300  * and we switch to PM_RUN state.
301  *
302  * Normal child backends can only be launched when we are in PM_RUN or
303  * PM_HOT_STANDBY state. (We also allow launch of normal
304  * child backends in PM_WAIT_BACKUP state, but only for superusers.)
305  * In other states we handle connection requests by launching "dead_end"
306  * child processes, which will simply send the client an error message and
307  * quit. (We track these in the BackendList so that we can know when they
308  * are all gone; this is important because they're still connected to shared
309  * memory, and would interfere with an attempt to destroy the shmem segment,
310  * possibly leading to SHMALL failure when we try to make a new one.)
311  * In PM_WAIT_DEAD_END state we are waiting for all the dead_end children
312  * to drain out of the system, and therefore stop accepting connection
313  * requests at all until the last existing child has quit (which hopefully
314  * will not be very long).
315  *
316  * Notice that this state variable does not distinguish *why* we entered
317  * states later than PM_RUN --- Shutdown and FatalError must be consulted
318  * to find that out. FatalError is never true in PM_RECOVERY_* or PM_RUN
319  * states, nor in PM_SHUTDOWN states (because we don't enter those states
320  * when trying to recover from a crash). It can be true in PM_STARTUP state,
321  * because we don't clear it until we've successfully started WAL redo.
322  */
323 typedef enum
324 { /* NOTE(review): declaration order looks significant; the commentary before this enum speaks of "states later than PM_RUN" */
325  PM_INIT, /* postmaster starting */
326  PM_STARTUP, /* waiting for startup subprocess */
327  PM_RECOVERY, /* in archive recovery mode */
328  PM_HOT_STANDBY, /* in hot standby mode */
329  PM_RUN, /* normal "database is alive" state */
330  PM_WAIT_BACKUP, /* waiting for online backup mode to end */
331  PM_WAIT_READONLY, /* waiting for read only backends to exit */
332  PM_WAIT_BACKENDS, /* waiting for live backends to exit */
333  PM_SHUTDOWN, /* waiting for checkpointer to do shutdown
334  * ckpt */
335  PM_SHUTDOWN_2, /* waiting for archiver and walsenders to
336  * finish */
337  PM_WAIT_DEAD_END, /* waiting for dead_end children to exit */
338  PM_NO_CHILDREN /* all important children have exited */
339 } PMState;
340 
342 
343 /* Start time of SIGKILL timeout during immediate shutdown or child crash */
344 /* Zero means timeout is not running */
345 static time_t AbortStartTime = 0;
346 
347 /* Length of said timeout */
348 #define SIGKILL_CHILDREN_AFTER_SECS 5
349 
350 static bool ReachedNormalRunning = false; /* T if we've reached PM_RUN */
351 
352 bool ClientAuthInProgress = false; /* T during new-client
353  * authentication */
354 
355 bool redirection_done = false; /* stderr redirected for syslogger? */
356 
357 /* received START_AUTOVAC_LAUNCHER signal */
358 static volatile sig_atomic_t start_autovac_launcher = false;
359 
360 /* the launcher needs to be signaled to communicate some condition */
361 static volatile bool avlauncher_needs_signal = false;
362 
363 /* received START_WALRECEIVER signal */
364 static volatile sig_atomic_t WalReceiverRequested = false;
365 
366 /* set when there's a worker that needs to be started up */
367 static volatile bool StartWorkerNeeded = true;
368 static volatile bool HaveCrashedWorker = false;
369 
370 #ifdef USE_SSL
371 /* Set when and if SSL has been initialized properly */
372 static bool LoadedSSL = false;
373 #endif
374 
375 #ifdef USE_BONJOUR
376 static DNSServiceRef bonjour_sdref = NULL;
377 #endif
378 
379 /*
380  * postmaster.c - function prototypes
381  */
382 static void CloseServerPorts(int status, Datum arg);
383 static void unlink_external_pid_file(int status, Datum arg);
384 static void getInstallationPaths(const char *argv0);
385 static void checkControlFile(void);
386 static Port *ConnCreate(int serverFd);
387 static void ConnFree(Port *port);
388 static void reset_shared(void);
389 static void SIGHUP_handler(SIGNAL_ARGS);
390 static void pmdie(SIGNAL_ARGS);
391 static void reaper(SIGNAL_ARGS);
392 static void sigusr1_handler(SIGNAL_ARGS);
393 static void startup_die(SIGNAL_ARGS);
394 static void dummy_handler(SIGNAL_ARGS);
395 static void StartupPacketTimeoutHandler(void);
396 static void CleanupBackend(int pid, int exitstatus);
397 static bool CleanupBackgroundWorker(int pid, int exitstatus);
398 static void HandleChildCrash(int pid, int exitstatus, const char *procname);
399 static void LogChildExit(int lev, const char *procname,
400  int pid, int exitstatus);
401 static void PostmasterStateMachine(void);
402 static void BackendInitialize(Port *port);
403 static void BackendRun(Port *port) pg_attribute_noreturn();
404 static void ExitPostmaster(int status) pg_attribute_noreturn();
405 static int ServerLoop(void);
406 static int BackendStartup(Port *port);
407 static int ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done);
408 static void SendNegotiateProtocolVersion(List *unrecognized_protocol_options);
409 static void processCancelRequest(Port *port, void *pkt);
410 static int initMasks(fd_set *rmask);
411 static void report_fork_failure_to_client(Port *port, int errnum);
412 static CAC_state canAcceptConnections(int backend_type);
413 static bool RandomCancelKey(int32 *cancel_key);
414 static void signal_child(pid_t pid, int signal);
415 static bool SignalSomeChildren(int signal, int targets);
416 static void TerminateChildren(int signal);
417 
418 #define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL)
419 
420 static int CountChildren(int target);
422 static void maybe_start_bgworkers(void);
423 static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
424 static pid_t StartChildProcess(AuxProcType type);
425 static void StartAutovacuumWorker(void);
426 static void MaybeStartWalReceiver(void);
427 static void InitPostmasterDeathWatchHandle(void);
428 
429 /*
430  * Is the archiver allowed to start up in the current postmaster state?
431  *
432  * Yes if XLogArchivingActive() and we are in PM_RUN, or if
433  * XLogArchivingAlways() and we are in PM_RECOVERY or PM_HOT_STANDBY (that is, even during recovery).
434  */
435 #define PgArchStartupAllowed() \
436  ((XLogArchivingActive() && pmState == PM_RUN) || \
437  (XLogArchivingAlways() && \
438  (pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY)))
439 
440 #ifdef EXEC_BACKEND
441 
442 #ifdef WIN32
443 #define WNOHANG 0 /* ignored, so any integer value will do */
444 
445 static pid_t waitpid(pid_t pid, int *exitstatus, int options);
446 static void WINAPI pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired);
447 
448 static HANDLE win32ChildQueue;
449 
450 typedef struct /* NOTE(review): presumably the per-child record used with pgwin32_deadchild_callback() -- confirm */
451 {
452  HANDLE waitHandle; /* presumably the wait registration for the child -- TODO confirm */
453  HANDLE procHandle; /* presumably the child's process handle -- TODO confirm */
454  DWORD procId; /* presumably the child's process id -- TODO confirm */
455 } win32_deadchild_waitinfo;
456 #endif /* WIN32 */
457 
458 static pid_t backend_forkexec(Port *port);
459 static pid_t internal_forkexec(int argc, char *argv[], Port *port);
460 
461 /* Type for a socket that can be inherited by a client process */
462 #ifdef WIN32
463 typedef struct
464 {
465  SOCKET origsocket; /* Original socket value, or PGINVALID_SOCKET
466  * if not a socket */
467  WSAPROTOCOL_INFO wsainfo;
468 } InheritableSocket;
469 #else
470 typedef int InheritableSocket; /* without WIN32 this is just an int */
471 #endif
472 
473 /*
474  * Structure containing all the variables passed to exec'ed backends
475  */
476 typedef struct
477 {
478  Port port;
479  InheritableSocket portsocket;
480  char DataDir[MAXPGPATH];
483  int MyPMChildSlot;
484 #ifndef WIN32
485  unsigned long UsedShmemSegID;
486 #else
487  void *ShmemProtectiveRegion;
488  HANDLE UsedShmemSegID;
489 #endif
490  void *UsedShmemSegAddr;
493  Backend *ShmemBackendArray;
494 #ifndef HAVE_SPINLOCKS /* NOTE(review): the member guarded here is not visible in this listing */
496 #endif
505  InheritableSocket pgStatSock;
506  pid_t PostmasterPid;
510  bool redirection_done;
511  bool IsBinaryUpgrade;
512  int max_safe_fds;
513  int MaxBackends;
514 #ifdef WIN32
515  HANDLE PostmasterHandle;
516  HANDLE initial_signal_pipe;
517  HANDLE syslogPipe[2];
518 #else
519  int postmaster_alive_fds[2];
520  int syslogPipe[2];
521 #endif
522  char my_exec_path[MAXPGPATH];
523  char pkglib_path[MAXPGPATH];
524  char ExtraOptions[MAXPGPATH];
525 } BackendParameters;
526 
527 static void read_backend_variables(char *id, Port *port);
528 static void restore_backend_variables(BackendParameters *param, Port *port);
529 
530 #ifndef WIN32
531 static bool save_backend_variables(BackendParameters *param, Port *port);
532 #else
533 static bool save_backend_variables(BackendParameters *param, Port *port,
534  HANDLE childProcess, pid_t childPid);
535 #endif
536 
537 static void ShmemBackendArrayAdd(Backend *bn);
538 static void ShmemBackendArrayRemove(Backend *bn);
539 #endif /* EXEC_BACKEND */
540 /* Shorthands that launch one specific process type through StartChildProcess() */
541 #define StartupDataBase() StartChildProcess(StartupProcess)
542 #define StartBackgroundWriter() StartChildProcess(BgWriterProcess)
543 #define StartCheckpointer() StartChildProcess(CheckpointerProcess)
544 #define StartWalWriter() StartChildProcess(WalWriterProcess)
545 #define StartWalReceiver() StartChildProcess(WalReceiverProcess)
546 
547 /* Macros to check a child's wait status: raw status 0, or a normal exit with code 1 or 3 */
548 #define EXIT_STATUS_0(st) ((st) == 0)
549 #define EXIT_STATUS_1(st) (WIFEXITED(st) && WEXITSTATUS(st) == 1)
550 #define EXIT_STATUS_3(st) (WIFEXITED(st) && WEXITSTATUS(st) == 3)
551 
552 #ifndef WIN32
553 /*
554  * File descriptors for pipe used to monitor if postmaster is alive.
555  * First is POSTMASTER_FD_WATCH, second is POSTMASTER_FD_OWN.
556  */
557 int postmaster_alive_fds[2] = {-1, -1};
558 #else
559 /* Process handle of postmaster used for the same purpose on Windows */
560 HANDLE PostmasterHandle;
561 #endif
562 
563 /*
564  * Postmaster main entry point
565  */
566 void
567 PostmasterMain(int argc, char *argv[])
568 {
569  int opt;
570  int status;
571  char *userDoption = NULL;
572  bool listen_addr_saved = false;
573  int i;
574  char *output_config_variable = NULL;
575 
577 
579 
581 
582  /*
583  * We should not be creating any files or directories before we check the
584  * data directory (see checkDataDir()), but just in case set the umask to
585  * the most restrictive (owner-only) permissions.
586  *
587  * checkDataDir() will reset the umask based on the data directory
588  * permissions.
589  */
590  umask(PG_MODE_MASK_OWNER);
591 
592  /*
593  * By default, palloc() requests in the postmaster will be allocated in
594  * the PostmasterContext, which is space that can be recycled by backends.
595  * Allocated data that needs to be available to backends should be
596  * allocated in TopMemoryContext.
597  */
599  "Postmaster",
602 
603  /* Initialize paths to installation files */
604  getInstallationPaths(argv[0]);
605 
606  /*
607  * Set up signal handlers for the postmaster process.
608  *
609  * In the postmaster, we use pqsignal_pm() rather than pqsignal() (which
610  * is used by all child processes and client processes). That has a
611  * couple of special behaviors:
612  *
613  * 1. Except on Windows, we tell sigaction() to block all signals for the
614  * duration of the signal handler. This is faster than our old approach
615  * of blocking/unblocking explicitly in the signal handler, and it should
616  * also prevent excessive stack consumption if signals arrive quickly.
617  *
618  * 2. We do not set the SA_RESTART flag. This is because signals will be
619  * blocked at all times except when ServerLoop is waiting for something to
620  * happen, and during that window, we want signals to exit the select(2)
621  * wait so that ServerLoop can respond if anything interesting happened.
622  * On some platforms, signals marked SA_RESTART would not cause the
623  * select() wait to end.
624  *
625  * Child processes will generally want SA_RESTART, so pqsignal() sets that
626  * flag. We expect children to set up their own handlers before
627  * unblocking signals.
628  *
629  * CAUTION: when changing this list, check for side-effects on the signal
630  * handling setup of child processes. See tcop/postgres.c,
631  * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
632  * postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/pgstat.c,
633  * postmaster/syslogger.c, postmaster/bgworker.c and
634  * postmaster/checkpointer.c.
635  */
636  pqinitmask();
638 
639  pqsignal_pm(SIGHUP, SIGHUP_handler); /* reread config file and have
640  * children do same */
641  pqsignal_pm(SIGINT, pmdie); /* send SIGTERM and shut down */
642  pqsignal_pm(SIGQUIT, pmdie); /* send SIGQUIT and die */
643  pqsignal_pm(SIGTERM, pmdie); /* wait for children and shut down */
644  pqsignal_pm(SIGALRM, SIG_IGN); /* ignored */
645  pqsignal_pm(SIGPIPE, SIG_IGN); /* ignored */
646  pqsignal_pm(SIGUSR1, sigusr1_handler); /* message from child process */
647  pqsignal_pm(SIGUSR2, dummy_handler); /* unused, reserve for children */
648  pqsignal_pm(SIGCHLD, reaper); /* handle child termination */
649 
650  /*
651  * No other place in Postgres should touch SIGTTIN/SIGTTOU handling. We
652  * ignore those signals in a postmaster environment, so that there is no
653  * risk of a child process freezing up due to writing to stderr. But for
654  * a standalone backend, their default handling is reasonable. Hence, all
655  * child processes should just allow the inherited settings to stand.
656  */
657 #ifdef SIGTTIN
658  pqsignal_pm(SIGTTIN, SIG_IGN); /* ignored */
659 #endif
660 #ifdef SIGTTOU
661  pqsignal_pm(SIGTTOU, SIG_IGN); /* ignored */
662 #endif
663 
664  /* ignore SIGXFSZ, so that ulimit violations work like disk full */
665 #ifdef SIGXFSZ
666  pqsignal_pm(SIGXFSZ, SIG_IGN); /* ignored */
667 #endif
668 
669  /*
670  * Options setup
671  */
673 
674  opterr = 1;
675 
676  /*
677  * Parse command-line options. CAUTION: keep this in sync with
678  * tcop/postgres.c (the option sets should not conflict) and with the
679  * common help() function in main/main.c.
680  */
681  while ((opt = getopt(argc, argv, "B:bc:C:D:d:EeFf:h:ijk:lN:nOo:Pp:r:S:sTt:W:-:")) != -1)
682  {
683  switch (opt)
684  {
685  case 'B':
686  SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
687  break;
688 
689  case 'b':
690  /* Undocumented flag used for binary upgrades */
691  IsBinaryUpgrade = true;
692  break;
693 
694  case 'C':
695  output_config_variable = strdup(optarg);
696  break;
697 
698  case 'D':
699  userDoption = strdup(optarg);
700  break;
701 
702  case 'd':
704  break;
705 
706  case 'E':
707  SetConfigOption("log_statement", "all", PGC_POSTMASTER, PGC_S_ARGV);
708  break;
709 
710  case 'e':
711  SetConfigOption("datestyle", "euro", PGC_POSTMASTER, PGC_S_ARGV);
712  break;
713 
714  case 'F':
715  SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
716  break;
717 
718  case 'f':
720  {
721  write_stderr("%s: invalid argument for option -f: \"%s\"\n",
722  progname, optarg);
723  ExitPostmaster(1);
724  }
725  break;
726 
727  case 'h':
728  SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
729  break;
730 
731  case 'i':
732  SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
733  break;
734 
735  case 'j':
736  /* only used by interactive backend */
737  break;
738 
739  case 'k':
740  SetConfigOption("unix_socket_directories", optarg, PGC_POSTMASTER, PGC_S_ARGV);
741  break;
742 
743  case 'l':
744  SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
745  break;
746 
747  case 'N':
748  SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
749  break;
750 
751  case 'n':
752  /* Don't reinit shared mem after abnormal exit */
753  Reinit = false;
754  break;
755 
756  case 'O':
757  SetConfigOption("allow_system_table_mods", "true", PGC_POSTMASTER, PGC_S_ARGV);
758  break;
759 
760  case 'o':
761  /* Other options to pass to the backend on the command line */
763  sizeof(ExtraOptions) - strlen(ExtraOptions),
764  " %s", optarg);
765  break;
766 
767  case 'P':
768  SetConfigOption("ignore_system_indexes", "true", PGC_POSTMASTER, PGC_S_ARGV);
769  break;
770 
771  case 'p':
773  break;
774 
775  case 'r':
776  /* only used by single-user backend */
777  break;
778 
779  case 'S':
781  break;
782 
783  case 's':
784  SetConfigOption("log_statement_stats", "true", PGC_POSTMASTER, PGC_S_ARGV);
785  break;
786 
787  case 'T':
788 
789  /*
790  * In the event that some backend dumps core, send SIGSTOP,
791  * rather than SIGQUIT, to all its peers. This lets the wily
792  * post_hacker collect core dumps from everyone.
793  */
794  SendStop = true;
795  break;
796 
797  case 't':
798  {
799  const char *tmp = get_stats_option_name(optarg);
800 
801  if (tmp)
802  {
804  }
805  else
806  {
807  write_stderr("%s: invalid argument for option -t: \"%s\"\n",
808  progname, optarg);
809  ExitPostmaster(1);
810  }
811  break;
812  }
813 
814  case 'W':
815  SetConfigOption("post_auth_delay", optarg, PGC_POSTMASTER, PGC_S_ARGV);
816  break;
817 
818  case 'c':
819  case '-':
820  {
821  char *name,
822  *value;
823 
824  ParseLongOption(optarg, &name, &value);
825  if (!value)
826  {
827  if (opt == '-')
828  ereport(ERROR,
829  (errcode(ERRCODE_SYNTAX_ERROR),
830  errmsg("--%s requires a value",
831  optarg)));
832  else
833  ereport(ERROR,
834  (errcode(ERRCODE_SYNTAX_ERROR),
835  errmsg("-c %s requires a value",
836  optarg)));
837  }
838 
840  free(name);
841  if (value)
842  free(value);
843  break;
844  }
845 
846  default:
847  write_stderr("Try \"%s --help\" for more information.\n",
848  progname);
849  ExitPostmaster(1);
850  }
851  }
852 
853  /*
854  * Postmaster accepts no non-option switch arguments.
855  */
856  if (optind < argc)
857  {
858  write_stderr("%s: invalid argument: \"%s\"\n",
859  progname, argv[optind]);
860  write_stderr("Try \"%s --help\" for more information.\n",
861  progname);
862  ExitPostmaster(1);
863  }
864 
865  /*
866  * Locate the proper configuration files and data directory, and read
867  * postgresql.conf for the first time.
868  */
869  if (!SelectConfigFiles(userDoption, progname))
870  ExitPostmaster(2);
871 
872  if (output_config_variable != NULL)
873  {
874  /*
875  * "-C guc" was specified, so print GUC's value and exit. No extra
876  * permission check is needed because the user is reading inside the
877  * data dir.
878  */
879  const char *config_val = GetConfigOption(output_config_variable,
880  false, false);
881 
882  puts(config_val ? config_val : "");
883  ExitPostmaster(0);
884  }
885 
886  /* Verify that DataDir looks reasonable */
887  checkDataDir();
888 
889  /* Check that pg_control exists */
891 
892  /* And switch working directory into it */
893  ChangeToDataDir();
894 
895  /*
896  * Check for invalid combinations of GUC settings.
897  */
899  {
900  write_stderr("%s: superuser_reserved_connections (%d) must be less than max_connections (%d)\n",
901  progname,
903  ExitPostmaster(1);
904  }
906  ereport(ERROR,
907  (errmsg("WAL archival cannot be enabled when wal_level is \"minimal\"")));
909  ereport(ERROR,
910  (errmsg("WAL streaming (max_wal_senders > 0) requires wal_level \"replica\" or \"logical\"")));
911 
912  /*
913  * Other one-time internal sanity checks can go here, if they are fast.
914  * (Put any slow processing further down, after postmaster.pid creation.)
915  */
916  if (!CheckDateTokenTables())
917  {
918  write_stderr("%s: invalid datetoken tables, please fix\n", progname);
919  ExitPostmaster(1);
920  }
921 
922  /*
923  * Now that we are done processing the postmaster arguments, reset
924  * getopt(3) library so that it will work correctly in subprocesses.
925  */
926  optind = 1;
927 #ifdef HAVE_INT_OPTRESET
928  optreset = 1; /* some systems need this too */
929 #endif
930 
931  /* For debugging: display postmaster environment */
932  {
933  extern char **environ;
934  char **p;
935 
936  ereport(DEBUG3,
937  (errmsg_internal("%s: PostmasterMain: initial environment dump:",
938  progname)));
939  ereport(DEBUG3,
940  (errmsg_internal("-----------------------------------------")));
941  for (p = environ; *p; ++p)
942  ereport(DEBUG3,
943  (errmsg_internal("\t%s", *p)));
944  ereport(DEBUG3,
945  (errmsg_internal("-----------------------------------------")));
946  }
947 
948  /*
949  * Create lockfile for data directory.
950  *
951  * We want to do this before we try to grab the input sockets, because the
952  * data directory interlock is more reliable than the socket-file
953  * interlock (thanks to whoever decided to put socket files in /tmp :-().
954  * For the same reason, it's best to grab the TCP socket(s) before the
955  * Unix socket(s).
956  *
957  * Also note that this internally sets up the on_proc_exit function that
958  * is responsible for removing both data directory and socket lockfiles;
959  * so it must happen before opening sockets so that at exit, the socket
960  * lockfiles go away after CloseServerPorts runs.
961  */
962  CreateDataDirLockFile(true);
963 
964  /*
965  * Read the control file (for error checking and config info).
966  *
967  * Since we verify the control file's CRC, this has a useful side effect
968  * on machines where we need a run-time test for CRC support instructions.
969  * The postmaster will do the test once at startup, and then its child
970  * processes will inherit the correct function pointer and not need to
971  * repeat the test.
972  */
974 
975  /*
976  * Register the apply launcher. Since it registers a background worker,
977  * it needs to be called before InitializeMaxBackends(), and it's probably
978  * a good idea to call it before any modules had chance to take the
979  * background worker slots.
980  */
982 
983  /*
984  * process any libraries that should be preloaded at postmaster start
985  */
987 
988  /*
989  * Initialize SSL library, if specified.
990  */
991 #ifdef USE_SSL
992  if (EnableSSL)
993  {
994  (void) secure_initialize(true);
995  LoadedSSL = true;
996  }
997 #endif
998 
999  /*
1000  * Now that loadable modules have had their chance to register background
1001  * workers, calculate MaxBackends.
1002  */
1004 
1005  /*
1006  * Set up shared memory and semaphores.
1007  */
1008  reset_shared();
1009 
1010  /*
1011  * Estimate number of openable files. This must happen after setting up
1012  * semaphores, because on some platforms semaphores count as open files.
1013  */
1014  set_max_safe_fds();
1015 
1016  /*
1017  * Set reference point for stack-depth checking.
1018  */
1019  set_stack_base();
1020 
1021  /*
1022  * Initialize pipe (or process handle on Windows) that allows children to
1023  * wake up from sleep on postmaster death.
1024  */
1026 
1027 #ifdef WIN32
1028 
1029  /*
1030  * Initialize I/O completion port used to deliver list of dead children.
1031  */
1032  win32ChildQueue = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 1);
1033  if (win32ChildQueue == NULL)
1034  ereport(FATAL,
1035  (errmsg("could not create I/O completion port for child queue")));
1036 #endif
1037 
1038 #ifdef EXEC_BACKEND
1039  /* Write out nondefault GUC settings for child processes to use */
1040  write_nondefault_variables(PGC_POSTMASTER);
1041 
1042  /*
1043  * Clean out the temp directory used to transmit parameters to child
1044  * processes (see internal_forkexec, below). We must do this before
1045  * launching any child processes, else we have a race condition: we could
1046  * remove a parameter file before the child can read it. It should be
1047  * safe to do so now, because we verified earlier that there are no
1048  * conflicting Postgres processes in this data directory.
1049  */
1051 #endif
1052 
1053  /*
1054  * Forcibly remove the files signaling a standby promotion request.
1055  * Otherwise, the existence of those files triggers a promotion too early,
1056  * whether a user wants that or not.
1057  *
1058  * This removal of files is usually unnecessary because they can exist
1059  * only during a few moments during a standby promotion. However there is
1060  * a race condition: if pg_ctl promote is executed and creates the files
1061  * during a promotion, the files can stay around even after the server is
1062  * brought up to new master. Then, if new standby starts by using the
1063  * backup taken from that master, the files can exist at the server
1064  * startup and should be removed in order to avoid an unexpected
1065  * promotion.
1066  *
1067  * Note that promotion signal files need to be removed before the startup
1068  * process is invoked. Because, after that, they can be used by
1069  * postmaster's SIGUSR1 signal handler.
1070  */
1072 
1073  /* Do the same for logrotate signal file */
1075 
1076  /* Remove any outdated file holding the current log filenames. */
1077  if (unlink(LOG_METAINFO_DATAFILE) < 0 && errno != ENOENT)
1078  ereport(LOG,
1080  errmsg("could not remove file \"%s\": %m",
1082 
1083  /*
1084  * If enabled, start up syslogger collection subprocess
1085  */
1087 
1088  /*
1089  * Reset whereToSendOutput from DestDebug (its starting state) to
1090  * DestNone. This stops ereport from sending log messages to stderr unless
1091  * Log_destination permits. We don't do this until the postmaster is
1092  * fully launched, since startup failures may as well be reported to
1093  * stderr.
1094  *
1095  * If we are in fact disabling logging to stderr, first emit a log message
1096  * saying so, to provide a breadcrumb trail for users who may not remember
1097  * that their logging is configured to go somewhere else.
1098  */
1100  ereport(LOG,
1101  (errmsg("ending log output to stderr"),
1102  errhint("Future log output will go to log destination \"%s\".",
1104 
1106 
1107  /*
1108  * Report server startup in log. While we could emit this much earlier,
1109  * it seems best to do so after starting the log collector, if we intend
1110  * to use one.
1111  */
1112  ereport(LOG,
1113  (errmsg("starting %s", PG_VERSION_STR)));
1114 
1115  /*
1116  * Establish input sockets.
1117  *
1118  * First, mark them all closed, and set up an on_proc_exit function that's
1119  * charged with closing the sockets again at postmaster shutdown.
1120  */
1121  for (i = 0; i < MAXLISTEN; i++)
1123 
1125 
1126  if (ListenAddresses)
1127  {
1128  char *rawstring;
1129  List *elemlist;
1130  ListCell *l;
1131  int success = 0;
1132 
1133  /* Need a modifiable copy of ListenAddresses */
1134  rawstring = pstrdup(ListenAddresses);
1135 
1136  /* Parse string into list of hostnames */
1137  if (!SplitGUCList(rawstring, ',', &elemlist))
1138  {
1139  /* syntax error in list */
1140  ereport(FATAL,
1141  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1142  errmsg("invalid list syntax in parameter \"%s\"",
1143  "listen_addresses")));
1144  }
1145 
1146  foreach(l, elemlist)
1147  {
1148  char *curhost = (char *) lfirst(l);
1149 
1150  if (strcmp(curhost, "*") == 0)
1151  status = StreamServerPort(AF_UNSPEC, NULL,
1152  (unsigned short) PostPortNumber,
1153  NULL,
1155  else
1156  status = StreamServerPort(AF_UNSPEC, curhost,
1157  (unsigned short) PostPortNumber,
1158  NULL,
1159  ListenSocket, MAXLISTEN);
1160 
1161  if (status == STATUS_OK)
1162  {
1163  success++;
1164  /* record the first successful host addr in lockfile */
1165  if (!listen_addr_saved)
1166  {
1168  listen_addr_saved = true;
1169  }
1170  }
1171  else
1172  ereport(WARNING,
1173  (errmsg("could not create listen socket for \"%s\"",
1174  curhost)));
1175  }
1176 
1177  if (!success && elemlist != NIL)
1178  ereport(FATAL,
1179  (errmsg("could not create any TCP/IP sockets")));
1180 
1181  list_free(elemlist);
1182  pfree(rawstring);
1183  }
1184 
1185 #ifdef USE_BONJOUR
1186  /* Register for Bonjour only if we opened TCP socket(s) */
1188  {
1189  DNSServiceErrorType err;
1190 
1191  /*
1192  * We pass 0 for interface_index, which will result in registering on
1193  * all "applicable" interfaces. It's not entirely clear from the
1194  * DNS-SD docs whether this would be appropriate if we have bound to
1195  * just a subset of the available network interfaces.
1196  */
1197  err = DNSServiceRegister(&bonjour_sdref,
1198  0,
1199  0,
1200  bonjour_name,
1201  "_postgresql._tcp.",
1202  NULL,
1203  NULL,
1205  0,
1206  NULL,
1207  NULL,
1208  NULL);
1209  if (err != kDNSServiceErr_NoError)
1210  elog(LOG, "DNSServiceRegister() failed: error code %ld",
1211  (long) err);
1212 
1213  /*
1214  * We don't bother to read the mDNS daemon's reply, and we expect that
1215  * it will automatically terminate our registration when the socket is
1216  * closed at postmaster termination. So there's nothing more to be
1217  * done here. However, the bonjour_sdref is kept around so that
1218  * forked children can close their copies of the socket.
1219  */
1220  }
1221 #endif
1222 
1223 #ifdef HAVE_UNIX_SOCKETS
1225  {
1226  char *rawstring;
1227  List *elemlist;
1228  ListCell *l;
1229  int success = 0;
1230 
1231  /* Need a modifiable copy of Unix_socket_directories */
1232  rawstring = pstrdup(Unix_socket_directories);
1233 
1234  /* Parse string into list of directories */
1235  if (!SplitDirectoriesString(rawstring, ',', &elemlist))
1236  {
1237  /* syntax error in list */
1238  ereport(FATAL,
1239  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1240  errmsg("invalid list syntax in parameter \"%s\"",
1241  "unix_socket_directories")));
1242  }
1243 
1244  foreach(l, elemlist)
1245  {
1246  char *socketdir = (char *) lfirst(l);
1247 
1248  status = StreamServerPort(AF_UNIX, NULL,
1249  (unsigned short) PostPortNumber,
1250  socketdir,
1251  ListenSocket, MAXLISTEN);
1252 
1253  if (status == STATUS_OK)
1254  {
1255  success++;
1256  /* record the first successful Unix socket in lockfile */
1257  if (success == 1)
1259  }
1260  else
1261  ereport(WARNING,
1262  (errmsg("could not create Unix-domain socket in directory \"%s\"",
1263  socketdir)));
1264  }
1265 
1266  if (!success && elemlist != NIL)
1267  ereport(FATAL,
1268  (errmsg("could not create any Unix-domain sockets")));
1269 
1270  list_free_deep(elemlist);
1271  pfree(rawstring);
1272  }
1273 #endif
1274 
1275  /*
1276  * check that we have some socket to listen on
1277  */
1278  if (ListenSocket[0] == PGINVALID_SOCKET)
1279  ereport(FATAL,
1280  (errmsg("no socket created for listening")));
1281 
1282  /*
1283  * If no valid TCP ports, write an empty line for listen address,
1284  * indicating the Unix socket must be used. Note that this line is not
1285  * added to the lock file until there is a socket backing it.
1286  */
1287  if (!listen_addr_saved)
1289 
1290  /*
1291  * Record postmaster options. We delay this till now to avoid recording
1292  * bogus options (eg, unusable port number).
1293  */
1294  if (!CreateOptsFile(argc, argv, my_exec_path))
1295  ExitPostmaster(1);
1296 
1297  /*
1298  * Write the external PID file if requested
1299  */
1300  if (external_pid_file)
1301  {
1302  FILE *fpidfile = fopen(external_pid_file, "w");
1303 
1304  if (fpidfile)
1305  {
1306  fprintf(fpidfile, "%d\n", MyProcPid);
1307  fclose(fpidfile);
1308 
1309  /* Make PID file world readable */
1310  if (chmod(external_pid_file, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) != 0)
1311  write_stderr("%s: could not change permissions of external PID file \"%s\": %s\n",
1313  }
1314  else
1315  write_stderr("%s: could not write external PID file \"%s\": %s\n",
1317 
1319  }
1320 
1321  /*
1322  * Remove old temporary files. At this point there can be no other
1323  * Postgres processes running in this directory, so this should be safe.
1324  */
1326 
1327  /*
1328  * Initialize stats collection subsystem (this does NOT start the
1329  * collector process!)
1330  */
1331  pgstat_init();
1332 
1333  /*
1334  * Initialize the autovacuum subsystem (again, no process start yet)
1335  */
1336  autovac_init();
1337 
1338  /*
1339  * Load configuration files for client authentication.
1340  */
1341  if (!load_hba())
1342  {
1343  /*
1344  * It makes no sense to continue if we fail to load the HBA file,
1345  * since there is no way to connect to the database in this case.
1346  */
1347  ereport(FATAL,
1348  (errmsg("could not load pg_hba.conf")));
1349  }
1350  if (!load_ident())
1351  {
1352  /*
1353  * We can start up without the IDENT file, although it means that you
1354  * cannot log in using any of the authentication methods that need a
1355  * user name mapping. load_ident() already logged the details of error
1356  * to the log.
1357  */
1358  }
1359 
1360 #ifdef HAVE_PTHREAD_IS_THREADED_NP
1361 
1362  /*
1363  * On macOS, libintl replaces setlocale() with a version that calls
1364  * CFLocaleCopyCurrent() when its second argument is "" and every relevant
1365  * environment variable is unset or empty. CFLocaleCopyCurrent() makes
1366  * the process multithreaded. The postmaster calls sigprocmask() and
1367  * calls fork() without an immediate exec(), both of which have undefined
1368  * behavior in a multithreaded program. A multithreaded postmaster is the
1369  * normal case on Windows, which offers neither fork() nor sigprocmask().
1370  */
1371  if (pthread_is_threaded_np() != 0)
1372  ereport(FATAL,
1373  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1374  errmsg("postmaster became multithreaded during startup"),
1375  errhint("Set the LC_ALL environment variable to a valid locale.")));
1376 #endif
1377 
1378  /*
1379  * Remember postmaster startup time
1380  */
1382 
1383  /*
1384  * Report postmaster status in the postmaster.pid file, to allow pg_ctl to
1385  * see what's happening.
1386  */
1388 
1389  /*
1390  * We're ready to rock and roll...
1391  */
1393  Assert(StartupPID != 0);
1395  pmState = PM_STARTUP;
1396 
1397  /* Some workers may be scheduled to start now */
1399 
1400  status = ServerLoop();
1401 
1402  /*
1403  * ServerLoop probably shouldn't ever return, but if it does, close down.
1404  */
1405  ExitPostmaster(status != STATUS_OK);
1406 
1407  abort(); /* not reached */
1408 }
1409 
1410 
1411 /*
1412  * on_proc_exit callback to close server's listen sockets
1413  */
/*
 * NOTE(review): this text is a numbered listing extract.  Listing line 1415
 * (the function name and parameter list) is absent, as are lines 1429-1430
 * and 1439.  Presumably this is the CloseServerPorts callback mentioned in
 * PostmasterMain's data-directory lock file comment -- confirm against the
 * real file before relying on that.
 *
 * What is visible: the loop walks every ListenSocket[] slot below MAXLISTEN
 * and acts only on slots that are not PGINVALID_SOCKET.  The statements that
 * do the actual closing, and the call that removes Unix-socket filesystem
 * entries, are among the missing lines.  The function returns nothing.
 */
1414 static void
1416 {
1417  int i;
1418 
1419  /*
1420  * First, explicitly close all the socket FDs. We used to just let this
1421  * happen implicitly at postmaster exit, but it's better to close them
1422  * before we remove the postmaster.pid lockfile; otherwise there's a race
1423  * condition if a new postmaster wants to re-use the TCP port number.
1424  */
1425  for (i = 0; i < MAXLISTEN; i++)
1426  {
1427  if (ListenSocket[i] != PGINVALID_SOCKET)
1428  {
/* NOTE(review): listing lines 1429-1430 (the body of this branch) are missing here. */
1431  }
1432  }
1433 
1434  /*
1435  * Next, remove any filesystem entries for Unix sockets. To avoid race
1436  * conditions against incoming postmasters, this must happen after closing
1437  * the sockets and before removing lock files.
1438  */
/* NOTE(review): listing line 1439 (the statement the comment above describes) is missing here. */
1440 
1441  /*
1442  * We don't do anything about socket lock files here; those will be
1443  * removed in a later on_proc_exit callback.
1444  */
1445 }
1446 
1447 /*
1448  * on_proc_exit callback to delete external_pid_file
1449  */
/*
 * NOTE(review): listing line 1451 (the function name and parameter list) is
 * missing from this extract.
 *
 * When external_pid_file is set, the file it names is unlink()ed; when it is
 * unset nothing happens.  The unlink() result is not examined, so a failure
 * to remove the file goes unreported.  Returns nothing.
 */
1450 static void
1452 {
1453  if (external_pid_file)
1454  unlink(external_pid_file);
1455 }
1456 
1457 
1458 /*
1459  * Compute and check the directory paths to files that are part of the
1460  * installation (as deduced from the postgres executable's own location)
1461  */
/*
 * NOTE(review): this is a numbered listing extract.  Listing line 1463 (the
 * function name and parameter list), line 1484 (the statement that fills in
 * pkglib_path) and line 1496 (the first argument line of the ereport(ERROR)
 * call) are missing.
 *
 * Visible steps:
 *   - find_my_exec(argv0, my_exec_path) must not return a negative value,
 *     otherwise a FATAL message is raised.
 *   - Under EXEC_BACKEND only, find_other_exec() must locate a "postgres"
 *     binary matching PG_BACKEND_VERSIONSTR, storing it in
 *     postgres_exec_path; failure is FATAL.
 *   - pkglib_path must be openable with AllocateDir(); failure is reported
 *     at ERROR level with a hint naming my_exec_path.  The directory handle
 *     is released straight away with FreeDir(); it is opened purely as an
 *     existence/readability probe.
 *
 * argv0 is used both as the lookup key and as the program name in messages.
 */
1462 static void
1464 {
1465  DIR *pdir;
1466 
1467  /* Locate the postgres executable itself */
1468  if (find_my_exec(argv0, my_exec_path) < 0)
1469  elog(FATAL, "%s: could not locate my own executable path", argv0);
1470 
1471 #ifdef EXEC_BACKEND
1472  /* Locate executable backend before we change working directory */
1473  if (find_other_exec(argv0, "postgres", PG_BACKEND_VERSIONSTR,
1474  postgres_exec_path) < 0)
1475  ereport(FATAL,
1476  (errmsg("%s: could not locate matching postgres executable",
1477  argv0)));
1478 #endif
1479 
1480  /*
1481  * Locate the pkglib directory --- this has to be set early in case we try
1482  * to load any modules from it in response to postgresql.conf entries.
1483  */
/* NOTE(review): listing line 1484 (the statement described above) is missing here. */
1485 
1486  /*
1487  * Verify that there's a readable directory there; otherwise the Postgres
1488  * installation is incomplete or corrupt. (A typical cause of this
1489  * failure is that the postgres executable has been moved or hardlinked to
1490  * some directory that's not a sibling of the installation lib/
1491  * directory.)
1492  */
1493  pdir = AllocateDir(pkglib_path);
/*
 * NOTE(review): listing line 1496 is missing from the ereport() call below.
 * FreeDir(pdir) after the NULL check presumably relies on ereport(ERROR) not
 * returning -- confirm.
 */
1494  if (pdir == NULL)
1495  ereport(ERROR,
1497  errmsg("could not open directory \"%s\": %m",
1498  pkglib_path),
1499  errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
1500  my_exec_path)));
1501  FreeDir(pdir);
1502 
1503  /*
1504  * XXX is it worth similarly checking the share/ directory? If the lib/
1505  * directory is there, then share/ probably is too.
1506  */
1507 }
1508 
1509 /*
1510  * Check that pg_control exists in the correct location in the data directory.
1511  *
1512  * No attempt is made to validate the contents of pg_control here. This is
1513  * just a sanity check to see if we are looking at a real data directory.
1514  */
/*
 * NOTE(review): listing line 1516 (the function name and parameter list) is
 * missing from this extract.
 *
 * The path probed is "<DataDir>/global/pg_control", built into a MAXPGPATH
 * buffer with snprintf().  The file is opened with AllocateFile() in
 * PG_BINARY_R mode and closed again immediately with FreeFile(); nothing is
 * read from it.  If the open fails, a three-line message (program name, data
 * directory, path and strerror(errno)) goes to write_stderr() and
 * ExitPostmaster(2) is called.
 */
1515 static void
1517 {
1518  char path[MAXPGPATH];
1519  FILE *fp;
1520 
1521  snprintf(path, sizeof(path), "%s/global/pg_control", DataDir);
1522 
1523  fp = AllocateFile(path, PG_BINARY_R);
1524  if (fp == NULL)
1525  {
1526  write_stderr("%s: could not find the database system\n"
1527  "Expected to find it in the directory \"%s\",\n"
1528  "but could not open file \"%s\": %s\n",
1529  progname, DataDir, path, strerror(errno));
1530  ExitPostmaster(2);
1531  }
1532  FreeFile(fp);
1533 }
1534 
1535 /*
1536  * Determine how long should we let ServerLoop sleep.
1537  *
1538  * In normal conditions we wait at most one minute, to ensure that the other
1539  * background tasks handled by ServerLoop get done even when no requests are
1540  * arriving. However, if there are background workers waiting to be started,
1541  * we don't actually sleep so that they are quickly serviced. Other exception
1542  * cases are as shown in the code.
1543  */
/*
 * NOTE(review): this is a numbered listing extract; listing lines 1554, 1589,
 * 1599 and 1618 are missing (each is flagged where it should appear).
 *
 * timeout is an output parameter: tv_sec and tv_usec are both assigned on
 * every visible path before returning.  Summary of the visible cases:
 *   - first branch (Shutdown > NoShutdown, or a second condition on the
 *     missing line): if AbortStartTime is set, the remaining part of
 *     SIGKILL_CHILDREN_AFTER_SECS, clamped to zero; otherwise 60 seconds.
 *   - StartWorkerNeeded: zero, i.e. no sleep.
 *   - HaveCrashedWorker: the earliest this_wakeup over entries whose
 *     rw_crashed_at is nonzero; some entries are dropped with
 *     ForgetBackgroundWorker() instead of being considered.
 *   - a wakeup time was found: the secs/microsecs pair, capped at 60 seconds.
 *   - otherwise: 60 seconds.
 */
1544 static void
1545 DetermineSleepTime(struct timeval *timeout)
1546 {
1547  TimestampTz next_wakeup = 0;
1548 
1549  /*
1550  * Normal case: either there are no background workers at all, or we're in
1551  * a shutdown sequence (during which we ignore bgworkers altogether).
1552  */
/* NOTE(review): listing line 1554 (the second half of this condition) is missing. */
1553  if (Shutdown > NoShutdown ||
1555  {
1556  if (AbortStartTime != 0)
1557  {
1558  /* time left to abort; clamp to 0 in case it already expired */
1559  timeout->tv_sec = SIGKILL_CHILDREN_AFTER_SECS -
1560  (time(NULL) - AbortStartTime);
1561  timeout->tv_sec = Max(timeout->tv_sec, 0);
1562  timeout->tv_usec = 0;
1563  }
1564  else
1565  {
1566  timeout->tv_sec = 60;
1567  timeout->tv_usec = 0;
1568  }
1569  return;
1570  }
1571 
1572  if (StartWorkerNeeded)
1573  {
1574  timeout->tv_sec = 0;
1575  timeout->tv_usec = 0;
1576  return;
1577  }
1578 
1579  if (HaveCrashedWorker)
1580  {
1581  slist_mutable_iter siter;
1582 
1583  /*
1584  * When there are crashed bgworkers, we sleep just long enough that
1585  * they are restarted when they request to be. Scan the list to
1586  * determine the minimum of all wakeup times according to most recent
1587  * crash time and requested restart interval.
1588  */
/* NOTE(review): listing line 1589 (the loop header that drives siter) is missing. */
1590  {
1591  RegisteredBgWorker *rw;
1592  TimestampTz this_wakeup;
1593 
1594  rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
1595 
1596  if (rw->rw_crashed_at == 0)
1597  continue;
1598 
/* NOTE(review): listing line 1599 (the start of this condition) is missing. */
1600  || rw->rw_terminate)
1601  {
1602  ForgetBackgroundWorker(&siter);
1603  continue;
1604  }
1605 
/* bgw_restart_time is scaled by 1000 because the helper takes milliseconds */
1606  this_wakeup = TimestampTzPlusMilliseconds(rw->rw_crashed_at,
1607  1000L * rw->rw_worker.bgw_restart_time);
/* next_wakeup == 0 doubles as "nothing found yet" */
1608  if (next_wakeup == 0 || this_wakeup < next_wakeup)
1609  next_wakeup = this_wakeup;
1610  }
1611  }
1612 
1613  if (next_wakeup != 0)
1614  {
1615  long secs;
1616  int microsecs;
1617 
/* NOTE(review): listing line 1618 (the call that fills secs/microsecs) is missing. */
1619  &secs, &microsecs);
1620  timeout->tv_sec = secs;
1621  timeout->tv_usec = microsecs;
1622 
1623  /* Ensure we don't exceed one minute */
1624  if (timeout->tv_sec > 60)
1625  {
1626  timeout->tv_sec = 60;
1627  timeout->tv_usec = 0;
1628  }
1629  }
1630  else
1631  {
1632  timeout->tv_sec = 60;
1633  timeout->tv_usec = 0;
1634  }
1635 }
1636 
1637 /*
1638  * Main idle loop of postmaster
1639  *
1640  * NB: Needs to be called with signals blocked
1641  */
/*
 * NOTE(review): this is a numbered listing extract.  Listing line 1643 (the
 * function name and parameter list) is missing, and so are lines 1674, 1689,
 * 1702, 1742, 1750, 1753, 1755, 1764, 1773, 1776, 1791, 1795, 1799-1800,
 * 1803-1804, 1835, 1838, 1859 and 1872.  Several "if" statements below have
 * therefore lost their condition tail or their body; do not read the
 * remaining text as the complete control flow.
 *
 * Visible shape of each iteration:
 *   1. Copy the persistent read mask into rmask, then either sleep 100 msec
 *      (state PM_WAIT_DEAD_END) or select() on the listen sockets with the
 *      timeout from DetermineSleepTime().  BlockSig is reinstated after
 *      either wait.
 *   2. A negative select() result with errno other than EINTR/EWOULDBLOCK is
 *      logged and makes the function return STATUS_ERROR.  This is the only
 *      return visible in the loop.
 *   3. For each ready listen socket: ConnCreate(), BackendStartup(), then
 *      release the socket and Port in this process.
 *   4. Restart helper processes whose PID variable is zero, subject to the
 *      pmState tests shown.
 *   5. Time-based housekeeping keyed off "now": SIGKILL escalation, the
 *      once-a-minute lock file recheck, and the 58-minute socket file touch.
 */
1642 static int
1644 {
1645  fd_set readmask;
1646  int nSockets;
1647  time_t last_lockfile_recheck_time,
1648  last_touch_time;
1649 
1650  last_lockfile_recheck_time = last_touch_time = time(NULL);
1651 
1652  nSockets = initMasks(&readmask);
1653 
1654  for (;;)
1655  {
1656  fd_set rmask;
1657  int selres;
1658  time_t now;
1659 
1660  /*
1661  * Wait for a connection request to arrive.
1662  *
1663  * We block all signals except while sleeping. That makes it safe for
1664  * signal handlers, which again block all signals while executing, to
1665  * do nontrivial work.
1666  *
1667  * If we are in PM_WAIT_DEAD_END state, then we don't want to accept
1668  * any new connections, so we don't call select(), and just sleep.
1669  */
/* select() overwrites its mask argument, so work on a fresh copy each time */
1670  memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
1671 
1672  if (pmState == PM_WAIT_DEAD_END)
1673  {
/* NOTE(review): listing line 1674 is missing here. */
1675 
1676  pg_usleep(100000L); /* 100 msec seems reasonable */
1677  selres = 0;
1678 
1679  PG_SETMASK(&BlockSig);
1680  }
1681  else
1682  {
1683  /* must set timeout each time; some OSes change it! */
1684  struct timeval timeout;
1685 
1686  /* Needs to run with blocked signals! */
1687  DetermineSleepTime(&timeout);
1688 
/* NOTE(review): listing line 1689 is missing here. */
1690 
1691  selres = select(nSockets, &rmask, NULL, NULL, &timeout);
1692 
1693  PG_SETMASK(&BlockSig);
1694  }
1695 
1696  /* Now check the select() result */
1697  if (selres < 0)
1698  {
1699  if (errno != EINTR && errno != EWOULDBLOCK)
1700  {
/* NOTE(review): listing line 1702 is missing from the ereport() call below. */
1701  ereport(LOG,
1703  errmsg("select() failed in postmaster: %m")));
1704  return STATUS_ERROR;
1705  }
1706  }
1707 
1708  /*
1709  * New connection pending on any of our sockets? If so, fork a child
1710  * process to deal with it.
1711  */
1712  if (selres > 0)
1713  {
1714  int i;
1715 
1716  for (i = 0; i < MAXLISTEN; i++)
1717  {
/* the first invalid slot ends the scan */
1718  if (ListenSocket[i] == PGINVALID_SOCKET)
1719  break;
1720  if (FD_ISSET(ListenSocket[i], &rmask))
1721  {
1722  Port *port;
1723 
1724  port = ConnCreate(ListenSocket[i]);
1725  if (port)
1726  {
1727  BackendStartup(port);
1728 
1729  /*
1730  * We no longer need the open socket or port structure
1731  * in this process
1732  */
1733  StreamClose(port->sock);
1734  ConnFree(port);
1735  }
1736  }
1737  }
1738  }
1739 
1740  /* If we have lost the log collector, try to start a new one */
/* NOTE(review): listing line 1742 (the body of this "if") is missing. */
1741  if (SysLoggerPID == 0 && Logging_collector)
1743 
1744  /*
1745  * If no background writer process is running, and we are not in a
1746  * state that prevents it, start one. It doesn't matter if this
1747  * fails, we'll just try again later. Likewise for the checkpointer.
1748  */
/* NOTE(review): listing lines 1750, 1753 and 1755 are missing from this statement. */
1749  if (pmState == PM_RUN || pmState == PM_RECOVERY ||
1751  {
1752  if (CheckpointerPID == 0)
1754  if (BgWriterPID == 0)
1756  }
1757 
1758  /*
1759  * Likewise, if we have lost the walwriter process, try to start a new
1760  * one. But this is needed only in normal operation (else we cannot
1761  * be writing any new WAL).
1762  */
/* NOTE(review): listing line 1764 (the body of this "if") is missing. */
1763  if (WalWriterPID == 0 && pmState == PM_RUN)
1765 
1766  /*
1767  * If we have lost the autovacuum launcher, try to start a new one. We
1768  * don't want autovacuum to run in binary upgrade mode because
1769  * autovacuum might update relfrozenxid for empty tables before the
1770  * physical files are put in place.
1771  */
/* NOTE(review): listing lines 1773 and 1776 are missing from this statement. */
1772  if (!IsBinaryUpgrade && AutoVacPID == 0 &&
1774  pmState == PM_RUN)
1775  {
1777  if (AutoVacPID != 0)
1778  start_autovac_launcher = false; /* signal processed */
1779  }
1780 
1781  /* If we have lost the stats collector, try to start a new one */
1782  if (PgStatPID == 0 &&
1783  (pmState == PM_RUN || pmState == PM_HOT_STANDBY))
1784  PgStatPID = pgstat_start();
1785 
1786  /* If we have lost the archiver, try to start a new one. */
1787  if (PgArchPID == 0 && PgArchStartupAllowed())
1788  PgArchPID = pgarch_start();
1789 
1790  /* If we need to signal the autovacuum launcher, do so now */
/* NOTE(review): listing lines 1791 (the condition) and 1795 (the signal call) are missing. */
1792  {
1793  avlauncher_needs_signal = false;
1794  if (AutoVacPID != 0)
1796  }
1797 
1798  /* If we need to start a WAL receiver, try to do that now */
/* NOTE(review): listing lines 1799-1800 are missing here. */
1801 
1802  /* Get other worker processes running, if needed */
/* NOTE(review): listing lines 1803-1804 are missing here. */
1805 
1806 #ifdef HAVE_PTHREAD_IS_THREADED_NP
1807 
1808  /*
1809  * With assertions enabled, check regularly for appearance of
1810  * additional threads. All builds check at start and exit.
1811  */
1812  Assert(pthread_is_threaded_np() == 0);
1813 #endif
1814 
1815  /*
1816  * Lastly, check to see if it's time to do some things that we don't
1817  * want to do every single time through the loop, because they're a
1818  * bit expensive. Note that there's up to a minute of slop in when
1819  * these tasks will be performed, since DetermineSleepTime() will let
1820  * us sleep at most that long; except for SIGKILL timeout which has
1821  * special-case logic there.
1822  */
1823  now = time(NULL);
1824 
1825  /*
1826  * If we already sent SIGQUIT to children and they are slow to shut
1827  * down, it's time to send them SIGKILL. This doesn't happen
1828  * normally, but under certain conditions backends can get stuck while
1829  * shutting down. This is a last measure to get them unwedged.
1830  *
1831  * Note we also do this during recovery from a process crash.
1832  */
/* NOTE(review): listing lines 1835 (end of the condition) and 1838 (the kill step) are missing. */
1833  if ((Shutdown >= ImmediateShutdown || (FatalError && !SendStop)) &&
1834  AbortStartTime != 0 &&
1836  {
1837  /* We were gentle with them before. Not anymore */
1839  /* reset flag so we don't SIGKILL again */
1840  AbortStartTime = 0;
1841  }
1842 
1843  /*
1844  * Once a minute, verify that postmaster.pid hasn't been removed or
1845  * overwritten. If it has, we force a shutdown. This avoids having
1846  * postmasters and child processes hanging around after their database
1847  * is gone, and maybe causing problems if a new database cluster is
1848  * created in the same place. It also provides some protection
1849  * against a DBA foolishly removing postmaster.pid and manually
1850  * starting a new postmaster. Data corruption is likely to ensue from
1851  * that anyway, but we can minimize the damage by aborting ASAP.
1852  */
1853  if (now - last_lockfile_recheck_time >= 1 * SECS_PER_MINUTE)
1854  {
1855  if (!RecheckDataDirLockFile())
1856  {
1857  ereport(LOG,
1858  (errmsg("performing immediate shutdown because data directory lock file is invalid")));
/* NOTE(review): listing line 1859 (the step that triggers the shutdown) is missing. */
1860  }
/* the recheck timestamp advances whether or not the check passed */
1861  last_lockfile_recheck_time = now;
1862  }
1863 
1864  /*
1865  * Touch Unix socket and lock files every 58 minutes, to ensure that
1866  * they are not removed by overzealous /tmp-cleaning tasks. We assume
1867  * no one runs cleaners with cutoff times of less than an hour ...
1868  */
1869  if (now - last_touch_time >= 58 * SECS_PER_MINUTE)
1870  {
1871  TouchSocketFiles();
/* NOTE(review): listing line 1872 is missing here. */
1873  last_touch_time = now;
1874  }
1875  }
1876 }
1877 
1878 /*
1879  * Initialise the masks for select() for the ports we are listening on.
1880  * Return the number of sockets to listen on.
1881  */
1882 static int
1883 initMasks(fd_set *rmask)
1884 {
1885  int maxsock = -1;
1886  int i;
1887 
1888  FD_ZERO(rmask);
1889 
1890  for (i = 0; i < MAXLISTEN; i++)
1891  {
1892  int fd = ListenSocket[i];
1893 
1894  if (fd == PGINVALID_SOCKET)
1895  break;
1896  FD_SET(fd, rmask);
1897 
1898  if (fd > maxsock)
1899  maxsock = fd;
1900  }
1901 
1902  return maxsock + 1;
1903 }
1904 
1905 
1906 /*
1907  * Read a client's startup packet and do something according to it.
1908  *
1909  * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1910  * not return at all.
1911  *
1912  * (Note that ereport(FATAL) stuff is sent to the client, so only use it
1913  * if that's what you want. Return STATUS_ERROR if you don't want to
1914  * send anything to the client, which would typically be appropriate
1915  * if we detect a communications failure.)
1916  *
1917  * Set ssl_done and/or gss_done when negotiation of an encrypted layer
1918  * (currently, TLS or GSSAPI) is completed. A successful negotiation of either
1919  * encryption layer sets both flags, but a rejected negotiation sets only the
1920  * flag for that layer, since the client may wish to try the other one. We
1921  * should make no assumption here about the order in which the client may make
1922  * requests.
1923  */
1924 static int
1925 ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done)
1926 {
1927  int32 len;
1928  void *buf;
1929  ProtocolVersion proto;
1930  MemoryContext oldcontext;
1931 
1932  pq_startmsgread();
1933 
1934  /*
1935  * Grab the first byte of the length word separately, so that we can tell
1936  * whether we have no data at all or an incomplete packet. (This might
1937  * sound inefficient, but it's not really, because of buffering in
1938  * pqcomm.c.)
1939  */
1940  if (pq_getbytes((char *) &len, 1) == EOF)
1941  {
1942  /*
1943  * If we get no data at all, don't clutter the log with a complaint;
1944  * such cases often occur for legitimate reasons. An example is that
1945  * we might be here after responding to NEGOTIATE_SSL_CODE, and if the
1946  * client didn't like our response, it'll probably just drop the
1947  * connection. Service-monitoring software also often just opens and
1948  * closes a connection without sending anything. (So do port
1949  * scanners, which may be less benign, but it's not really our job to
1950  * notice those.)
1951  */
1952  return STATUS_ERROR;
1953  }
1954 
1955  if (pq_getbytes(((char *) &len) + 1, 3) == EOF)
1956  {
1957  /* Got a partial length word, so bleat about that */
1958  if (!ssl_done && !gss_done)
1960  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1961  errmsg("incomplete startup packet")));
1962  return STATUS_ERROR;
1963  }
1964 
1965  len = pg_ntoh32(len);
1966  len -= 4;
1967 
1968  if (len < (int32) sizeof(ProtocolVersion) ||
1970  {
1972  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1973  errmsg("invalid length of startup packet")));
1974  return STATUS_ERROR;
1975  }
1976 
1977  /*
1978  * Allocate at least the size of an old-style startup packet, plus one
1979  * extra byte, and make sure all are zeroes. This ensures we will have
1980  * null termination of all strings, in both fixed- and variable-length
1981  * packet layouts.
1982  */
1983  if (len <= (int32) sizeof(StartupPacket))
1984  buf = palloc0(sizeof(StartupPacket) + 1);
1985  else
1986  buf = palloc0(len + 1);
1987 
1988  if (pq_getbytes(buf, len) == EOF)
1989  {
1991  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1992  errmsg("incomplete startup packet")));
1993  return STATUS_ERROR;
1994  }
1995  pq_endmsgread();
1996 
1997  /*
1998  * The first field is either a protocol version number or a special
1999  * request code.
2000  */
2001  port->proto = proto = pg_ntoh32(*((ProtocolVersion *) buf));
2002 
2003  if (proto == CANCEL_REQUEST_CODE)
2004  {
2005  processCancelRequest(port, buf);
2006  /* Not really an error, but we don't want to proceed further */
2007  return STATUS_ERROR;
2008  }
2009 
2010  if (proto == NEGOTIATE_SSL_CODE && !ssl_done)
2011  {
2012  char SSLok;
2013 
2014 #ifdef USE_SSL
2015  /* No SSL when disabled or on Unix sockets */
2016  if (!LoadedSSL || IS_AF_UNIX(port->laddr.addr.ss_family))
2017  SSLok = 'N';
2018  else
2019  SSLok = 'S'; /* Support for SSL */
2020 #else
2021  SSLok = 'N'; /* No support for SSL */
2022 #endif
2023 
2024 retry1:
2025  if (send(port->sock, &SSLok, 1, 0) != 1)
2026  {
2027  if (errno == EINTR)
2028  goto retry1; /* if interrupted, just retry */
2031  errmsg("failed to send SSL negotiation response: %m")));
2032  return STATUS_ERROR; /* close the connection */
2033  }
2034 
2035 #ifdef USE_SSL
2036  if (SSLok == 'S' && secure_open_server(port) == -1)
2037  return STATUS_ERROR;
2038 #endif
2039 
2040  /*
2041  * regular startup packet, cancel, etc packet should follow, but not
2042  * another SSL negotiation request, and a GSS request should only
2043  * follow if SSL was rejected (client may negotiate in either order)
2044  */
2045  return ProcessStartupPacket(port, true, SSLok == 'S');
2046  }
2047  else if (proto == NEGOTIATE_GSS_CODE && !gss_done)
2048  {
2049  char GSSok = 'N';
2050 #ifdef ENABLE_GSS
2051  /* No GSSAPI encryption when on Unix socket */
2052  if (!IS_AF_UNIX(port->laddr.addr.ss_family))
2053  GSSok = 'G';
2054 #endif
2055 
2056  while (send(port->sock, &GSSok, 1, 0) != 1)
2057  {
2058  if (errno == EINTR)
2059  continue;
2062  errmsg("failed to send GSSAPI negotiation response: %m")));
2063  return STATUS_ERROR; /* close the connection */
2064  }
2065 
2066 #ifdef ENABLE_GSS
2067  if (GSSok == 'G' && secure_open_gssapi(port) == -1)
2068  return STATUS_ERROR;
2069 #endif
2070 
2071  /*
2072  * regular startup packet, cancel, etc packet should follow, but not
2073  * another GSS negotiation request, and an SSL request should only
2074  * follow if GSS was rejected (client may negotiate in either order)
2075  */
2076  return ProcessStartupPacket(port, GSSok == 'G', true);
2077  }
2078 
2079  /* Could add additional special packet types here */
2080 
2081  /*
2082  * Set FrontendProtocol now so that ereport() knows what format to send if
2083  * we fail during startup.
2084  */
2085  FrontendProtocol = proto;
2086 
2087  /* Check that the major protocol version is in range. */
2090  ereport(FATAL,
2091  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2092  errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
2093  PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
2097 
2098  /*
2099  * Now fetch parameters out of startup packet and save them into the Port
2100  * structure. All data structures attached to the Port struct must be
2101  * allocated in TopMemoryContext so that they will remain available in a
2102  * running backend (even after PostmasterContext is destroyed). We need
2103  * not worry about leaking this storage on failure, since we aren't in the
2104  * postmaster process anymore.
2105  */
2107 
2108  if (PG_PROTOCOL_MAJOR(proto) >= 3)
2109  {
2110  int32 offset = sizeof(ProtocolVersion);
2111  List *unrecognized_protocol_options = NIL;
2112 
2113  /*
2114  * Scan packet body for name/option pairs. We can assume any string
2115  * beginning within the packet body is null-terminated, thanks to
2116  * zeroing extra byte above.
2117  */
2118  port->guc_options = NIL;
2119 
2120  while (offset < len)
2121  {
2122  char *nameptr = ((char *) buf) + offset;
2123  int32 valoffset;
2124  char *valptr;
2125 
2126  if (*nameptr == '\0')
2127  break; /* found packet terminator */
2128  valoffset = offset + strlen(nameptr) + 1;
2129  if (valoffset >= len)
2130  break; /* missing value, will complain below */
2131  valptr = ((char *) buf) + valoffset;
2132 
2133  if (strcmp(nameptr, "database") == 0)
2134  port->database_name = pstrdup(valptr);
2135  else if (strcmp(nameptr, "user") == 0)
2136  port->user_name = pstrdup(valptr);
2137  else if (strcmp(nameptr, "options") == 0)
2138  port->cmdline_options = pstrdup(valptr);
2139  else if (strcmp(nameptr, "replication") == 0)
2140  {
2141  /*
2142  * Due to backward compatibility concerns the replication
2143  * parameter is a hybrid beast which allows the value to be
2144  * either boolean or the string 'database'. The latter
2145  * connects to a specific database which is e.g. required for
2146  * logical decoding.
2147  */
2148  if (strcmp(valptr, "database") == 0)
2149  {
2150  am_walsender = true;
2151  am_db_walsender = true;
2152  }
2153  else if (!parse_bool(valptr, &am_walsender))
2154  ereport(FATAL,
2155  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2156  errmsg("invalid value for parameter \"%s\": \"%s\"",
2157  "replication",
2158  valptr),
2159  errhint("Valid values are: \"false\", 0, \"true\", 1, \"database\".")));
2160  }
2161  else if (strncmp(nameptr, "_pq_.", 5) == 0)
2162  {
2163  /*
2164  * Any option beginning with _pq_. is reserved for use as a
2165  * protocol-level option, but at present no such options are
2166  * defined.
2167  */
2168  unrecognized_protocol_options =
2169  lappend(unrecognized_protocol_options, pstrdup(nameptr));
2170  }
2171  else
2172  {
2173  /* Assume it's a generic GUC option */
2174  port->guc_options = lappend(port->guc_options,
2175  pstrdup(nameptr));
2176  port->guc_options = lappend(port->guc_options,
2177  pstrdup(valptr));
2178 
2179  /*
2180  * Copy application_name to port if we come across it. This
2181  * is done so we can log the application_name in the
2182  * connection authorization message. Note that the GUC would
2183  * be used but we haven't gone through GUC setup yet.
2184  */
2185  if (strcmp(nameptr, "application_name") == 0)
2186  {
2187  char *tmp_app_name = pstrdup(valptr);
2188 
2189  pg_clean_ascii(tmp_app_name);
2190 
2191  port->application_name = tmp_app_name;
2192  }
2193  }
2194  offset = valoffset + strlen(valptr) + 1;
2195  }
2196 
2197  /*
2198  * If we didn't find a packet terminator exactly at the end of the
2199  * given packet length, complain.
2200  */
2201  if (offset != len - 1)
2202  ereport(FATAL,
2203  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2204  errmsg("invalid startup packet layout: expected terminator as last byte")));
2205 
2206  /*
2207  * If the client requested a newer protocol version or if the client
2208  * requested any protocol options we didn't recognize, let them know
2209  * the newest minor protocol version we do support and the names of
2210  * any unrecognized options.
2211  */
2213  unrecognized_protocol_options != NIL)
2214  SendNegotiateProtocolVersion(unrecognized_protocol_options);
2215  }
2216  else
2217  {
2218  /*
2219  * Get the parameters from the old-style, fixed-width-fields startup
2220  * packet as C strings. The packet destination was cleared first so a
2221  * short packet has zeros silently added. We have to be prepared to
2222  * truncate the pstrdup result for oversize fields, though.
2223  */
2224  StartupPacket *packet = (StartupPacket *) buf;
2225 
2226  port->database_name = pstrdup(packet->database);
2227  if (strlen(port->database_name) > sizeof(packet->database))
2228  port->database_name[sizeof(packet->database)] = '\0';
2229  port->user_name = pstrdup(packet->user);
2230  if (strlen(port->user_name) > sizeof(packet->user))
2231  port->user_name[sizeof(packet->user)] = '\0';
2232  port->cmdline_options = pstrdup(packet->options);
2233  if (strlen(port->cmdline_options) > sizeof(packet->options))
2234  port->cmdline_options[sizeof(packet->options)] = '\0';
2235  port->guc_options = NIL;
2236  }
2237 
2238  /* Check a user name was given. */
2239  if (port->user_name == NULL || port->user_name[0] == '\0')
2240  ereport(FATAL,
2241  (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
2242  errmsg("no PostgreSQL user name specified in startup packet")));
2243 
2244  /* The database defaults to the user name. */
2245  if (port->database_name == NULL || port->database_name[0] == '\0')
2246  port->database_name = pstrdup(port->user_name);
2247 
2248  if (Db_user_namespace)
2249  {
2250  /*
2251  * If user@, it is a global user, remove '@'. We only want to do this
2252  * if there is an '@' at the end and no earlier in the user string or
2253  * they may fake as a local user of another database attaching to this
2254  * database.
2255  */
2256  if (strchr(port->user_name, '@') ==
2257  port->user_name + strlen(port->user_name) - 1)
2258  *strchr(port->user_name, '@') = '\0';
2259  else
2260  {
2261  /* Append '@' and dbname */
2262  port->user_name = psprintf("%s@%s", port->user_name, port->database_name);
2263  }
2264  }
2265 
2266  /*
2267  * Truncate given database and user names to length of a Postgres name.
2268  * This avoids lookup failures when overlength names are given.
2269  */
2270  if (strlen(port->database_name) >= NAMEDATALEN)
2271  port->database_name[NAMEDATALEN - 1] = '\0';
2272  if (strlen(port->user_name) >= NAMEDATALEN)
2273  port->user_name[NAMEDATALEN - 1] = '\0';
2274 
2275  if (am_walsender)
2277  else
2279 
2280  /*
2281  * Normal walsender backends, e.g. for streaming replication, are not
2282  * connected to a particular database. But walsenders used for logical
2283  * replication need to connect to a specific database. We allow streaming
2284  * replication commands to be issued even if connected to a database as it
2285  * can make sense to first make a basebackup and then stream changes
2286  * starting from that.
2287  */
2288  if (am_walsender && !am_db_walsender)
2289  port->database_name[0] = '\0';
2290 
2291  /*
2292  * Done putting stuff in TopMemoryContext.
2293  */
2294  MemoryContextSwitchTo(oldcontext);
2295 
2296  /*
2297  * If we're going to reject the connection due to database state, say so
2298  * now instead of wasting cycles on an authentication exchange. (This also
2299  * allows a pg_ping utility to be written.)
2300  */
2301  switch (port->canAcceptConnections)
2302  {
2303  case CAC_STARTUP:
2304  ereport(FATAL,
2306  errmsg("the database system is starting up")));
2307  break;
2308  case CAC_SHUTDOWN:
2309  ereport(FATAL,
2311  errmsg("the database system is shutting down")));
2312  break;
2313  case CAC_RECOVERY:
2314  ereport(FATAL,
2316  errmsg("the database system is in recovery mode")));
2317  break;
2318  case CAC_TOOMANY:
2319  ereport(FATAL,
2320  (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
2321  errmsg("sorry, too many clients already")));
2322  break;
2323  case CAC_WAITBACKUP:
2324  /* OK for now, will check in InitPostgres */
2325  break;
2326  case CAC_OK:
2327  break;
2328  }
2329 
2330  return STATUS_OK;
2331 }
2332 
2333 /*
2334  * Send a NegotiateProtocolVersion to the client. This lets the client know
2335  * that they have requested a newer minor protocol version than we are able
2336  * to speak. We'll speak the highest version we know about; the client can,
2337  * of course, abandon the connection if that's a problem.
2338  *
2339  * We also include in the response a list of protocol options we didn't
2340  * understand. This allows clients to include optional parameters that might
2341  * be present either in newer protocol versions or third-party protocol
2342  * extensions without fear of having to reconnect if those options are not
2343  * understood, while at the same time making certain that the client is aware
2344  * of which options were actually accepted.
      *
      * unrecognized_protocol_options: list of option names. The list length is
      * written as an int32 and then every element is written as a string, in
      * list order. The message is queued with type byte 'v' and is not flushed
      * here.
      *
      * NOTE(review): listing lines 2349 and 2353 are absent from this copy, so
      * the declaration of "buf" and the field written just before the option
      * count are not visible here -- confirm against the upstream file.
2345  */
2346 static void
2347 SendNegotiateProtocolVersion(List *unrecognized_protocol_options)
2348 {
2350  ListCell *lc;
2351 
2352  pq_beginmessage(&buf, 'v'); /* NegotiateProtocolVersion */
      /* number of option names that follow */
2354  pq_sendint32(&buf, list_length(unrecognized_protocol_options));
      /* one string per unrecognized option name */
2355  foreach(lc, unrecognized_protocol_options)
2356  pq_sendstring(&buf, lfirst(lc));
2357  pq_endmessage(&buf);
2358 
2359  /* no need to flush, some other message will follow */
2360 }
2361 
2362 /*
2363  * The client has sent a cancel request packet, not a normal
2364  * start-a-new-connection packet. Perform the necessary processing.
2365  * Nothing is sent back to the client.
      *
      * The packet (read through the parameter named "pkt") carries a backend
      * PID and a cancel key. The backend list is searched for that PID; on a
      * PID match with a matching key the backend is sent SIGINT, on a PID match
      * with a different key the attempt is only logged. If no entry has the
      * PID, that is logged as well.
      *
      * NOTE(review): the line holding the function name and parameter list
      * (listing line 2368) is absent from this copy -- confirm the signature
      * against the upstream file.
2366  */
2367 static void
2369 {
2370  CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
2371  int backendPID;
2372  int32 cancelAuthCode;
2373  Backend *bp;
2374 
2375 #ifndef EXEC_BACKEND
2376  dlist_iter iter;
2377 #else
2378  int i;
2379 #endif
2380 
      /* both fields are passed through pg_ntoh32 before use */
2381  backendPID = (int) pg_ntoh32(canc->backendPID);
2382  cancelAuthCode = (int32) pg_ntoh32(canc->cancelAuthCode);
2383 
2384  /*
2385  * See if we have a matching backend. In the EXEC_BACKEND case, we can no
2386  * longer access the postmaster's own backend list, and must rely on the
2387  * duplicate array in shared memory.
2388  */
2389 #ifndef EXEC_BACKEND
2390  dlist_foreach(iter, &BackendList)
2391  {
2392  bp = dlist_container(Backend, elem, iter.cur);
2393 #else
2394  for (i = MaxLivePostmasterChildren() - 1; i >= 0; i--)
2395  {
2396  bp = (Backend *) &ShmemBackendArray[i];
2397 #endif
2398  if (bp->pid == backendPID)
2399  {
2400  if (bp->cancel_key == cancelAuthCode)
2401  {
2402  /* Found a match; signal that backend to cancel current op */
2403  ereport(DEBUG2,
2404  (errmsg_internal("processing cancel request: sending SIGINT to process %d",
2405  backendPID)));
2406  signal_child(bp->pid, SIGINT);
2407  }
2408  else
2409  /* Right PID, wrong key: no way, Jose */
2410  ereport(LOG,
2411  (errmsg("wrong key in cancel request for process %d",
2412  backendPID)));
      /* the first entry with this PID ends the search, key match or not */
2413  return;
2414  }
2415 #ifndef EXEC_BACKEND /* make GNU Emacs 26.1 see brace balance */
2416  }
2417 #else
2418  }
2419 #endif
2420 
2421  /* No matching backend */
2422  ereport(LOG,
2423  (errmsg("PID %d in cancel request did not match any process",
2424  backendPID)));
2425 }
2426 
2427 /*
2428  * canAcceptConnections --- check to see if database state allows connections
2429  * of the specified type. backend_type can be BACKEND_TYPE_NORMAL,
2430  * BACKEND_TYPE_AUTOVAC, or BACKEND_TYPE_BGWORKER. (Note that we don't yet
2431  * know whether a NORMAL connection might turn into a walsender.)
      *
      * Returns CAC_SHUTDOWN, CAC_STARTUP or CAC_RECOVERY straight from the
      * state test below. Otherwise the result starts as CAC_OK or
      * CAC_WAITBACKUP and may still be replaced by CAC_TOOMANY.
      *
      * NOTE(review): listing lines 2462 and 2480 are absent from this copy.
      * They hold the second half of the hot-standby condition and the whole
      * condition guarding CAC_TOOMANY -- confirm against the upstream file.
2432  */
2433 static CAC_state
2434 canAcceptConnections(int backend_type)
2435 {
2436  CAC_state result = CAC_OK;
2437 
2438  /*
2439  * Can't start backends when in startup/shutdown/inconsistent recovery
2440  * state. We treat autovac workers the same as user backends for this
2441  * purpose. However, bgworkers are excluded from this test; we expect
2442  * bgworker_should_start_now() decided whether the DB state allows them.
2443  *
2444  * In state PM_WAIT_BACKUP only superusers can connect (this must be
2445  * allowed so that a superuser can end online backup mode); we return
2446  * CAC_WAITBACKUP code to indicate that this must be checked later. Note
2447  * that neither CAC_OK nor CAC_WAITBACKUP can safely be returned until we
2448  * have checked for too many children.
2449  */
2450  if (pmState != PM_RUN &&
2451  backend_type != BACKEND_TYPE_BGWORKER)
2452  {
2453  if (pmState == PM_WAIT_BACKUP)
2454  result = CAC_WAITBACKUP; /* allow superusers only */
2455  else if (Shutdown > NoShutdown)
2456  return CAC_SHUTDOWN; /* shutdown is pending */
2457  else if (!FatalError &&
2458  (pmState == PM_STARTUP ||
2459  pmState == PM_RECOVERY))
2460  return CAC_STARTUP; /* normal startup */
      /* NOTE(review): rest of this condition (listing line 2462) is missing */
2461  else if (!FatalError &&
2463  result = CAC_OK; /* connection OK during hot standby */
2464  else
2465  return CAC_RECOVERY; /* else must be crash recovery */
2466  }
2467 
2468  /*
2469  * Don't start too many children.
2470  *
2471  * We allow more connections here than we can have backends because some
2472  * might still be authenticating; they might fail auth, or some existing
2473  * backend might exit before the auth cycle is completed. The exact
2474  * MaxBackends limit is enforced when a new backend tries to join the
2475  * shared-inval backend array.
2476  *
2477  * The limit here must match the sizes of the per-child-process arrays;
2478  * see comments for MaxLivePostmasterChildren().
2479  */
      /* NOTE(review): the guarding "if" (listing line 2480) is missing here */
2481  result = CAC_TOOMANY;
2482 
2483  return result;
2484 }
2485 
2486 
2487 /*
2488  * ConnCreate -- create a local connection data structure
2489  *
2490  * Returns NULL on failure, other than out-of-memory which is fatal.
      *
      * serverFd is handed to StreamConnection() together with the new Port.
      * The Port is obtained with calloc(), so it starts out zero-filled. If
      * StreamConnection() does not return STATUS_OK, the socket is closed when
      * it is valid, the Port is released with ConnFree(), and NULL is returned.
      *
      * Allocation failure is logged and followed by ExitPostmaster(1), which
      * presumably does not return -- TODO confirm.
2491  */
2492 static Port *
2493 ConnCreate(int serverFd)
2494 {
2495  Port *port;
2496 
2497  if (!(port = (Port *) calloc(1, sizeof(Port))))
2498  {
2499  ereport(LOG,
2500  (errcode(ERRCODE_OUT_OF_MEMORY),
2501  errmsg("out of memory")));
2502  ExitPostmaster(1);
2503  }
2504 
2505  if (StreamConnection(serverFd, port) != STATUS_OK)
2506  {
      /* only close a socket that was actually obtained */
2507  if (port->sock != PGINVALID_SOCKET)
2508  StreamClose(port->sock);
2509  ConnFree(port);
2510  return NULL;
2511  }
2512 
2513  /*
2514  * Allocate GSSAPI specific state struct
      *
      * This is compiled in only when EXEC_BACKEND is not defined and at least
      * one of ENABLE_GSS / ENABLE_SSPI is defined.
2515  */
2516 #ifndef EXEC_BACKEND
2517 #if defined(ENABLE_GSS) || defined(ENABLE_SSPI)
2518  port->gss = (pg_gssinfo *) calloc(1, sizeof(pg_gssinfo));
2519  if (!port->gss)
2520  {
2521  ereport(LOG,
2522  (errcode(ERRCODE_OUT_OF_MEMORY),
2523  errmsg("out of memory")));
2524  ExitPostmaster(1);
2525  }
2526 #endif
2527 #endif
2528 
2529  return port;
2530 }
2531 
2532 
2533 /*
2534  * ConnFree -- free a local connection data structure
      *
      * With USE_SSL, secure_close() is called on the connection first. The
      * attached gss state is freed when non-NULL, then the structure itself.
      * The body refers to its argument as "conn".
      *
      * NOTE(review): the line holding the function name and parameter list
      * (listing line 2537) is absent from this copy -- confirm the signature
      * against the upstream file.
2535  */
2536 static void
2538 {
2539 #ifdef USE_SSL
2540  secure_close(conn);
2541 #endif
2542  if (conn->gss)
2543  free(conn->gss);
2544  free(conn);
2545 }
2546 
2547 
2548 /*
2549  * ClosePostmasterPorts -- close all the postmaster's open sockets
2550  *
2551  * This is called during child process startup to release file descriptors
2552  * that are not needed by that child process. The postmaster still has
2553  * them open, of course.
2554  *
2555  * Note: we pass am_syslogger as a boolean because we don't want to set
2556  * the global variable yet when this is called.
      *
      * am_syslogger: when false, the read side of syslogPipe is closed and
      * its slot reset; when true, the pipe is left alone.
      *
      * NOTE(review): listing lines 2570, 2572, 2574, 2576, 2587 and 2588 are
      * absent from this copy. They cover the close of the death-watch pipe
      * and the body of the listen-socket loop -- confirm against the upstream
      * file.
2557  */
2558 void
2559 ClosePostmasterPorts(bool am_syslogger)
2560 {
2561  int i;
2562 
2563 #ifndef WIN32
2564 
2565  /*
2566  * Close the write end of postmaster death watch pipe. It's important to
2567  * do this as early as possible, so that if postmaster dies, others won't
2568  * think that it's still running because we're holding the pipe open.
2569  */
      /* NOTE(review): the close call and its test (line 2570) are missing */
2571  ereport(FATAL,
2573  errmsg_internal("could not close postmaster death monitoring pipe in child process: %m")));
2575  /* Notify fd.c that we released one pipe FD. */
2577 #endif
2578 
2579  /*
2580  * Close the postmaster's listen sockets. These aren't tracked by fd.c,
2581  * so we don't call ReleaseExternalFD() here.
2582  */
2583  for (i = 0; i < MAXLISTEN; i++)
2584  {
2585  if (ListenSocket[i] != PGINVALID_SOCKET)
2586  {
      /* NOTE(review): loop body (listing lines 2587-2588) is missing */
2589  }
2590  }
2591 
2592  /*
2593  * If using syslogger, close the read side of the pipe. We don't bother
2594  * tracking this in fd.c, either.
      *
      * The test is on !am_syslogger: the syslogger itself keeps the pipe.
2595  */
2596  if (!am_syslogger)
2597  {
2598 #ifndef WIN32
2599  if (syslogPipe[0] >= 0)
2600  close(syslogPipe[0]);
2601  syslogPipe[0] = -1;
2602 #else
2603  if (syslogPipe[0])
2604  CloseHandle(syslogPipe[0]);
2605  syslogPipe[0] = 0;
2606 #endif
2607  }
2608 
2609 #ifdef USE_BONJOUR
2610  /* If using Bonjour, close the connection to the mDNS daemon */
2611  if (bonjour_sdref)
2612  close(DNSServiceRefSockFD(bonjour_sdref));
2613 #endif
2614 }
2615 
2616 
2617 /*
2618  * InitProcessGlobals -- set MyProcPid, MyStartTime[stamp], random seeds
2619  *
2620  * Called early in the postmaster and every backend.
      *
      * MyProcPid is taken from getpid(). random() is seeded with srandom(),
      * using pg_strong_random() output when that succeeds and a mix of PID
      * and MyStartTimestamp otherwise.
      *
      * NOTE(review): listing lines 2623, 2628 and 2629 are absent from this
      * copy: the function name line and, presumably, the assignments of the
      * start-time variables -- confirm against the upstream file.
2621  */
2622 void
2624 {
2625  unsigned int rseed;
2626 
2627  MyProcPid = getpid();
2630 
2631  /*
2632  * Set a different seed for random() in every process. We want something
2633  * unpredictable, so if possible, use high-quality random bits for the
2634  * seed. Otherwise, fall back to a seed based on timestamp and PID.
2635  */
2636  if (!pg_strong_random(&rseed, sizeof(rseed)))
2637  {
2638  /*
2639  * Since PIDs and timestamps tend to change more frequently in their
2640  * least significant bits, shift the timestamp left to allow a larger
2641  * total number of seeds in a given time period. Since that would
2642  * leave only 20 bits of the timestamp that cycle every ~1 second,
2643  * also mix in some higher bits.
      *
      * The mix is computed in uint64 and narrowed when stored into the
      * unsigned int seed.
2644  */
2645  rseed = ((uint64) MyProcPid) ^
2646  ((uint64) MyStartTimestamp << 12) ^
2647  ((uint64) MyStartTimestamp >> 20);
2648  }
2649  srandom(rseed);
2650 }
2651 
2652 
2653 /*
2654  * reset_shared -- reset shared memory and semaphores
      *
      * NOTE(review): listing lines 2657 (function name and parameters) and
      * 2666 (the only statement in the body) are absent from this copy, so
      * nothing beyond the comments can be verified here -- confirm against
      * the upstream file.
2655  */
2656 static void
2658 {
2659  /*
2660  * Create or re-create shared memory and semaphores.
2661  *
2662  * Note: in each "cycle of life" we will normally assign the same IPC keys
2663  * (if using SysV shmem and/or semas). This helps ensure that we will
2664  * clean up dead IPC objects if the postmaster crashes and is restarted.
2665  */
2667 }
2668 
2669 
2670 /*
2671  * SIGHUP -- reread config files, and tell children to do same
      *
      * errno is saved on entry and restored before returning. Work is done
      * only while Shutdown <= SmartShutdown: the reload is logged, pg_hba.conf
      * and pg_ident.conf are reloaded (a failure of either is only logged),
      * SSL state is re-initialized or destroyed according to EnableSSL when
      * built with USE_SSL, and under EXEC_BACKEND the non-default variables
      * file is rewritten.
      *
      * NOTE(review): this copy lacks the function name line (2674), listing
      * lines 2690-2691, the statement following every "if (...PID != 0)" test
      * below, and listing line 2745 -- confirm against the upstream file.
2672  */
2673 static void
2675 {
2676  int save_errno = errno;
2677 
2678  /*
2679  * We rely on the signal mechanism to have blocked all signals ... except
2680  * on Windows, which lacks sigaction(), so we have to do it manually.
2681  */
2682 #ifdef WIN32
2683  PG_SETMASK(&BlockSig);
2684 #endif
2685 
2686  if (Shutdown <= SmartShutdown)
2687  {
2688  ereport(LOG,
2689  (errmsg("received SIGHUP, reloading configuration files")));
      /* NOTE(review): the action taken for each live child is missing below */
2692  if (StartupPID != 0)
2694  if (BgWriterPID != 0)
2696  if (CheckpointerPID != 0)
2698  if (WalWriterPID != 0)
2700  if (WalReceiverPID != 0)
2702  if (AutoVacPID != 0)
2704  if (PgArchPID != 0)
2706  if (SysLoggerPID != 0)
2708  if (PgStatPID != 0)
2710 
2711  /* Reload authentication config files too */
2712  if (!load_hba())
2713  ereport(LOG,
2714  /* translator: %s is a configuration file */
2715  (errmsg("%s was not reloaded", "pg_hba.conf")));
2716 
2717  if (!load_ident())
2718  ereport(LOG,
2719  (errmsg("%s was not reloaded", "pg_ident.conf")));
2720 
2721 #ifdef USE_SSL
2722  /* Reload SSL configuration as well */
2723  if (EnableSSL)
2724  {
      /* LoadedSSL keeps its previous value when re-initialization fails */
2725  if (secure_initialize(false) == 0)
2726  LoadedSSL = true;
2727  else
2728  ereport(LOG,
2729  (errmsg("SSL configuration was not reloaded")));
2730  }
2731  else
2732  {
2733  secure_destroy();
2734  LoadedSSL = false;
2735  }
2736 #endif
2737 
2738 #ifdef EXEC_BACKEND
2739  /* Update the starting-point file for future children */
2740  write_nondefault_variables(PGC_SIGHUP);
2741 #endif
2742  }
2743 
2744 #ifdef WIN32
2746 #endif
2747 
2748  errno = save_errno;
2749 }
2750 
2751 
2752 /*
2753  * pmdie -- signal handler for processing various postmaster signals.
      *
      * Dispatches on postgres_signal_arg: SIGTERM is a smart shutdown request,
      * SIGINT a fast one, SIGQUIT an immediate one. Each case does nothing if
      * Shutdown is already at or beyond the requested level. errno is saved on
      * entry and restored before returning.
      *
      * NOTE(review): this copy lacks the function name line (2756) and many
      * statements; the embedded listing numbers skip 2783, 2788, 2794, 2799,
      * 2820, 2828, 2841, 2846, 2859, 2867, 2871-2873, 2879-2880, 2887, 2894,
      * 2908, 2913, 2918-2919, 2928 and 2933. Confirm every gap against the
      * upstream file.
2754  */
2755 static void
2757 {
2758  int save_errno = errno;
2759 
2760  /*
2761  * We rely on the signal mechanism to have blocked all signals ... except
2762  * on Windows, which lacks sigaction(), so we have to do it manually.
2763  */
2764 #ifdef WIN32
2765  PG_SETMASK(&BlockSig);
2766 #endif
2767 
2768  ereport(DEBUG2,
2769  (errmsg_internal("postmaster received signal %d",
2770  postgres_signal_arg)));
2771 
2772  switch (postgres_signal_arg)
2773  {
2774  case SIGTERM:
2775 
2776  /*
2777  * Smart Shutdown:
2778  *
2779  * Wait for children to end their work, then shut down.
2780  */
2781  if (Shutdown >= SmartShutdown)
2782  break;
2784  ereport(LOG,
2785  (errmsg("received smart shutdown request")));
2786 
2787  /* Report status */
2789 #ifdef USE_SYSTEMD
2790  sd_notify(0, "STOPPING=1");
2791 #endif
2792 
      /* NOTE(review): rest of this condition (listing line 2794) is missing */
2793  if (pmState == PM_RUN || pmState == PM_RECOVERY ||
2795  {
2796  /* autovac workers are told to shut down immediately */
2797  /* and bgworkers too; does this need tweaking? */
2798  SignalSomeChildren(SIGTERM,
2800  /* and the autovac launcher too */
2801  if (AutoVacPID != 0)
2802  signal_child(AutoVacPID, SIGTERM);
2803  /* and the bgwriter too */
2804  if (BgWriterPID != 0)
2805  signal_child(BgWriterPID, SIGTERM);
2806  /* and the walwriter too */
2807  if (WalWriterPID != 0)
2808  signal_child(WalWriterPID, SIGTERM);
2809 
2810  /*
2811  * If we're in recovery, we can't kill the startup process
2812  * right away, because at present doing so does not release
2813  * its locks. We might want to change this in a future
2814  * release. For the time being, the PM_WAIT_READONLY state
2815  * indicates that we're waiting for the regular (read only)
2816  * backends to die off; once they do, we'll kill the startup
2817  * and walreceiver processes.
2818  */
2819  pmState = (pmState == PM_RUN) ?
2821  }
2822 
2823  /*
2824  * Now wait for online backup mode to end and backends to exit. If
2825  * that is already the case, PostmasterStateMachine will take the
2826  * next step.
2827  */
2829  break;
2830 
2831  case SIGINT:
2832 
2833  /*
2834  * Fast Shutdown:
2835  *
2836  * Abort all children with SIGTERM (rollback active transactions
2837  * and exit) and shut down when they are gone.
2838  */
2839  if (Shutdown >= FastShutdown)
2840  break;
2842  ereport(LOG,
2843  (errmsg("received fast shutdown request")));
2844 
2845  /* Report status */
2847 #ifdef USE_SYSTEMD
2848  sd_notify(0, "STOPPING=1");
2849 #endif
2850 
2851  if (StartupPID != 0)
2852  signal_child(StartupPID, SIGTERM);
2853  if (BgWriterPID != 0)
2854  signal_child(BgWriterPID, SIGTERM);
2855  if (WalReceiverPID != 0)
2856  signal_child(WalReceiverPID, SIGTERM);
2857  if (pmState == PM_STARTUP || pmState == PM_RECOVERY)
2858  {
2860 
2861  /*
2862  * Only startup, bgwriter, walreceiver, possibly bgworkers,
2863  * and/or checkpointer should be active in this state; we just
2864  * signaled the first four, and we don't want to kill
2865  * checkpointer yet.
2866  */
2868  }
      /* NOTE(review): listing lines 2871-2873 of this condition are missing */
2869  else if (pmState == PM_RUN ||
2870  pmState == PM_WAIT_BACKUP ||
2874  {
2875  ereport(LOG,
2876  (errmsg("aborting any active transactions")));
2877  /* shut down all backends and workers */
2878  SignalSomeChildren(SIGTERM,
2881  /* and the autovac launcher too */
2882  if (AutoVacPID != 0)
2883  signal_child(AutoVacPID, SIGTERM);
2884  /* and the walwriter too */
2885  if (WalWriterPID != 0)
2886  signal_child(WalWriterPID, SIGTERM);
2888  }
2889 
2890  /*
2891  * Now wait for backends to exit. If there are none,
2892  * PostmasterStateMachine will take the next step.
2893  */
2895  break;
2896 
2897  case SIGQUIT:
2898 
2899  /*
2900  * Immediate Shutdown:
2901  *
2902  * abort all children with SIGQUIT, wait for them to exit,
2903  * terminate remaining ones with SIGKILL, then exit without
2904  * attempting to properly shut down the database system.
2905  */
2906  if (Shutdown >= ImmediateShutdown)
2907  break;
2909  ereport(LOG,
2910  (errmsg("received immediate shutdown request")));
2911 
2912  /* Report status */
2914 #ifdef USE_SYSTEMD
2915  sd_notify(0, "STOPPING=1");
2916 #endif
2917 
2920 
2921  /* set stopwatch for them to die */
2922  AbortStartTime = time(NULL);
2923 
2924  /*
2925  * Now wait for backends to exit. If there are none,
2926  * PostmasterStateMachine will take the next step.
2927  */
2929  break;
2930  }
2931 
2932 #ifdef WIN32
2934 #endif
2935 
2936  errno = save_errno;
2937 }
2938 
2939 /*
2940  * Reaper -- signal handler to cleanup after a child process dies.
      *
      * Collects every exited child reported by waitpid(-1, ..., WNOHANG) and
      * identifies each by comparing its PID with the recorded PIDs of the
      * special children (startup, bgwriter, checkpointer, WAL writer, WAL
      * receiver, autovacuum launcher, archiver, stats collector, system
      * logger). Anything else is offered to CleanupBackgroundWorker() and
      * finally to CleanupBackend(). errno is saved on entry and restored
      * before returning.
      *
      * NOTE(review): this copy lacks the function name line (2943) and many
      * statements; the embedded listing numbers skip, among others, 2976-2977,
      * 2986-2987, 2989, 3000, 3030, 3032, 3034, 3037, 3046, 3058, 3060, 3062,
      * 3068-3069, 3076, 3083, 3127, 3131, 3137, 3139, 3146, 3244, 3269 and
      * 3273. Confirm every gap against the upstream file.
2941  */
2942 static void
2944 {
2945  int save_errno = errno;
2946  int pid; /* process id of dead child process */
2947  int exitstatus; /* its exit status */
2948 
2949  /*
2950  * We rely on the signal mechanism to have blocked all signals ... except
2951  * on Windows, which lacks sigaction(), so we have to do it manually.
2952  */
2953 #ifdef WIN32
2954  PG_SETMASK(&BlockSig);
2955 #endif
2956 
2957  ereport(DEBUG4,
2958  (errmsg_internal("reaping dead processes")));
2959 
      /* WNOHANG: take only children that have already exited, never block */
2960  while ((pid = waitpid(-1, &exitstatus, WNOHANG)) > 0)
2961  {
2962  /*
2963  * Check if this child was a startup process.
2964  */
2965  if (pid == StartupPID)
2966  {
2967  StartupPID = 0;
2968 
2969  /*
2970  * Startup process exited in response to a shutdown request (or it
2971  * completed normally regardless of the shutdown request).
2972  */
2973  if (Shutdown > NoShutdown &&
2974  (EXIT_STATUS_0(exitstatus) || EXIT_STATUS_1(exitstatus)))
2975  {
2978  /* PostmasterStateMachine logic does the rest */
2979  continue;
2980  }
2981 
2982  if (EXIT_STATUS_3(exitstatus))
2983  {
2984  ereport(LOG,
2985  (errmsg("shutdown at recovery target")));
2988  TerminateChildren(SIGTERM);
2990  /* PostmasterStateMachine logic does the rest */
2991  continue;
2992  }
2993 
2994  /*
2995  * Unexpected exit of startup process (including FATAL exit)
2996  * during PM_STARTUP is treated as catastrophic. There are no
2997  * other processes running yet, so we can just exit.
2998  */
      /* NOTE(review): middle of this condition (listing line 3000) is missing */
2999  if (pmState == PM_STARTUP &&
3001  !EXIT_STATUS_0(exitstatus))
3002  {
3003  LogChildExit(LOG, _("startup process"),
3004  pid, exitstatus);
3005  ereport(LOG,
3006  (errmsg("aborting startup due to startup process failure")));
3007  ExitPostmaster(1);
3008  }
3009 
3010  /*
3011  * After PM_STARTUP, any unexpected exit (including FATAL exit) of
3012  * the startup process is catastrophic, so kill other children,
3013  * and set StartupStatus so we don't try to reinitialize after
3014  * they're gone. Exception: if StartupStatus is STARTUP_SIGNALED,
3015  * then we previously sent the startup process a SIGQUIT; so
3016  * that's probably the reason it died, and we do want to try to
3017  * restart in that case.
3018  *
3019  * This stanza also handles the case where we sent a SIGQUIT
3020  * during PM_STARTUP due to some dead_end child crashing: in that
3021  * situation, if the startup process dies on the SIGQUIT, we need
3022  * to transition to PM_WAIT_BACKENDS state which will allow
3023  * PostmasterStateMachine to restart the startup process. (On the
3024  * other hand, the startup process might complete normally, if we
3025  * were too late with the SIGQUIT. In that case we'll fall
3026  * through and commence normal operations.)
3027  */
3028  if (!EXIT_STATUS_0(exitstatus))
3029  {
      /* NOTE(review): the test and assignments of this stanza are missing */
3031  {
3033  if (pmState == PM_STARTUP)
3035  }
3036  else
3038  HandleChildCrash(pid, exitstatus,
3039  _("startup process"));
3040  continue;
3041  }
3042 
3043  /*
3044  * Startup succeeded, commence normal operations
3045  */
3047  FatalError = false;
3048  AbortStartTime = 0;
3049  ReachedNormalRunning = true;
3050  pmState = PM_RUN;
3051 
3052  /*
3053  * Crank up the background tasks, if we didn't do that already
3054  * when we entered consistent recovery state. It doesn't matter
3055  * if this fails, we'll just try again later.
3056  */
      /* NOTE(review): the start calls after these three tests are missing */
3057  if (CheckpointerPID == 0)
3059  if (BgWriterPID == 0)
3061  if (WalWriterPID == 0)
3063 
3064  /*
3065  * Likewise, start other special children as needed. In a restart
3066  * situation, some of them may be alive already.
3067  */
3070  if (PgArchStartupAllowed() && PgArchPID == 0)
3071  PgArchPID = pgarch_start();
3072  if (PgStatPID == 0)
3073  PgStatPID = pgstat_start();
3074 
3075  /* workers may be scheduled to start now */
3077 
3078  /* at this point we are really open for business */
3079  ereport(LOG,
3080  (errmsg("database system is ready to accept connections")));
3081 
3082  /* Report status */
3084 #ifdef USE_SYSTEMD
3085  sd_notify(0, "READY=1");
3086 #endif
3087 
3088  continue;
3089  }
3090 
3091  /*
3092  * Was it the bgwriter? Normal exit can be ignored; we'll start a new
3093  * one at the next iteration of the postmaster's main loop, if
3094  * necessary. Any other exit condition is treated as a crash.
3095  */
3096  if (pid == BgWriterPID)
3097  {
3098  BgWriterPID = 0;
3099  if (!EXIT_STATUS_0(exitstatus))
3100  HandleChildCrash(pid, exitstatus,
3101  _("background writer process"));
3102  continue;
3103  }
3104 
3105  /*
3106  * Was it the checkpointer?
3107  */
3108  if (pid == CheckpointerPID)
3109  {
3110  CheckpointerPID = 0;
3111  if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN)
3112  {
3113  /*
3114  * OK, we saw normal exit of the checkpointer after it's been
3115  * told to shut down. We expect that it wrote a shutdown
3116  * checkpoint. (If for some reason it didn't, recovery will
3117  * occur on next postmaster start.)
3118  *
3119  * At this point we should have no normal backend children
3120  * left (else we'd not be in PM_SHUTDOWN state) but we might
3121  * have dead_end children to wait for.
3122  *
3123  * If we have an archiver subprocess, tell it to do a last
3124  * archive cycle and quit. Likewise, if we have walsender
3125  * processes, tell them to send any remaining WAL and quit.
3126  */
3128 
3129  /* Waken archiver for the last time */
3130  if (PgArchPID != 0)
3132 
3133  /*
3134  * Waken walsenders for the last time. No regular backends
3135  * should be around anymore.
3136  */
3138 
3140 
3141  /*
3142  * We can also shut down the stats collector now; there's
3143  * nothing left for it to do.
3144  */
3145  if (PgStatPID != 0)
3147  }
3148  else
3149  {
3150  /*
3151  * Any unexpected exit of the checkpointer (including FATAL
3152  * exit) is treated as a crash.
3153  */
3154  HandleChildCrash(pid, exitstatus,
3155  _("checkpointer process"));
3156  }
3157 
3158  continue;
3159  }
3160 
3161  /*
3162  * Was it the wal writer? Normal exit can be ignored; we'll start a
3163  * new one at the next iteration of the postmaster's main loop, if
3164  * necessary. Any other exit condition is treated as a crash.
3165  */
3166  if (pid == WalWriterPID)
3167  {
3168  WalWriterPID = 0;
3169  if (!EXIT_STATUS_0(exitstatus))
3170  HandleChildCrash(pid, exitstatus,
3171  _("WAL writer process"));
3172  continue;
3173  }
3174 
3175  /*
3176  * Was it the wal receiver? If exit status is zero (normal) or one
3177  * (FATAL exit), we assume everything is all right just like normal
3178  * backends. (If we need a new wal receiver, we'll start one at the
3179  * next iteration of the postmaster's main loop.)
3180  */
3181  if (pid == WalReceiverPID)
3182  {
3183  WalReceiverPID = 0;
3184  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3185  HandleChildCrash(pid, exitstatus,
3186  _("WAL receiver process"));
3187  continue;
3188  }
3189 
3190  /*
3191  * Was it the autovacuum launcher? Normal exit can be ignored; we'll
3192  * start a new one at the next iteration of the postmaster's main
3193  * loop, if necessary. Any other exit condition is treated as a
3194  * crash.
3195  */
3196  if (pid == AutoVacPID)
3197  {
3198  AutoVacPID = 0;
3199  if (!EXIT_STATUS_0(exitstatus))
3200  HandleChildCrash(pid, exitstatus,
3201  _("autovacuum launcher process"));
3202  continue;
3203  }
3204 
3205  /*
3206  * Was it the archiver? If so, just try to start a new one; no need
3207  * to force reset of the rest of the system. (If that fails, we'll try
3208  * again in future cycles of the main loop.). Unless we were waiting
3209  * for it to shut down; don't restart it in that case, and
3210  * PostmasterStateMachine() will advance to the next shutdown step.
3211  */
3212  if (pid == PgArchPID)
3213  {
3214  PgArchPID = 0;
      /* a nonzero exit is only logged, not treated as a crash */
3215  if (!EXIT_STATUS_0(exitstatus))
3216  LogChildExit(LOG, _("archiver process"),
3217  pid, exitstatus);
3218  if (PgArchStartupAllowed())
3219  PgArchPID = pgarch_start();
3220  continue;
3221  }
3222 
3223  /*
3224  * Was it the statistics collector? If so, just try to start a new
3225  * one; no need to force reset of the rest of the system. (If that fails,
3226  * we'll try again in future cycles of the main loop.)
3227  */
3228  if (pid == PgStatPID)
3229  {
3230  PgStatPID = 0;
      /* a nonzero exit is only logged, not treated as a crash */
3231  if (!EXIT_STATUS_0(exitstatus))
3232  LogChildExit(LOG, _("statistics collector process"),
3233  pid, exitstatus);
3234  if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
3235  PgStatPID = pgstat_start();
3236  continue;
3237  }
3238 
3239  /* Was it the system logger? If so, try to start a new one */
3240  if (pid == SysLoggerPID)
3241  {
3242  SysLoggerPID = 0;
3243  /* for safety's sake, launch new logger *first* */
3245  if (!EXIT_STATUS_0(exitstatus))
3246  LogChildExit(LOG, _("system logger process"),
3247  pid, exitstatus);
3248  continue;
3249  }
3250 
3251  /* Was it one of our background workers? */
3252  if (CleanupBackgroundWorker(pid, exitstatus))
3253  {
3254  /* have it be restarted */
3255  HaveCrashedWorker = true;
3256  continue;
3257  }
3258 
3259  /*
3260  * Else do standard backend child cleanup.
3261  */
3262  CleanupBackend(pid, exitstatus);
3263  } /* loop over pending child-death reports */
3264 
3265  /*
3266  * After cleaning out the SIGCHLD queue, see if we have any state changes
3267  * or actions to make.
3268  */
3270 
3271  /* Done with signal handler */
3272 #ifdef WIN32
3274 #endif
3275 
3276  errno = save_errno;
3277 }
3278 
3279 /*
3280  * Scan the bgworkers list and see if the given PID (which has just stopped
3281  * or crashed) is in it. Handle its shutdown if so, and return true. If not a
3282  * bgworker, return false.
3283  *
3284  * This is heavily based on CleanupBackend. One important difference is that
3285  * we don't know yet that the dying process is a bgworker, so we must be silent
3286  * until we're sure it is.
3287  */
3288 static bool
 /*
  * NOTE(review): listing line 3289 is absent from this copy. Presumably it
  * carried the function name and the `pid` parameter (the reaper calls
  * CleanupBackgroundWorker(pid, exitstatus) and the body reads `pid`).
  * Restore it from the upstream file before building.
  */
3290  int exitstatus) /* child's exit status */
3291 {
3292  char namebuf[MAXPGPATH];
3293  slist_mutable_iter iter;
3294 
 /*
  * NOTE(review): listing line 3295 is missing here. The block below uses
  * `iter.cur` and `continue`, so a list-iteration header presumably
  * belongs on this line -- confirm against upstream.
  */
3296  {
3297  RegisteredBgWorker *rw;
3298 
3299  rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
3300 
 /* Skip every registered worker that is not the process that just exited */
3301  if (rw->rw_pid != pid)
3302  continue;
3303 
3304 #ifdef WIN32
3305  /* see CleanupBackend */
3306  if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3307  exitstatus = 0;
3308 #endif
3309 
 /* Build the (translated) process description used in log messages below */
3310  snprintf(namebuf, MAXPGPATH, _("background worker \"%s\""),
3311  rw->rw_worker.bgw_type);
3312 
3313 
3314  if (!EXIT_STATUS_0(exitstatus))
3315  {
3316  /* Record timestamp, so we know when to restart the worker. */
 /*
  * NOTE(review): listing line 3317 is missing; as written this branch is
  * empty. Presumably it assigned rw->rw_crashed_at (the else branch
  * resets that field) -- confirm against upstream.
  */
3318  }
3319  else
3320  {
3321  /* Zero exit status means terminate */
3322  rw->rw_crashed_at = 0;
3323  rw->rw_terminate = true;
3324  }
3325 
3326  /*
3327  * Additionally, for shared-memory-connected workers, just like a
3328  * backend, any exit status other than 0 or 1 is considered a crash
3329  * and causes a system-wide restart.
3330  */
3331  if ((rw->rw_worker.bgw_flags & BGWORKER_SHMEM_ACCESS) != 0)
3332  {
3333  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3334  {
3335  HandleChildCrash(pid, exitstatus, namebuf);
3336  return true;
3337  }
3338  }
3339 
3340  /*
3341  * We must release the postmaster child slot whether this worker is
3342  * connected to shared memory or not, but we only treat it as a crash
3343  * if it is in fact connected.
3344  */
 /*
  * NOTE(review): listing lines 3345-3346 are missing. They presumably held
  * the `if (...)` condition described by the comment above; without them
  * the block below would run unconditionally. Restore from upstream.
  */
3347  {
3348  HandleChildCrash(pid, exitstatus, namebuf);
3349  return true;
3350  }
3351 
3352  /* Get it out of the BackendList and clear out remaining data */
3353  dlist_delete(&rw->rw_backend->elem);
3354 #ifdef EXEC_BACKEND
3355  ShmemBackendArrayRemove(rw->rw_backend);
3356 #endif
3357 
3358  /*
3359  * It's possible that this background worker started some OTHER
3360  * background worker and asked to be notified when that worker started
3361  * or stopped. If so, cancel any notifications destined for the
3362  * now-dead backend.
3363  */
3364  if (rw->rw_backend->bgworker_notify)
 /*
  * NOTE(review): listing line 3365 (the statement controlled by the `if`
  * above) is missing. As this copy stands, the free() on the next line
  * would become the conditional body, which is almost certainly not the
  * intent -- restore the notification-cancelling call from upstream.
  */
3366  free(rw->rw_backend);
3367  rw->rw_backend = NULL;
3368  rw->rw_pid = 0;
3369  rw->rw_child_slot = 0;
3370  ReportBackgroundWorkerExit(&iter); /* report child death */
3371 
 /* A clean exit is logged at DEBUG1 only; anything else at LOG */
3372  LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG,
3373  namebuf, pid, exitstatus);
3374 
3375  return true;
3376  }
3377 
 /* No registered worker had this PID */
3378  return false;
3379 }
3380 
3381 /*
3382  * CleanupBackend -- cleanup after terminated backend.
3383  *
3384  * Remove all local state associated with backend.
3385  *
3386  * If you change this, see also CleanupBackgroundWorker.
3387  */
3388 static void
 /*
  * NOTE(review): listing line 3389 is absent from this copy. Presumably it
  * carried the function name and the `pid` parameter (the reaper calls
  * CleanupBackend(pid, exitstatus) and the body reads `pid`). Restore it
  * from the upstream file before building.
  */
3390  int exitstatus) /* child's exit status. */
3391 {
3392  dlist_mutable_iter iter;
3393 
3394  LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
3395 
3396  /*
3397  * If a backend dies in an ugly way then we must signal all other backends
3398  * to quickdie. If exit status is zero (normal) or one (FATAL exit), we
3399  * assume everything is all right and proceed to remove the backend from
3400  * the active backend list.
3401  */
3402 
3403 #ifdef WIN32
3404 
3405  /*
3406  * On win32, also treat ERROR_WAIT_NO_CHILDREN (128) as nonfatal case,
3407  * since that sometimes happens under load when the process fails to start
3408  * properly (long before it starts using shared memory). Microsoft reports
3409  * it is related to mutex failure:
3410  * http://archives.postgresql.org/pgsql-hackers/2010-09/msg00790.php
3411  */
3412  if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3413  {
3414  LogChildExit(LOG, _("server process"), pid, exitstatus);
3415  exitstatus = 0;
3416  }
3417 #endif
3418 
 /* Any status other than 0 or 1 is handed to the crash path and we stop here */
3419  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3420  {
3421  HandleChildCrash(pid, exitstatus, _("server process"));
3422  return;
3423  }
3424 
 /* Locate the list entry for this PID; at most one entry is processed (see break) */
3425  dlist_foreach_modify(iter, &BackendList)
3426  {
3427  Backend *bp = dlist_container(Backend, elem, iter.cur);
3428 
3429  if (bp->pid == pid)
3430  {
3431  if (!bp->dead_end)
3432  {
 /*
  * NOTE(review): listing line 3433 is missing. It presumably held the
  * `if (...)` test that detects the "child failed to clean itself up"
  * case described below; without it the crash path would run for every
  * non-dead_end backend. Restore from upstream.
  */
3434  {
3435  /*
3436  * Uh-oh, the child failed to clean itself up. Treat as a
3437  * crash after all.
3438  */
3439  HandleChildCrash(pid, exitstatus, _("server process"));
3440  return;
3441  }
3442 #ifdef EXEC_BACKEND
3443  ShmemBackendArrayRemove(bp);
3444 #endif
3445  }
3446  if (bp->bgworker_notify)
3447  {
3448  /*
3449  * This backend may have been slated to receive SIGUSR1 when
3450  * some background worker started or stopped. Cancel those
3451  * notifications, as we don't want to signal PIDs that are not
3452  * PostgreSQL backends. This gets skipped in the (probably
3453  * very common) case where the backend has never requested any
3454  * such notifications.
3455  */
 /*
  * NOTE(review): listing line 3456 is missing; as written this block is
  * empty. Presumably it held the call that cancels the notifications
  * -- confirm against upstream.
  */
3457  }
3458  dlist_delete(iter.cur);
3459  free(bp);
3460  break;
3461  }
3462  }
3463 }
3464 
3465 /*
3466  * HandleChildCrash -- cleanup after failed backend, bgwriter, checkpointer,
3467  * walwriter, autovacuum, or background worker.
3468  *
3469  * The objectives here are to clean up our local state about the child
3470  * process, and to signal all other remaining children to quickdie.
3471  */
3472 static void
3473 HandleChildCrash(int pid, int exitstatus, const char *procname)
3474 {
3475  dlist_mutable_iter iter;
3476  slist_iter siter;
3477  Backend *bp;
3478  bool take_action;
3479 
3480  /*
3481  * We only log messages and send signals if this is the first process
3482  * crash and we're not doing an immediate shutdown; otherwise, we're only
3483  * here to update postmaster's idea of live processes. If we have already
3484  * signaled children, nonzero exit status is to be expected, so don't
3485  * clutter log.
3486  */
3487  take_action = !FatalError && Shutdown != ImmediateShutdown;
3488 
3489  if (take_action)
3490  {
3491  LogChildExit(LOG, procname, pid, exitstatus);
3492  ereport(LOG,
3493  (errmsg("terminating any other active server processes")));
3494  }
3495 
3496  /* Process background workers. */
 /*
  * NOTE(review): listing line 3497 is missing here. The block below reads
  * `siter.cur` and uses `continue`, so a list-iteration header presumably
  * belongs on this line -- confirm against upstream.
  */
3498  {
3499  RegisteredBgWorker *rw;
3500 
3501  rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
3502  if (rw->rw_pid == 0)
3503  continue; /* not running */
3504  if (rw->rw_pid == pid)
3505  {
3506  /*
3507  * Found entry for freshly-dead worker, so remove it.
3508  */
 /*
  * NOTE(review): listing line 3509 is missing (one statement ahead of the
  * dlist_delete below). Presumably it released the worker's child slot,
  * which is cleared a few lines further down -- confirm against upstream.
  */
3510  dlist_delete(&rw->rw_backend->elem);
3511 #ifdef EXEC_BACKEND
3512  ShmemBackendArrayRemove(rw->rw_backend);
3513 #endif
3514  free(rw->rw_backend);
3515  rw->rw_backend = NULL;
3516  rw->rw_pid = 0;
3517  rw->rw_child_slot = 0;
3518  /* don't reset crashed_at */
3519  /* don't report child stop, either */
3520  /* Keep looping so we can signal remaining workers */
3521  }
3522  else
3523  {
3524  /*
3525  * This worker is still alive. Unless we did so already, tell it
3526  * to commit hara-kiri.
3527  *
3528  * SIGQUIT is the special signal that says exit without proc_exit
3529  * and let the user know what's going on. But if SendStop is set
3530  * (-s on command line), then we send SIGSTOP instead, so that we
3531  * can get core dumps from all backends by hand.
3532  */
3533  if (take_action)
3534  {
3535  ereport(DEBUG2,
3536  (errmsg_internal("sending %s to process %d",
3537  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3538  (int) rw->rw_pid)));
 /*
  * NOTE(review): listing line 3539 is missing. The message above announces
  * a signal but nothing is sent in this copy; presumably a signal_child()
  * call like the one in the regular-backend loop below belongs here.
  */
3540  }
3541  }
3542  }
3543 
3544  /* Process regular backends */
3545  dlist_foreach_modify(iter, &BackendList)
3546  {
3547  bp = dlist_container(Backend, elem, iter.cur);
3548 
3549  if (bp->pid == pid)
3550  {
3551  /*
3552  * Found entry for freshly-dead backend, so remove it.
3553  */
3554  if (!bp->dead_end)
3555  {
 /*
  * NOTE(review): listing line 3556 is missing (one statement inside this
  * non-dead_end branch) -- restore from upstream.
  */
3557 #ifdef EXEC_BACKEND
3558  ShmemBackendArrayRemove(bp);
3559 #endif
3560  }
3561  dlist_delete(iter.cur);
3562  free(bp);
3563  /* Keep looping so we can signal remaining backends */
3564  }
3565  else
3566  {
3567  /*
3568  * This backend is still alive. Unless we did so already, tell it
3569  * to commit hara-kiri.
3570  *
3571  * SIGQUIT is the special signal that says exit without proc_exit
3572  * and let the user know what's going on. But if SendStop is set
3573  * (-s on command line), then we send SIGSTOP instead, so that we
3574  * can get core dumps from all backends by hand.
3575  *
3576  * We could exclude dead_end children here, but at least in the
3577  * SIGSTOP case it seems better to include them.
3578  *
3579  * Background workers were already processed above; ignore them
3580  * here.
3581  */
3582  if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
3583  continue;
3584 
3585  if (take_action)
3586  {
3587  ereport(DEBUG2,
3588  (errmsg_internal("sending %s to process %d",
3589  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3590  (int) bp->pid)));
3591  signal_child(bp->pid, (SendStop ? SIGSTOP : SIGQUIT));
3592  }
3593  }
3594  }
3595 
 /*
  * The remaining sections handle the auxiliary processes one by one: if the
  * dead PID is that process, its PID variable is cleared; otherwise, when
  * take_action is set and the process is running, it is signaled.
  */
3596  /* Take care of the startup process too */
3597  if (pid == StartupPID)
3598  {
3599  StartupPID = 0;
3600  /* Caller adjusts StartupStatus, so don't touch it here */
3601  }
3602  else if (StartupPID != 0 && take_action)
3603  {
3604  ereport(DEBUG2,
3605  (errmsg_internal("sending %s to process %d",
3606  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3607  (int) StartupPID)));
3608  signal_child(StartupPID, (SendStop ? SIGSTOP : SIGQUIT));
 /*
  * NOTE(review): listing line 3609 is missing (one statement following the
  * signal_child call above) -- restore from upstream.
  */
3610  }
3611 
3612  /* Take care of the bgwriter too */
3613  if (pid == BgWriterPID)
3614  BgWriterPID = 0;
3615  else if (BgWriterPID != 0 && take_action)
3616  {
3617  ereport(DEBUG2,
3618  (errmsg_internal("sending %s to process %d",
3619  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3620  (int) BgWriterPID)));
3621  signal_child(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
3622  }
3623 
3624  /* Take care of the checkpointer too */
3625  if (pid == CheckpointerPID)
3626  CheckpointerPID = 0;
3627  else if (CheckpointerPID != 0 && take_action)
3628  {
3629  ereport(DEBUG2,
3630  (errmsg_internal("sending %s to process %d",
3631  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3632  (int) CheckpointerPID)));
3633  signal_child(CheckpointerPID, (SendStop ? SIGSTOP : SIGQUIT));
3634  }
3635 
3636  /* Take care of the walwriter too */
3637  if (pid == WalWriterPID)
3638  WalWriterPID = 0;
3639  else if (WalWriterPID != 0 && take_action)
3640  {
3641  ereport(DEBUG2,
3642  (errmsg_internal("sending %s to process %d",
3643  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3644  (int) WalWriterPID)));
3645  signal_child(WalWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
3646  }
3647 
3648  /* Take care of the walreceiver too */
3649  if (pid == WalReceiverPID)
3650  WalReceiverPID = 0;
3651  else if (WalReceiverPID != 0 && take_action)
3652  {
3653  ereport(DEBUG2,
3654  (errmsg_internal("sending %s to process %d",
3655  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3656  (int) WalReceiverPID)));
3657  signal_child(WalReceiverPID, (SendStop ? SIGSTOP : SIGQUIT));
3658  }
3659 
3660  /* Take care of the autovacuum launcher too */
3661  if (pid == AutoVacPID)
3662  AutoVacPID = 0;
3663  else if (AutoVacPID != 0 && take_action)
3664  {
3665  ereport(DEBUG2,
3666  (errmsg_internal("sending %s to process %d",
3667  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3668  (int) AutoVacPID)));
3669  signal_child(AutoVacPID, (SendStop ? SIGSTOP : SIGQUIT));
3670  }
3671 
3672  /*
3673  * Force a power-cycle of the pgarch process too. (This isn't absolutely
3674  * necessary, but it seems like a good idea for robustness, and it
3675  * simplifies the state-machine logic in the case where a shutdown request
3676  * arrives during crash processing.)
3677  */
3678  if (PgArchPID != 0 && take_action)
3679  {
3680  ereport(DEBUG2,
3681  (errmsg_internal("sending %s to process %d",
3682  "SIGQUIT",
3683  (int) PgArchPID)));
3684  signal_child(PgArchPID, SIGQUIT);
3685  }
3686 
3687  /*
3688  * Force a power-cycle of the pgstat process too. (This isn't absolutely
3689  * necessary, but it seems like a good idea for robustness, and it
3690  * simplifies the state-machine logic in the case where a shutdown request
3691  * arrives during crash processing.)
3692  */
3693  if (PgStatPID != 0 && take_action)
3694  {
3695  ereport(DEBUG2,
3696  (errmsg_internal("sending %s to process %d",
3697  "SIGQUIT",
3698  (int) PgStatPID)));
3699  signal_child(PgStatPID, SIGQUIT);
 /*
  * NOTE(review): listing line 3700 is missing (one statement following the
  * signal_child call above) -- restore from upstream.
  */
3701  }
3702 
3703  /* We do NOT restart the syslogger */
3704 
3705  if (Shutdown != ImmediateShutdown)
3706  FatalError = true;
3707 
3708  /* We now transit into a state of waiting for children to die */
3709  if (pmState == PM_RECOVERY ||
3710  pmState == PM_HOT_STANDBY ||
3711  pmState == PM_RUN ||
3712  pmState == PM_WAIT_BACKUP ||
 /* NOTE(review): listing line 3713 is missing; presumably one more pmState alternative in this || chain. */
3714  pmState == PM_SHUTDOWN)
 /*
  * NOTE(review): listing line 3715 (the statement controlled by the `if`
  * above) is missing. As this copy stands, the AbortStartTime test below
  * would become the conditional body. Presumably the missing line assigns
  * the new pmState announced by the comment above -- restore from upstream.
  */
3716 
3717  /*
3718  * .. and if this doesn't happen quickly enough, now the clock is ticking
3719  * for us to kill them without mercy.
3720  */
3721  if (AbortStartTime == 0)
3722  AbortStartTime = time(NULL);
3723 }
3724 
3725 /*
3726  * Log the death of a child process.
3727  */
3728 static void
3729 LogChildExit(int lev, const char *procname, int pid, int exitstatus)
3730 {
3731  /*
3732  * size of activity_buffer is arbitrary, but set equal to default
3733  * track_activity_query_size
3734  */
3735  char activity_buffer[1024];
3736  const char *activity = NULL;
3737 
3738  if (!EXIT_STATUS_0(exitstatus))
3739  activity = pgstat_get_crashed_backend_activity(pid,
3740  activity_buffer,
3741  sizeof(activity_buffer));
3742 
3743  if (WIFEXITED(exitstatus))
3744  ereport(lev,
3745 
3746  /*------
3747  translator: %s is a noun phrase describing a child process, such as
3748  "server process" */
3749  (errmsg("%s (PID %d) exited with exit code %d",
3750  procname, pid, WEXITSTATUS(exitstatus)),
3751  activity ? errdetail("Failed process was running: %s", activity) : 0));
3752  else if (WIFSIGNALED(exitstatus))
3753  {
3754 #if defined(WIN32)
3755  ereport(lev,
3756 
3757  /*------
3758  translator: %s is a noun phrase describing a child process, such as
3759  "server process" */
3760  (errmsg("%s (PID %d) was terminated by exception 0x%X",
3761  procname, pid, WTERMSIG(exitstatus)),
3762  errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
3763  activity ? errdetail("Failed process was running: %s", activity) : 0));
3764 #else
3765  ereport(lev,
3766 
3767  /*------
3768  translator: %s is a noun phrase describing a child process, such as
3769  "server process" */
3770  (errmsg("%s (PID %d) was terminated by signal %d: %s",
3771  procname, pid, WTERMSIG(exitstatus),
3772  pg_strsignal(WTERMSIG(exitstatus))),
3773  activity ? errdetail("Failed process was running: %s", activity) : 0));
3774 #endif
3775  }
3776  else
3777  ereport(lev,
3778 
3779  /*------
3780  translator: %s is a noun phrase describing a child process, such as
3781  "server process" */
3782  (errmsg("%s (PID %d) exited with unrecognized status %d",
3783  procname, pid, exitstatus),
3784  activity ? errdetail("Failed process was running: %s", activity) : 0));
3785 }
3786 
3787 /*
3788  * Advance the postmaster's state machine and take actions as appropriate
3789  *
3790  * This is common code for pmdie(), reaper() and sigusr1_handler(), which
3791  * receive the signals that might mean we need to change state.
3792  */
3793 static void
 /*
  * NOTE(review): listing line 3794 (the function name and parameter list)
  * is absent from this copy. Presumably this is PostmasterStateMachine(),
  * the name used in the archiver comment in the reaper -- restore from
  * upstream. Every other "listing line ... missing" note in this function
  * marks a line that the extraction dropped; none of them has been
  * reconstructed here.
  */
3795 {
3796  if (pmState == PM_WAIT_BACKUP)
3797  {
3798  /*
3799  * PM_WAIT_BACKUP state ends when online backup mode is not active.
3800  */
3801  if (!BackupInProgress())
 /* NOTE(review): listing line 3802 missing -- the statement controlled by this `if`; presumably a pmState change. */
3803  }
3804 
3805  if (pmState == PM_WAIT_READONLY)
3806  {
3807  /*
3808  * PM_WAIT_READONLY state ends when we have no regular backends that
3809  * have been started during recovery. We kill the startup and
3810  * walreceiver processes and transition to PM_WAIT_BACKENDS. Ideally,
3811  * we might like to kill these processes first and then wait for
3812  * backends to die off, but that doesn't work at present because
3813  * killing the startup process doesn't release its locks.
3814  */
 /* NOTE(review): listing line 3815 missing -- presumably the `if (...)` test for "no regular backends" that guards the block below. */
3816  {
3817  if (StartupPID != 0)
3818  signal_child(StartupPID, SIGTERM);
3819  if (WalReceiverPID != 0)
3820  signal_child(WalReceiverPID, SIGTERM);
 /* NOTE(review): listing line 3821 missing -- presumably the transition to PM_WAIT_BACKENDS mentioned in the comment above. */
3822  }
3823  }
3824 
3825  /*
3826  * If we are in a state-machine state that implies waiting for backends to
3827  * exit, see if they're all gone, and change state if so.
3828  */
3829  if (pmState == PM_WAIT_BACKENDS)
3830  {
3831  /*
3832  * PM_WAIT_BACKENDS state ends when we have no regular backends
3833  * (including autovac workers), no bgworkers (including unconnected
3834  * ones), and no walwriter, autovac launcher or bgwriter. If we are
3835  * doing crash recovery or an immediate shutdown then we expect the
3836  * checkpointer to exit as well, otherwise not. The archiver, stats,
3837  * and syslogger processes are disregarded since they are not
3838  * connected to shared memory; we also disregard dead_end children
3839  * here. Walsenders are also disregarded, they will be terminated
3840  * later after writing the checkpoint record, like the archiver
3841  * process.
3842  */
 /* NOTE(review): listing line 3843 missing -- the opening `if (` and first term of the condition continued below. */
3844  StartupPID == 0 &&
3845  WalReceiverPID == 0 &&
3846  BgWriterPID == 0 &&
3847  (CheckpointerPID == 0 ||
 /* NOTE(review): listing line 3848 missing -- the second alternative and closing parenthesis of the checkpointer term. */
3849  WalWriterPID == 0 &&
3850  AutoVacPID == 0)
3851  {
 /* NOTE(review): listing line 3852 missing -- the `if (...)` that pairs with the `else` further down. */
3853  {
3854  /*
3855  * Start waiting for dead_end children to die. This state
3856  * change causes ServerLoop to stop creating new ones.
3857  */
 /* NOTE(review): listing line 3858 missing -- presumably the pmState assignment the comment above describes. */
3859 
3860  /*
3861  * We already SIGQUIT'd the archiver and stats processes, if
3862  * any, when we started immediate shutdown or entered
3863  * FatalError state.
3864  */
3865  }
3866  else
3867  {
3868  /*
3869  * If we get here, we are proceeding with normal shutdown. All
3870  * the regular children are gone, and it's time to tell the
3871  * checkpointer to do a shutdown checkpoint.
3872  */
 /* NOTE(review): listing line 3873 missing -- one statement before the checkpointer start. */
3874  /* Start the checkpointer if not running */
3875  if (CheckpointerPID == 0)
 /* NOTE(review): listing line 3876 missing -- the body of this `if`; presumably it launches a checkpointer and stores its PID. */
3877  /* And tell it to shut down */
3878  if (CheckpointerPID != 0)
3879  {
 /* NOTE(review): listing line 3880 missing -- presumably the signal that asks the checkpointer to shut down. */
3881  pmState = PM_SHUTDOWN;
3882  }
3883  else
3884  {
3885  /*
3886  * If we failed to fork a checkpointer, just shut down.
3887  * Any required cleanup will happen at next restart. We
3888  * set FatalError so that an "abnormal shutdown" message
3889  * gets logged when we exit.
3890  */
3891  FatalError = true;
 /* NOTE(review): listing line 3892 missing -- one statement after FatalError is set; presumably a pmState change. */
3893 
3894  /* Kill the walsenders, archiver and stats collector too */
 /* NOTE(review): listing line 3895 missing -- presumably the call that signals the walsenders. */
3896  if (PgArchPID != 0)
 /* NOTE(review): listing line 3897 missing -- body of the PgArchPID test; as written the next `if` would become its body. */
3898  if (PgStatPID != 0)
 /* NOTE(review): listing line 3899 missing -- body of the PgStatPID test. */
3900  }
3901  }
3902  }
3903  }
3904 
3905  if (pmState == PM_SHUTDOWN_2)
3906  {
3907  /*
3908  * PM_SHUTDOWN_2 state ends when there's no other children than
3909  * dead_end children left. There shouldn't be any regular backends
3910  * left by now anyway; what we're really waiting for is walsenders and
3911  * archiver.
3912  */
3913  if (PgArchPID == 0 && CountChildren(BACKEND_TYPE_ALL) == 0)
3914  {
 /* NOTE(review): listing line 3915 missing -- the only statement of this block; presumably a pmState change. */
3916  }
3917  }
3918 
3919  if (pmState == PM_WAIT_DEAD_END)
3920  {
3921  /*
3922  * PM_WAIT_DEAD_END state ends when the BackendList is entirely empty
3923  * (ie, no dead_end children remain), and the archiver and stats
3924  * collector are gone too.
3925  *
3926  * The reason we wait for those two is to protect them against a new
3927  * postmaster starting conflicting subprocesses; this isn't an
3928  * ironclad protection, but it at least helps in the
3929  * shutdown-and-immediately-restart scenario. Note that they have
3930  * already been sent appropriate shutdown signals, either during a
3931  * normal state transition leading up to PM_WAIT_DEAD_END, or during
3932  * FatalError processing.
3933  */
3934  if (dlist_is_empty(&BackendList) &&
3935  PgArchPID == 0 && PgStatPID == 0)
3936  {
3937  /* These other guys should be dead already */
3938  Assert(StartupPID == 0);
3939  Assert(WalReceiverPID == 0);
3940  Assert(BgWriterPID == 0);
3941  Assert(CheckpointerPID == 0);
3942  Assert(WalWriterPID == 0);
3943  Assert(AutoVacPID == 0);
3944  /* syslogger is not considered here */
 /* NOTE(review): listing line 3945 missing -- presumably the move to PM_NO_CHILDREN, the state tested further down. */
3946  }
3947  }
3948 
3949  /*
3950  * If we've been told to shut down, we exit as soon as there are no
3951  * remaining children. If there was a crash, cleanup will occur at the
3952  * next startup. (Before PostgreSQL 8.3, we tried to recover from the
3953  * crash before exiting, but that seems unwise if we are quitting because
3954  * we got SIGTERM from init --- there may well not be time for recovery
3955  * before init decides to SIGKILL us.)
3956  *
3957  * Note that the syslogger continues to run. It will exit when it sees
3958  * EOF on its input pipe, which happens when there are no more upstream
3959  * processes.
3960  */
 /* NOTE(review): listing line 3961 missing -- the `if (...)` guarding the exit block below; without it the postmaster would always exit here. */
3962  {
3963  if (FatalError)
3964  {
3965  ereport(LOG, (errmsg("abnormal database system shutdown")));
3966  ExitPostmaster(1);
3967  }
3968  else
3969  {
3970  /*
3971  * Terminate exclusive backup mode to avoid recovery after a clean
3972  * fast shutdown. Since an exclusive backup can only be taken
3973  * during normal running (and not, for example, while running
3974  * under Hot Standby) it only makes sense to do this if we reached
3975  * normal running. If we're still in recovery, the backup file is
3976  * one we're recovering *from*, and we must keep it around so that
3977  * recovery restarts from the right place.
3978  */
 /* NOTE(review): listing line 3979 missing -- presumably the "reached normal running" test that guards CancelBackup(). */
3980  CancelBackup();
3981 
3982  /* Normal exit from the postmaster is here */
3983  ExitPostmaster(0);
3984  }
3985  }
3986 
3987  /*
3988  * If the startup process failed, or the user does not want an automatic
3989  * restart after backend crashes, wait for all non-syslogger children to
3990  * exit, and then exit postmaster. We don't try to reinitialize when the
3991  * startup process fails, because more than likely it will just fail again
3992  * and we will keep trying forever.
3993  */
3994  if (pmState == PM_NO_CHILDREN &&
 /* NOTE(review): listing line 3995 missing -- the remainder of this condition (startup failure / no automatic restart, per the comment). */
3996  ExitPostmaster(1);
3997 
3998  /*
3999  * If we need to recover from a crash, wait for all non-syslogger children
4000  * to exit, then reset shmem and StartupDataBase.
4001  */
4002  if (FatalError && pmState == PM_NO_CHILDREN)
4003  {
4004  ereport(LOG,
4005  (errmsg("all server processes terminated; reinitializing")));
4006 
4007  /* allow background workers to immediately restart */
 /* NOTE(review): listing line 4008 missing -- the call that the comment above describes. */
4009 
4010  shmem_exit(1);
4011 
4012  /* re-read control file into local memory */
 /* NOTE(review): listing line 4013 missing -- the call that the comment above describes. */
4014 
4015  reset_shared();
4016 
 /* NOTE(review): listing line 4017 missing -- presumably launches the startup process and stores its PID, given the Assert that follows. */
4018  Assert(StartupPID != 0);
 /* NOTE(review): listing line 4019 missing -- one statement between the Assert and the pmState change. */
4020  pmState = PM_STARTUP;
4021  /* crash recovery started, reset SIGKILL flag */
4022  AbortStartTime = 0;
4023  }
4024 }
4025 
4026 
4027 /*
4028  * Send a signal to a postmaster child process
4029  *
4030  * On systems that have setsid(), each child process sets itself up as a
4031  * process group leader. For signals that are generally interpreted in the
4032  * appropriate fashion, we signal the entire process group not just the
4033  * direct child process. This allows us to, for example, SIGQUIT a blocked
4034  * archive_recovery script, or SIGINT a script being run by a backend via
4035  * system().
4036  *
4037  * There is a race condition for recently-forked children: they might not
4038  * have executed setsid() yet. So we signal the child directly as well as
4039  * the group. We assume such a child will handle the signal before trying
4040  * to spawn any grandchild processes. We also assume that signaling the
4041  * child twice will not cause any problems.
4042  */
4043 static void
4044 signal_child(pid_t pid, int signal)
4045 {
4046  if (kill(pid, signal) < 0)
4047  elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) pid, signal);
4048 #ifdef HAVE_SETSID
4049  switch (signal)
4050  {
4051  case SIGINT:
4052  case SIGTERM:
4053  case SIGQUIT:
4054  case SIGSTOP:
4055  case SIGKILL:
4056  if (kill(-pid, signal) < 0)
4057  elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) (-pid), signal);
4058  break;
4059  default:
4060  break;
4061  }
4062 #endif
4063 }
4064 
4065 /*
4066  * Send a signal to the targeted children (but NOT special children;
4067  * dead_end children are never signaled, either).
4068  */
4069 static bool
4070 SignalSomeChildren(int signal, int target)
4071 {
4072  dlist_iter iter;
4073  bool signaled = false;
4074 
 /* Returns true if at least one child was signaled, false otherwise */
4075  dlist_foreach(iter, &BackendList)
4076  {
4077  Backend *bp = dlist_container(Backend, elem, iter.cur);
4078 
4079  if (bp->dead_end)
4080  continue;
4081 
4082  /*
4083  * Since target == BACKEND_TYPE_ALL is the most common case, we test
4084  * it first and avoid touching shared memory for every child.
4085  */
4086  if (target != BACKEND_TYPE_ALL)
4087  {
4088  /*
4089  * Assign bkend_type for any recently announced WAL Sender
4090  * processes.
4091  */
4092  if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
 /*
  * NOTE(review): listing lines 4093-4094 are missing: the rest of this
  * condition and the statement it controls (presumably the bkend_type
  * reassignment described above). As this copy stands, the `&&` is left
  * dangling. Restore from upstream.
  */
4095 
 /* `target` is used as a bit mask of backend types; skip children not in it */
4096  if (!(target & bp->bkend_type))
4097  continue;
4098  }
4099 
4100  ereport(DEBUG4,
4101  (errmsg_internal("sending signal %d to process %d",
4102  signal, (int) bp->pid)));
4103  signal_child(bp->pid, signal);
4104  signaled = true;
4105  }
4106  return signaled;
4107 }
4108 
4109 /*
4110  * Send a termination signal to children. This considers all of our children
4111  * processes, except syslogger and dead_end backends.
4112  */
4113 static void
 /*
  * NOTE(review): listing line 4114 (the function name and parameter list)
  * is absent from this copy. The body reads a single `signal` value, so the
  * header presumably declares one int parameter of that name; the function
  * name itself is not visible anywhere in this excerpt -- restore it from
  * the upstream file.
  */
4115 {
 /* Signal the children first, then each special child that is running */
4116  SignalChildren(signal);
4117  if (StartupPID != 0)
4118  {
4119  signal_child(StartupPID, signal);
4120  if (signal == SIGQUIT || signal == SIGKILL)
 /*
  * NOTE(review): listing line 4121 (the statement controlled by the `if`
  * above) is missing; as written the `if` has no body before the closing
  * brace. Restore from upstream.
  */
4122  }
4123  if (BgWriterPID != 0)
4124  signal_child(BgWriterPID, signal);
4125  if (CheckpointerPID != 0)
4126  signal_child(CheckpointerPID, signal);
4127  if (WalWriterPID != 0)
4128  signal_child(WalWriterPID, signal);
4129  if (WalReceiverPID != 0)
4130  signal_child(WalReceiverPID, signal);
4131  if (AutoVacPID != 0)
4132  signal_child(AutoVacPID, signal);
4133  if (PgArchPID != 0)
4134  signal_child(PgArchPID, signal);
4135  if (PgStatPID != 0)
4136  signal_child(PgStatPID, signal);
4137 }
4138 
4139 /*
4140  * BackendStartup -- start backend process
4141  *
4142  * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
4143  *
4144  * Note: if you change this code, also consider StartAutovacuumWorker.
4145  */
4146 static int
 /*
  * NOTE(review): listing line 4147 (the function name and parameter list)
  * is absent from this copy. The header comment names the function
  * BackendStartup and the body uses a `port` pointer, so presumably that is
  * the sole parameter -- restore from upstream.
  */
4148 {
4149  Backend *bn; /* for backend cleanup */
4150  pid_t pid;
4151 
4152  /*
4153  * Create backend data structure. Better before the fork() so we can
4154  * handle failure cleanly.
4155  */
4156  bn = (Backend *) malloc(sizeof(Backend));
4157  if (!bn)
4158  {
4159  ereport(LOG,
4160  (errcode(ERRCODE_OUT_OF_MEMORY),
4161  errmsg("out of memory")));
4162  return STATUS_ERROR;
4163  }
4164 
4165  /*
4166  * Compute the cancel key that will be assigned to this backend. The
4167  * backend will have its own copy in the forked-off process' value of
4168  * MyCancelKey, so that it can transmit the key to the frontend.
4169  */
 /*
  * NOTE(review): listing line 4170 is missing. It presumably held the
  * `if (...)` that attempts to generate the cancel key and detects failure;
  * without it the error block below would always run. Restore from upstream.
  */
4171  {
4172  free(bn);
4173  ereport(LOG,
4174  (errcode(ERRCODE_INTERNAL_ERROR),
4175  errmsg("could not generate random cancel key")));
4176  return STATUS_ERROR;
4177  }
4178 
4179  bn->cancel_key = MyCancelKey;
4180 
4181  /* Pass down canAcceptConnections state */
 /* NOTE(review): listing line 4182 is missing -- presumably the statement that sets port->canAcceptConnections, which is read just below. */
4183  bn->dead_end = (port->canAcceptConnections != CAC_OK &&
 /* NOTE(review): listing line 4184 is missing -- the remainder of the dead_end expression; the `&&` above is left dangling. */
4185 
4186  /*
4187  * Unless it's a dead_end child, assign it a child slot number
4188  */
4189  if (!bn->dead_end)
 /* NOTE(review): listing line 4190 is missing -- the `if` branch, presumably assigning bn->child_slot as the else branch does. */
4191  else
4192  bn->child_slot = 0;
4193 
4194  /* Hasn't asked to be notified about any bgworkers yet */
4195  bn->bgworker_notify = false;
4196 
4197 #ifdef EXEC_BACKEND
4198  pid = backend_forkexec(port);
4199 #else /* !EXEC_BACKEND */
4200  pid = fork_process();
4201  if (pid == 0) /* child */
4202  {
 /* The child has no use for the postmaster's bookkeeping entry */
4203  free(bn);
4204 
4205  /* Detangle from postmaster */
 /* NOTE(review): listing line 4206 is missing -- the call that the comment above describes. */
4207 
4208  /* Close the postmaster's sockets */
4209  ClosePostmasterPorts(false);
4210 
4211  /* Perform additional initialization and collect startup packet */
4212  BackendInitialize(port);
4213 
4214  /* And run the backend */
4215  BackendRun(port);
4216  }
4217 #endif /* EXEC_BACKEND */
4218 
4219  if (pid < 0)
4220  {
4221  /* in parent, fork failed */
 /* errno is saved and put back around the cleanup so the %m below reports the fork failure */
4222  int save_errno = errno;
4223 
4224  if (!bn->dead_end)
 /*
  * NOTE(review): listing line 4225 (the statement controlled by the `if`
  * above) is missing. As this copy stands, free(bn) on the next line would
  * become the conditional body and dead_end entries would leak. Presumably
  * the missing line releases the child slot -- restore from upstream.
  */
4226  free(bn);
4227  errno = save_errno;
4228  ereport(LOG,
4229  (errmsg("could not fork new process for connection: %m")));
4230  report_fork_failure_to_client(port, save_errno);
4231  return STATUS_ERROR;
4232  }
4233 
4234  /* in parent, successful fork */
4235  ereport(DEBUG2,
4236  (errmsg_internal("forked new backend, pid=%d socket=%d",
4237  (int) pid, (int) port->sock)));
4238 
4239  /*
4240  * Everything's been successful, it's safe to add this backend to our list
4241  * of backends.
4242  */
4243  bn->pid = pid;
4244  bn->bkend_type = BACKEND_TYPE_NORMAL; /* Can change later to WALSND */
4245  dlist_push_head(&BackendList, &bn->elem);
4246 
4247 #ifdef EXEC_BACKEND
4248  if (!bn->dead_end)
4249  ShmemBackendArrayAdd(bn);
4250 #endif
4251 
4252  return STATUS_OK;
4253 }
4254 
4255 /*
4256  * Try to report backend fork() failure to client before we close the
4257  * connection. Since we do not care to risk blocking the postmaster on
4258  * this connection, we set the connection to non-blocking and try only once.
4259  *
4260  * This is grungy special-purpose code; we cannot use backend libpq since
4261  * it's not up and running.
4262  */
4263 static void
 /*
  * NOTE(review): listing line 4264 (the function name and parameter list)
  * is absent from this copy. The call site in BackendStartup is
  * report_fork_failure_to_client(port, save_errno) and the body reads
  * `port` and `errnum`, so presumably those are the two parameters --
  * restore from upstream.
  */
4265 {
4266  char buffer[1000];
4267  int rc;
4268 
4269  /* Format the error message packet (always V2 protocol) */
 /* snprintf bounds the message to the buffer size, so an over-long text is truncated */
4270  snprintf(buffer, sizeof(buffer), "E%s%s\n",
4271  _("could not fork new process for connection: "),
4272  strerror(errnum));
4273 
4274  /* Set port to non-blocking. Don't do send() if this fails */
4275  if (!pg_set_noblock(port->sock))
4276  return;
4277 
4278  /* We'll retry after EINTR, but ignore all other failures */
 /* The "+ 1" sends the terminating NUL byte along with the text */
4279  do
4280  {
4281  rc = send(port->sock, buffer, strlen(buffer) + 1, 0);
4282  } while (rc < 0 && errno == EINTR);
4283 }
4284 
4285 
4286 /*
4287  * BackendInitialize -- initialize an interactive (postmaster-child)
4288  * backend process, and collect the client's startup packet.
4289  *
4290  * returns: nothing. Will not return at all if there's any failure.
4291  *
4292  * Note: this code does not depend on having any access to shared memory.
4293  * In the EXEC_BACKEND case, we are physically attached to shared memory
4294  * but have not yet set up most of our local pointers to shmem structures.
 *
 * The body works on "port", the connection state of the client: it is
 * published as MyProcPort, and its remote_host, remote_port and (when a
 * reverse lookup produced a name) remote_hostname fields are filled in.
 * The process title is then built from port->user_name,
 * port->database_name and the remote address.
 *
 * NOTE(review): several lines of this function are absent from this copy
 * of the file (the listing numbers have gaps at 4297, 4309, 4351, 4353,
 * 4421-4422, 4443 and 4459), including the declarator line. Read the
 * remaining statements with that in mind.
4295  */
4296 static void
4298 {
4299  int status;
4300  int ret;
4301  char remote_host[NI_MAXHOST];
4302  char remote_port[NI_MAXSERV];
4303  StringInfoData ps_data;
4304 
4305  /* Save port etc. for ps status */
4306  MyProcPort = port;
4307 
4308  /* Tell fd.c about the long-lived FD associated with the port */
4310 
4311  /*
4312  * PreAuthDelay is a debugging aid for investigating problems in the
4313  * authentication cycle: it can be set in postgresql.conf to allow time to
4314  * attach to the newly-forked backend with a debugger. (See also
4315  * PostAuthDelay, which we allow clients to pass through PGOPTIONS, but it
4316  * is not honored until after authentication.)
4317  */
 /* PreAuthDelay is in seconds; pg_usleep() is given microseconds */
4318  if (PreAuthDelay > 0)
4319  pg_usleep(PreAuthDelay * 1000000L);
4320 
4321  /* This flag will remain set until InitPostgres finishes authentication */
4322  ClientAuthInProgress = true; /* limit visibility of log messages */
4323 
4324  /* set these to empty in case they are needed before we set them up */
4325  port->remote_host = "";
4326  port->remote_port = "";
4327 
4328  /*
4329  * Initialize libpq and enable reporting of ereport errors to the client.
4330  * Must do this now because authentication uses libpq to send messages.
4331  */
4332  pq_init(); /* initialize libpq to talk to client */
4333  whereToSendOutput = DestRemote; /* now safe to ereport to client */
4334 
4335  /*
4336  * We arrange for a simple exit(1) if we receive SIGTERM or SIGQUIT or
4337  * timeout while trying to collect the startup packet. Otherwise the
4338  * postmaster cannot shutdown the database FAST or IMMED cleanly if a
4339  * buggy client fails to send the packet promptly. XXX it follows that
4340  * the remainder of this function must tolerate losing control at any
4341  * instant. Likewise, any pg_on_exit_callback registered before or during
4342  * this function must be prepared to execute at any instant between here
4343  * and the end of this function. Furthermore, affected callbacks execute
4344  * partially or not at all when a second exit-inducing signal arrives
4345  * after proc_exit_prepare() decrements on_proc_exit_index. (Thanks to
4346  * that mechanic, callbacks need not anticipate more than one call.) This
4347  * is fragile; it ought to instead follow the norm of handling interrupts
4348  * at selected, safe opportunities.
4349  */
4350  pqsignal(SIGTERM, startup_die);
4352  InitializeTimeouts(); /* establishes SIGALRM handler */
4354 
4355  /*
4356  * Get the remote host name and port for logging and status display.
4357  */
 /*
  * Both buffers start out empty so they are valid strings even if the
  * lookup fails. A failure is only reported as a WARNING. The host is
  * requested in numeric form unless log_hostname is set; the port is
  * always requested in numeric form.
  */
4358  remote_host[0] = '\0';
4359  remote_port[0] = '\0';
4360  if ((ret = pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen,
4361  remote_host, sizeof(remote_host),
4362  remote_port, sizeof(remote_port),
4363  (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV)) != 0)
4364  ereport(WARNING,
4365  (errmsg_internal("pg_getnameinfo_all() failed: %s",
4366  gai_strerror(ret))));
4367 
4368  /*
4369  * Save remote_host and remote_port in port structure (after this, they
4370  * will appear in log_line_prefix data for log messages).
4371  */
 /*
  * NOTE(review): the strdup() results are stored without a NULL check,
  * and port->remote_port[0] is dereferenced further down.
  */
4372  port->remote_host = strdup(remote_host);
4373  port->remote_port = strdup(remote_port);
4374 
4375  /* And now we can issue the Log_connections message, if wanted */
4376  if (Log_connections)
4377  {
4378  if (remote_port[0])
4379  ereport(LOG,
4380  (errmsg("connection received: host=%s port=%s",
4381  remote_host,
4382  remote_port)));
4383  else
4384  ereport(LOG,
4385  (errmsg("connection received: host=%s",
4386  remote_host)));
4387  }
4388 
4389  /*
4390  * If we did a reverse lookup to name, we might as well save the results
4391  * rather than possibly repeating the lookup during authentication.
4392  *
4393  * Note that we don't want to specify NI_NAMEREQD above, because then we'd
4394  * get nothing useful for a client without an rDNS entry. Therefore, we
4395  * must check whether we got a numeric IPv4 or IPv6 address, and not save
4396  * it into remote_hostname if so. (This test is conservative and might
4397  * sometimes classify a hostname as numeric, but an error in that
4398  * direction is safe; it only results in a possible extra lookup.)
4399  */
 /*
  * The two strspn() tests require at least one character outside the
  * IPv4 alphabet (digits and '.') and at least one outside the IPv6
  * alphabet (hex digits and ':').
  */
4400  if (log_hostname &&
4401  ret == 0 &&
4402  strspn(remote_host, "0123456789.") < strlen(remote_host) &&
4403  strspn(remote_host, "0123456789ABCDEFabcdef:") < strlen(remote_host))
4404  port->remote_hostname = strdup(remote_host);
4405 
4406  /*
4407  * Ready to begin client interaction. We will give up and exit(1) after a
4408  * time delay, so that a broken client can't hog a connection
4409  * indefinitely. PreAuthDelay and any DNS interactions above don't count
4410  * against the time limit.
4411  *
4412  * Note: AuthenticationTimeout is applied here while waiting for the
4413  * startup packet, and then again in InitPostgres for the duration of any
4414  * authentication operations. So a hostile client could tie up the
4415  * process for nearly twice AuthenticationTimeout before we kick him off.
4416  *
4417  * Note: because PostgresMain will call InitializeTimeouts again, the
4418  * registration of STARTUP_PACKET_TIMEOUT will be lost. This is okay
4419  * since we never use it again after this function.
4420  */
4423 
4424  /*
4425  * Receive the startup packet (which might turn out to be a cancel request
4426  * packet).
4427  */
4428  status = ProcessStartupPacket(port, false, false);
4429 
4430  /*
4431  * Stop here if it was bad or a cancel packet. ProcessStartupPacket
4432  * already did any appropriate error reporting.
4433  */
4434  if (status != STATUS_OK)
4435  proc_exit(0);
4436 
4437  /*
4438  * Now that we have the user and database name, we can set the process
4439  * title for ps. It's good to do this as early as possible in startup.
4440  */
 /*
  * NOTE(review): listing line 4443 (the statement controlled by the
  * first "if" below) is missing in this copy, so the "if" appears to
  * guard the user_name append; do not take that pairing at face value.
  */
4441  initStringInfo(&ps_data);
4442  if (am_walsender)
4444  appendStringInfo(&ps_data, "%s ", port->user_name);
4445  if (!am_walsender)
4446  appendStringInfo(&ps_data, "%s ", port->database_name);
4447  appendStringInfo(&ps_data, "%s", port->remote_host);
4448  if (port->remote_port[0] != '\0')
4449  appendStringInfo(&ps_data, "(%s)", port->remote_port);
4450 
 /* The assembled title buffer is released right after init_ps_display() */
4451  init_ps_display(ps_data.data);
4452  pfree(ps_data.data);
4453 
4454  set_ps_display("initializing");
4455 
4456  /*
4457  * Disable the timeout, and prevent SIGTERM/SIGQUIT again.
4458  */
4460  PG_SETMASK(&BlockSig);
4461 }
4462 
4463 
4464 /*
4465  * BackendRun -- set up the backend's argument list and invoke PostgresMain()
4466  *
4467  * returns:
4468  * Nothing: the function is declared void and its last statement hands
4469  * control to PostgresMain().
 *
 * The argument vector consists of the fixed program name "postgres"
 * followed by whatever pg_split_opts() extracts from ExtraOptions, and is
 * allocated in TopMemoryContext. port->database_name and port->user_name
 * are passed to PostgresMain().
 *
 * NOTE(review): the declarator line and listing line 4521 are missing
 * from this copy of the file.
4470  */
4471 static void
4473 {
4474  char **av;
4475  int maxac;
4476  int ac;
4477  int i;
4478 
4479  /*
4480  * Now, build the argv vector that will be given to PostgresMain.
4481  *
4482  * The maximum possible number of commandline arguments that could come
4483  * from ExtraOptions is (strlen(ExtraOptions) + 1) / 2; see
4484  * pg_split_opts().
4485  */
 /* The two fixed slots are "postgres" and the terminating NULL pointer */
4486  maxac = 2; /* for fixed args supplied below */
4487  maxac += (strlen(ExtraOptions) + 1) / 2;
4488 
4489  av = (char **) MemoryContextAlloc(TopMemoryContext,
4490  maxac * sizeof(char *));
4491  ac = 0;
4492 
4493  av[ac++] = "postgres";
4494 
4495  /*
4496  * Pass any backend switches specified with -o on the postmaster's own
4497  * command line. We assume these are secure.
4498  */
4499  pg_split_opts(av, &ac, ExtraOptions);
4500 
4501  av[ac] = NULL;
4502 
 /* Sanity check only; it runs after the array has already been filled */
4503  Assert(ac < maxac);
4504 
4505  /*
4506  * Debug: print arguments being passed to backend
4507  */
 /* One DEBUG3 message per argument, bracketed by "(" and ")" messages */
4508  ereport(DEBUG3,
4509  (errmsg_internal("%s child[%d]: starting with (",
4510  progname, (int) getpid())));
4511  for (i = 0; i < ac; ++i)
4512  ereport(DEBUG3,
4513  (errmsg_internal("\t%s", av[i])));
4514  ereport(DEBUG3,
4515  (errmsg_internal(")")));
4516 
4517  /*
4518  * Make sure we aren't in PostmasterContext anymore. (We can't delete it
4519  * just yet, though, because InitPostgres will need the HBA data.)
4520  */
4522 
4523  PostgresMain(ac, av, port->database_name, port->user_name);
4524 }
4525 
4526 
4527 #ifdef EXEC_BACKEND
4528 
4529 /*
4530  * postmaster_forkexec -- fork and exec a postmaster subprocess
4531  *
4532  * The caller must have set up the argv array already, except for argv[2]
4533  * which will be filled with the name of the temp variable file.
4534  *
4535  * Returns the child process PID, or -1 on fork failure (a suitable error
4536  * message has been logged on failure).
4537  *
4538  * All uses of this routine will dispatch to SubPostmasterMain in the
4539  * child process.
4540  */
4541 pid_t
4542 postmaster_forkexec(int argc, char *argv[])
4543 {
4544  Port port;
4545 
4546  /* This entry point passes dummy values for the Port variables */
4547  memset(&port, 0, sizeof(port));
4548  return internal_forkexec(argc, argv, &port);
4549 }
4550 
4551 /*
4552  * backend_forkexec -- fork/exec off a backend process
4553  *
4554  * Some operating systems (WIN32) don't have fork() so we have to simulate
4555  * it by storing parameters that need to be passed to the child and
4556  * then create a new child process.
4557  *
4558  * returns the pid of the fork/exec'd process, or -1 on failure
4559  */
4560 static pid_t
4561 backend_forkexec(Port *port)
4562 {
4563  char *av[4];
4564  int ac = 0;
4565 
4566  av[ac++] = "postgres";
4567  av[ac++] = "--forkbackend";
4568  av[ac++] = NULL; /* filled in by internal_forkexec */
4569 
4570  av[ac] = NULL;
4571  Assert(ac < lengthof(av));
4572 
4573  return internal_forkexec(ac, av, port);
4574 }
4575 
4576 #ifndef WIN32
4577 
4578 /*
4579  * internal_forkexec non-win32 implementation
4580  *
4581  * - writes out backend variables to the parameter file
4582  * - fork():s, and then exec():s the child process
 *
 * argc/argv: the child's command line. The asserts below require
 * argc >= 3, a NULL terminator at argv[argc], an argv[1] starting with
 * "--fork", and a NULL argv[2]; argv[2] is then pointed at the temp file
 * name, which lives in a local buffer of this function.
 * port: passed to save_backend_variables().
 *
 * Returns the value of fork_process() in the parent, or -1 if saving or
 * writing the parameters failed. The child never returns from here: it
 * either exec()s or exits with status 1.
 *
 * NOTE(review): some lines are absent from this copy of the file (listing
 * numbers 4598, 4609, 4615, 4625 and 4635), which leaves the snprintf()
 * argument list and the ereport() calls below visibly incomplete. No
 * removal of the temp file on the failure paths is visible here.
4583  */
4584 static pid_t
4585 internal_forkexec(int argc, char *argv[], Port *port)
4586 {
 /* Counter kept across calls so each call gets a distinct file name */
4587  static unsigned long tmpBackendFileNum = 0;
4588  pid_t pid;
4589  char tmpfilename[MAXPGPATH];
4590  BackendParameters param;
4591  FILE *fp;
4592 
4593  if (!save_backend_variables(&param, port))
4594  return -1; /* log made by save_backend_variables */
4595 
4596  /* Calculate name for temp file */
4597  snprintf(tmpfilename, MAXPGPATH, "%s/%s.backend_var.%d.%lu",
4599  MyProcPid, ++tmpBackendFileNum);
4600 
4601  /* Open file */
4602  fp = AllocateFile(tmpfilename, PG_BINARY_W);
4603  if (!fp)
4604  {
4605  /*
4606  * As in OpenTemporaryFileInTablespace, try to make the temp-file
4607  * directory, ignoring errors.
4608  */
4610 
 /* Second and last attempt; give up with a LOG message if it fails */
4611  fp = AllocateFile(tmpfilename, PG_BINARY_W);
4612  if (!fp)
4613  {
4614  ereport(LOG,
4616  errmsg("could not create file \"%s\": %m",
4617  tmpfilename)));
4618  return -1;
4619  }
4620  }
4621 
 /* The whole parameter struct is written as one binary record */
4622  if (fwrite(&param, sizeof(param), 1, fp) != 1)
4623  {
4624  ereport(LOG,
4626  errmsg("could not write to file \"%s\": %m", tmpfilename)));
4627  FreeFile(fp);
4628  return -1;
4629  }
4630 
4631  /* Release file */
 /* A nonzero FreeFile() result is reported as a write failure */
4632  if (FreeFile(fp))
4633  {
4634  ereport(LOG,
4636  errmsg("could not write to file \"%s\": %m", tmpfilename)));
4637  return -1;
4638  }
4639 
4640  /* Make sure caller set up argv properly */
4641  Assert(argc >= 3);
4642  Assert(argv[argc] == NULL);
4643  Assert(strncmp(argv[1], "--fork", 6) == 0);
4644  Assert(argv[2] == NULL);
4645 
4646  /* Insert temp file name after --fork argument */
4647  argv[2] = tmpfilename;
4648 
4649  /* Fire off execv in child */
 /* fork_process() == 0 means we are the child */
4650  if ((pid = fork_process()) == 0)
4651  {
4652  if (execv(postgres_exec_path, argv) < 0)
4653  {
4654  ereport(LOG,
4655  (errmsg("could not execute server process \"%s\": %m",
4656  postgres_exec_path)));
4657  /* We're already in the child process here, can't return */
4658  exit(1);
4659  }
4660  }
4661 
4662  return pid; /* Parent returns pid, or -1 on fork failure */
4663 }
4664 #else /* WIN32 */
4665 
4666 /*
4667  * internal_forkexec win32 implementation
4668  *
4669  * - starts backend using CreateProcess(), in suspended state
4670  * - writes out backend variables to the parameter file
4671  * - during this, duplicates handles and sockets required for
4672  * inheritance into the new process
4673  * - resumes execution of the new process once the backend parameter
4674  * file is complete.
4675  */
4676 static pid_t
4677 internal_forkexec(int argc, char *argv[], Port *port)
4678 {
4679  int retry_count = 0;
4680  STARTUPINFO si;
4681  PROCESS_INFORMATION pi;
4682  int i;
4683  int j;
4684  char cmdLine[MAXPGPATH * 2];
4685  HANDLE paramHandle;
4686  BackendParameters *param;
4687  SECURITY_ATTRIBUTES sa;
4688  char paramHandleStr[32];
4689  win32_deadchild_waitinfo *childinfo;
4690 
4691  /* Make sure caller set up argv properly */
4692  Assert(argc >= 3);
4693  Assert(argv[argc] == NULL);
4694  Assert(strncmp(argv[1], "--fork", 6) == 0);
4695  Assert(argv[2] == NULL);
4696 
4697  /* Resume here if we need to retry */
4698 retry:
4699 
4700  /* Set up shared memory for parameter passing */
4701  ZeroMemory(&sa, sizeof(sa));
4702  sa.nLength = sizeof(sa);
4703  sa.bInheritHandle = TRUE;
4704  paramHandle = CreateFileMapping(INVALID_HANDLE_VALUE,
4705  &sa,
4706  PAGE_READWRITE,
4707  0,
4708  sizeof(BackendParameters),
4709  NULL);
4710  if (paramHandle == INVALID_HANDLE_VALUE)
4711  {
4712  elog(LOG, "could not create backend parameter file mapping: error code %lu",
4713  GetLastError());
4714  return -1;
4715  }
4716 
4717  param = MapViewOfFile(paramHandle, FILE_MAP_WRITE, 0, 0, sizeof(BackendParameters));
4718  if (!param)
4719  {
4720  elog(LOG, "could not map backend parameter memory: error code %lu",
4721  GetLastError());
4722  CloseHandle(paramHandle);
4723  return -1;
4724  }
4725 
4726  /* Insert temp file name after --fork argument */
4727 #ifdef _WIN64
4728  sprintf(paramHandleStr, "%llu", (LONG_PTR) paramHandle);
4729 #else
4730  sprintf(paramHandleStr, "%lu", (DWORD) paramHandle);
4731 #endif
4732  argv[2] = paramHandleStr;
4733 
4734  /* Format the cmd line */
4735  cmdLine[sizeof(cmdLine) - 1] = '\0';
4736  cmdLine[sizeof(cmdLine) - 2] = '\0';
4737  snprintf(cmdLine, sizeof(cmdLine) - 1, "\"%s\"", postgres_exec_path);
4738  i = 0;
4739  while (argv[++i] != NULL)
4740  {
4741  j = strlen(cmdLine);
4742  snprintf(cmdLine + j, sizeof(cmdLine) - 1 - j, " \"%s\"", argv[i]);
4743  }
4744  if (cmdLine[sizeof(cmdLine) - 2] != '\0')
4745  {
4746  elog(LOG, "subprocess command line too long");
4747  UnmapViewOfFile(param);
4748  CloseHandle(paramHandle);
4749  return -1;
4750  }
4751 
4752  memset(&pi, 0, sizeof(pi));
4753  memset(&si, 0, sizeof(si));
4754  si.cb = sizeof(si);
4755 
4756  /*
4757  * Create the subprocess in a suspended state. This will be resumed later,
4758  * once we have written out the parameter file.
4759  */
4760  if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, CREATE_SUSPENDED,
4761  NULL, NULL, &si, &pi))
4762  {
4763  elog(LOG, "CreateProcess call failed: %m (error code %lu)",
4764  GetLastError());
4765  UnmapViewOfFile(param);
4766  CloseHandle(paramHandle);
4767  return -1;
4768  }
4769 
4770  if (!save_backend_variables(param, port, pi.hProcess, pi.dwProcessId))
4771  {
4772  /*
4773  * log made by save_backend_variables, but we have to clean up the
4774  * mess with the half-started process
4775  */
4776  if (!TerminateProcess(pi.hProcess, 255))
4777  ereport(LOG,
4778  (errmsg_internal("could not terminate unstarted process: error code %lu",
4779  GetLastError())));
4780  CloseHandle(pi.hProcess);
4781  CloseHandle(pi.hThread);
4782  UnmapViewOfFile(param);
4783  CloseHandle(paramHandle);
4784  return -1; /* log made by save_backend_variables */
4785  }
4786 
4787  /* Drop the parameter shared memory that is now inherited to the backend */
4788  if (!UnmapViewOfFile(param))
4789  elog(LOG, "could not unmap view of backend parameter file: error code %lu",
4790  GetLastError());
4791  if (!CloseHandle(paramHandle))
4792  elog(LOG, "could not close handle to backend parameter file: error code %lu",
4793  GetLastError());
4794 
4795  /*
4796  * Reserve the memory region used by our main shared memory segment before
4797  * we resume the child process. Normally this should succeed, but if ASLR
4798  * is active then it might sometimes fail due to the stack or heap having
4799  * gotten mapped into that range. In that case, just terminate the
4800  * process and retry.
4801  */
4802  if (!pgwin32_ReserveSharedMemoryRegion(pi.hProcess))
4803  {
4804  /* pgwin32_ReserveSharedMemoryRegion already made a log entry */
4805  if (!TerminateProcess(pi.hProcess, 255))
4806  ereport(LOG,
4807  (errmsg_internal("could not terminate process that failed to reserve memory: error code %lu",
4808  GetLastError())));
4809  CloseHandle(pi.hProcess);
4810  CloseHandle(pi.hThread);
4811  if (++retry_count < 100)
4812  goto retry;
4813  ereport(LOG,
4814  (errmsg("giving up after too many tries to reserve shared memory"),
4815  errhint("This might be caused by ASLR or antivirus software.")));
4816  return -1;
4817  }
4818 
4819  /*
4820  * Now that the backend variables are written out, we start the child
4821  * thread so it can start initializing while we set up the rest of the
4822  * parent state.
4823  */
4824  if (ResumeThread(pi.hThread) == -1)
4825  {
4826  if (!TerminateProcess(pi.hProcess, 255))
4827  {
4828  ereport(LOG,
4829  (errmsg_internal("could not terminate unstartable process: error code %lu",
4830  GetLastError())));
4831  CloseHandle(pi.hProcess);
4832  CloseHandle(pi.hThread);
4833  return -1;
4834  }
4835  CloseHandle(pi.hProcess);
4836  CloseHandle(pi.hThread);
4837  ereport(LOG,
4838  (errmsg_internal("could not resume thread of unstarted process: error code %lu",
4839  GetLastError())));
4840  return -1;
4841  }
4842 
4843  /*
4844  * Queue a waiter to signal when this child dies. The wait will be handled
4845  * automatically by an operating system thread pool.
4846  *
4847  * Note: use malloc instead of palloc, since it needs to be thread-safe.
4848  * Struct will be free():d from the callback function that runs on a
4849  * different thread.
4850  */
4851  childinfo = malloc(sizeof(win32_deadchild_waitinfo));
4852  if (!childinfo)
4853  ereport(FATAL,
4854  (errcode(ERRCODE_OUT_OF_MEMORY),
4855  errmsg("out of memory")));
4856 
4857  childinfo->procHandle = pi.hProcess;
4858  childinfo->procId = pi.dwProcessId;
4859 
4860  if (!RegisterWaitForSingleObject(&childinfo->waitHandle,
4861  pi.hProcess,
4862  pgwin32_deadchild_callback,
4863  childinfo,
4864  INFINITE,
4865  WT_EXECUTEONLYONCE | WT_EXECUTEINWAITTHREAD))
4866  ereport(FATAL,
4867  (errmsg_internal("could not register process for wait: error code %lu",
4868  GetLastError())));
4869 
4870  /* Don't close pi.hProcess here - the wait thread needs access to it */
4871 
4872  CloseHandle(pi.hThread);
4873 
4874  return pi.dwProcessId;
4875 }
4876 #endif /* WIN32 */
4877 
4878 
4879 /*
4880  * SubPostmasterMain -- Get the fork/exec'd process into a state equivalent
4881  * to what it would be if we'd simply forked on Unix, and then
4882  * dispatch to the appropriate place.
4883  *
4884  * The first two command line arguments are expected to be "--forkFOO"
4885  * (where FOO indicates which postmaster child we are to become), and
4886  * the name of a variables file that we can read to load data that would
4887  * have been inherited by fork() on Unix. Remaining arguments go to the
4888  * subprocess FooMain() routine.
 *
 * The dispatch at the bottom compares argv[1] against each known
 * "--fork..." switch in turn; every branch ends in a call documented as
 * not returning, and abort() is reached only if no switch matched.
 *
 * NOTE(review): many statements of this function are absent from this
 * copy of the file (gaps in the listing numbers, e.g. 4897, 4900, 4903,
 * 4949, 4951, 4965, 4995 and one or two lines in most dispatch branches).
 * Comments that now appear to describe nothing refer to those lines.
4889  */
4890 void
4891 SubPostmasterMain(int argc, char *argv[])
4892 {
4893  Port port;
4894 
4895  /* In EXEC_BACKEND case we will not have inherited these settings */
4896  IsPostmasterEnvironment = true;
4898 
4899  /* Setup as postmaster child */
4901 
4902  /* Setup essential subsystems (to ensure elog() behaves sanely) */
4904 
4905  /* Check we got appropriate args */
 /* argv[1] (the switch) and argv[2] (the variables file) are both used below */
4906  if (argc < 3)
4907  elog(FATAL, "invalid subpostmaster invocation");
4908 
4909  /* Read in the variables file */
4910  memset(&port, 0, sizeof(Port));
4911  read_backend_variables(argv[2], &port);
4912 
4913  /* Close the postmaster's sockets (as soon as we know them) */
 /* The argument is true only when we are becoming the "--forklog" child */
4914  ClosePostmasterPorts(strcmp(argv[1], "--forklog") == 0);
4915 
4916  /*
4917  * Set up memory area for GSS information. Mirrors the code in ConnCreate
4918  * for the non-exec case.
4919  */
4920 #if defined(ENABLE_GSS) || defined(ENABLE_SSPI)
4921  port.gss = (pg_gssinfo *) calloc(1, sizeof(pg_gssinfo));
4922  if (!port.gss)
4923  ereport(FATAL,
4924  (errcode(ERRCODE_OUT_OF_MEMORY),
4925  errmsg("out of memory")));
4926 #endif
4927 
4928  /*
4929  * If appropriate, physically re-attach to shared memory segment. We want
4930  * to do this before going any further to ensure that we can attach at the
4931  * same address the postmaster used. On the other hand, if we choose not
4932  * to re-attach, we may have other cleanup to do.
4933  *
4934  * If testing EXEC_BACKEND on Linux, you should run this as root before
4935  * starting the postmaster:
4936  *
4937  * echo 0 >/proc/sys/kernel/randomize_va_space
4938  *
4939  * This prevents using randomized stack and code addresses that cause the
4940  * child process's memory map to be different from the parent's, making it
4941  * sometimes impossible to attach to shared memory at the desired address.
4942  * Return the setting to its old value (usually '1' or '2') when finished.
4943  */
 /*
  * NOTE(review): the statements controlled by this if/else (listing
  * lines 4949 and 4951) are missing in this copy of the file.
  */
4944  if (strcmp(argv[1], "--forkbackend") == 0 ||
4945  strcmp(argv[1], "--forkavlauncher") == 0 ||
4946  strcmp(argv[1], "--forkavworker") == 0 ||
4947  strcmp(argv[1], "--forkboot") == 0 ||
4948  strncmp(argv[1], "--forkbgworker=", 15) == 0)
4950  else
4952 
4953  /* autovacuum needs this set before calling InitProcess */
4954  if (strcmp(argv[1], "--forkavlauncher") == 0)
4955  AutovacuumLauncherIAm();
4956  if (strcmp(argv[1], "--forkavworker") == 0)
4957  AutovacuumWorkerIAm();
4958 
4959  /*
4960  * Start our win32 signal implementation. This has to be done after we
4961  * read the backend variables, because we need to pick up the signal pipe
4962  * from the parent process.
4963  */
4964 #ifdef WIN32
4966 #endif
4967 
4968  /* In EXEC_BACKEND case we will not have inherited these settings */
4969  pqinitmask();
4970  PG_SETMASK(&BlockSig);
4971 
4972  /* Read in remaining GUC variables */
4973  read_nondefault_variables();
4974 
4975  /*
4976  * Check that the data directory looks valid, which will also check the
4977  * privileges on the data directory and update our umask and file/group
4978  * variables for creating files later. Note: this should really be done
4979  * before we create any files or directories.
4980  */
4981  checkDataDir();
4982 
4983  /*
4984  * (re-)read control file, as it contains config. The postmaster will
4985  * already have read this, but this process doesn't know about that.
4986  */
4987  LocalProcessControlFile(false);
4988 
4989  /*
4990  * Reload any libraries that were preloaded by the postmaster. Since we
4991  * exec'd this process, those libraries didn't come along with us; but we
4992  * should load them into all child processes to be consistent with the
4993  * non-EXEC_BACKEND behavior.
4994  */
4996 
4997  /* Run backend or appropriate child */
4998  if (strcmp(argv[1], "--forkbackend") == 0)
4999  {
5000  Assert(argc == 3); /* shouldn't be any more args */
5001 
5002  /*
5003  * Need to reinitialize the SSL library in the backend, since the
5004  * context structures contain function pointers and cannot be passed
5005  * through the parameter file.
5006  *
5007  * If for some reason reload fails (maybe the user installed broken
5008  * key files), soldier on without SSL; that's better than all
5009  * connections becoming impossible.
5010  *
5011  * XXX should we do this in all child processes? For the moment it's
5012  * enough to do it in backend children.
5013  */
5014 #ifdef USE_SSL
5015  if (EnableSSL)
5016  {
5017  if (secure_initialize(false) == 0)
5018  LoadedSSL = true;
5019  else
5020  ereport(LOG,
5021  (errmsg("SSL configuration could not be loaded in child process")));
5022  }
5023 #endif
5024 
5025  /*
5026  * Perform additional initialization and collect startup packet.
5027  *
5028  * We want to do this before InitProcess() for a couple of reasons: 1.
5029  * so that we aren't eating up a PGPROC slot while waiting on the
5030  * client. 2. so that if InitProcess() fails due to being out of
5031  * PGPROC slots, we have already initialized libpq and are able to
5032  * report the error to the client.
5033  */
5034  BackendInitialize(&port);
5035 
5036  /* Restore basic shared memory pointers */
5038 
5039  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5040  InitProcess();
5041 
5042  /* Attach process to shared data structures */
5044 
5045  /* And run the backend */
5046  BackendRun(&port); /* does not return */
5047  }
5048  if (strcmp(argv[1], "--forkboot") == 0)
5049  {
5050  /* Restore basic shared memory pointers */
5052 
5053  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5055 
5056  /* Attach process to shared data structures */
5058 
 /* The "--fork..." switch and the variables file are stripped from argv */
5059  AuxiliaryProcessMain(argc - 2, argv + 2); /* does not return */
5060  }
5061  if (strcmp(argv[1], "--forkavlauncher") == 0)
5062  {
5063  /* Restore basic shared memory pointers */
5065 
5066  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5067  InitProcess();
5068 
5069  /* Attach process to shared data structures */
5071 
5072  AutoVacLauncherMain(argc - 2, argv + 2); /* does not return */
5073  }
5074  if (strcmp(argv[1], "--forkavworker") == 0)
5075  {
5076  /* Restore basic shared memory pointers */
5078 
5079  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5080  InitProcess();
5081 
5082  /* Attach process to shared data structures */
5084 
5085  AutoVacWorkerMain(argc - 2, argv + 2); /* does not return */
5086  }
5087  if (strncmp(argv[1], "--forkbgworker=", 15) == 0)
5088  {
5089  int shmem_slot;
5090 
5091  /* do this as early as possible; in particular, before InitProcess() */
5092  IsBackgroundWorker = true;
5093 
5094  /* Restore basic shared memory pointers */
5096 
5097  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5098  InitProcess();
5099 
5100  /* Attach process to shared data structures */
5102 
5103  /* Fetch MyBgworkerEntry from shared memory */
 /* The slot number is the text after the 15-character switch prefix */
5104  shmem_slot = atoi(argv[1] + 15);
5105  MyBgworkerEntry = BackgroundWorkerEntry(shmem_slot);
5106 
5108  }
5109  if (strcmp(argv[1], "--forkarch") == 0)
5110  {
5111  /* Do not want to attach to shared memory */
5112 
 /* Unlike the branches above, the full argc/argv is passed through */
5113  PgArchiverMain(argc, argv); /* does not return */
5114  }
5115  if (strcmp(argv[1], "--forkcol") == 0)
5116  {
5117  /* Do not want to attach to shared memory */
5118 
5119  PgstatCollectorMain(argc, argv); /* does not return */
5120  }
5121  if (strcmp(argv[1], "--forklog") == 0)
5122  {
5123  /* Do not want to attach to shared memory */
5124 
5125  SysLoggerMain(argc, argv); /* does not return */
5126  }
5127 
5128  abort(); /* shouldn't get here */
5129 }
5130 #endif /* EXEC_BACKEND */
5131 
5132 
5133 /*
5134  * ExitPostmaster -- cleanup
5135  *
5136  * Do NOT call exit() directly --- always go through here!
 *
 * The body passes "status" straight to proc_exit() as its final
 * statement. Where pthread_is_threaded_np() is available, a LOG message
 * is emitted first if the process is found to be multithreaded.
 *
 * NOTE(review): the declarator line (function name and parameter list) is
 * absent from this copy of the file.
5137  */
5138 static void
5140 {
5141 #ifdef HAVE_PTHREAD_IS_THREADED_NP
5142 
5143  /*
5144  * There is no known cause for a postmaster to become multithreaded after
5145  * startup. Recheck to account for the possibility of unknown causes.
5146  * This message uses LOG level, because an unclean shutdown at this point
5147  * would usually not look much different from a clean shutdown.
5148  */
5149  if (pthread_is_threaded_np() != 0)
5150  ereport(LOG,
5151  (errcode(ERRCODE_INTERNAL_ERROR),
5152  errmsg_internal("postmaster became multithreaded"),
5153  errdetail("Please report this to <%s>.", PACKAGE_BUGREPORT)));
5154 #endif
5155 
5156  /* should cleanup shared memory and kill all backends */
5157 
5158  /*
5159  * Not sure of the semantics here. When the Postmaster dies, should the
5160  * backends all be killed? probably not.
5161  *
5162  * MUST -- vadim 05-10-1999
5163  */
5164 
 /* No cleanup code follows the remarks above; we simply exit */
5165  proc_exit(status);
5166 }
5167 
5168 /*
5169  * sigusr1_handler - handle signal conditions from child processes
 *
 * errno is saved on entry and restored on exit, so the interrupted code
 * does not see any value the handler's own calls may have left behind.
 *
 * NOTE(review): this copy of the file is missing the declarator line and
 * most of the conditions that select the blocks below (the listing numbers
 * have many gaps, e.g. 5185, 5197-5198, 5236-5237, 5262, 5286, 5301, 5308,
 * 5326). Several braces therefore appear without the "if" that owns them,
 * and some blocks have lost their only statement. The existing comments
 * are the best remaining guide to what each block is for.
5170  */
5171 static void
5173 {
5174  int save_errno = errno;
5175 
5176  /*
5177  * We rely on the signal mechanism to have blocked all signals ... except
5178  * on Windows, which lacks sigaction(), so we have to do it manually.
5179  */
5180 #ifdef WIN32
5181  PG_SETMASK(&BlockSig);
5182 #endif
5183 
5184  /* Process background worker state change. */
5186  {
5188  StartWorkerNeeded = true;
5189  }
5190 
5191  /*
5192  * RECOVERY_STARTED and BEGIN_HOT_STANDBY signals are ignored in
5193  * unexpected states. If the startup process quickly starts up, completes
5194  * recovery, exits, we might process the death of the startup process
5195  * first. We don't want to go back to recovery in that case.
5196  */
5199  {
5200  /* WAL redo has started. We're out of reinitialization. */
5201  FatalError = false;
5202  AbortStartTime = 0;
5203 
5204  /*
5205  * Crank up the background tasks. It doesn't matter if this fails,
5206  * we'll just try again later.
5207  */
5208  Assert(CheckpointerPID == 0);
5210  Assert(BgWriterPID == 0);
5212 
5213  /*
5214  * Start the archiver if we're responsible for (re-)archiving received
5215  * files.
5216  */
5217  Assert(PgArchPID == 0);
5218  if (XLogArchivingAlways())
5219  PgArchPID = pgarch_start();
5220 
5221  /*
5222  * If we aren't planning to enter hot standby mode later, treat
5223  * RECOVERY_STARTED as meaning we're out of startup, and report status
5224  * accordingly.
5225  */
5226  if (!EnableHotStandby)
5227  {
5229 #ifdef USE_SYSTEMD
5230  sd_notify(0, "READY=1");
5231 #endif
5232  }
5233 
 /* This block always leaves the state machine in PM_RECOVERY */
5234  pmState = PM_RECOVERY;
5235  }
5238  {
5239  /*
5240  * Likewise, start other special children as needed.
5241  */
5242  Assert(PgStatPID == 0);
5243  PgStatPID = pgstat_start();
5244 
5245  ereport(LOG,
5246  (errmsg("database system is ready to accept read only connections")));
5247 
5248  /* Report status */
5250 #ifdef USE_SYSTEMD
5251  sd_notify(0, "READY=1");
5252 #endif
5253 
5255  /* Some workers may be scheduled to start now */
5256  StartWorkerNeeded = true;
5257  }
5258 
5261 
5263  PgArchPID != 0)
5264  {
5265  /*
5266  * Send SIGUSR1 to archiver process, to wake it up and begin archiving
5267  * next WAL file.
5268  */
5270  }
5271 
5272  /* Tell syslogger to rotate logfile if requested */
5273  if (SysLoggerPID != 0)
5274  {
5275  if (CheckLogrotateSignal())
5276  {
5279  }
5281  {
5283  }
5284  }
5285 
5287  Shutdown == NoShutdown)
5288  {
5289  /*
5290  * Start one iteration of the autovacuum daemon, even if autovacuuming
5291  * is nominally not enabled. This is so we can have an active defense
5292  * against transaction ID wraparound. We set a flag for the main loop
5293  * to do it rather than trying to do it here --- this is because the
5294  * autovac process itself may send the signal, and we want to handle
5295  * that by launching another iteration as soon as the current one
5296  * completes.
5297  */
5298  start_autovac_launcher = true;
5299  }
5300 
5302  Shutdown == NoShutdown)
5303  {
5304  /* The autovacuum launcher wants us to start a worker process. */
5306  }
5307 
5309  {
5310  /* Startup Process wants us to start the walreceiver process. */
5311  /* Start immediately if possible, else remember request for later. */
5312  WalReceiverRequested = true;
5314  }
5315 
5316  /*
5317  * Try to advance postmaster's state machine, if a child requests it.
5318  *
5319  * Be careful about the order of this action relative to sigusr1_handler's
5320  * other actions. Generally, this should be after other actions, in case
5321  * they have effects PostmasterStateMachine would need to know about.
5322  * However, we should do it before the CheckPromoteSignal step, which
5323  * cannot have any (immediate) effect on the state machine, but does
5324  * depend on what state we're in now.
5325  */
5327  {
5329  }
5330 
5331  if (StartupPID != 0 &&
5332  (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5335  {
5336  /* Tell startup process to finish recovery */
5338  }
5339 
5340 #ifdef WIN32
5342 #endif
5343 
 /* Put back the errno value captured on entry */
5344  errno = save_errno;
5345 }
5346 
5347 /*
5348  * SIGTERM or SIGQUIT while processing startup packet.
5349  * Clean up and exit(1).
5350  *
5351  * XXX: possible future improvement: try to send a message indicating
5352  * why we are disconnecting. Problem is to be sure we don't block while
5353  * doing so, nor mess up SSL initialization. In practice, if the client
5354  * has wedged here, it probably couldn't do anything with the message anyway.
 *
 * The body consists solely of proc_exit(1).
 *
 * NOTE(review): the declarator line (function name and parameter list) is
 * absent from this copy of the file.
5355  */
5356 static void
5358 {
5359  proc_exit(1);
5360 }
5361 
5362 /*
5363  * Dummy signal handler
5364  *
5365  * We use this for signals that we don't actually use in the postmaster,
5366  * but we do use in backends. If we were to SIG_IGN such signals in the
5367  * postmaster, then a newly started backend might drop a signal that arrives
5368  * before it's able to reconfigure its signal processing. (See notes in
5369  * tcop/postgres.c.)
5370  */
5371 static void
5373 {
5374 }
5375 
5376 /*
5377  * Timeout while processing startup packet.
5378  * As for startup_die(), we clean up and exit(1).
5379  */
5380 static void
5382 {
5383  proc_exit(1);
5384 }
5385 
5386 
5387 /*
5388  * Generate a random cancel key.
5389  */
5390 static bool
5392 {
5393  return pg_strong_random(cancel_key, sizeof(int32));
5394 }
5395 
5396 /*
5397  * Count up number of child processes of specified types (dead_end children
5398  * are always excluded).
5399  */
5400 static int
5401 CountChildren(int target)
5402 {
5403  dlist_iter iter;
5404  int cnt = 0;
5405 
5406  dlist_foreach(iter, &BackendList)
5407  {
5408  Backend *bp = dlist_container(Backend, elem, iter.cur);
5409 
5410  if (bp->dead_end)
5411  continue;
5412 
5413  /*
5414  * Since target == BACKEND_TYPE_ALL is the most common case, we test
5415  * it first and avoid touching shared memory for every child.
5416  */
5417  if (target != BACKEND_TYPE_ALL)
5418  {
5419  /*
5420  * Assign bkend_type for any recently announced WAL Sender
5421  * processes.
5422  */
5423  if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
5426 
5427  if (!(target & bp->bkend_type))
5428  continue;
5429  }
5430 
5431  cnt++;
5432  }
5433  return cnt;
5434 }
5435 
5436 
5437 /*
5438  * StartChildProcess -- start an auxiliary process for the postmaster
5439  *
5440  * "type" determines what kind of child will be started. All child types
5441  * initially go to AuxiliaryProcessMain, which will handle common setup.
5442  *
5443  * Return value of StartChildProcess is subprocess' PID, or 0 if failed
5444  * to start subprocess.
5445  */
5446 static pid_t
5448 {
5449  pid_t pid;
5450  char *av[10];
5451  int ac = 0;
5452  char typebuf[32];
5453 
5454  /*
5455  * Set up command-line arguments for subprocess
5456  */
5457  av[ac++] = "postgres";
5458 
5459 #ifdef EXEC_BACKEND
5460  av[ac++] = "--forkboot";
5461  av[ac++] = NULL; /* filled in by postmaster_forkexec */
5462 #endif
5463 
5464  snprintf(typebuf, sizeof(typebuf), "-x%d", type);
5465  av[ac++] = typebuf;
5466 
5467  av[ac] = NULL;
5468  Assert(ac < lengthof(av));
5469 
5470 #ifdef EXEC_BACKEND
5471  pid = postmaster_forkexec(ac, av);
5472 #else /* !EXEC_BACKEND */
5473  pid = fork_process();
5474 
5475  if (pid == 0) /* child */
5476  {
5478 
5479  /* Close the postmaster's sockets */
5480  ClosePostmasterPorts(false);
5481 
5482  /* Release postmaster's working memory context */
5485  PostmasterContext = NULL;
5486 
5487  AuxiliaryProcessMain(ac, av);
5488  ExitPostmaster(0);
5489  }
5490 #endif /* EXEC_BACKEND */
5491 
5492  if (pid < 0)
5493  {
5494  /* in parent, fork failed */
5495  int save_errno = errno;
5496 
5497  errno = save_errno;
5498  switch (type)
5499  {
5500  case StartupProcess:
5501  ereport(LOG,
5502  (errmsg("could not fork startup process: %m")));
5503  break;
5504  case BgWriterProcess:
5505  ereport(LOG,
5506  (errmsg("could not fork background writer process: %m")));
5507  break;
5508  case CheckpointerProcess:
5509  ereport(LOG,
5510  (errmsg("could not fork checkpointer process: %m")));
5511  break;
5512  case WalWriterProcess:
5513  ereport(LOG,
5514  (errmsg("could not fork WAL writer process: %m")));
5515  break;
5516  case WalReceiverProcess:
5517  ereport(LOG,
5518  (errmsg("could not fork WAL receiver process: %m")));
5519  break;
5520  default:
5521  ereport(LOG,
5522  (errmsg("could not fork process: %m")));
5523  break;
5524  }
5525 
5526  /*
5527  * fork failure is fatal during startup, but there's no need to choke
5528  * immediately if starting other child types fails.
5529  */
5530  if (type == StartupProcess)
5531  ExitPostmaster(1);
5532  return 0;
5533  }
5534 
5535  /*
5536  * in parent, successful fork
5537  */
5538  return pid;
5539 }
5540 
5541 /*
5542  * StartAutovacuumWorker
5543  * Start an autovac worker process.
5544  *
5545  * This function is here because it enters the resulting PID into the
5546  * postmaster's private backends list.
5547  *
5548  * NB -- this code very roughly matches BackendStartup.
5549  */
5550 static void
5552 {
5553  Backend *bn;
5554 
5555  /*
5556  * If not in condition to run a process, don't try, but handle it like a
5557  * fork failure. This does not normally happen, since the signal is only
5558  * supposed to be sent by autovacuum launcher when it's OK to do it, but
5559  * we have to check to avoid race-condition problems during DB state
5560  * changes.
5561  */
5563  {
5564  /*
5565  * Compute the cancel key that will be assigned to this session. We
5566  * probably don't need cancel keys for autovac workers, but we'd
5567  * better have something random in the field to prevent unfriendly
5568  * people from sending cancels to them.
5569  */
5571  {
5572  ereport(LOG,
5573  (errcode(ERRCODE_INTERNAL_ERROR),
5574  errmsg("could not generate random cancel key")));
5575  return;
5576  }
5577 
5578  bn = (Backend *) malloc(sizeof(Backend));
5579  if (bn)
5580  {
5581  bn->cancel_key = MyCancelKey;
5582 
5583  /* Autovac workers are not dead_end and need a child slot */
5584  bn->dead_end = false;
5586  bn->bgworker_notify = false;
5587 
5588  bn->pid = StartAutoVacWorker();
5589  if (bn->pid > 0)
5590  {
5592  dlist_push_head(&BackendList, &bn->elem);
5593 #ifdef EXEC_BACKEND
5594  ShmemBackendArrayAdd(bn);
5595 #endif
5596  /* all OK */
5597  return;
5598  }
5599 
5600  /*
5601  * fork failed, fall through to report -- actual error message was
5602  * logged by StartAutoVacWorker
5603  */
5605  free(bn);
5606  }
5607  else
5608  ereport(LOG,
5609  (errcode(ERRCODE_OUT_OF_MEMORY),
5610  errmsg("out of memory")));
5611  }
5612 
5613  /*
5614  * Report the failure to the launcher, if it's running. (If it's not, we
5615  * might not even be connected to shared memory, so don't try to call
5616  * AutoVacWorkerFailed.) Note that we also need to signal it so that it
5617  * responds to the condition, but we don't do that here, instead waiting
5618  * for ServerLoop to do it. This way we avoid a ping-pong signaling in
5619  * quick succession between the autovac launcher and postmaster in case
5620  * things get ugly.
5621  */
5622  if (AutoVacPID != 0)
5623  {
5625  avlauncher_needs_signal = true;
5626  }
5627 }
5628 
5629 /*
5630  * MaybeStartWalReceiver
5631  * Start the WAL receiver process, if not running and our state allows.
5632  *
5633  * Note: if WalReceiverPID is already nonzero, it might seem that we should
5634  * clear WalReceiverRequested. However, there's a race condition if the
5635  * walreceiver terminates and the startup process immediately requests a new
5636  * one: it's quite possible to get the signal for the request before reaping
5637  * the dead walreceiver process. Better to risk launching an extra
5638  * walreceiver than to miss launching one we need. (The walreceiver code
5639  * has logic to recognize that it should go away if not needed.)
5640  */
5641 static void
5643 {
5644  if (WalReceiverPID == 0 &&
5645  (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5647  Shutdown == NoShutdown)
5648  {
5650  if (WalReceiverPID != 0)
5651  WalReceiverRequested = false;
5652  /* else leave the flag set, so we'll try again later */
5653  }
5654 }
5655 
5656 
5657 /*
5658  * Create the opts file
5659  */
5660 static bool
5661 CreateOptsFile(int argc, char *argv[], char *fullprogname)
5662 {
5663  FILE *fp;
5664  int i;
5665 
5666 #define OPTS_FILE "postmaster.opts"
5667 
5668  if ((fp = fopen(OPTS_FILE, "w")) == NULL)
5669  {
5670  elog(LOG, "could not create file \"%s\": %m", OPTS_FILE);
5671  return false;
5672  }
5673 
5674  fprintf(fp, "%s", fullprogname);
5675  for (i = 1; i < argc; i++)
5676  fprintf(fp, " \"%s\"", argv[i]);
5677  fputs("\n", fp);
5678 
5679  if (fclose(fp))
5680  {
5681  elog(LOG, "could not write file \"%s\": %m", OPTS_FILE);
5682  return false;
5683  }
5684 
5685  return true;
5686 }
5687 
5688 
5689 /*
5690  * MaxLivePostmasterChildren
5691  *
5692  * This reports the number of entries needed in per-child-process arrays
5693  * (the PMChildFlags array, and if EXEC_BACKEND the ShmemBackendArray).
5694  * These arrays include regular backends, autovac workers, walsenders
5695  * and background workers, but not special children nor dead_end children.
5696  * This allows the arrays to have a fixed maximum size, to wit the same
5697  * too-many-children limit enforced by canAcceptConnections(). The exact value
5698  * isn't too critical as long as it's more than MaxBackends.
5699  */
5700 int
5702 {
5703  return 2 * (MaxConnections + autovacuum_max_workers + 1 +
5705 }
5706 
5707 /*
5708  * Connect background worker to a database.
5709  */
5710 void
5712 {
5714 
5715  /* XXX is this the right errcode? */
5717  ereport(FATAL,
5718  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5719  errmsg("database connection requirement not indicated during registration")));
5720 
5721  InitPostgres(dbname, InvalidOid, username, InvalidOid, NULL, (flags & BGWORKER_BYPASS_ALLOWCONN) != 0);
5722 
5723  /* it had better not have gotten out of "init" mode yet */
5724  if (!IsInitProcessingMode())
5725  ereport(ERROR,
5726  (errmsg("invalid processing mode in background worker")));
5728 }
5729 
5730 /*
5731  * Connect background worker to a database using OIDs.
5732  */
5733 void
5735 {
5737 
5738  /* XXX is this the right errcode? */
5740  ereport(FATAL,
5741  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5742  errmsg("database connection requirement not indicated during registration")));
5743 
5744  InitPostgres(NULL, dboid, NULL, useroid, NULL, (flags & BGWORKER_BYPASS_ALLOWCONN) != 0);
5745 
5746  /* it had better not have gotten out of "init" mode yet */
5747  if (!IsInitProcessingMode())
5748  ereport(ERROR,
5749  (errmsg("invalid processing mode in background worker")));
5751 }
5752 
5753 /*
5754  * Block/unblock signals in a background worker
5755  */
5756 void
5758 {
5759  PG_SETMASK(&BlockSig);
5760 }
5761 
5762 void
5764 {
5766 }
5767 
5768 #ifdef EXEC_BACKEND
5769 static pid_t
5770 bgworker_forkexec(int shmem_slot)
5771 {
5772  char *av[10];
5773  int ac = 0;
5774  char forkav[MAXPGPATH];
5775 
5776  snprintf(forkav, MAXPGPATH, "--forkbgworker=%d", shmem_slot);
5777 
5778  av[ac++] = "postgres";
5779  av[ac++] = forkav;
5780  av[ac++] = NULL; /* filled in by postmaster_forkexec */
5781  av[ac] = NULL;
5782 
5783  Assert(ac < lengthof(av));
5784 
5785  return postmaster_forkexec(ac, av);
5786 }
5787 #endif
5788 
5789 /*
5790  * Start a new bgworker.
5791  * Starting time conditions must have been checked already.
5792  *
5793  * Returns true on success, false on failure.
5794  * In either case, update the RegisteredBgWorker's state appropriately.
5795  *
5796  * This code is heavily based on autovacuum.c, q.v.
5797  */
5798 static bool
5800 {
5801  pid_t worker_pid;
5802 
5803  Assert(rw->rw_pid == 0);
5804 
5805  /*
5806  * Allocate and assign the Backend element. Note we must do this before
5807  * forking, so that we can handle failures (out of memory or child-process
5808  * slots) cleanly.
5809  *
5810  * Treat failure as though the worker had crashed. That way, the
5811  * postmaster will wait a bit before attempting to start it again; if we
5812  * tried again right away, most likely we'd find ourselves hitting the
5813  * same resource-exhaustion condition.
5814  */
5815  if (!assign_backendlist_entry(rw))
5816  {
5818  return false;
5819  }
5820 
5821  ereport(DEBUG1,
5822  (errmsg("starting background worker process \"%s\"",
5823  rw->rw_worker.bgw_name)));
5824 
5825 #ifdef EXEC_BACKEND
5826  switch ((worker_pid = bgworker_forkexec(rw->rw_shmem_slot)))
5827 #else
5828  switch ((worker_pid = fork_process()))
5829 #endif
5830  {
5831  case -1:
5832  /* in postmaster, fork failed ... */
5833  ereport(LOG,
5834  (errmsg("could not fork worker process: %m")));
5835  /* undo what assign_backendlist_entry did */
5837  rw->rw_child_slot = 0;
5838  free(rw->rw_backend);
5839  rw->rw_backend = NULL;
5840  /* mark entry as crashed, so we'll try again later */
5842  break;
5843 
5844 #ifndef EXEC_BACKEND
5845  case 0:
5846  /* in postmaster child ... */
5848 
5849  /* Close the postmaster's sockets */
5850  ClosePostmasterPorts(false);
5851 
5852  /*
5853  * Before blowing away PostmasterContext, save this bgworker's
5854  * data where it can find it.
5855  */
5856  MyBgworkerEntry = (BackgroundWorker *)
5858  memcpy(MyBgworkerEntry, &rw->rw_worker, sizeof(BackgroundWorker));
5859 
5860  /* Release postmaster's working memory context */