PostgreSQL Source Code  git master
postmaster.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * postmaster.c
4  * This program acts as a clearing house for requests to the
5  * POSTGRES system. Frontend programs send a startup message
6  * to the Postmaster and the postmaster uses the info in the
7  * message to set up a backend process.
8  *
9  * The postmaster also manages system-wide operations such as
10  * startup and shutdown. The postmaster itself doesn't do those
11  * operations, mind you --- it just forks off a subprocess to do them
12  * at the right times. It also takes care of resetting the system
13  * if a backend crashes.
14  *
15  * The postmaster process creates the shared memory and semaphore
16  * pools during startup, but as a rule does not touch them itself.
17  * In particular, it is not a member of the PGPROC array of backends
18  * and so it cannot participate in lock-manager operations. Keeping
19  * the postmaster away from shared memory operations makes it simpler
20  * and more reliable. The postmaster is almost always able to recover
21  * from crashes of individual backends by resetting shared memory;
22  * if it did much with shared memory then it would be prone to crashing
23  * along with the backends.
24  *
25  * When a request message is received, we now fork() immediately.
26  * The child process performs authentication of the request, and
27  * then becomes a backend if successful. This allows the auth code
28  * to be written in a simple single-threaded style (as opposed to the
29  * crufty "poor man's multitasking" code that used to be needed).
30  * More importantly, it ensures that blockages in non-multithreaded
31  * libraries like SSL or PAM cannot cause denial of service to other
32  * clients.
33  *
34  *
35  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
36  * Portions Copyright (c) 1994, Regents of the University of California
37  *
38  *
39  * IDENTIFICATION
40  * src/backend/postmaster/postmaster.c
41  *
42  * NOTES
43  *
44  * Initialization:
45  * The Postmaster sets up shared memory data structures
46  * for the backends.
47  *
48  * Synchronization:
49  * The Postmaster shares memory with the backends but should avoid
50  * touching shared memory, so as not to become stuck if a crashing
51  * backend screws up locks or shared memory. Likewise, the Postmaster
52  * should never block on messages from frontend clients.
53  *
54  * Garbage Collection:
55  * The Postmaster cleans up after backends if they have an emergency
56  * exit and/or core dump.
57  *
58  * Error Reporting:
59  * Use write_stderr() only for reporting "interactive" errors
60  * (essentially, bogus arguments on the command line). Once the
61  * postmaster is launched, use ereport().
62  *
63  *-------------------------------------------------------------------------
64  */
65 
66 #include "postgres.h"
67 
68 #include <unistd.h>
69 #include <signal.h>
70 #include <time.h>
71 #include <sys/wait.h>
72 #include <ctype.h>
73 #include <sys/stat.h>
74 #include <sys/socket.h>
75 #include <fcntl.h>
76 #include <sys/param.h>
77 #include <netdb.h>
78 #include <limits.h>
79 
80 #ifdef USE_BONJOUR
81 #include <dns_sd.h>
82 #endif
83 
84 #ifdef USE_SYSTEMD
85 #include <systemd/sd-daemon.h>
86 #endif
87 
88 #ifdef HAVE_PTHREAD_IS_THREADED_NP
89 #include <pthread.h>
90 #endif
91 
92 #include "access/transam.h"
93 #include "access/xlog.h"
94 #include "access/xlogrecovery.h"
95 #include "catalog/pg_control.h"
96 #include "common/file_perm.h"
97 #include "common/file_utils.h"
98 #include "common/ip.h"
99 #include "common/pg_prng.h"
100 #include "common/string.h"
101 #include "lib/ilist.h"
102 #include "libpq/auth.h"
103 #include "libpq/libpq.h"
104 #include "libpq/pqformat.h"
105 #include "libpq/pqsignal.h"
106 #include "nodes/queryjumble.h"
107 #include "pg_getopt.h"
108 #include "pgstat.h"
109 #include "port/pg_bswap.h"
110 #include "postmaster/autovacuum.h"
111 #include "postmaster/auxprocess.h"
113 #include "postmaster/fork_process.h"
114 #include "postmaster/interrupt.h"
115 #include "postmaster/pgarch.h"
116 #include "postmaster/postmaster.h"
117 #include "postmaster/syslogger.h"
119 #include "replication/walsender.h"
120 #include "storage/fd.h"
121 #include "storage/ipc.h"
122 #include "storage/pg_shmem.h"
123 #include "storage/pmsignal.h"
124 #include "storage/proc.h"
125 #include "tcop/tcopprot.h"
126 #include "utils/builtins.h"
127 #include "utils/datetime.h"
128 #include "utils/memutils.h"
129 #include "utils/pidfile.h"
130 #include "utils/ps_status.h"
131 #include "utils/timeout.h"
132 #include "utils/timestamp.h"
133 #include "utils/varlena.h"
134 
135 #ifdef EXEC_BACKEND
136 #include "storage/spin.h"
137 #endif
138 
139 
140 /*
141  * Possible types of a backend. Beyond being the possible bkend_type values in
142  * struct bkend, these are OR-able request flag bits for SignalSomeChildren()
143  * and CountChildren().
144  */
/* Each flavor is a distinct bit, so any combination can be OR'ed together. */
#define BACKEND_TYPE_NORMAL		0x0001	/* normal backend */
#define BACKEND_TYPE_AUTOVAC	0x0002	/* autovacuum worker process */
#define BACKEND_TYPE_WALSND		0x0004	/* walsender process */
#define BACKEND_TYPE_BGWORKER	0x0008	/* bgworker process */

/*
 * OR of all the above.  Built from the individual flags rather than written
 * as a literal, so that it stays correct if a flavor is added or renumbered.
 */
#define BACKEND_TYPE_ALL \
	(BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC | \
	 BACKEND_TYPE_WALSND | BACKEND_TYPE_BGWORKER)
150 
151 /*
152  * List of active backends (or child processes anyway; we don't actually
153  * know whether a given child has become a backend or is still in the
154  * authorization phase). This is used mainly to keep track of how many
155  * children we have and send them appropriate signals when necessary.
156  *
157  * As shown in the above set of backend types, this list includes not only
158  * "normal" client sessions, but also autovacuum workers, walsenders, and
159  * background workers. (Note that at the time of launch, walsenders are
160  * labeled BACKEND_TYPE_NORMAL; we relabel them to BACKEND_TYPE_WALSND
161  * upon noticing they've changed their PMChildFlags entry. Hence that check
162  * must be done before any operation that needs to distinguish walsenders
163  * from normal backends.)
164  *
165  * Also, "dead_end" children are in it: these are children launched just for
166  * the purpose of sending a friendly rejection message to a would-be client.
167  * We must track them because they are attached to shared memory, but we know
168  * they will never become live backends. dead_end children are not assigned a
169  * PMChildSlot. dead_end children have bkend_type NORMAL.
170  *
171  * "Special" children such as the startup, bgwriter and autovacuum launcher
172  * tasks are not in this list. They are tracked via StartupPID and other
173  * pid_t variables below. (Thus, there can't be more than one of any given
174  * "special" child process type. We use BackendList entries for any child
175  * process there can be more than one of.)
176  */
177 typedef struct bkend
178 {
179  pid_t pid; /* process id of backend */
180  int32 cancel_key; /* cancel key for cancels for this backend */
181  int child_slot; /* PMChildSlot for this backend, if any */
182  int bkend_type; /* child process flavor, see above */
183  bool dead_end; /* is it going to send an error and quit? */
184  bool bgworker_notify; /* gets bgworker start/stop notifications */
185  dlist_node elem; /* list link in BackendList */
187 
189 
190 #ifdef EXEC_BACKEND
191 static Backend *ShmemBackendArray;
192 #endif
193 
195 
196 
197 
198 /* The port number we are listening for connections on */
199 int PostPortNumber = DEF_PGPORT;
200 
201 /* The directory names for Unix socket(s) */
203 
204 /* The TCP listen address(es) */
206 
207 /*
208  * SuperuserReservedConnections is the number of backends reserved for
209  * superuser use, and ReservedConnections is the number of backends reserved
210  * for use by roles with privileges of the pg_use_reserved_connections
211  * predefined role. These are taken out of the pool of MaxConnections backend
212  * slots, so the number of backend slots available for roles that are neither
213  * superuser nor have privileges of pg_use_reserved_connections is
214  * (MaxConnections - SuperuserReservedConnections - ReservedConnections).
215  *
216  * If the number of remaining slots is less than or equal to
217  * SuperuserReservedConnections, only superusers can make new connections. If
218  * the number of remaining slots is greater than SuperuserReservedConnections
219  * but less than or equal to
220  * (SuperuserReservedConnections + ReservedConnections), only superusers and
221  * roles with privileges of pg_use_reserved_connections can make new
222  * connections. Note that pre-existing superuser and
223  * pg_use_reserved_connections connections don't count against the limits.
224  */
227 
228 /* The socket(s) we're listening to. */
229 #define MAXLISTEN 64
231 
/* still more option variables */

/* When true, PostmasterMain initializes the SSL library (USE_SSL builds). */
bool		EnableSSL = false;

int			PreAuthDelay = 0;

bool		log_hostname;		/* for ps display and logging */
bool		Log_connections = false;

bool		enable_bonjour = false;

/*
 * NOTE(review): presumably these back the like-named configuration settings
 * (the -T switch sets "send_abort_for_crash"); confirm against the GUC table.
 */
bool		send_abort_for_crash = false;
bool		send_abort_for_kill = false;
247 
248 /* PIDs of special child processes; 0 when not running */
249 static pid_t StartupPID = 0,
257 
258 /* Startup process's status */
259 typedef enum
260 {
263  STARTUP_SIGNALED, /* we sent it a SIGQUIT or SIGKILL */
266 
268 
/*
 * Startup/shutdown state.
 *
 * The shutdown modes are numbered in increasing order, from NoShutdown (0)
 * up to ImmediateShutdown (3).
 */
#define NoShutdown			0
#define SmartShutdown		1
#define FastShutdown		2
#define ImmediateShutdown	3

/* Current shutdown mode; starts out as NoShutdown. */
static int	Shutdown = NoShutdown;

static bool FatalError = false; /* T if recovering from backend crash */
278 
279 /*
280  * We use a simple state machine to control startup, shutdown, and
281  * crash recovery (which is rather like shutdown followed by startup).
282  *
283  * After doing all the postmaster initialization work, we enter PM_STARTUP
284  * state and the startup process is launched. The startup process begins by
285  * reading the control file and other preliminary initialization steps.
286  * In a normal startup, or after crash recovery, the startup process exits
287  * with exit code 0 and we switch to PM_RUN state. However, archive recovery
288  * is handled specially since it takes much longer and we would like to support
289  * hot standby during archive recovery.
290  *
291  * When the startup process is ready to start archive recovery, it signals the
292  * postmaster, and we switch to PM_RECOVERY state. The background writer and
293  * checkpointer are launched, while the startup process continues applying WAL.
294  * If Hot Standby is enabled, then, after reaching a consistent point in WAL
295  * redo, startup process signals us again, and we switch to PM_HOT_STANDBY
296  * state and begin accepting connections to perform read-only queries. When
297  * archive recovery is finished, the startup process exits with exit code 0
298  * and we switch to PM_RUN state.
299  *
300  * Normal child backends can only be launched when we are in PM_RUN or
301  * PM_HOT_STANDBY state. (connsAllowed can also restrict launching.)
302  * In other states we handle connection requests by launching "dead_end"
303  * child processes, which will simply send the client an error message and
304  * quit. (We track these in the BackendList so that we can know when they
305  * are all gone; this is important because they're still connected to shared
306  * memory, and would interfere with an attempt to destroy the shmem segment,
307  * possibly leading to SHMALL failure when we try to make a new one.)
308  * In PM_WAIT_DEAD_END state we are waiting for all the dead_end children
309  * to drain out of the system, and therefore stop accepting connection
310  * requests at all until the last existing child has quit (which hopefully
311  * will not be very long).
312  *
313  * Notice that this state variable does not distinguish *why* we entered
314  * states later than PM_RUN --- Shutdown and FatalError must be consulted
315  * to find that out. FatalError is never true in PM_RECOVERY, PM_HOT_STANDBY,
316  * or PM_RUN states, nor in PM_SHUTDOWN states (because we don't enter those
317  * states when trying to recover from a crash). It can be true in PM_STARTUP
318  * state, because we don't clear it until we've successfully started WAL redo.
319  */
320 typedef enum
321 {
322  PM_INIT, /* postmaster starting */
323  PM_STARTUP, /* waiting for startup subprocess */
324  PM_RECOVERY, /* in archive recovery mode */
325  PM_HOT_STANDBY, /* in hot standby mode */
326  PM_RUN, /* normal "database is alive" state */
327  PM_STOP_BACKENDS, /* need to stop remaining backends */
328  PM_WAIT_BACKENDS, /* waiting for live backends to exit */
329  PM_SHUTDOWN, /* waiting for checkpointer to do shutdown
330  * ckpt */
331  PM_SHUTDOWN_2, /* waiting for archiver and walsenders to
332  * finish */
333  PM_WAIT_DEAD_END, /* waiting for dead_end children to exit */
334  PM_NO_CHILDREN /* all important children have exited */
336 
338 
/*
 * While performing a "smart shutdown", we restrict new connections but stay
 * in PM_RUN or PM_HOT_STANDBY state until all the client backends are gone.
 * connsAllowed is a sub-state indicator showing the active restriction:
 * it starts out true (no restriction).  It is of no interest unless pmState
 * is PM_RUN or PM_HOT_STANDBY.
 */
static bool connsAllowed = true;
346 
/*
 * Start time of the SIGKILL timeout used during immediate shutdown or after
 * a child crash.  Zero means the timeout is not running.
 */
static time_t AbortStartTime = 0;

/* Length of said timeout, in seconds */
#define SIGKILL_CHILDREN_AFTER_SECS		5
353 
/* T if we've reached PM_RUN */
static bool ReachedNormalRunning = false;

/* T during new-client authentication */
bool		ClientAuthInProgress = false;

/* stderr redirected for syslogger? */
bool		redirection_done = false;
360 
/* received START_AUTOVAC_LAUNCHER signal */
static bool start_autovac_launcher = false;

/* the launcher needs to be signaled to communicate some condition */
static bool avlauncher_needs_signal = false;

/* received START_WALRECEIVER signal */
static bool WalReceiverRequested = false;

/*
 * Set when there's a worker that needs to be started up.  Note that
 * StartWorkerNeeded is the one flag here that begins life as true.
 */
static bool StartWorkerNeeded = true;
static bool HaveCrashedWorker = false;
373 
/*
 * Set when signals arrive.
 *
 * These are volatile sig_atomic_t because that is the object type C
 * guarantees can be safely assigned from within a signal handler.  Having
 * static storage duration, they all start out as zero.
 */
static volatile sig_atomic_t pending_pm_pmsignal;
static volatile sig_atomic_t pending_pm_child_exit;
static volatile sig_atomic_t pending_pm_reload_request;
static volatile sig_atomic_t pending_pm_shutdown_request;
static volatile sig_atomic_t pending_pm_fast_shutdown_request;
static volatile sig_atomic_t pending_pm_immediate_shutdown_request;
381 
382 /* event multiplexing object */
384 
#ifdef USE_SSL
/* Set when and if SSL has been initialized properly */
static bool LoadedSSL = false;
#endif

#ifdef USE_BONJOUR
/* Bonjour service reference; starts out NULL */
static DNSServiceRef bonjour_sdref = NULL;
#endif
393 
394 /*
395  * postmaster.c - function prototypes
396  */
397 static void CloseServerPorts(int status, Datum arg);
398 static void unlink_external_pid_file(int status, Datum arg);
399 static void getInstallationPaths(const char *argv0);
400 static void checkControlFile(void);
401 static Port *ConnCreate(int serverFd);
402 static void ConnFree(Port *port);
407 static void process_pm_pmsignal(void);
408 static void process_pm_child_exit(void);
409 static void process_pm_reload_request(void);
410 static void process_pm_shutdown_request(void);
412 static void dummy_handler(SIGNAL_ARGS);
413 static void StartupPacketTimeoutHandler(void);
414 static void CleanupBackend(int pid, int exitstatus);
415 static bool CleanupBackgroundWorker(int pid, int exitstatus);
416 static void HandleChildCrash(int pid, int exitstatus, const char *procname);
417 static void LogChildExit(int lev, const char *procname,
418  int pid, int exitstatus);
419 static void PostmasterStateMachine(void);
420 static void BackendInitialize(Port *port);
421 static void BackendRun(Port *port) pg_attribute_noreturn();
422 static void ExitPostmaster(int status) pg_attribute_noreturn();
423 static int ServerLoop(void);
424 static int BackendStartup(Port *port);
425 static int ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done);
426 static void SendNegotiateProtocolVersion(List *unrecognized_protocol_options);
427 static void processCancelRequest(Port *port, void *pkt);
428 static void report_fork_failure_to_client(Port *port, int errnum);
429 static CAC_state canAcceptConnections(int backend_type);
430 static bool RandomCancelKey(int32 *cancel_key);
431 static void signal_child(pid_t pid, int signal);
432 static void sigquit_child(pid_t pid);
433 static bool SignalSomeChildren(int signal, int target);
434 static void TerminateChildren(int signal);
435 
436 #define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL)
437 
438 static int CountChildren(int target);
440 static void maybe_start_bgworkers(void);
441 static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
442 static pid_t StartChildProcess(AuxProcType type);
443 static void StartAutovacuumWorker(void);
444 static void MaybeStartWalReceiver(void);
445 static void InitPostmasterDeathWatchHandle(void);
446 
/*
 * Is the archiver allowed to start up in the current postmaster state?
 *
 * If archiving is merely active, the archiver may start only in PM_RUN.
 * If WAL archiving is enabled always, we are allowed to start the archiver
 * even during recovery (PM_RECOVERY or PM_HOT_STANDBY).  In either case
 * PgArchCanRestart() must also return true.
 */
#define PgArchStartupAllowed()	\
	(((XLogArchivingActive() && pmState == PM_RUN) ||			\
	  (XLogArchivingAlways() &&									  \
	   (pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY))) && \
	 PgArchCanRestart())
458 
459 #ifdef EXEC_BACKEND
460 
#ifdef WIN32
#define WNOHANG 0				/* ignored, so any integer value will do */

/* File-local stand-in for waitpid(), declared here for the Windows build. */
static pid_t waitpid(pid_t pid, int *exitstatus, int options);
static void WINAPI pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired);

/*
 * NOTE(review): presumably the queue through which dead-child notifications
 * reach waitpid(); confirm against pgwin32_deadchild_callback().
 */
static HANDLE win32ChildQueue;

/* Bookkeeping for one child: its wait handle, process handle and process id. */
typedef struct
{
	HANDLE		waitHandle;
	HANDLE		procHandle;
	DWORD		procId;
} win32_deadchild_waitinfo;
#endif							/* WIN32 */
476 
477 static pid_t backend_forkexec(Port *port);
478 static pid_t internal_forkexec(int argc, char *argv[], Port *port);
479 
/*
 * Type for a socket that can be inherited by a client process.
 *
 * On Windows this carries the original socket together with a
 * WSAPROTOCOL_INFO; everywhere else a plain int is used.
 */
#ifdef WIN32
typedef struct
{
	SOCKET		origsocket;		/* Original socket value, or PGINVALID_SOCKET
								 * if not a socket */
	WSAPROTOCOL_INFO wsainfo;
} InheritableSocket;
#else
typedef int InheritableSocket;
#endif
491 
492 /*
493  * Structure containing all variables passed to exec'ed backends
494  */
495 typedef struct
496 {
497  Port port;
498  InheritableSocket portsocket;
499  char DataDir[MAXPGPATH];
501  int MyPMChildSlot;
502 #ifndef WIN32
503  unsigned long UsedShmemSegID;
504 #else
505  void *ShmemProtectiveRegion;
506  HANDLE UsedShmemSegID;
507 #endif
508  void *UsedShmemSegAddr;
511  Backend *ShmemBackendArray;
512 #ifndef HAVE_SPINLOCKS
514 #endif
523  pid_t PostmasterPid;
527  bool redirection_done;
528  bool IsBinaryUpgrade;
529  bool query_id_enabled;
530  int max_safe_fds;
531  int MaxBackends;
532 #ifdef WIN32
533  HANDLE PostmasterHandle;
534  HANDLE initial_signal_pipe;
535  HANDLE syslogPipe[2];
536 #else
537  int postmaster_alive_fds[2];
538  int syslogPipe[2];
539 #endif
540  char my_exec_path[MAXPGPATH];
541  char pkglib_path[MAXPGPATH];
542 } BackendParameters;
543 
544 static void read_backend_variables(char *id, Port *port);
545 static void restore_backend_variables(BackendParameters *param, Port *port);
546 
547 #ifndef WIN32
548 static bool save_backend_variables(BackendParameters *param, Port *port);
549 #else
550 static bool save_backend_variables(BackendParameters *param, Port *port,
551  HANDLE childProcess, pid_t childPid);
552 #endif
553 
554 static void ShmemBackendArrayAdd(Backend *bn);
555 static void ShmemBackendArrayRemove(Backend *bn);
556 #endif /* EXEC_BACKEND */
557 
/*
 * Convenience wrappers: each launches one kind of auxiliary process by
 * passing the matching type to StartChildProcess(), and yields its result.
 */
#define StartupDataBase()		StartChildProcess(StartupProcess)
#define StartArchiver()			StartChildProcess(ArchiverProcess)
#define StartBackgroundWriter() StartChildProcess(BgWriterProcess)
#define StartCheckpointer()		StartChildProcess(CheckpointerProcess)
#define StartWalWriter()		StartChildProcess(WalWriterProcess)
#define StartWalReceiver()		StartChildProcess(WalReceiverProcess)
564 
/*
 * Macros to check exit status of a child process.
 *
 * "st" is a wait status.  EXIT_STATUS_0 compares the raw status against
 * zero; EXIT_STATUS_1 and EXIT_STATUS_3 are true only for a child that
 * exited normally (WIFEXITED) with exit code 1 or 3 respectively.
 */
#define EXIT_STATUS_0(st)  ((st) == 0)
#define EXIT_STATUS_1(st)  (WIFEXITED(st) && WEXITSTATUS(st) == 1)
#define EXIT_STATUS_3(st)  (WIFEXITED(st) && WEXITSTATUS(st) == 3)
569 
#ifndef WIN32
/*
 * File descriptors for pipe used to monitor if postmaster is alive.
 * First is POSTMASTER_FD_WATCH, second is POSTMASTER_FD_OWN.
 * Both are initialized to -1, which is never a valid descriptor.
 */
int			postmaster_alive_fds[2] = {-1, -1};
#else
/* Process handle of postmaster used for the same purpose on Windows */
HANDLE		PostmasterHandle;
#endif
580 
581 /*
582  * Postmaster main entry point
583  */
584 void
585 PostmasterMain(int argc, char *argv[])
586 {
587  int opt;
588  int status;
589  char *userDoption = NULL;
590  bool listen_addr_saved = false;
591  int i;
592  char *output_config_variable = NULL;
593 
595 
597 
599 
600  /*
601  * Start our win32 signal implementation
602  */
603 #ifdef WIN32
605 #endif
606 
607  /*
608  * We should not be creating any files or directories before we check the
609  * data directory (see checkDataDir()), but just in case set the umask to
610  * the most restrictive (owner-only) permissions.
611  *
612  * checkDataDir() will reset the umask based on the data directory
613  * permissions.
614  */
615  umask(PG_MODE_MASK_OWNER);
616 
617  /*
618  * By default, palloc() requests in the postmaster will be allocated in
619  * the PostmasterContext, which is space that can be recycled by backends.
620  * Allocated data that needs to be available to backends should be
621  * allocated in TopMemoryContext.
622  */
624  "Postmaster",
627 
628  /* Initialize paths to installation files */
629  getInstallationPaths(argv[0]);
630 
631  /*
632  * Set up signal handlers for the postmaster process.
633  *
634  * CAUTION: when changing this list, check for side-effects on the signal
635  * handling setup of child processes. See tcop/postgres.c,
636  * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
637  * postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/syslogger.c,
638  * postmaster/bgworker.c and postmaster/checkpointer.c.
639  */
640  pqinitmask();
641  sigprocmask(SIG_SETMASK, &BlockSig, NULL);
642 
647  pqsignal(SIGALRM, SIG_IGN); /* ignored */
648  pqsignal(SIGPIPE, SIG_IGN); /* ignored */
650  pqsignal(SIGUSR2, dummy_handler); /* unused, reserve for children */
652 
653  /* This may configure SIGURG, depending on platform. */
656 
657  /*
658  * No other place in Postgres should touch SIGTTIN/SIGTTOU handling. We
659  * ignore those signals in a postmaster environment, so that there is no
660  * risk of a child process freezing up due to writing to stderr. But for
661  * a standalone backend, their default handling is reasonable. Hence, all
662  * child processes should just allow the inherited settings to stand.
663  */
664 #ifdef SIGTTIN
665  pqsignal(SIGTTIN, SIG_IGN); /* ignored */
666 #endif
667 #ifdef SIGTTOU
668  pqsignal(SIGTTOU, SIG_IGN); /* ignored */
669 #endif
670 
671  /* ignore SIGXFSZ, so that ulimit violations work like disk full */
672 #ifdef SIGXFSZ
673  pqsignal(SIGXFSZ, SIG_IGN); /* ignored */
674 #endif
675 
676  /* Begin accepting signals. */
677  sigprocmask(SIG_SETMASK, &UnBlockSig, NULL);
678 
679  /*
680  * Options setup
681  */
683 
684  opterr = 1;
685 
686  /*
687  * Parse command-line options. CAUTION: keep this in sync with
688  * tcop/postgres.c (the option sets should not conflict) and with the
689  * common help() function in main/main.c.
690  */
691  while ((opt = getopt(argc, argv, "B:bC:c:D:d:EeFf:h:ijk:lN:OPp:r:S:sTt:W:-:")) != -1)
692  {
693  switch (opt)
694  {
695  case 'B':
696  SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
697  break;
698 
699  case 'b':
700  /* Undocumented flag used for binary upgrades */
701  IsBinaryUpgrade = true;
702  break;
703 
704  case 'C':
705  output_config_variable = strdup(optarg);
706  break;
707 
708  case 'c':
709  case '-':
710  {
711  char *name,
712  *value;
713 
715  if (!value)
716  {
717  if (opt == '-')
718  ereport(ERROR,
719  (errcode(ERRCODE_SYNTAX_ERROR),
720  errmsg("--%s requires a value",
721  optarg)));
722  else
723  ereport(ERROR,
724  (errcode(ERRCODE_SYNTAX_ERROR),
725  errmsg("-c %s requires a value",
726  optarg)));
727  }
728 
730  pfree(name);
731  pfree(value);
732  break;
733  }
734 
735  case 'D':
736  userDoption = strdup(optarg);
737  break;
738 
739  case 'd':
741  break;
742 
743  case 'E':
744  SetConfigOption("log_statement", "all", PGC_POSTMASTER, PGC_S_ARGV);
745  break;
746 
747  case 'e':
748  SetConfigOption("datestyle", "euro", PGC_POSTMASTER, PGC_S_ARGV);
749  break;
750 
751  case 'F':
752  SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
753  break;
754 
755  case 'f':
757  {
758  write_stderr("%s: invalid argument for option -f: \"%s\"\n",
759  progname, optarg);
760  ExitPostmaster(1);
761  }
762  break;
763 
764  case 'h':
765  SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
766  break;
767 
768  case 'i':
769  SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
770  break;
771 
772  case 'j':
773  /* only used by interactive backend */
774  break;
775 
776  case 'k':
777  SetConfigOption("unix_socket_directories", optarg, PGC_POSTMASTER, PGC_S_ARGV);
778  break;
779 
780  case 'l':
781  SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
782  break;
783 
784  case 'N':
785  SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
786  break;
787 
788  case 'O':
789  SetConfigOption("allow_system_table_mods", "true", PGC_POSTMASTER, PGC_S_ARGV);
790  break;
791 
792  case 'P':
793  SetConfigOption("ignore_system_indexes", "true", PGC_POSTMASTER, PGC_S_ARGV);
794  break;
795 
796  case 'p':
798  break;
799 
800  case 'r':
801  /* only used by single-user backend */
802  break;
803 
804  case 'S':
806  break;
807 
808  case 's':
809  SetConfigOption("log_statement_stats", "true", PGC_POSTMASTER, PGC_S_ARGV);
810  break;
811 
812  case 'T':
813 
814  /*
815  * This option used to be defined as sending SIGSTOP after a
816  * backend crash, but sending SIGABRT seems more useful.
817  */
818  SetConfigOption("send_abort_for_crash", "true", PGC_POSTMASTER, PGC_S_ARGV);
819  break;
820 
821  case 't':
822  {
823  const char *tmp = get_stats_option_name(optarg);
824 
825  if (tmp)
826  {
828  }
829  else
830  {
831  write_stderr("%s: invalid argument for option -t: \"%s\"\n",
832  progname, optarg);
833  ExitPostmaster(1);
834  }
835  break;
836  }
837 
838  case 'W':
839  SetConfigOption("post_auth_delay", optarg, PGC_POSTMASTER, PGC_S_ARGV);
840  break;
841 
842  default:
843  write_stderr("Try \"%s --help\" for more information.\n",
844  progname);
845  ExitPostmaster(1);
846  }
847  }
848 
849  /*
850  * Postmaster accepts no non-option switch arguments.
851  */
852  if (optind < argc)
853  {
854  write_stderr("%s: invalid argument: \"%s\"\n",
855  progname, argv[optind]);
856  write_stderr("Try \"%s --help\" for more information.\n",
857  progname);
858  ExitPostmaster(1);
859  }
860 
861  /*
862  * Locate the proper configuration files and data directory, and read
863  * postgresql.conf for the first time.
864  */
866  ExitPostmaster(2);
867 
868  if (output_config_variable != NULL)
869  {
870  /*
871  * If this is a runtime-computed GUC, it hasn't yet been initialized,
872  * and the present value is not useful. However, this is a convenient
873  * place to print the value for most GUCs because it is safe to run
874  * postmaster startup to this point even if the server is already
875  * running. For the handful of runtime-computed GUCs that we cannot
876  * provide meaningful values for yet, we wait until later in
877  * postmaster startup to print the value. We won't be able to use -C
878  * on running servers for those GUCs, but using this option now would
879  * lead to incorrect results for them.
880  */
881  int flags = GetConfigOptionFlags(output_config_variable, true);
882 
883  if ((flags & GUC_RUNTIME_COMPUTED) == 0)
884  {
885  /*
886  * "-C guc" was specified, so print GUC's value and exit. No
887  * extra permission check is needed because the user is reading
888  * inside the data dir.
889  */
890  const char *config_val = GetConfigOption(output_config_variable,
891  false, false);
892 
893  puts(config_val ? config_val : "");
894  ExitPostmaster(0);
895  }
896 
897  /*
898  * A runtime-computed GUC will be printed later on. As we initialize
899  * a server startup sequence, silence any log messages that may show
900  * up in the output generated. FATAL and more severe messages are
901  * useful to show, even if one would only expect at least PANIC. LOG
902  * entries are hidden.
903  */
904  SetConfigOption("log_min_messages", "FATAL", PGC_SUSET,
906  }
907 
908  /* Verify that DataDir looks reasonable */
909  checkDataDir();
910 
911  /* Check that pg_control exists */
913 
914  /* And switch working directory into it */
915  ChangeToDataDir();
916 
917  /*
918  * Check for invalid combinations of GUC settings.
919  */
921  {
922  write_stderr("%s: superuser_reserved_connections (%d) plus reserved_connections (%d) must be less than max_connections (%d)\n",
923  progname,
926  ExitPostmaster(1);
927  }
929  ereport(ERROR,
930  (errmsg("WAL archival cannot be enabled when wal_level is \"minimal\"")));
932  ereport(ERROR,
933  (errmsg("WAL streaming (max_wal_senders > 0) requires wal_level \"replica\" or \"logical\"")));
934 
935  /*
936  * Other one-time internal sanity checks can go here, if they are fast.
937  * (Put any slow processing further down, after postmaster.pid creation.)
938  */
939  if (!CheckDateTokenTables())
940  {
941  write_stderr("%s: invalid datetoken tables, please fix\n", progname);
942  ExitPostmaster(1);
943  }
944 
945  /*
946  * Now that we are done processing the postmaster arguments, reset
947  * getopt(3) library so that it will work correctly in subprocesses.
948  */
949  optind = 1;
950 #ifdef HAVE_INT_OPTRESET
951  optreset = 1; /* some systems need this too */
952 #endif
953 
954  /* For debugging: display postmaster environment */
955  {
956  extern char **environ;
957  char **p;
958 
959  ereport(DEBUG3,
960  (errmsg_internal("%s: PostmasterMain: initial environment dump:",
961  progname)));
962  ereport(DEBUG3,
963  (errmsg_internal("-----------------------------------------")));
964  for (p = environ; *p; ++p)
965  ereport(DEBUG3,
966  (errmsg_internal("\t%s", *p)));
967  ereport(DEBUG3,
968  (errmsg_internal("-----------------------------------------")));
969  }
970 
971  /*
972  * Create lockfile for data directory.
973  *
974  * We want to do this before we try to grab the input sockets, because the
975  * data directory interlock is more reliable than the socket-file
976  * interlock (thanks to whoever decided to put socket files in /tmp :-().
977  * For the same reason, it's best to grab the TCP socket(s) before the
978  * Unix socket(s).
979  *
980  * Also note that this internally sets up the on_proc_exit function that
981  * is responsible for removing both data directory and socket lockfiles;
982  * so it must happen before opening sockets so that at exit, the socket
983  * lockfiles go away after CloseServerPorts runs.
984  */
985  CreateDataDirLockFile(true);
986 
987  /*
988  * Read the control file (for error checking and config info).
989  *
990  * Since we verify the control file's CRC, this has a useful side effect
991  * on machines where we need a run-time test for CRC support instructions.
992  * The postmaster will do the test once at startup, and then its child
993  * processes will inherit the correct function pointer and not need to
994  * repeat the test.
995  */
997 
998  /*
999  * Register the apply launcher. It's probably a good idea to call this
1000  * before any modules had a chance to take the background worker slots.
1001  */
1003 
1004  /*
1005  * process any libraries that should be preloaded at postmaster start
1006  */
1008 
1009  /*
1010  * Initialize SSL library, if specified.
1011  */
1012 #ifdef USE_SSL
1013  if (EnableSSL)
1014  {
1015  (void) secure_initialize(true);
1016  LoadedSSL = true;
1017  }
1018 #endif
1019 
1020  /*
1021  * Now that loadable modules have had their chance to alter any GUCs,
1022  * calculate MaxBackends.
1023  */
1025 
1026  /*
1027  * Give preloaded libraries a chance to request additional shared memory.
1028  */
1030 
1031  /*
1032  * Now that loadable modules have had their chance to request additional
1033  * shared memory, determine the value of any runtime-computed GUCs that
1034  * depend on the amount of shared memory required.
1035  */
1037 
1038  /*
1039  * Now that modules have been loaded, we can process any custom resource
1040  * managers specified in the wal_consistency_checking GUC.
1041  */
1043 
1044  /*
1045  * If -C was specified with a runtime-computed GUC, we held off printing
1046  * the value earlier, as the GUC was not yet initialized. We handle -C
1047  * for most GUCs before we lock the data directory so that the option may
1048  * be used on a running server. However, a handful of GUCs are runtime-
1049  * computed and do not have meaningful values until after locking the data
1050  * directory, and we cannot safely calculate their values earlier on a
1051  * running server. At this point, such GUCs should be properly
1052  * initialized, and we haven't yet set up shared memory, so this is a good
1053  * time to handle the -C option for these special GUCs.
1054  */
1055  if (output_config_variable != NULL)
1056  {
1057  const char *config_val = GetConfigOption(output_config_variable,
1058  false, false);
1059 
1060  puts(config_val ? config_val : "");
1061  ExitPostmaster(0);
1062  }
1063 
1064  /*
1065  * Set up shared memory and semaphores.
1066  *
1067  * Note: if using SysV shmem and/or semas, each postmaster startup will
1068  * normally choose the same IPC keys. This helps ensure that we will
1069  * clean up dead IPC objects if the postmaster crashes and is restarted.
1070  */
1072 
1073  /*
1074  * Estimate number of openable files. This must happen after setting up
1075  * semaphores, because on some platforms semaphores count as open files.
1076  */
1077  set_max_safe_fds();
1078 
1079  /*
1080  * Set reference point for stack-depth checking.
1081  */
1082  (void) set_stack_base();
1083 
1084  /*
1085  * Initialize pipe (or process handle on Windows) that allows children to
1086  * wake up from sleep on postmaster death.
1087  */
1089 
1090 #ifdef WIN32
1091 
1092  /*
1093  * Initialize I/O completion port used to deliver list of dead children.
1094  */
1095  win32ChildQueue = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 1);
1096  if (win32ChildQueue == NULL)
1097  ereport(FATAL,
1098  (errmsg("could not create I/O completion port for child queue")));
1099 #endif
1100 
1101 #ifdef EXEC_BACKEND
1102  /* Write out nondefault GUC settings for child processes to use */
1103  write_nondefault_variables(PGC_POSTMASTER);
1104 
1105  /*
1106  * Clean out the temp directory used to transmit parameters to child
1107  * processes (see internal_forkexec, below). We must do this before
1108  * launching any child processes, else we have a race condition: we could
1109  * remove a parameter file before the child can read it. It should be
1110  * safe to do so now, because we verified earlier that there are no
1111  * conflicting Postgres processes in this data directory.
1112  */
1114 #endif
1115 
1116  /*
1117  * Forcibly remove the files signaling a standby promotion request.
1118  * Otherwise, the existence of those files triggers a promotion too early,
1119  * whether a user wants that or not.
1120  *
1121  * This removal of files is usually unnecessary because they can exist
1122  * only during a few moments during a standby promotion. However there is
1123  * a race condition: if pg_ctl promote is executed and creates the files
1124  * during a promotion, the files can stay around even after the server is
1125  * brought up to be the primary. Then, if a new standby starts by using
1126  * the backup taken from the new primary, the files can exist at server
1127  * startup and must be removed in order to avoid an unexpected promotion.
1128  *
1129  * Note that promotion signal files need to be removed before the startup
1130  * process is invoked. Because, after that, they can be used by
1131  * postmaster's SIGUSR1 signal handler.
1132  */
1134 
1135  /* Do the same for logrotate signal file */
1137 
1138  /* Remove any outdated file holding the current log filenames. */
1139  if (unlink(LOG_METAINFO_DATAFILE) < 0 && errno != ENOENT)
1140  ereport(LOG,
1142  errmsg("could not remove file \"%s\": %m",
1144 
1145  /*
1146  * Initialize input sockets.
1147  *
1148  * Mark them all closed, and set up an on_proc_exit function that's
1149  * charged with closing the sockets again at postmaster shutdown.
1150  */
1151  for (i = 0; i < MAXLISTEN; i++)
1153 
1155 
1156  /*
1157  * If enabled, start up syslogger collection subprocess
1158  */
1160 
1161  /*
1162  * Reset whereToSendOutput from DestDebug (its starting state) to
1163  * DestNone. This stops ereport from sending log messages to stderr unless
1164  * Log_destination permits. We don't do this until the postmaster is
1165  * fully launched, since startup failures may as well be reported to
1166  * stderr.
1167  *
1168  * If we are in fact disabling logging to stderr, first emit a log message
1169  * saying so, to provide a breadcrumb trail for users who may not remember
1170  * that their logging is configured to go somewhere else.
1171  */
1173  ereport(LOG,
1174  (errmsg("ending log output to stderr"),
1175  errhint("Future log output will go to log destination \"%s\".",
1177 
1179 
1180  /*
1181  * Report server startup in log. While we could emit this much earlier,
1182  * it seems best to do so after starting the log collector, if we intend
1183  * to use one.
1184  */
1185  ereport(LOG,
1186  (errmsg("starting %s", PG_VERSION_STR)));
1187 
1188  /*
1189  * Establish input sockets.
1190  */
1191  if (ListenAddresses)
1192  {
1193  char *rawstring;
1194  List *elemlist;
1195  ListCell *l;
1196  int success = 0;
1197 
1198  /* Need a modifiable copy of ListenAddresses */
1199  rawstring = pstrdup(ListenAddresses);
1200 
1201  /* Parse string into list of hostnames */
1202  if (!SplitGUCList(rawstring, ',', &elemlist))
1203  {
1204  /* syntax error in list */
1205  ereport(FATAL,
1206  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1207  errmsg("invalid list syntax in parameter \"%s\"",
1208  "listen_addresses")));
1209  }
1210 
1211  foreach(l, elemlist)
1212  {
1213  char *curhost = (char *) lfirst(l);
1214 
1215  if (strcmp(curhost, "*") == 0)
1216  status = StreamServerPort(AF_UNSPEC, NULL,
1217  (unsigned short) PostPortNumber,
1218  NULL,
1220  else
1221  status = StreamServerPort(AF_UNSPEC, curhost,
1222  (unsigned short) PostPortNumber,
1223  NULL,
1225 
1226  if (status == STATUS_OK)
1227  {
1228  success++;
1229  /* record the first successful host addr in lockfile */
1230  if (!listen_addr_saved)
1231  {
1233  listen_addr_saved = true;
1234  }
1235  }
1236  else
1237  ereport(WARNING,
1238  (errmsg("could not create listen socket for \"%s\"",
1239  curhost)));
1240  }
1241 
1242  if (!success && elemlist != NIL)
1243  ereport(FATAL,
1244  (errmsg("could not create any TCP/IP sockets")));
1245 
1246  list_free(elemlist);
1247  pfree(rawstring);
1248  }
1249 
1250 #ifdef USE_BONJOUR
1251  /* Register for Bonjour only if we opened TCP socket(s) */
1253  {
1254  DNSServiceErrorType err;
1255 
1256  /*
1257  * We pass 0 for interface_index, which will result in registering on
1258  * all "applicable" interfaces. It's not entirely clear from the
1259  * DNS-SD docs whether this would be appropriate if we have bound to
1260  * just a subset of the available network interfaces.
1261  */
1262  err = DNSServiceRegister(&bonjour_sdref,
1263  0,
1264  0,
1265  bonjour_name,
1266  "_postgresql._tcp.",
1267  NULL,
1268  NULL,
1270  0,
1271  NULL,
1272  NULL,
1273  NULL);
1274  if (err != kDNSServiceErr_NoError)
1275  ereport(LOG,
1276  (errmsg("DNSServiceRegister() failed: error code %ld",
1277  (long) err)));
1278 
1279  /*
1280  * We don't bother to read the mDNS daemon's reply, and we expect that
1281  * it will automatically terminate our registration when the socket is
1282  * closed at postmaster termination. So there's nothing more to be
1283  * done here. However, the bonjour_sdref is kept around so that
1284  * forked children can close their copies of the socket.
1285  */
1286  }
1287 #endif
1288 
1290  {
1291  char *rawstring;
1292  List *elemlist;
1293  ListCell *l;
1294  int success = 0;
1295 
1296  /* Need a modifiable copy of Unix_socket_directories */
1297  rawstring = pstrdup(Unix_socket_directories);
1298 
1299  /* Parse string into list of directories */
1300  if (!SplitDirectoriesString(rawstring, ',', &elemlist))
1301  {
1302  /* syntax error in list */
1303  ereport(FATAL,
1304  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1305  errmsg("invalid list syntax in parameter \"%s\"",
1306  "unix_socket_directories")));
1307  }
1308 
1309  foreach(l, elemlist)
1310  {
1311  char *socketdir = (char *) lfirst(l);
1312 
1313  status = StreamServerPort(AF_UNIX, NULL,
1314  (unsigned short) PostPortNumber,
1315  socketdir,
1317 
1318  if (status == STATUS_OK)
1319  {
1320  success++;
1321  /* record the first successful Unix socket in lockfile */
1322  if (success == 1)
1324  }
1325  else
1326  ereport(WARNING,
1327  (errmsg("could not create Unix-domain socket in directory \"%s\"",
1328  socketdir)));
1329  }
1330 
1331  if (!success && elemlist != NIL)
1332  ereport(FATAL,
1333  (errmsg("could not create any Unix-domain sockets")));
1334 
1335  list_free_deep(elemlist);
1336  pfree(rawstring);
1337  }
1338 
1339  /*
1340  * check that we have some socket to listen on
1341  */
1342  if (ListenSocket[0] == PGINVALID_SOCKET)
1343  ereport(FATAL,
1344  (errmsg("no socket created for listening")));
1345 
1346  /*
1347  * If no valid TCP ports, write an empty line for listen address,
1348  * indicating the Unix socket must be used. Note that this line is not
1349  * added to the lock file until there is a socket backing it.
1350  */
1351  if (!listen_addr_saved)
1353 
1354  /*
1355  * Record postmaster options. We delay this till now to avoid recording
1356  * bogus options (eg, unusable port number).
1357  */
1358  if (!CreateOptsFile(argc, argv, my_exec_path))
1359  ExitPostmaster(1);
1360 
1361  /*
1362  * Write the external PID file if requested
1363  */
1364  if (external_pid_file)
1365  {
1366  FILE *fpidfile = fopen(external_pid_file, "w");
1367 
1368  if (fpidfile)
1369  {
1370  fprintf(fpidfile, "%d\n", MyProcPid);
1371  fclose(fpidfile);
1372 
1373  /* Make PID file world readable */
1374  if (chmod(external_pid_file, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) != 0)
1375  write_stderr("%s: could not change permissions of external PID file \"%s\": %s\n",
1377  }
1378  else
1379  write_stderr("%s: could not write external PID file \"%s\": %s\n",
1381 
1383  }
1384 
1385  /*
1386  * Remove old temporary files. At this point there can be no other
1387  * Postgres processes running in this directory, so this should be safe.
1388  */
1390 
1391  /*
1392  * Initialize the autovacuum subsystem (again, no process start yet)
1393  */
1394  autovac_init();
1395 
1396  /*
1397  * Load configuration files for client authentication.
1398  */
1399  if (!load_hba())
1400  {
1401  /*
1402  * It makes no sense to continue if we fail to load the HBA file,
1403  * since there is no way to connect to the database in this case.
1404  */
1405  ereport(FATAL,
1406  /* translator: %s is a configuration file */
1407  (errmsg("could not load %s", HbaFileName)));
1408  }
1409  if (!load_ident())
1410  {
1411  /*
1412  * We can start up without the IDENT file, although it means that you
1413  * cannot log in using any of the authentication methods that need a
1414  * user name mapping. load_ident() already logged the details of error
1415  * to the log.
1416  */
1417  }
1418 
1419 #ifdef HAVE_PTHREAD_IS_THREADED_NP
1420 
1421  /*
1422  * On macOS, libintl replaces setlocale() with a version that calls
1423  * CFLocaleCopyCurrent() when its second argument is "" and every relevant
1424  * environment variable is unset or empty. CFLocaleCopyCurrent() makes
1425  * the process multithreaded. The postmaster calls sigprocmask() and
1426  * calls fork() without an immediate exec(), both of which have undefined
1427  * behavior in a multithreaded program. A multithreaded postmaster is the
1428  * normal case on Windows, which offers neither fork() nor sigprocmask().
1429  */
1430  if (pthread_is_threaded_np() != 0)
1431  ereport(FATAL,
1432  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1433  errmsg("postmaster became multithreaded during startup"),
1434  errhint("Set the LC_ALL environment variable to a valid locale.")));
1435 #endif
1436 
1437  /*
1438  * Remember postmaster startup time
1439  */
1441 
1442  /*
1443  * Report postmaster status in the postmaster.pid file, to allow pg_ctl to
1444  * see what's happening.
1445  */
1447 
1448  /* Start bgwriter and checkpointer so they can help with recovery */
1449  if (CheckpointerPID == 0)
1451  if (BgWriterPID == 0)
1453 
1454  /*
1455  * We're ready to rock and roll...
1456  */
1458  Assert(StartupPID != 0);
1460  pmState = PM_STARTUP;
1461 
1462  /* Some workers may be scheduled to start now */
1464 
1465  status = ServerLoop();
1466 
1467  /*
1468  * ServerLoop probably shouldn't ever return, but if it does, close down.
1469  */
1470  ExitPostmaster(status != STATUS_OK);
1471 
1472  abort(); /* not reached */
1473 }
1474 
1475 
1476 /*
1477  * on_proc_exit callback to close server's listen sockets
 *
 * The work is ordered as the comments in the body describe:
 *   1. iterate over all MAXLISTEN listen-socket slots to close the socket
 *      FDs;
 *   2. remove the filesystem entries of Unix sockets;
 *   3. leave socket lock files alone, since a later on_proc_exit callback
 *      removes them.
 * Steps 1 and 2 must stay in this order to avoid races with an incoming
 * postmaster.
 *
 * NOTE(review): this copy of the file has lost the declarator line that
 * should follow "static void", everything between the inner braces of the
 * loop (including the test that guards them), and the statement that
 * belongs after the "remove any filesystem entries" comment.  Confirm them
 * against the upstream file before editing this function.
1478  */
1479 static void
1481 {
1482  int i;
1483 
1484  /*
1485  * First, explicitly close all the socket FDs. We used to just let this
1486  * happen implicitly at postmaster exit, but it's better to close them
1487  * before we remove the postmaster.pid lockfile; otherwise there's a race
1488  * condition if a new postmaster wants to re-use the TCP port number.
1489  */
1490  for (i = 0; i < MAXLISTEN; i++)
1491  {
1493  {
1496  }
1497  }
1498 
1499  /*
1500  * Next, remove any filesystem entries for Unix sockets. To avoid race
1501  * conditions against incoming postmasters, this must happen after closing
1502  * the sockets and before removing lock files.
1503  */
1505 
1506  /*
1507  * We don't do anything about socket lock files here; those will be
1508  * removed in a later on_proc_exit callback.
1509  */
1510 }
1511 
1512 /*
1513  * on_proc_exit callback to delete external_pid_file
 *
 * Does nothing when external_pid_file is unset.  The result of unlink() is
 * not examined, so a failure to remove the file goes unreported.
 *
 * NOTE(review): the declarator line (function name and parameter list) that
 * should follow "static void" is missing from this copy of the file.
1514  */
1515 static void
1517 {
 /* only attempt removal when an external PID file name is set */
1518  if (external_pid_file)
1519  unlink(external_pid_file);
1520 }
1521 
1522 
1523 /*
1524  * Compute and check the directory paths to files that are part of the
1525  * installation (as deduced from the postgres executable's own location)
 *
 * Steps visible in the body:
 *   - find_my_exec(argv0, my_exec_path) must succeed, else FATAL;
 *   - in EXEC_BACKEND builds only, find_other_exec() must locate a matching
 *     "postgres" executable (result goes to postgres_exec_path), else FATAL;
 *   - the directory named by pkglib_path must be openable with
 *     AllocateDir(); the handle is released again with FreeDir() right
 *     after the check.
 * Note that the directory check reports at ERROR level, whereas the two
 * executable lookups report at FATAL.
 *
 * NOTE(review): this copy of the file has lost the declarator line after
 * "static void" (the body uses a parameter named argv0), the statement that
 * belongs after the "Locate the pkglib directory" comment, and the line
 * that opens the ereport(ERROR, ...) argument list.  Confirm them against
 * the upstream file before editing this function.
1526  */
1527 static void
1529 {
1530  DIR *pdir;
1531 
1532  /* Locate the postgres executable itself */
1533  if (find_my_exec(argv0, my_exec_path) < 0)
1534  ereport(FATAL,
1535  (errmsg("%s: could not locate my own executable path", argv0)));
1536 
1537 #ifdef EXEC_BACKEND
1538  /* Locate executable backend before we change working directory */
1539  if (find_other_exec(argv0, "postgres", PG_BACKEND_VERSIONSTR,
1540  postgres_exec_path) < 0)
1541  ereport(FATAL,
1542  (errmsg("%s: could not locate matching postgres executable",
1543  argv0)));
1544 #endif
1545 
1546  /*
1547  * Locate the pkglib directory --- this has to be set early in case we try
1548  * to load any modules from it in response to postgresql.conf entries.
1549  */
1551 
1552  /*
1553  * Verify that there's a readable directory there; otherwise the Postgres
1554  * installation is incomplete or corrupt. (A typical cause of this
1555  * failure is that the postgres executable has been moved or hardlinked to
1556  * some directory that's not a sibling of the installation lib/
1557  * directory.)
1558  */
1559  pdir = AllocateDir(pkglib_path);
1560  if (pdir == NULL)
1561  ereport(ERROR,
1563  errmsg("could not open directory \"%s\": %m",
1564  pkglib_path),
1565  errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
1566  my_exec_path)));
 /* the directory was opened only as an existence/readability probe */
1567  FreeDir(pdir);
1568 
1569  /*
1570  * XXX is it worth similarly checking the share/ directory? If the lib/
1571  * directory is there, then share/ probably is too.
1572  */
1573 }
1574 
1575 /*
1576  * Check that pg_control exists in the correct location in the data directory.
1577  *
1578  * No attempt is made to validate the contents of pg_control here. This is
1579  * just a sanity check to see if we are looking at a real data directory.
 *
 * The path probed is "<DataDir>/global/pg_control", built in a MAXPGPATH-sized
 * buffer.  If the file cannot be opened, a message including strerror(errno)
 * is written with write_stderr() and ExitPostmaster(2) is called; otherwise
 * the file is closed again immediately with FreeFile().
 *
 * NOTE(review): the declarator line (function name and parameter list) that
 * should follow "static void" is missing from this copy of the file.
1580  */
1581 static void
1583 {
1584  char path[MAXPGPATH];
1585  FILE *fp;
1586 
1587  snprintf(path, sizeof(path), "%s/global/pg_control", DataDir);
1588 
 /* open for reading only; the contents are never looked at */
1589  fp = AllocateFile(path, PG_BINARY_R);
1590  if (fp == NULL)
1591  {
1592  write_stderr("%s: could not find the database system\n"
1593  "Expected to find it in the directory \"%s\",\n"
1594  "but could not open file \"%s\": %s\n",
1595  progname, DataDir, path, strerror(errno));
1596  ExitPostmaster(2);
1597  }
1598  FreeFile(fp);
1599 }
1600 
1601 /*
1602  * Determine how long we should let ServerLoop sleep, in milliseconds.
1603  *
1604  * In normal conditions we wait at most one minute, to ensure that the other
1605  * background tasks handled by ServerLoop get done even when no requests are
1606  * arriving. However, if there are background workers waiting to be started,
1607  * we don't actually sleep so that they are quickly serviced. Other exception
1608  * cases are as shown in the code.
 *
 * Return value, as far as the visible code shows:
 *   - in the first branch, while AbortStartTime is nonzero: the time left
 *     until SIGKILL_CHILDREN_AFTER_SECS has elapsed since AbortStartTime,
 *     clamped at 0;
 *   - 0 when StartWorkerNeeded is set;
 *   - otherwise never more than 60 * 1000, and less when a crashed worker
 *     is due for restart sooner (rw_crashed_at plus bgw_restart_time
 *     seconds, the minimum over all crashed workers).
 *
 * Side effect: while scanning, workers matched by the (partly missing)
 * condition that includes rw_terminate are dropped with
 * ForgetBackgroundWorker().
 *
 * NOTE(review): this copy of the file has lost the declarator line after
 * "static int", the second half of the first if-condition, the loop header
 * that drives the iterator siter, the first half of the condition ending in
 * "|| rw->rw_terminate", and the start of the statement that assigns ms.
 * Confirm them against the upstream file before editing this function.
1609  */
1610 static int
1612 {
1613  TimestampTz next_wakeup = 0;
1614 
1615  /*
1616  * Normal case: either there are no background workers at all, or we're in
1617  * a shutdown sequence (during which we ignore bgworkers altogether).
1618  */
1619  if (Shutdown > NoShutdown ||
1621  {
1622  if (AbortStartTime != 0)
1623  {
1624  int seconds;
1625 
1626  /* time left to abort; clamp to 0 in case it already expired */
1627  seconds = SIGKILL_CHILDREN_AFTER_SECS -
1628  (time(NULL) - AbortStartTime);
1629 
1630  return Max(seconds * 1000, 0);
1631  }
1632  else
1633  return 60 * 1000;
1634  }
1635 
1636  if (StartWorkerNeeded)
1637  return 0;
1638 
1639  if (HaveCrashedWorker)
1640  {
1641  slist_mutable_iter siter;
1642 
1643  /*
1644  * When there are crashed bgworkers, we sleep just long enough that
1645  * they are restarted when they request to be. Scan the list to
1646  * determine the minimum of all wakeup times according to most recent
1647  * crash time and requested restart interval.
1648  */
1650  {
1651  RegisteredBgWorker *rw;
1652  TimestampTz this_wakeup;
1653 
1654  rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
1655 
 /* a zero crash time means this worker has no restart pending */
1656  if (rw->rw_crashed_at == 0)
1657  continue;
1658 
1660  || rw->rw_terminate)
1661  {
1662  ForgetBackgroundWorker(&siter);
1663  continue;
1664  }
1665 
 /* bgw_restart_time is scaled by 1000 to get milliseconds */
1666  this_wakeup = TimestampTzPlusMilliseconds(rw->rw_crashed_at,
1667  1000L * rw->rw_worker.bgw_restart_time);
1668  if (next_wakeup == 0 || this_wakeup < next_wakeup)
1669  next_wakeup = this_wakeup;
1670  }
1671  }
1672 
 /* next_wakeup stays 0 unless a crashed worker set it above */
1673  if (next_wakeup != 0)
1674  {
1675  int ms;
1676 
1677  /* result of TimestampDifferenceMilliseconds is in [0, INT_MAX] */
1679  next_wakeup);
1680  return Min(60 * 1000, ms);
1681  }
1682 
1683  return 60 * 1000;
1684 }
1685 
1686 /*
1687  * Activate or deactivate notifications of server socket events. Since we
1688  * don't currently have a way to remove events from an existing WaitEventSet,
1689  * we'll just destroy and recreate the whole thing. This is called during
1690  * shutdown so we can wait for backends to exit without accepting new
1691  * connections, and during crash reinitialization when we need to start
1692  * listening for new connections again. The WaitEventSet will be freed in fork
1693  * children by ClosePostmasterPorts().
 *
 * accept_connections: when true, the leading run of valid entries in
 * ListenSocket[] is counted (stopping at the first PGINVALID_SOCKET or at
 * MAXLISTEN) and the final loop runs once per counted socket; when false,
 * nsockets stays 0 and that loop is skipped.
 *
 * pm_wait_set is reset to NULL unconditionally before the set is rebuilt.
 *
 * NOTE(review): this copy of the file has lost the statement controlled by
 * "if (pm_wait_set)", the statements that create the new set (only a
 * trailing "NULL);" survives), and the start of the call made for each
 * socket in the final loop.  Confirm them against the upstream file before
 * editing this function.
1694  */
1695 static void
1696 ConfigurePostmasterWaitSet(bool accept_connections)
1697 {
1698  int nsockets;
1699 
1700  if (pm_wait_set)
1702  pm_wait_set = NULL;
1703 
1704  /* How many server sockets do we need to wait for? */
1705  nsockets = 0;
1706  if (accept_connections)
1707  {
1708  while (nsockets < MAXLISTEN &&
1709  ListenSocket[nsockets] != PGINVALID_SOCKET)
1710  ++nsockets;
1711  }
1712 
1715  NULL);
1716 
1717  if (accept_connections)
1718  {
1719  for (int i = 0; i < nsockets; i++)
1721  NULL, NULL);
1722  }
1723 }
1724 
1725 /*
1726  * Main idle loop of postmaster
 *
 * Each pass of the endless for (;;) loop does the following, in order:
 *   - waits on pm_wait_set, receiving at most lengthof(events) events;
 *   - walks the returned events: tests WL_LATCH_SET and pending_pm_pmsignal
 *     (the actions taken are among the missing lines), and for
 *     WL_SOCKET_ACCEPT events builds a Port with ConnCreate() and afterwards
 *     releases this process's copy with StreamClose() and ConnFree();
 *   - checks for helper processes whose PID variable is 0 (SysLoggerPID,
 *     CheckpointerPID, BgWriterPID, WalWriterPID, AutoVacPID, PgArchPID)
 *     under the state conditions tested below;
 *   - runs the time-based checks against time(NULL): escalation once
 *     AbortStartTime is set, the once-a-minute lock file recheck, and the
 *     58-minute touch of socket files.
 *
 * No return statement appears in the visible body although the function is
 * declared to return int.
 *
 * NOTE(review): this copy of the file has lost the declarator line after
 * "static int" (presumably this is the ServerLoop() invoked by the startup
 * code), the timeout argument of WaitEventSetWait(), and many statements,
 * mostly the ones controlled by the if-tests (including parts of some
 * conditions).  Confirm them against the upstream file before editing this
 * function.
1727  */
1728 static int
1730 {
1731  time_t last_lockfile_recheck_time,
1732  last_touch_time;
1733  WaitEvent events[MAXLISTEN];
1734  int nevents;
1735 
 /* both periodic timers start counting from loop entry */
1737  last_lockfile_recheck_time = last_touch_time = time(NULL);
1738 
1739  for (;;)
1740  {
1741  time_t now;
1742 
1743  nevents = WaitEventSetWait(pm_wait_set,
1745  events,
1746  lengthof(events),
1747  0 /* postmaster posts no wait_events */ );
1748 
1749  /*
1750  * Latch set by signal handler, or new connection pending on any of
1751  * our sockets? If the latter, fork a child process to deal with it.
1752  */
1753  for (int i = 0; i < nevents; i++)
1754  {
1755  if (events[i].events & WL_LATCH_SET)
1757 
1758  /*
1759  * The following requests are handled unconditionally, even if we
1760  * didn't see WL_LATCH_SET. This gives high priority to shutdown
1761  * and reload requests where the latch happens to appear later in
1762  * events[] or will be reported by a later call to
1763  * WaitEventSetWait().
1764  */
1771  if (pending_pm_pmsignal)
1773 
1774  if (events[i].events & WL_SOCKET_ACCEPT)
1775  {
1776  Port *port;
1777 
1778  port = ConnCreate(events[i].fd);
1779  if (port)
1780  {
1782 
1783  /*
1784  * We no longer need the open socket or port structure in
1785  * this process
1786  */
1787  StreamClose(port->sock);
1788  ConnFree(port);
1789  }
1790  }
1791  }
1792 
1793  /* If we have lost the log collector, try to start a new one */
1794  if (SysLoggerPID == 0 && Logging_collector)
1796 
1797  /*
1798  * If no background writer process is running, and we are not in a
1799  * state that prevents it, start one. It doesn't matter if this
1800  * fails, we'll just try again later. Likewise for the checkpointer.
1801  */
1802  if (pmState == PM_RUN || pmState == PM_RECOVERY ||
1804  {
1805  if (CheckpointerPID == 0)
1807  if (BgWriterPID == 0)
1809  }
1810 
1811  /*
1812  * Likewise, if we have lost the walwriter process, try to start a new
1813  * one. But this is needed only in normal operation (else we cannot
1814  * be writing any new WAL).
1815  */
1816  if (WalWriterPID == 0 && pmState == PM_RUN)
1818 
1819  /*
1820  * If we have lost the autovacuum launcher, try to start a new one. We
1821  * don't want autovacuum to run in binary upgrade mode because
1822  * autovacuum might update relfrozenxid for empty tables before the
1823  * physical files are put in place.
1824  */
1825  if (!IsBinaryUpgrade && AutoVacPID == 0 &&
1827  pmState == PM_RUN)
1828  {
1830  if (AutoVacPID != 0)
1831  start_autovac_launcher = false; /* signal processed */
1832  }
1833 
1834  /* If we have lost the archiver, try to start a new one. */
1835  if (PgArchPID == 0 && PgArchStartupAllowed())
1837 
1838  /* If we need to signal the autovacuum launcher, do so now */
1840  {
 /* clear the request first; the launcher may not be running */
1841  avlauncher_needs_signal = false;
1842  if (AutoVacPID != 0)
1844  }
1845 
1846  /* If we need to start a WAL receiver, try to do that now */
1849 
1850  /* Get other worker processes running, if needed */
1853 
1854 #ifdef HAVE_PTHREAD_IS_THREADED_NP
1855 
1856  /*
1857  * With assertions enabled, check regularly for appearance of
1858  * additional threads. All builds check at start and exit.
1859  */
1860  Assert(pthread_is_threaded_np() == 0);
1861 #endif
1862 
1863  /*
1864  * Lastly, check to see if it's time to do some things that we don't
1865  * want to do every single time through the loop, because they're a
1866  * bit expensive. Note that there's up to a minute of slop in when
1867  * these tasks will be performed, since DetermineSleepTime() will let
1868  * us sleep at most that long; except for SIGKILL timeout which has
1869  * special-case logic there.
1870  */
1871  now = time(NULL);
1872 
1873  /*
1874  * If we already sent SIGQUIT to children and they are slow to shut
1875  * down, it's time to send them SIGKILL (or SIGABRT if requested).
1876  * This doesn't happen normally, but under certain conditions backends
1877  * can get stuck while shutting down. This is a last measure to get
1878  * them unwedged.
1879  *
1880  * Note we also do this during recovery from a process crash.
1881  */
1882  if ((Shutdown >= ImmediateShutdown || FatalError) &&
1883  AbortStartTime != 0 &&
1885  {
1886  /* We were gentle with them before. Not anymore */
1887  ereport(LOG,
1888  /* translator: %s is SIGKILL or SIGABRT */
1889  (errmsg("issuing %s to recalcitrant children",
1890  send_abort_for_kill ? "SIGABRT" : "SIGKILL")));
1892  /* reset flag so we don't SIGKILL again */
1893  AbortStartTime = 0;
1894  }
1895 
1896  /*
1897  * Once a minute, verify that postmaster.pid hasn't been removed or
1898  * overwritten. If it has, we force a shutdown. This avoids having
1899  * postmasters and child processes hanging around after their database
1900  * is gone, and maybe causing problems if a new database cluster is
1901  * created in the same place. It also provides some protection
1902  * against a DBA foolishly removing postmaster.pid and manually
1903  * starting a new postmaster. Data corruption is likely to ensue from
1904  * that anyway, but we can minimize the damage by aborting ASAP.
1905  */
1906  if (now - last_lockfile_recheck_time >= 1 * SECS_PER_MINUTE)
1907  {
1908  if (!RecheckDataDirLockFile())
1909  {
1910  ereport(LOG,
1911  (errmsg("performing immediate shutdown because data directory lock file is invalid")));
1913  }
 /* the timer restarts whether or not the recheck passed */
1914  last_lockfile_recheck_time = now;
1915  }
1916 
1917  /*
1918  * Touch Unix socket and lock files every 58 minutes, to ensure that
1919  * they are not removed by overzealous /tmp-cleaning tasks. We assume
1920  * no one runs cleaners with cutoff times of less than an hour ...
1921  */
1922  if (now - last_touch_time >= 58 * SECS_PER_MINUTE)
1923  {
1924  TouchSocketFiles();
1926  last_touch_time = now;
1927  }
1928  }
1929 }
1930 
1931 /*
1932  * Read a client's startup packet and do something according to it.
1933  *
1934  * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1935  * not return at all.
1936  *
1937  * (Note that ereport(FATAL) stuff is sent to the client, so only use it
1938  * if that's what you want. Return STATUS_ERROR if you don't want to
1939  * send anything to the client, which would typically be appropriate
1940  * if we detect a communications failure.)
1941  *
1942  * Set ssl_done and/or gss_done when negotiation of an encrypted layer
1943  * (currently, TLS or GSSAPI) is completed. A successful negotiation of either
1944  * encryption layer sets both flags, but a rejected negotiation sets only the
1945  * flag for that layer, since the client may wish to try the other one. We
1946  * should make no assumption here about the order in which the client may make
1947  * requests.
1948  */
1949 static int
1950 ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done)
1951 {
1952  int32 len;
1953  char *buf;
1954  ProtocolVersion proto;
1955  MemoryContext oldcontext;
1956 
1957  pq_startmsgread();
1958 
1959  /*
1960  * Grab the first byte of the length word separately, so that we can tell
1961  * whether we have no data at all or an incomplete packet. (This might
1962  * sound inefficient, but it's not really, because of buffering in
1963  * pqcomm.c.)
1964  */
1965  if (pq_getbytes((char *) &len, 1) == EOF)
1966  {
1967  /*
1968  * If we get no data at all, don't clutter the log with a complaint;
1969  * such cases often occur for legitimate reasons. An example is that
1970  * we might be here after responding to NEGOTIATE_SSL_CODE, and if the
1971  * client didn't like our response, it'll probably just drop the
1972  * connection. Service-monitoring software also often just opens and
1973  * closes a connection without sending anything. (So do port
1974  * scanners, which may be less benign, but it's not really our job to
1975  * notice those.)
1976  */
1977  return STATUS_ERROR;
1978  }
1979 
1980  if (pq_getbytes(((char *) &len) + 1, 3) == EOF)
1981  {
1982  /* Got a partial length word, so bleat about that */
1983  if (!ssl_done && !gss_done)
1985  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1986  errmsg("incomplete startup packet")));
1987  return STATUS_ERROR;
1988  }
1989 
1990  len = pg_ntoh32(len);
1991  len -= 4;
1992 
1993  if (len < (int32) sizeof(ProtocolVersion) ||
1995  {
1997  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1998  errmsg("invalid length of startup packet")));
1999  return STATUS_ERROR;
2000  }
2001 
2002  /*
2003  * Allocate space to hold the startup packet, plus one extra byte that's
2004  * initialized to be zero. This ensures we will have null termination of
2005  * all strings inside the packet.
2006  */
2007  buf = palloc(len + 1);
2008  buf[len] = '\0';
2009 
2010  if (pq_getbytes(buf, len) == EOF)
2011  {
2013  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2014  errmsg("incomplete startup packet")));
2015  return STATUS_ERROR;
2016  }
2017  pq_endmsgread();
2018 
2019  /*
2020  * The first field is either a protocol version number or a special
2021  * request code.
2022  */
2023  port->proto = proto = pg_ntoh32(*((ProtocolVersion *) buf));
2024 
2025  if (proto == CANCEL_REQUEST_CODE)
2026  {
2027  if (len != sizeof(CancelRequestPacket))
2028  {
2030  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2031  errmsg("invalid length of startup packet")));
2032  return STATUS_ERROR;
2033  }
2035  /* Not really an error, but we don't want to proceed further */
2036  return STATUS_ERROR;
2037  }
2038 
2039  if (proto == NEGOTIATE_SSL_CODE && !ssl_done)
2040  {
2041  char SSLok;
2042 
2043 #ifdef USE_SSL
2044  /* No SSL when disabled or on Unix sockets */
2045  if (!LoadedSSL || port->laddr.addr.ss_family == AF_UNIX)
2046  SSLok = 'N';
2047  else
2048  SSLok = 'S'; /* Support for SSL */
2049 #else
2050  SSLok = 'N'; /* No support for SSL */
2051 #endif
2052 
2053 retry1:
2054  if (send(port->sock, &SSLok, 1, 0) != 1)
2055  {
2056  if (errno == EINTR)
2057  goto retry1; /* if interrupted, just retry */
2060  errmsg("failed to send SSL negotiation response: %m")));
2061  return STATUS_ERROR; /* close the connection */
2062  }
2063 
2064 #ifdef USE_SSL
2065  if (SSLok == 'S' && secure_open_server(port) == -1)
2066  return STATUS_ERROR;
2067 #endif
2068 
2069  /*
2070  * At this point we should have no data already buffered. If we do,
2071  * it was received before we performed the SSL handshake, so it wasn't
2072  * encrypted and indeed may have been injected by a man-in-the-middle.
2073  * We report this case to the client.
2074  */
2075  if (pq_buffer_has_data())
2076  ereport(FATAL,
2077  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2078  errmsg("received unencrypted data after SSL request"),
2079  errdetail("This could be either a client-software bug or evidence of an attempted man-in-the-middle attack.")));
2080 
2081  /*
2082  * regular startup packet, cancel, etc packet should follow, but not
2083  * another SSL negotiation request, and a GSS request should only
2084  * follow if SSL was rejected (client may negotiate in either order)
2085  */
2086  return ProcessStartupPacket(port, true, SSLok == 'S');
2087  }
2088  else if (proto == NEGOTIATE_GSS_CODE && !gss_done)
2089  {
2090  char GSSok = 'N';
2091 
2092 #ifdef ENABLE_GSS
2093  /* No GSSAPI encryption when on Unix socket */
2094  if (port->laddr.addr.ss_family != AF_UNIX)
2095  GSSok = 'G';
2096 #endif
2097 
2098  while (send(port->sock, &GSSok, 1, 0) != 1)
2099  {
2100  if (errno == EINTR)
2101  continue;
2104  errmsg("failed to send GSSAPI negotiation response: %m")));
2105  return STATUS_ERROR; /* close the connection */
2106  }
2107 
2108 #ifdef ENABLE_GSS
2109  if (GSSok == 'G' && secure_open_gssapi(port) == -1)
2110  return STATUS_ERROR;
2111 #endif
2112 
2113  /*
2114  * At this point we should have no data already buffered. If we do,
2115  * it was received before we performed the GSS handshake, so it wasn't
2116  * encrypted and indeed may have been injected by a man-in-the-middle.
2117  * We report this case to the client.
2118  */
2119  if (pq_buffer_has_data())
2120  ereport(FATAL,
2121  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2122  errmsg("received unencrypted data after GSSAPI encryption request"),
2123  errdetail("This could be either a client-software bug or evidence of an attempted man-in-the-middle attack.")));
2124 
2125  /*
2126  * regular startup packet, cancel, etc packet should follow, but not
2127  * another GSS negotiation request, and an SSL request should only
2128  * follow if GSS was rejected (client may negotiate in either order)
2129  */
2130  return ProcessStartupPacket(port, GSSok == 'G', true);
2131  }
2132 
2133  /* Could add additional special packet types here */
2134 
2135  /*
2136  * Set FrontendProtocol now so that ereport() knows what format to send if
2137  * we fail during startup.
2138  */
2139  FrontendProtocol = proto;
2140 
2141  /* Check that the major protocol version is in range. */
2144  ereport(FATAL,
2145  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2146  errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
2147  PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
2151 
2152  /*
2153  * Now fetch parameters out of startup packet and save them into the Port
2154  * structure. All data structures attached to the Port struct must be
2155  * allocated in TopMemoryContext so that they will remain available in a
2156  * running backend (even after PostmasterContext is destroyed). We need
2157  * not worry about leaking this storage on failure, since we aren't in the
2158  * postmaster process anymore.
2159  */
2161 
2162  /* Handle protocol version 3 startup packet */
2163  {
2164  int32 offset = sizeof(ProtocolVersion);
2165  List *unrecognized_protocol_options = NIL;
2166 
2167  /*
2168  * Scan packet body for name/option pairs. We can assume any string
2169  * beginning within the packet body is null-terminated, thanks to
2170  * zeroing extra byte above.
2171  */
2172  port->guc_options = NIL;
2173 
2174  while (offset < len)
2175  {
2176  char *nameptr = buf + offset;
2177  int32 valoffset;
2178  char *valptr;
2179 
2180  if (*nameptr == '\0')
2181  break; /* found packet terminator */
2182  valoffset = offset + strlen(nameptr) + 1;
2183  if (valoffset >= len)
2184  break; /* missing value, will complain below */
2185  valptr = buf + valoffset;
2186 
2187  if (strcmp(nameptr, "database") == 0)
2188  port->database_name = pstrdup(valptr);
2189  else if (strcmp(nameptr, "user") == 0)
2190  port->user_name = pstrdup(valptr);
2191  else if (strcmp(nameptr, "options") == 0)
2192  port->cmdline_options = pstrdup(valptr);
2193  else if (strcmp(nameptr, "replication") == 0)
2194  {
2195  /*
2196  * Due to backward compatibility concerns the replication
2197  * parameter is a hybrid beast which allows the value to be
2198  * either boolean or the string 'database'. The latter
2199  * connects to a specific database, which is required e.g. for
2200  * logical decoding.
2201  */
2202  if (strcmp(valptr, "database") == 0)
2203  {
2204  am_walsender = true;
2205  am_db_walsender = true;
2206  }
2207  else if (!parse_bool(valptr, &am_walsender))
2208  ereport(FATAL,
2209  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2210  errmsg("invalid value for parameter \"%s\": \"%s\"",
2211  "replication",
2212  valptr),
2213  errhint("Valid values are: \"false\", 0, \"true\", 1, \"database\".")));
2214  }
2215  else if (strncmp(nameptr, "_pq_.", 5) == 0)
2216  {
2217  /*
2218  * Any option beginning with _pq_. is reserved for use as a
2219  * protocol-level option, but at present no such options are
2220  * defined.
2221  */
2222  unrecognized_protocol_options =
2223  lappend(unrecognized_protocol_options, pstrdup(nameptr));
2224  }
2225  else
2226  {
2227  /* Assume it's a generic GUC option */
2228  port->guc_options = lappend(port->guc_options,
2229  pstrdup(nameptr));
2230  port->guc_options = lappend(port->guc_options,
2231  pstrdup(valptr));
2232 
2233  /*
2234  * Copy application_name to port if we come across it. This
2235  * is done so we can log the application_name in the
2236  * connection authorization message. Note that the GUC would
2237  * be used but we haven't gone through GUC setup yet.
2238  */
2239  if (strcmp(nameptr, "application_name") == 0)
2240  {
2241  port->application_name = pg_clean_ascii(valptr, 0);
2242  }
2243  }
2244  offset = valoffset + strlen(valptr) + 1;
2245  }
2246 
2247  /*
2248  * If we didn't find a packet terminator exactly at the end of the
2249  * given packet length, complain.
2250  */
2251  if (offset != len - 1)
2252  ereport(FATAL,
2253  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2254  errmsg("invalid startup packet layout: expected terminator as last byte")));
2255 
2256  /*
2257  * If the client requested a newer protocol version or if the client
2258  * requested any protocol options we didn't recognize, let them know
2259  * the newest minor protocol version we do support and the names of
2260  * any unrecognized options.
2261  */
2263  unrecognized_protocol_options != NIL)
2264  SendNegotiateProtocolVersion(unrecognized_protocol_options);
2265  }
2266 
2267  /* Check a user name was given. */
2268  if (port->user_name == NULL || port->user_name[0] == '\0')
2269  ereport(FATAL,
2270  (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
2271  errmsg("no PostgreSQL user name specified in startup packet")));
2272 
2273  /* The database defaults to the user name. */
2274  if (port->database_name == NULL || port->database_name[0] == '\0')
2275  port->database_name = pstrdup(port->user_name);
2276 
2277  if (am_walsender)
2279  else
2281 
2282  /*
2283  * Normal walsender backends, e.g. for streaming replication, are not
2284  * connected to a particular database. But walsenders used for logical
2285  * replication need to connect to a specific database. We allow streaming
2286  * replication commands to be issued even if connected to a database as it
2287  * can make sense to first make a basebackup and then stream changes
2288  * starting from that.
2289  */
2290  if (am_walsender && !am_db_walsender)
2291  port->database_name[0] = '\0';
2292 
2293  /*
2294  * Done putting stuff in TopMemoryContext.
2295  */
2296  MemoryContextSwitchTo(oldcontext);
2297 
2298  /*
2299  * If we're going to reject the connection due to database state, say so
2300  * now instead of wasting cycles on an authentication exchange. (This also
2301  * allows a pg_ping utility to be written.)
2302  */
2303  switch (port->canAcceptConnections)
2304  {
2305  case CAC_STARTUP:
2306  ereport(FATAL,
2308  errmsg("the database system is starting up")));
2309  break;
2310  case CAC_NOTCONSISTENT:
2311  if (EnableHotStandby)
2312  ereport(FATAL,
2314  errmsg("the database system is not yet accepting connections"),
2315  errdetail("Consistent recovery state has not been yet reached.")));
2316  else
2317  ereport(FATAL,
2319  errmsg("the database system is not accepting connections"),
2320  errdetail("Hot standby mode is disabled.")));
2321  break;
2322  case CAC_SHUTDOWN:
2323  ereport(FATAL,
2325  errmsg("the database system is shutting down")));
2326  break;
2327  case CAC_RECOVERY:
2328  ereport(FATAL,
2330  errmsg("the database system is in recovery mode")));
2331  break;
2332  case CAC_TOOMANY:
2333  ereport(FATAL,
2334  (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
2335  errmsg("sorry, too many clients already")));
2336  break;
2337  case CAC_OK:
2338  break;
2339  }
2340 
2341  return STATUS_OK;
2342 }
2343 
2344 /*
2345  * Send a NegotiateProtocolVersion to the client. This lets the client know
2346  * that they have requested a newer minor protocol version than we are able
2347  * to speak. We'll speak the highest version we know about; the client can,
2348  * of course, abandon the connection if that's a problem.
2349  *
2350  * We also include in the response a list of protocol options we didn't
2351  * understand. This allows clients to include optional parameters that might
2352  * be present either in newer protocol versions or third-party protocol
2353  * extensions without fear of having to reconnect if those options are not
2354  * understood, while at the same time making certain that the client is aware
2355  * of which options were actually accepted.
2356  */
2357 static void
2358 SendNegotiateProtocolVersion(List *unrecognized_protocol_options)
2359 {
2361  ListCell *lc;
2362 
2365  pq_sendint32(&buf, list_length(unrecognized_protocol_options));
2366  foreach(lc, unrecognized_protocol_options)
2367  pq_sendstring(&buf, lfirst(lc));
2368  pq_endmessage(&buf);
2369 
2370  /* no need to flush, some other message will follow */
2371 }
2372 
2373 /*
2374  * The client has sent a cancel request packet, not a normal
2375  * start-a-new-connection packet. Perform the necessary processing.
2376  * Nothing is sent back to the client.
2377  */
2378 static void
2380 {
2381  CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
2382  int backendPID;
2383  int32 cancelAuthCode;
2384  Backend *bp;
2385 
2386 #ifndef EXEC_BACKEND
2387  dlist_iter iter;
2388 #else
2389  int i;
2390 #endif
2391 
2392  backendPID = (int) pg_ntoh32(canc->backendPID);
2393  cancelAuthCode = (int32) pg_ntoh32(canc->cancelAuthCode);
2394 
2395  /*
2396  * See if we have a matching backend. In the EXEC_BACKEND case, we can no
2397  * longer access the postmaster's own backend list, and must rely on the
2398  * duplicate array in shared memory.
2399  */
2400 #ifndef EXEC_BACKEND
2401  dlist_foreach(iter, &BackendList)
2402  {
2403  bp = dlist_container(Backend, elem, iter.cur);
2404 #else
2405  for (i = MaxLivePostmasterChildren() - 1; i >= 0; i--)
2406  {
2407  bp = (Backend *) &ShmemBackendArray[i];
2408 #endif
2409  if (bp->pid == backendPID)
2410  {
2411  if (bp->cancel_key == cancelAuthCode)
2412  {
2413  /* Found a match; signal that backend to cancel current op */
2414  ereport(DEBUG2,
2415  (errmsg_internal("processing cancel request: sending SIGINT to process %d",
2416  backendPID)));
2417  signal_child(bp->pid, SIGINT);
2418  }
2419  else
2420  /* Right PID, wrong key: no way, Jose */
2421  ereport(LOG,
2422  (errmsg("wrong key in cancel request for process %d",
2423  backendPID)));
2424  return;
2425  }
2426 #ifndef EXEC_BACKEND /* make GNU Emacs 26.1 see brace balance */
2427  }
2428 #else
2429  }
2430 #endif
2431 
2432  /* No matching backend */
2433  ereport(LOG,
2434  (errmsg("PID %d in cancel request did not match any process",
2435  backendPID)));
2436 }
2437 
2438 /*
2439  * canAcceptConnections --- check to see if database state allows connections
2440  * of the specified type. backend_type can be BACKEND_TYPE_NORMAL,
2441  * BACKEND_TYPE_AUTOVAC, or BACKEND_TYPE_BGWORKER. (Note that we don't yet
2442  * know whether a NORMAL connection might turn into a walsender.)
2443  */
2444 static CAC_state
2445 canAcceptConnections(int backend_type)
2446 {
2447  CAC_state result = CAC_OK;
2448 
2449  /*
2450  * Can't start backends when in startup/shutdown/inconsistent recovery
2451  * state. We treat autovac workers the same as user backends for this
2452  * purpose. However, bgworkers are excluded from this test; we expect that
2453  * bgworker_should_start_now() has already decided whether the DB state allows them.
2454  */
2455  if (pmState != PM_RUN && pmState != PM_HOT_STANDBY &&
2456  backend_type != BACKEND_TYPE_BGWORKER)
2457  {
2458  if (Shutdown > NoShutdown)
2459  return CAC_SHUTDOWN; /* shutdown is pending */
2460  else if (!FatalError && pmState == PM_STARTUP)
2461  return CAC_STARTUP; /* normal startup */
2462  else if (!FatalError && pmState == PM_RECOVERY)
2463  return CAC_NOTCONSISTENT; /* not yet at consistent recovery
2464  * state */
2465  else
2466  return CAC_RECOVERY; /* else must be crash recovery */
2467  }
2468 
2469  /*
2470  * "Smart shutdown" restrictions are applied only to normal connections,
2471  * not to autovac workers or bgworkers.
2472  */
2473  if (!connsAllowed && backend_type == BACKEND_TYPE_NORMAL)
2474  return CAC_SHUTDOWN; /* shutdown is pending */
2475 
2476  /*
2477  * Don't start too many children.
2478  *
2479  * We allow more connections here than we can have backends because some
2480  * might still be authenticating; they might fail auth, or some existing
2481  * backend might exit before the auth cycle is completed. The exact
2482  * MaxBackends limit is enforced when a new backend tries to join the
2483  * shared-inval backend array.
2484  *
2485  * The limit here must match the sizes of the per-child-process arrays;
2486  * see comments for MaxLivePostmasterChildren().
2487  */
2489  result = CAC_TOOMANY;
2490 
2491  return result;
2492 }
2493 
2494 
2495 /*
2496  * ConnCreate -- create a local connection data structure
2497  *
 * Allocates a zero-filled Port with calloc() and hands it, together with
 * serverFd, to StreamConnection().
 *   (serverFd is presumably the listening socket on which the connection
 *   request arrived -- TODO confirm against the caller.)
 *
2498  * Returns NULL on failure, other than out-of-memory which is fatal.
 * Before returning NULL, the socket recorded in port->sock (if it is not
 * PGINVALID_SOCKET) is closed with StreamClose() and the Port is released
 * with ConnFree().  On success the populated Port is returned.
2499  */
2500 static Port *
2501 ConnCreate(int serverFd)
2502 {
2503  Port *port;
2504 
 /*
  * calloc() yields zero-filled storage.  If it fails, report the
  * condition at LOG level and call ExitPostmaster(1).
  * NOTE(review): the code further down dereferences port, so this relies
  * on ExitPostmaster() not returning -- confirm.
  */
2505  if (!(port = (Port *) calloc(1, sizeof(Port))))
2506  {
2507  ereport(LOG,
2508  (errcode(ERRCODE_OUT_OF_MEMORY),
2509  errmsg("out of memory")));
2510  ExitPostmaster(1);
2511  }
2512 
 /*
  * Any result other than STATUS_OK is treated as failure: undo what was
  * done so far and tell the caller there is no connection.
  * NOTE(review): the socket check assumes StreamConnection() leaves
  * port->sock equal to PGINVALID_SOCKET when no socket was opened --
  * confirm.
  */
2513  if (StreamConnection(serverFd, port) != STATUS_OK)
2514  {
2515  if (port->sock != PGINVALID_SOCKET)
2516  StreamClose(port->sock);
2517  ConnFree(port);
2518  return NULL;
2519  }
2520 
2521  return port;
2522 }
2523 
2524 
2525 /*
2526  * ConnFree -- free a local connection data structure
2527  *
2528  * Caller has already closed the socket if any, so there's not much
2529  * to do here.
2530  */
2531 static void
2533 {
2534  free(port);
2535 }
2536 
2537 
2538 /*
2539  * ClosePostmasterPorts -- close all the postmaster's open sockets
2540  *
2541  * This is called during child process startup to release file descriptors
2542  * that are not needed by that child process. The postmaster still has
2543  * them open, of course.
2544  *
2545  * Note: we pass am_syslogger as a boolean because we don't want to set
2546  * the global variable yet when this is called.
2547  */
2548 void
2549 ClosePostmasterPorts(bool am_syslogger)
2550 {
2551  /* Release resources held by the postmaster's WaitEventSet. */
2552  if (pm_wait_set)
2553  {
2555  pm_wait_set = NULL;
2556  }
2557 
2558 #ifndef WIN32
2559 
2560  /*
2561  * Close the write end of postmaster death watch pipe. It's important to
2562  * do this as early as possible, so that if postmaster dies, others won't
2563  * think that it's still running because we're holding the pipe open.
2564  */
2566  ereport(FATAL,
2568  errmsg_internal("could not close postmaster death monitoring pipe in child process: %m")));
2570  /* Notify fd.c that we released one pipe FD. */
2572 #endif
2573 
2574  /*
2575  * Close the postmaster's listen sockets. These aren't tracked by fd.c,
2576  * so we don't call ReleaseExternalFD() here.
2577  *
2578  * The listen sockets are marked as FD_CLOEXEC, so this isn't needed in
2579  * EXEC_BACKEND mode.
2580  */
2581 #ifndef EXEC_BACKEND
2582  for (int i = 0; i < MAXLISTEN; i++)
2583  {
2585  {
2588  }
2589  }
2590 #endif
2591 
2592  /*
2593  * If using syslogger, close the read side of the pipe. We don't bother
2594  * tracking this in fd.c, either.
2595  */
2596  if (!am_syslogger)
2597  {
2598 #ifndef WIN32
2599  if (syslogPipe[0] >= 0)
2600  close(syslogPipe[0]);
2601  syslogPipe[0] = -1;
2602 #else
2603  if (syslogPipe[0])
2604  CloseHandle(syslogPipe[0]);
2605  syslogPipe[0] = 0;
2606 #endif
2607  }
2608 
2609 #ifdef USE_BONJOUR
2610  /* If using Bonjour, close the connection to the mDNS daemon */
2611  if (bonjour_sdref)
2612  close(DNSServiceRefSockFD(bonjour_sdref));
2613 #endif
2614 }
2615 
2616 
2617 /*
2618  * InitProcessGlobals -- set MyProcPid, MyStartTime[stamp], random seeds
2619  *
2620  * Called early in the postmaster and every backend.
2621  */
2622 void
2624 {
2625  MyProcPid = getpid();
2628 
2629  /*
2630  * Set a different global seed in every process. We want something
2631  * unpredictable, so if possible, use high-quality random bits for the
2632  * seed. Otherwise, fall back to a seed based on timestamp and PID.
2633  */
2635  {
2636  uint64 rseed;
2637 
2638  /*
2639  * Since PIDs and timestamps tend to change more frequently in their
2640  * least significant bits, shift the timestamp left to allow a larger
2641  * total number of seeds in a given time period. Since that would
2642  * leave only 20 bits of the timestamp that cycle every ~1 second,
2643  * also mix in some higher bits.
2644  */
2645  rseed = ((uint64) MyProcPid) ^
2646  ((uint64) MyStartTimestamp << 12) ^
2647  ((uint64) MyStartTimestamp >> 20);
2648 
2650  }
2651 
2652  /*
2653  * Also make sure that we've set a good seed for random(3). Use of that
2654  * is deprecated in core Postgres, but extensions might use it.
2655  */
2656 #ifndef WIN32
2658 #endif
2659 }
2660 
2661 /*
2662  * Child processes use SIGUSR1 to notify us of 'pmsignals'. pg_ctl uses
2663  * SIGUSR1 to ask postmaster to check for logrotate and promote files.
2664  */
2665 static void
2667 {
2668  int save_errno = errno;
2669 
2670  pending_pm_pmsignal = true;
2671  SetLatch(MyLatch);
2672 
2673  errno = save_errno;
2674 }
2675 
2676 /*
2677  * pg_ctl uses SIGHUP to request a reload of the configuration files.
2678  */
2679 static void
2681 {
2682  int save_errno = errno;
2683 
2685  SetLatch(MyLatch);
2686 
2687  errno = save_errno;
2688 }
2689 
2690 /*
2691  * Re-read config files, and tell children to do same.
2692  */
2693 static void
2695 {
2696  pending_pm_reload_request = false;
2697 
2698  ereport(DEBUG2,
2699  (errmsg_internal("postmaster received reload request signal")));
2700 
2701  if (Shutdown <= SmartShutdown)
2702  {
2703  ereport(LOG,
2704  (errmsg("received SIGHUP, reloading configuration files")));
2707  if (StartupPID != 0)
2709  if (BgWriterPID != 0)
2711  if (CheckpointerPID != 0)
2713  if (WalWriterPID != 0)
2715  if (WalReceiverPID != 0)
2717  if (AutoVacPID != 0)
2719  if (PgArchPID != 0)
2721  if (SysLoggerPID != 0)
2723 
2724  /* Reload authentication config files too */
2725  if (!load_hba())
2726  ereport(LOG,
2727  /* translator: %s is a configuration file */
2728  (errmsg("%s was not reloaded", HbaFileName)));
2729 
2730  if (!load_ident())
2731  ereport(LOG,
2732  (errmsg("%s was not reloaded", IdentFileName)));
2733 
2734 #ifdef USE_SSL
2735  /* Reload SSL configuration as well */
2736  if (EnableSSL)
2737  {
2738  if (secure_initialize(false) == 0)
2739  LoadedSSL = true;
2740  else
2741  ereport(LOG,
2742  (errmsg("SSL configuration was not reloaded")));
2743  }
2744  else
2745  {
2746  secure_destroy();
2747  LoadedSSL = false;
2748  }
2749 #endif
2750 
2751 #ifdef EXEC_BACKEND
2752  /* Update the starting-point file for future children */
2753  write_nondefault_variables(PGC_SIGHUP);
2754 #endif
2755  }
2756 }
2757 
2758 /*
2759  * pg_ctl uses SIGTERM, SIGINT and SIGQUIT to request different types of
2760  * shutdown.
2761  */
2762 static void
2764 {
2765  int save_errno = errno;
2766 
2767  switch (postgres_signal_arg)
2768  {
2769  case SIGTERM:
2770  /* smart is implied if the other two flags aren't set */
2772  break;
2773  case SIGINT:
2776  break;
2777  case SIGQUIT:
2780  break;
2781  }
2782  SetLatch(MyLatch);
2783 
2784  errno = save_errno;
2785 }
2786 
2787 /*
2788  * Process shutdown request.
2789  */
2790 static void
2792 {
2793  int mode;
2794 
2795  ereport(DEBUG2,
2796  (errmsg_internal("postmaster received shutdown request signal")));
2797 
2799 
2800  /*
2801  * If more than one shutdown request signal arrived since the last server
2802  * loop, take the one that is the most immediate. That matches the
2803  * priority that would apply if we processed them one by one in any order.
2804  */
2806  {
2810  }
2812  {
2814  mode = FastShutdown;
2815  }
2816  else
2817  mode = SmartShutdown;
2818 
2819  switch (mode)
2820  {
2821  case SmartShutdown:
2822 
2823  /*
2824  * Smart Shutdown:
2825  *
2826  * Wait for children to end their work, then shut down.
2827  */
2828  if (Shutdown >= SmartShutdown)
2829  break;
2831  ereport(LOG,
2832  (errmsg("received smart shutdown request")));
2833 
2834  /* Report status */
2836 #ifdef USE_SYSTEMD
2837  sd_notify(0, "STOPPING=1");
2838 #endif
2839 
2840  /*
2841  * If we reached normal running, we go straight to waiting for
2842  * client backends to exit. If already in PM_STOP_BACKENDS or a
2843  * later state, do not change it.
2844  */
2845  if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
2846  connsAllowed = false;
2847  else if (pmState == PM_STARTUP || pmState == PM_RECOVERY)
2848  {
2849  /* There should be no clients, so proceed to stop children */
2851  }
2852 
2853  /*
2854  * Now wait for online backup mode to end and backends to exit. If
2855  * that is already the case, PostmasterStateMachine will take the
2856  * next step.
2857  */
2859  break;
2860 
2861  case FastShutdown:
2862 
2863  /*
2864  * Fast Shutdown:
2865  *
2866  * Abort all children with SIGTERM (rollback active transactions
2867  * and exit) and shut down when they are gone.
2868  */
2869  if (Shutdown >= FastShutdown)
2870  break;
2872  ereport(LOG,
2873  (errmsg("received fast shutdown request")));
2874 
2875  /* Report status */
2877 #ifdef USE_SYSTEMD
2878  sd_notify(0, "STOPPING=1");
2879 #endif
2880 
2881  if (pmState == PM_STARTUP || pmState == PM_RECOVERY)
2882  {
2883  /* Just shut down background processes silently */
2885  }
2886  else if (pmState == PM_RUN ||
2888  {
2889  /* Report that we're about to zap live client sessions */
2890  ereport(LOG,
2891  (errmsg("aborting any active transactions")));
2893  }
2894 
2895  /*
2896  * PostmasterStateMachine will issue any necessary signals, or
2897  * take the next step if no child processes need to be killed.
2898  */
2900  break;
2901 
2902  case ImmediateShutdown:
2903 
2904  /*
2905  * Immediate Shutdown:
2906  *
2907  * abort all children with SIGQUIT, wait for them to exit,
2908  * terminate remaining ones with SIGKILL, then exit without
2909  * attempting to properly shut down the database system.
2910  */
2911  if (Shutdown >= ImmediateShutdown)
2912  break;
2914  ereport(LOG,
2915  (errmsg("received immediate shutdown request")));
2916 
2917  /* Report status */
2919 #ifdef USE_SYSTEMD
2920  sd_notify(0, "STOPPING=1");
2921 #endif
2922 
2923  /* tell children to shut down ASAP */
2924  /* (note we don't apply send_abort_for_crash here) */
2928 
2929  /* set stopwatch for them to die */
2930  AbortStartTime = time(NULL);
2931 
2932  /*
2933  * Now wait for backends to exit. If there are none,
2934  * PostmasterStateMachine will take the next step.
2935  */
2937  break;
2938  }
2939 }
2940 
2941 static void
2943 {
2944  int save_errno = errno;
2945 
2946  pending_pm_child_exit = true;
2947  SetLatch(MyLatch);
2948 
2949  errno = save_errno;
2950 }
2951 
2952 /*
2953  * Cleanup after a child process dies.
2954  */
2955 static void
2957 {
2958  int pid; /* process id of dead child process */
2959  int exitstatus; /* its exit status */
2960 
2961  pending_pm_child_exit = false;
2962 
2963  ereport(DEBUG4,
2964  (errmsg_internal("reaping dead processes")));
2965 
2966  while ((pid = waitpid(-1, &exitstatus, WNOHANG)) > 0)
2967  {
2968  /*
2969  * Check if this child was a startup process.
2970  */
2971  if (pid == StartupPID)
2972  {
2973  StartupPID = 0;
2974 
2975  /*
2976  * Startup process exited in response to a shutdown request (or it
2977  * completed normally regardless of the shutdown request).
2978  */
2979  if (Shutdown > NoShutdown &&
2980  (EXIT_STATUS_0(exitstatus) || EXIT_STATUS_1(exitstatus)))
2981  {
2984  /* PostmasterStateMachine logic does the rest */
2985  continue;
2986  }
2987 
2988  if (EXIT_STATUS_3(exitstatus))
2989  {
2990  ereport(LOG,
2991  (errmsg("shutdown at recovery target")));
2994  TerminateChildren(SIGTERM);
2996  /* PostmasterStateMachine logic does the rest */
2997  continue;
2998  }
2999 
3000  /*
3001  * Unexpected exit of startup process (including FATAL exit)
3002  * during PM_STARTUP is treated as catastrophic. There are no
3003  * other processes running yet, so we can just exit.
3004  */
3005  if (pmState == PM_STARTUP &&
3007  !EXIT_STATUS_0(exitstatus))
3008  {
3009  LogChildExit(LOG, _("startup process"),
3010  pid, exitstatus);
3011  ereport(LOG,
3012  (errmsg("aborting startup due to startup process failure")));
3013  ExitPostmaster(1);
3014  }
3015 
3016  /*
3017  * After PM_STARTUP, any unexpected exit (including FATAL exit) of
3018  * the startup process is catastrophic, so kill other children,
3019  * and set StartupStatus so we don't try to reinitialize after
3020  * they're gone. Exception: if StartupStatus is STARTUP_SIGNALED,
3021  * then we previously sent the startup process a SIGQUIT; so
3022  * that's probably the reason it died, and we do want to try to
3023  * restart in that case.
3024  *
3025  * This stanza also handles the case where we sent a SIGQUIT
3026  * during PM_STARTUP due to some dead_end child crashing: in that
3027  * situation, if the startup process dies on the SIGQUIT, we need
3028  * to transition to PM_WAIT_BACKENDS state which will allow
3029  * PostmasterStateMachine to restart the startup process. (On the
3030  * other hand, the startup process might complete normally, if we
3031  * were too late with the SIGQUIT. In that case we'll fall
3032  * through and commence normal operations.)
3033  */
3034  if (!EXIT_STATUS_0(exitstatus))
3035  {
3037  {
3039  if (pmState == PM_STARTUP)
3041  }
3042  else
3044  HandleChildCrash(pid, exitstatus,
3045  _("startup process"));
3046  continue;
3047  }
3048 
3049  /*
3050  * Startup succeeded, commence normal operations
3051  */
3053  FatalError = false;
3054  AbortStartTime = 0;
3055  ReachedNormalRunning = true;
3056  pmState = PM_RUN;
3057  connsAllowed = true;
3058 
3059  /*
3060  * Crank up the background tasks, if we didn't do that already
3061  * when we entered consistent recovery state. It doesn't matter
3062  * if this fails, we'll just try again later.
3063  */
3064  if (CheckpointerPID == 0)
3066  if (BgWriterPID == 0)
3068  if (WalWriterPID == 0)
3070 
3071  /*
3072  * Likewise, start other special children as needed. In a restart
3073  * situation, some of them may be alive already.
3074  */
3077  if (PgArchStartupAllowed() && PgArchPID == 0)
3079 
3080  /* workers may be scheduled to start now */
3082 
3083  /* at this point we are really open for business */
3084  ereport(LOG,
3085  (errmsg("database system is ready to accept connections")));
3086 
3087  /* Report status */
3089 #ifdef USE_SYSTEMD
3090  sd_notify(0, "READY=1");
3091 #endif
3092 
3093  continue;
3094  }
3095 
3096  /*
3097  * Was it the bgwriter? Normal exit can be ignored; we'll start a new
3098  * one at the next iteration of the postmaster's main loop, if
3099  * necessary. Any other exit condition is treated as a crash.
3100  */
3101  if (pid == BgWriterPID)
3102  {
3103  BgWriterPID = 0;
3104  if (!EXIT_STATUS_0(exitstatus))
3105  HandleChildCrash(pid, exitstatus,
3106  _("background writer process"));
3107  continue;
3108  }
3109 
3110  /*
3111  * Was it the checkpointer?
3112  */
3113  if (pid == CheckpointerPID)
3114  {
3115  CheckpointerPID = 0;
3116  if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN)
3117  {
3118  /*
3119  * OK, we saw normal exit of the checkpointer after it's been
3120  * told to shut down. We expect that it wrote a shutdown
3121  * checkpoint. (If for some reason it didn't, recovery will
3122  * occur on next postmaster start.)
3123  *
3124  * At this point we should have no normal backend children
3125  * left (else we'd not be in PM_SHUTDOWN state) but we might
3126  * have dead_end children to wait for.
3127  *
3128  * If we have an archiver subprocess, tell it to do a last
3129  * archive cycle and quit. Likewise, if we have walsender
3130  * processes, tell them to send any remaining WAL and quit.
3131  */
3133 
3134  /* Waken archiver for the last time */
3135  if (PgArchPID != 0)
3137 
3138  /*
3139  * Waken walsenders for the last time. No regular backends
3140  * should be around anymore.
3141  */
3143 
3145  }
3146  else
3147  {
3148  /*
3149  * Any unexpected exit of the checkpointer (including FATAL
3150  * exit) is treated as a crash.
3151  */
3152  HandleChildCrash(pid, exitstatus,
3153  _("checkpointer process"));
3154  }
3155 
3156  continue;
3157  }
3158 
3159  /*
3160  * Was it the wal writer? Normal exit can be ignored; we'll start a
3161  * new one at the next iteration of the postmaster's main loop, if
3162  * necessary. Any other exit condition is treated as a crash.
3163  */
3164  if (pid == WalWriterPID)
3165  {
3166  WalWriterPID = 0;
3167  if (!EXIT_STATUS_0(exitstatus))
3168  HandleChildCrash(pid, exitstatus,
3169  _("WAL writer process"));
3170  continue;
3171  }
3172 
3173  /*
3174  * Was it the wal receiver? If exit status is zero (normal) or one
3175  * (FATAL exit), we assume everything is all right just like normal
3176  * backends. (If we need a new wal receiver, we'll start one at the
3177  * next iteration of the postmaster's main loop.)
3178  */
3179  if (pid == WalReceiverPID)
3180  {
3181  WalReceiverPID = 0;
3182  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3183  HandleChildCrash(pid, exitstatus,
3184  _("WAL receiver process"));
3185  continue;
3186  }
3187 
3188  /*
3189  * Was it the autovacuum launcher? Normal exit can be ignored; we'll
3190  * start a new one at the next iteration of the postmaster's main
3191  * loop, if necessary. Any other exit condition is treated as a
3192  * crash.
3193  */
3194  if (pid == AutoVacPID)
3195  {
3196  AutoVacPID = 0;
3197  if (!EXIT_STATUS_0(exitstatus))
3198  HandleChildCrash(pid, exitstatus,
3199  _("autovacuum launcher process"));
3200  continue;
3201  }
3202 
3203  /*
3204  * Was it the archiver? If exit status is zero (normal) or one (FATAL
3205  * exit), we assume everything is all right just like normal backends
3206  * and just try to restart a new one so that we immediately retry
3207  * archiving remaining files. (If that fails, we'll try again in future
3208  * cycles of the postmaster's main loop.) Unless we were waiting for
3209  * it to shut down; don't restart it in that case, and
3210  * PostmasterStateMachine() will advance to the next shutdown step.
3211  */
3212  if (pid == PgArchPID)
3213  {
3214  PgArchPID = 0;
3215  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3216  HandleChildCrash(pid, exitstatus,
3217  _("archiver process"));
3218  if (PgArchStartupAllowed())
3220  continue;
3221  }
3222 
3223  /* Was it the system logger? If so, try to start a new one */
3224  if (pid == SysLoggerPID)
3225  {
3226  SysLoggerPID = 0;
3227  /* for safety's sake, launch new logger *first* */
3229  if (!EXIT_STATUS_0(exitstatus))
3230  LogChildExit(LOG, _("system logger process"),
3231  pid, exitstatus);
3232  continue;
3233  }
3234 
3235  /* Was it one of our background workers? */
3236  if (CleanupBackgroundWorker(pid, exitstatus))
3237  {
3238  /* have it be restarted */
3239  HaveCrashedWorker = true;
3240  continue;
3241  }
3242 
3243  /*
3244  * Else do standard backend child cleanup.
3245  */
3246  CleanupBackend(pid, exitstatus);
3247  } /* loop over pending child-death reports */
3248 
3249  /*
3250  * After cleaning out the SIGCHLD queue, see if we have any state changes
3251  * or actions to make.
3252  */
3254 }
3255 
3256 /*
3257  * Scan the bgworkers list and see if the given PID (which has just stopped
3258  * or crashed) is in it. Handle its shutdown if so, and return true. If not a
3259  * bgworker, return false.
3260  *
3261  * This is heavily based on CleanupBackend. One important difference is that
3262  * we don't know yet that the dying process is a bgworker, so we must be silent
3263  * until we're sure it is.
3264  */
3265 static bool
3267  int exitstatus) /* child's exit status */
3268 {
3269  char namebuf[MAXPGPATH];
3270  slist_mutable_iter iter;
3271 
3273  {
3274  RegisteredBgWorker *rw;
3275 
3276  rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
3277 
3278  if (rw->rw_pid != pid)
3279  continue;
3280 
3281 #ifdef WIN32
3282  /* see CleanupBackend */
3283  if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3284  exitstatus = 0;
3285 #endif
3286 
3287  snprintf(namebuf, MAXPGPATH, _("background worker \"%s\""),
3288  rw->rw_worker.bgw_type);
3289 
3290 
3291  if (!EXIT_STATUS_0(exitstatus))
3292  {
3293  /* Record timestamp, so we know when to restart the worker. */
3295  }
3296  else
3297  {
3298  /* Zero exit status means terminate */
3299  rw->rw_crashed_at = 0;
3300  rw->rw_terminate = true;
3301  }
3302 
3303  /*
3304  * Additionally, just like a backend, any exit status other than 0 or
3305  * 1 is considered a crash and causes a system-wide restart.
3306  */
3307  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3308  {
3309  HandleChildCrash(pid, exitstatus, namebuf);
3310  return true;
3311  }
3312 
3313  /*
3314  * We must release the postmaster child slot. If the worker failed to
3315  * do so, it did not clean up after itself, requiring a crash-restart
3316  * cycle.
3317  */
3319  {
3320  HandleChildCrash(pid, exitstatus, namebuf);
3321  return true;
3322  }
3323 
3324  /* Get it out of the BackendList and clear out remaining data */
3325  dlist_delete(&rw->rw_backend->elem);
3326 #ifdef EXEC_BACKEND
3327  ShmemBackendArrayRemove(rw->rw_backend);
3328 #endif
3329 
3330  /*
3331  * It's possible that this background worker started some OTHER
3332  * background worker and asked to be notified when that worker started
3333  * or stopped. If so, cancel any notifications destined for the
3334  * now-dead backend.
3335  */
3336  if (rw->rw_backend->bgworker_notify)
3338  free(rw->rw_backend);
3339  rw->rw_backend = NULL;
3340  rw->rw_pid = 0;
3341  rw->rw_child_slot = 0;
3342  ReportBackgroundWorkerExit(&iter); /* report child death */
3343 
3344  LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG,
3345  namebuf, pid, exitstatus);
3346 
3347  return true;
3348  }
3349 
3350  return false;
3351 }
3352 
3353 /*
3354  * CleanupBackend -- cleanup after terminated backend.
3355  *
3356  * Remove all local state associated with backend.
3357  *
3358  * If you change this, see also CleanupBackgroundWorker.
3359  */
3360 static void
3362  int exitstatus) /* child's exit status. */
3363 {
3364  dlist_mutable_iter iter;
3365 
3366  LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
3367 
3368  /*
3369  * If a backend dies in an ugly way then we must signal all other backends
3370  * to quickdie. If exit status is zero (normal) or one (FATAL exit), we
3371  * assume everything is all right and proceed to remove the backend from
3372  * the active backend list.
3373  */
3374 
3375 #ifdef WIN32
3376 
3377  /*
3378  * On win32, also treat ERROR_WAIT_NO_CHILDREN (128) as nonfatal case,
3379  * since that sometimes happens under load when the process fails to start
3380  * properly (long before it starts using shared memory). Microsoft reports
3381  * it is related to mutex failure:
3382  * http://archives.postgresql.org/pgsql-hackers/2010-09/msg00790.php
3383  */
3384  if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3385  {
3386  LogChildExit(LOG, _("server process"), pid, exitstatus);
3387  exitstatus = 0;
3388  }
3389 #endif
3390 
3391  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3392  {
3393  HandleChildCrash(pid, exitstatus, _("server process"));
3394  return;
3395  }
3396 
3398  {
3399  Backend *bp = dlist_container(Backend, elem, iter.cur);
3400 
3401  if (bp->pid == pid)
3402  {
3403  if (!bp->dead_end)
3404  {
3406  {
3407  /*
3408  * Uh-oh, the child failed to clean itself up. Treat as a
3409  * crash after all.
3410  */
3411  HandleChildCrash(pid, exitstatus, _("server process"));
3412  return;
3413  }
3414 #ifdef EXEC_BACKEND
3415  ShmemBackendArrayRemove(bp);
3416 #endif
3417  }
3418  if (bp->bgworker_notify)
3419  {
3420  /*
3421  * This backend may have been slated to receive SIGUSR1 when
3422  * some background worker started or stopped. Cancel those
3423  * notifications, as we don't want to signal PIDs that are not
3424  * PostgreSQL backends. This gets skipped in the (probably
3425  * very common) case where the backend has never requested any
3426  * such notifications.
3427  */
3429  }
3430  dlist_delete(iter.cur);
3431  free(bp);
3432  break;
3433  }
3434  }
3435 }
3436 
3437 /*
3438  * HandleChildCrash -- cleanup after failed backend, bgwriter, checkpointer,
3439  * walwriter, autovacuum, archiver or background worker.
3440  *
3441  * The objectives here are to clean up our local state about the child
3442  * process, and to signal all other remaining children to quickdie.
3443  */
3444 static void
3445 HandleChildCrash(int pid, int exitstatus, const char *procname)
3446 {
3447  dlist_mutable_iter iter;
3448  slist_iter siter;
3449  Backend *bp;
3450  bool take_action;
3451 
3452  /*
3453  * We only log messages and send signals if this is the first process
3454  * crash and we're not doing an immediate shutdown; otherwise, we're only
3455  * here to update postmaster's idea of live processes. If we have already
3456  * signaled children, nonzero exit status is to be expected, so don't
3457  * clutter log.
3458  */
3459  take_action = !FatalError && Shutdown != ImmediateShutdown;
3460 
3461  if (take_action)
3462  {
3463  LogChildExit(LOG, procname, pid, exitstatus);
3464  ereport(LOG,
3465  (errmsg("terminating any other active server processes")));
3467  }
3468 
3469  /* Process background workers. */
3471  {
3472  RegisteredBgWorker *rw;
3473 
3474  rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
3475  if (rw->rw_pid == 0)
3476  continue; /* not running */
3477  if (rw->rw_pid == pid)
3478  {
3479  /*
3480  * Found entry for freshly-dead worker, so remove it.
3481  */
3483  dlist_delete(&rw->rw_backend->elem);
3484 #ifdef EXEC_BACKEND
3485  ShmemBackendArrayRemove(rw->rw_backend);
3486 #endif
3487  free(rw->rw_backend);
3488  rw->rw_backend = NULL;
3489  rw->rw_pid = 0;
3490  rw->rw_child_slot = 0;
3491  /* don't reset crashed_at */
3492  /* don't report child stop, either */
3493  /* Keep looping so we can signal remaining workers */
3494  }
3495  else
3496  {
3497  /*
3498  * This worker is still alive. Unless we did so already, tell it
3499  * to commit hara-kiri.
3500  */
3501  if (take_action)
3502  sigquit_child(rw->rw_pid);
3503  }
3504  }
3505 
3506  /* Process regular backends */
3508  {
3509  bp = dlist_container(Backend, elem, iter.cur);
3510 
3511  if (bp->pid == pid)
3512  {
3513  /*
3514  * Found entry for freshly-dead backend, so remove it.
3515  */
3516  if (!bp->dead_end)
3517  {
3519 #ifdef EXEC_BACKEND
3520  ShmemBackendArrayRemove(bp);
3521 #endif
3522  }
3523  dlist_delete(iter.cur);
3524  free(bp);
3525  /* Keep looping so we can signal remaining backends */
3526  }
3527  else
3528  {
3529  /*
3530  * This backend is still alive. Unless we did so already, tell it
3531  * to commit hara-kiri.
3532  *
3533  * We could exclude dead_end children here, but at least when
3534  * sending SIGABRT it seems better to include them.
3535  *
3536  * Background workers were already processed above; ignore them
3537  * here.
3538  */
3539  if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
3540  continue;
3541 
3542  if (take_action)
3543  sigquit_child(bp->pid);
3544  }
3545  }
3546 
3547  /* Take care of the startup process too */
3548  if (pid == StartupPID)
3549  {
3550  StartupPID = 0;
3551  /* Caller adjusts StartupStatus, so don't touch it here */
3552  }
3553  else if (StartupPID != 0 && take_action)
3554  {
3557  }
3558 
3559  /* Take care of the bgwriter too */
3560  if (pid == BgWriterPID)
3561  BgWriterPID = 0;
3562  else if (BgWriterPID != 0 && take_action)
3564 
3565  /* Take care of the checkpointer too */
3566  if (pid == CheckpointerPID)
3567  CheckpointerPID = 0;
3568  else if (CheckpointerPID != 0 && take_action)
3570 
3571  /* Take care of the walwriter too */
3572  if (pid == WalWriterPID)
3573  WalWriterPID = 0;
3574  else if (WalWriterPID != 0 && take_action)
3576 
3577  /* Take care of the walreceiver too */
3578  if (pid == WalReceiverPID)
3579  WalReceiverPID = 0;
3580  else if (WalReceiverPID != 0 && take_action)
3582 
3583  /* Take care of the autovacuum launcher too */
3584  if (pid == AutoVacPID)
3585  AutoVacPID = 0;
3586  else if (AutoVacPID != 0 && take_action)
3588 
3589  /* Take care of the archiver too */
3590  if (pid == PgArchPID)
3591  PgArchPID = 0;
3592  else if (PgArchPID != 0 && take_action)
3594 
3595  /* We do NOT restart the syslogger */
3596 
3597  if (Shutdown != ImmediateShutdown)
3598  FatalError = true;
3599 
3600  /* We now transit into a state of waiting for children to die */
3601  if (pmState == PM_RECOVERY ||
3602  pmState == PM_HOT_STANDBY ||
3603  pmState == PM_RUN ||
3605  pmState == PM_SHUTDOWN)
3607 
3608  /*
3609  * .. and if this doesn't happen quickly enough, now the clock is ticking
3610  * for us to kill them without mercy.
3611  */
3612  if (AbortStartTime == 0)
3613  AbortStartTime = time(NULL);
3614 }
3615 
3616 /*
3617  * Log the death of a child process.
3618  */
3619 static void
3620 LogChildExit(int lev, const char *procname, int pid, int exitstatus)
3621 {
3622  /*
3623  * size of activity_buffer is arbitrary, but set equal to default
3624  * track_activity_query_size
3625  */
3626  char activity_buffer[1024];
3627  const char *activity = NULL;
3628 
3629  if (!EXIT_STATUS_0(exitstatus))
3630  activity = pgstat_get_crashed_backend_activity(pid,
3631  activity_buffer,
3632  sizeof(activity_buffer));
3633 
3634  if (WIFEXITED(exitstatus))
3635  ereport(lev,
3636 
3637  /*------
3638  translator: %s is a noun phrase describing a child process, such as
3639  "server process" */
3640  (errmsg("%s (PID %d) exited with exit code %d",
3641  procname, pid, WEXITSTATUS(exitstatus)),
3642  activity ? errdetail("Failed process was running: %s", activity) : 0));
3643  else if (WIFSIGNALED(exitstatus))
3644  {
3645 #if defined(WIN32)
3646  ereport(lev,
3647 
3648  /*------
3649  translator: %s is a noun phrase describing a child process, such as
3650  "server process" */
3651  (errmsg("%s (PID %d) was terminated by exception 0x%X",
3652  procname, pid, WTERMSIG(exitstatus)),
3653  errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
3654  activity ? errdetail("Failed process was running: %s", activity) : 0));
3655 #else
3656  ereport(lev,
3657 
3658  /*------
3659  translator: %s is a noun phrase describing a child process, such as
3660  "server process" */
3661  (errmsg("%s (PID %d) was terminated by signal %d: %s",
3662  procname, pid, WTERMSIG(exitstatus),
3663  pg_strsignal(WTERMSIG(exitstatus))),
3664  activity ? errdetail("Failed process was running: %s", activity) : 0));
3665 #endif
3666  }
3667  else
3668  ereport(lev,
3669 
3670  /*------
3671  translator: %s is a noun phrase describing a child process, such as
3672  "server process" */
3673  (errmsg("%s (PID %d) exited with unrecognized status %d",
3674  procname, pid, exitstatus),
3675  activity ? errdetail("Failed process was running: %s", activity) : 0));
3676 }
3677 
3678 /*
3679  * Advance the postmaster's state machine and take actions as appropriate
3680  *
3681  * This is common code for process_pm_shutdown_request(),
3682  * process_pm_child_exit() and process_pm_pmsignal(), which process the signals
3683  * that might mean we need to change state.
3684  */
3685 static void
3687 {
3688  /* If we're doing a smart shutdown, try to advance that state. */
3689  if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
3690  {
3691  if (!connsAllowed)
3692  {
3693  /*
3694  * This state ends when we have no normal client backends running.
3695  * Then we're ready to stop other children.
3696  */
3699  }
3700  }
3701 
3702  /*
3703  * If we're ready to do so, signal child processes to shut down. (This
3704  * isn't a persistent state, but treating it as a distinct pmState allows
3705  * us to share this code across multiple shutdown code paths.)
3706  */
3707  if (pmState == PM_STOP_BACKENDS)
3708  {
3709  /*
3710  * Forget any pending requests for background workers, since we're no
3711  * longer willing to launch any new workers. (If additional requests
3712  * arrive, BackgroundWorkerStateChange will reject them.)
3713  */
3715 
3716  /* Signal all backend children except walsenders */
3717  SignalSomeChildren(SIGTERM,
3719  /* and the autovac launcher too */
3720  if (AutoVacPID != 0)
3721  signal_child(AutoVacPID, SIGTERM);
3722  /* and the bgwriter too */
3723  if (BgWriterPID != 0)
3724  signal_child(BgWriterPID, SIGTERM);
3725  /* and the walwriter too */
3726  if (WalWriterPID != 0)
3727  signal_child(WalWriterPID, SIGTERM);
3728  /* If we're in recovery, also stop startup and walreceiver procs */
3729  if (StartupPID != 0)
3730  signal_child(StartupPID, SIGTERM);
3731  if (WalReceiverPID != 0)
3732  signal_child(WalReceiverPID, SIGTERM);
3733  /* checkpointer, archiver, stats, and syslogger may continue for now */
3734 
3735  /* Now transition to PM_WAIT_BACKENDS state to wait for them to die */
3737  }
3738 
3739  /*
3740  * If we are in a state-machine state that implies waiting for backends to
3741  * exit, see if they're all gone, and change state if so.
3742  */
3743  if (pmState == PM_WAIT_BACKENDS)
3744  {
3745  /*
3746  * PM_WAIT_BACKENDS state ends when we have no regular backends
3747  * (including autovac workers), no bgworkers (including unconnected
3748  * ones), and no walwriter, autovac launcher or bgwriter. If we are
3749  * doing crash recovery or an immediate shutdown then we expect the
3750  * checkpointer to exit as well, otherwise not. The stats and
3751  * syslogger processes are disregarded since they are not connected to
3752  * shared memory; we also disregard dead_end children here. Walsenders
3753  * and archiver are also disregarded, they will be terminated later
3754  * after writing the checkpoint record.
3755  */
3757  StartupPID == 0 &&
3758  WalReceiverPID == 0 &&
3759  BgWriterPID == 0 &&
3760  (CheckpointerPID == 0 ||
3762  WalWriterPID == 0 &&
3763  AutoVacPID == 0)
3764  {
3766  {
3767  /*
3768  * Start waiting for dead_end children to die. This state
3769  * change causes ServerLoop to stop creating new ones.
3770  */
3772 
3773  /*
3774  * We already SIGQUIT'd the archiver and stats processes, if
3775  * any, when we started immediate shutdown or entered
3776  * FatalError state.
3777  */
3778  }
3779  else
3780  {
3781  /*
3782  * If we get here, we are proceeding with normal shutdown. All
3783  * the regular children are gone, and it's time to tell the
3784  * checkpointer to do a shutdown checkpoint.
3785  */
3787  /* Start the checkpointer if not running */
3788  if (CheckpointerPID == 0)
3790  /* And tell it to shut down */
3791  if (CheckpointerPID != 0)
3792  {
3794  pmState = PM_SHUTDOWN;
3795  }
3796  else
3797  {
3798  /*
3799  * If we failed to fork a checkpointer, just shut down.
3800  * Any required cleanup will happen at next restart. We
3801  * set FatalError so that an "abnormal shutdown" message
3802  * gets logged when we exit.
3803  *
3804  * We don't consult send_abort_for_crash here, as it's
3805  * unlikely that dumping cores would illuminate the reason
3806  * for checkpointer fork failure.
3807  */
3808  FatalError = true;
3810 
3811  /* Kill the walsenders and archiver too */
3813  if (PgArchPID != 0)
3815  }
3816  }
3817  }
3818  }
3819 
3820  if (pmState == PM_SHUTDOWN_2)
3821  {
3822  /*
3823  * PM_SHUTDOWN_2 state ends when there's no other children than
3824  * dead_end children left. There shouldn't be any regular backends
3825  * left by now anyway; what we're really waiting for is walsenders and
3826  * archiver.
3827  */
3828  if (PgArchPID == 0 && CountChildren(BACKEND_TYPE_ALL) == 0)
3829  {
3831  }
3832  }
3833 
3834  if (pmState == PM_WAIT_DEAD_END)
3835  {
3836  /* Don't allow any new socket connection events. */
3838 
3839  /*
3840  * PM_WAIT_DEAD_END state ends when the BackendList is entirely empty
3841  * (ie, no dead_end children remain), and the archiver is gone too.
3842  *
3843  * The reason we wait for those two is to protect them against a new
3844  * postmaster starting conflicting subprocesses; this isn't an
3845  * ironclad protection, but it at least helps in the
3846  * shutdown-and-immediately-restart scenario. Note that they have
3847  * already been sent appropriate shutdown signals, either during a
3848  * normal state transition leading up to PM_WAIT_DEAD_END, or during
3849  * FatalError processing.
3850  */
3851  if (dlist_is_empty(&BackendList) && PgArchPID == 0)
3852  {
3853  /* These other guys should be dead already */
3854  Assert(StartupPID == 0);
3855  Assert(WalReceiverPID == 0);
3856  Assert(BgWriterPID == 0);
3857  Assert(CheckpointerPID == 0);
3858  Assert(WalWriterPID == 0);
3859  Assert(AutoVacPID == 0);
3860  /* syslogger is not considered here */
3862  }
3863  }
3864 
3865  /*
3866  * If we've been told to shut down, we exit as soon as there are no
3867  * remaining children. If there was a crash, cleanup will occur at the
3868  * next startup. (Before PostgreSQL 8.3, we tried to recover from the
3869  * crash before exiting, but that seems unwise if we are quitting because
3870  * we got SIGTERM from init --- there may well not be time for recovery
3871  * before init decides to SIGKILL us.)
3872  *
3873  * Note that the syslogger continues to run. It will exit when it sees
3874  * EOF on its input pipe, which happens when there are no more upstream
3875  * processes.
3876  */
3878  {
3879  if (FatalError)
3880  {
3881  ereport(LOG, (errmsg("abnormal database system shutdown")));
3882  ExitPostmaster(1);
3883  }
3884  else
3885  {
3886  /*
3887  * Normal exit from the postmaster is here. We don't need to log
3888  * anything here, since the UnlinkLockFiles proc_exit callback
3889  * will do so, and that should be the last user-visible action.
3890  */
3891  ExitPostmaster(0);
3892  }
3893  }
3894 
3895  /*
3896  * If the startup process failed, or the user does not want an automatic
3897  * restart after backend crashes, wait for all non-syslogger children to
3898  * exit, and then exit postmaster. We don't try to reinitialize when the
3899  * startup process fails, because more than likely it will just fail again
3900  * and we will keep trying forever.
3901  */
3902  if (pmState == PM_NO_CHILDREN)
3903  {
3905  {
3906  ereport(LOG,
3907  (errmsg("shutting down due to startup process failure")));
3908  ExitPostmaster(1);
3909  }
3910  if (!restart_after_crash)
3911  {
3912  ereport(LOG,
3913  (errmsg("shutting down because restart_after_crash is off")));
3914  ExitPostmaster(1);
3915  }
3916  }
3917 
3918  /*
3919  * If we need to recover from a crash, wait for all non-syslogger children
3920  * to exit, then reset shmem and StartupDataBase.
3921  */
3922  if (FatalError && pmState == PM_NO_CHILDREN)
3923  {
3924  ereport(LOG,
3925  (errmsg("all server processes terminated; reinitializing")));
3926 
3927  /* remove leftover temporary files after a crash */
3930 
3931  /* allow background workers to immediately restart */
3933 
3934  shmem_exit(1);
3935 
3936  /* re-read control file into local memory */
3938 
3939  /* re-create shared memory and semaphores */
3941 
3943  Assert(StartupPID != 0);
3945  pmState = PM_STARTUP;
3946  /* crash recovery started, reset SIGKILL flag */
3947  AbortStartTime = 0;
3948 
3949  /* start accepting server socket connection events again */
3951  }
3952 }
3953 
3954 
3955 /*
3956  * Send a signal to a postmaster child process
3957  *
3958  * On systems that have setsid(), each child process sets itself up as a
3959  * process group leader. For signals that are generally interpreted in the
3960  * appropriate fashion, we signal the entire process group not just the
3961  * direct child process. This allows us to, for example, SIGQUIT a blocked
3962  * archive_recovery script, or SIGINT a script being run by a backend via
3963  * system().
3964  *
3965  * There is a race condition for recently-forked children: they might not
3966  * have executed setsid() yet. So we signal the child directly as well as
3967  * the group. We assume such a child will handle the signal before trying
3968  * to spawn any grandchild processes. We also assume that signaling the
3969  * child twice will not cause any problems.
3970  */
3971 static void
3972 signal_child(pid_t pid, int signal)
3973 {
3974  if (kill(pid, signal) < 0)
3975  elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) pid, signal);
3976 #ifdef HAVE_SETSID
3977  switch (signal)
3978  {
3979  case SIGINT:
3980  case SIGTERM:
3981  case SIGQUIT:
3982  case SIGKILL:
3983  case SIGABRT:
3984  if (kill(-pid, signal) < 0)
3985  elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) (-pid), signal);
3986  break;
3987  default:
3988  break;
3989  }
3990 #endif
3991 }
3992 
3993 /*
3994  * Convenience function for killing a child process after a crash of some
3995  * other child process. We log the action at a higher level than we would
3996  * otherwise do, and we apply send_abort_for_crash to decide which signal
3997  * to send. Normally it's SIGQUIT -- and most other comments in this file
3998  * are written on the assumption that it is -- but developers might prefer
3999  * to use SIGABRT to collect per-child core dumps.
4000  */
4001 static void
4002 sigquit_child(pid_t pid)
4003 {
4004  ereport(DEBUG2,
4005  (errmsg_internal("sending %s to process %d",
4006  (send_abort_for_crash ? "SIGABRT" : "SIGQUIT"),
4007  (int) pid)));
4009 }
4010 
4011 /*
4012  * Send a signal to the targeted children (but NOT special children;
4013  * dead_end children are never signaled, either).
4014  */
4015 static bool
4016 SignalSomeChildren(int signal, int target)
4017 {
4018  dlist_iter iter;
4019  bool signaled = false;
4020 
4021  dlist_foreach(iter, &BackendList)
4022  {
4023  Backend *bp = dlist_container(Backend, elem, iter.cur);
4024 
4025  if (bp->dead_end)
4026  continue;
4027 
4028  /*
4029  * Since target == BACKEND_TYPE_ALL is the most common case, we test
4030  * it first and avoid touching shared memory for every child.
4031  */
4032  if (target != BACKEND_TYPE_ALL)
4033  {
4034  /*
4035  * Assign bkend_type for any recently announced WAL Sender
4036  * processes.
4037  */
4038  if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
4041 
4042  if (!(target & bp->bkend_type))
4043  continue;
4044  }
4045 
4046  ereport(DEBUG4,
4047  (errmsg_internal("sending signal %d to process %d",
4048  signal, (int) bp->pid)));
4049  signal_child(bp->pid, signal);
4050  signaled = true;
4051  }
4052  return signaled;
4053 }
4054 
4055 /*
4056  * Send a termination signal to children. This considers all of our children
4057  * processes, except syslogger and dead_end backends.
4058  */
4059 static void
4061 {
4062  SignalChildren(signal);
4063  if (StartupPID != 0)
4064  {
4065  signal_child(StartupPID, signal);
4066  if (signal == SIGQUIT || signal == SIGKILL || signal == SIGABRT)
4068  }
4069  if (BgWriterPID != 0)
4070  signal_child(BgWriterPID, signal);
4071  if (CheckpointerPID != 0)
4072  signal_child(CheckpointerPID, signal);
4073  if (WalWriterPID != 0)
4074  signal_child(WalWriterPID, signal);
4075  if (WalReceiverPID != 0)
4076  signal_child(WalReceiverPID, signal);
4077  if (AutoVacPID != 0)
4078  signal_child(AutoVacPID, signal);
4079  if (PgArchPID != 0)
4080  signal_child(PgArchPID, signal);
4081 }
4082 
4083 /*
4084  * BackendStartup -- start backend process
4085  *
4086  * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
4087  *
4088  * Note: if you change this code, also consider StartAutovacuumWorker.
4089  */
4090 static int
4092 {
4093  Backend *bn; /* for backend cleanup */
4094  pid_t pid;
4095 
4096  /*
4097  * Create backend data structure. Better before the fork() so we can
4098  * handle failure cleanly.
4099  */
4100  bn = (Backend *) malloc(sizeof(Backend));
4101  if (!bn)
4102  {
4103  ereport(LOG,
4104  (errcode(ERRCODE_OUT_OF_MEMORY),
4105  errmsg("out of memory")));
4106  return STATUS_ERROR;
4107  }
4108 
4109  /*
4110  * Compute the cancel key that will be assigned to this backend. The
4111  * backend will have its own copy in the forked-off process' value of
4112  * MyCancelKey, so that it can transmit the key to the frontend.
4113  */
4115  {
4116  free(bn);
4117  ereport(LOG,
4118  (errcode(ERRCODE_INTERNAL_ERROR),
4119  errmsg("could not generate random cancel key")));
4120  return STATUS_ERROR;
4121  }
4122 
4123  bn->cancel_key = MyCancelKey;
4124 
4125  /* Pass down canAcceptConnections state */
4126  port->canAcceptConnections = canAcceptConnections(BACKEND_TYPE_NORMAL);
4127  bn->dead_end = (port->canAcceptConnections != CAC_OK);
4128 
4129  /*
4130  * Unless it's a dead_end child, assign it a child slot number
4131  */
4132  if (!bn->dead_end)
4134  else
4135  bn->child_slot = 0;
4136 
4137  /* Hasn't asked to be notified about any bgworkers yet */
4138  bn->bgworker_notify = false;
4139 
4140 #ifdef EXEC_BACKEND
4141  pid = backend_forkexec(port);
4142 #else /* !EXEC_BACKEND */
4143  pid = fork_process();
4144  if (pid == 0) /* child */
4145  {
4146  free(bn);
4147 
4148  /* Detangle from postmaster */
4150 
4151  /* Close the postmaster's sockets */
4152  ClosePostmasterPorts(false);
4153 
4154  /* Perform additional initialization and collect startup packet */
4156 
4157  /*
4158  * Create a per-backend PGPROC struct in shared memory. We must do
4159  * this before we can use LWLocks. In the !EXEC_BACKEND case (here)
4160  * this could be delayed a bit further, but EXEC_BACKEND needs to do
4161  * stuff with LWLocks before PostgresMain(), so we do it here as well
4162  * for symmetry.
4163  */
4164  InitProcess();
4165 
4166  /* And run the backend */
4167  BackendRun(port);
4168  }
4169 #endif /* EXEC_BACKEND */
4170 
4171  if (pid < 0)
4172  {
4173  /* in parent, fork failed */
4174  int save_errno = errno;
4175 
4176  if (!bn->dead_end)
4178  free(bn);
4179  errno = save_errno;
4180  ereport(LOG,
4181  (errmsg("could not fork new process for connection: %m")));
4182  report_fork_failure_to_client(port, save_errno);
4183  return STATUS_ERROR;
4184  }
4185 
4186  /* in parent, successful fork */
4187  ereport(DEBUG2,
4188  (errmsg_internal("forked new backend, pid=%d socket=%d",
4189  (int) pid, (int) port->sock)));
4190 
4191  /*
4192  * Everything's been successful, it's safe to add this backend to our list
4193  * of backends.
4194  */
4195  bn->pid = pid;
4196  bn->bkend_type = BACKEND_TYPE_NORMAL; /* Can change later to WALSND */
4198 
4199 #ifdef EXEC_BACKEND
4200  if (!bn->dead_end)
4201  ShmemBackendArrayAdd(bn);
4202 #endif
4203 
4204  return STATUS_OK;
4205 }
4206 
4207 /*
4208  * Try to report backend fork() failure to client before we close the
4209  * connection. Since we do not care to risk blocking the postmaster on
4210  * this connection, we set the connection to non-blocking and try only once.
4211  *
4212  * This is grungy special-purpose code; we cannot use backend libpq since
4213  * it's not up and running.
4214  */
4215 static void
4217 {
4218  char buffer[1000];
4219  int rc;
4220 
4221  /* Format the error message packet (always V2 protocol) */
4222  snprintf(buffer, sizeof(buffer), "E%s%s\n",
4223  _("could not fork new process for connection: "),
4224  strerror(errnum));
4225 
4226  /* Set port to non-blocking. Don't do send() if this fails */
4227  if (!pg_set_noblock(port->sock))
4228  return;
4229 
4230  /* We'll retry after EINTR, but ignore all other failures */
4231  do
4232  {
4233  rc = send(port->sock, buffer, strlen(buffer) + 1, 0);
4234  } while (rc < 0 && errno == EINTR);
4235 }
4236 
4237 
4238 /*
4239  * BackendInitialize -- initialize an interactive (postmaster-child)
4240  * backend process, and collect the client's startup packet.
4241  *
4242  * returns: nothing. Will not return at all if there's any failure.
4243  *
4244  * Note: this code does not depend on having any access to shared memory.
4245  * Indeed, our approach to SIGTERM/timeout handling *requires* that
4246  * shared memory not have been touched yet; see comments within.
4247  * In the EXEC_BACKEND case, we are physically attached to shared memory
4248  * but have not yet set up most of our local pointers to shmem structures.
4249  */
4250 static void
4252 {
4253  int status;
4254  int ret;
4255  char remote_host[NI_MAXHOST];
4256  char remote_port[NI_MAXSERV];
4257  StringInfoData ps_data;
4258 
4259  /* Save port etc. for ps status */
4260  MyProcPort = port;
4261 
4262  /* Tell fd.c about the long-lived FD associated with the port */
4264 
4265  /*
4266  * PreAuthDelay is a debugging aid for investigating problems in the
4267  * authentication cycle: it can be set in postgresql.conf to allow time to
4268  * attach to the newly-forked backend with a debugger. (See also
4269  * PostAuthDelay, which we allow clients to pass through PGOPTIONS, but it
4270  * is not honored until after authentication.)
4271  */
4272  if (PreAuthDelay > 0)
4273  pg_usleep(PreAuthDelay * 1000000L);
4274 
4275  /* This flag will remain set until InitPostgres finishes authentication */
4276  ClientAuthInProgress = true; /* limit visibility of log messages */
4277 
4278  /* set these to empty in case they are needed before we set them up */
4279  port->remote_host = "";
4280  port->remote_port = "";
4281 
4282  /*
4283  * Initialize libpq and enable reporting of ereport errors to the client.
4284  * Must do this now because authentication uses libpq to send messages.
4285  */
4286  pq_init(); /* initialize libpq to talk to client */
4287  whereToSendOutput = DestRemote; /* now safe to ereport to client */
4288 
4289  /*
4290  * We arrange to do _exit(1) if we receive SIGTERM or timeout while trying
4291  * to collect the startup packet; while SIGQUIT results in _exit(2).
4292  * Otherwise the postmaster cannot shutdown the database FAST or IMMED
4293  * cleanly if a buggy client fails to send the packet promptly.
4294  *
4295  * Exiting with _exit(1) is only possible because we have not yet touched
4296  * shared memory; therefore no outside-the-process state needs to get
4297  * cleaned up.
4298  */
4300  /* SIGQUIT handler was already set up by InitPostmasterChild */
4301  InitializeTimeouts(); /* establishes SIGALRM handler */
4302  sigprocmask(SIG_SETMASK, &StartupBlockSig, NULL);
4303 
4304  /*
4305  * Get the remote host name and port for logging and status display.
4306  */
4307  remote_host[0] = '\0';
4308  remote_port[0] = '\0';
4309  if ((ret = pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen,
4310  remote_host, sizeof(remote_host),
4311  remote_port, sizeof(remote_port),
4312  (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV)) != 0)
4313  ereport(WARNING,
4314  (errmsg_internal("pg_getnameinfo_all() failed: %s",
4315  gai_strerror(ret))));
4316 
4317  /*
4318  * Save remote_host and remote_port in port structure (after this, they
4319  * will appear in log_line_prefix data for log messages).
4320  */
4321  port->remote_host = strdup(remote_host);
4322  port->remote_port = strdup(remote_port);
4323 
4324  /* And now we can issue the Log_connections message, if wanted */
4325  if (Log_connections)
4326  {
4327  if (remote_port[0])
4328  ereport(LOG,
4329  (errmsg("connection received: host=%s port=%s",
4330  remote_host,
4331  remote_port)));
4332  else
4333  ereport(LOG,
4334  (errmsg("connection received: host=%s",
4335  remote_host)));
4336  }
4337 
4338  /*
4339  * If we did a reverse lookup to name, we might as well save the results
4340  * rather than possibly repeating the lookup during authentication.
4341  *
4342  * Note that we don't want to specify NI_NAMEREQD above, because then we'd
4343  * get nothing useful for a client without an rDNS entry. Therefore, we
4344  * must check whether we got a numeric IPv4 or IPv6 address, and not save
4345  * it into remote_hostname if so. (This test is conservative and might
4346  * sometimes classify a hostname as numeric, but an error in that
4347  * direction is safe; it only results in a possible extra lookup.)
4348  */
4349  if (log_hostname &&
4350  ret == 0 &&
4351  strspn(remote_host, "0123456789.") < strlen(remote_host) &&
4352  strspn(remote_host, "0123456789ABCDEFabcdef:") < strlen(remote_host))
4353  port->remote_hostname = strdup(remote_host);
4354 
4355  /*
4356  * Ready to begin client interaction. We will give up and _exit(1) after
4357  * a time delay, so that a broken client can't hog a connection
4358  * indefinitely. PreAuthDelay and any DNS interactions above don't count
4359  * against the time limit.
4360  *
4361  * Note: AuthenticationTimeout is applied here while waiting for the
4362  * startup packet, and then again in InitPostgres for the duration of any
4363  * authentication operations. So a hostile client could tie up the
4364  * process for nearly twice AuthenticationTimeout before we kick him off.
4365  *
4366  * Note: because PostgresMain will call InitializeTimeouts again, the
4367  * registration of STARTUP_PACKET_TIMEOUT will be lost. This is okay
4368  * since we never use it again after this function.
4369  */
4372 
4373  /*
4374  * Receive the startup packet (which might turn out to be a cancel request
4375  * packet).
4376  */
4377  status = ProcessStartupPacket(port, false, false);
4378 
4379  /*
4380  * Disable the timeout, and prevent SIGTERM again.
4381  */
4383  sigprocmask(SIG_SETMASK, &BlockSig, NULL);
4384 
4385  /*
4386  * As a safety check that nothing in startup has yet performed
4387  * shared-memory modifications that would need to be undone if we had
4388  * exited through SIGTERM or timeout above, check that no on_shmem_exit
4389  * handlers have been registered yet. (This isn't terribly bulletproof,
4390  * since someone might misuse an on_proc_exit handler for shmem cleanup,
4391  * but it's a cheap and helpful check. We cannot disallow on_proc_exit
4392  * handlers unfortunately, since pq_init() already registered one.)
4393  */
4395 
4396  /*
4397  * Stop here if it was bad or a cancel packet. ProcessStartupPacket
4398  * already did any appropriate error reporting.
4399  */
4400  if (status != STATUS_OK)
4401  proc_exit(0);
4402 
4403  /*
4404  * Now that we have the user and database name, we can set the process
4405  * title for ps. It's good to do this as early as possible in startup.
4406  */
4407  initStringInfo(&ps_data);
4408  if (am_walsender)
4410  appendStringInfo(&ps_data, "%s ", port->user_name);
4411  if (port->database_name[0] != '\0')
4412  appendStringInfo(&ps_data, "%s ", port->database_name);
4413  appendStringInfoString(&ps_data, port->remote_host);
4414  if (port->remote_port[0] != '\0')
4415  appendStringInfo(&ps_data, "(%s)", port->remote_port);
4416 
4417  init_ps_display(ps_data.data);
4418  pfree(ps_data.data);
4419 
4420  set_ps_display("initializing");
4421 }
4422 
4423 
4424 /*
4425  * BackendRun -- set up the backend's argument list and invoke PostgresMain()
4426  *
4427  * returns:
4428  * Doesn't return at all.
4429  */
4430 static void
4432 {
4433  /*
4434  * Make sure we aren't in PostmasterContext anymore. (We can't delete it
4435  * just yet, though, because InitPostgres will need the HBA data.)
4436  */
4438 
4439  PostgresMain(port->database_name, port->user_name);
4440 }
4441 
4442 
4443 #ifdef EXEC_BACKEND
4444 
4445 /*
4446  * postmaster_forkexec -- fork and exec a postmaster subprocess
4447  *
4448  * The caller must have set up the argv array already, except for argv[2]
4449  * which will be filled with the name of the temp variable file.
4450  *
4451  * Returns the child process PID, or -1 on fork failure (a suitable error
4452  * message has been logged on failure).
4453  *
4454  * All uses of this routine will dispatch to SubPostmasterMain in the
4455  * child process.
4456  */
4457 pid_t
4458 postmaster_forkexec(int argc, char *argv[])
4459 {
4460  Port port;
4461 
4462  /* This entry point passes dummy values for the Port variables */
4463  memset(&port, 0, sizeof(port));
4464  return internal_forkexec(argc, argv, &port);
4465 }
4466 
4467 /*
4468  * backend_forkexec -- fork/exec off a backend process
4469  *
4470  * Some operating systems (WIN32) don't have fork() so we have to simulate
4471  * it by storing parameters that need to be passed to the child and
4472  * then create a new child process.
4473  *
4474  * returns the pid of the fork/exec'd process, or -1 on failure
4475  */
4476 static pid_t
4477 backend_forkexec(Port *port)
4478 {
4479  char *av[4];
4480  int ac = 0;
4481 
4482  av[ac++] = "postgres";
4483  av[ac++] = "--forkbackend";
4484  av[ac++] = NULL; /* filled in by internal_forkexec */
4485 
4486  av[ac] = NULL;
4487  Assert(ac < lengthof(av));
4488 
4489  return internal_forkexec(ac, av, port);
4490 }
4491 
4492 #ifndef WIN32
4493 
4494 /*
4495  * internal_forkexec non-win32 implementation
4496  *
4497  * - writes out backend variables to the parameter file
4498  * - fork():s, and then exec():s the child process
4499  */
4500 static pid_t
4501 internal_forkexec(int argc, char *argv[], Port *port)
4502 {
4503  static unsigned long tmpBackendFileNum = 0;
4504  pid_t pid;
4505  char tmpfilename[MAXPGPATH];
4506  BackendParameters param;
4507  FILE *fp;
4508 
4509  if (!save_backend_variables(&param, port))
4510  return -1; /* log made by save_backend_variables */
4511 
4512  /* Calculate name for temp file */
4513  snprintf(tmpfilename, MAXPGPATH, "%s/%s.backend_var.%d.%lu",
4515  MyProcPid, ++tmpBackendFileNum);
4516 
4517  /* Open file */
4518  fp = AllocateFile(tmpfilename, PG_BINARY_W);
4519  if (!fp)
4520  {
4521  /*
4522  * As in OpenTemporaryFileInTablespace, try to make the temp-file
4523  * directory, ignoring errors.
4524  */
4526 
4527  fp = AllocateFile(tmpfilename, PG_BINARY_W);
4528  if (!fp)
4529  {
4530  ereport(LOG,
4532  errmsg("could not create file \"%s\": %m",
4533  tmpfilename)));
4534  return -1;
4535  }
4536  }
4537 
4538  if (fwrite(&param, sizeof(param), 1, fp) != 1)
4539  {
4540  ereport(LOG,
4542  errmsg("could not write to file \"%s\": %m", tmpfilename)));
4543  FreeFile(fp);
4544  return -1;
4545  }
4546 
4547  /* Release file */
4548  if (FreeFile(fp))
4549  {
4550  ereport(LOG,
4552  errmsg("could not write to file \"%s\": %m", tmpfilename)));
4553  return -1;
4554  }
4555 
4556  /* Make sure caller set up argv properly */
4557  Assert(argc >= 3);
4558  Assert(argv[argc] == NULL);
4559  Assert(strncmp(argv[1], "--fork", 6) == 0);
4560  Assert(argv[2] == NULL);
4561 
4562  /* Insert temp file name after --fork argument */
4563  argv[2] = tmpfilename;
4564 
4565  /* Fire off execv in child */
4566  if ((pid = fork_process()) == 0)
4567  {
4568  if (execv(postgres_exec_path, argv) < 0)
4569  {
4570  ereport(LOG,
4571  (errmsg("could not execute server process \"%s\": %m",
4572  postgres_exec_path)));
4573  /* We're already in the child process here, can't return */
4574  exit(1);
4575  }
4576  }
4577 
4578  return pid; /* Parent returns pid, or -1 on fork failure */
4579 }
4580 #else /* WIN32 */
4581 
4582 /*
4583  * internal_forkexec win32 implementation
4584  *
4585  * - starts backend using CreateProcess(), in suspended state
4586  * - writes out backend variables to the parameter file
4587  * - during this, duplicates handles and sockets required for
4588  * inheritance into the new process
4589  * - resumes execution of the new process once the backend parameter
4590  * file is complete.
4591  */
4592 static pid_t
4593 internal_forkexec(int argc, char *argv[], Port *port)
4594 {
4595  int retry_count = 0;
4596  STARTUPINFO si;
4597  PROCESS_INFORMATION pi;
4598  int i;
4599  int j;
4600  char cmdLine[MAXPGPATH * 2];
4601  HANDLE paramHandle;
4602  BackendParameters *param;
4603  SECURITY_ATTRIBUTES sa;
4604  char paramHandleStr[32];
4605  win32_deadchild_waitinfo *childinfo;
4606 
4607  /* Make sure caller set up argv properly */
4608  Assert(argc >= 3);
4609  Assert(argv[argc] == NULL);
4610  Assert(strncmp(argv[1], "--fork", 6) == 0);
4611  Assert(argv[2] == NULL);
4612 
4613  /* Resume here if we need to retry */
4614 retry:
4615 
4616  /* Set up shared memory for parameter passing */
4617  ZeroMemory(&sa, sizeof(sa));
4618  sa.nLength = sizeof(sa);
4619  sa.bInheritHandle = TRUE;
4620  paramHandle = CreateFileMapping(INVALID_HANDLE_VALUE,
4621  &sa,
4622  PAGE_READWRITE,
4623  0,
4624  sizeof(BackendParameters),
4625  NULL);
4626  if (paramHandle == INVALID_HANDLE_VALUE)
4627  {
4628  ereport(LOG,
4629  (errmsg("could not create backend parameter file mapping: error code %lu",
4630  GetLastError())));
4631  return -1;
4632  }
4633 
4634  param = MapViewOfFile(paramHandle, FILE_MAP_WRITE, 0, 0, sizeof(BackendParameters));
4635  if (!param)
4636  {
4637  ereport(LOG,
4638  (errmsg("could not map backend parameter memory: error code %lu",
4639  GetLastError())));
4640  CloseHandle(paramHandle);
4641  return -1;
4642  }
4643 
4644  /* Insert temp file name after --fork argument */
4645 #ifdef _WIN64
4646  sprintf(paramHandleStr, "%llu", (LONG_PTR) paramHandle);
4647 #else
4648  sprintf(paramHandleStr, "%lu", (DWORD) paramHandle);
4649 #endif
4650  argv[2] = paramHandleStr;
4651 
4652  /* Format the cmd line */
4653  cmdLine[sizeof(cmdLine) - 1] = '\0';
4654  cmdLine[sizeof(cmdLine) - 2] = '\0';
4655  snprintf(cmdLine, sizeof(cmdLine) - 1, "\"%s\"", postgres_exec_path);
4656  i = 0;
4657  while (argv[++i] != NULL)
4658  {
4659  j = strlen(cmdLine);
4660  snprintf(cmdLine + j, sizeof(cmdLine) - 1 - j, " \"%s\"", argv[i]);
4661  }
4662  if (cmdLine[sizeof(cmdLine) - 2] != '\0')
4663  {
4664  ereport(LOG,
4665  (errmsg("subprocess command line too long")));
4666  UnmapViewOfFile(param);
4667  CloseHandle(paramHandle);
4668  return -1;
4669  }
4670 
4671  memset(&pi, 0, sizeof(pi));
4672  memset(&si, 0, sizeof(si));
4673  si.cb = sizeof(si);
4674 
4675  /*
4676  * Create the subprocess in a suspended state. This will be resumed later,
4677  * once we have written out the parameter file.
4678  */
4679  if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, CREATE_SUSPENDED,
4680  NULL, NULL, &si, &pi))
4681  {
4682  ereport(LOG,
4683  (errmsg("CreateProcess() call failed: %m (error code %lu)",
4684  GetLastError())));
4685  UnmapViewOfFile(param);
4686  CloseHandle(paramHandle);
4687  return -1;
4688  }
4689 
4690  if (!save_backend_variables(param, port, pi.hProcess, pi.dwProcessId))
4691  {
4692  /*
4693  * log made by save_backend_variables, but we have to clean up the
4694  * mess with the half-started process
4695  */
4696  if (!TerminateProcess(pi.hProcess, 255))
4697  ereport(LOG,
4698  (errmsg_internal("could not terminate unstarted process: error code %lu",
4699  GetLastError())));
4700  CloseHandle(pi.hProcess);
4701  CloseHandle(pi.hThread);
4702  UnmapViewOfFile(param);
4703  CloseHandle(paramHandle);
4704  return -1; /* log made by save_backend_variables */
4705  }
4706 
4707  /* Drop the parameter shared memory that is now inherited to the backend */
4708  if (!UnmapViewOfFile(param))
4709  ereport(LOG,
4710  (errmsg("could not unmap view of backend parameter file: error code %lu",
4711  GetLastError())));
4712  if (!CloseHandle(paramHandle))
4713  ereport(LOG,
4714  (errmsg("could not close handle to backend parameter file: error code %lu",
4715  GetLastError())));
4716 
4717  /*
4718  * Reserve the memory region used by our main shared memory segment before
4719  * we resume the child process. Normally this should succeed, but if ASLR
4720  * is active then it might sometimes fail due to the stack or heap having
4721  * gotten mapped into that range. In that case, just terminate the
4722  * process and retry.
4723  */
4724  if (!pgwin32_ReserveSharedMemoryRegion(pi.hProcess))
4725  {
4726  /* pgwin32_ReserveSharedMemoryRegion already made a log entry */
4727  if (!TerminateProcess(pi.hProcess, 255))
4728  ereport(LOG,
4729  (errmsg_internal("could not terminate process that failed to reserve memory: error code %lu",
4730  GetLastError())));
4731  CloseHandle(pi.hProcess);
4732  CloseHandle(pi.hThread);
4733  if (++retry_count < 100)
4734  goto retry;
4735  ereport(LOG,
4736  (errmsg("giving up after too many tries to reserve shared memory"),
4737  errhint("This might be caused by ASLR or antivirus software.")));
4738  return -1;
4739  }
4740 
4741  /*
4742  * Now that the backend variables are written out, we start the child
4743  * thread so it can start initializing while we set up the rest of the
4744  * parent state.
4745  */
4746  if (ResumeThread(pi.hThread) == -1)
4747  {
4748  if (!TerminateProcess(pi.hProcess, 255))
4749  {
4750  ereport(LOG,
4751  (errmsg_internal("could not terminate unstartable process: error code %lu",
4752  GetLastError())));
4753  CloseHandle(pi.hProcess);
4754  CloseHandle(pi.hThread);
4755  return -1;
4756  }
4757  CloseHandle(pi.hProcess);
4758  CloseHandle(pi.hThread);
4759  ereport(LOG,
4760  (errmsg_internal("could not resume thread of unstarted process: error code %lu",
4761  GetLastError())));
4762  return -1;
4763  }
4764 
4765  /*
4766  * Queue a waiter to signal when this child dies. The wait will be handled
4767  * automatically by an operating system thread pool. The memory will be
4768  * freed by a later call to waitpid().
4769  */
4770  childinfo = palloc(sizeof(win32_deadchild_waitinfo));
4771  childinfo->procHandle = pi.hProcess;
4772  childinfo->procId = pi.dwProcessId;
4773 
4774  if (!RegisterWaitForSingleObject(&childinfo->waitHandle,
4775  pi.hProcess,
4776  pgwin32_deadchild_callback,
4777  childinfo,
4778  INFINITE,
4779  WT_EXECUTEONLYONCE | WT_EXECUTEINWAITTHREAD))
4780  ereport(FATAL,
4781  (errmsg_internal("could not register process for wait: error code %lu",
4782  GetLastError())));
4783 
4784  /* Don't close pi.hProcess here - waitpid() needs access to it */
4785 
4786  CloseHandle(pi.hThread);
4787 
4788  return pi.dwProcessId;
4789 }
4790 #endif /* WIN32 */
4791 
4792 
4793 /*
4794  * SubPostmasterMain -- Get the fork/exec'd process into a state equivalent
4795  * to what it would be if we'd simply forked on Unix, and then
4796  * dispatch to the appropriate place.
4797  *
4798  * The first two command line arguments are expected to be "--forkFOO"
4799  * (where FOO indicates which postmaster child we are to become), and
4800  * the name of a variables file that we can read to load data that would
4801  * have been inherited by fork() on Unix. Remaining arguments go to the
4802  * subprocess FooMain() routine.
4803  */
4804 void
4805 SubPostmasterMain(int argc, char *argv[])
4806 {
4807  Port port;
4808 
4809  /* In EXEC_BACKEND case we will not have inherited these settings */
4810  IsPostmasterEnvironment = true;
4812 
4813  /* Setup essential subsystems (to ensure elog() behaves sanely) */
4815 
4816  /* Check we got appropriate args */
4817  if (argc < 3)
4818  elog(FATAL, "invalid subpostmaster invocation");
4819 
4820  /* Read in the variables file */
4821  memset(&port, 0, sizeof(Port));
4822  read_backend_variables(argv[2], &port);
4823 
4824  /* Close the postmaster's sockets (as soon as we know them) */
4825  ClosePostmasterPorts(strcmp(argv[1], "--forklog") == 0);
4826 
4827  /* Setup as postmaster child */
4829 
4830  /*
4831  * If appropriate, physically re-attach to shared memory segment. We want
4832  * to do this before going any further to ensure that we can attach at the
4833  * same address the postmaster used. On the other hand, if we choose not
4834  * to re-attach, we may have other cleanup to do.
4835  *
4836  * If testing EXEC_BACKEND on Linux, you should run this as root before
4837  * starting the postmaster:
4838  *
4839  * sysctl -w kernel.randomize_va_space=0
4840  *
4841  * This prevents using randomized stack and code addresses that cause the
4842  * child process's memory map to be different from the parent's, making it
4843  * sometimes impossible to attach to shared memory at the desired address.
4844  * Return the setting to its old value (usually '1' or '2') when finished.
4845  */
4846  if (strcmp(argv[1], "--forkbackend") == 0 ||
4847  strcmp(argv[1], "--forkavlauncher") == 0 ||
4848  strcmp(argv[1], "--forkavworker") == 0 ||
4849  strcmp(argv[1], "--forkaux") == 0 ||
4850  strncmp(argv[1], "--forkbgworker=", 15) == 0)
4852  else
4854 
4855  /* autovacuum needs this set before calling InitProcess */
4856  if (strcmp(argv[1], "--forkavlauncher") == 0)
4857  AutovacuumLauncherIAm();
4858  if (strcmp(argv[1], "--forkavworker") == 0)
4859  AutovacuumWorkerIAm();
4860 
4861  /* Read in remaining GUC variables */
4862  read_nondefault_variables();
4863 
4864  /*
4865  * Check that the data directory looks valid, which will also check the
4866  * privileges on the data directory and update our umask and file/group
4867  * variables for creating files later. Note: this should really be done
4868  * before we create any files or directories.
4869  */
4870  checkDataDir();
4871 
4872  /*
4873  * (re-)read control file, as it contains config. The postmaster will
4874  * already have read this, but this process doesn't know about that.
4875  */
4876  LocalProcessControlFile(false);
4877 
4878  /*
4879  * Reload any libraries that were preloaded by the postmaster. Since we
4880  * exec'd this process, those libraries didn't come along with us; but we
4881  * should load them into all child processes to be consistent with the
4882  * non-EXEC_BACKEND behavior.
4883  */
4885 
4886  /* Run backend or appropriate child */
4887  if (strcmp(argv[1], "--forkbackend") == 0)
4888  {
4889  Assert(argc == 3); /* shouldn't be any more args */
4890 
4891  /*
4892  * Need to reinitialize the SSL library in the backend, since the
4893  * context structures contain function pointers and cannot be passed
4894  * through the parameter file.
4895  *
4896  * If for some reason reload fails (maybe the user installed broken
4897  * key files), soldier on without SSL; that's better than all
4898  * connections becoming impossible.
4899  *
4900  * XXX should we do this in all child processes? For the moment it's
4901  * enough to do it in backend children.
4902  */
4903 #ifdef USE_SSL
4904  if (EnableSSL)
4905  {
4906  if (secure_initialize(false) == 0)
4907  LoadedSSL = true;
4908  else
4909  ereport(LOG,
4910  (errmsg("SSL configuration could not be loaded in child process")));
4911  }
4912 #endif
4913 
4914  /*
4915  * Perform additional initialization and collect startup packet.
4916  *
4917  * We want to do this before InitProcess() for a couple of reasons: 1.
4918  * so that we aren't eating up a PGPROC slot while waiting on the
4919  * client. 2. so that if InitProcess() fails due to being out of
4920  * PGPROC slots, we have already initialized libpq and are able to
4921  * report the error to the client.
4922  */
4924 
4925  /* Restore basic shared memory pointers */
4927 
4928  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4929  InitProcess();
4930 
4931  /* Attach process to shared data structures */
4933 
4934  /* And run the backend */
4935  BackendRun(&port); /* does not return */
4936  }
4937  if (strcmp(argv[1], "--forkaux") == 0)
4938  {
4939  AuxProcType auxtype;
4940 
4941  Assert(argc == 4);
4942 
4943  /* Restore basic shared memory pointers */
4945 
4946  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4948 
4949  /* Attach process to shared data structures */
4951 
4952  auxtype = atoi(argv[3]);
4953  AuxiliaryProcessMain(auxtype); /* does not return */
4954  }
4955  if (strcmp(argv[1], "--forkavlauncher") == 0)
4956  {
4957  /* Restore basic shared memory pointers */
4959 
4960  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4961  InitProcess();
4962 
4963  /* Attach process to shared data structures */
4965 
4966  AutoVacLauncherMain(argc - 2, argv + 2); /* does not return */
4967  }
4968  if (strcmp(argv[1], "--forkavworker") == 0)
4969  {
4970  /* Restore basic shared memory pointers */
4972 
4973  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4974  InitProcess();
4975 
4976  /* Attach process to shared data structures */
4978 
4979  AutoVacWorkerMain(argc - 2, argv + 2); /* does not return */
4980  }
4981  if (strncmp(argv[1], "--forkbgworker=", 15) == 0)
4982  {
4983  int shmem_slot;
4984 
4985  /* do this as early as possible; in particular, before InitProcess() */
4986  IsBackgroundWorker = true;
4987 
4988  /* Restore basic shared memory pointers */
4990 
4991  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4992  InitProcess();
4993 
4994  /* Attach process to shared data structures */
4996 
4997  /* Fetch MyBgworkerEntry from shared memory */
4998  shmem_slot = atoi(argv[1] + 15);
4999  MyBgworkerEntry = BackgroundWorkerEntry(shmem_slot);
5000 
5002  }
5003  if (strcmp(argv[1], "--forklog") == 0)
5004  {
5005  /* Do not want to attach to shared memory */
5006 
5007  SysLoggerMain(argc, argv); /* does not return */
5008  }
5009 
5010  abort(); /* shouldn't get here */
5011 }
5012 #endif /* EXEC_BACKEND */
5013 
5014 
/*
 * ExitPostmaster -- common exit path for the postmaster
 *
 * Postmaster code must never call exit() itself; every exit is supposed to
 * be routed through this function.  "status" is handed on unchanged to
 * proc_exit().
 */
static void
ExitPostmaster(int status)
{
#ifdef HAVE_PTHREAD_IS_THREADED_NP

	/*
	 * Nothing is known that would turn a running postmaster multithreaded,
	 * but look once more on the way out in case some unknown cause exists.
	 * The report is made at LOG level because an unclean shutdown at this
	 * stage would usually look much the same as a clean one.
	 */
	if (pthread_is_threaded_np())
		ereport(LOG,
				(errcode(ERRCODE_INTERNAL_ERROR),
				 errmsg_internal("postmaster became multithreaded"),
				 errdetail("Please report this to <%s>.", PACKAGE_BUGREPORT)));
#endif

	/*
	 * Historical notes kept from the original author comments: this routine
	 * "should cleanup shared memory and kill all backends", and on the
	 * question of whether backends ought to be killed when the postmaster
	 * dies the answer recorded was "MUST -- vadim 05-10-1999".  No such
	 * action is coded directly in this function; it only calls proc_exit().
	 */

	proc_exit(status);
}
5049 
5050 /*
5051  * Handle pmsignal conditions representing requests from backends,
5052  * and check for promote and logrotate requests from pg_ctl.
5053  */
5054 static void
5056 {
5057  pending_pm_pmsignal = false;
5058 
5059  ereport(DEBUG2,
5060  (errmsg_internal("postmaster received pmsignal signal")));
5061 
5062  /*
5063  * RECOVERY_STARTED and BEGIN_HOT_STANDBY signals are ignored in
5064  * unexpected states. If the startup process quickly starts up, completes
5065  * recovery, exits, we might process the death of the startup process
5066  * first. We don't want to go back to recovery in that case.
5067  */
5070  {
5071  /* WAL redo has started. We're out of reinitialization. */
5072  FatalError = false;
5073  AbortStartTime = 0;
5074 
5075  /*
5076  * Start the archiver if we're responsible for (re-)archiving received
5077  * files.
5078  */
5079  Assert(PgArchPID == 0);
5080  if (XLogArchivingAlways())
5082 
5083  /*
5084  * If we aren't planning to enter hot standby mode later, treat
5085  * RECOVERY_STARTED as meaning we're out of startup, and report status
5086  * accordingly.
5087  */
5088  if (!EnableHotStandby)
5089  {
5091 #ifdef USE_SYSTEMD
5092  sd_notify(0, "READY=1");
5093 #endif
5094  }
5095 
5096  pmState = PM_RECOVERY;
5097  }
5098 
5101  {
5102  ereport(LOG,
5103  (errmsg("database system is ready to accept read-only connections")));
5104 
5105  /* Report status */
5107 #ifdef USE_SYSTEMD
5108  sd_notify(0, "READY=1");
5109 #endif
5110 
5112  connsAllowed = true;
5113 
5114  /* Some workers may be scheduled to start now */
5115  StartWorkerNeeded = true;
5116  }
5117 
5118  /* Process background worker state changes. */
5120  {
5121  /* Accept new worker requests only if not stopping. */
5123  StartWorkerNeeded = true;
5124  }
5125 
5128 
5129  /* Tell syslogger to rotate logfile if requested */
5130  if (SysLoggerPID != 0)
5131  {
5132  if (CheckLogrotateSignal())
5133  {
5136  }
5138  {
5140  }
5141  }
5142 
5145  {
5146  /*
5147  * Start one iteration of the autovacuum daemon, even if autovacuuming
5148  * is nominally not enabled. This is so we can have an active defense
5149  * against transaction ID wraparound. We set a flag for the main loop
5150  * to do it rather than trying to do it here --- this is because the
5151  * autovac process itself may send the signal, and we want to handle
5152  * that by launching another iteration as soon as the current one
5153  * completes.
5154  */
5155  start_autovac_launcher = true;
5156  }
5157 
5160  {
5161  /* The autovacuum launcher wants us to start a worker process. */
5163  }
5164 
5166  {
5167  /* Startup Process wants us to start the walreceiver process. */
5168  /* Start immediately if possible, else remember request for later. */
5169  WalReceiverRequested = true;
5171  }
5172 
5173  /*
5174  * Try to advance postmaster's state machine, if a child requests it.
5175  *
5176  * Be careful about the order of this action relative to this function's
5177  * other actions. Generally, this should be after other actions, in case
5178  * they have effects PostmasterStateMachine would need to know about.
5179  * However, we should do it before the CheckPromoteSignal step, which
5180  * cannot have any (immediate) effect on the state machine, but does
5181  * depend on what state we're in now.
5182  */
5184  {
5186  }
5187 
5188  if (StartupPID != 0 &&
5189  (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5190  pmState == PM_HOT_STANDBY) &&
5192  {
5193  /*
5194  * Tell startup process to finish recovery.
5195  *
5196  * Leave the promote signal file in place and let the Startup process
5197  * do the unlink.
5198  */
5200  }
5201 }
5202 
5203 /*
5204  * SIGTERM while processing startup packet.
5205  *
5206  * Running proc_exit() from a signal handler would be quite unsafe.
5207  * However, since we have not yet touched shared memory, we can just
5208  * pull the plug and exit without running any atexit handlers.
5209  *
5210  * One might be tempted to try to send a message, or log one, indicating
5211  * why we are disconnecting. However, that would be quite unsafe in itself.
5212  * Also, it seems undesirable to provide clues about the database's state
5213  * to a client that has not yet completed authentication, or even sent us
5214  * a startup packet.
5215  */
5216 static void
5218 {
5219  _exit(1);
5220 }
5221 
5222 /*
5223  * Dummy signal handler
5224  *
5225  * We use this for signals that we don't actually use in the postmaster,
5226  * but we do use in backends. If we were to SIG_IGN such signals in the
5227  * postmaster, then a newly started backend might drop a signal that arrives
5228  * before it's able to reconfigure its signal processing. (See notes in
5229  * tcop/postgres.c.)
5230  */
5231 static void
5233 {
5234 }
5235 
5236 /*
5237  * Timeout while processing startup packet.
5238  * As for process_startup_packet_die(), we exit via _exit(1).
5239  */
5240 static void
5242 {
5243  _exit(1);
5244 }
5245 
5246 
5247 /*
5248  * Generate a random cancel key.
5249  */
5250 static bool
5252 {
5253  return pg_strong_random(cancel_key, sizeof(int32));
5254 }
5255 
5256 /*
5257  * Count up number of child processes of specified types (dead_end children
5258  * are always excluded).
5259  */
5260 static int
5261 CountChildren(int target)
5262 {
5263  dlist_iter iter;
5264  int cnt = 0;
5265 
5266  dlist_foreach(iter, &BackendList)
5267  {
5268  Backend *bp = dlist_container(Backend, elem, iter.cur);
5269 
5270  if (bp->dead_end)
5271  continue;
5272 
5273  /*
5274  * Since target == BACKEND_TYPE_ALL is the most common case, we test
5275  * it first and avoid touching shared memory for every child.
5276  */
5277  if (target != BACKEND_TYPE_ALL)
5278  {
5279  /*
5280  * Assign bkend_type for any recently announced WAL Sender
5281  * processes.
5282  */
5283  if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
5286 
5287  if (!(target & bp->bkend_type))
5288  continue;
5289  }
5290 
5291  cnt++;
5292  }
5293  return cnt;
5294 }
5295 
5296 
5297 /*
5298  * StartChildProcess -- start an auxiliary process for the postmaster
5299  *
5300  * "type" determines what kind of child will be started. All child types
5301  * initially go to AuxiliaryProcessMain, which will handle common setup.
5302  *
5303  * Return value of StartChildProcess is subprocess' PID, or 0 if failed
5304  * to start subprocess.
5305  */
5306 static pid_t
5308 {
5309  pid_t pid;
5310 
5311 #ifdef EXEC_BACKEND
5312  {
5313  char *av[10];
5314  int ac = 0;
5315  char typebuf[32];
5316 
5317  /*
5318  * Set up command-line arguments for subprocess
5319  */
5320  av[ac++] = "postgres";
5321  av[ac++] = "--forkaux";
5322  av[ac++] = NULL; /* filled in by postmaster_forkexec */
5323 
5324  snprintf(typebuf, sizeof(typebuf), "%d", type);
5325  av[ac++] = typebuf;
5326 
5327  av[ac] = NULL;
5328  Assert(ac < lengthof(av));
5329 
5330  pid = postmaster_forkexec(ac, av);
5331  }
5332 #else /* !EXEC_BACKEND */
5333  pid = fork_process();
5334 
5335  if (pid == 0) /* child */
5336  {
5338 
5339  /* Close the postmaster's sockets */
5340  ClosePostmasterPorts(false);
5341 
5342  /* Release postmaster's working memory context */
5345  PostmasterContext = NULL;
5346 
5347  AuxiliaryProcessMain(type); /* does not return */
5348  }
5349 #endif /* EXEC_BACKEND */
5350 
5351  if (pid < 0)
5352  {
5353  /* in parent, fork failed */
5354  int save_errno = errno;
5355 
5356  errno = save_errno;
5357  switch (type)
5358  {
5359  case StartupProcess:
5360  ereport(LOG,
5361  (errmsg("could not fork startup process: %m")));
5362  break;
5363  case ArchiverProcess:
5364  ereport(LOG,
5365  (errmsg("could not fork archiver process: %m")));
5366  break;
5367  case BgWriterProcess:
5368  ereport(LOG,
5369  (errmsg("could not fork background writer process: %m")));
5370  break;
5371  case CheckpointerProcess:
5372  ereport(LOG,
5373  (errmsg("could not fork checkpointer process: %m")));
5374  break;
5375  case WalWriterProcess:
5376  ereport(LOG,
5377  (errmsg("could not fork WAL writer process: %m")));
5378  break;
5379  case WalReceiverProcess:
5380  ereport(LOG,
5381  (errmsg("could not fork WAL receiver process: %m")));
5382  break;
5383  default:
5384  ereport(LOG,
5385  (errmsg("could not fork process: %m")));
5386  break;
5387  }
5388 
5389  /*
5390  * fork failure is fatal during startup, but there's no need to choke
5391  * immediately if starting other child types fails.
5392  */
5393  if (type == StartupProcess)
5394  ExitPostmaster(1);
5395  return 0;
5396  }
5397 
5398  /*
5399  * in parent, successful fork
5400  */
5401  return pid;
5402 }
5403 
5404 /*
5405  * StartAutovacuumWorker
5406  * Start an autovac worker process.
5407  *
5408  * This function is here because it enters the resulting PID into the
5409  * postmaster's private backends list.
5410  *
5411  * NB -- this code very roughly matches BackendStartup.
      *
      * Returns nothing. On success the function returns early from inside the
      * "bn->pid > 0" branch. When the Backend allocation fails or
      * StartAutoVacWorker() does not return a positive PID, control falls
      * through to the bottom, where avlauncher_needs_signal is set provided
      * AutoVacPID is nonzero. The cancel-key failure path is different: it
      * logs and returns immediately, without touching avlauncher_needs_signal.
      *
      * NOTE(review): this rendering omits some source lines (the numbering
      * skips 5414, 5425, 5433, 5448, 5454-5455, 5467 and 5487), so the
      * conditions guarding the two outer blocks and a few calls are not
      * visible here -- consult the upstream file before changing the logic.
5412  */
5413 static void
5415 {
5416  Backend *bn;
5417 
5418  /*
5419  * If not in condition to run a process, don't try, but handle it like a
5420  * fork failure. This does not normally happen, since the signal is only
5421  * supposed to be sent by autovacuum launcher when it's OK to do it, but
5422  * we have to check to avoid race-condition problems during DB state
5423  * changes.
5424  */
5426  {
5427  /*
5428  * Compute the cancel key that will be assigned to this session. We
5429  * probably don't need cancel keys for autovac workers, but we'd
5430  * better have something random in the field to prevent unfriendly
5431  * people from sending cancels to them.
5432  */
5434  {
5435  ereport(LOG,
5436  (errcode(ERRCODE_INTERNAL_ERROR),
5437  errmsg("could not generate random cancel key")));
5438  return; /* bails out before the launcher-notification code at the bottom */
5439  }
5440 
5441  bn = (Backend *) malloc(sizeof(Backend)); /* NULL result is handled by the else branch below */
5442  if (bn)
5443  {
5444  bn->cancel_key = MyCancelKey;
5445 
5446  /* Autovac workers are not dead_end and need a child slot */
5447  bn->dead_end = false;
5449  bn->bgworker_notify = false;
5450 
5451  bn->pid = StartAutoVacWorker(); /* only a result > 0 is treated as success */
5452  if (bn->pid > 0)
5453  {
5456 #ifdef EXEC_BACKEND
5457  ShmemBackendArrayAdd(bn);
5458 #endif
5459  /* all OK */
5460  return;
5461  }
5462 
5463  /*
5464  * fork failed, fall through to report -- actual error message was
5465  * logged by StartAutoVacWorker
5466  */
5468  free(bn); /* release the entry allocated above */
5469  }
5470  else
5471  ereport(LOG,
5472  (errcode(ERRCODE_OUT_OF_MEMORY),
5473  errmsg("out of memory")));
5474  }
5475 
5476  /*
5477  * Report the failure to the launcher, if it's running. (If it's not, we
5478  * might not even be connected to shared memory, so don't try to call
5479  * AutoVacWorkerFailed.) Note that we also need to signal it so that it
5480  * responds to the condition, but we don't do that here, instead waiting
5481  * for ServerLoop to do it. This way we avoid a ping-pong signaling in
5482  * quick succession between the autovac launcher and postmaster in case
5483  * things get ugly.
5484  */
5485  if (AutoVacPID != 0)
5486  {
5488  avlauncher_needs_signal = true; /* only a flag; the signal itself is deferred, as explained above */
5489  }
5490 }
5491 
5492 /*
5493  * MaybeStartWalReceiver
5494  * Start the WAL receiver process, if not running and our state allows.
5495  *
5496  * Note: if WalReceiverPID is already nonzero, it might seem that we should
5497  * clear WalReceiverRequested. However, there's a race condition if the
5498  * walreceiver terminates and the startup process immediately requests a new
5499  * one: it's quite possible to get the signal for the request before reaping
5500  * the dead walreceiver process. Better to risk launching an extra
5501  * walreceiver than to miss launching one we need. (The walreceiver code
5502  * has logic to recognize that it should go away if not needed.)
      *
      * The visible part of the condition requires WalReceiverPID == 0 and
      * pmState to be one of PM_STARTUP, PM_RECOVERY or PM_HOT_STANDBY.
      * WalReceiverRequested is cleared only when WalReceiverPID is nonzero
      * after the start attempt; otherwise it stays set for a later retry.
      *
      * NOTE(review): source lines 5505, 5510 and 5512 are absent from this
      * rendering, so the final term of the condition and the statement that
      * actually assigns WalReceiverPID are not visible here.
5503  */
5504 static void
5506 {
5507  if (WalReceiverPID == 0 &&
5508  (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5509  pmState == PM_HOT_STANDBY) &&
5511  {
5513  if (WalReceiverPID != 0) /* nonzero PID: the start attempt succeeded */
5514  WalReceiverRequested = false;
5515  /* else leave the flag set, so we'll try again later */
5516  }
5517 }
5518 
5519 
5520 /*
5521  * Create the opts file
      *
      * Writes one line to "postmaster.opts" (a relative path, opened in "w"
      * mode so any existing content is truncated): fullprogname, followed by
      * each of argv[1] .. argv[argc - 1] preceded by a space and wrapped in
      * double quotes, followed by a newline. argv[0] is not written.
      *
      * Returns true on success. Returns false, after reporting at LOG level,
      * if the file cannot be opened or if fclose() reports a failure.
      *
      * NOTE(review): arguments are emitted verbatim, so a double quote or
      * backslash inside an argument is not escaped in the file.
      * NOTE(review): the results of fprintf()/fputs() are not checked; a write
      * error is noticed only if fclose() also fails.
5522  */
5523 static bool
5524 CreateOptsFile(int argc, char *argv[], char *fullprogname)
5525 {
5526  FILE *fp;
5527  int i;
5528 
5529 #define OPTS_FILE "postmaster.opts"
5530 
5531  if ((fp = fopen(OPTS_FILE, "w")) == NULL)
5532  {
5533  ereport(LOG,
5535  errmsg("could not create file \"%s\": %m", OPTS_FILE)));
5536  return false;
5537  }
5538 
5539  fprintf(fp, "%s", fullprogname); /* program name goes first, unquoted */
5540  for (i = 1; i < argc; i++) /* start at 1: argv[0] is replaced by fullprogname */
5541  fprintf(fp, " \"%s\"", argv[i]);
5542  fputs("\n", fp);
5543 
5544  if (fclose(fp)) /* nonzero means the close (and final flush) failed */
5545  {
5546  ereport(LOG,
5548  errmsg("could not write file \"%s\": %m", OPTS_FILE)));
5549  return false;
5550  }
5551 
5552  return true;
5553 }
5554 
5555 
5556 /*
5557  * MaxLivePostmasterChildren
5558  *
5559  * This reports the number of entries needed in per-child-process arrays
5560  * (the PMChildFlags array, and if EXEC_BACKEND the ShmemBackendArray).
5561  * These arrays include regular backends, autovac workers, walsenders
5562  * and background workers, but not special children nor dead_end children.
5563  * This allows the arrays to have a fixed maximum size, to wit the same
5564  * too-many-children limit enforced by canAcceptConnections(). The exact value
5565  * isn't too critical as long as it's more than MaxBackends.
      *
      * The result is twice a sum that begins with MaxConnections,
      * autovacuum_max_workers and 1.
      *
      * NOTE(review): source lines 5568 and 5571 are absent from this rendering,
      * so the function-name line and the remaining summands of the return
      * expression are not visible here.
5566  */
5567 int
5569 {
5570  return 2 * (MaxConnections + autovacuum_max_workers + 1 +
5572 }
5573 
5574 /*
5575  * Connect background worker to a database.
      *
      * The database and role are identified by name: dbname and username are
      * handed to InitPostgres() together with InvalidOid for both OID
      * arguments. The BGWORKER_BYPASS_ALLOWCONN bit of flags is passed on as
      * the "ignore datallowconn" argument.
      *
      * Reports FATAL with "database connection requirement not indicated
      * during registration", and reports ERROR if the process is no longer in
      * init processing mode once InitPostgres() has returned.
      *
      * NOTE(review): source lines 5578, 5580, 5583 and 5598 are absent from
      * this rendering, so the signature, the condition guarding the FATAL
      * report and the final statement are not visible here.
5576  */
5577 void
5579 {
5581 
5582  /* XXX is this the right errcode? */
5584  ereport(FATAL,
5585  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5586  errmsg("database connection requirement not indicated during registration")));
5587 
5588  InitPostgres(dbname, InvalidOid, /* database to connect to */
5589  username, InvalidOid, /* role to connect as */
5590  false, /* never honor session_preload_libraries */
5591  (flags & BGWORKER_BYPASS_ALLOWCONN) != 0, /* ignore datallowconn? */
5592  NULL); /* no out_dbname */
5593 
5594  /* it had better not have gotten out of "init" mode yet */
5595  if (!IsInitProcessingMode())
5596  ereport(ERROR,
5597  (errmsg("invalid processing mode in background worker")));
5599 }
5600 
5601 /*
5602  * Connect background worker to a database using OIDs.
      *
      * The database and role are identified by OID: dboid and useroid are
      * handed to InitPostgres() together with NULL for both name arguments.
      * The BGWORKER_BYPASS_ALLOWCONN bit of flags is passed on as the "ignore
      * datallowconn" argument.
      *
      * Reports FATAL with "database connection requirement not indicated
      * during registration", and reports ERROR if the process is no longer in
      * init processing mode once InitPostgres() has returned.
      *
      * NOTE(review): source lines 5605, 5607, 5610 and 5625 are absent from
      * this rendering, so the signature, the condition guarding the FATAL
      * report and the final statement are not visible here.
5603  */
5604 void
5606 {
5608 
5609  /* XXX is this the right errcode? */
5611  ereport(FATAL,
5612  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5613  errmsg("database connection requirement not indicated during registration")));
5614 
5615  InitPostgres(NULL, dboid, /* database to connect to */
5616  NULL, useroid, /* role to connect as */
5617  false, /* never honor session_preload_libraries */
5618  (flags & BGWORKER_BYPASS_ALLOWCONN) != 0, /* ignore datallowconn? */
5619  NULL); /* no out_dbname */
5620 
5621  /* it had better not have gotten out of "init" mode yet */
5622  if (!IsInitProcessingMode())
5623  ereport(ERROR,
5624  (errmsg("invalid processing mode in background worker")));
5626 }
5627 
5628 /*
5629  * Block/unblock signals in a background worker
      *
      * This one installs BlockSig as the process signal mask. SIG_SETMASK
      * replaces the current mask outright rather than adding to it, and the
      * NULL third argument means the previous mask is not saved. The return
      * value of sigprocmask() is not checked.
      *
      * NOTE(review): source line 5632 (the function-name line) is absent from
      * this rendering.
5630  */
5631 void
5633 {
5634  sigprocmask(SIG_SETMASK, &BlockSig, NULL);
5635 }
5636 
/*
 * Install UnBlockSig as the process signal mask. SIG_SETMASK replaces the
 * current mask outright, and the NULL third argument means the previous mask
 * is not saved. The return value of sigprocmask() is not checked.
 *
 * NOTE(review): source line 5638 (the function-name line) is absent from this
 * rendering.
 */
5637 void
5639 {
5640  sigprocmask(SIG_SETMASK, &UnBlockSig, NULL);
5641 }
5642 
/*
 * bgworker_forkexec (compiled only when EXEC_BACKEND is defined)
 *
 * Builds the argument vector
 *     "postgres", "--forkbgworker=<shmem_slot>", <placeholder>, NULL
 * and passes it, with its count, to postmaster_forkexec(). The third entry
 * is left NULL here for postmaster_forkexec() to fill in.
 *
 * Returns whatever postmaster_forkexec() returns.
 */
5643 #ifdef EXEC_BACKEND
5644 static pid_t
5645 bgworker_forkexec(int shmem_slot)
5646 {
5647  char *av[10];
5648  int ac = 0;
5649  char forkav[MAXPGPATH];
5650 
5651  snprintf(forkav, MAXPGPATH, "--forkbgworker=%d", shmem_slot); /* encode the slot number in the switch itself */
5652 
5653  av[ac++] = "postgres";
5654  av[ac++] = forkav;
5655  av[ac++] = NULL; /* filled in by postmaster_forkexec */
5656  av[ac] = NULL; /* terminator; not counted in ac */
5657 
5658  Assert(ac < lengthof(av)); /* the terminator slot must lie inside av[] */
5659 
5660  return postmaster_forkexec(ac, av);
5661 }
5662 #endif
5663 
5664 /*
5665  * Start a new bgworker.
5666  * Starting time conditions must have been checked already.
5667  *
5668  * Returns true on success, false on failure.
5669  * In either case, update the RegisteredBgWorker's state appropriately.
5670  *
5671  * This code is heavily based on autovacuum.c, q.v.
      *
      * The caller must pass an entry whose rw_pid is 0 (asserted). False is
      * returned when assign_backendlist_entry() fails and when the fork
      * attempt returns -1; true is returned in the parent after a successful
      * fork, once rw_pid and rw_backend->pid have been recorded. The fork is
      * done with bgworker_forkexec() under EXEC_BACKEND and with
      * fork_process() otherwise; only the latter build has a child branch.
      *
      * NOTE(review): this rendering omits some source lines (the numbering
      * skips 5674, 5692, 5711, 5716, 5722, 5731-5732, 5736-5737, 5740, 5749
      * and 5751), so the function-name line and several statements, including
      * the child's entry into the worker code, are not visible here.
5672  */
5673 static bool
5675 {
5676  pid_t worker_pid;
5677 
5678  Assert(rw->rw_pid == 0);
5679 
5680  /*
5681  * Allocate and assign the Backend element. Note we must do this before
5682  * forking, so that we can handle failures (out of memory or child-process
5683  * slots) cleanly.
5684  *
5685  * Treat failure as though the worker had crashed. That way, the
5686  * postmaster will wait a bit before attempting to start it again; if we
5687  * tried again right away, most likely we'd find ourselves hitting the
5688  * same resource-exhaustion condition.
5689  */
5690  if (!assign_backendlist_entry(rw))
5691  {
5693  return false;
5694  }
5695 
5696  ereport(DEBUG1,
5697  (errmsg_internal("starting background worker process \"%s\"",
5698  rw->rw_worker.bgw_name)));
5699 
5700 #ifdef EXEC_BACKEND
5701  switch ((worker_pid = bgworker_forkexec(rw->rw_shmem_slot)))
5702 #else
5703  switch ((worker_pid = fork_process()))
5704 #endif
5705  {
5706  case -1:
5707  /* in postmaster, fork failed ... */
5708  ereport(LOG,
5709  (errmsg("could not fork worker process: %m")));
5710  /* undo what assign_backendlist_entry did */
5712  rw->rw_child_slot = 0;
5713  free(rw->rw_backend);
5714  rw->rw_backend = NULL; /* do not keep a pointer to the freed entry */
5715  /* mark entry as crashed, so we'll try again later */
5717  break; /* leaves the switch; the function then returns false */
5718 
5719 #ifndef EXEC_BACKEND
5720  case 0:
5721  /* in postmaster child ... */
5723 
5724  /* Close the postmaster's sockets */
5725  ClosePostmasterPorts(false);
5726 
5727  /*
5728  * Before blowing away PostmasterContext, save this bgworker's
5729  * data where it can find it.
5730  */
5733  memcpy(MyBgworkerEntry, &rw->rw_worker, sizeof(BackgroundWorker));
5734 
5735  /* Release postmaster's working memory context */
5738  PostmasterContext = NULL;
5739 
5741 
5742  exit(1); /* should not get here */
5743  break;
5744 #endif
5745  default:
5746  /* in postmaster, fork successful ... */
5747  rw->rw_pid = worker_pid;
5748  rw->rw_backend->pid = rw->rw_pid;
5750  /* add new worker to lists of backends */
5752 #ifdef EXEC_BACKEND
5753  ShmemBackendArrayAdd(rw->rw_backend);
5754 #endif
5755  return true;
5756  }
5757 
5758  return false; /* reached after the fork-failure case breaks out of the switch */
5759 }
5760 
5761 /*
5762  * Does the current postmaster state require starting a worker with the
5763  * specified start_time?
5764  */
5765 static bool
5767 {
5768  switch (pmState)
5769  {
5770  case PM_NO_CHILDREN:
5771  case PM_WAIT_DEAD_END: