PostgreSQL Source Code  git master
postmaster.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * postmaster.c
4  * This program acts as a clearing house for requests to the
5  * POSTGRES system. Frontend programs send a startup message
6  * to the Postmaster and the postmaster uses the info in the
7  * message to set up a backend process.
8  *
9  * The postmaster also manages system-wide operations such as
10  * startup and shutdown. The postmaster itself doesn't do those
11  * operations, mind you --- it just forks off a subprocess to do them
12  * at the right times. It also takes care of resetting the system
13  * if a backend crashes.
14  *
15  * The postmaster process creates the shared memory and semaphore
16  * pools during startup, but as a rule does not touch them itself.
17  * In particular, it is not a member of the PGPROC array of backends
18  * and so it cannot participate in lock-manager operations. Keeping
19  * the postmaster away from shared memory operations makes it simpler
20  * and more reliable. The postmaster is almost always able to recover
21  * from crashes of individual backends by resetting shared memory;
22  * if it did much with shared memory then it would be prone to crashing
23  * along with the backends.
24  *
25  * When a request message is received, we now fork() immediately.
26  * The child process performs authentication of the request, and
27  * then becomes a backend if successful. This allows the auth code
28  * to be written in a simple single-threaded style (as opposed to the
29  * crufty "poor man's multitasking" code that used to be needed).
30  * More importantly, it ensures that blockages in non-multithreaded
31  * libraries like SSL or PAM cannot cause denial of service to other
32  * clients.
33  *
34  *
35  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
36  * Portions Copyright (c) 1994, Regents of the University of California
37  *
38  *
39  * IDENTIFICATION
40  * src/backend/postmaster/postmaster.c
41  *
42  * NOTES
43  *
44  * Initialization:
45  * The Postmaster sets up shared memory data structures
46  * for the backends.
47  *
48  * Synchronization:
49  * The Postmaster shares memory with the backends but should avoid
50  * touching shared memory, so as not to become stuck if a crashing
51  * backend screws up locks or shared memory. Likewise, the Postmaster
52  * should never block on messages from frontend clients.
53  *
54  * Garbage Collection:
55  * The Postmaster cleans up after backends if they have an emergency
56  * exit and/or core dump.
57  *
58  * Error Reporting:
59  * Use write_stderr() only for reporting "interactive" errors
60  * (essentially, bogus arguments on the command line). Once the
61  * postmaster is launched, use ereport().
62  *
63  *-------------------------------------------------------------------------
64  */
65 
66 #include "postgres.h"
67 
68 #include <unistd.h>
69 #include <signal.h>
70 #include <time.h>
71 #include <sys/wait.h>
72 #include <ctype.h>
73 #include <sys/stat.h>
74 #include <sys/socket.h>
75 #include <fcntl.h>
76 #include <sys/param.h>
77 #include <netdb.h>
78 #include <limits.h>
79 
80 #ifdef USE_BONJOUR
81 #include <dns_sd.h>
82 #endif
83 
84 #ifdef USE_SYSTEMD
85 #include <systemd/sd-daemon.h>
86 #endif
87 
88 #ifdef HAVE_PTHREAD_IS_THREADED_NP
89 #include <pthread.h>
90 #endif
91 
92 #include "access/xlog.h"
93 #include "access/xlogrecovery.h"
94 #include "catalog/pg_control.h"
95 #include "common/file_perm.h"
96 #include "common/file_utils.h"
97 #include "common/ip.h"
98 #include "common/pg_prng.h"
99 #include "common/string.h"
100 #include "lib/ilist.h"
101 #include "libpq/auth.h"
102 #include "libpq/libpq.h"
103 #include "libpq/pqformat.h"
104 #include "libpq/pqsignal.h"
105 #include "nodes/queryjumble.h"
106 #include "pg_getopt.h"
107 #include "pgstat.h"
108 #include "port/pg_bswap.h"
109 #include "postmaster/autovacuum.h"
110 #include "postmaster/auxprocess.h"
112 #include "postmaster/fork_process.h"
113 #include "postmaster/pgarch.h"
114 #include "postmaster/postmaster.h"
115 #include "postmaster/syslogger.h"
118 #include "replication/slotsync.h"
119 #include "replication/walsender.h"
120 #include "storage/fd.h"
121 #include "storage/ipc.h"
122 #include "storage/pg_shmem.h"
123 #include "storage/pmsignal.h"
124 #include "storage/proc.h"
125 #include "tcop/tcopprot.h"
126 #include "utils/builtins.h"
127 #include "utils/datetime.h"
128 #include "utils/memutils.h"
129 #include "utils/pidfile.h"
130 #include "utils/ps_status.h"
131 #include "utils/timeout.h"
132 #include "utils/timestamp.h"
133 #include "utils/varlena.h"
134 
135 #ifdef EXEC_BACKEND
136 #include "storage/spin.h"
137 #endif
138 
139 
140 /*
141  * Possible types of a backend. Beyond being the possible bkend_type values in
142  * struct bkend, these are OR-able request flag bits for SignalSomeChildren()
143  * and CountChildren(). Each type is a distinct single bit, so sets of types combine with bitwise OR.
144  */
145 #define BACKEND_TYPE_NORMAL 0x0001 /* normal backend */
146 #define BACKEND_TYPE_AUTOVAC 0x0002 /* autovacuum worker process */
147 #define BACKEND_TYPE_WALSND 0x0004 /* walsender process */
148 #define BACKEND_TYPE_BGWORKER 0x0008 /* bgworker process */
149 #define BACKEND_TYPE_ALL 0x000F /* OR of all the above; update when adding a type */
150 
151 /*
152  * List of active backends (or child processes anyway; we don't actually
153  * know whether a given child has become a backend or is still in the
154  * authorization phase). This is used mainly to keep track of how many
155  * children we have and send them appropriate signals when necessary.
156  *
157  * As shown in the above set of backend types, this list includes not only
158  * "normal" client sessions, but also autovacuum workers, walsenders, and
159  * background workers. (Note that at the time of launch, walsenders are
160  * labeled BACKEND_TYPE_NORMAL; we relabel them to BACKEND_TYPE_WALSND
161  * upon noticing they've changed their PMChildFlags entry. Hence that check
162  * must be done before any operation that needs to distinguish walsenders
163  * from normal backends.)
164  *
165  * Also, "dead_end" children are in it: these are children launched just for
166  * the purpose of sending a friendly rejection message to a would-be client.
167  * We must track them because they are attached to shared memory, but we know
168  * they will never become live backends. dead_end children are not assigned a
169  * PMChildSlot. dead_end children have bkend_type NORMAL.
170  *
171  * "Special" children such as the startup, bgwriter, autovacuum launcher, and
172  * slot sync worker tasks are not in this list. They are tracked via StartupPID
173  * and other pid_t variables below. (Thus, there can't be more than one of any
174  * given "special" child process type. We use BackendList entries for any
175  * child process there can be more than one of.)
176  */
177 typedef struct bkend
178 {
179  pid_t pid; /* process id of backend */
180  int32 cancel_key; /* cancel key for cancels for this backend */
181  int child_slot; /* PMChildSlot for this backend, if any */
182  int bkend_type; /* child process flavor, see above */
183  bool dead_end; /* is it going to send an error and quit? */
184  bool bgworker_notify; /* gets bgworker start/stop notifications */
185  dlist_node elem; /* list link in BackendList */
187 
189 
190 #ifdef EXEC_BACKEND
191 static Backend *ShmemBackendArray;
192 #endif
193 
195 
196 
197 
198 /* The port number we are listening for connections on */
199 int PostPortNumber = DEF_PGPORT;
200 
201 /* The directory names for Unix socket(s) */
203 
204 /* The TCP listen address(es) */
206 
207 /*
208  * SuperuserReservedConnections is the number of backends reserved for
209  * superuser use, and ReservedConnections is the number of backends reserved
210  * for use by roles with privileges of the pg_use_reserved_connections
211  * predefined role. These are taken out of the pool of MaxConnections backend
212  * slots, so the number of backend slots available for roles that are neither
213  * superuser nor have privileges of pg_use_reserved_connections is
214  * (MaxConnections - SuperuserReservedConnections - ReservedConnections).
215  *
216  * If the number of remaining slots is less than or equal to
217  * SuperuserReservedConnections, only superusers can make new connections. If
218  * the number of remaining slots is greater than SuperuserReservedConnections
219  * but less than or equal to
220  * (SuperuserReservedConnections + ReservedConnections), only superusers and
221  * roles with privileges of pg_use_reserved_connections can make new
222  * connections. Note that pre-existing superuser and
223  * pg_use_reserved_connections connections don't count against the limits.
224  */
227 
228 /* The socket(s) we're listening to. */
229 #define MAXLISTEN 64
230 static int NumListenSockets = 0;
231 static pgsocket *ListenSockets = NULL;
232 
233 /* still more option variables */
234 bool EnableSSL = false;
235 
236 int PreAuthDelay = 0;
238 
239 bool log_hostname; /* for ps display and logging */
240 bool Log_connections = false;
241 
242 bool enable_bonjour = false;
246 bool send_abort_for_crash = false;
247 bool send_abort_for_kill = false;
248 
249 /* PIDs of special child processes; 0 when not running */
250 static pid_t StartupPID = 0,
260 
261 /* Startup process's status */
262 typedef enum
263 {
266  STARTUP_SIGNALED, /* we sent it a SIGQUIT or SIGKILL */
269 
271 
272 /* Startup/shutdown state: Shutdown takes one of the mode values defined below */
273 #define NoShutdown 0 /* initial value of Shutdown */
274 #define SmartShutdown 1 /* restrict new connections, wait for client backends to leave (see connsAllowed) */
275 #define FastShutdown 2 /* NOTE(review): presumably ends sessions without waiting for clients -- confirm */
276 #define ImmediateShutdown 3 /* uses the SIGKILL timeout (see AbortStartTime) */
277
278 static int Shutdown = NoShutdown;
279 
280 static bool FatalError = false; /* T if recovering from backend crash */
281 
282 /*
283  * We use a simple state machine to control startup, shutdown, and
284  * crash recovery (which is rather like shutdown followed by startup).
285  *
286  * After doing all the postmaster initialization work, we enter PM_STARTUP
287  * state and the startup process is launched. The startup process begins by
288  * reading the control file and other preliminary initialization steps.
289  * In a normal startup, or after crash recovery, the startup process exits
290  * with exit code 0 and we switch to PM_RUN state. However, archive recovery
291  * is handled specially since it takes much longer and we would like to support
292  * hot standby during archive recovery.
293  *
294  * When the startup process is ready to start archive recovery, it signals the
295  * postmaster, and we switch to PM_RECOVERY state. The background writer and
296  * checkpointer are launched, while the startup process continues applying WAL.
297  * If Hot Standby is enabled, then, after reaching a consistent point in WAL
298  * redo, startup process signals us again, and we switch to PM_HOT_STANDBY
299  * state and begin accepting connections to perform read-only queries. When
300  * archive recovery is finished, the startup process exits with exit code 0
301  * and we switch to PM_RUN state.
302  *
303  * Normal child backends can only be launched when we are in PM_RUN or
304  * PM_HOT_STANDBY state. (connsAllowed can also restrict launching.)
305  * In other states we handle connection requests by launching "dead_end"
306  * child processes, which will simply send the client an error message and
307  * quit. (We track these in the BackendList so that we can know when they
308  * are all gone; this is important because they're still connected to shared
309  * memory, and would interfere with an attempt to destroy the shmem segment,
310  * possibly leading to SHMALL failure when we try to make a new one.)
311  * In PM_WAIT_DEAD_END state we are waiting for all the dead_end children
312  * to drain out of the system, and therefore stop accepting connection
313  * requests at all until the last existing child has quit (which hopefully
314  * will not be very long).
315  *
316  * Notice that this state variable does not distinguish *why* we entered
317  * states later than PM_RUN --- Shutdown and FatalError must be consulted
318  * to find that out. FatalError is never true in PM_RECOVERY, PM_HOT_STANDBY,
319  * or PM_RUN states, nor in PM_SHUTDOWN states (because we don't enter those
320  * states when trying to recover from a crash). It can be true in PM_STARTUP
321  * state, because we don't clear it until we've successfully started WAL redo.
322  */
323 typedef enum
324 {
325  PM_INIT, /* postmaster starting */
326  PM_STARTUP, /* waiting for startup subprocess */
327  PM_RECOVERY, /* in archive recovery mode */
328  PM_HOT_STANDBY, /* in hot standby mode */
329  PM_RUN, /* normal "database is alive" state */
330  PM_STOP_BACKENDS, /* need to stop remaining backends */
331  PM_WAIT_BACKENDS, /* waiting for live backends to exit */
332  PM_SHUTDOWN, /* waiting for checkpointer to do shutdown
333  * ckpt */
334  PM_SHUTDOWN_2, /* waiting for archiver and walsenders to
335  * finish */
336  PM_WAIT_DEAD_END, /* waiting for dead_end children to exit */
337  PM_NO_CHILDREN, /* all important children have exited */
338 } PMState; /* order is significant: the comment above refers to "states later than PM_RUN"; do not reorder */
339 
341 
342 /*
343  * While performing a "smart shutdown", we restrict new connections but stay
344  * in PM_RUN or PM_HOT_STANDBY state until all the client backends are gone.
345  * connsAllowed is a sub-state indicator showing the active restriction.
346  * It is of no interest unless pmState is PM_RUN or PM_HOT_STANDBY.
347  */
348 static bool connsAllowed = true;
349 
350 /* Start time of SIGKILL timeout during immediate shutdown or child crash */
351 /* Zero means timeout is not running */
352 static time_t AbortStartTime = 0;
353 
354 /* Length of said timeout */
355 #define SIGKILL_CHILDREN_AFTER_SECS 5
356 
357 static bool ReachedNormalRunning = false; /* T if we've reached PM_RUN */
358 
359 bool ClientAuthInProgress = false; /* T during new-client
360  * authentication */
361 
362 bool redirection_done = false; /* stderr redirected for syslogger? */
363 
364 /* received START_AUTOVAC_LAUNCHER signal */
365 static bool start_autovac_launcher = false;
366 
367 /* the launcher needs to be signaled to communicate some condition */
368 static bool avlauncher_needs_signal = false;
369 
370 /* received START_WALRECEIVER signal */
371 static bool WalReceiverRequested = false;
372 
373 /* set when there's a worker that needs to be started up */
374 static bool StartWorkerNeeded = true;
375 static bool HaveCrashedWorker = false;
376 
377 /* set when signals arrive */
378 static volatile sig_atomic_t pending_pm_pmsignal;
379 static volatile sig_atomic_t pending_pm_child_exit;
380 static volatile sig_atomic_t pending_pm_reload_request;
381 static volatile sig_atomic_t pending_pm_shutdown_request;
382 static volatile sig_atomic_t pending_pm_fast_shutdown_request;
383 static volatile sig_atomic_t pending_pm_immediate_shutdown_request;
384 
385 /* event multiplexing object */
387 
388 #ifdef USE_SSL
389 /* Set when and if SSL has been initialized properly */
390 static bool LoadedSSL = false;
391 #endif
392 
393 #ifdef USE_BONJOUR
394 static DNSServiceRef bonjour_sdref = NULL;
395 #endif
396 
397 /*
398  * postmaster.c - function prototypes
399  */
400 static void CloseServerPorts(int status, Datum arg);
401 static void unlink_external_pid_file(int status, Datum arg);
402 static void getInstallationPaths(const char *argv0);
403 static void checkControlFile(void);
404 static Port *ConnCreate(int serverFd);
405 static void ConnFree(Port *port);
410 static void process_pm_pmsignal(void);
411 static void process_pm_child_exit(void);
412 static void process_pm_reload_request(void);
413 static void process_pm_shutdown_request(void);
415 static void dummy_handler(SIGNAL_ARGS);
416 static void StartupPacketTimeoutHandler(void);
417 static void CleanupBackend(int pid, int exitstatus);
418 static bool CleanupBackgroundWorker(int pid, int exitstatus);
419 static void HandleChildCrash(int pid, int exitstatus, const char *procname);
420 static void LogChildExit(int lev, const char *procname,
421  int pid, int exitstatus);
422 static void PostmasterStateMachine(void);
423 static void BackendInitialize(Port *port);
424 static void BackendRun(Port *port) pg_attribute_noreturn();
425 static void ExitPostmaster(int status) pg_attribute_noreturn();
426 static int ServerLoop(void);
427 static int BackendStartup(Port *port);
428 static int ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done);
429 static void SendNegotiateProtocolVersion(List *unrecognized_protocol_options);
430 static void processCancelRequest(Port *port, void *pkt);
431 static void report_fork_failure_to_client(Port *port, int errnum);
432 static CAC_state canAcceptConnections(int backend_type);
433 static bool RandomCancelKey(int32 *cancel_key);
434 static void signal_child(pid_t pid, int signal);
435 static void sigquit_child(pid_t pid);
436 static bool SignalSomeChildren(int signal, int target);
437 static void TerminateChildren(int signal);
438 
439 #define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL)
440 
441 static int CountChildren(int target);
443 static void maybe_start_bgworkers(void);
444 static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
445 static pid_t StartChildProcess(AuxProcType type);
446 static void StartAutovacuumWorker(void);
447 static void MaybeStartWalReceiver(void);
448 static void MaybeStartWalSummarizer(void);
449 static void InitPostmasterDeathWatchHandle(void);
450 static void MaybeStartSlotSyncWorker(void);
451 
452 /*
453  * Is the archiver allowed to start up in the current postmaster state?
454  *
455  * True if XLogArchivingActive() and pmState is PM_RUN, or if XLogArchivingAlways()
456  * and pmState is PM_RECOVERY or PM_HOT_STANDBY; in both cases PgArchCanRestart() must also be true.
457  */
458 #define PgArchStartupAllowed() \
459  (((XLogArchivingActive() && pmState == PM_RUN) || \
460  (XLogArchivingAlways() && \
461  (pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY))) && \
462  PgArchCanRestart())
463 
464 #ifdef EXEC_BACKEND
465 
466 #ifdef WIN32
467 #define WNOHANG 0 /* ignored, so any integer value will do */
468 
469 static pid_t waitpid(pid_t pid, int *exitstatus, int options);
470 static void WINAPI pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired);
471 
472 static HANDLE win32ChildQueue;
473 
474 typedef struct /* NOTE(review): presumably the per-child record handed to pgwin32_deadchild_callback -- confirm */
475 {
476  HANDLE waitHandle; /* NOTE(review): looks like the registered-wait handle -- confirm */
477  HANDLE procHandle; /* presumably the child's process handle -- confirm */
478  DWORD procId; /* presumably the child's process id -- confirm */
479 } win32_deadchild_waitinfo;
480 #endif /* WIN32 */
481 
482 static pid_t backend_forkexec(Port *port);
483 static pid_t internal_forkexec(int argc, char *argv[], Port *port, BackgroundWorker *worker);
484 
485 /* Type for a socket that can be inherited by an exec'ed backend (used for BackendParameters.portsocket) */
486 #ifdef WIN32
487 typedef struct
488 {
489  SOCKET origsocket; /* Original socket value, or PGINVALID_SOCKET
490  * if not a socket */
491  WSAPROTOCOL_INFO wsainfo; /* NOTE(review): presumably the protocol info used to re-create the socket in the child -- confirm */
492 } InheritableSocket;
493 #else
494 typedef int InheritableSocket; /* non-Windows: a plain int */
495 #endif
496 
497 /*
498  * Structure containing all variables passed to exec'ed backends (see save_backend_variables() and restore_backend_variables())
499  */
500 typedef struct
501 {
502  bool has_port; /* presumably true when 'port' and 'portsocket' are meaningful -- confirm */
503  Port port;
504  InheritableSocket portsocket;
505 
506  bool has_bgworker; /* presumably true when 'bgworker' is meaningful -- confirm */
507  BackgroundWorker bgworker;
508 
509  char DataDir[MAXPGPATH];
511  int MyPMChildSlot;
512 #ifndef WIN32 /* the segment id is an unsigned long here and a HANDLE on Windows */
513  unsigned long UsedShmemSegID;
514 #else
515  void *ShmemProtectiveRegion;
516  HANDLE UsedShmemSegID;
517 #endif
518  void *UsedShmemSegAddr;
520  Backend *ShmemBackendArray;
521 #ifndef HAVE_SPINLOCKS
523 #endif
532  pid_t PostmasterPid;
536  bool redirection_done;
537  bool IsBinaryUpgrade;
538  bool query_id_enabled;
539  int max_safe_fds;
540  int MaxBackends;
541 #ifdef WIN32 /* Windows passes HANDLEs; other platforms pass int file descriptors */
542  HANDLE PostmasterHandle;
543  HANDLE initial_signal_pipe;
544  HANDLE syslogPipe[2];
545 #else
546  int postmaster_alive_fds[2];
547  int syslogPipe[2];
548 #endif
549  char my_exec_path[MAXPGPATH];
550  char pkglib_path[MAXPGPATH];
551 } BackendParameters;
552 
553 static void read_backend_variables(char *id, Port **port, BackgroundWorker **worker);
554 static void restore_backend_variables(BackendParameters *param, Port **port, BackgroundWorker **worker);
555 
556 #ifndef WIN32
557 static bool save_backend_variables(BackendParameters *param, Port *port, BackgroundWorker *worker);
558 #else
559 static bool save_backend_variables(BackendParameters *param, Port *port, BackgroundWorker *worker,
560  HANDLE childProcess, pid_t childPid);
561 #endif
562 
563 static void ShmemBackendArrayAdd(Backend *bn);
564 static void ShmemBackendArrayRemove(Backend *bn);
565 #endif /* EXEC_BACKEND */
566 
567 /* Macros to check exit status of a child process; 'st' is the status word, e.g. as filled in by waitpid() */
568 #define EXIT_STATUS_0(st) ((st) == 0) /* whole status word is zero */
569 #define EXIT_STATUS_1(st) (WIFEXITED(st) && WEXITSTATUS(st) == 1) /* normal exit with code 1 */
570 #define EXIT_STATUS_3(st) (WIFEXITED(st) && WEXITSTATUS(st) == 3) /* normal exit with code 3 */
571 
572 #ifndef WIN32
573 /*
574  * File descriptors for pipe used to monitor if postmaster is alive.
575  * First is POSTMASTER_FD_WATCH, second is POSTMASTER_FD_OWN. Both are initialized to -1.
576  */
577 int postmaster_alive_fds[2] = {-1, -1};
578 #else
579 /* Process handle of postmaster used for the same purpose on Windows */
580 HANDLE PostmasterHandle;
581 #endif
582 
583 /*
584  * Postmaster main entry point
585  */
586 void
587 PostmasterMain(int argc, char *argv[])
588 {
589  int opt;
590  int status;
591  char *userDoption = NULL;
592  bool listen_addr_saved = false;
593  char *output_config_variable = NULL;
594 
596 
598 
600 
601  /*
602  * Start our win32 signal implementation
603  */
604 #ifdef WIN32
606 #endif
607 
608  /*
609  * We should not be creating any files or directories before we check the
610  * data directory (see checkDataDir()), but just in case set the umask to
611  * the most restrictive (owner-only) permissions.
612  *
613  * checkDataDir() will reset the umask based on the data directory
614  * permissions.
615  */
616  umask(PG_MODE_MASK_OWNER);
617 
618  /*
619  * By default, palloc() requests in the postmaster will be allocated in
620  * the PostmasterContext, which is space that can be recycled by backends.
621  * Allocated data that needs to be available to backends should be
622  * allocated in TopMemoryContext.
623  */
625  "Postmaster",
628 
629  /* Initialize paths to installation files */
630  getInstallationPaths(argv[0]);
631 
632  /*
633  * Set up signal handlers for the postmaster process.
634  *
635  * CAUTION: when changing this list, check for side-effects on the signal
636  * handling setup of child processes. See tcop/postgres.c,
637  * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
638  * postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/syslogger.c,
639  * postmaster/bgworker.c and postmaster/checkpointer.c.
640  */
641  pqinitmask();
642  sigprocmask(SIG_SETMASK, &BlockSig, NULL);
643 
648  pqsignal(SIGALRM, SIG_IGN); /* ignored */
649  pqsignal(SIGPIPE, SIG_IGN); /* ignored */
651  pqsignal(SIGUSR2, dummy_handler); /* unused, reserve for children */
653 
654  /* This may configure SIGURG, depending on platform. */
657 
658  /*
659  * No other place in Postgres should touch SIGTTIN/SIGTTOU handling. We
660  * ignore those signals in a postmaster environment, so that there is no
661  * risk of a child process freezing up due to writing to stderr. But for
662  * a standalone backend, their default handling is reasonable. Hence, all
663  * child processes should just allow the inherited settings to stand.
664  */
665 #ifdef SIGTTIN
666  pqsignal(SIGTTIN, SIG_IGN); /* ignored */
667 #endif
668 #ifdef SIGTTOU
669  pqsignal(SIGTTOU, SIG_IGN); /* ignored */
670 #endif
671 
672  /* ignore SIGXFSZ, so that ulimit violations work like disk full */
673 #ifdef SIGXFSZ
674  pqsignal(SIGXFSZ, SIG_IGN); /* ignored */
675 #endif
676 
677  /* Begin accepting signals. */
678  sigprocmask(SIG_SETMASK, &UnBlockSig, NULL);
679 
680  /*
681  * Options setup
682  */
684 
685  opterr = 1;
686 
687  /*
688  * Parse command-line options. CAUTION: keep this in sync with
689  * tcop/postgres.c (the option sets should not conflict) and with the
690  * common help() function in main/main.c.
691  */
692  while ((opt = getopt(argc, argv, "B:bC:c:D:d:EeFf:h:ijk:lN:OPp:r:S:sTt:W:-:")) != -1)
693  {
694  switch (opt)
695  {
696  case 'B':
697  SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
698  break;
699 
700  case 'b':
701  /* Undocumented flag used for binary upgrades */
702  IsBinaryUpgrade = true;
703  break;
704 
705  case 'C':
706  output_config_variable = strdup(optarg);
707  break;
708 
709  case 'c':
710  case '-':
711  {
712  char *name,
713  *value;
714 
716  if (!value)
717  {
718  if (opt == '-')
719  ereport(ERROR,
720  (errcode(ERRCODE_SYNTAX_ERROR),
721  errmsg("--%s requires a value",
722  optarg)));
723  else
724  ereport(ERROR,
725  (errcode(ERRCODE_SYNTAX_ERROR),
726  errmsg("-c %s requires a value",
727  optarg)));
728  }
729 
731  pfree(name);
732  pfree(value);
733  break;
734  }
735 
736  case 'D':
737  userDoption = strdup(optarg);
738  break;
739 
740  case 'd':
742  break;
743 
744  case 'E':
745  SetConfigOption("log_statement", "all", PGC_POSTMASTER, PGC_S_ARGV);
746  break;
747 
748  case 'e':
749  SetConfigOption("datestyle", "euro", PGC_POSTMASTER, PGC_S_ARGV);
750  break;
751 
752  case 'F':
753  SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
754  break;
755 
756  case 'f':
758  {
759  write_stderr("%s: invalid argument for option -f: \"%s\"\n",
760  progname, optarg);
761  ExitPostmaster(1);
762  }
763  break;
764 
765  case 'h':
766  SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
767  break;
768 
769  case 'i':
770  SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
771  break;
772 
773  case 'j':
774  /* only used by interactive backend */
775  break;
776 
777  case 'k':
778  SetConfigOption("unix_socket_directories", optarg, PGC_POSTMASTER, PGC_S_ARGV);
779  break;
780 
781  case 'l':
782  SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
783  break;
784 
785  case 'N':
786  SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
787  break;
788 
789  case 'O':
790  SetConfigOption("allow_system_table_mods", "true", PGC_POSTMASTER, PGC_S_ARGV);
791  break;
792 
793  case 'P':
794  SetConfigOption("ignore_system_indexes", "true", PGC_POSTMASTER, PGC_S_ARGV);
795  break;
796 
797  case 'p':
799  break;
800 
801  case 'r':
802  /* only used by single-user backend */
803  break;
804 
805  case 'S':
807  break;
808 
809  case 's':
810  SetConfigOption("log_statement_stats", "true", PGC_POSTMASTER, PGC_S_ARGV);
811  break;
812 
813  case 'T':
814 
815  /*
816  * This option used to be defined as sending SIGSTOP after a
817  * backend crash, but sending SIGABRT seems more useful.
818  */
819  SetConfigOption("send_abort_for_crash", "true", PGC_POSTMASTER, PGC_S_ARGV);
820  break;
821 
822  case 't':
823  {
824  const char *tmp = get_stats_option_name(optarg);
825 
826  if (tmp)
827  {
829  }
830  else
831  {
832  write_stderr("%s: invalid argument for option -t: \"%s\"\n",
833  progname, optarg);
834  ExitPostmaster(1);
835  }
836  break;
837  }
838 
839  case 'W':
840  SetConfigOption("post_auth_delay", optarg, PGC_POSTMASTER, PGC_S_ARGV);
841  break;
842 
843  default:
844  write_stderr("Try \"%s --help\" for more information.\n",
845  progname);
846  ExitPostmaster(1);
847  }
848  }
849 
850  /*
851  * Postmaster accepts no non-option switch arguments.
852  */
853  if (optind < argc)
854  {
855  write_stderr("%s: invalid argument: \"%s\"\n",
856  progname, argv[optind]);
857  write_stderr("Try \"%s --help\" for more information.\n",
858  progname);
859  ExitPostmaster(1);
860  }
861 
862  /*
863  * Locate the proper configuration files and data directory, and read
864  * postgresql.conf for the first time.
865  */
867  ExitPostmaster(2);
868 
869  if (output_config_variable != NULL)
870  {
871  /*
872  * If this is a runtime-computed GUC, it hasn't yet been initialized,
873  * and the present value is not useful. However, this is a convenient
874  * place to print the value for most GUCs because it is safe to run
875  * postmaster startup to this point even if the server is already
876  * running. For the handful of runtime-computed GUCs that we cannot
877  * provide meaningful values for yet, we wait until later in
878  * postmaster startup to print the value. We won't be able to use -C
879  * on running servers for those GUCs, but using this option now would
880  * lead to incorrect results for them.
881  */
882  int flags = GetConfigOptionFlags(output_config_variable, true);
883 
884  if ((flags & GUC_RUNTIME_COMPUTED) == 0)
885  {
886  /*
887  * "-C guc" was specified, so print GUC's value and exit. No
888  * extra permission check is needed because the user is reading
889  * inside the data dir.
890  */
891  const char *config_val = GetConfigOption(output_config_variable,
892  false, false);
893 
894  puts(config_val ? config_val : "");
895  ExitPostmaster(0);
896  }
897 
898  /*
899  * A runtime-computed GUC will be printed later on. As we initialize
900  * a server startup sequence, silence any log messages that may show
901  * up in the output generated. FATAL and more severe messages are
902  * useful to show, even if one would only expect at least PANIC. LOG
903  * entries are hidden.
904  */
905  SetConfigOption("log_min_messages", "FATAL", PGC_SUSET,
907  }
908 
909  /* Verify that DataDir looks reasonable */
910  checkDataDir();
911 
912  /* Check that pg_control exists */
914 
915  /* And switch working directory into it */
916  ChangeToDataDir();
917 
918  /*
919  * Check for invalid combinations of GUC settings.
920  */
922  {
923  write_stderr("%s: superuser_reserved_connections (%d) plus reserved_connections (%d) must be less than max_connections (%d)\n",
924  progname,
927  ExitPostmaster(1);
928  }
930  ereport(ERROR,
931  (errmsg("WAL archival cannot be enabled when wal_level is \"minimal\"")));
933  ereport(ERROR,
934  (errmsg("WAL streaming (max_wal_senders > 0) requires wal_level \"replica\" or \"logical\"")));
936  ereport(ERROR,
937  (errmsg("WAL cannot be summarized when wal_level is \"minimal\"")));
938 
939  /*
940  * Other one-time internal sanity checks can go here, if they are fast.
941  * (Put any slow processing further down, after postmaster.pid creation.)
942  */
943  if (!CheckDateTokenTables())
944  {
945  write_stderr("%s: invalid datetoken tables, please fix\n", progname);
946  ExitPostmaster(1);
947  }
948 
949  /*
950  * Now that we are done processing the postmaster arguments, reset
951  * getopt(3) library so that it will work correctly in subprocesses.
952  */
953  optind = 1;
954 #ifdef HAVE_INT_OPTRESET
955  optreset = 1; /* some systems need this too */
956 #endif
957 
958  /* For debugging: display postmaster environment */
959  {
960  extern char **environ;
961  char **p;
962 
963  ereport(DEBUG3,
964  (errmsg_internal("%s: PostmasterMain: initial environment dump:",
965  progname)));
966  ereport(DEBUG3,
967  (errmsg_internal("-----------------------------------------")));
968  for (p = environ; *p; ++p)
969  ereport(DEBUG3,
970  (errmsg_internal("\t%s", *p)));
971  ereport(DEBUG3,
972  (errmsg_internal("-----------------------------------------")));
973  }
974 
975  /*
976  * Create lockfile for data directory.
977  *
978  * We want to do this before we try to grab the input sockets, because the
979  * data directory interlock is more reliable than the socket-file
980  * interlock (thanks to whoever decided to put socket files in /tmp :-().
981  * For the same reason, it's best to grab the TCP socket(s) before the
982  * Unix socket(s).
983  *
984  * Also note that this internally sets up the on_proc_exit function that
985  * is responsible for removing both data directory and socket lockfiles;
986  * so it must happen before opening sockets so that at exit, the socket
987  * lockfiles go away after CloseServerPorts runs.
988  */
989  CreateDataDirLockFile(true);
990 
991  /*
992  * Read the control file (for error checking and config info).
993  *
994  * Since we verify the control file's CRC, this has a useful side effect
995  * on machines where we need a run-time test for CRC support instructions.
996  * The postmaster will do the test once at startup, and then its child
997  * processes will inherit the correct function pointer and not need to
998  * repeat the test.
999  */
1000  LocalProcessControlFile(false);
1001 
1002  /*
1003  * Register the apply launcher. It's probably a good idea to call this
1004  * before any modules had a chance to take the background worker slots.
1005  */
1007 
1008  /*
1009  * process any libraries that should be preloaded at postmaster start
1010  */
1012 
1013  /*
1014  * Initialize SSL library, if specified.
1015  */
1016 #ifdef USE_SSL
1017  if (EnableSSL)
1018  {
1019  (void) secure_initialize(true);
1020  LoadedSSL = true;
1021  }
1022 #endif
1023 
1024  /*
1025  * Now that loadable modules have had their chance to alter any GUCs,
1026  * calculate MaxBackends.
1027  */
1029 
1030  /*
1031  * Give preloaded libraries a chance to request additional shared memory.
1032  */
1034 
1035  /*
1036  * Now that loadable modules have had their chance to request additional
1037  * shared memory, determine the value of any runtime-computed GUCs that
1038  * depend on the amount of shared memory required.
1039  */
1041 
1042  /*
1043  * Now that modules have been loaded, we can process any custom resource
1044  * managers specified in the wal_consistency_checking GUC.
1045  */
1047 
1048  /*
1049  * If -C was specified with a runtime-computed GUC, we held off printing
1050  * the value earlier, as the GUC was not yet initialized. We handle -C
1051  * for most GUCs before we lock the data directory so that the option may
1052  * be used on a running server. However, a handful of GUCs are runtime-
1053  * computed and do not have meaningful values until after locking the data
1054  * directory, and we cannot safely calculate their values earlier on a
1055  * running server. At this point, such GUCs should be properly
1056  * initialized, and we haven't yet set up shared memory, so this is a good
1057  * time to handle the -C option for these special GUCs.
1058  */
1059  if (output_config_variable != NULL)
1060  {
1061  const char *config_val = GetConfigOption(output_config_variable,
1062  false, false);
1063 
1064  puts(config_val ? config_val : "");
1065  ExitPostmaster(0);
1066  }
1067 
1068  /*
1069  * Set up shared memory and semaphores.
1070  *
1071  * Note: if using SysV shmem and/or semas, each postmaster startup will
1072  * normally choose the same IPC keys. This helps ensure that we will
1073  * clean up dead IPC objects if the postmaster crashes and is restarted.
1074  */
1076 
1077  /*
1078  * Estimate number of openable files. This must happen after setting up
1079  * semaphores, because on some platforms semaphores count as open files.
1080  */
1081  set_max_safe_fds();
1082 
1083  /*
1084  * Set reference point for stack-depth checking.
1085  */
1086  (void) set_stack_base();
1087 
1088  /*
1089  * Initialize pipe (or process handle on Windows) that allows children to
1090  * wake up from sleep on postmaster death.
1091  */
1093 
1094 #ifdef WIN32
1095 
1096  /*
1097  * Initialize I/O completion port used to deliver list of dead children.
1098  */
1099  win32ChildQueue = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 1);
1100  if (win32ChildQueue == NULL)
1101  ereport(FATAL,
1102  (errmsg("could not create I/O completion port for child queue")));
1103 #endif
1104 
1105 #ifdef EXEC_BACKEND
1106  /* Write out nondefault GUC settings for child processes to use */
1107  write_nondefault_variables(PGC_POSTMASTER);
1108 
1109  /*
1110  * Clean out the temp directory used to transmit parameters to child
1111  * processes (see internal_forkexec, below). We must do this before
1112  * launching any child processes, else we have a race condition: we could
1113  * remove a parameter file before the child can read it. It should be
1114  * safe to do so now, because we verified earlier that there are no
1115  * conflicting Postgres processes in this data directory.
1116  */
1118 #endif
1119 
1120  /*
1121  * Forcibly remove the files signaling a standby promotion request.
1122  * Otherwise, the existence of those files triggers a promotion too early,
1123  * whether a user wants that or not.
1124  *
1125  * This removal of files is usually unnecessary because they can exist
1126  * only during a few moments during a standby promotion. However there is
1127  * a race condition: if pg_ctl promote is executed and creates the files
1128  * during a promotion, the files can stay around even after the server is
1129  * brought up to be the primary. Then, if a new standby starts by using
1130  * the backup taken from the new primary, the files can exist at server
1131  * startup and must be removed in order to avoid an unexpected promotion.
1132  *
1133  * Note that promotion signal files need to be removed before the startup
1134  * process is invoked. Because, after that, they can be used by
1135  * postmaster's SIGUSR1 signal handler.
1136  */
1138 
1139  /* Do the same for logrotate signal file */
1141 
1142  /* Remove any outdated file holding the current log filenames. */
1143  if (unlink(LOG_METAINFO_DATAFILE) < 0 && errno != ENOENT)
1144  ereport(LOG,
1146  errmsg("could not remove file \"%s\": %m",
1148 
1149  /*
1150  * If enabled, start up syslogger collection subprocess
1151  */
1153 
1154  /*
1155  * Reset whereToSendOutput from DestDebug (its starting state) to
1156  * DestNone. This stops ereport from sending log messages to stderr unless
1157  * Log_destination permits. We don't do this until the postmaster is
1158  * fully launched, since startup failures may as well be reported to
1159  * stderr.
1160  *
1161  * If we are in fact disabling logging to stderr, first emit a log message
1162  * saying so, to provide a breadcrumb trail for users who may not remember
1163  * that their logging is configured to go somewhere else.
1164  */
1166  ereport(LOG,
1167  (errmsg("ending log output to stderr"),
1168  errhint("Future log output will go to log destination \"%s\".",
1170 
1172 
1173  /*
1174  * Report server startup in log. While we could emit this much earlier,
1175  * it seems best to do so after starting the log collector, if we intend
1176  * to use one.
1177  */
1178  ereport(LOG,
1179  (errmsg("starting %s", PG_VERSION_STR)));
1180 
1181  /*
1182  * Establish input sockets.
1183  *
1184  * First set up an on_proc_exit function that's charged with closing the
1185  * sockets again at postmaster shutdown.
1186  */
1187  ListenSockets = palloc(MAXLISTEN * sizeof(pgsocket));
1189 
1190  if (ListenAddresses)
1191  {
1192  char *rawstring;
1193  List *elemlist;
1194  ListCell *l;
1195  int success = 0;
1196 
1197  /* Need a modifiable copy of ListenAddresses */
1198  rawstring = pstrdup(ListenAddresses);
1199 
1200  /* Parse string into list of hostnames */
1201  if (!SplitGUCList(rawstring, ',', &elemlist))
1202  {
1203  /* syntax error in list */
1204  ereport(FATAL,
1205  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1206  errmsg("invalid list syntax in parameter \"%s\"",
1207  "listen_addresses")));
1208  }
1209 
1210  foreach(l, elemlist)
1211  {
1212  char *curhost = (char *) lfirst(l);
1213 
1214  if (strcmp(curhost, "*") == 0)
1215  status = StreamServerPort(AF_UNSPEC, NULL,
1216  (unsigned short) PostPortNumber,
1217  NULL,
1218  ListenSockets,
1220  MAXLISTEN);
1221  else
1222  status = StreamServerPort(AF_UNSPEC, curhost,
1223  (unsigned short) PostPortNumber,
1224  NULL,
1225  ListenSockets,
1227  MAXLISTEN);
1228 
1229  if (status == STATUS_OK)
1230  {
1231  success++;
1232  /* record the first successful host addr in lockfile */
1233  if (!listen_addr_saved)
1234  {
1236  listen_addr_saved = true;
1237  }
1238  }
1239  else
1240  ereport(WARNING,
1241  (errmsg("could not create listen socket for \"%s\"",
1242  curhost)));
1243  }
1244 
1245  if (!success && elemlist != NIL)
1246  ereport(FATAL,
1247  (errmsg("could not create any TCP/IP sockets")));
1248 
1249  list_free(elemlist);
1250  pfree(rawstring);
1251  }
1252 
1253 #ifdef USE_BONJOUR
1254  /* Register for Bonjour only if we opened TCP socket(s) */
1255  if (enable_bonjour && NumListenSockets > 0)
1256  {
1257  DNSServiceErrorType err;
1258 
1259  /*
1260  * We pass 0 for interface_index, which will result in registering on
1261  * all "applicable" interfaces. It's not entirely clear from the
1262  * DNS-SD docs whether this would be appropriate if we have bound to
1263  * just a subset of the available network interfaces.
1264  */
1265  err = DNSServiceRegister(&bonjour_sdref,
1266  0,
1267  0,
1268  bonjour_name,
1269  "_postgresql._tcp.",
1270  NULL,
1271  NULL,
1273  0,
1274  NULL,
1275  NULL,
1276  NULL);
1277  if (err != kDNSServiceErr_NoError)
1278  ereport(LOG,
1279  (errmsg("DNSServiceRegister() failed: error code %ld",
1280  (long) err)));
1281 
1282  /*
1283  * We don't bother to read the mDNS daemon's reply, and we expect that
1284  * it will automatically terminate our registration when the socket is
1285  * closed at postmaster termination. So there's nothing more to be
1286  * done here. However, the bonjour_sdref is kept around so that
1287  * forked children can close their copies of the socket.
1288  */
1289  }
1290 #endif
1291 
1293  {
1294  char *rawstring;
1295  List *elemlist;
1296  ListCell *l;
1297  int success = 0;
1298 
1299  /* Need a modifiable copy of Unix_socket_directories */
1300  rawstring = pstrdup(Unix_socket_directories);
1301 
1302  /* Parse string into list of directories */
1303  if (!SplitDirectoriesString(rawstring, ',', &elemlist))
1304  {
1305  /* syntax error in list */
1306  ereport(FATAL,
1307  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1308  errmsg("invalid list syntax in parameter \"%s\"",
1309  "unix_socket_directories")));
1310  }
1311 
1312  foreach(l, elemlist)
1313  {
1314  char *socketdir = (char *) lfirst(l);
1315 
1316  status = StreamServerPort(AF_UNIX, NULL,
1317  (unsigned short) PostPortNumber,
1318  socketdir,
1319  ListenSockets,
1321  MAXLISTEN);
1322 
1323  if (status == STATUS_OK)
1324  {
1325  success++;
1326  /* record the first successful Unix socket in lockfile */
1327  if (success == 1)
1329  }
1330  else
1331  ereport(WARNING,
1332  (errmsg("could not create Unix-domain socket in directory \"%s\"",
1333  socketdir)));
1334  }
1335 
1336  if (!success && elemlist != NIL)
1337  ereport(FATAL,
1338  (errmsg("could not create any Unix-domain sockets")));
1339 
1340  list_free_deep(elemlist);
1341  pfree(rawstring);
1342  }
1343 
1344  /*
1345  * check that we have some socket to listen on
1346  */
1347  if (NumListenSockets == 0)
1348  ereport(FATAL,
1349  (errmsg("no socket created for listening")));
1350 
1351  /*
1352  * If no valid TCP ports, write an empty line for listen address,
1353  * indicating the Unix socket must be used. Note that this line is not
1354  * added to the lock file until there is a socket backing it.
1355  */
1356  if (!listen_addr_saved)
1358 
1359  /*
1360  * Record postmaster options. We delay this till now to avoid recording
1361  * bogus options (eg, unusable port number).
1362  */
1363  if (!CreateOptsFile(argc, argv, my_exec_path))
1364  ExitPostmaster(1);
1365 
1366  /*
1367  * Write the external PID file if requested
1368  */
1369  if (external_pid_file)
1370  {
1371  FILE *fpidfile = fopen(external_pid_file, "w");
1372 
1373  if (fpidfile)
1374  {
1375  fprintf(fpidfile, "%d\n", MyProcPid);
1376  fclose(fpidfile);
1377 
1378  /* Make PID file world readable */
1379  if (chmod(external_pid_file, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) != 0)
1380  write_stderr("%s: could not change permissions of external PID file \"%s\": %s\n",
1382  }
1383  else
1384  write_stderr("%s: could not write external PID file \"%s\": %s\n",
1386 
1388  }
1389 
1390  /*
1391  * Remove old temporary files. At this point there can be no other
1392  * Postgres processes running in this directory, so this should be safe.
1393  */
1395 
1396  /*
1397  * Initialize the autovacuum subsystem (again, no process start yet)
1398  */
1399  autovac_init();
1400 
1401  /*
1402  * Load configuration files for client authentication.
1403  */
1404  if (!load_hba())
1405  {
1406  /*
1407  * It makes no sense to continue if we fail to load the HBA file,
1408  * since there is no way to connect to the database in this case.
1409  */
1410  ereport(FATAL,
1411  /* translator: %s is a configuration file */
1412  (errmsg("could not load %s", HbaFileName)));
1413  }
1414  if (!load_ident())
1415  {
1416  /*
1417  * We can start up without the IDENT file, although it means that you
1418  * cannot log in using any of the authentication methods that need a
1419  * user name mapping. load_ident() already logged the details of error
1420  * to the log.
1421  */
1422  }
1423 
1424 #ifdef HAVE_PTHREAD_IS_THREADED_NP
1425 
1426  /*
1427  * On macOS, libintl replaces setlocale() with a version that calls
1428  * CFLocaleCopyCurrent() when its second argument is "" and every relevant
1429  * environment variable is unset or empty. CFLocaleCopyCurrent() makes
1430  * the process multithreaded. The postmaster calls sigprocmask() and
1431  * calls fork() without an immediate exec(), both of which have undefined
1432  * behavior in a multithreaded program. A multithreaded postmaster is the
1433  * normal case on Windows, which offers neither fork() nor sigprocmask().
1434  */
1435  if (pthread_is_threaded_np() != 0)
1436  ereport(FATAL,
1437  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1438  errmsg("postmaster became multithreaded during startup"),
1439  errhint("Set the LC_ALL environment variable to a valid locale.")));
1440 #endif
1441 
1442  /*
1443  * Remember postmaster startup time
1444  */
1446 
1447  /*
1448  * Report postmaster status in the postmaster.pid file, to allow pg_ctl to
1449  * see what's happening.
1450  */
1452 
1453  /* Start bgwriter and checkpointer so they can help with recovery */
1454  if (CheckpointerPID == 0)
1456  if (BgWriterPID == 0)
1458 
1459  /*
1460  * We're ready to rock and roll...
1461  */
1463  Assert(StartupPID != 0);
1465  pmState = PM_STARTUP;
1466 
1467  /* Some workers may be scheduled to start now */
1469 
1470  status = ServerLoop();
1471 
1472  /*
1473  * ServerLoop probably shouldn't ever return, but if it does, close down.
1474  */
1475  ExitPostmaster(status != STATUS_OK);
1476 
1477  abort(); /* not reached */
1478 }
1479 
1480 
1481 /*
1482  * on_proc_exit callback to close server's listen sockets
1483  */
/*
 * Takes the postmaster's listening endpoints out of service at process
 * exit.  Per the comments in the body, the order is: close the socket FDs,
 * then remove the Unix-socket filesystem entries; socket lock files are left
 * to a later on_proc_exit callback.  NumListenSockets is reset to 0.
 *
 * NOTE(review): the numbering embedded in this listing skips 1485, 1496 and
 * 1504, so the declarator line (function name and parameters), the statement
 * controlled by the for loop, and the statement that follows the second
 * comment are not visible here.  As shown, "NumListenSockets = 0;" would
 * parse as the loop body -- confirm against the complete file.
 */
1484 static void
1486 {
1487  int i;
1488 
1489  /*
1490  * First, explicitly close all the socket FDs. We used to just let this
1491  * happen implicitly at postmaster exit, but it's better to close them
1492  * before we remove the postmaster.pid lockfile; otherwise there's a race
1493  * condition if a new postmaster wants to re-use the TCP port number.
1494  */
1495  for (i = 0; i < NumListenSockets; i++)
1497  NumListenSockets = 0;
1498 
1499  /*
1500  * Next, remove any filesystem entries for Unix sockets. To avoid race
1501  * conditions against incoming postmasters, this must happen after closing
1502  * the sockets and before removing lock files.
1503  */
1505 
1506  /*
1507  * We don't do anything about socket lock files here; those will be
1508  * removed in a later on_proc_exit callback.
1509  */
1510 }
1511 
1512 /*
1513  * on_proc_exit callback to delete external_pid_file
1514  */
/*
 * Removes the file named by external_pid_file if that variable is set, and
 * does nothing otherwise.  The return value of unlink() is not examined, so
 * a failed removal goes unreported.
 *
 * NOTE(review): embedded listing line 1516 -- the declarator carrying the
 * function name and parameter list -- is missing from this excerpt.
 */
1515 static void
1517 {
1518  if (external_pid_file)
1519  unlink(external_pid_file);
1520 }
1521 
1522 
1523 /*
1524  * Compute and check the directory paths to files that are part of the
1525  * installation (as deduced from the postgres executable's own location)
1526  */
/*
 * Resolves my_exec_path with find_my_exec(argv0, ...) and then checks that
 * pkglib_path can be opened as a directory.  Failure to locate the
 * executable is reported at FATAL; failure to open pkglib_path is reported
 * at ERROR with a hint that names my_exec_path.  Under EXEC_BACKEND,
 * postgres_exec_path is additionally resolved with find_other_exec(), also
 * FATAL on failure.
 *
 * NOTE(review): embedded listing lines 1528, 1550 and 1562 are missing: the
 * declarator (which presumably declares argv0), the statement following the
 * "Locate the pkglib directory" comment (so nothing visible here assigns
 * pkglib_path), and the line that opens the ereport(ERROR, ...) argument
 * list (its parentheses do not balance as shown).  Confirm against the
 * complete file.
 */
1527 static void
1529 {
1530  DIR *pdir;
1531 
1532  /* Locate the postgres executable itself */
1533  if (find_my_exec(argv0, my_exec_path) < 0)
1534  ereport(FATAL,
1535  (errmsg("%s: could not locate my own executable path", argv0)));
1536 
1537 #ifdef EXEC_BACKEND
1538  /* Locate executable backend before we change working directory */
1539  if (find_other_exec(argv0, "postgres", PG_BACKEND_VERSIONSTR,
1540  postgres_exec_path) < 0)
1541  ereport(FATAL,
1542  (errmsg("%s: could not locate matching postgres executable",
1543  argv0)));
1544 #endif
1545 
1546  /*
1547  * Locate the pkglib directory --- this has to be set early in case we try
1548  * to load any modules from it in response to postgresql.conf entries.
1549  */
1551 
1552  /*
1553  * Verify that there's a readable directory there; otherwise the Postgres
1554  * installation is incomplete or corrupt. (A typical cause of this
1555  * failure is that the postgres executable has been moved or hardlinked to
1556  * some directory that's not a sibling of the installation lib/
1557  * directory.)
1558  */
1559  pdir = AllocateDir(pkglib_path);
1560  if (pdir == NULL)
1561  ereport(ERROR,
1563  errmsg("could not open directory \"%s\": %m",
1564  pkglib_path),
1565  errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
1566  my_exec_path)));
      /* The directory was opened only as a readability probe; release it at once. */
1567  FreeDir(pdir);
1568 
1569  /*
1570  * It's not worth checking the share/ directory. If the lib/ directory is
1571  * there, then share/ probably is too.
1572  */
1573 }
1574 
1575 /*
1576  * Check that pg_control exists in the correct location in the data directory.
1577  *
1578  * No attempt is made to validate the contents of pg_control here. This is
1579  * just a sanity check to see if we are looking at a real data directory.
1580  */
/*
 * Builds "<DataDir>/global/pg_control" in a MAXPGPATH-sized buffer and tries
 * to open it.  If the open fails, a message naming DataDir, the path and
 * strerror(errno) is written with write_stderr() and ExitPostmaster(2) is
 * called.  If it succeeds, the file is closed again without being read.
 *
 * NOTE(review): embedded listing line 1582 -- the declarator carrying the
 * function name and parameter list -- is missing from this excerpt.
 */
1581 static void
1583 {
1584  char path[MAXPGPATH];
1585  FILE *fp;
1586 
1587  snprintf(path, sizeof(path), "%s/global/pg_control", DataDir);
1588 
1589  fp = AllocateFile(path, PG_BINARY_R);
1590  if (fp == NULL)
1591  {
1592  write_stderr("%s: could not find the database system\n"
1593  "Expected to find it in the directory \"%s\",\n"
1594  "but could not open file \"%s\": %s\n",
1595  progname, DataDir, path, strerror(errno));
1596  ExitPostmaster(2);
1597  }
      /* Only the ability to open the file matters here; nothing is read from it. */
1598  FreeFile(fp);
1599 }
1600 
1601 /*
1602  * Determine how long we should let ServerLoop sleep, in milliseconds.
1603  *
1604  * In normal conditions we wait at most one minute, to ensure that the other
1605  * background tasks handled by ServerLoop get done even when no requests are
1606  * arriving. However, if there are background workers waiting to be started,
1607  * we don't actually sleep so that they are quickly serviced. Other exception
1608  * cases are as shown in the code.
1609  */
/*
 * Return value (milliseconds), as far as the visible code shows:
 *   - inside the first branch: Max(seconds * 1000, 0) while AbortStartTime
 *     is nonzero, otherwise 60 * 1000;
 *   - 0 when StartWorkerNeeded is set;
 *   - Min(60 * 1000, ms) when scanning crashed workers produced a nonzero
 *     next_wakeup;
 *   - 60 * 1000 in every remaining case.
 *
 * NOTE(review): embedded listing lines 1611, 1620, 1649, 1659 and 1678 are
 * missing: the declarator, the remainder of the first if condition, the
 * header of the loop that advances siter, the first operand of the condition
 * guarding ForgetBackgroundWorker(), and the start of the statement that
 * assigns ms.  Confirm those against the complete file.
 */
1610 static int
1612 {
1613  TimestampTz next_wakeup = 0;
1614 
1615  /*
1616  * Normal case: either there are no background workers at all, or we're in
1617  * a shutdown sequence (during which we ignore bgworkers altogether).
1618  */
1619  if (Shutdown > NoShutdown ||
1621  {
1622  if (AbortStartTime != 0)
1623  {
1624  int seconds;
1625 
1626  /* time left to abort; clamp to 0 in case it already expired */
1627  seconds = SIGKILL_CHILDREN_AFTER_SECS -
1628  (time(NULL) - AbortStartTime);
1629 
1630  return Max(seconds * 1000, 0);
1631  }
1632  else
1633  return 60 * 1000;
1634  }
1635 
1636  if (StartWorkerNeeded)
1637  return 0;
1638 
1639  if (HaveCrashedWorker)
1640  {
1641  slist_mutable_iter siter;
1642 
1643  /*
1644  * When there are crashed bgworkers, we sleep just long enough that
1645  * they are restarted when they request to be. Scan the list to
1646  * determine the minimum of all wakeup times according to most recent
1647  * crash time and requested restart interval.
1648  */
1650  {
1651  RegisteredBgWorker *rw;
1652  TimestampTz this_wakeup;
1653 
1654  rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
1655 
      /* An entry with rw_crashed_at == 0 contributes no wakeup time. */
1656  if (rw->rw_crashed_at == 0)
1657  continue;
1658 
1660  || rw->rw_terminate)
1661  {
1662  ForgetBackgroundWorker(&siter);
1663  continue;
1664  }
1665 
      /* bgw_restart_time is scaled by 1000L before being added as milliseconds. */
1666  this_wakeup = TimestampTzPlusMilliseconds(rw->rw_crashed_at,
1667  1000L * rw->rw_worker.bgw_restart_time);
      /* next_wakeup == 0 means "none found yet"; otherwise keep the earliest. */
1668  if (next_wakeup == 0 || this_wakeup < next_wakeup)
1669  next_wakeup = this_wakeup;
1670  }
1671  }
1672 
1673  if (next_wakeup != 0)
1674  {
1675  int ms;
1676 
1677  /* result of TimestampDifferenceMilliseconds is in [0, INT_MAX] */
1679  next_wakeup);
1680  return Min(60 * 1000, ms);
1681  }
1682 
1683  return 60 * 1000;
1684 }
1685 
1686 /*
1687  * Activate or deactivate notifications of server socket events. Since we
1688  * don't currently have a way to remove events from an existing WaitEventSet,
1689  * we'll just destroy and recreate the whole thing. This is called during
1690  * shutdown so we can wait for backends to exit without accepting new
1691  * connections, and during crash reinitialization when we need to start
1692  * listening for new connections again. The WaitEventSet will be freed in fork
1693  * children by ClosePostmasterPorts().
1694  */
/*
 * accept_connections: when true, the expression below evaluates to
 * 1 + NumListenSockets and the loop over the listen sockets runs; when
 * false, it evaluates to 1 and the listen sockets are skipped.  The
 * expression is presumably the capacity requested for the recreated set --
 * confirm, since the call it belongs to is not visible.
 *
 * NOTE(review): embedded listing lines 1699, 1702, 1704 and 1710 are missing,
 * so the statement controlled by "if (pm_wait_set)", the call that receives
 * the size expression, the call whose last argument is the first NULL, and
 * the call inside the for loop are not shown.  As shown,
 * "pm_wait_set = NULL;" would parse as the body of the if statement.
 */
1695 static void
1696 ConfigurePostmasterWaitSet(bool accept_connections)
1697 {
1698  if (pm_wait_set)
1700  pm_wait_set = NULL;
1701 
1703  accept_connections ? (1 + NumListenSockets) : 1);
1705  NULL);
1706 
1707  if (accept_connections)
1708  {
1709  for (int i = 0; i < NumListenSockets; i++)
1711  NULL, NULL);
1712  }
1713 }
1714 
1715 /*
1716  * Main idle loop of postmaster
1717  */
/*
 * Each pass of the endless for loop waits on pm_wait_set, walks the returned
 * events (checking WL_LATCH_SET and WL_SOCKET_ACCEPT), and then runs the
 * child-process and housekeeping checks described by the comments below.
 * No return statement or loop exit is visible in this excerpt.
 *
 * NOTE(review): many embedded listing lines are missing here (among others
 * 1719, 1726, 1734, 1746, 1755-1760, 1762, 1771, 1785, 1793, 1796, 1798,
 * 1807, 1816, 1819, 1826, 1829, 1832, 1836, 1840-1841, 1844, 1847-1848,
 * 1880, 1887, 1908 and 1921).  They include the declarator and most of the
 * statements that the comments say start or signal child processes.
 * Several if statements therefore appear without their bodies; do not read
 * the control flow off this excerpt without the complete file.
 */
1718 static int
1720 {
1721  time_t last_lockfile_recheck_time,
1722  last_touch_time;
1723  WaitEvent events[MAXLISTEN];
1724  int nevents;
1725 
      /* Both periodic-task timers start counting from loop entry. */
1727  last_lockfile_recheck_time = last_touch_time = time(NULL);
1728 
1729  for (;;)
1730  {
1731  time_t now;
1732 
1733  nevents = WaitEventSetWait(pm_wait_set,
1735  events,
1736  lengthof(events),
1737  0 /* postmaster posts no wait_events */ );
1738 
1739  /*
1740  * Latch set by signal handler, or new connection pending on any of
1741  * our sockets? If the latter, fork a child process to deal with it.
1742  */
1743  for (int i = 0; i < nevents; i++)
1744  {
1745  if (events[i].events & WL_LATCH_SET)
1747 
1748  /*
1749  * The following requests are handled unconditionally, even if we
1750  * didn't see WL_LATCH_SET. This gives high priority to shutdown
1751  * and reload requests where the latch happens to appear later in
1752  * events[] or will be reported by a later call to
1753  * WaitEventSetWait().
1754  */
1761  if (pending_pm_pmsignal)
1763 
1764  if (events[i].events & WL_SOCKET_ACCEPT)
1765  {
1766  Port *port;
1767 
      /* A NULL result from ConnCreate() means nothing further is done for this event. */
1768  port = ConnCreate(events[i].fd);
1769  if (port)
1770  {
1772 
1773  /*
1774  * We no longer need the open socket or port structure in
1775  * this process
1776  */
1777  StreamClose(port->sock);
1778  ConnFree(port);
1779  }
1780  }
1781  }
1782 
1783  /* If we have lost the log collector, try to start a new one */
1784  if (SysLoggerPID == 0 && Logging_collector)
1786 
1787  /*
1788  * If no background writer process is running, and we are not in a
1789  * state that prevents it, start one. It doesn't matter if this
1790  * fails, we'll just try again later. Likewise for the checkpointer.
1791  */
1792  if (pmState == PM_RUN || pmState == PM_RECOVERY ||
1794  {
1795  if (CheckpointerPID == 0)
1797  if (BgWriterPID == 0)
1799  }
1800 
1801  /*
1802  * Likewise, if we have lost the walwriter process, try to start a new
1803  * one. But this is needed only in normal operation (else we cannot
1804  * be writing any new WAL).
1805  */
1806  if (WalWriterPID == 0 && pmState == PM_RUN)
1808 
1809  /*
1810  * If we have lost the autovacuum launcher, try to start a new one. We
1811  * don't want autovacuum to run in binary upgrade mode because
1812  * autovacuum might update relfrozenxid for empty tables before the
1813  * physical files are put in place.
1814  */
1815  if (!IsBinaryUpgrade && AutoVacPID == 0 &&
1817  pmState == PM_RUN)
1818  {
1820  if (AutoVacPID != 0)
1821  start_autovac_launcher = false; /* signal processed */
1822  }
1823 
1824  /* If we have lost the archiver, try to start a new one. */
1825  if (PgArchPID == 0 && PgArchStartupAllowed())
1827 
1828  /* If we need to start a slot sync worker, try to do that now */
1830 
1831  /* If we need to signal the autovacuum launcher, do so now */
1833  {
1834  avlauncher_needs_signal = false;
1835  if (AutoVacPID != 0)
1837  }
1838 
1839  /* If we need to start a WAL receiver, try to do that now */
1842 
1843  /* If we need to start a WAL summarizer, try to do that now */
1845 
1846  /* Get other worker processes running, if needed */
1849 
1850 #ifdef HAVE_PTHREAD_IS_THREADED_NP
1851 
1852  /*
1853  * With assertions enabled, check regularly for appearance of
1854  * additional threads. All builds check at start and exit.
1855  */
1856  Assert(pthread_is_threaded_np() == 0);
1857 #endif
1858 
1859  /*
1860  * Lastly, check to see if it's time to do some things that we don't
1861  * want to do every single time through the loop, because they're a
1862  * bit expensive. Note that there's up to a minute of slop in when
1863  * these tasks will be performed, since DetermineSleepTime() will let
1864  * us sleep at most that long; except for SIGKILL timeout which has
1865  * special-case logic there.
1866  */
1867  now = time(NULL);
1868 
1869  /*
1870  * If we already sent SIGQUIT to children and they are slow to shut
1871  * down, it's time to send them SIGKILL (or SIGABRT if requested).
1872  * This doesn't happen normally, but under certain conditions backends
1873  * can get stuck while shutting down. This is a last measure to get
1874  * them unwedged.
1875  *
1876  * Note we also do this during recovery from a process crash.
1877  */
1878  if ((Shutdown >= ImmediateShutdown || FatalError) &&
1879  AbortStartTime != 0 &&
1881  {
1882  /* We were gentle with them before. Not anymore */
1883  ereport(LOG,
1884  /* translator: %s is SIGKILL or SIGABRT */
1885  (errmsg("issuing %s to recalcitrant children",
1886  send_abort_for_kill ? "SIGABRT" : "SIGKILL")));
1888  /* reset flag so we don't SIGKILL again */
1889  AbortStartTime = 0;
1890  }
1891 
1892  /*
1893  * Once a minute, verify that postmaster.pid hasn't been removed or
1894  * overwritten. If it has, we force a shutdown. This avoids having
1895  * postmasters and child processes hanging around after their database
1896  * is gone, and maybe causing problems if a new database cluster is
1897  * created in the same place. It also provides some protection
1898  * against a DBA foolishly removing postmaster.pid and manually
1899  * starting a new postmaster. Data corruption is likely to ensue from
1900  * that anyway, but we can minimize the damage by aborting ASAP.
1901  */
1902  if (now - last_lockfile_recheck_time >= 1 * SECS_PER_MINUTE)
1903  {
1904  if (!RecheckDataDirLockFile())
1905  {
1906  ereport(LOG,
1907  (errmsg("performing immediate shutdown because data directory lock file is invalid")));
1909  }
1910  last_lockfile_recheck_time = now;
1911  }
1912 
1913  /*
1914  * Touch Unix socket and lock files every 58 minutes, to ensure that
1915  * they are not removed by overzealous /tmp-cleaning tasks. We assume
1916  * no one runs cleaners with cutoff times of less than an hour ...
1917  */
1918  if (now - last_touch_time >= 58 * SECS_PER_MINUTE)
1919  {
1920  TouchSocketFiles();
1922  last_touch_time = now;
1923  }
1924  }
1925 }
1926 
1927 /*
1928  * Read a client's startup packet and do something according to it.
1929  *
1930  * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1931  * not return at all.
1932  *
1933  * (Note that ereport(FATAL) stuff is sent to the client, so only use it
1934  * if that's what you want. Return STATUS_ERROR if you don't want to
1935  * send anything to the client, which would typically be appropriate
1936  * if we detect a communications failure.)
1937  *
1938  * Set ssl_done and/or gss_done when negotiation of an encrypted layer
1939  * (currently, TLS or GSSAPI) is completed. A successful negotiation of either
1940  * encryption layer sets both flags, but a rejected negotiation sets only the
1941  * flag for that layer, since the client may wish to try the other one. We
1942  * should make no assumption here about the order in which the client may make
1943  * requests.
1944  */
1945 static int
1946 ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done)
1947 {
1948  int32 len;
1949  char *buf;
1950  ProtocolVersion proto;
1951  MemoryContext oldcontext;
1952 
1953  pq_startmsgread();
1954 
1955  /*
1956  * Grab the first byte of the length word separately, so that we can tell
1957  * whether we have no data at all or an incomplete packet. (This might
1958  * sound inefficient, but it's not really, because of buffering in
1959  * pqcomm.c.)
1960  */
1961  if (pq_getbytes((char *) &len, 1) == EOF)
1962  {
1963  /*
1964  * If we get no data at all, don't clutter the log with a complaint;
1965  * such cases often occur for legitimate reasons. An example is that
1966  * we might be here after responding to NEGOTIATE_SSL_CODE, and if the
1967  * client didn't like our response, it'll probably just drop the
1968  * connection. Service-monitoring software also often just opens and
1969  * closes a connection without sending anything. (So do port
1970  * scanners, which may be less benign, but it's not really our job to
1971  * notice those.)
1972  */
1973  return STATUS_ERROR;
1974  }
1975 
1976  if (pq_getbytes(((char *) &len) + 1, 3) == EOF)
1977  {
1978  /* Got a partial length word, so bleat about that */
1979  if (!ssl_done && !gss_done)
1981  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1982  errmsg("incomplete startup packet")));
1983  return STATUS_ERROR;
1984  }
1985 
1986  len = pg_ntoh32(len);
1987  len -= 4;
1988 
1989  if (len < (int32) sizeof(ProtocolVersion) ||
1991  {
1993  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1994  errmsg("invalid length of startup packet")));
1995  return STATUS_ERROR;
1996  }
1997 
1998  /*
1999  * Allocate space to hold the startup packet, plus one extra byte that's
2000  * initialized to be zero. This ensures we will have null termination of
2001  * all strings inside the packet.
2002  */
2003  buf = palloc(len + 1);
2004  buf[len] = '\0';
2005 
2006  if (pq_getbytes(buf, len) == EOF)
2007  {
2009  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2010  errmsg("incomplete startup packet")));
2011  return STATUS_ERROR;
2012  }
2013  pq_endmsgread();
2014 
2015  /*
2016  * The first field is either a protocol version number or a special
2017  * request code.
2018  */
2019  port->proto = proto = pg_ntoh32(*((ProtocolVersion *) buf));
2020 
2021  if (proto == CANCEL_REQUEST_CODE)
2022  {
2023  if (len != sizeof(CancelRequestPacket))
2024  {
2026  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2027  errmsg("invalid length of startup packet")));
2028  return STATUS_ERROR;
2029  }
2031  /* Not really an error, but we don't want to proceed further */
2032  return STATUS_ERROR;
2033  }
2034 
2035  if (proto == NEGOTIATE_SSL_CODE && !ssl_done)
2036  {
2037  char SSLok;
2038 
2039 #ifdef USE_SSL
2040  /* No SSL when disabled or on Unix sockets */
2041  if (!LoadedSSL || port->laddr.addr.ss_family == AF_UNIX)
2042  SSLok = 'N';
2043  else
2044  SSLok = 'S'; /* Support for SSL */
2045 #else
2046  SSLok = 'N'; /* No support for SSL */
2047 #endif
2048 
2049 retry1:
2050  if (send(port->sock, &SSLok, 1, 0) != 1)
2051  {
2052  if (errno == EINTR)
2053  goto retry1; /* if interrupted, just retry */
2056  errmsg("failed to send SSL negotiation response: %m")));
2057  return STATUS_ERROR; /* close the connection */
2058  }
2059 
2060 #ifdef USE_SSL
2061  if (SSLok == 'S' && secure_open_server(port) == -1)
2062  return STATUS_ERROR;
2063 #endif
2064 
2065  /*
2066  * At this point we should have no data already buffered. If we do,
2067  * it was received before we performed the SSL handshake, so it wasn't
2068  * encrypted and indeed may have been injected by a man-in-the-middle.
2069  * We report this case to the client.
2070  */
2071  if (pq_buffer_has_data())
2072  ereport(FATAL,
2073  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2074  errmsg("received unencrypted data after SSL request"),
2075  errdetail("This could be either a client-software bug or evidence of an attempted man-in-the-middle attack.")));
2076 
2077  /*
2078  * regular startup packet, cancel, etc packet should follow, but not
2079  * another SSL negotiation request, and a GSS request should only
2080  * follow if SSL was rejected (client may negotiate in either order)
2081  */
2082  return ProcessStartupPacket(port, true, SSLok == 'S');
2083  }
2084  else if (proto == NEGOTIATE_GSS_CODE && !gss_done)
2085  {
2086  char GSSok = 'N';
2087 
2088 #ifdef ENABLE_GSS
2089  /* No GSSAPI encryption when on Unix socket */
2090  if (port->laddr.addr.ss_family != AF_UNIX)
2091  GSSok = 'G';
2092 #endif
2093 
2094  while (send(port->sock, &GSSok, 1, 0) != 1)
2095  {
2096  if (errno == EINTR)
2097  continue;
2100  errmsg("failed to send GSSAPI negotiation response: %m")));
2101  return STATUS_ERROR; /* close the connection */
2102  }
2103 
2104 #ifdef ENABLE_GSS
2105  if (GSSok == 'G' && secure_open_gssapi(port) == -1)
2106  return STATUS_ERROR;
2107 #endif
2108 
2109  /*
2110  * At this point we should have no data already buffered. If we do,
2111  * it was received before we performed the GSS handshake, so it wasn't
2112  * encrypted and indeed may have been injected by a man-in-the-middle.
2113  * We report this case to the client.
2114  */
2115  if (pq_buffer_has_data())
2116  ereport(FATAL,
2117  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2118  errmsg("received unencrypted data after GSSAPI encryption request"),
2119  errdetail("This could be either a client-software bug or evidence of an attempted man-in-the-middle attack.")));
2120 
2121  /*
2122  * regular startup packet, cancel, etc packet should follow, but not
2123  * another GSS negotiation request, and an SSL request should only
2124  * follow if GSS was rejected (client may negotiate in either order)
2125  */
2126  return ProcessStartupPacket(port, GSSok == 'G', true);
2127  }
2128 
2129  /* Could add additional special packet types here */
2130 
2131  /*
2132  * Set FrontendProtocol now so that ereport() knows what format to send if
2133  * we fail during startup.
2134  */
2135  FrontendProtocol = proto;
2136 
2137  /* Check that the major protocol version is in range. */
2140  ereport(FATAL,
2141  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2142  errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
2143  PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
2147 
2148  /*
2149  * Now fetch parameters out of startup packet and save them into the Port
2150  * structure. All data structures attached to the Port struct must be
2151  * allocated in TopMemoryContext so that they will remain available in a
2152  * running backend (even after PostmasterContext is destroyed). We need
2153  * not worry about leaking this storage on failure, since we aren't in the
2154  * postmaster process anymore.
2155  */
2157 
2158  /* Handle protocol version 3 startup packet */
2159  {
2160  int32 offset = sizeof(ProtocolVersion);
2161  List *unrecognized_protocol_options = NIL;
2162 
2163  /*
2164  * Scan packet body for name/option pairs. We can assume any string
2165  * beginning within the packet body is null-terminated, thanks to
2166  * zeroing extra byte above.
2167  */
2168  port->guc_options = NIL;
2169 
2170  while (offset < len)
2171  {
2172  char *nameptr = buf + offset;
2173  int32 valoffset;
2174  char *valptr;
2175 
2176  if (*nameptr == '\0')
2177  break; /* found packet terminator */
2178  valoffset = offset + strlen(nameptr) + 1;
2179  if (valoffset >= len)
2180  break; /* missing value, will complain below */
2181  valptr = buf + valoffset;
2182 
2183  if (strcmp(nameptr, "database") == 0)
2184  port->database_name = pstrdup(valptr);
2185  else if (strcmp(nameptr, "user") == 0)
2186  port->user_name = pstrdup(valptr);
2187  else if (strcmp(nameptr, "options") == 0)
2188  port->cmdline_options = pstrdup(valptr);
2189  else if (strcmp(nameptr, "replication") == 0)
2190  {
2191  /*
2192  * Due to backward compatibility concerns the replication
2193  * parameter is a hybrid beast which allows the value to be
2194  * either boolean or the string 'database'. The latter
2195  * connects to a specific database which is e.g. required for
2196  * logical decoding.
2197  */
2198  if (strcmp(valptr, "database") == 0)
2199  {
2200  am_walsender = true;
2201  am_db_walsender = true;
2202  }
2203  else if (!parse_bool(valptr, &am_walsender))
2204  ereport(FATAL,
2205  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2206  errmsg("invalid value for parameter \"%s\": \"%s\"",
2207  "replication",
2208  valptr),
2209  errhint("Valid values are: \"false\", 0, \"true\", 1, \"database\".")));
2210  }
2211  else if (strncmp(nameptr, "_pq_.", 5) == 0)
2212  {
2213  /*
2214  * Any option beginning with _pq_. is reserved for use as a
2215  * protocol-level option, but at present no such options are
2216  * defined.
2217  */
2218  unrecognized_protocol_options =
2219  lappend(unrecognized_protocol_options, pstrdup(nameptr));
2220  }
2221  else
2222  {
2223  /* Assume it's a generic GUC option */
2224  port->guc_options = lappend(port->guc_options,
2225  pstrdup(nameptr));
2226  port->guc_options = lappend(port->guc_options,
2227  pstrdup(valptr));
2228 
2229  /*
2230  * Copy application_name to port if we come across it. This
2231  * is done so we can log the application_name in the
2232  * connection authorization message. Note that the GUC would
2233  * be used but we haven't gone through GUC setup yet.
2234  */
2235  if (strcmp(nameptr, "application_name") == 0)
2236  {
2237  port->application_name = pg_clean_ascii(valptr, 0);
2238  }
2239  }
2240  offset = valoffset + strlen(valptr) + 1;
2241  }
2242 
2243  /*
2244  * If we didn't find a packet terminator exactly at the end of the
2245  * given packet length, complain.
2246  */
2247  if (offset != len - 1)
2248  ereport(FATAL,
2249  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2250  errmsg("invalid startup packet layout: expected terminator as last byte")));
2251 
2252  /*
2253  * If the client requested a newer protocol version or if the client
2254  * requested any protocol options we didn't recognize, let them know
2255  * the newest minor protocol version we do support and the names of
2256  * any unrecognized options.
2257  */
2259  unrecognized_protocol_options != NIL)
2260  SendNegotiateProtocolVersion(unrecognized_protocol_options);
2261  }
2262 
2263  /* Check a user name was given. */
2264  if (port->user_name == NULL || port->user_name[0] == '\0')
2265  ereport(FATAL,
2266  (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
2267  errmsg("no PostgreSQL user name specified in startup packet")));
2268 
2269  /* The database defaults to the user name. */
2270  if (port->database_name == NULL || port->database_name[0] == '\0')
2271  port->database_name = pstrdup(port->user_name);
2272 
2273  if (am_walsender)
2275  else
2277 
2278  /*
2279  * Normal walsender backends, e.g. for streaming replication, are not
2280  * connected to a particular database. But walsenders used for logical
2281  * replication need to connect to a specific database. We allow streaming
2282  * replication commands to be issued even if connected to a database as it
2283  * can make sense to first make a basebackup and then stream changes
2284  * starting from that.
2285  */
2286  if (am_walsender && !am_db_walsender)
2287  port->database_name[0] = '\0';
2288 
2289  /*
2290  * Done putting stuff in TopMemoryContext.
2291  */
2292  MemoryContextSwitchTo(oldcontext);
2293 
2294  return STATUS_OK;
2295 }
2296 
2297 /*
2298  * Send a NegotiateProtocolVersion to the client. This lets the client know
2299  * that they have requested a newer minor protocol version than we are able
2300  * to speak. We'll speak the highest version we know about; the client can,
2301  * of course, abandon the connection if that's a problem.
2302  *
2303  * We also include in the response a list of protocol options we didn't
2304  * understand. This allows clients to include optional parameters that might
2305  * be present either in newer protocol versions or third-party protocol
2306  * extensions without fear of having to reconnect if those options are not
2307  * understood, while at the same time making certain that the client is aware
2308  * of which options were actually accepted.
 *
 * unrecognized_protocol_options is a List whose elements are handed to
 * pq_sendstring(), i.e. the option names. The part of the message built in
 * the visible lines is a 32-bit count of those names followed by the names
 * themselves. Nothing is returned.
2309  */
2310 static void
2311 SendNegotiateProtocolVersion(List *unrecognized_protocol_options)
2312 {
/*
 * NOTE(review): listing lines 2313 and 2316-2317 are absent from this
 * extract. "buf" is used below but not declared in any visible line, so its
 * declaration and the start of the message are presumably on those lines --
 * confirm against the upstream file.
 */
2314  ListCell *lc;
2315 
2318  pq_sendint32(&buf, list_length(unrecognized_protocol_options));
2319  foreach(lc, unrecognized_protocol_options)
2320  pq_sendstring(&buf, lfirst(lc));
2321  pq_endmessage(&buf);
2322 
2323  /* no need to flush, some other message will follow */
2324 }
2325 
2326 /*
2327  * The client has sent a cancel request packet, not a normal
2328  * start-a-new-connection packet. Perform the necessary processing.
2329  * Nothing is sent back to the client.
 *
 * The packet is read as a CancelRequestPacket carrying a target backend PID
 * and a cancel key; both are converted with pg_ntoh32(). The first tracked
 * backend whose pid matches ends the search: it is sent SIGINT if its
 * cancel_key also matches, otherwise the key mismatch is logged. If no pid
 * matches at all, that is logged too.
 *
 * NOTE(review): listing line 2332, which should hold the function name and
 * parameter list, is missing from this extract. The body reads "pkt", so
 * that is presumably a parameter -- confirm against the upstream file.
2330  */
2331 static void
2333 {
2334  CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
2335  int backendPID;
2336  int32 cancelAuthCode;
2337  Backend *bp;
2338 
2339 #ifndef EXEC_BACKEND
2340  dlist_iter iter;
2341 #else
2342  int i;
2343 #endif
2344 
 /* Both packet fields are passed through pg_ntoh32() before being compared */
2345  backendPID = (int) pg_ntoh32(canc->backendPID);
2346  cancelAuthCode = (int32) pg_ntoh32(canc->cancelAuthCode);
2347 
2348  /*
2349  * See if we have a matching backend. In the EXEC_BACKEND case, we can no
2350  * longer access the postmaster's own backend list, and must rely on the
2351  * duplicate array in shared memory.
2352  */
2353 #ifndef EXEC_BACKEND
2354  dlist_foreach(iter, &BackendList)
2355  {
2356  bp = dlist_container(Backend, elem, iter.cur);
2357 #else
2358  for (i = MaxLivePostmasterChildren() - 1; i >= 0; i--)
2359  {
2360  bp = (Backend *) &ShmemBackendArray[i];
2361 #endif
2362  if (bp->pid == backendPID)
2363  {
2364  if (bp->cancel_key == cancelAuthCode)
2365  {
2366  /* Found a match; signal that backend to cancel current op */
2367  ereport(DEBUG2,
2368  (errmsg_internal("processing cancel request: sending SIGINT to process %d",
2369  backendPID)));
2370  signal_child(bp->pid, SIGINT);
2371  }
2372  else
2373  /* Right PID, wrong key: no way, Jose */
2374  ereport(LOG,
2375  (errmsg("wrong key in cancel request for process %d",
2376  backendPID)));
 /* A PID match ends the search whether or not the key was accepted */
2377  return;
2378  }
2379 #ifndef EXEC_BACKEND /* make GNU Emacs 26.1 see brace balance */
2380  }
2381 #else
2382  }
2383 #endif
2384 
2385  /* No matching backend */
2386  ereport(LOG,
2387  (errmsg("PID %d in cancel request did not match any process",
2388  backendPID)));
2389 }
2390 
2391 /*
2392  * canAcceptConnections --- check to see if database state allows connections
2393  * of the specified type. backend_type can be BACKEND_TYPE_NORMAL,
2394  * BACKEND_TYPE_AUTOVAC, or BACKEND_TYPE_BGWORKER. (Note that we don't yet
2395  * know whether a NORMAL connection might turn into a walsender.)
 *
 * Returns CAC_OK when none of the checks below objects; otherwise one of
 * CAC_SHUTDOWN, CAC_STARTUP, CAC_NOTCONSISTENT, CAC_RECOVERY or CAC_TOOMANY.
 * The checks read the globals pmState, Shutdown, FatalError and
 * connsAllowed.
2396  */
2397 static CAC_state
2398 canAcceptConnections(int backend_type)
2399 {
2400  CAC_state result = CAC_OK;
2401 
2402  /*
2403  * Can't start backends when in startup/shutdown/inconsistent recovery
2404  * state. We treat autovac workers the same as user backends for this
2405  * purpose. However, bgworkers are excluded from this test; we expect
2406  * bgworker_should_start_now() to have decided whether the DB state allows
 * them.
2407  */
2408  if (pmState != PM_RUN && pmState != PM_HOT_STANDBY &&
2409  backend_type != BACKEND_TYPE_BGWORKER)
2410  {
2411  if (Shutdown > NoShutdown)
2412  return CAC_SHUTDOWN; /* shutdown is pending */
2413  else if (!FatalError && pmState == PM_STARTUP)
2414  return CAC_STARTUP; /* normal startup */
2415  else if (!FatalError && pmState == PM_RECOVERY)
2416  return CAC_NOTCONSISTENT; /* not yet at consistent recovery
2417  * state */
2418  else
2419  return CAC_RECOVERY; /* else must be crash recovery */
2420  }
2421 
2422  /*
2423  * "Smart shutdown" restrictions are applied only to normal connections,
2424  * not to autovac workers or bgworkers.
2425  */
2426  if (!connsAllowed && backend_type == BACKEND_TYPE_NORMAL)
2427  return CAC_SHUTDOWN; /* shutdown is pending */
2428 
2429  /*
2430  * Don't start too many children.
2431  *
2432  * We allow more connections here than we can have backends because some
2433  * might still be authenticating; they might fail auth, or some existing
2434  * backend might exit before the auth cycle is completed. The exact
2435  * MaxBackends limit is enforced when a new backend tries to join the
2436  * shared-inval backend array.
2437  *
2438  * The limit here must match the sizes of the per-child-process arrays;
2439  * see comments for MaxLivePostmasterChildren().
2440  */
/*
 * NOTE(review): listing line 2441 is missing from this extract. It
 * presumably holds the child-count test that guards the assignment below;
 * as shown, the assignment looks unconditional -- confirm against the
 * upstream file.
 */
2442  result = CAC_TOOMANY;
2443 
2444  return result;
2445 }
2446 
2447 
2448 /*
2449  * ConnCreate -- create a local connection data structure
2450  *
2451  * Returns NULL on failure, other than out-of-memory which is fatal.
2452  */
2453 static Port *
2454 ConnCreate(int serverFd)
2455 {
2456  Port *port;
2457 
2458  if (!(port = (Port *) calloc(1, sizeof(Port))))
2459  {
2460  ereport(LOG,
2461  (errcode(ERRCODE_OUT_OF_MEMORY),
2462  errmsg("out of memory")));
2463  ExitPostmaster(1);
2464  }
2465 
2466  if (StreamConnection(serverFd, port) != STATUS_OK)
2467  {
2468  if (port->sock != PGINVALID_SOCKET)
2469  StreamClose(port->sock);
2470  ConnFree(port);
2471  return NULL;
2472  }
2473 
2474  return port;
2475 }
2476 
2477 
2478 /*
2479  * ConnFree -- free a local connection data structure
2480  *
2481  * Caller has already closed the socket if any, so there's not much
2482  * to do here.
 *
 * The only action is free(port); nothing is returned.
 *
 * NOTE(review): listing line 2485, which should hold the function name and
 * parameter list, is missing from this extract -- confirm against the
 * upstream file.
2483  */
2484 static void
2486 {
2487  free(port);
2488 }
2489 
2490 
2491 /*
2492  * ClosePostmasterPorts -- close all the postmaster's open sockets
2493  *
2494  * This is called during child process startup to release file descriptors
2495  * that are not needed by that child process. The postmaster still has
2496  * them open, of course.
2497  *
2498  * Note: we pass am_syslogger as a boolean because we don't want to set
2499  * the global variable yet when this is called.
 *
 * When am_syslogger is false, the read side of the syslogger pipe
 * (syslogPipe[0]) is closed as well; when it is true that descriptor is
 * left alone.
 *
 * NOTE(review): several listing lines are missing from this extract (2507,
 * 2518, 2520, 2522, 2524, 2538-2539), so some of the calls that actually
 * release the descriptors are not visible here -- confirm against the
 * upstream file.
2500  */
2501 void
2502 ClosePostmasterPorts(bool am_syslogger)
2503 {
2504  /* Release resources held by the postmaster's WaitEventSet. */
2505  if (pm_wait_set)
2506  {
 /* NOTE(review): the release call (listing line 2507) is not visible */
2508  pm_wait_set = NULL;
2509  }
2510 
2511 #ifndef WIN32
2512 
2513  /*
2514  * Close the write end of postmaster death watch pipe. It's important to
2515  * do this as early as possible, so that if postmaster dies, others won't
2516  * think that it's still running because we're holding the pipe open.
2517  */
 /* NOTE(review): the close attempt and its failure test are not visible */
2519  ereport(FATAL,
2521  errmsg_internal("could not close postmaster death monitoring pipe in child process: %m")));
2523  /* Notify fd.c that we released one pipe FD. */
2525 #endif
2526 
2527  /*
2528  * Close the postmaster's listen sockets. These aren't tracked by fd.c,
2529  * so we don't call ReleaseExternalFD() here.
2530  *
2531  * The listen sockets are marked as FD_CLOEXEC, so this isn't needed in
2532  * EXEC_BACKEND mode.
2533  */
2534 #ifndef EXEC_BACKEND
2535  if (ListenSockets)
2536  {
2537  for (int i = 0; i < NumListenSockets; i++)
 /* NOTE(review): the loop body (listing lines 2538-2539) is not visible */
2540  }
 /* Forget the listen sockets in this process */
2541  NumListenSockets = 0;
2542  ListenSockets = NULL;
2543 #endif
2544 
2545  /*
2546  * If using syslogger, close the read side of the pipe. We don't bother
2547  * tracking this in fd.c, either.
2548  */
2549  if (!am_syslogger)
2550  {
2551 #ifndef WIN32
2552  if (syslogPipe[0] >= 0)
2553  close(syslogPipe[0]);
2554  syslogPipe[0] = -1;
2555 #else
2556  if (syslogPipe[0])
2557  CloseHandle(syslogPipe[0]);
2558  syslogPipe[0] = 0;
2559 #endif
2560  }
2561 
2562 #ifdef USE_BONJOUR
2563  /* If using Bonjour, close the connection to the mDNS daemon */
2564  if (bonjour_sdref)
2565  close(DNSServiceRefSockFD(bonjour_sdref));
2566 #endif
2567 }
2568 
2569 
2570 /*
2571  * InitProcessGlobals -- set MyProcPid, MyStartTime[stamp], random seeds
2572  *
2573  * Called early in the postmaster and every backend.
 *
 * NOTE(review): listing lines 2576, 2579-2580, 2587, 2602 and 2610 are
 * missing from this extract. Only the MyProcPid assignment and the
 * computation of the fallback seed value are visible; the start-time
 * assignments and the calls that consume the seed are presumably on the
 * missing lines -- confirm against the upstream file.
2574  */
2575 void
2577 {
2578  MyProcPid = getpid();
2581 
2582  /*
2583  * Set a different global seed in every process. We want something
2584  * unpredictable, so if possible, use high-quality random bits for the
2585  * seed. Otherwise, fall back to a seed based on timestamp and PID.
2586  */
 /* NOTE(review): the condition selecting the fallback path is not visible */
2588  {
2589  uint64 rseed;
2590 
2591  /*
2592  * Since PIDs and timestamps tend to change more frequently in their
2593  * least significant bits, shift the timestamp left to allow a larger
2594  * total number of seeds in a given time period. Since that would
2595  * leave only 20 bits of the timestamp that cycle every ~1 second,
2596  * also mix in some higher bits.
2597  */
2598  rseed = ((uint64) MyProcPid) ^
2599  ((uint64) MyStartTimestamp << 12) ^
2600  ((uint64) MyStartTimestamp >> 20);
2601 
 /* NOTE(review): the statement that uses rseed is not visible */
2603  }
2604 
2605  /*
2606  * Also make sure that we've set a good seed for random(3). Use of that
2607  * is deprecated in core Postgres, but extensions might use it.
2608  */
2609 #ifndef WIN32
2611 #endif
2612 }
2613 
2614 /*
2615  * Child processes use SIGUSR1 to notify us of 'pmsignals'. pg_ctl uses
2616  * SIGUSR1 to ask postmaster to check for logrotate and promote files.
 *
 * The handler only records the event by setting pending_pm_pmsignal and
 * then sets MyLatch; no other work is done in the visible body.
 *
 * NOTE(review): listing line 2619 (function name and parameter list) is
 * missing from this extract.
2617  */
2618 static void
2620 {
2621  pending_pm_pmsignal = true;
2622  SetLatch(MyLatch);
2623 }
2624 
2625 /*
2626  * pg_ctl uses SIGHUP to request a reload of the configuration files.
 *
 * NOTE(review): listing lines 2629 (function name and parameter list) and
 * 2631 are missing from this extract. Only the SetLatch() call is visible;
 * line 2631 presumably records the pending request -- confirm against the
 * upstream file.
2627  */
2628 static void
2630 {
2632  SetLatch(MyLatch);
2633 }
2634 
2635 /*
2636  * Re-read config files, and tell children to do same.
 *
 * Clears pending_pm_reload_request first. The reload itself is performed
 * only while Shutdown <= SmartShutdown; in that case the authentication
 * files are reloaded through load_hba() and load_ident() (a failure of
 * either is only logged), and with USE_SSL the SSL state is re-initialized
 * or torn down depending on EnableSSL.
 *
 * NOTE(review): listing line 2639 (function name and parameter list), lines
 * 2650-2651, and the statement following each "if (...PID != 0)" test are
 * missing from this extract. Those statements presumably forward the reload
 * request to the respective child -- confirm against the upstream file.
2637  */
2638 static void
2640 {
2641  pending_pm_reload_request = false;
2642 
2643  ereport(DEBUG2,
2644  (errmsg_internal("postmaster received reload request signal")));
2645 
2646  if (Shutdown <= SmartShutdown)
2647  {
2648  ereport(LOG,
2649  (errmsg("received SIGHUP, reloading configuration files")));
2652  if (StartupPID != 0)
2654  if (BgWriterPID != 0)
2656  if (CheckpointerPID != 0)
2658  if (WalWriterPID != 0)
2660  if (WalReceiverPID != 0)
2662  if (WalSummarizerPID != 0)
2664  if (AutoVacPID != 0)
2666  if (PgArchPID != 0)
2668  if (SysLoggerPID != 0)
2670  if (SlotSyncWorkerPID != 0)
2672 
2673  /* Reload authentication config files too */
2674  if (!load_hba())
2675  ereport(LOG,
2676  /* translator: %s is a configuration file */
2677  (errmsg("%s was not reloaded", HbaFileName)));
2678 
2679  if (!load_ident())
2680  ereport(LOG,
2681  (errmsg("%s was not reloaded", IdentFileName)));
2682 
2683 #ifdef USE_SSL
2684  /* Reload SSL configuration as well */
2685  if (EnableSSL)
2686  {
 /* On failure LoadedSSL is left unchanged and the failure is logged */
2687  if (secure_initialize(false) == 0)
2688  LoadedSSL = true;
2689  else
2690  ereport(LOG,
2691  (errmsg("SSL configuration was not reloaded")));
2692  }
2693  else
2694  {
2695  secure_destroy();
2696  LoadedSSL = false;
2697  }
2698 #endif
2699 
2700 #ifdef EXEC_BACKEND
2701  /* Update the starting-point file for future children */
2702  write_nondefault_variables(PGC_SIGHUP);
2703 #endif
2704  }
2705 }
2706 
2707 /*
2708  * pg_ctl uses SIGTERM, SIGINT and SIGQUIT to request different types of
2709  * shutdown.
 *
 * The handler switches on postgres_signal_arg and finishes by setting
 * MyLatch.
 *
 * NOTE(review): listing line 2712 (function name and parameter list) and
 * the statements inside each case (2718, 2721-2722, 2725-2726) are missing
 * from this extract; they presumably record which kind of shutdown was
 * requested -- confirm against the upstream file.
2710  */
2711 static void
2713 {
2714  switch (postgres_signal_arg)
2715  {
2716  case SIGTERM:
2717  /* smart is implied if the other two flags aren't set */
2719  break;
2720  case SIGINT:
2723  break;
2724  case SIGQUIT:
2727  break;
2728  }
2729  SetLatch(MyLatch);
2730 }
2731 
2732 /*
2733  * Process shutdown request.
 *
 * Picks one shutdown mode (SmartShutdown, FastShutdown or
 * ImmediateShutdown) and acts on it. Each case is skipped when the global
 * Shutdown is already at that level or beyond, so a request never
 * downgrades a shutdown that is already in progress.
 *
 * NOTE(review): many listing lines are missing from this extract, among
 * them the function name line (2736), the tests that choose the mode
 * (2750-2758), and, in every case, the statement right after the "break"
 * guard plus the final call before the closing "break". Comments below
 * describe only what is visible -- confirm the rest against the upstream
 * file.
2734  */
2735 static void
2737 {
2738  int mode;
2739 
2740  ereport(DEBUG2,
2741  (errmsg_internal("postmaster received shutdown request signal")));
2742 
2744 
2745  /*
2746  * If more than one shutdown request signal arrived since the last server
2747  * loop, take the one that is the most immediate. That matches the
2748  * priority that would apply if we processed them one by one in any order.
2749  */
2751  {
2755  }
2757  {
2759  mode = FastShutdown;
2760  }
2761  else
2762  mode = SmartShutdown;
2763 
2764  switch (mode)
2765  {
2766  case SmartShutdown:
2767 
2768  /*
2769  * Smart Shutdown:
2770  *
2771  * Wait for children to end their work, then shut down.
2772  */
2773  if (Shutdown >= SmartShutdown)
2774  break;
2776  ereport(LOG,
2777  (errmsg("received smart shutdown request")));
2778 
2779  /* Report status */
2781 #ifdef USE_SYSTEMD
2782  sd_notify(0, "STOPPING=1");
2783 #endif
2784 
2785  /*
2786  * If we reached normal running, we go straight to waiting for
2787  * client backends to exit. If already in PM_STOP_BACKENDS or a
2788  * later state, do not change it.
2789  */
2790  if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
2791  connsAllowed = false;
2792  else if (pmState == PM_STARTUP || pmState == PM_RECOVERY)
2793  {
2794  /* There should be no clients, so proceed to stop children */
2796  }
2797 
2798  /*
2799  * Now wait for online backup mode to end and backends to exit. If
2800  * that is already the case, PostmasterStateMachine will take the
2801  * next step.
2802  */
2804  break;
2805 
2806  case FastShutdown:
2807 
2808  /*
2809  * Fast Shutdown:
2810  *
2811  * Abort all children with SIGTERM (rollback active transactions
2812  * and exit) and shut down when they are gone.
2813  */
2814  if (Shutdown >= FastShutdown)
2815  break;
2817  ereport(LOG,
2818  (errmsg("received fast shutdown request")));
2819 
2820  /* Report status */
2822 #ifdef USE_SYSTEMD
2823  sd_notify(0, "STOPPING=1");
2824 #endif
2825 
2826  if (pmState == PM_STARTUP || pmState == PM_RECOVERY)
2827  {
2828  /* Just shut down background processes silently */
2830  }
2831  else if (pmState == PM_RUN ||
2833  {
2834  /* Report that we're about to zap live client sessions */
2835  ereport(LOG,
2836  (errmsg("aborting any active transactions")));
2838  }
2839 
2840  /*
2841  * PostmasterStateMachine will issue any necessary signals, or
2842  * take the next step if no child processes need to be killed.
2843  */
2845  break;
2846 
2847  case ImmediateShutdown:
2848 
2849  /*
2850  * Immediate Shutdown:
2851  *
2852  * abort all children with SIGQUIT, wait for them to exit,
2853  * terminate remaining ones with SIGKILL, then exit without
2854  * attempting to properly shut down the database system.
2855  */
2856  if (Shutdown >= ImmediateShutdown)
2857  break;
2859  ereport(LOG,
2860  (errmsg("received immediate shutdown request")));
2861 
2862  /* Report status */
2864 #ifdef USE_SYSTEMD
2865  sd_notify(0, "STOPPING=1");
2866 #endif
2867 
2868  /* tell children to shut down ASAP */
2869  /* (note we don't apply send_abort_for_crash here) */
2873 
2874  /* set stopwatch for them to die */
2875  AbortStartTime = time(NULL);
2876 
2877  /*
2878  * Now wait for backends to exit. If there are none,
2879  * PostmasterStateMachine will take the next step.
2880  */
2882  break;
2883  }
2884 }
2885 
/*
 * Record that a child-exit notification is pending: set
 * pending_pm_child_exit and set MyLatch. No reaping is done here.
 *
 * NOTE(review): listing line 2887 (function name and parameter list) is
 * missing from this extract.
 */
2886 static void
2888 {
2889  pending_pm_child_exit = true;
2890  SetLatch(MyLatch);
2891 }
2892 
2893 /*
2894  * Cleanup after a child process dies.
 *
 * Clears pending_pm_child_exit, then collects every already-exited child
 * with waitpid(-1, ..., WNOHANG) until it stops returning a positive pid.
 * Each collected pid is compared, in this order, against StartupPID,
 * BgWriterPID, CheckpointerPID, WalWriterPID, WalReceiverPID,
 * WalSummarizerPID, AutoVacPID, PgArchPID, SysLoggerPID and
 * SlotSyncWorkerPID; the first match is handled and the loop continues.
 * A pid matching none of them is offered to CleanupBackgroundWorker() and,
 * if that returns false, to CleanupBackend().
 *
 * NOTE(review): many listing lines are missing from this extract (the
 * function name line 2897, most state transitions, the calls that start
 * replacement children, and the final call after the loop). Where an "if"
 * is directly followed by an unrelated statement below, the guarded
 * statement is one of the missing lines -- confirm against the upstream
 * file before relying on the control flow as shown.
2895  */
2896 static void
2898 {
2899  int pid; /* process id of dead child process */
2900  int exitstatus; /* its exit status */
2901 
2902  pending_pm_child_exit = false;
2903 
2904  ereport(DEBUG4,
2905  (errmsg_internal("reaping dead processes")));
2906 
2907  while ((pid = waitpid(-1, &exitstatus, WNOHANG)) > 0)
2908  {
2909  /*
2910  * Check if this child was a startup process.
2911  */
2912  if (pid == StartupPID)
2913  {
2914  StartupPID = 0;
2915 
2916  /*
2917  * Startup process exited in response to a shutdown request (or it
2918  * completed normally regardless of the shutdown request).
2919  */
2920  if (Shutdown > NoShutdown &&
2921  (EXIT_STATUS_0(exitstatus) || EXIT_STATUS_1(exitstatus)))
2922  {
2925  /* PostmasterStateMachine logic does the rest */
2926  continue;
2927  }
2928 
2929  if (EXIT_STATUS_3(exitstatus))
2930  {
2931  ereport(LOG,
2932  (errmsg("shutdown at recovery target")));
2935  TerminateChildren(SIGTERM);
2937  /* PostmasterStateMachine logic does the rest */
2938  continue;
2939  }
2940 
2941  /*
2942  * Unexpected exit of startup process (including FATAL exit)
2943  * during PM_STARTUP is treated as catastrophic. There are no
2944  * other processes running yet, so we can just exit.
2945  */
2946  if (pmState == PM_STARTUP &&
2948  !EXIT_STATUS_0(exitstatus))
2949  {
2950  LogChildExit(LOG, _("startup process"),
2951  pid, exitstatus);
2952  ereport(LOG,
2953  (errmsg("aborting startup due to startup process failure")));
2954  ExitPostmaster(1);
2955  }
2956 
2957  /*
2958  * After PM_STARTUP, any unexpected exit (including FATAL exit) of
2959  * the startup process is catastrophic, so kill other children,
2960  * and set StartupStatus so we don't try to reinitialize after
2961  * they're gone. Exception: if StartupStatus is STARTUP_SIGNALED,
2962  * then we previously sent the startup process a SIGQUIT; so
2963  * that's probably the reason it died, and we do want to try to
2964  * restart in that case.
2965  *
2966  * This stanza also handles the case where we sent a SIGQUIT
2967  * during PM_STARTUP due to some dead_end child crashing: in that
2968  * situation, if the startup process dies on the SIGQUIT, we need
2969  * to transition to PM_WAIT_BACKENDS state which will allow
2970  * PostmasterStateMachine to restart the startup process. (On the
2971  * other hand, the startup process might complete normally, if we
2972  * were too late with the SIGQUIT. In that case we'll fall
2973  * through and commence normal operations.)
2974  */
2975  if (!EXIT_STATUS_0(exitstatus))
2976  {
2978  {
2980  if (pmState == PM_STARTUP)
2982  }
2983  else
2985  HandleChildCrash(pid, exitstatus,
2986  _("startup process"));
2987  continue;
2988  }
2989 
2990  /*
2991  * Startup succeeded, commence normal operations
2992  */
2994  FatalError = false;
2995  AbortStartTime = 0;
2996  ReachedNormalRunning = true;
2997  pmState = PM_RUN;
2998  connsAllowed = true;
2999 
3000  /*
3001  * Crank up the background tasks, if we didn't do that already
3002  * when we entered consistent recovery state. It doesn't matter
3003  * if this fails, we'll just try again later.
3004  */
3005  if (CheckpointerPID == 0)
3007  if (BgWriterPID == 0)
3009  if (WalWriterPID == 0)
3012 
3013  /*
3014  * Likewise, start other special children as needed. In a restart
3015  * situation, some of them may be alive already.
3016  */
3019  if (PgArchStartupAllowed() && PgArchPID == 0)
3022 
3023  /* workers may be scheduled to start now */
3025 
3026  /* at this point we are really open for business */
3027  ereport(LOG,
3028  (errmsg("database system is ready to accept connections")));
3029 
3030  /* Report status */
3032 #ifdef USE_SYSTEMD
3033  sd_notify(0, "READY=1");
3034 #endif
3035 
3036  continue;
3037  }
3038 
3039  /*
3040  * Was it the bgwriter? Normal exit can be ignored; we'll start a new
3041  * one at the next iteration of the postmaster's main loop, if
3042  * necessary. Any other exit condition is treated as a crash.
3043  */
3044  if (pid == BgWriterPID)
3045  {
3046  BgWriterPID = 0;
3047  if (!EXIT_STATUS_0(exitstatus))
3048  HandleChildCrash(pid, exitstatus,
3049  _("background writer process"));
3050  continue;
3051  }
3052 
3053  /*
3054  * Was it the checkpointer?
3055  */
3056  if (pid == CheckpointerPID)
3057  {
3058  CheckpointerPID = 0;
3059  if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN)
3060  {
3061  /*
3062  * OK, we saw normal exit of the checkpointer after it's been
3063  * told to shut down. We expect that it wrote a shutdown
3064  * checkpoint. (If for some reason it didn't, recovery will
3065  * occur on next postmaster start.)
3066  *
3067  * At this point we should have no normal backend children
3068  * left (else we'd not be in PM_SHUTDOWN state) but we might
3069  * have dead_end children to wait for.
3070  *
3071  * If we have an archiver subprocess, tell it to do a last
3072  * archive cycle and quit. Likewise, if we have walsender
3073  * processes, tell them to send any remaining WAL and quit.
3074  */
3076 
3077  /* Waken archiver for the last time */
3078  if (PgArchPID != 0)
3080 
3081  /*
3082  * Waken walsenders for the last time. No regular backends
3083  * should be around anymore.
3084  */
3086 
3088  }
3089  else
3090  {
3091  /*
3092  * Any unexpected exit of the checkpointer (including FATAL
3093  * exit) is treated as a crash.
3094  */
3095  HandleChildCrash(pid, exitstatus,
3096  _("checkpointer process"));
3097  }
3098 
3099  continue;
3100  }
3101 
3102  /*
3103  * Was it the wal writer? Normal exit can be ignored; we'll start a
3104  * new one at the next iteration of the postmaster's main loop, if
3105  * necessary. Any other exit condition is treated as a crash.
3106  */
3107  if (pid == WalWriterPID)
3108  {
3109  WalWriterPID = 0;
3110  if (!EXIT_STATUS_0(exitstatus))
3111  HandleChildCrash(pid, exitstatus,
3112  _("WAL writer process"));
3113  continue;
3114  }
3115 
3116  /*
3117  * Was it the wal receiver? If exit status is zero (normal) or one
3118  * (FATAL exit), we assume everything is all right just like normal
3119  * backends. (If we need a new wal receiver, we'll start one at the
3120  * next iteration of the postmaster's main loop.)
3121  */
3122  if (pid == WalReceiverPID)
3123  {
3124  WalReceiverPID = 0;
3125  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3126  HandleChildCrash(pid, exitstatus,
3127  _("WAL receiver process"));
3128  continue;
3129  }
3130 
3131  /*
3132  * Was it the wal summarizer? Normal exit can be ignored; we'll start
3133  * a new one at the next iteration of the postmaster's main loop, if
3134  * necessary. Any other exit condition is treated as a crash.
3135  */
3136  if (pid == WalSummarizerPID)
3137  {
3138  WalSummarizerPID = 0;
3139  if (!EXIT_STATUS_0(exitstatus))
3140  HandleChildCrash(pid, exitstatus,
3141  _("WAL summarizer process"));
3142  continue;
3143  }
3144 
3145  /*
3146  * Was it the autovacuum launcher? Normal exit can be ignored; we'll
3147  * start a new one at the next iteration of the postmaster's main
3148  * loop, if necessary. Any other exit condition is treated as a
3149  * crash.
3150  */
3151  if (pid == AutoVacPID)
3152  {
3153  AutoVacPID = 0;
3154  if (!EXIT_STATUS_0(exitstatus))
3155  HandleChildCrash(pid, exitstatus,
3156  _("autovacuum launcher process"));
3157  continue;
3158  }
3159 
3160  /*
3161  * Was it the archiver? If exit status is zero (normal) or one (FATAL
3162  * exit), we assume everything is all right just like normal backends
3163  * and just try to restart a new one so that we immediately retry
3164  * archiving remaining files. (If fail, we'll try again in future
3165  * cycles of the postmaster's main loop.) Unless we were waiting for
3166  * it to shut down; don't restart it in that case, and
3167  * PostmasterStateMachine() will advance to the next shutdown step.
3168  */
3169  if (pid == PgArchPID)
3170  {
3171  PgArchPID = 0;
3172  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3173  HandleChildCrash(pid, exitstatus,
3174  _("archiver process"));
3175  if (PgArchStartupAllowed())
3177  continue;
3178  }
3179 
3180  /* Was it the system logger? If so, try to start a new one */
3181  if (pid == SysLoggerPID)
3182  {
3183  SysLoggerPID = 0;
3184  /* for safety's sake, launch new logger *first* */
3186  if (!EXIT_STATUS_0(exitstatus))
3187  LogChildExit(LOG, _("system logger process"),
3188  pid, exitstatus);
3189  continue;
3190  }
3191 
3192  /*
3193  * Was it the slot sync worker? Normal exit or FATAL exit can be
3194  * ignored (FATAL can be caused by libpqwalreceiver on receiving
3195  * shutdown request by the startup process during promotion); we'll
3196  * start a new one at the next iteration of the postmaster's main
3197  * loop, if necessary. Any other exit condition is treated as a crash.
3198  */
3199  if (pid == SlotSyncWorkerPID)
3200  {
3201  SlotSyncWorkerPID = 0;
3202  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3203  HandleChildCrash(pid, exitstatus,
3204  _("slot sync worker process"));
3205  continue;
3206  }
3207 
3208  /* Was it one of our background workers? */
3209  if (CleanupBackgroundWorker(pid, exitstatus))
3210  {
3211  /* have it be restarted */
3212  HaveCrashedWorker = true;
3213  continue;
3214  }
3215 
3216  /*
3217  * Else do standard backend child cleanup.
3218  */
3219  CleanupBackend(pid, exitstatus);
3220  } /* loop over pending child-death reports */
3221 
3222  /*
3223  * After cleaning out the SIGCHLD queue, see if we have any state changes
3224  * or actions to make.
3225  */
3227 }
3228 
3229 /*
3230  * Scan the bgworkers list and see if the given PID (which has just stopped
3231  * or crashed) is in it. Handle its shutdown if so, and return true. If not a
3232  * bgworker, return false.
3233  *
3234  * This is heavily based on CleanupBackend. One important difference is that
3235  * we don't know yet that the dying process is a bgworker, so we must be silent
3236  * until we're sure it is.
 *
 * Note that true is also returned on the two paths that hand the exit over
 * to HandleChildCrash(): the PID did belong to a bgworker in those cases.
 *
 * NOTE(review): this listing has gaps in its embedded numbering inside this
 * function (3239, 3245, 3267, 3291 and 3310 are absent), including the line
 * that carries the function name. Check the complete file before relying on
 * the control flow as shown here.
3237  */
3238 static bool
3240  int exitstatus) /* child's exit status */
3241 {
3242  char namebuf[MAXPGPATH];
3243  slist_mutable_iter iter;
3244 
3246  {
3247  RegisteredBgWorker *rw;
3248 
3249  rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
3250 
  /* Skip entries that do not belong to the process that just exited. */
3251  if (rw->rw_pid != pid)
3252  continue;
3253 
3254 #ifdef WIN32
3255  /* see CleanupBackend */
3256  if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3257  exitstatus = 0;
3258 #endif
3259 
  /* Build the translated description used in all messages below. */
3260  snprintf(namebuf, MAXPGPATH, _("background worker \"%s\""),
3261  rw->rw_worker.bgw_type);
3262 
3263 
3264  if (!EXIT_STATUS_0(exitstatus))
3265  {
3266  /* Record timestamp, so we know when to restart the worker. */
3268  }
3269  else
3270  {
3271  /* Zero exit status means terminate */
3272  rw->rw_crashed_at = 0;
3273  rw->rw_terminate = true;
3274  }
3275 
3276  /*
3277  * Additionally, just like a backend, any exit status other than 0 or
3278  * 1 is considered a crash and causes a system-wide restart.
3279  */
3280  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3281  {
3282  HandleChildCrash(pid, exitstatus, namebuf);
3283  return true;
3284  }
3285 
3286  /*
3287  * We must release the postmaster child slot. If the worker failed to
3288  * do so, it did not clean up after itself, requiring a crash-restart
3289  * cycle.
3290  */
3292  {
3293  HandleChildCrash(pid, exitstatus, namebuf);
3294  return true;
3295  }
3296 
3297  /* Get it out of the BackendList and clear out remaining data */
3298  dlist_delete(&rw->rw_backend->elem);
3299 #ifdef EXEC_BACKEND
3300  ShmemBackendArrayRemove(rw->rw_backend);
3301 #endif
3302 
3303  /*
3304  * It's possible that this background worker started some OTHER
3305  * background worker and asked to be notified when that worker started
3306  * or stopped. If so, cancel any notifications destined for the
3307  * now-dead backend.
3308  */
3309  if (rw->rw_backend->bgworker_notify)
3311  pfree(rw->rw_backend);
3312  rw->rw_backend = NULL;
3313  rw->rw_pid = 0;
3314  rw->rw_child_slot = 0;
3315  ReportBackgroundWorkerExit(&iter); /* report child death */
3316 
  /* A clean (zero) exit is only logged at DEBUG1; anything else at LOG. */
3317  LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG,
3318  namebuf, pid, exitstatus);
3319 
3320  return true;
3321  }
3322 
  /* No registered worker has this PID. */
3323  return false;
3324 }
3325 
3326 /*
3327  * CleanupBackend -- cleanup after terminated backend.
3328  *
3329  * Remove all local state associated with backend.
3330  *
3331  * If you change this, see also CleanupBackgroundWorker.
 *
 * The exit is always logged at DEBUG2 first. An exit status other than 0 or
 * 1 is passed to HandleChildCrash() and nothing else is done here; otherwise
 * the Backend entry whose pid matches is unlinked from its list and freed.
 *
 * NOTE(review): this listing has gaps in its embedded numbering inside this
 * function (3334, 3370, 3378 and 3401 are absent), including the line that
 * carries the function name and the loop header. Check the complete file
 * before relying on the control flow as shown here.
3332  */
3333 static void
3335  int exitstatus) /* child's exit status. */
3336 {
3337  dlist_mutable_iter iter;
3338 
3339  LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
3340 
3341  /*
3342  * If a backend dies in an ugly way then we must signal all other backends
3343  * to quickdie. If exit status is zero (normal) or one (FATAL exit), we
3344  * assume everything is all right and proceed to remove the backend from
3345  * the active backend list.
3346  */
3347 
3348 #ifdef WIN32
3349 
3350  /*
3351  * On win32, also treat ERROR_WAIT_NO_CHILDREN (128) as nonfatal case,
3352  * since that sometimes happens under load when the process fails to start
3353  * properly (long before it starts using shared memory). Microsoft reports
3354  * it is related to mutex failure:
3355  * http://archives.postgresql.org/pgsql-hackers/2010-09/msg00790.php
3356  */
3357  if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3358  {
  /* Log it at LOG level, then carry on as if the exit had been clean. */
3359  LogChildExit(LOG, _("server process"), pid, exitstatus);
3360  exitstatus = 0;
3361  }
3362 #endif
3363 
3364  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3365  {
3366  HandleChildCrash(pid, exitstatus, _("server process"));
3367  return;
3368  }
3369 
3371  {
3372  Backend *bp = dlist_container(Backend, elem, iter.cur);
3373 
3374  if (bp->pid == pid)
3375  {
3376  if (!bp->dead_end)
3377  {
3379  {
3380  /*
3381  * Uh-oh, the child failed to clean itself up. Treat as a
3382  * crash after all.
3383  */
3384  HandleChildCrash(pid, exitstatus, _("server process"));
3385  return;
3386  }
3387 #ifdef EXEC_BACKEND
3388  ShmemBackendArrayRemove(bp);
3389 #endif
3390  }
3391  if (bp->bgworker_notify)
3392  {
3393  /*
3394  * This backend may have been slated to receive SIGUSR1 when
3395  * some background worker started or stopped. Cancel those
3396  * notifications, as we don't want to signal PIDs that are not
3397  * PostgreSQL backends. This gets skipped in the (probably
3398  * very common) case where the backend has never requested any
3399  * such notifications.
3400  */
3402  }
  /* Unlink and free the entry; the search stops at the first match. */
3403  dlist_delete(iter.cur);
3404  pfree(bp);
3405  break;
3406  }
3407  }
3408 }
3409 
3410 /*
3411  * HandleChildCrash -- cleanup after failed backend, bgwriter, checkpointer,
3412  * walwriter, autovacuum, archiver, slot sync worker, or background worker.
3413  *
3414  * The objectives here are to clean up our local state about the child
3415  * process, and to signal all other remaining children to quickdie.
 *
 * pid and exitstatus identify the dead child and its wait status; procname
 * is the description passed on to LogChildExit().
 *
 * NOTE(review): this listing has many gaps in its embedded numbering inside
 * this function (for example 3439, 3443, 3455, 3480, 3491, 3528-3529, 3536,
 * 3542, 3548, 3554, 3560, 3566, 3572, 3578, 3589 and 3591 are absent). Both
 * loop headers and most of the statements guarded by "take_action" are among
 * the missing lines, so check the complete file before relying on the
 * control flow as shown here.
3416  */
3417 static void
3418 HandleChildCrash(int pid, int exitstatus, const char *procname)
3419 {
3420  dlist_mutable_iter iter;
3421  slist_iter siter;
3422  Backend *bp;
3423  bool take_action;
3424 
3425  /*
3426  * We only log messages and send signals if this is the first process
3427  * crash and we're not doing an immediate shutdown; otherwise, we're only
3428  * here to update postmaster's idea of live processes. If we have already
3429  * signaled children, nonzero exit status is to be expected, so don't
3430  * clutter log.
3431  */
3432  take_action = !FatalError && Shutdown != ImmediateShutdown;
3433 
3434  if (take_action)
3435  {
3436  LogChildExit(LOG, procname, pid, exitstatus);
3437  ereport(LOG,
3438  (errmsg("terminating any other active server processes")));
3440  }
3441 
3442  /* Process background workers. */
3444  {
3445  RegisteredBgWorker *rw;
3446 
3447  rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
3448  if (rw->rw_pid == 0)
3449  continue; /* not running */
3450  if (rw->rw_pid == pid)
3451  {
3452  /*
3453  * Found entry for freshly-dead worker, so remove it.
3454  */
3456  dlist_delete(&rw->rw_backend->elem);
3457 #ifdef EXEC_BACKEND
3458  ShmemBackendArrayRemove(rw->rw_backend);
3459 #endif
3460  pfree(rw->rw_backend);
3461  rw->rw_backend = NULL;
3462  rw->rw_pid = 0;
3463  rw->rw_child_slot = 0;
3464  /* don't reset crashed_at */
3465  /* don't report child stop, either */
3466  /* Keep looping so we can signal remaining workers */
3467  }
3468  else
3469  {
3470  /*
3471  * This worker is still alive. Unless we did so already, tell it
3472  * to commit hara-kiri.
3473  */
3474  if (take_action)
3475  sigquit_child(rw->rw_pid);
3476  }
3477  }
3478 
3479  /* Process regular backends */
3481  {
3482  bp = dlist_container(Backend, elem, iter.cur);
3483 
3484  if (bp->pid == pid)
3485  {
3486  /*
3487  * Found entry for freshly-dead backend, so remove it.
3488  */
3489  if (!bp->dead_end)
3490  {
3492 #ifdef EXEC_BACKEND
3493  ShmemBackendArrayRemove(bp);
3494 #endif
3495  }
3496  dlist_delete(iter.cur);
3497  pfree(bp);
3498  /* Keep looping so we can signal remaining backends */
3499  }
3500  else
3501  {
3502  /*
3503  * This backend is still alive. Unless we did so already, tell it
3504  * to commit hara-kiri.
3505  *
3506  * We could exclude dead_end children here, but at least when
3507  * sending SIGABRT it seems better to include them.
3508  *
3509  * Background workers were already processed above; ignore them
3510  * here.
3511  */
3512  if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
3513  continue;
3514 
3515  if (take_action)
3516  sigquit_child(bp->pid);
3517  }
3518  }
3519 
  /*
   * Each auxiliary process below follows the same pattern: if it is the one
   * that died, forget its PID; otherwise, if it is running and take_action
   * is set, act on it. NOTE(review): the statement guarded by each
   * "else if (... && take_action)" test is missing from this listing.
   */
3520  /* Take care of the startup process too */
3521  if (pid == StartupPID)
3522  {
3523  StartupPID = 0;
3524  /* Caller adjusts StartupStatus, so don't touch it here */
3525  }
3526  else if (StartupPID != 0 && take_action)
3527  {
3530  }
3531 
3532  /* Take care of the bgwriter too */
3533  if (pid == BgWriterPID)
3534  BgWriterPID = 0;
3535  else if (BgWriterPID != 0 && take_action)
3537 
3538  /* Take care of the checkpointer too */
3539  if (pid == CheckpointerPID)
3540  CheckpointerPID = 0;
3541  else if (CheckpointerPID != 0 && take_action)
3543 
3544  /* Take care of the walwriter too */
3545  if (pid == WalWriterPID)
3546  WalWriterPID = 0;
3547  else if (WalWriterPID != 0 && take_action)
3549 
3550  /* Take care of the walreceiver too */
3551  if (pid == WalReceiverPID)
3552  WalReceiverPID = 0;
3553  else if (WalReceiverPID != 0 && take_action)
3555 
3556  /* Take care of the walsummarizer too */
3557  if (pid == WalSummarizerPID)
3558  WalSummarizerPID = 0;
3559  else if (WalSummarizerPID != 0 && take_action)
3561 
3562  /* Take care of the autovacuum launcher too */
3563  if (pid == AutoVacPID)
3564  AutoVacPID = 0;
3565  else if (AutoVacPID != 0 && take_action)
3567 
3568  /* Take care of the archiver too */
3569  if (pid == PgArchPID)
3570  PgArchPID = 0;
3571  else if (PgArchPID != 0 && take_action)
3573 
3574  /* Take care of the slot sync worker too */
3575  if (pid == SlotSyncWorkerPID)
3576  SlotSyncWorkerPID = 0;
3577  else if (SlotSyncWorkerPID != 0 && take_action)
3579 
3580  /* We do NOT restart the syslogger */
3581 
  /* FatalError is left alone while an immediate shutdown is in progress. */
3582  if (Shutdown != ImmediateShutdown)
3583  FatalError = true;
3584 
3585  /* We now transition into a state of waiting for children to die */
3586  if (pmState == PM_RECOVERY ||
3587  pmState == PM_HOT_STANDBY ||
3588  pmState == PM_RUN ||
3590  pmState == PM_SHUTDOWN)
3592 
3593  /*
3594  * .. and if this doesn't happen quickly enough, now the clock is ticking
3595  * for us to kill them without mercy.
3596  */
  /* Only the first crash starts the clock; a nonzero value is kept. */
3597  if (AbortStartTime == 0)
3598  AbortStartTime = time(NULL);
3599 }
3600 
3601 /*
3602  * Log the death of a child process.
3603  */
3604 static void
3605 LogChildExit(int lev, const char *procname, int pid, int exitstatus)
3606 {
3607  /*
3608  * size of activity_buffer is arbitrary, but set equal to default
3609  * track_activity_query_size
3610  */
3611  char activity_buffer[1024];
3612  const char *activity = NULL;
3613 
3614  if (!EXIT_STATUS_0(exitstatus))
3615  activity = pgstat_get_crashed_backend_activity(pid,
3616  activity_buffer,
3617  sizeof(activity_buffer));
3618 
3619  if (WIFEXITED(exitstatus))
3620  ereport(lev,
3621 
3622  /*------
3623  translator: %s is a noun phrase describing a child process, such as
3624  "server process" */
3625  (errmsg("%s (PID %d) exited with exit code %d",
3626  procname, pid, WEXITSTATUS(exitstatus)),
3627  activity ? errdetail("Failed process was running: %s", activity) : 0));
3628  else if (WIFSIGNALED(exitstatus))
3629  {
3630 #if defined(WIN32)
3631  ereport(lev,
3632 
3633  /*------
3634  translator: %s is a noun phrase describing a child process, such as
3635  "server process" */
3636  (errmsg("%s (PID %d) was terminated by exception 0x%X",
3637  procname, pid, WTERMSIG(exitstatus)),
3638  errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
3639  activity ? errdetail("Failed process was running: %s", activity) : 0));
3640 #else
3641  ereport(lev,
3642 
3643  /*------
3644  translator: %s is a noun phrase describing a child process, such as
3645  "server process" */
3646  (errmsg("%s (PID %d) was terminated by signal %d: %s",
3647  procname, pid, WTERMSIG(exitstatus),
3648  pg_strsignal(WTERMSIG(exitstatus))),
3649  activity ? errdetail("Failed process was running: %s", activity) : 0));
3650 #endif
3651  }
3652  else
3653  ereport(lev,
3654 
3655  /*------
3656  translator: %s is a noun phrase describing a child process, such as
3657  "server process" */
3658  (errmsg("%s (PID %d) exited with unrecognized status %d",
3659  procname, pid, exitstatus),
3660  activity ? errdetail("Failed process was running: %s", activity) : 0));
3661 }
3662 
3663 /*
3664  * Advance the postmaster's state machine and take actions as appropriate
3665  *
3666  * This is common code for process_pm_shutdown_request(),
3667  * process_pm_child_exit() and process_pm_pmsignal(), which process the signals
3668  * that might mean we need to change state.
 *
 * The function is a sequence of independent "if (pmState == ...)" tests, so
 * a single call can pass through several states one after another.
 *
 * NOTE(review): this listing has a large number of gaps in its embedded
 * numbering inside this function (for example 3671, 3682-3683, 3699, 3703,
 * 3725, 3745, 3751, 3756, 3762, 3777, 3780, 3784, 3800, 3803, 3805, 3821,
 * 3828, 3854, 3870, 3897, 3921-3922, 3925, 3930, 3933, 3935, 3937 and 3943
 * are absent). The function-name line and most of the statements that
 * assign pmState are among the missing lines, so check the complete file
 * before relying on the control flow as shown here.
3669  */
3670 static void
3672 {
3673  /* If we're doing a smart shutdown, try to advance that state. */
3674  if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
3675  {
3676  if (!connsAllowed)
3677  {
3678  /*
3679  * This state ends when we have no normal client backends running.
3680  * Then we're ready to stop other children.
3681  */
3684  }
3685  }
3686 
3687  /*
3688  * If we're ready to do so, signal child processes to shut down. (This
3689  * isn't a persistent state, but treating it as a distinct pmState allows
3690  * us to share this code across multiple shutdown code paths.)
3691  */
3692  if (pmState == PM_STOP_BACKENDS)
3693  {
3694  /*
3695  * Forget any pending requests for background workers, since we're no
3696  * longer willing to launch any new workers. (If additional requests
3697  * arrive, BackgroundWorkerStateChange will reject them.)
3698  */
3700 
3701  /* Signal all backend children except walsenders */
3702  SignalSomeChildren(SIGTERM,
3704  /* and the autovac launcher too */
3705  if (AutoVacPID != 0)
3706  signal_child(AutoVacPID, SIGTERM);
3707  /* and the bgwriter too */
3708  if (BgWriterPID != 0)
3709  signal_child(BgWriterPID, SIGTERM);
3710  /* and the walwriter too */
3711  if (WalWriterPID != 0)
3712  signal_child(WalWriterPID, SIGTERM);
3713  /* If we're in recovery, also stop startup and walreceiver procs */
3714  if (StartupPID != 0)
3715  signal_child(StartupPID, SIGTERM);
3716  if (WalReceiverPID != 0)
3717  signal_child(WalReceiverPID, SIGTERM);
3718  if (WalSummarizerPID != 0)
3719  signal_child(WalSummarizerPID, SIGTERM);
3720  if (SlotSyncWorkerPID != 0)
3721  signal_child(SlotSyncWorkerPID, SIGTERM);
3722  /* checkpointer, archiver, stats, and syslogger may continue for now */
3723 
3724  /* Now transition to PM_WAIT_BACKENDS state to wait for them to die */
3726  }
3727 
3728  /*
3729  * If we are in a state-machine state that implies waiting for backends to
3730  * exit, see if they're all gone, and change state if so.
3731  */
3732  if (pmState == PM_WAIT_BACKENDS)
3733  {
3734  /*
3735  * PM_WAIT_BACKENDS state ends when we have no regular backends
3736  * (including autovac workers), no bgworkers (including unconnected
3737  * ones), and no walwriter, autovac launcher, bgwriter or slot sync
3738  * worker. If we are doing crash recovery or an immediate shutdown
3739  * then we expect the checkpointer to exit as well, otherwise not. The
3740  * stats and syslogger processes are disregarded since they are not
3741  * connected to shared memory; we also disregard dead_end children
3742  * here. Walsenders and archiver are also disregarded, they will be
3743  * terminated later after writing the checkpoint record.
3744  */
3746  StartupPID == 0 &&
3747  WalReceiverPID == 0 &&
3748  WalSummarizerPID == 0 &&
3749  BgWriterPID == 0 &&
3750  (CheckpointerPID == 0 ||
3752  WalWriterPID == 0 &&
3753  AutoVacPID == 0 &&
3754  SlotSyncWorkerPID == 0)
3755  {
3757  {
3758  /*
3759  * Start waiting for dead_end children to die. This state
3760  * change causes ServerLoop to stop creating new ones.
3761  */
3763 
3764  /*
3765  * We already SIGQUIT'd the archiver and stats processes, if
3766  * any, when we started immediate shutdown or entered
3767  * FatalError state.
3768  */
3769  }
3770  else
3771  {
3772  /*
3773  * If we get here, we are proceeding with normal shutdown. All
3774  * the regular children are gone, and it's time to tell the
3775  * checkpointer to do a shutdown checkpoint.
3776  */
3778  /* Start the checkpointer if not running */
3779  if (CheckpointerPID == 0)
3781  /* And tell it to shut down */
3782  if (CheckpointerPID != 0)
3783  {
3785  pmState = PM_SHUTDOWN;
3786  }
3787  else
3788  {
3789  /*
3790  * If we failed to fork a checkpointer, just shut down.
3791  * Any required cleanup will happen at next restart. We
3792  * set FatalError so that an "abnormal shutdown" message
3793  * gets logged when we exit.
3794  *
3795  * We don't consult send_abort_for_crash here, as it's
3796  * unlikely that dumping cores would illuminate the reason
3797  * for checkpointer fork failure.
3798  */
3799  FatalError = true;
3801 
3802  /* Kill the walsenders and archiver too */
3804  if (PgArchPID != 0)
3806  }
3807  }
3808  }
3809  }
3810 
3811  if (pmState == PM_SHUTDOWN_2)
3812  {
3813  /*
3814  * PM_SHUTDOWN_2 state ends when there's no other children than
3815  * dead_end children left. There shouldn't be any regular backends
3816  * left by now anyway; what we're really waiting for is walsenders and
3817  * archiver.
3818  */
3819  if (PgArchPID == 0 && CountChildren(BACKEND_TYPE_ALL) == 0)
3820  {
3822  }
3823  }
3824 
3825  if (pmState == PM_WAIT_DEAD_END)
3826  {
3827  /* Don't allow any new socket connection events. */
3829 
3830  /*
3831  * PM_WAIT_DEAD_END state ends when the BackendList is entirely empty
3832  * (ie, no dead_end children remain), and the archiver is gone too.
3833  *
3834  * The reason we wait for those two is to protect them against a new
3835  * postmaster starting conflicting subprocesses; this isn't an
3836  * ironclad protection, but it at least helps in the
3837  * shutdown-and-immediately-restart scenario. Note that they have
3838  * already been sent appropriate shutdown signals, either during a
3839  * normal state transition leading up to PM_WAIT_DEAD_END, or during
3840  * FatalError processing.
3841  */
3842  if (dlist_is_empty(&BackendList) && PgArchPID == 0)
3843  {
3844  /* These other guys should be dead already */
3845  Assert(StartupPID == 0);
3846  Assert(WalReceiverPID == 0);
3847  Assert(WalSummarizerPID == 0);
3848  Assert(BgWriterPID == 0);
3849  Assert(CheckpointerPID == 0);
3850  Assert(WalWriterPID == 0);
3851  Assert(AutoVacPID == 0);
3852  Assert(SlotSyncWorkerPID == 0);
3853  /* syslogger is not considered here */
3855  }
3856  }
3857 
3858  /*
3859  * If we've been told to shut down, we exit as soon as there are no
3860  * remaining children. If there was a crash, cleanup will occur at the
3861  * next startup. (Before PostgreSQL 8.3, we tried to recover from the
3862  * crash before exiting, but that seems unwise if we are quitting because
3863  * we got SIGTERM from init --- there may well not be time for recovery
3864  * before init decides to SIGKILL us.)
3865  *
3866  * Note that the syslogger continues to run. It will exit when it sees
3867  * EOF on its input pipe, which happens when there are no more upstream
3868  * processes.
3869  */
3871  {
  /* Exit status 1 reports an abnormal shutdown, 0 a normal one. */
3872  if (FatalError)
3873  {
3874  ereport(LOG, (errmsg("abnormal database system shutdown")));
3875  ExitPostmaster(1);
3876  }
3877  else
3878  {
3879  /*
3880  * Normal exit from the postmaster is here. We don't need to log
3881  * anything here, since the UnlinkLockFiles proc_exit callback
3882  * will do so, and that should be the last user-visible action.
3883  */
3884  ExitPostmaster(0);
3885  }
3886  }
3887 
3888  /*
3889  * If the startup process failed, or the user does not want an automatic
3890  * restart after backend crashes, wait for all non-syslogger children to
3891  * exit, and then exit postmaster. We don't try to reinitialize when the
3892  * startup process fails, because more than likely it will just fail again
3893  * and we will keep trying forever.
3894  */
3895  if (pmState == PM_NO_CHILDREN)
3896  {
3898  {
3899  ereport(LOG,
3900  (errmsg("shutting down due to startup process failure")));
3901  ExitPostmaster(1);
3902  }
3903  if (!restart_after_crash)
3904  {
3905  ereport(LOG,
3906  (errmsg("shutting down because restart_after_crash is off")));
3907  ExitPostmaster(1);
3908  }
3909  }
3910 
3911  /*
3912  * If we need to recover from a crash, wait for all non-syslogger children
3913  * to exit, then reset shmem and start the startup process.
3914  */
3915  if (FatalError && pmState == PM_NO_CHILDREN)
3916  {
3917  ereport(LOG,
3918  (errmsg("all server processes terminated; reinitializing")));
3919 
3920  /* remove leftover temporary files after a crash */
3923 
3924  /* allow background workers to immediately restart */
3926 
3927  shmem_exit(1);
3928 
3929  /* re-read control file into local memory */
3931 
3932  /* re-create shared memory and semaphores */
3934 
3936  Assert(StartupPID != 0);
3938  pmState = PM_STARTUP;
3939  /* crash recovery started, reset SIGKILL flag */
3940  AbortStartTime = 0;
3941 
3942  /* start accepting server socket connection events again */
3944  }
3945 }
3946 
3947 
3948 /*
3949  * Send a signal to a postmaster child process
3950  *
3951  * On systems that have setsid(), each child process sets itself up as a
3952  * process group leader. For signals that are generally interpreted in the
3953  * appropriate fashion, we signal the entire process group not just the
3954  * direct child process. This allows us to, for example, SIGQUIT a blocked
3955  * archive_recovery script, or SIGINT a script being run by a backend via
3956  * system().
3957  *
3958  * There is a race condition for recently-forked children: they might not
3959  * have executed setsid() yet. So we signal the child directly as well as
3960  * the group. We assume such a child will handle the signal before trying
3961  * to spawn any grandchild processes. We also assume that signaling the
3962  * child twice will not cause any problems.
3963  */
3964 static void
3965 signal_child(pid_t pid, int signal)
3966 {
3967  if (kill(pid, signal) < 0)
3968  elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) pid, signal);
3969 #ifdef HAVE_SETSID
3970  switch (signal)
3971  {
3972  case SIGINT:
3973  case SIGTERM:
3974  case SIGQUIT:
3975  case SIGKILL:
3976  case SIGABRT:
3977  if (kill(-pid, signal) < 0)
3978  elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) (-pid), signal);
3979  break;
3980  default:
3981  break;
3982  }
3983 #endif
3984 }
3985 
3986 /*
3987  * Convenience function for killing a child process after a crash of some
3988  * other child process. We log the action at a higher level than we would
3989  * otherwise do, and we apply send_abort_for_crash to decide which signal
3990  * to send. Normally it's SIGQUIT -- and most other comments in this file
3991  * are written on the assumption that it is -- but developers might prefer
3992  * to use SIGABRT to collect per-child core dumps.
 *
 * pid is the process to be signaled.
 *
 * NOTE(review): embedded line 4001 is absent from this listing, so only the
 * DEBUG2 log message is visible below. Presumably the missing line is the
 * call that actually sends the signal -- confirm against the complete file.
3993  */
3994 static void
3995 sigquit_child(pid_t pid)
3996 {
  /* Name the signal in the log according to send_abort_for_crash. */
3997  ereport(DEBUG2,
3998  (errmsg_internal("sending %s to process %d",
3999  (send_abort_for_crash ? "SIGABRT" : "SIGQUIT"),
4000  (int) pid)));
4002 }
4003 
4004 /*
4005  * Send a signal to the targeted children (but NOT special children;
4006  * dead_end children are never signaled, either).
 *
 * signal is the signal number to send. target is either BACKEND_TYPE_ALL or
 * a mask that is ANDed with each entry's bkend_type to decide whether that
 * entry is signaled.
 *
 * Returns true if at least one child was signaled, false otherwise.
 *
 * NOTE(review): embedded lines 4032-4033 are absent from this listing, which
 * leaves the "recently announced WAL Sender" test below without its second
 * half and its body. Check the complete file before relying on it.
4007  */
4008 static bool
4009 SignalSomeChildren(int signal, int target)
4010 {
4011  dlist_iter iter;
4012  bool signaled = false;
4013 
4014  dlist_foreach(iter, &BackendList)
4015  {
4016  Backend *bp = dlist_container(Backend, elem, iter.cur);
4017 
4018  if (bp->dead_end)
4019  continue;
4020 
4021  /*
4022  * Since target == BACKEND_TYPE_ALL is the most common case, we test
4023  * it first and avoid touching shared memory for every child.
4024  */
4025  if (target != BACKEND_TYPE_ALL)
4026  {
4027  /*
4028  * Assign bkend_type for any recently announced WAL Sender
4029  * processes.
4030  */
4031  if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
4034 
  /* Skip children whose type is not selected by the target mask. */
4035  if (!(target & bp->bkend_type))
4036  continue;
4037  }
4038 
4039  ereport(DEBUG4,
4040  (errmsg_internal("sending signal %d to process %d",
4041  signal, (int) bp->pid)));
4042  signal_child(bp->pid, signal);
4043  signaled = true;
4044  }
4045  return signaled;
4046 }
4047 
4048 /*
4049  * Send a termination signal to children. This considers all of our children
4050  * processes, except syslogger and dead_end backends.
 *
 * The backend list is handled through SignalChildren(); after that, each
 * auxiliary process whose PID variable is nonzero is signaled individually
 * with signal_child().
 *
 * NOTE(review): embedded lines 4053, 4060 and 4077 are absent from this
 * listing. 4053 is the line carrying the function name and parameter list;
 * 4060 and 4077 are the statements guarded by the tests that precede them.
 * Check the complete file before relying on the code as shown here.
4051  */
4052 static void
4054 {
4055  SignalChildren(signal);
4056  if (StartupPID != 0)
4057  {
4058  signal_child(StartupPID, signal);
  /* Extra handling for the hard-kill signals; its body is not visible here. */
4059  if (signal == SIGQUIT || signal == SIGKILL || signal == SIGABRT)
4061  }
4062  if (BgWriterPID != 0)
4063  signal_child(BgWriterPID, signal);
4064  if (CheckpointerPID != 0)
4065  signal_child(CheckpointerPID, signal);
4066  if (WalWriterPID != 0)
4067  signal_child(WalWriterPID, signal);
4068  if (WalReceiverPID != 0)
4069  signal_child(WalReceiverPID, signal);
4070  if (WalSummarizerPID != 0)
4071  signal_child(WalSummarizerPID, signal);
4072  if (AutoVacPID != 0)
4073  signal_child(AutoVacPID, signal);
4074  if (PgArchPID != 0)
4075  signal_child(PgArchPID, signal);
4076  if (SlotSyncWorkerPID != 0)
4078 }
4079 
4080 /*
4081  * BackendStartup -- start backend process
4082  *
4083  * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
4084  *
4085  * Note: if you change this code, also consider StartAutovacuumWorker.
 *
 * STATUS_ERROR is also returned, before any fork is attempted, when the
 * Backend struct cannot be allocated or the cancel key cannot be generated.
 *
 * NOTE(review): this listing has gaps in its embedded numbering inside this
 * function (4088, 4111, 4130, 4144, 4150, 4163 and 4183 are absent),
 * including the line that carries the function name and parameter list.
 * Check the complete file before relying on the control flow as shown here.
4086  */
4087 static int
4089 {
4090  Backend *bn; /* for backend cleanup */
4091  pid_t pid;
4092 
4093  /*
4094  * Create backend data structure. Better before the fork() so we can
4095  * handle failure cleanly.
4096  */
  /* MCXT_ALLOC_NO_OOM: a failed allocation is reported via the NULL check below. */
4097  bn = (Backend *) palloc_extended(sizeof(Backend), MCXT_ALLOC_NO_OOM);
4098  if (!bn)
4099  {
4100  ereport(LOG,
4101  (errcode(ERRCODE_OUT_OF_MEMORY),
4102  errmsg("out of memory")));
4103  return STATUS_ERROR;
4104  }
4105 
4106  /*
4107  * Compute the cancel key that will be assigned to this backend. The
4108  * backend will have its own copy in the forked-off process' value of
4109  * MyCancelKey, so that it can transmit the key to the frontend.
4110  */
4112  {
4113  pfree(bn);
4114  ereport(LOG,
4115  (errcode(ERRCODE_INTERNAL_ERROR),
4116  errmsg("could not generate random cancel key")));
4117  return STATUS_ERROR;
4118  }
4119 
4120  bn->cancel_key = MyCancelKey;
4121 
4122  /* Pass down canAcceptConnections state */
4123  port->canAcceptConnections = canAcceptConnections(BACKEND_TYPE_NORMAL);
  /* Any result other than CAC_OK makes this a dead_end child. */
4124  bn->dead_end = (port->canAcceptConnections != CAC_OK);
4125 
4126  /*
4127  * Unless it's a dead_end child, assign it a child slot number
4128  */
4129  if (!bn->dead_end)
4131  else
4132  bn->child_slot = 0;
4133 
4134  /* Hasn't asked to be notified about any bgworkers yet */
4135  bn->bgworker_notify = false;
4136 
4137 #ifdef EXEC_BACKEND
4138  pid = backend_forkexec(port);
4139 #else /* !EXEC_BACKEND */
4140  pid = fork_process();
4141  if (pid == 0) /* child */
4142  {
4143  /* Detangle from postmaster */
4145 
4146  /* Close the postmaster's sockets */
4147  ClosePostmasterPorts(false);
4148 
4149  /* Perform additional initialization and collect startup packet */
4151 
4152  /* And run the backend */
4153  BackendRun(port);
4154  }
4155 #endif /* EXEC_BACKEND */
4156 
4157  if (pid < 0)
4158  {
4159  /* in parent, fork failed */
  /* Save errno now so the cleanup calls below cannot clobber it before %m is used. */
4160  int save_errno = errno;
4161 
4162  if (!bn->dead_end)
4164  pfree(bn);
4165  errno = save_errno;
4166  ereport(LOG,
4167  (errmsg("could not fork new process for connection: %m")));
4168  report_fork_failure_to_client(port, save_errno);
4169  return STATUS_ERROR;
4170  }
4171 
4172  /* in parent, successful fork */
4173  ereport(DEBUG2,
4174  (errmsg_internal("forked new backend, pid=%d socket=%d",
4175  (int) pid, (int) port->sock)));
4176 
4177  /*
4178  * Everything's been successful, it's safe to add this backend to our list
4179  * of backends.
4180  */
4181  bn->pid = pid;
4182  bn->bkend_type = BACKEND_TYPE_NORMAL; /* Can change later to WALSND */
4184 
4185 #ifdef EXEC_BACKEND
4186  if (!bn->dead_end)
4187  ShmemBackendArrayAdd(bn);
4188 #endif
4189 
4190  return STATUS_OK;
4191 }
4192 
4193 /*
4194  * Try to report backend fork() failure to client before we close the
4195  * connection. Since we do not care to risk blocking the postmaster on
4196  * this connection, we set the connection to non-blocking and try only once.
4197  *
4198  * This is grungy special-purpose code; we cannot use backend libpq since
4199  * it's not up and running.
 *
 * The body uses "port" (for port->sock) and "errnum" (passed to strerror());
 * the caller visible in this file passes the saved errno of the failed fork.
 *
 * NOTE(review): embedded line 4202, which carries the function name and
 * parameter list, is absent from this listing. Check the complete file for
 * the exact signature.
4200  */
4201 static void
4203 {
4204  char buffer[1000];
4205  int rc;
4206 
4207  /* Format the error message packet (always V2 protocol) */
4208  snprintf(buffer, sizeof(buffer), "E%s%s\n",
4209  _("could not fork new process for connection: "),
4210  strerror(errnum));
4211 
4212  /* Set port to non-blocking. Don't do send() if this fails */
4213  if (!pg_set_noblock(port->sock))
4214  return;
4215 
4216  /* We'll retry after EINTR, but ignore all other failures */
4217  do
4218  {
  /* strlen() + 1 so that the terminating NUL byte is sent as well. */
4219  rc = send(port->sock, buffer, strlen(buffer) + 1, 0);
4220  } while (rc < 0 && errno == EINTR);
4221 }
4222 
4223 
4224 /*
4225  * BackendInitialize -- initialize an interactive (postmaster-child)
4226  * backend process, and collect the client's startup packet.
4227  *
4228  * returns: nothing. Will not return at all if there's any failure.
4229  *
4230  * Note: this code does not depend on having any access to shared memory.
4231  * Indeed, our approach to SIGTERM/timeout handling *requires* that
4232  * shared memory not have been touched yet; see comments within.
4233  * In the EXEC_BACKEND case, we are physically attached to shared memory
4234  * but have not yet set up most of our local pointers to shmem structures.
4235  */
4236 static void
4238 {
4239  int status;
4240  int ret;
4241  char remote_host[NI_MAXHOST];
4242  char remote_port[NI_MAXSERV];
4243  StringInfoData ps_data;
4244 
4245  /* Save port etc. for ps status */
4246  MyProcPort = port;
4247 
4248  /* Tell fd.c about the long-lived FD associated with the port */
4250 
4251  /*
4252  * PreAuthDelay is a debugging aid for investigating problems in the
4253  * authentication cycle: it can be set in postgresql.conf to allow time to
4254  * attach to the newly-forked backend with a debugger. (See also
4255  * PostAuthDelay, which we allow clients to pass through PGOPTIONS, but it
4256  * is not honored until after authentication.)
4257  */
4258  if (PreAuthDelay > 0)
4259  pg_usleep(PreAuthDelay * 1000000L);
4260 
4261  /* This flag will remain set until InitPostgres finishes authentication */
4262  ClientAuthInProgress = true; /* limit visibility of log messages */
4263 
4264  /* set these to empty in case they are needed before we set them up */
4265  port->remote_host = "";
4266  port->remote_port = "";
4267 
4268  /*
4269  * Initialize libpq and enable reporting of ereport errors to the client.
4270  * Must do this now because authentication uses libpq to send messages.
4271  */
4272  pq_init(); /* initialize libpq to talk to client */
4273  whereToSendOutput = DestRemote; /* now safe to ereport to client */
4274 
4275  /*
4276  * We arrange to do _exit(1) if we receive SIGTERM or timeout while trying
4277  * to collect the startup packet; while SIGQUIT results in _exit(2).
4278  * Otherwise the postmaster cannot shutdown the database FAST or IMMED
4279  * cleanly if a buggy client fails to send the packet promptly.
4280  *
4281  * Exiting with _exit(1) is only possible because we have not yet touched
4282  * shared memory; therefore no outside-the-process state needs to get
4283  * cleaned up.
4284  */
4286  /* SIGQUIT handler was already set up by InitPostmasterChild */
4287  InitializeTimeouts(); /* establishes SIGALRM handler */
4288  sigprocmask(SIG_SETMASK, &StartupBlockSig, NULL);
4289 
4290  /*
4291  * Get the remote host name and port for logging and status display.
4292  */
4293  remote_host[0] = '\0';
4294  remote_port[0] = '\0';
4295  if ((ret = pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen,
4296  remote_host, sizeof(remote_host),
4297  remote_port, sizeof(remote_port),
4298  (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV)) != 0)
4299  ereport(WARNING,
4300  (errmsg_internal("pg_getnameinfo_all() failed: %s",
4301  gai_strerror(ret))));
4302 
4303  /*
4304  * Save remote_host and remote_port in port structure (after this, they
4305  * will appear in log_line_prefix data for log messages).
4306  */
4307  port->remote_host = strdup(remote_host);
4308  port->remote_port = strdup(remote_port);
4309 
4310  /* And now we can issue the Log_connections message, if wanted */
4311  if (Log_connections)
4312  {
4313  if (remote_port[0])
4314  ereport(LOG,
4315  (errmsg("connection received: host=%s port=%s",
4316  remote_host,
4317  remote_port)));
4318  else
4319  ereport(LOG,
4320  (errmsg("connection received: host=%s",
4321  remote_host)));
4322  }
4323 
4324  /*
4325  * If we did a reverse lookup to name, we might as well save the results
4326  * rather than possibly repeating the lookup during authentication.
4327  *
4328  * Note that we don't want to specify NI_NAMEREQD above, because then we'd
4329  * get nothing useful for a client without an rDNS entry. Therefore, we
4330  * must check whether we got a numeric IPv4 or IPv6 address, and not save
4331  * it into remote_hostname if so. (This test is conservative and might
4332  * sometimes classify a hostname as numeric, but an error in that
4333  * direction is safe; it only results in a possible extra lookup.)
4334  */
4335  if (log_hostname &&
4336  ret == 0 &&
4337  strspn(remote_host, "0123456789.") < strlen(remote_host) &&
4338  strspn(remote_host, "0123456789ABCDEFabcdef:") < strlen(remote_host))
4339  port->remote_hostname = strdup(remote_host);
4340 
4341  /*
4342  * Ready to begin client interaction. We will give up and _exit(1) after
4343  * a time delay, so that a broken client can't hog a connection
4344  * indefinitely. PreAuthDelay and any DNS interactions above don't count
4345  * against the time limit.
4346  *
4347  * Note: AuthenticationTimeout is applied here while waiting for the
4348  * startup packet, and then again in InitPostgres for the duration of any
4349  * authentication operations. So a hostile client could tie up the
4350  * process for nearly twice AuthenticationTimeout before we kick him off.
4351  *
4352  * Note: because PostgresMain will call InitializeTimeouts again, the
4353  * registration of STARTUP_PACKET_TIMEOUT will be lost. This is okay
4354  * since we never use it again after this function.
4355  */
4358 
4359  /*
4360  * Receive the startup packet (which might turn out to be a cancel request
4361  * packet).
4362  */
4363  status = ProcessStartupPacket(port, false, false);
4364 
4365  /*
4366  * If we're going to reject the connection due to database state, say so
4367  * now instead of wasting cycles on an authentication exchange. (This also
4368  * allows a pg_ping utility to be written.)
4369  */
4370  if (status == STATUS_OK)
4371  {
4372  switch (port->canAcceptConnections)
4373  {
4374  case CAC_STARTUP:
4375  ereport(FATAL,
4377  errmsg("the database system is starting up")));
4378  break;
4379  case CAC_NOTCONSISTENT:
4380  if (EnableHotStandby)
4381  ereport(FATAL,
4383  errmsg("the database system is not yet accepting connections"),
4384  errdetail("Consistent recovery state has not been yet reached.")));
4385  else
4386  ereport(FATAL,
4388  errmsg("the database system is not accepting connections"),
4389  errdetail("Hot standby mode is disabled.")));
4390  break;
4391  case CAC_SHUTDOWN:
4392  ereport(FATAL,
4394  errmsg("the database system is shutting down")));
4395  break;
4396  case CAC_RECOVERY:
4397  ereport(FATAL,
4399  errmsg("the database system is in recovery mode")));
4400  break;
4401  case CAC_TOOMANY:
4402  ereport(FATAL,
4403  (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
4404  errmsg("sorry, too many clients already")));
4405  break;
4406  case CAC_OK:
4407  break;
4408  }
4409  }
4410 
4411  /*
4412  * Disable the timeout, and prevent SIGTERM again.
4413  */
4415  sigprocmask(SIG_SETMASK, &BlockSig, NULL);
4416 
4417  /*
4418  * As a safety check that nothing in startup has yet performed
4419  * shared-memory modifications that would need to be undone if we had
4420  * exited through SIGTERM or timeout above, check that no on_shmem_exit
4421  * handlers have been registered yet. (This isn't terribly bulletproof,
4422  * since someone might misuse an on_proc_exit handler for shmem cleanup,
4423  * but it's a cheap and helpful check. We cannot disallow on_proc_exit
4424  * handlers unfortunately, since pq_init() already registered one.)
4425  */
4427 
4428  /*
4429  * Stop here if it was bad or a cancel packet. ProcessStartupPacket
4430  * already did any appropriate error reporting.
4431  */
4432  if (status != STATUS_OK)
4433  proc_exit(0);
4434 
4435  /*
4436  * Now that we have the user and database name, we can set the process
4437  * title for ps. It's good to do this as early as possible in startup.
4438  */
4439  initStringInfo(&ps_data);
4440  if (am_walsender)
4442  appendStringInfo(&ps_data, "%s ", port->user_name);
4443  if (port->database_name[0] != '\0')
4444  appendStringInfo(&ps_data, "%s ", port->database_name);
4445  appendStringInfoString(&ps_data, port->remote_host);
4446  if (port->remote_port[0] != '\0')
4447  appendStringInfo(&ps_data, "(%s)", port->remote_port);
4448 
4449  init_ps_display(ps_data.data);
4450  pfree(ps_data.data);
4451 
4452  set_ps_display("initializing");
4453 }
4454 
4455 
4456 /*
4457  * BackendRun -- create the backend's PGPROC entry and invoke PostgresMain()
4458  *
4459  * returns:
4460  * Doesn't return at all.
4461  */
4462 static void
4464 {
4465  /*
4466  * Create a per-backend PGPROC struct in shared memory. We must do this
4467  * before we can use LWLocks or access any shared memory.
4468  */
4469  InitProcess();
4470 
4471  /*
4472  * Make sure we aren't in PostmasterContext anymore. (We can't delete it
4473  * just yet, though, because InitPostgres will need the HBA data.)
4474  */
4476 
4477  PostgresMain(port->database_name, port->user_name);
4478 }
4479 
4480 
4481 #ifdef EXEC_BACKEND
4482 
/*
 * postmaster_forkexec -- launch a postmaster subprocess via fork and exec
 *
 * The caller supplies a fully built argv array, with the single exception
 * of argv[2]: that slot must be left for the callee, which stores the name
 * of the temp variable file there.
 *
 * The result is the PID of the new child, or -1 if the fork failed (in
 * which case a suitable error message has already been logged).
 *
 * Every child started through this routine dispatches to SubPostmasterMain.
 */
pid_t
postmaster_forkexec(int argc, char *argv[])
{
	pid_t		child_pid;

	/* Neither a Port nor a BackgroundWorker is handed to this kind of child */
	child_pid = internal_forkexec(argc, argv, NULL, NULL);

	return child_pid;
}
4500 
4501 /*
4502  * backend_forkexec -- fork/exec off a backend process
4503  *
4504  * Some operating systems (WIN32) don't have fork() so we have to simulate
4505  * it by storing parameters that need to be passed to the child and
4506  * then create a new child process.
4507  *
4508  * returns the pid of the fork/exec'd process, or -1 on failure
4509  */
4510 static pid_t
4511 backend_forkexec(Port *port)
4512 {
4513  char *av[4];
4514  int ac = 0;
4515 
4516  av[ac++] = "postgres";
4517  av[ac++] = "--forkbackend";
4518  av[ac++] = NULL; /* filled in by internal_forkexec */
4519 
4520  av[ac] = NULL;
4521  Assert(ac < lengthof(av));
4522 
4523  return internal_forkexec(ac, av, port, NULL);
4524 }
4525 
4526 #ifndef WIN32
4527 
4528 /*
4529  * internal_forkexec non-win32 implementation
4530  *
4531  * - writes out backend variables to the parameter file
4532  * - fork():s, and then exec():s the child process
4533  */
4534 static pid_t
4535 internal_forkexec(int argc, char *argv[], Port *port, BackgroundWorker *worker)
4536 {
4537  static unsigned long tmpBackendFileNum = 0;
4538  pid_t pid;
4539  char tmpfilename[MAXPGPATH];
4540  BackendParameters param;
4541  FILE *fp;
4542 
4543  /*
4544  * Make sure padding bytes are initialized, to prevent Valgrind from
4545  * complaining about writing uninitialized bytes to the file. This isn't
4546  * performance critical, and the win32 implementation initializes the
4547  * padding bytes to zeros, so do it even when not using Valgrind.
4548  */
4549  memset(&param, 0, sizeof(BackendParameters));
4550 
4551  if (!save_backend_variables(&param, port, worker))
4552  return -1; /* log made by save_backend_variables */
4553 
4554  /* Calculate name for temp file */
4555  snprintf(tmpfilename, MAXPGPATH, "%s/%s.backend_var.%d.%lu",
4557  MyProcPid, ++tmpBackendFileNum);
4558 
4559  /* Open file */
4560  fp = AllocateFile(tmpfilename, PG_BINARY_W);
4561  if (!fp)
4562  {
4563  /*
4564  * As in OpenTemporaryFileInTablespace, try to make the temp-file
4565  * directory, ignoring errors.
4566  */
4568 
4569  fp = AllocateFile(tmpfilename, PG_BINARY_W);
4570  if (!fp)
4571  {
4572  ereport(LOG,
4574  errmsg("could not create file \"%s\": %m",
4575  tmpfilename)));
4576  return -1;
4577  }
4578  }
4579 
4580  if (fwrite(&param, sizeof(param), 1, fp) != 1)
4581  {
4582  ereport(LOG,
4584  errmsg("could not write to file \"%s\": %m", tmpfilename)));
4585  FreeFile(fp);
4586  return -1;
4587  }
4588 
4589  /* Release file */
4590  if (FreeFile(fp))
4591  {
4592  ereport(LOG,
4594  errmsg("could not write to file \"%s\": %m", tmpfilename)));
4595  return -1;
4596  }
4597 
4598  /* Make sure caller set up argv properly */
4599  Assert(argc >= 3);
4600  Assert(argv[argc] == NULL);
4601  Assert(strncmp(argv[1], "--fork", 6) == 0);
4602  Assert(argv[2] == NULL);
4603 
4604  /* Insert temp file name after --fork argument */
4605  argv[2] = tmpfilename;
4606 
4607  /* Fire off execv in child */
4608  if ((pid = fork_process()) == 0)
4609  {
4610  if (execv(postgres_exec_path, argv) < 0)
4611  {
4612  ereport(LOG,
4613  (errmsg("could not execute server process \"%s\": %m",
4614  postgres_exec_path)));
4615  /* We're already in the child process here, can't return */
4616  exit(1);
4617  }
4618  }
4619 
4620  return pid; /* Parent returns pid, or -1 on fork failure */
4621 }
4622 #else /* WIN32 */
4623 
4624 /*
4625  * internal_forkexec win32 implementation
4626  *
4627  * - starts backend using CreateProcess(), in suspended state
4628  * - writes out backend variables to the parameter file
4629  * - during this, duplicates handles and sockets required for
4630  * inheritance into the new process
4631  * - resumes execution of the new process once the backend parameter
4632  * file is complete.
4633  */
4634 static pid_t
4635 internal_forkexec(int argc, char *argv[], Port *port, BackgroundWorker *worker)
4636 {
4637  int retry_count = 0;
4638  STARTUPINFO si;
4639  PROCESS_INFORMATION pi;
4640  int i;
4641  int j;
4642  char cmdLine[MAXPGPATH * 2];
4643  HANDLE paramHandle;
4644  BackendParameters *param;
4645  SECURITY_ATTRIBUTES sa;
4646  char paramHandleStr[32];
4647  win32_deadchild_waitinfo *childinfo;
4648 
4649  /* Make sure caller set up argv properly */
4650  Assert(argc >= 3);
4651  Assert(argv[argc] == NULL);
4652  Assert(strncmp(argv[1], "--fork", 6) == 0);
4653  Assert(argv[2] == NULL);
4654 
4655  /* Resume here if we need to retry */
4656 retry:
4657 
4658  /* Set up shared memory for parameter passing */
4659  ZeroMemory(&sa, sizeof(sa));
4660  sa.nLength = sizeof(sa);
4661  sa.bInheritHandle = TRUE;
4662  paramHandle = CreateFileMapping(INVALID_HANDLE_VALUE,
4663  &sa,
4664  PAGE_READWRITE,
4665  0,
4666  sizeof(BackendParameters),
4667  NULL);
4668  if (paramHandle == INVALID_HANDLE_VALUE)
4669  {
4670  ereport(LOG,
4671  (errmsg("could not create backend parameter file mapping: error code %lu",
4672  GetLastError())));
4673  return -1;
4674  }
4675 
4676  param = MapViewOfFile(paramHandle, FILE_MAP_WRITE, 0, 0, sizeof(BackendParameters));
4677  if (!param)
4678  {
4679  ereport(LOG,
4680  (errmsg("could not map backend parameter memory: error code %lu",
4681  GetLastError())));
4682  CloseHandle(paramHandle);
4683  return -1;
4684  }
4685 
4686  /* Insert temp file name after --fork argument */
4687 #ifdef _WIN64
4688  sprintf(paramHandleStr, "%llu", (LONG_PTR) paramHandle);
4689 #else
4690  sprintf(paramHandleStr, "%lu", (DWORD) paramHandle);
4691 #endif
4692  argv[2] = paramHandleStr;
4693 
4694  /* Format the cmd line */
4695  cmdLine[sizeof(cmdLine) - 1] = '\0';
4696  cmdLine[sizeof(cmdLine) - 2] = '\0';
4697  snprintf(cmdLine, sizeof(cmdLine) - 1, "\"%s\"", postgres_exec_path);
4698  i = 0;
4699  while (argv[++i] != NULL)
4700  {
4701  j = strlen(cmdLine);
4702  snprintf(cmdLine + j, sizeof(cmdLine) - 1 - j, " \"%s\"", argv[i]);
4703  }
4704  if (cmdLine[sizeof(cmdLine) - 2] != '\0')
4705  {
4706  ereport(LOG,
4707  (errmsg("subprocess command line too long")));
4708  UnmapViewOfFile(param);
4709  CloseHandle(paramHandle);
4710  return -1;
4711  }
4712 
4713  memset(&pi, 0, sizeof(pi));
4714  memset(&si, 0, sizeof(si));
4715  si.cb = sizeof(si);
4716 
4717  /*
4718  * Create the subprocess in a suspended state. This will be resumed later,
4719  * once we have written out the parameter file.
4720  */
4721  if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, CREATE_SUSPENDED,
4722  NULL, NULL, &si, &pi))
4723  {
4724  ereport(LOG,
4725  (errmsg("CreateProcess() call failed: %m (error code %lu)",
4726  GetLastError())));
4727  UnmapViewOfFile(param);
4728  CloseHandle(paramHandle);
4729  return -1;
4730  }
4731 
4732  if (!save_backend_variables(param, port, worker, pi.hProcess, pi.dwProcessId))
4733  {
4734  /*
4735  * log made by save_backend_variables, but we have to clean up the
4736  * mess with the half-started process
4737  */
4738  if (!TerminateProcess(pi.hProcess, 255))
4739  ereport(LOG,
4740  (errmsg_internal("could not terminate unstarted process: error code %lu",
4741  GetLastError())));
4742  CloseHandle(pi.hProcess);
4743  CloseHandle(pi.hThread);
4744  UnmapViewOfFile(param);
4745  CloseHandle(paramHandle);
4746  return -1; /* log made by save_backend_variables */
4747  }
4748 
4749  /* Drop the parameter shared memory that is now inherited to the backend */
4750  if (!UnmapViewOfFile(param))
4751  ereport(LOG,
4752  (errmsg("could not unmap view of backend parameter file: error code %lu",
4753  GetLastError())));
4754  if (!CloseHandle(paramHandle))
4755  ereport(LOG,
4756  (errmsg("could not close handle to backend parameter file: error code %lu",
4757  GetLastError())));
4758 
4759  /*
4760  * Reserve the memory region used by our main shared memory segment before
4761  * we resume the child process. Normally this should succeed, but if ASLR
4762  * is active then it might sometimes fail due to the stack or heap having
4763  * gotten mapped into that range. In that case, just terminate the
4764  * process and retry.
4765  */
4766  if (!pgwin32_ReserveSharedMemoryRegion(pi.hProcess))
4767  {
4768  /* pgwin32_ReserveSharedMemoryRegion already made a log entry */
4769  if (!TerminateProcess(pi.hProcess, 255))
4770  ereport(LOG,
4771  (errmsg_internal("could not terminate process that failed to reserve memory: error code %lu",
4772  GetLastError())));
4773  CloseHandle(pi.hProcess);
4774  CloseHandle(pi.hThread);
4775  if (++retry_count < 100)
4776  goto retry;
4777  ereport(LOG,
4778  (errmsg("giving up after too many tries to reserve shared memory"),
4779  errhint("This might be caused by ASLR or antivirus software.")));
4780  return -1;
4781  }
4782 
4783  /*
4784  * Now that the backend variables are written out, we start the child
4785  * thread so it can start initializing while we set up the rest of the
4786  * parent state.
4787  */
4788  if (ResumeThread(pi.hThread) == -1)
4789  {
4790  if (!TerminateProcess(pi.hProcess, 255))
4791  {
4792  ereport(LOG,
4793  (errmsg_internal("could not terminate unstartable process: error code %lu",
4794  GetLastError())));
4795  CloseHandle(pi.hProcess);
4796  CloseHandle(pi.hThread);
4797  return -1;
4798  }
4799  CloseHandle(pi.hProcess);
4800  CloseHandle(pi.hThread);
4801  ereport(LOG,
4802  (errmsg_internal("could not resume thread of unstarted process: error code %lu",
4803  GetLastError())));
4804  return -1;
4805  }
4806 
4807  /*
4808  * Queue a waiter to signal when this child dies. The wait will be handled
4809  * automatically by an operating system thread pool. The memory will be
4810  * freed by a later call to waitpid().
4811  */
4812  childinfo = palloc(sizeof(win32_deadchild_waitinfo));
4813  childinfo->procHandle = pi.hProcess;
4814  childinfo->procId = pi.dwProcessId;
4815 
4816  if (!RegisterWaitForSingleObject(&childinfo->waitHandle,
4817  pi.hProcess,
4818  pgwin32_deadchild_callback,
4819  childinfo,
4820  INFINITE,
4821  WT_EXECUTEONLYONCE | WT_EXECUTEINWAITTHREAD))
4822  ereport(FATAL,
4823  (errmsg_internal("could not register process for wait: error code %lu",
4824  GetLastError())));
4825 
4826  /* Don't close pi.hProcess here - waitpid() needs access to it */
4827 
4828  CloseHandle(pi.hThread);
4829 
4830  return pi.dwProcessId;
4831 }
4832 #endif /* WIN32 */
4833 
4834 
4835 /*
4836  * SubPostmasterMain -- Get the fork/exec'd process into a state equivalent
4837  * to what it would be if we'd simply forked on Unix, and then
4838  * dispatch to the appropriate place.
4839  *
4840  * The first two command line arguments are expected to be "--forkFOO"
4841  * (where FOO indicates which postmaster child we are to become), and
4842  * the name of a variables file that we can read to load data that would
4843  * have been inherited by fork() on Unix. Remaining arguments go to the
4844  * subprocess FooMain() routine.
4845  */
4846 void
4847 SubPostmasterMain(int argc, char *argv[])
4848 {
4849  Port *port;
4850  BackgroundWorker *worker;
4851 
4852  /* In EXEC_BACKEND case we will not have inherited these settings */
4853  IsPostmasterEnvironment = true;
4855 
4856  /* Setup essential subsystems (to ensure elog() behaves sanely) */
4858 
4859  /* Check we got appropriate args */
4860  if (argc < 3)
4861  elog(FATAL, "invalid subpostmaster invocation");
4862 
4863  /* Read in the variables file */
4864  read_backend_variables(argv[2], &port, &worker);
4865 
4866  /* Close the postmaster's sockets (as soon as we know them) */
4867  ClosePostmasterPorts(strcmp(argv[1], "--forklog") == 0);
4868 
4869  /* Setup as postmaster child */
4871 
4872  /*
4873  * If appropriate, physically re-attach to shared memory segment. We want
4874  * to do this before going any further to ensure that we can attach at the
4875  * same address the postmaster used. On the other hand, if we choose not
4876  * to re-attach, we may have other cleanup to do.
4877  *
4878  * If testing EXEC_BACKEND on Linux, you should run this as root before
4879  * starting the postmaster:
4880  *
4881  * sysctl -w kernel.randomize_va_space=0
4882  *
4883  * This prevents using randomized stack and code addresses that cause the
4884  * child process's memory map to be different from the parent's, making it
4885  * sometimes impossible to attach to shared memory at the desired address.
4886  * Return the setting to its old value (usually '1' or '2') when finished.
4887  */
4888  if (strcmp(argv[1], "--forkbackend") == 0 ||
4889  strcmp(argv[1], "--forkavlauncher") == 0 ||
4890  strcmp(argv[1], "--forkssworker") == 0 ||
4891  strcmp(argv[1], "--forkavworker") == 0 ||
4892  strcmp(argv[1], "--forkaux") == 0 ||
4893  strcmp(argv[1], "--forkbgworker") == 0)
4895  else
4897 
4898  /* Read in remaining GUC variables */
4899  read_nondefault_variables();
4900 
4901  /*
4902  * Check that the data directory looks valid, which will also check the
4903  * privileges on the data directory and update our umask and file/group
4904  * variables for creating files later. Note: this should really be done
4905  * before we create any files or directories.
4906  */
4907  checkDataDir();
4908 
4909  /*
4910  * (re-)read control file, as it contains config. The postmaster will
4911  * already have read this, but this process doesn't know about that.
4912  */
4913  LocalProcessControlFile(false);
4914 
4915  /*
4916  * Reload any libraries that were preloaded by the postmaster. Since we
4917  * exec'd this process, those libraries didn't come along with us; but we
4918  * should load them into all child processes to be consistent with the
4919  * non-EXEC_BACKEND behavior.
4920  */
4922 
4923  /* Run backend or appropriate child */
4924  if (strcmp(argv[1], "--forkbackend") == 0)
4925  {
4926  Assert(argc == 3); /* shouldn't be any more args */
4927 
4928  /*
4929  * Need to reinitialize the SSL library in the backend, since the
4930  * context structures contain function pointers and cannot be passed
4931  * through the parameter file.
4932  *
4933  * If for some reason reload fails (maybe the user installed broken
4934  * key files), soldier on without SSL; that's better than all
4935  * connections becoming impossible.
4936  *
4937  * XXX should we do this in all child processes? For the moment it's
4938  * enough to do it in backend children.
4939  */
4940 #ifdef USE_SSL
4941  if (EnableSSL)
4942  {
4943  if (secure_initialize(false) == 0)
4944  LoadedSSL = true;
4945  else
4946  ereport(LOG,
4947  (errmsg("SSL configuration could not be loaded in child process")));
4948  }
4949 #endif
4950 
4951  /*
4952  * Perform additional initialization and collect startup packet.
4953  *
4954  * We want to do this before InitProcess() for a couple of reasons: 1.
4955  * so that we aren't eating up a PGPROC slot while waiting on the
4956  * client. 2. so that if InitProcess() fails due to being out of
4957  * PGPROC slots, we have already initialized libpq and are able to
4958  * report the error to the client.
4959  */
4961 
4962  /* Restore basic shared memory pointers */
4964 
4965  /* And run the backend */
4966  BackendRun(port); /* does not return */
4967  }
4968  if (strcmp(argv[1], "--forkaux") == 0)
4969  {
4970  AuxProcType auxtype;
4971 
4972  Assert(argc == 4);
4973 
4974  /* Restore basic shared memory pointers */
4976 
4977  auxtype = atoi(argv[3]);
4978  AuxiliaryProcessMain(auxtype); /* does not return */
4979  }
4980  if (strcmp(argv[1], "--forkavlauncher") == 0)
4981  {
4982  /* Restore basic shared memory pointers */
4984 
4985  AutoVacLauncherMain(argc - 2, argv + 2); /* does not return */
4986  }
4987  if (strcmp(argv[1], "--forkavworker") == 0)
4988  {
4989  /* Restore basic shared memory pointers */
4991 
4992  AutoVacWorkerMain(argc - 2, argv + 2); /* does not return */
4993  }
4994  if (strcmp(argv[1], "--forkssworker") == 0)
4995  {
4996  /* Restore basic shared memory pointers */
4998 
4999  ReplSlotSyncWorkerMain(argc - 2, argv + 2); /* does not return */
5000  }
5001  if (strcmp(argv[1], "--forkbgworker") == 0)
5002  {
5003  /* do this as early as possible; in particular, before InitProcess() */
5004  IsBackgroundWorker = true;
5005 
5006  /* Restore basic shared memory pointers */
5008 
5009  MyBgworkerEntry = worker;
5011  }
5012  if (strcmp(argv[1], "--forklog") == 0)
5013  {
5014  /* Do not want to attach to shared memory */
5015 
5016  SysLoggerMain(argc, argv); /* does not return */
5017  }
5018 
5019  abort(); /* shouldn't get here */
5020 }
5021 #endif /* EXEC_BACKEND */
5022 
5023 
/*
 * ExitPostmaster -- common exit path for the postmaster
 *
 * Do NOT call exit() directly --- always go through here!
 *
 * "status" is passed on unchanged to proc_exit().
 *
 * Note that this function itself neither signals child processes nor
 * releases shared memory explicitly; whether backends ought to be killed
 * when the postmaster goes away is an old open question that was left
 * unresolved here.
 */
static void
ExitPostmaster(int status)
{
#ifdef HAVE_PTHREAD_IS_THREADED_NP
	{
		/*
		 * No cause is known for a postmaster turning multithreaded after
		 * startup, but probe once more in case of unknown causes.  LOG level
		 * is used because an unclean shutdown at this point would usually
		 * look much the same as a clean one.
		 */
		int			threaded = pthread_is_threaded_np();

		if (threaded != 0)
			ereport(LOG,
					(errcode(ERRCODE_INTERNAL_ERROR),
					 errmsg_internal("postmaster became multithreaded"),
					 errdetail("Please report this to <%s>.", PACKAGE_BUGREPORT)));
	}
#endif

	proc_exit(status);
}
5058 
5059 /*
5060  * Handle pmsignal conditions representing requests from backends,
5061  * and check for promote and logrotate requests from pg_ctl.
5062  */
5063 static void
5065 {
5066  pending_pm_pmsignal = false;
5067 
5068  ereport(DEBUG2,
5069  (errmsg_internal("postmaster received pmsignal signal")));
5070 
5071  /*
5072  * RECOVERY_STARTED and BEGIN_HOT_STANDBY signals are ignored in
5073  * unexpected states. If the startup process quickly starts up, completes
5074  * recovery, exits, we might process the death of the startup process
5075  * first. We don't want to go back to recovery in that case.
5076  */
5079  {
5080  /* WAL redo has started. We're out of reinitialization. */
5081  FatalError = false;
5082  AbortStartTime = 0;
5083 
5084  /*
5085  * Start the archiver if we're responsible for (re-)archiving received
5086  * files.
5087  */
5088  Assert(PgArchPID == 0);
5089  if (XLogArchivingAlways())
5091 
5092  /*
5093  * If we aren't planning to enter hot standby mode later, treat
5094  * RECOVERY_STARTED as meaning we're out of startup, and report status
5095  * accordingly.
5096  */
5097  if (!EnableHotStandby)
5098  {
5100 #ifdef USE_SYSTEMD
5101  sd_notify(0, "READY=1");
5102 #endif
5103  }
5104 
5105  pmState = PM_RECOVERY;
5106  }
5107 
5110  {
5111  ereport(LOG,
5112  (errmsg("database system is ready to accept read-only connections")));
5113 
5114  /* Report status */
5116 #ifdef USE_SYSTEMD
5117  sd_notify(0, "READY=1");
5118 #endif
5119 
5121  connsAllowed = true;
5122 
5123  /* Some workers may be scheduled to start now */
5124  StartWorkerNeeded = true;
5125  }
5126 
5127  /* Process background worker state changes. */
5129  {
5130  /* Accept new worker requests only if not stopping. */
5132  StartWorkerNeeded = true;
5133  }
5134 
5137 
5138  /* Tell syslogger to rotate logfile if requested */
5139  if (SysLoggerPID != 0)
5140  {
5141  if (CheckLogrotateSignal())
5142  {
5145  }
5147  {
5149  }
5150  }
5151 
5154  {
5155  /*
5156  * Start one iteration of the autovacuum daemon, even if autovacuuming
5157  * is nominally not enabled. This is so we can have an active defense
5158  * against transaction ID wraparound. We set a flag for the main loop
5159  * to do it rather than trying to do it here --- this is because the
5160  * autovac process itself may send the signal, and we want to handle
5161  * that by launching another iteration as soon as the current one
5162  * completes.
5163  */
5164  start_autovac_launcher = true;
5165  }
5166 
5169  {
5170  /* The autovacuum launcher wants us to start a worker process. */
5172  }
5173 
5175  {
5176  /* Startup Process wants us to start the walreceiver process. */
5177  /* Start immediately if possible, else remember request for later. */
5178  WalReceiverRequested = true;
5180  }
5181 
5182  /*
5183  * Try to advance postmaster's state machine, if a child requests it.
5184  *
5185  * Be careful about the order of this action relative to this function's
5186  * other actions. Generally, this should be after other actions, in case
5187  * they have effects PostmasterStateMachine would need to know about.
5188  * However, we should do it before the CheckPromoteSignal step, which
5189  * cannot have any (immediate) effect on the state machine, but does
5190  * depend on what state we're in now.
5191  */
5193  {
5195  }
5196 
5197  if (StartupPID != 0 &&
5198  (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5199  pmState == PM_HOT_STANDBY) &&
5201  {
5202  /*
5203  * Tell startup process to finish recovery.
5204  *
5205  * Leave the promote signal file in place and let the Startup process
5206  * do the unlink.
5207  */
5209  }
5210 }
5211 
5212 /*
5213  * SIGTERM while processing startup packet.
5214  *
5215  * Running proc_exit() from a signal handler would be quite unsafe.
5216  * However, since we have not yet touched shared memory, we can just
5217  * pull the plug and exit without running any atexit handlers.
5218  *
5219  * One might be tempted to try to send a message, or log one, indicating
5220  * why we are disconnecting. However, that would be quite unsafe in itself.
5221  * Also, it seems undesirable to provide clues about the database's state
5222  * to a client that has not yet completed authentication, or even sent us
5223  * a startup packet.
5224  */
5225 static void
5227 {
5228  _exit(1);
5229 }
5230 
5231 /*
5232  * Dummy signal handler
5233  *
5234  * We use this for signals that we don't actually use in the postmaster,
5235  * but we do use in backends. If we were to SIG_IGN such signals in the
5236  * postmaster, then a newly started backend might drop a signal that arrives
5237  * before it's able to reconfigure its signal processing. (See notes in
5238  * tcop/postgres.c.)
5239  */
5240 static void
5242 {
5243 }
5244 
5245 /*
5246  * Timeout while processing startup packet.
5247  * As for process_startup_packet_die(), we exit via _exit(1).
5248  */
5249 static void
5251 {
5252  _exit(1);
5253 }
5254 
5255 
5256 /*
5257  * Generate a random cancel key.
5258  */
5259 static bool
5261 {
5262  return pg_strong_random(cancel_key, sizeof(int32));
5263 }
5264 
5265 /*
5266  * Count up number of child processes of specified types (dead_end children
5267  * are always excluded).
5268  */
5269 static int
5270 CountChildren(int target)
5271 {
5272  dlist_iter iter;
5273  int cnt = 0;
5274 
5275  dlist_foreach(iter, &BackendList)
5276  {
5277  Backend *bp = dlist_container(Backend, elem, iter.cur);
5278 
5279  if (bp->dead_end)
5280  continue;
5281 
5282  /*
5283  * Since target == BACKEND_TYPE_ALL is the most common case, we test
5284  * it first and avoid touching shared memory for every child.
5285  */
5286  if (target != BACKEND_TYPE_ALL)
5287  {
5288  /*
5289  * Assign bkend_type for any recently announced WAL Sender
5290  * processes.
5291  */
5292  if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
5295 
5296  if (!(target & bp->bkend_type))
5297  continue;
5298  }
5299 
5300  cnt++;
5301  }
5302  return cnt;
5303 }
5304 
5305 
5306 /*
5307  * StartChildProcess -- start an auxiliary process for the postmaster
5308  *
5309  * "type" determines what kind of child will be started. All child types
5310  * initially go to AuxiliaryProcessMain, which will handle common setup.
5311  *
5312  * Return value of StartChildProcess is subprocess' PID, or 0 if failed
5313  * to start subprocess.
5314  */
5315 static pid_t
5317 {
5318  pid_t pid;
5319 
5320 #ifdef EXEC_BACKEND
5321  {
5322  char *av[10];
5323  int ac = 0;
5324  char typebuf[32];
5325 
5326  /*
5327  * Set up command-line arguments for subprocess
5328  */
5329  av[ac++] = "postgres";
5330  av[ac++] = "--forkaux";
5331  av[ac++] = NULL; /* filled in by postmaster_forkexec */
5332 
5333  snprintf(typebuf, sizeof(typebuf), "%d", type);
5334  av[ac++] = typebuf;
5335 
5336  av[ac] = NULL;
5337  Assert(ac < lengthof(av));
5338 
5339  pid = postmaster_forkexec(ac, av);
5340  }
5341 #else /* !EXEC_BACKEND */
5342  pid = fork_process();
5343 
5344  if (pid == 0) /* child */
5345  {
5347 
5348  /* Close the postmaster's sockets */
5349  ClosePostmasterPorts(false);
5350 
5351  /* Release postmaster's working memory context */
5354  PostmasterContext = NULL;
5355 
5356  AuxiliaryProcessMain(type); /* does not return */
5357  }
5358 #endif /* EXEC_BACKEND */
5359 
5360  if (pid < 0)
5361  {
5362  /* in parent, fork failed */
5363  int save_errno = errno;
5364 
5365  errno = save_errno;
5366  switch (type)
5367  {
5368  case StartupProcess:
5369  ereport(LOG,
5370  (errmsg("could not fork startup process: %m")));
5371  break;
5372  case ArchiverProcess:
5373  ereport(LOG,
5374  (errmsg("could not fork archiver process: %m")));
5375  break;
5376  case BgWriterProcess:
5377  ereport(LOG,
5378  (errmsg("could not fork background writer process: %m")));
5379  break;
5380  case CheckpointerProcess:
5381  ereport(LOG,
5382  (errmsg("could not fork checkpointer process: %m")));
5383  break;
5384  case WalWriterProcess:
5385  ereport(LOG,
5386  (errmsg("could not fork WAL writer process: %m")));
5387  break;
5388  case WalReceiverProcess:
5389  ereport(LOG,
5390  (errmsg("could not fork WAL receiver process: %m")));
5391  break;
5392  case WalSummarizerProcess:
5393  ereport(LOG,
5394  (errmsg("could not fork WAL summarizer process: %m")));
5395  break;
5396  default:
5397  ereport(LOG,
5398  (errmsg("could not fork process: %m")));
5399  break;
5400  }
5401 
5402  /*
5403  * fork failure is fatal during startup, but there's no need to choke
5404  * immediately if starting other child types fails.
5405  */
5406  if (type == StartupProcess)
5407  ExitPostmaster(1);
5408  return 0;
5409  }
5410 
5411  /*
5412  * in parent, successful fork
5413  */
5414  return pid;
5415 }
5416 
5417 /*
5418  * StartAutovacuumWorker
5419  * Start an autovac worker process.
5420  *
5421  * This function is here because it enters the resulting PID into the
5422  * postmaster's private backends list.
5423  *
5424  * NB -- this code very roughly matches BackendStartup.
5425  */
5426 static void
5428 {
5429  Backend *bn;
5430 
5431  /*
5432  * If not in condition to run a process, don't try, but handle it like a
5433  * fork failure. This does not normally happen, since the signal is only
5434  * supposed to be sent by autovacuum launcher when it's OK to do it, but
5435  * we have to check to avoid race-condition problems during DB state
5436  * changes.
5437  */
5439  {
5440  /*
5441  * Compute the cancel key that will be assigned to this session. We
5442  * probably don't need cancel keys for autovac workers, but we'd
5443  * better have something random in the field to prevent unfriendly
5444  * people from sending cancels to them.
5445  */
5447  {
5448  ereport(LOG,
5449  (errcode(ERRCODE_INTERNAL_ERROR),
5450  errmsg("could not generate random cancel key")));
5451  return;
5452  }
5453 
5454  bn = (Backend *) palloc_extended(sizeof(Backend), MCXT_ALLOC_NO_OOM);
5455  if (bn)
5456  {
5457  bn->cancel_key = MyCancelKey;
5458 
5459  /* Autovac workers are not dead_end and need a child slot */
5460  bn->dead_end = false;
5462  bn->bgworker_notify = false;
5463 
5464  bn->pid = StartAutoVacWorker();
5465  if (bn->pid > 0)
5466  {
5469 #ifdef EXEC_BACKEND
5470  ShmemBackendArrayAdd(bn);
5471 #endif
5472  /* all OK */
5473  return;
5474  }
5475 
5476  /*
5477  * fork failed, fall through to report -- actual error message was
5478  * logged by StartAutoVacWorker
5479  */
5481  pfree(bn);
5482  }
5483  else
5484  ereport(LOG,
5485  (errcode(ERRCODE_OUT_OF_MEMORY),
5486  errmsg("out of memory")));
5487  }
5488 
5489  /*
5490  * Report the failure to the launcher, if it's running. (If it's not, we
5491  * might not even be connected to shared memory, so don't try to call
5492  * AutoVacWorkerFailed.) Note that we also need to signal it so that it
5493  * responds to the condition, but we don't do that here, instead waiting
5494  * for ServerLoop to do it. This way we avoid a ping-pong signaling in
5495  * quick succession between the autovac launcher and postmaster in case
5496  * things get ugly.
5497  */
5498  if (AutoVacPID != 0)
5499  {
5501  avlauncher_needs_signal = true;
5502  }
5503 }
5504 
5505 /*
5506  * MaybeStartWalReceiver
5507  * Start the WAL receiver process, if not running and our state allows.
5508  *
5509  * Note: if WalReceiverPID is already nonzero, it might seem that we should
5510  * clear WalReceiverRequested. However, there's a race condition if the
5511  * walreceiver terminates and the startup process immediately requests a new
5512  * one: it's quite possible to get the signal for the request before reaping
5513  * the dead walreceiver process. Better to risk launching an extra
5514  * walreceiver than to miss launching one we need. (The walreceiver code
5515  * has logic to recognize that it should go away if not needed.)
5516  */
5517 static void
5519 {
5520  if (WalReceiverPID == 0 &&
5521  (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5522  pmState == PM_HOT_STANDBY) &&
5524  {
5526  if (WalReceiverPID != 0)
5527  WalReceiverRequested = false;
5528  /* else leave the flag set, so we'll try again later */
5529  }
5530 }
5531 
5532 /*
5533  * MaybeStartWalSummarizer
5534  * Start the WAL summarizer process, if not running and our state allows.
5535  */
5536 static void
5538 {
5539  if (summarize_wal && WalSummarizerPID == 0 &&
5540  (pmState == PM_RUN || pmState == PM_HOT_STANDBY) &&
5543 }
5544 
5545 
5546 /*
5547  * MaybeStartSlotSyncWorker
5548  * Start the slot sync worker, if not running and our state allows.
5549  *
5550  * We allow to start the slot sync worker when we are on a hot standby,
5551  * fast or immediate shutdown is not in progress, slot sync parameters
5552  * are configured correctly, and it is the first time of worker's launch,
5553  * or enough time has passed since the worker was launched last.
5554  */
5555 static void
5557 {
5558  if (SlotSyncWorkerPID == 0 && pmState == PM_HOT_STANDBY &&
5562 }
5563 
5564 /*
5565  * Create the opts file
5566  */
5567 static bool
5568 CreateOptsFile(int argc, char *argv[], char *fullprogname)
5569 {
5570  FILE *fp;
5571  int i;
5572 
5573 #define OPTS_FILE "postmaster.opts"
5574 
5575  if ((fp = fopen(OPTS_FILE, "w")) == NULL)
5576  {
5577  ereport(LOG,
5579  errmsg("could not create file \"%s\": %m", OPTS_FILE)));
5580  return false;
5581  }
5582 
5583  fprintf(fp, "%s", fullprogname);
5584  for (i = 1; i < argc; i++)
5585  fprintf(fp, " \"%s\"", argv[i]);
5586  fputs("\n", fp);
5587 
5588  if (fclose(fp))
5589  {
5590  ereport(LOG,
5592  errmsg("could not write file \"%s\": %m", OPTS_FILE)));
5593  return false;
5594  }
5595 
5596  return true;
5597 }
5598 
5599 
5600 /*
5601  * MaxLivePostmasterChildren
5602  *
5603  * This reports the number of entries needed in per-child-process arrays
5604  * (the PMChildFlags array, and if EXEC_BACKEND the ShmemBackendArray).
5605  * These arrays include regular backends, autovac workers, walsenders
5606  * and background workers, but not special children nor dead_end children.
5607  * This allows the arrays to have a fixed maximum size, to wit the same
5608  * too-many-children limit enforced by canAcceptConnections(). The exact value
5609  * isn't too critical as long as it's more than MaxBackends.
5610  */
5611 int
5613 {
5614  return 2 * (MaxConnections + autovacuum_max_workers + 1 +
5616 }
5617 
5618 /*
5619  * Connect background worker to a database.
5620  */
5621 void
5623 {
5625  bits32 init_flags = 0; /* never honor session_preload_libraries */
5626 
5627  /* ignore datallowconn? */
5628  if (flags & BGWORKER_BYPASS_ALLOWCONN)
5629  init_flags |= INIT_PG_OVERRIDE_ALLOW_CONNS;
5630  /* ignore rolcanlogin? */
5631  if (flags & BGWORKER_BYPASS_ROLELOGINCHECK)
5632  init_flags |= INIT_PG_OVERRIDE_ROLE_LOGIN;
5633 
5634  /* XXX is this the right errcode? */
5636  ereport(FATAL,
5637  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5638  errmsg("database connection requirement not indicated during registration")));
5639 
5640  InitPostgres(dbname, InvalidOid, /* database to connect to */
5641  username, InvalidOid, /* role to connect as */
5642  init_flags,
5643  NULL); /* no out_dbname */
5644 
5645  /* it had better not gotten out of "init" mode yet */
5646  if (!IsInitProcessingMode())
5647  ereport(ERROR,
5648  (errmsg("invalid processing mode in background worker")));
5650 }
5651 
5652 /*
5653  * Connect background worker to a database using OIDs.
5654  */
5655 void
5657 {
5659  bits32 init_flags = 0; /* never honor session_preload_libraries */
5660 
5661  /* ignore datallowconn? */
5662  if (flags & BGWORKER_BYPASS_ALLOWCONN)
5663  init_flags |= INIT_PG_OVERRIDE_ALLOW_CONNS;
5664  /* ignore rolcanlogin? */
5665  if (flags & BGWORKER_BYPASS_ROLELOGINCHECK)
5666  init_flags |= INIT_PG_OVERRIDE_ROLE_LOGIN;
5667 
5668  /* XXX is this the right errcode? */
5670  ereport(FATAL,
5671  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5672  errmsg("database connection requirement not indicated during registration")));
5673 
5674  InitPostgres(NULL, dboid, /* database to connect to */
5675  NULL, useroid, /* role to connect as */
5676  init_flags,
5677  NULL); /* no out_dbname */
5678 
5679  /* it had better not gotten out of "init" mode yet */
5680  if (!IsInitProcessingMode())
5681  ereport(ERROR,
5682  (errmsg("invalid processing mode in background worker")));
5684 }
5685 
5686 /*
5687  * Block/unblock signals in a background worker
5688  */
5689 void
5691 {
5692  sigprocmask(SIG_SETMASK, &BlockSig, NULL);
5693 }
5694 
5695 void
5697 {
5698  sigprocmask(SIG_SETMASK, &UnBlockSig, NULL);
5699 }
5700 
#ifdef EXEC_BACKEND
/*
 * bgworker_forkexec
 *		Build the argument vector for a background worker subprocess and
 *		pass it, together with the worker definition, to internal_forkexec.
 *
 * Returns whatever internal_forkexec returns.
 */
static pid_t
bgworker_forkexec(BackgroundWorker *worker)
{
	char	   *child_argv[10];
	int			child_argc = 0;

	child_argv[child_argc++] = "postgres";
	child_argv[child_argc++] = "--forkbgworker";
	child_argv[child_argc++] = NULL;	/* filled in by internal_forkexec */

	/* NULL terminator; deliberately not counted in child_argc */
	child_argv[child_argc] = NULL;

	Assert(child_argc < lengthof(child_argv));

	return internal_forkexec(child_argc, child_argv, NULL, worker);
}
#endif
5718 
5719 /*
5720  * Start a new bgworker.
5721  * Starting time conditions must have been checked already.
5722  *
5723  * Returns true on success, false on failure.
5724  * In either case, update the RegisteredBgWorker's state appropriately.
5725  *
5726  * This code is heavily based on autovacuum.c, q.v.
5727  */
5728 static bool
5730 {