PostgreSQL Source Code  git master
postmaster.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * postmaster.c
4  * This program acts as a clearing house for requests to the
5  * POSTGRES system. Frontend programs send a startup message
6  * to the Postmaster and the postmaster uses the info in the
7  * message to set up a backend process.
8  *
9  * The postmaster also manages system-wide operations such as
10  * startup and shutdown. The postmaster itself doesn't do those
11  * operations, mind you --- it just forks off a subprocess to do them
12  * at the right times. It also takes care of resetting the system
13  * if a backend crashes.
14  *
15  * The postmaster process creates the shared memory and semaphore
16  * pools during startup, but as a rule does not touch them itself.
17  * In particular, it is not a member of the PGPROC array of backends
18  * and so it cannot participate in lock-manager operations. Keeping
19  * the postmaster away from shared memory operations makes it simpler
20  * and more reliable. The postmaster is almost always able to recover
21  * from crashes of individual backends by resetting shared memory;
22  * if it did much with shared memory then it would be prone to crashing
23  * along with the backends.
24  *
25  * When a request message is received, we now fork() immediately.
26  * The child process performs authentication of the request, and
27  * then becomes a backend if successful. This allows the auth code
28  * to be written in a simple single-threaded style (as opposed to the
29  * crufty "poor man's multitasking" code that used to be needed).
30  * More importantly, it ensures that blockages in non-multithreaded
31  * libraries like SSL or PAM cannot cause denial of service to other
32  * clients.
33  *
34  *
35  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
36  * Portions Copyright (c) 1994, Regents of the University of California
37  *
38  *
39  * IDENTIFICATION
40  * src/backend/postmaster/postmaster.c
41  *
42  * NOTES
43  *
44  * Initialization:
45  * The Postmaster sets up shared memory data structures
46  * for the backends.
47  *
48  * Synchronization:
49  * The Postmaster shares memory with the backends but should avoid
50  * touching shared memory, so as not to become stuck if a crashing
51  * backend screws up locks or shared memory. Likewise, the Postmaster
52  * should never block on messages from frontend clients.
53  *
54  * Garbage Collection:
55  * The Postmaster cleans up after backends if they have an emergency
56  * exit and/or core dump.
57  *
58  * Error Reporting:
59  * Use write_stderr() only for reporting "interactive" errors
60  * (essentially, bogus arguments on the command line). Once the
61  * postmaster is launched, use ereport().
62  *
63  *-------------------------------------------------------------------------
64  */
65 
66 #include "postgres.h"
67 
68 #include <unistd.h>
69 #include <signal.h>
70 #include <time.h>
71 #include <sys/wait.h>
72 #include <ctype.h>
73 #include <sys/stat.h>
74 #include <sys/socket.h>
75 #include <fcntl.h>
76 #include <sys/param.h>
77 #include <netdb.h>
78 #include <limits.h>
79 
80 #ifdef USE_BONJOUR
81 #include <dns_sd.h>
82 #endif
83 
84 #ifdef USE_SYSTEMD
85 #include <systemd/sd-daemon.h>
86 #endif
87 
88 #ifdef HAVE_PTHREAD_IS_THREADED_NP
89 #include <pthread.h>
90 #endif
91 
92 #include "access/transam.h"
93 #include "access/xlog.h"
94 #include "access/xlogrecovery.h"
95 #include "catalog/pg_control.h"
96 #include "common/file_perm.h"
97 #include "common/ip.h"
98 #include "common/pg_prng.h"
99 #include "common/string.h"
100 #include "lib/ilist.h"
101 #include "libpq/auth.h"
102 #include "libpq/libpq.h"
103 #include "libpq/pqformat.h"
104 #include "libpq/pqsignal.h"
105 #include "nodes/queryjumble.h"
106 #include "pg_getopt.h"
107 #include "pgstat.h"
108 #include "port/pg_bswap.h"
109 #include "postmaster/autovacuum.h"
110 #include "postmaster/auxprocess.h"
112 #include "postmaster/fork_process.h"
113 #include "postmaster/interrupt.h"
114 #include "postmaster/pgarch.h"
115 #include "postmaster/postmaster.h"
116 #include "postmaster/syslogger.h"
118 #include "replication/walsender.h"
119 #include "storage/fd.h"
120 #include "storage/ipc.h"
121 #include "storage/pg_shmem.h"
122 #include "storage/pmsignal.h"
123 #include "storage/proc.h"
124 #include "tcop/tcopprot.h"
125 #include "utils/builtins.h"
126 #include "utils/datetime.h"
127 #include "utils/memutils.h"
128 #include "utils/pidfile.h"
129 #include "utils/ps_status.h"
130 #include "utils/timeout.h"
131 #include "utils/timestamp.h"
132 #include "utils/varlena.h"
133 
134 #ifdef EXEC_BACKEND
135 #include "storage/spin.h"
136 #endif
137 
138 
139 /*
140  * Possible types of a backend. Beyond being the possible bkend_type values in
141  * struct bkend, these are OR-able request flag bits for SignalSomeChildren()
142  * and CountChildren().
143  */
144 #define BACKEND_TYPE_NORMAL 0x0001 /* normal backend */
145 #define BACKEND_TYPE_AUTOVAC 0x0002 /* autovacuum worker process */
146 #define BACKEND_TYPE_WALSND 0x0004 /* walsender process */
147 #define BACKEND_TYPE_BGWORKER 0x0008 /* bgworker process */
148 #define BACKEND_TYPE_ALL 0x000F /* OR of all the above */
149 
150 /*
151  * List of active backends (or child processes anyway; we don't actually
152  * know whether a given child has become a backend or is still in the
153  * authorization phase). This is used mainly to keep track of how many
154  * children we have and send them appropriate signals when necessary.
155  *
156  * As shown in the above set of backend types, this list includes not only
157  * "normal" client sessions, but also autovacuum workers, walsenders, and
158  * background workers. (Note that at the time of launch, walsenders are
159  * labeled BACKEND_TYPE_NORMAL; we relabel them to BACKEND_TYPE_WALSND
160  * upon noticing they've changed their PMChildFlags entry. Hence that check
161  * must be done before any operation that needs to distinguish walsenders
162  * from normal backends.)
163  *
164  * Also, "dead_end" children are in it: these are children launched just for
165  * the purpose of sending a friendly rejection message to a would-be client.
166  * We must track them because they are attached to shared memory, but we know
167  * they will never become live backends. dead_end children are not assigned a
168  * PMChildSlot. dead_end children have bkend_type NORMAL.
169  *
170  * "Special" children such as the startup, bgwriter and autovacuum launcher
171  * tasks are not in this list. They are tracked via StartupPID and other
172  * pid_t variables below. (Thus, there can't be more than one of any given
173  * "special" child process type. We use BackendList entries for any child
174  * process there can be more than one of.)
175  */
176 typedef struct bkend
177 {
178  pid_t pid; /* process id of backend */
179  int32 cancel_key; /* cancel key for cancels for this backend */
180  int child_slot; /* PMChildSlot for this backend, if any */
181  int bkend_type; /* child process flavor, see above */
182  bool dead_end; /* is it going to send an error and quit? */
183  bool bgworker_notify; /* gets bgworker start/stop notifications */
184  dlist_node elem; /* list link in BackendList */
186 
188 
189 #ifdef EXEC_BACKEND
190 static Backend *ShmemBackendArray;
191 #endif
192 
194 
195 
196 
197 /* The socket number we are listening for connections on */
198 int PostPortNumber = DEF_PGPORT;
199 
200 /* The directory names for Unix socket(s) */
202 
203 /* The TCP listen address(es) */
205 
206 /*
207  * SuperuserReservedConnections is the number of backends reserved for
208  * superuser use, and ReservedConnections is the number of backends reserved
209  * for use by roles with privileges of the pg_use_reserved_connections
210  * predefined role. These are taken out of the pool of MaxConnections backend
211  * slots, so the number of backend slots available for roles that are neither
212  * superuser nor have privileges of pg_use_reserved_connections is
213  * (MaxConnections - SuperuserReservedConnections - ReservedConnections).
214  *
215  * If the number of remaining slots is less than or equal to
216  * SuperuserReservedConnections, only superusers can make new connections. If
217  * the number of remaining slots is greater than SuperuserReservedConnections
218  * but less than or equal to
219  * (SuperuserReservedConnections + ReservedConnections), only superusers and
220  * roles with privileges of pg_use_reserved_connections can make new
221  * connections. Note that pre-existing superuser and
222  * pg_use_reserved_connections connections don't count against the limits.
223  */
226 
227 /* The socket(s) we're listening to. */
228 #define MAXLISTEN 64
230 
231 /* still more option variables */
232 bool EnableSSL = false;
233 
234 int PreAuthDelay = 0;
236 
237 bool log_hostname; /* for ps display and logging */
238 bool Log_connections = false;
239 bool Db_user_namespace = false;
240 
241 bool enable_bonjour = false;
245 bool send_abort_for_crash = false;
246 bool send_abort_for_kill = false;
247 
248 /* PIDs of special child processes; 0 when not running */
249 static pid_t StartupPID = 0,
257 
258 /* Startup process's status */
259 typedef enum
260 {
263  STARTUP_SIGNALED, /* we sent it a SIGQUIT or SIGKILL */
266 
268 
269 /*
 * Startup/shutdown state.
 *
 * Shutdown-mode codes, numbered in increasing order from NoShutdown (0) to
 * ImmediateShutdown (3).  The file-level "Shutdown" variable holds one of
 * these and is initialized to NoShutdown.
 *
 * During a "smart" shutdown new connections are restricted while the
 * postmaster stays in PM_RUN or PM_HOT_STANDBY until the client backends are
 * gone (see the connsAllowed comment in this file).
 *
 * NOTE(review): the ascending numbering presumably lets code compare modes
 * by severity -- confirm against the code that assigns Shutdown.
 */
270 #define NoShutdown 0
271 #define SmartShutdown 1
272 #define FastShutdown 2
273 #define ImmediateShutdown 3
274 
275 static int Shutdown = NoShutdown;
276 
277 static bool FatalError = false; /* T if recovering from backend crash */
278 
279 /*
280  * We use a simple state machine to control startup, shutdown, and
281  * crash recovery (which is rather like shutdown followed by startup).
282  *
283  * After doing all the postmaster initialization work, we enter PM_STARTUP
284  * state and the startup process is launched. The startup process begins by
285  * reading the control file and other preliminary initialization steps.
286  * In a normal startup, or after crash recovery, the startup process exits
287  * with exit code 0 and we switch to PM_RUN state. However, archive recovery
288  * is handled specially since it takes much longer and we would like to support
289  * hot standby during archive recovery.
290  *
291  * When the startup process is ready to start archive recovery, it signals the
292  * postmaster, and we switch to PM_RECOVERY state. The background writer and
293  * checkpointer are launched, while the startup process continues applying WAL.
294  * If Hot Standby is enabled, then, after reaching a consistent point in WAL
295  * redo, startup process signals us again, and we switch to PM_HOT_STANDBY
296  * state and begin accepting connections to perform read-only queries. When
297  * archive recovery is finished, the startup process exits with exit code 0
298  * and we switch to PM_RUN state.
299  *
300  * Normal child backends can only be launched when we are in PM_RUN or
301  * PM_HOT_STANDBY state. (connsAllowed can also restrict launching.)
302  * In other states we handle connection requests by launching "dead_end"
303  * child processes, which will simply send the client an error message and
304  * quit. (We track these in the BackendList so that we can know when they
305  * are all gone; this is important because they're still connected to shared
306  * memory, and would interfere with an attempt to destroy the shmem segment,
307  * possibly leading to SHMALL failure when we try to make a new one.)
308  * In PM_WAIT_DEAD_END state we are waiting for all the dead_end children
309  * to drain out of the system, and therefore stop accepting connection
310  * requests at all until the last existing child has quit (which hopefully
311  * will not be very long).
312  *
313  * Notice that this state variable does not distinguish *why* we entered
314  * states later than PM_RUN --- Shutdown and FatalError must be consulted
315  * to find that out. FatalError is never true in PM_RECOVERY, PM_HOT_STANDBY,
316  * or PM_RUN states, nor in PM_SHUTDOWN states (because we don't enter those
317  * states when trying to recover from a crash). It can be true in PM_STARTUP
318  * state, because we don't clear it until we've successfully started WAL redo.
319  */
320 typedef enum
321 {
322  PM_INIT, /* postmaster starting */
323  PM_STARTUP, /* waiting for startup subprocess */
324  PM_RECOVERY, /* in archive recovery mode */
325  PM_HOT_STANDBY, /* in hot standby mode */
326  PM_RUN, /* normal "database is alive" state */
327  PM_STOP_BACKENDS, /* need to stop remaining backends */
328  PM_WAIT_BACKENDS, /* waiting for live backends to exit */
329  PM_SHUTDOWN, /* waiting for checkpointer to do shutdown
330  * ckpt */
331  PM_SHUTDOWN_2, /* waiting for archiver and walsenders to
332  * finish */
333  PM_WAIT_DEAD_END, /* waiting for dead_end children to exit */
334  PM_NO_CHILDREN /* all important children have exited */
336 
338 
339 /*
340  * While performing a "smart shutdown", we restrict new connections but stay
341  * in PM_RUN or PM_HOT_STANDBY state until all the client backends are gone.
342  * connsAllowed is a sub-state indicator showing the active restriction.
343  * It is of no interest unless pmState is PM_RUN or PM_HOT_STANDBY.
344  */
345 static bool connsAllowed = true;
346 
347 /* Start time of SIGKILL timeout during immediate shutdown or child crash */
348 /* Zero means timeout is not running */
349 static time_t AbortStartTime = 0;
350 
351 /* Length of said timeout */
352 #define SIGKILL_CHILDREN_AFTER_SECS 5
353 
354 static bool ReachedNormalRunning = false; /* T if we've reached PM_RUN */
355 
356 bool ClientAuthInProgress = false; /* T during new-client
357  * authentication */
358 
359 bool redirection_done = false; /* stderr redirected for syslogger? */
360 
361 /* received START_AUTOVAC_LAUNCHER signal */
362 static bool start_autovac_launcher = false;
363 
364 /* the launcher needs to be signaled to communicate some condition */
365 static bool avlauncher_needs_signal = false;
366 
367 /* received START_WALRECEIVER signal */
368 static bool WalReceiverRequested = false;
369 
370 /* set when there's a worker that needs to be started up */
371 static bool StartWorkerNeeded = true;
372 static bool HaveCrashedWorker = false;
373 
374 /* set when signals arrive */
375 static volatile sig_atomic_t pending_pm_pmsignal;
376 static volatile sig_atomic_t pending_pm_child_exit;
377 static volatile sig_atomic_t pending_pm_reload_request;
378 static volatile sig_atomic_t pending_pm_shutdown_request;
379 static volatile sig_atomic_t pending_pm_fast_shutdown_request;
380 static volatile sig_atomic_t pending_pm_immediate_shutdown_request;
381 
382 /* event multiplexing object */
384 
385 #ifdef USE_SSL
386 /* Set when and if SSL has been initialized properly */
387 static bool LoadedSSL = false;
388 #endif
389 
390 #ifdef USE_BONJOUR
391 static DNSServiceRef bonjour_sdref = NULL;
392 #endif
393 
394 /*
395  * postmaster.c - function prototypes
396  */
397 static void CloseServerPorts(int status, Datum arg);
398 static void unlink_external_pid_file(int status, Datum arg);
399 static void getInstallationPaths(const char *argv0);
400 static void checkControlFile(void);
401 static Port *ConnCreate(int serverFd);
402 static void ConnFree(Port *port);
407 static void process_pm_pmsignal(void);
408 static void process_pm_child_exit(void);
409 static void process_pm_reload_request(void);
410 static void process_pm_shutdown_request(void);
412 static void dummy_handler(SIGNAL_ARGS);
413 static void StartupPacketTimeoutHandler(void);
414 static void CleanupBackend(int pid, int exitstatus);
415 static bool CleanupBackgroundWorker(int pid, int exitstatus);
416 static void HandleChildCrash(int pid, int exitstatus, const char *procname);
417 static void LogChildExit(int lev, const char *procname,
418  int pid, int exitstatus);
419 static void PostmasterStateMachine(void);
420 static void BackendInitialize(Port *port);
421 static void BackendRun(Port *port) pg_attribute_noreturn();
422 static void ExitPostmaster(int status) pg_attribute_noreturn();
423 static int ServerLoop(void);
424 static int BackendStartup(Port *port);
425 static int ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done);
426 static void SendNegotiateProtocolVersion(List *unrecognized_protocol_options);
427 static void processCancelRequest(Port *port, void *pkt);
428 static void report_fork_failure_to_client(Port *port, int errnum);
429 static CAC_state canAcceptConnections(int backend_type);
430 static bool RandomCancelKey(int32 *cancel_key);
431 static void signal_child(pid_t pid, int signal);
432 static void sigquit_child(pid_t pid);
433 static bool SignalSomeChildren(int signal, int target);
434 static void TerminateChildren(int signal);
435 
436 #define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL)
437 
438 static int CountChildren(int target);
440 static void maybe_start_bgworkers(void);
441 static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
442 static pid_t StartChildProcess(AuxProcType type);
443 static void StartAutovacuumWorker(void);
444 static void MaybeStartWalReceiver(void);
445 static void InitPostmasterDeathWatchHandle(void);
446 
447 /*
448  * Is the archiver allowed to start up in the current postmaster state?
449  *
450  * It is allowed when XLogArchivingActive() holds and pmState is PM_RUN, or
451  * when XLogArchivingAlways() holds and pmState is PM_RECOVERY or
 * PM_HOT_STANDBY (that is, archiving may also be started during recovery).
 * In either case PgArchCanRestart() must additionally return true.
 *
 * The macro reads the file-level pmState variable directly, so it reflects
 * the state at the point of expansion.
452  */
453 #define PgArchStartupAllowed() \
454  (((XLogArchivingActive() && pmState == PM_RUN) || \
455  (XLogArchivingAlways() && \
456  (pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY))) && \
457  PgArchCanRestart())
458 
459 #ifdef EXEC_BACKEND
460 
461 #ifdef WIN32
462 #define WNOHANG 0 /* ignored, so any integer value will do */
463 
464 static pid_t waitpid(pid_t pid, int *exitstatus, int options);
465 static void WINAPI pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired);
466 
467 static HANDLE win32ChildQueue;
468 
/*
 * Record describing one child process in the WIN32 EXEC_BACKEND build: two
 * HANDLEs plus the child's numeric process id.
 *
 * NOTE(review): presumably allocated per child and handed to
 * pgwin32_deadchild_callback() as its lpParameter, with waitHandle being the
 * wait registration and procHandle the process handle -- confirm against the
 * code that fills this struct in.
 */
469 typedef struct
470 {
471  HANDLE waitHandle;
472  HANDLE procHandle;
473  DWORD procId;
474 } win32_deadchild_waitinfo;
475 #endif /* WIN32 */
476 
477 static pid_t backend_forkexec(Port *port);
478 static pid_t internal_forkexec(int argc, char *argv[], Port *port);
479 
480 /*
 * Type for a socket that can be inherited to a client process.
 *
 * On WIN32 this is a struct holding the original SOCKET value together with
 * a WSAPROTOCOL_INFO; on every other platform it is a plain int.
 */
481 #ifdef WIN32
482 typedef struct
483 {
484  SOCKET origsocket; /* Original socket value, or PGINVALID_SOCKET
485  * if not a socket */
486  WSAPROTOCOL_INFO wsainfo;	/* NOTE(review): presumably the protocol info
					 * the child uses to re-create the socket --
					 * confirm */
487 } InheritableSocket;
488 #else
489 typedef int InheritableSocket;
490 #endif
491 
492 /*
493  * Structure contains all variables passed to exec'ed backends.
 *
 * Only compiled in EXEC_BACKEND builds.  Several members have a
 * platform-dependent representation, selected by the WIN32 conditionals
 * below.
 *
 * NOTE(review): presumably filled in by save_backend_variables() in the
 * postmaster and unpacked by restore_backend_variables() in the child --
 * confirm against those functions.
494  */
495 typedef struct
496 {
497  Port port;
498  InheritableSocket portsocket;
499  char DataDir[MAXPGPATH];
502  int MyPMChildSlot;
/* Shared memory segment id: unsigned long on non-Windows, HANDLE on Windows */
503 #ifndef WIN32
504  unsigned long UsedShmemSegID;
505 #else
506  void *ShmemProtectiveRegion;
507  HANDLE UsedShmemSegID;
508 #endif
509  void *UsedShmemSegAddr;
512  Backend *ShmemBackendArray;
513 #ifndef HAVE_SPINLOCKS
515 #endif
524  pid_t PostmasterPid;
528  bool redirection_done;
529  bool IsBinaryUpgrade;
530  bool query_id_enabled;
531  int max_safe_fds;
532  int MaxBackends;
/*
 * Postmaster-liveness and syslogger pipe members: Windows uses HANDLEs
 * (including the postmaster's process handle), other platforms use file
 * descriptor pairs.
 */
533 #ifdef WIN32
534  HANDLE PostmasterHandle;
535  HANDLE initial_signal_pipe;
536  HANDLE syslogPipe[2];
537 #else
538  int postmaster_alive_fds[2];
539  int syslogPipe[2];
540 #endif
541  char my_exec_path[MAXPGPATH];
542  char pkglib_path[MAXPGPATH];
543 } BackendParameters;
544 
545 static void read_backend_variables(char *id, Port *port);
546 static void restore_backend_variables(BackendParameters *param, Port *port);
547 
548 #ifndef WIN32
549 static bool save_backend_variables(BackendParameters *param, Port *port);
550 #else
551 static bool save_backend_variables(BackendParameters *param, Port *port,
552  HANDLE childProcess, pid_t childPid);
553 #endif
554 
555 static void ShmemBackendArrayAdd(Backend *bn);
556 static void ShmemBackendArrayRemove(Backend *bn);
557 #endif /* EXEC_BACKEND */
558 
/*
 * Shorthand launchers for the auxiliary processes: each macro expands to a
 * StartChildProcess() call with the corresponding process-type constant, and
 * so yields that call's pid_t result (StartChildProcess() is declared earlier
 * in this file as "static pid_t StartChildProcess(AuxProcType type)").
 */
559 #define StartupDataBase() StartChildProcess(StartupProcess)
560 #define StartArchiver() StartChildProcess(ArchiverProcess)
561 #define StartBackgroundWriter() StartChildProcess(BgWriterProcess)
562 #define StartCheckpointer() StartChildProcess(CheckpointerProcess)
563 #define StartWalWriter() StartChildProcess(WalWriterProcess)
564 #define StartWalReceiver() StartChildProcess(WalReceiverProcess)
565 
566 /*
 * Macros to check exit status of a child process.
 *
 * "st" is the status word the WIFEXITED()/WEXITSTATUS() macros are applied
 * to.  EXIT_STATUS_0 compares the whole status word with zero, whereas
 * EXIT_STATUS_1 and EXIT_STATUS_3 first require WIFEXITED(st) and then
 * compare WEXITSTATUS(st) with 1 or 3 respectively.
 *
 * EXIT_STATUS_1 and EXIT_STATUS_3 evaluate their argument twice, so do not
 * pass an expression with side effects.
 */
567 #define EXIT_STATUS_0(st) ((st) == 0)
568 #define EXIT_STATUS_1(st) (WIFEXITED(st) && WEXITSTATUS(st) == 1)
569 #define EXIT_STATUS_3(st) (WIFEXITED(st) && WEXITSTATUS(st) == 3)
570 
571 #ifndef WIN32
572 /*
573  * File descriptors for pipe used to monitor if postmaster is alive.
574  * First is POSTMASTER_FD_WATCH, second is POSTMASTER_FD_OWN.
575  */
576 int postmaster_alive_fds[2] = {-1, -1};
577 #else
578 /* Process handle of postmaster used for the same purpose on Windows */
579 HANDLE PostmasterHandle;
580 #endif
581 
582 /*
583  * Postmaster main entry point
584  */
585 void
586 PostmasterMain(int argc, char *argv[])
587 {
588  int opt;
589  int status;
590  char *userDoption = NULL;
591  bool listen_addr_saved = false;
592  int i;
593  char *output_config_variable = NULL;
594 
596 
598 
600 
601  /*
602  * Start our win32 signal implementation
603  */
604 #ifdef WIN32
606 #endif
607 
608  /*
609  * We should not be creating any files or directories before we check the
610  * data directory (see checkDataDir()), but just in case set the umask to
611  * the most restrictive (owner-only) permissions.
612  *
613  * checkDataDir() will reset the umask based on the data directory
614  * permissions.
615  */
616  umask(PG_MODE_MASK_OWNER);
617 
618  /*
619  * By default, palloc() requests in the postmaster will be allocated in
620  * the PostmasterContext, which is space that can be recycled by backends.
621  * Allocated data that needs to be available to backends should be
622  * allocated in TopMemoryContext.
623  */
625  "Postmaster",
628 
629  /* Initialize paths to installation files */
630  getInstallationPaths(argv[0]);
631 
632  /*
633  * Set up signal handlers for the postmaster process.
634  *
635  * CAUTION: when changing this list, check for side-effects on the signal
636  * handling setup of child processes. See tcop/postgres.c,
637  * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
638  * postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/syslogger.c,
639  * postmaster/bgworker.c and postmaster/checkpointer.c.
640  */
641  pqinitmask();
642  sigprocmask(SIG_SETMASK, &BlockSig, NULL);
643 
648  pqsignal(SIGALRM, SIG_IGN); /* ignored */
649  pqsignal(SIGPIPE, SIG_IGN); /* ignored */
651  pqsignal(SIGUSR2, dummy_handler); /* unused, reserve for children */
653 
654  /* This may configure SIGURG, depending on platform. */
657 
658  /*
659  * No other place in Postgres should touch SIGTTIN/SIGTTOU handling. We
660  * ignore those signals in a postmaster environment, so that there is no
661  * risk of a child process freezing up due to writing to stderr. But for
662  * a standalone backend, their default handling is reasonable. Hence, all
663  * child processes should just allow the inherited settings to stand.
664  */
665 #ifdef SIGTTIN
666  pqsignal(SIGTTIN, SIG_IGN); /* ignored */
667 #endif
668 #ifdef SIGTTOU
669  pqsignal(SIGTTOU, SIG_IGN); /* ignored */
670 #endif
671 
672  /* ignore SIGXFSZ, so that ulimit violations work like disk full */
673 #ifdef SIGXFSZ
674  pqsignal(SIGXFSZ, SIG_IGN); /* ignored */
675 #endif
676 
677  /* Begin accepting signals. */
678  sigprocmask(SIG_SETMASK, &UnBlockSig, NULL);
679 
680  /*
681  * Options setup
682  */
684 
685  opterr = 1;
686 
687  /*
688  * Parse command-line options. CAUTION: keep this in sync with
689  * tcop/postgres.c (the option sets should not conflict) and with the
690  * common help() function in main/main.c.
691  */
692  while ((opt = getopt(argc, argv, "B:bC:c:D:d:EeFf:h:ijk:lN:OPp:r:S:sTt:W:-:")) != -1)
693  {
694  switch (opt)
695  {
696  case 'B':
697  SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
698  break;
699 
700  case 'b':
701  /* Undocumented flag used for binary upgrades */
702  IsBinaryUpgrade = true;
703  break;
704 
705  case 'C':
706  output_config_variable = strdup(optarg);
707  break;
708 
709  case 'c':
710  case '-':
711  {
712  char *name,
713  *value;
714 
716  if (!value)
717  {
718  if (opt == '-')
719  ereport(ERROR,
720  (errcode(ERRCODE_SYNTAX_ERROR),
721  errmsg("--%s requires a value",
722  optarg)));
723  else
724  ereport(ERROR,
725  (errcode(ERRCODE_SYNTAX_ERROR),
726  errmsg("-c %s requires a value",
727  optarg)));
728  }
729 
731  pfree(name);
732  pfree(value);
733  break;
734  }
735 
736  case 'D':
737  userDoption = strdup(optarg);
738  break;
739 
740  case 'd':
742  break;
743 
744  case 'E':
745  SetConfigOption("log_statement", "all", PGC_POSTMASTER, PGC_S_ARGV);
746  break;
747 
748  case 'e':
749  SetConfigOption("datestyle", "euro", PGC_POSTMASTER, PGC_S_ARGV);
750  break;
751 
752  case 'F':
753  SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
754  break;
755 
756  case 'f':
758  {
759  write_stderr("%s: invalid argument for option -f: \"%s\"\n",
760  progname, optarg);
761  ExitPostmaster(1);
762  }
763  break;
764 
765  case 'h':
766  SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
767  break;
768 
769  case 'i':
770  SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
771  break;
772 
773  case 'j':
774  /* only used by interactive backend */
775  break;
776 
777  case 'k':
778  SetConfigOption("unix_socket_directories", optarg, PGC_POSTMASTER, PGC_S_ARGV);
779  break;
780 
781  case 'l':
782  SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
783  break;
784 
785  case 'N':
786  SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
787  break;
788 
789  case 'O':
790  SetConfigOption("allow_system_table_mods", "true", PGC_POSTMASTER, PGC_S_ARGV);
791  break;
792 
793  case 'P':
794  SetConfigOption("ignore_system_indexes", "true", PGC_POSTMASTER, PGC_S_ARGV);
795  break;
796 
797  case 'p':
799  break;
800 
801  case 'r':
802  /* only used by single-user backend */
803  break;
804 
805  case 'S':
807  break;
808 
809  case 's':
810  SetConfigOption("log_statement_stats", "true", PGC_POSTMASTER, PGC_S_ARGV);
811  break;
812 
813  case 'T':
814 
815  /*
816  * This option used to be defined as sending SIGSTOP after a
817  * backend crash, but sending SIGABRT seems more useful.
818  */
819  SetConfigOption("send_abort_for_crash", "true", PGC_POSTMASTER, PGC_S_ARGV);
820  break;
821 
822  case 't':
823  {
824  const char *tmp = get_stats_option_name(optarg);
825 
826  if (tmp)
827  {
829  }
830  else
831  {
832  write_stderr("%s: invalid argument for option -t: \"%s\"\n",
833  progname, optarg);
834  ExitPostmaster(1);
835  }
836  break;
837  }
838 
839  case 'W':
840  SetConfigOption("post_auth_delay", optarg, PGC_POSTMASTER, PGC_S_ARGV);
841  break;
842 
843  default:
844  write_stderr("Try \"%s --help\" for more information.\n",
845  progname);
846  ExitPostmaster(1);
847  }
848  }
849 
850  /*
851  * Postmaster accepts no non-option switch arguments.
852  */
853  if (optind < argc)
854  {
855  write_stderr("%s: invalid argument: \"%s\"\n",
856  progname, argv[optind]);
857  write_stderr("Try \"%s --help\" for more information.\n",
858  progname);
859  ExitPostmaster(1);
860  }
861 
862  /*
863  * Locate the proper configuration files and data directory, and read
864  * postgresql.conf for the first time.
865  */
867  ExitPostmaster(2);
868 
869  if (output_config_variable != NULL)
870  {
871  /*
872  * If this is a runtime-computed GUC, it hasn't yet been initialized,
873  * and the present value is not useful. However, this is a convenient
874  * place to print the value for most GUCs because it is safe to run
875  * postmaster startup to this point even if the server is already
876  * running. For the handful of runtime-computed GUCs that we cannot
877  * provide meaningful values for yet, we wait until later in
878  * postmaster startup to print the value. We won't be able to use -C
879  * on running servers for those GUCs, but using this option now would
880  * lead to incorrect results for them.
881  */
882  int flags = GetConfigOptionFlags(output_config_variable, true);
883 
884  if ((flags & GUC_RUNTIME_COMPUTED) == 0)
885  {
886  /*
887  * "-C guc" was specified, so print GUC's value and exit. No
888  * extra permission check is needed because the user is reading
889  * inside the data dir.
890  */
891  const char *config_val = GetConfigOption(output_config_variable,
892  false, false);
893 
894  puts(config_val ? config_val : "");
895  ExitPostmaster(0);
896  }
897 
898  /*
899  * A runtime-computed GUC will be printed later on. As we initialize
900  * a server startup sequence, silence any log messages that may show
901  * up in the output generated. FATAL and more severe messages are
902  * useful to show, even if one would only expect at least PANIC. LOG
903  * entries are hidden.
904  */
905  SetConfigOption("log_min_messages", "FATAL", PGC_SUSET,
907  }
908 
909  /* Verify that DataDir looks reasonable */
910  checkDataDir();
911 
912  /* Check that pg_control exists */
914 
915  /* And switch working directory into it */
916  ChangeToDataDir();
917 
918  /*
919  * Check for invalid combinations of GUC settings.
920  */
922  {
923  write_stderr("%s: superuser_reserved_connections (%d) plus reserved_connections (%d) must be less than max_connections (%d)\n",
924  progname,
927  ExitPostmaster(1);
928  }
930  ereport(ERROR,
931  (errmsg("WAL archival cannot be enabled when wal_level is \"minimal\"")));
933  ereport(ERROR,
934  (errmsg("WAL streaming (max_wal_senders > 0) requires wal_level \"replica\" or \"logical\"")));
935 
936  /*
937  * Other one-time internal sanity checks can go here, if they are fast.
938  * (Put any slow processing further down, after postmaster.pid creation.)
939  */
940  if (!CheckDateTokenTables())
941  {
942  write_stderr("%s: invalid datetoken tables, please fix\n", progname);
943  ExitPostmaster(1);
944  }
945 
946  /*
947  * Now that we are done processing the postmaster arguments, reset
948  * getopt(3) library so that it will work correctly in subprocesses.
949  */
950  optind = 1;
951 #ifdef HAVE_INT_OPTRESET
952  optreset = 1; /* some systems need this too */
953 #endif
954 
955  /* For debugging: display postmaster environment */
956  {
957  extern char **environ;
958  char **p;
959 
960  ereport(DEBUG3,
961  (errmsg_internal("%s: PostmasterMain: initial environment dump:",
962  progname)));
963  ereport(DEBUG3,
964  (errmsg_internal("-----------------------------------------")));
965  for (p = environ; *p; ++p)
966  ereport(DEBUG3,
967  (errmsg_internal("\t%s", *p)));
968  ereport(DEBUG3,
969  (errmsg_internal("-----------------------------------------")));
970  }
971 
972  /*
973  * Create lockfile for data directory.
974  *
975  * We want to do this before we try to grab the input sockets, because the
976  * data directory interlock is more reliable than the socket-file
977  * interlock (thanks to whoever decided to put socket files in /tmp :-().
978  * For the same reason, it's best to grab the TCP socket(s) before the
979  * Unix socket(s).
980  *
981  * Also note that this internally sets up the on_proc_exit function that
982  * is responsible for removing both data directory and socket lockfiles;
983  * so it must happen before opening sockets so that at exit, the socket
984  * lockfiles go away after CloseServerPorts runs.
985  */
986  CreateDataDirLockFile(true);
987 
988  /*
989  * Read the control file (for error checking and config info).
990  *
991  * Since we verify the control file's CRC, this has a useful side effect
992  * on machines where we need a run-time test for CRC support instructions.
993  * The postmaster will do the test once at startup, and then its child
994  * processes will inherit the correct function pointer and not need to
995  * repeat the test.
996  */
998 
999  /*
1000  * Register the apply launcher. It's probably a good idea to call this
1001  * before any modules had a chance to take the background worker slots.
1002  */
1004 
1005  /*
1006  * process any libraries that should be preloaded at postmaster start
1007  */
1009 
1010  /*
1011  * Initialize SSL library, if specified.
1012  */
1013 #ifdef USE_SSL
1014  if (EnableSSL)
1015  {
1016  (void) secure_initialize(true);
1017  LoadedSSL = true;
1018  }
1019 #endif
1020 
1021  /*
1022  * Now that loadable modules have had their chance to alter any GUCs,
1023  * calculate MaxBackends.
1024  */
1026 
1027  /*
1028  * Give preloaded libraries a chance to request additional shared memory.
1029  */
1031 
1032  /*
1033  * Now that loadable modules have had their chance to request additional
1034  * shared memory, determine the value of any runtime-computed GUCs that
1035  * depend on the amount of shared memory required.
1036  */
1038 
1039  /*
1040  * Now that modules have been loaded, we can process any custom resource
1041  * managers specified in the wal_consistency_checking GUC.
1042  */
1044 
1045  /*
1046  * If -C was specified with a runtime-computed GUC, we held off printing
1047  * the value earlier, as the GUC was not yet initialized. We handle -C
1048  * for most GUCs before we lock the data directory so that the option may
1049  * be used on a running server. However, a handful of GUCs are runtime-
1050  * computed and do not have meaningful values until after locking the data
1051  * directory, and we cannot safely calculate their values earlier on a
1052  * running server. At this point, such GUCs should be properly
1053  * initialized, and we haven't yet set up shared memory, so this is a good
1054  * time to handle the -C option for these special GUCs.
1055  */
1056  if (output_config_variable != NULL)
1057  {
1058  const char *config_val = GetConfigOption(output_config_variable,
1059  false, false);
1060 
1061  puts(config_val ? config_val : "");
1062  ExitPostmaster(0);
1063  }
1064 
1065  /*
1066  * Set up shared memory and semaphores.
1067  *
1068  * Note: if using SysV shmem and/or semas, each postmaster startup will
1069  * normally choose the same IPC keys. This helps ensure that we will
1070  * clean up dead IPC objects if the postmaster crashes and is restarted.
1071  */
1073 
1074  /*
1075  * Estimate number of openable files. This must happen after setting up
1076  * semaphores, because on some platforms semaphores count as open files.
1077  */
1078  set_max_safe_fds();
1079 
1080  /*
1081  * Set reference point for stack-depth checking.
1082  */
1083  (void) set_stack_base();
1084 
1085  /*
1086  * Initialize pipe (or process handle on Windows) that allows children to
1087  * wake up from sleep on postmaster death.
1088  */
1090 
1091 #ifdef WIN32
1092 
1093  /*
1094  * Initialize I/O completion port used to deliver list of dead children.
1095  */
1096  win32ChildQueue = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 1);
1097  if (win32ChildQueue == NULL)
1098  ereport(FATAL,
1099  (errmsg("could not create I/O completion port for child queue")));
1100 #endif
1101 
1102 #ifdef EXEC_BACKEND
1103  /* Write out nondefault GUC settings for child processes to use */
1104  write_nondefault_variables(PGC_POSTMASTER);
1105 
1106  /*
1107  * Clean out the temp directory used to transmit parameters to child
1108  * processes (see internal_forkexec, below). We must do this before
1109  * launching any child processes, else we have a race condition: we could
1110  * remove a parameter file before the child can read it. It should be
1111  * safe to do so now, because we verified earlier that there are no
1112  * conflicting Postgres processes in this data directory.
1113  */
1115 #endif
1116 
1117  /*
1118  * Forcibly remove the files signaling a standby promotion request.
1119  * Otherwise, the existence of those files triggers a promotion too early,
1120  * whether a user wants that or not.
1121  *
1122  * This removal of files is usually unnecessary because they can exist
1123  * only during a few moments during a standby promotion. However there is
1124  * a race condition: if pg_ctl promote is executed and creates the files
1125  * during a promotion, the files can stay around even after the server is
1126  * brought up to be the primary. Then, if a new standby starts by using
1127  * the backup taken from the new primary, the files can exist at server
1128  * startup and must be removed in order to avoid an unexpected promotion.
1129  *
1130  * Note that promotion signal files need to be removed before the startup
1131  * process is invoked. Because, after that, they can be used by
1132  * postmaster's SIGUSR1 signal handler.
1133  */
1135 
1136  /* Do the same for logrotate signal file */
1138 
1139  /* Remove any outdated file holding the current log filenames. */
1140  if (unlink(LOG_METAINFO_DATAFILE) < 0 && errno != ENOENT)
1141  ereport(LOG,
1143  errmsg("could not remove file \"%s\": %m",
1145 
1146  /*
1147  * If enabled, start up syslogger collection subprocess
1148  */
1150 
1151  /*
1152  * Reset whereToSendOutput from DestDebug (its starting state) to
1153  * DestNone. This stops ereport from sending log messages to stderr unless
1154  * Log_destination permits. We don't do this until the postmaster is
1155  * fully launched, since startup failures may as well be reported to
1156  * stderr.
1157  *
1158  * If we are in fact disabling logging to stderr, first emit a log message
1159  * saying so, to provide a breadcrumb trail for users who may not remember
1160  * that their logging is configured to go somewhere else.
1161  */
1163  ereport(LOG,
1164  (errmsg("ending log output to stderr"),
1165  errhint("Future log output will go to log destination \"%s\".",
1167 
1169 
1170  /*
1171  * Report server startup in log. While we could emit this much earlier,
1172  * it seems best to do so after starting the log collector, if we intend
1173  * to use one.
1174  */
1175  ereport(LOG,
1176  (errmsg("starting %s", PG_VERSION_STR)));
1177 
1178  /*
1179  * Establish input sockets.
1180  *
1181  * First, mark them all closed, and set up an on_proc_exit function that's
1182  * charged with closing the sockets again at postmaster shutdown.
1183  */
1184  for (i = 0; i < MAXLISTEN; i++)
1186 
1188 
1189  if (ListenAddresses)
1190  {
1191  char *rawstring;
1192  List *elemlist;
1193  ListCell *l;
1194  int success = 0;
1195 
1196  /* Need a modifiable copy of ListenAddresses */
1197  rawstring = pstrdup(ListenAddresses);
1198 
1199  /* Parse string into list of hostnames */
1200  if (!SplitGUCList(rawstring, ',', &elemlist))
1201  {
1202  /* syntax error in list */
1203  ereport(FATAL,
1204  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1205  errmsg("invalid list syntax in parameter \"%s\"",
1206  "listen_addresses")));
1207  }
1208 
1209  foreach(l, elemlist)
1210  {
1211  char *curhost = (char *) lfirst(l);
1212 
1213  if (strcmp(curhost, "*") == 0)
1214  status = StreamServerPort(AF_UNSPEC, NULL,
1215  (unsigned short) PostPortNumber,
1216  NULL,
1218  else
1219  status = StreamServerPort(AF_UNSPEC, curhost,
1220  (unsigned short) PostPortNumber,
1221  NULL,
1223 
1224  if (status == STATUS_OK)
1225  {
1226  success++;
1227  /* record the first successful host addr in lockfile */
1228  if (!listen_addr_saved)
1229  {
1231  listen_addr_saved = true;
1232  }
1233  }
1234  else
1235  ereport(WARNING,
1236  (errmsg("could not create listen socket for \"%s\"",
1237  curhost)));
1238  }
1239 
1240  if (!success && elemlist != NIL)
1241  ereport(FATAL,
1242  (errmsg("could not create any TCP/IP sockets")));
1243 
1244  list_free(elemlist);
1245  pfree(rawstring);
1246  }
1247 
1248 #ifdef USE_BONJOUR
1249  /* Register for Bonjour only if we opened TCP socket(s) */
1251  {
1252  DNSServiceErrorType err;
1253 
1254  /*
1255  * We pass 0 for interface_index, which will result in registering on
1256  * all "applicable" interfaces. It's not entirely clear from the
1257  * DNS-SD docs whether this would be appropriate if we have bound to
1258  * just a subset of the available network interfaces.
1259  */
1260  err = DNSServiceRegister(&bonjour_sdref,
1261  0,
1262  0,
1263  bonjour_name,
1264  "_postgresql._tcp.",
1265  NULL,
1266  NULL,
1268  0,
1269  NULL,
1270  NULL,
1271  NULL);
1272  if (err != kDNSServiceErr_NoError)
1273  ereport(LOG,
1274  (errmsg("DNSServiceRegister() failed: error code %ld",
1275  (long) err)));
1276 
1277  /*
1278  * We don't bother to read the mDNS daemon's reply, and we expect that
1279  * it will automatically terminate our registration when the socket is
1280  * closed at postmaster termination. So there's nothing more to be
1281  * done here. However, the bonjour_sdref is kept around so that
1282  * forked children can close their copies of the socket.
1283  */
1284  }
1285 #endif
1286 
1288  {
1289  char *rawstring;
1290  List *elemlist;
1291  ListCell *l;
1292  int success = 0;
1293 
1294  /* Need a modifiable copy of Unix_socket_directories */
1295  rawstring = pstrdup(Unix_socket_directories);
1296 
1297  /* Parse string into list of directories */
1298  if (!SplitDirectoriesString(rawstring, ',', &elemlist))
1299  {
1300  /* syntax error in list */
1301  ereport(FATAL,
1302  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1303  errmsg("invalid list syntax in parameter \"%s\"",
1304  "unix_socket_directories")));
1305  }
1306 
1307  foreach(l, elemlist)
1308  {
1309  char *socketdir = (char *) lfirst(l);
1310 
1311  status = StreamServerPort(AF_UNIX, NULL,
1312  (unsigned short) PostPortNumber,
1313  socketdir,
1315 
1316  if (status == STATUS_OK)
1317  {
1318  success++;
1319  /* record the first successful Unix socket in lockfile */
1320  if (success == 1)
1322  }
1323  else
1324  ereport(WARNING,
1325  (errmsg("could not create Unix-domain socket in directory \"%s\"",
1326  socketdir)));
1327  }
1328 
1329  if (!success && elemlist != NIL)
1330  ereport(FATAL,
1331  (errmsg("could not create any Unix-domain sockets")));
1332 
1333  list_free_deep(elemlist);
1334  pfree(rawstring);
1335  }
1336 
1337  /*
1338  * check that we have some socket to listen on
1339  */
1340  if (ListenSocket[0] == PGINVALID_SOCKET)
1341  ereport(FATAL,
1342  (errmsg("no socket created for listening")));
1343 
1344  /*
1345  * If no valid TCP ports, write an empty line for listen address,
1346  * indicating the Unix socket must be used. Note that this line is not
1347  * added to the lock file until there is a socket backing it.
1348  */
1349  if (!listen_addr_saved)
1351 
1352  /*
1353  * Record postmaster options. We delay this till now to avoid recording
1354  * bogus options (eg, unusable port number).
1355  */
1356  if (!CreateOptsFile(argc, argv, my_exec_path))
1357  ExitPostmaster(1);
1358 
1359  /*
1360  * Write the external PID file if requested
1361  */
1362  if (external_pid_file)
1363  {
1364  FILE *fpidfile = fopen(external_pid_file, "w");
1365 
1366  if (fpidfile)
1367  {
1368  fprintf(fpidfile, "%d\n", MyProcPid);
1369  fclose(fpidfile);
1370 
1371  /* Make PID file world readable */
1372  if (chmod(external_pid_file, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) != 0)
1373  write_stderr("%s: could not change permissions of external PID file \"%s\": %s\n",
1375  }
1376  else
1377  write_stderr("%s: could not write external PID file \"%s\": %s\n",
1379 
1381  }
1382 
1383  /*
1384  * Remove old temporary files. At this point there can be no other
1385  * Postgres processes running in this directory, so this should be safe.
1386  */
1388 
1389  /*
1390  * Initialize the autovacuum subsystem (again, no process start yet)
1391  */
1392  autovac_init();
1393 
1394  /*
1395  * Load configuration files for client authentication.
1396  */
1397  if (!load_hba())
1398  {
1399  /*
1400  * It makes no sense to continue if we fail to load the HBA file,
1401  * since there is no way to connect to the database in this case.
1402  */
1403  ereport(FATAL,
1404  /* translator: %s is a configuration file */
1405  (errmsg("could not load %s", HbaFileName)));
1406  }
1407  if (!load_ident())
1408  {
1409  /*
1410  * We can start up without the IDENT file, although it means that you
1411  * cannot log in using any of the authentication methods that need a
1412  * user name mapping. load_ident() already logged the details of error
1413  * to the log.
1414  */
1415  }
1416 
1417 #ifdef HAVE_PTHREAD_IS_THREADED_NP
1418 
1419  /*
1420  * On macOS, libintl replaces setlocale() with a version that calls
1421  * CFLocaleCopyCurrent() when its second argument is "" and every relevant
1422  * environment variable is unset or empty. CFLocaleCopyCurrent() makes
1423  * the process multithreaded. The postmaster calls sigprocmask() and
1424  * calls fork() without an immediate exec(), both of which have undefined
1425  * behavior in a multithreaded program. A multithreaded postmaster is the
1426  * normal case on Windows, which offers neither fork() nor sigprocmask().
1427  */
1428  if (pthread_is_threaded_np() != 0)
1429  ereport(FATAL,
1430  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1431  errmsg("postmaster became multithreaded during startup"),
1432  errhint("Set the LC_ALL environment variable to a valid locale.")));
1433 #endif
1434 
1435  /*
1436  * Remember postmaster startup time
1437  */
1439 
1440  /*
1441  * Report postmaster status in the postmaster.pid file, to allow pg_ctl to
1442  * see what's happening.
1443  */
1445 
1446  /* Start bgwriter and checkpointer so they can help with recovery */
1447  if (CheckpointerPID == 0)
1449  if (BgWriterPID == 0)
1451 
1452  /*
1453  * We're ready to rock and roll...
1454  */
1456  Assert(StartupPID != 0);
1458  pmState = PM_STARTUP;
1459 
1460  /* Some workers may be scheduled to start now */
1462 
1463  status = ServerLoop();
1464 
1465  /*
1466  * ServerLoop probably shouldn't ever return, but if it does, close down.
1467  */
1469 
1470  abort(); /* not reached */
1471 }
1472 
1473 
1474 /*
1475  * on_proc_exit callback to close server's listen sockets
 *
 * Works in two steps, as laid out by the comments in the body: first every
 * listen-socket slot (indices 0 .. MAXLISTEN-1) is visited so that open
 * sockets can be closed explicitly, then the filesystem entries belonging
 * to Unix-domain sockets are removed.  Socket lock files are deliberately
 * not touched here; a later on_proc_exit callback takes care of them.
 *
 * NOTE(review): this listing has lost the line carrying the function name
 * and parameter list, the condition and statements inside the loop, and the
 * call that removes the socket files.  The description above relies on the
 * surviving comments only -- confirm against the complete file.
1476  */
1477 static void
1479 {
1480  int i;
1481 
1482  /*
1483  * First, explicitly close all the socket FDs. We used to just let this
1484  * happen implicitly at postmaster exit, but it's better to close them
1485  * before we remove the postmaster.pid lockfile; otherwise there's a race
1486  * condition if a new postmaster wants to re-use the TCP port number.
1487  */
1488  for (i = 0; i < MAXLISTEN; i++)
1489  {
 /* NOTE(review): the per-slot test and close statements are missing here */
1491  {
1494  }
1495  }
1496 
1497  /*
1498  * Next, remove any filesystem entries for Unix sockets. To avoid race
1499  * conditions against incoming postmasters, this must happen after closing
1500  * the sockets and before removing lock files.
1501  */
1503 
1504  /*
1505  * We don't do anything about socket lock files here; those will be
1506  * removed in a later on_proc_exit callback.
1507  */
1508 }
1509 
1510 /*
1511  * on_proc_exit callback to delete external_pid_file
 *
 * Does nothing when external_pid_file is not set.  Otherwise the file is
 * removed with unlink(); the return value of unlink() is not examined, so a
 * failure to remove the file is silently ignored.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing; the body shown does not use any parameter.
1512  */
1513 static void
1515 {
1516  if (external_pid_file)
1517  unlink(external_pid_file);
1518 }
1519 
1520 
1521 /*
1522  * Compute and check the directory paths to files that are part of the
1523  * installation (as deduced from the postgres executable's own location)
 *
 * Steps visible in the body:
 *	1. find_my_exec() fills my_exec_path from argv0; failure is FATAL.
 *	2. In EXEC_BACKEND builds only, find_other_exec() fills
 *	   postgres_exec_path with a "postgres" executable matching
 *	   PG_BACKEND_VERSIONSTR; failure is FATAL.
 *	3. pkglib_path is opened with AllocateDir() as a sanity check; if it
 *	   cannot be opened an ERROR is reported, with a hint naming
 *	   my_exec_path.  The directory handle is released with FreeDir().
 *
 * NOTE(review): the line carrying the function name and parameter list, the
 * statement that computes pkglib_path, and one argument line of the
 * ereport(ERROR, ...) call are missing from this listing.  argv0 is
 * presumably the function's parameter -- confirm against the complete file.
1524  */
1525 static void
1527 {
1528  DIR *pdir;
1529 
1530  /* Locate the postgres executable itself */
1531  if (find_my_exec(argv0, my_exec_path) < 0)
1532  ereport(FATAL,
1533  (errmsg("%s: could not locate my own executable path", argv0)));
1534 
1535 #ifdef EXEC_BACKEND
1536  /* Locate executable backend before we change working directory */
1537  if (find_other_exec(argv0, "postgres", PG_BACKEND_VERSIONSTR,
1538  postgres_exec_path) < 0)
1539  ereport(FATAL,
1540  (errmsg("%s: could not locate matching postgres executable",
1541  argv0)));
1542 #endif
1543 
1544  /*
1545  * Locate the pkglib directory --- this has to be set early in case we try
1546  * to load any modules from it in response to postgresql.conf entries.
1547  */
1549 
1550  /*
1551  * Verify that there's a readable directory there; otherwise the Postgres
1552  * installation is incomplete or corrupt. (A typical cause of this
1553  * failure is that the postgres executable has been moved or hardlinked to
1554  * some directory that's not a sibling of the installation lib/
1555  * directory.)
1556  */
1557  pdir = AllocateDir(pkglib_path);
1558  if (pdir == NULL)
1559  ereport(ERROR,
1561  errmsg("could not open directory \"%s\": %m",
1562  pkglib_path),
1563  errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
1564  my_exec_path)));
1565  FreeDir(pdir);
1566 
1567  /*
1568  * XXX is it worth similarly checking the share/ directory? If the lib/
1569  * directory is there, then share/ probably is too.
1570  */
1571 }
1572 
1573 /*
1574  * Check that pg_control exists in the correct location in the data directory.
1575  *
1576  * No attempt is made to validate the contents of pg_control here. This is
1577  * just a sanity check to see if we are looking at a real data directory.
 *
 * The file looked for is "<DataDir>/global/pg_control".  If it cannot be
 * opened for reading, a message naming the data directory, the file and the
 * strerror() text for errno is written with write_stderr() and the process
 * ends through ExitPostmaster(2).  On success the file is closed again at
 * once; nothing is read from it.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing; the body shown does not use any parameter.
1578  */
1579 static void
1581 {
1582  char path[MAXPGPATH];
1583  FILE *fp;
1584 
 /* Build the expected location of pg_control below the data directory */
1585  snprintf(path, sizeof(path), "%s/global/pg_control", DataDir);
1586 
 /* Opening the file is the whole test; its contents are not inspected */
1587  fp = AllocateFile(path, PG_BINARY_R);
1588  if (fp == NULL)
1589  {
1590  write_stderr("%s: could not find the database system\n"
1591  "Expected to find it in the directory \"%s\",\n"
1592  "but could not open file \"%s\": %s\n",
1593  progname, DataDir, path, strerror(errno));
1594  ExitPostmaster(2);
1595  }
1596  FreeFile(fp);
1597 }
1598 
1599 /*
1600  * Determine how long we should let ServerLoop sleep, in milliseconds.
1601  *
1602  * In normal conditions we wait at most one minute, to ensure that the other
1603  * background tasks handled by ServerLoop get done even when no requests are
1604  * arriving. However, if there are background workers waiting to be started,
1605  * we don't actually sleep so that they are quickly serviced. Other exception
1606  * cases are as shown in the code.
 *
 * Return value, following the order of the checks in the body:
 *	- in the "normal case" branch with AbortStartTime set: the time left
 *	  until SIGKILL_CHILDREN_AFTER_SECS have passed since AbortStartTime,
 *	  in milliseconds, never less than 0;
 *	- in the "normal case" branch otherwise: 60000;
 *	- 0 when StartWorkerNeeded is set;
 *	- when a crashed worker yields a wakeup time: the smaller of 60000 and
 *	  the computed ms value;
 *	- 60000 in all remaining cases.
 *
 * Side effect: while scanning crashed workers, some entries are dropped from
 * the list through ForgetBackgroundWorker().
 *
 * NOTE(review): this listing has lost the line carrying the function name
 * and parameter list, the second half of the first condition, the header of
 * the worker-list loop, the first half of the "forget worker" condition and
 * the statement that assigns ms.  Confirm those against the complete file.
1607  */
1608 static int
1610 {
1611  TimestampTz next_wakeup = 0;
1612 
1613  /*
1614  * Normal case: either there are no background workers at all, or we're in
1615  * a shutdown sequence (during which we ignore bgworkers altogether).
1616  */
1617  if (Shutdown > NoShutdown ||
1619  {
1620  if (AbortStartTime != 0)
1621  {
1622  int seconds;
1623 
1624  /* time left to abort; clamp to 0 in case it already expired */
1625  seconds = SIGKILL_CHILDREN_AFTER_SECS -
1626  (time(NULL) - AbortStartTime);
1627 
1628  return Max(seconds * 1000, 0);
1629  }
1630  else
1631  return 60 * 1000;
1632  }
1633 
 /* A worker is waiting to be started: do not sleep at all */
1634  if (StartWorkerNeeded)
1635  return 0;
1636 
1637  if (HaveCrashedWorker)
1638  {
1639  slist_mutable_iter siter;
1640 
1641  /*
1642  * When there are crashed bgworkers, we sleep just long enough that
1643  * they are restarted when they request to be. Scan the list to
1644  * determine the minimum of all wakeup times according to most recent
1645  * crash time and requested restart interval.
1646  */
1648  {
1649  RegisteredBgWorker *rw;
1650  TimestampTz this_wakeup;
1651 
1652  rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
1653 
 /* Workers without a recorded crash time do not affect the result */
1654  if (rw->rw_crashed_at == 0)
1655  continue;
1656 
1658  || rw->rw_terminate)
1659  {
1660  ForgetBackgroundWorker(&siter);
1661  continue;
1662  }
1663 
 /* bgw_restart_time is multiplied by 1000 to obtain milliseconds */
1664  this_wakeup = TimestampTzPlusMilliseconds(rw->rw_crashed_at,
1665  1000L * rw->rw_worker.bgw_restart_time);
 /* next_wakeup == 0 means "no wakeup time recorded yet" */
1666  if (next_wakeup == 0 || this_wakeup < next_wakeup)
1667  next_wakeup = this_wakeup;
1668  }
1669  }
1670 
1671  if (next_wakeup != 0)
1672  {
1673  int ms;
1674 
1675  /* result of TimestampDifferenceMilliseconds is in [0, INT_MAX] */
1677  next_wakeup);
1678  return Min(60 * 1000, ms);
1679  }
1680 
1681  return 60 * 1000;
1682 }
1683 
1684 /*
1685  * Activate or deactivate notifications of server socket events. Since we
1686  * don't currently have a way to remove events from an existing WaitEventSet,
1687  * we'll just destroy and recreate the whole thing. This is called during
1688  * shutdown so we can wait for backends to exit without accepting new
1689  * connections, and during crash reinitialization when we need to start
1690  * listening for new connections again. The WaitEventSet will be freed in fork
1691  * children by ClosePostmasterPorts().
 *
 * accept_connections: when true, the leading entries of ListenSocket[] that
 * are not PGINVALID_SOCKET (at most MAXLISTEN of them) are counted and one
 * loop iteration is run per counted socket; when false the socket count
 * stays 0 and that loop is skipped.
 *
 * NOTE(review): this listing has lost the statement that releases the old
 * pm_wait_set, most of the statement that builds the new wait set, and most
 * of the per-socket statement inside the final loop.  Confirm the exact
 * calls and their arguments against the complete file.
1692  */
1693 static void
1694 ConfigurePostmasterWaitSet(bool accept_connections)
1695 {
1696  int nsockets;
1697 
1698  if (pm_wait_set)
1700  pm_wait_set = NULL;
1701 
1702  /* How many server sockets do we need to wait for? */
1703  nsockets = 0;
1704  if (accept_connections)
1705  {
 /* Counting stops at the first unused slot or at the end of the array */
1706  while (nsockets < MAXLISTEN &&
1707  ListenSocket[nsockets] != PGINVALID_SOCKET)
1708  ++nsockets;
1709  }
1710 
1713  NULL);
1714 
1715  if (accept_connections)
1716  {
1717  for (int i = 0; i < nsockets; i++)
1719  NULL, NULL);
1720  }
1721 }
1722 
1723 /*
1724  * Main idle loop of postmaster
 *
 * Each pass of the endless loop does the following, in this order:
 *	1. waits on pm_wait_set with WaitEventSetWait(), collecting up to
 *	   lengthof(events) events;
 *	2. walks the returned events: latch events and pending requests are
 *	   serviced, and for every WL_SOCKET_ACCEPT event a Port is created
 *	   with ConnCreate(); the postmaster's copy of the socket and the Port
 *	   are released again with StreamClose() and ConnFree();
 *	3. checks, per the comments below, whether helper processes (log
 *	   collector, checkpointer, background writer, WAL writer, autovacuum
 *	   launcher, archiver, WAL receiver, other workers) need to be started;
 *	4. performs time-based housekeeping: escalation to SIGKILL/SIGABRT for
 *	   children that are slow to exit, a once-a-minute recheck of the data
 *	   directory lock file, and touching socket files every 58 minutes.
 *
 * The function is declared to return int, but the loop shown here contains
 * no break or return statement.
 *
 * NOTE(review): this listing has lost the line carrying the function name
 * and parameter list as well as many statements -- among them the timeout
 * argument of WaitEventSetWait(), the handlers for pending requests, the
 * calls that start child processes, and the signalling calls.  Several "if"
 * lines therefore appear without their controlled statement.  Confirm all
 * details against the complete file.
1725  */
1726 static int
1728 {
1729  time_t last_lockfile_recheck_time,
1730  last_touch_time;
1731  WaitEvent events[MAXLISTEN];
1732  int nevents;
1733 
 /* Both periodic-task timers start counting from now */
1735  last_lockfile_recheck_time = last_touch_time = time(NULL);
1736 
1737  for (;;)
1738  {
1739  time_t now;
1740 
1741  nevents = WaitEventSetWait(pm_wait_set,
1743  events,
1744  lengthof(events),
1745  0 /* postmaster posts no wait_events */ );
1746 
1747  /*
1748  * Latch set by signal handler, or new connection pending on any of
1749  * our sockets? If the latter, fork a child process to deal with it.
1750  */
1751  for (int i = 0; i < nevents; i++)
1752  {
1753  if (events[i].events & WL_LATCH_SET)
1755 
1756  /*
1757  * The following requests are handled unconditionally, even if we
1758  * didn't see WL_LATCH_SET. This gives high priority to shutdown
1759  * and reload requests where the latch happens to appear later in
1760  * events[] or will be reported by a later call to
1761  * WaitEventSetWait().
1762  */
1769  if (pending_pm_pmsignal)
1771 
1772  if (events[i].events & WL_SOCKET_ACCEPT)
1773  {
1774  Port *port;
1775 
1776  port = ConnCreate(events[i].fd);
1777  if (port)
1778  {
1780 
1781  /*
1782  * We no longer need the open socket or port structure in
1783  * this process
1784  */
1785  StreamClose(port->sock);
1786  ConnFree(port);
1787  }
1788  }
1789  }
1790 
1791  /* If we have lost the log collector, try to start a new one */
1792  if (SysLoggerPID == 0 && Logging_collector)
1794 
1795  /*
1796  * If no background writer process is running, and we are not in a
1797  * state that prevents it, start one. It doesn't matter if this
1798  * fails, we'll just try again later. Likewise for the checkpointer.
1799  */
1800  if (pmState == PM_RUN || pmState == PM_RECOVERY ||
1802  {
1803  if (CheckpointerPID == 0)
1805  if (BgWriterPID == 0)
1807  }
1808 
1809  /*
1810  * Likewise, if we have lost the walwriter process, try to start a new
1811  * one. But this is needed only in normal operation (else we cannot
1812  * be writing any new WAL).
1813  */
1814  if (WalWriterPID == 0 && pmState == PM_RUN)
1816 
1817  /*
1818  * If we have lost the autovacuum launcher, try to start a new one. We
1819  * don't want autovacuum to run in binary upgrade mode because
1820  * autovacuum might update relfrozenxid for empty tables before the
1821  * physical files are put in place.
1822  */
1823  if (!IsBinaryUpgrade && AutoVacPID == 0 &&
1825  pmState == PM_RUN)
1826  {
1828  if (AutoVacPID != 0)
1829  start_autovac_launcher = false; /* signal processed */
1830  }
1831 
1832  /* If we have lost the archiver, try to start a new one. */
1833  if (PgArchPID == 0 && PgArchStartupAllowed())
1835 
1836  /* If we need to signal the autovacuum launcher, do so now */
1838  {
1839  avlauncher_needs_signal = false;
1840  if (AutoVacPID != 0)
1842  }
1843 
1844  /* If we need to start a WAL receiver, try to do that now */
1847 
1848  /* Get other worker processes running, if needed */
1851 
1852 #ifdef HAVE_PTHREAD_IS_THREADED_NP
1853 
1854  /*
1855  * With assertions enabled, check regularly for appearance of
1856  * additional threads. All builds check at start and exit.
1857  */
1858  Assert(pthread_is_threaded_np() == 0);
1859 #endif
1860 
1861  /*
1862  * Lastly, check to see if it's time to do some things that we don't
1863  * want to do every single time through the loop, because they're a
1864  * bit expensive. Note that there's up to a minute of slop in when
1865  * these tasks will be performed, since DetermineSleepTime() will let
1866  * us sleep at most that long; except for SIGKILL timeout which has
1867  * special-case logic there.
1868  */
1869  now = time(NULL);
1870 
1871  /*
1872  * If we already sent SIGQUIT to children and they are slow to shut
1873  * down, it's time to send them SIGKILL (or SIGABRT if requested).
1874  * This doesn't happen normally, but under certain conditions backends
1875  * can get stuck while shutting down. This is a last measure to get
1876  * them unwedged.
1877  *
1878  * Note we also do this during recovery from a process crash.
1879  */
1880  if ((Shutdown >= ImmediateShutdown || FatalError) &&
1881  AbortStartTime != 0 &&
1883  {
1884  /* We were gentle with them before. Not anymore */
1885  ereport(LOG,
1886  /* translator: %s is SIGKILL or SIGABRT */
1887  (errmsg("issuing %s to recalcitrant children",
1888  send_abort_for_kill ? "SIGABRT" : "SIGKILL")));
1890  /* reset flag so we don't SIGKILL again */
1891  AbortStartTime = 0;
1892  }
1893 
1894  /*
1895  * Once a minute, verify that postmaster.pid hasn't been removed or
1896  * overwritten. If it has, we force a shutdown. This avoids having
1897  * postmasters and child processes hanging around after their database
1898  * is gone, and maybe causing problems if a new database cluster is
1899  * created in the same place. It also provides some protection
1900  * against a DBA foolishly removing postmaster.pid and manually
1901  * starting a new postmaster. Data corruption is likely to ensue from
1902  * that anyway, but we can minimize the damage by aborting ASAP.
1903  */
1904  if (now - last_lockfile_recheck_time >= 1 * SECS_PER_MINUTE)
1905  {
1906  if (!RecheckDataDirLockFile())
1907  {
1908  ereport(LOG,
1909  (errmsg("performing immediate shutdown because data directory lock file is invalid")));
1911  }
 /* The timer restarts whether or not the recheck succeeded */
1912  last_lockfile_recheck_time = now;
1913  }
1914 
1915  /*
1916  * Touch Unix socket and lock files every 58 minutes, to ensure that
1917  * they are not removed by overzealous /tmp-cleaning tasks. We assume
1918  * no one runs cleaners with cutoff times of less than an hour ...
1919  */
1920  if (now - last_touch_time >= 58 * SECS_PER_MINUTE)
1921  {
1922  TouchSocketFiles();
1924  last_touch_time = now;
1925  }
1926  }
1927 }
1928 
1929 /*
1930  * Read a client's startup packet and do something according to it.
1931  *
1932  * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1933  * not return at all.
1934  *
1935  * (Note that ereport(FATAL) stuff is sent to the client, so only use it
1936  * if that's what you want. Return STATUS_ERROR if you don't want to
1937  * send anything to the client, which would typically be appropriate
1938  * if we detect a communications failure.)
1939  *
1940  * Set ssl_done and/or gss_done when negotiation of an encrypted layer
1941  * (currently, TLS or GSSAPI) is completed. A successful negotiation of either
1942  * encryption layer sets both flags, but a rejected negotiation sets only the
1943  * flag for that layer, since the client may wish to try the other one. We
1944  * should make no assumption here about the order in which the client may make
1945  * requests.
1946  */
1947 static int
1948 ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done)
1949 {
1950  int32 len;
1951  char *buf;
1952  ProtocolVersion proto;
1953  MemoryContext oldcontext;
1954 
1955  pq_startmsgread();
1956 
1957  /*
1958  * Grab the first byte of the length word separately, so that we can tell
1959  * whether we have no data at all or an incomplete packet. (This might
1960  * sound inefficient, but it's not really, because of buffering in
1961  * pqcomm.c.)
1962  */
1963  if (pq_getbytes((char *) &len, 1) == EOF)
1964  {
1965  /*
1966  * If we get no data at all, don't clutter the log with a complaint;
1967  * such cases often occur for legitimate reasons. An example is that
1968  * we might be here after responding to NEGOTIATE_SSL_CODE, and if the
1969  * client didn't like our response, it'll probably just drop the
1970  * connection. Service-monitoring software also often just opens and
1971  * closes a connection without sending anything. (So do port
1972  * scanners, which may be less benign, but it's not really our job to
1973  * notice those.)
1974  */
1975  return STATUS_ERROR;
1976  }
1977 
1978  if (pq_getbytes(((char *) &len) + 1, 3) == EOF)
1979  {
1980  /* Got a partial length word, so bleat about that */
1981  if (!ssl_done && !gss_done)
1983  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1984  errmsg("incomplete startup packet")));
1985  return STATUS_ERROR;
1986  }
1987 
1988  len = pg_ntoh32(len);
1989  len -= 4;
1990 
1991  if (len < (int32) sizeof(ProtocolVersion) ||
1993  {
1995  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1996  errmsg("invalid length of startup packet")));
1997  return STATUS_ERROR;
1998  }
1999 
2000  /*
2001  * Allocate space to hold the startup packet, plus one extra byte that's
2002  * initialized to be zero. This ensures we will have null termination of
2003  * all strings inside the packet.
2004  */
2005  buf = palloc(len + 1);
2006  buf[len] = '\0';
2007 
2008  if (pq_getbytes(buf, len) == EOF)
2009  {
2011  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2012  errmsg("incomplete startup packet")));
2013  return STATUS_ERROR;
2014  }
2015  pq_endmsgread();
2016 
2017  /*
2018  * The first field is either a protocol version number or a special
2019  * request code.
2020  */
2021  port->proto = proto = pg_ntoh32(*((ProtocolVersion *) buf));
2022 
2023  if (proto == CANCEL_REQUEST_CODE)
2024  {
2025  if (len != sizeof(CancelRequestPacket))
2026  {
2028  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2029  errmsg("invalid length of startup packet")));
2030  return STATUS_ERROR;
2031  }
2033  /* Not really an error, but we don't want to proceed further */
2034  return STATUS_ERROR;
2035  }
2036 
2037  if (proto == NEGOTIATE_SSL_CODE && !ssl_done)
2038  {
2039  char SSLok;
2040 
2041 #ifdef USE_SSL
2042  /* No SSL when disabled or on Unix sockets */
2043  if (!LoadedSSL || port->laddr.addr.ss_family == AF_UNIX)
2044  SSLok = 'N';
2045  else
2046  SSLok = 'S'; /* Support for SSL */
2047 #else
2048  SSLok = 'N'; /* No support for SSL */
2049 #endif
2050 
2051 retry1:
2052  if (send(port->sock, &SSLok, 1, 0) != 1)
2053  {
2054  if (errno == EINTR)
2055  goto retry1; /* if interrupted, just retry */
2058  errmsg("failed to send SSL negotiation response: %m")));
2059  return STATUS_ERROR; /* close the connection */
2060  }
2061 
2062 #ifdef USE_SSL
2063  if (SSLok == 'S' && secure_open_server(port) == -1)
2064  return STATUS_ERROR;
2065 #endif
2066 
2067  /*
2068  * At this point we should have no data already buffered. If we do,
2069  * it was received before we performed the SSL handshake, so it wasn't
2070  * encrypted and indeed may have been injected by a man-in-the-middle.
2071  * We report this case to the client.
2072  */
2073  if (pq_buffer_has_data())
2074  ereport(FATAL,
2075  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2076  errmsg("received unencrypted data after SSL request"),
2077  errdetail("This could be either a client-software bug or evidence of an attempted man-in-the-middle attack.")));
2078 
2079  /*
2080  * regular startup packet, cancel, etc packet should follow, but not
2081  * another SSL negotiation request, and a GSS request should only
2082  * follow if SSL was rejected (client may negotiate in either order)
2083  */
2084  return ProcessStartupPacket(port, true, SSLok == 'S');
2085  }
2086  else if (proto == NEGOTIATE_GSS_CODE && !gss_done)
2087  {
2088  char GSSok = 'N';
2089 
2090 #ifdef ENABLE_GSS
2091  /* No GSSAPI encryption when on Unix socket */
2092  if (port->laddr.addr.ss_family != AF_UNIX)
2093  GSSok = 'G';
2094 #endif
2095 
2096  while (send(port->sock, &GSSok, 1, 0) != 1)
2097  {
2098  if (errno == EINTR)
2099  continue;
2102  errmsg("failed to send GSSAPI negotiation response: %m")));
2103  return STATUS_ERROR; /* close the connection */
2104  }
2105 
2106 #ifdef ENABLE_GSS
2107  if (GSSok == 'G' && secure_open_gssapi(port) == -1)
2108  return STATUS_ERROR;
2109 #endif
2110 
2111  /*
2112  * At this point we should have no data already buffered. If we do,
2113  * it was received before we performed the GSS handshake, so it wasn't
2114  * encrypted and indeed may have been injected by a man-in-the-middle.
2115  * We report this case to the client.
2116  */
2117  if (pq_buffer_has_data())
2118  ereport(FATAL,
2119  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2120  errmsg("received unencrypted data after GSSAPI encryption request"),
2121  errdetail("This could be either a client-software bug or evidence of an attempted man-in-the-middle attack.")));
2122 
2123  /*
2124  * regular startup packet, cancel, etc packet should follow, but not
2125  * another GSS negotiation request, and an SSL request should only
2126  * follow if GSS was rejected (client may negotiate in either order)
2127  */
2128  return ProcessStartupPacket(port, GSSok == 'G', true);
2129  }
2130 
2131  /* Could add additional special packet types here */
2132 
2133  /*
2134  * Set FrontendProtocol now so that ereport() knows what format to send if
2135  * we fail during startup.
2136  */
2137  FrontendProtocol = proto;
2138 
2139  /* Check that the major protocol version is in range. */
2142  ereport(FATAL,
2143  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2144  errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
2145  PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
2149 
2150  /*
2151  * Now fetch parameters out of startup packet and save them into the Port
2152  * structure. All data structures attached to the Port struct must be
2153  * allocated in TopMemoryContext so that they will remain available in a
2154  * running backend (even after PostmasterContext is destroyed). We need
2155  * not worry about leaking this storage on failure, since we aren't in the
2156  * postmaster process anymore.
2157  */
2159 
2160  /* Handle protocol version 3 startup packet */
2161  {
2162  int32 offset = sizeof(ProtocolVersion);
2163  List *unrecognized_protocol_options = NIL;
2164 
2165  /*
2166  * Scan packet body for name/option pairs. We can assume any string
2167  * beginning within the packet body is null-terminated, thanks to
2168  * zeroing extra byte above.
2169  */
2170  port->guc_options = NIL;
2171 
2172  while (offset < len)
2173  {
2174  char *nameptr = buf + offset;
2175  int32 valoffset;
2176  char *valptr;
2177 
2178  if (*nameptr == '\0')
2179  break; /* found packet terminator */
2180  valoffset = offset + strlen(nameptr) + 1;
2181  if (valoffset >= len)
2182  break; /* missing value, will complain below */
2183  valptr = buf + valoffset;
2184 
2185  if (strcmp(nameptr, "database") == 0)
2186  port->database_name = pstrdup(valptr);
2187  else if (strcmp(nameptr, "user") == 0)
2188  port->user_name = pstrdup(valptr);
2189  else if (strcmp(nameptr, "options") == 0)
2190  port->cmdline_options = pstrdup(valptr);
2191  else if (strcmp(nameptr, "replication") == 0)
2192  {
2193  /*
2194  * Due to backward compatibility concerns the replication
2195  * parameter is a hybrid beast which allows the value to be
2196  * either boolean or the string 'database'. The latter
2197  * connects to a specific database which is e.g. required for
2198  * logical decoding while.
2199  */
2200  if (strcmp(valptr, "database") == 0)
2201  {
2202  am_walsender = true;
2203  am_db_walsender = true;
2204  }
2205  else if (!parse_bool(valptr, &am_walsender))
2206  ereport(FATAL,
2207  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2208  errmsg("invalid value for parameter \"%s\": \"%s\"",
2209  "replication",
2210  valptr),
2211  errhint("Valid values are: \"false\", 0, \"true\", 1, \"database\".")));
2212  }
2213  else if (strncmp(nameptr, "_pq_.", 5) == 0)
2214  {
2215  /*
2216  * Any option beginning with _pq_. is reserved for use as a
2217  * protocol-level option, but at present no such options are
2218  * defined.
2219  */
2220  unrecognized_protocol_options =
2221  lappend(unrecognized_protocol_options, pstrdup(nameptr));
2222  }
2223  else
2224  {
2225  /* Assume it's a generic GUC option */
2226  port->guc_options = lappend(port->guc_options,
2227  pstrdup(nameptr));
2228  port->guc_options = lappend(port->guc_options,
2229  pstrdup(valptr));
2230 
2231  /*
2232  * Copy application_name to port if we come across it. This
2233  * is done so we can log the application_name in the
2234  * connection authorization message. Note that the GUC would
2235  * be used but we haven't gone through GUC setup yet.
2236  */
2237  if (strcmp(nameptr, "application_name") == 0)
2238  {
2239  port->application_name = pg_clean_ascii(valptr, 0);
2240  }
2241  }
2242  offset = valoffset + strlen(valptr) + 1;
2243  }
2244 
2245  /*
2246  * If we didn't find a packet terminator exactly at the end of the
2247  * given packet length, complain.
2248  */
2249  if (offset != len - 1)
2250  ereport(FATAL,
2251  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2252  errmsg("invalid startup packet layout: expected terminator as last byte")));
2253 
2254  /*
2255  * If the client requested a newer protocol version or if the client
2256  * requested any protocol options we didn't recognize, let them know
2257  * the newest minor protocol version we do support and the names of
2258  * any unrecognized options.
2259  */
2261  unrecognized_protocol_options != NIL)
2262  SendNegotiateProtocolVersion(unrecognized_protocol_options);
2263  }
2264 
2265  /* Check a user name was given. */
2266  if (port->user_name == NULL || port->user_name[0] == '\0')
2267  ereport(FATAL,
2268  (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
2269  errmsg("no PostgreSQL user name specified in startup packet")));
2270 
2271  /* The database defaults to the user name. */
2272  if (port->database_name == NULL || port->database_name[0] == '\0')
2273  port->database_name = pstrdup(port->user_name);
2274 
2275  if (Db_user_namespace)
2276  {
2277  /*
2278  * If user@, it is a global user, remove '@'. We only want to do this
2279  * if there is an '@' at the end and no earlier in the user string or
2280  * they may fake as a local user of another database attaching to this
2281  * database.
2282  */
2283  if (strchr(port->user_name, '@') ==
2284  port->user_name + strlen(port->user_name) - 1)
2285  *strchr(port->user_name, '@') = '\0';
2286  else
2287  {
2288  /* Append '@' and dbname */
2289  port->user_name = psprintf("%s@%s", port->user_name, port->database_name);
2290  }
2291  }
2292 
2293  /*
2294  * Truncate given database and user names to length of a Postgres name.
2295  * This avoids lookup failures when overlength names are given.
2296  */
2297  if (strlen(port->database_name) >= NAMEDATALEN)
2298  port->database_name[NAMEDATALEN - 1] = '\0';
2299  if (strlen(port->user_name) >= NAMEDATALEN)
2300  port->user_name[NAMEDATALEN - 1] = '\0';
2301 
2302  if (am_walsender)
2304  else
2306 
2307  /*
2308  * Normal walsender backends, e.g. for streaming replication, are not
2309  * connected to a particular database. But walsenders used for logical
2310  * replication need to connect to a specific database. We allow streaming
2311  * replication commands to be issued even if connected to a database as it
2312  * can make sense to first make a basebackup and then stream changes
2313  * starting from that.
2314  */
2315  if (am_walsender && !am_db_walsender)
2316  port->database_name[0] = '\0';
2317 
2318  /*
2319  * Done putting stuff in TopMemoryContext.
2320  */
2321  MemoryContextSwitchTo(oldcontext);
2322 
2323  /*
2324  * If we're going to reject the connection due to database state, say so
2325  * now instead of wasting cycles on an authentication exchange. (This also
2326  * allows a pg_ping utility to be written.)
2327  */
2328  switch (port->canAcceptConnections)
2329  {
2330  case CAC_STARTUP:
2331  ereport(FATAL,
2333  errmsg("the database system is starting up")));
2334  break;
2335  case CAC_NOTCONSISTENT:
2336  if (EnableHotStandby)
2337  ereport(FATAL,
2339  errmsg("the database system is not yet accepting connections"),
2340  errdetail("Consistent recovery state has not been yet reached.")));
2341  else
2342  ereport(FATAL,
2344  errmsg("the database system is not accepting connections"),
2345  errdetail("Hot standby mode is disabled.")));
2346  break;
2347  case CAC_SHUTDOWN:
2348  ereport(FATAL,
2350  errmsg("the database system is shutting down")));
2351  break;
2352  case CAC_RECOVERY:
2353  ereport(FATAL,
2355  errmsg("the database system is in recovery mode")));
2356  break;
2357  case CAC_TOOMANY:
2358  ereport(FATAL,
2359  (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
2360  errmsg("sorry, too many clients already")));
2361  break;
2362  case CAC_OK:
2363  break;
2364  }
2365 
2366  return STATUS_OK;
2367 }
2368 
2369 /*
2370  * Send a NegotiateProtocolVersion to the client. This lets the client know
2371  * that they have requested a newer minor protocol version than we are able
2372  * to speak. We'll speak the highest version we know about; the client can,
2373  * of course, abandon the connection if that's a problem.
2374  *
2375  * We also include in the response a list of protocol options we didn't
2376  * understand. This allows clients to include optional parameters that might
2377  * be present either in newer protocol versions or third-party protocol
2378  * extensions without fear of having to reconnect if those options are not
2379  * understood, while at the same time making certain that the client is aware
2380  * of which options were actually accepted.
2381  */
2382 static void
2383 SendNegotiateProtocolVersion(List *unrecognized_protocol_options)
2384 {
2386  ListCell *lc;
2387 
2388  pq_beginmessage(&buf, 'v'); /* NegotiateProtocolVersion */
2390  pq_sendint32(&buf, list_length(unrecognized_protocol_options));
2391  foreach(lc, unrecognized_protocol_options)
2392  pq_sendstring(&buf, lfirst(lc));
2393  pq_endmessage(&buf);
2394 
2395  /* no need to flush, some other message will follow */
2396 }
2397 
2398 /*
2399  * The client has sent a cancel request packet, not a normal
2400  * start-a-new-connection packet. Perform the necessary processing.
2401  * Nothing is sent back to the client.
2402  */
2403 static void
2405 {
2406  CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
2407  int backendPID;
2408  int32 cancelAuthCode;
2409  Backend *bp;
2410 
2411 #ifndef EXEC_BACKEND
2412  dlist_iter iter;
2413 #else
2414  int i;
2415 #endif
2416 
2417  backendPID = (int) pg_ntoh32(canc->backendPID);
2418  cancelAuthCode = (int32) pg_ntoh32(canc->cancelAuthCode);
2419 
2420  /*
2421  * See if we have a matching backend. In the EXEC_BACKEND case, we can no
2422  * longer access the postmaster's own backend list, and must rely on the
2423  * duplicate array in shared memory.
2424  */
2425 #ifndef EXEC_BACKEND
2426  dlist_foreach(iter, &BackendList)
2427  {
2428  bp = dlist_container(Backend, elem, iter.cur);
2429 #else
2430  for (i = MaxLivePostmasterChildren() - 1; i >= 0; i--)
2431  {
2432  bp = (Backend *) &ShmemBackendArray[i];
2433 #endif
2434  if (bp->pid == backendPID)
2435  {
2436  if (bp->cancel_key == cancelAuthCode)
2437  {
2438  /* Found a match; signal that backend to cancel current op */
2439  ereport(DEBUG2,
2440  (errmsg_internal("processing cancel request: sending SIGINT to process %d",
2441  backendPID)));
2442  signal_child(bp->pid, SIGINT);
2443  }
2444  else
2445  /* Right PID, wrong key: no way, Jose */
2446  ereport(LOG,
2447  (errmsg("wrong key in cancel request for process %d",
2448  backendPID)));
2449  return;
2450  }
2451 #ifndef EXEC_BACKEND /* make GNU Emacs 26.1 see brace balance */
2452  }
2453 #else
2454  }
2455 #endif
2456 
2457  /* No matching backend */
2458  ereport(LOG,
2459  (errmsg("PID %d in cancel request did not match any process",
2460  backendPID)));
2461 }
2462 
2463 /*
2464  * canAcceptConnections --- check to see if database state allows connections
2465  * of the specified type. backend_type can be BACKEND_TYPE_NORMAL,
2466  * BACKEND_TYPE_AUTOVAC, or BACKEND_TYPE_BGWORKER. (Note that we don't yet
2467  * know whether a NORMAL connection might turn into a walsender.)
2468  */
2469 static CAC_state
2470 canAcceptConnections(int backend_type)
2471 {
2472  CAC_state result = CAC_OK;
2473 
2474  /*
2475  * Can't start backends when in startup/shutdown/inconsistent recovery
2476  * state. We treat autovac workers the same as user backends for this
2477  * purpose. However, bgworkers are excluded from this test; we expect
2478  * bgworker_should_start_now() decided whether the DB state allows them.
2479  */
2480  if (pmState != PM_RUN && pmState != PM_HOT_STANDBY &&
2481  backend_type != BACKEND_TYPE_BGWORKER)
2482  {
2483  if (Shutdown > NoShutdown)
2484  return CAC_SHUTDOWN; /* shutdown is pending */
2485  else if (!FatalError && pmState == PM_STARTUP)
2486  return CAC_STARTUP; /* normal startup */
2487  else if (!FatalError && pmState == PM_RECOVERY)
2488  return CAC_NOTCONSISTENT; /* not yet at consistent recovery
2489  * state */
2490  else
2491  return CAC_RECOVERY; /* else must be crash recovery */
2492  }
2493 
2494  /*
2495  * "Smart shutdown" restrictions are applied only to normal connections,
2496  * not to autovac workers or bgworkers.
2497  */
2498  if (!connsAllowed && backend_type == BACKEND_TYPE_NORMAL)
2499  return CAC_SHUTDOWN; /* shutdown is pending */
2500 
2501  /*
2502  * Don't start too many children.
2503  *
2504  * We allow more connections here than we can have backends because some
2505  * might still be authenticating; they might fail auth, or some existing
2506  * backend might exit before the auth cycle is completed. The exact
2507  * MaxBackends limit is enforced when a new backend tries to join the
2508  * shared-inval backend array.
2509  *
2510  * The limit here must match the sizes of the per-child-process arrays;
2511  * see comments for MaxLivePostmasterChildren().
2512  */
2514  result = CAC_TOOMANY;
2515 
2516  return result;
2517 }
2518 
2519 
2520 /*
2521  * ConnCreate -- create a local connection data structure
2522  *
2523  * Returns NULL on failure, other than out-of-memory which is fatal.
2524  */
2525 static Port *
2526 ConnCreate(int serverFd)
2527 {
2528  Port *port;
2529 
2530  if (!(port = (Port *) calloc(1, sizeof(Port))))
2531  {
2532  ereport(LOG,
2533  (errcode(ERRCODE_OUT_OF_MEMORY),
2534  errmsg("out of memory")));
2535  ExitPostmaster(1);
2536  }
2537 
2538  if (StreamConnection(serverFd, port) != STATUS_OK)
2539  {
2540  if (port->sock != PGINVALID_SOCKET)
2541  StreamClose(port->sock);
2542  ConnFree(port);
2543  return NULL;
2544  }
2545 
2546  return port;
2547 }
2548 
2549 
2550 /*
2551  * ConnFree -- free a local connection data structure
2552  *
2553  * Caller has already closed the socket if any, so there's not much
2554  * to do here.
2555  */
2556 static void
2558 {
2559  free(port);
2560 }
2561 
2562 
2563 /*
2564  * ClosePostmasterPorts -- close all the postmaster's open sockets
2565  *
2566  * This is called during child process startup to release file descriptors
2567  * that are not needed by that child process. The postmaster still has
2568  * them open, of course.
2569  *
2570  * Note: we pass am_syslogger as a boolean because we don't want to set
2571  * the global variable yet when this is called.
2572  */
2573 void
2574 ClosePostmasterPorts(bool am_syslogger)
2575 {
2576  int i;
2577 
2578  /* Release resources held by the postmaster's WaitEventSet. */
2579  if (pm_wait_set)
2580  {
2582  pm_wait_set = NULL;
2583  }
2584 
2585 #ifndef WIN32
2586 
2587  /*
2588  * Close the write end of postmaster death watch pipe. It's important to
2589  * do this as early as possible, so that if postmaster dies, others won't
2590  * think that it's still running because we're holding the pipe open.
2591  */
2593  ereport(FATAL,
2595  errmsg_internal("could not close postmaster death monitoring pipe in child process: %m")));
2597  /* Notify fd.c that we released one pipe FD. */
2599 #endif
2600 
2601  /*
2602  * Close the postmaster's listen sockets. These aren't tracked by fd.c,
2603  * so we don't call ReleaseExternalFD() here.
2604  */
2605  for (i = 0; i < MAXLISTEN; i++)
2606  {
2608  {
2611  }
2612  }
2613 
2614  /*
2615  * If using syslogger, close the read side of the pipe. We don't bother
2616  * tracking this in fd.c, either.
2617  */
2618  if (!am_syslogger)
2619  {
2620 #ifndef WIN32
2621  if (syslogPipe[0] >= 0)
2622  close(syslogPipe[0]);
2623  syslogPipe[0] = -1;
2624 #else
2625  if (syslogPipe[0])
2626  CloseHandle(syslogPipe[0]);
2627  syslogPipe[0] = 0;
2628 #endif
2629  }
2630 
2631 #ifdef USE_BONJOUR
2632  /* If using Bonjour, close the connection to the mDNS daemon */
2633  if (bonjour_sdref)
2634  close(DNSServiceRefSockFD(bonjour_sdref));
2635 #endif
2636 }
2637 
2638 
2639 /*
2640  * InitProcessGlobals -- set MyProcPid, MyStartTime[stamp], random seeds
2641  *
2642  * Called early in the postmaster and every backend.
2643  */
2644 void
2646 {
2647  MyProcPid = getpid();
2650 
2651  /*
2652  * Set a different global seed in every process. We want something
2653  * unpredictable, so if possible, use high-quality random bits for the
2654  * seed. Otherwise, fall back to a seed based on timestamp and PID.
2655  */
2657  {
2658  uint64 rseed;
2659 
2660  /*
2661  * Since PIDs and timestamps tend to change more frequently in their
2662  * least significant bits, shift the timestamp left to allow a larger
2663  * total number of seeds in a given time period. Since that would
2664  * leave only 20 bits of the timestamp that cycle every ~1 second,
2665  * also mix in some higher bits.
2666  */
2667  rseed = ((uint64) MyProcPid) ^
2668  ((uint64) MyStartTimestamp << 12) ^
2669  ((uint64) MyStartTimestamp >> 20);
2670 
2672  }
2673 
2674  /*
2675  * Also make sure that we've set a good seed for random(3). Use of that
2676  * is deprecated in core Postgres, but extensions might use it.
2677  */
2678 #ifndef WIN32
2680 #endif
2681 }
2682 
2683 /*
2684  * Child processes use SIGUSR1 to notify us of 'pmsignals'. pg_ctl uses
2685  * SIGUSR1 to ask postmaster to check for logrotate and promote files.
2686  */
2687 static void
2689 {
2690  int save_errno = errno;
2691 
2692  pending_pm_pmsignal = true;
2693  SetLatch(MyLatch);
2694 
2695  errno = save_errno;
2696 }
2697 
2698 /*
2699  * pg_ctl uses SIGHUP to request a reload of the configuration files.
2700  */
2701 static void
2703 {
2704  int save_errno = errno;
2705 
2707  SetLatch(MyLatch);
2708 
2709  errno = save_errno;
2710 }
2711 
2712 /*
2713  * Re-read config files, and tell children to do same.
2714  */
2715 static void
2717 {
2718  pending_pm_reload_request = false;
2719 
2720  ereport(DEBUG2,
2721  (errmsg_internal("postmaster received reload request signal")));
2722 
2723  if (Shutdown <= SmartShutdown)
2724  {
2725  ereport(LOG,
2726  (errmsg("received SIGHUP, reloading configuration files")));
2729  if (StartupPID != 0)
2731  if (BgWriterPID != 0)
2733  if (CheckpointerPID != 0)
2735  if (WalWriterPID != 0)
2737  if (WalReceiverPID != 0)
2739  if (AutoVacPID != 0)
2741  if (PgArchPID != 0)
2743  if (SysLoggerPID != 0)
2745 
2746  /* Reload authentication config files too */
2747  if (!load_hba())
2748  ereport(LOG,
2749  /* translator: %s is a configuration file */
2750  (errmsg("%s was not reloaded", HbaFileName)));
2751 
2752  if (!load_ident())
2753  ereport(LOG,
2754  (errmsg("%s was not reloaded", IdentFileName)));
2755 
2756 #ifdef USE_SSL
2757  /* Reload SSL configuration as well */
2758  if (EnableSSL)
2759  {
2760  if (secure_initialize(false) == 0)
2761  LoadedSSL = true;
2762  else
2763  ereport(LOG,
2764  (errmsg("SSL configuration was not reloaded")));
2765  }
2766  else
2767  {
2768  secure_destroy();
2769  LoadedSSL = false;
2770  }
2771 #endif
2772 
2773 #ifdef EXEC_BACKEND
2774  /* Update the starting-point file for future children */
2775  write_nondefault_variables(PGC_SIGHUP);
2776 #endif
2777  }
2778 }
2779 
2780 /*
2781  * pg_ctl uses SIGTERM, SIGINT and SIGQUIT to request different types of
2782  * shutdown.
2783  */
2784 static void
2786 {
2787  int save_errno = errno;
2788 
2789  switch (postgres_signal_arg)
2790  {
2791  case SIGTERM:
2792  /* smart is implied if the other two flags aren't set */
2794  break;
2795  case SIGINT:
2798  break;
2799  case SIGQUIT:
2802  break;
2803  }
2804  SetLatch(MyLatch);
2805 
2806  errno = save_errno;
2807 }
2808 
2809 /*
2810  * Process shutdown request.
2811  */
2812 static void
2814 {
2815  int mode;
2816 
2817  ereport(DEBUG2,
2818  (errmsg_internal("postmaster received shutdown request signal")));
2819 
2821 
2822  /*
2823  * If more than one shutdown request signal arrived since the last server
2824  * loop, take the one that is the most immediate. That matches the
2825  * priority that would apply if we processed them one by one in any order.
2826  */
2828  {
2832  }
2834  {
2836  mode = FastShutdown;
2837  }
2838  else
2839  mode = SmartShutdown;
2840 
2841  switch (mode)
2842  {
2843  case SmartShutdown:
2844 
2845  /*
2846  * Smart Shutdown:
2847  *
2848  * Wait for children to end their work, then shut down.
2849  */
2850  if (Shutdown >= SmartShutdown)
2851  break;
2853  ereport(LOG,
2854  (errmsg("received smart shutdown request")));
2855 
2856  /* Report status */
2858 #ifdef USE_SYSTEMD
2859  sd_notify(0, "STOPPING=1");
2860 #endif
2861 
2862  /*
2863  * If we reached normal running, we go straight to waiting for
2864  * client backends to exit. If already in PM_STOP_BACKENDS or a
2865  * later state, do not change it.
2866  */
2867  if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
2868  connsAllowed = false;
2869  else if (pmState == PM_STARTUP || pmState == PM_RECOVERY)
2870  {
2871  /* There should be no clients, so proceed to stop children */
2873  }
2874 
2875  /*
2876  * Now wait for online backup mode to end and backends to exit. If
2877  * that is already the case, PostmasterStateMachine will take the
2878  * next step.
2879  */
2881  break;
2882 
2883  case FastShutdown:
2884 
2885  /*
2886  * Fast Shutdown:
2887  *
2888  * Abort all children with SIGTERM (rollback active transactions
2889  * and exit) and shut down when they are gone.
2890  */
2891  if (Shutdown >= FastShutdown)
2892  break;
2894  ereport(LOG,
2895  (errmsg("received fast shutdown request")));
2896 
2897  /* Report status */
2899 #ifdef USE_SYSTEMD
2900  sd_notify(0, "STOPPING=1");
2901 #endif
2902 
2903  if (pmState == PM_STARTUP || pmState == PM_RECOVERY)
2904  {
2905  /* Just shut down background processes silently */
2907  }
2908  else if (pmState == PM_RUN ||
2910  {
2911  /* Report that we're about to zap live client sessions */
2912  ereport(LOG,
2913  (errmsg("aborting any active transactions")));
2915  }
2916 
2917  /*
2918  * PostmasterStateMachine will issue any necessary signals, or
2919  * take the next step if no child processes need to be killed.
2920  */
2922  break;
2923 
2924  case ImmediateShutdown:
2925 
2926  /*
2927  * Immediate Shutdown:
2928  *
2929  * abort all children with SIGQUIT, wait for them to exit,
2930  * terminate remaining ones with SIGKILL, then exit without
2931  * attempt to properly shut down the data base system.
2932  */
2933  if (Shutdown >= ImmediateShutdown)
2934  break;
2936  ereport(LOG,
2937  (errmsg("received immediate shutdown request")));
2938 
2939  /* Report status */
2941 #ifdef USE_SYSTEMD
2942  sd_notify(0, "STOPPING=1");
2943 #endif
2944 
2945  /* tell children to shut down ASAP */
2946  /* (note we don't apply send_abort_for_crash here) */
2950 
2951  /* set stopwatch for them to die */
2952  AbortStartTime = time(NULL);
2953 
2954  /*
2955  * Now wait for backends to exit. If there are none,
2956  * PostmasterStateMachine will take the next step.
2957  */
2959  break;
2960  }
2961 }
2962 
2963 static void
2965 {
2966  int save_errno = errno;
2967 
2968  pending_pm_child_exit = true;
2969  SetLatch(MyLatch);
2970 
2971  errno = save_errno;
2972 }
2973 
2974 /*
2975  * Cleanup after a child process dies.
2976  */
2977 static void
2979 {
2980  int pid; /* process id of dead child process */
2981  int exitstatus; /* its exit status */
2982 
2983  pending_pm_child_exit = false;
2984 
2985  ereport(DEBUG4,
2986  (errmsg_internal("reaping dead processes")));
2987 
2988  while ((pid = waitpid(-1, &exitstatus, WNOHANG)) > 0)
2989  {
2990  /*
2991  * Check if this child was a startup process.
2992  */
2993  if (pid == StartupPID)
2994  {
2995  StartupPID = 0;
2996 
2997  /*
2998  * Startup process exited in response to a shutdown request (or it
2999  * completed normally regardless of the shutdown request).
3000  */
3001  if (Shutdown > NoShutdown &&
3002  (EXIT_STATUS_0(exitstatus) || EXIT_STATUS_1(exitstatus)))
3003  {
3006  /* PostmasterStateMachine logic does the rest */
3007  continue;
3008  }
3009 
3010  if (EXIT_STATUS_3(exitstatus))
3011  {
3012  ereport(LOG,
3013  (errmsg("shutdown at recovery target")));
3016  TerminateChildren(SIGTERM);
3018  /* PostmasterStateMachine logic does the rest */
3019  continue;
3020  }
3021 
3022  /*
3023  * Unexpected exit of startup process (including FATAL exit)
3024  * during PM_STARTUP is treated as catastrophic. There are no
3025  * other processes running yet, so we can just exit.
3026  */
3027  if (pmState == PM_STARTUP &&
3029  !EXIT_STATUS_0(exitstatus))
3030  {
3031  LogChildExit(LOG, _("startup process"),
3032  pid, exitstatus);
3033  ereport(LOG,
3034  (errmsg("aborting startup due to startup process failure")));
3035  ExitPostmaster(1);
3036  }
3037 
3038  /*
3039  * After PM_STARTUP, any unexpected exit (including FATAL exit) of
3040  * the startup process is catastrophic, so kill other children,
3041  * and set StartupStatus so we don't try to reinitialize after
3042  * they're gone. Exception: if StartupStatus is STARTUP_SIGNALED,
3043  * then we previously sent the startup process a SIGQUIT; so
3044  * that's probably the reason it died, and we do want to try to
3045  * restart in that case.
3046  *
3047  * This stanza also handles the case where we sent a SIGQUIT
3048  * during PM_STARTUP due to some dead_end child crashing: in that
3049  * situation, if the startup process dies on the SIGQUIT, we need
3050  * to transition to PM_WAIT_BACKENDS state which will allow
3051  * PostmasterStateMachine to restart the startup process. (On the
3052  * other hand, the startup process might complete normally, if we
3053  * were too late with the SIGQUIT. In that case we'll fall
3054  * through and commence normal operations.)
3055  */
3056  if (!EXIT_STATUS_0(exitstatus))
3057  {
3059  {
3061  if (pmState == PM_STARTUP)
3063  }
3064  else
3066  HandleChildCrash(pid, exitstatus,
3067  _("startup process"));
3068  continue;
3069  }
3070 
3071  /*
3072  * Startup succeeded, commence normal operations
3073  */
3075  FatalError = false;
3076  AbortStartTime = 0;
3077  ReachedNormalRunning = true;
3078  pmState = PM_RUN;
3079  connsAllowed = true;
3080 
3081  /*
3082  * Crank up the background tasks, if we didn't do that already
3083  * when we entered consistent recovery state. It doesn't matter
3084  * if this fails, we'll just try again later.
3085  */
3086  if (CheckpointerPID == 0)
3088  if (BgWriterPID == 0)
3090  if (WalWriterPID == 0)
3092 
3093  /*
3094  * Likewise, start other special children as needed. In a restart
3095  * situation, some of them may be alive already.
3096  */
3099  if (PgArchStartupAllowed() && PgArchPID == 0)
3101 
3102  /* workers may be scheduled to start now */
3104 
3105  /* at this point we are really open for business */
3106  ereport(LOG,
3107  (errmsg("database system is ready to accept connections")));
3108 
3109  /* Report status */
3111 #ifdef USE_SYSTEMD
3112  sd_notify(0, "READY=1");
3113 #endif
3114 
3115  continue;
3116  }
3117 
3118  /*
3119  * Was it the bgwriter? Normal exit can be ignored; we'll start a new
3120  * one at the next iteration of the postmaster's main loop, if
3121  * necessary. Any other exit condition is treated as a crash.
3122  */
3123  if (pid == BgWriterPID)
3124  {
3125  BgWriterPID = 0;
3126  if (!EXIT_STATUS_0(exitstatus))
3127  HandleChildCrash(pid, exitstatus,
3128  _("background writer process"));
3129  continue;
3130  }
3131 
3132  /*
3133  * Was it the checkpointer?
3134  */
3135  if (pid == CheckpointerPID)
3136  {
3137  CheckpointerPID = 0;
3138  if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN)
3139  {
3140  /*
3141  * OK, we saw normal exit of the checkpointer after it's been
3142  * told to shut down. We expect that it wrote a shutdown
3143  * checkpoint. (If for some reason it didn't, recovery will
3144  * occur on next postmaster start.)
3145  *
3146  * At this point we should have no normal backend children
3147  * left (else we'd not be in PM_SHUTDOWN state) but we might
3148  * have dead_end children to wait for.
3149  *
3150  * If we have an archiver subprocess, tell it to do a last
3151  * archive cycle and quit. Likewise, if we have walsender
3152  * processes, tell them to send any remaining WAL and quit.
3153  */
3155 
3156  /* Waken archiver for the last time */
3157  if (PgArchPID != 0)
3159 
3160  /*
3161  * Waken walsenders for the last time. No regular backends
3162  * should be around anymore.
3163  */
3165 
3167  }
3168  else
3169  {
3170  /*
3171  * Any unexpected exit of the checkpointer (including FATAL
3172  * exit) is treated as a crash.
3173  */
3174  HandleChildCrash(pid, exitstatus,
3175  _("checkpointer process"));
3176  }
3177 
3178  continue;
3179  }
3180 
3181  /*
3182  * Was it the wal writer? Normal exit can be ignored; we'll start a
3183  * new one at the next iteration of the postmaster's main loop, if
3184  * necessary. Any other exit condition is treated as a crash.
3185  */
3186  if (pid == WalWriterPID)
3187  {
3188  WalWriterPID = 0;
3189  if (!EXIT_STATUS_0(exitstatus))
3190  HandleChildCrash(pid, exitstatus,
3191  _("WAL writer process"));
3192  continue;
3193  }
3194 
3195  /*
3196  * Was it the wal receiver? If exit status is zero (normal) or one
3197  * (FATAL exit), we assume everything is all right just like normal
3198  * backends. (If we need a new wal receiver, we'll start one at the
3199  * next iteration of the postmaster's main loop.)
3200  */
3201  if (pid == WalReceiverPID)
3202  {
3203  WalReceiverPID = 0;
3204  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3205  HandleChildCrash(pid, exitstatus,
3206  _("WAL receiver process"));
3207  continue;
3208  }
3209 
3210  /*
3211  * Was it the autovacuum launcher? Normal exit can be ignored; we'll
3212  * start a new one at the next iteration of the postmaster's main
3213  * loop, if necessary. Any other exit condition is treated as a
3214  * crash.
3215  */
3216  if (pid == AutoVacPID)
3217  {
3218  AutoVacPID = 0;
3219  if (!EXIT_STATUS_0(exitstatus))
3220  HandleChildCrash(pid, exitstatus,
3221  _("autovacuum launcher process"));
3222  continue;
3223  }
3224 
3225  /*
3226  * Was it the archiver? If exit status is zero (normal) or one (FATAL
3227  * exit), we assume everything is all right just like normal backends
3228  * and just try to restart a new one so that we immediately retry
3229  * archiving remaining files. (If fail, we'll try again in future
3230  * cycles of the postmaster's main loop.) Unless we were waiting for
3231  * it to shut down; don't restart it in that case, and
3232  * PostmasterStateMachine() will advance to the next shutdown step.
3233  */
3234  if (pid == PgArchPID)
3235  {
3236  PgArchPID = 0;
3237  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3238  HandleChildCrash(pid, exitstatus,
3239  _("archiver process"));
3240  if (PgArchStartupAllowed())
3242  continue;
3243  }
3244 
3245  /* Was it the system logger? If so, try to start a new one */
3246  if (pid == SysLoggerPID)
3247  {
3248  SysLoggerPID = 0;
3249  /* for safety's sake, launch new logger *first* */
3251  if (!EXIT_STATUS_0(exitstatus))
3252  LogChildExit(LOG, _("system logger process"),
3253  pid, exitstatus);
3254  continue;
3255  }
3256 
3257  /* Was it one of our background workers? */
3258  if (CleanupBackgroundWorker(pid, exitstatus))
3259  {
3260  /* have it be restarted */
3261  HaveCrashedWorker = true;
3262  continue;
3263  }
3264 
3265  /*
3266  * Else do standard backend child cleanup.
3267  */
3268  CleanupBackend(pid, exitstatus);
3269  } /* loop over pending child-death reports */
3270 
3271  /*
3272  * After cleaning out the SIGCHLD queue, see if we have any state changes
3273  * or actions to make.
3274  */
3276 }
3277 
3278 /*
3279  * Scan the bgworkers list and see if the given PID (which has just stopped
3280  * or crashed) is in it. Handle its shutdown if so, and return true. If not a
3281  * bgworker, return false.
3282  *
3283  * This is heavily based on CleanupBackend. One important difference is that
3284  * we don't know yet that the dying process is a bgworker, so we must be silent
3285  * until we're sure it is.
3286  */
3287 static bool
3289  int exitstatus) /* child's exit status */
3290 {
3291  char namebuf[MAXPGPATH];
3292  slist_mutable_iter iter;
3293 
3295  {
3296  RegisteredBgWorker *rw;
3297 
3298  rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
3299 
3300  if (rw->rw_pid != pid)
3301  continue;
3302 
3303 #ifdef WIN32
3304  /* see CleanupBackend */
3305  if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3306  exitstatus = 0;
3307 #endif
3308 
3309  snprintf(namebuf, MAXPGPATH, _("background worker \"%s\""),
3310  rw->rw_worker.bgw_type);
3311 
3312 
3313  if (!EXIT_STATUS_0(exitstatus))
3314  {
3315  /* Record timestamp, so we know when to restart the worker. */
3317  }
3318  else
3319  {
3320  /* Zero exit status means terminate */
3321  rw->rw_crashed_at = 0;
3322  rw->rw_terminate = true;
3323  }
3324 
3325  /*
3326  * Additionally, just like a backend, any exit status other than 0 or
3327  * 1 is considered a crash and causes a system-wide restart.
3328  */
3329  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3330  {
3331  HandleChildCrash(pid, exitstatus, namebuf);
3332  return true;
3333  }
3334 
3335  /*
3336  * We must release the postmaster child slot. If the worker failed to
3337  * do so, it did not clean up after itself, requiring a crash-restart
3338  * cycle.
3339  */
3341  {
3342  HandleChildCrash(pid, exitstatus, namebuf);
3343  return true;
3344  }
3345 
3346  /* Get it out of the BackendList and clear out remaining data */
3347  dlist_delete(&rw->rw_backend->elem);
3348 #ifdef EXEC_BACKEND
3349  ShmemBackendArrayRemove(rw->rw_backend);
3350 #endif
3351 
3352  /*
3353  * It's possible that this background worker started some OTHER
3354  * background worker and asked to be notified when that worker started
3355  * or stopped. If so, cancel any notifications destined for the
3356  * now-dead backend.
3357  */
3358  if (rw->rw_backend->bgworker_notify)
3360  free(rw->rw_backend);
3361  rw->rw_backend = NULL;
3362  rw->rw_pid = 0;
3363  rw->rw_child_slot = 0;
3364  ReportBackgroundWorkerExit(&iter); /* report child death */
3365 
3366  LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG,
3367  namebuf, pid, exitstatus);
3368 
3369  return true;
3370  }
3371 
3372  return false;
3373 }
3374 
3375 /*
3376  * CleanupBackend -- cleanup after terminated backend.
3377  *
3378  * Remove all local state associated with backend.
3379  *
3380  * If you change this, see also CleanupBackgroundWorker.
3381  */
3382 static void
3384  int exitstatus) /* child's exit status. */
3385 {
3386  dlist_mutable_iter iter;
3387 
3388  LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
3389 
3390  /*
3391  * If a backend dies in an ugly way then we must signal all other backends
3392  * to quickdie. If exit status is zero (normal) or one (FATAL exit), we
3393  * assume everything is all right and proceed to remove the backend from
3394  * the active backend list.
3395  */
3396 
3397 #ifdef WIN32
3398 
3399  /*
3400  * On win32, also treat ERROR_WAIT_NO_CHILDREN (128) as nonfatal case,
3401  * since that sometimes happens under load when the process fails to start
3402  * properly (long before it starts using shared memory). Microsoft reports
3403  * it is related to mutex failure:
3404  * http://archives.postgresql.org/pgsql-hackers/2010-09/msg00790.php
3405  */
3406  if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3407  {
3408  LogChildExit(LOG, _("server process"), pid, exitstatus);
3409  exitstatus = 0;
3410  }
3411 #endif
3412 
3413  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3414  {
3415  HandleChildCrash(pid, exitstatus, _("server process"));
3416  return;
3417  }
3418 
3420  {
3421  Backend *bp = dlist_container(Backend, elem, iter.cur);
3422 
3423  if (bp->pid == pid)
3424  {
3425  if (!bp->dead_end)
3426  {
3428  {
3429  /*
3430  * Uh-oh, the child failed to clean itself up. Treat as a
3431  * crash after all.
3432  */
3433  HandleChildCrash(pid, exitstatus, _("server process"));
3434  return;
3435  }
3436 #ifdef EXEC_BACKEND
3437  ShmemBackendArrayRemove(bp);
3438 #endif
3439  }
3440  if (bp->bgworker_notify)
3441  {
3442  /*
3443  * This backend may have been slated to receive SIGUSR1 when
3444  * some background worker started or stopped. Cancel those
3445  * notifications, as we don't want to signal PIDs that are not
3446  * PostgreSQL backends. This gets skipped in the (probably
3447  * very common) case where the backend has never requested any
3448  * such notifications.
3449  */
3451  }
3452  dlist_delete(iter.cur);
3453  free(bp);
3454  break;
3455  }
3456  }
3457 }
3458 
3459 /*
3460  * HandleChildCrash -- cleanup after failed backend, bgwriter, checkpointer,
3461  * walwriter, autovacuum, archiver or background worker.
3462  *
3463  * The objectives here are to clean up our local state about the child
3464  * process, and to signal all other remaining children to quickdie.
3465  */
3466 static void
3467 HandleChildCrash(int pid, int exitstatus, const char *procname)
3468 {
3469  dlist_mutable_iter iter;
3470  slist_iter siter;
3471  Backend *bp;
3472  bool take_action;
3473 
3474  /*
3475  * We only log messages and send signals if this is the first process
3476  * crash and we're not doing an immediate shutdown; otherwise, we're only
3477  * here to update postmaster's idea of live processes. If we have already
3478  * signaled children, nonzero exit status is to be expected, so don't
3479  * clutter log.
3480  */
3481  take_action = !FatalError && Shutdown != ImmediateShutdown;
3482 
3483  if (take_action)
3484  {
3485  LogChildExit(LOG, procname, pid, exitstatus);
3486  ereport(LOG,
3487  (errmsg("terminating any other active server processes")));
3489  }
3490 
3491  /* Process background workers. */
3493  {
3494  RegisteredBgWorker *rw;
3495 
3496  rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
3497  if (rw->rw_pid == 0)
3498  continue; /* not running */
3499  if (rw->rw_pid == pid)
3500  {
3501  /*
3502  * Found entry for freshly-dead worker, so remove it.
3503  */
3505  dlist_delete(&rw->rw_backend->elem);
3506 #ifdef EXEC_BACKEND
3507  ShmemBackendArrayRemove(rw->rw_backend);
3508 #endif
3509  free(rw->rw_backend);
3510  rw->rw_backend = NULL;
3511  rw->rw_pid = 0;
3512  rw->rw_child_slot = 0;
3513  /* don't reset crashed_at */
3514  /* don't report child stop, either */
3515  /* Keep looping so we can signal remaining workers */
3516  }
3517  else
3518  {
3519  /*
3520  * This worker is still alive. Unless we did so already, tell it
3521  * to commit hara-kiri.
3522  */
3523  if (take_action)
3524  sigquit_child(rw->rw_pid);
3525  }
3526  }
3527 
3528  /* Process regular backends */
3530  {
3531  bp = dlist_container(Backend, elem, iter.cur);
3532 
3533  if (bp->pid == pid)
3534  {
3535  /*
3536  * Found entry for freshly-dead backend, so remove it.
3537  */
3538  if (!bp->dead_end)
3539  {
3541 #ifdef EXEC_BACKEND
3542  ShmemBackendArrayRemove(bp);
3543 #endif
3544  }
3545  dlist_delete(iter.cur);
3546  free(bp);
3547  /* Keep looping so we can signal remaining backends */
3548  }
3549  else
3550  {
3551  /*
3552  * This backend is still alive. Unless we did so already, tell it
3553  * to commit hara-kiri.
3554  *
3555  * We could exclude dead_end children here, but at least when
3556  * sending SIGABRT it seems better to include them.
3557  *
3558  * Background workers were already processed above; ignore them
3559  * here.
3560  */
3561  if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
3562  continue;
3563 
3564  if (take_action)
3565  sigquit_child(bp->pid);
3566  }
3567  }
3568 
3569  /* Take care of the startup process too */
3570  if (pid == StartupPID)
3571  {
3572  StartupPID = 0;
3573  /* Caller adjusts StartupStatus, so don't touch it here */
3574  }
3575  else if (StartupPID != 0 && take_action)
3576  {
3579  }
3580 
3581  /* Take care of the bgwriter too */
3582  if (pid == BgWriterPID)
3583  BgWriterPID = 0;
3584  else if (BgWriterPID != 0 && take_action)
3586 
3587  /* Take care of the checkpointer too */
3588  if (pid == CheckpointerPID)
3589  CheckpointerPID = 0;
3590  else if (CheckpointerPID != 0 && take_action)
3592 
3593  /* Take care of the walwriter too */
3594  if (pid == WalWriterPID)
3595  WalWriterPID = 0;
3596  else if (WalWriterPID != 0 && take_action)
3598 
3599  /* Take care of the walreceiver too */
3600  if (pid == WalReceiverPID)
3601  WalReceiverPID = 0;
3602  else if (WalReceiverPID != 0 && take_action)
3604 
3605  /* Take care of the autovacuum launcher too */
3606  if (pid == AutoVacPID)
3607  AutoVacPID = 0;
3608  else if (AutoVacPID != 0 && take_action)
3610 
3611  /* Take care of the archiver too */
3612  if (pid == PgArchPID)
3613  PgArchPID = 0;
3614  else if (PgArchPID != 0 && take_action)
3616 
3617  /* We do NOT restart the syslogger */
3618 
3619  if (Shutdown != ImmediateShutdown)
3620  FatalError = true;
3621 
3622  /* We now transit into a state of waiting for children to die */
3623  if (pmState == PM_RECOVERY ||
3624  pmState == PM_HOT_STANDBY ||
3625  pmState == PM_RUN ||
3627  pmState == PM_SHUTDOWN)
3629 
3630  /*
3631  * .. and if this doesn't happen quickly enough, now the clock is ticking
3632  * for us to kill them without mercy.
3633  */
3634  if (AbortStartTime == 0)
3635  AbortStartTime = time(NULL);
3636 }
3637 
/*
 * LogChildExit -- report how a child process ended.
 *
 *	lev:		elevel at which to emit the message
 *	procname:	translated noun phrase naming the child
 *	pid:		the child's PID
 *	exitstatus:	wait status to be decoded
 *
 * For any nonzero exit status we also try to fetch what the process was
 * running, and attach it as an errdetail when available.
 */
static void
LogChildExit(int lev, const char *procname, int pid, int exitstatus)
{
	/*
	 * The buffer size is arbitrary; it is chosen to equal the default
	 * track_activity_query_size.
	 */
	char		querybuf[1024];
	const char *running;

	running = EXIT_STATUS_0(exitstatus) ? NULL :
		pgstat_get_crashed_backend_activity(pid, querybuf, sizeof(querybuf));

	/* Case 1: the child called exit() */
	if (WIFEXITED(exitstatus))
	{
		ereport(lev,

		/*------
		  translator: %s is a noun phrase describing a child process, such as
		  "server process" */
				(errmsg("%s (PID %d) exited with exit code %d",
						procname, pid, WEXITSTATUS(exitstatus)),
				 running ? errdetail("Failed process was running: %s", running) : 0));
		return;
	}

	/* Case 2: the child was killed by a signal (or exception on Windows) */
	if (WIFSIGNALED(exitstatus))
	{
#if defined(WIN32)
		ereport(lev,

		/*------
		  translator: %s is a noun phrase describing a child process, such as
		  "server process" */
				(errmsg("%s (PID %d) was terminated by exception 0x%X",
						procname, pid, WTERMSIG(exitstatus)),
				 errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
				 running ? errdetail("Failed process was running: %s", running) : 0));
#else
		ereport(lev,

		/*------
		  translator: %s is a noun phrase describing a child process, such as
		  "server process" */
				(errmsg("%s (PID %d) was terminated by signal %d: %s",
						procname, pid, WTERMSIG(exitstatus),
						pg_strsignal(WTERMSIG(exitstatus))),
				 running ? errdetail("Failed process was running: %s", running) : 0));
#endif
		return;
	}

	/* Case 3: a wait status we do not know how to interpret */
	ereport(lev,

	/*------
	  translator: %s is a noun phrase describing a child process, such as
	  "server process" */
			(errmsg("%s (PID %d) exited with unrecognized status %d",
					procname, pid, exitstatus),
			 running ? errdetail("Failed process was running: %s", running) : 0));
}
3699 
3700 /*
3701  * Advance the postmaster's state machine and take actions as appropriate
3702  *
3703  * This is common code for process_pm_shutdown_request(),
3704  * process_pm_child_exit() and process_pm_pmsignal(), which process the signals
3705  * that might mean we need to change state.
3706  */
3707 static void
3709 {
3710  /* If we're doing a smart shutdown, try to advance that state. */
3711  if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
3712  {
3713  if (!connsAllowed)
3714  {
3715  /*
3716  * This state ends when we have no normal client backends running.
3717  * Then we're ready to stop other children.
3718  */
3721  }
3722  }
3723 
3724  /*
3725  * If we're ready to do so, signal child processes to shut down. (This
3726  * isn't a persistent state, but treating it as a distinct pmState allows
3727  * us to share this code across multiple shutdown code paths.)
3728  */
3729  if (pmState == PM_STOP_BACKENDS)
3730  {
3731  /*
3732  * Forget any pending requests for background workers, since we're no
3733  * longer willing to launch any new workers. (If additional requests
3734  * arrive, BackgroundWorkerStateChange will reject them.)
3735  */
3737 
3738  /* Signal all backend children except walsenders */
3739  SignalSomeChildren(SIGTERM,
3741  /* and the autovac launcher too */
3742  if (AutoVacPID != 0)
3743  signal_child(AutoVacPID, SIGTERM);
3744  /* and the bgwriter too */
3745  if (BgWriterPID != 0)
3746  signal_child(BgWriterPID, SIGTERM);
3747  /* and the walwriter too */
3748  if (WalWriterPID != 0)
3749  signal_child(WalWriterPID, SIGTERM);
3750  /* If we're in recovery, also stop startup and walreceiver procs */
3751  if (StartupPID != 0)
3752  signal_child(StartupPID, SIGTERM);
3753  if (WalReceiverPID != 0)
3754  signal_child(WalReceiverPID, SIGTERM);
3755  /* checkpointer, archiver, stats, and syslogger may continue for now */
3756 
3757  /* Now transition to PM_WAIT_BACKENDS state to wait for them to die */
3759  }
3760 
3761  /*
3762  * If we are in a state-machine state that implies waiting for backends to
3763  * exit, see if they're all gone, and change state if so.
3764  */
3765  if (pmState == PM_WAIT_BACKENDS)
3766  {
3767  /*
3768  * PM_WAIT_BACKENDS state ends when we have no regular backends
3769  * (including autovac workers), no bgworkers (including unconnected
3770  * ones), and no walwriter, autovac launcher or bgwriter. If we are
3771  * doing crash recovery or an immediate shutdown then we expect the
3772  * checkpointer to exit as well, otherwise not. The stats and
3773  * syslogger processes are disregarded since they are not connected to
3774  * shared memory; we also disregard dead_end children here. Walsenders
3775  * and archiver are also disregarded, they will be terminated later
3776  * after writing the checkpoint record.
3777  */
3779  StartupPID == 0 &&
3780  WalReceiverPID == 0 &&
3781  BgWriterPID == 0 &&
3782  (CheckpointerPID == 0 ||
3784  WalWriterPID == 0 &&
3785  AutoVacPID == 0)
3786  {
3788  {
3789  /*
3790  * Start waiting for dead_end children to die. This state
3791  * change causes ServerLoop to stop creating new ones.
3792  */
3794 
3795  /*
3796  * We already SIGQUIT'd the archiver and stats processes, if
3797  * any, when we started immediate shutdown or entered
3798  * FatalError state.
3799  */
3800  }
3801  else
3802  {
3803  /*
3804  * If we get here, we are proceeding with normal shutdown. All
3805  * the regular children are gone, and it's time to tell the
3806  * checkpointer to do a shutdown checkpoint.
3807  */
3809  /* Start the checkpointer if not running */
3810  if (CheckpointerPID == 0)
3812  /* And tell it to shut down */
3813  if (CheckpointerPID != 0)
3814  {
3816  pmState = PM_SHUTDOWN;
3817  }
3818  else
3819  {
3820  /*
3821  * If we failed to fork a checkpointer, just shut down.
3822  * Any required cleanup will happen at next restart. We
3823  * set FatalError so that an "abnormal shutdown" message
3824  * gets logged when we exit.
3825  *
3826  * We don't consult send_abort_for_crash here, as it's
3827  * unlikely that dumping cores would illuminate the reason
3828  * for checkpointer fork failure.
3829  */
3830  FatalError = true;
3832 
3833  /* Kill the walsenders and archiver too */
3835  if (PgArchPID != 0)
3837  }
3838  }
3839  }
3840  }
3841 
3842  if (pmState == PM_SHUTDOWN_2)
3843  {
3844  /*
3845  * PM_SHUTDOWN_2 state ends when there's no other children than
3846  * dead_end children left. There shouldn't be any regular backends
3847  * left by now anyway; what we're really waiting for is walsenders and
3848  * archiver.
3849  */
3850  if (PgArchPID == 0 && CountChildren(BACKEND_TYPE_ALL) == 0)
3851  {
3853  }
3854  }
3855 
3856  if (pmState == PM_WAIT_DEAD_END)
3857  {
3858  /* Don't allow any new socket connection events. */
3860 
3861  /*
3862  * PM_WAIT_DEAD_END state ends when the BackendList is entirely empty
3863  * (ie, no dead_end children remain), and the archiver is gone too.
3864  *
3865  * The reason we wait for those two is to protect them against a new
3866  * postmaster starting conflicting subprocesses; this isn't an
3867  * ironclad protection, but it at least helps in the
3868  * shutdown-and-immediately-restart scenario. Note that they have
3869  * already been sent appropriate shutdown signals, either during a
3870  * normal state transition leading up to PM_WAIT_DEAD_END, or during
3871  * FatalError processing.
3872  */
3873  if (dlist_is_empty(&BackendList) && PgArchPID == 0)
3874  {
3875  /* These other guys should be dead already */
3876  Assert(StartupPID == 0);
3877  Assert(WalReceiverPID == 0);
3878  Assert(BgWriterPID == 0);
3879  Assert(CheckpointerPID == 0);
3880  Assert(WalWriterPID == 0);
3881  Assert(AutoVacPID == 0);
3882  /* syslogger is not considered here */
3884  }
3885  }
3886 
3887  /*
3888  * If we've been told to shut down, we exit as soon as there are no
3889  * remaining children. If there was a crash, cleanup will occur at the
3890  * next startup. (Before PostgreSQL 8.3, we tried to recover from the
3891  * crash before exiting, but that seems unwise if we are quitting because
3892  * we got SIGTERM from init --- there may well not be time for recovery
3893  * before init decides to SIGKILL us.)
3894  *
3895  * Note that the syslogger continues to run. It will exit when it sees
3896  * EOF on its input pipe, which happens when there are no more upstream
3897  * processes.
3898  */
3900  {
3901  if (FatalError)
3902  {
3903  ereport(LOG, (errmsg("abnormal database system shutdown")));
3904  ExitPostmaster(1);
3905  }
3906  else
3907  {
3908  /*
3909  * Normal exit from the postmaster is here. We don't need to log
3910  * anything here, since the UnlinkLockFiles proc_exit callback
3911  * will do so, and that should be the last user-visible action.
3912  */
3913  ExitPostmaster(0);
3914  }
3915  }
3916 
3917  /*
3918  * If the startup process failed, or the user does not want an automatic
3919  * restart after backend crashes, wait for all non-syslogger children to
3920  * exit, and then exit postmaster. We don't try to reinitialize when the
3921  * startup process fails, because more than likely it will just fail again
3922  * and we will keep trying forever.
3923  */
3924  if (pmState == PM_NO_CHILDREN)
3925  {
3927  {
3928  ereport(LOG,
3929  (errmsg("shutting down due to startup process failure")));
3930  ExitPostmaster(1);
3931  }
3932  if (!restart_after_crash)
3933  {
3934  ereport(LOG,
3935  (errmsg("shutting down because restart_after_crash is off")));
3936  ExitPostmaster(1);
3937  }
3938  }
3939 
3940  /*
3941  * If we need to recover from a crash, wait for all non-syslogger children
3942  * to exit, then reset shmem and StartupDataBase.
3943  */
3944  if (FatalError && pmState == PM_NO_CHILDREN)
3945  {
3946  ereport(LOG,
3947  (errmsg("all server processes terminated; reinitializing")));
3948 
3949  /* remove leftover temporary files after a crash */
3952 
3953  /* allow background workers to immediately restart */
3955 
3956  shmem_exit(1);
3957 
3958  /* re-read control file into local memory */
3960 
3961  /* re-create shared memory and semaphores */
3963 
3965  Assert(StartupPID != 0);
3967  pmState = PM_STARTUP;
3968  /* crash recovery started, reset SIGKILL flag */
3969  AbortStartTime = 0;
3970 
3971  /* start accepting server socket connection events again */
3973  }
3974 }
3975 
3976 
3977 /*
3978  * Send a signal to a postmaster child process
3979  *
3980  * On systems that have setsid(), each child process sets itself up as a
3981  * process group leader. For signals that are generally interpreted in the
3982  * appropriate fashion, we signal the entire process group not just the
3983  * direct child process. This allows us to, for example, SIGQUIT a blocked
3984  * archive_recovery script, or SIGINT a script being run by a backend via
3985  * system().
3986  *
3987  * There is a race condition for recently-forked children: they might not
3988  * have executed setsid() yet. So we signal the child directly as well as
3989  * the group. We assume such a child will handle the signal before trying
3990  * to spawn any grandchild processes. We also assume that signaling the
3991  * child twice will not cause any problems.
3992  */
3993 static void
3994 signal_child(pid_t pid, int signal)
3995 {
3996  if (kill(pid, signal) < 0)
3997  elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) pid, signal);
3998 #ifdef HAVE_SETSID
3999  switch (signal)
4000  {
4001  case SIGINT:
4002  case SIGTERM:
4003  case SIGQUIT:
4004  case SIGKILL:
4005  case SIGABRT:
4006  if (kill(-pid, signal) < 0)
4007  elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) (-pid), signal);
4008  break;
4009  default:
4010  break;
4011  }
4012 #endif
4013 }
4014 
4015 /*
4016  * Convenience function for killing a child process after a crash of some
4017  * other child process. We log the action at a higher level than we would
4018  * otherwise do, and we apply send_abort_for_crash to decide which signal
4019  * to send. Normally it's SIGQUIT -- and most other comments in this file
4020  * are written on the assumption that it is -- but developers might prefer
4021  * to use SIGABRT to collect per-child core dumps.
4022  */
4023 static void
4024 sigquit_child(pid_t pid)
4025 {
4026  ereport(DEBUG2,
4027  (errmsg_internal("sending %s to process %d",
4028  (send_abort_for_crash ? "SIGABRT" : "SIGQUIT"),
4029  (int) pid)));
4031 }
4032 
4033 /*
4034  * Send a signal to the targeted children (but NOT special children;
4035  * dead_end children are never signaled, either).
4036  */
4037 static bool
4038 SignalSomeChildren(int signal, int target)
4039 {
4040  dlist_iter iter;
4041  bool signaled = false;
4042 
4043  dlist_foreach(iter, &BackendList)
4044  {
4045  Backend *bp = dlist_container(Backend, elem, iter.cur);
4046 
4047  if (bp->dead_end)
4048  continue;
4049 
4050  /*
4051  * Since target == BACKEND_TYPE_ALL is the most common case, we test
4052  * it first and avoid touching shared memory for every child.
4053  */
4054  if (target != BACKEND_TYPE_ALL)
4055  {
4056  /*
4057  * Assign bkend_type for any recently announced WAL Sender
4058  * processes.
4059  */
4060  if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
4063 
4064  if (!(target & bp->bkend_type))
4065  continue;
4066  }
4067 
4068  ereport(DEBUG4,
4069  (errmsg_internal("sending signal %d to process %d",
4070  signal, (int) bp->pid)));
4071  signal_child(bp->pid, signal);
4072  signaled = true;
4073  }
4074  return signaled;
4075 }
4076 
4077 /*
4078  * Send a termination signal to children. This considers all of our children
4079  * processes, except syslogger and dead_end backends.
4080  */
4081 static void
4083 {
4084  SignalChildren(signal);
4085  if (StartupPID != 0)
4086  {
4087  signal_child(StartupPID, signal);
4088  if (signal == SIGQUIT || signal == SIGKILL || signal == SIGABRT)
4090  }
4091  if (BgWriterPID != 0)
4092  signal_child(BgWriterPID, signal);
4093  if (CheckpointerPID != 0)
4094  signal_child(CheckpointerPID, signal);
4095  if (WalWriterPID != 0)
4096  signal_child(WalWriterPID, signal);
4097  if (WalReceiverPID != 0)
4098  signal_child(WalReceiverPID, signal);
4099  if (AutoVacPID != 0)
4100  signal_child(AutoVacPID, signal);
4101  if (PgArchPID != 0)
4102  signal_child(PgArchPID, signal);
4103 }
4104 
4105 /*
4106  * BackendStartup -- start backend process
4107  *
4108  * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
4109  *
4110  * Note: if you change this code, also consider StartAutovacuumWorker.
4111  */
4112 static int
4114 {
4115  Backend *bn; /* for backend cleanup */
4116  pid_t pid;
4117 
4118  /*
4119  * Create backend data structure. Better before the fork() so we can
4120  * handle failure cleanly.
4121  */
4122  bn = (Backend *) malloc(sizeof(Backend));
4123  if (!bn)
4124  {
4125  ereport(LOG,
4126  (errcode(ERRCODE_OUT_OF_MEMORY),
4127  errmsg("out of memory")));
4128  return STATUS_ERROR;
4129  }
4130 
4131  /*
4132  * Compute the cancel key that will be assigned to this backend. The
4133  * backend will have its own copy in the forked-off process' value of
4134  * MyCancelKey, so that it can transmit the key to the frontend.
4135  */
4137  {
4138  free(bn);
4139  ereport(LOG,
4140  (errcode(ERRCODE_INTERNAL_ERROR),
4141  errmsg("could not generate random cancel key")));
4142  return STATUS_ERROR;
4143  }
4144 
4145  bn->cancel_key = MyCancelKey;
4146 
4147  /* Pass down canAcceptConnections state */
4148  port->canAcceptConnections = canAcceptConnections(BACKEND_TYPE_NORMAL);
4149  bn->dead_end = (port->canAcceptConnections != CAC_OK);
4150 
4151  /*
4152  * Unless it's a dead_end child, assign it a child slot number
4153  */
4154  if (!bn->dead_end)
4156  else
4157  bn->child_slot = 0;
4158 
4159  /* Hasn't asked to be notified about any bgworkers yet */
4160  bn->bgworker_notify = false;
4161 
4162 #ifdef EXEC_BACKEND
4163  pid = backend_forkexec(port);
4164 #else /* !EXEC_BACKEND */
4165  pid = fork_process();
4166  if (pid == 0) /* child */
4167  {
4168  free(bn);
4169 
4170  /* Detangle from postmaster */
4172 
4173  /* Close the postmaster's sockets */
4174  ClosePostmasterPorts(false);
4175 
4176  /* Perform additional initialization and collect startup packet */
4178 
4179  /*
4180  * Create a per-backend PGPROC struct in shared memory. We must do
4181  * this before we can use LWLocks. In the !EXEC_BACKEND case (here)
4182  * this could be delayed a bit further, but EXEC_BACKEND needs to do
4183  * stuff with LWLocks before PostgresMain(), so we do it here as well
4184  * for symmetry.
4185  */
4186  InitProcess();
4187 
4188  /* And run the backend */
4189  BackendRun(port);
4190  }
4191 #endif /* EXEC_BACKEND */
4192 
4193  if (pid < 0)
4194  {
4195  /* in parent, fork failed */
4196  int save_errno = errno;
4197 
4198  if (!bn->dead_end)
4200  free(bn);
4201  errno = save_errno;
4202  ereport(LOG,
4203  (errmsg("could not fork new process for connection: %m")));
4204  report_fork_failure_to_client(port, save_errno);
4205  return STATUS_ERROR;
4206  }
4207 
4208  /* in parent, successful fork */
4209  ereport(DEBUG2,
4210  (errmsg_internal("forked new backend, pid=%d socket=%d",
4211  (int) pid, (int) port->sock)));
4212 
4213  /*
4214  * Everything's been successful, it's safe to add this backend to our list
4215  * of backends.
4216  */
4217  bn->pid = pid;
4218  bn->bkend_type = BACKEND_TYPE_NORMAL; /* Can change later to WALSND */
4220 
4221 #ifdef EXEC_BACKEND
4222  if (!bn->dead_end)
4223  ShmemBackendArrayAdd(bn);
4224 #endif
4225 
4226  return STATUS_OK;
4227 }
4228 
4229 /*
4230  * Try to report backend fork() failure to client before we close the
4231  * connection. Since we do not care to risk blocking the postmaster on
4232  * this connection, we set the connection to non-blocking and try only once.
4233  *
4234  * This is grungy special-purpose code; we cannot use backend libpq since
4235  * it's not up and running.
4236  */
4237 static void
4239 {
4240  char buffer[1000];
4241  int rc;
4242 
4243  /* Format the error message packet (always V2 protocol) */
4244  snprintf(buffer, sizeof(buffer), "E%s%s\n",
4245  _("could not fork new process for connection: "),
4246  strerror(errnum));
4247 
4248  /* Set port to non-blocking. Don't do send() if this fails */
4249  if (!pg_set_noblock(port->sock))
4250  return;
4251 
4252  /* We'll retry after EINTR, but ignore all other failures */
4253  do
4254  {
4255  rc = send(port->sock, buffer, strlen(buffer) + 1, 0);
4256  } while (rc < 0 && errno == EINTR);
4257 }
4258 
4259 
4260 /*
4261  * BackendInitialize -- initialize an interactive (postmaster-child)
4262  * backend process, and collect the client's startup packet.
4263  *
4264  * returns: nothing. Will not return at all if there's any failure.
4265  *
4266  * Note: this code does not depend on having any access to shared memory.
4267  * Indeed, our approach to SIGTERM/timeout handling *requires* that
4268  * shared memory not have been touched yet; see comments within.
4269  * In the EXEC_BACKEND case, we are physically attached to shared memory
4270  * but have not yet set up most of our local pointers to shmem structures.
4271  */
4272 static void
/*
 * NOTE(review): the embedded listing numbers skip 4273, 4285, 4321,
 * 4392-4393, 4404, 4416 and 4431, so the declarator (function name and
 * parameter list) and several statements are not shown in this copy.  The
 * body refers to a parameter named "port"; confirm the complete text against
 * the upstream file before relying on this listing.
 */
4274 {
4275  int status;
4276  int ret;
4277  char remote_host[NI_MAXHOST];
4278  char remote_port[NI_MAXSERV];
4279  StringInfoData ps_data;
4280 
4281  /* Save port etc. for ps status */
4282  MyProcPort = port;
4283 
4284  /* Tell fd.c about the long-lived FD associated with the port */
4286 
4287  /*
4288  * PreAuthDelay is a debugging aid for investigating problems in the
4289  * authentication cycle: it can be set in postgresql.conf to allow time to
4290  * attach to the newly-forked backend with a debugger. (See also
4291  * PostAuthDelay, which we allow clients to pass through PGOPTIONS, but it
4292  * is not honored until after authentication.)
4293  */
4294  if (PreAuthDelay > 0)
4295  pg_usleep(PreAuthDelay * 1000000L);
4296 
4297  /* This flag will remain set until InitPostgres finishes authentication */
4298  ClientAuthInProgress = true; /* limit visibility of log messages */
4299 
4300  /* set these to empty in case they are needed before we set them up */
4301  port->remote_host = "";
4302  port->remote_port = "";
4303 
4304  /*
4305  * Initialize libpq and enable reporting of ereport errors to the client.
4306  * Must do this now because authentication uses libpq to send messages.
4307  */
4308  pq_init(); /* initialize libpq to talk to client */
4309  whereToSendOutput = DestRemote; /* now safe to ereport to client */
4310 
4311  /*
4312  * We arrange to do _exit(1) if we receive SIGTERM or timeout while trying
4313  * to collect the startup packet; while SIGQUIT results in _exit(2).
4314  * Otherwise the postmaster cannot shutdown the database FAST or IMMED
4315  * cleanly if a buggy client fails to send the packet promptly.
4316  *
4317  * Exiting with _exit(1) is only possible because we have not yet touched
4318  * shared memory; therefore no outside-the-process state needs to get
4319  * cleaned up.
4320  */
4322  /* SIGQUIT handler was already set up by InitPostmasterChild */
4323  InitializeTimeouts(); /* establishes SIGALRM handler */
4324  sigprocmask(SIG_SETMASK, &StartupBlockSig, NULL);
4325 
4326  /*
4327  * Get the remote host name and port for logging and status display.
4328  */
4329  remote_host[0] = '\0';
4330  remote_port[0] = '\0';
 /* A lookup failure is only warned about; the buffers then stay empty or partial */
4331  if ((ret = pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen,
4332  remote_host, sizeof(remote_host),
4333  remote_port, sizeof(remote_port),
4334  (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV)) != 0)
4335  ereport(WARNING,
4336  (errmsg_internal("pg_getnameinfo_all() failed: %s",
4337  gai_strerror(ret))));
4338 
4339  /*
4340  * Save remote_host and remote_port in port structure (after this, they
4341  * will appear in log_line_prefix data for log messages).
4342  */
 /*
  * NOTE(review): the strdup() results are stored without a NULL check, so an
  * out-of-memory failure would leave a NULL pointer in the port structure.
  */
4343  port->remote_host = strdup(remote_host);
4344  port->remote_port = strdup(remote_port);
4345 
4346  /* And now we can issue the Log_connections message, if wanted */
4347  if (Log_connections)
4348  {
4349  if (remote_port[0])
4350  ereport(LOG,
4351  (errmsg("connection received: host=%s port=%s",
4352  remote_host,
4353  remote_port)));
4354  else
4355  ereport(LOG,
4356  (errmsg("connection received: host=%s",
4357  remote_host)));
4358  }
4359 
4360  /*
4361  * If we did a reverse lookup to name, we might as well save the results
4362  * rather than possibly repeating the lookup during authentication.
4363  *
4364  * Note that we don't want to specify NI_NAMEREQD above, because then we'd
4365  * get nothing useful for a client without an rDNS entry. Therefore, we
4366  * must check whether we got a numeric IPv4 or IPv6 address, and not save
4367  * it into remote_hostname if so. (This test is conservative and might
4368  * sometimes classify a hostname as numeric, but an error in that
4369  * direction is safe; it only results in a possible extra lookup.)
4370  */
4371  if (log_hostname &&
4372  ret == 0 &&
4373  strspn(remote_host, "0123456789.") < strlen(remote_host) &&
4374  strspn(remote_host, "0123456789ABCDEFabcdef:") < strlen(remote_host))
4375  port->remote_hostname = strdup(remote_host);
4376 
4377  /*
4378  * Ready to begin client interaction. We will give up and _exit(1) after
4379  * a time delay, so that a broken client can't hog a connection
4380  * indefinitely. PreAuthDelay and any DNS interactions above don't count
4381  * against the time limit.
4382  *
4383  * Note: AuthenticationTimeout is applied here while waiting for the
4384  * startup packet, and then again in InitPostgres for the duration of any
4385  * authentication operations. So a hostile client could tie up the
4386  * process for nearly twice AuthenticationTimeout before we kick him off.
4387  *
4388  * Note: because PostgresMain will call InitializeTimeouts again, the
4389  * registration of STARTUP_PACKET_TIMEOUT will be lost. This is okay
4390  * since we never use it again after this function.
4391  */
4394 
4395  /*
4396  * Receive the startup packet (which might turn out to be a cancel request
4397  * packet).
4398  */
4399  status = ProcessStartupPacket(port, false, false);
4400 
4401  /*
4402  * Disable the timeout, and prevent SIGTERM again.
4403  */
4405  sigprocmask(SIG_SETMASK, &BlockSig, NULL);
4406 
4407  /*
4408  * As a safety check that nothing in startup has yet performed
4409  * shared-memory modifications that would need to be undone if we had
4410  * exited through SIGTERM or timeout above, check that no on_shmem_exit
4411  * handlers have been registered yet. (This isn't terribly bulletproof,
4412  * since someone might misuse an on_proc_exit handler for shmem cleanup,
4413  * but it's a cheap and helpful check. We cannot disallow on_proc_exit
4414  * handlers unfortunately, since pq_init() already registered one.)
4415  */
4417 
4418  /*
4419  * Stop here if it was bad or a cancel packet. ProcessStartupPacket
4420  * already did any appropriate error reporting.
4421  */
4422  if (status != STATUS_OK)
4423  proc_exit(0);
4424 
4425  /*
4426  * Now that we have the user and database name, we can set the process
4427  * title for ps. It's good to do this as early as possible in startup.
4428  */
4429  initStringInfo(&ps_data);
 /*
  * NOTE(review): listing line 4431 is missing after this "if".  As shown, the
  * condition appears to guard the user_name append, which is probably an
  * artifact of the gap rather than the intended logic -- verify upstream.
  */
4430  if (am_walsender)
4432  appendStringInfo(&ps_data, "%s ", port->user_name);
4433  if (port->database_name[0] != '\0')
4434  appendStringInfo(&ps_data, "%s ", port->database_name);
4435  appendStringInfoString(&ps_data, port->remote_host);
4436  if (port->remote_port[0] != '\0')
4437  appendStringInfo(&ps_data, "(%s)", port->remote_port);
4438 
 /* The assembled title buffer is released right after being handed over */
4439  init_ps_display(ps_data.data);
4440  pfree(ps_data.data);
4441 
4442  set_ps_display("initializing");
4443 }
4444 
4445 
4446 /*
4447  * BackendRun -- set up the backend's argument list and invoke PostgresMain()
4448  *
4449  * returns:
4450  * Doesn't return at all.
4451  */
4452 static void
/*
 * NOTE(review): listing lines 4453 (the declarator) and 4459 (the statement
 * announced by the comment below) are absent from this copy.  The body uses
 * a parameter named "port"; confirm against the upstream file.
 */
4454 {
4455  /*
4456  * Make sure we aren't in PostmasterContext anymore. (We can't delete it
4457  * just yet, though, because InitPostgres will need the HBA data.)
4458  */
4460 
 /* Hand control to the backend main loop with the names taken from the port */
4461  PostgresMain(port->database_name, port->user_name);
4462 }
4463 
4464 
4465 #ifdef EXEC_BACKEND
4466 
4467 /*
4468  * postmaster_forkexec -- fork and exec a postmaster subprocess
4469  *
4470  * The caller must have set up the argv array already, except for argv[2]
4471  * which will be filled with the name of the temp variable file.
4472  *
4473  * Returns the child process PID, or -1 on fork failure (a suitable error
4474  * message has been logged on failure).
4475  *
4476  * All uses of this routine will dispatch to SubPostmasterMain in the
4477  * child process.
4478  */
4479 pid_t
4480 postmaster_forkexec(int argc, char *argv[])
4481 {
4482  Port port;
4483 
4484  /* This entry point passes dummy values for the Port variables */
4485  memset(&port, 0, sizeof(port));
4486  return internal_forkexec(argc, argv, &port);
4487 }
4488 
4489 /*
4490  * backend_forkexec -- fork/exec off a backend process
4491  *
4492  * Some operating systems (WIN32) don't have fork() so we have to simulate
4493  * it by storing parameters that need to be passed to the child and
4494  * then create a new child process.
4495  *
4496  * returns the pid of the fork/exec'd process, or -1 on failure
4497  */
4498 static pid_t
4499 backend_forkexec(Port *port)
4500 {
4501  char *av[4];
4502  int ac = 0;
4503 
4504  av[ac++] = "postgres";
4505  av[ac++] = "--forkbackend";
4506  av[ac++] = NULL; /* filled in by internal_forkexec */
4507 
4508  av[ac] = NULL;
4509  Assert(ac < lengthof(av));
4510 
4511  return internal_forkexec(ac, av, port);
4512 }
4513 
4514 #ifndef WIN32
4515 
4516 /*
4517  * internal_forkexec non-win32 implementation
4518  *
4519  * - writes out backend variables to the parameter file
4520  * - forks, and then execs the child process
4521  */
4522 static pid_t
4523 internal_forkexec(int argc, char *argv[], Port *port)
/*
 * Writes the backend parameters to a temp file, stores that file's name in
 * argv[2], then forks and execv()s postgres_exec_path in the child.
 * Returns the value of fork_process() in the parent, or -1 if saving or
 * writing the parameters failed.
 *
 * NOTE(review): the embedded listing numbers skip 4536, 4547, 4553, 4563 and
 * 4573, so one snprintf() argument line, the directory-creation statement
 * and the first argument line of three ereport() calls are not shown here.
 */
4524 {
 /* Counter that persists across calls; it is part of each temp file name */
4525  static unsigned long tmpBackendFileNum = 0;
4526  pid_t pid;
4527  char tmpfilename[MAXPGPATH];
4528  BackendParameters param;
4529  FILE *fp;
4530 
4531  if (!save_backend_variables(&param, port))
4532  return -1; /* log made by save_backend_variables */
4533 
4534  /* Calculate name for temp file */
4535  snprintf(tmpfilename, MAXPGPATH, "%s/%s.backend_var.%d.%lu",
4537  MyProcPid, ++tmpBackendFileNum);
4538 
4539  /* Open file */
4540  fp = AllocateFile(tmpfilename, PG_BINARY_W);
4541  if (!fp)
4542  {
4543  /*
4544  * As in OpenTemporaryFileInTablespace, try to make the temp-file
4545  * directory, ignoring errors.
4546  */
4548 
 /* Second and last attempt; give up with a log entry if it fails too */
4549  fp = AllocateFile(tmpfilename, PG_BINARY_W);
4550  if (!fp)
4551  {
4552  ereport(LOG,
4554  errmsg("could not create file \"%s\": %m",
4555  tmpfilename)));
4556  return -1;
4557  }
4558  }
4559 
 /* The whole parameter struct is written as one raw binary record */
4560  if (fwrite(&param, sizeof(param), 1, fp) != 1)
4561  {
4562  ereport(LOG,
4564  errmsg("could not write to file \"%s\": %m", tmpfilename)));
4565  FreeFile(fp);
4566  return -1;
4567  }
4568 
4569  /* Release file */
 /* A failure while closing is reported with the same message as a write failure */
4570  if (FreeFile(fp))
4571  {
4572  ereport(LOG,
4574  errmsg("could not write to file \"%s\": %m", tmpfilename)));
4575  return -1;
4576  }
4577 
4578  /* Make sure caller set up argv properly */
4579  Assert(argc >= 3);
4580  Assert(argv[argc] == NULL);
4581  Assert(strncmp(argv[1], "--fork", 6) == 0);
4582  Assert(argv[2] == NULL);
4583 
4584  /* Insert temp file name after --fork argument */
 /* tmpfilename is a local buffer, so argv[2] is only valid during this call */
4585  argv[2] = tmpfilename;
4586 
4587  /* Fire off execv in child */
4588  if ((pid = fork_process()) == 0)
4589  {
4590  if (execv(postgres_exec_path, argv) < 0)
4591  {
4592  ereport(LOG,
4593  (errmsg("could not execute server process \"%s\": %m",
4594  postgres_exec_path)));
4595  /* We're already in the child process here, can't return */
4596  exit(1);
4597  }
4598  }
4599 
4600  return pid; /* Parent returns pid, or -1 on fork failure */
4601 }
4602 #else /* WIN32 */
4603 
4604 /*
4605  * internal_forkexec win32 implementation
4606  *
4607  * - starts backend using CreateProcess(), in suspended state
4608  * - writes out backend variables to the parameter file
4609  * - during this, duplicates handles and sockets required for
4610  * inheritance into the new process
4611  * - resumes execution of the new process once the backend parameter
4612  * file is complete.
4613  */
4614 static pid_t
4615 internal_forkexec(int argc, char *argv[], Port *port)
4616 {
4617  int retry_count = 0;
4618  STARTUPINFO si;
4619  PROCESS_INFORMATION pi;
4620  int i;
4621  int j;
4622  char cmdLine[MAXPGPATH * 2];
4623  HANDLE paramHandle;
4624  BackendParameters *param;
4625  SECURITY_ATTRIBUTES sa;
4626  char paramHandleStr[32];
4627  win32_deadchild_waitinfo *childinfo;
4628 
4629  /* Make sure caller set up argv properly */
4630  Assert(argc >= 3);
4631  Assert(argv[argc] == NULL);
4632  Assert(strncmp(argv[1], "--fork", 6) == 0);
4633  Assert(argv[2] == NULL);
4634 
4635  /* Resume here if we need to retry */
4636 retry:
4637 
4638  /* Set up shared memory for parameter passing */
4639  ZeroMemory(&sa, sizeof(sa));
4640  sa.nLength = sizeof(sa);
4641  sa.bInheritHandle = TRUE;
4642  paramHandle = CreateFileMapping(INVALID_HANDLE_VALUE,
4643  &sa,
4644  PAGE_READWRITE,
4645  0,
4646  sizeof(BackendParameters),
4647  NULL);
4648  if (paramHandle == INVALID_HANDLE_VALUE)
4649  {
4650  ereport(LOG,
4651  (errmsg("could not create backend parameter file mapping: error code %lu",
4652  GetLastError())));
4653  return -1;
4654  }
4655 
4656  param = MapViewOfFile(paramHandle, FILE_MAP_WRITE, 0, 0, sizeof(BackendParameters));
4657  if (!param)
4658  {
4659  ereport(LOG,
4660  (errmsg("could not map backend parameter memory: error code %lu",
4661  GetLastError())));
4662  CloseHandle(paramHandle);
4663  return -1;
4664  }
4665 
4666  /* Insert temp file name after --fork argument */
4667 #ifdef _WIN64
4668  sprintf(paramHandleStr, "%llu", (LONG_PTR) paramHandle);
4669 #else
4670  sprintf(paramHandleStr, "%lu", (DWORD) paramHandle);
4671 #endif
4672  argv[2] = paramHandleStr;
4673 
4674  /* Format the cmd line */
4675  cmdLine[sizeof(cmdLine) - 1] = '\0';
4676  cmdLine[sizeof(cmdLine) - 2] = '\0';
4677  snprintf(cmdLine, sizeof(cmdLine) - 1, "\"%s\"", postgres_exec_path);
4678  i = 0;
4679  while (argv[++i] != NULL)
4680  {
4681  j = strlen(cmdLine);
4682  snprintf(cmdLine + j, sizeof(cmdLine) - 1 - j, " \"%s\"", argv[i]);
4683  }
4684  if (cmdLine[sizeof(cmdLine) - 2] != '\0')
4685  {
4686  ereport(LOG,
4687  (errmsg("subprocess command line too long")));
4688  UnmapViewOfFile(param);
4689  CloseHandle(paramHandle);
4690  return -1;
4691  }
4692 
4693  memset(&pi, 0, sizeof(pi));
4694  memset(&si, 0, sizeof(si));
4695  si.cb = sizeof(si);
4696 
4697  /*
4698  * Create the subprocess in a suspended state. This will be resumed later,
4699  * once we have written out the parameter file.
4700  */
4701  if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, CREATE_SUSPENDED,
4702  NULL, NULL, &si, &pi))
4703  {
4704  ereport(LOG,
4705  (errmsg("CreateProcess() call failed: %m (error code %lu)",
4706  GetLastError())));
4707  UnmapViewOfFile(param);
4708  CloseHandle(paramHandle);
4709  return -1;
4710  }
4711 
4712  if (!save_backend_variables(param, port, pi.hProcess, pi.dwProcessId))
4713  {
4714  /*
4715  * log made by save_backend_variables, but we have to clean up the
4716  * mess with the half-started process
4717  */
4718  if (!TerminateProcess(pi.hProcess, 255))
4719  ereport(LOG,
4720  (errmsg_internal("could not terminate unstarted process: error code %lu",
4721  GetLastError())));
4722  CloseHandle(pi.hProcess);
4723  CloseHandle(pi.hThread);
4724  UnmapViewOfFile(param);
4725  CloseHandle(paramHandle);
4726  return -1; /* log made by save_backend_variables */
4727  }
4728 
4729  /* Drop the parameter shared memory that is now inherited to the backend */
4730  if (!UnmapViewOfFile(param))
4731  ereport(LOG,
4732  (errmsg("could not unmap view of backend parameter file: error code %lu",
4733  GetLastError())));
4734  if (!CloseHandle(paramHandle))
4735  ereport(LOG,
4736  (errmsg("could not close handle to backend parameter file: error code %lu",
4737  GetLastError())));
4738 
4739  /*
4740  * Reserve the memory region used by our main shared memory segment before
4741  * we resume the child process. Normally this should succeed, but if ASLR
4742  * is active then it might sometimes fail due to the stack or heap having
4743  * gotten mapped into that range. In that case, just terminate the
4744  * process and retry.
4745  */
4746  if (!pgwin32_ReserveSharedMemoryRegion(pi.hProcess))
4747  {
4748  /* pgwin32_ReserveSharedMemoryRegion already made a log entry */
4749  if (!TerminateProcess(pi.hProcess, 255))
4750  ereport(LOG,
4751  (errmsg_internal("could not terminate process that failed to reserve memory: error code %lu",
4752  GetLastError())));
4753  CloseHandle(pi.hProcess);
4754  CloseHandle(pi.hThread);
4755  if (++retry_count < 100)
4756  goto retry;
4757  ereport(LOG,
4758  (errmsg("giving up after too many tries to reserve shared memory"),
4759  errhint("This might be caused by ASLR or antivirus software.")));
4760  return -1;
4761  }
4762 
4763  /*
4764  * Now that the backend variables are written out, we start the child
4765  * thread so it can start initializing while we set up the rest of the
4766  * parent state.
4767  */
4768  if (ResumeThread(pi.hThread) == -1)
4769  {
4770  if (!TerminateProcess(pi.hProcess, 255))
4771  {
4772  ereport(LOG,
4773  (errmsg_internal("could not terminate unstartable process: error code %lu",
4774  GetLastError())));
4775  CloseHandle(pi.hProcess);
4776  CloseHandle(pi.hThread);
4777  return -1;
4778  }
4779  CloseHandle(pi.hProcess);
4780  CloseHandle(pi.hThread);
4781  ereport(LOG,
4782  (errmsg_internal("could not resume thread of unstarted process: error code %lu",
4783  GetLastError())));
4784  return -1;
4785  }
4786 
4787  /*
4788  * Queue a waiter to signal when this child dies. The wait will be handled
4789  * automatically by an operating system thread pool. The memory will be
4790  * freed by a later call to waitpid().
4791  */
4792  childinfo = palloc(sizeof(win32_deadchild_waitinfo));
4793  childinfo->procHandle = pi.hProcess;
4794  childinfo->procId = pi.dwProcessId;
4795 
4796  if (!RegisterWaitForSingleObject(&childinfo->waitHandle,
4797  pi.hProcess,
4798  pgwin32_deadchild_callback,
4799  childinfo,
4800  INFINITE,
4801  WT_EXECUTEONLYONCE | WT_EXECUTEINWAITTHREAD))
4802  ereport(FATAL,
4803  (errmsg_internal("could not register process for wait: error code %lu",
4804  GetLastError())));
4805 
4806  /* Don't close pi.hProcess here - waitpid() needs access to it */
4807 
4808  CloseHandle(pi.hThread);
4809 
4810  return pi.dwProcessId;
4811 }
4812 #endif /* WIN32 */
4813 
4814 
4815 /*
4816  * SubPostmasterMain -- Get the fork/exec'd process into a state equivalent
4817  * to what it would be if we'd simply forked on Unix, and then
4818  * dispatch to the appropriate place.
4819  *
4820  * The first two command line arguments are expected to be "--forkFOO"
4821  * (where FOO indicates which postmaster child we are to become), and
4822  * the name of a variables file that we can read to load data that would
4823  * have been inherited by fork() on Unix. Remaining arguments go to the
4824  * subprocess FooMain() routine.
4825  */
4826 void
4827 SubPostmasterMain(int argc, char *argv[])
/*
 * argv[1] selects the kind of child ("--forkbackend", "--forkaux",
 * "--forkavlauncher", "--forkavworker", "--forkbgworker=N" or "--forklog")
 * and argv[2] is passed to read_backend_variables().  If no branch matches,
 * the function ends in abort().
 *
 * NOTE(review): the embedded listing numbers skip 4833, 4836, 4850, 4873,
 * 4875, 4906, 4945, 4948, 4954, 4966, 4969, 4972, 4980, 4986, 4993, 4999,
 * 5011, 5017 and 5023.  Many comments below are therefore not followed by
 * the statement they describe; confirm against the upstream file.
 */
4828 {
4829  Port port;
4830 
4831  /* In EXEC_BACKEND case we will not have inherited these settings */
4832  IsPostmasterEnvironment = true;
4834 
4835  /* Setup essential subsystems (to ensure elog() behaves sanely) */
4837 
4838  /* Check we got appropriate args */
4839  if (argc < 3)
4840  elog(FATAL, "invalid subpostmaster invocation");
4841 
4842  /* Read in the variables file */
4843  memset(&port, 0, sizeof(Port));
4844  read_backend_variables(argv[2], &port);
4845 
4846  /* Close the postmaster's sockets (as soon as we know them) */
4847  ClosePostmasterPorts(strcmp(argv[1], "--forklog") == 0);
4848 
4849  /* Setup as postmaster child */
4851 
4852  /*
4853  * If appropriate, physically re-attach to shared memory segment. We want
4854  * to do this before going any further to ensure that we can attach at the
4855  * same address the postmaster used. On the other hand, if we choose not
4856  * to re-attach, we may have other cleanup to do.
4857  *
4858  * If testing EXEC_BACKEND on Linux, you should run this as root before
4859  * starting the postmaster:
4860  *
4861  * sysctl -w kernel.randomize_va_space=0
4862  *
4863  * This prevents using randomized stack and code addresses that cause the
4864  * child process's memory map to be different from the parent's, making it
4865  * sometimes impossible to attach to shared memory at the desired address.
4866  * Return the setting to its old value (usually '1' or '2') when finished.
4867  */
 /*
  * NOTE(review): both branch bodies of this if/else (listing lines 4873 and
  * 4875) are missing from this copy.
  */
4868  if (strcmp(argv[1], "--forkbackend") == 0 ||
4869  strcmp(argv[1], "--forkavlauncher") == 0 ||
4870  strcmp(argv[1], "--forkavworker") == 0 ||
4871  strcmp(argv[1], "--forkaux") == 0 ||
4872  strncmp(argv[1], "--forkbgworker=", 15) == 0)
4874  else
4876 
4877  /* autovacuum needs this set before calling InitProcess */
4878  if (strcmp(argv[1], "--forkavlauncher") == 0)
4879  AutovacuumLauncherIAm();
4880  if (strcmp(argv[1], "--forkavworker") == 0)
4881  AutovacuumWorkerIAm();
4882 
4883  /* Read in remaining GUC variables */
4884  read_nondefault_variables();
4885 
4886  /*
4887  * Check that the data directory looks valid, which will also check the
4888  * privileges on the data directory and update our umask and file/group
4889  * variables for creating files later. Note: this should really be done
4890  * before we create any files or directories.
4891  */
4892  checkDataDir();
4893 
4894  /*
4895  * (re-)read control file, as it contains config. The postmaster will
4896  * already have read this, but this process doesn't know about that.
4897  */
4898  LocalProcessControlFile(false);
4899 
4900  /*
4901  * Reload any libraries that were preloaded by the postmaster. Since we
4902  * exec'd this process, those libraries didn't come along with us; but we
4903  * should load them into all child processes to be consistent with the
4904  * non-EXEC_BACKEND behavior.
4905  */
4907 
4908  /* Run backend or appropriate child */
4909  if (strcmp(argv[1], "--forkbackend") == 0)
4910  {
4911  Assert(argc == 3); /* shouldn't be any more args */
4912 
4913  /*
4914  * Need to reinitialize the SSL library in the backend, since the
4915  * context structures contain function pointers and cannot be passed
4916  * through the parameter file.
4917  *
4918  * If for some reason reload fails (maybe the user installed broken
4919  * key files), soldier on without SSL; that's better than all
4920  * connections becoming impossible.
4921  *
4922  * XXX should we do this in all child processes? For the moment it's
4923  * enough to do it in backend children.
4924  */
4925 #ifdef USE_SSL
4926  if (EnableSSL)
4927  {
4928  if (secure_initialize(false) == 0)
4929  LoadedSSL = true;
4930  else
4931  ereport(LOG,
4932  (errmsg("SSL configuration could not be loaded in child process")));
4933  }
4934 #endif
4935 
4936  /*
4937  * Perform additional initialization and collect startup packet.
4938  *
4939  * We want to do this before InitProcess() for a couple of reasons: 1.
4940  * so that we aren't eating up a PGPROC slot while waiting on the
4941  * client. 2. so that if InitProcess() fails due to being out of
4942  * PGPROC slots, we have already initialized libpq and are able to
4943  * report the error to the client.
4944  */
4946 
4947  /* Restore basic shared memory pointers */
4949 
4950  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4951  InitProcess();
4952 
4953  /* Attach process to shared data structures */
4955 
4956  /* And run the backend */
4957  BackendRun(&port); /* does not return */
4958  }
4959  if (strcmp(argv[1], "--forkaux") == 0)
4960  {
4961  AuxProcType auxtype;
4962 
4963  Assert(argc == 4);
4964 
4965  /* Restore basic shared memory pointers */
4967 
4968  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4970 
4971  /* Attach process to shared data structures */
4973 
 /* argv[3] holds the auxiliary process type as decimal text; atoi() does no error checking */
4974  auxtype = atoi(argv[3]);
4975  AuxiliaryProcessMain(auxtype); /* does not return */
4976  }
4977  if (strcmp(argv[1], "--forkavlauncher") == 0)
4978  {
4979  /* Restore basic shared memory pointers */
4981 
4982  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4983  InitProcess();
4984 
4985  /* Attach process to shared data structures */
4987 
4988  AutoVacLauncherMain(argc - 2, argv + 2); /* does not return */
4989  }
4990  if (strcmp(argv[1], "--forkavworker") == 0)
4991  {
4992  /* Restore basic shared memory pointers */
4994 
4995  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4996  InitProcess();
4997 
4998  /* Attach process to shared data structures */
5000 
5001  AutoVacWorkerMain(argc - 2, argv + 2); /* does not return */
5002  }
5003  if (strncmp(argv[1], "--forkbgworker=", 15) == 0)
5004  {
5005  int shmem_slot;
5006 
5007  /* do this as early as possible; in particular, before InitProcess() */
5008  IsBackgroundWorker = true;
5009 
5010  /* Restore basic shared memory pointers */
5012 
5013  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5014  InitProcess();
5015 
5016  /* Attach process to shared data structures */
5018 
5019  /* Fetch MyBgworkerEntry from shared memory */
 /* The slot number is the text following the 15-character "--forkbgworker=" prefix */
5020  shmem_slot = atoi(argv[1] + 15);
5021  MyBgworkerEntry = BackgroundWorkerEntry(shmem_slot);
5022 
5024  }
5025  if (strcmp(argv[1], "--forklog") == 0)
5026  {
5027  /* Do not want to attach to shared memory */
5028 
5029  SysLoggerMain(argc, argv); /* does not return */
5030  }
5031 
5032  abort(); /* shouldn't get here */
5033 }
5034 #endif /* EXEC_BACKEND */
5035 
5036 
5037 /*
5038  * ExitPostmaster -- cleanup
5039  *
5040  * Do NOT call exit() directly --- always go through here!
5041  */
5042 static void
/*
 * NOTE(review): listing line 5043 (the declarator) is absent from this copy.
 * The body uses a value named "status", which it passes to proc_exit();
 * confirm the signature against the upstream file.
 */
5044 {
5045 #ifdef HAVE_PTHREAD_IS_THREADED_NP
5046 
5047  /*
5048  * There is no known cause for a postmaster to become multithreaded after
5049  * startup. Recheck to account for the possibility of unknown causes.
5050  * This message uses LOG level, because an unclean shutdown at this point
5051  * would usually not look much different from a clean shutdown.
5052  */
5053  if (pthread_is_threaded_np() != 0)
5054  ereport(LOG,
5055  (errcode(ERRCODE_INTERNAL_ERROR),
5056  errmsg_internal("postmaster became multithreaded"),
5057  errdetail("Please report this to <%s>.", PACKAGE_BUGREPORT)));
5058 #endif
5059 
5060  /* should cleanup shared memory and kill all backends */
5061 
5062  /*
5063  * Not sure of the semantics here. When the Postmaster dies, should the
5064  * backends all be killed? probably not.
5065  *
5066  * MUST -- vadim 05-10-1999
5067  */
5068 
 /* Nothing is done here for the two remarks above; exit with the given status */
5069  proc_exit(status);
5070 }
5071 
5072 /*
5073  * Handle pmsignal conditions representing requests from backends,
5074  * and check for promote and logrotate requests from pg_ctl.
5075  */
5076 static void
/*
 * NOTE(review): this copy is heavily incomplete.  The embedded listing
 * numbers skip 5077 (the declarator) and many interior lines, including
 * 5090-5091, 5103, 5112, 5121-5122, 5128, 5133, 5141, 5144, 5148-5149,
 * 5156-5157, 5159, 5161, 5165-5166, 5180-5181, 5184, 5187, 5192, 5205, 5207,
 * 5213 and 5221.  As a result most of the braced blocks below appear without
 * the condition that guards them, and several blocks are empty.  Do not read
 * the control flow off this listing; confirm against the upstream file.
 */
5078 {
 /* Clear the pending flag first, before acting on any of the requests */
5079  pending_pm_pmsignal = false;
5080 
5081  ereport(DEBUG2,
5082  (errmsg_internal("postmaster received pmsignal signal")));
5083 
5084  /*
5085  * RECOVERY_STARTED and BEGIN_HOT_STANDBY signals are ignored in
5086  * unexpected states. If the startup process quickly starts up, completes
5087  * recovery, exits, we might process the death of the startup process
5088  * first. We don't want to go back to recovery in that case.
5089  */
5092  {
5093  /* WAL redo has started. We're out of reinitialization. */
5094  FatalError = false;
5095  AbortStartTime = 0;
5096 
5097  /*
5098  * Start the archiver if we're responsible for (re-)archiving received
5099  * files.
5100  */
5101  Assert(PgArchPID == 0);
5102  if (XLogArchivingAlways())
5104 
5105  /*
5106  * If we aren't planning to enter hot standby mode later, treat
5107  * RECOVERY_STARTED as meaning we're out of startup, and report status
5108  * accordingly.
5109  */
5110  if (!EnableHotStandby)
5111  {
5113 #ifdef USE_SYSTEMD
5114  sd_notify(0, "READY=1");
5115 #endif
5116  }
5117 
5118  pmState = PM_RECOVERY;
5119  }
5120 
5123  {
5124  ereport(LOG,
5125  (errmsg("database system is ready to accept read-only connections")));
5126 
5127  /* Report status */
5129 #ifdef USE_SYSTEMD
5130  sd_notify(0, "READY=1");
5131 #endif
5132 
5134  connsAllowed = true;
5135 
5136  /* Some workers may be scheduled to start now */
5137  StartWorkerNeeded = true;
5138  }
5139 
5140  /* Process background worker state changes. */
5142  {
5143  /* Accept new worker requests only if not stopping. */
5145  StartWorkerNeeded = true;
5146  }
5147 
5150 
5151  /* Tell syslogger to rotate logfile if requested */
5152  if (SysLoggerPID != 0)
5153  {
5154  if (CheckLogrotateSignal())
5155  {
5158  }
5160  {
5162  }
5163  }
5164 
5167  {
5168  /*
5169  * Start one iteration of the autovacuum daemon, even if autovacuuming
5170  * is nominally not enabled. This is so we can have an active defense
5171  * against transaction ID wraparound. We set a flag for the main loop
5172  * to do it rather than trying to do it here --- this is because the
5173  * autovac process itself may send the signal, and we want to handle
5174  * that by launching another iteration as soon as the current one
5175  * completes.
5176  */
5177  start_autovac_launcher = true;
5178  }
5179 
5182  {
5183  /* The autovacuum launcher wants us to start a worker process. */
5185  }
5186 
5188  {
5189  /* Startup Process wants us to start the walreceiver process. */
5190  /* Start immediately if possible, else remember request for later. */
5191  WalReceiverRequested = true;
5193  }
5194 
5195  /*
5196  * Try to advance postmaster's state machine, if a child requests it.
5197  *
5198  * Be careful about the order of this action relative to this function's
5199  * other actions. Generally, this should be after other actions, in case
5200  * they have effects PostmasterStateMachine would need to know about.
5201  * However, we should do it before the CheckPromoteSignal step, which
5202  * cannot have any (immediate) effect on the state machine, but does
5203  * depend on what state we're in now.
5204  */
5206  {
5208  }
5209 
 /* NOTE(review): the last term of this condition (listing line 5213) is missing */
5210  if (StartupPID != 0 &&
5211  (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5212  pmState == PM_HOT_STANDBY) &&
5214  {
5215  /*
5216  * Tell startup process to finish recovery.
5217  *
5218  * Leave the promote signal file in place and let the Startup process
5219  * do the unlink.
5220  */
5222  }
5223 }
5224 
5225 /*
5226  * SIGTERM while processing startup packet.
5227  *
5228  * Running proc_exit() from a signal handler would be quite unsafe.
5229  * However, since we have not yet touched shared memory, we can just
5230  * pull the plug and exit without running any atexit handlers.
5231  *
5232  * One might be tempted to try to send a message, or log one, indicating
5233  * why we are disconnecting. However, that would be quite unsafe in itself.
5234  * Also, it seems undesirable to provide clues about the database's state
5235  * to a client that has not yet completed authentication, or even sent us
5236  * a startup packet.
5237  */
5238 static void
/* NOTE(review): listing line 5239 (the declarator) is absent from this copy. */
5240 {
 /* _exit() rather than exit(): terminate at once, as the header comment explains */
5241  _exit(1);
5242 }
5243 
5244 /*
5245  * Dummy signal handler
5246  *
5247  * We use this for signals that we don't actually use in the postmaster,
5248  * but we do use in backends. If we were to SIG_IGN such signals in the
5249  * postmaster, then a newly started backend might drop a signal that arrives
5250  * before it's able to reconfigure its signal processing. (See notes in
5251  * tcop/postgres.c.)
5252  */
5253 static void
/* NOTE(review): listing line 5254 (the declarator) is absent from this copy. */
5255 {
 /* Intentionally empty: the handler exists only so the signal is not ignored */
5256 }
5257 
5258 /*
5259  * Timeout while processing startup packet.
5260  * As for process_startup_packet_die(), we exit via _exit(1).
5261  */
5262 static void
/* NOTE(review): listing line 5263 (the declarator) is absent from this copy. */
5264 {
 /* Same immediate exit, with the same status, as the SIGTERM case */
5265  _exit(1);
5266 }
5267 
5268 
5269 /*
5270  * Generate a random cancel key.
5271  */
5272 static bool
/*
 * NOTE(review): listing line 5273 (the declarator) is absent from this copy.
 * The body uses a value named "cancel_key" as the output buffer.
 */
5274 {
 /* Fills sizeof(int32) bytes at cancel_key; the bool result is passed straight through */
5275  return pg_strong_random(cancel_key, sizeof(int32));
5276 }
5277 
5278 /*
5279  * Count up number of child processes of specified types (dead_end children
5280  * are always excluded).
5281  */
5282 static int
5283 CountChildren(int target)
/*
 * Walks BackendList and returns how many entries match "target", which is
 * either BACKEND_TYPE_ALL or a bit mask tested against each entry's
 * bkend_type.  Entries flagged dead_end are never counted.
 */
5284 {
5285  dlist_iter iter;
5286  int cnt = 0;
5287 
5288  dlist_foreach(iter, &BackendList)
5289  {
5290  Backend *bp = dlist_container(Backend, elem, iter.cur);
5291 
5292  if (bp->dead_end)
5293  continue;
5294 
5295  /*
5296  * Since target == BACKEND_TYPE_ALL is the most common case, we test
5297  * it first and avoid touching shared memory for every child.
5298  */
5299  if (target != BACKEND_TYPE_ALL)
5300  {
5301  /*
5302  * Assign bkend_type for any recently announced WAL Sender
5303  * processes.
5304  */
 /*
  * NOTE(review): listing lines 5306-5307 (the rest of this condition and
  * the statement it guards) are missing from this copy.
  */
5305  if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
5308 
 /* Skip children whose type has no bit in common with the requested mask */
5309  if (!(target & bp->bkend_type))
5310  continue;
5311  }
5312 
5313  cnt++;
5314  }
5315  return cnt;
5316 }
5317 
5318 
5319 /*
5320  * StartChildProcess -- start an auxiliary process for the postmaster
5321  *
5322  * "type" determines what kind of child will be started. All child types
5323  * initially go to AuxiliaryProcessMain, which will handle common setup.
5324  *
5325  * Return value of StartChildProcess is subprocess' PID, or 0 if failed
5326  * to start subprocess.
5327  */
5328 static pid_t
/*
 * NOTE(review): the embedded listing numbers skip 5329 (the declarator),
 * 5359 and 5365-5366, so the function name/parameter list and three
 * statements in the child branch are not shown in this copy.  The body uses
 * a value named "type"; confirm against the upstream file.
 */
5330 {
5331  pid_t pid;
5332 
5333 #ifdef EXEC_BACKEND
5334  {
5335  char *av[10];
5336  int ac = 0;
5337  char typebuf[32];
5338 
5339  /*
5340  * Set up command-line arguments for subprocess
5341  */
5342  av[ac++] = "postgres";
5343  av[ac++] = "--forkaux";
5344  av[ac++] = NULL; /* filled in by postmaster_forkexec */
5345 
 /* The process type travels as decimal text in argv[3] */
5346  snprintf(typebuf, sizeof(typebuf), "%d", type);
5347  av[ac++] = typebuf;
5348 
5349  av[ac] = NULL;
5350  Assert(ac < lengthof(av));
5351 
5352  pid = postmaster_forkexec(ac, av);
5353  }
5354 #else /* !EXEC_BACKEND */
5355  pid = fork_process();
5356 
5357  if (pid == 0) /* child */
5358  {
5360 
5361  /* Close the postmaster's sockets */
5362  ClosePostmasterPorts(false);
5363 
5364  /* Release postmaster's working memory context */
5367  PostmasterContext = NULL;
5368 
5369  AuxiliaryProcessMain(type); /* does not return */
5370  }
5371 #endif /* EXEC_BACKEND */
5372 
5373  if (pid < 0)
5374  {
5375  /* in parent, fork failed */
 /*
  * As written, errno is saved and immediately restored with nothing in
  * between, so the pair has no effect; %m below reports the fork's errno.
  */
5376  int save_errno = errno;
5377 
5378  errno = save_errno;
5379  switch (type)
5380  {
5381  case StartupProcess:
5382  ereport(LOG,
5383  (errmsg("could not fork startup process: %m")));
5384  break;
5385  case ArchiverProcess:
5386  ereport(LOG,
5387  (errmsg("could not fork archiver process: %m")));
5388  break;
5389  case BgWriterProcess:
5390  ereport(LOG,
5391  (errmsg("could not fork background writer process: %m")));
5392  break;
5393  case CheckpointerProcess:
5394  ereport(LOG,
5395  (errmsg("could not fork checkpointer process: %m")));
5396  break;
5397  case WalWriterProcess:
5398  ereport(LOG,
5399  (errmsg("could not fork WAL writer process: %m")));
5400  break;
5401  case WalReceiverProcess:
5402  ereport(LOG,
5403  (errmsg("could not fork WAL receiver process: %m")));
5404  break;
5405  default:
5406  ereport(LOG,
5407  (errmsg("could not fork process: %m")));
5408  break;
5409  }
5410 
5411  /*
5412  * fork failure is fatal during startup, but there's no need to choke
5413  * immediately if starting other child types fails.
5414  */
5415  if (type == StartupProcess)
5416  ExitPostmaster(1);
 /* Callers see 0, not -1, when the child could not be started */
5417  return 0;
5418  }
5419 
5420  /*
5421  * in parent, successful fork
5422  */
5423  return pid;
5424 }
5425 
5426 /*
5427  * StartAutovacuumWorker
5428  * Start an autovac worker process.
5429  *
5430  * This function is here because it enters the resulting PID into the
5431  * postmaster's private backends list.
5432  *
5433  * NB -- this code very roughly matches BackendStartup.
5434  */
5435 static void
5437 {
5438  Backend *bn;
5439 
5440  /*
5441  * If not in condition to run a process, don't try, but handle it like a
5442  * fork failure. This does not normally happen, since the signal is only
5443  * supposed to be sent by autovacuum launcher when it's OK to do it, but
5444  * we have to check to avoid race-condition problems during DB state
5445  * changes.
5446  */
5448  {
5449  /*
5450  * Compute the cancel key that will be assigned to this session. We
5451  * probably don't need cancel keys for autovac workers, but we'd
5452  * better have something random in the field to prevent unfriendly
5453  * people from sending cancels to them.
5454  */
5456  {
5457  ereport(LOG,
5458  (errcode(ERRCODE_INTERNAL_ERROR),
5459  errmsg("could not generate random cancel key")));
5460  return;
5461  }
5462 
5463  bn = (Backend *) malloc(sizeof(Backend));
5464  if (bn)
5465  {
5466  bn->cancel_key = MyCancelKey;
5467 
5468  /* Autovac workers are not dead_end and need a child slot */
5469  bn->dead_end = false;
5471  bn->bgworker_notify = false;
5472 
5473  bn->pid = StartAutoVacWorker();
5474  if (bn->pid > 0)
5475  {
5478 #ifdef EXEC_BACKEND
5479  ShmemBackendArrayAdd(bn);
5480 #endif
5481  /* all OK */
5482  return;
5483  }
5484 
5485  /*
5486  * fork failed, fall through to report -- actual error message was
5487  * logged by StartAutoVacWorker
5488  */
5490  free(bn);
5491  }
5492  else
5493  ereport(LOG,
5494  (errcode(ERRCODE_OUT_OF_MEMORY),
5495  errmsg("out of memory")));
5496  }
5497 
5498  /*
5499  * Report the failure to the launcher, if it's running. (If it's not, we
5500  * might not even be connected to shared memory, so don't try to call
5501  * AutoVacWorkerFailed.) Note that we also need to signal it so that it
5502  * responds to the condition, but we don't do that here, instead waiting
5503  * for ServerLoop to do it. This way we avoid a ping-pong signaling in
5504  * quick succession between the autovac launcher and postmaster in case
5505  * things get ugly.
5506  */
5507  if (AutoVacPID != 0)
5508  {
5510  avlauncher_needs_signal = true;
5511  }
5512 }
5513 
5514 /*
5515  * MaybeStartWalReceiver
5516  * Start the WAL receiver process, if not running and our state allows.
5517  *
5518  * Note: if WalReceiverPID is already nonzero, it might seem that we should
5519  * clear WalReceiverRequested. However, there's a race condition if the
5520  * walreceiver terminates and the startup process immediately requests a new
5521  * one: it's quite possible to get the signal for the request before reaping
5522  * the dead walreceiver process. Better to risk launching an extra
5523  * walreceiver than to miss launching one we need. (The walreceiver code
5524  * has logic to recognize that it should go away if not needed.)
5525  */
5526 static void
5528 {
5529  if (WalReceiverPID == 0 &&
5530  (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5531  pmState == PM_HOT_STANDBY) &&
5533  {
5535  if (WalReceiverPID != 0)
5536  WalReceiverRequested = false;
5537  /* else leave the flag set, so we'll try again later */
5538  }
5539 }
5540 
5541 
5542 /*
5543  * Create the opts file ("postmaster.opts"), recording the program name and command-line arguments
5544  */
5545 static bool
5546 CreateOptsFile(int argc, char *argv[], char *fullprogname)
5547 {
5548  FILE *fp;
5549  int i;
5550 
5551 #define OPTS_FILE "postmaster.opts"
5552 
5553  if ((fp = fopen(OPTS_FILE, "w")) == NULL)
5554  {
5555  ereport(LOG,
5557  errmsg("could not create file \"%s\": %m", OPTS_FILE)));
5558  return false;
5559  }
5560 
5561  fprintf(fp, "%s", fullprogname);
5562  for (i = 1; i < argc; i++)
5563  fprintf(fp, " \"%s\"", argv[i]);
5564  fputs("\n", fp);
5565 
5566  if (fclose(fp))
5567  {
5568  ereport(LOG,
5570  errmsg("could not write file \"%s\": %m", OPTS_FILE)));
5571  return false;
5572  }
5573 
5574  return true;
5575 }
5576 
5577 
5578 /*
5579  * MaxLivePostmasterChildren
5580  *
5581  * This reports the number of entries needed in per-child-process arrays
5582  * (the PMChildFlags array, and if EXEC_BACKEND the ShmemBackendArray).
5583  * These arrays include regular backends, autovac workers, walsenders
5584  * and background workers, but not special children nor dead_end children.
5585  * This allows the arrays to have a fixed maximum size, to wit the same
5586  * too-many-children limit enforced by canAcceptConnections(). The exact value
5587  * isn't too critical as long as it's more than MaxBackends.
5588  */
5589 int
5591 {
5592  return 2 * (MaxConnections + autovacuum_max_workers + 1 +
5594 }
5595 
5596 /*
5597  * Connect background worker to a database.
5598  */
5599 void
5601 {
5603 
5604  /* XXX is this the right errcode? */
5606  ereport(FATAL,
5607  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5608  errmsg("database connection requirement not indicated during registration")));
5609 
5610  InitPostgres(dbname, InvalidOid, /* database to connect to */
5611  username, InvalidOid, /* role to connect as */
5612  false, /* never honor session_preload_libraries */
5613  (flags & BGWORKER_BYPASS_ALLOWCONN) != 0, /* ignore datallowconn? */
5614  NULL); /* no out_dbname */
5615 
5616  /* it had better not have gotten out of "init" mode yet */
5617  if (!IsInitProcessingMode())
5618  ereport(ERROR,
5619  (errmsg("invalid processing mode in background worker")));
5621 }
5622 
5623 /*
5624  * Connect background worker to a database using OIDs.
5625  */
5626 void
5628 {
5630 
5631  /* XXX is this the right errcode? */
5633  ereport(FATAL,
5634  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5635  errmsg("database connection requirement not indicated during registration")));
5636 
5637  InitPostgres(NULL, dboid, /* database to connect to */
5638  NULL, useroid, /* role to connect as */
5639  false, /* never honor session_preload_libraries */
5640  (flags & BGWORKER_BYPASS_ALLOWCONN) != 0, /* ignore datallowconn? */
5641  NULL); /* no out_dbname */
5642 
5643  /* it had better not have gotten out of "init" mode yet */
5644  if (!IsInitProcessingMode())
5645  ereport(ERROR,
5646  (errmsg("invalid processing mode in background worker")));
5648 }
5649 
5650 /*
5651  * Block/unblock signals in a background worker
5652  */
5653 void
5655 {
5656  sigprocmask(SIG_SETMASK, &BlockSig, NULL);
5657 }
5658 
5659 void
5661 {
5662  sigprocmask(SIG_SETMASK, &UnBlockSig, NULL);
5663 }
5664 
5665 #ifdef EXEC_BACKEND
5666 static pid_t
5667 bgworker_forkexec(int shmem_slot)
5668 {
5669  char *av[10];
5670  int ac = 0;
5671  char forkav[MAXPGPATH];
5672 
5673  snprintf(forkav, MAXPGPATH, "--forkbgworker=%d", shmem_slot);
5674 
5675  av[ac++] = "postgres";
5676  av[ac++] = forkav;
5677  av[ac++] = NULL; /* filled in by postmaster_forkexec */
5678  av[ac] = NULL;
5679 
5680  Assert(ac < lengthof(av));
5681 
5682  return postmaster_forkexec(ac, av);
5683 }
5684 #endif
5685 
5686 /*
5687  * Start a new bgworker.
5688  * Starting time conditions must have been checked already.
5689  *
5690  * Returns true on success, false on failure.
5691  * In either case, update the RegisteredBgWorker's state appropriately.
5692  *
5693  * This code is heavily based on autovacuum.c, q.v.
5694  */
5695 static bool
5697 {
5698  pid_t worker_pid;
5699 
5700  Assert(rw->rw_pid == 0);
5701 
5702  /*
5703  * Allocate and assign the Backend element. Note we must do this before
5704  * forking, so that we can handle failures (out of memory or child-process
5705  * slots) cleanly.
5706  *
5707  * Treat failure as though the worker had crashed. That way, the
5708  * postmaster will wait a bit before attempting to start it again; if we
5709  * tried again right away, most likely we'd find ourselves hitting the
5710  * same resource-exhaustion condition.
5711  */
5712  if (!assign_backendlist_entry(rw))
5713  {
5715  return false;
5716  }
5717 
5718  ereport(DEBUG1,
5719  (errmsg_internal("starting background worker process \"%s\"",
5720  rw->rw_worker.bgw_name)));
5721 
5722 #ifdef EXEC_BACKEND
5723  switch ((worker_pid = bgworker_forkexec(rw->rw_shmem_slot)))
5724 #else
5725  switch ((worker_pid = fork_process()))
5726 #endif
5727  {
5728  case -1:
5729  /* in postmaster, fork failed ... */
5730  ereport(LOG,
5731  (errmsg("could not fork worker process: %m")));
5732  /* undo what assign_backendlist_entry did */
5734  rw->rw_child_slot = 0;
5735  free(rw->rw_backend);
5736  rw->rw_backend = NULL;
5737  /* mark entry as crashed, so we'll try again later */
5739  break;
5740 
5741 #ifndef EXEC_BACKEND
5742  case 0:
5743  /* in postmaster child ... */
5745 
5746  /* Close the postmaster's sockets */
5747  ClosePostmasterPorts(false);
5748 
5749  /*
5750  * Before blowing away PostmasterContext, save this bgworker's
5751  * data where it can find it.
5752  */
5755  memcpy(MyBgworkerEntry, &rw->rw_worker, sizeof(BackgroundWorker));
5756 
5757  /* Release postmaster's working memory context */
5758