PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
postmaster.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * postmaster.c
4  * This program acts as a clearing house for requests to the
5  * POSTGRES system. Frontend programs send a startup message
6  * to the Postmaster and the postmaster uses the info in the
7  * message to set up a backend process.
8  *
9  * The postmaster also manages system-wide operations such as
10  * startup and shutdown. The postmaster itself doesn't do those
11  * operations, mind you --- it just forks off a subprocess to do them
12  * at the right times. It also takes care of resetting the system
13  * if a backend crashes.
14  *
15  * The postmaster process creates the shared memory and semaphore
16  * pools during startup, but as a rule does not touch them itself.
17  * In particular, it is not a member of the PGPROC array of backends
18  * and so it cannot participate in lock-manager operations. Keeping
19  * the postmaster away from shared memory operations makes it simpler
20  * and more reliable. The postmaster is almost always able to recover
21  * from crashes of individual backends by resetting shared memory;
22  * if it did much with shared memory then it would be prone to crashing
23  * along with the backends.
24  *
25  * When a request message is received, we now fork() immediately.
26  * The child process performs authentication of the request, and
27  * then becomes a backend if successful. This allows the auth code
28  * to be written in a simple single-threaded style (as opposed to the
29  * crufty "poor man's multitasking" code that used to be needed).
30  * More importantly, it ensures that blockages in non-multithreaded
31  * libraries like SSL or PAM cannot cause denial of service to other
32  * clients.
33  *
34  *
35  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
36  * Portions Copyright (c) 1994, Regents of the University of California
37  *
38  *
39  * IDENTIFICATION
40  * src/backend/postmaster/postmaster.c
41  *
42  * NOTES
43  *
44  * Initialization:
45  * The Postmaster sets up shared memory data structures
46  * for the backends.
47  *
48  * Synchronization:
49  * The Postmaster shares memory with the backends but should avoid
50  * touching shared memory, so as not to become stuck if a crashing
51  * backend screws up locks or shared memory. Likewise, the Postmaster
52  * should never block on messages from frontend clients.
53  *
54  * Garbage Collection:
55  * The Postmaster cleans up after backends if they have an emergency
56  * exit and/or core dump.
57  *
58  * Error Reporting:
59  * Use write_stderr() only for reporting "interactive" errors
60  * (essentially, bogus arguments on the command line). Once the
61  * postmaster is launched, use ereport().
62  *
63  *-------------------------------------------------------------------------
64  */
65 
66 #include "postgres.h"
67 
68 #include <unistd.h>
69 #include <signal.h>
70 #include <time.h>
71 #include <sys/wait.h>
72 #include <ctype.h>
73 #include <sys/stat.h>
74 #include <sys/socket.h>
75 #include <fcntl.h>
76 #include <sys/param.h>
77 #include <netinet/in.h>
78 #include <arpa/inet.h>
79 #include <netdb.h>
80 #include <limits.h>
81 
82 #ifdef HAVE_SYS_SELECT_H
83 #include <sys/select.h>
84 #endif
85 
86 #ifdef USE_BONJOUR
87 #include <dns_sd.h>
88 #endif
89 
90 #ifdef USE_SYSTEMD
91 #include <systemd/sd-daemon.h>
92 #endif
93 
94 #ifdef HAVE_PTHREAD_IS_THREADED_NP
95 #include <pthread.h>
96 #endif
97 
98 #include "access/transam.h"
99 #include "access/xlog.h"
100 #include "bootstrap/bootstrap.h"
101 #include "catalog/pg_control.h"
102 #include "common/ip.h"
103 #include "lib/ilist.h"
104 #include "libpq/auth.h"
105 #include "libpq/libpq.h"
106 #include "libpq/pqsignal.h"
107 #include "miscadmin.h"
108 #include "pg_getopt.h"
109 #include "pgstat.h"
110 #include "postmaster/autovacuum.h"
112 #include "postmaster/fork_process.h"
113 #include "postmaster/pgarch.h"
114 #include "postmaster/postmaster.h"
115 #include "postmaster/syslogger.h"
117 #include "replication/walsender.h"
118 #include "storage/fd.h"
119 #include "storage/ipc.h"
120 #include "storage/pg_shmem.h"
121 #include "storage/pmsignal.h"
122 #include "storage/proc.h"
123 #include "tcop/tcopprot.h"
124 #include "utils/builtins.h"
125 #include "utils/datetime.h"
126 #include "utils/dynamic_loader.h"
127 #include "utils/memutils.h"
128 #include "utils/pidfile.h"
129 #include "utils/ps_status.h"
130 #include "utils/timeout.h"
131 #include "utils/varlena.h"
132 
133 #ifdef EXEC_BACKEND
134 #include "storage/spin.h"
135 #endif
136 
137 
138 /*
139  * Possible types of a backend. Beyond being the possible bkend_type values in
140  * struct bkend, these are OR-able request flag bits for SignalSomeChildren()
141  * and CountChildren().
142  */
143 #define BACKEND_TYPE_NORMAL 0x0001 /* normal backend */
144 #define BACKEND_TYPE_AUTOVAC 0x0002 /* autovacuum worker process */
145 #define BACKEND_TYPE_WALSND 0x0004 /* walsender process */
146 #define BACKEND_TYPE_BGWORKER 0x0008 /* bgworker process */
147 #define BACKEND_TYPE_ALL 0x000F /* OR of all the above */
148 
149 #define BACKEND_TYPE_WORKER (BACKEND_TYPE_AUTOVAC | BACKEND_TYPE_BGWORKER)
150 
151 /*
152  * List of active backends (or child processes anyway; we don't actually
153  * know whether a given child has become a backend or is still in the
154  * authorization phase). This is used mainly to keep track of how many
155  * children we have and send them appropriate signals when necessary.
156  *
157  * "Special" children such as the startup, bgwriter and autovacuum launcher
158  * tasks are not in this list. Autovacuum worker and walsender are in it.
159  * Also, "dead_end" children are in it: these are children launched just for
160  * the purpose of sending a friendly rejection message to a would-be client.
161  * We must track them because they are attached to shared memory, but we know
162  * they will never become live backends. dead_end children are not assigned a
163  * PMChildSlot.
164  *
165  * Background workers are in this list, too.
166  */
167 typedef struct bkend
168 {
169  pid_t pid; /* process id of backend */
170  int32 cancel_key; /* cancel key for cancels for this backend */
171  int child_slot; /* PMChildSlot for this backend, if any */
172 
173  /*
174  * Flavor of backend or auxiliary process. Note that BACKEND_TYPE_WALSND
175  * backends initially announce themselves as BACKEND_TYPE_NORMAL, so if
176  * bkend_type is normal, you should check for a recent transition.
177  */
179  bool dead_end; /* is it going to send an error and quit? */
180  bool bgworker_notify; /* gets bgworker start/stop notifications */
181  dlist_node elem; /* list link in BackendList */
182 } Backend;
183 
185 
186 #ifdef EXEC_BACKEND
187 static Backend *ShmemBackendArray;
188 #endif
189 
191 
192 
193 
194 /* The socket number we are listening for connections on */
196 
197 /* The directory names for Unix socket(s) */
199 
200 /* The TCP listen address(es) */
202 
203 /*
204  * ReservedBackends is the number of backends reserved for superuser use.
205  * This number is taken out of the pool size given by MaxBackends, so the
206  * number of backend slots available to non-superusers is
207  * (MaxBackends - ReservedBackends). Note what this really means is
208  * "if there are <= ReservedBackends connections available, only superusers
209  * can make new connections" --- pre-existing superuser connections don't
210  * count against the limit.
211  */
213 
214 /* The socket(s) we're listening to. */
215 #define MAXLISTEN 64
217 
218 /*
219  * Set by the -o option
220  */
221 static char ExtraOptions[MAXPGPATH];
222 
223 /*
224  * These globals control the behavior of the postmaster in case some
225  * backend dumps core. Normally, it kills all peers of the dead backend
226  * and reinitializes shared memory. By specifying -T or -n, we can have
227  * the postmaster stop (rather than kill) peers and not reinitialize
228  * shared data structures. (Reinit is currently dead code, though.)
229  */
230 static bool Reinit = true;
231 static int SendStop = false;
232 
233 /* still more option variables */
234 bool EnableSSL = false;
235 
236 int PreAuthDelay = 0;
238 
239 bool log_hostname; /* for ps display and logging */
240 bool Log_connections = false;
241 bool Db_user_namespace = false;
242 
243 bool enable_bonjour = false;
246 
247 /* PIDs of special child processes; 0 when not running */
248 static pid_t StartupPID = 0,
257 
258 /* Startup process's status */
259 typedef enum
260 {
263  STARTUP_SIGNALED, /* we sent it a SIGQUIT or SIGKILL */
266 
268 
269 /* Startup/shutdown state */
270 #define NoShutdown 0
271 #define SmartShutdown 1
272 #define FastShutdown 2
273 #define ImmediateShutdown 3
274 
275 static int Shutdown = NoShutdown;
276 
277 static bool FatalError = false; /* T if recovering from backend crash */
278 
279 /*
280  * We use a simple state machine to control startup, shutdown, and
281  * crash recovery (which is rather like shutdown followed by startup).
282  *
283  * After doing all the postmaster initialization work, we enter PM_STARTUP
284  * state and the startup process is launched. The startup process begins by
285  * reading the control file and other preliminary initialization steps.
286  * In a normal startup, or after crash recovery, the startup process exits
287  * with exit code 0 and we switch to PM_RUN state. However, archive recovery
288  * is handled specially since it takes much longer and we would like to support
289  * hot standby during archive recovery.
290  *
291  * When the startup process is ready to start archive recovery, it signals the
292  * postmaster, and we switch to PM_RECOVERY state. The background writer and
293  * checkpointer are launched, while the startup process continues applying WAL.
294  * If Hot Standby is enabled, then, after reaching a consistent point in WAL
295  * redo, startup process signals us again, and we switch to PM_HOT_STANDBY
296  * state and begin accepting connections to perform read-only queries. When
297  * archive recovery is finished, the startup process exits with exit code 0
298  * and we switch to PM_RUN state.
299  *
300  * Normal child backends can only be launched when we are in PM_RUN or
301  * PM_HOT_STANDBY state. (We also allow launch of normal
302  * child backends in PM_WAIT_BACKUP state, but only for superusers.)
303  * In other states we handle connection requests by launching "dead_end"
304  * child processes, which will simply send the client an error message and
305  * quit. (We track these in the BackendList so that we can know when they
306  * are all gone; this is important because they're still connected to shared
307  * memory, and would interfere with an attempt to destroy the shmem segment,
308  * possibly leading to SHMALL failure when we try to make a new one.)
309  * In PM_WAIT_DEAD_END state we are waiting for all the dead_end children
310  * to drain out of the system, and therefore stop accepting connection
311  * requests at all until the last existing child has quit (which hopefully
312  * will not be very long).
313  *
314  * Notice that this state variable does not distinguish *why* we entered
315  * states later than PM_RUN --- Shutdown and FatalError must be consulted
316  * to find that out. FatalError is never true in PM_RECOVERY, PM_HOT_STANDBY
317  * or PM_RUN states, nor in PM_SHUTDOWN states (because we don't enter those
318  * states when trying to recover from a crash). It can be true in PM_STARTUP
319  * state, because we don't clear it until we've successfully started WAL redo.
320  */
/*
 * Postmaster state machine states. The commentary preceding this enum
 * talks about "states later than PM_RUN", so the declaration order of the
 * enumerators is presumably significant -- confirm before reordering.
 */
321 typedef enum
322 {
323  PM_INIT, /* postmaster starting */
324  PM_STARTUP, /* waiting for startup subprocess */
325  PM_RECOVERY, /* in archive recovery mode */
326  PM_HOT_STANDBY, /* in hot standby mode */
327  PM_RUN, /* normal "database is alive" state */
328  PM_WAIT_BACKUP, /* waiting for online backup mode to end */
329  PM_WAIT_READONLY, /* waiting for read only backends to exit */
330  PM_WAIT_BACKENDS, /* waiting for live backends to exit */
331  PM_SHUTDOWN, /* waiting for checkpointer to do shutdown
332  * ckpt */
333  PM_SHUTDOWN_2, /* waiting for archiver and walsenders to
334  * finish */
335  PM_WAIT_DEAD_END, /* waiting for dead_end children to exit */
336  PM_NO_CHILDREN /* all important children have exited */
337 } PMState;
338 
340 
341 /* Start time of SIGKILL timeout during immediate shutdown or child crash */
342 /* Zero means timeout is not running */
343 static time_t AbortStartTime = 0;
344 
345 /* Length of said timeout */
346 #define SIGKILL_CHILDREN_AFTER_SECS 5
347 
348 static bool ReachedNormalRunning = false; /* T if we've reached PM_RUN */
349 
350 bool ClientAuthInProgress = false; /* T during new-client
351  * authentication */
352 
353 bool redirection_done = false; /* stderr redirected for syslogger? */
354 
355 /* received START_AUTOVAC_LAUNCHER signal */
356 static volatile sig_atomic_t start_autovac_launcher = false;
357 
358 /* the launcher needs to be signalled to communicate some condition */
359 static volatile bool avlauncher_needs_signal = false;
360 
361 /* received START_WALRECEIVER signal */
362 static volatile sig_atomic_t WalReceiverRequested = false;
363 
364 /* set when there's a worker that needs to be started up */
365 static volatile bool StartWorkerNeeded = true;
366 static volatile bool HaveCrashedWorker = false;
367 
368 #ifndef HAVE_STRONG_RANDOM
369 /*
370  * State for assigning cancel keys.
371  * Also, the global MyCancelKey passes the cancel key assigned to a given
372  * backend from the postmaster to that backend (via fork).
373  */
374 static unsigned int random_seed = 0;
375 static struct timeval random_start_time;
376 #endif
377 
378 #ifdef USE_SSL
379 /* Set when and if SSL has been initialized properly */
380 static bool LoadedSSL = false;
381 #endif
382 
383 #ifdef USE_BONJOUR
384 static DNSServiceRef bonjour_sdref = NULL;
385 #endif
386 
387 /*
388  * postmaster.c - function prototypes
389  */
390 static void CloseServerPorts(int status, Datum arg);
391 static void unlink_external_pid_file(int status, Datum arg);
392 static void getInstallationPaths(const char *argv0);
393 static void checkDataDir(void);
394 static Port *ConnCreate(int serverFd);
395 static void ConnFree(Port *port);
396 static void reset_shared(int port);
397 static void SIGHUP_handler(SIGNAL_ARGS);
398 static void pmdie(SIGNAL_ARGS);
399 static void reaper(SIGNAL_ARGS);
400 static void sigusr1_handler(SIGNAL_ARGS);
401 static void startup_die(SIGNAL_ARGS);
402 static void dummy_handler(SIGNAL_ARGS);
403 static void StartupPacketTimeoutHandler(void);
404 static void CleanupBackend(int pid, int exitstatus);
405 static bool CleanupBackgroundWorker(int pid, int exitstatus);
406 static void HandleChildCrash(int pid, int exitstatus, const char *procname);
407 static void LogChildExit(int lev, const char *procname,
408  int pid, int exitstatus);
409 static void PostmasterStateMachine(void);
410 static void BackendInitialize(Port *port);
411 static void BackendRun(Port *port) pg_attribute_noreturn();
412 static void ExitPostmaster(int status) pg_attribute_noreturn();
413 static int ServerLoop(void);
414 static int BackendStartup(Port *port);
415 static int ProcessStartupPacket(Port *port, bool SSLdone);
416 static void processCancelRequest(Port *port, void *pkt);
417 static int initMasks(fd_set *rmask);
418 static void report_fork_failure_to_client(Port *port, int errnum);
419 static CAC_state canAcceptConnections(void);
420 static bool RandomCancelKey(int32 *cancel_key);
421 static void signal_child(pid_t pid, int signal);
422 static bool SignalSomeChildren(int signal, int targets);
423 static void TerminateChildren(int signal);
424 
425 #define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL)
426 
427 static int CountChildren(int target);
429 static void maybe_start_bgworkers(void);
430 static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
431 static pid_t StartChildProcess(AuxProcType type);
432 static void StartAutovacuumWorker(void);
433 static void MaybeStartWalReceiver(void);
434 static void InitPostmasterDeathWatchHandle(void);
435 
436 /*
437  * Is the archiver allowed to start up in the current postmaster state?
438  *
439  * Yes in PM_RUN if XLogArchivingActive(); also yes in PM_RECOVERY or
440  * PM_HOT_STANDBY (i.e. during recovery) if XLogArchivingAlways().
441  */
442 #define PgArchStartupAllowed() \
443  ((XLogArchivingActive() && pmState == PM_RUN) || \
444  (XLogArchivingAlways() && \
445  (pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY)))
446 
447 #ifdef EXEC_BACKEND
448 
449 #ifdef WIN32
450 #define WNOHANG 0 /* ignored, so any integer value will do */
451 
452 static pid_t waitpid(pid_t pid, int *exitstatus, int options);
453 static void WINAPI pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired);
454 
455 static HANDLE win32ChildQueue;
456 
457 typedef struct
458 {
459  HANDLE waitHandle;
460  HANDLE procHandle;
461  DWORD procId;
462 } win32_deadchild_waitinfo;
463 #endif /* WIN32 */
464 
465 static pid_t backend_forkexec(Port *port);
466 static pid_t internal_forkexec(int argc, char *argv[], Port *port);
467 
468 /*
 * Type for a socket that can be inherited by another process. On WIN32 it
 * bundles the original SOCKET with a WSAPROTOCOL_INFO; on other platforms
 * it is a plain int.
 */
469 #ifdef WIN32
470 typedef struct
471 {
472  SOCKET origsocket; /* Original socket value, or PGINVALID_SOCKET
473  * if not a socket */
474  WSAPROTOCOL_INFO wsainfo;
475 } InheritableSocket;
476 #else
477 typedef int InheritableSocket;
478 #endif
479 
480 /*
481  * Structure contains all variables passed to exec:ed backends
482  */
483 typedef struct
484 {
485  Port port;
486  InheritableSocket portsocket;
487  char DataDir[MAXPGPATH];
490  int MyPMChildSlot;
491 #ifndef WIN32
492  unsigned long UsedShmemSegID;
493 #else
494  HANDLE UsedShmemSegID;
495 #endif
496  void *UsedShmemSegAddr;
499  Backend *ShmemBackendArray;
500 #ifndef HAVE_SPINLOCKS
502 #endif
511  InheritableSocket pgStatSock;
512  pid_t PostmasterPid;
516  bool redirection_done;
517  bool IsBinaryUpgrade;
518  int max_safe_fds;
519  int MaxBackends;
520 #ifdef WIN32
521  HANDLE PostmasterHandle;
522  HANDLE initial_signal_pipe;
523  HANDLE syslogPipe[2];
524 #else
525  int postmaster_alive_fds[2];
526  int syslogPipe[2];
527 #endif
528  char my_exec_path[MAXPGPATH];
529  char pkglib_path[MAXPGPATH];
530  char ExtraOptions[MAXPGPATH];
531 } BackendParameters;
532 
533 static void read_backend_variables(char *id, Port *port);
534 static void restore_backend_variables(BackendParameters *param, Port *port);
535 
536 #ifndef WIN32
537 static bool save_backend_variables(BackendParameters *param, Port *port);
538 #else
539 static bool save_backend_variables(BackendParameters *param, Port *port,
540  HANDLE childProcess, pid_t childPid);
541 #endif
542 
543 static void ShmemBackendArrayAdd(Backend *bn);
544 static void ShmemBackendArrayRemove(Backend *bn);
545 #endif /* EXEC_BACKEND */
546 
547 #define StartupDataBase() StartChildProcess(StartupProcess)
548 #define StartBackgroundWriter() StartChildProcess(BgWriterProcess)
549 #define StartCheckpointer() StartChildProcess(CheckpointerProcess)
550 #define StartWalWriter() StartChildProcess(WalWriterProcess)
551 #define StartWalReceiver() StartChildProcess(WalReceiverProcess)
552 
553 /*
 * Macros to check exit status of a child process. EXIT_STATUS_0 compares
 * the whole status word with zero, whereas EXIT_STATUS_1 and EXIT_STATUS_3
 * require WIFEXITED() and then match WEXITSTATUS() against 1 or 3.
 */
554 #define EXIT_STATUS_0(st) ((st) == 0)
555 #define EXIT_STATUS_1(st) (WIFEXITED(st) && WEXITSTATUS(st) == 1)
556 #define EXIT_STATUS_3(st) (WIFEXITED(st) && WEXITSTATUS(st) == 3)
557 
558 #ifndef WIN32
559 /*
560  * File descriptors for pipe used to monitor if postmaster is alive.
561  * First is POSTMASTER_FD_WATCH, second is POSTMASTER_FD_OWN.
562  */
563 int postmaster_alive_fds[2] = {-1, -1};
564 #else
565 /* Process handle of postmaster used for the same purpose on Windows */
566 HANDLE PostmasterHandle;
567 #endif
568 
569 /*
570  * Postmaster main entry point
571  */
572 void
573 PostmasterMain(int argc, char *argv[])
574 {
575  int opt;
576  int status;
577  char *userDoption = NULL;
578  bool listen_addr_saved = false;
579  int i;
580  char *output_config_variable = NULL;
581 
582  MyProcPid = PostmasterPid = getpid();
583 
584  MyStartTime = time(NULL);
585 
587 
588  /*
589  * for security, no dir or file created can be group or other accessible
590  */
591  umask(S_IRWXG | S_IRWXO);
592 
593  /*
594  * Initialize random(3) so we don't get the same values in every run.
595  *
596  * Note: the seed is pretty predictable from externally-visible facts such
597  * as postmaster start time, so avoid using random() for security-critical
598  * random values during postmaster startup. At the time of first
599  * connection, PostmasterRandom will select a hopefully-more-random seed.
600  */
601  srandom((unsigned int) (MyProcPid ^ MyStartTime));
602 
603  /*
604  * By default, palloc() requests in the postmaster will be allocated in
605  * the PostmasterContext, which is space that can be recycled by backends.
606  * Allocated data that needs to be available to backends should be
607  * allocated in TopMemoryContext.
608  */
610  "Postmaster",
613 
614  /* Initialize paths to installation files */
615  getInstallationPaths(argv[0]);
616 
617  /*
618  * Set up signal handlers for the postmaster process.
619  *
620  * In the postmaster, we want to install non-ignored handlers *without*
621  * SA_RESTART. This is because they'll be blocked at all times except
622  * when ServerLoop is waiting for something to happen, and during that
623  * window, we want signals to exit the select(2) wait so that ServerLoop
624  * can respond if anything interesting happened. On some platforms,
625  * signals marked SA_RESTART would not cause the select() wait to end.
626  * Child processes will generally want SA_RESTART, but we expect them to
627  * set up their own handlers before unblocking signals.
628  *
629  * CAUTION: when changing this list, check for side-effects on the signal
630  * handling setup of child processes. See tcop/postgres.c,
631  * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
632  * postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/pgstat.c,
633  * postmaster/syslogger.c, postmaster/bgworker.c and
634  * postmaster/checkpointer.c.
635  */
636  pqinitmask();
638 
639  pqsignal_no_restart(SIGHUP, SIGHUP_handler); /* reread config file and
640  * have children do same */
641  pqsignal_no_restart(SIGINT, pmdie); /* send SIGTERM and shut down */
642  pqsignal_no_restart(SIGQUIT, pmdie); /* send SIGQUIT and die */
643  pqsignal_no_restart(SIGTERM, pmdie); /* wait for children and shut down */
644  pqsignal(SIGALRM, SIG_IGN); /* ignored */
645  pqsignal(SIGPIPE, SIG_IGN); /* ignored */
646  pqsignal_no_restart(SIGUSR1, sigusr1_handler); /* message from child
647  * process */
648  pqsignal_no_restart(SIGUSR2, dummy_handler); /* unused, reserve for
649  * children */
650  pqsignal_no_restart(SIGCHLD, reaper); /* handle child termination */
651  pqsignal(SIGTTIN, SIG_IGN); /* ignored */
652  pqsignal(SIGTTOU, SIG_IGN); /* ignored */
653  /* ignore SIGXFSZ, so that ulimit violations work like disk full */
654 #ifdef SIGXFSZ
655  pqsignal(SIGXFSZ, SIG_IGN); /* ignored */
656 #endif
657 
658  /*
659  * Options setup
660  */
662 
663  opterr = 1;
664 
665  /*
666  * Parse command-line options. CAUTION: keep this in sync with
667  * tcop/postgres.c (the option sets should not conflict) and with the
668  * common help() function in main/main.c.
669  */
670  while ((opt = getopt(argc, argv, "B:bc:C:D:d:EeFf:h:ijk:lN:nOo:Pp:r:S:sTt:W:-:")) != -1)
671  {
672  switch (opt)
673  {
674  case 'B':
675  SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
676  break;
677 
678  case 'b':
679  /* Undocumented flag used for binary upgrades */
680  IsBinaryUpgrade = true;
681  break;
682 
683  case 'C':
684  output_config_variable = strdup(optarg);
685  break;
686 
687  case 'D':
688  userDoption = strdup(optarg);
689  break;
690 
691  case 'd':
693  break;
694 
695  case 'E':
696  SetConfigOption("log_statement", "all", PGC_POSTMASTER, PGC_S_ARGV);
697  break;
698 
699  case 'e':
700  SetConfigOption("datestyle", "euro", PGC_POSTMASTER, PGC_S_ARGV);
701  break;
702 
703  case 'F':
704  SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
705  break;
706 
707  case 'f':
709  {
710  write_stderr("%s: invalid argument for option -f: \"%s\"\n",
711  progname, optarg);
712  ExitPostmaster(1);
713  }
714  break;
715 
716  case 'h':
717  SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
718  break;
719 
720  case 'i':
721  SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
722  break;
723 
724  case 'j':
725  /* only used by interactive backend */
726  break;
727 
728  case 'k':
729  SetConfigOption("unix_socket_directories", optarg, PGC_POSTMASTER, PGC_S_ARGV);
730  break;
731 
732  case 'l':
733  SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
734  break;
735 
736  case 'N':
737  SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
738  break;
739 
740  case 'n':
741  /* Don't reinit shared mem after abnormal exit */
742  Reinit = false;
743  break;
744 
745  case 'O':
746  SetConfigOption("allow_system_table_mods", "true", PGC_POSTMASTER, PGC_S_ARGV);
747  break;
748 
749  case 'o':
750  /* Other options to pass to the backend on the command line */
752  sizeof(ExtraOptions) - strlen(ExtraOptions),
753  " %s", optarg);
754  break;
755 
756  case 'P':
757  SetConfigOption("ignore_system_indexes", "true", PGC_POSTMASTER, PGC_S_ARGV);
758  break;
759 
760  case 'p':
762  break;
763 
764  case 'r':
765  /* only used by single-user backend */
766  break;
767 
768  case 'S':
770  break;
771 
772  case 's':
773  SetConfigOption("log_statement_stats", "true", PGC_POSTMASTER, PGC_S_ARGV);
774  break;
775 
776  case 'T':
777 
778  /*
779  * In the event that some backend dumps core, send SIGSTOP,
780  * rather than SIGQUIT, to all its peers. This lets the wily
781  * post_hacker collect core dumps from everyone.
782  */
783  SendStop = true;
784  break;
785 
786  case 't':
787  {
788  const char *tmp = get_stats_option_name(optarg);
789 
790  if (tmp)
791  {
793  }
794  else
795  {
796  write_stderr("%s: invalid argument for option -t: \"%s\"\n",
797  progname, optarg);
798  ExitPostmaster(1);
799  }
800  break;
801  }
802 
803  case 'W':
804  SetConfigOption("post_auth_delay", optarg, PGC_POSTMASTER, PGC_S_ARGV);
805  break;
806 
807  case 'c':
808  case '-':
809  {
810  char *name,
811  *value;
812 
813  ParseLongOption(optarg, &name, &value);
814  if (!value)
815  {
816  if (opt == '-')
817  ereport(ERROR,
818  (errcode(ERRCODE_SYNTAX_ERROR),
819  errmsg("--%s requires a value",
820  optarg)));
821  else
822  ereport(ERROR,
823  (errcode(ERRCODE_SYNTAX_ERROR),
824  errmsg("-c %s requires a value",
825  optarg)));
826  }
827 
829  free(name);
830  if (value)
831  free(value);
832  break;
833  }
834 
835  default:
836  write_stderr("Try \"%s --help\" for more information.\n",
837  progname);
838  ExitPostmaster(1);
839  }
840  }
841 
842  /*
843  * Postmaster accepts no non-option switch arguments.
844  */
845  if (optind < argc)
846  {
847  write_stderr("%s: invalid argument: \"%s\"\n",
848  progname, argv[optind]);
849  write_stderr("Try \"%s --help\" for more information.\n",
850  progname);
851  ExitPostmaster(1);
852  }
853 
854  /*
855  * Locate the proper configuration files and data directory, and read
856  * postgresql.conf for the first time.
857  */
858  if (!SelectConfigFiles(userDoption, progname))
859  ExitPostmaster(2);
860 
861  if (output_config_variable != NULL)
862  {
863  /*
864  * "-C guc" was specified, so print GUC's value and exit. No extra
865  * permission check is needed because the user is reading inside the
866  * data dir.
867  */
868  const char *config_val = GetConfigOption(output_config_variable,
869  false, false);
870 
871  puts(config_val ? config_val : "");
872  ExitPostmaster(0);
873  }
874 
875  /* Verify that DataDir looks reasonable */
876  checkDataDir();
877 
878  /* And switch working directory into it */
879  ChangeToDataDir();
880 
881  /*
882  * Check for invalid combinations of GUC settings.
883  */
885  {
886  write_stderr("%s: superuser_reserved_connections must be less than max_connections\n", progname);
887  ExitPostmaster(1);
888  }
890  {
891  write_stderr("%s: max_wal_senders must be less than max_connections\n", progname);
892  ExitPostmaster(1);
893  }
895  ereport(ERROR,
896  (errmsg("WAL archival cannot be enabled when wal_level is \"minimal\"")));
898  ereport(ERROR,
899  (errmsg("WAL streaming (max_wal_senders > 0) requires wal_level \"replica\" or \"logical\"")));
900 
901  /*
902  * Other one-time internal sanity checks can go here, if they are fast.
903  * (Put any slow processing further down, after postmaster.pid creation.)
904  */
905  if (!CheckDateTokenTables())
906  {
907  write_stderr("%s: invalid datetoken tables, please fix\n", progname);
908  ExitPostmaster(1);
909  }
910 
911  /*
912  * Now that we are done processing the postmaster arguments, reset
913  * getopt(3) library so that it will work correctly in subprocesses.
914  */
915  optind = 1;
916 #ifdef HAVE_INT_OPTRESET
917  optreset = 1; /* some systems need this too */
918 #endif
919 
920  /* For debugging: display postmaster environment */
921  {
922  extern char **environ;
923  char **p;
924 
925  ereport(DEBUG3,
926  (errmsg_internal("%s: PostmasterMain: initial environment dump:",
927  progname)));
928  ereport(DEBUG3,
929  (errmsg_internal("-----------------------------------------")));
930  for (p = environ; *p; ++p)
931  ereport(DEBUG3,
932  (errmsg_internal("\t%s", *p)));
933  ereport(DEBUG3,
934  (errmsg_internal("-----------------------------------------")));
935  }
936 
937  /*
938  * Create lockfile for data directory.
939  *
940  * We want to do this before we try to grab the input sockets, because the
941  * data directory interlock is more reliable than the socket-file
942  * interlock (thanks to whoever decided to put socket files in /tmp :-().
943  * For the same reason, it's best to grab the TCP socket(s) before the
944  * Unix socket(s).
945  *
946  * Also note that this internally sets up the on_proc_exit function that
947  * is responsible for removing both data directory and socket lockfiles;
948  * so it must happen before opening sockets so that at exit, the socket
949  * lockfiles go away after CloseServerPorts runs.
950  */
951  CreateDataDirLockFile(true);
952 
953  /*
954  * Initialize SSL library, if specified.
955  */
956 #ifdef USE_SSL
957  if (EnableSSL)
958  {
959  (void) secure_initialize(true);
960  LoadedSSL = true;
961  }
962 #endif
963 
964  /*
965  * Register the apply launcher. Since it registers a background worker,
966  * it needs to be called before InitializeMaxBackends(), and it's probably
967  * a good idea to call it before any modules have a chance to take the
968  * background worker slots.
969  */
971 
972  /*
973  * process any libraries that should be preloaded at postmaster start
974  */
976 
977  /*
978  * Now that loadable modules have had their chance to register background
979  * workers, calculate MaxBackends.
980  */
982 
983  /*
984  * Establish input sockets.
985  *
986  * First, mark them all closed, and set up an on_proc_exit function that's
987  * charged with closing the sockets again at postmaster shutdown.
988  */
989  for (i = 0; i < MAXLISTEN; i++)
991 
993 
994  if (ListenAddresses)
995  {
996  char *rawstring;
997  List *elemlist;
998  ListCell *l;
999  int success = 0;
1000 
1001  /* Need a modifiable copy of ListenAddresses */
1002  rawstring = pstrdup(ListenAddresses);
1003 
1004  /* Parse string into list of hostnames */
1005  if (!SplitIdentifierString(rawstring, ',', &elemlist))
1006  {
1007  /* syntax error in list */
1008  ereport(FATAL,
1009  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1010  errmsg("invalid list syntax in parameter \"%s\"",
1011  "listen_addresses")));
1012  }
1013 
1014  foreach(l, elemlist)
1015  {
1016  char *curhost = (char *) lfirst(l);
1017 
1018  if (strcmp(curhost, "*") == 0)
1019  status = StreamServerPort(AF_UNSPEC, NULL,
1020  (unsigned short) PostPortNumber,
1021  NULL,
1023  else
1024  status = StreamServerPort(AF_UNSPEC, curhost,
1025  (unsigned short) PostPortNumber,
1026  NULL,
1027  ListenSocket, MAXLISTEN);
1028 
1029  if (status == STATUS_OK)
1030  {
1031  success++;
1032  /* record the first successful host addr in lockfile */
1033  if (!listen_addr_saved)
1034  {
1036  listen_addr_saved = true;
1037  }
1038  }
1039  else
1040  ereport(WARNING,
1041  (errmsg("could not create listen socket for \"%s\"",
1042  curhost)));
1043  }
1044 
1045  if (!success && elemlist != NIL)
1046  ereport(FATAL,
1047  (errmsg("could not create any TCP/IP sockets")));
1048 
1049  list_free(elemlist);
1050  pfree(rawstring);
1051  }
1052 
1053 #ifdef USE_BONJOUR
1054  /* Register for Bonjour only if we opened TCP socket(s) */
1056  {
1057  DNSServiceErrorType err;
1058 
1059  /*
1060  * We pass 0 for interface_index, which will result in registering on
1061  * all "applicable" interfaces. It's not entirely clear from the
1062  * DNS-SD docs whether this would be appropriate if we have bound to
1063  * just a subset of the available network interfaces.
1064  */
1065  err = DNSServiceRegister(&bonjour_sdref,
1066  0,
1067  0,
1068  bonjour_name,
1069  "_postgresql._tcp.",
1070  NULL,
1071  NULL,
1072  htons(PostPortNumber),
1073  0,
1074  NULL,
1075  NULL,
1076  NULL);
1077  if (err != kDNSServiceErr_NoError)
1078  elog(LOG, "DNSServiceRegister() failed: error code %ld",
1079  (long) err);
1080 
1081  /*
1082  * We don't bother to read the mDNS daemon's reply, and we expect that
1083  * it will automatically terminate our registration when the socket is
1084  * closed at postmaster termination. So there's nothing more to be
1085  * done here. However, the bonjour_sdref is kept around so that
1086  * forked children can close their copies of the socket.
1087  */
1088  }
1089 #endif
1090 
1091 #ifdef HAVE_UNIX_SOCKETS
1093  {
1094  char *rawstring;
1095  List *elemlist;
1096  ListCell *l;
1097  int success = 0;
1098 
1099  /* Need a modifiable copy of Unix_socket_directories */
1100  rawstring = pstrdup(Unix_socket_directories);
1101 
1102  /* Parse string into list of directories */
1103  if (!SplitDirectoriesString(rawstring, ',', &elemlist))
1104  {
1105  /* syntax error in list */
1106  ereport(FATAL,
1107  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1108  errmsg("invalid list syntax in parameter \"%s\"",
1109  "unix_socket_directories")));
1110  }
1111 
1112  foreach(l, elemlist)
1113  {
1114  char *socketdir = (char *) lfirst(l);
1115 
1116  status = StreamServerPort(AF_UNIX, NULL,
1117  (unsigned short) PostPortNumber,
1118  socketdir,
1119  ListenSocket, MAXLISTEN);
1120 
1121  if (status == STATUS_OK)
1122  {
1123  success++;
1124  /* record the first successful Unix socket in lockfile */
1125  if (success == 1)
1127  }
1128  else
1129  ereport(WARNING,
1130  (errmsg("could not create Unix-domain socket in directory \"%s\"",
1131  socketdir)));
1132  }
1133 
1134  if (!success && elemlist != NIL)
1135  ereport(FATAL,
1136  (errmsg("could not create any Unix-domain sockets")));
1137 
1138  list_free_deep(elemlist);
1139  pfree(rawstring);
1140  }
1141 #endif
1142 
1143  /*
1144  * check that we have some socket to listen on
1145  */
1146  if (ListenSocket[0] == PGINVALID_SOCKET)
1147  ereport(FATAL,
1148  (errmsg("no socket created for listening")));
1149 
1150  /*
1151  * If no valid TCP ports, write an empty line for listen address,
1152  * indicating the Unix socket must be used. Note that this line is not
1153  * added to the lock file until there is a socket backing it.
1154  */
1155  if (!listen_addr_saved)
1157 
1158  /*
1159  * Set up shared memory and semaphores.
1160  */
1162 
1163  /*
1164  * Estimate number of openable files. This must happen after setting up
1165  * semaphores, because on some platforms semaphores count as open files.
1166  */
1167  set_max_safe_fds();
1168 
1169  /*
1170  * Set reference point for stack-depth checking.
1171  */
1172  set_stack_base();
1173 
1174  /*
1175  * Initialize pipe (or process handle on Windows) that allows children to
1176  * wake up from sleep on postmaster death.
1177  */
1179 
1180 #ifdef WIN32
1181 
1182  /*
1183  * Initialize I/O completion port used to deliver list of dead children.
1184  */
1185  win32ChildQueue = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 1);
1186  if (win32ChildQueue == NULL)
1187  ereport(FATAL,
1188  (errmsg("could not create I/O completion port for child queue")));
1189 #endif
1190 
1191  /*
1192  * Record postmaster options. We delay this till now to avoid recording
1193  * bogus options (eg, NBuffers too high for available memory).
1194  */
1195  if (!CreateOptsFile(argc, argv, my_exec_path))
1196  ExitPostmaster(1);
1197 
1198 #ifdef EXEC_BACKEND
1199  /* Write out nondefault GUC settings for child processes to use */
1200  write_nondefault_variables(PGC_POSTMASTER);
1201 #endif
1202 
1203  /*
1204  * Write the external PID file if requested
1205  */
1206  if (external_pid_file)
1207  {
1208  FILE *fpidfile = fopen(external_pid_file, "w");
1209 
1210  if (fpidfile)
1211  {
1212  fprintf(fpidfile, "%d\n", MyProcPid);
1213  fclose(fpidfile);
1214 
1215  /* Make PID file world readable */
1216  if (chmod(external_pid_file, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) != 0)
1217  write_stderr("%s: could not change permissions of external PID file \"%s\": %s\n",
1219  }
1220  else
1221  write_stderr("%s: could not write external PID file \"%s\": %s\n",
1223 
1225  }
1226 
1227  /*
1228  * Remove old temporary files. At this point there can be no other
1229  * Postgres processes running in this directory, so this should be safe.
1230  */
1232 
1233  /*
1234  * Forcibly remove the files signaling a standby promotion request.
1235  * Otherwise, the existence of those files triggers a promotion too early,
1236  * whether a user wants that or not.
1237  *
1238  * This removal of files is usually unnecessary because they can exist
1239  * only during a few moments during a standby promotion. However there is
1240  * a race condition: if pg_ctl promote is executed and creates the files
1241  * during a promotion, the files can stay around even after the server is
1242  * brought up to new master. Then, if new standby starts by using the
1243  * backup taken from that master, the files can exist at the server
1244  * startup and should be removed in order to avoid an unexpected
1245  * promotion.
1246  *
1247  * Note that promotion signal files need to be removed before the startup
1248  * process is invoked, because after that they can be used by the
1249  * postmaster's SIGUSR1 signal handler.
1250  */
1252 
1253  /* Remove any outdated file holding the current log filenames. */
1254  if (unlink(LOG_METAINFO_DATAFILE) < 0 && errno != ENOENT)
1255  ereport(LOG,
1257  errmsg("could not remove file \"%s\": %m",
1259 
1260  /*
1261  * If enabled, start up syslogger collection subprocess
1262  */
1264 
1265  /*
1266  * Reset whereToSendOutput from DestDebug (its starting state) to
1267  * DestNone. This stops ereport from sending log messages to stderr unless
1268  * Log_destination permits. We don't do this until the postmaster is
1269  * fully launched, since startup failures may as well be reported to
1270  * stderr.
1271  *
1272  * If we are in fact disabling logging to stderr, first emit a log message
1273  * saying so, to provide a breadcrumb trail for users who may not remember
1274  * that their logging is configured to go somewhere else.
1275  */
1277  ereport(LOG,
1278  (errmsg("ending log output to stderr"),
1279  errhint("Future log output will go to log destination \"%s\".",
1281 
1283 
1284  /*
1285  * Initialize stats collection subsystem (this does NOT start the
1286  * collector process!)
1287  */
1288  pgstat_init();
1289 
1290  /*
1291  * Initialize the autovacuum subsystem (again, no process start yet)
1292  */
1293  autovac_init();
1294 
1295  /*
1296  * Load configuration files for client authentication.
1297  */
1298  if (!load_hba())
1299  {
1300  /*
1301  * It makes no sense to continue if we fail to load the HBA file,
1302  * since there is no way to connect to the database in this case.
1303  */
1304  ereport(FATAL,
1305  (errmsg("could not load pg_hba.conf")));
1306  }
1307  if (!load_ident())
1308  {
1309  /*
1310  * We can start up without the IDENT file, although it means that you
1311  * cannot log in using any of the authentication methods that need a
1312  * user name mapping. load_ident() already logged the details of error
1313  * to the log.
1314  */
1315  }
1316 
1317 #ifdef HAVE_PTHREAD_IS_THREADED_NP
1318 
1319  /*
1320  * On macOS, libintl replaces setlocale() with a version that calls
1321  * CFLocaleCopyCurrent() when its second argument is "" and every relevant
1322  * environment variable is unset or empty. CFLocaleCopyCurrent() makes
1323  * the process multithreaded. The postmaster calls sigprocmask() and
1324  * calls fork() without an immediate exec(), both of which have undefined
1325  * behavior in a multithreaded program. A multithreaded postmaster is the
1326  * normal case on Windows, which offers neither fork() nor sigprocmask().
1327  */
1328  if (pthread_is_threaded_np() != 0)
1329  ereport(FATAL,
1330  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1331  errmsg("postmaster became multithreaded during startup"),
1332  errhint("Set the LC_ALL environment variable to a valid locale.")));
1333 #endif
1334 
1335  /*
1336  * Remember postmaster startup time
1337  */
1339 #ifndef HAVE_STRONG_RANDOM
1340  /* RandomCancelKey wants its own copy */
1342 #endif
1343 
1344  /*
1345  * Report postmaster status in the postmaster.pid file, to allow pg_ctl to
1346  * see what's happening.
1347  */
1349 
1350  /*
1351  * We're ready to rock and roll...
1352  */
1354  Assert(StartupPID != 0);
1356  pmState = PM_STARTUP;
1357 
1358  /* Some workers may be scheduled to start now */
1360 
1361  status = ServerLoop();
1362 
1363  /*
1364  * ServerLoop probably shouldn't ever return, but if it does, close down.
1365  */
1366  ExitPostmaster(status != STATUS_OK);
1367 
1368  abort(); /* not reached */
1369 }
1370 
1371 
1372 /*
1373  * on_proc_exit callback to close server's listen sockets
1374  */
1375 static void
1377 {
1378  int i;
1379 
1380  /*
1381  * First, explicitly close all the socket FDs. We used to just let this
1382  * happen implicitly at postmaster exit, but it's better to close them
1383  * before we remove the postmaster.pid lockfile; otherwise there's a race
1384  * condition if a new postmaster wants to re-use the TCP port number.
1385  */
1386  for (i = 0; i < MAXLISTEN; i++)
1387  {
1388  if (ListenSocket[i] != PGINVALID_SOCKET)
1389  {
1392  }
1393  }
1394 
1395  /*
1396  * Next, remove any filesystem entries for Unix sockets. To avoid race
1397  * conditions against incoming postmasters, this must happen after closing
1398  * the sockets and before removing lock files.
1399  */
1401 
1402  /*
1403  * We don't do anything about socket lock files here; those will be
1404  * removed in a later on_proc_exit callback.
1405  */
1406 }
1407 
1408 /*
1409  * on_proc_exit callback to delete external_pid_file
1410  */
1411 static void
1413 {
1414  if (external_pid_file)
1416 }
1417 
1418 
1419 /*
1420  * Compute and check the directory paths to files that are part of the
1421  * installation (as deduced from the postgres executable's own location)
1422  */
1423 static void
1425 {
1426  DIR *pdir;
1427 
1428  /* Locate the postgres executable itself */
1429  if (find_my_exec(argv0, my_exec_path) < 0)
1430  elog(FATAL, "%s: could not locate my own executable path", argv0);
1431 
1432 #ifdef EXEC_BACKEND
1433  /* Locate executable backend before we change working directory */
1434  if (find_other_exec(argv0, "postgres", PG_BACKEND_VERSIONSTR,
1435  postgres_exec_path) < 0)
1436  ereport(FATAL,
1437  (errmsg("%s: could not locate matching postgres executable",
1438  argv0)));
1439 #endif
1440 
1441  /*
1442  * Locate the pkglib directory --- this has to be set early in case we try
1443  * to load any modules from it in response to postgresql.conf entries.
1444  */
1446 
1447  /*
1448  * Verify that there's a readable directory there; otherwise the Postgres
1449  * installation is incomplete or corrupt. (A typical cause of this
1450  * failure is that the postgres executable has been moved or hardlinked to
1451  * some directory that's not a sibling of the installation lib/
1452  * directory.)
1453  */
1454  pdir = AllocateDir(pkglib_path);
1455  if (pdir == NULL)
1456  ereport(ERROR,
1458  errmsg("could not open directory \"%s\": %m",
1459  pkglib_path),
1460  errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
1461  my_exec_path)));
1462  FreeDir(pdir);
1463 
1464  /*
1465  * XXX is it worth similarly checking the share/ directory? If the lib/
1466  * directory is there, then share/ probably is too.
1467  */
1468 }
1469 
1470 
1471 /*
1472  * Validate the proposed data directory
1473  */
1474 static void
1476 {
1477  char path[MAXPGPATH];
1478  FILE *fp;
1479  struct stat stat_buf;
1480 
1481  Assert(DataDir);
1482 
1483  if (stat(DataDir, &stat_buf) != 0)
1484  {
1485  if (errno == ENOENT)
1486  ereport(FATAL,
1488  errmsg("data directory \"%s\" does not exist",
1489  DataDir)));
1490  else
1491  ereport(FATAL,
1493  errmsg("could not read permissions of directory \"%s\": %m",
1494  DataDir)));
1495  }
1496 
1497  /* eventual chdir would fail anyway, but let's test ... */
1498  if (!S_ISDIR(stat_buf.st_mode))
1499  ereport(FATAL,
1500  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1501  errmsg("specified data directory \"%s\" is not a directory",
1502  DataDir)));
1503 
1504  /*
1505  * Check that the directory belongs to my userid; if not, reject.
1506  *
1507  * This check is an essential part of the interlock that prevents two
1508  * postmasters from starting in the same directory (see CreateLockFile()).
1509  * Do not remove or weaken it.
1510  *
1511  * XXX can we safely enable this check on Windows?
1512  */
1513 #if !defined(WIN32) && !defined(__CYGWIN__)
1514  if (stat_buf.st_uid != geteuid())
1515  ereport(FATAL,
1516  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1517  errmsg("data directory \"%s\" has wrong ownership",
1518  DataDir),
1519  errhint("The server must be started by the user that owns the data directory.")));
1520 #endif
1521 
1522  /*
1523  * Check if the directory has group or world access. If so, reject.
1524  *
1525  * It would be possible to allow weaker constraints (for example, allow
1526  * group access) but we cannot make a general assumption that that is
1527  * okay; for example there are platforms where nearly all users
1528  * customarily belong to the same group. Perhaps this test should be
1529  * configurable.
1530  *
1531  * XXX temporarily suppress check when on Windows, because there may not
1532  * be proper support for Unix-y file permissions. Need to think of a
1533  * reasonable check to apply on Windows.
1534  */
1535 #if !defined(WIN32) && !defined(__CYGWIN__)
1536  if (stat_buf.st_mode & (S_IRWXG | S_IRWXO))
1537  ereport(FATAL,
1538  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1539  errmsg("data directory \"%s\" has group or world access",
1540  DataDir),
1541  errdetail("Permissions should be u=rwx (0700).")));
1542 #endif
1543 
1544  /* Look for PG_VERSION before looking for pg_control */
1546 
1547  snprintf(path, sizeof(path), "%s/global/pg_control", DataDir);
1548 
1549  fp = AllocateFile(path, PG_BINARY_R);
1550  if (fp == NULL)
1551  {
1552  write_stderr("%s: could not find the database system\n"
1553  "Expected to find it in the directory \"%s\",\n"
1554  "but could not open file \"%s\": %s\n",
1555  progname, DataDir, path, strerror(errno));
1556  ExitPostmaster(2);
1557  }
1558  FreeFile(fp);
1559 }
1560 
1561 /*
1562  * Determine how long we should let ServerLoop sleep.
1563  *
1564  * In normal conditions we wait at most one minute, to ensure that the other
1565  * background tasks handled by ServerLoop get done even when no requests are
1566  * arriving. However, if there are background workers waiting to be started,
1567  * we don't actually sleep so that they are quickly serviced. Other exception
1568  * cases are as shown in the code.
1569  */
1570 static void
1571 DetermineSleepTime(struct timeval *timeout)
1572 {
1573  TimestampTz next_wakeup = 0;
1574 
1575  /*
1576  * Normal case: either there are no background workers at all, or we're in
1577  * a shutdown sequence (during which we ignore bgworkers altogether).
1578  */
1579  if (Shutdown > NoShutdown ||
1581  {
1582  if (AbortStartTime != 0)
1583  {
1584  /* time left to abort; clamp to 0 in case it already expired */
1585  timeout->tv_sec = SIGKILL_CHILDREN_AFTER_SECS -
1586  (time(NULL) - AbortStartTime);
1587  timeout->tv_sec = Max(timeout->tv_sec, 0);
1588  timeout->tv_usec = 0;
1589  }
1590  else
1591  {
1592  timeout->tv_sec = 60;
1593  timeout->tv_usec = 0;
1594  }
1595  return;
1596  }
1597 
1598  if (StartWorkerNeeded)
1599  {
1600  timeout->tv_sec = 0;
1601  timeout->tv_usec = 0;
1602  return;
1603  }
1604 
1605  if (HaveCrashedWorker)
1606  {
1607  slist_mutable_iter siter;
1608 
1609  /*
1610  * When there are crashed bgworkers, we sleep just long enough that
1611  * they are restarted when they request to be. Scan the list to
1612  * determine the minimum of all wakeup times according to most recent
1613  * crash time and requested restart interval.
1614  */
1616  {
1617  RegisteredBgWorker *rw;
1618  TimestampTz this_wakeup;
1619 
1620  rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
1621 
1622  if (rw->rw_crashed_at == 0)
1623  continue;
1624 
1626  || rw->rw_terminate)
1627  {
1628  ForgetBackgroundWorker(&siter);
1629  continue;
1630  }
1631 
1632  this_wakeup = TimestampTzPlusMilliseconds(rw->rw_crashed_at,
1633  1000L * rw->rw_worker.bgw_restart_time);
1634  if (next_wakeup == 0 || this_wakeup < next_wakeup)
1635  next_wakeup = this_wakeup;
1636  }
1637  }
1638 
1639  if (next_wakeup != 0)
1640  {
1641  long secs;
1642  int microsecs;
1643 
1645  &secs, &microsecs);
1646  timeout->tv_sec = secs;
1647  timeout->tv_usec = microsecs;
1648 
1649  /* Ensure we don't exceed one minute */
1650  if (timeout->tv_sec > 60)
1651  {
1652  timeout->tv_sec = 60;
1653  timeout->tv_usec = 0;
1654  }
1655  }
1656  else
1657  {
1658  timeout->tv_sec = 60;
1659  timeout->tv_usec = 0;
1660  }
1661 }
1662 
1663 /*
1664  * Main idle loop of postmaster
1665  *
1666  * NB: Needs to be called with signals blocked
1667  */
1668 static int
1670 {
1671  fd_set readmask;
1672  int nSockets;
1673  time_t last_lockfile_recheck_time,
1674  last_touch_time;
1675 
1676  last_lockfile_recheck_time = last_touch_time = time(NULL);
1677 
1678  nSockets = initMasks(&readmask);
1679 
1680  for (;;)
1681  {
1682  fd_set rmask;
1683  int selres;
1684  time_t now;
1685 
1686  /*
1687  * Wait for a connection request to arrive.
1688  *
1689  * We block all signals except while sleeping. That makes it safe for
1690  * signal handlers, which again block all signals while executing, to
1691  * do nontrivial work.
1692  *
1693  * If we are in PM_WAIT_DEAD_END state, then we don't want to accept
1694  * any new connections, so we don't call select(), and just sleep.
1695  */
1696  memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
1697 
1698  if (pmState == PM_WAIT_DEAD_END)
1699  {
1701 
1702  pg_usleep(100000L); /* 100 msec seems reasonable */
1703  selres = 0;
1704 
1705  PG_SETMASK(&BlockSig);
1706  }
1707  else
1708  {
1709  /* must set timeout each time; some OSes change it! */
1710  struct timeval timeout;
1711 
1712  /* Needs to run with blocked signals! */
1713  DetermineSleepTime(&timeout);
1714 
1716 
1717  selres = select(nSockets, &rmask, NULL, NULL, &timeout);
1718 
1719  PG_SETMASK(&BlockSig);
1720  }
1721 
1722  /* Now check the select() result */
1723  if (selres < 0)
1724  {
1725  if (errno != EINTR && errno != EWOULDBLOCK)
1726  {
1727  ereport(LOG,
1729  errmsg("select() failed in postmaster: %m")));
1730  return STATUS_ERROR;
1731  }
1732  }
1733 
1734  /*
1735  * New connection pending on any of our sockets? If so, fork a child
1736  * process to deal with it.
1737  */
1738  if (selres > 0)
1739  {
1740  int i;
1741 
1742  for (i = 0; i < MAXLISTEN; i++)
1743  {
1744  if (ListenSocket[i] == PGINVALID_SOCKET)
1745  break;
1746  if (FD_ISSET(ListenSocket[i], &rmask))
1747  {
1748  Port *port;
1749 
1750  port = ConnCreate(ListenSocket[i]);
1751  if (port)
1752  {
1753  BackendStartup(port);
1754 
1755  /*
1756  * We no longer need the open socket or port structure
1757  * in this process
1758  */
1759  StreamClose(port->sock);
1760  ConnFree(port);
1761  }
1762  }
1763  }
1764  }
1765 
1766  /* If we have lost the log collector, try to start a new one */
1767  if (SysLoggerPID == 0 && Logging_collector)
1769 
1770  /*
1771  * If no background writer process is running, and we are not in a
1772  * state that prevents it, start one. It doesn't matter if this
1773  * fails, we'll just try again later. Likewise for the checkpointer.
1774  */
1775  if (pmState == PM_RUN || pmState == PM_RECOVERY ||
1777  {
1778  if (CheckpointerPID == 0)
1780  if (BgWriterPID == 0)
1782  }
1783 
1784  /*
1785  * Likewise, if we have lost the walwriter process, try to start a new
1786  * one. But this is needed only in normal operation (else we cannot
1787  * be writing any new WAL).
1788  */
1789  if (WalWriterPID == 0 && pmState == PM_RUN)
1791 
1792  /*
1793  * If we have lost the autovacuum launcher, try to start a new one. We
1794  * don't want autovacuum to run in binary upgrade mode because
1795  * autovacuum might update relfrozenxid for empty tables before the
1796  * physical files are put in place.
1797  */
1798  if (!IsBinaryUpgrade && AutoVacPID == 0 &&
1800  pmState == PM_RUN)
1801  {
1803  if (AutoVacPID != 0)
1804  start_autovac_launcher = false; /* signal processed */
1805  }
1806 
1807  /* If we have lost the stats collector, try to start a new one */
1808  if (PgStatPID == 0 &&
1809  (pmState == PM_RUN || pmState == PM_HOT_STANDBY))
1810  PgStatPID = pgstat_start();
1811 
1812  /* If we have lost the archiver, try to start a new one. */
1813  if (PgArchPID == 0 && PgArchStartupAllowed())
1814  PgArchPID = pgarch_start();
1815 
1816  /* If we need to signal the autovacuum launcher, do so now */
1818  {
1819  avlauncher_needs_signal = false;
1820  if (AutoVacPID != 0)
1821  kill(AutoVacPID, SIGUSR2);
1822  }
1823 
1824  /* If we need to start a WAL receiver, try to do that now */
1827 
1828  /* Get other worker processes running, if needed */
1831 
1832 #ifdef HAVE_PTHREAD_IS_THREADED_NP
1833 
1834  /*
1835  * With assertions enabled, check regularly for appearance of
1836  * additional threads. All builds check at start and exit.
1837  */
1838  Assert(pthread_is_threaded_np() == 0);
1839 #endif
1840 
1841  /*
1842  * Lastly, check to see if it's time to do some things that we don't
1843  * want to do every single time through the loop, because they're a
1844  * bit expensive. Note that there's up to a minute of slop in when
1845  * these tasks will be performed, since DetermineSleepTime() will let
1846  * us sleep at most that long; except for SIGKILL timeout which has
1847  * special-case logic there.
1848  */
1849  now = time(NULL);
1850 
1851  /*
1852  * If we already sent SIGQUIT to children and they are slow to shut
1853  * down, it's time to send them SIGKILL. This doesn't happen
1854  * normally, but under certain conditions backends can get stuck while
1855  * shutting down. This is a last measure to get them unwedged.
1856  *
1857  * Note we also do this during recovery from a process crash.
1858  */
1859  if ((Shutdown >= ImmediateShutdown || (FatalError && !SendStop)) &&
1860  AbortStartTime != 0 &&
1862  {
1863  /* We were gentle with them before. Not anymore */
1865  /* reset flag so we don't SIGKILL again */
1866  AbortStartTime = 0;
1867  }
1868 
1869  /*
1870  * Once a minute, verify that postmaster.pid hasn't been removed or
1871  * overwritten. If it has, we force a shutdown. This avoids having
1872  * postmasters and child processes hanging around after their database
1873  * is gone, and maybe causing problems if a new database cluster is
1874  * created in the same place. It also provides some protection
1875  * against a DBA foolishly removing postmaster.pid and manually
1876  * starting a new postmaster. Data corruption is likely to ensue from
1877  * that anyway, but we can minimize the damage by aborting ASAP.
1878  */
1879  if (now - last_lockfile_recheck_time >= 1 * SECS_PER_MINUTE)
1880  {
1881  if (!RecheckDataDirLockFile())
1882  {
1883  ereport(LOG,
1884  (errmsg("performing immediate shutdown because data directory lock file is invalid")));
1885  kill(MyProcPid, SIGQUIT);
1886  }
1887  last_lockfile_recheck_time = now;
1888  }
1889 
1890  /*
1891  * Touch Unix socket and lock files every 58 minutes, to ensure that
1892  * they are not removed by overzealous /tmp-cleaning tasks. We assume
1893  * no one runs cleaners with cutoff times of less than an hour ...
1894  */
1895  if (now - last_touch_time >= 58 * SECS_PER_MINUTE)
1896  {
1897  TouchSocketFiles();
1899  last_touch_time = now;
1900  }
1901  }
1902 }
1903 
1904 /*
1905  * Initialise the masks for select() for the ports we are listening on.
1906  * Return the number of sockets to listen on.
1907  */
1908 static int
1909 initMasks(fd_set *rmask)
1910 {
1911  int maxsock = -1;
1912  int i;
1913 
1914  FD_ZERO(rmask);
1915 
1916  for (i = 0; i < MAXLISTEN; i++)
1917  {
1918  int fd = ListenSocket[i];
1919 
1920  if (fd == PGINVALID_SOCKET)
1921  break;
1922  FD_SET(fd, rmask);
1923 
1924  if (fd > maxsock)
1925  maxsock = fd;
1926  }
1927 
1928  return maxsock + 1;
1929 }
1930 
1931 
1932 /*
1933  * Read a client's startup packet and do something according to it.
1934  *
1935  * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1936  * not return at all.
1937  *
1938  * (Note that ereport(FATAL) stuff is sent to the client, so only use it
1939  * if that's what you want. Return STATUS_ERROR if you don't want to
1940  * send anything to the client, which would typically be appropriate
1941  * if we detect a communications failure.)
1942  */
1943 static int
1945 {
1946  int32 len;
1947  void *buf;
1948  ProtocolVersion proto;
1949  MemoryContext oldcontext;
1950 
1951  pq_startmsgread();
1952  if (pq_getbytes((char *) &len, 4) == EOF)
1953  {
1954  /*
1955  * EOF after SSLdone probably means the client didn't like our
1956  * response to NEGOTIATE_SSL_CODE. That's not an error condition, so
1957  * don't clutter the log with a complaint.
1958  */
1959  if (!SSLdone)
1961  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1962  errmsg("incomplete startup packet")));
1963  return STATUS_ERROR;
1964  }
1965 
1966  len = ntohl(len);
1967  len -= 4;
1968 
1969  if (len < (int32) sizeof(ProtocolVersion) ||
1971  {
1973  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1974  errmsg("invalid length of startup packet")));
1975  return STATUS_ERROR;
1976  }
1977 
1978  /*
1979  * Allocate at least the size of an old-style startup packet, plus one
1980  * extra byte, and make sure all are zeroes. This ensures we will have
1981  * null termination of all strings, in both fixed- and variable-length
1982  * packet layouts.
1983  */
1984  if (len <= (int32) sizeof(StartupPacket))
1985  buf = palloc0(sizeof(StartupPacket) + 1);
1986  else
1987  buf = palloc0(len + 1);
1988 
1989  if (pq_getbytes(buf, len) == EOF)
1990  {
1992  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1993  errmsg("incomplete startup packet")));
1994  return STATUS_ERROR;
1995  }
1996  pq_endmsgread();
1997 
1998  /*
1999  * The first field is either a protocol version number or a special
2000  * request code.
2001  */
2002  port->proto = proto = ntohl(*((ProtocolVersion *) buf));
2003 
2004  if (proto == CANCEL_REQUEST_CODE)
2005  {
2006  processCancelRequest(port, buf);
2007  /* Not really an error, but we don't want to proceed further */
2008  return STATUS_ERROR;
2009  }
2010 
2011  if (proto == NEGOTIATE_SSL_CODE && !SSLdone)
2012  {
2013  char SSLok;
2014 
2015 #ifdef USE_SSL
2016  /* No SSL when disabled or on Unix sockets */
2017  if (!LoadedSSL || IS_AF_UNIX(port->laddr.addr.ss_family))
2018  SSLok = 'N';
2019  else
2020  SSLok = 'S'; /* Support for SSL */
2021 #else
2022  SSLok = 'N'; /* No support for SSL */
2023 #endif
2024 
2025 retry1:
2026  if (send(port->sock, &SSLok, 1, 0) != 1)
2027  {
2028  if (errno == EINTR)
2029  goto retry1; /* if interrupted, just retry */
2032  errmsg("failed to send SSL negotiation response: %m")));
2033  return STATUS_ERROR; /* close the connection */
2034  }
2035 
2036 #ifdef USE_SSL
2037  if (SSLok == 'S' && secure_open_server(port) == -1)
2038  return STATUS_ERROR;
2039 #endif
2040  /* regular startup packet, cancel, etc packet should follow... */
2041  /* but not another SSL negotiation request */
2042  return ProcessStartupPacket(port, true);
2043  }
2044 
2045  /* Could add additional special packet types here */
2046 
2047  /*
2048  * Set FrontendProtocol now so that ereport() knows what format to send if
2049  * we fail during startup.
2050  */
2051  FrontendProtocol = proto;
2052 
2053  /* Check we can handle the protocol the frontend is using. */
2054 
2059  ereport(FATAL,
2060  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2061  errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
2062  PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
2066 
2067  /*
2068  * Now fetch parameters out of startup packet and save them into the Port
2069  * structure. All data structures attached to the Port struct must be
2070  * allocated in TopMemoryContext so that they will remain available in a
2071  * running backend (even after PostmasterContext is destroyed). We need
2072  * not worry about leaking this storage on failure, since we aren't in the
2073  * postmaster process anymore.
2074  */
2076 
2077  if (PG_PROTOCOL_MAJOR(proto) >= 3)
2078  {
2079  int32 offset = sizeof(ProtocolVersion);
2080 
2081  /*
2082  * Scan packet body for name/option pairs. We can assume any string
2083  * beginning within the packet body is null-terminated, thanks to
2084  * zeroing extra byte above.
2085  */
2086  port->guc_options = NIL;
2087 
2088  while (offset < len)
2089  {
2090  char *nameptr = ((char *) buf) + offset;
2091  int32 valoffset;
2092  char *valptr;
2093 
2094  if (*nameptr == '\0')
2095  break; /* found packet terminator */
2096  valoffset = offset + strlen(nameptr) + 1;
2097  if (valoffset >= len)
2098  break; /* missing value, will complain below */
2099  valptr = ((char *) buf) + valoffset;
2100 
2101  if (strcmp(nameptr, "database") == 0)
2102  port->database_name = pstrdup(valptr);
2103  else if (strcmp(nameptr, "user") == 0)
2104  port->user_name = pstrdup(valptr);
2105  else if (strcmp(nameptr, "options") == 0)
2106  port->cmdline_options = pstrdup(valptr);
2107  else if (strcmp(nameptr, "replication") == 0)
2108  {
2109  /*
2110  * Due to backward compatibility concerns the replication
2111  * parameter is a hybrid beast which allows the value to be
2112  * either boolean or the string 'database'. The latter
2113  * connects to a specific database which is e.g. required for
2114  * logical decoding.
2115  */
2116  if (strcmp(valptr, "database") == 0)
2117  {
2118  am_walsender = true;
2119  am_db_walsender = true;
2120  }
2121  else if (!parse_bool(valptr, &am_walsender))
2122  ereport(FATAL,
2123  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2124  errmsg("invalid value for parameter \"%s\": \"%s\"",
2125  "replication",
2126  valptr),
2127  errhint("Valid values are: \"false\", 0, \"true\", 1, \"database\".")));
2128  }
2129  else
2130  {
2131  /* Assume it's a generic GUC option */
2132  port->guc_options = lappend(port->guc_options,
2133  pstrdup(nameptr));
2134  port->guc_options = lappend(port->guc_options,
2135  pstrdup(valptr));
2136  }
2137  offset = valoffset + strlen(valptr) + 1;
2138  }
2139 
2140  /*
2141  * If we didn't find a packet terminator exactly at the end of the
2142  * given packet length, complain.
2143  */
2144  if (offset != len - 1)
2145  ereport(FATAL,
2146  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2147  errmsg("invalid startup packet layout: expected terminator as last byte")));
2148  }
2149  else
2150  {
2151  /*
2152  * Get the parameters from the old-style, fixed-width-fields startup
2153  * packet as C strings. The packet destination was cleared first so a
2154  * short packet has zeros silently added. We have to be prepared to
2155  * truncate the pstrdup result for oversize fields, though.
2156  */
2157  StartupPacket *packet = (StartupPacket *) buf;
2158 
2159  port->database_name = pstrdup(packet->database);
2160  if (strlen(port->database_name) > sizeof(packet->database))
2161  port->database_name[sizeof(packet->database)] = '\0';
2162  port->user_name = pstrdup(packet->user);
2163  if (strlen(port->user_name) > sizeof(packet->user))
2164  port->user_name[sizeof(packet->user)] = '\0';
2165  port->cmdline_options = pstrdup(packet->options);
2166  if (strlen(port->cmdline_options) > sizeof(packet->options))
2167  port->cmdline_options[sizeof(packet->options)] = '\0';
2168  port->guc_options = NIL;
2169  }
2170 
2171  /* Check a user name was given. */
2172  if (port->user_name == NULL || port->user_name[0] == '\0')
2173  ereport(FATAL,
2174  (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
2175  errmsg("no PostgreSQL user name specified in startup packet")));
2176 
2177  /* The database defaults to the user name. */
2178  if (port->database_name == NULL || port->database_name[0] == '\0')
2179  port->database_name = pstrdup(port->user_name);
2180 
2181  if (Db_user_namespace)
2182  {
2183  /*
2184  * If the name has the form user@, it is a global user, so remove the
2185  * '@'. We only do this when the first '@' in the user string is also
2186  * its last character; otherwise a client could pose as a local user of
2187  * another database while attaching to this database.
2188  */
2189  if (strchr(port->user_name, '@') ==
2190  port->user_name + strlen(port->user_name) - 1)
2191  *strchr(port->user_name, '@') = '\0';
2192  else
2193  {
2194  /* Append '@' and dbname */
2195  port->user_name = psprintf("%s@%s", port->user_name, port->database_name);
2196  }
2197  }
2198 
2199  /*
2200  * Truncate given database and user names to length of a Postgres name.
2201  * This avoids lookup failures when overlength names are given.
2202  */
2203  if (strlen(port->database_name) >= NAMEDATALEN)
2204  port->database_name[NAMEDATALEN - 1] = '\0';
2205  if (strlen(port->user_name) >= NAMEDATALEN)
2206  port->user_name[NAMEDATALEN - 1] = '\0';
2207 
2208  /*
2209  * Normal walsender backends, e.g. for streaming replication, are not
2210  * connected to a particular database. But walsenders used for logical
2211  * replication need to connect to a specific database. We allow streaming
2212  * replication commands to be issued even if connected to a database as it
2213  * can make sense to first make a basebackup and then stream changes
2214  * starting from that.
2215  */
2216  if (am_walsender && !am_db_walsender)
2217  port->database_name[0] = '\0';
2218 
2219  /*
2220  * Done putting stuff in TopMemoryContext.
2221  */
2222  MemoryContextSwitchTo(oldcontext);
2223 
2224  /*
2225  * If we're going to reject the connection due to database state, say so
2226  * now instead of wasting cycles on an authentication exchange. (This also
2227  * allows a pg_ping utility to be written.)
2228  */
2229  switch (port->canAcceptConnections)
2230  {
2231  case CAC_STARTUP:
2232  ereport(FATAL,
2234  errmsg("the database system is starting up")));
2235  break;
2236  case CAC_SHUTDOWN:
2237  ereport(FATAL,
2239  errmsg("the database system is shutting down")));
2240  break;
2241  case CAC_RECOVERY:
2242  ereport(FATAL,
2244  errmsg("the database system is in recovery mode")));
2245  break;
2246  case CAC_TOOMANY:
2247  ereport(FATAL,
2248  (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
2249  errmsg("sorry, too many clients already")));
2250  break;
2251  case CAC_WAITBACKUP:
2252  /* OK for now, will check in InitPostgres */
2253  break;
2254  case CAC_OK:
2255  break;
2256  }
2257 
2258  return STATUS_OK;
2259 }
2260 
2261 
2262 /*
2263  * The client has sent a cancel request packet, not a normal
2264  * start-a-new-connection packet. Perform the necessary processing.
2265  * Nothing is sent back to the client.
2266  */
2267 static void
2269 {
2270  CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
2271  int backendPID;
2272  int32 cancelAuthCode;
2273  Backend *bp;
2274 
2275 #ifndef EXEC_BACKEND
2276  dlist_iter iter;
2277 #else
2278  int i;
2279 #endif
2280 
2281  backendPID = (int) ntohl(canc->backendPID);
2282  cancelAuthCode = (int32) ntohl(canc->cancelAuthCode);
2283 
2284  /*
2285  * See if we have a matching backend. In the EXEC_BACKEND case, we can no
2286  * longer access the postmaster's own backend list, and must rely on the
2287  * duplicate array in shared memory.
2288  */
2289 #ifndef EXEC_BACKEND
2290  dlist_foreach(iter, &BackendList)
2291  {
2292  bp = dlist_container(Backend, elem, iter.cur);
2293 #else
2294  for (i = MaxLivePostmasterChildren() - 1; i >= 0; i--)
2295  {
2296  bp = (Backend *) &ShmemBackendArray[i];
2297 #endif
2298  if (bp->pid == backendPID)
2299  {
2300  if (bp->cancel_key == cancelAuthCode)
2301  {
2302  /* Found a match; signal that backend to cancel current op */
2303  ereport(DEBUG2,
2304  (errmsg_internal("processing cancel request: sending SIGINT to process %d",
2305  backendPID)));
2306  signal_child(bp->pid, SIGINT);
2307  }
2308  else
2309  /* Right PID, wrong key: no way, Jose */
2310  ereport(LOG,
2311  (errmsg("wrong key in cancel request for process %d",
2312  backendPID)));
2313  return;
2314  }
2315  }
2316 
2317  /* No matching backend */
2318  ereport(LOG,
2319  (errmsg("PID %d in cancel request did not match any process",
2320  backendPID)));
2321 }
2322 
2323 /*
2324  * canAcceptConnections --- check to see if database state allows connections.
2325  */
2326 static CAC_state
2328 {
2330 
2331  /*
2332  * Can't start backends when in startup/shutdown/inconsistent recovery
2333  * state.
2334  *
2335  * In state PM_WAIT_BACKUP only superusers can connect (this must be
2336  * allowed so that a superuser can end online backup mode); we return
2337  * CAC_WAITBACKUP code to indicate that this must be checked later. Note
2338  * that neither CAC_OK nor CAC_WAITBACKUP can safely be returned until we
2339  * have checked for too many children.
2340  */
2341  if (pmState != PM_RUN)
2342  {
2343  if (pmState == PM_WAIT_BACKUP)
2344  result = CAC_WAITBACKUP; /* allow superusers only */
2345  else if (Shutdown > NoShutdown)
2346  return CAC_SHUTDOWN; /* shutdown is pending */
2347  else if (!FatalError &&
2348  (pmState == PM_STARTUP ||
2349  pmState == PM_RECOVERY))
2350  return CAC_STARTUP; /* normal startup */
2351  else if (!FatalError &&
2353  result = CAC_OK; /* connection OK during hot standby */
2354  else
2355  return CAC_RECOVERY; /* else must be crash recovery */
2356  }
2357 
2358  /*
2359  * Don't start too many children.
2360  *
2361  * We allow more connections than we can have backends here because some
2362  * might still be authenticating; they might fail auth, or some existing
2363  * backend might exit before the auth cycle is completed. The exact
2364  * MaxBackends limit is enforced when a new backend tries to join the
2365  * shared-inval backend array.
2366  *
2367  * The limit here must match the sizes of the per-child-process arrays;
2368  * see comments for MaxLivePostmasterChildren().
2369  */
2371  result = CAC_TOOMANY;
2372 
2373  return result;
2374 }
2375 
2376 
2377 /*
2378  * ConnCreate -- create a local connection data structure
2379  *
2380  * Returns NULL on failure, other than out-of-memory which is fatal.
2381  */
2382 static Port *
2383 ConnCreate(int serverFd)
2384 {
2385  Port *port;
2386 
2387  if (!(port = (Port *) calloc(1, sizeof(Port))))
2388  {
2389  ereport(LOG,
2390  (errcode(ERRCODE_OUT_OF_MEMORY),
2391  errmsg("out of memory")));
2392  ExitPostmaster(1);
2393  }
2394 
2395  if (StreamConnection(serverFd, port) != STATUS_OK)
2396  {
2397  if (port->sock != PGINVALID_SOCKET)
2398  StreamClose(port->sock);
2399  ConnFree(port);
2400  return NULL;
2401  }
2402 
2403  /*
2404  * Allocate GSSAPI specific state struct
2405  */
2406 #ifndef EXEC_BACKEND
2407 #if defined(ENABLE_GSS) || defined(ENABLE_SSPI)
2408  port->gss = (pg_gssinfo *) calloc(1, sizeof(pg_gssinfo));
2409  if (!port->gss)
2410  {
2411  ereport(LOG,
2412  (errcode(ERRCODE_OUT_OF_MEMORY),
2413  errmsg("out of memory")));
2414  ExitPostmaster(1);
2415  }
2416 #endif
2417 #endif
2418 
2419  return port;
2420 }
2421 
2422 
2423 /*
2424  * ConnFree -- free a local connection data structure
2425  */
2426 static void
2428 {
2429 #ifdef USE_SSL
2430  secure_close(conn);
2431 #endif
2432  if (conn->gss)
2433  free(conn->gss);
2434  free(conn);
2435 }
2436 
2437 
2438 /*
2439  * ClosePostmasterPorts -- close all the postmaster's open sockets
2440  *
2441  * This is called during child process startup to release file descriptors
2442  * that are not needed by that child process. The postmaster still has
2443  * them open, of course.
2444  *
2445  * Note: we pass am_syslogger as a boolean because we don't want to set
2446  * the global variable yet when this is called.
2447  */
2448 void
2450 {
2451  int i;
2452 
2453 #ifndef WIN32
2454 
2455  /*
2456  * Close the write end of postmaster death watch pipe. It's important to
2457  * do this as early as possible, so that if postmaster dies, others won't
2458  * think that it's still running because we're holding the pipe open.
2459  */
2461  ereport(FATAL,
2463  errmsg_internal("could not close postmaster death monitoring pipe in child process: %m")));
2465 #endif
2466 
2467  /* Close the listen sockets */
2468  for (i = 0; i < MAXLISTEN; i++)
2469  {
2470  if (ListenSocket[i] != PGINVALID_SOCKET)
2471  {
2474  }
2475  }
2476 
2477  /* If using syslogger, close the read side of the pipe */
2478  if (!am_syslogger)
2479  {
2480 #ifndef WIN32
2481  if (syslogPipe[0] >= 0)
2482  close(syslogPipe[0]);
2483  syslogPipe[0] = -1;
2484 #else
2485  if (syslogPipe[0])
2486  CloseHandle(syslogPipe[0]);
2487  syslogPipe[0] = 0;
2488 #endif
2489  }
2490 
2491 #ifdef USE_BONJOUR
2492  /* If using Bonjour, close the connection to the mDNS daemon */
2493  if (bonjour_sdref)
2494  close(DNSServiceRefSockFD(bonjour_sdref));
2495 #endif
2496 }
2497 
2498 
2499 /*
2500  * reset_shared -- reset shared memory and semaphores
2501  */
2502 static void
2503 reset_shared(int port)
2504 {
2505  /*
2506  * Create or re-create shared memory and semaphores.
2507  *
2508  * Note: in each "cycle of life" we will normally assign the same IPC keys
2509  * (if using SysV shmem and/or semas), since the port number is used to
2510  * determine IPC keys. This helps ensure that we will clean up dead IPC
2511  * objects if the postmaster crashes and is restarted.
2512  */
2513  CreateSharedMemoryAndSemaphores(false, port);
2514 }
2515 
2516 
2517 /*
2518  * SIGHUP -- reread config files, and tell children to do same
2519  */
2520 static void
2522 {
2523  int save_errno = errno;
2524 
2525  PG_SETMASK(&BlockSig);
2526 
2527  if (Shutdown <= SmartShutdown)
2528  {
2529  ereport(LOG,
2530  (errmsg("received SIGHUP, reloading configuration files")));
2533  if (StartupPID != 0)
2535  if (BgWriterPID != 0)
2537  if (CheckpointerPID != 0)
2539  if (WalWriterPID != 0)
2541  if (WalReceiverPID != 0)
2543  if (AutoVacPID != 0)
2545  if (PgArchPID != 0)
2547  if (SysLoggerPID != 0)
2549  if (PgStatPID != 0)
2551 
2552  /* Reload authentication config files too */
2553  if (!load_hba())
2554  ereport(LOG,
2555  (errmsg("pg_hba.conf was not reloaded")));
2556 
2557  if (!load_ident())
2558  ereport(LOG,
2559  (errmsg("pg_ident.conf was not reloaded")));
2560 
2561 #ifdef USE_SSL
2562  /* Reload SSL configuration as well */
2563  if (EnableSSL)
2564  {
2565  if (secure_initialize(false) == 0)
2566  LoadedSSL = true;
2567  else
2568  ereport(LOG,
2569  (errmsg("SSL configuration was not reloaded")));
2570  }
2571  else
2572  {
2573  secure_destroy();
2574  LoadedSSL = false;
2575  }
2576 #endif
2577 
2578 #ifdef EXEC_BACKEND
2579  /* Update the starting-point file for future children */
2580  write_nondefault_variables(PGC_SIGHUP);
2581 #endif
2582  }
2583 
2585 
2586  errno = save_errno;
2587 }
2588 
2589 
2590 /*
2591  * pmdie -- signal handler for processing various postmaster signals.
2592  */
2593 static void
2595 {
2596  int save_errno = errno;
2597 
2598  PG_SETMASK(&BlockSig);
2599 
2600  ereport(DEBUG2,
2601  (errmsg_internal("postmaster received signal %d",
2602  postgres_signal_arg)));
2603 
2604  switch (postgres_signal_arg)
2605  {
2606  case SIGTERM:
2607 
2608  /*
2609  * Smart Shutdown:
2610  *
2611  * Wait for children to end their work, then shut down.
2612  */
2613  if (Shutdown >= SmartShutdown)
2614  break;
2616  ereport(LOG,
2617  (errmsg("received smart shutdown request")));
2618 
2619  /* Report status */
2621 #ifdef USE_SYSTEMD
2622  sd_notify(0, "STOPPING=1");
2623 #endif
2624 
2625  if (pmState == PM_RUN || pmState == PM_RECOVERY ||
2627  {
2628  /* autovac workers are told to shut down immediately */
2629  /* and bgworkers too; does this need tweaking? */
2630  SignalSomeChildren(SIGTERM,
2632  /* and the autovac launcher too */
2633  if (AutoVacPID != 0)
2634  signal_child(AutoVacPID, SIGTERM);
2635  /* and the bgwriter too */
2636  if (BgWriterPID != 0)
2637  signal_child(BgWriterPID, SIGTERM);
2638  /* and the walwriter too */
2639  if (WalWriterPID != 0)
2640  signal_child(WalWriterPID, SIGTERM);
2641 
2642  /*
2643  * If we're in recovery, we can't kill the startup process
2644  * right away, because at present doing so does not release
2645  * its locks. We might want to change this in a future
2646  * release. For the time being, the PM_WAIT_READONLY state
2647  * indicates that we're waiting for the regular (read only)
2648  * backends to die off; once they do, we'll kill the startup
2649  * and walreceiver processes.
2650  */
2651  pmState = (pmState == PM_RUN) ?
2653  }
2654 
2655  /*
2656  * Now wait for online backup mode to end and backends to exit. If
2657  * that is already the case, PostmasterStateMachine will take the
2658  * next step.
2659  */
2661  break;
2662 
2663  case SIGINT:
2664 
2665  /*
2666  * Fast Shutdown:
2667  *
2668  * Abort all children with SIGTERM (rollback active transactions
2669  * and exit) and shut down when they are gone.
2670  */
2671  if (Shutdown >= FastShutdown)
2672  break;
2674  ereport(LOG,
2675  (errmsg("received fast shutdown request")));
2676 
2677  /* Report status */
2679 #ifdef USE_SYSTEMD
2680  sd_notify(0, "STOPPING=1");
2681 #endif
2682 
2683  if (StartupPID != 0)
2684  signal_child(StartupPID, SIGTERM);
2685  if (BgWriterPID != 0)
2686  signal_child(BgWriterPID, SIGTERM);
2687  if (WalReceiverPID != 0)
2688  signal_child(WalReceiverPID, SIGTERM);
2689  if (pmState == PM_RECOVERY)
2690  {
2692 
2693  /*
2694  * Only startup, bgwriter, walreceiver, possibly bgworkers,
2695  * and/or checkpointer should be active in this state; we just
2696  * signaled the first four, and we don't want to kill
2697  * checkpointer yet.
2698  */
2700  }
2701  else if (pmState == PM_RUN ||
2702  pmState == PM_WAIT_BACKUP ||
2706  {
2707  ereport(LOG,
2708  (errmsg("aborting any active transactions")));
2709  /* shut down all backends and workers */
2710  SignalSomeChildren(SIGTERM,
2713  /* and the autovac launcher too */
2714  if (AutoVacPID != 0)
2715  signal_child(AutoVacPID, SIGTERM);
2716  /* and the walwriter too */
2717  if (WalWriterPID != 0)
2718  signal_child(WalWriterPID, SIGTERM);
2720  }
2721 
2722  /*
2723  * Now wait for backends to exit. If there are none,
2724  * PostmasterStateMachine will take the next step.
2725  */
2727  break;
2728 
2729  case SIGQUIT:
2730 
2731  /*
2732  * Immediate Shutdown:
2733  *
2734  * Abort all children with SIGQUIT, wait for them to exit,
2735  * terminate remaining ones with SIGKILL, then exit without
2736  * attempting to shut down the database system properly.
2737  */
2738  if (Shutdown >= ImmediateShutdown)
2739  break;
2741  ereport(LOG,
2742  (errmsg("received immediate shutdown request")));
2743 
2744  /* Report status */
2746 #ifdef USE_SYSTEMD
2747  sd_notify(0, "STOPPING=1");
2748 #endif
2749 
2752 
2753  /* set stopwatch for them to die */
2754  AbortStartTime = time(NULL);
2755 
2756  /*
2757  * Now wait for backends to exit. If there are none,
2758  * PostmasterStateMachine will take the next step.
2759  */
2761  break;
2762  }
2763 
2765 
2766  errno = save_errno;
2767 }
2768 
2769 /*
2770  * Reaper -- signal handler to cleanup after a child process dies.
2771  */
2772 static void
2774 {
2775  int save_errno = errno;
2776  int pid; /* process id of dead child process */
2777  int exitstatus; /* its exit status */
2778 
2779  PG_SETMASK(&BlockSig);
2780 
2781  ereport(DEBUG4,
2782  (errmsg_internal("reaping dead processes")));
2783 
2784  while ((pid = waitpid(-1, &exitstatus, WNOHANG)) > 0)
2785  {
2786  /*
2787  * Check if this child was a startup process.
2788  */
2789  if (pid == StartupPID)
2790  {
2791  StartupPID = 0;
2792 
2793  /*
2794  * Startup process exited in response to a shutdown request (or it
2795  * completed normally regardless of the shutdown request).
2796  */
2797  if (Shutdown > NoShutdown &&
2798  (EXIT_STATUS_0(exitstatus) || EXIT_STATUS_1(exitstatus)))
2799  {
2802  /* PostmasterStateMachine logic does the rest */
2803  continue;
2804  }
2805 
2806  if (EXIT_STATUS_3(exitstatus))
2807  {
2808  ereport(LOG,
2809  (errmsg("shutdown at recovery target")));
2812  TerminateChildren(SIGTERM);
2814  /* PostmasterStateMachine logic does the rest */
2815  continue;
2816  }
2817 
2818  /*
2819  * Unexpected exit of startup process (including FATAL exit)
2820  * during PM_STARTUP is treated as catastrophic. There are no
2821  * other processes running yet, so we can just exit.
2822  */
2823  if (pmState == PM_STARTUP && !EXIT_STATUS_0(exitstatus))
2824  {
2825  LogChildExit(LOG, _("startup process"),
2826  pid, exitstatus);
2827  ereport(LOG,
2828  (errmsg("aborting startup due to startup process failure")));
2829  ExitPostmaster(1);
2830  }
2831 
2832  /*
2833  * After PM_STARTUP, any unexpected exit (including FATAL exit) of
2834  * the startup process is catastrophic, so kill other children,
2835  * and set StartupStatus so we don't try to reinitialize after
2836  * they're gone. Exception: if StartupStatus is STARTUP_SIGNALED,
2837  * then we previously sent the startup process a SIGQUIT; so
2838  * that's probably the reason it died, and we do want to try to
2839  * restart in that case.
2840  */
2841  if (!EXIT_STATUS_0(exitstatus))
2842  {
2845  else
2847  HandleChildCrash(pid, exitstatus,
2848  _("startup process"));
2849  continue;
2850  }
2851 
2852  /*
2853  * Startup succeeded, commence normal operations
2854  */
2856  FatalError = false;
2857  Assert(AbortStartTime == 0);
2858  ReachedNormalRunning = true;
2859  pmState = PM_RUN;
2860 
2861  /*
2862  * Crank up the background tasks, if we didn't do that already
2863  * when we entered consistent recovery state. It doesn't matter
2864  * if this fails, we'll just try again later.
2865  */
2866  if (CheckpointerPID == 0)
2868  if (BgWriterPID == 0)
2870  if (WalWriterPID == 0)
2872 
2873  /*
2874  * Likewise, start other special children as needed. In a restart
2875  * situation, some of them may be alive already.
2876  */
2879  if (PgArchStartupAllowed() && PgArchPID == 0)
2880  PgArchPID = pgarch_start();
2881  if (PgStatPID == 0)
2882  PgStatPID = pgstat_start();
2883 
2884  /* workers may be scheduled to start now */
2886 
2887  /* at this point we are really open for business */
2888  ereport(LOG,
2889  (errmsg("database system is ready to accept connections")));
2890 
2891  /* Report status */
2893 #ifdef USE_SYSTEMD
2894  sd_notify(0, "READY=1");
2895 #endif
2896 
2897  continue;
2898  }
2899 
2900  /*
2901  * Was it the bgwriter? Normal exit can be ignored; we'll start a new
2902  * one at the next iteration of the postmaster's main loop, if
2903  * necessary. Any other exit condition is treated as a crash.
2904  */
2905  if (pid == BgWriterPID)
2906  {
2907  BgWriterPID = 0;
2908  if (!EXIT_STATUS_0(exitstatus))
2909  HandleChildCrash(pid, exitstatus,
2910  _("background writer process"));
2911  continue;
2912  }
2913 
2914  /*
2915  * Was it the checkpointer?
2916  */
2917  if (pid == CheckpointerPID)
2918  {
2919  CheckpointerPID = 0;
2920  if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN)
2921  {
2922  /*
2923  * OK, we saw normal exit of the checkpointer after it's been
2924  * told to shut down. We expect that it wrote a shutdown
2925  * checkpoint. (If for some reason it didn't, recovery will
2926  * occur on next postmaster start.)
2927  *
2928  * At this point we should have no normal backend children
2929  * left (else we'd not be in PM_SHUTDOWN state) but we might
2930  * have dead_end children to wait for.
2931  *
2932  * If we have an archiver subprocess, tell it to do a last
2933  * archive cycle and quit. Likewise, if we have walsender
2934  * processes, tell them to send any remaining WAL and quit.
2935  */
2937 
2938  /* Waken archiver for the last time */
2939  if (PgArchPID != 0)
2941 
2942  /*
2943  * Waken walsenders for the last time. No regular backends
2944  * should be around anymore.
2945  */
2947 
2949 
2950  /*
2951  * We can also shut down the stats collector now; there's
2952  * nothing left for it to do.
2953  */
2954  if (PgStatPID != 0)
2956  }
2957  else
2958  {
2959  /*
2960  * Any unexpected exit of the checkpointer (including FATAL
2961  * exit) is treated as a crash.
2962  */
2963  HandleChildCrash(pid, exitstatus,
2964  _("checkpointer process"));
2965  }
2966 
2967  continue;
2968  }
2969 
2970  /*
2971  * Was it the wal writer? Normal exit can be ignored; we'll start a
2972  * new one at the next iteration of the postmaster's main loop, if
2973  * necessary. Any other exit condition is treated as a crash.
2974  */
2975  if (pid == WalWriterPID)
2976  {
2977  WalWriterPID = 0;
2978  if (!EXIT_STATUS_0(exitstatus))
2979  HandleChildCrash(pid, exitstatus,
2980  _("WAL writer process"));
2981  continue;
2982  }
2983 
2984  /*
2985  * Was it the wal receiver? If exit status is zero (normal) or one
2986  * (FATAL exit), we assume everything is all right just like normal
2987  * backends. (If we need a new wal receiver, we'll start one at the
2988  * next iteration of the postmaster's main loop.)
2989  */
2990  if (pid == WalReceiverPID)
2991  {
2992  WalReceiverPID = 0;
2993  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
2994  HandleChildCrash(pid, exitstatus,
2995  _("WAL receiver process"));
2996  continue;
2997  }
2998 
2999  /*
3000  * Was it the autovacuum launcher? Normal exit can be ignored; we'll
3001  * start a new one at the next iteration of the postmaster's main
3002  * loop, if necessary. Any other exit condition is treated as a
3003  * crash.
3004  */
3005  if (pid == AutoVacPID)
3006  {
3007  AutoVacPID = 0;
3008  if (!EXIT_STATUS_0(exitstatus))
3009  HandleChildCrash(pid, exitstatus,
3010  _("autovacuum launcher process"));
3011  continue;
3012  }
3013 
3014  /*
3015  * Was it the archiver? If so, just try to start a new one; no need
3016  * to force reset of the rest of the system. (If that fails, we'll try
3017  * again in future cycles of the main loop.) The exception is when we
3018  * were waiting for it to shut down: don't restart it in that case, and
3019  * PostmasterStateMachine() will advance to the next shutdown step.
3020  */
3021  if (pid == PgArchPID)
3022  {
3023  PgArchPID = 0;
3024  if (!EXIT_STATUS_0(exitstatus))
3025  LogChildExit(LOG, _("archiver process"),
3026  pid, exitstatus);
3027  if (PgArchStartupAllowed())
3028  PgArchPID = pgarch_start();
3029  continue;
3030  }
3031 
3032  /*
3033  * Was it the statistics collector? If so, just try to start a new
3034  * one; no need to force reset of the rest of the system. (If fail,
3035  * we'll try again in future cycles of the main loop.)
3036  */
3037  if (pid == PgStatPID)
3038  {
3039  PgStatPID = 0;
3040  if (!EXIT_STATUS_0(exitstatus))
3041  LogChildExit(LOG, _("statistics collector process"),
3042  pid, exitstatus);
3043  if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
3044  PgStatPID = pgstat_start();
3045  continue;
3046  }
3047 
3048  /* Was it the system logger? If so, try to start a new one */
3049  if (pid == SysLoggerPID)
3050  {
3051  SysLoggerPID = 0;
3052  /* for safety's sake, launch new logger *first* */
3054  if (!EXIT_STATUS_0(exitstatus))
3055  LogChildExit(LOG, _("system logger process"),
3056  pid, exitstatus);
3057  continue;
3058  }
3059 
3060  /* Was it one of our background workers? */
3061  if (CleanupBackgroundWorker(pid, exitstatus))
3062  {
3063  /* have it be restarted */
3064  HaveCrashedWorker = true;
3065  continue;
3066  }
3067 
3068  /*
3069  * Else do standard backend child cleanup.
3070  */
3071  CleanupBackend(pid, exitstatus);
3072  } /* loop over pending child-death reports */
3073 
3074  /*
3075  * After cleaning out the SIGCHLD queue, see if we have any state changes
3076  * or actions to make.
3077  */
3079 
3080  /* Done with signal handler */
3082 
3083  errno = save_errno;
3084 }
3085 
3086 /*
3087  * Scan the bgworkers list and see if the given PID (which has just stopped
3088  * or crashed) is in it. Handle its shutdown if so, and return true. If not a
3089  * bgworker, return false.
3090  *
3091  * This is heavily based on CleanupBackend. One important difference is that
3092  * we don't know yet that the dying process is a bgworker, so we must be silent
3093  * until we're sure it is.
3094  */
3095 static bool
3097  int exitstatus) /* child's exit status */
3098 {
3099  char namebuf[MAXPGPATH];
3100  slist_mutable_iter iter;
3101 
3103  {
3104  RegisteredBgWorker *rw;
3105 
3106  rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
3107 
3108  if (rw->rw_pid != pid)
3109  continue;
3110 
3111 #ifdef WIN32
3112  /* see CleanupBackend */
3113  if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3114  exitstatus = 0;
3115 #endif
3116 
3117  snprintf(namebuf, MAXPGPATH, "%s: %s", _("worker process"),
3118  rw->rw_worker.bgw_name);
3119 
3120  if (!EXIT_STATUS_0(exitstatus))
3121  {
3122  /* Record timestamp, so we know when to restart the worker. */
3124  }
3125  else
3126  {
3127  /* Zero exit status means terminate */
3128  rw->rw_crashed_at = 0;
3129  rw->rw_terminate = true;
3130  }
3131 
3132  /*
3133  * Additionally, for shared-memory-connected workers, just like a
3134  * backend, any exit status other than 0 or 1 is considered a crash
3135  * and causes a system-wide restart.
3136  */
3137  if ((rw->rw_worker.bgw_flags & BGWORKER_SHMEM_ACCESS) != 0)
3138  {
3139  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3140  {
3141  HandleChildCrash(pid, exitstatus, namebuf);
3142  return true;
3143  }
3144  }
3145 
3146  /*
3147  * We must release the postmaster child slot whether this worker is
3148  * connected to shared memory or not, but we only treat it as a crash
3149  * if it is in fact connected.
3150  */
3153  {
3154  HandleChildCrash(pid, exitstatus, namebuf);
3155  return true;
3156  }
3157 
3158  /* Get it out of the BackendList and clear out remaining data */
3159  dlist_delete(&rw->rw_backend->elem);
3160 #ifdef EXEC_BACKEND
3161  ShmemBackendArrayRemove(rw->rw_backend);
3162 #endif
3163 
3164  /*
3165  * It's possible that this background worker started some OTHER
3166  * background worker and asked to be notified when that worker started
3167  * or stopped. If so, cancel any notifications destined for the
3168  * now-dead backend.
3169  */
3170  if (rw->rw_backend->bgworker_notify)
3172  free(rw->rw_backend);
3173  rw->rw_backend = NULL;
3174  rw->rw_pid = 0;
3175  rw->rw_child_slot = 0;
3176  ReportBackgroundWorkerExit(&iter); /* report child death */
3177 
3178  LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG,
3179  namebuf, pid, exitstatus);
3180 
3181  return true;
3182  }
3183 
3184  return false;
3185 }
3186 
3187 /*
3188  * CleanupBackend -- cleanup after terminated backend.
3189  *
3190  * Remove all local state associated with backend.
3191  *
3192  * If you change this, see also CleanupBackgroundWorker.
3193  */
3194 static void
3196  int exitstatus) /* child's exit status. */
3197 {
3198  dlist_mutable_iter iter;
3199 
3200  LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
3201 
3202  /*
3203  * If a backend dies in an ugly way then we must signal all other backends
3204  * to quickdie. If exit status is zero (normal) or one (FATAL exit), we
3205  * assume everything is all right and proceed to remove the backend from
3206  * the active backend list.
3207  */
3208 
3209 #ifdef WIN32
3210 
3211  /*
3212  * On win32, also treat ERROR_WAIT_NO_CHILDREN (128) as nonfatal case,
3213  * since that sometimes happens under load when the process fails to start
3214  * properly (long before it starts using shared memory). Microsoft reports
3215  * it is related to mutex failure:
3216  * http://archives.postgresql.org/pgsql-hackers/2010-09/msg00790.php
3217  */
3218  if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3219  {
3220  LogChildExit(LOG, _("server process"), pid, exitstatus);
3221  exitstatus = 0;
3222  }
3223 #endif
3224 
3225  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3226  {
3227  HandleChildCrash(pid, exitstatus, _("server process"));
3228  return;
3229  }
3230 
3231  dlist_foreach_modify(iter, &BackendList)
3232  {
3233  Backend *bp = dlist_container(Backend, elem, iter.cur);
3234 
3235  if (bp->pid == pid)
3236  {
3237  if (!bp->dead_end)
3238  {
3240  {
3241  /*
3242  * Uh-oh, the child failed to clean itself up. Treat as a
3243  * crash after all.
3244  */
3245  HandleChildCrash(pid, exitstatus, _("server process"));
3246  return;
3247  }
3248 #ifdef EXEC_BACKEND
3249  ShmemBackendArrayRemove(bp);
3250 #endif
3251  }
3252  if (bp->bgworker_notify)
3253  {
3254  /*
3255  * This backend may have been slated to receive SIGUSR1 when
3256  * some background worker started or stopped. Cancel those
3257  * notifications, as we don't want to signal PIDs that are not
3258  * PostgreSQL backends. This gets skipped in the (probably
3259  * very common) case where the backend has never requested any
3260  * such notifications.
3261  */
3263  }
3264  dlist_delete(iter.cur);
3265  free(bp);
3266  break;
3267  }
3268  }
3269 }
3270 
3271 /*
3272  * HandleChildCrash -- cleanup after failed backend, bgwriter, checkpointer,
3273  * walwriter, autovacuum, or background worker.
3274  *
3275  * The objectives here are to clean up our local state about the child
3276  * process, and to signal all other remaining children to quickdie.
3277  */
3278 static void
3279 HandleChildCrash(int pid, int exitstatus, const char *procname)
3280 {
3281  dlist_mutable_iter iter;
3282  slist_iter siter;
3283  Backend *bp;
3284  bool take_action;
3285 
3286  /*
3287  * We only log messages and send signals if this is the first process
3288  * crash and we're not doing an immediate shutdown; otherwise, we're only
3289  * here to update postmaster's idea of live processes. If we have already
3290  * signalled children, nonzero exit status is to be expected, so don't
3291  * clutter log.
3292  */
3293  take_action = !FatalError && Shutdown != ImmediateShutdown;
3294 
3295  if (take_action)
3296  {
3297  LogChildExit(LOG, procname, pid, exitstatus);
3298  ereport(LOG,
3299  (errmsg("terminating any other active server processes")));
3300  }
3301 
3302  /* Process background workers. */
3304  {
3305  RegisteredBgWorker *rw;
3306 
3307  rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
3308  if (rw->rw_pid == 0)
3309  continue; /* not running */
3310  if (rw->rw_pid == pid)
3311  {
3312  /*
3313  * Found entry for freshly-dead worker, so remove it.
3314  */
3316  dlist_delete(&rw->rw_backend->elem);
3317 #ifdef EXEC_BACKEND
3318  ShmemBackendArrayRemove(rw->rw_backend);
3319 #endif
3320  free(rw->rw_backend);
3321  rw->rw_backend = NULL;
3322  rw->rw_pid = 0;
3323  rw->rw_child_slot = 0;
3324  /* don't reset crashed_at */
3325  /* don't report child stop, either */
3326  /* Keep looping so we can signal remaining workers */
3327  }
3328  else
3329  {
3330  /*
3331  * This worker is still alive. Unless we did so already, tell it
3332  * to commit hara-kiri.
3333  *
3334  * SIGQUIT is the special signal that says exit without proc_exit
3335  * and let the user know what's going on. But if SendStop is set
3336  * (-s on command line), then we send SIGSTOP instead, so that we
3337  * can get core dumps from all backends by hand.
3338  */
3339  if (take_action)
3340  {
3341  ereport(DEBUG2,
3342  (errmsg_internal("sending %s to process %d",
3343  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3344  (int) rw->rw_pid)));
3346  }
3347  }
3348  }
3349 
3350  /* Process regular backends */
3351  dlist_foreach_modify(iter, &BackendList)
3352  {
3353  bp = dlist_container(Backend, elem, iter.cur);
3354 
3355  if (bp->pid == pid)
3356  {
3357  /*
3358  * Found entry for freshly-dead backend, so remove it.
3359  */
3360  if (!bp->dead_end)
3361  {
3363 #ifdef EXEC_BACKEND
3364  ShmemBackendArrayRemove(bp);
3365 #endif
3366  }
3367  dlist_delete(iter.cur);
3368  free(bp);
3369  /* Keep looping so we can signal remaining backends */
3370  }
3371  else
3372  {
3373  /*
3374  * This backend is still alive. Unless we did so already, tell it
3375  * to commit hara-kiri.
3376  *
3377  * SIGQUIT is the special signal that says exit without proc_exit
3378  * and let the user know what's going on. But if SendStop is set
3379  * (-s on command line), then we send SIGSTOP instead, so that we
3380  * can get core dumps from all backends by hand.
3381  *
3382  * We could exclude dead_end children here, but at least in the
3383  * SIGSTOP case it seems better to include them.
3384  *
3385  * Background workers were already processed above; ignore them
3386  * here.
3387  */
3388  if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
3389  continue;
3390 
3391  if (take_action)
3392  {
3393  ereport(DEBUG2,
3394  (errmsg_internal("sending %s to process %d",
3395  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3396  (int) bp->pid)));
3397  signal_child(bp->pid, (SendStop ? SIGSTOP : SIGQUIT));
3398  }
3399  }
3400  }
3401 
3402  /* Take care of the startup process too */
3403  if (pid == StartupPID)
3404  {
3405  StartupPID = 0;
3407  }
3408  else if (StartupPID != 0 && take_action)
3409  {
3410  ereport(DEBUG2,
3411  (errmsg_internal("sending %s to process %d",
3412  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3413  (int) StartupPID)));
3414  signal_child(StartupPID, (SendStop ? SIGSTOP : SIGQUIT));
3416  }
3417 
3418  /* Take care of the bgwriter too */
3419  if (pid == BgWriterPID)
3420  BgWriterPID = 0;
3421  else if (BgWriterPID != 0 && take_action)
3422  {
3423  ereport(DEBUG2,
3424  (errmsg_internal("sending %s to process %d",
3425  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3426  (int) BgWriterPID)));
3427  signal_child(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
3428  }
3429 
3430  /* Take care of the checkpointer too */
3431  if (pid == CheckpointerPID)
3432  CheckpointerPID = 0;
3433  else if (CheckpointerPID != 0 && take_action)
3434  {
3435  ereport(DEBUG2,
3436  (errmsg_internal("sending %s to process %d",
3437  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3438  (int) CheckpointerPID)));
3439  signal_child(CheckpointerPID, (SendStop ? SIGSTOP : SIGQUIT));
3440  }
3441 
3442  /* Take care of the walwriter too */
3443  if (pid == WalWriterPID)
3444  WalWriterPID = 0;
3445  else if (WalWriterPID != 0 && take_action)
3446  {
3447  ereport(DEBUG2,
3448  (errmsg_internal("sending %s to process %d",
3449  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3450  (int) WalWriterPID)));
3451  signal_child(WalWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
3452  }
3453 
3454  /* Take care of the walreceiver too */
3455  if (pid == WalReceiverPID)
3456  WalReceiverPID = 0;
3457  else if (WalReceiverPID != 0 && take_action)
3458  {
3459  ereport(DEBUG2,
3460  (errmsg_internal("sending %s to process %d",
3461  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3462  (int) WalReceiverPID)));
3463  signal_child(WalReceiverPID, (SendStop ? SIGSTOP : SIGQUIT));
3464  }
3465 
3466  /* Take care of the autovacuum launcher too */
3467  if (pid == AutoVacPID)
3468  AutoVacPID = 0;
3469  else if (AutoVacPID != 0 && take_action)
3470  {
3471  ereport(DEBUG2,
3472  (errmsg_internal("sending %s to process %d",
3473  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3474  (int) AutoVacPID)));
3475  signal_child(AutoVacPID, (SendStop ? SIGSTOP : SIGQUIT));
3476  }
3477 
3478  /*
3479  * Force a power-cycle of the pgarch process too. (This isn't absolutely
3480  * necessary, but it seems like a good idea for robustness, and it
3481  * simplifies the state-machine logic in the case where a shutdown request
3482  * arrives during crash processing.)
3483  */
3484  if (PgArchPID != 0 && take_action)
3485  {
3486  ereport(DEBUG2,
3487  (errmsg_internal("sending %s to process %d",
3488  "SIGQUIT",
3489  (int) PgArchPID)));
3490  signal_child(PgArchPID, SIGQUIT);
3491  }
3492 
3493  /*
3494  * Force a power-cycle of the pgstat process too. (This isn't absolutely
3495  * necessary, but it seems like a good idea for robustness, and it
3496  * simplifies the state-machine logic in the case where a shutdown request
3497  * arrives during crash processing.)
3498  */
3499  if (PgStatPID != 0 && take_action)
3500  {
3501  ereport(DEBUG2,
3502  (errmsg_internal("sending %s to process %d",
3503  "SIGQUIT",
3504  (int) PgStatPID)));
3505  signal_child(PgStatPID, SIGQUIT);
3507  }
3508 
3509  /* We do NOT restart the syslogger */
3510 
3511  if (Shutdown != ImmediateShutdown)
3512  FatalError = true;
3513 
3514  /* We now transit into a state of waiting for children to die */
3515  if (pmState == PM_RECOVERY ||
3516  pmState == PM_HOT_STANDBY ||
3517  pmState == PM_RUN ||
3518  pmState == PM_WAIT_BACKUP ||
3520  pmState == PM_SHUTDOWN)
3522 
3523  /*
3524  * .. and if this doesn't happen quickly enough, now the clock is ticking
3525  * for us to kill them without mercy.
3526  */
3527  if (AbortStartTime == 0)
3528  AbortStartTime = time(NULL);
3529 }
3530 
3531 /*
3532  * Log the death of a child process.
3533  */
3534 static void
3535 LogChildExit(int lev, const char *procname, int pid, int exitstatus)
3536 {
3537  /*
3538  * size of activity_buffer is arbitrary, but set equal to default
3539  * track_activity_query_size
3540  */
3541  char activity_buffer[1024];
3542  const char *activity = NULL;
3543 
3544  if (!EXIT_STATUS_0(exitstatus))
3545  activity = pgstat_get_crashed_backend_activity(pid,
3546  activity_buffer,
3547  sizeof(activity_buffer));
3548 
3549  if (WIFEXITED(exitstatus))
3550  ereport(lev,
3551 
3552  /*------
3553  translator: %s is a noun phrase describing a child process, such as
3554  "server process" */
3555  (errmsg("%s (PID %d) exited with exit code %d",
3556  procname, pid, WEXITSTATUS(exitstatus)),
3557  activity ? errdetail("Failed process was running: %s", activity) : 0));
3558  else if (WIFSIGNALED(exitstatus))
3559 #if defined(WIN32)
3560  ereport(lev,
3561 
3562  /*------
3563  translator: %s is a noun phrase describing a child process, such as
3564  "server process" */
3565  (errmsg("%s (PID %d) was terminated by exception 0x%X",
3566  procname, pid, WTERMSIG(exitstatus)),
3567  errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
3568  activity ? errdetail("Failed process was running: %s", activity) : 0));
3569 #elif defined(HAVE_DECL_SYS_SIGLIST) && HAVE_DECL_SYS_SIGLIST
3570  ereport(lev,
3571 
3572  /*------
3573  translator: %s is a noun phrase describing a child process, such as
3574  "server process" */
3575  (errmsg("%s (PID %d) was terminated by signal %d: %s",
3576  procname, pid, WTERMSIG(exitstatus),
3577  WTERMSIG(exitstatus) < NSIG ?
3578  sys_siglist[WTERMSIG(exitstatus)] : "(unknown)"),
3579  activity ? errdetail("Failed process was running: %s", activity) : 0));
3580 #else
3581  ereport(lev,
3582 
3583  /*------
3584  translator: %s is a noun phrase describing a child process, such as
3585  "server process" */
3586  (errmsg("%s (PID %d) was terminated by signal %d",
3587  procname, pid, WTERMSIG(exitstatus)),
3588  activity ? errdetail("Failed process was running: %s", activity) : 0));
3589 #endif
3590  else
3591  ereport(lev,
3592 
3593  /*------
3594  translator: %s is a noun phrase describing a child process, such as
3595  "server process" */
3596  (errmsg("%s (PID %d) exited with unrecognized status %d",
3597  procname, pid, exitstatus),
3598  activity ? errdetail("Failed process was running: %s", activity) : 0));
3599 }
3600 
3601 /*
3602  * Advance the postmaster's state machine and take actions as appropriate
3603  *
3604  * This is common code for pmdie(), reaper() and sigusr1_handler(), which
3605  * receive the signals that might mean we need to change state.
3606  */
3607 static void
3609 {
3610  if (pmState == PM_WAIT_BACKUP)
3611  {
3612  /*
3613  * PM_WAIT_BACKUP state ends when online backup mode is not active.
3614  */
3615  if (!BackupInProgress())
3617  }
3618 
3619  if (pmState == PM_WAIT_READONLY)
3620  {
3621  /*
3622  * PM_WAIT_READONLY state ends when we have no regular backends that
3623  * have been started during recovery. We kill the startup and
3624  * walreceiver processes and transition to PM_WAIT_BACKENDS. Ideally,
3625  * we might like to kill these processes first and then wait for
3626  * backends to die off, but that doesn't work at present because
3627  * killing the startup process doesn't release its locks.
3628  */
3630  {
3631  if (StartupPID != 0)
3632  signal_child(StartupPID, SIGTERM);
3633  if (WalReceiverPID != 0)
3634  signal_child(WalReceiverPID, SIGTERM);
3636  }
3637  }
3638 
3639  /*
3640  * If we are in a state-machine state that implies waiting for backends to
3641  * exit, see if they're all gone, and change state if so.
3642  */
3643  if (pmState == PM_WAIT_BACKENDS)
3644  {
3645  /*
3646  * PM_WAIT_BACKENDS state ends when we have no regular backends
3647  * (including autovac workers), no bgworkers (including unconnected
3648  * ones), and no walwriter, autovac launcher or bgwriter. If we are
3649  * doing crash recovery or an immediate shutdown then we expect the
3650  * checkpointer to exit as well, otherwise not. The archiver, stats,
3651  * and syslogger processes are disregarded since they are not
3652  * connected to shared memory; we also disregard dead_end children
3653  * here. Walsenders are also disregarded, they will be terminated
3654  * later after writing the checkpoint record, like the archiver
3655  * process.
3656  */
3658  StartupPID == 0 &&
3659  WalReceiverPID == 0 &&
3660  BgWriterPID == 0 &&
3661  (CheckpointerPID == 0 ||
3663  WalWriterPID == 0 &&
3664  AutoVacPID == 0)
3665  {
3667  {
3668  /*
3669  * Start waiting for dead_end children to die. This state
3670  * change causes ServerLoop to stop creating new ones.
3671  */
3673 
3674  /*
3675  * We already SIGQUIT'd the archiver and stats processes, if
3676  * any, when we started immediate shutdown or entered
3677  * FatalError state.
3678  */
3679  }
3680  else
3681  {
3682  /*
3683  * If we get here, we are proceeding with normal shutdown. All
3684  * the regular children are gone, and it's time to tell the
3685  * checkpointer to do a shutdown checkpoint.
3686  */
3688  /* Start the checkpointer if not running */
3689  if (CheckpointerPID == 0)
3691  /* And tell it to shut down */
3692  if (CheckpointerPID != 0)
3693  {
3695  pmState = PM_SHUTDOWN;
3696  }
3697  else
3698  {
3699  /*
3700  * If we failed to fork a checkpointer, just shut down.
3701  * Any required cleanup will happen at next restart. We
3702  * set FatalError so that an "abnormal shutdown" message
3703  * gets logged when we exit.
3704  */
3705  FatalError = true;
3707 
3708  /* Kill the walsenders, archiver and stats collector too */
3710  if (PgArchPID != 0)
3712  if (PgStatPID != 0)
3714  }
3715  }
3716  }
3717  }
3718 
3719  if (pmState == PM_SHUTDOWN_2)
3720  {
3721  /*
3722  * PM_SHUTDOWN_2 state ends when there's no other children than
3723  * dead_end children left. There shouldn't be any regular backends
3724  * left by now anyway; what we're really waiting for is walsenders and
3725  * archiver.
3726  *
3727  * Walreceiver should normally be dead by now, but not when a fast
3728  * shutdown is performed during recovery.
3729  */
3730  if (PgArchPID == 0 && CountChildren(BACKEND_TYPE_ALL) == 0 &&
3731  WalReceiverPID == 0)
3732  {
3734  }
3735  }
3736 
3737  if (pmState == PM_WAIT_DEAD_END)
3738  {
3739  /*
3740  * PM_WAIT_DEAD_END state ends when the BackendList is entirely empty
3741  * (ie, no dead_end children remain), and the archiver and stats
3742  * collector are gone too.
3743  *
3744  * The reason we wait for those two is to protect them against a new
3745  * postmaster starting conflicting subprocesses; this isn't an
3746  * ironclad protection, but it at least helps in the
3747  * shutdown-and-immediately-restart scenario. Note that they have
3748  * already been sent appropriate shutdown signals, either during a
3749  * normal state transition leading up to PM_WAIT_DEAD_END, or during
3750  * FatalError processing.
3751  */
3752  if (dlist_is_empty(&BackendList) &&
3753  PgArchPID == 0 && PgStatPID == 0)
3754  {
3755  /* These other guys should be dead already */
3756  Assert(StartupPID == 0);
3757  Assert(WalReceiverPID == 0);
3758  Assert(BgWriterPID == 0);
3759  Assert(CheckpointerPID == 0);
3760  Assert(WalWriterPID == 0);
3761  Assert(AutoVacPID == 0);
3762  /* syslogger is not considered here */
3764  }
3765  }
3766 
3767  /*
3768  * If we've been told to shut down, we exit as soon as there are no
3769  * remaining children. If there was a crash, cleanup will occur at the
3770  * next startup. (Before PostgreSQL 8.3, we tried to recover from the
3771  * crash before exiting, but that seems unwise if we are quitting because
3772  * we got SIGTERM from init --- there may well not be time for recovery
3773  * before init decides to SIGKILL us.)
3774  *
3775  * Note that the syslogger continues to run. It will exit when it sees
3776  * EOF on its input pipe, which happens when there are no more upstream
3777  * processes.
3778  */
3780  {
3781  if (FatalError)
3782  {
3783  ereport(LOG, (errmsg("abnormal database system shutdown")));
3784  ExitPostmaster(1);
3785  }
3786  else
3787  {
3788  /*
3789  * Terminate exclusive backup mode to avoid recovery after a clean
3790  * fast shutdown. Since an exclusive backup can only be taken
3791  * during normal running (and not, for example, while running
3792  * under Hot Standby) it only makes sense to do this if we reached
3793  * normal running. If we're still in recovery, the backup file is
3794  * one we're recovering *from*, and we must keep it around so that
3795  * recovery restarts from the right place.
3796  */
3798  CancelBackup();
3799 
3800  /* Normal exit from the postmaster is here */
3801  ExitPostmaster(0);
3802  }
3803  }
3804 
3805  /*
3806  * If the startup process failed, or the user does not want an automatic
3807  * restart after backend crashes, wait for all non-syslogger children to
3808  * exit, and then exit postmaster. We don't try to reinitialize when the
3809  * startup process fails, because more than likely it will just fail again
3810  * and we will keep trying forever.
3811  */
3812  if (pmState == PM_NO_CHILDREN &&
3814  ExitPostmaster(1);
3815 
3816  /*
3817  * If we need to recover from a crash, wait for all non-syslogger children
3818  * to exit, then reset shmem and StartupDataBase.
3819  */
3820  if (FatalError && pmState == PM_NO_CHILDREN)
3821  {
3822  ereport(LOG,
3823  (errmsg("all server processes terminated; reinitializing")));
3824 
3825  /* allow background workers to immediately restart */
3827 
3828  shmem_exit(1);
3830 
3832  Assert(StartupPID != 0);
3834  pmState = PM_STARTUP;
3835  /* crash recovery started, reset SIGKILL flag */
3836  AbortStartTime = 0;
3837  }
3838 }
3839 
3840 
3841 /*
3842  * Send a signal to a postmaster child process
3843  *
3844  * On systems that have setsid(), each child process sets itself up as a
3845  * process group leader. For signals that are generally interpreted in the
3846  * appropriate fashion, we signal the entire process group not just the
3847  * direct child process. This allows us to, for example, SIGQUIT a blocked
3848  * archive_recovery script, or SIGINT a script being run by a backend via
3849  * system().
3850  *
3851  * There is a race condition for recently-forked children: they might not
3852  * have executed setsid() yet. So we signal the child directly as well as
3853  * the group. We assume such a child will handle the signal before trying
3854  * to spawn any grandchild processes. We also assume that signaling the
3855  * child twice will not cause any problems.
3856  */
3857 static void
3858 signal_child(pid_t pid, int signal)
3859 {
3860  if (kill(pid, signal) < 0)
3861  elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) pid, signal);
3862 #ifdef HAVE_SETSID
3863  switch (signal)
3864  {
3865  case SIGINT:
3866  case SIGTERM:
3867  case SIGQUIT:
3868  case SIGSTOP:
3869  case SIGKILL:
3870  if (kill(-pid, signal) < 0)
3871  elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) (-pid), signal);
3872  break;
3873  default:
3874  break;
3875  }
3876 #endif
3877 }
3878 
3879 /*
3880  * Send a signal to the targeted children (but NOT special children;
3881  * dead_end children are never signaled, either).
3882  */
3883 static bool
3884 SignalSomeChildren(int signal, int target)
3885 {
3886  dlist_iter iter;
3887  bool signaled = false;
3888 
3889  dlist_foreach(iter, &BackendList)
3890  {
3891  Backend *bp = dlist_container(Backend, elem, iter.cur);
3892 
3893  if (bp->dead_end)
3894  continue;
3895 
3896  /*
3897  * Since target == BACKEND_TYPE_ALL is the most common case, we test
3898  * it first and avoid touching shared memory for every child.
3899  */
3900  if (target != BACKEND_TYPE_ALL)
3901  {
3902  /*
3903  * Assign bkend_type for any recently announced WAL Sender
3904  * processes.
3905  */
3906  if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
3909 
3910  if (!(target & bp->bkend_type))
3911  continue;
3912  }
3913 
3914  ereport(DEBUG4,
3915  (errmsg_internal("sending signal %d to process %d",
3916  signal, (int) bp->pid)));
3917  signal_child(bp->pid, signal);
3918  signaled = true;
3919  }
3920  return signaled;
3921 }
3922 
3923 /*
3924  * Send a termination signal to children. This considers all of our child
3925  * processes, except the syslogger and dead_end backends.
3926  */
3927 static void
3929 {
3930  SignalChildren(signal);
3931  if (StartupPID != 0)
3932  {
3933  signal_child(StartupPID, signal);
3934  if (signal == SIGQUIT || signal == SIGKILL)
3936  }
3937  if (BgWriterPID != 0)
3938  signal_child(BgWriterPID, signal);
3939  if (CheckpointerPID != 0)
3940  signal_child(CheckpointerPID, signal);
3941  if (WalWriterPID != 0)
3942  signal_child(WalWriterPID, signal);
3943  if (WalReceiverPID != 0)
3944  signal_child(WalReceiverPID, signal);
3945  if (AutoVacPID != 0)
3946  signal_child(AutoVacPID, signal);
3947  if (PgArchPID != 0)
3948  signal_child(PgArchPID, signal);
3949  if (PgStatPID != 0)
3950  signal_child(PgStatPID, signal);
3951 }
3952 
3953 /*
3954  * BackendStartup -- start backend process
3955  *
3956  * returns: STATUS_ERROR if memory allocation, cancel key generation, or the fork failed; STATUS_OK otherwise.
3957  *
3958  * Note: if you change this code, also consider StartAutovacuumWorker.
3959  */
3960 static int
3962 {
3963  Backend *bn; /* for backend cleanup */
3964  pid_t pid;
3965 
3966  /*
3967  * Create backend data structure. Better before the fork() so we can
3968  * handle failure cleanly.
3969  */
3970  bn = (Backend *) malloc(sizeof(Backend));
3971  if (!bn)
3972  {
3973  ereport(LOG,
3974  (errcode(ERRCODE_OUT_OF_MEMORY),
3975  errmsg("out of memory")));
3976  return STATUS_ERROR;
3977  }
3978 
3979  /*
3980  * Compute the cancel key that will be assigned to this backend. The
3981  * backend will have its own copy in the forked-off process' value of
3982  * MyCancelKey, so that it can transmit the key to the frontend.
3983  */
3985  {
3986  free(bn);
3987  ereport(LOG,
3988  (errcode(ERRCODE_INTERNAL_ERROR),
3989  errmsg("could not generate random cancel key")));
3990  return STATUS_ERROR;
3991  }
3992 
3993  bn->cancel_key = MyCancelKey;
3994 
3995  /* Pass down canAcceptConnections state */
3997  bn->dead_end = (port->canAcceptConnections != CAC_OK &&
3999 
4000  /*
4001  * Unless it's a dead_end child, assign it a child slot number
4002  */
4003  if (!bn->dead_end)
4005  else
4006  bn->child_slot = 0;
4007 
4008  /* Hasn't asked to be notified about any bgworkers yet */
4009  bn->bgworker_notify = false;
4010 
4011 #ifdef EXEC_BACKEND
4012  pid = backend_forkexec(port);
4013 #else /* !EXEC_BACKEND */
4014  pid = fork_process();
4015  if (pid == 0) /* child */
4016  {
4017  free(bn);
4018 
4019  /* Detangle from postmaster */
4021 
4022  /* Close the postmaster's sockets */
4023  ClosePostmasterPorts(false);
4024 
4025  /* Perform additional initialization and collect startup packet */
4026  BackendInitialize(port);
4027 
4028  /* And run the backend */
4029  BackendRun(port);
4030  }
4031 #endif /* EXEC_BACKEND */
4032 
4033  if (pid < 0)
4034  {
4035  /* in parent, fork failed */
4036  int save_errno = errno;
4037 
4038  if (!bn->dead_end)
4040  free(bn);
4041  errno = save_errno;
4042  ereport(LOG,
4043  (errmsg("could not fork new process for connection: %m")));
4044  report_fork_failure_to_client(port, save_errno);
4045  return STATUS_ERROR;
4046  }
4047 
4048  /* in parent, successful fork */
4049  ereport(DEBUG2,
4050  (errmsg_internal("forked new backend, pid=%d socket=%d",
4051  (int) pid, (int) port->sock)));
4052 
4053  /*
4054  * Everything's been successful, it's safe to add this backend to our list
4055  * of backends.
4056  */
4057  bn->pid = pid;
4058  bn->bkend_type = BACKEND_TYPE_NORMAL; /* Can change later to WALSND */
4059  dlist_push_head(&BackendList, &bn->elem);
4060 
4061 #ifdef EXEC_BACKEND
4062  if (!bn->dead_end)
4063  ShmemBackendArrayAdd(bn);
4064 #endif
4065 
4066  return STATUS_OK;
4067 }
4068 
4069 /*
4070  * Try to report backend fork() failure to client before we close the
4071  * connection. Since we do not care to risk blocking the postmaster on
4072  * this connection, we set the connection to non-blocking and try only once.
4073  *
4074  * This is grungy special-purpose code; we cannot use backend libpq since
4075  * it's not up and running.
4076  */
4077 static void
4079 {
4080  char buffer[1000];
4081  int rc;
4082 
4083  /* Format the error message packet (always V2 protocol) */
4084  snprintf(buffer, sizeof(buffer), "E%s%s\n",
4085  _("could not fork new process for connection: "),
4086  strerror(errnum));
4087 
4088  /* Set port to non-blocking. Don't do send() if this fails */
4089  if (!pg_set_noblock(port->sock))
4090  return;
4091 
4092  /* We'll retry after EINTR, but ignore all other failures */
4093  do
4094  {
4095  rc = send(port->sock, buffer, strlen(buffer) + 1, 0);
4096  } while (rc < 0 && errno == EINTR);
4097 }
4098 
4099 
4100 /*
4101  * BackendInitialize -- initialize an interactive (postmaster-child)
4102  * backend process, and collect the client's startup packet.
4103  *
4104  * returns: nothing. Will not return at all if there's any failure.
4105  *
4106  * Note: this code does not depend on having any access to shared memory.
4107  * In the EXEC_BACKEND case, we are physically attached to shared memory
4108  * but have not yet set up most of our local pointers to shmem structures.
4109  */
4110 static void
4112 {
4113  int status;
4114  int ret;
4115  char remote_host[NI_MAXHOST];
4116  char remote_port[NI_MAXSERV];
4117  char remote_ps_data[NI_MAXHOST];
4118 
4119  /* Save port etc. for ps status */
4120  MyProcPort = port;
4121 
4122  /*
4123  * PreAuthDelay is a debugging aid for investigating problems in the
4124  * authentication cycle: it can be set in postgresql.conf to allow time to
4125  * attach to the newly-forked backend with a debugger. (See also
4126  * PostAuthDelay, which we allow clients to pass through PGOPTIONS, but it
4127  * is not honored until after authentication.)
4128  */
4129  if (PreAuthDelay > 0)
4130  pg_usleep(PreAuthDelay * 1000000L);
4131 
4132  /* This flag will remain set until InitPostgres finishes authentication */
4133  ClientAuthInProgress = true; /* limit visibility of log messages */
4134 
4135  /* save process start time */
4138 
4139  /* set these to empty in case they are needed before we set them up */
4140  port->remote_host = "";
4141  port->remote_port = "";
4142 
4143  /*
4144  * Initialize libpq and enable reporting of ereport errors to the client.
4145  * Must do this now because authentication uses libpq to send messages.
4146  */
4147  pq_init(); /* initialize libpq to talk to client */
4148  whereToSendOutput = DestRemote; /* now safe to ereport to client */
4149 
4150  /*
4151  * We arrange for a simple exit(1) if we receive SIGTERM or SIGQUIT or
4152  * timeout while trying to collect the startup packet. Otherwise the
4153  * postmaster cannot shutdown the database FAST or IMMED cleanly if a
4154  * buggy client fails to send the packet promptly. XXX it follows that
4155  * the remainder of this function must tolerate losing control at any
4156  * instant. Likewise, any pg_on_exit_callback registered before or during
4157  * this function must be prepared to execute at any instant between here
4158  * and the end of this function. Furthermore, affected callbacks execute
4159  * partially or not at all when a second exit-inducing signal arrives
4160  * after proc_exit_prepare() decrements on_proc_exit_index. (Thanks to
4161  * that mechanic, callbacks need not anticipate more than one call.) This
4162  * is fragile; it ought to instead follow the norm of handling interrupts
4163  * at selected, safe opportunities.
4164  */
4165  pqsignal(SIGTERM, startup_die);
4167  InitializeTimeouts(); /* establishes SIGALRM handler */
4169 
4170  /*
4171  * Get the remote host name and port for logging and status display.
4172  */
4173  remote_host[0] = '\0';
4174  remote_port[0] = '\0';
4175  if ((ret = pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen,
4176  remote_host, sizeof(remote_host),
4177  remote_port, sizeof(remote_port),
4178  (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV)) != 0)
4179  ereport(WARNING,
4180  (errmsg_internal("pg_getnameinfo_all() failed: %s",
4181  gai_strerror(ret))));
4182  if (remote_port[0] == '\0')
4183  snprintf(remote_ps_data, sizeof(remote_ps_data), "%s", remote_host);
4184  else
4185  snprintf(remote_ps_data, sizeof(remote_ps_data), "%s(%s)", remote_host, remote_port);
4186 
4187  /*
4188  * Save remote_host and remote_port in port structure (after this, they
4189  * will appear in log_line_prefix data for log messages).
4190  */
4191  port->remote_host = strdup(remote_host);
4192  port->remote_port = strdup(remote_port);
4193 
4194  /* And now we can issue the Log_connections message, if wanted */
4195  if (Log_connections)
4196  {
4197  if (remote_port[0])
4198  ereport(LOG,
4199  (errmsg("connection received: host=%s port=%s",
4200  remote_host,
4201  remote_port)));
4202  else
4203  ereport(LOG,
4204  (errmsg("connection received: host=%s",
4205  remote_host)));
4206  }
4207 
4208  /*
4209  * If we did a reverse lookup to name, we might as well save the results
4210  * rather than possibly repeating the lookup during authentication.
4211  *
4212  * Note that we don't want to specify NI_NAMEREQD above, because then we'd
4213  * get nothing useful for a client without an rDNS entry. Therefore, we
4214  * must check whether we got a numeric IPv4 or IPv6 address, and not save
4215  * it into remote_hostname if so. (This test is conservative and might
4216  * sometimes classify a hostname as numeric, but an error in that
4217  * direction is safe; it only results in a possible extra lookup.)
4218  */
4219  if (log_hostname &&
4220  ret == 0 &&
4221  strspn(remote_host, "0123456789.") < strlen(remote_host) &&
4222  strspn(remote_host, "0123456789ABCDEFabcdef:") < strlen(remote_host))
4223  port->remote_hostname = strdup(remote_host);
4224 
4225  /*
4226  * Ready to begin client interaction. We will give up and exit(1) after a
4227  * time delay, so that a broken client can't hog a connection
4228  * indefinitely. PreAuthDelay and any DNS interactions above don't count
4229  * against the time limit.
4230  *
4231  * Note: AuthenticationTimeout is applied here while waiting for the
4232  * startup packet, and then again in InitPostgres for the duration of any
4233  * authentication operations. So a hostile client could tie up the
4234  * process for nearly twice AuthenticationTimeout before we kick him off.
4235  *
4236  * Note: because PostgresMain will call InitializeTimeouts again, the
4237  * registration of STARTUP_PACKET_TIMEOUT will be lost. This is okay
4238  * since we never use it again after this function.
4239  */
4242 
4243  /*
4244  * Receive the startup packet (which might turn out to be a cancel request
4245  * packet).
4246  */
4247  status = ProcessStartupPacket(port, false);
4248 
4249  /*
4250  * Stop here if it was bad or a cancel packet. ProcessStartupPacket
4251  * already did any appropriate error reporting.
4252  */
4253  if (status != STATUS_OK)
4254  proc_exit(0);
4255 
4256  /*
4257  * Now that we have the user and database name, we can set the process
4258  * title for ps. It's good to do this as early as possible in startup.
4259  *
4260  * For a walsender, the ps display is set in the following form:
4261  *
4262  * postgres: wal sender process <user> <host> <activity>
4263  *
4264  * To achieve that, we pass "wal sender process" as username and username
4265  * as dbname to init_ps_display(). XXX: should add a new variant of
4266  * init_ps_display() to avoid abusing the parameters like this.
4267  */
4268  if (am_walsender)
4269  init_ps_display("wal sender process", port->user_name, remote_ps_data,
4270  update_process_title ? "authentication" : "");
4271  else
4272  init_ps_display(port->user_name, port->database_name, remote_ps_data,
4273  update_process_title ? "authentication" : "");
4274 
4275  /*
4276  * Disable the timeout, and prevent SIGTERM/SIGQUIT again.
4277  */
4279  PG_SETMASK(&BlockSig);
4280 }
4281 
4282 
4283 /*
4284  * BackendRun -- set up the backend's argument list and invoke PostgresMain()
4285  *
4286  * returns:
4287  * Nothing; the function is declared void. Control passes to
4288  * PostgresMain() and shouldn't come back here at all.
4289  */
4290 static void
4292 {
4293  char **av;
4294  int maxac;
4295  int ac;
4296  long secs;
4297  int usecs;
4298  int i;
4299 
4300  /*
4301  * Don't want backend to be able to see the postmaster random number
4302  * generator state. We have to clobber the static random_seed *and* start
4303  * a new random sequence in the random() library function.
4304  */
4305 #ifndef HAVE_STRONG_RANDOM
4306  random_seed = 0;
4307  random_start_time.tv_usec = 0;
4308 #endif
4309  /* slightly hacky way to convert timestamptz into integers */
4310  TimestampDifference(0, port->SessionStartTime, &secs, &usecs);
4311  srandom((unsigned int) (MyProcPid ^ (usecs << 12) ^ secs));
4312 
4313  /*
4314  * Now, build the argv vector that will be given to PostgresMain.
4315  *
4316  * The maximum possible number of commandline arguments that could come
4317  * from ExtraOptions is (strlen(ExtraOptions) + 1) / 2; see
4318  * pg_split_opts().
4319  */
4320  maxac = 2; /* for fixed args supplied below */
4321  maxac += (strlen(ExtraOptions) + 1) / 2;
4322 
4323  av = (char **) MemoryContextAlloc(TopMemoryContext,
4324  maxac * sizeof(char *));
4325  ac = 0;
4326 
4327  av[ac++] = "postgres";
4328 
4329  /*
4330  * Pass any backend switches specified with -o on the postmaster's own
4331  * command line. We assume these are secure.
4332  */
4333  pg_split_opts(av, &ac, ExtraOptions);
4334 
4335  av[ac] = NULL;
4336 
4337  Assert(ac < maxac);
4338 
4339  /*
4340  * Debug: print arguments being passed to backend
4341  */
4342  ereport(DEBUG3,
4343  (errmsg_internal("%s child[%d]: starting with (",
4344  progname, (int) getpid())));
4345  for (i = 0; i < ac; ++i)
4346  ereport(DEBUG3,
4347  (errmsg_internal("\t%s", av[i])));
4348  ereport(DEBUG3,
4349  (errmsg_internal(")")));
4350 
4351  /*
4352  * Make sure we aren't in PostmasterContext anymore. (We can't delete it
4353  * just yet, though, because InitPostgres will need the HBA data.)
4354  */
4356 
4357  PostgresMain(ac, av, port->database_name, port->user_name);
4358 }
4359 
4360 
4361 #ifdef EXEC_BACKEND
4362 
4363 /*
4364  * postmaster_forkexec -- fork and exec a postmaster subprocess
4365  *
4366  * The caller must have set up the argv array already, except for argv[2]
4367  * which will be filled with the name of the temp variable file.
4368  *
4369  * Returns the child process PID, or -1 on fork failure (a suitable error
4370  * message has been logged on failure).
4371  *
4372  * All uses of this routine will dispatch to SubPostmasterMain in the
4373  * child process.
4374  */
4375 pid_t
4376 postmaster_forkexec(int argc, char *argv[])
4377 {
4378  Port port;
4379 
4380  /* This entry point passes dummy values for the Port variables */
4381  memset(&port, 0, sizeof(port));
4382  return internal_forkexec(argc, argv, &port);
4383 }
4384 
4385 /*
4386  * backend_forkexec -- fork/exec off a backend process
4387  *
4388  * Some operating systems (WIN32) don't have fork() so we have to simulate
4389  * it by storing parameters that need to be passed to the child and
4390  * then create a new child process.
4391  *
4392  * returns the pid of the fork/exec'd process, or -1 on failure
4393  */
4394 static pid_t
4395 backend_forkexec(Port *port)
4396 {
4397  char *av[4];
4398  int ac = 0;
4399 
4400  av[ac++] = "postgres";
4401  av[ac++] = "--forkbackend";
4402  av[ac++] = NULL; /* filled in by internal_forkexec */
4403 
4404  av[ac] = NULL;
4405  Assert(ac < lengthof(av));
4406 
4407  return internal_forkexec(ac, av, port);
4408 }
4409 
4410 #ifndef WIN32
4411 
4412 /*
4413  * internal_forkexec non-win32 implementation
4414  *
4415  * - writes out backend variables to the parameter file
4416  * - fork():s, and then exec():s the child process
4417  */
4418 static pid_t
4419 internal_forkexec(int argc, char *argv[], Port *port)
4420 {
4421  static unsigned long tmpBackendFileNum = 0;
4422  pid_t pid;
4423  char tmpfilename[MAXPGPATH];
4424  BackendParameters param;
4425  FILE *fp;
4426 
4427  if (!save_backend_variables(&param, port))
4428  return -1; /* log made by save_backend_variables */
4429 
4430  /* Calculate name for temp file */
4431  snprintf(tmpfilename, MAXPGPATH, "%s/%s.backend_var.%d.%lu",
4433  MyProcPid, ++tmpBackendFileNum);
4434 
4435  /* Open file */
4436  fp = AllocateFile(tmpfilename, PG_BINARY_W);
4437  if (!fp)
4438  {
4439  /*
4440  * As in OpenTemporaryFileInTablespace, try to make the temp-file
4441  * directory
4442  */
4443  mkdir(PG_TEMP_FILES_DIR, S_IRWXU);
4444 
4445  fp = AllocateFile(tmpfilename, PG_BINARY_W);
4446  if (!fp)
4447  {
4448  ereport(LOG,
4450  errmsg("could not create file \"%s\": %m",
4451  tmpfilename)));
4452  return -1;
4453  }
4454  }
4455 
4456  if (fwrite(&param, sizeof(param), 1, fp) != 1)
4457  {
4458  ereport(LOG,
4460  errmsg("could not write to file \"%s\": %m", tmpfilename)));
4461  FreeFile(fp);
4462  return -1;
4463  }
4464 
4465  /* Release file */
4466  if (FreeFile(fp))
4467  {
4468  ereport(LOG,
4470  errmsg("could not write to file \"%s\": %m", tmpfilename)));
4471  return -1;
4472  }
4473 
4474  /* Make sure caller set up argv properly */
4475  Assert(argc >= 3);
4476  Assert(argv[argc] == NULL);
4477  Assert(strncmp(argv[1], "--fork", 6) == 0);
4478  Assert(argv[2] == NULL);
4479 
4480  /* Insert temp file name after --fork argument */
4481  argv[2] = tmpfilename;
4482 
4483  /* Fire off execv in child */
4484  if ((pid = fork_process()) == 0)
4485  {
4486  if (execv(postgres_exec_path, argv) < 0)
4487  {
4488  ereport(LOG,
4489  (errmsg("could not execute server process \"%s\": %m",
4490  postgres_exec_path)));
4491  /* We're already in the child process here, can't return */
4492  exit(1);
4493  }
4494  }
4495 
4496  return pid; /* Parent returns pid, or -1 on fork failure */
4497 }
4498 #else /* WIN32 */
4499 
4500 /*
4501  * internal_forkexec win32 implementation
4502  *
4503  * - starts backend using CreateProcess(), in suspended state
4504  * - writes out backend variables to the parameter file
4505  * - during this, duplicates handles and sockets required for
4506  * inheritance into the new process
4507  * - resumes execution of the new process once the backend parameter
4508  * file is complete.
4509  */
4510 static pid_t
4511 internal_forkexec(int argc, char *argv[], Port *port)
4512 {
4513  int retry_count = 0;
4514  STARTUPINFO si;
4515  PROCESS_INFORMATION pi;
4516  int i;
4517  int j;
4518  char cmdLine[MAXPGPATH * 2];
4519  HANDLE paramHandle;
4520  BackendParameters *param;
4521  SECURITY_ATTRIBUTES sa;
4522  char paramHandleStr[32];
4523  win32_deadchild_waitinfo *childinfo;
4524 
4525  /* Make sure caller set up argv properly */
4526  Assert(argc >= 3);
4527  Assert(argv[argc] == NULL);
4528  Assert(strncmp(argv[1], "--fork", 6) == 0);
4529  Assert(argv[2] == NULL);
4530 
4531  /* Resume here if we need to retry */
4532 retry:
4533 
4534  /* Set up shared memory for parameter passing */
4535  ZeroMemory(&sa, sizeof(sa));
4536  sa.nLength = sizeof(sa);
4537  sa.bInheritHandle = TRUE;
4538  paramHandle = CreateFileMapping(INVALID_HANDLE_VALUE,
4539  &sa,
4540  PAGE_READWRITE,
4541  0,
4542  sizeof(BackendParameters),
4543  NULL);
4544  if (paramHandle == INVALID_HANDLE_VALUE)
4545  {
4546  elog(LOG, "could not create backend parameter file mapping: error code %lu",
4547  GetLastError());
4548  return -1;
4549  }
4550 
4551  param = MapViewOfFile(paramHandle, FILE_MAP_WRITE, 0, 0, sizeof(BackendParameters));
4552  if (!param)
4553  {
4554  elog(LOG, "could not map backend parameter memory: error code %lu",
4555  GetLastError());
4556  CloseHandle(paramHandle);
4557  return -1;
4558  }
4559 
4560  /* Insert temp file name after --fork argument */
4561 #ifdef _WIN64
4562  sprintf(paramHandleStr, "%llu", (LONG_PTR) paramHandle);
4563 #else
4564  sprintf(paramHandleStr, "%lu", (DWORD) paramHandle);
4565 #endif
4566  argv[2] = paramHandleStr;
4567 
4568  /* Format the cmd line */
4569  cmdLine[sizeof(cmdLine) - 1] = '\0';
4570  cmdLine[sizeof(cmdLine) - 2] = '\0';
4571  snprintf(cmdLine, sizeof(cmdLine) - 1, "\"%s\"", postgres_exec_path);
4572  i = 0;
4573  while (argv[++i] != NULL)
4574  {
4575  j = strlen(cmdLine);
4576  snprintf(cmdLine + j, sizeof(cmdLine) - 1 - j, " \"%s\"", argv[i]);
4577  }
4578  if (cmdLine[sizeof(cmdLine) - 2] != '\0')
4579  {
4580  elog(LOG, "subprocess command line too long");
4581  return -1;
4582  }
4583 
4584  memset(&pi, 0, sizeof(pi));
4585  memset(&si, 0, sizeof(si));
4586  si.cb = sizeof(si);
4587 
4588  /*
4589  * Create the subprocess in a suspended state. This will be resumed later,
4590  * once we have written out the parameter file.
4591  */
4592  if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, CREATE_SUSPENDED,
4593  NULL, NULL, &si, &pi))
4594  {
4595  elog(LOG, "CreateProcess call failed: %m (error code %lu)",
4596  GetLastError());
4597  return -1;
4598  }
4599 
4600  if (!save_backend_variables(param, port, pi.hProcess, pi.dwProcessId))
4601  {
4602  /*
4603  * log made by save_backend_variables, but we have to clean up the
4604  * mess with the half-started process
4605  */
4606  if (!TerminateProcess(pi.hProcess, 255))
4607  ereport(LOG,
4608  (errmsg_internal("could not terminate unstarted process: error code %lu",
4609  GetLastError())));
4610  CloseHandle(pi.hProcess);
4611  CloseHandle(pi.hThread);
4612  return -1; /* log made by save_backend_variables */
4613  }
4614 
4615  /* Drop the parameter shared memory that is now inherited to the backend */
4616  if (!UnmapViewOfFile(param))
4617  elog(LOG, "could not unmap view of backend parameter file: error code %lu",
4618  GetLastError());
4619  if (!CloseHandle(paramHandle))
4620  elog(LOG, "could not close handle to backend parameter file: error code %lu",
4621  GetLastError());
4622 
4623  /*
4624  * Reserve the memory region used by our main shared memory segment before
4625  * we resume the child process. Normally this should succeed, but if ASLR
4626  * is active then it might sometimes fail due to the stack or heap having
4627  * gotten mapped into that range. In that case, just terminate the
4628  * process and retry.
4629  */
4630  if (!pgwin32_ReserveSharedMemoryRegion(pi.hProcess))
4631  {
4632  /* pgwin32_ReserveSharedMemoryRegion already made a log entry */
4633  if (!TerminateProcess(pi.hProcess, 255))
4634  ereport(LOG,
4635  (errmsg_internal("could not terminate process that failed to reserve memory: error code %lu",
4636  GetLastError())));
4637  CloseHandle(pi.hProcess);
4638  CloseHandle(pi.hThread);
4639  if (++retry_count < 100)
4640  goto retry;
4641  ereport(LOG,
4642  (errmsg("giving up after too many tries to reserve shared memory"),
4643  errhint("This might be caused by ASLR or antivirus software.")));
4644  return -1;
4645  }
4646 
4647  /*
4648  * Now that the backend variables are written out, we start the child
4649  * thread so it can start initializing while we set up the rest of the
4650  * parent state.
4651  */
4652  if (ResumeThread(pi.hThread) == -1)
4653  {
4654  if (!TerminateProcess(pi.hProcess, 255))
4655  {
4656  ereport(LOG,
4657  (errmsg_internal("could not terminate unstartable process: error code %lu",
4658  GetLastError())));
4659  CloseHandle(pi.hProcess);
4660  CloseHandle(pi.hThread);
4661  return -1;
4662  }
4663  CloseHandle(pi.hProcess);
4664  CloseHandle(pi.hThread);
4665  ereport(LOG,
4666  (errmsg_internal("could not resume thread of unstarted process: error code %lu",
4667  GetLastError())));
4668  return -1;
4669  }
4670 
4671  /*
4672  * Queue a waiter for to signal when this child dies. The wait will be
4673  * handled automatically by an operating system thread pool.
4674  *
4675  * Note: use malloc instead of palloc, since it needs to be thread-safe.
4676  * Struct will be free():d from the callback function that runs on a
4677  * different thread.
4678  */
4679  childinfo = malloc(sizeof(win32_deadchild_waitinfo));
4680  if (!childinfo)
4681  ereport(FATAL,
4682  (errcode(ERRCODE_OUT_OF_MEMORY),
4683  errmsg("out of memory")));
4684 
4685  childinfo->procHandle = pi.hProcess;
4686  childinfo->procId = pi.dwProcessId;
4687 
4688  if (!RegisterWaitForSingleObject(&childinfo->waitHandle,
4689  pi.hProcess,
4690  pgwin32_deadchild_callback,
4691  childinfo,
4692  INFINITE,
4693  WT_EXECUTEONLYONCE | WT_EXECUTEINWAITTHREAD))
4694  ereport(FATAL,
4695  (errmsg_internal("could not register process for wait: error code %lu",
4696  GetLastError())));
4697 
4698  /* Don't close pi.hProcess here - the wait thread needs access to it */
4699 
4700  CloseHandle(pi.hThread);
4701 
4702  return pi.dwProcessId;
4703 }
4704 #endif /* WIN32 */
4705 
4706 
4707 /*
4708  * SubPostmasterMain -- Get the fork/exec'd process into a state equivalent
4709  * to what it would be if we'd simply forked on Unix, and then
4710  * dispatch to the appropriate place.
4711  *
4712  * The first two command line arguments are expected to be "--forkFOO"
4713  * (where FOO indicates which postmaster child we are to become), and
4714  * the name of a variables file that we can read to load data that would
4715  * have been inherited by fork() on Unix. Remaining arguments go to the
4716  * subprocess FooMain() routine.
4717  */
4718 void
4719 SubPostmasterMain(int argc, char *argv[])
4720 {
4721  Port port;
4722 
4723  /* In EXEC_BACKEND case we will not have inherited these settings */
4724  IsPostmasterEnvironment = true;
4726 
4727  /* Setup as postmaster child */
4729 
4730  /* Setup essential subsystems (to ensure elog() behaves sanely) */
4732 
4733  /* Check we got appropriate args */
4734  if (argc < 3)
4735  elog(FATAL, "invalid subpostmaster invocation");
4736 
4737  /* Read in the variables file */
4738  memset(&port, 0, sizeof(Port));
4739  read_backend_variables(argv[2], &port);
4740 
4741  /* Close the postmaster's sockets (as soon as we know them) */
4742  ClosePostmasterPorts(strcmp(argv[1], "--forklog") == 0);
4743 
4744  /*
4745  * Set reference point for stack-depth checking
4746  */
4747  set_stack_base();
4748 
4749  /*
4750  * Set up memory area for GSS information. Mirrors the code in ConnCreate
4751  * for the non-exec case.
4752  */
4753 #if defined(ENABLE_GSS) || defined(ENABLE_SSPI)
4754  port.gss = (pg_gssinfo *) calloc(1, sizeof(pg_gssinfo));
4755  if (!port.gss)
4756  ereport(FATAL,
4757  (errcode(ERRCODE_OUT_OF_MEMORY),
4758  errmsg("out of memory")));
4759 #endif
4760 
4761  /*
4762  * If appropriate, physically re-attach to shared memory segment. We want
4763  * to do this before going any further to ensure that we can attach at the
4764  * same address the postmaster used. On the other hand, if we choose not
4765  * to re-attach, we may have other cleanup to do.
4766  *
4767  * If testing EXEC_BACKEND on Linux, you should run this as root before
4768  * starting the postmaster:
4769  *
4770  * echo 0 >/proc/sys/kernel/randomize_va_space
4771  *
4772  * This prevents using randomized stack and code addresses that cause the
4773  * child process's memory map to be different from the parent's, making it
4774  * sometimes impossible to attach to shared memory at the desired address.
4775  * Return the setting to its old value (usually '1' or '2') when finished.
4776  */
4777  if (strcmp(argv[1], "--forkbackend") == 0 ||
4778  strcmp(argv[1], "--forkavlauncher") == 0 ||
4779  strcmp(argv[1], "--forkavworker") == 0 ||
4780  strcmp(argv[1], "--forkboot") == 0 ||
4781  strncmp(argv[1], "--forkbgworker=", 15) == 0)
4783  else
4785 
4786  /* autovacuum needs this set before calling InitProcess */
4787  if (strcmp(argv[1], "--forkavlauncher") == 0)
4788  AutovacuumLauncherIAm();
4789  if (strcmp(argv[1], "--forkavworker") == 0)
4790  AutovacuumWorkerIAm();
4791 
4792  /*
4793  * Start our win32 signal implementation. This has to be done after we
4794  * read the backend variables, because we need to pick up the signal pipe
4795  * from the parent process.
4796  */
4797 #ifdef WIN32
4799 #endif
4800 
4801  /* In EXEC_BACKEND case we will not have inherited these settings */
4802  pqinitmask();
4803  PG_SETMASK(&BlockSig);
4804 
4805  /* Read in remaining GUC variables */
4806  read_nondefault_variables();
4807 
4808  /*
4809  * Reload any libraries that were preloaded by the postmaster. Since we
4810  * exec'd this process, those libraries didn't come along with us; but we
4811  * should load them into all child processes to be consistent with the
4812  * non-EXEC_BACKEND behavior.
4813  */
4815 
4816  /* Run backend or appropriate child */
4817  if (strcmp(argv[1], "--forkbackend") == 0)
4818  {
4819  Assert(argc == 3); /* shouldn't be any more args */
4820 
4821  /*
4822  * Need to reinitialize the SSL library in the backend, since the
4823  * context structures contain function pointers and cannot be passed
4824  * through the parameter file.
4825  *
4826  * If for some reason reload fails (maybe the user installed broken
4827  * key files), soldier on without SSL; that's better than all
4828  * connections becoming impossible.
4829  *
4830  * XXX should we do this in all child processes? For the moment it's
4831  * enough to do it in backend children.
4832  */
4833 #ifdef USE_SSL
4834  if (EnableSSL)
4835  {
4836  if (secure_initialize(false) == 0)
4837  LoadedSSL = true;
4838  else
4839  ereport(LOG,
4840  (errmsg("SSL configuration could not be loaded in child process")));
4841  }
4842 #endif
4843 
4844  /*
4845  * Perform additional initialization and collect startup packet.
4846  *
4847  * We want to do this before InitProcess() for a couple of reasons: 1.
4848  * so that we aren't eating up a PGPROC slot while waiting on the
4849  * client. 2. so that if InitProcess() fails due to being out of
4850  * PGPROC slots, we have already initialized libpq and are able to
4851  * report the error to the client.
4852  */
4853  BackendInitialize(&port);
4854 
4855  /* Restore basic shared memory pointers */
4857 
4858  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4859  InitProcess();
4860 
4861  /* Attach process to shared data structures */
4863 
4864  /* And run the backend */
4865  BackendRun(&port); /* does not return */
4866  }
4867  if (strcmp(argv[1], "--forkboot") == 0)
4868  {
4869  /* Restore basic shared memory pointers */
4871 
4872  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4874 
4875  /* Attach process to shared data structures */
4877 
4878  AuxiliaryProcessMain(argc - 2, argv + 2); /* does not return */
4879  }
4880  if (strcmp(argv[1], "--forkavlauncher") == 0)
4881  {
4882  /* Restore basic shared memory pointers */
4884 
4885  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4886  InitProcess();
4887 
4888  /* Attach process to shared data structures */
4890 
4891  AutoVacLauncherMain(argc - 2, argv + 2); /* does not return */
4892  }
4893  if (strcmp(argv[1], "--forkavworker") == 0)
4894  {
4895  /* Restore basic shared memory pointers */
4897 
4898  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4899  InitProcess();
4900 
4901  /* Attach process to shared data structures */
4903 
4904  AutoVacWorkerMain(argc - 2, argv + 2); /* does not return */
4905  }
4906  if (strncmp(argv[1], "--forkbgworker=", 15) == 0)
4907  {
4908  int shmem_slot;
4909 
4910  /* do this as early as possible; in particular, before InitProcess() */
4911  IsBackgroundWorker = true;
4912 
4913  /* Restore basic shared memory pointers */
4915 
4916  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4917  InitProcess();
4918 
4919  /* Attach process to shared data structures */
4921 
4922  /* Fetch MyBgworkerEntry from shared memory */
4923  shmem_slot = atoi(argv[1] + 15);
4924  MyBgworkerEntry = BackgroundWorkerEntry(shmem_slot);
4925 
4927  }
4928  if (strcmp(argv[1], "--forkarch") == 0)
4929  {
4930  /* Do not want to attach to shared memory */
4931 
4932  PgArchiverMain(argc, argv); /* does not return */
4933  }
4934  if (strcmp(argv[1], "--forkcol") == 0)
4935  {
4936  /* Do not want to attach to shared memory */
4937 
4938  PgstatCollectorMain(argc, argv); /* does not return */
4939  }
4940  if (strcmp(argv[1], "--forklog") == 0)
4941  {
4942  /* Do not want to attach to shared memory */
4943 
4944  SysLoggerMain(argc, argv); /* does not return */
4945  }
4946 
4947  abort(); /* shouldn't get here */
4948 }
4949 #endif /* EXEC_BACKEND */
4950 
4951 
4952 /*
4953  * ExitPostmaster -- cleanup
4954  *
4955  * Do NOT call exit() directly --- always go through here!
4956  */
4957 static void
4959 {
4960 #ifdef HAVE_PTHREAD_IS_THREADED_NP
4961 
4962  /*
4963  * There is no known cause for a postmaster to become multithreaded after
4964  * startup. Recheck to account for the possibility of unknown causes.
4965  * This message uses LOG level, because an unclean shutdown at this point
4966  * would usually not look much different from a clean shutdown.
4967  */
4968  if (pthread_is_threaded_np() != 0)
4969  ereport(LOG,
4970  (errcode(ERRCODE_INTERNAL_ERROR),
4971  errmsg_internal("postmaster became multithreaded"),
4972  errdetail("Please report this to <pgsql-bugs@postgresql.org>.")));
4973 #endif
4974 
4975  /* should cleanup shared memory and kill all backends */
4976 
4977  /*
4978  * Not sure of the semantics here. When the Postmaster dies, should the
4979  * backends all be killed? probably not.
4980  *
4981  * MUST -- vadim 05-10-1999
4982  */
4983 
4984  proc_exit(status);
4985 }
4986 
4987 /*
4988  * sigusr1_handler - handle signal conditions from child processes
4989  */
4990 static void
4992 {
4993  int save_errno = errno;
4994 
4995  PG_SETMASK(&BlockSig);
4996 
4997  /* Process background worker state change. */
4999  {
5001  StartWorkerNeeded = true;
5002  }
5003 
5004  /*
5005  * RECOVERY_STARTED and BEGIN_HOT_STANDBY signals are ignored in
5006  * unexpected states. If the startup process quickly starts up, completes
5007  * recovery, exits, we might process the death of the startup process
5008  * first. We don't want to go back to recovery in that case.
5009  */
5012  {
5013  /* WAL redo has started. We're out of reinitialization. */
5014  FatalError = false;
5015  Assert(AbortStartTime == 0);
5016 
5017  /*
5018  * Crank up the background tasks. It doesn't matter if this fails,
5019  * we'll just try again later.
5020  */
5021  Assert(CheckpointerPID == 0);
5023  Assert(BgWriterPID == 0);
5025 
5026  /*
5027  * Start the archiver if we're responsible for (re-)archiving received
5028  * files.
5029  */
5030  Assert(PgArchPID == 0);
5031  if (XLogArchivingAlways())
5032  PgArchPID = pgarch_start();
5033 
5034  /*
5035  * If we aren't planning to enter hot standby mode later, treat
5036  * RECOVERY_STARTED as meaning we're out of startup, and report status
5037  * accordingly.
5038  */
5039  if (!EnableHotStandby)
5040  {
5042 #ifdef USE_SYSTEMD
5043  sd_notify(0, "READY=1");
5044 #endif
5045  }
5046 
5047  pmState = PM_RECOVERY;
5048  }
5051  {
5052  /*
5053  * Likewise, start other special children as needed.
5054  */
5055  Assert(PgStatPID == 0);
5056  PgStatPID = pgstat_start();
5057 
5058  ereport(LOG,
5059  (errmsg("database system is ready to accept read only connections")));
5060 
5061  /* Report status */
5063 #ifdef USE_SYSTEMD
5064  sd_notify(0, "READY=1");
5065 #endif
5066 
5068  /* Some workers may be scheduled to start now */
5069  StartWorkerNeeded = true;
5070  }
5071 
5074 
5076  PgArchPID != 0)
5077  {
5078  /*
5079  * Send SIGUSR1 to archiver process, to wake it up and begin archiving
5080  * next WAL file.
5081  */
5083  }
5084 
5086  SysLoggerPID != 0)
5087  {
5088  /* Tell syslogger to rotate logfile */
5090  }
5091 
5093  Shutdown == NoShutdown)
5094  {
5095  /*
5096  * Start one iteration of the autovacuum daemon, even if autovacuuming
5097  * is nominally not enabled. This is so we can have an active defense
5098  * against transaction ID wraparound. We set a flag for the main loop
5099  * to do it rather than trying to do it here --- this is because the
5100  * autovac process itself may send the signal, and we want to handle
5101  * that by launching another iteration as soon as the current one
5102  * completes.
5103  */
5104  start_autovac_launcher = true;
5105  }
5106 
5108  Shutdown == NoShutdown)
5109  {
5110  /* The autovacuum launcher wants us to start a worker process. */
5112  }
5113 
5115  {
5116  /* Startup Process wants us to start the walreceiver process. */
5117  /* Start immediately if possible, else remember request for later. */
5118  WalReceiverRequested = true;
5120  }
5121 
5124  {
5125  /* Advance postmaster's state machine */
5127  }
5128 
5129  if (CheckPromoteSignal() && StartupPID != 0 &&
5130  (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5132  {
5133  /* Tell startup process to finish recovery */
5135  }
5136 
5138 
5139  errno = save_errno;
5140 }
5141 
5142 /*
5143  * SIGTERM or SIGQUIT while processing startup packet.
5144  * Clean up and exit(1).
5145  *
5146  * XXX: possible future improvement: try to send a message indicating
5147  * why we are disconnecting. Problem is to be sure we don't block while
5148  * doing so, nor mess up SSL initialization. In practice, if the client
5149  * has wedged here, it probably couldn't do anything with the message anyway.
5150  */
5151 static void
5153 {
5154  proc_exit(1);
5155 }
5156 
5157 /*
5158  * Dummy signal handler
5159  *
5160  * We use this for signals that we don't actually use in the postmaster,
5161  * but we do use in backends. If we were to SIG_IGN such signals in the
5162  * postmaster, then a newly started backend might drop a signal that arrives
5163  * before it's able to reconfigure its signal processing. (See notes in
5164  * tcop/postgres.c.)
5165  */
5166 static void
5168 {
5169 }
5170 
5171 /*
5172  * Timeout while processing startup packet.
5173  * As for startup_die(), we clean up and exit(1).
5174  */
5175 static void
5177 {
5178  proc_exit(1);
5179 }
5180 
5181 
5182 /*
5183  * Generate a random cancel key.
5184  */
5185 static bool
5187 {
5188 #ifdef HAVE_STRONG_RANDOM
5189  return pg_strong_random((char *) cancel_key, sizeof(int32));
5190 #else
5191 
5192  /*
5193  * If built with --disable-strong-random, use plain old erand48.
5194  *
5195  * We cannot use pg_backend_random() in postmaster, because it stores its
5196  * state in shared memory.
5197  */
5198  static unsigned short seed[3];
5199 
5200  /*
5201  * Select a random seed at the time of first receiving a request.
5202  */
5203  if (random_seed == 0)
5204  {
5205  struct timeval random_stop_time;
5206 
5207  gettimeofday(&random_stop_time, NULL);
5208 
5209  seed[0] = (unsigned short) random_start_time.tv_usec;
5210  seed[1] = (unsigned short) (random_stop_time.tv_usec) ^ (random_start_time.tv_usec >> 16);
5211  seed[2] = (unsigned short) (random_stop_time.tv_usec >> 16);
5212 
5213  random_seed = 1;
5214  }
5215 
5216  *cancel_key = pg_jrand48(seed);
5217 
5218  return true;
5219 #endif
5220 }
5221 
5222 /*
5223  * Count up number of child processes of specified types (dead_end children
5224  * are always excluded).
5225  */
5226 static int
5227 CountChildren(int target)
5228 {
5229  dlist_iter iter;
5230  int cnt = 0;
5231 
5232  dlist_foreach(iter, &BackendList)
5233  {
5234  Backend *bp = dlist_container(Backend, elem, iter.cur);
5235 
5236  if (bp->dead_end)
5237  continue;
5238 
5239  /*
5240  * Since target == BACKEND_TYPE_ALL is the most common case, we test
5241  * it first and avoid touching shared memory for every child.
5242  */
5243  if (target != BACKEND_TYPE_ALL)
5244  {
5245  /*
5246  * Assign bkend_type for any recently announced WAL Sender
5247  * processes.
5248  */
5249  if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
5252 
5253  if (!(target & bp->bkend_type))
5254  continue;
5255  }
5256 
5257  cnt++;
5258  }
5259  return cnt;
5260 }
5261 
5262 
5263 /*
5264  * StartChildProcess -- start an auxiliary process for the postmaster
5265  *
5266  * "type" determines what kind of child will be started. All child types
5267  * initially go to AuxiliaryProcessMain, which will handle common setup.
5268  *
5269  * Return value of StartChildProcess is subprocess' PID, or 0 if failed
5270  * to start subprocess.
5271  */
5272 static pid_t
5274 {
5275  pid_t pid;
5276  char *av[10];
5277  int ac = 0;
5278  char typebuf[32];
5279 
5280  /*
5281  * Set up command-line arguments for subprocess
5282  */
5283  av[ac++] = "postgres";
5284 
5285 #ifdef EXEC_BACKEND
5286  av[ac++] = "--forkboot";
5287  av[ac++] = NULL; /* filled in by postmaster_forkexec */
5288 #endif
5289 
5290  snprintf(typebuf, sizeof(typebuf), "-x%d", type);
5291  av[ac++] = typebuf;
5292 
5293  av[ac] = NULL;
5294  Assert(ac < lengthof(av));
5295 
5296 #ifdef EXEC_BACKEND
5297  pid = postmaster_forkexec(ac, av);
5298 #else /* !EXEC_BACKEND */
5299  pid = fork_process();
5300 
5301  if (pid == 0) /* child */
5302  {
5304 
5305  /* Close the postmaster's sockets */
5306  ClosePostmasterPorts(false);
5307 
5308  /* Release postmaster's working memory context */
5312 
5313  AuxiliaryProcessMain(ac, av);
5314  ExitPostmaster(0);
5315  }
5316 #endif /* EXEC_BACKEND */
5317 
5318  if (pid < 0)
5319  {
5320  /* in parent, fork failed */
5321  int save_errno = errno;
5322 
5323  errno = save_errno;
5324  switch (type)
5325  {
5326  case StartupProcess:
5327  ereport(LOG,
5328  (errmsg("could not fork startup process: %m")));
5329  break;
5330  case BgWriterProcess:
5331  ereport(LOG,
5332  (errmsg("could not fork background writer process: %m")));
5333  break;
5334  case CheckpointerProcess:
5335  ereport(LOG,
5336  (errmsg("could not fork checkpointer process: %m")));
5337  break;
5338  case WalWriterProcess:
5339  ereport(LOG,
5340  (errmsg("could not fork WAL writer process: %m")));
5341  break;
5342  case WalReceiverProcess:
5343  ereport(LOG,
5344  (errmsg("could not fork WAL receiver process: %m")));
5345  break;
5346  default:
5347  ereport(LOG,
5348  (errmsg("could not fork process: %m")));
5349  break;
5350  }
5351 
5352  /*
5353  * fork failure is fatal during startup, but there's no need to choke
5354  * immediately if starting other child types fails.
5355  */
5356  if (type == StartupProcess)
5357  ExitPostmaster(1);
5358  return 0;
5359  }
5360 
5361  /*
5362  * in parent, successful fork
5363  */
5364  return pid;
5365 }
5366 
5367 /*
5368  * StartAutovacuumWorker
5369  * Start an autovac worker process.
5370  *
5371  * This function is here because it enters the resulting PID into the
5372  * postmaster's private backends list.
5373  *
5374  * NB -- this code very roughly matches BackendStartup.
5375  */
/*
 * NOTE(review): the line carrying this function's name and parameter list
 * (listing line 5377) is absent from this listing, as are listing lines
 * 5396, 5411, 5417, 5430 and 5450.  The notes below describe only the
 * statements that are visible here.
 *
 * Visible control flow: if canAcceptConnections() returns CAC_OK, a Backend
 * struct is malloc'd and filled in, and StartAutoVacWorker() is called.  A
 * positive PID causes the struct to be pushed onto BackendList and the
 * function returns.  If the state check fails, malloc fails, or the PID is
 * not positive, control reaches the bottom of the function, where
 * avlauncher_needs_signal is set provided AutoVacPID is nonzero.  The block
 * that logs "could not generate random cancel key" returns directly instead.
 */
5376 static void
5378 {
5379  Backend *bn;
5380 
5381  /*
5382  * If not in condition to run a process, don't try, but handle it like a
5383  * fork failure. This does not normally happen, since the signal is only
5384  * supposed to be sent by autovacuum launcher when it's OK to do it, but
5385  * we have to check to avoid race-condition problems during DB state
5386  * changes.
5387  */
5388  if (canAcceptConnections() == CAC_OK)
5389  {
5390  /*
5391  * Compute the cancel key that will be assigned to this session. We
5392  * probably don't need cancel keys for autovac workers, but we'd
5393  * better have something random in the field to prevent unfriendly
5394  * people from sending cancels to them.
5395  */
 /* NOTE(review): the condition guarding this block (listing line 5396) is missing */
5397  {
5398  ereport(LOG,
5399  (errcode(ERRCODE_INTERNAL_ERROR),
5400  errmsg("could not generate random cancel key")));
5401  return;
5402  }
5403 
 /* plain malloc: a NULL result is handled by the else branch below */
5404  bn = (Backend *) malloc(sizeof(Backend));
5405  if (bn)
5406  {
5407  bn->cancel_key = MyCancelKey;
5408 
5409  /* Autovac workers are not dead_end and need a child slot */
5410  bn->dead_end = false;
5412  bn->bgworker_notify = false;
5413 
5414  bn->pid = StartAutoVacWorker();
5415  if (bn->pid > 0)
5416  {
5418  dlist_push_head(&BackendList, &bn->elem);
5419 #ifdef EXEC_BACKEND
5420  ShmemBackendArrayAdd(bn);
5421 #endif
5422  /* all OK */
5423  return;
5424  }
5425 
5426  /*
5427  * fork failed, fall through to report -- actual error message was
5428  * logged by StartAutoVacWorker
5429  */
 /* the entry was never added to BackendList, so it is simply freed */
5431  free(bn);
5432  }
5433  else
5434  ereport(LOG,
5435  (errcode(ERRCODE_OUT_OF_MEMORY),
5436  errmsg("out of memory")));
5437  }
5438 
5439  /*
5440  * Report the failure to the launcher, if it's running. (If it's not, we
5441  * might not even be connected to shared memory, so don't try to call
5442  * AutoVacWorkerFailed.) Note that we also need to signal it so that it
5443  * responds to the condition, but we don't do that here, instead waiting
5444  * for ServerLoop to do it. This way we avoid a ping-pong signalling in
5445  * quick succession between the autovac launcher and postmaster in case
5446  * things get ugly.
5447  */
5448  if (AutoVacPID != 0)
5449  {
5451  avlauncher_needs_signal = true;
5452  }
5453 }
5454 
5455 /*
5456  * MaybeStartWalReceiver
5457  * Start the WAL receiver process, if not running and our state allows.
 *
 * Conditions visible in this listing: WalReceiverPID is zero, pmState is
 * PM_STARTUP or PM_RECOVERY, and Shutdown == NoShutdown.  When the test
 * passes, WalReceiverRequested is reset to false.
 *
 * NOTE(review): the name/parameter line (listing line 5460), part of the
 * condition (5464) and the first statement of the if-body (5467) are
 * missing from this listing, so the condition may have further terms and
 * the statement that launches the process is not shown.
5458  */
5459 static void
5461 {
5462  if (WalReceiverPID == 0 &&
5463  (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5465  Shutdown == NoShutdown)
5466  {
5468  WalReceiverRequested = false;
5469  }
5470 }
5471 
5472 
5473 /*
5474  * Create the opts file
5475  */
5476 static bool
5477 CreateOptsFile(int argc, char *argv[], char *fullprogname)
5478 {
5479  FILE *fp;
5480  int i;
5481 
5482 #define OPTS_FILE "postmaster.opts"
5483 
5484  if ((fp = fopen(OPTS_FILE, "w")) == NULL)
5485  {
5486  elog(LOG, "could not create file \"%s\": %m", OPTS_FILE);
5487  return false;
5488  }
5489 
5490  fprintf(fp, "%s", fullprogname);
5491  for (i = 1; i < argc; i++)
5492  fprintf(fp, " \"%s\"", argv[i]);
5493  fputs("\n", fp);
5494 
5495  if (fclose(fp))
5496  {
5497  elog(LOG, "could not write file \"%s\": %m", OPTS_FILE);
5498  return false;
5499  }
5500 
5501  return true;
5502 }
5503 
5504 
5505 /*
5506  * MaxLivePostmasterChildren
5507  *
5508  * This reports the number of entries needed in per-child-process arrays
5509  * (the PMChildFlags array, and if EXEC_BACKEND the ShmemBackendArray).
5510  * These arrays include regular backends, autovac workers, walsenders
5511  * and background workers, but not special children nor dead_end children.
5512  * This allows the arrays to have a fixed maximum size, to wit the same
5513  * too-many-children limit enforced by canAcceptConnections(). The exact value
5514  * isn't too critical as long as it's more than MaxBackends.
 *
 * The visible part of the result is twice the sum of MaxConnections,
 * autovacuum_max_workers and 1, plus whatever term(s) appear on listing
 * line 5520.
 *
 * NOTE(review): listing lines 5517 (function name and parameter list) and
 * 5520 (the rest of the return expression) are missing from this listing.
5515  */
5516 int
5518 {
5519  return 2 * (MaxConnections + autovacuum_max_workers + 1 +
5521 }
5522 
5523 /*
5524  * Connect background worker to a database.
 *
 * Visible behavior: a FATAL error ("database connection requirement not
 * indicated during registration") is raised under a condition that is not
 * shown; otherwise InitPostgres() is called with dbname and username, with
 * InvalidOid passed in both OID positions.  Afterwards the process must
 * still be in init processing mode, or an ERROR is raised.
 *
 * NOTE(review): listing lines 5527 (name and parameter list), 5529, 5532
 * (the test guarding the FATAL report) and 5543 (last statement) are
 * missing from this listing.
5525  */
5526 void
5528 {
5530 
5531  /* XXX is this the right errcode? */
5533  ereport(FATAL,
5534  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5535  errmsg("database connection requirement not indicated during registration")));
5536 
 /* identify the database and role by name; the OID arguments are InvalidOid */
5537  InitPostgres(dbname, InvalidOid, username, InvalidOid, NULL);
5538 
5539  /* it had better not have gotten out of "init" mode yet */
5540  if (!IsInitProcessingMode())
5541  ereport(ERROR,
5542  (errmsg("invalid processing mode in background worker")));
5544 }
5545 
5546 /*
5547  * Connect background worker to a database using OIDs.
 *
 * Visible behavior: a FATAL error ("database connection requirement not
 * indicated during registration") is raised under a condition that is not
 * shown; otherwise InitPostgres() is called with dboid and useroid, with
 * NULL passed in both name positions.  Afterwards the process must still be
 * in init processing mode, or an ERROR is raised.
 *
 * NOTE(review): listing lines 5550 (name and parameter list), 5552, 5555
 * (the test guarding the FATAL report) and 5566 (last statement) are
 * missing from this listing.
5548  */
5549 void
5551 {
5553 
5554  /* XXX is this the right errcode? */
5556  ereport(FATAL,
5557  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5558  errmsg("database connection requirement not indicated during registration")));
5559 
 /* identify the database and role by OID; the name arguments are NULL */
5560  InitPostgres(NULL, dboid, NULL, useroid, NULL);
5561 
5562  /* it had better not have gotten out of "init" mode yet */
5563  if (!IsInitProcessingMode())
5564  ereport(ERROR,
5565  (errmsg("invalid processing mode in background worker")));
5567 }
5568 
5569 /*
5570  * Block/unblock signals in a background worker
 *
 * The first function installs the BlockSig mask through PG_SETMASK().
 *
 * NOTE(review): both functions' name lines (listing lines 5573 and 5579)
 * and the body of the second function (5581) are missing from this
 * listing, so what the second function does is not visible here.
5571  */
5572 void
5574 {
5575  PG_SETMASK(&BlockSig);
5576 }
5577 
5578 void
5580 {
5582 }
5583 
#ifdef EXEC_BACKEND
/*
 * bgworker_forkexec
 *		Build the argument vector for a background worker and pass it to
 *		postmaster_forkexec().
 *
 * shmem_slot is formatted into a "--forkbgworker=N" switch.  The return
 * value is whatever postmaster_forkexec() returns.
 */
static pid_t
bgworker_forkexec(int shmem_slot)
{
	char		slot_switch[MAXPGPATH];
	char	   *args[10];
	int			nargs = 0;

	snprintf(slot_switch, sizeof(slot_switch), "--forkbgworker=%d", shmem_slot);

	args[nargs++] = "postgres";
	args[nargs++] = slot_switch;
	args[nargs++] = NULL;		/* filled in by postmaster_forkexec */
	args[nargs] = NULL;			/* terminator; not counted in nargs */

	Assert(nargs < lengthof(args));

	return postmaster_forkexec(nargs, args);
}
#endif
5604 
5605 /*
5606  * Start a new bgworker.
5607  * Starting time conditions must have been checked already.
5608  *
5609  * Returns true on success, false on failure.
5610  * In either case, update the RegisteredBgWorker's state appropriately.
5611  *
5612  * This code is heavily based on autovacuum.c, q.v.
 *
 * NOTE(review): the name/parameter line (listing line 5615) and listing
 * lines 5632, 5651, 5656, 5662, 5672, 5676-5678, 5680 and 5689 are missing
 * from this listing; the added notes below cover visible statements only.
5613  */
5614 static bool
5616 {
5617  pid_t worker_pid;
5618 
 /* the caller must not ask us to start a worker that already has a PID */
5619  Assert(rw->rw_pid == 0);
5620 
5621  /*
5622  * Allocate and assign the Backend element. Note we must do this before
5623  * forking, so that we can handle out of memory properly.
5624  *
5625  * Treat failure as though the worker had crashed. That way, the
5626  * postmaster will wait a bit before attempting to start it again; if it
5627  * tried again right away, most likely it'd find itself repeating the
5628  * out-of-memory or fork failure condition.
5629  */
5630  if (!assign_backendlist_entry(rw))
5631  {
5633  return false;
5634  }
5635 
5636  ereport(DEBUG1,
5637  (errmsg("starting background worker process \"%s\"",
5638  rw->rw_worker.bgw_name)));
5639 
 /* EXEC_BACKEND builds start the child via bgworker_forkexec(), others via fork_process() */
5640 #ifdef EXEC_BACKEND
5641  switch ((worker_pid = bgworker_forkexec(rw->rw_shmem_slot)))
5642 #else
5643  switch ((worker_pid = fork_process()))
5644 #endif
5645  {
5646  case -1:
5647  /* in postmaster, fork failed ... */
5648  ereport(LOG,
5649  (errmsg("could not fork worker process: %m")));
5650  /* undo what assign_backendlist_entry did */
5652  rw->rw_child_slot = 0;
5653  free(rw->rw_backend);
5654  rw->rw_backend = NULL;
5655  /* mark entry as crashed, so we'll try again later */
5657  break;
5658 
 /* the child-side case is compiled only when EXEC_BACKEND is not defined */
5659 #ifndef EXEC_BACKEND
5660  case 0:
5661  /* in postmaster child ... */
5663 
5664  /* Close the postmaster's sockets */
5665  ClosePostmasterPorts(false);
5666 
5667  /*
5668  * Before blowing away PostmasterContext, save this bgworker's
5669  * data where it can find it.
5670  */
5671  MyBgworkerEntry = (BackgroundWorker *)
5673  memcpy(MyBgworkerEntry, &rw->rw_worker, sizeof(BackgroundWorker));
5674 
5675  /* Release postmaster's working memory context */
5679 
5681 
5682  exit(1); /* should not get here */
5683  break;
5684 #endif
5685  default:
5686  /* in postmaster, fork successful ... */
5687  rw->rw_pid = worker_pid;
5688  rw->rw_backend->pid = rw->rw_pid;
5690  /* add new worker to lists of backends */
5691  dlist_push_head(&BackendList, &rw->rw_backend->elem);
5692 #ifdef EXEC_BACKEND
5693  ShmemBackendArrayAdd(rw->rw_backend);
5694 #endif
5695  return true;
5696  }
5697 
 /* reached through the break in the fork-failure case */
5698  return false;
5699 }
5700 
5701 /*
5702  * Does the current postmaster state require starting a worker with the
5703  * specified start_time?
 *
 * The switch on pmState uses deliberate fall-through so that each state
 * also accepts the start times of the states listed after it:
 *   PM_RUN                            - BgWorkerStart_RecoveryFinished,
 *                                       BgWorkerStart_ConsistentState,
 *                                       BgWorkerStart_PostmasterStart
 *   PM_HOT_STANDBY                    - BgWorkerStart_ConsistentState,
 *                                       BgWorkerStart_PostmasterStart
 *   PM_RECOVERY, PM_STARTUP, PM_INIT  - BgWorkerStart_PostmasterStart
 * The remaining states (PM_NO_CHILDREN, PM_WAIT_DEAD_END, PM_SHUTDOWN_2,
 * PM_SHUTDOWN, PM_WAIT_BACKENDS, PM_WAIT_READONLY, PM_WAIT_BACKUP) break
 * out of the switch, so the function returns false for every start_time.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (listing line 5706) is missing from this listing.
5704  */
5705 static bool
5707 {
5708  switch (pmState)
5709  {
5710  case PM_NO_CHILDREN:
5711  case PM_WAIT_DEAD_END:
5712  case PM_SHUTDOWN_2:
5713  case PM_SHUTDOWN:
5714  case PM_WAIT_BACKENDS:
5715  case PM_WAIT_READONLY:
5716  case PM_WAIT_BACKUP:
5717  break;
5718 
5719  case PM_RUN:
5720  if (start_time == BgWorkerStart_RecoveryFinished)
5721  return true;
5722  /* fall through */
5723 
5724  case PM_HOT_STANDBY:
5725  if (start_time == BgWorkerStart_ConsistentState)
5726  return true;
5727  /* fall through */
5728 
5729  case PM_RECOVERY:
5730  case PM_STARTUP:
5731  case PM_INIT:
5732  if (start_time == BgWorkerStart_PostmasterStart)
5733  return true;
5734  /* fall through */
5735 
5736  }
5737 
 /* no case matched the requested start_time */
5738  return false;
5739 }
5740 
5741 /*
5742  * Allocate the Backend struct for a connected background worker, but don't
5743  * add it to the list of backends just yet.
5744  *
5745  * On failure, return false without changing any worker state.
5746  *
5747  * Some info from the Backend is copied into the passed rw.
5748  */
5749 static bool
5751 {
5752  Backend *bn;
5753 
5754  /*
5755  * Compute the cancel key that will be assigned to this session. We
5756  * probably don't need cancel keys for background workers, but we'd better
5757  * have something random in the field to prevent unfriendly people from
5758  * sending cancels to them.
5759  */
5761  {
5762  ereport(LOG,
5763  (errcode(ERRCODE_INTERNAL_ERROR),
5764  errmsg("could not generate random cancel key")));
5765  return false;
5766  }
5767 
5768  bn = malloc(sizeof(Backend));
5769  if (bn == NULL)
5770  {
5771  ereport(LOG,
5772  (errcode(ERRCODE_OUT_OF_MEMORY),
5773  errmsg("out of memory")));
5774  return false;
5775  }
5776 
5777  bn->cancel_key = MyCancelKey;
5780  bn->dead_end = false;
5781  bn->bgworker_notify = false;
5782 
5783  rw->rw_backend = bn;
5784  rw->