PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
postmaster.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * postmaster.c
4  * This program acts as a clearing house for requests to the
5  * POSTGRES system. Frontend programs send a startup message
6  * to the Postmaster and the postmaster uses the info in the
7  * message to setup a backend process.
8  *
9  * The postmaster also manages system-wide operations such as
10  * startup and shutdown. The postmaster itself doesn't do those
11  * operations, mind you --- it just forks off a subprocess to do them
12  * at the right times. It also takes care of resetting the system
13  * if a backend crashes.
14  *
15  * The postmaster process creates the shared memory and semaphore
16  * pools during startup, but as a rule does not touch them itself.
17  * In particular, it is not a member of the PGPROC array of backends
18  * and so it cannot participate in lock-manager operations. Keeping
19  * the postmaster away from shared memory operations makes it simpler
20  * and more reliable. The postmaster is almost always able to recover
21  * from crashes of individual backends by resetting shared memory;
22  * if it did much with shared memory then it would be prone to crashing
23  * along with the backends.
24  *
25  * When a request message is received, we now fork() immediately.
26  * The child process performs authentication of the request, and
27  * then becomes a backend if successful. This allows the auth code
28  * to be written in a simple single-threaded style (as opposed to the
29  * crufty "poor man's multitasking" code that used to be needed).
30  * More importantly, it ensures that blockages in non-multithreaded
31  * libraries like SSL or PAM cannot cause denial of service to other
32  * clients.
33  *
34  *
35  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
36  * Portions Copyright (c) 1994, Regents of the University of California
37  *
38  *
39  * IDENTIFICATION
40  * src/backend/postmaster/postmaster.c
41  *
42  * NOTES
43  *
44  * Initialization:
45  * The Postmaster sets up shared memory data structures
46  * for the backends.
47  *
48  * Synchronization:
49  * The Postmaster shares memory with the backends but should avoid
50  * touching shared memory, so as not to become stuck if a crashing
51  * backend screws up locks or shared memory. Likewise, the Postmaster
52  * should never block on messages from frontend clients.
53  *
54  * Garbage Collection:
55  * The Postmaster cleans up after backends if they have an emergency
56  * exit and/or core dump.
57  *
58  * Error Reporting:
59  * Use write_stderr() only for reporting "interactive" errors
60  * (essentially, bogus arguments on the command line). Once the
61  * postmaster is launched, use ereport().
62  *
63  *-------------------------------------------------------------------------
64  */
65 
66 #include "postgres.h"
67 
68 #include <unistd.h>
69 #include <signal.h>
70 #include <time.h>
71 #include <sys/wait.h>
72 #include <ctype.h>
73 #include <sys/stat.h>
74 #include <sys/socket.h>
75 #include <fcntl.h>
76 #include <sys/param.h>
77 #include <netinet/in.h>
78 #include <arpa/inet.h>
79 #include <netdb.h>
80 #include <limits.h>
81 
82 #ifdef HAVE_SYS_SELECT_H
83 #include <sys/select.h>
84 #endif
85 
86 #ifdef USE_BONJOUR
87 #include <dns_sd.h>
88 #endif
89 
90 #ifdef USE_SYSTEMD
91 #include <systemd/sd-daemon.h>
92 #endif
93 
94 #ifdef HAVE_PTHREAD_IS_THREADED_NP
95 #include <pthread.h>
96 #endif
97 
98 #include "access/transam.h"
99 #include "access/xlog.h"
100 #include "bootstrap/bootstrap.h"
101 #include "catalog/pg_control.h"
102 #include "common/ip.h"
103 #include "lib/ilist.h"
104 #include "libpq/auth.h"
105 #include "libpq/libpq.h"
106 #include "libpq/pqsignal.h"
107 #include "miscadmin.h"
108 #include "pg_getopt.h"
109 #include "pgstat.h"
110 #include "postmaster/autovacuum.h"
112 #include "postmaster/fork_process.h"
113 #include "postmaster/pgarch.h"
114 #include "postmaster/postmaster.h"
115 #include "postmaster/syslogger.h"
117 #include "replication/walsender.h"
118 #include "storage/fd.h"
119 #include "storage/ipc.h"
120 #include "storage/pg_shmem.h"
121 #include "storage/pmsignal.h"
122 #include "storage/proc.h"
123 #include "tcop/tcopprot.h"
124 #include "utils/builtins.h"
125 #include "utils/datetime.h"
126 #include "utils/dynamic_loader.h"
127 #include "utils/memutils.h"
128 #include "utils/ps_status.h"
129 #include "utils/timeout.h"
130 #include "utils/varlena.h"
131 
132 #ifdef EXEC_BACKEND
133 #include "storage/spin.h"
134 #endif
135 
136 
137 /*
138  * Possible types of a backend. Beyond being the possible bkend_type values in
139  * struct bkend, these are OR-able request flag bits for SignalSomeChildren()
140  * and CountChildren().
141  */
142 #define BACKEND_TYPE_NORMAL 0x0001 /* normal backend */
143 #define BACKEND_TYPE_AUTOVAC 0x0002 /* autovacuum worker process */
144 #define BACKEND_TYPE_WALSND 0x0004 /* walsender process */
145 #define BACKEND_TYPE_BGWORKER 0x0008 /* bgworker process */
146 #define BACKEND_TYPE_ALL 0x000F /* OR of all the above */
147 
148 #define BACKEND_TYPE_WORKER (BACKEND_TYPE_AUTOVAC | BACKEND_TYPE_BGWORKER) /* either kind of worker: autovacuum worker or bgworker */
149 
150 /*
151  * List of active backends (or child processes anyway; we don't actually
152  * know whether a given child has become a backend or is still in the
153  * authorization phase). This is used mainly to keep track of how many
154  * children we have and send them appropriate signals when necessary.
155  *
156  * "Special" children such as the startup, bgwriter and autovacuum launcher
157  * tasks are not in this list. Autovacuum worker and walsender are in it.
158  * Also, "dead_end" children are in it: these are children launched just for
159  * the purpose of sending a friendly rejection message to a would-be client.
160  * We must track them because they are attached to shared memory, but we know
161  * they will never become live backends. dead_end children are not assigned a
162  * PMChildSlot.
163  *
164  * Background workers are in this list, too.
165  */
166 typedef struct bkend
167 {
168  pid_t pid; /* process id of backend */
169  int32 cancel_key; /* cancel key for cancels for this backend */
170  int child_slot; /* PMChildSlot for this backend, if any */
171 
172  /*
173  * Flavor of backend or auxiliary process. Note that BACKEND_TYPE_WALSND
174  * backends initially announce themselves as BACKEND_TYPE_NORMAL, so if
175  * bkend_type is normal, you should check for a recent transition.
176  */
178  bool dead_end; /* is it going to send an error and quit? */
179  bool bgworker_notify; /* gets bgworker start/stop notifications */
180  dlist_node elem; /* list link in BackendList */
181 } Backend;
182 
184 
185 #ifdef EXEC_BACKEND
186 static Backend *ShmemBackendArray;
187 #endif
188 
190 
191 
192 
193 /* The socket number we are listening for connections on */
195 
196 /* The directory names for Unix socket(s) */
198 
199 /* The TCP listen address(es) */
201 
202 /*
203  * ReservedBackends is the number of backends reserved for superuser use.
204  * This number is taken out of the pool size given by MaxBackends, so the
205  * number of backend slots available to non-superusers is
206  * (MaxBackends - ReservedBackends). Note what this really means is
207  * "if there are <= ReservedBackends connections available, only superusers
208  * can make new connections" --- pre-existing superuser connections don't
209  * count against the limit.
210  */
212 
213 /* The socket(s) we're listening to. */
214 #define MAXLISTEN 64
216 
217 /*
218  * Set by the -o option
219  */
220 static char ExtraOptions[MAXPGPATH];
221 
222 /*
223  * These globals control the behavior of the postmaster in case some
224  * backend dumps core. Normally, it kills all peers of the dead backend
225  * and reinitializes shared memory. By specifying -T or -n, we can have
226  * the postmaster stop (rather than kill) peers and not reinitialize
227  * shared data structures. (Reinit is currently dead code, though.)
228  */
229 static bool Reinit = true;
230 static int SendStop = false;
231 
232 /* still more option variables */
233 bool EnableSSL = false;
234 
235 int PreAuthDelay = 0;
237 
238 bool log_hostname; /* for ps display and logging */
239 bool Log_connections = false;
240 bool Db_user_namespace = false;
241 
242 bool enable_bonjour = false;
245 
246 /* PIDs of special child processes; 0 when not running */
247 static pid_t StartupPID = 0,
256 
257 /* Startup process's status */
258 typedef enum
259 {
262  STARTUP_SIGNALED, /* we sent it a SIGQUIT or SIGKILL */
265 
267 
268 /* Startup/shutdown state */
269 #define NoShutdown 0
270 #define SmartShutdown 1
271 #define FastShutdown 2
272 #define ImmediateShutdown 3
273 
274 static int Shutdown = NoShutdown;
275 
276 static bool FatalError = false; /* T if recovering from backend crash */
277 
278 /*
279  * We use a simple state machine to control startup, shutdown, and
280  * crash recovery (which is rather like shutdown followed by startup).
281  *
282  * After doing all the postmaster initialization work, we enter PM_STARTUP
283  * state and the startup process is launched. The startup process begins by
284  * reading the control file and other preliminary initialization steps.
285  * In a normal startup, or after crash recovery, the startup process exits
286  * with exit code 0 and we switch to PM_RUN state. However, archive recovery
287  * is handled specially since it takes much longer and we would like to support
288  * hot standby during archive recovery.
289  *
290  * When the startup process is ready to start archive recovery, it signals the
291  * postmaster, and we switch to PM_RECOVERY state. The background writer and
292  * checkpointer are launched, while the startup process continues applying WAL.
293  * If Hot Standby is enabled, then, after reaching a consistent point in WAL
294  * redo, startup process signals us again, and we switch to PM_HOT_STANDBY
295  * state and begin accepting connections to perform read-only queries. When
296  * archive recovery is finished, the startup process exits with exit code 0
297  * and we switch to PM_RUN state.
298  *
299  * Normal child backends can only be launched when we are in PM_RUN or
300  * PM_HOT_STANDBY state. (We also allow launch of normal
301  * child backends in PM_WAIT_BACKUP state, but only for superusers.)
302  * In other states we handle connection requests by launching "dead_end"
303  * child processes, which will simply send the client an error message and
304  * quit. (We track these in the BackendList so that we can know when they
305  * are all gone; this is important because they're still connected to shared
306  * memory, and would interfere with an attempt to destroy the shmem segment,
307  * possibly leading to SHMALL failure when we try to make a new one.)
308  * In PM_WAIT_DEAD_END state we are waiting for all the dead_end children
309  * to drain out of the system, and therefore stop accepting connection
310  * requests at all until the last existing child has quit (which hopefully
311  * will not be very long).
312  *
313  * Notice that this state variable does not distinguish *why* we entered
314  * states later than PM_RUN --- Shutdown and FatalError must be consulted
315  * to find that out. FatalError is never true in PM_RECOVERY, PM_HOT_STANDBY or PM_RUN
316  * states, nor in PM_SHUTDOWN states (because we don't enter those states
317  * when trying to recover from a crash). It can be true in PM_STARTUP state,
318  * because we don't clear it until we've successfully started WAL redo.
319  */
320 typedef enum
321 {
322  PM_INIT, /* postmaster starting */
323  PM_STARTUP, /* waiting for startup subprocess */
324  PM_RECOVERY, /* in archive recovery mode */
325  PM_HOT_STANDBY, /* in hot standby mode */
326  PM_RUN, /* normal "database is alive" state */
327  PM_WAIT_BACKUP, /* waiting for online backup mode to end */
328  PM_WAIT_READONLY, /* waiting for read-only backends to exit */
329  PM_WAIT_BACKENDS, /* waiting for live backends to exit */
330  PM_SHUTDOWN, /* waiting for checkpointer to do shutdown
331  * checkpoint */
332  PM_SHUTDOWN_2, /* waiting for archiver and walsenders to
333  * finish */
334  PM_WAIT_DEAD_END, /* waiting for dead_end children to exit */
335  PM_NO_CHILDREN /* all important children have exited */
336 } PMState;
337 
339 
340 /* Start time of SIGKILL timeout during immediate shutdown or child crash */
341 /* Zero means timeout is not running */
342 static time_t AbortStartTime = 0;
343 /* Length of said timeout */
344 #define SIGKILL_CHILDREN_AFTER_SECS 5
345 
346 static bool ReachedNormalRunning = false; /* T if we've reached PM_RUN */
347 
348 bool ClientAuthInProgress = false; /* T during new-client
349  * authentication */
350 
351 bool redirection_done = false; /* stderr redirected for syslogger? */
352 
353 /* received START_AUTOVAC_LAUNCHER signal */
354 static volatile sig_atomic_t start_autovac_launcher = false;
355 
356 /* the launcher needs to be signalled to communicate some condition */
357 static volatile bool avlauncher_needs_signal = false;
358 
359 /* set when there's a worker that needs to be started up */
360 static volatile bool StartWorkerNeeded = true;
361 static volatile bool HaveCrashedWorker = false;
362 
363 #ifndef HAVE_STRONG_RANDOM
364 /*
365  * State for assigning cancel keys.
366  * Also, the global MyCancelKey passes the cancel key assigned to a given
367  * backend from the postmaster to that backend (via fork).
368  */
369 static unsigned int random_seed = 0;
370 static struct timeval random_start_time;
371 #endif
372 
373 #ifdef USE_SSL
374 /* Set when and if SSL has been initialized properly */
375 static bool LoadedSSL = false;
376 #endif
377 
378 #ifdef USE_BONJOUR
379 static DNSServiceRef bonjour_sdref = NULL;
380 #endif
381 
382 /*
383  * postmaster.c - function prototypes
384  */
385 static void CloseServerPorts(int status, Datum arg);
386 static void unlink_external_pid_file(int status, Datum arg);
387 static void getInstallationPaths(const char *argv0);
388 static void checkDataDir(void);
389 static Port *ConnCreate(int serverFd);
390 static void ConnFree(Port *port);
391 static void reset_shared(int port);
392 static void SIGHUP_handler(SIGNAL_ARGS);
393 static void pmdie(SIGNAL_ARGS);
394 static void reaper(SIGNAL_ARGS);
395 static void sigusr1_handler(SIGNAL_ARGS);
396 static void startup_die(SIGNAL_ARGS);
397 static void dummy_handler(SIGNAL_ARGS);
398 static void StartupPacketTimeoutHandler(void);
399 static void CleanupBackend(int pid, int exitstatus);
400 static bool CleanupBackgroundWorker(int pid, int exitstatus);
401 static void HandleChildCrash(int pid, int exitstatus, const char *procname);
402 static void LogChildExit(int lev, const char *procname,
403  int pid, int exitstatus);
404 static void PostmasterStateMachine(void);
405 static void BackendInitialize(Port *port);
406 static void BackendRun(Port *port) pg_attribute_noreturn();
407 static void ExitPostmaster(int status) pg_attribute_noreturn();
408 static int ServerLoop(void);
409 static int BackendStartup(Port *port);
410 static int ProcessStartupPacket(Port *port, bool SSLdone);
411 static void processCancelRequest(Port *port, void *pkt);
412 static int initMasks(fd_set *rmask);
413 static void report_fork_failure_to_client(Port *port, int errnum);
414 static CAC_state canAcceptConnections(void);
415 static bool RandomCancelKey(int32 *cancel_key);
416 static void signal_child(pid_t pid, int signal);
417 static bool SignalSomeChildren(int signal, int targets);
418 static void TerminateChildren(int signal);
419 
420 #define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL)
421 
422 static int CountChildren(int target);
423 static void maybe_start_bgworker(void);
424 static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
425 static pid_t StartChildProcess(AuxProcType type);
426 static void StartAutovacuumWorker(void);
427 static void InitPostmasterDeathWatchHandle(void);
428 
429 /*
430  * Is the archiver allowed to start up in the current postmaster state?
431  *
432  * Yes in PM_RUN if XLogArchivingActive(); and, if XLogArchivingAlways(),
433  * also during recovery (PM_RECOVERY or PM_HOT_STANDBY).
434  */
435 #define PgArchStartupAllowed() \
436  ((XLogArchivingActive() && pmState == PM_RUN) || \
437  (XLogArchivingAlways() && \
438  (pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY)))
439 
440 #ifdef EXEC_BACKEND
441 
442 #ifdef WIN32
443 #define WNOHANG 0 /* ignored, so any integer value will do */
444 
445 static pid_t waitpid(pid_t pid, int *exitstatus, int options);
446 static void WINAPI pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired);
447 
448 static HANDLE win32ChildQueue;
449 
450 typedef struct
451 {
452  HANDLE waitHandle;
453  HANDLE procHandle;
454  DWORD procId;
455 } win32_deadchild_waitinfo;
456 #endif /* WIN32 */
457 
458 static pid_t backend_forkexec(Port *port);
459 static pid_t internal_forkexec(int argc, char *argv[], Port *port);
460 
461 /* Type for a socket that can be inherited by an exec'ed child process */
462 #ifdef WIN32
463 typedef struct
464 {
465  SOCKET origsocket; /* Original socket value, or PGINVALID_SOCKET
466  * if not a socket */
467  WSAPROTOCOL_INFO wsainfo; /* NOTE(review): presumably the duplication info used to re-create the socket in the child -- confirm */
468 } InheritableSocket;
469 #else
470 typedef int InheritableSocket; /* non-Windows: a plain int */
471 #endif
472 
473 /*
474  * Structure containing all variables passed to exec'ed backends
475  */
476 typedef struct
477 {
478  Port port;
479  InheritableSocket portsocket;
480  char DataDir[MAXPGPATH];
483  int MyPMChildSlot;
484 #ifndef WIN32
485  unsigned long UsedShmemSegID;
486 #else
487  HANDLE UsedShmemSegID;
488 #endif
489  void *UsedShmemSegAddr;
492  Backend *ShmemBackendArray;
493 #ifndef HAVE_SPINLOCKS
495 #endif
504  InheritableSocket pgStatSock;
505  pid_t PostmasterPid;
509  bool redirection_done;
510  bool IsBinaryUpgrade;
511  int max_safe_fds;
512  int MaxBackends;
513 #ifdef WIN32
514  HANDLE PostmasterHandle;
515  HANDLE initial_signal_pipe;
516  HANDLE syslogPipe[2];
517 #else
518  int postmaster_alive_fds[2];
519  int syslogPipe[2];
520 #endif
521  char my_exec_path[MAXPGPATH];
522  char pkglib_path[MAXPGPATH];
523  char ExtraOptions[MAXPGPATH];
524 } BackendParameters;
525 
526 static void read_backend_variables(char *id, Port *port);
527 static void restore_backend_variables(BackendParameters *param, Port *port);
528 
529 #ifndef WIN32
530 static bool save_backend_variables(BackendParameters *param, Port *port);
531 #else
532 static bool save_backend_variables(BackendParameters *param, Port *port,
533  HANDLE childProcess, pid_t childPid);
534 #endif
535 
536 static void ShmemBackendArrayAdd(Backend *bn);
537 static void ShmemBackendArrayRemove(Backend *bn);
538 #endif /* EXEC_BACKEND */
539 
/* Shorthand launchers: each starts one auxiliary process via StartChildProcess() */
540 #define StartupDataBase() StartChildProcess(StartupProcess)
541 #define StartBackgroundWriter() StartChildProcess(BgWriterProcess)
542 #define StartCheckpointer() StartChildProcess(CheckpointerProcess)
543 #define StartWalWriter() StartChildProcess(WalWriterProcess)
544 #define StartWalReceiver() StartChildProcess(WalReceiverProcess)
545 
546 /* Macros to check the exit status of a child process; "st" is the raw wait status */
547 #define EXIT_STATUS_0(st) ((st) == 0) /* compares the whole status word to zero */
548 #define EXIT_STATUS_1(st) (WIFEXITED(st) && WEXITSTATUS(st) == 1) /* normal exit with code 1 */
549 #define EXIT_STATUS_3(st) (WIFEXITED(st) && WEXITSTATUS(st) == 3) /* normal exit with code 3 */
550 
551 #ifndef WIN32
552 /*
553  * File descriptors for pipe used to monitor if postmaster is alive.
554  * First is POSTMASTER_FD_WATCH, second is POSTMASTER_FD_OWN.
555  */
556 int postmaster_alive_fds[2] = {-1, -1};
557 #else
558 /* Process handle of postmaster used for the same purpose on Windows */
559 HANDLE PostmasterHandle;
560 #endif
561 
562 /*
563  * Postmaster main entry point
564  */
565 void
566 PostmasterMain(int argc, char *argv[])
567 {
568  int opt;
569  int status;
570  char *userDoption = NULL;
571  bool listen_addr_saved = false;
572  int i;
573  char *output_config_variable = NULL;
574 
575  MyProcPid = PostmasterPid = getpid();
576 
577  MyStartTime = time(NULL);
578 
580 
581  /*
582  * for security, no dir or file created can be group or other accessible
583  */
584  umask(S_IRWXG | S_IRWXO);
585 
586  /*
587  * Initialize random(3) so we don't get the same values in every run.
588  *
589  * Note: the seed is pretty predictable from externally-visible facts such
590  * as postmaster start time, so avoid using random() for security-critical
591  * random values during postmaster startup. At the time of first
592  * connection, PostmasterRandom will select a hopefully-more-random seed.
593  */
594  srandom((unsigned int) (MyProcPid ^ MyStartTime));
595 
596  /*
597  * By default, palloc() requests in the postmaster will be allocated in
598  * the PostmasterContext, which is space that can be recycled by backends.
599  * Allocated data that needs to be available to backends should be
600  * allocated in TopMemoryContext.
601  */
603  "Postmaster",
606 
607  /* Initialize paths to installation files */
608  getInstallationPaths(argv[0]);
609 
610  /*
611  * Set up signal handlers for the postmaster process.
612  *
613  * CAUTION: when changing this list, check for side-effects on the signal
614  * handling setup of child processes. See tcop/postgres.c,
615  * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
616  * postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/pgstat.c,
617  * postmaster/syslogger.c, postmaster/bgworker.c and
618  * postmaster/checkpointer.c.
619  */
620  pqinitmask();
622 
623  pqsignal(SIGHUP, SIGHUP_handler); /* reread config file and have
624  * children do same */
625  pqsignal(SIGINT, pmdie); /* send SIGTERM and shut down */
626  pqsignal(SIGQUIT, pmdie); /* send SIGQUIT and die */
627  pqsignal(SIGTERM, pmdie); /* wait for children and shut down */
628  pqsignal(SIGALRM, SIG_IGN); /* ignored */
629  pqsignal(SIGPIPE, SIG_IGN); /* ignored */
630  pqsignal(SIGUSR1, sigusr1_handler); /* message from child process */
631  pqsignal(SIGUSR2, dummy_handler); /* unused, reserve for children */
632  pqsignal(SIGCHLD, reaper); /* handle child termination */
633  pqsignal(SIGTTIN, SIG_IGN); /* ignored */
634  pqsignal(SIGTTOU, SIG_IGN); /* ignored */
635  /* ignore SIGXFSZ, so that ulimit violations work like disk full */
636 #ifdef SIGXFSZ
637  pqsignal(SIGXFSZ, SIG_IGN); /* ignored */
638 #endif
639 
640  /*
641  * Options setup
642  */
644 
645  opterr = 1;
646 
647  /*
648  * Parse command-line options. CAUTION: keep this in sync with
649  * tcop/postgres.c (the option sets should not conflict) and with the
650  * common help() function in main/main.c.
651  */
652  while ((opt = getopt(argc, argv, "B:bc:C:D:d:EeFf:h:ijk:lN:nOo:Pp:r:S:sTt:W:-:")) != -1)
653  {
654  switch (opt)
655  {
656  case 'B':
657  SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
658  break;
659 
660  case 'b':
661  /* Undocumented flag used for binary upgrades */
662  IsBinaryUpgrade = true;
663  break;
664 
665  case 'C':
666  output_config_variable = strdup(optarg);
667  break;
668 
669  case 'D':
670  userDoption = strdup(optarg);
671  break;
672 
673  case 'd':
675  break;
676 
677  case 'E':
678  SetConfigOption("log_statement", "all", PGC_POSTMASTER, PGC_S_ARGV);
679  break;
680 
681  case 'e':
682  SetConfigOption("datestyle", "euro", PGC_POSTMASTER, PGC_S_ARGV);
683  break;
684 
685  case 'F':
686  SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
687  break;
688 
689  case 'f':
691  {
692  write_stderr("%s: invalid argument for option -f: \"%s\"\n",
693  progname, optarg);
694  ExitPostmaster(1);
695  }
696  break;
697 
698  case 'h':
699  SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
700  break;
701 
702  case 'i':
703  SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
704  break;
705 
706  case 'j':
707  /* only used by interactive backend */
708  break;
709 
710  case 'k':
711  SetConfigOption("unix_socket_directories", optarg, PGC_POSTMASTER, PGC_S_ARGV);
712  break;
713 
714  case 'l':
715  SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
716  break;
717 
718  case 'N':
719  SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
720  break;
721 
722  case 'n':
723  /* Don't reinit shared mem after abnormal exit */
724  Reinit = false;
725  break;
726 
727  case 'O':
728  SetConfigOption("allow_system_table_mods", "true", PGC_POSTMASTER, PGC_S_ARGV);
729  break;
730 
731  case 'o':
732  /* Other options to pass to the backend on the command line */
734  sizeof(ExtraOptions) - strlen(ExtraOptions),
735  " %s", optarg);
736  break;
737 
738  case 'P':
739  SetConfigOption("ignore_system_indexes", "true", PGC_POSTMASTER, PGC_S_ARGV);
740  break;
741 
742  case 'p':
744  break;
745 
746  case 'r':
747  /* only used by single-user backend */
748  break;
749 
750  case 'S':
752  break;
753 
754  case 's':
755  SetConfigOption("log_statement_stats", "true", PGC_POSTMASTER, PGC_S_ARGV);
756  break;
757 
758  case 'T':
759 
760  /*
761  * In the event that some backend dumps core, send SIGSTOP,
762  * rather than SIGQUIT, to all its peers. This lets the wily
763  * post_hacker collect core dumps from everyone.
764  */
765  SendStop = true;
766  break;
767 
768  case 't':
769  {
770  const char *tmp = get_stats_option_name(optarg);
771 
772  if (tmp)
773  {
775  }
776  else
777  {
778  write_stderr("%s: invalid argument for option -t: \"%s\"\n",
779  progname, optarg);
780  ExitPostmaster(1);
781  }
782  break;
783  }
784 
785  case 'W':
786  SetConfigOption("post_auth_delay", optarg, PGC_POSTMASTER, PGC_S_ARGV);
787  break;
788 
789  case 'c':
790  case '-':
791  {
792  char *name,
793  *value;
794 
795  ParseLongOption(optarg, &name, &value);
796  if (!value)
797  {
798  if (opt == '-')
799  ereport(ERROR,
800  (errcode(ERRCODE_SYNTAX_ERROR),
801  errmsg("--%s requires a value",
802  optarg)));
803  else
804  ereport(ERROR,
805  (errcode(ERRCODE_SYNTAX_ERROR),
806  errmsg("-c %s requires a value",
807  optarg)));
808  }
809 
811  free(name);
812  if (value)
813  free(value);
814  break;
815  }
816 
817  default:
818  write_stderr("Try \"%s --help\" for more information.\n",
819  progname);
820  ExitPostmaster(1);
821  }
822  }
823 
824  /*
825  * Postmaster accepts no non-option switch arguments.
826  */
827  if (optind < argc)
828  {
829  write_stderr("%s: invalid argument: \"%s\"\n",
830  progname, argv[optind]);
831  write_stderr("Try \"%s --help\" for more information.\n",
832  progname);
833  ExitPostmaster(1);
834  }
835 
836  /*
837  * Locate the proper configuration files and data directory, and read
838  * postgresql.conf for the first time.
839  */
840  if (!SelectConfigFiles(userDoption, progname))
841  ExitPostmaster(2);
842 
843  if (output_config_variable != NULL)
844  {
845  /*
846  * "-C guc" was specified, so print GUC's value and exit. No extra
847  * permission check is needed because the user is reading inside the
848  * data dir.
849  */
850  const char *config_val = GetConfigOption(output_config_variable,
851  false, false);
852 
853  puts(config_val ? config_val : "");
854  ExitPostmaster(0);
855  }
856 
857  /* Verify that DataDir looks reasonable */
858  checkDataDir();
859 
860  /* And switch working directory into it */
861  ChangeToDataDir();
862 
863  /*
864  * Check for invalid combinations of GUC settings.
865  */
867  {
868  write_stderr("%s: superuser_reserved_connections must be less than max_connections\n", progname);
869  ExitPostmaster(1);
870  }
872  {
873  write_stderr("%s: max_wal_senders must be less than max_connections\n", progname);
874  ExitPostmaster(1);
875  }
877  ereport(ERROR,
878  (errmsg("WAL archival cannot be enabled when wal_level is \"minimal\"")));
880  ereport(ERROR,
881  (errmsg("WAL streaming (max_wal_senders > 0) requires wal_level \"replica\" or \"logical\"")));
882 
883  /*
884  * Other one-time internal sanity checks can go here, if they are fast.
885  * (Put any slow processing further down, after postmaster.pid creation.)
886  */
887  if (!CheckDateTokenTables())
888  {
889  write_stderr("%s: invalid datetoken tables, please fix\n", progname);
890  ExitPostmaster(1);
891  }
892 
893  /*
894  * Now that we are done processing the postmaster arguments, reset
895  * getopt(3) library so that it will work correctly in subprocesses.
896  */
897  optind = 1;
898 #ifdef HAVE_INT_OPTRESET
899  optreset = 1; /* some systems need this too */
900 #endif
901 
902  /* For debugging: display postmaster environment */
903  {
904  extern char **environ;
905  char **p;
906 
907  ereport(DEBUG3,
908  (errmsg_internal("%s: PostmasterMain: initial environment dump:",
909  progname)));
910  ereport(DEBUG3,
911  (errmsg_internal("-----------------------------------------")));
912  for (p = environ; *p; ++p)
913  ereport(DEBUG3,
914  (errmsg_internal("\t%s", *p)));
915  ereport(DEBUG3,
916  (errmsg_internal("-----------------------------------------")));
917  }
918 
919  /*
920  * Create lockfile for data directory.
921  *
922  * We want to do this before we try to grab the input sockets, because the
923  * data directory interlock is more reliable than the socket-file
924  * interlock (thanks to whoever decided to put socket files in /tmp :-().
925  * For the same reason, it's best to grab the TCP socket(s) before the
926  * Unix socket(s).
927  *
928  * Also note that this internally sets up the on_proc_exit function that
929  * is responsible for removing both data directory and socket lockfiles;
930  * so it must happen before opening sockets so that at exit, the socket
931  * lockfiles go away after CloseServerPorts runs.
932  */
933  CreateDataDirLockFile(true);
934 
935  /*
936  * Initialize SSL library, if specified.
937  */
938 #ifdef USE_SSL
939  if (EnableSSL)
940  {
941  (void) secure_initialize(true);
942  LoadedSSL = true;
943  }
944 #endif
945 
946  /*
947  * Register the apply launcher. Since it registers a background worker,
948  * it needs to be called before InitializeMaxBackends(), and it's probably
949  * a good idea to call it before any modules had chance to take the
950  * background worker slots.
951  */
953 
954  /*
955  * process any libraries that should be preloaded at postmaster start
956  */
958 
959  /*
960  * Now that loadable modules have had their chance to register background
961  * workers, calculate MaxBackends.
962  */
964 
965  /*
966  * Establish input sockets.
967  *
968  * First, mark them all closed, and set up an on_proc_exit function that's
969  * charged with closing the sockets again at postmaster shutdown.
970  */
971  for (i = 0; i < MAXLISTEN; i++)
973 
975 
976  if (ListenAddresses)
977  {
978  char *rawstring;
979  List *elemlist;
980  ListCell *l;
981  int success = 0;
982 
983  /* Need a modifiable copy of ListenAddresses */
984  rawstring = pstrdup(ListenAddresses);
985 
986  /* Parse string into list of hostnames */
987  if (!SplitIdentifierString(rawstring, ',', &elemlist))
988  {
989  /* syntax error in list */
990  ereport(FATAL,
991  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
992  errmsg("invalid list syntax in parameter \"%s\"",
993  "listen_addresses")));
994  }
995 
996  foreach(l, elemlist)
997  {
998  char *curhost = (char *) lfirst(l);
999 
1000  if (strcmp(curhost, "*") == 0)
1001  status = StreamServerPort(AF_UNSPEC, NULL,
1002  (unsigned short) PostPortNumber,
1003  NULL,
1005  else
1006  status = StreamServerPort(AF_UNSPEC, curhost,
1007  (unsigned short) PostPortNumber,
1008  NULL,
1009  ListenSocket, MAXLISTEN);
1010 
1011  if (status == STATUS_OK)
1012  {
1013  success++;
1014  /* record the first successful host addr in lockfile */
1015  if (!listen_addr_saved)
1016  {
1018  listen_addr_saved = true;
1019  }
1020  }
1021  else
1022  ereport(WARNING,
1023  (errmsg("could not create listen socket for \"%s\"",
1024  curhost)));
1025  }
1026 
1027  if (!success && elemlist != NIL)
1028  ereport(FATAL,
1029  (errmsg("could not create any TCP/IP sockets")));
1030 
1031  list_free(elemlist);
1032  pfree(rawstring);
1033  }
1034 
1035 #ifdef USE_BONJOUR
1036  /* Register for Bonjour only if we opened TCP socket(s) */
1038  {
1039  DNSServiceErrorType err;
1040 
1041  /*
1042  * We pass 0 for interface_index, which will result in registering on
1043  * all "applicable" interfaces. It's not entirely clear from the
1044  * DNS-SD docs whether this would be appropriate if we have bound to
1045  * just a subset of the available network interfaces.
1046  */
1047  err = DNSServiceRegister(&bonjour_sdref,
1048  0,
1049  0,
1050  bonjour_name,
1051  "_postgresql._tcp.",
1052  NULL,
1053  NULL,
1054  htons(PostPortNumber),
1055  0,
1056  NULL,
1057  NULL,
1058  NULL);
1059  if (err != kDNSServiceErr_NoError)
1060  elog(LOG, "DNSServiceRegister() failed: error code %ld",
1061  (long) err);
1062 
1063  /*
1064  * We don't bother to read the mDNS daemon's reply, and we expect that
1065  * it will automatically terminate our registration when the socket is
1066  * closed at postmaster termination. So there's nothing more to be
1067  * done here. However, the bonjour_sdref is kept around so that
1068  * forked children can close their copies of the socket.
1069  */
1070  }
1071 #endif
1072 
1073 #ifdef HAVE_UNIX_SOCKETS
1075  {
1076  char *rawstring;
1077  List *elemlist;
1078  ListCell *l;
1079  int success = 0;
1080 
1081  /* Need a modifiable copy of Unix_socket_directories */
1082  rawstring = pstrdup(Unix_socket_directories);
1083 
1084  /* Parse string into list of directories */
1085  if (!SplitDirectoriesString(rawstring, ',', &elemlist))
1086  {
1087  /* syntax error in list */
1088  ereport(FATAL,
1089  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1090  errmsg("invalid list syntax in parameter \"%s\"",
1091  "unix_socket_directories")));
1092  }
1093 
1094  foreach(l, elemlist)
1095  {
1096  char *socketdir = (char *) lfirst(l);
1097 
1098  status = StreamServerPort(AF_UNIX, NULL,
1099  (unsigned short) PostPortNumber,
1100  socketdir,
1101  ListenSocket, MAXLISTEN);
1102 
1103  if (status == STATUS_OK)
1104  {
1105  success++;
1106  /* record the first successful Unix socket in lockfile */
1107  if (success == 1)
1109  }
1110  else
1111  ereport(WARNING,
1112  (errmsg("could not create Unix-domain socket in directory \"%s\"",
1113  socketdir)));
1114  }
1115 
1116  if (!success && elemlist != NIL)
1117  ereport(FATAL,
1118  (errmsg("could not create any Unix-domain sockets")));
1119 
1120  list_free_deep(elemlist);
1121  pfree(rawstring);
1122  }
1123 #endif
1124 
1125  /*
1126  * check that we have some socket to listen on
1127  */
1128  if (ListenSocket[0] == PGINVALID_SOCKET)
1129  ereport(FATAL,
1130  (errmsg("no socket created for listening")));
1131 
1132  /*
1133  * If no valid TCP ports, write an empty line for listen address,
1134  * indicating the Unix socket must be used. Note that this line is not
1135  * added to the lock file until there is a socket backing it.
1136  */
1137  if (!listen_addr_saved)
1139 
1140  /*
1141  * Set up shared memory and semaphores.
1142  */
1144 
1145  /*
1146  * Estimate number of openable files. This must happen after setting up
1147  * semaphores, because on some platforms semaphores count as open files.
1148  */
1149  set_max_safe_fds();
1150 
1151  /*
1152  * Set reference point for stack-depth checking.
1153  */
1154  set_stack_base();
1155 
1156  /*
1157  * Initialize pipe (or process handle on Windows) that allows children to
1158  * wake up from sleep on postmaster death.
1159  */
1161 
1162 #ifdef WIN32
1163 
1164  /*
1165  * Initialize I/O completion port used to deliver list of dead children.
1166  */
1167  win32ChildQueue = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 1);
1168  if (win32ChildQueue == NULL)
1169  ereport(FATAL,
1170  (errmsg("could not create I/O completion port for child queue")));
1171 #endif
1172 
1173  /*
1174  * Record postmaster options. We delay this till now to avoid recording
1175  * bogus options (eg, NBuffers too high for available memory).
1176  */
1177  if (!CreateOptsFile(argc, argv, my_exec_path))
1178  ExitPostmaster(1);
1179 
1180 #ifdef EXEC_BACKEND
1181  /* Write out nondefault GUC settings for child processes to use */
1182  write_nondefault_variables(PGC_POSTMASTER);
1183 #endif
1184 
1185  /*
1186  * Write the external PID file if requested
1187  */
1188  if (external_pid_file)
1189  {
1190  FILE *fpidfile = fopen(external_pid_file, "w");
1191 
1192  if (fpidfile)
1193  {
1194  fprintf(fpidfile, "%d\n", MyProcPid);
1195  fclose(fpidfile);
1196 
1197  /* Make PID file world readable */
1198  if (chmod(external_pid_file, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) != 0)
1199  write_stderr("%s: could not change permissions of external PID file \"%s\": %s\n",
1201  }
1202  else
1203  write_stderr("%s: could not write external PID file \"%s\": %s\n",
1205 
1207  }
1208 
1209  /*
1210  * Remove old temporary files. At this point there can be no other
1211  * Postgres processes running in this directory, so this should be safe.
1212  */
1214 
1215  /*
1216  * Forcibly remove the files signaling a standby promotion request.
1217  * Otherwise, the existence of those files triggers a promotion too early,
1218  * whether a user wants that or not.
1219  *
1220  * This removal of files is usually unnecessary because they can exist
1221  * only during a few moments during a standby promotion. However there is
1222  * a race condition: if pg_ctl promote is executed and creates the files
1223  * during a promotion, the files can stay around even after the server is
1224  * brought up to new master. Then, if new standby starts by using the
1225  * backup taken from that master, the files can exist at the server
1226  * startup and should be removed in order to avoid an unexpected
1227  * promotion.
1228  *
1229  * Note that promotion signal files need to be removed before the startup
1230  * process is invoked, because after that they can be used by
1231  * postmaster's SIGUSR1 signal handler.
1232  */
1234 
1235  /* Remove any outdated file holding the current log filenames. */
1236  if (unlink(LOG_METAINFO_DATAFILE) < 0 && errno != ENOENT)
1237  ereport(LOG,
1239  errmsg("could not remove file \"%s\": %m",
1241 
1242  /*
1243  * If enabled, start up syslogger collection subprocess
1244  */
1246 
1247  /*
1248  * Reset whereToSendOutput from DestDebug (its starting state) to
1249  * DestNone. This stops ereport from sending log messages to stderr unless
1250  * Log_destination permits. We don't do this until the postmaster is
1251  * fully launched, since startup failures may as well be reported to
1252  * stderr.
1253  *
1254  * If we are in fact disabling logging to stderr, first emit a log message
1255  * saying so, to provide a breadcrumb trail for users who may not remember
1256  * that their logging is configured to go somewhere else.
1257  */
1259  ereport(LOG,
1260  (errmsg("ending log output to stderr"),
1261  errhint("Future log output will go to log destination \"%s\".",
1263 
1265 
1266  /*
1267  * Initialize stats collection subsystem (this does NOT start the
1268  * collector process!)
1269  */
1270  pgstat_init();
1271 
1272  /*
1273  * Initialize the autovacuum subsystem (again, no process start yet)
1274  */
1275  autovac_init();
1276 
1277  /*
1278  * Load configuration files for client authentication.
1279  */
1280  if (!load_hba())
1281  {
1282  /*
1283  * It makes no sense to continue if we fail to load the HBA file,
1284  * since there is no way to connect to the database in this case.
1285  */
1286  ereport(FATAL,
1287  (errmsg("could not load pg_hba.conf")));
1288  }
1289  if (!load_ident())
1290  {
1291  /*
1292  * We can start up without the IDENT file, although it means that you
1293  * cannot log in using any of the authentication methods that need a
1294  * user name mapping. load_ident() already logged the details of error
1295  * to the log.
1296  */
1297  }
1298 
1299 #ifdef HAVE_PTHREAD_IS_THREADED_NP
1300 
1301  /*
1302  * On macOS, libintl replaces setlocale() with a version that calls
1303  * CFLocaleCopyCurrent() when its second argument is "" and every relevant
1304  * environment variable is unset or empty. CFLocaleCopyCurrent() makes
1305  * the process multithreaded. The postmaster calls sigprocmask() and
1306  * calls fork() without an immediate exec(), both of which have undefined
1307  * behavior in a multithreaded program. A multithreaded postmaster is the
1308  * normal case on Windows, which offers neither fork() nor sigprocmask().
1309  */
1310  if (pthread_is_threaded_np() != 0)
1311  ereport(FATAL,
1312  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1313  errmsg("postmaster became multithreaded during startup"),
1314  errhint("Set the LC_ALL environment variable to a valid locale.")));
1315 #endif
1316 
1317  /*
1318  * Remember postmaster startup time
1319  */
1321 #ifndef HAVE_STRONG_RANDOM
1322  /* RandomCancelKey wants its own copy */
1324 #endif
1325 
1326  /*
1327  * We're ready to rock and roll...
1328  */
1330  Assert(StartupPID != 0);
1332  pmState = PM_STARTUP;
1333 
1334  /* Some workers may be scheduled to start now */
1336 
1337  status = ServerLoop();
1338 
1339  /*
1340  * ServerLoop probably shouldn't ever return, but if it does, close down.
1341  */
1342  ExitPostmaster(status != STATUS_OK);
1343 
1344  abort(); /* not reached */
1345 }
1346 
1347 
1348 /*
1349  * on_proc_exit callback to close server's listen sockets
1350  */
1351 static void
 /*
  * NOTE(review): listing line 1352 (the function name and parameter list) is
  * missing from this extract; restore it from the upstream file.
  */
1353 {
 /* Loop index over the ListenSocket[] slots. */
1354  int i;
1355 
1356  /*
1357  * First, explicitly close all the socket FDs. We used to just let this
1358  * happen implicitly at postmaster exit, but it's better to close them
1359  * before we remove the postmaster.pid lockfile; otherwise there's a race
1360  * condition if a new postmaster wants to re-use the TCP port number.
1361  */
1362  for (i = 0; i < MAXLISTEN; i++)
1363  {
 /* Only slots that hold a valid socket are acted upon. */
1364  if (ListenSocket[i] != PGINVALID_SOCKET)
1365  {
 /*
  * NOTE(review): listing lines 1366-1367 are missing from this extract, so
  * this branch has no visible body. Per the comment above it presumably
  * closes ListenSocket[i] -- confirm against the upstream file.
  */
1368  }
1369  }
1370 
1371  /*
1372  * Next, remove any filesystem entries for Unix sockets. To avoid race
1373  * conditions against incoming postmasters, this must happen after closing
1374  * the sockets and before removing lock files.
1375  */
 /*
  * NOTE(review): listing line 1376 (the statement the comment above
  * describes) is missing from this extract.
  */
1377 
1378  /*
1379  * We don't do anything about socket lock files here; those will be
1380  * removed in a later on_proc_exit callback.
1381  */
1382 }
1383 
1384 /*
1385  * on_proc_exit callback to delete external_pid_file
1386  */
1387 static void
 /*
  * NOTE(review): listing line 1388 (the function name and parameter list) is
  * missing from this extract; restore it from the upstream file.
  */
1389 {
 /* Act only when an external PID file has been configured. */
1390  if (external_pid_file)
 /*
  * NOTE(review): listing line 1391 (the statement guarded by the test above)
  * is missing from this extract. Per the header comment it presumably
  * deletes the file named by external_pid_file -- confirm against upstream.
  */
1392 }
1393 
1394 
1395 /*
1396  * Compute and check the directory paths to files that are part of the
1397  * installation (as deduced from the postgres executable's own location)
1398  */
1399 static void
 /*
  * NOTE(review): listing line 1400 (the function name and parameter list) is
  * missing from this extract. argv0, used below, is presumably declared
  * there -- confirm against the upstream file.
  */
1401 {
 /* Handle used only to probe that pkglib_path can be opened. */
1402  DIR *pdir;
1403 
1404  /* Locate the postgres executable itself */
1405  if (find_my_exec(argv0, my_exec_path) < 0)
1406  elog(FATAL, "%s: could not locate my own executable path", argv0);
1407 
1408 #ifdef EXEC_BACKEND
1409  /* Locate executable backend before we change working directory */
1410  if (find_other_exec(argv0, "postgres", PG_BACKEND_VERSIONSTR,
1411  postgres_exec_path) < 0)
1412  ereport(FATAL,
1413  (errmsg("%s: could not locate matching postgres executable",
1414  argv0)));
1415 #endif
1416 
1417  /*
1418  * Locate the pkglib directory --- this has to be set early in case we try
1419  * to load any modules from it in response to postgresql.conf entries.
1420  */
 /*
  * NOTE(review): listing line 1421 (the statement that performs this lookup)
  * is missing from this extract.
  */
1422 
1423  /*
1424  * Verify that there's a readable directory there; otherwise the Postgres
1425  * installation is incomplete or corrupt. (A typical cause of this
1426  * failure is that the postgres executable has been moved or hardlinked to
1427  * some directory that's not a sibling of the installation lib/
1428  * directory.)
1429  */
1430  pdir = AllocateDir(pkglib_path);
1431  if (pdir == NULL)
1432  ereport(ERROR,
 /*
  * NOTE(review): listing line 1433 is missing from this extract, which
  * leaves this ereport() argument list one opening parenthesis short.
  */
1434  errmsg("could not open directory \"%s\": %m",
1435  pkglib_path),
1436  errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
1437  my_exec_path)));
 /* The directory was opened only as an existence/readability probe. */
1438  FreeDir(pdir);
1439 
1440  /*
1441  * XXX is it worth similarly checking the share/ directory? If the lib/
1442  * directory is there, then share/ probably is too.
1443  */
1444 }
1445 
1446 
1447 /*
1448  * Validate the proposed data directory
1449  */
1450 static void
 /*
  * NOTE(review): listing line 1451 (the function name and parameter list) is
  * missing from this extract; restore it from the upstream file.
  */
1452 {
1453  char path[MAXPGPATH];
1454  FILE *fp;
1455  struct stat stat_buf;
1456 
1457  Assert(DataDir);
1458 
 /*
  * Fetch the directory's metadata. Any failure is reported at FATAL level,
  * with a dedicated message when the directory does not exist.
  */
1459  if (stat(DataDir, &stat_buf) != 0)
1460  {
1461  if (errno == ENOENT)
1462  ereport(FATAL,
 /*
  * NOTE(review): listing line 1463 is missing from this extract, which
  * leaves this ereport() argument list one opening parenthesis short.
  */
1464  errmsg("data directory \"%s\" does not exist",
1465  DataDir)));
1466  else
1467  ereport(FATAL,
 /* NOTE(review): listing line 1468 is missing here as well (same symptom). */
1469  errmsg("could not read permissions of directory \"%s\": %m",
1470  DataDir)));
1471  }
1472 
1473  /* eventual chdir would fail anyway, but let's test ... */
1474  if (!S_ISDIR(stat_buf.st_mode))
1475  ereport(FATAL,
1476  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1477  errmsg("specified data directory \"%s\" is not a directory",
1478  DataDir)));
1479 
1480  /*
1481  * Check that the directory belongs to my userid; if not, reject.
1482  *
1483  * This check is an essential part of the interlock that prevents two
1484  * postmasters from starting in the same directory (see CreateLockFile()).
1485  * Do not remove or weaken it.
1486  *
1487  * XXX can we safely enable this check on Windows?
1488  */
1489 #if !defined(WIN32) && !defined(__CYGWIN__)
1490  if (stat_buf.st_uid != geteuid())
1491  ereport(FATAL,
1492  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1493  errmsg("data directory \"%s\" has wrong ownership",
1494  DataDir),
1495  errhint("The server must be started by the user that owns the data directory.")));
1496 #endif
1497 
1498  /*
1499  * Check if the directory has group or world access. If so, reject.
1500  *
1501  * It would be possible to allow weaker constraints (for example, allow
1502  * group access) but we cannot make a general assumption that that is
1503  * okay; for example there are platforms where nearly all users
1504  * customarily belong to the same group. Perhaps this test should be
1505  * configurable.
1506  *
1507  * XXX temporarily suppress check when on Windows, because there may not
1508  * be proper support for Unix-y file permissions. Need to think of a
1509  * reasonable check to apply on Windows.
1510  */
1511 #if !defined(WIN32) && !defined(__CYGWIN__)
1512  if (stat_buf.st_mode & (S_IRWXG | S_IRWXO))
1513  ereport(FATAL,
1514  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1515  errmsg("data directory \"%s\" has group or world access",
1516  DataDir),
1517  errdetail("Permissions should be u=rwx (0700).")));
1518 #endif
1519 
1520  /* Look for PG_VERSION before looking for pg_control */
 /*
  * NOTE(review): listing line 1521 (the statement that performs the
  * PG_VERSION check) is missing from this extract.
  */
1522 
 /*
  * Last check: global/pg_control under DataDir must be openable. If it is
  * not, the problem is written to stderr and the postmaster exits with
  * status 2 rather than going through ereport().
  */
1523  snprintf(path, sizeof(path), "%s/global/pg_control", DataDir);
1524 
1525  fp = AllocateFile(path, PG_BINARY_R);
1526  if (fp == NULL)
1527  {
1528  write_stderr("%s: could not find the database system\n"
1529  "Expected to find it in the directory \"%s\",\n"
1530  "but could not open file \"%s\": %s\n",
1531  progname, DataDir, path, strerror(errno));
1532  ExitPostmaster(2);
1533  }
 /* The file was opened only as an existence probe; nothing is read. */
1534  FreeFile(fp);
1535 }
1536 
1537 /*
1538  * Determine how long we should let ServerLoop sleep.
1539  *
1540  * In normal conditions we wait at most one minute, to ensure that the other
1541  * background tasks handled by ServerLoop get done even when no requests are
1542  * arriving. However, if there are background workers waiting to be started,
1543  * we don't actually sleep so that they are quickly serviced. Other exception
1544  * cases are as shown in the code.
1545  */
1546 static void
1547 DetermineSleepTime(struct timeval * timeout)
1548 {
 /*
  * timeout is an output parameter: every path through this function stores
  * both tv_sec and tv_usec before returning. The result never exceeds 60
  * seconds, except that the AbortStartTime branch computes its own value
  * from SIGKILL_CHILDREN_AFTER_SECS.
  *
  * next_wakeup stays 0 until a restartable crashed worker is found; 0 is
  * therefore used below as the "no wakeup scheduled" marker.
  */
1549  TimestampTz next_wakeup = 0;
1550 
1551  /*
1552  * Normal case: either there are no background workers at all, or we're in
1553  * a shutdown sequence (during which we ignore bgworkers altogether).
1554  */
1555  if (Shutdown > NoShutdown ||
 /*
  * NOTE(review): listing line 1556 (the rest of this condition) is missing
  * from this extract; restore it from the upstream file.
  */
1557  {
1558  if (AbortStartTime != 0)
1559  {
1560  /* time left to abort; clamp to 0 in case it already expired */
1561  timeout->tv_sec = SIGKILL_CHILDREN_AFTER_SECS -
1562  (time(NULL) - AbortStartTime);
1563  timeout->tv_sec = Max(timeout->tv_sec, 0);
1564  timeout->tv_usec = 0;
1565  }
1566  else
1567  {
 /* Nothing pending: use the default one-minute sleep. */
1568  timeout->tv_sec = 60;
1569  timeout->tv_usec = 0;
1570  }
1571  return;
1572  }
1573 
 /* A worker is waiting to be started: do not sleep at all. */
1574  if (StartWorkerNeeded)
1575  {
1576  timeout->tv_sec = 0;
1577  timeout->tv_usec = 0;
1578  return;
1579  }
1580 
1581  if (HaveCrashedWorker)
1582  {
1583  slist_mutable_iter siter;
1584 
1585  /*
1586  * When there are crashed bgworkers, we sleep just long enough that
1587  * they are restarted when they request to be. Scan the list to
1588  * determine the minimum of all wakeup times according to most recent
1589  * crash time and requested restart interval.
1590  */
 /*
  * NOTE(review): listing line 1591 (the loop header that drives siter) is
  * missing from this extract.
  */
1592  {
1593  RegisteredBgWorker *rw;
1594  TimestampTz this_wakeup;
1595 
1596  rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
1597 
 /* Workers with no recorded crash time do not affect the wakeup. */
1598  if (rw->rw_crashed_at == 0)
1599  continue;
1600 
 /*
  * NOTE(review): listing line 1601 (the start of this condition) is missing
  * from this extract; only its final clause is visible below. Entries
  * matching the condition are dropped via ForgetBackgroundWorker().
  */
1602  || rw->rw_terminate)
1603  {
1604  ForgetBackgroundWorker(&siter);
1605  continue;
1606  }
1607 
 /* bgw_restart_time is scaled by 1000 to obtain milliseconds. */
1608  this_wakeup = TimestampTzPlusMilliseconds(rw->rw_crashed_at,
1609  1000L * rw->rw_worker.bgw_restart_time);
1610  if (next_wakeup == 0 || this_wakeup < next_wakeup)
1611  next_wakeup = this_wakeup;
1612  }
1613  }
1614 
1615  if (next_wakeup != 0)
1616  {
1617  long secs;
1618  int microsecs;
1619 
 /*
  * NOTE(review): listing line 1620 (the start of the call that fills secs
  * and microsecs) is missing from this extract; only its trailing
  * arguments are visible below.
  */
1621  &secs, &microsecs);
1622  timeout->tv_sec = secs;
1623  timeout->tv_usec = microsecs;
1624 
1625  /* Ensure we don't exceed one minute */
1626  if (timeout->tv_sec > 60)
1627  {
1628  timeout->tv_sec = 60;
1629  timeout->tv_usec = 0;
1630  }
1631  }
1632  else
1633  {
 /* No restart is scheduled: fall back to the one-minute default. */
1634  timeout->tv_sec = 60;
1635  timeout->tv_usec = 0;
1636  }
1637 }
1638 
1639 /*
1640  * Main idle loop of postmaster
1641  *
1642  * NB: Needs to be called with signals blocked
1643  */
1644 static int
 /*
  * NOTE(review): listing line 1645 (the function name and parameter list) is
  * missing from this extract; restore it from the upstream file.
  */
1646 {
 /*
  * Returns STATUS_ERROR when select() fails with an errno other than EINTR
  * or EWOULDBLOCK; no other exit from the for(;;) loop is visible here.
  *
  * NOTE(review): this extract also lacks listing lines 1676, 1691, 1704,
  * 1744, 1752, 1755, 1757, 1766, 1775, 1778, 1793, 1801-1802, 1833, 1836
  * and 1870, so several statements below appear without a body or with a
  * truncated condition. Restore them from the upstream file.
  */
1647  fd_set readmask;
1648  int nSockets;
1649  time_t last_lockfile_recheck_time,
1650  last_touch_time;
1651 
 /* Both periodic-task timers start counting from loop entry. */
1652  last_lockfile_recheck_time = last_touch_time = time(NULL);
1653 
 /* readmask is built once here and copied afresh on every iteration. */
1654  nSockets = initMasks(&readmask);
1655 
1656  for (;;)
1657  {
1658  fd_set rmask;
1659  int selres;
1660  time_t now;
1661 
1662  /*
1663  * Wait for a connection request to arrive.
1664  *
1665  * We block all signals except while sleeping. That makes it safe for
1666  * signal handlers, which again block all signals while executing, to
1667  * do nontrivial work.
1668  *
1669  * If we are in PM_WAIT_DEAD_END state, then we don't want to accept
1670  * any new connections, so we don't call select(), and just sleep.
1671  */
1672  memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
1673 
1674  if (pmState == PM_WAIT_DEAD_END)
1675  {
 /*
  * NOTE(review): listing line 1676 is missing here; given the comment above
  * and the PG_SETMASK(&BlockSig) after the sleep, it presumably unblocks
  * signals -- confirm against the upstream file.
  */
1677 
1678  pg_usleep(100000L); /* 100 msec seems reasonable */
 /* Report "nothing ready" so the connection scan below is skipped. */
1679  selres = 0;
1680 
1681  PG_SETMASK(&BlockSig);
1682  }
1683  else
1684  {
1685  /* must set timeout each time; some OSes change it! */
1686  struct timeval timeout;
1687 
1688  /* Needs to run with blocked signals! */
1689  DetermineSleepTime(&timeout);
1690 
 /* NOTE(review): listing line 1691 is missing here (see note at line 1676). */
1692 
1693  selres = select(nSockets, &rmask, NULL, NULL, &timeout);
1694 
1695  PG_SETMASK(&BlockSig);
1696  }
1697 
1698  /* Now check the select() result */
1699  if (selres < 0)
1700  {
 /* EINTR and EWOULDBLOCK are tolerated; anything else ends the loop. */
1701  if (errno != EINTR && errno != EWOULDBLOCK)
1702  {
1703  ereport(LOG,
1705  errmsg("select() failed in postmaster: %m")));
1706  return STATUS_ERROR;
1707  }
1708  }
1709 
1710  /*
1711  * New connection pending on any of our sockets? If so, fork a child
1712  * process to deal with it.
1713  */
1714  if (selres > 0)
1715  {
1716  int i;
1717 
1718  for (i = 0; i < MAXLISTEN; i++)
1719  {
 /* The first unused slot ends the scan. */
1720  if (ListenSocket[i] == PGINVALID_SOCKET)
1721  break;
1722  if (FD_ISSET(ListenSocket[i], &rmask))
1723  {
1724  Port *port;
1725 
1726  port = ConnCreate(ListenSocket[i]);
 /* A NULL port is skipped without further action in this function. */
1727  if (port)
1728  {
1729  BackendStartup(port);
1730 
1731  /*
1732  * We no longer need the open socket or port structure
1733  * in this process
1734  */
1735  StreamClose(port->sock);
1736  ConnFree(port);
1737  }
1738  }
1739  }
1740  }
1741 
1742  /* If we have lost the log collector, try to start a new one */
1743  if (SysLoggerPID == 0 && Logging_collector)
1745 
1746  /*
1747  * If no background writer process is running, and we are not in a
1748  * state that prevents it, start one. It doesn't matter if this
1749  * fails, we'll just try again later. Likewise for the checkpointer.
1750  */
1751  if (pmState == PM_RUN || pmState == PM_RECOVERY ||
1753  {
1754  if (CheckpointerPID == 0)
1756  if (BgWriterPID == 0)
1758  }
1759 
1760  /*
1761  * Likewise, if we have lost the walwriter process, try to start a new
1762  * one. But this is needed only in normal operation (else we cannot
1763  * be writing any new WAL).
1764  */
1765  if (WalWriterPID == 0 && pmState == PM_RUN)
1767 
1768  /*
1769  * If we have lost the autovacuum launcher, try to start a new one. We
1770  * don't want autovacuum to run in binary upgrade mode because
1771  * autovacuum might update relfrozenxid for empty tables before the
1772  * physical files are put in place.
1773  */
1774  if (!IsBinaryUpgrade && AutoVacPID == 0 &&
1776  pmState == PM_RUN)
1777  {
1779  if (AutoVacPID != 0)
1780  start_autovac_launcher = false; /* signal processed */
1781  }
1782 
1783  /* If we have lost the stats collector, try to start a new one */
1784  if (PgStatPID == 0 &&
1785  (pmState == PM_RUN || pmState == PM_HOT_STANDBY))
1786  PgStatPID = pgstat_start();
1787 
1788  /* If we have lost the archiver, try to start a new one. */
1789  if (PgArchPID == 0 && PgArchStartupAllowed())
1790  PgArchPID = pgarch_start();
1791 
1792  /* If we need to signal the autovacuum launcher, do so now */
1794  {
 /* The flag is cleared even when no launcher exists to receive SIGUSR2. */
1795  avlauncher_needs_signal = false;
1796  if (AutoVacPID != 0)
1797  kill(AutoVacPID, SIGUSR2);
1798  }
1799 
1800  /* Get other worker processes running, if needed */
1803 
1804 #ifdef HAVE_PTHREAD_IS_THREADED_NP
1805 
1806  /*
1807  * With assertions enabled, check regularly for appearance of
1808  * additional threads. All builds check at start and exit.
1809  */
1810  Assert(pthread_is_threaded_np() == 0);
1811 #endif
1812 
1813  /*
1814  * Lastly, check to see if it's time to do some things that we don't
1815  * want to do every single time through the loop, because they're a
1816  * bit expensive. Note that there's up to a minute of slop in when
1817  * these tasks will be performed, since DetermineSleepTime() will let
1818  * us sleep at most that long; except for SIGKILL timeout which has
1819  * special-case logic there.
1820  */
1821  now = time(NULL);
1822 
1823  /*
1824  * If we already sent SIGQUIT to children and they are slow to shut
1825  * down, it's time to send them SIGKILL. This doesn't happen
1826  * normally, but under certain conditions backends can get stuck while
1827  * shutting down. This is a last measure to get them unwedged.
1828  *
1829  * Note we also do this during recovery from a process crash.
1830  */
1831  if ((Shutdown >= ImmediateShutdown || (FatalError && !SendStop)) &&
1832  AbortStartTime != 0 &&
1834  {
1835  /* We were gentle with them before. Not anymore */
1837  /* reset flag so we don't SIGKILL again */
1838  AbortStartTime = 0;
1839  }
1840 
1841  /*
1842  * Once a minute, verify that postmaster.pid hasn't been removed or
1843  * overwritten. If it has, we force a shutdown. This avoids having
1844  * postmasters and child processes hanging around after their database
1845  * is gone, and maybe causing problems if a new database cluster is
1846  * created in the same place. It also provides some protection
1847  * against a DBA foolishly removing postmaster.pid and manually
1848  * starting a new postmaster. Data corruption is likely to ensue from
1849  * that anyway, but we can minimize the damage by aborting ASAP.
1850  */
1851  if (now - last_lockfile_recheck_time >= 1 * SECS_PER_MINUTE)
1852  {
1853  if (!RecheckDataDirLockFile())
1854  {
1855  ereport(LOG,
1856  (errmsg("performing immediate shutdown because data directory lock file is invalid")));
 /* The shutdown is requested by sending SIGQUIT to this process itself. */
1857  kill(MyProcPid, SIGQUIT);
1858  }
 /* The timer restarts whether or not the check passed. */
1859  last_lockfile_recheck_time = now;
1860  }
1861 
1862  /*
1863  * Touch Unix socket and lock files every 58 minutes, to ensure that
1864  * they are not removed by overzealous /tmp-cleaning tasks. We assume
1865  * no one runs cleaners with cutoff times of less than an hour ...
1866  */
1867  if (now - last_touch_time >= 58 * SECS_PER_MINUTE)
1868  {
1869  TouchSocketFiles();
1871  last_touch_time = now;
1872  }
1873  }
1874 }
1875 
1876 /*
1877  * Initialise the masks for select() for the ports we are listening on.
1878  * Return the number of sockets to listen on.
1879  */
1880 static int
1881 initMasks(fd_set *rmask)
1882 {
1883  int maxsock = -1;
1884  int i;
1885 
1886  FD_ZERO(rmask);
1887 
1888  for (i = 0; i < MAXLISTEN; i++)
1889  {
1890  int fd = ListenSocket[i];
1891 
1892  if (fd == PGINVALID_SOCKET)
1893  break;
1894  FD_SET(fd, rmask);
1895 
1896  if (fd > maxsock)
1897  maxsock = fd;
1898  }
1899 
1900  return maxsock + 1;
1901 }
1902 
1903 
1904 /*
1905  * Read a client's startup packet and do something according to it.
1906  *
1907  * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1908  * not return at all.
1909  *
1910  * (Note that ereport(FATAL) stuff is sent to the client, so only use it
1911  * if that's what you want. Return STATUS_ERROR if you don't want to
1912  * send anything to the client, which would typically be appropriate
1913  * if we detect a communications failure.)
1914  */
1915 static int
1917 {
1918  int32 len;
1919  void *buf;
1920  ProtocolVersion proto;
1921  MemoryContext oldcontext;
1922 
1923  pq_startmsgread();
1924  if (pq_getbytes((char *) &len, 4) == EOF)
1925  {
1926  /*
1927  * EOF after SSLdone probably means the client didn't like our
1928  * response to NEGOTIATE_SSL_CODE. That's not an error condition, so
1929  * don't clutter the log with a complaint.
1930  */
1931  if (!SSLdone)
1933  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1934  errmsg("incomplete startup packet")));
1935  return STATUS_ERROR;
1936  }
1937 
1938  len = ntohl(len);
1939  len -= 4;
1940 
1941  if (len < (int32) sizeof(ProtocolVersion) ||
1943  {
1945  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1946  errmsg("invalid length of startup packet")));
1947  return STATUS_ERROR;
1948  }
1949 
1950  /*
1951  * Allocate at least the size of an old-style startup packet, plus one
1952  * extra byte, and make sure all are zeroes. This ensures we will have
1953  * null termination of all strings, in both fixed- and variable-length
1954  * packet layouts.
1955  */
1956  if (len <= (int32) sizeof(StartupPacket))
1957  buf = palloc0(sizeof(StartupPacket) + 1);
1958  else
1959  buf = palloc0(len + 1);
1960 
1961  if (pq_getbytes(buf, len) == EOF)
1962  {
1964  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1965  errmsg("incomplete startup packet")));
1966  return STATUS_ERROR;
1967  }
1968  pq_endmsgread();
1969 
1970  /*
1971  * The first field is either a protocol version number or a special
1972  * request code.
1973  */
1974  port->proto = proto = ntohl(*((ProtocolVersion *) buf));
1975 
1976  if (proto == CANCEL_REQUEST_CODE)
1977  {
1978  processCancelRequest(port, buf);
1979  /* Not really an error, but we don't want to proceed further */
1980  return STATUS_ERROR;
1981  }
1982 
1983  if (proto == NEGOTIATE_SSL_CODE && !SSLdone)
1984  {
1985  char SSLok;
1986 
1987 #ifdef USE_SSL
1988  /* No SSL when disabled or on Unix sockets */
1989  if (!LoadedSSL || IS_AF_UNIX(port->laddr.addr.ss_family))
1990  SSLok = 'N';
1991  else
1992  SSLok = 'S'; /* Support for SSL */
1993 #else
1994  SSLok = 'N'; /* No support for SSL */
1995 #endif
1996 
1997 retry1:
1998  if (send(port->sock, &SSLok, 1, 0) != 1)
1999  {
2000  if (errno == EINTR)
2001  goto retry1; /* if interrupted, just retry */
2004  errmsg("failed to send SSL negotiation response: %m")));
2005  return STATUS_ERROR; /* close the connection */
2006  }
2007 
2008 #ifdef USE_SSL
2009  if (SSLok == 'S' && secure_open_server(port) == -1)
2010  return STATUS_ERROR;
2011 #endif
2012  /* regular startup packet, cancel, etc packet should follow... */
2013  /* but not another SSL negotiation request */
2014  return ProcessStartupPacket(port, true);
2015  }
2016 
2017  /* Could add additional special packet types here */
2018 
2019  /*
2020  * Set FrontendProtocol now so that ereport() knows what format to send if
2021  * we fail during startup.
2022  */
2023  FrontendProtocol = proto;
2024 
2025  /* Check we can handle the protocol the frontend is using. */
2026 
2031  ereport(FATAL,
2032  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2033  errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
2034  PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
2038 
2039  /*
2040  * Now fetch parameters out of startup packet and save them into the Port
2041  * structure. All data structures attached to the Port struct must be
2042  * allocated in TopMemoryContext so that they will remain available in a
2043  * running backend (even after PostmasterContext is destroyed). We need
2044  * not worry about leaking this storage on failure, since we aren't in the
2045  * postmaster process anymore.
2046  */
2048 
2049  if (PG_PROTOCOL_MAJOR(proto) >= 3)
2050  {
2051  int32 offset = sizeof(ProtocolVersion);
2052 
2053  /*
2054  * Scan packet body for name/option pairs. We can assume any string
2055  * beginning within the packet body is null-terminated, thanks to
2056  * zeroing extra byte above.
2057  */
2058  port->guc_options = NIL;
2059 
2060  while (offset < len)
2061  {
2062  char *nameptr = ((char *) buf) + offset;
2063  int32 valoffset;
2064  char *valptr;
2065 
2066  if (*nameptr == '\0')
2067  break; /* found packet terminator */
2068  valoffset = offset + strlen(nameptr) + 1;
2069  if (valoffset >= len)
2070  break; /* missing value, will complain below */
2071  valptr = ((char *) buf) + valoffset;
2072 
2073  if (strcmp(nameptr, "database") == 0)
2074  port->database_name = pstrdup(valptr);
2075  else if (strcmp(nameptr, "user") == 0)
2076  port->user_name = pstrdup(valptr);
2077  else if (strcmp(nameptr, "options") == 0)
2078  port->cmdline_options = pstrdup(valptr);
2079  else if (strcmp(nameptr, "replication") == 0)
2080  {
2081  /*
2082  * Due to backward compatibility concerns the replication
2083  * parameter is a hybrid beast which allows the value to be
2084  * either boolean or the string 'database'. The latter
2085  * connects to a specific database which is e.g. required for
2086  * logical decoding.
2087  */
2088  if (strcmp(valptr, "database") == 0)
2089  {
2090  am_walsender = true;
2091  am_db_walsender = true;
2092  }
2093  else if (!parse_bool(valptr, &am_walsender))
2094  ereport(FATAL,
2095  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2096  errmsg("invalid value for parameter \"%s\": \"%s\"",
2097  "replication",
2098  valptr),
2099  errhint("Valid values are: \"false\", 0, \"true\", 1, \"database\".")));
2100  }
2101  else
2102  {
2103  /* Assume it's a generic GUC option */
2104  port->guc_options = lappend(port->guc_options,
2105  pstrdup(nameptr));
2106  port->guc_options = lappend(port->guc_options,
2107  pstrdup(valptr));
2108  }
2109  offset = valoffset + strlen(valptr) + 1;
2110  }
2111 
2112  /*
2113  * If we didn't find a packet terminator exactly at the end of the
2114  * given packet length, complain.
2115  */
2116  if (offset != len - 1)
2117  ereport(FATAL,
2118  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2119  errmsg("invalid startup packet layout: expected terminator as last byte")));
2120  }
2121  else
2122  {
2123  /*
2124  * Get the parameters from the old-style, fixed-width-fields startup
2125  * packet as C strings. The packet destination was cleared first so a
2126  * short packet has zeros silently added. We have to be prepared to
2127  * truncate the pstrdup result for oversize fields, though.
2128  */
2129  StartupPacket *packet = (StartupPacket *) buf;
2130 
2131  port->database_name = pstrdup(packet->database);
2132  if (strlen(port->database_name) > sizeof(packet->database))
2133  port->database_name[sizeof(packet->database)] = '\0';
2134  port->user_name = pstrdup(packet->user);
2135  if (strlen(port->user_name) > sizeof(packet->user))
2136  port->user_name[sizeof(packet->user)] = '\0';
2137  port->cmdline_options = pstrdup(packet->options);
2138  if (strlen(port->cmdline_options) > sizeof(packet->options))
2139  port->cmdline_options[sizeof(packet->options)] = '\0';
2140  port->guc_options = NIL;
2141  }
2142 
2143  /* Check a user name was given. */
2144  if (port->user_name == NULL || port->user_name[0] == '\0')
2145  ereport(FATAL,
2146  (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
2147  errmsg("no PostgreSQL user name specified in startup packet")));
2148 
2149  /* The database defaults to the user name. */
2150  if (port->database_name == NULL || port->database_name[0] == '\0')
2151  port->database_name = pstrdup(port->user_name);
2152 
2153  if (Db_user_namespace)
2154  {
2155  /*
2156  * If user@, it is a global user, remove '@'. We only want to do this
2157  * if there is an '@' at the end and no earlier in the user string or
2158  * they may fake as a local user of another database attaching to this
2159  * database.
2160  */
2161  if (strchr(port->user_name, '@') ==
2162  port->user_name + strlen(port->user_name) - 1)
2163  *strchr(port->user_name, '@') = '\0';
2164  else
2165  {
2166  /* Append '@' and dbname */
2167  port->user_name = psprintf("%s@%s", port->user_name, port->database_name);
2168  }
2169  }
2170 
2171  /*
2172  * Truncate given database and user names to length of a Postgres name.
2173  * This avoids lookup failures when overlength names are given.
2174  */
2175  if (strlen(port->database_name) >= NAMEDATALEN)
2176  port->database_name[NAMEDATALEN - 1] = '\0';
2177  if (strlen(port->user_name) >= NAMEDATALEN)
2178  port->user_name[NAMEDATALEN - 1] = '\0';
2179 
2180  /*
2181  * Normal walsender backends, e.g. for streaming replication, are not
2182  * connected to a particular database. But walsenders used for logical
2183  * replication need to connect to a specific database. We allow streaming
2184  * replication commands to be issued even if connected to a database as it
2185  * can make sense to first make a basebackup and then stream changes
2186  * starting from that.
2187  */
2188  if (am_walsender && !am_db_walsender)
2189  port->database_name[0] = '\0';
2190 
2191  /*
2192  * Done putting stuff in TopMemoryContext.
2193  */
2194  MemoryContextSwitchTo(oldcontext);
2195 
2196  /*
2197  * If we're going to reject the connection due to database state, say so
2198  * now instead of wasting cycles on an authentication exchange. (This also
2199  * allows a pg_ping utility to be written.)
2200  */
2201  switch (port->canAcceptConnections)
2202  {
2203  case CAC_STARTUP:
2204  ereport(FATAL,
2206  errmsg("the database system is starting up")));
2207  break;
2208  case CAC_SHUTDOWN:
2209  ereport(FATAL,
2211  errmsg("the database system is shutting down")));
2212  break;
2213  case CAC_RECOVERY:
2214  ereport(FATAL,
2216  errmsg("the database system is in recovery mode")));
2217  break;
2218  case CAC_TOOMANY:
2219  ereport(FATAL,
2220  (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
2221  errmsg("sorry, too many clients already")));
2222  break;
2223  case CAC_WAITBACKUP:
2224  /* OK for now, will check in InitPostgres */
2225  break;
2226  case CAC_OK:
2227  break;
2228  }
2229 
2230  return STATUS_OK;
2231 }
2232 
2233 
2234 /*
2235  * The client has sent a cancel request packet, not a normal
2236  * start-a-new-connection packet. Perform the necessary processing.
2237  * Nothing is sent back to the client.
2238  */
2239 static void
2241 {
2242  CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
2243  int backendPID;
2244  int32 cancelAuthCode;
2245  Backend *bp;
2246 
2247 #ifndef EXEC_BACKEND
2248  dlist_iter iter;
2249 #else
2250  int i;
2251 #endif
2252 
2253  backendPID = (int) ntohl(canc->backendPID);
2254  cancelAuthCode = (int32) ntohl(canc->cancelAuthCode);
2255 
2256  /*
2257  * See if we have a matching backend. In the EXEC_BACKEND case, we can no
2258  * longer access the postmaster's own backend list, and must rely on the
2259  * duplicate array in shared memory.
2260  */
2261 #ifndef EXEC_BACKEND
2262  dlist_foreach(iter, &BackendList)
2263  {
2264  bp = dlist_container(Backend, elem, iter.cur);
2265 #else
2266  for (i = MaxLivePostmasterChildren() - 1; i >= 0; i--)
2267  {
2268  bp = (Backend *) &ShmemBackendArray[i];
2269 #endif
2270  if (bp->pid == backendPID)
2271  {
2272  if (bp->cancel_key == cancelAuthCode)
2273  {
2274  /* Found a match; signal that backend to cancel current op */
2275  ereport(DEBUG2,
2276  (errmsg_internal("processing cancel request: sending SIGINT to process %d",
2277  backendPID)));
2278  signal_child(bp->pid, SIGINT);
2279  }
2280  else
2281  /* Right PID, wrong key: no way, Jose */
2282  ereport(LOG,
2283  (errmsg("wrong key in cancel request for process %d",
2284  backendPID)));
2285  return;
2286  }
2287  }
2288 
2289  /* No matching backend */
2290  ereport(LOG,
2291  (errmsg("PID %d in cancel request did not match any process",
2292  backendPID)));
2293 }
2294 
2295 /*
2296  * canAcceptConnections --- check to see if database state allows connections.
 *
 * Yields a CAC_state.  CAC_SHUTDOWN, CAC_STARTUP and CAC_RECOVERY are
 * returned straight from the state checks below, while CAC_WAITBACKUP,
 * CAC_OK and CAC_TOOMANY are handed back through "result" at the end.
 *
 * NOTE(review): the embedded listing numbers skip 2299, 2301, 2324 and 2342,
 * so this copy lacks the declarator line, whatever declares "result", the
 * tail of the hot-standby condition, and the test that guards the
 * CAC_TOOMANY assignment.  Restore them from a pristine copy of the file
 * before building -- TODO confirm.
2297  */
2298 static CAC_state
2300 {
2302 
2303  /*
2304  * Can't start backends when in startup/shutdown/inconsistent recovery
2305  * state.
2306  *
2307  * In state PM_WAIT_BACKUP only superusers can connect (this must be
2308  * allowed so that a superuser can end online backup mode); we return
2309  * CAC_WAITBACKUP code to indicate that this must be checked later. Note
2310  * that neither CAC_OK nor CAC_WAITBACKUP can safely be returned until we
2311  * have checked for too many children.
2312  */
2313  if (pmState != PM_RUN)
2314  {
2315  if (pmState == PM_WAIT_BACKUP)
2316  result = CAC_WAITBACKUP; /* allow superusers only */
2317  else if (Shutdown > NoShutdown)
2318  return CAC_SHUTDOWN; /* shutdown is pending */
2319  else if (!FatalError &&
2320  (pmState == PM_STARTUP ||
2321  pmState == PM_RECOVERY))
2322  return CAC_STARTUP; /* normal startup */
2323  else if (!FatalError &&
 /* NOTE(review): listing line 2324 (remainder of this condition) is absent */
2325  result = CAC_OK; /* connection OK during hot standby */
2326  else
2327  return CAC_RECOVERY; /* else must be crash recovery */
2328  }
2329 
2330  /*
2331  * Don't start too many children.
2332  *
2333  * We allow more connections than we can have backends here because some
2334  * might still be authenticating; they might fail auth, or some existing
2335  * backend might exit before the auth cycle is completed. The exact
2336  * MaxBackends limit is enforced when a new backend tries to join the
2337  * shared-inval backend array.
2338  *
2339  * The limit here must match the sizes of the per-child-process arrays;
2340  * see comments for MaxLivePostmasterChildren().
2341  */
 /* NOTE(review): listing line 2342 (the test guarding this assignment) is absent */
2343  result = CAC_TOOMANY;
2344 
2345  return result;
2346 }
2347 
2348 
2349 /*
2350  * ConnCreate -- create a local connection data structure
2351  *
2352  * Returns NULL on failure, other than out-of-memory which is fatal.
2353  */
2354 static Port *
2355 ConnCreate(int serverFd)
2356 {
2357  Port *port;
2358 
2359  if (!(port = (Port *) calloc(1, sizeof(Port))))
2360  {
2361  ereport(LOG,
2362  (errcode(ERRCODE_OUT_OF_MEMORY),
2363  errmsg("out of memory")));
2364  ExitPostmaster(1);
2365  }
2366 
2367  if (StreamConnection(serverFd, port) != STATUS_OK)
2368  {
2369  if (port->sock != PGINVALID_SOCKET)
2370  StreamClose(port->sock);
2371  ConnFree(port);
2372  return NULL;
2373  }
2374 
2375  /*
2376  * Allocate GSSAPI specific state struct
2377  */
2378 #ifndef EXEC_BACKEND
2379 #if defined(ENABLE_GSS) || defined(ENABLE_SSPI)
2380  port->gss = (pg_gssinfo *) calloc(1, sizeof(pg_gssinfo));
2381  if (!port->gss)
2382  {
2383  ereport(LOG,
2384  (errcode(ERRCODE_OUT_OF_MEMORY),
2385  errmsg("out of memory")));
2386  ExitPostmaster(1);
2387  }
2388 #endif
2389 #endif
2390 
2391  return port;
2392 }
2393 
2394 
2395 /*
2396  * ConnFree -- free a local connection data structure
2397  */
2398 static void
2400 {
2401 #ifdef USE_SSL
2402  secure_close(conn);
2403 #endif
2404  if (conn->gss)
2405  free(conn->gss);
2406  free(conn);
2407 }
2408 
2409 
2410 /*
2411  * ClosePostmasterPorts -- close all the postmaster's open sockets
2412  *
2413  * This is called during child process startup to release file descriptors
2414  * that are not needed by that child process. The postmaster still has
2415  * them open, of course.
2416  *
2417  * Note: we pass am_syslogger as a boolean because we don't want to set
2418  * the global variable yet when this is called.
 *
 * Steps visible below, in order: the postmaster death watch pipe (non-WIN32
 * only), the ListenSocket[] entries, the read side of syslogPipe unless
 * am_syslogger is set, and the Bonjour socket when USE_BONJOUR is defined.
 *
 * NOTE(review): the embedded listing numbers skip 2421, 2432, 2434, 2436 and
 * 2444-2445.  The declarator line, the statement that triggers the FATAL
 * report, one ereport() argument, the line after the ereport(), and the
 * whole body of the listen-socket "if" are therefore not present in this
 * copy.  Restore them from a pristine copy of the file -- TODO confirm.
2419  */
2420 void
2422 {
2423  int i;
2424 
2425 #ifndef WIN32
2426 
2427  /*
2428  * Close the write end of postmaster death watch pipe. It's important to
2429  * do this as early as possible, so that if postmaster dies, others won't
2430  * think that it's still running because we're holding the pipe open.
2431  */
 /* NOTE(review): listing line 2432 (the condition for this report) is absent */
2433  ereport(FATAL,
2435  errmsg_internal("could not close postmaster death monitoring pipe in child process: %m")));
2437 #endif
2438 
2439  /* Close the listen sockets */
2440  for (i = 0; i < MAXLISTEN; i++)
2441  {
2442  if (ListenSocket[i] != PGINVALID_SOCKET)
2443  {
 /* NOTE(review): listing lines 2444-2445 (body of this block) are absent */
2446  }
2447  }
2448 
2449  /* If using syslogger, close the read side of the pipe */
2450  if (!am_syslogger)
2451  {
2452 #ifndef WIN32
2453  if (syslogPipe[0] >= 0)
2454  close(syslogPipe[0]);
2455  syslogPipe[0] = -1;
2456 #else
2457  if (syslogPipe[0])
2458  CloseHandle(syslogPipe[0]);
2459  syslogPipe[0] = 0;
2460 #endif
2461  }
2462 
2463 #ifdef USE_BONJOUR
2464  /* If using Bonjour, close the connection to the mDNS daemon */
2465  if (bonjour_sdref)
2466  close(DNSServiceRefSockFD(bonjour_sdref));
2467 #endif
2468 }
2469 
2470 
2471 /*
2472  * reset_shared -- reset shared memory and semaphores
2473  */
2474 static void
2475 reset_shared(int port)
2476 {
2477  /*
2478  * Create or re-create shared memory and semaphores.
2479  *
2480  * Note: in each "cycle of life" we will normally assign the same IPC keys
2481  * (if using SysV shmem and/or semas), since the port number is used to
2482  * determine IPC keys. This helps ensure that we will clean up dead IPC
2483  * objects if the postmaster crashes and is restarted.
2484  */
2485  CreateSharedMemoryAndSemaphores(false, port);
2486 }
2487 
2488 
2489 /*
2490  * SIGHUP -- reread config files, and tell children to do same
 *
 * errno is saved on entry and restored on exit, and signals are blocked with
 * PG_SETMASK(&BlockSig) while the handler runs.  Nothing is done once
 * Shutdown has advanced past SmartShutdown.  Besides the child notification,
 * the visible code reloads pg_hba.conf and pg_ident.conf, refreshes or tears
 * down the SSL setup (USE_SSL), and in EXEC_BACKEND builds rewrites the
 * non-default-variables file.
 *
 * NOTE(review): the embedded listing numbers skip 2493, 2503-2504, every
 * even line from 2506 to 2522, and 2556.  The declarator, the statements
 * following the "reloading configuration files" message, the statement
 * belonging to each "if (...PID != 0)" below, and the line just before errno
 * is restored are therefore not present in this copy.  As printed, the "if"
 * lines would chain into each other, which is presumably not the intent --
 * restore from a pristine copy of the file.
2491  */
2492 static void
2494 {
2495  int save_errno = errno;
2496 
2497  PG_SETMASK(&BlockSig);
2498 
2499  if (Shutdown <= SmartShutdown)
2500  {
2501  ereport(LOG,
2502  (errmsg("received SIGHUP, reloading configuration files")));
 /* NOTE(review): the statement for each of the following tests is absent */
2505  if (StartupPID != 0)
2507  if (BgWriterPID != 0)
2509  if (CheckpointerPID != 0)
2511  if (WalWriterPID != 0)
2513  if (WalReceiverPID != 0)
2515  if (AutoVacPID != 0)
2517  if (PgArchPID != 0)
2519  if (SysLoggerPID != 0)
2521  if (PgStatPID != 0)
2523 
2524  /* Reload authentication config files too */
2525  if (!load_hba())
2526  ereport(LOG,
2527  (errmsg("pg_hba.conf was not reloaded")));
2528 
2529  if (!load_ident())
2530  ereport(LOG,
2531  (errmsg("pg_ident.conf was not reloaded")));
2532 
2533 #ifdef USE_SSL
2534  /* Reload SSL configuration as well */
2535  if (EnableSSL)
2536  {
2537  if (secure_initialize(false) == 0)
2538  LoadedSSL = true;
2539  else
2540  ereport(LOG,
2541  (errmsg("SSL configuration was not reloaded")));
2542  }
2543  else
2544  {
2545  secure_destroy();
2546  LoadedSSL = false;
2547  }
2548 #endif
2549 
2550 #ifdef EXEC_BACKEND
2551  /* Update the starting-point file for future children */
2552  write_nondefault_variables(PGC_SIGHUP);
2553 #endif
2554  }
2555 
2557 
2558  errno = save_errno;
2559 }
2560 
2561 
2562 /*
2563  * pmdie -- signal handler for processing various postmaster signals.
 *
 * Dispatches on postgres_signal_arg: SIGTERM is a smart shutdown request,
 * SIGINT a fast one, SIGQUIT an immediate one.  Each case is skipped when
 * Shutdown is already at or beyond the requested level.  errno is saved on
 * entry and restored on exit, and signals are blocked with
 * PG_SETMASK(&BlockSig) while the handler runs.
 *
 * NOTE(review): the embedded listing numbers skip 2566, 2587, 2595, 2600,
 * 2621, 2629, 2642, 2657, 2665, 2669-2671, 2677-2678, 2685, 2692, 2706,
 * 2713-2714, 2723 and 2727.  The declarator, the tails of several conditions
 * and argument lists, and a number of whole statements are therefore not
 * present in this copy; it cannot be compiled as printed.  Restore from a
 * pristine copy of the file.
2564  */
2565 static void
2567 {
2568  int save_errno = errno;
2569 
2570  PG_SETMASK(&BlockSig);
2571 
2572  ereport(DEBUG2,
2573  (errmsg_internal("postmaster received signal %d",
2574  postgres_signal_arg)));
2575 
2576  switch (postgres_signal_arg)
2577  {
2578  case SIGTERM:
2579 
2580  /*
2581  * Smart Shutdown:
2582  *
2583  * Wait for children to end their work, then shut down.
2584  */
2585  if (Shutdown >= SmartShutdown)
2586  break;
2588  ereport(LOG,
2589  (errmsg("received smart shutdown request")));
2590 #ifdef USE_SYSTEMD
2591  sd_notify(0, "STOPPING=1");
2592 #endif
2593 
2594  if (pmState == PM_RUN || pmState == PM_RECOVERY ||
 /* NOTE(review): listing line 2595 (rest of this condition) is absent */
2596  {
2597  /* autovac workers are told to shut down immediately */
2598  /* and bgworkers too; does this need tweaking? */
2599  SignalSomeChildren(SIGTERM,
 /* NOTE(review): listing line 2600 (remaining argument) is absent */
2601  /* and the autovac launcher too */
2602  if (AutoVacPID != 0)
2603  signal_child(AutoVacPID, SIGTERM);
2604  /* and the bgwriter too */
2605  if (BgWriterPID != 0)
2606  signal_child(BgWriterPID, SIGTERM);
2607  /* and the walwriter too */
2608  if (WalWriterPID != 0)
2609  signal_child(WalWriterPID, SIGTERM);
2610 
2611  /*
2612  * If we're in recovery, we can't kill the startup process
2613  * right away, because at present doing so does not release
2614  * its locks. We might want to change this in a future
2615  * release. For the time being, the PM_WAIT_READONLY state
2616  * indicates that we're waiting for the regular (read only)
2617  * backends to die off; once they do, we'll kill the startup
2618  * and walreceiver processes.
2619  */
2620  pmState = (pmState == PM_RUN) ?
 /* NOTE(review): listing line 2621 (both arms of this ?:) is absent */
2622  }
2623 
2624  /*
2625  * Now wait for online backup mode to end and backends to exit. If
2626  * that is already the case, PostmasterStateMachine will take the
2627  * next step.
2628  */
2630  break;
2631 
2632  case SIGINT:
2633 
2634  /*
2635  * Fast Shutdown:
2636  *
2637  * Abort all children with SIGTERM (rollback active transactions
2638  * and exit) and shut down when they are gone.
2639  */
2640  if (Shutdown >= FastShutdown)
2641  break;
2643  ereport(LOG,
2644  (errmsg("received fast shutdown request")));
2645 #ifdef USE_SYSTEMD
2646  sd_notify(0, "STOPPING=1");
2647 #endif
2648 
2649  if (StartupPID != 0)
2650  signal_child(StartupPID, SIGTERM);
2651  if (BgWriterPID != 0)
2652  signal_child(BgWriterPID, SIGTERM);
2653  if (WalReceiverPID != 0)
2654  signal_child(WalReceiverPID, SIGTERM);
2655  if (pmState == PM_RECOVERY)
2656  {
2658 
2659  /*
2660  * Only startup, bgwriter, walreceiver, possibly bgworkers,
2661  * and/or checkpointer should be active in this state; we just
2662  * signaled the first four, and we don't want to kill
2663  * checkpointer yet.
2664  */
2666  }
2667  else if (pmState == PM_RUN ||
2668  pmState == PM_WAIT_BACKUP ||
 /* NOTE(review): listing lines 2669-2671 (rest of this condition) are absent */
2672  {
2673  ereport(LOG,
2674  (errmsg("aborting any active transactions")));
2675  /* shut down all backends and workers */
2676  SignalSomeChildren(SIGTERM,
 /* NOTE(review): listing lines 2677-2678 (remaining argument) are absent */
2679  /* and the autovac launcher too */
2680  if (AutoVacPID != 0)
2681  signal_child(AutoVacPID, SIGTERM);
2682  /* and the walwriter too */
2683  if (WalWriterPID != 0)
2684  signal_child(WalWriterPID, SIGTERM);
2686  }
2687 
2688  /*
2689  * Now wait for backends to exit. If there are none,
2690  * PostmasterStateMachine will take the next step.
2691  */
2693  break;
2694 
2695  case SIGQUIT:
2696 
2697  /*
2698  * Immediate Shutdown:
2699  *
2700  * abort all children with SIGQUIT, wait for them to exit,
2701  * terminate remaining ones with SIGKILL, then exit without
2702  * attempt to properly shut down the data base system.
2703  */
2704  if (Shutdown >= ImmediateShutdown)
2705  break;
2707  ereport(LOG,
2708  (errmsg("received immediate shutdown request")));
2709 #ifdef USE_SYSTEMD
2710  sd_notify(0, "STOPPING=1");
2711 #endif
2712 
2715 
2716  /* set stopwatch for them to die */
2717  AbortStartTime = time(NULL);
2718 
2719  /*
2720  * Now wait for backends to exit. If there are none,
2721  * PostmasterStateMachine will take the next step.
2722  */
2724  break;
2725  }
2726 
2728 
2729  errno = save_errno;
2730 }
2731 
2732 /*
2733  * Reaper -- signal handler to cleanup after a child process dies.
 *
 * Collects every exited child that waitpid(-1, ..., WNOHANG) reports and
 * compares its PID, in this order, against the startup process, bgwriter,
 * checkpointer, WAL writer, WAL receiver, autovacuum launcher, archiver,
 * statistics collector and system logger.  A PID matching none of them is
 * offered to CleanupBackgroundWorker(); if that declines, CleanupBackend()
 * handles it.  errno is saved on entry and restored on exit, and signals are
 * blocked with PG_SETMASK(&BlockSig) while the handler runs.
 *
 * NOTE(review): the embedded listing numbers skip 2736, 2763-2764,
 * 2773-2774, 2776, 2806-2807, 2809, 2818, 2830, 2832, 2834, 2840-2841, 2848,
 * 2897, 2901, 2907, 2909, 2916, 3013, 3038 and 3041.  The declarator and
 * many statements (including the bodies of several "if"s) are therefore not
 * present in this copy; it cannot be compiled as printed.  Restore from a
 * pristine copy of the file.
2734  */
2735 static void
2737 {
2738  int save_errno = errno;
2739  int pid; /* process id of dead child process */
2740  int exitstatus; /* its exit status */
2741 
2742  PG_SETMASK(&BlockSig);
2743 
2744  ereport(DEBUG4,
2745  (errmsg_internal("reaping dead processes")));
2746 
2747  while ((pid = waitpid(-1, &exitstatus, WNOHANG)) > 0)
2748  {
2749  /*
2750  * Check if this child was a startup process.
2751  */
2752  if (pid == StartupPID)
2753  {
2754  StartupPID = 0;
2755 
2756  /*
2757  * Startup process exited in response to a shutdown request (or it
2758  * completed normally regardless of the shutdown request).
2759  */
2760  if (Shutdown > NoShutdown &&
2761  (EXIT_STATUS_0(exitstatus) || EXIT_STATUS_1(exitstatus)))
2762  {
 /* NOTE(review): listing lines 2763-2764 are absent */
2765  /* PostmasterStateMachine logic does the rest */
2766  continue;
2767  }
2768 
2769  if (EXIT_STATUS_3(exitstatus))
2770  {
2771  ereport(LOG,
2772  (errmsg("shutdown at recovery target")));
 /* NOTE(review): listing lines 2773-2774 and 2776 are absent */
2775  TerminateChildren(SIGTERM);
2777  /* PostmasterStateMachine logic does the rest */
2778  continue;
2779  }
2780 
2781  /*
2782  * Unexpected exit of startup process (including FATAL exit)
2783  * during PM_STARTUP is treated as catastrophic. There are no
2784  * other processes running yet, so we can just exit.
2785  */
2786  if (pmState == PM_STARTUP && !EXIT_STATUS_0(exitstatus))
2787  {
2788  LogChildExit(LOG, _("startup process"),
2789  pid, exitstatus);
2790  ereport(LOG,
2791  (errmsg("aborting startup due to startup process failure")));
2792  ExitPostmaster(1);
2793  }
2794 
2795  /*
2796  * After PM_STARTUP, any unexpected exit (including FATAL exit) of
2797  * the startup process is catastrophic, so kill other children,
2798  * and set StartupStatus so we don't try to reinitialize after
2799  * they're gone. Exception: if StartupStatus is STARTUP_SIGNALED,
2800  * then we previously sent the startup process a SIGQUIT; so
2801  * that's probably the reason it died, and we do want to try to
2802  * restart in that case.
2803  */
2804  if (!EXIT_STATUS_0(exitstatus))
2805  {
 /* NOTE(review): listing lines 2806-2807 and 2809 (the if/else arms) are absent */
2808  else
2810  HandleChildCrash(pid, exitstatus,
2811  _("startup process"));
2812  continue;
2813  }
2814 
2815  /*
2816  * Startup succeeded, commence normal operations
2817  */
2819  FatalError = false;
2820  Assert(AbortStartTime == 0);
2821  ReachedNormalRunning = true;
2822  pmState = PM_RUN;
2823 
2824  /*
2825  * Crank up the background tasks, if we didn't do that already
2826  * when we entered consistent recovery state. It doesn't matter
2827  * if this fails, we'll just try again later.
2828  */
 /* NOTE(review): the statement for each of the next three tests is absent */
2829  if (CheckpointerPID == 0)
2831  if (BgWriterPID == 0)
2833  if (WalWriterPID == 0)
2835 
2836  /*
2837  * Likewise, start other special children as needed. In a restart
2838  * situation, some of them may be alive already.
2839  */
2842  if (PgArchStartupAllowed() && PgArchPID == 0)
2843  PgArchPID = pgarch_start();
2844  if (PgStatPID == 0)
2845  PgStatPID = pgstat_start();
2846 
2847  /* workers may be scheduled to start now */
2849 
2850  /* at this point we are really open for business */
2851  ereport(LOG,
2852  (errmsg("database system is ready to accept connections")));
2853 
2854 #ifdef USE_SYSTEMD
2855  sd_notify(0, "READY=1");
2856 #endif
2857 
2858  continue;
2859  }
2860 
2861  /*
2862  * Was it the bgwriter? Normal exit can be ignored; we'll start a new
2863  * one at the next iteration of the postmaster's main loop, if
2864  * necessary. Any other exit condition is treated as a crash.
2865  */
2866  if (pid == BgWriterPID)
2867  {
2868  BgWriterPID = 0;
2869  if (!EXIT_STATUS_0(exitstatus))
2870  HandleChildCrash(pid, exitstatus,
2871  _("background writer process"));
2872  continue;
2873  }
2874 
2875  /*
2876  * Was it the checkpointer?
2877  */
2878  if (pid == CheckpointerPID)
2879  {
2880  CheckpointerPID = 0;
2881  if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN)
2882  {
2883  /*
2884  * OK, we saw normal exit of the checkpointer after it's been
2885  * told to shut down. We expect that it wrote a shutdown
2886  * checkpoint. (If for some reason it didn't, recovery will
2887  * occur on next postmaster start.)
2888  *
2889  * At this point we should have no normal backend children
2890  * left (else we'd not be in PM_SHUTDOWN state) but we might
2891  * have dead_end children to wait for.
2892  *
2893  * If we have an archiver subprocess, tell it to do a last
2894  * archive cycle and quit. Likewise, if we have walsender
2895  * processes, tell them to send any remaining WAL and quit.
2896  */
2898 
2899  /* Waken archiver for the last time */
2900  if (PgArchPID != 0)
 /* NOTE(review): listing line 2901 (statement for the test above) is absent */
2902 
2903  /*
2904  * Waken walsenders for the last time. No regular backends
2905  * should be around anymore.
2906  */
2908 
2910 
2911  /*
2912  * We can also shut down the stats collector now; there's
2913  * nothing left for it to do.
2914  */
2915  if (PgStatPID != 0)
 /* NOTE(review): listing line 2916 (statement for the test above) is absent */
2917  }
2918  else
2919  {
2920  /*
2921  * Any unexpected exit of the checkpointer (including FATAL
2922  * exit) is treated as a crash.
2923  */
2924  HandleChildCrash(pid, exitstatus,
2925  _("checkpointer process"));
2926  }
2927 
2928  continue;
2929  }
2930 
2931  /*
2932  * Was it the wal writer? Normal exit can be ignored; we'll start a
2933  * new one at the next iteration of the postmaster's main loop, if
2934  * necessary. Any other exit condition is treated as a crash.
2935  */
2936  if (pid == WalWriterPID)
2937  {
2938  WalWriterPID = 0;
2939  if (!EXIT_STATUS_0(exitstatus))
2940  HandleChildCrash(pid, exitstatus,
2941  _("WAL writer process"));
2942  continue;
2943  }
2944 
2945  /*
2946  * Was it the wal receiver? If exit status is zero (normal) or one
2947  * (FATAL exit), we assume everything is all right just like normal
2948  * backends.
2949  */
2950  if (pid == WalReceiverPID)
2951  {
2952  WalReceiverPID = 0;
2953  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
2954  HandleChildCrash(pid, exitstatus,
2955  _("WAL receiver process"));
2956  continue;
2957  }
2958 
2959  /*
2960  * Was it the autovacuum launcher? Normal exit can be ignored; we'll
2961  * start a new one at the next iteration of the postmaster's main
2962  * loop, if necessary. Any other exit condition is treated as a
2963  * crash.
2964  */
2965  if (pid == AutoVacPID)
2966  {
2967  AutoVacPID = 0;
2968  if (!EXIT_STATUS_0(exitstatus))
2969  HandleChildCrash(pid, exitstatus,
2970  _("autovacuum launcher process"));
2971  continue;
2972  }
2973 
2974  /*
2975  * Was it the archiver? If so, just try to start a new one; no need
2976  * to force reset of the rest of the system. (If fail, we'll try
2977  * again in future cycles of the main loop.). Unless we were waiting
2978  * for it to shut down; don't restart it in that case, and
2979  * PostmasterStateMachine() will advance to the next shutdown step.
2980  */
2981  if (pid == PgArchPID)
2982  {
2983  PgArchPID = 0;
2984  if (!EXIT_STATUS_0(exitstatus))
2985  LogChildExit(LOG, _("archiver process"),
2986  pid, exitstatus);
2987  if (PgArchStartupAllowed())
2988  PgArchPID = pgarch_start();
2989  continue;
2990  }
2991 
2992  /*
2993  * Was it the statistics collector? If so, just try to start a new
2994  * one; no need to force reset of the rest of the system. (If fail,
2995  * we'll try again in future cycles of the main loop.)
2996  */
2997  if (pid == PgStatPID)
2998  {
2999  PgStatPID = 0;
3000  if (!EXIT_STATUS_0(exitstatus))
3001  LogChildExit(LOG, _("statistics collector process"),
3002  pid, exitstatus);
3003  if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
3004  PgStatPID = pgstat_start();
3005  continue;
3006  }
3007 
3008  /* Was it the system logger? If so, try to start a new one */
3009  if (pid == SysLoggerPID)
3010  {
3011  SysLoggerPID = 0;
3012  /* for safety's sake, launch new logger *first* */
 /* NOTE(review): listing line 3013 (the relaunch itself) is absent */
3014  if (!EXIT_STATUS_0(exitstatus))
3015  LogChildExit(LOG, _("system logger process"),
3016  pid, exitstatus);
3017  continue;
3018  }
3019 
3020  /* Was it one of our background workers? */
3021  if (CleanupBackgroundWorker(pid, exitstatus))
3022  {
3023  /* have it be restarted */
3024  HaveCrashedWorker = true;
3025  continue;
3026  }
3027 
3028  /*
3029  * Else do standard backend child cleanup.
3030  */
3031  CleanupBackend(pid, exitstatus);
3032  } /* loop over pending child-death reports */
3033 
3034  /*
3035  * After cleaning out the SIGCHLD queue, see if we have any state changes
3036  * or actions to make.
3037  */
3039 
3040  /* Done with signal handler */
3042 
3043  errno = save_errno;
3044 }
3045 
3046 /*
3047  * Scan the bgworkers list and see if the given PID (which has just stopped
3048  * or crashed) is in it. Handle its shutdown if so, and return true. If not a
3049  * bgworker, return false.
3050  *
3051  * This is heavily based on CleanupBackend. One important difference is that
3052  * we don't know yet that the dying process is a bgworker, so we must be silent
3053  * until we're sure it is.
 *
 * For a matching worker, a zero exit status clears rw_crashed_at and sets
 * rw_terminate.  A worker flagged BGWORKER_SHMEM_ACCESS whose exit status is
 * neither 0 nor 1 is passed to HandleChildCrash() instead of being cleaned
 * up here; true is returned in that case too.
 *
 * NOTE(review): the embedded listing numbers skip 3056, 3062, 3083,
 * 3111-3112 and 3131.  The declarator line carrying the function name and
 * first parameter, the loop header, the crash-timestamp statement, the test
 * in front of the second HandleChildCrash() call, and the statement for the
 * bgworker_notify test are therefore not present in this copy.  Restore from
 * a pristine copy of the file.
3054  */
3055 static bool
3057  int exitstatus) /* child's exit status */
3058 {
3059  char namebuf[MAXPGPATH];
3060  slist_mutable_iter iter;
3061 
 /* NOTE(review): listing line 3062 (the loop header for this block) is absent */
3063  {
3064  RegisteredBgWorker *rw;
3065 
3066  rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
3067 
3068  if (rw->rw_pid != pid)
3069  continue;
3070 
3071 #ifdef WIN32
3072  /* see CleanupBackend */
3073  if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3074  exitstatus = 0;
3075 #endif
3076 
3077  snprintf(namebuf, MAXPGPATH, "%s: %s", _("worker process"),
3078  rw->rw_worker.bgw_name);
3079 
3080  if (!EXIT_STATUS_0(exitstatus))
3081  {
3082  /* Record timestamp, so we know when to restart the worker. */
 /* NOTE(review): listing line 3083 (the assignment itself) is absent */
3084  }
3085  else
3086  {
3087  /* Zero exit status means terminate */
3088  rw->rw_crashed_at = 0;
3089  rw->rw_terminate = true;
3090  }
3091 
3092  /*
3093  * Additionally, for shared-memory-connected workers, just like a
3094  * backend, any exit status other than 0 or 1 is considered a crash
3095  * and causes a system-wide restart.
3096  */
3097  if ((rw->rw_worker.bgw_flags & BGWORKER_SHMEM_ACCESS) != 0)
3098  {
3099  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3100  {
3101  HandleChildCrash(pid, exitstatus, namebuf);
3102  return true;
3103  }
3104  }
3105 
3106  /*
3107  * We must release the postmaster child slot whether this worker is
3108  * connected to shared memory or not, but we only treat it as a crash
3109  * if it is in fact connected.
3110  */
 /* NOTE(review): listing lines 3111-3112 (the test guarding this block) are absent */
3113  {
3114  HandleChildCrash(pid, exitstatus, namebuf);
3115  return true;
3116  }
3117 
3118  /* Get it out of the BackendList and clear out remaining data */
3119  dlist_delete(&rw->rw_backend->elem);
3120 #ifdef EXEC_BACKEND
3121  ShmemBackendArrayRemove(rw->rw_backend);
3122 #endif
3123 
3124  /*
3125  * It's possible that this background worker started some OTHER
3126  * background worker and asked to be notified when that worker started
3127  * or stopped. If so, cancel any notifications destined for the
3128  * now-dead backend.
3129  */
3130  if (rw->rw_backend->bgworker_notify)
 /* NOTE(review): listing line 3131 (statement for the test above) is absent */
3132  free(rw->rw_backend);
3133  rw->rw_backend = NULL;
3134  rw->rw_pid = 0;
3135  rw->rw_child_slot = 0;
3136  ReportBackgroundWorkerExit(&iter); /* report child death */
3137 
3138  LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG,
3139  namebuf, pid, exitstatus);
3140 
3141  return true;
3142  }
3143 
3144  return false;
3145 }
3146 
3147 /*
3148  * CleanupBackend -- cleanup after terminated backend.
3149  *
3150  * Remove all local state associated with backend.
3151  *
3152  * If you change this, see also CleanupBackgroundWorker.
 *
 * An exit status other than 0 or 1 is handed to HandleChildCrash() and
 * nothing else is done here.  Otherwise BackendList is searched for the PID;
 * the matching entry is unlinked with dlist_delete() and free()d, and the
 * search stops there.
 *
 * NOTE(review): the embedded listing numbers skip 3155, 3199 and 3222.  The
 * declarator line carrying the function name and first parameter, the test
 * in front of the "child failed to clean itself up" block, and the statement
 * inside the bgworker_notify block are therefore not present in this copy.
 * Restore from a pristine copy of the file.
3153  */
3154 static void
3156  int exitstatus) /* child's exit status. */
3157 {
3158  dlist_mutable_iter iter;
3159 
3160  LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
3161 
3162  /*
3163  * If a backend dies in an ugly way then we must signal all other backends
3164  * to quickdie. If exit status is zero (normal) or one (FATAL exit), we
3165  * assume everything is all right and proceed to remove the backend from
3166  * the active backend list.
3167  */
3168 
3169 #ifdef WIN32
3170 
3171  /*
3172  * On win32, also treat ERROR_WAIT_NO_CHILDREN (128) as nonfatal case,
3173  * since that sometimes happens under load when the process fails to start
3174  * properly (long before it starts using shared memory). Microsoft reports
3175  * it is related to mutex failure:
3176  * http://archives.postgresql.org/pgsql-hackers/2010-09/msg00790.php
3177  */
3178  if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3179  {
3180  LogChildExit(LOG, _("server process"), pid, exitstatus);
3181  exitstatus = 0;
3182  }
3183 #endif
3184 
3185  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3186  {
3187  HandleChildCrash(pid, exitstatus, _("server process"));
3188  return;
3189  }
3190 
3191  dlist_foreach_modify(iter, &BackendList)
3192  {
3193  Backend *bp = dlist_container(Backend, elem, iter.cur);
3194 
3195  if (bp->pid == pid)
3196  {
3197  if (!bp->dead_end)
3198  {
 /* NOTE(review): listing line 3199 (the test guarding this block) is absent */
3200  {
3201  /*
3202  * Uh-oh, the child failed to clean itself up. Treat as a
3203  * crash after all.
3204  */
3205  HandleChildCrash(pid, exitstatus, _("server process"));
3206  return;
3207  }
3208 #ifdef EXEC_BACKEND
3209  ShmemBackendArrayRemove(bp);
3210 #endif
3211  }
3212  if (bp->bgworker_notify)
3213  {
3214  /*
3215  * This backend may have been slated to receive SIGUSR1 when
3216  * some background worker started or stopped. Cancel those
3217  * notifications, as we don't want to signal PIDs that are not
3218  * PostgreSQL backends. This gets skipped in the (probably
3219  * very common) case where the backend has never requested any
3220  * such notifications.
3221  */
 /* NOTE(review): listing line 3222 (the cancelling call itself) is absent */
3223  }
3224  dlist_delete(iter.cur);
3225  free(bp);
3226  break;
3227  }
3228  }
3229 }
3230 
3231 /*
3232  * HandleChildCrash -- cleanup after failed backend, bgwriter, checkpointer,
3233  * walwriter, autovacuum, or background worker.
3234  *
3235  * The objectives here are to clean up our local state about the child
3236  * process, and to signal all other remaining children to quickdie.
      *
      * pid is the process ID of the child that died; it is compared against
      * each tracked child so that the matching bookkeeping entry can be
      * cleared. exitstatus and procname are only passed on to LogChildExit()
      * for the log message.
      *
      * NOTE(review): this listing has gaps in its embedded line numbers
      * inside this function (3263, 3275, 3305, 3322, 3366, 3375, 3466, 3479
      * and 3481), so some statements are not visible here. Check the
      * upstream file before relying on the control flow as shown.
3237  */
3238 static void
3239 HandleChildCrash(int pid, int exitstatus, const char *procname)
3240 {
3241  dlist_mutable_iter iter;
3242  slist_iter siter;
3243  Backend *bp;
3244  bool take_action;
3245 
3246  /*
3247  * We only log messages and send signals if this is the first process
3248  * crash and we're not doing an immediate shutdown; otherwise, we're only
3249  * here to update postmaster's idea of live processes. If we have already
3250  * signalled children, nonzero exit status is to be expected, so don't
3251  * clutter log.
3252  */
3253  take_action = !FatalError && Shutdown != ImmediateShutdown;
3254 
3255  if (take_action)
3256  {
3257  LogChildExit(LOG, procname, pid, exitstatus);
3258  ereport(LOG,
3259  (errmsg("terminating any other active server processes")));
3260  }
3261 
3262  /* Process background workers. */
      /*
       * NOTE(review): listing line 3263 is absent here. The "continue"
       * below implies this brace opens a loop body, presumably one that
       * advances siter over the registered background workers -- confirm
       * against upstream.
       */
3264  {
3265  RegisteredBgWorker *rw;
3266 
3267  rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
3268  if (rw->rw_pid == 0)
3269  continue; /* not running */
3270  if (rw->rw_pid == pid)
3271  {
3272  /*
3273  * Found entry for freshly-dead worker, so remove it.
3274  */
      /* NOTE(review): listing line 3275 is absent at this point. */
3276  dlist_delete(&rw->rw_backend->elem);
3277 #ifdef EXEC_BACKEND
3278  ShmemBackendArrayRemove(rw->rw_backend);
3279 #endif
3280  free(rw->rw_backend);
3281  rw->rw_backend = NULL;
3282  rw->rw_pid = 0;
3283  rw->rw_child_slot = 0;
3284  /* don't reset crashed_at */
3285  /* don't report child stop, either */
3286  /* Keep looping so we can signal remaining workers */
3287  }
3288  else
3289  {
3290  /*
3291  * This worker is still alive. Unless we did so already, tell it
3292  * to commit hara-kiri.
3293  *
3294  * SIGQUIT is the special signal that says exit without proc_exit
3295  * and let the user know what's going on. But if SendStop is set
3296  * (-s on command line), then we send SIGSTOP instead, so that we
3297  * can get core dumps from all backends by hand.
3298  */
3299  if (take_action)
3300  {
3301  ereport(DEBUG2,
3302  (errmsg_internal("sending %s to process %d",
3303  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3304  (int) rw->rw_pid)));
      /*
       * NOTE(review): listing line 3305 is absent; the statement that
       * delivers the signal announced by the message above is not visible
       * here (compare the regular-backend loop below).
       */
3306  }
3307  }
3308  }
3309 
3310  /* Process regular backends */
3311  dlist_foreach_modify(iter, &BackendList)
3312  {
3313  bp = dlist_container(Backend, elem, iter.cur);
3314 
3315  if (bp->pid == pid)
3316  {
3317  /*
3318  * Found entry for freshly-dead backend, so remove it.
3319  */
3320  if (!bp->dead_end)
3321  {
      /* NOTE(review): listing line 3322 is absent at this point. */
3323 #ifdef EXEC_BACKEND
3324  ShmemBackendArrayRemove(bp);
3325 #endif
3326  }
3327  dlist_delete(iter.cur);
3328  free(bp);
3329  /* Keep looping so we can signal remaining backends */
3330  }
3331  else
3332  {
3333  /*
3334  * This backend is still alive. Unless we did so already, tell it
3335  * to commit hara-kiri.
3336  *
3337  * SIGQUIT is the special signal that says exit without proc_exit
3338  * and let the user know what's going on. But if SendStop is set
3339  * (-s on command line), then we send SIGSTOP instead, so that we
3340  * can get core dumps from all backends by hand.
3341  *
3342  * We could exclude dead_end children here, but at least in the
3343  * SIGSTOP case it seems better to include them.
3344  *
3345  * Background workers were already processed above; ignore them
3346  * here.
3347  */
3348  if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
3349  continue;
3350 
3351  if (take_action)
3352  {
3353  ereport(DEBUG2,
3354  (errmsg_internal("sending %s to process %d",
3355  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3356  (int) bp->pid)));
3357  signal_child(bp->pid, (SendStop ? SIGSTOP : SIGQUIT));
3358  }
3359  }
3360  }
3361 
3362  /* Take care of the startup process too */
3363  if (pid == StartupPID)
3364  {
3365  StartupPID = 0;
      /* NOTE(review): listing line 3366 is absent at this point. */
3367  }
3368  else if (StartupPID != 0 && take_action)
3369  {
3370  ereport(DEBUG2,
3371  (errmsg_internal("sending %s to process %d",
3372  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3373  (int) StartupPID)));
3374  signal_child(StartupPID, (SendStop ? SIGSTOP : SIGQUIT));
      /* NOTE(review): listing line 3375 is absent at this point. */
3376  }
3377 
3378  /* Take care of the bgwriter too */
3379  if (pid == BgWriterPID)
3380  BgWriterPID = 0;
3381  else if (BgWriterPID != 0 && take_action)
3382  {
3383  ereport(DEBUG2,
3384  (errmsg_internal("sending %s to process %d",
3385  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3386  (int) BgWriterPID)));
3387  signal_child(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
3388  }
3389 
3390  /* Take care of the checkpointer too */
3391  if (pid == CheckpointerPID)
3392  CheckpointerPID = 0;
3393  else if (CheckpointerPID != 0 && take_action)
3394  {
3395  ereport(DEBUG2,
3396  (errmsg_internal("sending %s to process %d",
3397  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3398  (int) CheckpointerPID)));
3399  signal_child(CheckpointerPID, (SendStop ? SIGSTOP : SIGQUIT));
3400  }
3401 
3402  /* Take care of the walwriter too */
3403  if (pid == WalWriterPID)
3404  WalWriterPID = 0;
3405  else if (WalWriterPID != 0 && take_action)
3406  {
3407  ereport(DEBUG2,
3408  (errmsg_internal("sending %s to process %d",
3409  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3410  (int) WalWriterPID)));
3411  signal_child(WalWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
3412  }
3413 
3414  /* Take care of the walreceiver too */
3415  if (pid == WalReceiverPID)
3416  WalReceiverPID = 0;
3417  else if (WalReceiverPID != 0 && take_action)
3418  {
3419  ereport(DEBUG2,
3420  (errmsg_internal("sending %s to process %d",
3421  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3422  (int) WalReceiverPID)));
3423  signal_child(WalReceiverPID, (SendStop ? SIGSTOP : SIGQUIT));
3424  }
3425 
3426  /* Take care of the autovacuum launcher too */
3427  if (pid == AutoVacPID)
3428  AutoVacPID = 0;
3429  else if (AutoVacPID != 0 && take_action)
3430  {
3431  ereport(DEBUG2,
3432  (errmsg_internal("sending %s to process %d",
3433  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3434  (int) AutoVacPID)));
3435  signal_child(AutoVacPID, (SendStop ? SIGSTOP : SIGQUIT));
3436  }
3437 
3438  /*
3439  * Force a power-cycle of the pgarch process too. (This isn't absolutely
3440  * necessary, but it seems like a good idea for robustness, and it
3441  * simplifies the state-machine logic in the case where a shutdown request
3442  * arrives during crash processing.)
3443  */
3444  if (PgArchPID != 0 && take_action)
3445  {
3446  ereport(DEBUG2,
3447  (errmsg_internal("sending %s to process %d",
3448  "SIGQUIT",
3449  (int) PgArchPID)));
3450  signal_child(PgArchPID, SIGQUIT);
3451  }
3452 
3453  /*
3454  * Force a power-cycle of the pgstat process too. (This isn't absolutely
3455  * necessary, but it seems like a good idea for robustness, and it
3456  * simplifies the state-machine logic in the case where a shutdown request
3457  * arrives during crash processing.)
3458  */
3459  if (PgStatPID != 0 && take_action)
3460  {
3461  ereport(DEBUG2,
3462  (errmsg_internal("sending %s to process %d",
3463  "SIGQUIT",
3464  (int) PgStatPID)));
3465  signal_child(PgStatPID, SIGQUIT);
      /* NOTE(review): listing line 3466 is absent at this point. */
3467  }
3468 
3469  /* We do NOT restart the syslogger */
3470 
      /*
       * Setting FatalError here is what makes take_action evaluate to false
       * on later calls, so logging and signalling happen only once.
       */
3471  if (Shutdown != ImmediateShutdown)
3472  FatalError = true;
3473 
3474  /* We now transit into a state of waiting for children to die */
3475  if (pmState == PM_RECOVERY ||
3476  pmState == PM_HOT_STANDBY ||
3477  pmState == PM_RUN ||
3478  pmState == PM_WAIT_BACKUP ||
3480  pmState == PM_SHUTDOWN)
      /*
       * NOTE(review): listing lines 3479 and 3481 are absent: one term of
       * the condition above and the statement this "if" controls are not
       * visible. As listed, the "if (AbortStartTime == 0)" below would
       * syntactically become its body -- confirm against upstream.
       */
3482 
3483  /*
3484  * .. and if this doesn't happen quickly enough, now the clock is ticking
3485  * for us to kill them without mercy.
3486  */
3487  if (AbortStartTime == 0)
3488  AbortStartTime = time(NULL);
3489 }
3490 
3491 /*
3492  * Log the death of a child process.
3493  */
3494 static void
3495 LogChildExit(int lev, const char *procname, int pid, int exitstatus)
3496 {
3497  /*
3498  * size of activity_buffer is arbitrary, but set equal to default
3499  * track_activity_query_size
3500  */
3501  char activity_buffer[1024];
3502  const char *activity = NULL;
3503 
3504  if (!EXIT_STATUS_0(exitstatus))
3505  activity = pgstat_get_crashed_backend_activity(pid,
3506  activity_buffer,
3507  sizeof(activity_buffer));
3508 
3509  if (WIFEXITED(exitstatus))
3510  ereport(lev,
3511 
3512  /*------
3513  translator: %s is a noun phrase describing a child process, such as
3514  "server process" */
3515  (errmsg("%s (PID %d) exited with exit code %d",
3516  procname, pid, WEXITSTATUS(exitstatus)),
3517  activity ? errdetail("Failed process was running: %s", activity) : 0));
3518  else if (WIFSIGNALED(exitstatus))
3519 #if defined(WIN32)
3520  ereport(lev,
3521 
3522  /*------
3523  translator: %s is a noun phrase describing a child process, such as
3524  "server process" */
3525  (errmsg("%s (PID %d) was terminated by exception 0x%X",
3526  procname, pid, WTERMSIG(exitstatus)),
3527  errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
3528  activity ? errdetail("Failed process was running: %s", activity) : 0));
3529 #elif defined(HAVE_DECL_SYS_SIGLIST) && HAVE_DECL_SYS_SIGLIST
3530  ereport(lev,
3531 
3532  /*------
3533  translator: %s is a noun phrase describing a child process, such as
3534  "server process" */
3535  (errmsg("%s (PID %d) was terminated by signal %d: %s",
3536  procname, pid, WTERMSIG(exitstatus),
3537  WTERMSIG(exitstatus) < NSIG ?
3538  sys_siglist[WTERMSIG(exitstatus)] : "(unknown)"),
3539  activity ? errdetail("Failed process was running: %s", activity) : 0));
3540 #else
3541  ereport(lev,
3542 
3543  /*------
3544  translator: %s is a noun phrase describing a child process, such as
3545  "server process" */
3546  (errmsg("%s (PID %d) was terminated by signal %d",
3547  procname, pid, WTERMSIG(exitstatus)),
3548  activity ? errdetail("Failed process was running: %s", activity) : 0));
3549 #endif
3550  else
3551  ereport(lev,
3552 
3553  /*------
3554  translator: %s is a noun phrase describing a child process, such as
3555  "server process" */
3556  (errmsg("%s (PID %d) exited with unrecognized status %d",
3557  procname, pid, exitstatus),
3558  activity ? errdetail("Failed process was running: %s", activity) : 0));
3559 }
3560 
3561 /*
3562  * Advance the postmaster's state machine and take actions as appropriate
3563  *
3564  * This is common code for pmdie(), reaper() and sigusr1_handler(), which
3565  * receive the signals that might mean we need to change state.
      *
      * The function reads pmState and the per-child PID variables, and each
      * section below handles one state; because the sections are tested in
      * sequence, a state change made by one section can be acted on by a
      * later section within the same call.
      *
      * NOTE(review): this listing has many gaps in its embedded line numbers
      * inside this function (for example 3568, 3576, 3589, 3595, 3617, 3622,
      * 3626, 3632, 3647, 3650, 3654, 3666, 3669-3673, 3693, 3723, 3739,
      * 3757, 3773, 3786, 3789, 3791 and 3793). The function's name line and
      * most of the statements that follow the "if" conditions are therefore
      * not visible. Check the upstream file before relying on the control
      * flow as shown.
3566  */
3567 static void
3569 {
3570  if (pmState == PM_WAIT_BACKUP)
3571  {
3572  /*
3573  * PM_WAIT_BACKUP state ends when online backup mode is not active.
3574  */
3575  if (!BackupInProgress())
      /* NOTE(review): the statement controlled by this "if" is not visible. */
3577  }
3578 
3579  if (pmState == PM_WAIT_READONLY)
3580  {
3581  /*
3582  * PM_WAIT_READONLY state ends when we have no regular backends that
3583  * have been started during recovery. We kill the startup and
3584  * walreceiver processes and transition to PM_WAIT_BACKENDS. Ideally,
3585  * we might like to kill these processes first and then wait for
3586  * backends to die off, but that doesn't work at present because
3587  * killing the startup process doesn't release its locks.
3588  */
      /* NOTE(review): the condition guarding this block is not visible. */
3590  {
3591  if (StartupPID != 0)
3592  signal_child(StartupPID, SIGTERM);
3593  if (WalReceiverPID != 0)
3594  signal_child(WalReceiverPID, SIGTERM);
3596  }
3597  }
3598 
3599  /*
3600  * If we are in a state-machine state that implies waiting for backends to
3601  * exit, see if they're all gone, and change state if so.
3602  */
3603  if (pmState == PM_WAIT_BACKENDS)
3604  {
3605  /*
3606  * PM_WAIT_BACKENDS state ends when we have no regular backends
3607  * (including autovac workers), no bgworkers (including unconnected
3608  * ones), and no walwriter, autovac launcher or bgwriter. If we are
3609  * doing crash recovery or an immediate shutdown then we expect the
3610  * checkpointer to exit as well, otherwise not. The archiver, stats,
3611  * and syslogger processes are disregarded since they are not
3612  * connected to shared memory; we also disregard dead_end children
3613  * here. Walsenders are also disregarded, they will be terminated
3614  * later after writing the checkpoint record, like the archiver
3615  * process.
3616  */
      /*
       * NOTE(review): the opening of this condition (listing line 3617) and
       * the second half of the checkpointer term (3622) are not visible.
       */
3618  StartupPID == 0 &&
3619  WalReceiverPID == 0 &&
3620  BgWriterPID == 0 &&
3621  (CheckpointerPID == 0 ||
3623  WalWriterPID == 0 &&
3624  AutoVacPID == 0)
3625  {
3627  {
3628  /*
3629  * Start waiting for dead_end children to die. This state
3630  * change causes ServerLoop to stop creating new ones.
3631  */
3633 
3634  /*
3635  * We already SIGQUIT'd the archiver and stats processes, if
3636  * any, when we started immediate shutdown or entered
3637  * FatalError state.
3638  */
3639  }
3640  else
3641  {
3642  /*
3643  * If we get here, we are proceeding with normal shutdown. All
3644  * the regular children are gone, and it's time to tell the
3645  * checkpointer to do a shutdown checkpoint.
3646  */
3648  /* Start the checkpointer if not running */
3649  if (CheckpointerPID == 0)
3651  /* And tell it to shut down */
3652  if (CheckpointerPID != 0)
3653  {
3655  pmState = PM_SHUTDOWN;
3656  }
3657  else
3658  {
3659  /*
3660  * If we failed to fork a checkpointer, just shut down.
3661  * Any required cleanup will happen at next restart. We
3662  * set FatalError so that an "abnormal shutdown" message
3663  * gets logged when we exit.
3664  */
3665  FatalError = true;
3667 
3668  /* Kill the walsenders, archiver and stats collector too */
3670  if (PgArchPID != 0)
3672  if (PgStatPID != 0)
3674  }
3675  }
3676  }
3677  }
3678 
3679  if (pmState == PM_SHUTDOWN_2)
3680  {
3681  /*
3682  * PM_SHUTDOWN_2 state ends when there's no other children than
3683  * dead_end children left. There shouldn't be any regular backends
3684  * left by now anyway; what we're really waiting for is walsenders and
3685  * archiver.
3686  *
3687  * Walreceiver should normally be dead by now, but not when a fast
3688  * shutdown is performed during recovery.
3689  */
3690  if (PgArchPID == 0 && CountChildren(BACKEND_TYPE_ALL) == 0 &&
3691  WalReceiverPID == 0)
3692  {
3694  }
3695  }
3696 
3697  if (pmState == PM_WAIT_DEAD_END)
3698  {
3699  /*
3700  * PM_WAIT_DEAD_END state ends when the BackendList is entirely empty
3701  * (ie, no dead_end children remain), and the archiver and stats
3702  * collector are gone too.
3703  *
3704  * The reason we wait for those two is to protect them against a new
3705  * postmaster starting conflicting subprocesses; this isn't an
3706  * ironclad protection, but it at least helps in the
3707  * shutdown-and-immediately-restart scenario. Note that they have
3708  * already been sent appropriate shutdown signals, either during a
3709  * normal state transition leading up to PM_WAIT_DEAD_END, or during
3710  * FatalError processing.
3711  */
3712  if (dlist_is_empty(&BackendList) &&
3713  PgArchPID == 0 && PgStatPID == 0)
3714  {
3715  /* These other guys should be dead already */
3716  Assert(StartupPID == 0);
3717  Assert(WalReceiverPID == 0);
3718  Assert(BgWriterPID == 0);
3719  Assert(CheckpointerPID == 0);
3720  Assert(WalWriterPID == 0);
3721  Assert(AutoVacPID == 0);
3722  /* syslogger is not considered here */
3724  }
3725  }
3726 
3727  /*
3728  * If we've been told to shut down, we exit as soon as there are no
3729  * remaining children. If there was a crash, cleanup will occur at the
3730  * next startup. (Before PostgreSQL 8.3, we tried to recover from the
3731  * crash before exiting, but that seems unwise if we are quitting because
3732  * we got SIGTERM from init --- there may well not be time for recovery
3733  * before init decides to SIGKILL us.)
3734  *
3735  * Note that the syslogger continues to run. It will exit when it sees
3736  * EOF on its input pipe, which happens when there are no more upstream
3737  * processes.
3738  */
      /* NOTE(review): the condition guarding this block is not visible. */
3740  {
3741  if (FatalError)
3742  {
3743  ereport(LOG, (errmsg("abnormal database system shutdown")));
3744  ExitPostmaster(1);
3745  }
3746  else
3747  {
3748  /*
3749  * Terminate exclusive backup mode to avoid recovery after a clean
3750  * fast shutdown. Since an exclusive backup can only be taken
3751  * during normal running (and not, for example, while running
3752  * under Hot Standby) it only makes sense to do this if we reached
3753  * normal running. If we're still in recovery, the backup file is
3754  * one we're recovering *from*, and we must keep it around so that
3755  * recovery restarts from the right place.
3756  */
      /*
       * NOTE(review): listing line 3757 is absent; the comment above implies
       * CancelBackup() is conditional, but the condition is not visible.
       */
3758  CancelBackup();
3759 
3760  /* Normal exit from the postmaster is here */
3761  ExitPostmaster(0);
3762  }
3763  }
3764 
3765  /*
3766  * If the startup process failed, or the user does not want an automatic
3767  * restart after backend crashes, wait for all non-syslogger children to
3768  * exit, and then exit postmaster. We don't try to reinitialize when the
3769  * startup process fails, because more than likely it will just fail again
3770  * and we will keep trying forever.
3771  */
3772  if (pmState == PM_NO_CHILDREN &&
      /* NOTE(review): the rest of this condition (line 3773) is not visible. */
3774  ExitPostmaster(1);
3775 
3776  /*
3777  * If we need to recover from a crash, wait for all non-syslogger children
3778  * to exit, then reset shmem and StartupDataBase.
3779  */
3780  if (FatalError && pmState == PM_NO_CHILDREN)
3781  {
3782  ereport(LOG,
3783  (errmsg("all server processes terminated; reinitializing")));
3784 
3785  /* allow background workers to immediately restart */
3787 
3788  shmem_exit(1);
3790 
      /*
       * NOTE(review): the statements that reinitialize and relaunch the
       * startup process (lines 3789, 3791, 3793) are not visible; the
       * Assert below presumes StartupPID was set by one of them.
       */
3792  Assert(StartupPID != 0);
3794  pmState = PM_STARTUP;
3795  /* crash recovery started, reset SIGKILL flag */
3796  AbortStartTime = 0;
3797  }
3798 }
3799 
3800 
3801 /*
3802  * Send a signal to a postmaster child process
3803  *
3804  * On systems that have setsid(), each child process sets itself up as a
3805  * process group leader. For signals that are generally interpreted in the
3806  * appropriate fashion, we signal the entire process group not just the
3807  * direct child process. This allows us to, for example, SIGQUIT a blocked
3808  * archive_recovery script, or SIGINT a script being run by a backend via
3809  * system().
3810  *
3811  * There is a race condition for recently-forked children: they might not
3812  * have executed setsid() yet. So we signal the child directly as well as
3813  * the group. We assume such a child will handle the signal before trying
3814  * to spawn any grandchild processes. We also assume that signaling the
3815  * child twice will not cause any problems.
3816  */
3817 static void
3818 signal_child(pid_t pid, int signal)
3819 {
3820  if (kill(pid, signal) < 0)
3821  elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) pid, signal);
3822 #ifdef HAVE_SETSID
3823  switch (signal)
3824  {
3825  case SIGINT:
3826  case SIGTERM:
3827  case SIGQUIT:
3828  case SIGSTOP:
3829  case SIGKILL:
3830  if (kill(-pid, signal) < 0)
3831  elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) (-pid), signal);
3832  break;
3833  default:
3834  break;
3835  }
3836 #endif
3837 }
3838 
3839 /*
3840  * Send a signal to the targeted children (but NOT special children;
3841  * dead_end children are never signaled, either).
      *
      * signal is handed to signal_child() for every selected entry of
      * BackendList. target is a mask tested against each entry's bkend_type;
      * passing BACKEND_TYPE_ALL skips that filtering. Returns true if at
      * least one child was signaled, false otherwise.
      *
      * NOTE(review): listing lines 3867-3868 are absent, so the condition
      * that reclassifies recently announced WAL senders is truncated below.
3842  */
3843 static bool
3844 SignalSomeChildren(int signal, int target)
3845 {
3846  dlist_iter iter;
3847  bool signaled = false;
3848 
3849  dlist_foreach(iter, &BackendList)
3850  {
3851  Backend *bp = dlist_container(Backend, elem, iter.cur);
3852 
3853  if (bp->dead_end)
3854  continue;
3855 
3856  /*
3857  * Since target == BACKEND_TYPE_ALL is the most common case, we test
3858  * it first and avoid touching shared memory for every child.
3859  */
3860  if (target != BACKEND_TYPE_ALL)
3861  {
3862  /*
3863  * Assign bkend_type for any recently announced WAL Sender
3864  * processes.
3865  */
3866  if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
      /*
       * NOTE(review): the remainder of this condition and the statement it
       * controls are not visible in this listing.
       */
3869 
3870  if (!(target & bp->bkend_type))
3871  continue;
3872  }
3873 
3874  ereport(DEBUG4,
3875  (errmsg_internal("sending signal %d to process %d",
3876  signal, (int) bp->pid)));
3877  signal_child(bp->pid, signal);
3878  signaled = true;
3879  }
3880  return signaled;
3881 }
3882 
3883 /*
3884  * Send a termination signal to children. This considers all of our children
3885  * processes, except syslogger and dead_end backends.
      *
      * The signal is first passed to SignalChildren() and then sent with
      * signal_child() to each special child whose PID variable is nonzero.
      *
      * NOTE(review): the function's name line (listing line 3888) and
      * listing line 3895 are absent; the parameter is presumably an int
      * named "signal", as used below -- confirm against upstream.
3886  */
3887 static void
3889 {
3890  SignalChildren(signal);
3891  if (StartupPID != 0)
3892  {
3893  signal_child(StartupPID, signal);
3894  if (signal == SIGQUIT || signal == SIGKILL)
      /* NOTE(review): the statement controlled by this "if" is not visible. */
3896  }
3897  if (BgWriterPID != 0)
3898  signal_child(BgWriterPID, signal);
3899  if (CheckpointerPID != 0)
3900  signal_child(CheckpointerPID, signal);
3901  if (WalWriterPID != 0)
3902  signal_child(WalWriterPID, signal);
3903  if (WalReceiverPID != 0)
3904  signal_child(WalReceiverPID, signal);
3905  if (AutoVacPID != 0)
3906  signal_child(AutoVacPID, signal);
3907  if (PgArchPID != 0)
3908  signal_child(PgArchPID, signal);
3909  if (PgStatPID != 0)
3910  signal_child(PgStatPID, signal);
3911 }
3912 
3913 /*
3914  * BackendStartup -- start backend process
3915  *
3916  * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
      * STATUS_ERROR is also returned, before any fork is attempted, when the
      * Backend struct cannot be allocated or the cancel key cannot be
      * generated.
3917  *
3918  * Note: if you change this code, also consider StartAutovacuumWorker.
      *
      * NOTE(review): the function's name line (listing line 3921) and
      * listing lines 3944, 3956, 3958, 3964, 3980 and 3999 are absent, so
      * several conditions and statements are not visible below. The body
      * uses a variable named "port" that is presumably the parameter --
      * confirm against upstream.
3919  */
3920 static int
3922 {
3923  Backend *bn; /* for backend cleanup */
3924  pid_t pid;
3925 
3926  /*
3927  * Create backend data structure. Better before the fork() so we can
3928  * handle failure cleanly.
3929  */
3930  bn = (Backend *) malloc(sizeof(Backend));
3931  if (!bn)
3932  {
3933  ereport(LOG,
3934  (errcode(ERRCODE_OUT_OF_MEMORY),
3935  errmsg("out of memory")));
3936  return STATUS_ERROR;
3937  }
3938 
3939  /*
3940  * Compute the cancel key that will be assigned to this backend. The
3941  * backend will have its own copy in the forked-off process' value of
3942  * MyCancelKey, so that it can transmit the key to the frontend.
3943  */
      /*
       * NOTE(review): the condition guarding this failure block (listing
       * line 3944) is not visible.
       */
3945  {
3946  free(bn);
3947  ereport(LOG,
3948  (errcode(ERRCODE_INTERNAL_ERROR),
3949  errmsg("could not generate random cancel key")));
3950  return STATUS_ERROR;
3951  }
3952 
3953  bn->cancel_key = MyCancelKey;
3954 
3955  /* Pass down canAcceptConnections state */
3957  bn->dead_end = (port->canAcceptConnections != CAC_OK &&
      /* NOTE(review): the rest of this expression (line 3958) is not visible. */
3959 
3960  /*
3961  * Unless it's a dead_end child, assign it a child slot number
3962  */
3963  if (!bn->dead_end)
3965  else
3966  bn->child_slot = 0;
3967 
3968  /* Hasn't asked to be notified about any bgworkers yet */
3969  bn->bgworker_notify = false;
3970 
3971 #ifdef EXEC_BACKEND
3972  pid = backend_forkexec(port);
3973 #else /* !EXEC_BACKEND */
3974  pid = fork_process();
3975  if (pid == 0) /* child */
3976  {
      /* The child has no use for the postmaster's bookkeeping entry */
3977  free(bn);
3978 
3979  /* Detangle from postmaster */
3981 
3982  /* Close the postmaster's sockets */
3983  ClosePostmasterPorts(false);
3984 
3985  /* Perform additional initialization and collect startup packet */
3986  BackendInitialize(port);
3987 
3988  /* And run the backend */
3989  BackendRun(port);
3990  }
3991 #endif /* EXEC_BACKEND */
3992 
3993  if (pid < 0)
3994  {
3995  /* in parent, fork failed */
      /* Keep errno from the failed fork so that %m below reports it */
3996  int save_errno = errno;
3997 
3998  if (!bn->dead_end)
      /*
       * NOTE(review): listing line 3999 is absent. As listed, free(bn)
       * would syntactically become the body of the "if" above -- confirm
       * against upstream.
       */
4000  free(bn);
4001  errno = save_errno;
4002  ereport(LOG,
4003  (errmsg("could not fork new process for connection: %m")));
4004  report_fork_failure_to_client(port, save_errno);
4005  return STATUS_ERROR;
4006  }
4007 
4008  /* in parent, successful fork */
4009  ereport(DEBUG2,
4010  (errmsg_internal("forked new backend, pid=%d socket=%d",
4011  (int) pid, (int) port->sock)));
4012 
4013  /*
4014  * Everything's been successful, it's safe to add this backend to our list
4015  * of backends.
4016  */
4017  bn->pid = pid;
4018  bn->bkend_type = BACKEND_TYPE_NORMAL; /* Can change later to WALSND */
4019  dlist_push_head(&BackendList, &bn->elem);
4020 
4021 #ifdef EXEC_BACKEND
4022  if (!bn->dead_end)
4023  ShmemBackendArrayAdd(bn);
4024 #endif
4025 
4026  return STATUS_OK;
4027 }
4028 
4029 /*
4030  * Try to report backend fork() failure to client before we close the
4031  * connection. Since we do not care to risk blocking the postmaster on
4032  * this connection, we set the connection to non-blocking and try only once.
4033  *
4034  * This is grungy special-purpose code; we cannot use backend libpq since
4035  * it's not up and running.
      *
      * The body reads port->sock as the socket to write to and passes errnum
      * to strerror() to build the message text. Nothing is returned; a send
      * failure other than EINTR is silently ignored.
      *
      * NOTE(review): the function's name/parameter line (listing line 4038)
      * is absent; "port" and "errnum" are presumably its parameters, as
      * used below -- confirm against upstream.
4036  */
4037 static void
4039 {
4040  char buffer[1000];
4041  int rc;
4042 
4043  /* Format the error message packet (always V2 protocol) */
4044  snprintf(buffer, sizeof(buffer), "E%s%s\n",
4045  _("could not fork new process for connection: "),
4046  strerror(errnum));
4047 
4048  /* Set port to non-blocking. Don't do send() if this fails */
4049  if (!pg_set_noblock(port->sock))
4050  return;
4051 
4052  /* We'll retry after EINTR, but ignore all other failures */
4053  do
4054  {
      /* The "+ 1" transmits the string's terminating NUL byte as well */
4055  rc = send(port->sock, buffer, strlen(buffer) + 1, 0);
4056  } while (rc < 0 && errno == EINTR);
4057 }
4058 
4059 
4060 /*
4061  * BackendInitialize -- initialize an interactive (postmaster-child)
4062  * backend process, and collect the client's startup packet.
4063  *
4064  * returns: nothing. Will not return at all if there's any failure.
4065  *
4066  * Note: this code does not depend on having any access to shared memory.
4067  * In the EXEC_BACKEND case, we are physically attached to shared memory
4068  * but have not yet set up most of our local pointers to shmem structures.
      *
      * NOTE(review): the function's name line (listing line 4071) and
      * listing lines 4096-4097, 4126, 4128, 4200-4201 and 4238 are absent,
      * so several statements are not visible below. The body uses a variable
      * named "port" that is presumably the parameter -- confirm against
      * upstream.
4069  */
4070 static void
4072 {
4073  int status;
4074  int ret;
4075  char remote_host[NI_MAXHOST];
4076  char remote_port[NI_MAXSERV];
4077  char remote_ps_data[NI_MAXHOST];
4078 
4079  /* Save port etc. for ps status */
4080  MyProcPort = port;
4081 
4082  /*
4083  * PreAuthDelay is a debugging aid for investigating problems in the
4084  * authentication cycle: it can be set in postgresql.conf to allow time to
4085  * attach to the newly-forked backend with a debugger. (See also
4086  * PostAuthDelay, which we allow clients to pass through PGOPTIONS, but it
4087  * is not honored until after authentication.)
4088  */
4089  if (PreAuthDelay > 0)
4090  pg_usleep(PreAuthDelay * 1000000L);
4091 
4092  /* This flag will remain set until InitPostgres finishes authentication */
4093  ClientAuthInProgress = true; /* limit visibility of log messages */
4094 
4095  /* save process start time */
      /* NOTE(review): the statements doing this (4096-4097) are not visible. */
4098 
4099  /* set these to empty in case they are needed before we set them up */
4100  port->remote_host = "";
4101  port->remote_port = "";
4102 
4103  /*
4104  * Initialize libpq and enable reporting of ereport errors to the client.
4105  * Must do this now because authentication uses libpq to send messages.
4106  */
4107  pq_init(); /* initialize libpq to talk to client */
4108  whereToSendOutput = DestRemote; /* now safe to ereport to client */
4109 
4110  /*
4111  * We arrange for a simple exit(1) if we receive SIGTERM or SIGQUIT or
4112  * timeout while trying to collect the startup packet. Otherwise the
4113  * postmaster cannot shutdown the database FAST or IMMED cleanly if a
4114  * buggy client fails to send the packet promptly. XXX it follows that
4115  * the remainder of this function must tolerate losing control at any
4116  * instant. Likewise, any pg_on_exit_callback registered before or during
4117  * this function must be prepared to execute at any instant between here
4118  * and the end of this function. Furthermore, affected callbacks execute
4119  * partially or not at all when a second exit-inducing signal arrives
4120  * after proc_exit_prepare() decrements on_proc_exit_index. (Thanks to
4121  * that mechanic, callbacks need not anticipate more than one call.) This
4122  * is fragile; it ought to instead follow the norm of handling interrupts
4123  * at selected, safe opportunities.
4124  */
4125  pqsignal(SIGTERM, startup_die);
      /*
       * NOTE(review): listing lines 4126 and 4128 are absent; the SIGQUIT
       * setup mentioned in the comment above is not visible here.
       */
4127  InitializeTimeouts(); /* establishes SIGALRM handler */
4129 
4130  /*
4131  * Get the remote host name and port for logging and status display.
4132  */
4133  remote_host[0] = '\0';
4134  remote_port[0] = '\0';
4135  if ((ret = pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen,
4136  remote_host, sizeof(remote_host),
4137  remote_port, sizeof(remote_port),
4138  (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV)) != 0)
4139  ereport(WARNING,
4140  (errmsg_internal("pg_getnameinfo_all() failed: %s",
4141  gai_strerror(ret))));
      /* Build the "host" or "host(port)" string later given to init_ps_display */
4142  if (remote_port[0] == '\0')
4143  snprintf(remote_ps_data, sizeof(remote_ps_data), "%s", remote_host);
4144  else
4145  snprintf(remote_ps_data, sizeof(remote_ps_data), "%s(%s)", remote_host, remote_port);
4146 
4147  /*
4148  * Save remote_host and remote_port in port structure (after this, they
4149  * will appear in log_line_prefix data for log messages).
4150  */
      /* NOTE(review): the strdup() results here are stored without a NULL check. */
4151  port->remote_host = strdup(remote_host);
4152  port->remote_port = strdup(remote_port);
4153 
4154  /* And now we can issue the Log_connections message, if wanted */
4155  if (Log_connections)
4156  {
4157  if (remote_port[0])
4158  ereport(LOG,
4159  (errmsg("connection received: host=%s port=%s",
4160  remote_host,
4161  remote_port)));
4162  else
4163  ereport(LOG,
4164  (errmsg("connection received: host=%s",
4165  remote_host)));
4166  }
4167 
4168  /*
4169  * If we did a reverse lookup to name, we might as well save the results
4170  * rather than possibly repeating the lookup during authentication.
4171  *
4172  * Note that we don't want to specify NI_NAMEREQD above, because then we'd
4173  * get nothing useful for a client without an rDNS entry. Therefore, we
4174  * must check whether we got a numeric IPv4 or IPv6 address, and not save
4175  * it into remote_hostname if so. (This test is conservative and might
4176  * sometimes classify a hostname as numeric, but an error in that
4177  * direction is safe; it only results in a possible extra lookup.)
4178  */
4179  if (log_hostname &&
4180  ret == 0 &&
4181  strspn(remote_host, "0123456789.") < strlen(remote_host) &&
4182  strspn(remote_host, "0123456789ABCDEFabcdef:") < strlen(remote_host))
4183  port->remote_hostname = strdup(remote_host);
4184 
4185  /*
4186  * Ready to begin client interaction. We will give up and exit(1) after a
4187  * time delay, so that a broken client can't hog a connection
4188  * indefinitely. PreAuthDelay and any DNS interactions above don't count
4189  * against the time limit.
4190  *
4191  * Note: AuthenticationTimeout is applied here while waiting for the
4192  * startup packet, and then again in InitPostgres for the duration of any
4193  * authentication operations. So a hostile client could tie up the
4194  * process for nearly twice AuthenticationTimeout before we kick him off.
4195  *
4196  * Note: because PostgresMain will call InitializeTimeouts again, the
4197  * registration of STARTUP_PACKET_TIMEOUT will be lost. This is okay
4198  * since we never use it again after this function.
4199  */
      /*
       * NOTE(review): listing lines 4200-4201 are absent; the statements
       * that register and arm the timeout described above are not visible.
       */
4202 
4203  /*
4204  * Receive the startup packet (which might turn out to be a cancel request
4205  * packet).
4206  */
4207  status = ProcessStartupPacket(port, false);
4208 
4209  /*
4210  * Stop here if it was bad or a cancel packet. ProcessStartupPacket
4211  * already did any appropriate error reporting.
4212  */
4213  if (status != STATUS_OK)
4214  proc_exit(0);
4215 
4216  /*
4217  * Now that we have the user and database name, we can set the process
4218  * title for ps. It's good to do this as early as possible in startup.
4219  *
4220  * For a walsender, the ps display is set in the following form:
4221  *
4222  * postgres: wal sender process <user> <host> <activity>
4223  *
4224  * To achieve that, we pass "wal sender process" as username and username
4225  * as dbname to init_ps_display(). XXX: should add a new variant of
4226  * init_ps_display() to avoid abusing the parameters like this.
4227  */
4228  if (am_walsender)
4229  init_ps_display("wal sender process", port->user_name, remote_ps_data,
4230  update_process_title ? "authentication" : "");
4231  else
4232  init_ps_display(port->user_name, port->database_name, remote_ps_data,
4233  update_process_title ? "authentication" : "");
4234 
4235  /*
4236  * Disable the timeout, and prevent SIGTERM/SIGQUIT again.
4237  */
      /*
       * NOTE(review): listing line 4238 is absent; the statement that
       * disables the timeout is not visible.
       */
4239  PG_SETMASK(&BlockSig);
4240 }
4241 
4242 
4243 /*
4244  * BackendRun -- set up the backend's argument list and invoke PostgresMain()
4245  *
4246  * returns:
4247  * Shouldn't return at all.
4248  * If PostgresMain() fails, return status.
      *
      * The argument vector consists of "postgres" followed by whatever
      * pg_split_opts() extracts from ExtraOptions; the database and user
      * names are taken from the port structure and passed separately.
      *
      * NOTE(review): the function is declared "static void" below, so the
      * "return status" remark above does not match the visible code. The
      * function's name line (listing line 4251) and listing line 4315 are
      * also absent; "port" is presumably the parameter -- confirm against
      * upstream.
4249  */
4250 static void
4252 {
4253  char **av;
4254  int maxac;
4255  int ac;
4256  long secs;
4257  int usecs;
4258  int i;
4259 
4260  /*
4261  * Don't want backend to be able to see the postmaster random number
4262  * generator state. We have to clobber the static random_seed *and* start
4263  * a new random sequence in the random() library function.
4264  */
4265 #ifndef HAVE_STRONG_RANDOM
4266  random_seed = 0;
4267  random_start_time.tv_usec = 0;
4268 #endif
4269  /* slightly hacky way to convert timestamptz into integers */
4270  TimestampDifference(0, port->SessionStartTime, &secs, &usecs);
      /*
       * NOTE(review): usecs is a signed int; if it can reach values near
       * 1000000, "usecs << 12" exceeds INT_MAX where int is 32 bits, which
       * is a signed-shift overflow. Presumably harmless in practice, but
       * consider shifting an unsigned value -- verify the range that
       * TimestampDifference() produces.
       */
4271  srandom((unsigned int) (MyProcPid ^ (usecs << 12) ^ secs));
4272 
4273  /*
4274  * Now, build the argv vector that will be given to PostgresMain.
4275  *
4276  * The maximum possible number of commandline arguments that could come
4277  * from ExtraOptions is (strlen(ExtraOptions) + 1) / 2; see
4278  * pg_split_opts().
4279  */
4280  maxac = 2; /* for fixed args supplied below */
4281  maxac += (strlen(ExtraOptions) + 1) / 2;
4282 
4283  av = (char **) MemoryContextAlloc(TopMemoryContext,
4284  maxac * sizeof(char *));
4285  ac = 0;
4286 
4287  av[ac++] = "postgres";
4288 
4289  /*
4290  * Pass any backend switches specified with -o on the postmaster's own
4291  * command line. We assume these are secure.
4292  */
4293  pg_split_opts(av, &ac, ExtraOptions);
4294 
      /* The second of the two "fixed args" counted in maxac is this NULL */
4295  av[ac] = NULL;
4296 
4297  Assert(ac < maxac);
4298 
4299  /*
4300  * Debug: print arguments being passed to backend
4301  */
4302  ereport(DEBUG3,
4303  (errmsg_internal("%s child[%d]: starting with (",
4304  progname, (int) getpid())));
4305  for (i = 0; i < ac; ++i)
4306  ereport(DEBUG3,
4307  (errmsg_internal("\t%s", av[i])));
4308  ereport(DEBUG3,
4309  (errmsg_internal(")")));
4310 
4311  /*
4312  * Make sure we aren't in PostmasterContext anymore. (We can't delete it
4313  * just yet, though, because InitPostgres will need the HBA data.)
4314  */
      /*
       * NOTE(review): listing line 4315 is absent; the statement that
       * switches memory context is not visible.
       */
4316 
4317  PostgresMain(ac, av, port->database_name, port->user_name);
4318 }
4319 
4320 
4321 #ifdef EXEC_BACKEND
4322 
4323 /*
4324  * postmaster_forkexec -- fork and exec a postmaster subprocess
4325  *
4326  * The caller must have set up the argv array already, except for argv[2]
4327  * which will be filled with the name of the temp variable file.
4328  *
4329  * Returns the child process PID, or -1 on fork failure (a suitable error
4330  * message has been logged on failure).
4331  *
4332  * All uses of this routine will dispatch to SubPostmasterMain in the
4333  * child process.
4334  */
4335 pid_t
4336 postmaster_forkexec(int argc, char *argv[])
4337 {
4338  Port port;
4339 
4340  /* This entry point passes dummy values for the Port variables */
4341  memset(&port, 0, sizeof(port));
4342  return internal_forkexec(argc, argv, &port);
4343 }
4344 
4345 /*
4346  * backend_forkexec -- fork/exec off a backend process
4347  *
4348  * Some operating systems (WIN32) don't have fork() so we have to simulate
4349  * it by storing parameters that need to be passed to the child and
4350  * then create a new child process.
4351  *
4352  * returns the pid of the fork/exec'd process, or -1 on failure
4353  */
4354 static pid_t
4355 backend_forkexec(Port *port)
4356 {
4357  char *av[4];
4358  int ac = 0;
4359 
4360  av[ac++] = "postgres";
4361  av[ac++] = "--forkbackend";
4362  av[ac++] = NULL; /* filled in by internal_forkexec */
4363 
4364  av[ac] = NULL;
4365  Assert(ac < lengthof(av));
4366 
4367  return internal_forkexec(ac, av, port);
4368 }
4369 
4370 #ifndef WIN32
4371 
4372 /*
4373  * internal_forkexec non-win32 implementation
4374  *
4375  * - writes out backend variables to the parameter file
4376  * - fork():s, and then exec():s the child process
4377  */
4378 static pid_t
4379 internal_forkexec(int argc, char *argv[], Port *port)
4380 {
4381  static unsigned long tmpBackendFileNum = 0;
4382  pid_t pid;
4383  char tmpfilename[MAXPGPATH];
4384  BackendParameters param;
4385  FILE *fp;
4386 
4387  if (!save_backend_variables(&param, port))
4388  return -1; /* log made by save_backend_variables */
4389 
4390  /* Calculate name for temp file */
4391  snprintf(tmpfilename, MAXPGPATH, "%s/%s.backend_var.%d.%lu",
4393  MyProcPid, ++tmpBackendFileNum);
4394 
4395  /* Open file */
4396  fp = AllocateFile(tmpfilename, PG_BINARY_W);
4397  if (!fp)
4398  {
4399  /*
4400  * As in OpenTemporaryFileInTablespace, try to make the temp-file
4401  * directory
4402  */
4403  mkdir(PG_TEMP_FILES_DIR, S_IRWXU);
4404 
4405  fp = AllocateFile(tmpfilename, PG_BINARY_W);
4406  if (!fp)
4407  {
4408  ereport(LOG,
4410  errmsg("could not create file \"%s\": %m",
4411  tmpfilename)));
4412  return -1;
4413  }
4414  }
4415 
4416  if (fwrite(&param, sizeof(param), 1, fp) != 1)
4417  {
4418  ereport(LOG,
4420  errmsg("could not write to file \"%s\": %m", tmpfilename)));
4421  FreeFile(fp);
4422  return -1;
4423  }
4424 
4425  /* Release file */
4426  if (FreeFile(fp))
4427  {
4428  ereport(LOG,
4430  errmsg("could not write to file \"%s\": %m", tmpfilename)));
4431  return -1;
4432  }
4433 
4434  /* Make sure caller set up argv properly */
4435  Assert(argc >= 3);
4436  Assert(argv[argc] == NULL);
4437  Assert(strncmp(argv[1], "--fork", 6) == 0);
4438  Assert(argv[2] == NULL);
4439 
4440  /* Insert temp file name after --fork argument */
4441  argv[2] = tmpfilename;
4442 
4443  /* Fire off execv in child */
4444  if ((pid = fork_process()) == 0)
4445  {
4446  if (execv(postgres_exec_path, argv) < 0)
4447  {
4448  ereport(LOG,
4449  (errmsg("could not execute server process \"%s\": %m",
4450  postgres_exec_path)));
4451  /* We're already in the child process here, can't return */
4452  exit(1);
4453  }
4454  }
4455 
4456  return pid; /* Parent returns pid, or -1 on fork failure */
4457 }
4458 #else /* WIN32 */
4459 
4460 /*
4461  * internal_forkexec win32 implementation
4462  *
4463  * - starts backend using CreateProcess(), in suspended state
4464  * - writes out backend variables to the parameter file
4465  * - during this, duplicates handles and sockets required for
4466  * inheritance into the new process
4467  * - resumes execution of the new process once the backend parameter
4468  * file is complete.
4469  */
4470 static pid_t
4471 internal_forkexec(int argc, char *argv[], Port *port)
4472 {
4473  STARTUPINFO si;
4474  PROCESS_INFORMATION pi;
4475  int i;
4476  int j;
4477  char cmdLine[MAXPGPATH * 2];
4478  HANDLE paramHandle;
4479  BackendParameters *param;
4480  SECURITY_ATTRIBUTES sa;
4481  char paramHandleStr[32];
4482  win32_deadchild_waitinfo *childinfo;
4483 
4484  /* Make sure caller set up argv properly */
4485  Assert(argc >= 3);
4486  Assert(argv[argc] == NULL);
4487  Assert(strncmp(argv[1], "--fork", 6) == 0);
4488  Assert(argv[2] == NULL);
4489 
4490  /* Set up shared memory for parameter passing */
4491  ZeroMemory(&sa, sizeof(sa));
4492  sa.nLength = sizeof(sa);
4493  sa.bInheritHandle = TRUE;
4494  paramHandle = CreateFileMapping(INVALID_HANDLE_VALUE,
4495  &sa,
4496  PAGE_READWRITE,
4497  0,
4498  sizeof(BackendParameters),
4499  NULL);
4500  if (paramHandle == INVALID_HANDLE_VALUE)
4501  {
4502  elog(LOG, "could not create backend parameter file mapping: error code %lu",
4503  GetLastError());
4504  return -1;
4505  }
4506 
4507  param = MapViewOfFile(paramHandle, FILE_MAP_WRITE, 0, 0, sizeof(BackendParameters));
4508  if (!param)
4509  {
4510  elog(LOG, "could not map backend parameter memory: error code %lu",
4511  GetLastError());
4512  CloseHandle(paramHandle);
4513  return -1;
4514  }
4515 
4516  /* Insert temp file name after --fork argument */
4517 #ifdef _WIN64
4518  sprintf(paramHandleStr, "%llu", (LONG_PTR) paramHandle);
4519 #else
4520  sprintf(paramHandleStr, "%lu", (DWORD) paramHandle);
4521 #endif
4522  argv[2] = paramHandleStr;
4523 
4524  /* Format the cmd line */
4525  cmdLine[sizeof(cmdLine) - 1] = '\0';
4526  cmdLine[sizeof(cmdLine) - 2] = '\0';
4527  snprintf(cmdLine, sizeof(cmdLine) - 1, "\"%s\"", postgres_exec_path);
4528  i = 0;
4529  while (argv[++i] != NULL)
4530  {
4531  j = strlen(cmdLine);
4532  snprintf(cmdLine + j, sizeof(cmdLine) - 1 - j, " \"%s\"", argv[i]);
4533  }
4534  if (cmdLine[sizeof(cmdLine) - 2] != '\0')
4535  {
4536  elog(LOG, "subprocess command line too long");
4537  return -1;
4538  }
4539 
4540  memset(&pi, 0, sizeof(pi));
4541  memset(&si, 0, sizeof(si));
4542  si.cb = sizeof(si);
4543 
4544  /*
4545  * Create the subprocess in a suspended state. This will be resumed later,
4546  * once we have written out the parameter file.
4547  */
4548  if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, CREATE_SUSPENDED,
4549  NULL, NULL, &si, &pi))
4550  {
4551  elog(LOG, "CreateProcess call failed: %m (error code %lu)",
4552  GetLastError());
4553  return -1;
4554  }
4555 
4556  if (!save_backend_variables(param, port, pi.hProcess, pi.dwProcessId))
4557  {
4558  /*
4559  * log made by save_backend_variables, but we have to clean up the
4560  * mess with the half-started process
4561  */
4562  if (!TerminateProcess(pi.hProcess, 255))
4563  ereport(LOG,
4564  (errmsg_internal("could not terminate unstarted process: error code %lu",
4565  GetLastError())));
4566  CloseHandle(pi.hProcess);
4567  CloseHandle(pi.hThread);
4568  return -1; /* log made by save_backend_variables */
4569  }
4570 
4571  /* Drop the parameter shared memory that is now inherited to the backend */
4572  if (!UnmapViewOfFile(param))
4573  elog(LOG, "could not unmap view of backend parameter file: error code %lu",
4574  GetLastError());
4575  if (!CloseHandle(paramHandle))
4576  elog(LOG, "could not close handle to backend parameter file: error code %lu",
4577  GetLastError());
4578 
4579  /*
4580  * Reserve the memory region used by our main shared memory segment before
4581  * we resume the child process.
4582  */
4583  if (!pgwin32_ReserveSharedMemoryRegion(pi.hProcess))
4584  {
4585  /*
4586  * Failed to reserve the memory, so terminate the newly created
4587  * process and give up.
4588  */
4589  if (!TerminateProcess(pi.hProcess, 255))
4590  ereport(LOG,
4591  (errmsg_internal("could not terminate process that failed to reserve memory: error code %lu",
4592  GetLastError())));
4593  CloseHandle(pi.hProcess);
4594  CloseHandle(pi.hThread);
4595  return -1; /* logging done made by
4596  * pgwin32_ReserveSharedMemoryRegion() */
4597  }
4598 
4599  /*
4600  * Now that the backend variables are written out, we start the child
4601  * thread so it can start initializing while we set up the rest of the
4602  * parent state.
4603  */
4604  if (ResumeThread(pi.hThread) == -1)
4605  {
4606  if (!TerminateProcess(pi.hProcess, 255))
4607  {
4608  ereport(LOG,
4609  (errmsg_internal("could not terminate unstartable process: error code %lu",
4610  GetLastError())));
4611  CloseHandle(pi.hProcess);
4612  CloseHandle(pi.hThread);
4613  return -1;
4614  }
4615  CloseHandle(pi.hProcess);
4616  CloseHandle(pi.hThread);
4617  ereport(LOG,
4618  (errmsg_internal("could not resume thread of unstarted process: error code %lu",
4619  GetLastError())));
4620  return -1;
4621  }
4622 
4623  /*
4624  * Queue a waiter for to signal when this child dies. The wait will be
4625  * handled automatically by an operating system thread pool.
4626  *
4627  * Note: use malloc instead of palloc, since it needs to be thread-safe.
4628  * Struct will be free():d from the callback function that runs on a
4629  * different thread.
4630  */
4631  childinfo = malloc(sizeof(win32_deadchild_waitinfo));
4632  if (!childinfo)
4633  ereport(FATAL,
4634  (errcode(ERRCODE_OUT_OF_MEMORY),
4635  errmsg("out of memory")));
4636 
4637  childinfo->procHandle = pi.hProcess;
4638  childinfo->procId = pi.dwProcessId;
4639 
4640  if (!RegisterWaitForSingleObject(&childinfo->waitHandle,
4641  pi.hProcess,
4642  pgwin32_deadchild_callback,
4643  childinfo,
4644  INFINITE,
4645  WT_EXECUTEONLYONCE | WT_EXECUTEINWAITTHREAD))
4646  ereport(FATAL,
4647  (errmsg_internal("could not register process for wait: error code %lu",
4648  GetLastError())));
4649 
4650  /* Don't close pi.hProcess here - the wait thread needs access to it */
4651 
4652  CloseHandle(pi.hThread);
4653 
4654  return pi.dwProcessId;
4655 }
4656 #endif /* WIN32 */
4657 
4658 
4659 /*
4660  * SubPostmasterMain -- Get the fork/exec'd process into a state equivalent
4661  * to what it would be if we'd simply forked on Unix, and then
4662  * dispatch to the appropriate place.
4663  *
4664  * The first two command line arguments are expected to be "--forkFOO"
4665  * (where FOO indicates which postmaster child we are to become), and
4666  * the name of a variables file that we can read to load data that would
4667  * have been inherited by fork() on Unix. Remaining arguments go to the
4668  * subprocess FooMain() routine.
4669  */
4670 void
4671 SubPostmasterMain(int argc, char *argv[])
4672 {
4673  Port port;
4674 
4675  /* In EXEC_BACKEND case we will not have inherited these settings */
4676  IsPostmasterEnvironment = true;
4678 
4679  /* Setup as postmaster child */
4681 
4682  /* Setup essential subsystems (to ensure elog() behaves sanely) */
4684 
4685  /* Check we got appropriate args */
4686  if (argc < 3)
4687  elog(FATAL, "invalid subpostmaster invocation");
4688 
4689  /* Read in the variables file */
4690  memset(&port, 0, sizeof(Port));
4691  read_backend_variables(argv[2], &port);
4692 
4693  /* Close the postmaster's sockets (as soon as we know them) */
4694  ClosePostmasterPorts(strcmp(argv[1], "--forklog") == 0);
4695 
4696  /*
4697  * Set reference point for stack-depth checking
4698  */
4699  set_stack_base();
4700 
4701  /*
4702  * Set up memory area for GSS information. Mirrors the code in ConnCreate
4703  * for the non-exec case.
4704  */
4705 #if defined(ENABLE_GSS) || defined(ENABLE_SSPI)
4706  port.gss = (pg_gssinfo *) calloc(1, sizeof(pg_gssinfo));
4707  if (!port.gss)
4708  ereport(FATAL,
4709  (errcode(ERRCODE_OUT_OF_MEMORY),
4710  errmsg("out of memory")));
4711 #endif
4712 
4713  /*
4714  * If appropriate, physically re-attach to shared memory segment. We want
4715  * to do this before going any further to ensure that we can attach at the
4716  * same address the postmaster used. On the other hand, if we choose not
4717  * to re-attach, we may have other cleanup to do.
4718  *
4719  * If testing EXEC_BACKEND on Linux, you should run this as root before
4720  * starting the postmaster:
4721  *
4722  * echo 0 >/proc/sys/kernel/randomize_va_space
4723  *
4724  * This prevents using randomized stack and code addresses that cause the
4725  * child process's memory map to be different from the parent's, making it
4726  * sometimes impossible to attach to shared memory at the desired address.
4727  * Return the setting to its old value (usually '1' or '2') when finished.
4728  */
4729  if (strcmp(argv[1], "--forkbackend") == 0 ||
4730  strcmp(argv[1], "--forkavlauncher") == 0 ||
4731  strcmp(argv[1], "--forkavworker") == 0 ||
4732  strcmp(argv[1], "--forkboot") == 0 ||
4733  strncmp(argv[1], "--forkbgworker=", 15) == 0)
4735  else
4737 
4738  /* autovacuum needs this set before calling InitProcess */
4739  if (strcmp(argv[1], "--forkavlauncher") == 0)
4740  AutovacuumLauncherIAm();
4741  if (strcmp(argv[1], "--forkavworker") == 0)
4742  AutovacuumWorkerIAm();
4743 
4744  /*
4745  * Start our win32 signal implementation. This has to be done after we
4746  * read the backend variables, because we need to pick up the signal pipe
4747  * from the parent process.
4748  */
4749 #ifdef WIN32
4751 #endif
4752 
4753  /* In EXEC_BACKEND case we will not have inherited these settings */
4754  pqinitmask();
4755  PG_SETMASK(&BlockSig);
4756 
4757  /* Read in remaining GUC variables */
4758  read_nondefault_variables();
4759 
4760  /*
4761  * Reload any libraries that were preloaded by the postmaster. Since we
4762  * exec'd this process, those libraries didn't come along with us; but we
4763  * should load them into all child processes to be consistent with the
4764  * non-EXEC_BACKEND behavior.
4765  */
4767 
4768  /* Run backend or appropriate child */
4769  if (strcmp(argv[1], "--forkbackend") == 0)
4770  {
4771  Assert(argc == 3); /* shouldn't be any more args */
4772 
4773  /*
4774  * Need to reinitialize the SSL library in the backend, since the
4775  * context structures contain function pointers and cannot be passed
4776  * through the parameter file.
4777  *
4778  * If for some reason reload fails (maybe the user installed broken
4779  * key files), soldier on without SSL; that's better than all
4780  * connections becoming impossible.
4781  *
4782  * XXX should we do this in all child processes? For the moment it's
4783  * enough to do it in backend children.
4784  */
4785 #ifdef USE_SSL
4786  if (EnableSSL)
4787  {
4788  if (secure_initialize(false) == 0)
4789  LoadedSSL = true;
4790  else
4791  ereport(LOG,
4792  (errmsg("SSL configuration could not be loaded in child process")));
4793  }
4794 #endif
4795 
4796  /*
4797  * Perform additional initialization and collect startup packet.
4798  *
4799  * We want to do this before InitProcess() for a couple of reasons: 1.
4800  * so that we aren't eating up a PGPROC slot while waiting on the
4801  * client. 2. so that if InitProcess() fails due to being out of
4802  * PGPROC slots, we have already initialized libpq and are able to
4803  * report the error to the client.
4804  */
4805  BackendInitialize(&port);
4806 
4807  /* Restore basic shared memory pointers */
4809 
4810  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4811  InitProcess();
4812 
4813  /* Attach process to shared data structures */
4815 
4816  /* And run the backend */
4817  BackendRun(&port); /* does not return */
4818  }
4819  if (strcmp(argv[1], "--forkboot") == 0)
4820  {
4821  /* Restore basic shared memory pointers */
4823 
4824  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4826 
4827  /* Attach process to shared data structures */
4829 
4830  AuxiliaryProcessMain(argc - 2, argv + 2); /* does not return */
4831  }
4832  if (strcmp(argv[1], "--forkavlauncher") == 0)
4833  {
4834  /* Restore basic shared memory pointers */
4836 
4837  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4838  InitProcess();
4839 
4840  /* Attach process to shared data structures */
4842 
4843  AutoVacLauncherMain(argc - 2, argv + 2); /* does not return */
4844  }
4845  if (strcmp(argv[1], "--forkavworker") == 0)
4846  {
4847  /* Restore basic shared memory pointers */
4849 
4850  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4851  InitProcess();
4852 
4853  /* Attach process to shared data structures */
4855 
4856  AutoVacWorkerMain(argc - 2, argv + 2); /* does not return */
4857  }
4858  if (strncmp(argv[1], "--forkbgworker=", 15) == 0)
4859  {
4860  int shmem_slot;
4861 
4862  /* do this as early as possible; in particular, before InitProcess() */
4863  IsBackgroundWorker = true;
4864 
4865  /* Restore basic shared memory pointers */
4867 
4868  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4869  InitProcess();
4870 
4871  /* Attach process to shared data structures */
4873 
4874  /* Fetch MyBgworkerEntry from shared memory */
4875  shmem_slot = atoi(argv[1] + 15);
4876  MyBgworkerEntry = BackgroundWorkerEntry(shmem_slot);
4877 
4879  }
4880  if (strcmp(argv[1], "--forkarch") == 0)
4881  {
4882  /* Do not want to attach to shared memory */
4883 
4884  PgArchiverMain(argc, argv); /* does not return */
4885  }
4886  if (strcmp(argv[1], "--forkcol") == 0)
4887  {
4888  /* Do not want to attach to shared memory */
4889 
4890  PgstatCollectorMain(argc, argv); /* does not return */
4891  }
4892  if (strcmp(argv[1], "--forklog") == 0)
4893  {
4894  /* Do not want to attach to shared memory */
4895 
4896  SysLoggerMain(argc, argv); /* does not return */
4897  }
4898 
4899  abort(); /* shouldn't get here */
4900 }
4901 #endif /* EXEC_BACKEND */
4902 
4903 
4904 /*
4905  * ExitPostmaster -- cleanup
4906  *
4907  * Do NOT call exit() directly --- always go through here!
4908  */
4909 static void
4911 {
4912 #ifdef HAVE_PTHREAD_IS_THREADED_NP
4913 
4914  /*
4915  * There is no known cause for a postmaster to become multithreaded after
4916  * startup. Recheck to account for the possibility of unknown causes.
4917  * This message uses LOG level, because an unclean shutdown at this point
4918  * would usually not look much different from a clean shutdown.
4919  */
4920  if (pthread_is_threaded_np() != 0)
4921  ereport(LOG,
4922  (errcode(ERRCODE_INTERNAL_ERROR),
4923  errmsg_internal("postmaster became multithreaded"),
4924  errdetail("Please report this to <pgsql-bugs@postgresql.org>.")));
4925 #endif
4926 
4927  /* should cleanup shared memory and kill all backends */
4928 
4929  /*
4930  * Not sure of the semantics here. When the Postmaster dies, should the
4931  * backends all be killed? probably not.
4932  *
4933  * MUST -- vadim 05-10-1999
4934  */
4935 
4936  proc_exit(status);
4937 }
4938 
4939 /*
4940  * sigusr1_handler - handle signal conditions from child processes
4941  */
4942 static void
4944 {
4945  int save_errno = errno;
4946 
4947  PG_SETMASK(&BlockSig);
4948 
4949  /* Process background worker state change. */
4951  {
4953  StartWorkerNeeded = true;
4954  }
4955 
4956  /*
4957  * RECOVERY_STARTED and BEGIN_HOT_STANDBY signals are ignored in
4958  * unexpected states. If the startup process quickly starts up, completes
4959  * recovery, exits, we might process the death of the startup process
4960  * first. We don't want to go back to recovery in that case.
4961  */
4964  {
4965  /* WAL redo has started. We're out of reinitialization. */
4966  FatalError = false;
4967  Assert(AbortStartTime == 0);
4968 
4969  /*
4970  * Crank up the background tasks. It doesn't matter if this fails,
4971  * we'll just try again later.
4972  */
4973  Assert(CheckpointerPID == 0);
4975  Assert(BgWriterPID == 0);
4977 
4978  /*
4979  * Start the archiver if we're responsible for (re-)archiving received
4980  * files.
4981  */
4982  Assert(PgArchPID == 0);
4983  if (XLogArchivingAlways())
4984  PgArchPID = pgarch_start();
4985 
4986 #ifdef USE_SYSTEMD
4987  if (!EnableHotStandby)
4988  sd_notify(0, "READY=1");
4989 #endif
4990 
4991  pmState = PM_RECOVERY;
4992  }
4995  {
4996  /*
4997  * Likewise, start other special children as needed.
4998  */
4999  Assert(PgStatPID == 0);
5000  PgStatPID = pgstat_start();
5001 
5002  ereport(LOG,
5003  (errmsg("database system is ready to accept read only connections")));
5004 
5005 #ifdef USE_SYSTEMD
5006  sd_notify(0, "READY=1");
5007 #endif
5008 
5010  /* Some workers may be scheduled to start now */
5011  StartWorkerNeeded = true;
5012  }
5013 
5016 
5018  PgArchPID != 0)
5019  {
5020  /*
5021  * Send SIGUSR1 to archiver process, to wake it up and begin archiving
5022  * next transaction log file.
5023  */
5025  }
5026 
5028  SysLoggerPID != 0)
5029  {
5030  /* Tell syslogger to rotate logfile */
5032  }
5033 
5035  Shutdown == NoShutdown)
5036  {
5037  /*
5038  * Start one iteration of the autovacuum daemon, even if autovacuuming
5039  * is nominally not enabled. This is so we can have an active defense
5040  * against transaction ID wraparound. We set a flag for the main loop
5041  * to do it rather than trying to do it here --- this is because the
5042  * autovac process itself may send the signal, and we want to handle
5043  * that by launching another iteration as soon as the current one
5044  * completes.
5045  */
5046  start_autovac_launcher = true;
5047  }
5048 
5050  Shutdown == NoShutdown)
5051  {
5052  /* The autovacuum launcher wants us to start a worker process. */
5054  }
5055 
5057  WalReceiverPID == 0 &&
5058  (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5060  Shutdown == NoShutdown)
5061  {
5062  /* Startup Process wants us to start the walreceiver process. */
5064  }
5065 
5068  {
5069  /* Advance postmaster's state machine */
5071  }
5072 
5073  if (CheckPromoteSignal() && StartupPID != 0 &&
5074  (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5076  {
5077  /* Tell startup process to finish recovery */
5079  }
5080 
5082 
5083  errno = save_errno;
5084 }
5085 
5086 /*
5087  * SIGTERM or SIGQUIT while processing startup packet.
5088  * Clean up and exit(1).
5089  *
5090  * XXX: possible future improvement: try to send a message indicating
5091  * why we are disconnecting. Problem is to be sure we don't block while
5092  * doing so, nor mess up SSL initialization. In practice, if the client
5093  * has wedged here, it probably couldn't do anything with the message anyway.
5094  */
5095 static void
5097 {
5098  proc_exit(1);
5099 }
5100 
5101 /*
5102  * Dummy signal handler
5103  *
5104  * We use this for signals that we don't actually use in the postmaster,
5105  * but we do use in backends. If we were to SIG_IGN such signals in the
5106  * postmaster, then a newly started backend might drop a signal that arrives
5107  * before it's able to reconfigure its signal processing. (See notes in
5108  * tcop/postgres.c.)
5109  */
5110 static void
5112 {
5113 }
5114 
5115 /*
5116  * Timeout while processing startup packet.
5117  * As for startup_die(), we clean up and exit(1).
5118  */
5119 static void
5121 {
5122  proc_exit(1);
5123 }
5124 
5125 
5126 /*
5127  * Generate a random cancel key.
5128  */
5129 static bool
5131 {
5132 #ifdef HAVE_STRONG_RANDOM
5133  return pg_strong_random((char *) cancel_key, sizeof(int32));
5134 #else
5135  /*
5136  * If built with --disable-strong-random, use plain old erand48.
5137  *
5138  * We cannot use pg_backend_random() in postmaster, because it stores
5139  * its state in shared memory.
5140  */
5141  static unsigned short seed[3];
5142 
5143  /*
5144  * Select a random seed at the time of first receiving a request.
5145  */
5146  if (random_seed == 0)
5147  {
5148  struct timeval random_stop_time;
5149 
5150  gettimeofday(&random_stop_time, NULL);
5151 
5152  seed[0] = (unsigned short) random_start_time.tv_usec;
5153  seed[1] = (unsigned short) (random_stop_time.tv_usec) ^ (random_start_time.tv_usec >> 16);
5154  seed[2] = (unsigned short) (random_stop_time.tv_usec >> 16);
5155 
5156  random_seed = 1;
5157  }
5158 
5159  *cancel_key = pg_jrand48(seed);
5160 
5161  return true;
5162 #endif
5163 }
5164 
5165 /*
5166  * Count up number of child processes of specified types (dead_end children
5167  * are always excluded).
5168  */
5169 static int
5170 CountChildren(int target)
5171 {
5172  dlist_iter iter;
5173  int cnt = 0;
5174 
5175  dlist_foreach(iter, &BackendList)
5176  {
5177  Backend *bp = dlist_container(Backend, elem, iter.cur);
5178 
5179  if (bp->dead_end)
5180  continue;
5181 
5182  /*
5183  * Since target == BACKEND_TYPE_ALL is the most common case, we test
5184  * it first and avoid touching shared memory for every child.
5185  */
5186  if (target != BACKEND_TYPE_ALL)
5187  {
5188  /*
5189  * Assign bkend_type for any recently announced WAL Sender
5190  * processes.
5191  */
5192  if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
5195 
5196  if (!(target & bp->bkend_type))
5197  continue;
5198  }
5199 
5200  cnt++;
5201  }
5202  return cnt;
5203 }
5204 
5205 
5206 /*
5207  * StartChildProcess -- start an auxiliary process for the postmaster
5208  *
5209  * "type" determines what kind of child will be started. All child types
5210  * initially go to AuxiliaryProcessMain, which will handle common setup.
5211  *
5212  * Return value of StartChildProcess is subprocess' PID, or 0 if failed
5213  * to start subprocess.
5214  */
5215 static pid_t
5217 {
5218  pid_t pid;
5219  char *av[10];
5220  int ac = 0;
5221  char typebuf[32];
5222 
5223  /*
5224  * Set up command-line arguments for subprocess
5225  */
5226  av[ac++] = "postgres";
5227 
5228 #ifdef EXEC_BACKEND
5229  av[ac++] = "--forkboot";
5230  av[ac++] = NULL; /* filled in by postmaster_forkexec */
5231 #endif
5232 
5233  snprintf(typebuf, sizeof(typebuf), "-x%d", type);
5234  av[ac++] = typebuf;
5235 
5236  av[ac] = NULL;
5237  Assert(ac < lengthof(av));
5238 
5239 #ifdef EXEC_BACKEND
5240  pid = postmaster_forkexec(ac, av);
5241 #else /* !EXEC_BACKEND */
5242  pid = fork_process();
5243 
5244  if (pid == 0) /* child */
5245  {
5247 
5248  /* Close the postmaster's sockets */
5249  ClosePostmasterPorts(false);
5250 
5251  /* Release postmaster's working memory context */
5255 
5256  AuxiliaryProcessMain(ac, av);
5257  ExitPostmaster(0);
5258  }
5259 #endif /* EXEC_BACKEND */
5260 
5261  if (pid < 0)
5262  {
5263  /* in parent, fork failed */
5264  int save_errno = errno;
5265 
5266  errno = save_errno;
5267  switch (type)
5268  {
5269  case StartupProcess:
5270  ereport(LOG,
5271  (errmsg("could not fork startup process: %m")));
5272  break;
5273  case BgWriterProcess:
5274  ereport(LOG,
5275  (errmsg("could not fork background writer process: %m")));
5276  break;
5277  case CheckpointerProcess:
5278  ereport(LOG,
5279  (errmsg("could not fork checkpointer process: %m")));
5280  break;
5281  case WalWriterProcess:
5282  ereport(LOG,
5283  (errmsg("could not fork WAL writer process: %m")));
5284  break;
5285  case WalReceiverProcess:
5286  ereport(LOG,
5287  (errmsg("could not fork WAL receiver process: %m")));
5288  break;
5289  default:
5290  ereport(LOG,
5291  (errmsg("could not fork process: %m")));
5292  break;
5293  }
5294 
5295  /*
5296  * fork failure is fatal during startup, but there's no need to choke
5297  * immediately if starting other child types fails.
5298  */
5299  if (type == StartupProcess)
5300  ExitPostmaster(1);
5301  return 0;
5302  }
5303 
5304  /*
5305  * in parent, successful fork
5306  */
5307  return pid;
5308 }
5309 
5310 /*
5311  * StartAutovacuumWorker
5312  * Start an autovac worker process.
5313  *
5314  * This function is here because it enters the resulting PID into the
5315  * postmaster's private backends list.
5316  *
5317  * NB -- this code very roughly matches BackendStartup.
5318  */
5319 static void
5321 {
5322  Backend *bn;
5323 
5324  /*
5325  * If not in condition to run a process, don't try, but handle it like a
5326  * fork failure. This does not normally happen, since the signal is only
5327  * supposed to be sent by autovacuum launcher when it's OK to do it, but
5328  * we have to check to avoid race-condition problems during DB state
5329  * changes.
5330  */
5331  if (canAcceptConnections() == CAC_OK)
5332  {
5333  /*
5334  * Compute the cancel key that will be assigned to this session.
5335  * We probably don't need cancel keys for autovac workers, but
5336  * we'd better have something random in the field to prevent
5337  * unfriendly people from sending cancels to them.
5338  */
5340  {
5341  ereport(LOG,
5342  (errcode(ERRCODE_INTERNAL_ERROR),
5343  errmsg("could not generate random cancel key")));
5344  return;
5345  }
5346 
5347  bn = (Backend *) malloc(sizeof(Backend));
5348  if (bn)
5349  {
5350  bn->cancel_key = MyCancelKey;
5351 
5352  /* Autovac workers are not dead_end and need a child slot */
5353  bn->dead_end = false;
5355  bn->bgworker_notify = false;
5356 
5357  bn->pid = StartAutoVacWorker();
5358  if (bn->pid > 0)
5359  {
5361  dlist_push_head(&BackendList, &bn->elem);
5362 #ifdef EXEC_BACKEND
5363  ShmemBackendArrayAdd(bn);
5364 #endif
5365  /* all OK */
5366  return;
5367  }
5368 
5369  /*
5370  * fork failed, fall through to report -- actual error message was
5371  * logged by StartAutoVacWorker
5372  */
5374  free(bn);
5375  }
5376  else
5377  ereport(LOG,
5378  (errcode(ERRCODE_OUT_OF_MEMORY),
5379  errmsg("out of memory")));
5380  }
5381 
5382  /*
5383  * Report the failure to the launcher, if it's running. (If it's not, we
5384  * might not even be connected to shared memory, so don't try to call
5385  * AutoVacWorkerFailed.) Note that we also need to signal it so that it
5386  * responds to the condition, but we don't do that here, instead waiting
5387  * for ServerLoop to do it. This way we avoid a ping-pong signalling in
5388  * quick succession between the autovac launcher and postmaster in case
5389  * things get ugly.
5390  */
5391  if (AutoVacPID != 0)
5392  {
5394  avlauncher_needs_signal = true;
5395  }
5396 }
5397 
5398 /*
5399  * Create the opts file
5400  */
5401 static bool
5402 CreateOptsFile(int argc, char *argv[], char *fullprogname)
5403 {
5404  FILE *fp;
5405  int i;
5406 
5407 #define OPTS_FILE "postmaster.opts"
5408 
5409  if ((fp = fopen(OPTS_FILE, "w")) == NULL)
5410  {
5411  elog(LOG, "could not create file \"%s\": %m", OPTS_FILE);
5412  return false;
5413  }
5414 
5415  fprintf(fp, "%s", fullprogname);
5416  for (i = 1; i < argc; i++)
5417  fprintf(fp, " \"%s\"", argv[i]);
5418  fputs("\n", fp);
5419 
5420  if (fclose(fp))
5421  {
5422  elog(LOG, "could not write file \"%s\": %m", OPTS_FILE);
5423  return false;
5424  }
5425 
5426  return true;
5427 }
5428 
5429 
5430 /*
5431  * MaxLivePostmasterChildren
5432  *
5433  * This reports the number of entries needed in per-child-process arrays
5434  * (the PMChildFlags array, and if EXEC_BACKEND the ShmemBackendArray).
5435  * These arrays include regular backends, autovac workers, walsenders
5436  * and background workers, but not special children nor dead_end children.
5437  * This allows the arrays to have a fixed maximum size, to wit the same
5438  * too-many-children limit enforced by canAcceptConnections(). The exact value
5439  * isn't too critical as long as it's more than MaxBackends.
5440  */
5441 int
5443 {
5444  return 2 * (MaxConnections + autovacuum_max_workers + 1 +
5446 }
5447 
5448 /*
5449  * Connect background worker to a database.
 *
 * The database and user are identified by name: InitPostgres() is called
 * with dbname and username, and InvalidOid for both OID arguments.
 * Afterwards an ERROR is raised if the process is no longer in init
 * processing mode.  Returns nothing.
 *
 * NOTE(review): listing lines 5452 (function name and parameters; dbname
 * and username are presumably the parameters), 5454, 5457 (the condition
 * guarding the FATAL report below) and 5468 are absent from this copy.
5450  */
5451 void
5453 {
5455 
5456  /* XXX is this the right errcode? */
 /* NOTE(review): the test that guards this FATAL report is not visible here */
5458  ereport(FATAL,
5459  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5460  errmsg("database connection requirement not indicated during registration")));
5461 
5462  InitPostgres(dbname, InvalidOid, username, InvalidOid, NULL);
5463 
5464  /* it had better not have gotten out of "init" mode yet */
5465  if (!IsInitProcessingMode())
5466  ereport(ERROR,
5467  (errmsg("invalid processing mode in background worker")));
5469 }
5470 
5471 /*
5472  * Connect background worker to a database using OIDs.
 *
 * The database and user are identified by OID: InitPostgres() is called
 * with dboid and useroid, and NULL for both name arguments.  Afterwards an
 * ERROR is raised if the process is no longer in init processing mode.
 * Returns nothing.
 *
 * NOTE(review): listing lines 5475 (function name and parameters; dboid
 * and useroid are presumably the parameters), 5477, 5480 (the condition
 * guarding the FATAL report below) and 5491 are absent from this copy.
5473  */
5474 void
5476 {
5478 
5479  /* XXX is this the right errcode? */
 /* NOTE(review): the test that guards this FATAL report is not visible here */
5481  ereport(FATAL,
5482  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5483  errmsg("database connection requirement not indicated during registration")));
5484 
5485  InitPostgres(NULL, dboid, NULL, useroid, NULL);
5486 
5487  /* it had better not have gotten out of "init" mode yet */
5488  if (!IsInitProcessingMode())
5489  ereport(ERROR,
5490  (errmsg("invalid processing mode in background worker")));
5492 }
5493 
5494 /*
5495  * Block/unblock signals in a background worker
 *
 * The blocking variant, directly below, installs BlockSig as the signal
 * mask via PG_SETMASK.  It returns nothing.
 *
 * NOTE(review): listing line 5498 (the function name/parameter line) is
 * absent from this copy of the file.
5496  */
5497 void
5499 {
5500  PG_SETMASK(&BlockSig);
5501 }
5502 
/*
 * Presumably the "unblock" counterpart of the signal-blocking function
 * above.
 *
 * NOTE(review): listing lines 5504 (the function name/parameter line) and
 * 5506 (the only statement of the body) are absent from this copy, so what
 * this function actually does cannot be confirmed from here.
 */
5503 void
5505 {
5507 }
5508 
5509 #ifdef EXEC_BACKEND
5510 static pid_t
5511 bgworker_forkexec(int shmem_slot)
5512 {
5513  char *av[10];
5514  int ac = 0;
5515  char forkav[MAXPGPATH];
5516 
5517  snprintf(forkav, MAXPGPATH, "--forkbgworker=%d", shmem_slot);
5518 
5519  av[ac++] = "postgres";
5520  av[ac++] = forkav;
5521  av[ac++] = NULL; /* filled in by postmaster_forkexec */
5522  av[ac] = NULL;
5523 
5524  Assert(ac < lengthof(av));
5525 
5526  return postmaster_forkexec(ac, av);
5527 }
5528 #endif
5529 
5530 /*
5531  * Start a new bgworker.
5532  * Starting time conditions must have been checked already.
5533  *
5534  * This code is heavily based on autovacuum.c, q.v.
 *
 * rw is the registered worker to launch (the caller in this file passes a
 * RegisteredBgWorker pointer).  The child is created with
 * bgworker_forkexec(rw->rw_shmem_slot) under EXEC_BACKEND, otherwise with
 * fork_process().
 *
 * Outcomes:
 *	- fork failure (-1): a LOG message is emitted and the function returns
 *	  without modifying rw->rw_pid or rw->rw_backend->pid.
 *	- parent: the child's pid is stored in rw->rw_pid and in
 *	  rw->rw_backend->pid, so rw->rw_backend must already be set.
 *	- child (non-EXEC_BACKEND builds only): closes the postmaster's sockets
 *	  and copies rw->rw_worker into MyBgworkerEntry.
 *
 * Returns nothing; callers can only detect a fork failure by inspecting
 * rw->rw_pid.
 *
 * NOTE(review): listing lines 5537 (function name and parameters), 5559,
 * 5569 (the allocation assigned to MyBgworkerEntry), 5573-5575, 5577 and
 * 5583 are absent from this copy, so the remainder of the child path and
 * the last statement of the parent path are not visible.
5535  */
5536 static void
5538 {
5539  pid_t worker_pid;
5540 
5541  ereport(DEBUG1,
5542  (errmsg("starting background worker process \"%s\"",
5543  rw->rw_worker.bgw_name)));
5544 
5545 #ifdef EXEC_BACKEND
5546  switch ((worker_pid = bgworker_forkexec(rw->rw_shmem_slot)))
5547 #else
5548  switch ((worker_pid = fork_process()))
5549 #endif
5550  {
5551  case -1:
 /* fork failed: report it and leave rw untouched */
5552  ereport(LOG,
5553  (errmsg("could not fork worker process: %m")));
5554  return;
5555 
5556 #ifndef EXEC_BACKEND
5557  case 0:
5558  /* in postmaster child ... */
5560 
5561  /* Close the postmaster's sockets */
5562  ClosePostmasterPorts(false);
5563 
5564  /*
5565  * Before blowing away PostmasterContext, save this bgworker's
5566  * data where it can find it.
5567  */
5568  MyBgworkerEntry = (BackgroundWorker *)
5570  memcpy(MyBgworkerEntry, &rw->rw_worker, sizeof(BackgroundWorker));
5571 
5572  /* Release postmaster's working memory context */
5576 
5578  break;
5579 #endif
5580  default:
 /* in postmaster: remember the child's pid in both bookkeeping structs */
5581  rw->rw_pid = worker_pid;
5582  rw->rw_backend->pid = rw->rw_pid;
5584  break;
5585  }
5586 }
5587 
5588 /*
5589  * Does the current postmaster state require starting a worker with the
5590  * specified start_time?
 *
 * The answer depends on the global pmState.  The cases deliberately fall
 * through, so each state also accepts the start times of the states listed
 * after it:
 *
 *	PM_RUN								RecoveryFinished, ConsistentState,
 *										PostmasterStart
 *	PM_HOT_STANDBY						ConsistentState, PostmasterStart
 *	PM_RECOVERY, PM_STARTUP, PM_INIT	PostmasterStart only
 *	all other states listed below		never
 *
 * Returns true if a worker with the given start_time should be started
 * now, false otherwise.
 *
 * NOTE(review): listing line 5593 (the function name/parameter line) is
 * absent from this copy; start_time is presumably the sole parameter.
5591  */
5592 static bool
5594 {
5595  switch (pmState)
5596  {
 /* in these states no worker is started: drop out to "return false" */
5597  case PM_NO_CHILDREN:
5598  case PM_WAIT_DEAD_END:
5599  case PM_SHUTDOWN_2:
5600  case PM_SHUTDOWN:
5601  case PM_WAIT_BACKENDS:
5602  case PM_WAIT_READONLY:
5603  case PM_WAIT_BACKUP:
5604  break;
5605 
5606  case PM_RUN:
5607  if (start_time == BgWorkerStart_RecoveryFinished)
5608  return true;
5609  /* fall through */
5610 
5611  case PM_HOT_STANDBY:
5612  if (start_time == BgWorkerStart_ConsistentState)
5613  return true;
5614  /* fall through */
5615 
5616  case PM_RECOVERY:
5617  case PM_STARTUP:
5618  case PM_INIT:
5619  if (start_time == BgWorkerStart_PostmasterStart)
5620  return true;
5621  /* fall through */
5622 
5623  }
5624 
5625  return false;
5626 }
5627 
5628 /*
5629  * Allocate the Backend struct for a connected background worker, but don't
5630  * add it to the list of backends just yet.
5631  *
5632  * Some info from the Backend is copied into the passed rw.
 *
 * On success, returns true with rw->rw_backend pointing at a freshly
 * malloc'd Backend and rw->rw_child_slot copied from its child_slot.
 * Returns false, after emitting a LOG message, if the cancel key could not
 * be generated or if malloc() failed; rw->rw_backend is not assigned on
 * those paths.
 *
 * NOTE(review): listing lines 5635 (function name and parameters), 5645
 * (the cancel-key generation test), 5651 and 5668 (the statement before
 * each "return false"), and 5673-5674 (two statements between the
 * cancel_key and dead_end assignments) are absent from this copy.
5633  */
5634 static bool
5636 {
5637  Backend *bn;
5638 
5639  /*
5640  * Compute the cancel key that will be assigned to this session. We
5641  * probably don't need cancel keys for background workers, but we'd better
5642  * have something random in the field to prevent unfriendly people from
5643  * sending cancels to them.
5644  */
5646  {
5647  ereport(LOG,
5648  (errcode(ERRCODE_INTERNAL_ERROR),
5649  errmsg("could not generate random cancel key")));
5650 
5652  return false;
5653  }
5654 
 /* plain malloc(), so an allocation failure can be reported and survived */
5655  bn = malloc(sizeof(Backend));
5656  if (bn == NULL)
5657  {
5658  ereport(LOG,
5659  (errcode(ERRCODE_OUT_OF_MEMORY),
5660  errmsg("out of memory")));
5661 
5662  /*
5663  * The worker didn't really crash, but setting this nonzero makes
5664  * postmaster wait a bit before attempting to start it again; if it
5665  * tried again right away, most likely it'd find itself under the same
5666  * memory pressure.
5667  */
5669  return false;
5670  }
5671 
5672  bn->cancel_key = MyCancelKey;
5675  bn->dead_end = false;
5676  bn->bgworker_notify = false;
5677 
 /* hand the new entry to the caller through rw */
5678  rw->rw_backend = bn;
5679  rw->rw_child_slot = bn->child_slot;
5680 
5681  return true;
5682 }
5683 
5684 /*
5685  * If the time is right, start one background worker.
5686  *
5687  * As a side effect, the bgworker control variables are set or reset whenever
5688  * there are more workers to start after this one, and whenever the overall
5689  * system state requires it.
 *
 * The control variables written here are StartWorkerNeeded and
 * HaveCrashedWorker:
 *	- if FatalError is set, both are cleared and nothing is started;
 *	- HaveCrashedWorker is set when a previously crashed worker is skipped
 *	  by the restart-interval test;
 *	- StartWorkerNeeded is set after one worker has been launched (so the
 *	  function gets called again) and cleared when no runnable worker was
 *	  found.
 * At most one worker is launched per call.  Workers flagged rw_terminate
 * are removed from the list with ForgetBackgroundWorker() along the way.
 *
 * NOTE(review): listing lines 5692 (function name and parameters), 5706
 * (the loop header that drives "iter"), 5732 (the condition for forgetting
 * a crashed worker), 5741 (the first half of the restart-interval test) and
 * 5749 (the condition guarding the launch block) are absent from this copy.
5690  */
5691 static void
5693 {
5694  slist_mutable_iter iter;
 /* current time, fetched lazily at most once per call; 0 = not fetched yet */
5695  TimestampTz now = 0;
5696 
5697  if (FatalError)
5698  {
5699  StartWorkerNeeded = false;
5700  HaveCrashedWorker = false;
5701  return; /* not yet */
5702  }
5703 
5704  HaveCrashedWorker = false;
5705 
5707  {
5708  RegisteredBgWorker *rw;
5709 
5710  rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
5711 
5712  /* already running? */
5713  if (rw->rw_pid != 0)
5714  continue;
5715 
5716  /* marked for death? */
5717  if (rw->rw_terminate)
5718  {
5719  ForgetBackgroundWorker(&iter);
5720  continue;
5721  }
5722 
5723  /*
5724  * If this worker has crashed previously, maybe it needs to be
5725  * restarted (unless on registration it specified it doesn't want to
5726  * be restarted at all). Check how long ago the last crash happened.
5727  * If the last crash is too recent, don't start it right away; let it
5728  * be restarted once enough time has passed.
5729  */
5730  if (rw->rw_crashed_at != 0)
5731  {
5733  {
5734  ForgetBackgroundWorker(&iter);
5735  continue;
5736  }
5737 
5738  if (now == 0)
5739  now = GetCurrentTimestamp();
5740 
5742  rw->rw_worker.bgw_restart_time * 1000))
5743  {
5744  HaveCrashedWorker = true;
5745  continue;
5746  }
5747  }
5748 
5750  {
5751  /* reset crash time before calling assign_backendlist_entry */
5752  rw->rw_crashed_at = 0;
5753 
5754  /*
5755  * Allocate and assign the Backend element. Note we must do this
5756  * before forking, so that we can handle out of memory properly.
5757  */
5758  if (!assign_backendlist_entry(rw))
5759  return;
5760 
 /*
  * NOTE(review): do_start_bgworker() returns without setting
  * rw->rw_pid when the fork fails, and reports nothing back to us.
  * The Backend entry is nevertheless added to BackendList below, and
  * rw_crashed_at has already been reset above.  Confirm whether the
  * fork-failure path needs to release the entry instead.
  */
5761  do_start_bgworker(rw); /* sets rw->rw_pid */
5762 
5763  dlist_push_head(&BackendList, &rw->rw_backend->elem);
5764 #ifdef EXEC_BACKEND
5765  ShmemBackendArrayAdd(rw->rw_backend);
5766 #endif
5767 
5768  /*
5769  * Have ServerLoop call us again. Note that there might not
5770  * actually *be* another runnable worker, but we don't care all
5771  * that much; we will find out the next time we run.
5772  */
5773  StartWorkerNeeded = true;
5774  return;
5775  }
5776  }
5777 
5778  /* no runnable worker found */
5779  StartWorkerNeeded = false;
5780 }
5781 
5782 /*
5783  * When a backend asks to be notified about worker state changes, we
5784  * set a flag in its backend entry. The background worker machinery needs
5785  * to know when such backends exit.
5786  */
5787 bool
5789 {
5790  dlist_iter iter;
5791  Backend *bp;
5792 
5793  dlist_foreach(iter, &BackendList)
5794  {
5795  bp = dlist_container(Backend, elem, iter.cur);
5796  if (bp->pid == pid)
5797  {
5798  bp->