PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
postmaster.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * postmaster.c
4  * This program acts as a clearing house for requests to the
5  * POSTGRES system. Frontend programs send a startup message
6  * to the Postmaster and the postmaster uses the info in the
7  * message to setup a backend process.
8  *
9  * The postmaster also manages system-wide operations such as
10  * startup and shutdown. The postmaster itself doesn't do those
11  * operations, mind you --- it just forks off a subprocess to do them
12  * at the right times. It also takes care of resetting the system
13  * if a backend crashes.
14  *
15  * The postmaster process creates the shared memory and semaphore
16  * pools during startup, but as a rule does not touch them itself.
17  * In particular, it is not a member of the PGPROC array of backends
18  * and so it cannot participate in lock-manager operations. Keeping
19  * the postmaster away from shared memory operations makes it simpler
20  * and more reliable. The postmaster is almost always able to recover
21  * from crashes of individual backends by resetting shared memory;
22  * if it did much with shared memory then it would be prone to crashing
23  * along with the backends.
24  *
25  * When a request message is received, we now fork() immediately.
26  * The child process performs authentication of the request, and
27  * then becomes a backend if successful. This allows the auth code
28  * to be written in a simple single-threaded style (as opposed to the
29  * crufty "poor man's multitasking" code that used to be needed).
30  * More importantly, it ensures that blockages in non-multithreaded
31  * libraries like SSL or PAM cannot cause denial of service to other
32  * clients.
33  *
34  *
35  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
36  * Portions Copyright (c) 1994, Regents of the University of California
37  *
38  *
39  * IDENTIFICATION
40  * src/backend/postmaster/postmaster.c
41  *
42  * NOTES
43  *
44  * Initialization:
45  * The Postmaster sets up shared memory data structures
46  * for the backends.
47  *
48  * Synchronization:
49  * The Postmaster shares memory with the backends but should avoid
50  * touching shared memory, so as not to become stuck if a crashing
51  * backend screws up locks or shared memory. Likewise, the Postmaster
52  * should never block on messages from frontend clients.
53  *
54  * Garbage Collection:
55  * The Postmaster cleans up after backends if they have an emergency
56  * exit and/or core dump.
57  *
58  * Error Reporting:
59  * Use write_stderr() only for reporting "interactive" errors
60  * (essentially, bogus arguments on the command line). Once the
61  * postmaster is launched, use ereport().
62  *
63  *-------------------------------------------------------------------------
64  */
65 
66 #include "postgres.h"
67 
68 #include <unistd.h>
69 #include <signal.h>
70 #include <time.h>
71 #include <sys/wait.h>
72 #include <ctype.h>
73 #include <sys/stat.h>
74 #include <sys/socket.h>
75 #include <fcntl.h>
76 #include <sys/param.h>
77 #include <netdb.h>
78 #include <limits.h>
79 
80 #ifdef HAVE_SYS_SELECT_H
81 #include <sys/select.h>
82 #endif
83 
84 #ifdef USE_BONJOUR
85 #include <dns_sd.h>
86 #endif
87 
88 #ifdef USE_SYSTEMD
89 #include <systemd/sd-daemon.h>
90 #endif
91 
92 #ifdef HAVE_PTHREAD_IS_THREADED_NP
93 #include <pthread.h>
94 #endif
95 
96 #include "access/transam.h"
97 #include "access/xlog.h"
98 #include "bootstrap/bootstrap.h"
99 #include "catalog/pg_control.h"
100 #include "common/ip.h"
101 #include "lib/ilist.h"
102 #include "libpq/auth.h"
103 #include "libpq/libpq.h"
104 #include "libpq/pqsignal.h"
105 #include "miscadmin.h"
106 #include "pg_getopt.h"
107 #include "pgstat.h"
108 #include "port/pg_bswap.h"
109 #include "postmaster/autovacuum.h"
111 #include "postmaster/fork_process.h"
112 #include "postmaster/pgarch.h"
113 #include "postmaster/postmaster.h"
114 #include "postmaster/syslogger.h"
116 #include "replication/walsender.h"
117 #include "storage/fd.h"
118 #include "storage/ipc.h"
119 #include "storage/pg_shmem.h"
120 #include "storage/pmsignal.h"
121 #include "storage/proc.h"
122 #include "tcop/tcopprot.h"
123 #include "utils/builtins.h"
124 #include "utils/datetime.h"
125 #include "utils/dynamic_loader.h"
126 #include "utils/memutils.h"
127 #include "utils/pidfile.h"
128 #include "utils/ps_status.h"
129 #include "utils/timeout.h"
130 #include "utils/varlena.h"
131 
132 #ifdef EXEC_BACKEND
133 #include "storage/spin.h"
134 #endif
135 
136 
/*
 * Backend flavors.  Each value is a distinct bit, so besides being the
 * possible bkend_type values in struct bkend, they can be OR'ed together to
 * form the request mask given to SignalSomeChildren() and CountChildren().
 */
#define BACKEND_TYPE_NORMAL		0x0001	/* normal backend */
#define BACKEND_TYPE_AUTOVAC	0x0002	/* autovacuum worker process */
#define BACKEND_TYPE_WALSND		0x0004	/* walsender process */
#define BACKEND_TYPE_BGWORKER	0x0008	/* bgworker process */

/* Mask selecting every flavor listed above */
#define BACKEND_TYPE_ALL \
	(BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC | \
	 BACKEND_TYPE_WALSND | BACKEND_TYPE_BGWORKER)

/* Mask selecting the worker flavors: autovacuum workers and bgworkers */
#define BACKEND_TYPE_WORKER (BACKEND_TYPE_BGWORKER | BACKEND_TYPE_AUTOVAC)
149 
150 /*
151  * List of active backends (or child processes anyway; we don't actually
152  * know whether a given child has become a backend or is still in the
153  * authorization phase). This is used mainly to keep track of how many
154  * children we have and send them appropriate signals when necessary.
155  *
156  * "Special" children such as the startup, bgwriter and autovacuum launcher
157  * tasks are not in this list. Autovacuum worker and walsender are in it.
158  * Also, "dead_end" children are in it: these are children launched just for
159  * the purpose of sending a friendly rejection message to a would-be client.
160  * We must track them because they are attached to shared memory, but we know
161  * they will never become live backends. dead_end children are not assigned a
162  * PMChildSlot.
163  *
164  * Background workers are in this list, too.
165  */
166 typedef struct bkend
167 {
168  pid_t pid; /* process id of backend */
169  int32 cancel_key; /* cancel key for cancels for this backend */
170  int child_slot; /* PMChildSlot for this backend, if any */
171 
172  /*
173  * Flavor of backend or auxiliary process. Note that BACKEND_TYPE_WALSND
174  * backends initially announce themselves as BACKEND_TYPE_NORMAL, so if
175  * bkend_type is normal, you should check for a recent transition.
176  */
178  bool dead_end; /* is it going to send an error and quit? */
179  bool bgworker_notify; /* gets bgworker start/stop notifications */
180  dlist_node elem; /* list link in BackendList */
181 } Backend;
182 
184 
185 #ifdef EXEC_BACKEND
186 static Backend *ShmemBackendArray;
187 #endif
188 
190 
191 
192 
193 /* The socket number we are listening for connections on */
195 
196 /* The directory names for Unix socket(s) */
198 
199 /* The TCP listen address(es) */
201 
/*
 * ReservedBackends is the number of backends reserved for superuser use.
 * This number is taken out of the pool size given by MaxBackends, so the
 * number of backend slots available to non-superusers is
 * (MaxBackends - ReservedBackends).  Note that what this really means is
 * "if there are <= ReservedBackends connections available, only superusers
 * can make new connections" --- pre-existing superuser connections don't
 * count against the limit.
 */
212 
213 /* The socket(s) we're listening to. */
214 #define MAXLISTEN 64
216 
217 /*
218  * Set by the -o option
219  */
220 static char ExtraOptions[MAXPGPATH];
221 
/*
 * These globals control the behavior of the postmaster in case some
 * backend dumps core.  Normally, it kills all peers of the dead backend
 * and reinitializes shared memory.  By specifying -T or -n, we can have
 * the postmaster stop (rather than kill) peers and not reinitialize
 * shared data structures.  (Reinit is currently dead code, though.)
 *
 * Reinit is cleared by the -n switch and SendStop is set by the -T switch;
 * both are handled in PostmasterMain's option loop.  Both are plain
 * true/false flags.
 */
static bool Reinit = true;
static bool SendStop = false;
231 
232 /* still more option variables */
233 bool EnableSSL = false;
234 
235 int PreAuthDelay = 0;
237 
238 bool log_hostname; /* for ps display and logging */
239 bool Log_connections = false;
240 bool Db_user_namespace = false;
241 
242 bool enable_bonjour = false;
245 
246 /* PIDs of special child processes; 0 when not running */
247 static pid_t StartupPID = 0,
256 
257 /* Startup process's status */
258 typedef enum
259 {
262  STARTUP_SIGNALED, /* we sent it a SIGQUIT or SIGKILL */
265 
267 
268 /* Startup/shutdown state */
269 #define NoShutdown 0
270 #define SmartShutdown 1
271 #define FastShutdown 2
272 #define ImmediateShutdown 3
273 
274 static int Shutdown = NoShutdown;
275 
276 static bool FatalError = false; /* T if recovering from backend crash */
277 
278 /*
279  * We use a simple state machine to control startup, shutdown, and
280  * crash recovery (which is rather like shutdown followed by startup).
281  *
282  * After doing all the postmaster initialization work, we enter PM_STARTUP
283  * state and the startup process is launched. The startup process begins by
284  * reading the control file and other preliminary initialization steps.
285  * In a normal startup, or after crash recovery, the startup process exits
286  * with exit code 0 and we switch to PM_RUN state. However, archive recovery
287  * is handled specially since it takes much longer and we would like to support
288  * hot standby during archive recovery.
289  *
290  * When the startup process is ready to start archive recovery, it signals the
291  * postmaster, and we switch to PM_RECOVERY state. The background writer and
292  * checkpointer are launched, while the startup process continues applying WAL.
293  * If Hot Standby is enabled, then, after reaching a consistent point in WAL
294  * redo, startup process signals us again, and we switch to PM_HOT_STANDBY
295  * state and begin accepting connections to perform read-only queries. When
296  * archive recovery is finished, the startup process exits with exit code 0
297  * and we switch to PM_RUN state.
298  *
299  * Normal child backends can only be launched when we are in PM_RUN or
300  * PM_HOT_STANDBY state. (We also allow launch of normal
301  * child backends in PM_WAIT_BACKUP state, but only for superusers.)
302  * In other states we handle connection requests by launching "dead_end"
303  * child processes, which will simply send the client an error message and
304  * quit. (We track these in the BackendList so that we can know when they
305  * are all gone; this is important because they're still connected to shared
306  * memory, and would interfere with an attempt to destroy the shmem segment,
307  * possibly leading to SHMALL failure when we try to make a new one.)
308  * In PM_WAIT_DEAD_END state we are waiting for all the dead_end children
309  * to drain out of the system, and therefore stop accepting connection
310  * requests at all until the last existing child has quit (which hopefully
311  * will not be very long).
312  *
313  * Notice that this state variable does not distinguish *why* we entered
314  * states later than PM_RUN --- Shutdown and FatalError must be consulted
315  * to find that out. FatalError is never true in PM_RECOVERY_* or PM_RUN
316  * states, nor in PM_SHUTDOWN states (because we don't enter those states
317  * when trying to recover from a crash). It can be true in PM_STARTUP state,
318  * because we don't clear it until we've successfully started WAL redo.
319  */
typedef enum
{
	/* postmaster starting */
	PM_INIT,

	/* waiting for startup subprocess */
	PM_STARTUP,

	/* in archive recovery mode */
	PM_RECOVERY,

	/* in hot standby mode */
	PM_HOT_STANDBY,

	/* normal "database is alive" state */
	PM_RUN,

	/* waiting for online backup mode to end */
	PM_WAIT_BACKUP,

	/* waiting for read only backends to exit */
	PM_WAIT_READONLY,

	/* waiting for live backends to exit */
	PM_WAIT_BACKENDS,

	/* waiting for checkpointer to do shutdown ckpt */
	PM_SHUTDOWN,

	/* waiting for archiver and walsenders to finish */
	PM_SHUTDOWN_2,

	/* waiting for dead_end children to exit */
	PM_WAIT_DEAD_END,

	/* all important children have exited */
	PM_NO_CHILDREN
} PMState;
337 
339 
340 /* Start time of SIGKILL timeout during immediate shutdown or child crash */
341 /* Zero means timeout is not running */
342 static time_t AbortStartTime = 0;
343 
344 /* Length of said timeout */
345 #define SIGKILL_CHILDREN_AFTER_SECS 5
346 
347 static bool ReachedNormalRunning = false; /* T if we've reached PM_RUN */
348 
349 bool ClientAuthInProgress = false; /* T during new-client
350  * authentication */
351 
352 bool redirection_done = false; /* stderr redirected for syslogger? */
353 
354 /* received START_AUTOVAC_LAUNCHER signal */
355 static volatile sig_atomic_t start_autovac_launcher = false;
356 
357 /* the launcher needs to be signalled to communicate some condition */
358 static volatile bool avlauncher_needs_signal = false;
359 
360 /* received START_WALRECEIVER signal */
361 static volatile sig_atomic_t WalReceiverRequested = false;
362 
363 /* set when there's a worker that needs to be started up */
364 static volatile bool StartWorkerNeeded = true;
365 static volatile bool HaveCrashedWorker = false;
366 
367 #ifndef HAVE_STRONG_RANDOM
368 /*
369  * State for assigning cancel keys.
370  * Also, the global MyCancelKey passes the cancel key assigned to a given
371  * backend from the postmaster to that backend (via fork).
372  */
373 static unsigned int random_seed = 0;
374 static struct timeval random_start_time;
375 #endif
376 
377 #ifdef USE_SSL
378 /* Set when and if SSL has been initialized properly */
379 static bool LoadedSSL = false;
380 #endif
381 
382 #ifdef USE_BONJOUR
383 static DNSServiceRef bonjour_sdref = NULL;
384 #endif
385 
386 /*
387  * postmaster.c - function prototypes
388  */
389 static void CloseServerPorts(int status, Datum arg);
390 static void unlink_external_pid_file(int status, Datum arg);
391 static void getInstallationPaths(const char *argv0);
392 static void checkDataDir(void);
393 static Port *ConnCreate(int serverFd);
394 static void ConnFree(Port *port);
395 static void reset_shared(int port);
396 static void SIGHUP_handler(SIGNAL_ARGS);
397 static void pmdie(SIGNAL_ARGS);
398 static void reaper(SIGNAL_ARGS);
399 static void sigusr1_handler(SIGNAL_ARGS);
400 static void startup_die(SIGNAL_ARGS);
401 static void dummy_handler(SIGNAL_ARGS);
402 static void StartupPacketTimeoutHandler(void);
403 static void CleanupBackend(int pid, int exitstatus);
404 static bool CleanupBackgroundWorker(int pid, int exitstatus);
405 static void HandleChildCrash(int pid, int exitstatus, const char *procname);
406 static void LogChildExit(int lev, const char *procname,
407  int pid, int exitstatus);
408 static void PostmasterStateMachine(void);
409 static void BackendInitialize(Port *port);
410 static void BackendRun(Port *port) pg_attribute_noreturn();
411 static void ExitPostmaster(int status) pg_attribute_noreturn();
412 static int ServerLoop(void);
413 static int BackendStartup(Port *port);
414 static int ProcessStartupPacket(Port *port, bool SSLdone);
415 static void processCancelRequest(Port *port, void *pkt);
416 static int initMasks(fd_set *rmask);
417 static void report_fork_failure_to_client(Port *port, int errnum);
418 static CAC_state canAcceptConnections(void);
419 static bool RandomCancelKey(int32 *cancel_key);
420 static void signal_child(pid_t pid, int signal);
421 static bool SignalSomeChildren(int signal, int targets);
422 static void TerminateChildren(int signal);
423 
424 #define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL)
425 
426 static int CountChildren(int target);
428 static void maybe_start_bgworkers(void);
429 static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
430 static pid_t StartChildProcess(AuxProcType type);
431 static void StartAutovacuumWorker(void);
432 static void MaybeStartWalReceiver(void);
433 static void InitPostmasterDeathWatchHandle(void);
434 
/*
 * Is the archiver allowed to start up in the current postmaster state?
 *
 * If WAL archiving is always enabled (XLogArchivingAlways()), we are allowed
 * to start the archiver even during recovery.
 */
441 #define PgArchStartupAllowed() \
442  ((XLogArchivingActive() && pmState == PM_RUN) || \
443  (XLogArchivingAlways() && \
444  (pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY)))
445 
446 #ifdef EXEC_BACKEND
447 
448 #ifdef WIN32
449 #define WNOHANG 0 /* ignored, so any integer value will do */
450 
451 static pid_t waitpid(pid_t pid, int *exitstatus, int options);
452 static void WINAPI pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired);
453 
454 static HANDLE win32ChildQueue;
455 
456 typedef struct
457 {
458  HANDLE waitHandle;
459  HANDLE procHandle;
460  DWORD procId;
461 } win32_deadchild_waitinfo;
462 #endif /* WIN32 */
463 
464 static pid_t backend_forkexec(Port *port);
465 static pid_t internal_forkexec(int argc, char *argv[], Port *port);
466 
/*
 * Type for a socket that can be inherited by a client process.  Outside
 * Windows this is a plain int; on Windows it is a struct holding the
 * original SOCKET together with a WSAPROTOCOL_INFO.
 */
#ifndef WIN32
typedef int InheritableSocket;
#else
typedef struct
{
	/* Original socket value, or PGINVALID_SOCKET if not a socket */
	SOCKET		origsocket;
	WSAPROTOCOL_INFO wsainfo;
} InheritableSocket;
#endif
478 
/*
 * Structure containing all the variables passed to exec'ed backends
 */
482 typedef struct
483 {
484  Port port;
485  InheritableSocket portsocket;
486  char DataDir[MAXPGPATH];
489  int MyPMChildSlot;
490 #ifndef WIN32
491  unsigned long UsedShmemSegID;
492 #else
493  HANDLE UsedShmemSegID;
494 #endif
495  void *UsedShmemSegAddr;
498  Backend *ShmemBackendArray;
499 #ifndef HAVE_SPINLOCKS
501 #endif
510  InheritableSocket pgStatSock;
511  pid_t PostmasterPid;
515  bool redirection_done;
516  bool IsBinaryUpgrade;
517  int max_safe_fds;
518  int MaxBackends;
519 #ifdef WIN32
520  HANDLE PostmasterHandle;
521  HANDLE initial_signal_pipe;
522  HANDLE syslogPipe[2];
523 #else
524  int postmaster_alive_fds[2];
525  int syslogPipe[2];
526 #endif
527  char my_exec_path[MAXPGPATH];
528  char pkglib_path[MAXPGPATH];
529  char ExtraOptions[MAXPGPATH];
530 } BackendParameters;
531 
532 static void read_backend_variables(char *id, Port *port);
533 static void restore_backend_variables(BackendParameters *param, Port *port);
534 
535 #ifndef WIN32
536 static bool save_backend_variables(BackendParameters *param, Port *port);
537 #else
538 static bool save_backend_variables(BackendParameters *param, Port *port,
539  HANDLE childProcess, pid_t childPid);
540 #endif
541 
542 static void ShmemBackendArrayAdd(Backend *bn);
543 static void ShmemBackendArrayRemove(Backend *bn);
544 #endif /* EXEC_BACKEND */
545 
546 #define StartupDataBase() StartChildProcess(StartupProcess)
547 #define StartBackgroundWriter() StartChildProcess(BgWriterProcess)
548 #define StartCheckpointer() StartChildProcess(CheckpointerProcess)
549 #define StartWalWriter() StartChildProcess(WalWriterProcess)
550 #define StartWalReceiver() StartChildProcess(WalReceiverProcess)
551 
/*
 * Macros to check the exit status of a child process.  EXIT_STATUS_0 matches
 * only a raw status of zero; the other two require WIFEXITED() to hold and
 * WEXITSTATUS() to equal the code in the macro's name.
 */
#define EXIT_STATUS_0(st)  (0 == (st))
#define EXIT_STATUS_1(st)  (WIFEXITED(st) ? (WEXITSTATUS(st) == 1) : 0)
#define EXIT_STATUS_3(st)  (WIFEXITED(st) ? (WEXITSTATUS(st) == 3) : 0)
556 
#ifdef WIN32
/*
 * On Windows, the postmaster's process handle is what is used to monitor
 * whether the postmaster is alive.
 */
HANDLE		PostmasterHandle;
#else
/*
 * File descriptors for the pipe used to monitor whether the postmaster is
 * alive.  Element 0 is POSTMASTER_FD_WATCH and element 1 is
 * POSTMASTER_FD_OWN; both are initialized to -1.
 */
int			postmaster_alive_fds[2] = {-1, -1};
#endif
567 
568 /*
569  * Postmaster main entry point
570  */
571 void
572 PostmasterMain(int argc, char *argv[])
573 {
574  int opt;
575  int status;
576  char *userDoption = NULL;
577  bool listen_addr_saved = false;
578  int i;
579  char *output_config_variable = NULL;
580 
581  MyProcPid = PostmasterPid = getpid();
582 
583  MyStartTime = time(NULL);
584 
586 
587  /*
588  * for security, no dir or file created can be group or other accessible
589  */
590  umask(S_IRWXG | S_IRWXO);
591 
592  /*
593  * Initialize random(3) so we don't get the same values in every run.
594  *
595  * Note: the seed is pretty predictable from externally-visible facts such
596  * as postmaster start time, so avoid using random() for security-critical
597  * random values during postmaster startup. At the time of first
598  * connection, PostmasterRandom will select a hopefully-more-random seed.
599  */
600  srandom((unsigned int) (MyProcPid ^ MyStartTime));
601 
602  /*
603  * By default, palloc() requests in the postmaster will be allocated in
604  * the PostmasterContext, which is space that can be recycled by backends.
605  * Allocated data that needs to be available to backends should be
606  * allocated in TopMemoryContext.
607  */
609  "Postmaster",
612 
613  /* Initialize paths to installation files */
614  getInstallationPaths(argv[0]);
615 
616  /*
617  * Set up signal handlers for the postmaster process.
618  *
619  * In the postmaster, we want to install non-ignored handlers *without*
620  * SA_RESTART. This is because they'll be blocked at all times except
621  * when ServerLoop is waiting for something to happen, and during that
622  * window, we want signals to exit the select(2) wait so that ServerLoop
623  * can respond if anything interesting happened. On some platforms,
624  * signals marked SA_RESTART would not cause the select() wait to end.
625  * Child processes will generally want SA_RESTART, but we expect them to
626  * set up their own handlers before unblocking signals.
627  *
628  * CAUTION: when changing this list, check for side-effects on the signal
629  * handling setup of child processes. See tcop/postgres.c,
630  * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
631  * postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/pgstat.c,
632  * postmaster/syslogger.c, postmaster/bgworker.c and
633  * postmaster/checkpointer.c.
634  */
635  pqinitmask();
637 
638  pqsignal_no_restart(SIGHUP, SIGHUP_handler); /* reread config file and
639  * have children do same */
640  pqsignal_no_restart(SIGINT, pmdie); /* send SIGTERM and shut down */
641  pqsignal_no_restart(SIGQUIT, pmdie); /* send SIGQUIT and die */
642  pqsignal_no_restart(SIGTERM, pmdie); /* wait for children and shut down */
643  pqsignal(SIGALRM, SIG_IGN); /* ignored */
644  pqsignal(SIGPIPE, SIG_IGN); /* ignored */
645  pqsignal_no_restart(SIGUSR1, sigusr1_handler); /* message from child
646  * process */
647  pqsignal_no_restart(SIGUSR2, dummy_handler); /* unused, reserve for
648  * children */
649  pqsignal_no_restart(SIGCHLD, reaper); /* handle child termination */
650  pqsignal(SIGTTIN, SIG_IGN); /* ignored */
651  pqsignal(SIGTTOU, SIG_IGN); /* ignored */
652  /* ignore SIGXFSZ, so that ulimit violations work like disk full */
653 #ifdef SIGXFSZ
654  pqsignal(SIGXFSZ, SIG_IGN); /* ignored */
655 #endif
656 
657  /*
658  * Options setup
659  */
661 
662  opterr = 1;
663 
664  /*
665  * Parse command-line options. CAUTION: keep this in sync with
666  * tcop/postgres.c (the option sets should not conflict) and with the
667  * common help() function in main/main.c.
668  */
669  while ((opt = getopt(argc, argv, "B:bc:C:D:d:EeFf:h:ijk:lN:nOo:Pp:r:S:sTt:W:-:")) != -1)
670  {
671  switch (opt)
672  {
673  case 'B':
674  SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
675  break;
676 
677  case 'b':
678  /* Undocumented flag used for binary upgrades */
679  IsBinaryUpgrade = true;
680  break;
681 
682  case 'C':
683  output_config_variable = strdup(optarg);
684  break;
685 
686  case 'D':
687  userDoption = strdup(optarg);
688  break;
689 
690  case 'd':
692  break;
693 
694  case 'E':
695  SetConfigOption("log_statement", "all", PGC_POSTMASTER, PGC_S_ARGV);
696  break;
697 
698  case 'e':
699  SetConfigOption("datestyle", "euro", PGC_POSTMASTER, PGC_S_ARGV);
700  break;
701 
702  case 'F':
703  SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
704  break;
705 
706  case 'f':
708  {
709  write_stderr("%s: invalid argument for option -f: \"%s\"\n",
710  progname, optarg);
711  ExitPostmaster(1);
712  }
713  break;
714 
715  case 'h':
716  SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
717  break;
718 
719  case 'i':
720  SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
721  break;
722 
723  case 'j':
724  /* only used by interactive backend */
725  break;
726 
727  case 'k':
728  SetConfigOption("unix_socket_directories", optarg, PGC_POSTMASTER, PGC_S_ARGV);
729  break;
730 
731  case 'l':
732  SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
733  break;
734 
735  case 'N':
736  SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
737  break;
738 
739  case 'n':
740  /* Don't reinit shared mem after abnormal exit */
741  Reinit = false;
742  break;
743 
744  case 'O':
745  SetConfigOption("allow_system_table_mods", "true", PGC_POSTMASTER, PGC_S_ARGV);
746  break;
747 
748  case 'o':
749  /* Other options to pass to the backend on the command line */
751  sizeof(ExtraOptions) - strlen(ExtraOptions),
752  " %s", optarg);
753  break;
754 
755  case 'P':
756  SetConfigOption("ignore_system_indexes", "true", PGC_POSTMASTER, PGC_S_ARGV);
757  break;
758 
759  case 'p':
761  break;
762 
763  case 'r':
764  /* only used by single-user backend */
765  break;
766 
767  case 'S':
769  break;
770 
771  case 's':
772  SetConfigOption("log_statement_stats", "true", PGC_POSTMASTER, PGC_S_ARGV);
773  break;
774 
775  case 'T':
776 
777  /*
778  * In the event that some backend dumps core, send SIGSTOP,
779  * rather than SIGQUIT, to all its peers. This lets the wily
780  * post_hacker collect core dumps from everyone.
781  */
782  SendStop = true;
783  break;
784 
785  case 't':
786  {
787  const char *tmp = get_stats_option_name(optarg);
788 
789  if (tmp)
790  {
792  }
793  else
794  {
795  write_stderr("%s: invalid argument for option -t: \"%s\"\n",
796  progname, optarg);
797  ExitPostmaster(1);
798  }
799  break;
800  }
801 
802  case 'W':
803  SetConfigOption("post_auth_delay", optarg, PGC_POSTMASTER, PGC_S_ARGV);
804  break;
805 
806  case 'c':
807  case '-':
808  {
809  char *name,
810  *value;
811 
812  ParseLongOption(optarg, &name, &value);
813  if (!value)
814  {
815  if (opt == '-')
816  ereport(ERROR,
817  (errcode(ERRCODE_SYNTAX_ERROR),
818  errmsg("--%s requires a value",
819  optarg)));
820  else
821  ereport(ERROR,
822  (errcode(ERRCODE_SYNTAX_ERROR),
823  errmsg("-c %s requires a value",
824  optarg)));
825  }
826 
828  free(name);
829  if (value)
830  free(value);
831  break;
832  }
833 
834  default:
835  write_stderr("Try \"%s --help\" for more information.\n",
836  progname);
837  ExitPostmaster(1);
838  }
839  }
840 
841  /*
842  * Postmaster accepts no non-option switch arguments.
843  */
844  if (optind < argc)
845  {
846  write_stderr("%s: invalid argument: \"%s\"\n",
847  progname, argv[optind]);
848  write_stderr("Try \"%s --help\" for more information.\n",
849  progname);
850  ExitPostmaster(1);
851  }
852 
853  /*
854  * Locate the proper configuration files and data directory, and read
855  * postgresql.conf for the first time.
856  */
857  if (!SelectConfigFiles(userDoption, progname))
858  ExitPostmaster(2);
859 
860  if (output_config_variable != NULL)
861  {
862  /*
863  * "-C guc" was specified, so print GUC's value and exit. No extra
864  * permission check is needed because the user is reading inside the
865  * data dir.
866  */
867  const char *config_val = GetConfigOption(output_config_variable,
868  false, false);
869 
870  puts(config_val ? config_val : "");
871  ExitPostmaster(0);
872  }
873 
874  /* Verify that DataDir looks reasonable */
875  checkDataDir();
876 
877  /* And switch working directory into it */
878  ChangeToDataDir();
879 
880  /*
881  * Check for invalid combinations of GUC settings.
882  */
884  {
885  write_stderr("%s: superuser_reserved_connections must be less than max_connections\n", progname);
886  ExitPostmaster(1);
887  }
889  {
890  write_stderr("%s: max_wal_senders must be less than max_connections\n", progname);
891  ExitPostmaster(1);
892  }
894  ereport(ERROR,
895  (errmsg("WAL archival cannot be enabled when wal_level is \"minimal\"")));
897  ereport(ERROR,
898  (errmsg("WAL streaming (max_wal_senders > 0) requires wal_level \"replica\" or \"logical\"")));
899 
900  /*
901  * Other one-time internal sanity checks can go here, if they are fast.
902  * (Put any slow processing further down, after postmaster.pid creation.)
903  */
904  if (!CheckDateTokenTables())
905  {
906  write_stderr("%s: invalid datetoken tables, please fix\n", progname);
907  ExitPostmaster(1);
908  }
909 
910  /*
911  * Now that we are done processing the postmaster arguments, reset
912  * getopt(3) library so that it will work correctly in subprocesses.
913  */
914  optind = 1;
915 #ifdef HAVE_INT_OPTRESET
916  optreset = 1; /* some systems need this too */
917 #endif
918 
919  /* For debugging: display postmaster environment */
920  {
921  extern char **environ;
922  char **p;
923 
924  ereport(DEBUG3,
925  (errmsg_internal("%s: PostmasterMain: initial environment dump:",
926  progname)));
927  ereport(DEBUG3,
928  (errmsg_internal("-----------------------------------------")));
929  for (p = environ; *p; ++p)
930  ereport(DEBUG3,
931  (errmsg_internal("\t%s", *p)));
932  ereport(DEBUG3,
933  (errmsg_internal("-----------------------------------------")));
934  }
935 
936  /*
937  * Create lockfile for data directory.
938  *
939  * We want to do this before we try to grab the input sockets, because the
940  * data directory interlock is more reliable than the socket-file
941  * interlock (thanks to whoever decided to put socket files in /tmp :-().
942  * For the same reason, it's best to grab the TCP socket(s) before the
943  * Unix socket(s).
944  *
945  * Also note that this internally sets up the on_proc_exit function that
946  * is responsible for removing both data directory and socket lockfiles;
947  * so it must happen before opening sockets so that at exit, the socket
948  * lockfiles go away after CloseServerPorts runs.
949  */
950  CreateDataDirLockFile(true);
951 
952  /* read control file (error checking and contains config) */
954 
955  /*
956  * Initialize SSL library, if specified.
957  */
958 #ifdef USE_SSL
959  if (EnableSSL)
960  {
961  (void) secure_initialize(true);
962  LoadedSSL = true;
963  }
964 #endif
965 
966  /*
967  * Register the apply launcher. Since it registers a background worker,
968  * it needs to be called before InitializeMaxBackends(), and it's probably
969  * a good idea to call it before any modules had chance to take the
970  * background worker slots.
971  */
973 
974  /*
975  * process any libraries that should be preloaded at postmaster start
976  */
978 
979  /*
980  * Now that loadable modules have had their chance to register background
981  * workers, calculate MaxBackends.
982  */
984 
985  /*
986  * Establish input sockets.
987  *
988  * First, mark them all closed, and set up an on_proc_exit function that's
989  * charged with closing the sockets again at postmaster shutdown.
990  */
991  for (i = 0; i < MAXLISTEN; i++)
993 
995 
996  if (ListenAddresses)
997  {
998  char *rawstring;
999  List *elemlist;
1000  ListCell *l;
1001  int success = 0;
1002 
1003  /* Need a modifiable copy of ListenAddresses */
1004  rawstring = pstrdup(ListenAddresses);
1005 
1006  /* Parse string into list of hostnames */
1007  if (!SplitIdentifierString(rawstring, ',', &elemlist))
1008  {
1009  /* syntax error in list */
1010  ereport(FATAL,
1011  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1012  errmsg("invalid list syntax in parameter \"%s\"",
1013  "listen_addresses")));
1014  }
1015 
1016  foreach(l, elemlist)
1017  {
1018  char *curhost = (char *) lfirst(l);
1019 
1020  if (strcmp(curhost, "*") == 0)
1021  status = StreamServerPort(AF_UNSPEC, NULL,
1022  (unsigned short) PostPortNumber,
1023  NULL,
1025  else
1026  status = StreamServerPort(AF_UNSPEC, curhost,
1027  (unsigned short) PostPortNumber,
1028  NULL,
1029  ListenSocket, MAXLISTEN);
1030 
1031  if (status == STATUS_OK)
1032  {
1033  success++;
1034  /* record the first successful host addr in lockfile */
1035  if (!listen_addr_saved)
1036  {
1038  listen_addr_saved = true;
1039  }
1040  }
1041  else
1042  ereport(WARNING,
1043  (errmsg("could not create listen socket for \"%s\"",
1044  curhost)));
1045  }
1046 
1047  if (!success && elemlist != NIL)
1048  ereport(FATAL,
1049  (errmsg("could not create any TCP/IP sockets")));
1050 
1051  list_free(elemlist);
1052  pfree(rawstring);
1053  }
1054 
1055 #ifdef USE_BONJOUR
1056  /* Register for Bonjour only if we opened TCP socket(s) */
1058  {
1059  DNSServiceErrorType err;
1060 
1061  /*
1062  * We pass 0 for interface_index, which will result in registering on
1063  * all "applicable" interfaces. It's not entirely clear from the
1064  * DNS-SD docs whether this would be appropriate if we have bound to
1065  * just a subset of the available network interfaces.
1066  */
1067  err = DNSServiceRegister(&bonjour_sdref,
1068  0,
1069  0,
1070  bonjour_name,
1071  "_postgresql._tcp.",
1072  NULL,
1073  NULL,
1075  0,
1076  NULL,
1077  NULL,
1078  NULL);
1079  if (err != kDNSServiceErr_NoError)
1080  elog(LOG, "DNSServiceRegister() failed: error code %ld",
1081  (long) err);
1082 
1083  /*
1084  * We don't bother to read the mDNS daemon's reply, and we expect that
1085  * it will automatically terminate our registration when the socket is
1086  * closed at postmaster termination. So there's nothing more to be
1087  * done here. However, the bonjour_sdref is kept around so that
1088  * forked children can close their copies of the socket.
1089  */
1090  }
1091 #endif
1092 
1093 #ifdef HAVE_UNIX_SOCKETS
1095  {
1096  char *rawstring;
1097  List *elemlist;
1098  ListCell *l;
1099  int success = 0;
1100 
1101  /* Need a modifiable copy of Unix_socket_directories */
1102  rawstring = pstrdup(Unix_socket_directories);
1103 
1104  /* Parse string into list of directories */
1105  if (!SplitDirectoriesString(rawstring, ',', &elemlist))
1106  {
1107  /* syntax error in list */
1108  ereport(FATAL,
1109  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1110  errmsg("invalid list syntax in parameter \"%s\"",
1111  "unix_socket_directories")));
1112  }
1113 
1114  foreach(l, elemlist)
1115  {
1116  char *socketdir = (char *) lfirst(l);
1117 
1118  status = StreamServerPort(AF_UNIX, NULL,
1119  (unsigned short) PostPortNumber,
1120  socketdir,
1121  ListenSocket, MAXLISTEN);
1122 
1123  if (status == STATUS_OK)
1124  {
1125  success++;
1126  /* record the first successful Unix socket in lockfile */
1127  if (success == 1)
1129  }
1130  else
1131  ereport(WARNING,
1132  (errmsg("could not create Unix-domain socket in directory \"%s\"",
1133  socketdir)));
1134  }
1135 
1136  if (!success && elemlist != NIL)
1137  ereport(FATAL,
1138  (errmsg("could not create any Unix-domain sockets")));
1139 
1140  list_free_deep(elemlist);
1141  pfree(rawstring);
1142  }
1143 #endif
1144 
1145  /*
1146  * check that we have some socket to listen on
1147  */
1148  if (ListenSocket[0] == PGINVALID_SOCKET)
1149  ereport(FATAL,
1150  (errmsg("no socket created for listening")));
1151 
1152  /*
1153  * If no valid TCP ports, write an empty line for listen address,
1154  * indicating the Unix socket must be used. Note that this line is not
1155  * added to the lock file until there is a socket backing it.
1156  */
1157  if (!listen_addr_saved)
1159 
1160  /*
1161  * Set up shared memory and semaphores.
1162  */
1164 
1165  /*
1166  * Estimate number of openable files. This must happen after setting up
1167  * semaphores, because on some platforms semaphores count as open files.
1168  */
1169  set_max_safe_fds();
1170 
1171  /*
1172  * Set reference point for stack-depth checking.
1173  */
1174  set_stack_base();
1175 
1176  /*
1177  * Initialize pipe (or process handle on Windows) that allows children to
1178  * wake up from sleep on postmaster death.
1179  */
1181 
1182 #ifdef WIN32
1183 
1184  /*
1185  * Initialize I/O completion port used to deliver list of dead children.
1186  */
1187  win32ChildQueue = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 1);
1188  if (win32ChildQueue == NULL)
1189  ereport(FATAL,
1190  (errmsg("could not create I/O completion port for child queue")));
1191 #endif
1192 
1193  /*
1194  * Record postmaster options. We delay this till now to avoid recording
1195  * bogus options (eg, NBuffers too high for available memory).
1196  */
1197  if (!CreateOptsFile(argc, argv, my_exec_path))
1198  ExitPostmaster(1);
1199 
1200 #ifdef EXEC_BACKEND
1201  /* Write out nondefault GUC settings for child processes to use */
1202  write_nondefault_variables(PGC_POSTMASTER);
1203 #endif
1204 
1205  /*
1206  * Write the external PID file if requested
1207  */
1208  if (external_pid_file)
1209  {
1210  FILE *fpidfile = fopen(external_pid_file, "w");
1211 
1212  if (fpidfile)
1213  {
1214  fprintf(fpidfile, "%d\n", MyProcPid);
1215  fclose(fpidfile);
1216 
1217  /* Make PID file world readable */
1218  if (chmod(external_pid_file, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) != 0)
1219  write_stderr("%s: could not change permissions of external PID file \"%s\": %s\n",
1221  }
1222  else
1223  write_stderr("%s: could not write external PID file \"%s\": %s\n",
1225 
1227  }
1228 
1229  /*
1230  * Remove old temporary files. At this point there can be no other
1231  * Postgres processes running in this directory, so this should be safe.
1232  */
1234 
1235  /*
1236  * Forcibly remove the files signaling a standby promotion request.
1237  * Otherwise, the existence of those files triggers a promotion too early,
1238  * whether a user wants that or not.
1239  *
1240  * This removal of files is usually unnecessary because they can exist
1241  * only during a few moments during a standby promotion. However there is
1242  * a race condition: if pg_ctl promote is executed and creates the files
1243  * during a promotion, the files can stay around even after the server is
1244  * brought up to new master. Then, if new standby starts by using the
1245  * backup taken from that master, the files can exist at the server
1246  * startup and should be removed in order to avoid an unexpected
1247  * promotion.
1248  *
1249  * Note that promotion signal files need to be removed before the startup
1250  * process is invoked. Because, after that, they can be used by
1251  * postmaster's SIGUSR1 signal handler.
1252  */
1254 
1255  /* Remove any outdated file holding the current log filenames. */
1256  if (unlink(LOG_METAINFO_DATAFILE) < 0 && errno != ENOENT)
1257  ereport(LOG,
1259  errmsg("could not remove file \"%s\": %m",
1261 
1262  /*
1263  * If enabled, start up syslogger collection subprocess
1264  */
1266 
1267  /*
1268  * Reset whereToSendOutput from DestDebug (its starting state) to
1269  * DestNone. This stops ereport from sending log messages to stderr unless
1270  * Log_destination permits. We don't do this until the postmaster is
1271  * fully launched, since startup failures may as well be reported to
1272  * stderr.
1273  *
1274  * If we are in fact disabling logging to stderr, first emit a log message
1275  * saying so, to provide a breadcrumb trail for users who may not remember
1276  * that their logging is configured to go somewhere else.
1277  */
1279  ereport(LOG,
1280  (errmsg("ending log output to stderr"),
1281  errhint("Future log output will go to log destination \"%s\".",
1283 
1285 
1286  /*
1287  * Initialize stats collection subsystem (this does NOT start the
1288  * collector process!)
1289  */
1290  pgstat_init();
1291 
1292  /*
1293  * Initialize the autovacuum subsystem (again, no process start yet)
1294  */
1295  autovac_init();
1296 
1297  /*
1298  * Load configuration files for client authentication.
1299  */
1300  if (!load_hba())
1301  {
1302  /*
1303  * It makes no sense to continue if we fail to load the HBA file,
1304  * since there is no way to connect to the database in this case.
1305  */
1306  ereport(FATAL,
1307  (errmsg("could not load pg_hba.conf")));
1308  }
1309  if (!load_ident())
1310  {
1311  /*
1312  * We can start up without the IDENT file, although it means that you
1313  * cannot log in using any of the authentication methods that need a
1314  * user name mapping. load_ident() already logged the details of error
1315  * to the log.
1316  */
1317  }
1318 
1319 #ifdef HAVE_PTHREAD_IS_THREADED_NP
1320 
1321  /*
1322  * On macOS, libintl replaces setlocale() with a version that calls
1323  * CFLocaleCopyCurrent() when its second argument is "" and every relevant
1324  * environment variable is unset or empty. CFLocaleCopyCurrent() makes
1325  * the process multithreaded. The postmaster calls sigprocmask() and
1326  * calls fork() without an immediate exec(), both of which have undefined
1327  * behavior in a multithreaded program. A multithreaded postmaster is the
1328  * normal case on Windows, which offers neither fork() nor sigprocmask().
1329  */
1330  if (pthread_is_threaded_np() != 0)
1331  ereport(FATAL,
1332  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1333  errmsg("postmaster became multithreaded during startup"),
1334  errhint("Set the LC_ALL environment variable to a valid locale.")));
1335 #endif
1336 
1337  /*
1338  * Remember postmaster startup time
1339  */
1341 #ifndef HAVE_STRONG_RANDOM
1342  /* RandomCancelKey wants its own copy */
1344 #endif
1345 
1346  /*
1347  * Report postmaster status in the postmaster.pid file, to allow pg_ctl to
1348  * see what's happening.
1349  */
1351 
1352  /*
1353  * We're ready to rock and roll...
1354  */
1356  Assert(StartupPID != 0);
1358  pmState = PM_STARTUP;
1359 
1360  /* Some workers may be scheduled to start now */
1362 
1363  status = ServerLoop();
1364 
1365  /*
1366  * ServerLoop probably shouldn't ever return, but if it does, close down.
1367  */
1368  ExitPostmaster(status != STATUS_OK);
1369 
1370  abort(); /* not reached */
1371 }
1372 
1373 
1374 /*
1375  * on_proc_exit callback to close server's listen sockets
      *
      * Visits every one of the MAXLISTEN slots of ListenSocket[] (the scan does
      * not stop at the first PGINVALID_SOCKET entry) and acts on each slot that
      * holds a valid socket, then deals with the Unix-socket filesystem entries.
      * Socket lock files are intentionally not touched here.
      *
      * NOTE(review): in this listing the function-name line and the statements
      * that close each socket and remove the socket files are not visible;
      * confirm them against the real source file.
1376  */
1377 static void
1379 {
1380  int i;
1381 
1382  /*
1383  * First, explicitly close all the socket FDs. We used to just let this
1384  * happen implicitly at postmaster exit, but it's better to close them
1385  * before we remove the postmaster.pid lockfile; otherwise there's a race
1386  * condition if a new postmaster wants to re-use the TCP port number.
1387  */
1388  for (i = 0; i < MAXLISTEN; i++)
1389  {
1390  if (ListenSocket[i] != PGINVALID_SOCKET)
1391  {
1394  }
1395  }
1396 
1397  /*
1398  * Next, remove any filesystem entries for Unix sockets. To avoid race
1399  * conditions against incoming postmasters, this must happen after closing
1400  * the sockets and before removing lock files.
1401  */
1403 
1404  /*
1405  * We don't do anything about socket lock files here; those will be
1406  * removed in a later on_proc_exit callback.
1407  */
1408 }
1409 
1410 /*
1411  * on_proc_exit callback to delete external_pid_file
      *
      * Does nothing when external_pid_file is NULL.  The result of unlink() is
      * not checked, so a failure to remove the file goes unreported.
1412  */
1413 static void
1415 {
1416  if (external_pid_file)
1417  unlink(external_pid_file);
1418 }
1419 
1420 
1421 /*
1422  * Compute and check the directory paths to files that are part of the
1423  * installation (as deduced from the postgres executable's own location)
      *
      * argv0 is handed to find_my_exec() (and, under EXEC_BACKEND, to
      * find_other_exec()) and is quoted in the resulting error messages.
      *
      * Effects visible here: my_exec_path is filled in by find_my_exec(); under
      * EXEC_BACKEND postgres_exec_path is filled in by find_other_exec(); and
      * the directory named by pkglib_path is opened once with AllocateDir() as
      * a sanity check and closed again with FreeDir().
      *
      * Failure to locate either executable is reported at FATAL level; failure
      * to open the pkglib directory is reported with ereport(ERROR).
1424  */
1425 static void
1427 {
1428  DIR *pdir;
1429 
1430  /* Locate the postgres executable itself */
1431  if (find_my_exec(argv0, my_exec_path) < 0)
1432  elog(FATAL, "%s: could not locate my own executable path", argv0);
1433 
1434 #ifdef EXEC_BACKEND
1435  /* Locate executable backend before we change working directory */
1436  if (find_other_exec(argv0, "postgres", PG_BACKEND_VERSIONSTR,
1437  postgres_exec_path) < 0)
1438  ereport(FATAL,
1439  (errmsg("%s: could not locate matching postgres executable",
1440  argv0)));
1441 #endif
1442 
1443  /*
1444  * Locate the pkglib directory --- this has to be set early in case we try
1445  * to load any modules from it in response to postgresql.conf entries.
      *
      * NOTE(review): the statement that computes pkglib_path is not visible in
      * this listing; presumably it derives it from my_exec_path -- confirm.
1446  */
1448 
1449  /*
1450  * Verify that there's a readable directory there; otherwise the Postgres
1451  * installation is incomplete or corrupt. (A typical cause of this
1452  * failure is that the postgres executable has been moved or hardlinked to
1453  * some directory that's not a sibling of the installation lib/
1454  * directory.)
1455  */
1456  pdir = AllocateDir(pkglib_path);
1457  if (pdir == NULL)
1458  ereport(ERROR,
1460  errmsg("could not open directory \"%s\": %m",
1461  pkglib_path),
1462  errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
1463  my_exec_path)));
      /* The directory is opened only as an existence/readability probe. */
1464  FreeDir(pdir);
1465 
1466  /*
1467  * XXX is it worth similarly checking the share/ directory? If the lib/
1468  * directory is there, then share/ probably is too.
1469  */
1470 }
1471 
1472 
1473 /*
1474  * Validate the proposed data directory
      *
      * Operates on the global DataDir, which must already be set (asserted).
      * The checks run in this order, and the first failure ends the process:
      *	1. stat(DataDir) must succeed (a missing directory gets its own
      *	   message, any other stat() failure a generic one);
      *	2. the path must be a directory;
      *	3. except on Windows/Cygwin, its owner must equal geteuid();
      *	4. except on Windows/Cygwin, it must have no group or world
      *	   permission bits set;
      *	5. "<DataDir>/global/pg_control" must be openable for reading.
      * Checks 1-4 fail with ereport(FATAL); check 5 writes a message with
      * write_stderr() and calls ExitPostmaster(2).
      *
      * NOTE(review): the function-name line and the PG_VERSION check announced
      * by the comment near the end are not visible in this listing.
1475  */
1476 static void
1478 {
1479  char path[MAXPGPATH];
1480  FILE *fp;
1481  struct stat stat_buf;
1482 
1483  Assert(DataDir);
1484 
1485  if (stat(DataDir, &stat_buf) != 0)
1486  {
1487  if (errno == ENOENT)
1488  ereport(FATAL,
1490  errmsg("data directory \"%s\" does not exist",
1491  DataDir)));
1492  else
1493  ereport(FATAL,
1495  errmsg("could not read permissions of directory \"%s\": %m",
1496  DataDir)));
1497  }
1498 
1499  /* eventual chdir would fail anyway, but let's test ... */
1500  if (!S_ISDIR(stat_buf.st_mode))
1501  ereport(FATAL,
1502  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1503  errmsg("specified data directory \"%s\" is not a directory",
1504  DataDir)));
1505 
1506  /*
1507  * Check that the directory belongs to my userid; if not, reject.
1508  *
1509  * This check is an essential part of the interlock that prevents two
1510  * postmasters from starting in the same directory (see CreateLockFile()).
1511  * Do not remove or weaken it.
1512  *
1513  * XXX can we safely enable this check on Windows?
1514  */
1515 #if !defined(WIN32) && !defined(__CYGWIN__)
1516  if (stat_buf.st_uid != geteuid())
1517  ereport(FATAL,
1518  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1519  errmsg("data directory \"%s\" has wrong ownership",
1520  DataDir),
1521  errhint("The server must be started by the user that owns the data directory.")));
1522 #endif
1523 
1524  /*
1525  * Check if the directory has group or world access. If so, reject.
1526  *
1527  * It would be possible to allow weaker constraints (for example, allow
1528  * group access) but we cannot make a general assumption that that is
1529  * okay; for example there are platforms where nearly all users
1530  * customarily belong to the same group. Perhaps this test should be
1531  * configurable.
1532  *
1533  * XXX temporarily suppress check when on Windows, because there may not
1534  * be proper support for Unix-y file permissions. Need to think of a
1535  * reasonable check to apply on Windows.
1536  */
1537 #if !defined(WIN32) && !defined(__CYGWIN__)
1538  if (stat_buf.st_mode & (S_IRWXG | S_IRWXO))
1539  ereport(FATAL,
1540  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1541  errmsg("data directory \"%s\" has group or world access",
1542  DataDir),
1543  errdetail("Permissions should be u=rwx (0700).")));
1544 #endif
1545 
1546  /* Look for PG_VERSION before looking for pg_control */
1548 
1549  snprintf(path, sizeof(path), "%s/global/pg_control", DataDir);
1550 
      /* Only the ability to open the file is tested; its contents are not read. */
1551  fp = AllocateFile(path, PG_BINARY_R);
1552  if (fp == NULL)
1553  {
1554  write_stderr("%s: could not find the database system\n"
1555  "Expected to find it in the directory \"%s\",\n"
1556  "but could not open file \"%s\": %s\n",
1557  progname, DataDir, path, strerror(errno));
1558  ExitPostmaster(2);
1559  }
1560  FreeFile(fp);
1561 }
1562 
1563 /*
1564  * Determine how long we should let ServerLoop sleep.
1565  *
1566  * In normal conditions we wait at most one minute, to ensure that the other
1567  * background tasks handled by ServerLoop get done even when no requests are
1568  * arriving. However, if there are background workers waiting to be started,
1569  * we don't actually sleep so that they are quickly serviced. Other exception
1570  * cases are as shown in the code.
      *
      * The result is stored in *timeout (tv_sec and tv_usec are both set on
      * every path); nothing is returned.  Summary of outcomes:
      *	- "normal case" (see first comment in the body): the time left until
      *	  the SIGKILL deadline when AbortStartTime is set (never negative),
      *	  otherwise 60 seconds;
      *	- StartWorkerNeeded: zero;
      *	- HaveCrashedWorker: the time until the earliest pending worker
      *	  restart, limited to about one minute;
      *	- anything else: 60 seconds.
1571  */
1572 static void
1573 DetermineSleepTime(struct timeval *timeout)
1574 {
      /* Earliest restart time found so far; 0 means "none found". */
1575  TimestampTz next_wakeup = 0;
1576 
1577  /*
1578  * Normal case: either there are no background workers at all, or we're in
1579  * a shutdown sequence (during which we ignore bgworkers altogether).
      *
      * NOTE(review): the second half of the condition below is not visible in
      * this listing.
1580  */
1581  if (Shutdown > NoShutdown ||
1583  {
1584  if (AbortStartTime != 0)
1585  {
1586  /* time left to abort; clamp to 0 in case it already expired */
1587  timeout->tv_sec = SIGKILL_CHILDREN_AFTER_SECS -
1588  (time(NULL) - AbortStartTime);
1589  timeout->tv_sec = Max(timeout->tv_sec, 0);
1590  timeout->tv_usec = 0;
1591  }
1592  else
1593  {
1594  timeout->tv_sec = 60;
1595  timeout->tv_usec = 0;
1596  }
1597  return;
1598  }
1599 
      /* A worker is waiting to be started: ask the caller not to sleep at all. */
1600  if (StartWorkerNeeded)
1601  {
1602  timeout->tv_sec = 0;
1603  timeout->tv_usec = 0;
1604  return;
1605  }
1606 
1607  if (HaveCrashedWorker)
1608  {
1609  slist_mutable_iter siter;
1610 
1611  /*
1612  * When there are crashed bgworkers, we sleep just long enough that
1613  * they are restarted when they request to be. Scan the list to
1614  * determine the minimum of all wakeup times according to most recent
1615  * crash time and requested restart interval.
      *
      * NOTE(review): the loop header that drives siter is not visible in
      * this listing.
1616  */
1618  {
1619  RegisteredBgWorker *rw;
1620  TimestampTz this_wakeup;
1621 
1622  rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
1623 
      /* A worker with no recorded crash time does not affect the wakeup. */
1624  if (rw->rw_crashed_at == 0)
1625  continue;
1626 
      /*
      * Workers flagged rw_terminate (plus those matching a first condition
      * that is not visible in this listing) are dropped from the list
      * instead of being scheduled.
      */
1628  || rw->rw_terminate)
1629  {
1630  ForgetBackgroundWorker(&siter);
1631  continue;
1632  }
1633 
      /* bgw_restart_time is scaled by 1000 to obtain milliseconds. */
1634  this_wakeup = TimestampTzPlusMilliseconds(rw->rw_crashed_at,
1635  1000L * rw->rw_worker.bgw_restart_time);
1636  if (next_wakeup == 0 || this_wakeup < next_wakeup)
1637  next_wakeup = this_wakeup;
1638  }
1639  }
1640 
1641  if (next_wakeup != 0)
1642  {
1643  long secs;
1644  int microsecs;
1645 
      /*
      * NOTE(review): the first line of the call that fills secs/microsecs is
      * not visible in this listing; presumably it computes the interval from
      * the current time to next_wakeup -- confirm.
      */
1647  &secs, &microsecs);
1648  timeout->tv_sec = secs;
1649  timeout->tv_usec = microsecs;
1650 
1651  /* Ensure we don't exceed one minute */
      /*
      * Only tv_sec is tested, so a value of exactly 60 seconds plus some
      * microseconds passes through unchanged.
      */
1652  if (timeout->tv_sec > 60)
1653  {
1654  timeout->tv_sec = 60;
1655  timeout->tv_usec = 0;
1656  }
1657  }
1658  else
1659  {
1660  timeout->tv_sec = 60;
1661  timeout->tv_usec = 0;
1662  }
1663 }
1664 
1665 /*
1666  * Main idle loop of postmaster
1667  *
1668  * NB: Needs to be called with signals blocked
      *
      * Each iteration: sleep (select() on the listen sockets, or a plain
      * 100 msec pg_usleep() while in PM_WAIT_DEAD_END state), start a backend
      * for every listen socket that became readable, restart any auxiliary
      * processes found missing, and finally run the time-based housekeeping
      * (SIGKILL escalation, lock file recheck, touching socket files).
      *
      * The only return visible here is STATUS_ERROR, taken when select() fails
      * with an errno other than EINTR or EWOULDBLOCK; otherwise the loop does
      * not exit.
      *
      * NOTE(review): the function-name line and many of the process-start
      * statements are not visible in this listing; the comments describing
      * them are the original ones.
1669  */
1670 static int
1672 {
1673  fd_set readmask;
1674  int nSockets;
1675  time_t last_lockfile_recheck_time,
1676  last_touch_time;
1677 
1678  last_lockfile_recheck_time = last_touch_time = time(NULL);
1679 
      /* Build the master mask once; a fresh copy is handed to select() each time. */
1680  nSockets = initMasks(&readmask);
1681 
1682  for (;;)
1683  {
1684  fd_set rmask;
1685  int selres;
1686  time_t now;
1687 
1688  /*
1689  * Wait for a connection request to arrive.
1690  *
1691  * We block all signals except while sleeping. That makes it safe for
1692  * signal handlers, which again block all signals while executing, to
1693  * do nontrivial work.
1694  *
1695  * If we are in PM_WAIT_DEAD_END state, then we don't want to accept
1696  * any new connections, so we don't call select(), and just sleep.
1697  */
1698  memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
1699 
1700  if (pmState == PM_WAIT_DEAD_END)
1701  {
1703 
1704  pg_usleep(100000L); /* 100 msec seems reasonable */
      /* Pretend select() timed out so that no socket is examined below. */
1705  selres = 0;
1706 
1707  PG_SETMASK(&BlockSig);
1708  }
1709  else
1710  {
1711  /* must set timeout each time; some OSes change it! */
1712  struct timeval timeout;
1713 
1714  /* Needs to run with blocked signals! */
1715  DetermineSleepTime(&timeout);
1716 
1718 
1719  selres = select(nSockets, &rmask, NULL, NULL, &timeout);
1720 
1721  PG_SETMASK(&BlockSig);
1722  }
1723 
1724  /* Now check the select() result */
      /*
      * EINTR and EWOULDBLOCK are tolerated: control falls through with
      * selres < 0, so the socket scan below is skipped for this iteration.
      */
1725  if (selres < 0)
1726  {
1727  if (errno != EINTR && errno != EWOULDBLOCK)
1728  {
1729  ereport(LOG,
1731  errmsg("select() failed in postmaster: %m")));
1732  return STATUS_ERROR;
1733  }
1734  }
1735 
1736  /*
1737  * New connection pending on any of our sockets? If so, fork a child
1738  * process to deal with it.
1739  */
1740  if (selres > 0)
1741  {
1742  int i;
1743 
1744  for (i = 0; i < MAXLISTEN; i++)
1745  {
      /* The scan ends at the first unused slot, as in initMasks(). */
1746  if (ListenSocket[i] == PGINVALID_SOCKET)
1747  break;
1748  if (FD_ISSET(ListenSocket[i], &rmask))
1749  {
1750  Port *port;
1751 
1752  port = ConnCreate(ListenSocket[i]);
      /* A NULL port is skipped silently here. */
1753  if (port)
1754  {
1755  BackendStartup(port);
1756 
1757  /*
1758  * We no longer need the open socket or port structure
1759  * in this process
1760  */
1761  StreamClose(port->sock);
1762  ConnFree(port);
1763  }
1764  }
1765  }
1766  }
1767 
1768  /* If we have lost the log collector, try to start a new one */
1769  if (SysLoggerPID == 0 && Logging_collector)
1771 
1772  /*
1773  * If no background writer process is running, and we are not in a
1774  * state that prevents it, start one. It doesn't matter if this
1775  * fails, we'll just try again later. Likewise for the checkpointer.
1776  */
1777  if (pmState == PM_RUN || pmState == PM_RECOVERY ||
1779  {
1780  if (CheckpointerPID == 0)
1782  if (BgWriterPID == 0)
1784  }
1785 
1786  /*
1787  * Likewise, if we have lost the walwriter process, try to start a new
1788  * one. But this is needed only in normal operation (else we cannot
1789  * be writing any new WAL).
1790  */
1791  if (WalWriterPID == 0 && pmState == PM_RUN)
1793 
1794  /*
1795  * If we have lost the autovacuum launcher, try to start a new one. We
1796  * don't want autovacuum to run in binary upgrade mode because
1797  * autovacuum might update relfrozenxid for empty tables before the
1798  * physical files are put in place.
1799  */
1800  if (!IsBinaryUpgrade && AutoVacPID == 0 &&
1802  pmState == PM_RUN)
1803  {
1805  if (AutoVacPID != 0)
1806  start_autovac_launcher = false; /* signal processed */
1807  }
1808 
1809  /* If we have lost the stats collector, try to start a new one */
1810  if (PgStatPID == 0 &&
1811  (pmState == PM_RUN || pmState == PM_HOT_STANDBY))
1812  PgStatPID = pgstat_start();
1813 
1814  /* If we have lost the archiver, try to start a new one. */
1815  if (PgArchPID == 0 && PgArchStartupAllowed())
1816  PgArchPID = pgarch_start();
1817 
1818  /* If we need to signal the autovacuum launcher, do so now */
1820  {
      /* The flag is cleared even when no launcher is running to be signalled. */
1821  avlauncher_needs_signal = false;
1822  if (AutoVacPID != 0)
1823  kill(AutoVacPID, SIGUSR2);
1824  }
1825 
1826  /* If we need to start a WAL receiver, try to do that now */
1829 
1830  /* Get other worker processes running, if needed */
1833 
1834 #ifdef HAVE_PTHREAD_IS_THREADED_NP
1835 
1836  /*
1837  * With assertions enabled, check regularly for appearance of
1838  * additional threads. All builds check at start and exit.
1839  */
1840  Assert(pthread_is_threaded_np() == 0);
1841 #endif
1842 
1843  /*
1844  * Lastly, check to see if it's time to do some things that we don't
1845  * want to do every single time through the loop, because they're a
1846  * bit expensive. Note that there's up to a minute of slop in when
1847  * these tasks will be performed, since DetermineSleepTime() will let
1848  * us sleep at most that long; except for SIGKILL timeout which has
1849  * special-case logic there.
1850  */
1851  now = time(NULL);
1852 
1853  /*
1854  * If we already sent SIGQUIT to children and they are slow to shut
1855  * down, it's time to send them SIGKILL. This doesn't happen
1856  * normally, but under certain conditions backends can get stuck while
1857  * shutting down. This is a last measure to get them unwedged.
1858  *
1859  * Note we also do this during recovery from a process crash.
1860  */
1861  if ((Shutdown >= ImmediateShutdown || (FatalError && !SendStop)) &&
1862  AbortStartTime != 0 &&
1864  {
1865  /* We were gentle with them before. Not anymore */
1867  /* reset flag so we don't SIGKILL again */
1868  AbortStartTime = 0;
1869  }
1870 
1871  /*
1872  * Once a minute, verify that postmaster.pid hasn't been removed or
1873  * overwritten. If it has, we force a shutdown. This avoids having
1874  * postmasters and child processes hanging around after their database
1875  * is gone, and maybe causing problems if a new database cluster is
1876  * created in the same place. It also provides some protection
1877  * against a DBA foolishly removing postmaster.pid and manually
1878  * starting a new postmaster. Data corruption is likely to ensue from
1879  * that anyway, but we can minimize the damage by aborting ASAP.
1880  */
1881  if (now - last_lockfile_recheck_time >= 1 * SECS_PER_MINUTE)
1882  {
1883  if (!RecheckDataDirLockFile())
1884  {
1885  ereport(LOG,
1886  (errmsg("performing immediate shutdown because data directory lock file is invalid")));
      /* The postmaster sends SIGQUIT to itself to trigger the shutdown. */
1887  kill(MyProcPid, SIGQUIT);
1888  }
      /* The timestamp advances whether or not the recheck succeeded. */
1889  last_lockfile_recheck_time = now;
1890  }
1891 
1892  /*
1893  * Touch Unix socket and lock files every 58 minutes, to ensure that
1894  * they are not removed by overzealous /tmp-cleaning tasks. We assume
1895  * no one runs cleaners with cutoff times of less than an hour ...
1896  */
1897  if (now - last_touch_time >= 58 * SECS_PER_MINUTE)
1898  {
1899  TouchSocketFiles();
1901  last_touch_time = now;
1902  }
1903  }
1904 }
1905 
1906 /*
1907  * Initialise the masks for select() for the ports we are listening on.
1908  * Return the number of sockets to listen on.
1909  */
1910 static int
1911 initMasks(fd_set *rmask)
1912 {
1913  int maxsock = -1;
1914  int i;
1915 
1916  FD_ZERO(rmask);
1917 
1918  for (i = 0; i < MAXLISTEN; i++)
1919  {
1920  int fd = ListenSocket[i];
1921 
1922  if (fd == PGINVALID_SOCKET)
1923  break;
1924  FD_SET(fd, rmask);
1925 
1926  if (fd > maxsock)
1927  maxsock = fd;
1928  }
1929 
1930  return maxsock + 1;
1931 }
1932 
1933 
1934 /*
1935  * Read a client's startup packet and do something according to it.
1936  *
1937  * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1938  * not return at all.
1939  *
1940  * (Note that ereport(FATAL) stuff is sent to the client, so only use it
1941  * if that's what you want. Return STATUS_ERROR if you don't want to
1942  * send anything to the client, which would typically be appropriate
1943  * if we detect a communications failure.)
1944  */
1945 static int
1947 {
1948  int32 len;
1949  void *buf;
1950  ProtocolVersion proto;
1951  MemoryContext oldcontext;
1952 
1953  pq_startmsgread();
1954  if (pq_getbytes((char *) &len, 4) == EOF)
1955  {
1956  /*
1957  * EOF after SSLdone probably means the client didn't like our
1958  * response to NEGOTIATE_SSL_CODE. That's not an error condition, so
1959  * don't clutter the log with a complaint.
1960  */
1961  if (!SSLdone)
1963  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1964  errmsg("incomplete startup packet")));
1965  return STATUS_ERROR;
1966  }
1967 
1968  len = pg_ntoh32(len);
1969  len -= 4;
1970 
1971  if (len < (int32) sizeof(ProtocolVersion) ||
1973  {
1975  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1976  errmsg("invalid length of startup packet")));
1977  return STATUS_ERROR;
1978  }
1979 
1980  /*
1981  * Allocate at least the size of an old-style startup packet, plus one
1982  * extra byte, and make sure all are zeroes. This ensures we will have
1983  * null termination of all strings, in both fixed- and variable-length
1984  * packet layouts.
1985  */
1986  if (len <= (int32) sizeof(StartupPacket))
1987  buf = palloc0(sizeof(StartupPacket) + 1);
1988  else
1989  buf = palloc0(len + 1);
1990 
1991  if (pq_getbytes(buf, len) == EOF)
1992  {
1994  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1995  errmsg("incomplete startup packet")));
1996  return STATUS_ERROR;
1997  }
1998  pq_endmsgread();
1999 
2000  /*
2001  * The first field is either a protocol version number or a special
2002  * request code.
2003  */
2004  port->proto = proto = pg_ntoh32(*((ProtocolVersion *) buf));
2005 
2006  if (proto == CANCEL_REQUEST_CODE)
2007  {
2008  processCancelRequest(port, buf);
2009  /* Not really an error, but we don't want to proceed further */
2010  return STATUS_ERROR;
2011  }
2012 
2013  if (proto == NEGOTIATE_SSL_CODE && !SSLdone)
2014  {
2015  char SSLok;
2016 
2017 #ifdef USE_SSL
2018  /* No SSL when disabled or on Unix sockets */
2019  if (!LoadedSSL || IS_AF_UNIX(port->laddr.addr.ss_family))
2020  SSLok = 'N';
2021  else
2022  SSLok = 'S'; /* Support for SSL */
2023 #else
2024  SSLok = 'N'; /* No support for SSL */
2025 #endif
2026 
2027 retry1:
2028  if (send(port->sock, &SSLok, 1, 0) != 1)
2029  {
2030  if (errno == EINTR)
2031  goto retry1; /* if interrupted, just retry */
2034  errmsg("failed to send SSL negotiation response: %m")));
2035  return STATUS_ERROR; /* close the connection */
2036  }
2037 
2038 #ifdef USE_SSL
2039  if (SSLok == 'S' && secure_open_server(port) == -1)
2040  return STATUS_ERROR;
2041 #endif
2042  /* regular startup packet, cancel, etc packet should follow... */
2043  /* but not another SSL negotiation request */
2044  return ProcessStartupPacket(port, true);
2045  }
2046 
2047  /* Could add additional special packet types here */
2048 
2049  /*
2050  * Set FrontendProtocol now so that ereport() knows what format to send if
2051  * we fail during startup.
2052  */
2053  FrontendProtocol = proto;
2054 
2055  /* Check we can handle the protocol the frontend is using. */
2056 
2061  ereport(FATAL,
2062  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2063  errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
2064  PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
2068 
2069  /*
2070  * Now fetch parameters out of startup packet and save them into the Port
2071  * structure. All data structures attached to the Port struct must be
2072  * allocated in TopMemoryContext so that they will remain available in a
2073  * running backend (even after PostmasterContext is destroyed). We need
2074  * not worry about leaking this storage on failure, since we aren't in the
2075  * postmaster process anymore.
2076  */
2078 
2079  if (PG_PROTOCOL_MAJOR(proto) >= 3)
2080  {
2081  int32 offset = sizeof(ProtocolVersion);
2082 
2083  /*
2084  * Scan packet body for name/option pairs. We can assume any string
2085  * beginning within the packet body is null-terminated, thanks to
2086  * zeroing extra byte above.
2087  */
2088  port->guc_options = NIL;
2089 
2090  while (offset < len)
2091  {
2092  char *nameptr = ((char *) buf) + offset;
2093  int32 valoffset;
2094  char *valptr;
2095 
2096  if (*nameptr == '\0')
2097  break; /* found packet terminator */
2098  valoffset = offset + strlen(nameptr) + 1;
2099  if (valoffset >= len)
2100  break; /* missing value, will complain below */
2101  valptr = ((char *) buf) + valoffset;
2102 
2103  if (strcmp(nameptr, "database") == 0)
2104  port->database_name = pstrdup(valptr);
2105  else if (strcmp(nameptr, "user") == 0)
2106  port->user_name = pstrdup(valptr);
2107  else if (strcmp(nameptr, "options") == 0)
2108  port->cmdline_options = pstrdup(valptr);
2109  else if (strcmp(nameptr, "replication") == 0)
2110  {
2111  /*
2112  * Due to backward compatibility concerns the replication
2113  * parameter is a hybrid beast which allows the value to be
2114  * either boolean or the string 'database'. The latter
2115  * connects to a specific database, which is required e.g. for
2116  * logical decoding.
2117  */
2118  if (strcmp(valptr, "database") == 0)
2119  {
2120  am_walsender = true;
2121  am_db_walsender = true;
2122  }
2123  else if (!parse_bool(valptr, &am_walsender))
2124  ereport(FATAL,
2125  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2126  errmsg("invalid value for parameter \"%s\": \"%s\"",
2127  "replication",
2128  valptr),
2129  errhint("Valid values are: \"false\", 0, \"true\", 1, \"database\".")));
2130  }
2131  else
2132  {
2133  /* Assume it's a generic GUC option */
2134  port->guc_options = lappend(port->guc_options,
2135  pstrdup(nameptr));
2136  port->guc_options = lappend(port->guc_options,
2137  pstrdup(valptr));
2138  }
2139  offset = valoffset + strlen(valptr) + 1;
2140  }
2141 
2142  /*
2143  * If we didn't find a packet terminator exactly at the end of the
2144  * given packet length, complain.
2145  */
2146  if (offset != len - 1)
2147  ereport(FATAL,
2148  (errcode(ERRCODE_PROTOCOL_VIOLATION),
2149  errmsg("invalid startup packet layout: expected terminator as last byte")));
2150  }
2151  else
2152  {
2153  /*
2154  * Get the parameters from the old-style, fixed-width-fields startup
2155  * packet as C strings. The packet destination was cleared first so a
2156  * short packet has zeros silently added. We have to be prepared to
2157  * truncate the pstrdup result for oversize fields, though.
2158  */
2159  StartupPacket *packet = (StartupPacket *) buf;
2160 
2161  port->database_name = pstrdup(packet->database);
2162  if (strlen(port->database_name) > sizeof(packet->database))
2163  port->database_name[sizeof(packet->database)] = '\0';
2164  port->user_name = pstrdup(packet->user);
2165  if (strlen(port->user_name) > sizeof(packet->user))
2166  port->user_name[sizeof(packet->user)] = '\0';
2167  port->cmdline_options = pstrdup(packet->options);
2168  if (strlen(port->cmdline_options) > sizeof(packet->options))
2169  port->cmdline_options[sizeof(packet->options)] = '\0';
2170  port->guc_options = NIL;
2171  }
2172 
2173  /* Check a user name was given. */
2174  if (port->user_name == NULL || port->user_name[0] == '\0')
2175  ereport(FATAL,
2176  (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
2177  errmsg("no PostgreSQL user name specified in startup packet")));
2178 
2179  /* The database defaults to the user name. */
2180  if (port->database_name == NULL || port->database_name[0] == '\0')
2181  port->database_name = pstrdup(port->user_name);
2182 
2183  if (Db_user_namespace)
2184  {
2185  /*
2186  * If the name has the form user@, it is a global user; remove the '@'.
2187  * We only want to do this if there is an '@' at the end and none earlier
2188  * in the user string, or they could pose as a local user of another
2189  * database attaching to this database.
2190  */
2191  if (strchr(port->user_name, '@') ==
2192  port->user_name + strlen(port->user_name) - 1)
2193  *strchr(port->user_name, '@') = '\0';
2194  else
2195  {
2196  /* Append '@' and dbname */
2197  port->user_name = psprintf("%s@%s", port->user_name, port->database_name);
2198  }
2199  }
2200 
2201  /*
2202  * Truncate given database and user names to length of a Postgres name.
2203  * This avoids lookup failures when overlength names are given.
2204  */
2205  if (strlen(port->database_name) >= NAMEDATALEN)
2206  port->database_name[NAMEDATALEN - 1] = '\0';
2207  if (strlen(port->user_name) >= NAMEDATALEN)
2208  port->user_name[NAMEDATALEN - 1] = '\0';
2209 
2210  /*
2211  * Normal walsender backends, e.g. for streaming replication, are not
2212  * connected to a particular database. But walsenders used for logical
2213  * replication need to connect to a specific database. We allow streaming
2214  * replication commands to be issued even if connected to a database as it
2215  * can make sense to first make a basebackup and then stream changes
2216  * starting from that.
2217  */
2218  if (am_walsender && !am_db_walsender)
2219  port->database_name[0] = '\0';
2220 
2221  /*
2222  * Done putting stuff in TopMemoryContext.
2223  */
2224  MemoryContextSwitchTo(oldcontext);
2225 
2226  /*
2227  * If we're going to reject the connection due to database state, say so
2228  * now instead of wasting cycles on an authentication exchange. (This also
2229  * allows a pg_ping utility to be written.)
2230  */
2231  switch (port->canAcceptConnections)
2232  {
2233  case CAC_STARTUP:
2234  ereport(FATAL,
2236  errmsg("the database system is starting up")));
2237  break;
2238  case CAC_SHUTDOWN:
2239  ereport(FATAL,
2241  errmsg("the database system is shutting down")));
2242  break;
2243  case CAC_RECOVERY:
2244  ereport(FATAL,
2246  errmsg("the database system is in recovery mode")));
2247  break;
2248  case CAC_TOOMANY:
2249  ereport(FATAL,
2250  (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
2251  errmsg("sorry, too many clients already")));
2252  break;
2253  case CAC_WAITBACKUP:
2254  /* OK for now, will check in InitPostgres */
2255  break;
2256  case CAC_OK:
2257  break;
2258  }
2259 
2260  return STATUS_OK;
2261 }
2262 
2263 
2264 /*
2265  * The client has sent a cancel request packet, not a normal
2266  * start-a-new-connection packet. Perform the necessary processing.
2267  * Nothing is sent back to the client.
      *
      * The packet is read as a CancelRequestPacket; its backendPID and
      * cancelAuthCode fields are passed through pg_ntoh32() before use. The
      * known backends are then searched for an entry with that PID. If one is
      * found and its cancel_key equals the supplied code, SIGINT is sent to it
      * with signal_child(); a key mismatch or an unknown PID is only logged.
      *
      * NOTE(review): the line holding the function name and parameter list is
      * absent from this listing; pkt is presumably one of the parameters.
2268  */
2269 static void
2271 {
2272  CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
2273  int backendPID;
2274  int32 cancelAuthCode;
2275  Backend *bp;
2276 
      /* Only the iteration variable needed by the selected loop is declared */
2277 #ifndef EXEC_BACKEND
2278  dlist_iter iter;
2279 #else
2280  int i;
2281 #endif
2282 
      /* presumably converts from network to host byte order -- confirm */
2283  backendPID = (int) pg_ntoh32(canc->backendPID);
2284  cancelAuthCode = (int32) pg_ntoh32(canc->cancelAuthCode);
2285 
2286  /*
2287  * See if we have a matching backend. In the EXEC_BACKEND case, we can no
2288  * longer access the postmaster's own backend list, and must rely on the
2289  * duplicate array in shared memory.
2290  */
2291 #ifndef EXEC_BACKEND
2292  dlist_foreach(iter, &BackendList)
2293  {
2294  bp = dlist_container(Backend, elem, iter.cur);
2295 #else
2296  for (i = MaxLivePostmasterChildren() - 1; i >= 0; i--)
2297  {
2298  bp = (Backend *) &ShmemBackendArray[i];
2299 #endif
2300  if (bp->pid == backendPID)
2301  {
2302  if (bp->cancel_key == cancelAuthCode)
2303  {
2304  /* Found a match; signal that backend to cancel current op */
2305  ereport(DEBUG2,
2306  (errmsg_internal("processing cancel request: sending SIGINT to process %d",
2307  backendPID)));
2308  signal_child(bp->pid, SIGINT);
2309  }
2310  else
2311  /* Right PID, wrong key: no way, Jose */
2312  ereport(LOG,
2313  (errmsg("wrong key in cancel request for process %d",
2314  backendPID)));
      /* The PID matched, so stop scanning whether or not the key was right */
2315  return;
2316  }
2317  }
2318 
2319  /* No matching backend */
2320  ereport(LOG,
2321  (errmsg("PID %d in cancel request did not match any process",
2322  backendPID)));
2323 }
2324 
2325 /*
2326  * canAcceptConnections --- check to see if database state allows connections.
2327  */
2328 static CAC_state
2330 {
2331  CAC_state result = CAC_OK;
2332 
2333  /*
2334  * Can't start backends when in startup/shutdown/inconsistent recovery
2335  * state.
2336  *
2337  * In state PM_WAIT_BACKUP only superusers can connect (this must be
2338  * allowed so that a superuser can end online backup mode); we return
2339  * CAC_WAITBACKUP code to indicate that this must be checked later. Note
2340  * that neither CAC_OK nor CAC_WAITBACKUP can safely be returned until we
2341  * have checked for too many children.
2342  */
2343  if (pmState != PM_RUN)
2344  {
2345  if (pmState == PM_WAIT_BACKUP)
2346  result = CAC_WAITBACKUP; /* allow superusers only */
2347  else if (Shutdown > NoShutdown)
2348  return CAC_SHUTDOWN; /* shutdown is pending */
2349  else if (!FatalError &&
2350  (pmState == PM_STARTUP ||
2351  pmState == PM_RECOVERY))
2352  return CAC_STARTUP; /* normal startup */
2353  else if (!FatalError &&
2355  result = CAC_OK; /* connection OK during hot standby */
2356  else
2357  return CAC_RECOVERY; /* else must be crash recovery */
2358  }
2359 
2360  /*
2361  * Don't start too many children.
2362  *
2363  * We allow more connections than we can have backends here because some
2364  * might still be authenticating; they might fail auth, or some existing
2365  * backend might exit before the auth cycle is completed. The exact
2366  * MaxBackends limit is enforced when a new backend tries to join the
2367  * shared-inval backend array.
2368  *
2369  * The limit here must match the sizes of the per-child-process arrays;
2370  * see comments for MaxLivePostmasterChildren().
2371  */
2373  result = CAC_TOOMANY;
2374 
2375  return result;
2376 }
2377 
2378 
2379 /*
2380  * ConnCreate -- create a local connection data structure
2381  *
2382  * Returns NULL on failure, other than out-of-memory which is fatal.
2383  */
2384 static Port *
2385 ConnCreate(int serverFd)
2386 {
2387  Port *port;
2388 
2389  if (!(port = (Port *) calloc(1, sizeof(Port))))
2390  {
2391  ereport(LOG,
2392  (errcode(ERRCODE_OUT_OF_MEMORY),
2393  errmsg("out of memory")));
2394  ExitPostmaster(1);
2395  }
2396 
2397  if (StreamConnection(serverFd, port) != STATUS_OK)
2398  {
2399  if (port->sock != PGINVALID_SOCKET)
2400  StreamClose(port->sock);
2401  ConnFree(port);
2402  return NULL;
2403  }
2404 
2405  /*
2406  * Allocate GSSAPI specific state struct
2407  */
2408 #ifndef EXEC_BACKEND
2409 #if defined(ENABLE_GSS) || defined(ENABLE_SSPI)
2410  port->gss = (pg_gssinfo *) calloc(1, sizeof(pg_gssinfo));
2411  if (!port->gss)
2412  {
2413  ereport(LOG,
2414  (errcode(ERRCODE_OUT_OF_MEMORY),
2415  errmsg("out of memory")));
2416  ExitPostmaster(1);
2417  }
2418 #endif
2419 #endif
2420 
2421  return port;
2422 }
2423 
2424 
2425 /*
2426  * ConnFree -- free a local connection data structure
2427  */
2428 static void
2430 {
2431 #ifdef USE_SSL
2432  secure_close(conn);
2433 #endif
2434  if (conn->gss)
2435  free(conn->gss);
2436  free(conn);
2437 }
2438 
2439 
2440 /*
2441  * ClosePostmasterPorts -- close all the postmaster's open sockets
2442  *
2443  * This is called during child process startup to release file descriptors
2444  * that are not needed by that child process. The postmaster still has
2445  * them open, of course.
2446  *
2447  * Note: we pass am_syslogger as a boolean because we don't want to set
2448  * the global variable yet when this is called.
2449  */
2450 void
2452 {
2453  int i;
2454 
2455 #ifndef WIN32
2456 
2457  /*
2458  * Close the write end of postmaster death watch pipe. It's important to
2459  * do this as early as possible, so that if postmaster dies, others won't
2460  * think that it's still running because we're holding the pipe open.
2461  */
2463  ereport(FATAL,
2465  errmsg_internal("could not close postmaster death monitoring pipe in child process: %m")));
2467 #endif
2468 
2469  /* Close the listen sockets */
2470  for (i = 0; i < MAXLISTEN; i++)
2471  {
2472  if (ListenSocket[i] != PGINVALID_SOCKET)
2473  {
2476  }
2477  }
2478 
2479  /* If using syslogger, close the read side of the pipe */
2480  if (!am_syslogger)
2481  {
2482 #ifndef WIN32
2483  if (syslogPipe[0] >= 0)
2484  close(syslogPipe[0]);
2485  syslogPipe[0] = -1;
2486 #else
2487  if (syslogPipe[0])
2488  CloseHandle(syslogPipe[0]);
2489  syslogPipe[0] = 0;
2490 #endif
2491  }
2492 
2493 #ifdef USE_BONJOUR
2494  /* If using Bonjour, close the connection to the mDNS daemon */
2495  if (bonjour_sdref)
2496  close(DNSServiceRefSockFD(bonjour_sdref));
2497 #endif
2498 }
2499 
2500 
/*
 * reset_shared -- (re)build shared memory and semaphores
 *
 * Forwards to CreateSharedMemoryAndSemaphores(), passing false as the first
 * argument and the caller's port number as the second.
 */
static void
reset_shared(int port)
{
    /*
     * The port number determines the IPC keys (when SysV shmem and/or
     * semaphores are in use), so each "cycle of life" normally ends up with
     * the same keys. That helps ensure dead IPC objects get cleaned up if
     * the postmaster crashes and is restarted.
     */
    CreateSharedMemoryAndSemaphores(false, port);
}
2517 
2518 
2519 /*
2520  * SIGHUP -- reread config files, and tell children to do same
2521  */
2522 static void
2524 {
2525  int save_errno = errno;
2526 
2527  PG_SETMASK(&BlockSig);
2528 
2529  if (Shutdown <= SmartShutdown)
2530  {
2531  ereport(LOG,
2532  (errmsg("received SIGHUP, reloading configuration files")));
2535  if (StartupPID != 0)
2537  if (BgWriterPID != 0)
2539  if (CheckpointerPID != 0)
2541  if (WalWriterPID != 0)
2543  if (WalReceiverPID != 0)
2545  if (AutoVacPID != 0)
2547  if (PgArchPID != 0)
2549  if (SysLoggerPID != 0)
2551  if (PgStatPID != 0)
2553 
2554  /* Reload authentication config files too */
2555  if (!load_hba())
2556  ereport(LOG,
2557  (errmsg("pg_hba.conf was not reloaded")));
2558 
2559  if (!load_ident())
2560  ereport(LOG,
2561  (errmsg("pg_ident.conf was not reloaded")));
2562 
2563 #ifdef USE_SSL
2564  /* Reload SSL configuration as well */
2565  if (EnableSSL)
2566  {
2567  if (secure_initialize(false) == 0)
2568  LoadedSSL = true;
2569  else
2570  ereport(LOG,
2571  (errmsg("SSL configuration was not reloaded")));
2572  }
2573  else
2574  {
2575  secure_destroy();
2576  LoadedSSL = false;
2577  }
2578 #endif
2579 
2580 #ifdef EXEC_BACKEND
2581  /* Update the starting-point file for future children */
2582  write_nondefault_variables(PGC_SIGHUP);
2583 #endif
2584  }
2585 
2587 
2588  errno = save_errno;
2589 }
2590 
2591 
2592 /*
2593  * pmdie -- signal handler for processing various postmaster signals.
2594  */
2595 static void
2597 {
2598  int save_errno = errno;
2599 
2600  PG_SETMASK(&BlockSig);
2601 
2602  ereport(DEBUG2,
2603  (errmsg_internal("postmaster received signal %d",
2604  postgres_signal_arg)));
2605 
2606  switch (postgres_signal_arg)
2607  {
2608  case SIGTERM:
2609 
2610  /*
2611  * Smart Shutdown:
2612  *
2613  * Wait for children to end their work, then shut down.
2614  */
2615  if (Shutdown >= SmartShutdown)
2616  break;
2618  ereport(LOG,
2619  (errmsg("received smart shutdown request")));
2620 
2621  /* Report status */
2623 #ifdef USE_SYSTEMD
2624  sd_notify(0, "STOPPING=1");
2625 #endif
2626 
2627  if (pmState == PM_RUN || pmState == PM_RECOVERY ||
2629  {
2630  /* autovac workers are told to shut down immediately */
2631  /* and bgworkers too; does this need tweaking? */
2632  SignalSomeChildren(SIGTERM,
2634  /* and the autovac launcher too */
2635  if (AutoVacPID != 0)
2636  signal_child(AutoVacPID, SIGTERM);
2637  /* and the bgwriter too */
2638  if (BgWriterPID != 0)
2639  signal_child(BgWriterPID, SIGTERM);
2640  /* and the walwriter too */
2641  if (WalWriterPID != 0)
2642  signal_child(WalWriterPID, SIGTERM);
2643 
2644  /*
2645  * If we're in recovery, we can't kill the startup process
2646  * right away, because at present doing so does not release
2647  * its locks. We might want to change this in a future
2648  * release. For the time being, the PM_WAIT_READONLY state
2649  * indicates that we're waiting for the regular (read only)
2650  * backends to die off; once they do, we'll kill the startup
2651  * and walreceiver processes.
2652  */
2653  pmState = (pmState == PM_RUN) ?
2655  }
2656 
2657  /*
2658  * Now wait for online backup mode to end and backends to exit. If
2659  * that is already the case, PostmasterStateMachine will take the
2660  * next step.
2661  */
2663  break;
2664 
2665  case SIGINT:
2666 
2667  /*
2668  * Fast Shutdown:
2669  *
2670  * Abort all children with SIGTERM (rollback active transactions
2671  * and exit) and shut down when they are gone.
2672  */
2673  if (Shutdown >= FastShutdown)
2674  break;
2676  ereport(LOG,
2677  (errmsg("received fast shutdown request")));
2678 
2679  /* Report status */
2681 #ifdef USE_SYSTEMD
2682  sd_notify(0, "STOPPING=1");
2683 #endif
2684 
2685  if (StartupPID != 0)
2686  signal_child(StartupPID, SIGTERM);
2687  if (BgWriterPID != 0)
2688  signal_child(BgWriterPID, SIGTERM);
2689  if (WalReceiverPID != 0)
2690  signal_child(WalReceiverPID, SIGTERM);
2691  if (pmState == PM_RECOVERY)
2692  {
2694 
2695  /*
2696  * Only startup, bgwriter, walreceiver, possibly bgworkers,
2697  * and/or checkpointer should be active in this state; we just
2698  * signaled the first four, and we don't want to kill
2699  * checkpointer yet.
2700  */
2702  }
2703  else if (pmState == PM_RUN ||
2704  pmState == PM_WAIT_BACKUP ||
2708  {
2709  ereport(LOG,
2710  (errmsg("aborting any active transactions")));
2711  /* shut down all backends and workers */
2712  SignalSomeChildren(SIGTERM,
2715  /* and the autovac launcher too */
2716  if (AutoVacPID != 0)
2717  signal_child(AutoVacPID, SIGTERM);
2718  /* and the walwriter too */
2719  if (WalWriterPID != 0)
2720  signal_child(WalWriterPID, SIGTERM);
2722  }
2723 
2724  /*
2725  * Now wait for backends to exit. If there are none,
2726  * PostmasterStateMachine will take the next step.
2727  */
2729  break;
2730 
2731  case SIGQUIT:
2732 
2733  /*
2734  * Immediate Shutdown:
2735  *
2736  * Abort all children with SIGQUIT, wait for them to exit,
2737  * terminate remaining ones with SIGKILL, then exit without
2738  * attempting to properly shut down the database system.
2739  */
2740  if (Shutdown >= ImmediateShutdown)
2741  break;
2743  ereport(LOG,
2744  (errmsg("received immediate shutdown request")));
2745 
2746  /* Report status */
2748 #ifdef USE_SYSTEMD
2749  sd_notify(0, "STOPPING=1");
2750 #endif
2751 
2754 
2755  /* set stopwatch for them to die */
2756  AbortStartTime = time(NULL);
2757 
2758  /*
2759  * Now wait for backends to exit. If there are none,
2760  * PostmasterStateMachine will take the next step.
2761  */
2763  break;
2764  }
2765 
2767 
2768  errno = save_errno;
2769 }
2770 
2771 /*
2772  * Reaper -- signal handler to cleanup after a child process dies.
2773  */
2774 static void
2776 {
2777  int save_errno = errno;
2778  int pid; /* process id of dead child process */
2779  int exitstatus; /* its exit status */
2780 
2781  PG_SETMASK(&BlockSig);
2782 
2783  ereport(DEBUG4,
2784  (errmsg_internal("reaping dead processes")));
2785 
2786  while ((pid = waitpid(-1, &exitstatus, WNOHANG)) > 0)
2787  {
2788  /*
2789  * Check if this child was a startup process.
2790  */
2791  if (pid == StartupPID)
2792  {
2793  StartupPID = 0;
2794 
2795  /*
2796  * Startup process exited in response to a shutdown request (or it
2797  * completed normally regardless of the shutdown request).
2798  */
2799  if (Shutdown > NoShutdown &&
2800  (EXIT_STATUS_0(exitstatus) || EXIT_STATUS_1(exitstatus)))
2801  {
2804  /* PostmasterStateMachine logic does the rest */
2805  continue;
2806  }
2807 
2808  if (EXIT_STATUS_3(exitstatus))
2809  {
2810  ereport(LOG,
2811  (errmsg("shutdown at recovery target")));
2814  TerminateChildren(SIGTERM);
2816  /* PostmasterStateMachine logic does the rest */
2817  continue;
2818  }
2819 
2820  /*
2821  * Unexpected exit of startup process (including FATAL exit)
2822  * during PM_STARTUP is treated as catastrophic. There are no
2823  * other processes running yet, so we can just exit.
2824  */
2825  if (pmState == PM_STARTUP && !EXIT_STATUS_0(exitstatus))
2826  {
2827  LogChildExit(LOG, _("startup process"),
2828  pid, exitstatus);
2829  ereport(LOG,
2830  (errmsg("aborting startup due to startup process failure")));
2831  ExitPostmaster(1);
2832  }
2833 
2834  /*
2835  * After PM_STARTUP, any unexpected exit (including FATAL exit) of
2836  * the startup process is catastrophic, so kill other children,
2837  * and set StartupStatus so we don't try to reinitialize after
2838  * they're gone. Exception: if StartupStatus is STARTUP_SIGNALED,
2839  * then we previously sent the startup process a SIGQUIT; so
2840  * that's probably the reason it died, and we do want to try to
2841  * restart in that case.
2842  */
2843  if (!EXIT_STATUS_0(exitstatus))
2844  {
2847  else
2849  HandleChildCrash(pid, exitstatus,
2850  _("startup process"));
2851  continue;
2852  }
2853 
2854  /*
2855  * Startup succeeded, commence normal operations
2856  */
2858  FatalError = false;
2859  Assert(AbortStartTime == 0);
2860  ReachedNormalRunning = true;
2861  pmState = PM_RUN;
2862 
2863  /*
2864  * Crank up the background tasks, if we didn't do that already
2865  * when we entered consistent recovery state. It doesn't matter
2866  * if this fails, we'll just try again later.
2867  */
2868  if (CheckpointerPID == 0)
2870  if (BgWriterPID == 0)
2872  if (WalWriterPID == 0)
2874 
2875  /*
2876  * Likewise, start other special children as needed. In a restart
2877  * situation, some of them may be alive already.
2878  */
2881  if (PgArchStartupAllowed() && PgArchPID == 0)
2882  PgArchPID = pgarch_start();
2883  if (PgStatPID == 0)
2884  PgStatPID = pgstat_start();
2885 
2886  /* workers may be scheduled to start now */
2888 
2889  /* at this point we are really open for business */
2890  ereport(LOG,
2891  (errmsg("database system is ready to accept connections")));
2892 
2893  /* Report status */
2895 #ifdef USE_SYSTEMD
2896  sd_notify(0, "READY=1");
2897 #endif
2898 
2899  continue;
2900  }
2901 
2902  /*
2903  * Was it the bgwriter? Normal exit can be ignored; we'll start a new
2904  * one at the next iteration of the postmaster's main loop, if
2905  * necessary. Any other exit condition is treated as a crash.
2906  */
2907  if (pid == BgWriterPID)
2908  {
2909  BgWriterPID = 0;
2910  if (!EXIT_STATUS_0(exitstatus))
2911  HandleChildCrash(pid, exitstatus,
2912  _("background writer process"));
2913  continue;
2914  }
2915 
2916  /*
2917  * Was it the checkpointer?
2918  */
2919  if (pid == CheckpointerPID)
2920  {
2921  CheckpointerPID = 0;
2922  if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN)
2923  {
2924  /*
2925  * OK, we saw normal exit of the checkpointer after it's been
2926  * told to shut down. We expect that it wrote a shutdown
2927  * checkpoint. (If for some reason it didn't, recovery will
2928  * occur on next postmaster start.)
2929  *
2930  * At this point we should have no normal backend children
2931  * left (else we'd not be in PM_SHUTDOWN state) but we might
2932  * have dead_end children to wait for.
2933  *
2934  * If we have an archiver subprocess, tell it to do a last
2935  * archive cycle and quit. Likewise, if we have walsender
2936  * processes, tell them to send any remaining WAL and quit.
2937  */
2939 
2940  /* Waken archiver for the last time */
2941  if (PgArchPID != 0)
2943 
2944  /*
2945  * Waken walsenders for the last time. No regular backends
2946  * should be around anymore.
2947  */
2949 
2951 
2952  /*
2953  * We can also shut down the stats collector now; there's
2954  * nothing left for it to do.
2955  */
2956  if (PgStatPID != 0)
2958  }
2959  else
2960  {
2961  /*
2962  * Any unexpected exit of the checkpointer (including FATAL
2963  * exit) is treated as a crash.
2964  */
2965  HandleChildCrash(pid, exitstatus,
2966  _("checkpointer process"));
2967  }
2968 
2969  continue;
2970  }
2971 
2972  /*
2973  * Was it the wal writer? Normal exit can be ignored; we'll start a
2974  * new one at the next iteration of the postmaster's main loop, if
2975  * necessary. Any other exit condition is treated as a crash.
2976  */
2977  if (pid == WalWriterPID)
2978  {
2979  WalWriterPID = 0;
2980  if (!EXIT_STATUS_0(exitstatus))
2981  HandleChildCrash(pid, exitstatus,
2982  _("WAL writer process"));
2983  continue;
2984  }
2985 
2986  /*
2987  * Was it the wal receiver? If exit status is zero (normal) or one
2988  * (FATAL exit), we assume everything is all right just like normal
2989  * backends. (If we need a new wal receiver, we'll start one at the
2990  * next iteration of the postmaster's main loop.)
2991  */
2992  if (pid == WalReceiverPID)
2993  {
2994  WalReceiverPID = 0;
2995  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
2996  HandleChildCrash(pid, exitstatus,
2997  _("WAL receiver process"));
2998  continue;
2999  }
3000 
3001  /*
3002  * Was it the autovacuum launcher? Normal exit can be ignored; we'll
3003  * start a new one at the next iteration of the postmaster's main
3004  * loop, if necessary. Any other exit condition is treated as a
3005  * crash.
3006  */
3007  if (pid == AutoVacPID)
3008  {
3009  AutoVacPID = 0;
3010  if (!EXIT_STATUS_0(exitstatus))
3011  HandleChildCrash(pid, exitstatus,
3012  _("autovacuum launcher process"));
3013  continue;
3014  }
3015 
3016  /*
3017  * Was it the archiver? If so, just try to start a new one; no need
3018  * to force reset of the rest of the system. (If that fails, we'll try
3019  * again in future cycles of the main loop.) The exception is when we
3020  * were waiting for it to shut down: don't restart it in that case, and
3021  * PostmasterStateMachine() will advance to the next shutdown step.
3022  */
3023  if (pid == PgArchPID)
3024  {
3025  PgArchPID = 0;
3026  if (!EXIT_STATUS_0(exitstatus))
3027  LogChildExit(LOG, _("archiver process"),
3028  pid, exitstatus);
3029  if (PgArchStartupAllowed())
3030  PgArchPID = pgarch_start();
3031  continue;
3032  }
3033 
3034  /*
3035  * Was it the statistics collector? If so, just try to start a new
3036  * one; no need to force reset of the rest of the system. (If that
3037  * fails, we'll try again in future cycles of the main loop.)
3038  */
3039  if (pid == PgStatPID)
3040  {
3041  PgStatPID = 0;
3042  if (!EXIT_STATUS_0(exitstatus))
3043  LogChildExit(LOG, _("statistics collector process"),
3044  pid, exitstatus);
3045  if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
3046  PgStatPID = pgstat_start();
3047  continue;
3048  }
3049 
3050  /* Was it the system logger? If so, try to start a new one */
3051  if (pid == SysLoggerPID)
3052  {
3053  SysLoggerPID = 0;
3054  /* for safety's sake, launch new logger *first* */
3056  if (!EXIT_STATUS_0(exitstatus))
3057  LogChildExit(LOG, _("system logger process"),
3058  pid, exitstatus);
3059  continue;
3060  }
3061 
3062  /* Was it one of our background workers? */
3063  if (CleanupBackgroundWorker(pid, exitstatus))
3064  {
3065  /* have it be restarted */
3066  HaveCrashedWorker = true;
3067  continue;
3068  }
3069 
3070  /*
3071  * Else do standard backend child cleanup.
3072  */
3073  CleanupBackend(pid, exitstatus);
3074  } /* loop over pending child-death reports */
3075 
3076  /*
3077  * After cleaning out the SIGCHLD queue, see if we have any state changes
3078  * or actions to make.
3079  */
3081 
3082  /* Done with signal handler */
3084 
3085  errno = save_errno;
3086 }
3087 
3088 /*
3089  * Scan the bgworkers list and see if the given PID (which has just stopped
3090  * or crashed) is in it. Handle its shutdown if so, and return true. If not a
3091  * bgworker, return false.
3092  *
3093  * This is heavily based on CleanupBackend. One important difference is that
3094  * we don't know yet that the dying process is a bgworker, so we must be silent
3095  * until we're sure it is.
3096  */
3097 static bool
3099  int exitstatus) /* child's exit status */
3100 {
3101  char namebuf[MAXPGPATH];
3102  slist_mutable_iter iter;
3103 
3105  {
3106  RegisteredBgWorker *rw;
3107 
3108  rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
3109 
3110  if (rw->rw_pid != pid)
3111  continue;
3112 
3113 #ifdef WIN32
3114  /* see CleanupBackend */
3115  if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3116  exitstatus = 0;
3117 #endif
3118 
3119  snprintf(namebuf, MAXPGPATH, _("background worker \"%s\""),
3120  rw->rw_worker.bgw_type);
3121 
3122 
3123  if (!EXIT_STATUS_0(exitstatus))
3124  {
3125  /* Record timestamp, so we know when to restart the worker. */
3127  }
3128  else
3129  {
3130  /* Zero exit status means terminate */
3131  rw->rw_crashed_at = 0;
3132  rw->rw_terminate = true;
3133  }
3134 
3135  /*
3136  * Additionally, for shared-memory-connected workers, just like a
3137  * backend, any exit status other than 0 or 1 is considered a crash
3138  * and causes a system-wide restart.
3139  */
3140  if ((rw->rw_worker.bgw_flags & BGWORKER_SHMEM_ACCESS) != 0)
3141  {
3142  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3143  {
3144  HandleChildCrash(pid, exitstatus, namebuf);
3145  return true;
3146  }
3147  }
3148 
3149  /*
3150  * We must release the postmaster child slot whether this worker is
3151  * connected to shared memory or not, but we only treat it as a crash
3152  * if it is in fact connected.
3153  */
3156  {
3157  HandleChildCrash(pid, exitstatus, namebuf);
3158  return true;
3159  }
3160 
3161  /* Get it out of the BackendList and clear out remaining data */
3162  dlist_delete(&rw->rw_backend->elem);
3163 #ifdef EXEC_BACKEND
3164  ShmemBackendArrayRemove(rw->rw_backend);
3165 #endif
3166 
3167  /*
3168  * It's possible that this background worker started some OTHER
3169  * background worker and asked to be notified when that worker started
3170  * or stopped. If so, cancel any notifications destined for the
3171  * now-dead backend.
3172  */
3173  if (rw->rw_backend->bgworker_notify)
3175  free(rw->rw_backend);
3176  rw->rw_backend = NULL;
3177  rw->rw_pid = 0;
3178  rw->rw_child_slot = 0;
3179  ReportBackgroundWorkerExit(&iter); /* report child death */
3180 
3181  LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG,
3182  namebuf, pid, exitstatus);
3183 
3184  return true;
3185  }
3186 
3187  return false;
3188 }
3189 
3190 /*
3191  * CleanupBackend -- cleanup after terminated backend.
3192  *
3193  * Remove all local state associated with backend.
3194  *
3195  * If you change this, see also CleanupBackgroundWorker.
3196  */
3197 static void
3199  int exitstatus) /* child's exit status. */
3200 {
3201  dlist_mutable_iter iter;
3202 
3203  LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
3204 
3205  /*
3206  * If a backend dies in an ugly way then we must signal all other backends
3207  * to quickdie. If exit status is zero (normal) or one (FATAL exit), we
3208  * assume everything is all right and proceed to remove the backend from
3209  * the active backend list.
3210  */
3211 
3212 #ifdef WIN32
3213 
3214  /*
3215  * On win32, also treat ERROR_WAIT_NO_CHILDREN (128) as nonfatal case,
3216  * since that sometimes happens under load when the process fails to start
3217  * properly (long before it starts using shared memory). Microsoft reports
3218  * it is related to mutex failure:
3219  * http://archives.postgresql.org/pgsql-hackers/2010-09/msg00790.php
3220  */
3221  if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3222  {
3223  LogChildExit(LOG, _("server process"), pid, exitstatus);
3224  exitstatus = 0;
3225  }
3226 #endif
3227 
3228  if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3229  {
3230  HandleChildCrash(pid, exitstatus, _("server process"));
3231  return;
3232  }
3233 
3234  dlist_foreach_modify(iter, &BackendList)
3235  {
3236  Backend *bp = dlist_container(Backend, elem, iter.cur);
3237 
3238  if (bp->pid == pid)
3239  {
3240  if (!bp->dead_end)
3241  {
3243  {
3244  /*
3245  * Uh-oh, the child failed to clean itself up. Treat as a
3246  * crash after all.
3247  */
3248  HandleChildCrash(pid, exitstatus, _("server process"));
3249  return;
3250  }
3251 #ifdef EXEC_BACKEND
3252  ShmemBackendArrayRemove(bp);
3253 #endif
3254  }
3255  if (bp->bgworker_notify)
3256  {
3257  /*
3258  * This backend may have been slated to receive SIGUSR1 when
3259  * some background worker started or stopped. Cancel those
3260  * notifications, as we don't want to signal PIDs that are not
3261  * PostgreSQL backends. This gets skipped in the (probably
3262  * very common) case where the backend has never requested any
3263  * such notifications.
3264  */
3266  }
3267  dlist_delete(iter.cur);
3268  free(bp);
3269  break;
3270  }
3271  }
3272 }
3273 
3274 /*
3275  * HandleChildCrash -- cleanup after failed backend, bgwriter, checkpointer,
3276  * walwriter, autovacuum, or background worker.
3277  *
3278  * The objectives here are to clean up our local state about the child
3279  * process, and to signal all other remaining children to quickdie.
      *
      * pid identifies the child that died.  Each tracked child whose PID
      * equals it has its local bookkeeping cleared; every other live regular
      * backend and auxiliary process is sent SIGQUIT (or SIGSTOP when SendStop
      * is set), but only while take_action is true.  The pgarch and pgstat
      * PIDs are never compared against pid here; they are only signalled, and
      * always with SIGQUIT.
      *
      * exitstatus and procname are used solely for the LogChildExit() call.
      *
      * Before returning, FatalError is set unless Shutdown == ImmediateShutdown,
      * and AbortStartTime is set to time(NULL) if it was still 0.
      *
      * NOTE(review): this extract is missing listing lines 3306, 3318, 3348,
      * 3365, 3409, 3418, 3509, 3522 and 3524 (the embedded numbers jump at
      * those points), so some statements of the function are not shown.
3280  */
3281 static void
3282 HandleChildCrash(int pid, int exitstatus, const char *procname)
3283 {
3284  dlist_mutable_iter iter;
3285  slist_iter siter;
3286  Backend *bp;
3287  bool take_action;
3288 
3289  /*
3290  * We only log messages and send signals if this is the first process
3291  * crash and we're not doing an immediate shutdown; otherwise, we're only
3292  * here to update postmaster's idea of live processes. If we have already
3293  * signalled children, nonzero exit status is to be expected, so don't
3294  * clutter log.
3295  */
3296  take_action = !FatalError && Shutdown != ImmediateShutdown;
3297 
3298  if (take_action)
3299  {
3300  LogChildExit(LOG, procname, pid, exitstatus);
3301  ereport(LOG,
3302  (errmsg("terminating any other active server processes")));
3303  }
3304 
3305  /* Process background workers. */
      /*
       * NOTE(review): listing line 3306 is missing here; presumably it is the
       * loop header that advances siter, since the body below reads siter.cur
       * and uses "continue".
       */
3307  {
3308  RegisteredBgWorker *rw;
3309 
3310  rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
3311  if (rw->rw_pid == 0)
3312  continue; /* not running */
3313  if (rw->rw_pid == pid)
3314  {
3315  /*
3316  * Found entry for freshly-dead worker, so remove it.
3317  */
3319  dlist_delete(&rw->rw_backend->elem);
3320 #ifdef EXEC_BACKEND
3321  ShmemBackendArrayRemove(rw->rw_backend);
3322 #endif
3323  free(rw->rw_backend);
3324  rw->rw_backend = NULL;
3325  rw->rw_pid = 0;
3326  rw->rw_child_slot = 0;
3327  /* don't reset crashed_at */
3328  /* don't report child stop, either */
3329  /* Keep looping so we can signal remaining workers */
3330  }
3331  else
3332  {
3333  /*
3334  * This worker is still alive. Unless we did so already, tell it
3335  * to commit hara-kiri.
3336  *
3337  * SIGQUIT is the special signal that says exit without proc_exit
3338  * and let the user know what's going on. But if SendStop is set
3339  * (-s on command line), then we send SIGSTOP instead, so that we
3340  * can get core dumps from all backends by hand.
3341  */
3342  if (take_action)
3343  {
3344  ereport(DEBUG2,
3345  (errmsg_internal("sending %s to process %d",
3346  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3347  (int) rw->rw_pid)));
      /*
       * NOTE(review): listing line 3348 is missing here; presumably the
       * signal_child() call matching the debug message above, as in the
       * regular-backend loop below.
       */
3349  }
3350  }
3351  }
3352 
3353  /* Process regular backends */
3354  dlist_foreach_modify(iter, &BackendList)
3355  {
3356  bp = dlist_container(Backend, elem, iter.cur);
3357 
3358  if (bp->pid == pid)
3359  {
3360  /*
3361  * Found entry for freshly-dead backend, so remove it.
3362  */
3363  if (!bp->dead_end)
3364  {
3366 #ifdef EXEC_BACKEND
3367  ShmemBackendArrayRemove(bp);
3368 #endif
3369  }
3370  dlist_delete(iter.cur);
3371  free(bp);
3372  /* Keep looping so we can signal remaining backends */
3373  }
3374  else
3375  {
3376  /*
3377  * This backend is still alive. Unless we did so already, tell it
3378  * to commit hara-kiri.
3379  *
3380  * SIGQUIT is the special signal that says exit without proc_exit
3381  * and let the user know what's going on. But if SendStop is set
3382  * (-s on command line), then we send SIGSTOP instead, so that we
3383  * can get core dumps from all backends by hand.
3384  *
3385  * We could exclude dead_end children here, but at least in the
3386  * SIGSTOP case it seems better to include them.
3387  *
3388  * Background workers were already processed above; ignore them
3389  * here.
3390  */
3391  if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
3392  continue;
3393 
3394  if (take_action)
3395  {
3396  ereport(DEBUG2,
3397  (errmsg_internal("sending %s to process %d",
3398  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3399  (int) bp->pid)));
3400  signal_child(bp->pid, (SendStop ? SIGSTOP : SIGQUIT));
3401  }
3402  }
3403  }
3404 
      /*
       * The auxiliary processes below all follow one pattern: if the dead
       * child is that process, forget its PID; otherwise, if it is running
       * and take_action is set, signal it.
       */
3405  /* Take care of the startup process too */
3406  if (pid == StartupPID)
3407  {
3408  StartupPID = 0;
3410  }
3411  else if (StartupPID != 0 && take_action)
3412  {
3413  ereport(DEBUG2,
3414  (errmsg_internal("sending %s to process %d",
3415  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3416  (int) StartupPID)));
3417  signal_child(StartupPID, (SendStop ? SIGSTOP : SIGQUIT));
3419  }
3420 
3421  /* Take care of the bgwriter too */
3422  if (pid == BgWriterPID)
3423  BgWriterPID = 0;
3424  else if (BgWriterPID != 0 && take_action)
3425  {
3426  ereport(DEBUG2,
3427  (errmsg_internal("sending %s to process %d",
3428  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3429  (int) BgWriterPID)));
3430  signal_child(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
3431  }
3432 
3433  /* Take care of the checkpointer too */
3434  if (pid == CheckpointerPID)
3435  CheckpointerPID = 0;
3436  else if (CheckpointerPID != 0 && take_action)
3437  {
3438  ereport(DEBUG2,
3439  (errmsg_internal("sending %s to process %d",
3440  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3441  (int) CheckpointerPID)));
3442  signal_child(CheckpointerPID, (SendStop ? SIGSTOP : SIGQUIT));
3443  }
3444 
3445  /* Take care of the walwriter too */
3446  if (pid == WalWriterPID)
3447  WalWriterPID = 0;
3448  else if (WalWriterPID != 0 && take_action)
3449  {
3450  ereport(DEBUG2,
3451  (errmsg_internal("sending %s to process %d",
3452  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3453  (int) WalWriterPID)));
3454  signal_child(WalWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
3455  }
3456 
3457  /* Take care of the walreceiver too */
3458  if (pid == WalReceiverPID)
3459  WalReceiverPID = 0;
3460  else if (WalReceiverPID != 0 && take_action)
3461  {
3462  ereport(DEBUG2,
3463  (errmsg_internal("sending %s to process %d",
3464  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3465  (int) WalReceiverPID)));
3466  signal_child(WalReceiverPID, (SendStop ? SIGSTOP : SIGQUIT));
3467  }
3468 
3469  /* Take care of the autovacuum launcher too */
3470  if (pid == AutoVacPID)
3471  AutoVacPID = 0;
3472  else if (AutoVacPID != 0 && take_action)
3473  {
3474  ereport(DEBUG2,
3475  (errmsg_internal("sending %s to process %d",
3476  (SendStop ? "SIGSTOP" : "SIGQUIT"),
3477  (int) AutoVacPID)));
3478  signal_child(AutoVacPID, (SendStop ? SIGSTOP : SIGQUIT));
3479  }
3480 
3481  /*
3482  * Force a power-cycle of the pgarch process too. (This isn't absolutely
3483  * necessary, but it seems like a good idea for robustness, and it
3484  * simplifies the state-machine logic in the case where a shutdown request
3485  * arrives during crash processing.)
3486  */
3487  if (PgArchPID != 0 && take_action)
3488  {
3489  ereport(DEBUG2,
3490  (errmsg_internal("sending %s to process %d",
3491  "SIGQUIT",
3492  (int) PgArchPID)));
3493  signal_child(PgArchPID, SIGQUIT);
3494  }
3495 
3496  /*
3497  * Force a power-cycle of the pgstat process too. (This isn't absolutely
3498  * necessary, but it seems like a good idea for robustness, and it
3499  * simplifies the state-machine logic in the case where a shutdown request
3500  * arrives during crash processing.)
3501  */
3502  if (PgStatPID != 0 && take_action)
3503  {
3504  ereport(DEBUG2,
3505  (errmsg_internal("sending %s to process %d",
3506  "SIGQUIT",
3507  (int) PgStatPID)));
3508  signal_child(PgStatPID, SIGQUIT);
3510  }
3511 
3512  /* We do NOT restart the syslogger */
3513 
3514  if (Shutdown != ImmediateShutdown)
3515  FatalError = true;
3516 
3517  /* We now transit into a state of waiting for children to die */
      /*
       * NOTE(review): listing lines 3522 and 3524 are missing, so one term of
       * this condition and the statement it guards are not visible here.
       */
3518  if (pmState == PM_RECOVERY ||
3519  pmState == PM_HOT_STANDBY ||
3520  pmState == PM_RUN ||
3521  pmState == PM_WAIT_BACKUP ||
3523  pmState == PM_SHUTDOWN)
3525 
3526  /*
3527  * .. and if this doesn't happen quickly enough, now the clock is ticking
3528  * for us to kill them without mercy.
3529  */
3530  if (AbortStartTime == 0)
3531  AbortStartTime = time(NULL);
3532 }
3533 
3534 /*
3535  * Log the death of a child process.
      *
      * lev is the ereport() level to log at.  procname is a noun phrase
      * describing the child (e.g. "server process") that is substituted into
      * the message, pid is the child's PID, and exitstatus is its wait status.
      *
      * Exactly one message is emitted, chosen by testing exitstatus with
      * WIFEXITED(), then WIFSIGNALED(), with a catch-all for anything else.
      * The signal case has three build-dependent wordings (WIN32, platforms
      * with sys_siglist, and a plain fallback).  Whenever a description of the
      * child's last activity was obtained, it is attached as an errdetail.
3536  */
3537 static void
3538 LogChildExit(int lev, const char *procname, int pid, int exitstatus)
3539 {
3540  /*
3541  * size of activity_buffer is arbitrary, but set equal to default
3542  * track_activity_query_size
3543  */
3544  char activity_buffer[1024];
3545  const char *activity = NULL;
3546 
      /* Only look up what the child was doing when EXIT_STATUS_0() is false */
3547  if (!EXIT_STATUS_0(exitstatus))
3548  activity = pgstat_get_crashed_backend_activity(pid,
3549  activity_buffer,
3550  sizeof(activity_buffer));
3551 
3552  if (WIFEXITED(exitstatus))
3553  ereport(lev,
3554 
3555  /*------
3556  translator: %s is a noun phrase describing a child process, such as
3557  "server process" */
3558  (errmsg("%s (PID %d) exited with exit code %d",
3559  procname, pid, WEXITSTATUS(exitstatus)),
3560  activity ? errdetail("Failed process was running: %s", activity) : 0));
3561  else if (WIFSIGNALED(exitstatus))
3562 #if defined(WIN32)
3563  ereport(lev,
3564 
3565  /*------
3566  translator: %s is a noun phrase describing a child process, such as
3567  "server process" */
3568  (errmsg("%s (PID %d) was terminated by exception 0x%X",
3569  procname, pid, WTERMSIG(exitstatus)),
3570  errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
3571  activity ? errdetail("Failed process was running: %s", activity) : 0));
3572 #elif defined(HAVE_DECL_SYS_SIGLIST) && HAVE_DECL_SYS_SIGLIST
3573  ereport(lev,
3574 
3575  /*------
3576  translator: %s is a noun phrase describing a child process, such as
3577  "server process" */
3578  (errmsg("%s (PID %d) was terminated by signal %d: %s",
3579  procname, pid, WTERMSIG(exitstatus),
      /* guard the sys_siglist[] lookup against signal numbers >= NSIG */
3580  WTERMSIG(exitstatus) < NSIG ?
3581  sys_siglist[WTERMSIG(exitstatus)] : "(unknown)"),
3582  activity ? errdetail("Failed process was running: %s", activity) : 0));
3583 #else
3584  ereport(lev,
3585 
3586  /*------
3587  translator: %s is a noun phrase describing a child process, such as
3588  "server process" */
3589  (errmsg("%s (PID %d) was terminated by signal %d",
3590  procname, pid, WTERMSIG(exitstatus)),
3591  activity ? errdetail("Failed process was running: %s", activity) : 0));
3592 #endif
3593  else
3594  ereport(lev,
3595 
3596  /*------
3597  translator: %s is a noun phrase describing a child process, such as
3598  "server process" */
3599  (errmsg("%s (PID %d) exited with unrecognized status %d",
3600  procname, pid, exitstatus),
3601  activity ? errdetail("Failed process was running: %s", activity) : 0));
3602 }
3603 
3604 /*
3605  * Advance the postmaster's state machine and take actions as appropriate
3606  *
3607  * This is common code for pmdie(), reaper() and sigusr1_handler(), which
3608  * receive the signals that might mean we need to change state.
      *
      * The body is a sequence of independent "if (pmState == ...)" tests, in
      * this order: PM_WAIT_BACKUP, PM_WAIT_READONLY, PM_WAIT_BACKENDS,
      * PM_SHUTDOWN_2, PM_WAIT_DEAD_END, followed by the exit and
      * crash-restart checks that apply once pmState is PM_NO_CHILDREN.
      * Because the tests are not chained with "else", a state change made by
      * an earlier test can presumably be acted on by a later test within the
      * same call.
      *
      * NOTE(review): this extract is missing many listing lines, including the
      * one carrying the function name (3611) and most of the statements that
      * assign pmState (the embedded numbers jump at 3619, 3632, 3638, 3660,
      * 3665, 3669, 3675, 3690, 3693, 3697, 3709, 3712, 3714, 3716, 3736, 3766,
      * 3782, 3800, 3816, 3829, 3834, 3836, 3838 and 3840).  Several "if"
      * headers below therefore appear without their condition or body.
3609  */
3610 static void
3612 {
3613  if (pmState == PM_WAIT_BACKUP)
3614  {
3615  /*
3616  * PM_WAIT_BACKUP state ends when online backup mode is not active.
3617  */
3618  if (!BackupInProgress())
3620  }
3621 
3622  if (pmState == PM_WAIT_READONLY)
3623  {
3624  /*
3625  * PM_WAIT_READONLY state ends when we have no regular backends that
3626  * have been started during recovery. We kill the startup and
3627  * walreceiver processes and transition to PM_WAIT_BACKENDS. Ideally,
3628  * we might like to kill these processes first and then wait for
3629  * backends to die off, but that doesn't work at present because
3630  * killing the startup process doesn't release its locks.
3631  */
3633  {
3634  if (StartupPID != 0)
3635  signal_child(StartupPID, SIGTERM);
3636  if (WalReceiverPID != 0)
3637  signal_child(WalReceiverPID, SIGTERM);
3639  }
3640  }
3641 
3642  /*
3643  * If we are in a state-machine state that implies waiting for backends to
3644  * exit, see if they're all gone, and change state if so.
3645  */
3646  if (pmState == PM_WAIT_BACKENDS)
3647  {
3648  /*
3649  * PM_WAIT_BACKENDS state ends when we have no regular backends
3650  * (including autovac workers), no bgworkers (including unconnected
3651  * ones), and no walwriter, autovac launcher or bgwriter. If we are
3652  * doing crash recovery or an immediate shutdown then we expect the
3653  * checkpointer to exit as well, otherwise not. The archiver, stats,
3654  * and syslogger processes are disregarded since they are not
3655  * connected to shared memory; we also disregard dead_end children
3656  * here. Walsenders are also disregarded, they will be terminated
3657  * later after writing the checkpoint record, like the archiver
3658  * process.
3659  */
3661  StartupPID == 0 &&
3662  WalReceiverPID == 0 &&
3663  BgWriterPID == 0 &&
3664  (CheckpointerPID == 0 ||
3666  WalWriterPID == 0 &&
3667  AutoVacPID == 0)
3668  {
3670  {
3671  /*
3672  * Start waiting for dead_end children to die. This state
3673  * change causes ServerLoop to stop creating new ones.
3674  */
3676 
3677  /*
3678  * We already SIGQUIT'd the archiver and stats processes, if
3679  * any, when we started immediate shutdown or entered
3680  * FatalError state.
3681  */
3682  }
3683  else
3684  {
3685  /*
3686  * If we get here, we are proceeding with normal shutdown. All
3687  * the regular children are gone, and it's time to tell the
3688  * checkpointer to do a shutdown checkpoint.
3689  */
3691  /* Start the checkpointer if not running */
3692  if (CheckpointerPID == 0)
3694  /* And tell it to shut down */
3695  if (CheckpointerPID != 0)
3696  {
3698  pmState = PM_SHUTDOWN;
3699  }
3700  else
3701  {
3702  /*
3703  * If we failed to fork a checkpointer, just shut down.
3704  * Any required cleanup will happen at next restart. We
3705  * set FatalError so that an "abnormal shutdown" message
3706  * gets logged when we exit.
3707  */
3708  FatalError = true;
3710 
3711  /* Kill the walsenders, archiver and stats collector too */
3713  if (PgArchPID != 0)
3715  if (PgStatPID != 0)
3717  }
3718  }
3719  }
3720  }
3721 
3722  if (pmState == PM_SHUTDOWN_2)
3723  {
3724  /*
3725  * PM_SHUTDOWN_2 state ends when there are no children other than
3726  * dead_end children left. There shouldn't be any regular backends
3727  * left by now anyway; what we're really waiting for is walsenders and
3728  * archiver.
3729  *
3730  * Walreceiver should normally be dead by now, but not when a fast
3731  * shutdown is performed during recovery.
3732  */
3733  if (PgArchPID == 0 && CountChildren(BACKEND_TYPE_ALL) == 0 &&
3734  WalReceiverPID == 0)
3735  {
3737  }
3738  }
3739 
3740  if (pmState == PM_WAIT_DEAD_END)
3741  {
3742  /*
3743  * PM_WAIT_DEAD_END state ends when the BackendList is entirely empty
3744  * (ie, no dead_end children remain), and the archiver and stats
3745  * collector are gone too.
3746  *
3747  * The reason we wait for those two is to protect them against a new
3748  * postmaster starting conflicting subprocesses; this isn't an
3749  * ironclad protection, but it at least helps in the
3750  * shutdown-and-immediately-restart scenario. Note that they have
3751  * already been sent appropriate shutdown signals, either during a
3752  * normal state transition leading up to PM_WAIT_DEAD_END, or during
3753  * FatalError processing.
3754  */
3755  if (dlist_is_empty(&BackendList) &&
3756  PgArchPID == 0 && PgStatPID == 0)
3757  {
3758  /* These other guys should be dead already */
3759  Assert(StartupPID == 0);
3760  Assert(WalReceiverPID == 0);
3761  Assert(BgWriterPID == 0);
3762  Assert(CheckpointerPID == 0);
3763  Assert(WalWriterPID == 0);
3764  Assert(AutoVacPID == 0);
3765  /* syslogger is not considered here */
3767  }
3768  }
3769 
3770  /*
3771  * If we've been told to shut down, we exit as soon as there are no
3772  * remaining children. If there was a crash, cleanup will occur at the
3773  * next startup. (Before PostgreSQL 8.3, we tried to recover from the
3774  * crash before exiting, but that seems unwise if we are quitting because
3775  * we got SIGTERM from init --- there may well not be time for recovery
3776  * before init decides to SIGKILL us.)
3777  *
3778  * Note that the syslogger continues to run. It will exit when it sees
3779  * EOF on its input pipe, which happens when there are no more upstream
3780  * processes.
3781  */
3783  {
      /* Exit code 1 reports an abnormal shutdown, 0 a normal one */
3784  if (FatalError)
3785  {
3786  ereport(LOG, (errmsg("abnormal database system shutdown")));
3787  ExitPostmaster(1);
3788  }
3789  else
3790  {
3791  /*
3792  * Terminate exclusive backup mode to avoid recovery after a clean
3793  * fast shutdown. Since an exclusive backup can only be taken
3794  * during normal running (and not, for example, while running
3795  * under Hot Standby) it only makes sense to do this if we reached
3796  * normal running. If we're still in recovery, the backup file is
3797  * one we're recovering *from*, and we must keep it around so that
3798  * recovery restarts from the right place.
3799  */
3801  CancelBackup();
3802 
3803  /* Normal exit from the postmaster is here */
3804  ExitPostmaster(0);
3805  }
3806  }
3807 
3808  /*
3809  * If the startup process failed, or the user does not want an automatic
3810  * restart after backend crashes, wait for all non-syslogger children to
3811  * exit, and then exit postmaster. We don't try to reinitialize when the
3812  * startup process fails, because more than likely it will just fail again
3813  * and we will keep trying forever.
3814  */
3815  if (pmState == PM_NO_CHILDREN &&
3817  ExitPostmaster(1);
3818 
3819  /*
3820  * If we need to recover from a crash, wait for all non-syslogger children
3821  * to exit, then reset shmem and StartupDataBase.
3822  */
3823  if (FatalError && pmState == PM_NO_CHILDREN)
3824  {
3825  ereport(LOG,
3826  (errmsg("all server processes terminated; reinitializing")));
3827 
3828  /* allow background workers to immediately restart */
3830 
3831  shmem_exit(1);
3832 
3833  /* re-read control file into local memory */
3835 
3837 
3839  Assert(StartupPID != 0);
3841  pmState = PM_STARTUP;
3842  /* crash recovery started, reset SIGKILL flag */
3843  AbortStartTime = 0;
3844  }
3845 }
3846 
3847 
3848 /*
3849  * Send a signal to a postmaster child process
3850  *
3851  * On systems that have setsid(), each child process sets itself up as a
3852  * process group leader. For signals that are generally interpreted in the
3853  * appropriate fashion, we signal the entire process group not just the
3854  * direct child process. This allows us to, for example, SIGQUIT a blocked
3855  * archive_recovery script, or SIGINT a script being run by a backend via
3856  * system().
3857  *
3858  * There is a race condition for recently-forked children: they might not
3859  * have executed setsid() yet. So we signal the child directly as well as
3860  * the group. We assume such a child will handle the signal before trying
3861  * to spawn any grandchild processes. We also assume that signaling the
3862  * child twice will not cause any problems.
      *
      * pid is the child's process ID and signal is the signal number to send.
      * Nothing is returned: a failing kill() is only reported at DEBUG3 and is
      * otherwise ignored, so callers cannot tell whether delivery succeeded.
3863  */
3864 static void
3865 signal_child(pid_t pid, int signal)
3866 {
      /* Always signal the child itself first */
3867  if (kill(pid, signal) < 0)
3868  elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) pid, signal);
3869 #ifdef HAVE_SETSID
      /* For the signals listed here, also signal the group via the negated pid */
3870  switch (signal)
3871  {
3872  case SIGINT:
3873  case SIGTERM:
3874  case SIGQUIT:
3875  case SIGSTOP:
3876  case SIGKILL:
3877  if (kill(-pid, signal) < 0)
3878  elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) (-pid), signal);
3879  break;
3880  default:
3881  break;
3882  }
3883 #endif
3884 }
3885 
3886 /*
3887  * Send a signal to the targeted children (but NOT special children;
3888  * dead_end children are never signaled, either).
      *
      * signal is the signal number handed to signal_child().  target is either
      * BACKEND_TYPE_ALL or a bit mask that is ANDed with each entry's
      * bkend_type; entries whose type is not in the mask are skipped.
      *
      * Returns true if at least one child was signaled, false otherwise.
      *
      * NOTE(review): listing lines 3914-3915 are missing from this extract, so
      * the second half of the walsender re-classification test and the
      * statement it guards are not visible.
3889  */
3890 static bool
3891 SignalSomeChildren(int signal, int target)
3892 {
3893  dlist_iter iter;
3894  bool signaled = false;
3895 
3896  dlist_foreach(iter, &BackendList)
3897  {
3898  Backend *bp = dlist_container(Backend, elem, iter.cur);
3899 
3900  if (bp->dead_end)
3901  continue;
3902 
3903  /*
3904  * Since target == BACKEND_TYPE_ALL is the most common case, we test
3905  * it first and avoid touching shared memory for every child.
3906  */
3907  if (target != BACKEND_TYPE_ALL)
3908  {
3909  /*
3910  * Assign bkend_type for any recently announced WAL Sender
3911  * processes.
3912  */
3913  if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
3916 
3917  if (!(target & bp->bkend_type))
3918  continue;
3919  }
3920 
3921  ereport(DEBUG4,
3922  (errmsg_internal("sending signal %d to process %d",
3923  signal, (int) bp->pid)));
3924  signal_child(bp->pid, signal);
3925  signaled = true;
3926  }
3927  return signaled;
3928 }
3929 
3930 /*
3931  * Send a termination signal to children. This considers all of our children
3932  * processes, except syslogger and dead_end backends.
      *
      * The same signal is first passed to SignalChildren() and then sent
      * individually to each auxiliary process whose PID is nonzero: startup,
      * bgwriter, checkpointer, walwriter, walreceiver, autovacuum launcher,
      * archiver and stats collector.
      *
      * NOTE(review): listing line 3935 (the line carrying the function name
      * and parameter list) and line 3942 (the statement guarded by the
      * SIGQUIT/SIGKILL test in the startup-process branch) are missing from
      * this extract.
3933  */
3934 static void
3936 {
3937  SignalChildren(signal);
3938  if (StartupPID != 0)
3939  {
3940  signal_child(StartupPID, signal);
3941  if (signal == SIGQUIT || signal == SIGKILL)
3943  }
3944  if (BgWriterPID != 0)
3945  signal_child(BgWriterPID, signal);
3946  if (CheckpointerPID != 0)
3947  signal_child(CheckpointerPID, signal);
3948  if (WalWriterPID != 0)
3949  signal_child(WalWriterPID, signal);
3950  if (WalReceiverPID != 0)
3951  signal_child(WalReceiverPID, signal);
3952  if (AutoVacPID != 0)
3953  signal_child(AutoVacPID, signal);
3954  if (PgArchPID != 0)
3955  signal_child(PgArchPID, signal);
3956  if (PgStatPID != 0)
3957  signal_child(PgStatPID, signal);
3958 }
3959 
3960 /*
3961  * BackendStartup -- start backend process
3962  *
3963  * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
      * (STATUS_ERROR is also returned, before any fork is attempted, when the
      * Backend struct cannot be allocated or the cancel key cannot be
      * generated.)
      *
      * The Backend struct is malloc'ed here.  It is freed on every failure
      * path and in the forked child; on success the parent fills in pid and
      * bkend_type and pushes it onto BackendList, which then owns it.
3964  *
3965  * Note: if you change this code, also consider StartAutovacuumWorker.
      *
      * NOTE(review): listing lines 3968, 3991, 4003, 4005, 4011, 4027 and 4046
      * are missing from this extract (including the line carrying the function
      * name and parameter list), so some statements are not shown.
3966  */
3967 static int
3969 {
3970  Backend *bn; /* for backend cleanup */
3971  pid_t pid;
3972 
3973  /*
3974  * Create backend data structure. Better before the fork() so we can
3975  * handle failure cleanly.
3976  */
3977  bn = (Backend *) malloc(sizeof(Backend));
3978  if (!bn)
3979  {
3980  ereport(LOG,
3981  (errcode(ERRCODE_OUT_OF_MEMORY),
3982  errmsg("out of memory")));
3983  return STATUS_ERROR;
3984  }
3985 
3986  /*
3987  * Compute the cancel key that will be assigned to this backend. The
3988  * backend will have its own copy in the forked-off process' value of
3989  * MyCancelKey, so that it can transmit the key to the frontend.
3990  */
3992  {
3993  free(bn);
3994  ereport(LOG,
3995  (errcode(ERRCODE_INTERNAL_ERROR),
3996  errmsg("could not generate random cancel key")));
3997  return STATUS_ERROR;
3998  }
3999 
4000  bn->cancel_key = MyCancelKey;
4001 
4002  /* Pass down canAcceptConnections state */
4004  bn->dead_end = (port->canAcceptConnections != CAC_OK &&
4006 
4007  /*
4008  * Unless it's a dead_end child, assign it a child slot number
4009  */
4010  if (!bn->dead_end)
4012  else
4013  bn->child_slot = 0;
4014 
4015  /* Hasn't asked to be notified about any bgworkers yet */
4016  bn->bgworker_notify = false;
4017 
4018 #ifdef EXEC_BACKEND
4019  pid = backend_forkexec(port);
4020 #else /* !EXEC_BACKEND */
4021  pid = fork_process();
4022  if (pid == 0) /* child */
4023  {
      /* the child has no use for the postmaster's bookkeeping struct */
4024  free(bn);
4025 
4026  /* Detangle from postmaster */
4028 
4029  /* Close the postmaster's sockets */
4030  ClosePostmasterPorts(false);
4031 
4032  /* Perform additional initialization and collect startup packet */
4033  BackendInitialize(port);
4034 
4035  /* And run the backend */
4036  BackendRun(port);
4037  }
4038 #endif /* EXEC_BACKEND */
4039 
4040  if (pid < 0)
4041  {
4042  /* in parent, fork failed */
      /* save errno now so the cleanup calls below cannot clobber it for %m */
4043  int save_errno = errno;
4044 
4045  if (!bn->dead_end)
4047  free(bn);
4048  errno = save_errno;
4049  ereport(LOG,
4050  (errmsg("could not fork new process for connection: %m")));
4051  report_fork_failure_to_client(port, save_errno);
4052  return STATUS_ERROR;
4053  }
4054 
4055  /* in parent, successful fork */
4056  ereport(DEBUG2,
4057  (errmsg_internal("forked new backend, pid=%d socket=%d",
4058  (int) pid, (int) port->sock)));
4059 
4060  /*
4061  * Everything's been successful, it's safe to add this backend to our list
4062  * of backends.
4063  */
4064  bn->pid = pid;
4065  bn->bkend_type = BACKEND_TYPE_NORMAL; /* Can change later to WALSND */
4066  dlist_push_head(&BackendList, &bn->elem);
4067 
4068 #ifdef EXEC_BACKEND
4069  if (!bn->dead_end)
4070  ShmemBackendArrayAdd(bn);
4071 #endif
4072 
4073  return STATUS_OK;
4074 }
4075 
4076 /*
4077  * Try to report backend fork() failure to client before we close the
4078  * connection. Since we do not care to risk blocking the postmaster on
4079  * this connection, we set the connection to non-blocking and try only once.
4080  *
4081  * This is grungy special-purpose code; we cannot use backend libpq since
4082  * it's not up and running.
      *
      * The body uses port->sock as the socket to write to and errnum as the
      * errno value whose strerror() text is appended to the message.  Nothing
      * is returned; apart from retrying on EINTR, the outcome of send() is
      * ignored.
      *
      * NOTE(review): listing line 4085 (the line carrying the function name
      * and parameter list) is missing from this extract.
4083  */
4084 static void
4086 {
4087  char buffer[1000];
4088  int rc;
4089 
4090  /* Format the error message packet (always V2 protocol) */
4091  snprintf(buffer, sizeof(buffer), "E%s%s\n",
4092  _("could not fork new process for connection: "),
4093  strerror(errnum));
4094 
4095  /* Set port to non-blocking. Don't do send() if this fails */
4096  if (!pg_set_noblock(port->sock))
4097  return;
4098 
4099  /* We'll retry after EINTR, but ignore all other failures */
4100  do
4101  {
      /* strlen + 1 so that the terminating NUL byte is sent as well */
4102  rc = send(port->sock, buffer, strlen(buffer) + 1, 0);
4103  } while (rc < 0 && errno == EINTR);
4104 }
4105 
4106 
4107 /*
4108  * BackendInitialize -- initialize an interactive (postmaster-child)
4109  * backend process, and collect the client's startup packet.
4110  *
4111  * returns: nothing. Will not return at all if there's any failure.
4112  *
4113  * Note: this code does not depend on having any access to shared memory.
4114  * In the EXEC_BACKEND case, we are physically attached to shared memory
4115  * but have not yet set up most of our local pointers to shmem structures.
      *
      * The port argument is stored in MyProcPort, and its remote_host,
      * remote_port and (possibly) remote_hostname fields are filled in here.
      *
      * NOTE(review): listing lines 4118, 4143-4144, 4173, 4175, 4247-4248,
      * 4276 and 4285 are missing from this extract (including the line
      * carrying the function name and parameter list), so some statements
      * described by the comments below are not shown.
4116  */
4117 static void
4119 {
4120  int status;
4121  int ret;
4122  char remote_host[NI_MAXHOST];
4123  char remote_port[NI_MAXSERV];
4124  char remote_ps_data[NI_MAXHOST];
4125 
4126  /* Save port etc. for ps status */
4127  MyProcPort = port;
4128 
4129  /*
4130  * PreAuthDelay is a debugging aid for investigating problems in the
4131  * authentication cycle: it can be set in postgresql.conf to allow time to
4132  * attach to the newly-forked backend with a debugger. (See also
4133  * PostAuthDelay, which we allow clients to pass through PGOPTIONS, but it
4134  * is not honored until after authentication.)
4135  */
4136  if (PreAuthDelay > 0)
4137  pg_usleep(PreAuthDelay * 1000000L);
4138 
4139  /* This flag will remain set until InitPostgres finishes authentication */
4140  ClientAuthInProgress = true; /* limit visibility of log messages */
4141 
4142  /* save process start time */
4145 
4146  /* set these to empty in case they are needed before we set them up */
4147  port->remote_host = "";
4148  port->remote_port = "";
4149 
4150  /*
4151  * Initialize libpq and enable reporting of ereport errors to the client.
4152  * Must do this now because authentication uses libpq to send messages.
4153  */
4154  pq_init(); /* initialize libpq to talk to client */
4155  whereToSendOutput = DestRemote; /* now safe to ereport to client */
4156 
4157  /*
4158  * We arrange for a simple exit(1) if we receive SIGTERM or SIGQUIT or
4159  * timeout while trying to collect the startup packet. Otherwise the
4160  * postmaster cannot shutdown the database FAST or IMMED cleanly if a
4161  * buggy client fails to send the packet promptly. XXX it follows that
4162  * the remainder of this function must tolerate losing control at any
4163  * instant. Likewise, any pg_on_exit_callback registered before or during
4164  * this function must be prepared to execute at any instant between here
4165  * and the end of this function. Furthermore, affected callbacks execute
4166  * partially or not at all when a second exit-inducing signal arrives
4167  * after proc_exit_prepare() decrements on_proc_exit_index. (Thanks to
4168  * that mechanic, callbacks need not anticipate more than one call.) This
4169  * is fragile; it ought to instead follow the norm of handling interrupts
4170  * at selected, safe opportunities.
4171  */
4172  pqsignal(SIGTERM, startup_die);
4174  InitializeTimeouts(); /* establishes SIGALRM handler */
4176 
4177  /*
4178  * Get the remote host name and port for logging and status display.
4179  */
4180  remote_host[0] = '\0';
4181  remote_port[0] = '\0';
4182  if ((ret = pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen,
4183  remote_host, sizeof(remote_host),
4184  remote_port, sizeof(remote_port),
4185  (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV)) != 0)
4186  ereport(WARNING,
4187  (errmsg_internal("pg_getnameinfo_all() failed: %s",
4188  gai_strerror(ret))));
      /* Build the "host" or "host(port)" string later passed to init_ps_display() */
4189  if (remote_port[0] == '\0')
4190  snprintf(remote_ps_data, sizeof(remote_ps_data), "%s", remote_host);
4191  else
4192  snprintf(remote_ps_data, sizeof(remote_ps_data), "%s(%s)", remote_host, remote_port);
4193 
4194  /*
4195  * Save remote_host and remote_port in port structure (after this, they
4196  * will appear in log_line_prefix data for log messages).
      *
      * NOTE(review): the strdup() results here and for remote_hostname below
      * are not checked for NULL.
4197  */
4198  port->remote_host = strdup(remote_host);
4199  port->remote_port = strdup(remote_port);
4200 
4201  /* And now we can issue the Log_connections message, if wanted */
4202  if (Log_connections)
4203  {
4204  if (remote_port[0])
4205  ereport(LOG,
4206  (errmsg("connection received: host=%s port=%s",
4207  remote_host,
4208  remote_port)));
4209  else
4210  ereport(LOG,
4211  (errmsg("connection received: host=%s",
4212  remote_host)));
4213  }
4214 
4215  /*
4216  * If we did a reverse lookup to name, we might as well save the results
4217  * rather than possibly repeating the lookup during authentication.
4218  *
4219  * Note that we don't want to specify NI_NAMEREQD above, because then we'd
4220  * get nothing useful for a client without an rDNS entry. Therefore, we
4221  * must check whether we got a numeric IPv4 or IPv6 address, and not save
4222  * it into remote_hostname if so. (This test is conservative and might
4223  * sometimes classify a hostname as numeric, but an error in that
4224  * direction is safe; it only results in a possible extra lookup.)
4225  */
4226  if (log_hostname &&
4227  ret == 0 &&
4228  strspn(remote_host, "0123456789.") < strlen(remote_host) &&
4229  strspn(remote_host, "0123456789ABCDEFabcdef:") < strlen(remote_host))
4230  port->remote_hostname = strdup(remote_host);
4231 
4232  /*
4233  * Ready to begin client interaction. We will give up and exit(1) after a
4234  * time delay, so that a broken client can't hog a connection
4235  * indefinitely. PreAuthDelay and any DNS interactions above don't count
4236  * against the time limit.
4237  *
4238  * Note: AuthenticationTimeout is applied here while waiting for the
4239  * startup packet, and then again in InitPostgres for the duration of any
4240  * authentication operations. So a hostile client could tie up the
4241  * process for nearly twice AuthenticationTimeout before we kick him off.
4242  *
4243  * Note: because PostgresMain will call InitializeTimeouts again, the
4244  * registration of STARTUP_PACKET_TIMEOUT will be lost. This is okay
4245  * since we never use it again after this function.
4246  */
4249 
4250  /*
4251  * Receive the startup packet (which might turn out to be a cancel request
4252  * packet).
4253  */
4254  status = ProcessStartupPacket(port, false);
4255 
4256  /*
4257  * Stop here if it was bad or a cancel packet. ProcessStartupPacket
4258  * already did any appropriate error reporting.
4259  */
4260  if (status != STATUS_OK)
4261  proc_exit(0);
4262 
4263  /*
4264  * Now that we have the user and database name, we can set the process
4265  * title for ps. It's good to do this as early as possible in startup.
4266  *
4267  * For a walsender, the ps display is set in the following form:
4268  *
4269  * postgres: walsender <user> <host> <activity>
4270  *
4271  * To achieve that, we pass "walsender" as username and username
4272  * as dbname to init_ps_display(). XXX: should add a new variant of
4273  * init_ps_display() to avoid abusing the parameters like this.
4274  */
4275  if (am_walsender)
4277  update_process_title ? "authentication" : "");
4278  else
4279  init_ps_display(port->user_name, port->database_name, remote_ps_data,
4280  update_process_title ? "authentication" : "");
4281 
4282  /*
4283  * Disable the timeout, and prevent SIGTERM/SIGQUIT again.
4284  */
4286  PG_SETMASK(&BlockSig);
4287 }
4288 
4289 
4290 /*
4291  * BackendRun -- set up the backend's argument list and invoke PostgresMain()
4292  *
4293  * returns:
4294  * Shouldn't return at all.
4295  * If PostgresMain() fails, return status.
      *
      * NOTE(review): the function is declared "static void" below and has no
      * return statement, so the "return status" wording above looks stale.
      *
      * The argument vector is "postgres" followed by the words split out of
      * ExtraOptions, terminated by a NULL entry.  port supplies the session
      * start time used for reseeding random() and the database and user names
      * passed to PostgresMain().
      *
      * NOTE(review): listing lines 4298 (the line carrying the function name
      * and parameter list) and 4362 (the statement described by the
      * PostmasterContext comment) are missing from this extract.
4296  */
4297 static void
4299 {
4300  char **av;
4301  int maxac;
4302  int ac;
4303  long secs;
4304  int usecs;
4305  int i;
4306 
4307  /*
4308  * Don't want backend to be able to see the postmaster random number
4309  * generator state. We have to clobber the static random_seed *and* start
4310  * a new random sequence in the random() library function.
4311  */
4312 #ifndef HAVE_STRONG_RANDOM
4313  random_seed = 0;
4314  random_start_time.tv_usec = 0;
4315 #endif
4316  /* slightly hacky way to convert timestamptz into integers */
4317  TimestampDifference(0, port->SessionStartTime, &secs, &usecs);
4318  srandom((unsigned int) (MyProcPid ^ (usecs << 12) ^ secs));
4319 
4320  /*
4321  * Now, build the argv vector that will be given to PostgresMain.
4322  *
4323  * The maximum possible number of commandline arguments that could come
4324  * from ExtraOptions is (strlen(ExtraOptions) + 1) / 2; see
4325  * pg_split_opts().
4326  */
4327  maxac = 2; /* for fixed args supplied below */
4328  maxac += (strlen(ExtraOptions) + 1) / 2;
4329 
4330  av = (char **) MemoryContextAlloc(TopMemoryContext,
4331  maxac * sizeof(char *));
4332  ac = 0;
4333 
4334  av[ac++] = "postgres";
4335 
4336  /*
4337  * Pass any backend switches specified with -o on the postmaster's own
4338  * command line. We assume these are secure.
4339  */
4340  pg_split_opts(av, &ac, ExtraOptions);
4341 
4342  av[ac] = NULL;
4343 
      /* the NULL terminator stored at av[ac] must still lie inside the array */
4344  Assert(ac < maxac);
4345 
4346  /*
4347  * Debug: print arguments being passed to backend
4348  */
4349  ereport(DEBUG3,
4350  (errmsg_internal("%s child[%d]: starting with (",
4351  progname, (int) getpid())));
4352  for (i = 0; i < ac; ++i)
4353  ereport(DEBUG3,
4354  (errmsg_internal("\t%s", av[i])));
4355  ereport(DEBUG3,
4356  (errmsg_internal(")")));
4357 
4358  /*
4359  * Make sure we aren't in PostmasterContext anymore. (We can't delete it
4360  * just yet, though, because InitPostgres will need the HBA data.)
4361  */
4363 
4364  PostgresMain(ac, av, port->database_name, port->user_name);
4365 }
4366 
4367 
4368 #ifdef EXEC_BACKEND
4369 
4370 /*
4371  * postmaster_forkexec -- fork and exec a postmaster subprocess
4372  *
4373  * The caller must have set up the argv array already, except for argv[2]
4374  * which will be filled with the name of the temp variable file.
4375  *
4376  * Returns the child process PID, or -1 on fork failure (a suitable error
4377  * message has been logged on failure).
4378  *
4379  * All uses of this routine will dispatch to SubPostmasterMain in the
4380  * child process.
4381  */
4382 pid_t
4383 postmaster_forkexec(int argc, char *argv[])
4384 {
4385  Port port;
4386 
4387  /* This entry point passes dummy values for the Port variables */
4388  memset(&port, 0, sizeof(port));
4389  return internal_forkexec(argc, argv, &port);
4390 }
4391 
4392 /*
4393  * backend_forkexec -- fork/exec off a backend process
4394  *
4395  * Some operating systems (WIN32) don't have fork() so we have to simulate
4396  * it by storing parameters that need to be passed to the child and
4397  * then create a new child process.
4398  *
4399  * returns the pid of the fork/exec'd process, or -1 on failure
4400  */
4401 static pid_t
4402 backend_forkexec(Port *port)
4403 {
4404  char *av[4];
4405  int ac = 0;
4406 
4407  av[ac++] = "postgres";
4408  av[ac++] = "--forkbackend";
4409  av[ac++] = NULL; /* filled in by internal_forkexec */
4410 
4411  av[ac] = NULL;
4412  Assert(ac < lengthof(av));
4413 
4414  return internal_forkexec(ac, av, port);
4415 }
4416 
4417 #ifndef WIN32
4418 
4419 /*
4420  * internal_forkexec non-win32 implementation
4421  *
4422  * - writes out backend variables to the parameter file
4423  * - fork():s, and then exec():s the child process
4424  */
4425 static pid_t
4426 internal_forkexec(int argc, char *argv[], Port *port)
4427 {
4428  static unsigned long tmpBackendFileNum = 0;
4429  pid_t pid;
4430  char tmpfilename[MAXPGPATH];
4431  BackendParameters param;
4432  FILE *fp;
4433 
4434  if (!save_backend_variables(&param, port))
4435  return -1; /* log made by save_backend_variables */
4436 
4437  /* Calculate name for temp file */
4438  snprintf(tmpfilename, MAXPGPATH, "%s/%s.backend_var.%d.%lu",
4440  MyProcPid, ++tmpBackendFileNum);
4441 
4442  /* Open file */
4443  fp = AllocateFile(tmpfilename, PG_BINARY_W);
4444  if (!fp)
4445  {
4446  /*
4447  * As in OpenTemporaryFileInTablespace, try to make the temp-file
4448  * directory
4449  */
4450  mkdir(PG_TEMP_FILES_DIR, S_IRWXU);
4451 
4452  fp = AllocateFile(tmpfilename, PG_BINARY_W);
4453  if (!fp)
4454  {
4455  ereport(LOG,
4457  errmsg("could not create file \"%s\": %m",
4458  tmpfilename)));
4459  return -1;
4460  }
4461  }
4462 
4463  if (fwrite(&param, sizeof(param), 1, fp) != 1)
4464  {
4465  ereport(LOG,
4467  errmsg("could not write to file \"%s\": %m", tmpfilename)));
4468  FreeFile(fp);
4469  return -1;
4470  }
4471 
4472  /* Release file */
4473  if (FreeFile(fp))
4474  {
4475  ereport(LOG,
4477  errmsg("could not write to file \"%s\": %m", tmpfilename)));
4478  return -1;
4479  }
4480 
4481  /* Make sure caller set up argv properly */
4482  Assert(argc >= 3);
4483  Assert(argv[argc] == NULL);
4484  Assert(strncmp(argv[1], "--fork", 6) == 0);
4485  Assert(argv[2] == NULL);
4486 
4487  /* Insert temp file name after --fork argument */
4488  argv[2] = tmpfilename;
4489 
4490  /* Fire off execv in child */
4491  if ((pid = fork_process()) == 0)
4492  {
4493  if (execv(postgres_exec_path, argv) < 0)
4494  {
4495  ereport(LOG,
4496  (errmsg("could not execute server process \"%s\": %m",
4497  postgres_exec_path)));
4498  /* We're already in the child process here, can't return */
4499  exit(1);
4500  }
4501  }
4502 
4503  return pid; /* Parent returns pid, or -1 on fork failure */
4504 }
4505 #else /* WIN32 */
4506 
4507 /*
4508  * internal_forkexec win32 implementation
4509  *
4510  * - starts backend using CreateProcess(), in suspended state
4511  * - writes out backend variables to the parameter file
4512  * - during this, duplicates handles and sockets required for
4513  * inheritance into the new process
4514  * - resumes execution of the new process once the backend parameter
4515  * file is complete.
4516  */
4517 static pid_t
4518 internal_forkexec(int argc, char *argv[], Port *port)
4519 {
4520  int retry_count = 0;
4521  STARTUPINFO si;
4522  PROCESS_INFORMATION pi;
4523  int i;
4524  int j;
4525  char cmdLine[MAXPGPATH * 2];
4526  HANDLE paramHandle;
4527  BackendParameters *param;
4528  SECURITY_ATTRIBUTES sa;
4529  char paramHandleStr[32];
4530  win32_deadchild_waitinfo *childinfo;
4531 
4532  /* Make sure caller set up argv properly */
4533  Assert(argc >= 3);
4534  Assert(argv[argc] == NULL);
4535  Assert(strncmp(argv[1], "--fork", 6) == 0);
4536  Assert(argv[2] == NULL);
4537 
4538  /* Resume here if we need to retry */
4539 retry:
4540 
4541  /* Set up shared memory for parameter passing */
4542  ZeroMemory(&sa, sizeof(sa));
4543  sa.nLength = sizeof(sa);
4544  sa.bInheritHandle = TRUE;
4545  paramHandle = CreateFileMapping(INVALID_HANDLE_VALUE,
4546  &sa,
4547  PAGE_READWRITE,
4548  0,
4549  sizeof(BackendParameters),
4550  NULL);
4551  if (paramHandle == INVALID_HANDLE_VALUE)
4552  {
4553  elog(LOG, "could not create backend parameter file mapping: error code %lu",
4554  GetLastError());
4555  return -1;
4556  }
4557 
4558  param = MapViewOfFile(paramHandle, FILE_MAP_WRITE, 0, 0, sizeof(BackendParameters));
4559  if (!param)
4560  {
4561  elog(LOG, "could not map backend parameter memory: error code %lu",
4562  GetLastError());
4563  CloseHandle(paramHandle);
4564  return -1;
4565  }
4566 
4567  /* Insert temp file name after --fork argument */
4568 #ifdef _WIN64
4569  sprintf(paramHandleStr, "%llu", (LONG_PTR) paramHandle);
4570 #else
4571  sprintf(paramHandleStr, "%lu", (DWORD) paramHandle);
4572 #endif
4573  argv[2] = paramHandleStr;
4574 
4575  /* Format the cmd line */
4576  cmdLine[sizeof(cmdLine) - 1] = '\0';
4577  cmdLine[sizeof(cmdLine) - 2] = '\0';
4578  snprintf(cmdLine, sizeof(cmdLine) - 1, "\"%s\"", postgres_exec_path);
4579  i = 0;
4580  while (argv[++i] != NULL)
4581  {
4582  j = strlen(cmdLine);
4583  snprintf(cmdLine + j, sizeof(cmdLine) - 1 - j, " \"%s\"", argv[i]);
4584  }
4585  if (cmdLine[sizeof(cmdLine) - 2] != '\0')
4586  {
4587  elog(LOG, "subprocess command line too long");
4588  return -1;
4589  }
4590 
4591  memset(&pi, 0, sizeof(pi));
4592  memset(&si, 0, sizeof(si));
4593  si.cb = sizeof(si);
4594 
4595  /*
4596  * Create the subprocess in a suspended state. This will be resumed later,
4597  * once we have written out the parameter file.
4598  */
4599  if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, CREATE_SUSPENDED,
4600  NULL, NULL, &si, &pi))
4601  {
4602  elog(LOG, "CreateProcess call failed: %m (error code %lu)",
4603  GetLastError());
4604  return -1;
4605  }
4606 
4607  if (!save_backend_variables(param, port, pi.hProcess, pi.dwProcessId))
4608  {
4609  /*
4610  * log made by save_backend_variables, but we have to clean up the
4611  * mess with the half-started process
4612  */
4613  if (!TerminateProcess(pi.hProcess, 255))
4614  ereport(LOG,
4615  (errmsg_internal("could not terminate unstarted process: error code %lu",
4616  GetLastError())));
4617  CloseHandle(pi.hProcess);
4618  CloseHandle(pi.hThread);
4619  return -1; /* log made by save_backend_variables */
4620  }
4621 
4622  /* Drop the parameter shared memory that is now inherited to the backend */
4623  if (!UnmapViewOfFile(param))
4624  elog(LOG, "could not unmap view of backend parameter file: error code %lu",
4625  GetLastError());
4626  if (!CloseHandle(paramHandle))
4627  elog(LOG, "could not close handle to backend parameter file: error code %lu",
4628  GetLastError());
4629 
4630  /*
4631  * Reserve the memory region used by our main shared memory segment before
4632  * we resume the child process. Normally this should succeed, but if ASLR
4633  * is active then it might sometimes fail due to the stack or heap having
4634  * gotten mapped into that range. In that case, just terminate the
4635  * process and retry.
4636  */
4637  if (!pgwin32_ReserveSharedMemoryRegion(pi.hProcess))
4638  {
4639  /* pgwin32_ReserveSharedMemoryRegion already made a log entry */
4640  if (!TerminateProcess(pi.hProcess, 255))
4641  ereport(LOG,
4642  (errmsg_internal("could not terminate process that failed to reserve memory: error code %lu",
4643  GetLastError())));
4644  CloseHandle(pi.hProcess);
4645  CloseHandle(pi.hThread);
4646  if (++retry_count < 100)
4647  goto retry;
4648  ereport(LOG,
4649  (errmsg("giving up after too many tries to reserve shared memory"),
4650  errhint("This might be caused by ASLR or antivirus software.")));
4651  return -1;
4652  }
4653 
4654  /*
4655  * Now that the backend variables are written out, we start the child
4656  * thread so it can start initializing while we set up the rest of the
4657  * parent state.
4658  */
4659  if (ResumeThread(pi.hThread) == -1)
4660  {
4661  if (!TerminateProcess(pi.hProcess, 255))
4662  {
4663  ereport(LOG,
4664  (errmsg_internal("could not terminate unstartable process: error code %lu",
4665  GetLastError())));
4666  CloseHandle(pi.hProcess);
4667  CloseHandle(pi.hThread);
4668  return -1;
4669  }
4670  CloseHandle(pi.hProcess);
4671  CloseHandle(pi.hThread);
4672  ereport(LOG,
4673  (errmsg_internal("could not resume thread of unstarted process: error code %lu",
4674  GetLastError())));
4675  return -1;
4676  }
4677 
4678  /*
4679  * Queue a waiter for to signal when this child dies. The wait will be
4680  * handled automatically by an operating system thread pool.
4681  *
4682  * Note: use malloc instead of palloc, since it needs to be thread-safe.
4683  * Struct will be free():d from the callback function that runs on a
4684  * different thread.
4685  */
4686  childinfo = malloc(sizeof(win32_deadchild_waitinfo));
4687  if (!childinfo)
4688  ereport(FATAL,
4689  (errcode(ERRCODE_OUT_OF_MEMORY),
4690  errmsg("out of memory")));
4691 
4692  childinfo->procHandle = pi.hProcess;
4693  childinfo->procId = pi.dwProcessId;
4694 
4695  if (!RegisterWaitForSingleObject(&childinfo->waitHandle,
4696  pi.hProcess,
4697  pgwin32_deadchild_callback,
4698  childinfo,
4699  INFINITE,
4700  WT_EXECUTEONLYONCE | WT_EXECUTEINWAITTHREAD))
4701  ereport(FATAL,
4702  (errmsg_internal("could not register process for wait: error code %lu",
4703  GetLastError())));
4704 
4705  /* Don't close pi.hProcess here - the wait thread needs access to it */
4706 
4707  CloseHandle(pi.hThread);
4708 
4709  return pi.dwProcessId;
4710 }
4711 #endif /* WIN32 */
4712 
4713 
4714 /*
4715  * SubPostmasterMain -- Get the fork/exec'd process into a state equivalent
4716  * to what it would be if we'd simply forked on Unix, and then
4717  * dispatch to the appropriate place.
4718  *
4719  * The first two command line arguments are expected to be "--forkFOO"
4720  * (where FOO indicates which postmaster child we are to become), and
4721  * the name of a variables file that we can read to load data that would
4722  * have been inherited by fork() on Unix. Remaining arguments go to the
4723  * subprocess FooMain() routine.
4724  */
4725 void
4726 SubPostmasterMain(int argc, char *argv[])
4727 {
4728  Port port;
4729 
4730  /* In EXEC_BACKEND case we will not have inherited these settings */
4731  IsPostmasterEnvironment = true;
4733 
4734  /* Setup as postmaster child */
4736 
4737  /* Setup essential subsystems (to ensure elog() behaves sanely) */
4739 
4740  /* Check we got appropriate args */
4741  if (argc < 3)
4742  elog(FATAL, "invalid subpostmaster invocation");
4743 
4744  /* Read in the variables file */
4745  memset(&port, 0, sizeof(Port));
4746  read_backend_variables(argv[2], &port);
4747 
4748  /* Close the postmaster's sockets (as soon as we know them) */
4749  ClosePostmasterPorts(strcmp(argv[1], "--forklog") == 0);
4750 
4751  /*
4752  * Set reference point for stack-depth checking
4753  */
4754  set_stack_base();
4755 
4756  /*
4757  * Set up memory area for GSS information. Mirrors the code in ConnCreate
4758  * for the non-exec case.
4759  */
4760 #if defined(ENABLE_GSS) || defined(ENABLE_SSPI)
4761  port.gss = (pg_gssinfo *) calloc(1, sizeof(pg_gssinfo));
4762  if (!port.gss)
4763  ereport(FATAL,
4764  (errcode(ERRCODE_OUT_OF_MEMORY),
4765  errmsg("out of memory")));
4766 #endif
4767 
4768  /*
4769  * If appropriate, physically re-attach to shared memory segment. We want
4770  * to do this before going any further to ensure that we can attach at the
4771  * same address the postmaster used. On the other hand, if we choose not
4772  * to re-attach, we may have other cleanup to do.
4773  *
4774  * If testing EXEC_BACKEND on Linux, you should run this as root before
4775  * starting the postmaster:
4776  *
4777  * echo 0 >/proc/sys/kernel/randomize_va_space
4778  *
4779  * This prevents using randomized stack and code addresses that cause the
4780  * child process's memory map to be different from the parent's, making it
4781  * sometimes impossible to attach to shared memory at the desired address.
4782  * Return the setting to its old value (usually '1' or '2') when finished.
4783  */
4784  if (strcmp(argv[1], "--forkbackend") == 0 ||
4785  strcmp(argv[1], "--forkavlauncher") == 0 ||
4786  strcmp(argv[1], "--forkavworker") == 0 ||
4787  strcmp(argv[1], "--forkboot") == 0 ||
4788  strncmp(argv[1], "--forkbgworker=", 15) == 0)
4790  else
4792 
4793  /* autovacuum needs this set before calling InitProcess */
4794  if (strcmp(argv[1], "--forkavlauncher") == 0)
4795  AutovacuumLauncherIAm();
4796  if (strcmp(argv[1], "--forkavworker") == 0)
4797  AutovacuumWorkerIAm();
4798 
4799  /*
4800  * Start our win32 signal implementation. This has to be done after we
4801  * read the backend variables, because we need to pick up the signal pipe
4802  * from the parent process.
4803  */
4804 #ifdef WIN32
4806 #endif
4807 
4808  /* In EXEC_BACKEND case we will not have inherited these settings */
4809  pqinitmask();
4810  PG_SETMASK(&BlockSig);
4811 
4812  /* Read in remaining GUC variables */
4813  read_nondefault_variables();
4814 
4815  /*
4816  * (re-)read control file, as it contains config. The postmaster will
4817  * already have read this, but this process doesn't know about that.
4818  */
4819  LocalProcessControlFile(false);
4820 
4821  /*
4822  * Reload any libraries that were preloaded by the postmaster. Since we
4823  * exec'd this process, those libraries didn't come along with us; but we
4824  * should load them into all child processes to be consistent with the
4825  * non-EXEC_BACKEND behavior.
4826  */
4828 
4829  /* Run backend or appropriate child */
4830  if (strcmp(argv[1], "--forkbackend") == 0)
4831  {
4832  Assert(argc == 3); /* shouldn't be any more args */
4833 
4834  /*
4835  * Need to reinitialize the SSL library in the backend, since the
4836  * context structures contain function pointers and cannot be passed
4837  * through the parameter file.
4838  *
4839  * If for some reason reload fails (maybe the user installed broken
4840  * key files), soldier on without SSL; that's better than all
4841  * connections becoming impossible.
4842  *
4843  * XXX should we do this in all child processes? For the moment it's
4844  * enough to do it in backend children.
4845  */
4846 #ifdef USE_SSL
4847  if (EnableSSL)
4848  {
4849  if (secure_initialize(false) == 0)
4850  LoadedSSL = true;
4851  else
4852  ereport(LOG,
4853  (errmsg("SSL configuration could not be loaded in child process")));
4854  }
4855 #endif
4856 
4857  /*
4858  * Perform additional initialization and collect startup packet.
4859  *
4860  * We want to do this before InitProcess() for a couple of reasons: 1.
4861  * so that we aren't eating up a PGPROC slot while waiting on the
4862  * client. 2. so that if InitProcess() fails due to being out of
4863  * PGPROC slots, we have already initialized libpq and are able to
4864  * report the error to the client.
4865  */
4866  BackendInitialize(&port);
4867 
4868  /* Restore basic shared memory pointers */
4870 
4871  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4872  InitProcess();
4873 
4874  /* Attach process to shared data structures */
4876 
4877  /* And run the backend */
4878  BackendRun(&port); /* does not return */
4879  }
4880  if (strcmp(argv[1], "--forkboot") == 0)
4881  {
4882  /* Restore basic shared memory pointers */
4884 
4885  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4887 
4888  /* Attach process to shared data structures */
4890 
4891  AuxiliaryProcessMain(argc - 2, argv + 2); /* does not return */
4892  }
4893  if (strcmp(argv[1], "--forkavlauncher") == 0)
4894  {
4895  /* Restore basic shared memory pointers */
4897 
4898  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4899  InitProcess();
4900 
4901  /* Attach process to shared data structures */
4903 
4904  AutoVacLauncherMain(argc - 2, argv + 2); /* does not return */
4905  }
4906  if (strcmp(argv[1], "--forkavworker") == 0)
4907  {
4908  /* Restore basic shared memory pointers */
4910 
4911  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4912  InitProcess();
4913 
4914  /* Attach process to shared data structures */
4916 
4917  AutoVacWorkerMain(argc - 2, argv + 2); /* does not return */
4918  }
4919  if (strncmp(argv[1], "--forkbgworker=", 15) == 0)
4920  {
4921  int shmem_slot;
4922 
4923  /* do this as early as possible; in particular, before InitProcess() */
4924  IsBackgroundWorker = true;
4925 
4926  /* Restore basic shared memory pointers */
4928 
4929  /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4930  InitProcess();
4931 
4932  /* Attach process to shared data structures */
4934 
4935  /* Fetch MyBgworkerEntry from shared memory */
4936  shmem_slot = atoi(argv[1] + 15);
4937  MyBgworkerEntry = BackgroundWorkerEntry(shmem_slot);
4938 
4940  }
4941  if (strcmp(argv[1], "--forkarch") == 0)
4942  {
4943  /* Do not want to attach to shared memory */
4944 
4945  PgArchiverMain(argc, argv); /* does not return */
4946  }
4947  if (strcmp(argv[1], "--forkcol") == 0)
4948  {
4949  /* Do not want to attach to shared memory */
4950 
4951  PgstatCollectorMain(argc, argv); /* does not return */
4952  }
4953  if (strcmp(argv[1], "--forklog") == 0)
4954  {
4955  /* Do not want to attach to shared memory */
4956 
4957  SysLoggerMain(argc, argv); /* does not return */
4958  }
4959 
4960  abort(); /* shouldn't get here */
4961 }
4962 #endif /* EXEC_BACKEND */
4963 
4964 
4965 /*
4966  * ExitPostmaster -- cleanup
4967  *
4968  * Do NOT call exit() directly --- always go through here!
4969  */
4970 static void
4972 {
4973 #ifdef HAVE_PTHREAD_IS_THREADED_NP
4974 
4975  /*
4976  * There is no known cause for a postmaster to become multithreaded after
4977  * startup. Recheck to account for the possibility of unknown causes.
4978  * This message uses LOG level, because an unclean shutdown at this point
4979  * would usually not look much different from a clean shutdown.
4980  */
4981  if (pthread_is_threaded_np() != 0)
4982  ereport(LOG,
4983  (errcode(ERRCODE_INTERNAL_ERROR),
4984  errmsg_internal("postmaster became multithreaded"),
4985  errdetail("Please report this to <pgsql-bugs@postgresql.org>.")));
4986 #endif
4987 
4988  /* should cleanup shared memory and kill all backends */
4989 
4990  /*
4991  * Not sure of the semantics here. When the Postmaster dies, should the
4992  * backends all be killed? probably not.
4993  *
4994  * MUST -- vadim 05-10-1999
4995  */
4996 
4997  proc_exit(status);
4998 }
4999 
5000 /*
5001  * sigusr1_handler - handle signal conditions from child processes
5002  */
5003 static void
5005 {
5006  int save_errno = errno;
5007 
5008  PG_SETMASK(&BlockSig);
5009 
5010  /* Process background worker state change. */
5012  {
5014  StartWorkerNeeded = true;
5015  }
5016 
5017  /*
5018  * RECOVERY_STARTED and BEGIN_HOT_STANDBY signals are ignored in
5019  * unexpected states. If the startup process quickly starts up, completes
5020  * recovery, exits, we might process the death of the startup process
5021  * first. We don't want to go back to recovery in that case.
5022  */
5025  {
5026  /* WAL redo has started. We're out of reinitialization. */
5027  FatalError = false;
5028  Assert(AbortStartTime == 0);
5029 
5030  /*
5031  * Crank up the background tasks. It doesn't matter if this fails,
5032  * we'll just try again later.
5033  */
5034  Assert(CheckpointerPID == 0);
5036  Assert(BgWriterPID == 0);
5038 
5039  /*
5040  * Start the archiver if we're responsible for (re-)archiving received
5041  * files.
5042  */
5043  Assert(PgArchPID == 0);
5044  if (XLogArchivingAlways())
5045  PgArchPID = pgarch_start();
5046 
5047  /*
5048  * If we aren't planning to enter hot standby mode later, treat
5049  * RECOVERY_STARTED as meaning we're out of startup, and report status
5050  * accordingly.
5051  */
5052  if (!EnableHotStandby)
5053  {
5055 #ifdef USE_SYSTEMD
5056  sd_notify(0, "READY=1");
5057 #endif
5058  }
5059 
5060  pmState = PM_RECOVERY;
5061  }
5064  {
5065  /*
5066  * Likewise, start other special children as needed.
5067  */
5068  Assert(PgStatPID == 0);
5069  PgStatPID = pgstat_start();
5070 
5071  ereport(LOG,
5072  (errmsg("database system is ready to accept read only connections")));
5073 
5074  /* Report status */
5076 #ifdef USE_SYSTEMD
5077  sd_notify(0, "READY=1");
5078 #endif
5079 
5081  /* Some workers may be scheduled to start now */
5082  StartWorkerNeeded = true;
5083  }
5084 
5087 
5089  PgArchPID != 0)
5090  {
5091  /*
5092  * Send SIGUSR1 to archiver process, to wake it up and begin archiving
5093  * next WAL file.
5094  */
5096  }
5097 
5099  SysLoggerPID != 0)
5100  {
5101  /* Tell syslogger to rotate logfile */
5103  }
5104 
5106  Shutdown == NoShutdown)
5107  {
5108  /*
5109  * Start one iteration of the autovacuum daemon, even if autovacuuming
5110  * is nominally not enabled. This is so we can have an active defense
5111  * against transaction ID wraparound. We set a flag for the main loop
5112  * to do it rather than trying to do it here --- this is because the
5113  * autovac process itself may send the signal, and we want to handle
5114  * that by launching another iteration as soon as the current one
5115  * completes.
5116  */
5117  start_autovac_launcher = true;
5118  }
5119 
5121  Shutdown == NoShutdown)
5122  {
5123  /* The autovacuum launcher wants us to start a worker process. */
5125  }
5126 
5128  {
5129  /* Startup Process wants us to start the walreceiver process. */
5130  /* Start immediately if possible, else remember request for later. */
5131  WalReceiverRequested = true;
5133  }
5134 
5137  {
5138  /* Advance postmaster's state machine */
5140  }
5141 
5142  if (CheckPromoteSignal() && StartupPID != 0 &&
5143  (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5145  {
5146  /* Tell startup process to finish recovery */
5148  }
5149 
5151 
5152  errno = save_errno;
5153 }
5154 
5155 /*
5156  * SIGTERM or SIGQUIT while processing startup packet.
5157  * Clean up and exit(1).
5158  *
5159  * XXX: possible future improvement: try to send a message indicating
5160  * why we are disconnecting. Problem is to be sure we don't block while
5161  * doing so, nor mess up SSL initialization. In practice, if the client
5162  * has wedged here, it probably couldn't do anything with the message anyway.
5163  */
5164 static void
5166 {
5167  proc_exit(1);
5168 }
5169 
5170 /*
5171  * Dummy signal handler
5172  *
5173  * We use this for signals that we don't actually use in the postmaster,
5174  * but we do use in backends. If we were to SIG_IGN such signals in the
5175  * postmaster, then a newly started backend might drop a signal that arrives
5176  * before it's able to reconfigure its signal processing. (See notes in
5177  * tcop/postgres.c.)
5178  */
5179 static void
5181 {
5182 }
5183 
5184 /*
5185  * Timeout while processing startup packet.
5186  * As for startup_die(), we clean up and exit(1).
5187  */
5188 static void
5190 {
5191  proc_exit(1);
5192 }
5193 
5194 
5195 /*
5196  * Generate a random cancel key.
5197  */
5198 static bool
5200 {
5201 #ifdef HAVE_STRONG_RANDOM
5202  return pg_strong_random((char *) cancel_key, sizeof(int32));
5203 #else
5204 
5205  /*
5206  * If built with --disable-strong-random, use plain old erand48.
5207  *
5208  * We cannot use pg_backend_random() in postmaster, because it stores its
5209  * state in shared memory.
5210  */
5211  static unsigned short seed[3];
5212 
5213  /*
5214  * Select a random seed at the time of first receiving a request.
5215  */
5216  if (random_seed == 0)
5217  {
5218  struct timeval random_stop_time;
5219 
5220  gettimeofday(&random_stop_time, NULL);
5221 
5222  seed[0] = (unsigned short) random_start_time.tv_usec;
5223  seed[1] = (unsigned short) (random_stop_time.tv_usec) ^ (random_start_time.tv_usec >> 16);
5224  seed[2] = (unsigned short) (random_stop_time.tv_usec >> 16);
5225 
5226  random_seed = 1;
5227  }
5228 
5229  *cancel_key = pg_jrand48(seed);
5230 
5231  return true;
5232 #endif
5233 }
5234 
5235 /*
5236  * Count up number of child processes of specified types (dead_end children
5237  * are always excluded).
5238  */
5239 static int
5240 CountChildren(int target)
5241 {
5242  dlist_iter iter;
5243  int cnt = 0;
5244 
5245  dlist_foreach(iter, &BackendList)
5246  {
5247  Backend *bp = dlist_container(Backend, elem, iter.cur);
5248 
5249  if (bp->dead_end)
5250  continue;
5251 
5252  /*
5253  * Since target == BACKEND_TYPE_ALL is the most common case, we test
5254  * it first and avoid touching shared memory for every child.
5255  */
5256  if (target != BACKEND_TYPE_ALL)
5257  {
5258  /*
5259  * Assign bkend_type for any recently announced WAL Sender
5260  * processes.
5261  */
5262  if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
5265 
5266  if (!(target & bp->bkend_type))
5267  continue;
5268  }
5269 
5270  cnt++;
5271  }
5272  return cnt;
5273 }
5274 
5275 
5276 /*
5277  * StartChildProcess -- start an auxiliary process for the postmaster
5278  *
5279  * "type" determines what kind of child will be started. All child types
5280  * initially go to AuxiliaryProcessMain, which will handle common setup.
5281  *
5282  * Return value of StartChildProcess is subprocess' PID, or 0 if failed
5283  * to start subprocess.
5284  */
5285 static pid_t
5287 {
5288  pid_t pid;
5289  char *av[10];
5290  int ac = 0;
5291  char typebuf[32];
5292 
5293  /*
5294  * Set up command-line arguments for subprocess
5295  */
5296  av[ac++] = "postgres";
5297 
5298 #ifdef EXEC_BACKEND
5299  av[ac++] = "--forkboot";
5300  av[ac++] = NULL; /* filled in by postmaster_forkexec */
5301 #endif
5302 
5303  snprintf(typebuf, sizeof(typebuf), "-x%d", type);
5304  av[ac++] = typebuf;
5305 
5306  av[ac] = NULL;
5307  Assert(ac < lengthof(av));
5308 
5309 #ifdef EXEC_BACKEND
5310  pid = postmaster_forkexec(ac, av);
5311 #else /* !EXEC_BACKEND */
5312  pid = fork_process();
5313 
5314  if (pid == 0) /* child */
5315  {
5317 
5318  /* Close the postmaster's sockets */
5319  ClosePostmasterPorts(false);
5320 
5321  /* Release postmaster's working memory context */
5324  PostmasterContext = NULL;
5325 
5326  AuxiliaryProcessMain(ac, av);
5327  ExitPostmaster(0);
5328  }
5329 #endif /* EXEC_BACKEND */
5330 
5331  if (pid < 0)
5332  {
5333  /* in parent, fork failed */
5334  int save_errno = errno;
5335 
5336  errno = save_errno;
5337  switch (type)
5338  {
5339  case StartupProcess:
5340  ereport(LOG,
5341  (errmsg("could not fork startup process: %m")));
5342  break;
5343  case BgWriterProcess:
5344  ereport(LOG,
5345  (errmsg("could not fork background writer process: %m")));
5346  break;
5347  case CheckpointerProcess:
5348  ereport(LOG,
5349  (errmsg("could not fork checkpointer process: %m")));
5350  break;
5351  case WalWriterProcess:
5352  ereport(LOG,
5353  (errmsg("could not fork WAL writer process: %m")));
5354  break;
5355  case WalReceiverProcess:
5356  ereport(LOG,
5357  (errmsg("could not fork WAL receiver process: %m")));
5358  break;
5359  default:
5360  ereport(LOG,
5361  (errmsg("could not fork process: %m")));
5362  break;
5363  }
5364 
5365  /*
5366  * fork failure is fatal during startup, but there's no need to choke
5367  * immediately if starting other child types fails.
5368  */
5369  if (type == StartupProcess)
5370  ExitPostmaster(1);
5371  return 0;
5372  }
5373 
5374  /*
5375  * in parent, successful fork
5376  */
5377  return pid;
5378 }
5379 
5380 /*
5381  * StartAutovacuumWorker
5382  * Start an autovac worker process.
5383  *
5384  * This function is here because it enters the resulting PID into the
5385  * postmaster's private backends list.
5386  *
5387  * NB -- this code very roughly matches BackendStartup.
5388  */
5389 static void
 /* NOTE(review): listing line 5390 (function name and parameter list) is missing from this extract; restore it from the upstream file. */
5391 {
5392  Backend *bn;
5393 
5394  /*
5395  * If not in condition to run a process, don't try, but handle it like a
5396  * fork failure. This does not normally happen, since the signal is only
5397  * supposed to be sent by autovacuum launcher when it's OK to do it, but
5398  * we have to check to avoid race-condition problems during DB state
5399  * changes.
5400  */
5401  if (canAcceptConnections() == CAC_OK)
5402  {
5403  /*
5404  * Compute the cancel key that will be assigned to this session. We
5405  * probably don't need cancel keys for autovac workers, but we'd
5406  * better have something random in the field to prevent unfriendly
5407  * people from sending cancels to them.
5408  */
 /* NOTE(review): listing line 5409 is missing here -- presumably the cancel-key generation check whose failure is logged below; confirm against upstream. */
5410  {
5411  ereport(LOG,
5412  (errcode(ERRCODE_INTERNAL_ERROR),
5413  errmsg("could not generate random cancel key")));
5414  return;
5415  }
5416 
 /* The Backend entry is allocated before the fork attempt so that out-of-memory is reported from this function. */
5417  bn = (Backend *) malloc(sizeof(Backend));
5418  if (bn)
5419  {
5420  bn->cancel_key = MyCancelKey;
5421 
5422  /* Autovac workers are not dead_end and need a child slot */
5423  bn->dead_end = false;
 /* NOTE(review): listing line 5424 is missing here -- presumably the child-slot assignment the comment above refers to; confirm against upstream. */
5425  bn->bgworker_notify = false;
5426 
5427  bn->pid = StartAutoVacWorker();
5428  if (bn->pid > 0)
5429  {
 /* NOTE(review): listing line 5430 is missing here; confirm against upstream. */
5431  dlist_push_head(&BackendList, &bn->elem);
5432 #ifdef EXEC_BACKEND
5433  ShmemBackendArrayAdd(bn);
5434 #endif
5435  /* all OK */
5436  return;
5437  }
5438 
5439  /*
5440  * fork failed, fall through to report -- actual error message was
5441  * logged by StartAutoVacWorker
5442  */
 /* NOTE(review): listing line 5443 is missing here -- presumably the release of the child slot taken above; confirm against upstream. */
5444  free(bn);
5445  }
5446  else
5447  ereport(LOG,
5448  (errcode(ERRCODE_OUT_OF_MEMORY),
5449  errmsg("out of memory")));
5450  }
5451 
5452  /*
5453  * Report the failure to the launcher, if it's running. (If it's not, we
5454  * might not even be connected to shared memory, so don't try to call
5455  * AutoVacWorkerFailed.) Note that we also need to signal it so that it
5456  * responds to the condition, but we don't do that here, instead waiting
5457  * for ServerLoop to do it. This way we avoid a ping-pong signalling in
5458  * quick succession between the autovac launcher and postmaster in case
5459  * things get ugly.
5460  */
5461  if (AutoVacPID != 0)
5462  {
 /* NOTE(review): listing line 5463 is missing here -- presumably the AutoVacWorkerFailed call mentioned in the comment above; confirm against upstream. */
5464  avlauncher_needs_signal = true;
5465  }
5466 }
5467 
5468 /*
5469  * MaybeStartWalReceiver
5470  * Start the WAL receiver process, if not running and our state allows.
5471  */
5472 static void
 /* NOTE(review): listing line 5473 (function name and parameter list) is missing from this extract. */
5474 {
 /* Only act when no WAL receiver PID is recorded, the postmaster state is one of those tested below, and no shutdown is in progress. */
5475  if (WalReceiverPID == 0 &&
5476  (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
 /* NOTE(review): listing line 5477 (the rest of the pmState test and its closing parenthesis) is missing here. */
5478  Shutdown == NoShutdown)
5479  {
 /* NOTE(review): listing line 5480 is missing here -- presumably the statement that launches the receiver and records its PID; confirm against upstream. */
5481  WalReceiverRequested = false;
5482  }
5483 }
5484 
5485 
5486 /*
5487  * Create the opts file
5488  */
5489 static bool
5490 CreateOptsFile(int argc, char *argv[], char *fullprogname)
5491 {
5492  FILE *fp;
5493  int i;
5494 
5495 #define OPTS_FILE "postmaster.opts"
5496 
5497  if ((fp = fopen(OPTS_FILE, "w")) == NULL)
5498  {
5499  elog(LOG, "could not create file \"%s\": %m", OPTS_FILE);
5500  return false;
5501  }
5502 
5503  fprintf(fp, "%s", fullprogname);
5504  for (i = 1; i < argc; i++)
5505  fprintf(fp, " \"%s\"", argv[i]);
5506  fputs("\n", fp);
5507 
5508  if (fclose(fp))
5509  {
5510  elog(LOG, "could not write file \"%s\": %m", OPTS_FILE);
5511  return false;
5512  }
5513 
5514  return true;
5515 }
5516 
5517 
5518 /*
5519  * MaxLivePostmasterChildren
5520  *
5521  * This reports the number of entries needed in per-child-process arrays
5522  * (the PMChildFlags array, and if EXEC_BACKEND the ShmemBackendArray).
5523  * These arrays include regular backends, autovac workers, walsenders
5524  * and background workers, but not special children nor dead_end children.
5525  * This allows the arrays to have a fixed maximum size, to wit the same
5526  * too-many-children limit enforced by canAcceptConnections(). The exact value
5527  * isn't too critical as long as it's more than MaxBackends.
5528  */
5529 int
 /* NOTE(review): listing line 5530 (function name and parameter list) is missing from this extract. */
5531 {
 /* Twice the sum of the configured limits; the final addend is cut off in this extract. */
5532  return 2 * (MaxConnections + autovacuum_max_workers + 1 +
 /* NOTE(review): listing line 5533 (last addend and the closing parenthesis) is missing here; restore from upstream. */
5534 }
5535 
5536 /*
5537  * Connect background worker to a database.
5538  */
5539 void
 /* NOTE(review): listing line 5540 (function name and parameter list) is missing; the body uses 'dbname' and 'username', which are presumably its parameters. */
5541 {
 /* NOTE(review): listing line 5542 is missing here -- presumably a local declaration; confirm against upstream. */
5543 
5544  /* XXX is this the right errcode? */
 /* NOTE(review): listing line 5545 (the condition guarding the FATAL report below) is missing here. */
5546  ereport(FATAL,
5547  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5548  errmsg("database connection requirement not indicated during registration")));
5549 
 /* Connect by name: database and user are given as strings, the OID arguments are InvalidOid. */
5550  InitPostgres(dbname, InvalidOid, username, InvalidOid, NULL);
5551 
5552  /* it had better not have gotten out of "init" mode yet */
5553  if (!IsInitProcessingMode())
5554  ereport(ERROR,
5555  (errmsg("invalid processing mode in background worker")));
 /* NOTE(review): listing line 5556 is missing here -- presumably the switch out of init processing mode; confirm against upstream. */
5557 }
5558 
5559 /*
5560  * Connect background worker to a database using OIDs.
5561  */
5562 void
 /* NOTE(review): listing line 5563 (function name and parameter list) is missing; the body uses 'dboid' and 'useroid', which are presumably its parameters. */
5564 {
 /* NOTE(review): listing line 5565 is missing here -- presumably a local declaration; confirm against upstream. */
5566 
5567  /* XXX is this the right errcode? */
 /* NOTE(review): listing line 5568 (the condition guarding the FATAL report below) is missing here. */
5569  ereport(FATAL,
5570  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5571  errmsg("database connection requirement not indicated during registration")));
5572 
 /* Connect by OID: the name arguments are NULL, database and user are identified by dboid and useroid. */
5573  InitPostgres(NULL, dboid, NULL, useroid, NULL);
5574 
5575  /* it had better not have gotten out of "init" mode yet */
5576  if (!IsInitProcessingMode())
5577  ereport(ERROR,
5578  (errmsg("invalid processing mode in background worker")));
 /* NOTE(review): listing line 5579 is missing here -- presumably the switch out of init processing mode; confirm against upstream. */
5580 }
5581 
5582 /*
5583  * Block/unblock signals in a background worker
5584  */
5585 void
 /* NOTE(review): listing line 5586 (function name and parameter list) is missing from this extract. */
5587 {
 /* Install BlockSig as the process signal mask. */
5588  PG_SETMASK(&BlockSig);
5589 }
5590 
5591 void
 /* NOTE(review): listing line 5592 (function name and parameter list) is missing from this extract. */
5593 {
 /* NOTE(review): listing line 5594 (the only statement of the body) is missing -- presumably the counterpart of the PG_SETMASK call in the blocking function; confirm against upstream. */
5595 }
5596 
#ifdef EXEC_BACKEND
/*
 * bgworker_forkexec
 *		Build the argument vector for a background worker child and hand it
 *		to postmaster_forkexec.
 *
 * shmem_slot is passed to the child on its command line as
 * "--forkbgworker=<slot>".  Returns whatever postmaster_forkexec returns.
 */
static pid_t
bgworker_forkexec(int shmem_slot)
{
	char		slot_arg[MAXPGPATH];
	char	   *argvec[10];
	int			nargs;

	snprintf(slot_arg, sizeof(slot_arg), "--forkbgworker=%d", shmem_slot);

	argvec[0] = "postgres";
	argvec[1] = slot_arg;
	argvec[2] = NULL;			/* filled in by postmaster_forkexec */
	argvec[3] = NULL;			/* terminator, not counted in nargs */
	nargs = 3;

	Assert(nargs < lengthof(argvec));

	return postmaster_forkexec(nargs, argvec);
}
#endif
5617 
5618 /*
5619  * Start a new bgworker.
5620  * Starting time conditions must have been checked already.
5621  *
5622  * Returns true on success, false on failure.
5623  * In either case, update the RegisteredBgWorker's state appropriately.
5624  *
5625  * This code is heavily based on autovacuum.c, q.v.
5626  */
5627 static bool
 /* NOTE(review): listing line 5628 (function name and parameter list) is missing; the body uses 'rw', which is presumably its only parameter. */
5629 {
5630  pid_t worker_pid;
5631 
 /* The worker must not already have a PID recorded. */
5632  Assert(rw->rw_pid == 0);
5633 
5634  /*
5635  * Allocate and assign the Backend element. Note we must do this before
5636  * forking, so that we can handle out of memory properly.
5637  *
5638  * Treat failure as though the worker had crashed. That way, the
5639  * postmaster will wait a bit before attempting to start it again; if it
5640  * tried again right away, most likely it'd find itself repeating the
5641  * out-of-memory or fork failure condition.
5642  */
5643  if (!assign_backendlist_entry(rw))
5644  {
 /* NOTE(review): listing line 5645 is missing here -- presumably the statement that marks the worker as crashed, per the comment above; confirm against upstream. */
5646  return false;
5647  }
5648 
5649  ereport(DEBUG1,
5650  (errmsg("starting background worker process \"%s\"",
5651  rw->rw_worker.bgw_name)));
5652 
 /* The child is created via bgworker_forkexec under EXEC_BACKEND and via fork_process otherwise. */
5653 #ifdef EXEC_BACKEND
5654  switch ((worker_pid = bgworker_forkexec(rw->rw_shmem_slot)))
5655 #else
5656  switch ((worker_pid = fork_process()))
5657 #endif
5658  {
5659  case -1:
5660  /* in postmaster, fork failed ... */
5661  ereport(LOG,
5662  (errmsg("could not fork worker process: %m")));
5663  /* undo what assign_backendlist_entry did */
 /* NOTE(review): listing line 5664 is missing here -- presumably the release of the child slot; confirm against upstream. */
5665  rw->rw_child_slot = 0;
5666  free(rw->rw_backend);
5667  rw->rw_backend = NULL;
5668  /* mark entry as crashed, so we'll try again later */
 /* NOTE(review): listing line 5669 (the statement the comment above describes) is missing here. */
5670  break;
5671 
5672 #ifndef EXEC_BACKEND
5673  case 0:
5674  /* in postmaster child ... */
 /* NOTE(review): listing line 5675 is missing here; confirm against upstream. */
5676 
5677  /* Close the postmaster's sockets */
5678  ClosePostmasterPorts(false);
5679 
5680  /*
5681  * Before blowing away PostmasterContext, save this bgworker's
5682  * data where it can find it.
5683  */
5684  MyBgworkerEntry = (BackgroundWorker *)
 /* NOTE(review): listing line 5685 (the allocation expression completing the assignment above) is missing here. */
5686  memcpy(MyBgworkerEntry, &rw->rw_worker, sizeof(BackgroundWorker));
5687 
5688  /* Release postmaster's working memory context */
 /* NOTE(review): listing lines 5689-5690 are missing here -- presumably the context switch and deletion described above; confirm against upstream. */
5691  PostmasterContext = NULL;
5692 
 /* NOTE(review): listing line 5693 is missing here -- presumably the call that runs the worker and is not expected to return, given the exit(1) below. */
5694 
5695  exit(1); /* should not get here */
5696  break;
5697 #endif
5698  default:
5699  /* in postmaster, fork successful ... */
5700  rw->rw_pid = worker_pid;
5701  rw->rw_backend->pid = rw->rw_pid;
 /* NOTE(review): listing line 5702 is missing here; confirm against upstream. */
5703  /* add new worker to lists of backends */
5704  dlist_push_head(&BackendList, &rw->rw_backend->elem);
5705 #ifdef EXEC_BACKEND
5706  ShmemBackendArrayAdd(rw->rw_backend);
5707 #endif
5708  return true;
5709  }
5710 
 /* Only the fork-failure case breaks out of the switch and reaches this point. */
5711  return false;
5712 }
5713 
5714 /*
5715  * Does the current postmaster state require starting a worker with the
5716  * specified start_time?
5717  */
5718 static bool
 /* NOTE(review): listing line 5719 (function name and parameter list) is missing; the body uses 'start_time', which is presumably its only parameter. */
5720 {
 /*
  * The cases below cascade on purpose: each state accepts its own
  * start_time value and then falls through to the checks of the states
  * listed after it.  PM_RUN therefore accepts all three start_time values
  * tested here, PM_HOT_STANDBY accepts two, and the remaining states only
  * BgWorkerStart_PostmasterStart.
  */
5721  switch (pmState)
5722  {
 /* In these states no start_time qualifies; control leaves the switch and false is returned. */
5723  case PM_NO_CHILDREN:
5724  case PM_WAIT_DEAD_END:
5725  case PM_SHUTDOWN_2:
5726  case PM_SHUTDOWN:
5727  case PM_WAIT_BACKENDS:
5728  case PM_WAIT_READONLY:
5729  case PM_WAIT_BACKUP:
5730  break;
5731 
5732  case PM_RUN:
5733  if (start_time == BgWorkerStart_RecoveryFinished)
5734  return true;
5735  /* fall through */
5736 
5737  case PM_HOT_STANDBY:
5738  if (start_time == BgWorkerStart_ConsistentState)
5739  return true;
5740  /* fall through */
5741 
5742  case PM_RECOVERY:
5743  case PM_STARTUP:
5744  case PM_INIT:
5745  if (start_time == BgWorkerStart_PostmasterStart)
5746  return true;
5747  /* fall through */
5748 
5749  }
5750 
 /* No case matched the requested start_time. */
5751  return false;
5752 }
5753 
5754 /*
5755  * Allocate the Backend struct for a connected background worker, but don't
5756  * add it to the list of backends just yet.
5757  *
5758  * On failure, return false without changing any worker state.
5759  *
5760  * Some info from the Backend is copied into the passed rw.
5761  */
5762 static bool
 /* NOTE(review): listing line 5763 (function name and parameter list) is missing; the body uses 'rw', which is presumably its only parameter. */
5764 {
5765  Backend *bn;
5766 
5767  /*
5768  * Compute the cancel key that will be assigned to this session. We
5769  * probably don't need cancel keys for background workers, but we'd better
5770  * have something random in the field to prevent unfriendly people from
5771  * sending cancels to them.
5772  */
 /* NOTE(review): listing line 5773 is missing here -- presumably the cancel-key generation check whose failure is logged below; confirm against upstream. */
5774  {
5775  ereport(LOG,
5776  (errcode(ERRCODE_INTERNAL_ERROR),
5777  errmsg("could not generate random cancel key")));
5778  return false;
5779  }
5780 
5781  bn = malloc(sizeof(Backend));
5782  if (bn == NULL)
5783  {
5784  ereport(LOG,
5785  (errcode(ERRCODE_OUT_OF_MEMORY),
5786  errmsg("out of memory")));
5787  return false;
5788  }
5789 
5790  bn->cancel_key = MyCancelKey;
 /* NOTE(review): listing lines 5791-5792 are missing here -- presumably including the assignment of bn->child_slot, which is read below; confirm against upstream. */
5793  bn->dead_end = false;
5794  bn->bgworker_notify = false;
5795 
 /* Hand the new entry to the caller through rw; it is not linked into any list here. */
5796  rw->rw_backend = bn;
5797  rw->rw_child_slot = bn->child_slot;
5798 
5799  return true;
5800 }
5801 
5802 /*
5803  * If the time is right, start background worker(s).
5804  *
5805  * As a side effect, the bgworker control variables are set or reset
5806  * depending on whether more workers may need to be started.
5807  *
5808  * We limit the number of workers started per call, to avoid consuming the
5809  * postmaster's attention for too long when many such requests are pending.
5810  * As long as StartWorkerNeeded is true, ServerLoop will not block and will
5811  * call this function again