PostgreSQL Source Code  git master
pqcomm.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * pqcomm.c
4  * Communication functions between the Frontend and the Backend
5  *
6  * These routines handle the low-level details of communication between
7  * frontend and backend. They just shove data across the communication
8  * channel, and are ignorant of the semantics of the data.
9  *
10  * To emit an outgoing message, use the routines in pqformat.c to construct
11  * the message in a buffer and then emit it in one call to pq_putmessage.
12  * There are no functions to send raw bytes or partial messages; this
13  * ensures that the channel will not be clogged by an incomplete message if
14  * execution is aborted by ereport(ERROR) partway through the message.
15  *
16  * At one time, libpq was shared between frontend and backend, but now
17  * the backend's "backend/libpq" is quite separate from "interfaces/libpq".
18  * All that remains is similarities of names to trap the unwary...
19  *
20  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
21  * Portions Copyright (c) 1994, Regents of the University of California
22  *
23  * src/backend/libpq/pqcomm.c
24  *
25  *-------------------------------------------------------------------------
26  */
27 
28 /*------------------------
29  * INTERFACE ROUTINES
30  *
31  * setup/teardown:
32  * StreamServerPort - Open postmaster's server port
33  * StreamConnection - Create new connection with client
34  * StreamClose - Close a client/backend connection
35  * TouchSocketFiles - Protect socket files against /tmp cleaners
36  * pq_init - initialize libpq at backend startup
37  * socket_comm_reset - reset libpq during error recovery
38  * socket_close - shutdown libpq at backend exit
39  *
40  * low-level I/O:
41  * pq_getbytes - get a known number of bytes from connection
42  * pq_getmessage - get a message with length word from connection
43  * pq_getbyte - get next byte from connection
44  * pq_peekbyte - peek at next byte from connection
45  * pq_flush - flush pending output
46  * pq_flush_if_writable - flush pending output if writable without blocking
47  * pq_getbyte_if_available - get a byte if available without blocking
48  *
49  * message-level I/O
50  * pq_putmessage - send a normal message (suppressed in COPY OUT mode)
51  * pq_putmessage_noblock - buffer a normal message (suppressed in COPY OUT)
52  *
53  *------------------------
54  */
55 #include "postgres.h"
56 
57 #ifdef HAVE_POLL_H
58 #include <poll.h>
59 #endif
60 #include <signal.h>
61 #include <fcntl.h>
62 #include <grp.h>
63 #include <unistd.h>
64 #include <sys/file.h>
65 #include <sys/socket.h>
66 #include <sys/stat.h>
67 #include <sys/time.h>
68 #include <netdb.h>
69 #include <netinet/in.h>
70 #ifdef HAVE_NETINET_TCP_H
71 #include <netinet/tcp.h>
72 #endif
73 #include <utime.h>
74 #ifdef _MSC_VER /* mstcpip.h is missing on mingw */
75 #include <mstcpip.h>
76 #endif
77 
78 #include "common/ip.h"
79 #include "libpq/libpq.h"
80 #include "miscadmin.h"
81 #include "port/pg_bswap.h"
82 #include "storage/ipc.h"
83 #include "utils/guc.h"
84 #include "utils/memutils.h"
85 
86 /*
87  * Cope with the various platform-specific ways to spell TCP keepalive socket
88  * options. This doesn't cover Windows, which as usual does its own thing.
89  */
/*
 * Pick the platform's spelling of the "idle time before keepalives" socket
 * option.  The first matching branch defines both the option constant
 * (PG_TCP_KEEPALIVE_IDLE) and its name as a string (PG_TCP_KEEPALIVE_IDLE_STR).
 */
90 #if defined(TCP_KEEPIDLE)
91 /* TCP_KEEPIDLE is the name of this option on Linux and *BSD */
92 #define PG_TCP_KEEPALIVE_IDLE TCP_KEEPIDLE
93 #define PG_TCP_KEEPALIVE_IDLE_STR "TCP_KEEPIDLE"
94 #elif defined(TCP_KEEPALIVE_THRESHOLD)
95 /* TCP_KEEPALIVE_THRESHOLD is the name of this option on Solaris >= 11 */
96 #define PG_TCP_KEEPALIVE_IDLE TCP_KEEPALIVE_THRESHOLD
97 #define PG_TCP_KEEPALIVE_IDLE_STR "TCP_KEEPALIVE_THRESHOLD"
98 #elif defined(TCP_KEEPALIVE) && defined(__darwin__)
99 /* TCP_KEEPALIVE is the name of this option on macOS */
100 /* Caution: Solaris has this symbol but it means something different */
101 #define PG_TCP_KEEPALIVE_IDLE TCP_KEEPALIVE
102 #define PG_TCP_KEEPALIVE_IDLE_STR "TCP_KEEPALIVE"
/*
 * There is no #else branch: when none of the spellings above is available,
 * both PG_TCP_KEEPALIVE_IDLE and PG_TCP_KEEPALIVE_IDLE_STR stay undefined,
 * so users of these macros must test for them with #ifdef/#if defined().
 */
103 #endif
104 
105 /*
106  * Configuration options
107  */
110 
111 /* Where the Unix socket files are (list of palloc'd strings) */
112 static List *sock_paths = NIL;
113 
114 /*
115  * Buffers for low-level I/O.
116  *
117  * The receive buffer is fixed size. Send buffer is usually 8k, but can be
118  * enlarged by pq_putmessage_noblock() if the message doesn't fit otherwise.
119  */
120 
121 #define PQ_SEND_BUFFER_SIZE 8192
122 #define PQ_RECV_BUFFER_SIZE 8192
123 
/* Send side: buffer pointer plus its size and the store/send cursors. */
124 static char *PqSendBuffer;
125 static int PqSendBufferSize; /* Size of the send buffer */
126 static int PqSendPointer; /* Next index to store a byte in PqSendBuffer */
127 static int PqSendStart; /* Next index to send a byte in PqSendBuffer */
128 
/*
 * Receive side cursors.
 * NOTE(review): the declaration of PqRecvBuffer itself is not visible in this
 * listing (embedded line numbers jump from 128 to 130), although the comments
 * below and later code refer to it -- confirm against the real source.
 */
130 static int PqRecvPointer; /* Next index to read a byte from PqRecvBuffer */
131 static int PqRecvLength; /* End of data available in PqRecvBuffer */
132 
133 /*
134  * Message status
135  */
136 static bool PqCommBusy; /* busy sending data to the client */
137 static bool PqCommReadingMsg; /* in the middle of reading a message */
138 
139 
140 /* Internal functions */
141 static void socket_comm_reset(void);
142 static void socket_close(int code, Datum arg);
143 static void socket_set_nonblocking(bool nonblocking);
144 static int socket_flush(void);
145 static int socket_flush_if_writable(void);
146 static bool socket_is_send_pending(void);
147 static int socket_putmessage(char msgtype, const char *s, size_t len);
148 static void socket_putmessage_noblock(char msgtype, const char *s, size_t len);
149 static int internal_putbytes(const char *s, size_t len);
150 static int internal_flush(void);
151 
152 #ifdef HAVE_UNIX_SOCKETS
153 static int Lock_AF_UNIX(const char *unixSocketDir, const char *unixSocketPath);
154 static int Setup_AF_UNIX(const char *sock_path);
155 #endif /* HAVE_UNIX_SOCKETS */
156 
159  socket_flush,
164 };
165 
167 
169 
170 
171 /* --------------------------------
172  * pq_init - initialize libpq at backend startup
173  * --------------------------------
174  */
175 void
176 pq_init(void)
177 {
 /*
  * These two only feed the Assert() checks at the end of the function,
  * hence the PG_USED_FOR_ASSERTS_ONLY marking.
  */
178  int socket_pos PG_USED_FOR_ASSERTS_ONLY;
179  int latch_pos PG_USED_FOR_ASSERTS_ONLY;
180 
181  /* initialize state variables */
 /*
  * NOTE(review): the embedded line numbers jump from 181 to 185, so some
  * initializations are missing from this listing; only the two status
  * flags below are visibly reset.
  */
185  PqCommBusy = false;
186  PqCommReadingMsg = false;
187 
188  /* set up process-exit hook to close the socket */
 /*
  * NOTE(review): the statement that registers the hook (listing line 189)
  * is not visible here.
  */
190 
191  /*
192  * In backends (as soon as forked) we operate the underlying socket in
193  * nonblocking mode and use latches to implement blocking semantics if
194  * needed. That allows us to provide safely interruptible reads and
195  * writes.
196  *
197  * Use COMMERROR on failure, because ERROR would try to send the error to
198  * the client, which might require changing the mode again, leading to
199  * infinite recursion.
200  */
201 #ifndef WIN32
 /*
  * NOTE(review): listing lines 202-203 are missing; only the tail of the
  * error report for a failed switch to nonblocking mode remains.
  */
204  (errmsg("could not set socket to nonblocking mode: %m")));
205 #endif
206 
 /*
  * NOTE(review): the next three lines are the trailing arguments of calls
  * whose beginnings (listing lines 207-208, 210, 212) are missing.  From
  * the Asserts below, two of them presumably produce socket_pos and
  * latch_pos -- confirm against the real source.
  */
209  MyProcPort->sock, NULL, NULL);
211  MyLatch, NULL);
213  NULL, NULL);
214 
215  /*
216  * The event positions match the order we added them, but let's sanity
217  * check them to be sure.
218  */
219  Assert(socket_pos == FeBeWaitSetSocketPos);
220  Assert(latch_pos == FeBeWaitSetLatchPos);
221 }
222 
223 /* --------------------------------
224  * socket_comm_reset - reset libpq during error recovery
225  *
226  * This is called from error recovery at the outer idle loop. It's
227  * just to get us out of trouble if we somehow manage to elog() from
228  * inside a pqcomm.c routine (which ideally will never happen, but...)
229  * --------------------------------
230  */
231 static void
233 {
234  /* Do not throw away pending data, but do reset the busy flag */
235  PqCommBusy = false;
236 }
237 
238 /* --------------------------------
239  * socket_close - shutdown libpq at backend exit
240  *
241  * This is the one pg_on_exit_callback in place during BackendInitialize().
242  * That function's unusual signal handling constrains that this callback be
243  * safe to run at any instant.
244  * --------------------------------
245  */
246 static void
248 {
249  /* Nothing to do in a standalone backend, where MyProcPort is NULL. */
250  if (MyProcPort != NULL)
251  {
252 #ifdef ENABLE_GSS
253  /*
254  * Shutdown GSSAPI layer. This section does nothing when interrupting
255  * BackendInitialize(), because pg_GSS_recvauth() makes first use of
256  * "ctx" and "cred".
257  *
258  * Note that we don't bother to free MyProcPort->gss, since we're
259  * about to exit anyway.
260  */
261  if (MyProcPort->gss)
262  {
263  OM_uint32 min_s;
264 
265  if (MyProcPort->gss->ctx != GSS_C_NO_CONTEXT)
266  gss_delete_sec_context(&min_s, &MyProcPort->gss->ctx, NULL);
267 
268  if (MyProcPort->gss->cred != GSS_C_NO_CREDENTIAL)
269  gss_release_cred(&min_s, &MyProcPort->gss->cred);
270  }
271 #endif /* ENABLE_GSS */
272 
273  /*
274  * Cleanly shut down SSL layer. Nowhere else does a postmaster child
275  * call this, so this is safe when interrupting BackendInitialize().
276  */
278 
279  /*
280  * On most platforms, we leave the socket open until the process dies.
281  * This allows clients to perform a "synchronous close" if they care
282  * --- wait till the transport layer reports connection closure, and
283  * you can be sure the backend has exited. Saves a kernel call, too.
284  *
285  * However, that does not work on Windows: if the kernel closes the
286  * socket it will invoke an "abortive shutdown" that discards any data
287  * not yet sent to the client. (This is a flat-out violation of the
288  * TCP RFCs, but count on Microsoft not to care about that.) To get
289  * the spec-compliant "graceful shutdown" behavior, we must invoke
290  * closesocket() explicitly. When using OpenSSL, it seems that clean
291  * shutdown also requires an explicit shutdown() call.
292  *
293  * This code runs late enough during process shutdown that we should
294  * have finished all externally-visible shutdown activities, so that
295  * in principle it's good enough to act as a synchronous close on
296  * Windows too. But it's a lot more fragile than the other way.
297  */
298 #ifdef WIN32
299  shutdown(MyProcPort->sock, SD_SEND);
301 #endif
302 
303  /* In any case, set sock to PGINVALID_SOCKET to prevent further I/O */
305  }
306 }
307 
308 
309 
310 /*
311  * Streams -- wrapper around Unix socket system calls
312  *
313  *
314  * Stream functions are used for vanilla TCP connection protocol.
315  */
316 
317 
318 /*
319  * StreamServerPort -- open a "listening" port to accept connections.
320  *
321  * family should be AF_UNIX or AF_UNSPEC; portNumber is the port number.
322  * For AF_UNIX ports, hostName should be NULL and unixSocketDir must be
323  * specified. For TCP ports, hostName is either NULL for all interfaces or
324  * the interface to listen on, and unixSocketDir is ignored (can be NULL).
325  *
326  * Successfully opened sockets are added to the ListenSocket[] array (of
327  * length MaxListen), at the first position that isn't PGINVALID_SOCKET.
328  *
329  * RETURNS: STATUS_OK or STATUS_ERROR
330  */
331 
332 int
333 StreamServerPort(int family, const char *hostName, unsigned short portNumber,
334  const char *unixSocketDir,
335  pgsocket ListenSocket[], int MaxListen)
336 {
 /*
  * Overview: resolve (family, hostName, portNumber) into a list of
  * addresses, then for each address create a socket, set options, bind
  * and listen.  A failure on one address is logged at LOG level and that
  * address is normally skipped; the function reports STATUS_OK as long as
  * at least one socket ended up in ListenSocket[].
  */
337  pgsocket fd;
338  int err;
339  int maxconn;
340  int ret;
341  char portNumberStr[32];
342  const char *familyDesc;
343  char familyDescBuf[64];
344  const char *addrDesc;
345  char addrBuf[NI_MAXHOST];
346  char *service;
347  struct addrinfo *addrs = NULL,
348  *addr;
349  struct addrinfo hint;
 /*
  * listen_index is deliberately kept across iterations of the address
  * loop: the free-slot scan resumes from where the previous one stopped.
  */
350  int listen_index = 0;
 /* number of sockets successfully stored into ListenSocket[] by this call */
351  int added = 0;
352 
353 #ifdef HAVE_UNIX_SOCKETS
354  char unixSocketPath[MAXPGPATH];
355 #endif
356 #if !defined(WIN32) || defined(IPV6_V6ONLY)
 /* value passed to the boolean setsockopt() calls below */
357  int one = 1;
358 #endif
359 
360  /* Initialize hint structure */
361  MemSet(&hint, 0, sizeof(hint));
362  hint.ai_family = family;
363  hint.ai_flags = AI_PASSIVE;
364  hint.ai_socktype = SOCK_STREAM;
365 
 /*
  * The "service" handed to the resolver is the socket file path for
  * AF_UNIX, and the decimal port number otherwise.
  */
366 #ifdef HAVE_UNIX_SOCKETS
367  if (family == AF_UNIX)
368  {
369  /*
370  * Create unixSocketPath from portNumber and unixSocketDir and lock
371  * that file path
372  */
373  UNIXSOCK_PATH(unixSocketPath, portNumber, unixSocketDir);
374  if (strlen(unixSocketPath) >= UNIXSOCK_PATH_BUFLEN)
375  {
376  ereport(LOG,
377  (errmsg("Unix-domain socket path \"%s\" is too long (maximum %d bytes)",
378  unixSocketPath,
379  (int) (UNIXSOCK_PATH_BUFLEN - 1))));
380  return STATUS_ERROR;
381  }
382  if (Lock_AF_UNIX(unixSocketDir, unixSocketPath) != STATUS_OK)
383  return STATUS_ERROR;
384  service = unixSocketPath;
385  }
386  else
387 #endif /* HAVE_UNIX_SOCKETS */
388  {
389  snprintf(portNumberStr, sizeof(portNumberStr), "%d", portNumber);
390  service = portNumberStr;
391  }
392 
393  ret = pg_getaddrinfo_all(hostName, service, &hint, &addrs);
 /* An empty result list is treated the same as a lookup failure. */
394  if (ret || !addrs)
395  {
396  if (hostName)
397  ereport(LOG,
398  (errmsg("could not translate host name \"%s\", service \"%s\" to address: %s",
399  hostName, service, gai_strerror(ret))));
400  else
401  ereport(LOG,
402  (errmsg("could not translate service \"%s\" to address: %s",
403  service, gai_strerror(ret))));
404  if (addrs)
405  pg_freeaddrinfo_all(hint.ai_family, addrs);
406  return STATUS_ERROR;
407  }
408 
409  for (addr = addrs; addr; addr = addr->ai_next)
410  {
411  if (!IS_AF_UNIX(family) && IS_AF_UNIX(addr->ai_family))
412  {
413  /*
414  * Only set up a unix domain socket when they really asked for it.
415  * The service/port is different in that case.
416  */
417  continue;
418  }
419 
420  /* See if there is still room to add 1 more socket. */
421  for (; listen_index < MaxListen; listen_index++)
422  {
423  if (ListenSocket[listen_index] == PGINVALID_SOCKET)
424  break;
425  }
 /* No free slot left: stop processing addresses altogether. */
426  if (listen_index >= MaxListen)
427  {
428  ereport(LOG,
429  (errmsg("could not bind to all requested addresses: MAXLISTEN (%d) exceeded",
430  MaxListen)));
431  break;
432  }
433 
434  /* set up address family name for log messages */
435  switch (addr->ai_family)
436  {
437  case AF_INET:
438  familyDesc = _("IPv4");
439  break;
440 #ifdef HAVE_IPV6
441  case AF_INET6:
442  familyDesc = _("IPv6");
443  break;
444 #endif
445 #ifdef HAVE_UNIX_SOCKETS
446  case AF_UNIX:
447  familyDesc = _("Unix");
448  break;
449 #endif
450  default:
451  snprintf(familyDescBuf, sizeof(familyDescBuf),
452  _("unrecognized address family %d"),
453  addr->ai_family);
454  familyDesc = familyDescBuf;
455  break;
456  }
457 
458  /* set up text form of address for log messages */
459 #ifdef HAVE_UNIX_SOCKETS
460  if (addr->ai_family == AF_UNIX)
461  addrDesc = unixSocketPath;
462  else
463 #endif
464  {
465  pg_getnameinfo_all((const struct sockaddr_storage *) addr->ai_addr,
466  addr->ai_addrlen,
467  addrBuf, sizeof(addrBuf),
468  NULL, 0,
 /*
  * NOTE(review): the final argument and closing parenthesis of the call
  * above (listing line 469) are missing from this listing.
  */
470  addrDesc = addrBuf;
471  }
472 
473  if ((fd = socket(addr->ai_family, SOCK_STREAM, 0)) == PGINVALID_SOCKET)
474  {
475  ereport(LOG,
 /* NOTE(review): listing line 476 (start of the ereport argument list) is missing here. */
477  /* translator: first %s is IPv4, IPv6, or Unix */
478  errmsg("could not create %s socket for address \"%s\": %m",
479  familyDesc, addrDesc)));
480  continue;
481  }
482 
483 #ifndef WIN32
484 
485  /*
486  * Without the SO_REUSEADDR flag, a new postmaster can't be started
487  * right away after a stop or crash, giving "address already in use"
488  * error on TCP ports.
489  *
490  * On win32, however, this behavior only happens if the
491  * SO_EXCLUSIVEADDRUSE is set. With SO_REUSEADDR, win32 allows
492  * multiple servers to listen on the same address, resulting in
493  * unpredictable behavior. With no flags at all, win32 behaves as Unix
494  * with SO_REUSEADDR.
495  */
496  if (!IS_AF_UNIX(addr->ai_family))
497  {
498  if ((setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
499  (char *) &one, sizeof(one))) == -1)
500  {
501  ereport(LOG,
 /* NOTE(review): listing line 502 (start of the ereport argument list) is missing here. */
503  /* translator: third %s is IPv4, IPv6, or Unix */
504  errmsg("%s(%s) failed for %s address \"%s\": %m",
505  "setsockopt", "SO_REUSEADDR",
506  familyDesc, addrDesc)));
507  closesocket(fd);
508  continue;
509  }
510  }
511 #endif
512 
 /* Request an IPv6-only socket when the platform offers that option. */
513 #ifdef IPV6_V6ONLY
514  if (addr->ai_family == AF_INET6)
515  {
516  if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY,
517  (char *) &one, sizeof(one)) == -1)
518  {
519  ereport(LOG,
 /* NOTE(review): listing line 520 (start of the ereport argument list) is missing here. */
521  /* translator: third %s is IPv4, IPv6, or Unix */
522  errmsg("%s(%s) failed for %s address \"%s\": %m",
523  "setsockopt", "IPV6_V6ONLY",
524  familyDesc, addrDesc)));
525  closesocket(fd);
526  continue;
527  }
528  }
529 #endif
530 
531  /*
532  * Note: This might fail on some OS's, like Linux older than
533  * 2.4.21-pre3, that don't have the IPV6_V6ONLY socket option, and map
534  * ipv4 addresses to ipv6. It will show ::ffff:ipv4 for all ipv4
535  * connections.
536  */
537  err = bind(fd, addr->ai_addr, addr->ai_addrlen);
538  if (err < 0)
539  {
 /*
  * Capture errno right away; the hint selection below tests this
  * copy rather than errno itself (presumably so that it cannot be
  * disturbed while the report is being assembled).
  */
540  int saved_errno = errno;
541 
542  ereport(LOG,
 /* NOTE(review): listing line 543 (start of the ereport argument list) is missing here. */
544  /* translator: first %s is IPv4, IPv6, or Unix */
545  errmsg("could not bind %s address \"%s\": %m",
546  familyDesc, addrDesc),
547  saved_errno == EADDRINUSE ?
548  (IS_AF_UNIX(addr->ai_family) ?
549  errhint("Is another postmaster already running on port %d?",
550  (int) portNumber) :
551  errhint("Is another postmaster already running on port %d?"
552  " If not, wait a few seconds and retry.",
553  (int) portNumber)) : 0));
554  closesocket(fd);
555  continue;
556  }
557 
558 #ifdef HAVE_UNIX_SOCKETS
559  if (addr->ai_family == AF_UNIX)
560  {
 /*
  * Unlike the other failure paths, a Setup_AF_UNIX() failure leaves
  * the address loop entirely (break, not continue).
  */
561  if (Setup_AF_UNIX(service) != STATUS_OK)
562  {
563  closesocket(fd);
564  break;
565  }
566  }
567 #endif
568 
569  /*
570  * Select appropriate accept-queue length limit. PG_SOMAXCONN is only
571  * intended to provide a clamp on the request on platforms where an
572  * overly large request provokes a kernel error (are there any?).
573  */
574  maxconn = MaxBackends * 2;
575  if (maxconn > PG_SOMAXCONN)
576  maxconn = PG_SOMAXCONN;
577 
578  err = listen(fd, maxconn);
579  if (err < 0)
580  {
581  ereport(LOG,
 /* NOTE(review): listing line 582 (start of the ereport argument list) is missing here. */
583  /* translator: first %s is IPv4, IPv6, or Unix */
584  errmsg("could not listen on %s address \"%s\": %m",
585  familyDesc, addrDesc)));
586  closesocket(fd);
587  continue;
588  }
589 
590 #ifdef HAVE_UNIX_SOCKETS
591  if (addr->ai_family == AF_UNIX)
592  ereport(LOG,
593  (errmsg("listening on Unix socket \"%s\"",
594  addrDesc)));
595  else
596 #endif
597  ereport(LOG,
598  /* translator: first %s is IPv4 or IPv6 */
599  (errmsg("listening on %s address \"%s\", port %d",
600  familyDesc, addrDesc, (int) portNumber)));
601 
 /* Hand the socket over to the caller's array; it is no longer ours to close. */
602  ListenSocket[listen_index] = fd;
603  added++;
604  }
605 
606  pg_freeaddrinfo_all(hint.ai_family, addrs);
607 
 /* Succeed if at least one listening socket was set up. */
608  if (!added)
609  return STATUS_ERROR;
610 
611  return STATUS_OK;
612 }
613 
614 
615 #ifdef HAVE_UNIX_SOCKETS
616 
617 /*
618  * Lock_AF_UNIX -- configure unix socket file path
619  */
620 static int
621 Lock_AF_UNIX(const char *unixSocketDir, const char *unixSocketPath)
622 {
623  /* no lock file for abstract sockets */
624  if (unixSocketPath[0] == '@')
625  return STATUS_OK;
626 
627  /*
628  * Grab an interlock file associated with the socket file.
629  *
630  * Note: there are two reasons for using a socket lock file, rather than
631  * trying to interlock directly on the socket itself. First, it's a lot
632  * more portable, and second, it lets us remove any pre-existing socket
633  * file without race conditions.
634  */
635  CreateSocketLockFile(unixSocketPath, true, unixSocketDir);
636 
637  /*
638  * Once we have the interlock, we can safely delete any pre-existing
639  * socket file to avoid failure at bind() time.
640  */
641  (void) unlink(unixSocketPath);
642 
643  /*
644  * Remember socket file pathnames for later maintenance.
645  */
646  sock_paths = lappend(sock_paths, pstrdup(unixSocketPath));
647 
648  return STATUS_OK;
649 }
650 
651 
652 /*
653  * Setup_AF_UNIX -- configure unix socket permissions
654  */
655 static int
656 Setup_AF_UNIX(const char *sock_path)
657 {
 /*
  * Applies the configured group (Unix_socket_group) and permission bits
  * (Unix_socket_permissions) to the socket file at sock_path.  Returns
  * STATUS_OK on success or when nothing needs doing, STATUS_ERROR after
  * logging a failure.
  */
658  /* no file system permissions for abstract sockets */
659  if (sock_path[0] == '@')
660  return STATUS_OK;
661 
662  /*
663  * Fix socket ownership/permission if requested. Note we must do this
664  * before we listen() to avoid a window where unwanted connections could
665  * get accepted.
666  */
 /* NOTE(review): listing line 667 is missing here. */
 /* An empty group setting means "leave the group alone". */
668  if (Unix_socket_group[0] != '\0')
669  {
670 #ifdef WIN32
671  elog(WARNING, "configuration item unix_socket_group is not supported on this platform");
672 #else
673  char *endptr;
674  unsigned long val;
675  gid_t gid;
676 
 /*
  * The setting counts as a numeric gid only if strtoul() consumed the
  * whole string; anything else (e.g. digits followed by letters) is
  * looked up as a group name.
  */
677  val = strtoul(Unix_socket_group, &endptr, 10);
678  if (*endptr == '\0')
679  { /* numeric group id */
 /* NOTE(review): val is stored into gid without a range check. */
680  gid = val;
681  }
682  else
683  { /* convert group name to id */
684  struct group *gr;
685 
686  gr = getgrnam(Unix_socket_group);
687  if (!gr)
688  {
689  ereport(LOG,
690  (errmsg("group \"%s\" does not exist",
 /*
  * NOTE(review): the argument line that completes the ereport above
  * (listing line 691) is missing from this listing.
  */
692  return STATUS_ERROR;
693  }
694  gid = gr->gr_gid;
695  }
 /* Owner argument of -1: only the group is being changed. */
696  if (chown(sock_path, -1, gid) == -1)
697  {
698  ereport(LOG,
 /* NOTE(review): listing line 699 (start of the ereport argument list) is missing here. */
700  errmsg("could not set group of file \"%s\": %m",
701  sock_path)));
702  return STATUS_ERROR;
703  }
704 #endif
705  }
706 
 /* The permission bits are applied unconditionally for non-abstract sockets. */
707  if (chmod(sock_path, Unix_socket_permissions) == -1)
708  {
709  ereport(LOG,
 /* NOTE(review): listing line 710 (start of the ereport argument list) is missing here. */
711  errmsg("could not set permissions of file \"%s\": %m",
712  sock_path)));
713  return STATUS_ERROR;
714  }
715  return STATUS_OK;
716 }
717 #endif /* HAVE_UNIX_SOCKETS */
718 
719 
720 /*
721  * StreamConnection -- create a new connection with client using
722  * server port. Set port->sock to the FD of the new connection.
723  *
724  * ASSUME: that this doesn't need to be non-blocking because
725  * the Postmaster uses select() to tell when the socket is ready for
726  * accept().
727  *
728  * RETURNS: STATUS_OK or STATUS_ERROR
729  */
730 int
732 {
733  /* accept connection and fill in the client (remote) address */
734  port->raddr.salen = sizeof(port->raddr.addr);
735  if ((port->sock = accept(server_fd,
736  (struct sockaddr *) &port->raddr.addr,
737  &port->raddr.salen)) == PGINVALID_SOCKET)
738  {
739  ereport(LOG,
741  errmsg("could not accept new connection: %m")));
742 
743  /*
744  * If accept() fails then postmaster.c will still see the server
745  * socket as read-ready, and will immediately try again. To avoid
746  * uselessly sucking lots of CPU, delay a bit before trying again.
747  * (The most likely reason for failure is being out of kernel file
748  * table slots; we can do little except hope some will get freed up.)
749  */
750  pg_usleep(100000L); /* wait 0.1 sec */
751  return STATUS_ERROR;
752  }
753 
754  /* fill in the server (local) address */
755  port->laddr.salen = sizeof(port->laddr.addr);
756  if (getsockname(port->sock,
757  (struct sockaddr *) &port->laddr.addr,
758  &port->laddr.salen) < 0)
759  {
760  ereport(LOG,
761  (errmsg("%s() failed: %m", "getsockname")));
762  return STATUS_ERROR;
763  }
764 
765  /* select NODELAY and KEEPALIVE options if it's a TCP connection */
766  if (!IS_AF_UNIX(port->laddr.addr.ss_family))
767  {
768  int on;
769 #ifdef WIN32
770  int oldopt;
771  int optlen;
772  int newopt;
773 #endif
774 
775 #ifdef TCP_NODELAY
776  on = 1;
777  if (setsockopt(port->sock, IPPROTO_TCP, TCP_NODELAY,
778  (char *) &on, sizeof(on)) < 0)
779  {
780  ereport(LOG,
781  (errmsg("%s(%s) failed: %m", "setsockopt", "TCP_NODELAY")));
782  return STATUS_ERROR;
783  }
784 #endif
785  on = 1;
786  if (setsockopt(port->sock, SOL_SOCKET, SO_KEEPALIVE,
787  (char *) &on, sizeof(on)) < 0)
788  {
789  ereport(LOG,
790  (errmsg("%s(%s) failed: %m", "setsockopt", "SO_KEEPALIVE")));
791  return STATUS_ERROR;
792  }
793 
794 #ifdef WIN32
795 
796  /*
797  * This is a Win32 socket optimization. The OS send buffer should be
798  * large enough to send the whole Postgres send buffer in one go, or
799  * performance suffers. The Postgres send buffer can be enlarged if a
800  * very large message needs to be sent, but we won't attempt to
801  * enlarge the OS buffer if that happens, so somewhat arbitrarily
802  * ensure that the OS buffer is at least PQ_SEND_BUFFER_SIZE * 4.
803  * (That's 32kB with the current default).
804  *
805  * The default OS buffer size used to be 8kB in earlier Windows
806  * versions, but was raised to 64kB in Windows 2012. So it shouldn't
807  * be necessary to change it in later versions anymore. Changing it
808  * unnecessarily can even reduce performance, because setting
809  * SO_SNDBUF in the application disables the "dynamic send buffering"
810  * feature that was introduced in Windows 7. So before fiddling with
811  * SO_SNDBUF, check if the current buffer size is already large enough
812  * and only increase it if necessary.
813  *
814  * See https://support.microsoft.com/kb/823764/EN-US/ and
815  * https://msdn.microsoft.com/en-us/library/bb736549%28v=vs.85%29.aspx
816  */
817  optlen = sizeof(oldopt);
818  if (getsockopt(port->sock, SOL_SOCKET, SO_SNDBUF, (char *) &oldopt,
819  &optlen) < 0)
820  {
821  ereport(LOG,
822  (errmsg("%s(%s) failed: %m", "getsockopt", "SO_SNDBUF")));
823  return STATUS_ERROR;
824  }
825  newopt = PQ_SEND_BUFFER_SIZE * 4;
826  if (oldopt < newopt)
827  {
828  if (setsockopt(port->sock, SOL_SOCKET, SO_SNDBUF, (char *) &newopt,
829  sizeof(newopt)) < 0)
830  {
831  ereport(LOG,
832  (errmsg("%s(%s) failed: %m", "setsockopt", "SO_SNDBUF")));
833  return STATUS_ERROR;
834  }
835  }
836 #endif
837 
838  /*
839  * Also apply the current keepalive parameters. If we fail to set a
840  * parameter, don't error out, because these aren't universally
841  * supported. (Note: you might think we need to reset the GUC
842  * variables to 0 in such a case, but it's not necessary because the
843  * show hooks for these variables report the truth anyway.)
844  */
849  }
850 
851  return STATUS_OK;
852 }
853 
854 /*
855  * StreamClose -- close a client/backend connection
856  *
857  * NOTE: this is NOT used to terminate a session; it is just used to release
858  * the file descriptor in a process that should no longer have the socket
859  * open. (For example, the postmaster calls this after passing ownership
860  * of the connection to a child process.) It is expected that someone else
861  * still has the socket open. So, we only want to close the descriptor,
862  * we do NOT want to send anything to the far end.
863  */
864 void
866 {
867  closesocket(sock);
868 }
869 
870 /*
871  * TouchSocketFiles -- mark socket files as recently accessed
872  *
873  * This routine should be called every so often to ensure that the socket
874  * files have a recent mod date (ordinary operations on sockets usually won't
875  * change the mod date). That saves them from being removed by
876  * overenthusiastic /tmp-directory-cleaner daemons. (Another reason we should
877  * never have put the socket file in /tmp...)
878  */
879 void
881 {
882  ListCell *l;
883 
884  /* Loop through all created sockets... */
885  foreach(l, sock_paths)
886  {
887  char *sock_path = (char *) lfirst(l);
888 
889  /* Ignore errors; there's no point in complaining */
890  (void) utime(sock_path, NULL);
891  }
892 }
893 
894 /*
895  * RemoveSocketFiles -- unlink socket files at postmaster shutdown
896  */
897 void
899 {
900  ListCell *l;
901 
902  /* Loop through all created sockets... */
903  foreach(l, sock_paths)
904  {
905  char *sock_path = (char *) lfirst(l);
906 
907  /* Ignore any error. */
908  (void) unlink(sock_path);
909  }
910  /* Since we're about to exit, no need to reclaim storage */
911  sock_paths = NIL;
912 }
913 
914 
915 /* --------------------------------
916  * Low-level I/O routines begin here.
917  *
918  * These routines communicate with a frontend client across a connection
919  * already established by the preceding routines.
920  * --------------------------------
921  */
922 
923 /* --------------------------------
924  * socket_set_nonblocking - set socket blocking/non-blocking
925  *
926  * Sets the socket non-blocking if nonblocking is true, or sets it
927  * blocking otherwise.
928  * --------------------------------
929  */
930 static void
931 socket_set_nonblocking(bool nonblocking)
932 {
933  if (MyProcPort == NULL)
934  ereport(ERROR,
935  (errcode(ERRCODE_CONNECTION_DOES_NOT_EXIST),
936  errmsg("there is no client connection")));
937 
938  MyProcPort->noblock = nonblocking;
939 }
940 
941 /* --------------------------------
942  * pq_recvbuf - load some bytes into the input buffer
943  *
944  * returns 0 if OK, EOF if trouble
945  * --------------------------------
946  */
947 static int
949 {
950  if (PqRecvPointer > 0)
951  {
953  {
954  /* still some unread data, left-justify it in the buffer */
958  PqRecvPointer = 0;
959  }
960  else
962  }
963 
964  /* Ensure that we're in blocking mode */
965  socket_set_nonblocking(false);
966 
967  /* Can fill buffer from PqRecvLength and upwards */
968  for (;;)
969  {
970  int r;
971 
974 
975  if (r < 0)
976  {
977  if (errno == EINTR)
978  continue; /* Ok if interrupted */
979 
980  /*
981  * Careful: an ereport() that tries to write to the client would
982  * cause recursion to here, leading to stack overflow and core
983  * dump! This message must go *only* to the postmaster log.
984  */
987  errmsg("could not receive data from client: %m")));
988  return EOF;
989  }
990  if (r == 0)
991  {
992  /*
993  * EOF detected. We used to write a log message here, but it's
994  * better to expect the ultimate caller to do that.
995  */
996  return EOF;
997  }
998  /* r contains number of bytes read, so just incr length */
999  PqRecvLength += r;
1000  return 0;
1001  }
1002 }
1003 
1004 /* --------------------------------
1005  * pq_getbyte - get a single byte from connection, or return EOF
1006  * --------------------------------
1007  */
1008 int
1010 {
1012 
1013  while (PqRecvPointer >= PqRecvLength)
1014  {
1015  if (pq_recvbuf()) /* If nothing in buffer, then recv some */
1016  return EOF; /* Failed to recv data */
1017  }
1018  return (unsigned char) PqRecvBuffer[PqRecvPointer++];
1019 }
1020 
1021 /* --------------------------------
1022  * pq_peekbyte - peek at next byte from connection
1023  *
1024  * Same as pq_getbyte() except we don't advance the pointer.
1025  * --------------------------------
1026  */
1027 int
1029 {
1031 
1032  while (PqRecvPointer >= PqRecvLength)
1033  {
1034  if (pq_recvbuf()) /* If nothing in buffer, then recv some */
1035  return EOF; /* Failed to recv data */
1036  }
1037  return (unsigned char) PqRecvBuffer[PqRecvPointer];
1038 }
1039 
1040 /* --------------------------------
1041  * pq_getbyte_if_available - get a single byte from connection,
1042  * if available
1043  *
1044  * The received byte is stored in *c. Returns 1 if a byte was read,
1045  * 0 if no data was available, or EOF if trouble.
1046  * --------------------------------
1047  */
1048 int
1050 {
1051  int r;
1052 
1054 
1056  {
1058  return 1;
1059  }
1060 
1061  /* Put the socket into non-blocking mode */
1062  socket_set_nonblocking(true);
1063 
1064  r = secure_read(MyProcPort, c, 1);
1065  if (r < 0)
1066  {
1067  /*
1068  * Ok if no data available without blocking or interrupted (though
1069  * EINTR really shouldn't happen with a non-blocking socket). Report
1070  * other errors.
1071  */
1072  if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)
1073  r = 0;
1074  else
1075  {
1076  /*
1077  * Careful: an ereport() that tries to write to the client would
1078  * cause recursion to here, leading to stack overflow and core
1079  * dump! This message must go *only* to the postmaster log.
1080  */
1083  errmsg("could not receive data from client: %m")));
1084  r = EOF;
1085  }
1086  }
1087  else if (r == 0)
1088  {
1089  /* EOF detected */
1090  r = EOF;
1091  }
1092 
1093  return r;
1094 }
1095 
1096 /* --------------------------------
1097  * pq_getbytes - get a known number of bytes from connection
1098  *
1099  * returns 0 if OK, EOF if trouble
1100  * --------------------------------
1101  */
1102 int
1103 pq_getbytes(char *s, size_t len)
1104 {
1105  size_t amount;
1106 
1108 
1109  while (len > 0)
1110  {
1111  while (PqRecvPointer >= PqRecvLength)
1112  {
1113  if (pq_recvbuf()) /* If nothing in buffer, then recv some */
1114  return EOF; /* Failed to recv data */
1115  }
1116  amount = PqRecvLength - PqRecvPointer;
1117  if (amount > len)
1118  amount = len;
1119  memcpy(s, PqRecvBuffer + PqRecvPointer, amount);
1120  PqRecvPointer += amount;
1121  s += amount;
1122  len -= amount;
1123  }
1124  return 0;
1125 }
1126 
1127 /* --------------------------------
1128  * pq_discardbytes - throw away a known number of bytes
1129  *
1130  * same as pq_getbytes except we do not copy the data to anyplace.
1131  * this is used for resynchronizing after read errors.
1132  *
1133  * returns 0 if OK, EOF if trouble
1134  * --------------------------------
1135  */
1136 static int
1138 {
1139  size_t amount;
1140 
1142 
1143  while (len > 0)
1144  {
1145  while (PqRecvPointer >= PqRecvLength)
1146  {
1147  if (pq_recvbuf()) /* If nothing in buffer, then recv some */
1148  return EOF; /* Failed to recv data */
1149  }
1150  amount = PqRecvLength - PqRecvPointer;
1151  if (amount > len)
1152  amount = len;
1153  PqRecvPointer += amount;
1154  len -= amount;
1155  }
1156  return 0;
1157 }
1158 
1159 /* --------------------------------
1160  * pq_buffer_has_data - is any buffered data available to read?
1161  *
1162  * This will *not* attempt to read more data.
1163  * --------------------------------
1164  */
1165 bool
1167 {
1168  return (PqRecvPointer < PqRecvLength);
1169 }
1170 
1171 
1172 /* --------------------------------
1173  * pq_startmsgread - begin reading a message from the client.
1174  *
1175  * This must be called before any of the pq_get* functions.
1176  * --------------------------------
1177  */
1178 void
1180 {
1181  /*
1182  * There shouldn't be a read active already, but let's check just to be
1183  * sure.
1184  */
1185  if (PqCommReadingMsg)
1186  ereport(FATAL,
1187  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1188  errmsg("terminating connection because protocol synchronization was lost")));
1189 
1190  PqCommReadingMsg = true;
1191 }
1192 
1193 
1194 /* --------------------------------
1195  * pq_endmsgread - finish reading message.
1196  *
1197  * This must be called after reading a message with pq_getbytes()
1198  * and friends, to indicate that we have read the whole message.
1199  * pq_getmessage() does this implicitly.
1200  * --------------------------------
1201  */
1202 void
1204 {
1206 
1207  PqCommReadingMsg = false;
1208 }
1209 
1210 /* --------------------------------
1211  * pq_is_reading_msg - are we currently reading a message?
1212  *
1213  * This is used in error recovery at the outer idle loop to detect if we have
1214  * lost protocol sync, and need to terminate the connection. pq_startmsgread()
1215  * will check for that too, but it's nicer to detect it earlier.
1216  * --------------------------------
1217  */
1218 bool
1220 {
1221  return PqCommReadingMsg;
1222 }
1223 
1224 /* --------------------------------
1225  * pq_getmessage - get a message with length word from connection
1226  *
1227  * The return value is placed in an expansible StringInfo, which has
1228  * already been initialized by the caller.
1229  * Only the message body is placed in the StringInfo; the length word
1230  * is removed. Also, s->cursor is initialized to zero for convenience
1231  * in scanning the message contents.
1232  *
1233  * maxlen is the upper limit on the length of the
1234  * message we are willing to accept. We abort the connection (by
1235  * returning EOF) if client tries to send more than that.
1236  *
1237  * returns 0 if OK, EOF if trouble
1238  * --------------------------------
1239  */
1240 int
1242 {
1243  int32 len;
1244 
1246 
1247  resetStringInfo(s);
1248 
1249  /* Read message length word */
1250  if (pq_getbytes((char *) &len, 4) == EOF)
1251  {
1253  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1254  errmsg("unexpected EOF within message length word")));
1255  return EOF;
1256  }
1257 
1258  len = pg_ntoh32(len);
1259 
1260  if (len < 4 || len > maxlen)
1261  {
1263  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1264  errmsg("invalid message length")));
1265  return EOF;
1266  }
1267 
1268  len -= 4; /* discount length itself */
1269 
1270  if (len > 0)
1271  {
1272  /*
1273  * Allocate space for message. If we run out of room (ridiculously
1274  * large message), we will elog(ERROR), but we want to discard the
1275  * message body so as not to lose communication sync.
1276  */
1277  PG_TRY();
1278  {
1279  enlargeStringInfo(s, len);
1280  }
1281  PG_CATCH();
1282  {
1283  if (pq_discardbytes(len) == EOF)
1285  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1286  errmsg("incomplete message from client")));
1287 
1288  /* we discarded the rest of the message so we're back in sync. */
1289  PqCommReadingMsg = false;
1290  PG_RE_THROW();
1291  }
1292  PG_END_TRY();
1293 
1294  /* And grab the message */
1295  if (pq_getbytes(s->data, len) == EOF)
1296  {
1298  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1299  errmsg("incomplete message from client")));
1300  return EOF;
1301  }
1302  s->len = len;
1303  /* Place a trailing null per StringInfo convention */
1304  s->data[len] = '\0';
1305  }
1306 
1307  /* finished reading the message. */
1308  PqCommReadingMsg = false;
1309 
1310  return 0;
1311 }
1312 
1313 
1314 static int
1315 internal_putbytes(const char *s, size_t len)
1316 {
1317  size_t amount;
1318 
1319  while (len > 0)
1320  {
1321  /* If buffer is full, then flush it out */
1323  {
1324  socket_set_nonblocking(false);
1325  if (internal_flush())
1326  return EOF;
1327  }
1328  amount = PqSendBufferSize - PqSendPointer;
1329  if (amount > len)
1330  amount = len;
1331  memcpy(PqSendBuffer + PqSendPointer, s, amount);
1332  PqSendPointer += amount;
1333  s += amount;
1334  len -= amount;
1335  }
1336  return 0;
1337 }
1338 
1339 /* --------------------------------
1340  * socket_flush - flush pending output
1341  *
1342  * returns 0 if OK, EOF if trouble
1343  * --------------------------------
1344  */
1345 static int
1347 {
1348  int res;
1349 
1350  /* No-op if reentrant call */
1351  if (PqCommBusy)
1352  return 0;
1353  PqCommBusy = true;
1354  socket_set_nonblocking(false);
1355  res = internal_flush();
1356  PqCommBusy = false;
1357  return res;
1358 }
1359 
1360 /* --------------------------------
1361  * internal_flush - flush pending output
1362  *
1363  * Returns 0 if OK (meaning everything was sent, or operation would block
1364  * and the socket is in non-blocking mode), or EOF if trouble.
1365  * --------------------------------
1366  */
1367 static int
1369 {
1370  static int last_reported_send_errno = 0;
1371 
1372  char *bufptr = PqSendBuffer + PqSendStart;
1373  char *bufend = PqSendBuffer + PqSendPointer;
1374 
1375  while (bufptr < bufend)
1376  {
1377  int r;
1378 
1379  r = secure_write(MyProcPort, bufptr, bufend - bufptr);
1380 
1381  if (r <= 0)
1382  {
1383  if (errno == EINTR)
1384  continue; /* Ok if we were interrupted */
1385 
1386  /*
1387  * Ok if no data writable without blocking, and the socket is in
1388  * non-blocking mode.
1389  */
1390  if (errno == EAGAIN ||
1391  errno == EWOULDBLOCK)
1392  {
1393  return 0;
1394  }
1395 
1396  /*
1397  * Careful: an ereport() that tries to write to the client would
1398  * cause recursion to here, leading to stack overflow and core
1399  * dump! This message must go *only* to the postmaster log.
1400  *
1401  * If a client disconnects while we're in the midst of output, we
1402  * might write quite a bit of data before we get to a safe query
1403  * abort point. So, suppress duplicate log messages.
1404  */
1405  if (errno != last_reported_send_errno)
1406  {
1407  last_reported_send_errno = errno;
1410  errmsg("could not send data to client: %m")));
1411  }
1412 
1413  /*
1414  * We drop the buffered data anyway so that processing can
1415  * continue, even though we'll probably quit soon. We also set a
1416  * flag that'll cause the next CHECK_FOR_INTERRUPTS to terminate
1417  * the connection.
1418  */
1419  PqSendStart = PqSendPointer = 0;
1421  InterruptPending = 1;
1422  return EOF;
1423  }
1424 
1425  last_reported_send_errno = 0; /* reset after any successful send */
1426  bufptr += r;
1427  PqSendStart += r;
1428  }
1429 
1430  PqSendStart = PqSendPointer = 0;
1431  return 0;
1432 }
1433 
1434 /* --------------------------------
1435  * pq_flush_if_writable - flush pending output if writable without blocking
1436  *
1437  * Returns 0 if OK, or EOF if trouble.
1438  * --------------------------------
1439  */
1440 static int
1442 {
1443  int res;
1444 
1445  /* Quick exit if nothing to do */
1446  if (PqSendPointer == PqSendStart)
1447  return 0;
1448 
1449  /* No-op if reentrant call */
1450  if (PqCommBusy)
1451  return 0;
1452 
1453  /* Temporarily put the socket into non-blocking mode */
1454  socket_set_nonblocking(true);
1455 
1456  PqCommBusy = true;
1457  res = internal_flush();
1458  PqCommBusy = false;
1459  return res;
1460 }
1461 
1462 /* --------------------------------
1463  * socket_is_send_pending - is there any pending data in the output buffer?
1464  * --------------------------------
1465  */
1466 static bool
1468 {
1469  return (PqSendStart < PqSendPointer);
1470 }
1471 
1472 /* --------------------------------
1473  * Message-level I/O routines begin here.
1474  * --------------------------------
1475  */
1476 
1477 
1478 /* --------------------------------
1479  * socket_putmessage - send a normal message (suppressed in COPY OUT mode)
1480  *
1481  * msgtype is a message type code to place before the message body.
1482  *
1483  * len is the length of the message body data at *s. A message length
1484  * word (equal to len+4 because it counts itself too) is inserted by this
1485  * routine.
1486  *
1487  * We suppress messages generated while pqcomm.c is busy. This
1488  * avoids any possibility of messages being inserted within other
1489  * messages. The only known trouble case arises if SIGQUIT occurs
1490  * during a pqcomm.c routine --- quickdie() will try to send a warning
1491  * message, and the most reasonable approach seems to be to drop it.
1492  *
1493  * returns 0 if OK, EOF if trouble
1494  * --------------------------------
1495  */
1496 static int
1497 socket_putmessage(char msgtype, const char *s, size_t len)
1498 {
1499  uint32 n32;
1500 
1501  Assert(msgtype != 0);
1502 
1503  if (PqCommBusy)
1504  return 0;
1505  PqCommBusy = true;
1506  if (internal_putbytes(&msgtype, 1))
1507  goto fail;
1508 
1509  n32 = pg_hton32((uint32) (len + 4));
1510  if (internal_putbytes((char *) &n32, 4))
1511  goto fail;
1512 
1513  if (internal_putbytes(s, len))
1514  goto fail;
1515  PqCommBusy = false;
1516  return 0;
1517 
1518 fail:
1519  PqCommBusy = false;
1520  return EOF;
1521 }
1522 
1523 /* --------------------------------
1524  * pq_putmessage_noblock - like pq_putmessage, but never blocks
1525  *
1526  * If the output buffer is too small to hold the message, the buffer
1527  * is enlarged.
1528  */
1529 static void
1530 socket_putmessage_noblock(char msgtype, const char *s, size_t len)
1531 {
1533  int required;
1534 
1535  /*
1536  * Ensure we have enough space in the output buffer for the message header
1537  * as well as the message itself.
1538  */
1539  required = PqSendPointer + 1 + 4 + len;
1540  if (required > PqSendBufferSize)
1541  {
1544  }
1545  res = pq_putmessage(msgtype, s, len);
1546  Assert(res == 0); /* should not fail when the message fits in
1547  * buffer */
1548 }
1549 
1550 /* --------------------------------
1551  * pq_putmessage_v2 - send a message in protocol version 2
1552  *
1553  * msgtype is a message type code to place before the message body.
1554  *
1555  * We no longer support protocol version 2, but we have kept this
1556  * function so that if a client tries to connect with protocol version 2,
1557  * as a courtesy we can still send the "unsupported protocol version"
1558  * error to the client in the old format.
1559  *
1560  * Like in pq_putmessage(), we suppress messages generated while
1561  * pqcomm.c is busy.
1562  *
1563  * returns 0 if OK, EOF if trouble
1564  * --------------------------------
1565  */
1566 int
1567 pq_putmessage_v2(char msgtype, const char *s, size_t len)
1568 {
1569  Assert(msgtype != 0);
1570 
1571  if (PqCommBusy)
1572  return 0;
1573  PqCommBusy = true;
1574  if (internal_putbytes(&msgtype, 1))
1575  goto fail;
1576 
1577  if (internal_putbytes(s, len))
1578  goto fail;
1579  PqCommBusy = false;
1580  return 0;
1581 
1582 fail:
1583  PqCommBusy = false;
1584  return EOF;
1585 }
1586 
1587 /*
1588  * Support for TCP Keepalive parameters
1589  */
1590 
1591 /*
1592  * On Windows, we need to set both idle and interval at the same time.
1593  * We also cannot reset them to the default (setting to zero will
1594  * actually set them to zero, not default), therefore we fallback to
1595  * the out-of-the-box default instead.
1596  */
1597 #if defined(WIN32) && defined(SIO_KEEPALIVE_VALS)
1598 static int
1599 pq_setkeepaliveswin32(Port *port, int idle, int interval)
1600 {
1601  struct tcp_keepalive ka;
1602  DWORD retsize;
1603 
1604  if (idle <= 0)
1605  idle = 2 * 60 * 60; /* default = 2 hours */
1606  if (interval <= 0)
1607  interval = 1; /* default = 1 second */
1608 
1609  ka.onoff = 1;
1610  ka.keepalivetime = idle * 1000;
1611  ka.keepaliveinterval = interval * 1000;
1612 
1613  if (WSAIoctl(port->sock,
1614  SIO_KEEPALIVE_VALS,
1615  (LPVOID) &ka,
1616  sizeof(ka),
1617  NULL,
1618  0,
1619  &retsize,
1620  NULL,
1621  NULL)
1622  != 0)
1623  {
1624  ereport(LOG,
1625  (errmsg("%s(%s) failed: error code %d",
1626  "WSAIoctl", "SIO_KEEPALIVE_VALS", WSAGetLastError())));
1627  return STATUS_ERROR;
1628  }
1629  if (port->keepalives_idle != idle)
1630  port->keepalives_idle = idle;
1631  if (port->keepalives_interval != interval)
1632  port->keepalives_interval = interval;
1633  return STATUS_OK;
1634 }
1635 #endif
1636 
1637 int
1639 {
1640 #if defined(PG_TCP_KEEPALIVE_IDLE) || defined(SIO_KEEPALIVE_VALS)
1641  if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family))
1642  return 0;
1643 
1644  if (port->keepalives_idle != 0)
1645  return port->keepalives_idle;
1646 
1647  if (port->default_keepalives_idle == 0)
1648  {
1649 #ifndef WIN32
1650  socklen_t size = sizeof(port->default_keepalives_idle);
1651 
1652  if (getsockopt(port->sock, IPPROTO_TCP, PG_TCP_KEEPALIVE_IDLE,
1653  (char *) &port->default_keepalives_idle,
1654  &size) < 0)
1655  {
1656  ereport(LOG,
1657  (errmsg("%s(%s) failed: %m", "getsockopt", PG_TCP_KEEPALIVE_IDLE_STR)));
1658  port->default_keepalives_idle = -1; /* don't know */
1659  }
1660 #else /* WIN32 */
1661  /* We can't get the defaults on Windows, so return "don't know" */
1662  port->default_keepalives_idle = -1;
1663 #endif /* WIN32 */
1664  }
1665 
1666  return port->default_keepalives_idle;
1667 #else
1668  return 0;
1669 #endif
1670 }
1671 
1672 int
1674 {
1675  if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family))
1676  return STATUS_OK;
1677 
1678 /* check SIO_KEEPALIVE_VALS here, not just WIN32, as some toolchains lack it */
1679 #if defined(PG_TCP_KEEPALIVE_IDLE) || defined(SIO_KEEPALIVE_VALS)
1680  if (idle == port->keepalives_idle)
1681  return STATUS_OK;
1682 
1683 #ifndef WIN32
1684  if (port->default_keepalives_idle <= 0)
1685  {
1686  if (pq_getkeepalivesidle(port) < 0)
1687  {
1688  if (idle == 0)
1689  return STATUS_OK; /* default is set but unknown */
1690  else
1691  return STATUS_ERROR;
1692  }
1693  }
1694 
1695  if (idle == 0)
1696  idle = port->default_keepalives_idle;
1697 
1698  if (setsockopt(port->sock, IPPROTO_TCP, PG_TCP_KEEPALIVE_IDLE,
1699  (char *) &idle, sizeof(idle)) < 0)
1700  {
1701  ereport(LOG,
1702  (errmsg("%s(%s) failed: %m", "setsockopt", PG_TCP_KEEPALIVE_IDLE_STR)));
1703  return STATUS_ERROR;
1704  }
1705 
1706  port->keepalives_idle = idle;
1707 #else /* WIN32 */
1708  return pq_setkeepaliveswin32(port, idle, port->keepalives_interval);
1709 #endif
1710 #else
1711  if (idle != 0)
1712  {
1713  ereport(LOG,
1714  (errmsg("setting the keepalive idle time is not supported")));
1715  return STATUS_ERROR;
1716  }
1717 #endif
1718 
1719  return STATUS_OK;
1720 }
1721 
1722 int
1724 {
1725 #if defined(TCP_KEEPINTVL) || defined(SIO_KEEPALIVE_VALS)
1726  if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family))
1727  return 0;
1728 
1729  if (port->keepalives_interval != 0)
1730  return port->keepalives_interval;
1731 
1732  if (port->default_keepalives_interval == 0)
1733  {
1734 #ifndef WIN32
1735  socklen_t size = sizeof(port->default_keepalives_interval);
1736 
1737  if (getsockopt(port->sock, IPPROTO_TCP, TCP_KEEPINTVL,
1738  (char *) &port->default_keepalives_interval,
1739  &size) < 0)
1740  {
1741  ereport(LOG,
1742  (errmsg("%s(%s) failed: %m", "getsockopt", "TCP_KEEPINTVL")));
1743  port->default_keepalives_interval = -1; /* don't know */
1744  }
1745 #else
1746  /* We can't get the defaults on Windows, so return "don't know" */
1747  port->default_keepalives_interval = -1;
1748 #endif /* WIN32 */
1749  }
1750 
1751  return port->default_keepalives_interval;
1752 #else
1753  return 0;
1754 #endif
1755 }
1756 
1757 int
1759 {
1760  if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family))
1761  return STATUS_OK;
1762 
1763 #if defined(TCP_KEEPINTVL) || defined(SIO_KEEPALIVE_VALS)
1764  if (interval == port->keepalives_interval)
1765  return STATUS_OK;
1766 
1767 #ifndef WIN32
1768  if (port->default_keepalives_interval <= 0)
1769  {
1770  if (pq_getkeepalivesinterval(port) < 0)
1771  {
1772  if (interval == 0)
1773  return STATUS_OK; /* default is set but unknown */
1774  else
1775  return STATUS_ERROR;
1776  }
1777  }
1778 
1779  if (interval == 0)
1780  interval = port->default_keepalives_interval;
1781 
1782  if (setsockopt(port->sock, IPPROTO_TCP, TCP_KEEPINTVL,
1783  (char *) &interval, sizeof(interval)) < 0)
1784  {
1785  ereport(LOG,
1786  (errmsg("%s(%s) failed: %m", "setsockopt", "TCP_KEEPINTVL")));
1787  return STATUS_ERROR;
1788  }
1789 
1790  port->keepalives_interval = interval;
1791 #else /* WIN32 */
1792  return pq_setkeepaliveswin32(port, port->keepalives_idle, interval);
1793 #endif
1794 #else
1795  if (interval != 0)
1796  {
1797  ereport(LOG,
1798  (errmsg("%s(%s) not supported", "setsockopt", "TCP_KEEPINTVL")));
1799  return STATUS_ERROR;
1800  }
1801 #endif
1802 
1803  return STATUS_OK;
1804 }
1805 
1806 int
1808 {
1809 #ifdef TCP_KEEPCNT
1810  if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family))
1811  return 0;
1812 
1813  if (port->keepalives_count != 0)
1814  return port->keepalives_count;
1815 
1816  if (port->default_keepalives_count == 0)
1817  {
1818  socklen_t size = sizeof(port->default_keepalives_count);
1819 
1820  if (getsockopt(port->sock, IPPROTO_TCP, TCP_KEEPCNT,
1821  (char *) &port->default_keepalives_count,
1822  &size) < 0)
1823  {
1824  ereport(LOG,
1825  (errmsg("%s(%s) failed: %m", "getsockopt", "TCP_KEEPCNT")));
1826  port->default_keepalives_count = -1; /* don't know */
1827  }
1828  }
1829 
1830  return port->default_keepalives_count;
1831 #else
1832  return 0;
1833 #endif
1834 }
1835 
1836 int
1838 {
1839  if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family))
1840  return STATUS_OK;
1841 
1842 #ifdef TCP_KEEPCNT
1843  if (count == port->keepalives_count)
1844  return STATUS_OK;
1845 
1846  if (port->default_keepalives_count <= 0)
1847  {
1848  if (pq_getkeepalivescount(port) < 0)
1849  {
1850  if (count == 0)
1851  return STATUS_OK; /* default is set but unknown */
1852  else
1853  return STATUS_ERROR;
1854  }
1855  }
1856 
1857  if (count == 0)
1858  count = port->default_keepalives_count;
1859 
1860  if (setsockopt(port->sock, IPPROTO_TCP, TCP_KEEPCNT,
1861  (char *) &count, sizeof(count)) < 0)
1862  {
1863  ereport(LOG,
1864  (errmsg("%s(%s) failed: %m", "setsockopt", "TCP_KEEPCNT")));
1865  return STATUS_ERROR;
1866  }
1867 
1868  port->keepalives_count = count;
1869 #else
1870  if (count != 0)
1871  {
1872  ereport(LOG,
1873  (errmsg("%s(%s) not supported", "setsockopt", "TCP_KEEPCNT")));
1874  return STATUS_ERROR;
1875  }
1876 #endif
1877 
1878  return STATUS_OK;
1879 }
1880 
1881 int
1883 {
1884 #ifdef TCP_USER_TIMEOUT
1885  if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family))
1886  return 0;
1887 
1888  if (port->tcp_user_timeout != 0)
1889  return port->tcp_user_timeout;
1890 
1891  if (port->default_tcp_user_timeout == 0)
1892  {
1893  socklen_t size = sizeof(port->default_tcp_user_timeout);
1894 
1895  if (getsockopt(port->sock, IPPROTO_TCP, TCP_USER_TIMEOUT,
1896  (char *) &port->default_tcp_user_timeout,
1897  &size) < 0)
1898  {
1899  ereport(LOG,
1900  (errmsg("%s(%s) failed: %m", "getsockopt", "TCP_USER_TIMEOUT")));
1901  port->default_tcp_user_timeout = -1; /* don't know */
1902  }
1903  }
1904 
1905  return port->default_tcp_user_timeout;
1906 #else
1907  return 0;
1908 #endif
1909 }
1910 
1911 int
1913 {
1914  if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family))
1915  return STATUS_OK;
1916 
1917 #ifdef TCP_USER_TIMEOUT
1918  if (timeout == port->tcp_user_timeout)
1919  return STATUS_OK;
1920 
1921  if (port->default_tcp_user_timeout <= 0)
1922  {
1923  if (pq_gettcpusertimeout(port) < 0)
1924  {
1925  if (timeout == 0)
1926  return STATUS_OK; /* default is set but unknown */
1927  else
1928  return STATUS_ERROR;
1929  }
1930  }
1931 
1932  if (timeout == 0)
1933  timeout = port->default_tcp_user_timeout;
1934 
1935  if (setsockopt(port->sock, IPPROTO_TCP, TCP_USER_TIMEOUT,
1936  (char *) &timeout, sizeof(timeout)) < 0)
1937  {
1938  ereport(LOG,
1939  (errmsg("%s(%s) failed: %m", "setsockopt", "TCP_USER_TIMEOUT")));
1940  return STATUS_ERROR;
1941  }
1942 
1943  port->tcp_user_timeout = timeout;
1944 #else
1945  if (timeout != 0)
1946  {
1947  ereport(LOG,
1948  (errmsg("%s(%s) not supported", "setsockopt", "TCP_USER_TIMEOUT")));
1949  return STATUS_ERROR;
1950  }
1951 #endif
1952 
1953  return STATUS_OK;
1954 }
1955 
/*
 * Check if the client is still connected.
 *
 * Returns false if poll() fails or reports that the socket was hung up or
 * closed by the peer; returns true otherwise, including on platforms
 * without POLLRDHUP, where no check is performed at all.
 */
bool
pq_check_connection(void)
{
#if defined(POLLRDHUP)
	/*
	 * POLLRDHUP is a Linux extension to poll(2) to detect sockets closed by
	 * the other end.  We don't have a portable way to do that without
	 * actually trying to read or write data on other systems.  We don't want
	 * to read because that would be confused by pipelined queries and COPY
	 * data. Perhaps in future we'll try to write a heartbeat message instead.
	 */
	struct pollfd pollfd;
	int			rc;

	pollfd.fd = MyProcPort->sock;
	pollfd.events = POLLOUT | POLLIN | POLLRDHUP;
	pollfd.revents = 0;

	/* Zero timeout: just sample the current socket state, never wait */
	rc = poll(&pollfd, 1, 0);

	if (rc < 0)
	{
		ereport(COMMERROR,
				(errcode_for_socket_access(),
				 errmsg("could not poll socket: %m")));
		return false;
	}
	else if (rc == 1 && (pollfd.revents & (POLLHUP | POLLRDHUP)))
		return false;
#endif

	return true;
}
ssize_t secure_write(Port *port, void *ptr, size_t len)
Definition: be-secure.c:260
void secure_close(Port *port)
Definition: be-secure.c:135
ssize_t secure_read(Port *port, void *ptr, size_t len)
Definition: be-secure.c:147
unsigned int uint32
Definition: c.h:441
#define STATUS_OK
Definition: c.h:1167
signed int int32
Definition: c.h:429
#define MemSet(start, val, len)
Definition: c.h:1008
#define STATUS_ERROR
Definition: c.h:1168
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:155
int errcode_for_socket_access(void)
Definition: elog.c:787
int errcode_for_file_access(void)
Definition: elog.c:716
int errhint(const char *fmt,...)
Definition: elog.c:1151
int errcode(int sqlerrcode)
Definition: elog.c:693
int errmsg(const char *fmt,...)
Definition: elog.c:904
#define _(x)
Definition: elog.c:89
#define LOG
Definition: elog.h:25
#define PG_RE_THROW()
Definition: elog.h:340
#define COMMERROR
Definition: elog.h:27
#define PG_END_TRY()
Definition: elog.h:324
#define FATAL
Definition: elog.h:35
#define PG_TRY()
Definition: elog.h:299
#define WARNING
Definition: elog.h:30
#define ERROR
Definition: elog.h:33
#define elog(elevel,...)
Definition: elog.h:218
#define PG_CATCH()
Definition: elog.h:309
#define ereport(elevel,...)
Definition: elog.h:143
#define gai_strerror
Definition: getaddrinfo.h:146
#define NI_NUMERICHOST
Definition: getaddrinfo.h:78
#define NI_MAXHOST
Definition: getaddrinfo.h:88
#define AI_PASSIVE
Definition: getaddrinfo.h:62
volatile sig_atomic_t InterruptPending
Definition: globals.c:30
volatile sig_atomic_t ClientConnectionLost
Definition: globals.c:34
int MaxBackends
Definition: globals.c:139
struct Port * MyProcPort
Definition: globals.c:46
struct Latch * MyLatch
Definition: globals.c:57
int tcp_keepalives_idle
Definition: guc.c:625
int tcp_keepalives_interval
Definition: guc.c:626
int tcp_keepalives_count
Definition: guc.c:627
int tcp_user_timeout
Definition: guc.c:628
long val
Definition: informix.c:664
void pg_freeaddrinfo_all(int hint_ai_family, struct addrinfo *ai)
Definition: ip.c:88
int pg_getnameinfo_all(const struct sockaddr_storage *addr, int salen, char *node, int nodelen, char *service, int servicelen, int flags)
Definition: ip.c:122
int pg_getaddrinfo_all(const char *hostname, const char *servname, const struct addrinfo *hintp, struct addrinfo **result)
Definition: ip.c:57
#define IS_AF_UNIX(fam)
Definition: ip.h:24
void on_proc_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:305
WaitEventSet * CreateWaitEventSet(MemoryContext context, int nevents)
Definition: latch.c:684
int AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd, Latch *latch, void *user_data)
Definition: latch.c:862
#define WL_LATCH_SET
Definition: latch.h:125
#define WL_POSTMASTER_DEATH
Definition: latch.h:129
#define WL_SOCKET_WRITEABLE
Definition: latch.h:127
#define pq_putmessage(msgtype, s, len)
Definition: libpq.h:49
#define FeBeWaitSetLatchPos
Definition: libpq.h:64
#define FeBeWaitSetSocketPos
Definition: libpq.h:63
Assert(fmt[strlen(fmt) - 1] !='\n')
List * lappend(List *list, void *datum)
Definition: list.c:336
char * pstrdup(const char *in)
Definition: mcxt.c:1299
MemoryContext TopMemoryContext
Definition: mcxt.c:48
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1182
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:863
void CreateSocketLockFile(const char *socketfile, bool amPostmaster, const char *socketDir)
Definition: miscinit.c:1301
void * arg
#define pg_ntoh32(x)
Definition: pg_bswap.h:125
#define pg_hton32(x)
Definition: pg_bswap.h:121
#define MAXPGPATH
#define PG_SOMAXCONN
const void size_t len
#define lfirst(lc)
Definition: pg_list.h:169
#define NIL
Definition: pg_list.h:65
static int port
Definition: pg_regress.c:92
bool pg_set_noblock(pgsocket sock)
Definition: noblock.c:25
int pgsocket
Definition: port.h:29
#define snprintf
Definition: port.h:225
unsigned int socklen_t
Definition: port.h:40
#define PGINVALID_SOCKET
Definition: port.h:31
#define closesocket
Definition: port.h:342
uintptr_t Datum
Definition: postgres.h:411
static pgsocket ListenSocket[MAXLISTEN]
Definition: postmaster.c:222
static int PqRecvLength
Definition: pqcomm.c:131
int pq_setkeepalivesinterval(int interval, Port *port)
Definition: pqcomm.c:1758
const PQcommMethods * PqCommMethods
Definition: pqcomm.c:166
static int pq_recvbuf(void)
Definition: pqcomm.c:948
static int PqSendStart
Definition: pqcomm.c:127
int Unix_socket_permissions
Definition: pqcomm.c:108
static int internal_flush(void)
Definition: pqcomm.c:1368
static void socket_set_nonblocking(bool nonblocking)
Definition: pqcomm.c:931
int pq_peekbyte(void)
Definition: pqcomm.c:1028
int pq_getbyte_if_available(unsigned char *c)
Definition: pqcomm.c:1049
static int socket_flush_if_writable(void)
Definition: pqcomm.c:1441
int pq_getkeepalivescount(Port *port)
Definition: pqcomm.c:1807
#define PQ_RECV_BUFFER_SIZE
Definition: pqcomm.c:122
int pq_getkeepalivesinterval(Port *port)
Definition: pqcomm.c:1723
static int PqSendPointer
Definition: pqcomm.c:126
static int pq_discardbytes(size_t len)
Definition: pqcomm.c:1137
int pq_settcpusertimeout(int timeout, Port *port)
Definition: pqcomm.c:1912
int pq_getmessage(StringInfo s, int maxlen)
Definition: pqcomm.c:1241
static const PQcommMethods PqCommSocketMethods
Definition: pqcomm.c:157
int pq_getbytes(char *s, size_t len)
Definition: pqcomm.c:1103
static bool PqCommReadingMsg
Definition: pqcomm.c:137
char * Unix_socket_group
Definition: pqcomm.c:109
void StreamClose(pgsocket sock)
Definition: pqcomm.c:865
static int socket_flush(void)
Definition: pqcomm.c:1346
#define PQ_SEND_BUFFER_SIZE
Definition: pqcomm.c:121
int pq_setkeepalivesidle(int idle, Port *port)
Definition: pqcomm.c:1673
int StreamServerPort(int family, const char *hostName, unsigned short portNumber, const char *unixSocketDir, pgsocket ListenSocket[], int MaxListen)
Definition: pqcomm.c:333
static void socket_comm_reset(void)
Definition: pqcomm.c:232
WaitEventSet * FeBeWaitSet
Definition: pqcomm.c:168
static char * PqSendBuffer
Definition: pqcomm.c:124
int pq_getkeepalivesidle(Port *port)
Definition: pqcomm.c:1638
void pq_endmsgread(void)
Definition: pqcomm.c:1203
static List * sock_paths
Definition: pqcomm.c:112
void TouchSocketFiles(void)
Definition: pqcomm.c:880
static bool PqCommBusy
Definition: pqcomm.c:136
int pq_getbyte(void)
Definition: pqcomm.c:1009
static bool socket_is_send_pending(void)
Definition: pqcomm.c:1467
static int socket_putmessage(char msgtype, const char *s, size_t len)
Definition: pqcomm.c:1497
static void socket_putmessage_noblock(char msgtype, const char *s, size_t len)
Definition: pqcomm.c:1530
bool pq_buffer_has_data(void)
Definition: pqcomm.c:1166
static char PqRecvBuffer[PQ_RECV_BUFFER_SIZE]
Definition: pqcomm.c:129
static int PqRecvPointer
Definition: pqcomm.c:130
void pq_init(void)
Definition: pqcomm.c:176
static void socket_close(int code, Datum arg)
Definition: pqcomm.c:247
int pq_putmessage_v2(char msgtype, const char *s, size_t len)
Definition: pqcomm.c:1567
bool pq_is_reading_msg(void)
Definition: pqcomm.c:1219
void RemoveSocketFiles(void)
Definition: pqcomm.c:898
int pq_gettcpusertimeout(Port *port)
Definition: pqcomm.c:1882
bool pq_check_connection(void)
Definition: pqcomm.c:1960
static int PqSendBufferSize
Definition: pqcomm.c:125
void pq_startmsgread(void)
Definition: pqcomm.c:1179
int pq_setkeepalivescount(int count, Port *port)
Definition: pqcomm.c:1837
int StreamConnection(pgsocket server_fd, Port *port)
Definition: pqcomm.c:731
static int internal_putbytes(const char *s, size_t len)
Definition: pqcomm.c:1315
#define UNIXSOCK_PATH(path, port, sockdir)
Definition: pqcomm.h:70
#define UNIXSOCK_PATH_BUFLEN
Definition: pqcomm.h:86
char * c
static int fd(const char *x, int i)
Definition: preproc-init.c:105
void pg_usleep(long microsec)
Definition: signal.c:53
void resetStringInfo(StringInfo str)
Definition: stringinfo.c:75
void enlargeStringInfo(StringInfo str, int needed)
Definition: stringinfo.c:283
Definition: pg_list.h:51
Definition: libpq-be.h:126
void * gss
Definition: libpq-be.h:203
pgsocket sock
Definition: libpq-be.h:127
bool noblock
Definition: libpq-be.h:128
int ai_socktype
Definition: getaddrinfo.h:102
struct addrinfo * ai_next
Definition: getaddrinfo.h:107
int ai_flags
Definition: getaddrinfo.h:100
int ai_family
Definition: getaddrinfo.h:101
#define bind(s, addr, addrlen)
Definition: win32_port.h:470
#define EINTR
Definition: win32_port.h:351
#define EWOULDBLOCK
Definition: win32_port.h:357
#define EADDRINUSE
Definition: win32_port.h:377
int gid_t
Definition: win32_port.h:245
#define socket(af, type, protocol)
Definition: win32_port.h:469
#define accept(s, addr, addrlen)
Definition: win32_port.h:472
#define listen(s, backlog)
Definition: win32_port.h:471
#define EAGAIN
Definition: win32_port.h:349