PostgreSQL Source Code  git master
pqcomm.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * pqcomm.c
4  * Communication functions between the Frontend and the Backend
5  *
6  * These routines handle the low-level details of communication between
7  * frontend and backend. They just shove data across the communication
8  * channel, and are ignorant of the semantics of the data.
9  *
10  * To emit an outgoing message, use the routines in pqformat.c to construct
11  * the message in a buffer and then emit it in one call to pq_putmessage.
12  * There are no functions to send raw bytes or partial messages; this
13  * ensures that the channel will not be clogged by an incomplete message if
14  * execution is aborted by ereport(ERROR) partway through the message.
15  *
16  * At one time, libpq was shared between frontend and backend, but now
17  * the backend's "backend/libpq" is quite separate from "interfaces/libpq".
18  * All that remains is similarities of names to trap the unwary...
19  *
20  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
21  * Portions Copyright (c) 1994, Regents of the University of California
22  *
23  * src/backend/libpq/pqcomm.c
24  *
25  *-------------------------------------------------------------------------
26  */
27 
28 /*------------------------
29  * INTERFACE ROUTINES
30  *
31  * setup/teardown:
32  * StreamServerPort - Open postmaster's server port
33  * StreamConnection - Create new connection with client
34  * StreamClose - Close a client/backend connection
35  * TouchSocketFiles - Protect socket files against /tmp cleaners
36  * pq_init - initialize libpq at backend startup
37  * socket_comm_reset - reset libpq during error recovery
38  * socket_close - shutdown libpq at backend exit
39  *
40  * low-level I/O:
41  * pq_getbytes - get a known number of bytes from connection
42  * pq_getmessage - get a message with length word from connection
43  * pq_getbyte - get next byte from connection
44  * pq_peekbyte - peek at next byte from connection
45  * pq_flush - flush pending output
46  * pq_flush_if_writable - flush pending output if writable without blocking
47  * pq_getbyte_if_available - get a byte if available without blocking
48  *
49  * message-level I/O
50  * pq_putmessage - send a normal message (suppressed in COPY OUT mode)
51  * pq_putmessage_noblock - buffer a normal message (suppressed in COPY OUT)
52  *
53  *------------------------
54  */
55 #include "postgres.h"
56 
57 #ifdef HAVE_POLL_H
58 #include <poll.h>
59 #endif
60 #include <signal.h>
61 #include <fcntl.h>
62 #include <grp.h>
63 #include <unistd.h>
64 #include <sys/file.h>
65 #include <sys/socket.h>
66 #include <sys/stat.h>
67 #include <sys/time.h>
68 #include <netdb.h>
69 #include <netinet/in.h>
70 #ifdef HAVE_NETINET_TCP_H
71 #include <netinet/tcp.h>
72 #endif
73 #include <utime.h>
74 #ifdef _MSC_VER /* mstcpip.h is missing on mingw */
75 #include <mstcpip.h>
76 #endif
77 
78 #include "common/ip.h"
79 #include "libpq/libpq.h"
80 #include "miscadmin.h"
81 #include "port/pg_bswap.h"
82 #include "storage/ipc.h"
83 #include "utils/guc.h"
84 #include "utils/memutils.h"
85 
86 /*
87  * Cope with the various platform-specific ways to spell TCP keepalive socket
88  * options. This doesn't cover Windows, which as usual does its own thing.
89  */
90 #if defined(TCP_KEEPIDLE)
91 /* TCP_KEEPIDLE is the name of this option on Linux and *BSD */
92 #define PG_TCP_KEEPALIVE_IDLE TCP_KEEPIDLE
93 #define PG_TCP_KEEPALIVE_IDLE_STR "TCP_KEEPIDLE"
94 #elif defined(TCP_KEEPALIVE_THRESHOLD)
95 /* TCP_KEEPALIVE_THRESHOLD is the name of this option on Solaris >= 11 */
96 #define PG_TCP_KEEPALIVE_IDLE TCP_KEEPALIVE_THRESHOLD
97 #define PG_TCP_KEEPALIVE_IDLE_STR "TCP_KEEPALIVE_THRESHOLD"
98 #elif defined(TCP_KEEPALIVE) && defined(__darwin__)
99 /* TCP_KEEPALIVE is the name of this option on macOS */
100 /* Caution: Solaris has this symbol but it means something different */
101 #define PG_TCP_KEEPALIVE_IDLE TCP_KEEPALIVE
102 #define PG_TCP_KEEPALIVE_IDLE_STR "TCP_KEEPALIVE"
103 #endif
104 
105 /*
106  * Configuration options
107  */
110 
111 /* Where the Unix socket files are (list of palloc'd strings) */
112 static List *sock_paths = NIL;
113 
114 /*
115  * Buffers for low-level I/O.
116  *
117  * The receive buffer is fixed size. Send buffer is usually 8k, but can be
118  * enlarged by pq_putmessage_noblock() if the message doesn't fit otherwise.
119  */
120 
121 #define PQ_SEND_BUFFER_SIZE 8192
122 #define PQ_RECV_BUFFER_SIZE 8192
123 
124 static char *PqSendBuffer;
125 static int PqSendBufferSize; /* Size send buffer */
126 static int PqSendPointer; /* Next index to store a byte in PqSendBuffer */
127 static int PqSendStart; /* Next index to send a byte in PqSendBuffer */
128 
130 static int PqRecvPointer; /* Next index to read a byte from PqRecvBuffer */
131 static int PqRecvLength; /* End of data available in PqRecvBuffer */
132 
133 /*
134  * Message status
135  */
136 static bool PqCommBusy; /* busy sending data to the client */
137 static bool PqCommReadingMsg; /* in the middle of reading a message */
138 
139 
140 /* Internal functions */
141 static void socket_comm_reset(void);
142 static void socket_close(int code, Datum arg);
143 static void socket_set_nonblocking(bool nonblocking);
144 static int socket_flush(void);
145 static int socket_flush_if_writable(void);
146 static bool socket_is_send_pending(void);
147 static int socket_putmessage(char msgtype, const char *s, size_t len);
148 static void socket_putmessage_noblock(char msgtype, const char *s, size_t len);
149 static int internal_putbytes(const char *s, size_t len);
150 static int internal_flush(void);
151 
152 #ifdef HAVE_UNIX_SOCKETS
153 static int Lock_AF_UNIX(const char *unixSocketDir, const char *unixSocketPath);
154 static int Setup_AF_UNIX(const char *sock_path);
155 #endif /* HAVE_UNIX_SOCKETS */
156 
159  socket_flush,
164 };
165 
167 
169 
170 
171 /* --------------------------------
172  * pq_init - initialize libpq at backend startup
173  * --------------------------------
174  */
175 void
176 pq_init(void)
177 {
178  int socket_pos PG_USED_FOR_ASSERTS_ONLY;
179  int latch_pos PG_USED_FOR_ASSERTS_ONLY;
180 
181  /* initialize state variables */
185  PqCommBusy = false;
186  PqCommReadingMsg = false;
187 
188  /* set up process-exit hook to close the socket */
190 
191  /*
192  * In backends (as soon as forked) we operate the underlying socket in
193  * nonblocking mode and use latches to implement blocking semantics if
194  * needed. That allows us to provide safely interruptible reads and
195  * writes.
196  *
197  * Use COMMERROR on failure, because ERROR would try to send the error to
198  * the client, which might require changing the mode again, leading to
199  * infinite recursion.
200  */
201 #ifndef WIN32
204  (errmsg("could not set socket to nonblocking mode: %m")));
205 #endif
206 
209  MyProcPort->sock, NULL, NULL);
211  MyLatch, NULL);
213  NULL, NULL);
214 
215  /*
216  * The event positions match the order we added them, but let's sanity
217  * check them to be sure.
218  */
219  Assert(socket_pos == FeBeWaitSetSocketPos);
220  Assert(latch_pos == FeBeWaitSetLatchPos);
221 }
222 
223 /* --------------------------------
224  * socket_comm_reset - reset libpq during error recovery
225  *
226  * This is called from error recovery at the outer idle loop. It's
227  * just to get us out of trouble if we somehow manage to elog() from
228  * inside a pqcomm.c routine (which ideally will never happen, but...)
229  * --------------------------------
230  */
231 static void
233 {
234  /* Do not throw away pending data, but do reset the busy flag */
235  PqCommBusy = false;
236 }
237 
238 /* --------------------------------
239  * socket_close - shutdown libpq at backend exit
240  *
241  * This is the one pg_on_exit_callback in place during BackendInitialize().
242  * That function's unusual signal handling constrains that this callback be
243  * safe to run at any instant.
244  * --------------------------------
245  */
246 static void
248 {
249  /* Nothing to do in a standalone backend, where MyProcPort is NULL. */
250  if (MyProcPort != NULL)
251  {
252 #ifdef ENABLE_GSS
253  /*
254  * Shutdown GSSAPI layer. This section does nothing when interrupting
255  * BackendInitialize(), because pg_GSS_recvauth() makes first use of
256  * "ctx" and "cred".
257  *
258  * Note that we don't bother to free MyProcPort->gss, since we're
259  * about to exit anyway.
260  */
261  if (MyProcPort->gss)
262  {
263  OM_uint32 min_s;
264 
265  if (MyProcPort->gss->ctx != GSS_C_NO_CONTEXT)
266  gss_delete_sec_context(&min_s, &MyProcPort->gss->ctx, NULL);
267 
268  if (MyProcPort->gss->cred != GSS_C_NO_CREDENTIAL)
269  gss_release_cred(&min_s, &MyProcPort->gss->cred);
270  }
271 #endif /* ENABLE_GSS */
272 
273  /*
274  * Cleanly shut down SSL layer. Nowhere else does a postmaster child
275  * call this, so this is safe when interrupting BackendInitialize().
276  */
278 
279  /*
280  * Formerly we did an explicit close() here, but it seems better to
281  * leave the socket open until the process dies. This allows clients
282  * to perform a "synchronous close" if they care --- wait till the
283  * transport layer reports connection closure, and you can be sure the
284  * backend has exited.
285  *
286  * We do set sock to PGINVALID_SOCKET to prevent any further I/O,
287  * though.
288  */
290  }
291 }
292 
293 
294 
295 /*
296  * Streams -- wrapper around Unix socket system calls
297  *
298  *
299  * Stream functions are used for vanilla TCP connection protocol.
300  */
301 
302 
303 /*
304  * StreamServerPort -- open a "listening" port to accept connections.
305  *
306  * family should be AF_UNIX or AF_UNSPEC; portNumber is the port number.
307  * For AF_UNIX ports, hostName should be NULL and unixSocketDir must be
308  * specified. For TCP ports, hostName is either NULL for all interfaces or
309  * the interface to listen on, and unixSocketDir is ignored (can be NULL).
310  *
311  * Successfully opened sockets are added to the ListenSocket[] array (of
312  * length MaxListen), at the first position that isn't PGINVALID_SOCKET.
313  *
314  * RETURNS: STATUS_OK or STATUS_ERROR
315  */
316 
317 int
318 StreamServerPort(int family, const char *hostName, unsigned short portNumber,
319  const char *unixSocketDir,
320  pgsocket ListenSocket[], int MaxListen)
321 {
322  pgsocket fd;
323  int err;
324  int maxconn;
325  int ret;
326  char portNumberStr[32];
327  const char *familyDesc;
328  char familyDescBuf[64];
329  const char *addrDesc;
330  char addrBuf[NI_MAXHOST];
331  char *service;
332  struct addrinfo *addrs = NULL,
333  *addr;
334  struct addrinfo hint;
335  int listen_index = 0;
336  int added = 0;
337 
338 #ifdef HAVE_UNIX_SOCKETS
339  char unixSocketPath[MAXPGPATH];
340 #endif
341 #if !defined(WIN32) || defined(IPV6_V6ONLY)
342  int one = 1;
343 #endif
344 
345  /* Initialize hint structure */
346  MemSet(&hint, 0, sizeof(hint));
347  hint.ai_family = family;
348  hint.ai_flags = AI_PASSIVE;
349  hint.ai_socktype = SOCK_STREAM;
350 
351 #ifdef HAVE_UNIX_SOCKETS
352  if (family == AF_UNIX)
353  {
354  /*
355  * Create unixSocketPath from portNumber and unixSocketDir and lock
356  * that file path
357  */
358  UNIXSOCK_PATH(unixSocketPath, portNumber, unixSocketDir);
359  if (strlen(unixSocketPath) >= UNIXSOCK_PATH_BUFLEN)
360  {
361  ereport(LOG,
362  (errmsg("Unix-domain socket path \"%s\" is too long (maximum %d bytes)",
363  unixSocketPath,
364  (int) (UNIXSOCK_PATH_BUFLEN - 1))));
365  return STATUS_ERROR;
366  }
367  if (Lock_AF_UNIX(unixSocketDir, unixSocketPath) != STATUS_OK)
368  return STATUS_ERROR;
369  service = unixSocketPath;
370  }
371  else
372 #endif /* HAVE_UNIX_SOCKETS */
373  {
374  snprintf(portNumberStr, sizeof(portNumberStr), "%d", portNumber);
375  service = portNumberStr;
376  }
377 
378  ret = pg_getaddrinfo_all(hostName, service, &hint, &addrs);
379  if (ret || !addrs)
380  {
381  if (hostName)
382  ereport(LOG,
383  (errmsg("could not translate host name \"%s\", service \"%s\" to address: %s",
384  hostName, service, gai_strerror(ret))));
385  else
386  ereport(LOG,
387  (errmsg("could not translate service \"%s\" to address: %s",
388  service, gai_strerror(ret))));
389  if (addrs)
390  pg_freeaddrinfo_all(hint.ai_family, addrs);
391  return STATUS_ERROR;
392  }
393 
394  for (addr = addrs; addr; addr = addr->ai_next)
395  {
396  if (family != AF_UNIX && addr->ai_family == AF_UNIX)
397  {
398  /*
399  * Only set up a unix domain socket when they really asked for it.
400  * The service/port is different in that case.
401  */
402  continue;
403  }
404 
405  /* See if there is still room to add 1 more socket. */
406  for (; listen_index < MaxListen; listen_index++)
407  {
408  if (ListenSocket[listen_index] == PGINVALID_SOCKET)
409  break;
410  }
411  if (listen_index >= MaxListen)
412  {
413  ereport(LOG,
414  (errmsg("could not bind to all requested addresses: MAXLISTEN (%d) exceeded",
415  MaxListen)));
416  break;
417  }
418 
419  /* set up address family name for log messages */
420  switch (addr->ai_family)
421  {
422  case AF_INET:
423  familyDesc = _("IPv4");
424  break;
425 #ifdef HAVE_IPV6
426  case AF_INET6:
427  familyDesc = _("IPv6");
428  break;
429 #endif
430 #ifdef HAVE_UNIX_SOCKETS
431  case AF_UNIX:
432  familyDesc = _("Unix");
433  break;
434 #endif
435  default:
436  snprintf(familyDescBuf, sizeof(familyDescBuf),
437  _("unrecognized address family %d"),
438  addr->ai_family);
439  familyDesc = familyDescBuf;
440  break;
441  }
442 
443  /* set up text form of address for log messages */
444 #ifdef HAVE_UNIX_SOCKETS
445  if (addr->ai_family == AF_UNIX)
446  addrDesc = unixSocketPath;
447  else
448 #endif
449  {
450  pg_getnameinfo_all((const struct sockaddr_storage *) addr->ai_addr,
451  addr->ai_addrlen,
452  addrBuf, sizeof(addrBuf),
453  NULL, 0,
455  addrDesc = addrBuf;
456  }
457 
458  if ((fd = socket(addr->ai_family, SOCK_STREAM, 0)) == PGINVALID_SOCKET)
459  {
460  ereport(LOG,
462  /* translator: first %s is IPv4, IPv6, or Unix */
463  errmsg("could not create %s socket for address \"%s\": %m",
464  familyDesc, addrDesc)));
465  continue;
466  }
467 
468 #ifndef WIN32
469 
470  /*
471  * Without the SO_REUSEADDR flag, a new postmaster can't be started
472  * right away after a stop or crash, giving "address already in use"
473  * error on TCP ports.
474  *
475  * On win32, however, this behavior only happens if the
476  * SO_EXCLUSIVEADDRUSE is set. With SO_REUSEADDR, win32 allows
477  * multiple servers to listen on the same address, resulting in
478  * unpredictable behavior. With no flags at all, win32 behaves as Unix
479  * with SO_REUSEADDR.
480  */
481  if (addr->ai_family != AF_UNIX)
482  {
483  if ((setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
484  (char *) &one, sizeof(one))) == -1)
485  {
486  ereport(LOG,
488  /* translator: third %s is IPv4, IPv6, or Unix */
489  errmsg("%s(%s) failed for %s address \"%s\": %m",
490  "setsockopt", "SO_REUSEADDR",
491  familyDesc, addrDesc)));
492  closesocket(fd);
493  continue;
494  }
495  }
496 #endif
497 
498 #ifdef IPV6_V6ONLY
499  if (addr->ai_family == AF_INET6)
500  {
501  if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY,
502  (char *) &one, sizeof(one)) == -1)
503  {
504  ereport(LOG,
506  /* translator: third %s is IPv4, IPv6, or Unix */
507  errmsg("%s(%s) failed for %s address \"%s\": %m",
508  "setsockopt", "IPV6_V6ONLY",
509  familyDesc, addrDesc)));
510  closesocket(fd);
511  continue;
512  }
513  }
514 #endif
515 
516  /*
517  * Note: This might fail on some OS's, like Linux older than
518  * 2.4.21-pre3, that don't have the IPV6_V6ONLY socket option, and map
519  * ipv4 addresses to ipv6. It will show ::ffff:ipv4 for all ipv4
520  * connections.
521  */
522  err = bind(fd, addr->ai_addr, addr->ai_addrlen);
523  if (err < 0)
524  {
525  int saved_errno = errno;
526 
527  ereport(LOG,
529  /* translator: first %s is IPv4, IPv6, or Unix */
530  errmsg("could not bind %s address \"%s\": %m",
531  familyDesc, addrDesc),
532  saved_errno == EADDRINUSE ?
533  (addr->ai_family == AF_UNIX ?
534  errhint("Is another postmaster already running on port %d?",
535  (int) portNumber) :
536  errhint("Is another postmaster already running on port %d?"
537  " If not, wait a few seconds and retry.",
538  (int) portNumber)) : 0));
539  closesocket(fd);
540  continue;
541  }
542 
543 #ifdef HAVE_UNIX_SOCKETS
544  if (addr->ai_family == AF_UNIX)
545  {
546  if (Setup_AF_UNIX(service) != STATUS_OK)
547  {
548  closesocket(fd);
549  break;
550  }
551  }
552 #endif
553 
554  /*
555  * Select appropriate accept-queue length limit. PG_SOMAXCONN is only
556  * intended to provide a clamp on the request on platforms where an
557  * overly large request provokes a kernel error (are there any?).
558  */
559  maxconn = MaxBackends * 2;
560  if (maxconn > PG_SOMAXCONN)
561  maxconn = PG_SOMAXCONN;
562 
563  err = listen(fd, maxconn);
564  if (err < 0)
565  {
566  ereport(LOG,
568  /* translator: first %s is IPv4, IPv6, or Unix */
569  errmsg("could not listen on %s address \"%s\": %m",
570  familyDesc, addrDesc)));
571  closesocket(fd);
572  continue;
573  }
574 
575 #ifdef HAVE_UNIX_SOCKETS
576  if (addr->ai_family == AF_UNIX)
577  ereport(LOG,
578  (errmsg("listening on Unix socket \"%s\"",
579  addrDesc)));
580  else
581 #endif
582  ereport(LOG,
583  /* translator: first %s is IPv4 or IPv6 */
584  (errmsg("listening on %s address \"%s\", port %d",
585  familyDesc, addrDesc, (int) portNumber)));
586 
587  ListenSocket[listen_index] = fd;
588  added++;
589  }
590 
591  pg_freeaddrinfo_all(hint.ai_family, addrs);
592 
593  if (!added)
594  return STATUS_ERROR;
595 
596  return STATUS_OK;
597 }
598 
599 
600 #ifdef HAVE_UNIX_SOCKETS
601 
602 /*
603  * Lock_AF_UNIX -- configure unix socket file path
604  */
605 static int
606 Lock_AF_UNIX(const char *unixSocketDir, const char *unixSocketPath)
607 {
608  /* no lock file for abstract sockets */
609  if (unixSocketPath[0] == '@')
610  return STATUS_OK;
611 
612  /*
613  * Grab an interlock file associated with the socket file.
614  *
615  * Note: there are two reasons for using a socket lock file, rather than
616  * trying to interlock directly on the socket itself. First, it's a lot
617  * more portable, and second, it lets us remove any pre-existing socket
618  * file without race conditions.
619  */
620  CreateSocketLockFile(unixSocketPath, true, unixSocketDir);
621 
622  /*
623  * Once we have the interlock, we can safely delete any pre-existing
624  * socket file to avoid failure at bind() time.
625  */
626  (void) unlink(unixSocketPath);
627 
628  /*
629  * Remember socket file pathnames for later maintenance.
630  */
631  sock_paths = lappend(sock_paths, pstrdup(unixSocketPath));
632 
633  return STATUS_OK;
634 }
635 
636 
637 /*
638  * Setup_AF_UNIX -- configure unix socket permissions
639  */
640 static int
641 Setup_AF_UNIX(const char *sock_path)
642 {
643  /* no file system permissions for abstract sockets */
644  if (sock_path[0] == '@')
645  return STATUS_OK;
646 
647  /*
648  * Fix socket ownership/permission if requested. Note we must do this
649  * before we listen() to avoid a window where unwanted connections could
650  * get accepted.
651  */
653  if (Unix_socket_group[0] != '\0')
654  {
655 #ifdef WIN32
656  elog(WARNING, "configuration item unix_socket_group is not supported on this platform");
657 #else
658  char *endptr;
659  unsigned long val;
660  gid_t gid;
661 
662  val = strtoul(Unix_socket_group, &endptr, 10);
663  if (*endptr == '\0')
664  { /* numeric group id */
665  gid = val;
666  }
667  else
668  { /* convert group name to id */
669  struct group *gr;
670 
671  gr = getgrnam(Unix_socket_group);
672  if (!gr)
673  {
674  ereport(LOG,
675  (errmsg("group \"%s\" does not exist",
677  return STATUS_ERROR;
678  }
679  gid = gr->gr_gid;
680  }
681  if (chown(sock_path, -1, gid) == -1)
682  {
683  ereport(LOG,
685  errmsg("could not set group of file \"%s\": %m",
686  sock_path)));
687  return STATUS_ERROR;
688  }
689 #endif
690  }
691 
692  if (chmod(sock_path, Unix_socket_permissions) == -1)
693  {
694  ereport(LOG,
696  errmsg("could not set permissions of file \"%s\": %m",
697  sock_path)));
698  return STATUS_ERROR;
699  }
700  return STATUS_OK;
701 }
702 #endif /* HAVE_UNIX_SOCKETS */
703 
704 
705 /*
706  * StreamConnection -- create a new connection with client using
707  * server port. Set port->sock to the FD of the new connection.
708  *
709  * ASSUME: that this doesn't need to be non-blocking because
710  * the Postmaster uses select() to tell when the socket is ready for
711  * accept().
712  *
713  * RETURNS: STATUS_OK or STATUS_ERROR
714  */
715 int
717 {
718  /* accept connection and fill in the client (remote) address */
719  port->raddr.salen = sizeof(port->raddr.addr);
720  if ((port->sock = accept(server_fd,
721  (struct sockaddr *) &port->raddr.addr,
722  &port->raddr.salen)) == PGINVALID_SOCKET)
723  {
724  ereport(LOG,
726  errmsg("could not accept new connection: %m")));
727 
728  /*
729  * If accept() fails then postmaster.c will still see the server
730  * socket as read-ready, and will immediately try again. To avoid
731  * uselessly sucking lots of CPU, delay a bit before trying again.
732  * (The most likely reason for failure is being out of kernel file
733  * table slots; we can do little except hope some will get freed up.)
734  */
735  pg_usleep(100000L); /* wait 0.1 sec */
736  return STATUS_ERROR;
737  }
738 
739  /* fill in the server (local) address */
740  port->laddr.salen = sizeof(port->laddr.addr);
741  if (getsockname(port->sock,
742  (struct sockaddr *) &port->laddr.addr,
743  &port->laddr.salen) < 0)
744  {
745  ereport(LOG,
746  (errmsg("%s() failed: %m", "getsockname")));
747  return STATUS_ERROR;
748  }
749 
750  /* select NODELAY and KEEPALIVE options if it's a TCP connection */
751  if (port->laddr.addr.ss_family != AF_UNIX)
752  {
753  int on;
754 #ifdef WIN32
755  int oldopt;
756  int optlen;
757  int newopt;
758 #endif
759 
760 #ifdef TCP_NODELAY
761  on = 1;
762  if (setsockopt(port->sock, IPPROTO_TCP, TCP_NODELAY,
763  (char *) &on, sizeof(on)) < 0)
764  {
765  ereport(LOG,
766  (errmsg("%s(%s) failed: %m", "setsockopt", "TCP_NODELAY")));
767  return STATUS_ERROR;
768  }
769 #endif
770  on = 1;
771  if (setsockopt(port->sock, SOL_SOCKET, SO_KEEPALIVE,
772  (char *) &on, sizeof(on)) < 0)
773  {
774  ereport(LOG,
775  (errmsg("%s(%s) failed: %m", "setsockopt", "SO_KEEPALIVE")));
776  return STATUS_ERROR;
777  }
778 
779 #ifdef WIN32
780 
781  /*
782  * This is a Win32 socket optimization. The OS send buffer should be
783  * large enough to send the whole Postgres send buffer in one go, or
784  * performance suffers. The Postgres send buffer can be enlarged if a
785  * very large message needs to be sent, but we won't attempt to
786  * enlarge the OS buffer if that happens, so somewhat arbitrarily
787  * ensure that the OS buffer is at least PQ_SEND_BUFFER_SIZE * 4.
788  * (That's 32kB with the current default).
789  *
790  * The default OS buffer size used to be 8kB in earlier Windows
791  * versions, but was raised to 64kB in Windows 2012. So it shouldn't
792  * be necessary to change it in later versions anymore. Changing it
793  * unnecessarily can even reduce performance, because setting
794  * SO_SNDBUF in the application disables the "dynamic send buffering"
795  * feature that was introduced in Windows 7. So before fiddling with
796  * SO_SNDBUF, check if the current buffer size is already large enough
797  * and only increase it if necessary.
798  *
799  * See https://support.microsoft.com/kb/823764/EN-US/ and
800  * https://msdn.microsoft.com/en-us/library/bb736549%28v=vs.85%29.aspx
801  */
802  optlen = sizeof(oldopt);
803  if (getsockopt(port->sock, SOL_SOCKET, SO_SNDBUF, (char *) &oldopt,
804  &optlen) < 0)
805  {
806  ereport(LOG,
807  (errmsg("%s(%s) failed: %m", "getsockopt", "SO_SNDBUF")));
808  return STATUS_ERROR;
809  }
810  newopt = PQ_SEND_BUFFER_SIZE * 4;
811  if (oldopt < newopt)
812  {
813  if (setsockopt(port->sock, SOL_SOCKET, SO_SNDBUF, (char *) &newopt,
814  sizeof(newopt)) < 0)
815  {
816  ereport(LOG,
817  (errmsg("%s(%s) failed: %m", "setsockopt", "SO_SNDBUF")));
818  return STATUS_ERROR;
819  }
820  }
821 #endif
822 
823  /*
824  * Also apply the current keepalive parameters. If we fail to set a
825  * parameter, don't error out, because these aren't universally
826  * supported. (Note: you might think we need to reset the GUC
827  * variables to 0 in such a case, but it's not necessary because the
828  * show hooks for these variables report the truth anyway.)
829  */
834  }
835 
836  return STATUS_OK;
837 }
838 
839 /*
840  * StreamClose -- close a client/backend connection
841  *
842  * NOTE: this is NOT used to terminate a session; it is just used to release
843  * the file descriptor in a process that should no longer have the socket
844  * open. (For example, the postmaster calls this after passing ownership
845  * of the connection to a child process.) It is expected that someone else
846  * still has the socket open. So, we only want to close the descriptor,
847  * we do NOT want to send anything to the far end.
848  */
849 void
851 {
852  closesocket(sock);
853 }
854 
855 /*
856  * TouchSocketFiles -- mark socket files as recently accessed
857  *
858  * This routine should be called every so often to ensure that the socket
859  * files have a recent mod date (ordinary operations on sockets usually won't
860  * change the mod date). That saves them from being removed by
861  * overenthusiastic /tmp-directory-cleaner daemons. (Another reason we should
862  * never have put the socket file in /tmp...)
863  */
864 void
866 {
867  ListCell *l;
868 
869  /* Loop through all created sockets... */
870  foreach(l, sock_paths)
871  {
872  char *sock_path = (char *) lfirst(l);
873 
874  /* Ignore errors; there's no point in complaining */
875  (void) utime(sock_path, NULL);
876  }
877 }
878 
879 /*
880  * RemoveSocketFiles -- unlink socket files at postmaster shutdown
881  */
882 void
884 {
885  ListCell *l;
886 
887  /* Loop through all created sockets... */
888  foreach(l, sock_paths)
889  {
890  char *sock_path = (char *) lfirst(l);
891 
892  /* Ignore any error. */
893  (void) unlink(sock_path);
894  }
895  /* Since we're about to exit, no need to reclaim storage */
896  sock_paths = NIL;
897 }
898 
899 
900 /* --------------------------------
901  * Low-level I/O routines begin here.
902  *
903  * These routines communicate with a frontend client across a connection
904  * already established by the preceding routines.
905  * --------------------------------
906  */
907 
908 /* --------------------------------
909  * socket_set_nonblocking - set socket blocking/non-blocking
910  *
911  * Sets the socket non-blocking if nonblocking is true, or sets it
912  * blocking otherwise.
913  * --------------------------------
914  */
915 static void
916 socket_set_nonblocking(bool nonblocking)
917 {
918  if (MyProcPort == NULL)
919  ereport(ERROR,
920  (errcode(ERRCODE_CONNECTION_DOES_NOT_EXIST),
921  errmsg("there is no client connection")));
922 
923  MyProcPort->noblock = nonblocking;
924 }
925 
926 /* --------------------------------
927  * pq_recvbuf - load some bytes into the input buffer
928  *
929  * returns 0 if OK, EOF if trouble
930  * --------------------------------
931  */
932 static int
934 {
935  if (PqRecvPointer > 0)
936  {
938  {
939  /* still some unread data, left-justify it in the buffer */
943  PqRecvPointer = 0;
944  }
945  else
947  }
948 
949  /* Ensure that we're in blocking mode */
950  socket_set_nonblocking(false);
951 
952  /* Can fill buffer from PqRecvLength and upwards */
953  for (;;)
954  {
955  int r;
956 
959 
960  if (r < 0)
961  {
962  if (errno == EINTR)
963  continue; /* Ok if interrupted */
964 
965  /*
966  * Careful: an ereport() that tries to write to the client would
967  * cause recursion to here, leading to stack overflow and core
968  * dump! This message must go *only* to the postmaster log.
969  */
972  errmsg("could not receive data from client: %m")));
973  return EOF;
974  }
975  if (r == 0)
976  {
977  /*
978  * EOF detected. We used to write a log message here, but it's
979  * better to expect the ultimate caller to do that.
980  */
981  return EOF;
982  }
983  /* r contains number of bytes read, so just incr length */
984  PqRecvLength += r;
985  return 0;
986  }
987 }
988 
989 /* --------------------------------
990  * pq_getbyte - get a single byte from connection, or return EOF
991  * --------------------------------
992  */
993 int
995 {
997 
998  while (PqRecvPointer >= PqRecvLength)
999  {
1000  if (pq_recvbuf()) /* If nothing in buffer, then recv some */
1001  return EOF; /* Failed to recv data */
1002  }
1003  return (unsigned char) PqRecvBuffer[PqRecvPointer++];
1004 }
1005 
1006 /* --------------------------------
1007  * pq_peekbyte - peek at next byte from connection
1008  *
1009  * Same as pq_getbyte() except we don't advance the pointer.
1010  * --------------------------------
1011  */
1012 int
1014 {
1016 
1017  while (PqRecvPointer >= PqRecvLength)
1018  {
1019  if (pq_recvbuf()) /* If nothing in buffer, then recv some */
1020  return EOF; /* Failed to recv data */
1021  }
1022  return (unsigned char) PqRecvBuffer[PqRecvPointer];
1023 }
1024 
1025 /* --------------------------------
1026  * pq_getbyte_if_available - get a single byte from connection,
1027  * if available
1028  *
1029  * The received byte is stored in *c. Returns 1 if a byte was read,
1030  * 0 if no data was available, or EOF if trouble.
1031  * --------------------------------
1032  */
1033 int
1035 {
1036  int r;
1037 
1039 
1041  {
1043  return 1;
1044  }
1045 
1046  /* Put the socket into non-blocking mode */
1047  socket_set_nonblocking(true);
1048 
1049  r = secure_read(MyProcPort, c, 1);
1050  if (r < 0)
1051  {
1052  /*
1053  * Ok if no data available without blocking or interrupted (though
1054  * EINTR really shouldn't happen with a non-blocking socket). Report
1055  * other errors.
1056  */
1057  if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)
1058  r = 0;
1059  else
1060  {
1061  /*
1062  * Careful: an ereport() that tries to write to the client would
1063  * cause recursion to here, leading to stack overflow and core
1064  * dump! This message must go *only* to the postmaster log.
1065  */
1068  errmsg("could not receive data from client: %m")));
1069  r = EOF;
1070  }
1071  }
1072  else if (r == 0)
1073  {
1074  /* EOF detected */
1075  r = EOF;
1076  }
1077 
1078  return r;
1079 }
1080 
1081 /* --------------------------------
1082  * pq_getbytes - get a known number of bytes from connection
1083  *
1084  * returns 0 if OK, EOF if trouble
1085  * --------------------------------
1086  */
1087 int
1088 pq_getbytes(char *s, size_t len)
1089 {
1090  size_t amount;
1091 
1093 
1094  while (len > 0)
1095  {
1096  while (PqRecvPointer >= PqRecvLength)
1097  {
1098  if (pq_recvbuf()) /* If nothing in buffer, then recv some */
1099  return EOF; /* Failed to recv data */
1100  }
1101  amount = PqRecvLength - PqRecvPointer;
1102  if (amount > len)
1103  amount = len;
1104  memcpy(s, PqRecvBuffer + PqRecvPointer, amount);
1105  PqRecvPointer += amount;
1106  s += amount;
1107  len -= amount;
1108  }
1109  return 0;
1110 }
1111 
1112 /* --------------------------------
1113  * pq_discardbytes - throw away a known number of bytes
1114  *
1115  * same as pq_getbytes except we do not copy the data to anyplace.
1116  * this is used for resynchronizing after read errors.
1117  *
1118  * returns 0 if OK, EOF if trouble
1119  * --------------------------------
1120  */
1121 static int
1123 {
1124  size_t amount;
1125 
1127 
1128  while (len > 0)
1129  {
1130  while (PqRecvPointer >= PqRecvLength)
1131  {
1132  if (pq_recvbuf()) /* If nothing in buffer, then recv some */
1133  return EOF; /* Failed to recv data */
1134  }
1135  amount = PqRecvLength - PqRecvPointer;
1136  if (amount > len)
1137  amount = len;
1138  PqRecvPointer += amount;
1139  len -= amount;
1140  }
1141  return 0;
1142 }
1143 
1144 /* --------------------------------
1145  * pq_buffer_has_data - is any buffered data available to read?
1146  *
1147  * This will *not* attempt to read more data.
1148  * --------------------------------
1149  */
1150 bool
1152 {
1153  return (PqRecvPointer < PqRecvLength);
1154 }
1155 
1156 
1157 /* --------------------------------
1158  * pq_startmsgread - begin reading a message from the client.
1159  *
1160  * This must be called before any of the pq_get* functions.
1161  * --------------------------------
1162  */
1163 void
1165 {
1166  /*
1167  * There shouldn't be a read active already, but let's check just to be
1168  * sure.
1169  */
1170  if (PqCommReadingMsg)
1171  ereport(FATAL,
1172  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1173  errmsg("terminating connection because protocol synchronization was lost")));
1174 
1175  PqCommReadingMsg = true;
1176 }
1177 
1178 
1179 /* --------------------------------
1180  * pq_endmsgread - finish reading message.
1181  *
1182  * This must be called after reading a message with pq_getbytes()
1183  * and friends, to indicate that we have read the whole message.
1184  * pq_getmessage() does this implicitly.
1185  * --------------------------------
1186  */
1187 void
1189 {
1191 
1192  PqCommReadingMsg = false;
1193 }
1194 
1195 /* --------------------------------
1196  * pq_is_reading_msg - are we currently reading a message?
1197  *
1198  * This is used in error recovery at the outer idle loop to detect if we have
1199  * lost protocol sync, and need to terminate the connection. pq_startmsgread()
1200  * will check for that too, but it's nicer to detect it earlier.
1201  * --------------------------------
1202  */
1203 bool
1205 {
1206  return PqCommReadingMsg;
1207 }
1208 
1209 /* --------------------------------
1210  * pq_getmessage - get a message with length word from connection
1211  *
1212  * The return value is placed in an expansible StringInfo, which has
1213  * already been initialized by the caller.
1214  * Only the message body is placed in the StringInfo; the length word
1215  * is removed. Also, s->cursor is initialized to zero for convenience
1216  * in scanning the message contents.
1217  *
1218  * maxlen is the upper limit on the length of the
1219  * message we are willing to accept. We abort the connection (by
1220  * returning EOF) if client tries to send more than that.
1221  *
1222  * returns 0 if OK, EOF if trouble
1223  * --------------------------------
1224  */
1225 int
1227 {
1228  int32 len;
1229 
1231 
1232  resetStringInfo(s);
1233 
1234  /* Read message length word */
1235  if (pq_getbytes((char *) &len, 4) == EOF)
1236  {
1238  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1239  errmsg("unexpected EOF within message length word")));
1240  return EOF;
1241  }
1242 
1243  len = pg_ntoh32(len);
1244 
1245  if (len < 4 || len > maxlen)
1246  {
1248  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1249  errmsg("invalid message length")));
1250  return EOF;
1251  }
1252 
1253  len -= 4; /* discount length itself */
1254 
1255  if (len > 0)
1256  {
1257  /*
1258  * Allocate space for message. If we run out of room (ridiculously
1259  * large message), we will elog(ERROR), but we want to discard the
1260  * message body so as not to lose communication sync.
1261  */
1262  PG_TRY();
1263  {
1264  enlargeStringInfo(s, len);
1265  }
1266  PG_CATCH();
1267  {
1268  if (pq_discardbytes(len) == EOF)
1270  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1271  errmsg("incomplete message from client")));
1272 
1273  /* we discarded the rest of the message so we're back in sync. */
1274  PqCommReadingMsg = false;
1275  PG_RE_THROW();
1276  }
1277  PG_END_TRY();
1278 
1279  /* And grab the message */
1280  if (pq_getbytes(s->data, len) == EOF)
1281  {
1283  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1284  errmsg("incomplete message from client")));
1285  return EOF;
1286  }
1287  s->len = len;
1288  /* Place a trailing null per StringInfo convention */
1289  s->data[len] = '\0';
1290  }
1291 
1292  /* finished reading the message. */
1293  PqCommReadingMsg = false;
1294 
1295  return 0;
1296 }
1297 
1298 
1299 static int
1300 internal_putbytes(const char *s, size_t len)
1301 {
1302  size_t amount;
1303 
1304  while (len > 0)
1305  {
1306  /* If buffer is full, then flush it out */
1308  {
1309  socket_set_nonblocking(false);
1310  if (internal_flush())
1311  return EOF;
1312  }
1313  amount = PqSendBufferSize - PqSendPointer;
1314  if (amount > len)
1315  amount = len;
1316  memcpy(PqSendBuffer + PqSendPointer, s, amount);
1317  PqSendPointer += amount;
1318  s += amount;
1319  len -= amount;
1320  }
1321  return 0;
1322 }
1323 
1324 /* --------------------------------
1325  * socket_flush - flush pending output
1326  *
1327  * returns 0 if OK, EOF if trouble
1328  * --------------------------------
1329  */
1330 static int
1332 {
1333  int res;
1334 
1335  /* No-op if reentrant call */
1336  if (PqCommBusy)
1337  return 0;
1338  PqCommBusy = true;
1339  socket_set_nonblocking(false);
1340  res = internal_flush();
1341  PqCommBusy = false;
1342  return res;
1343 }
1344 
1345 /* --------------------------------
1346  * internal_flush - flush pending output
1347  *
1348  * Returns 0 if OK (meaning everything was sent, or operation would block
1349  * and the socket is in non-blocking mode), or EOF if trouble.
1350  * --------------------------------
1351  */
1352 static int
1354 {
1355  static int last_reported_send_errno = 0;
1356 
1357  char *bufptr = PqSendBuffer + PqSendStart;
1358  char *bufend = PqSendBuffer + PqSendPointer;
1359 
1360  while (bufptr < bufend)
1361  {
1362  int r;
1363 
1364  r = secure_write(MyProcPort, bufptr, bufend - bufptr);
1365 
1366  if (r <= 0)
1367  {
1368  if (errno == EINTR)
1369  continue; /* Ok if we were interrupted */
1370 
1371  /*
1372  * Ok if no data writable without blocking, and the socket is in
1373  * non-blocking mode.
1374  */
1375  if (errno == EAGAIN ||
1376  errno == EWOULDBLOCK)
1377  {
1378  return 0;
1379  }
1380 
1381  /*
1382  * Careful: an ereport() that tries to write to the client would
1383  * cause recursion to here, leading to stack overflow and core
1384  * dump! This message must go *only* to the postmaster log.
1385  *
1386  * If a client disconnects while we're in the midst of output, we
1387  * might write quite a bit of data before we get to a safe query
1388  * abort point. So, suppress duplicate log messages.
1389  */
1390  if (errno != last_reported_send_errno)
1391  {
1392  last_reported_send_errno = errno;
1395  errmsg("could not send data to client: %m")));
1396  }
1397 
1398  /*
1399  * We drop the buffered data anyway so that processing can
1400  * continue, even though we'll probably quit soon. We also set a
1401  * flag that'll cause the next CHECK_FOR_INTERRUPTS to terminate
1402  * the connection.
1403  */
1404  PqSendStart = PqSendPointer = 0;
1406  InterruptPending = 1;
1407  return EOF;
1408  }
1409 
1410  last_reported_send_errno = 0; /* reset after any successful send */
1411  bufptr += r;
1412  PqSendStart += r;
1413  }
1414 
1415  PqSendStart = PqSendPointer = 0;
1416  return 0;
1417 }
1418 
1419 /* --------------------------------
1420  * pq_flush_if_writable - flush pending output if writable without blocking
1421  *
1422  * Returns 0 if OK, or EOF if trouble.
1423  * --------------------------------
1424  */
1425 static int
1427 {
1428  int res;
1429 
1430  /* Quick exit if nothing to do */
1431  if (PqSendPointer == PqSendStart)
1432  return 0;
1433 
1434  /* No-op if reentrant call */
1435  if (PqCommBusy)
1436  return 0;
1437 
1438  /* Temporarily put the socket into non-blocking mode */
1439  socket_set_nonblocking(true);
1440 
1441  PqCommBusy = true;
1442  res = internal_flush();
1443  PqCommBusy = false;
1444  return res;
1445 }
1446 
1447 /* --------------------------------
1448  * socket_is_send_pending - is there any pending data in the output buffer?
1449  * --------------------------------
1450  */
1451 static bool
1453 {
1454  return (PqSendStart < PqSendPointer);
1455 }
1456 
1457 /* --------------------------------
1458  * Message-level I/O routines begin here.
1459  * --------------------------------
1460  */
1461 
1462 
1463 /* --------------------------------
1464  * socket_putmessage - send a normal message (suppressed in COPY OUT mode)
1465  *
1466  * msgtype is a message type code to place before the message body.
1467  *
1468  * len is the length of the message body data at *s. A message length
1469  * word (equal to len+4 because it counts itself too) is inserted by this
1470  * routine.
1471  *
1472  * We suppress messages generated while pqcomm.c is busy. This
1473  * avoids any possibility of messages being inserted within other
1474  * messages. The only known trouble case arises if SIGQUIT occurs
1475  * during a pqcomm.c routine --- quickdie() will try to send a warning
1476  * message, and the most reasonable approach seems to be to drop it.
1477  *
1478  * returns 0 if OK, EOF if trouble
1479  * --------------------------------
1480  */
1481 static int
1482 socket_putmessage(char msgtype, const char *s, size_t len)
1483 {
1484  uint32 n32;
1485 
1486  Assert(msgtype != 0);
1487 
1488  if (PqCommBusy)
1489  return 0;
1490  PqCommBusy = true;
1491  if (internal_putbytes(&msgtype, 1))
1492  goto fail;
1493 
1494  n32 = pg_hton32((uint32) (len + 4));
1495  if (internal_putbytes((char *) &n32, 4))
1496  goto fail;
1497 
1498  if (internal_putbytes(s, len))
1499  goto fail;
1500  PqCommBusy = false;
1501  return 0;
1502 
1503 fail:
1504  PqCommBusy = false;
1505  return EOF;
1506 }
1507 
1508 /* --------------------------------
1509  * pq_putmessage_noblock - like pq_putmessage, but never blocks
1510  *
1511  * If the output buffer is too small to hold the message, the buffer
1512  * is enlarged.
1513  */
1514 static void
1515 socket_putmessage_noblock(char msgtype, const char *s, size_t len)
1516 {
1518  int required;
1519 
1520  /*
1521  * Ensure we have enough space in the output buffer for the message header
1522  * as well as the message itself.
1523  */
1524  required = PqSendPointer + 1 + 4 + len;
1525  if (required > PqSendBufferSize)
1526  {
1529  }
1530  res = pq_putmessage(msgtype, s, len);
1531  Assert(res == 0); /* should not fail when the message fits in
1532  * buffer */
1533 }
1534 
1535 /* --------------------------------
1536  * pq_putmessage_v2 - send a message in protocol version 2
1537  *
1538  * msgtype is a message type code to place before the message body.
1539  *
1540  * We no longer support protocol version 2, but we have kept this
1541  * function so that if a client tries to connect with protocol version 2,
1542  * as a courtesy we can still send the "unsupported protocol version"
1543  * error to the client in the old format.
1544  *
1545  * Like in pq_putmessage(), we suppress messages generated while
1546  * pqcomm.c is busy.
1547  *
1548  * returns 0 if OK, EOF if trouble
1549  * --------------------------------
1550  */
1551 int
1552 pq_putmessage_v2(char msgtype, const char *s, size_t len)
1553 {
1554  Assert(msgtype != 0);
1555 
1556  if (PqCommBusy)
1557  return 0;
1558  PqCommBusy = true;
1559  if (internal_putbytes(&msgtype, 1))
1560  goto fail;
1561 
1562  if (internal_putbytes(s, len))
1563  goto fail;
1564  PqCommBusy = false;
1565  return 0;
1566 
1567 fail:
1568  PqCommBusy = false;
1569  return EOF;
1570 }
1571 
1572 /*
1573  * Support for TCP Keepalive parameters
1574  */
1575 
/*
 * pq_setkeepaliveswin32 - set keepalive idle time and interval on Windows
 *
 * Windows only lets us set the idle time and the interval together.  It
 * also offers no way to go back to the system default: passing zero really
 * sets the value to zero.  A non-positive argument is therefore replaced by
 * the out-of-the-box default before the call is made.
 *
 * On success the values actually applied are stored in the Port.
 * Returns STATUS_OK, or STATUS_ERROR (after logging) if WSAIoctl() fails.
 */
#if defined(WIN32) && defined(SIO_KEEPALIVE_VALS)
static int
pq_setkeepaliveswin32(Port *port, int idle, int interval)
{
    struct tcp_keepalive vals;
    DWORD       bytes_returned;
    int         rc;

    /* Substitute the stock defaults for "use default" requests. */
    if (idle <= 0)
        idle = 2 * 60 * 60;     /* default = 2 hours */
    if (interval <= 0)
        interval = 1;           /* default = 1 second */

    /* The ioctl takes milliseconds. */
    vals.onoff = 1;
    vals.keepalivetime = idle * 1000;
    vals.keepaliveinterval = interval * 1000;

    rc = WSAIoctl(port->sock,
                  SIO_KEEPALIVE_VALS,
                  (LPVOID) &vals,
                  sizeof(vals),
                  NULL,
                  0,
                  &bytes_returned,
                  NULL,
                  NULL);
    if (rc != 0)
    {
        ereport(LOG,
                (errmsg("%s(%s) failed: error code %d",
                        "WSAIoctl", "SIO_KEEPALIVE_VALS", WSAGetLastError())));
        return STATUS_ERROR;
    }

    /* Remember what is now in effect. */
    if (port->keepalives_idle != idle)
        port->keepalives_idle = idle;
    if (port->keepalives_interval != interval)
        port->keepalives_interval = interval;

    return STATUS_OK;
}
#endif
1621 
1622 int
1624 {
1625 #if defined(PG_TCP_KEEPALIVE_IDLE) || defined(SIO_KEEPALIVE_VALS)
1626  if (port == NULL || port->laddr.addr.ss_family == AF_UNIX)
1627  return 0;
1628 
1629  if (port->keepalives_idle != 0)
1630  return port->keepalives_idle;
1631 
1632  if (port->default_keepalives_idle == 0)
1633  {
1634 #ifndef WIN32
1635  socklen_t size = sizeof(port->default_keepalives_idle);
1636 
1637  if (getsockopt(port->sock, IPPROTO_TCP, PG_TCP_KEEPALIVE_IDLE,
1638  (char *) &port->default_keepalives_idle,
1639  &size) < 0)
1640  {
1641  ereport(LOG,
1642  (errmsg("%s(%s) failed: %m", "getsockopt", PG_TCP_KEEPALIVE_IDLE_STR)));
1643  port->default_keepalives_idle = -1; /* don't know */
1644  }
1645 #else /* WIN32 */
1646  /* We can't get the defaults on Windows, so return "don't know" */
1647  port->default_keepalives_idle = -1;
1648 #endif /* WIN32 */
1649  }
1650 
1651  return port->default_keepalives_idle;
1652 #else
1653  return 0;
1654 #endif
1655 }
1656 
1657 int
1659 {
1660  if (port == NULL || port->laddr.addr.ss_family == AF_UNIX)
1661  return STATUS_OK;
1662 
1663 /* check SIO_KEEPALIVE_VALS here, not just WIN32, as some toolchains lack it */
1664 #if defined(PG_TCP_KEEPALIVE_IDLE) || defined(SIO_KEEPALIVE_VALS)
1665  if (idle == port->keepalives_idle)
1666  return STATUS_OK;
1667 
1668 #ifndef WIN32
1669  if (port->default_keepalives_idle <= 0)
1670  {
1671  if (pq_getkeepalivesidle(port) < 0)
1672  {
1673  if (idle == 0)
1674  return STATUS_OK; /* default is set but unknown */
1675  else
1676  return STATUS_ERROR;
1677  }
1678  }
1679 
1680  if (idle == 0)
1681  idle = port->default_keepalives_idle;
1682 
1683  if (setsockopt(port->sock, IPPROTO_TCP, PG_TCP_KEEPALIVE_IDLE,
1684  (char *) &idle, sizeof(idle)) < 0)
1685  {
1686  ereport(LOG,
1687  (errmsg("%s(%s) failed: %m", "setsockopt", PG_TCP_KEEPALIVE_IDLE_STR)));
1688  return STATUS_ERROR;
1689  }
1690 
1691  port->keepalives_idle = idle;
1692 #else /* WIN32 */
1693  return pq_setkeepaliveswin32(port, idle, port->keepalives_interval);
1694 #endif
1695 #else
1696  if (idle != 0)
1697  {
1698  ereport(LOG,
1699  (errmsg("setting the keepalive idle time is not supported")));
1700  return STATUS_ERROR;
1701  }
1702 #endif
1703 
1704  return STATUS_OK;
1705 }
1706 
1707 int
1709 {
1710 #if defined(TCP_KEEPINTVL) || defined(SIO_KEEPALIVE_VALS)
1711  if (port == NULL || port->laddr.addr.ss_family == AF_UNIX)
1712  return 0;
1713 
1714  if (port->keepalives_interval != 0)
1715  return port->keepalives_interval;
1716 
1717  if (port->default_keepalives_interval == 0)
1718  {
1719 #ifndef WIN32
1720  socklen_t size = sizeof(port->default_keepalives_interval);
1721 
1722  if (getsockopt(port->sock, IPPROTO_TCP, TCP_KEEPINTVL,
1723  (char *) &port->default_keepalives_interval,
1724  &size) < 0)
1725  {
1726  ereport(LOG,
1727  (errmsg("%s(%s) failed: %m", "getsockopt", "TCP_KEEPINTVL")));
1728  port->default_keepalives_interval = -1; /* don't know */
1729  }
1730 #else
1731  /* We can't get the defaults on Windows, so return "don't know" */
1732  port->default_keepalives_interval = -1;
1733 #endif /* WIN32 */
1734  }
1735 
1736  return port->default_keepalives_interval;
1737 #else
1738  return 0;
1739 #endif
1740 }
1741 
1742 int
1744 {
1745  if (port == NULL || port->laddr.addr.ss_family == AF_UNIX)
1746  return STATUS_OK;
1747 
1748 #if defined(TCP_KEEPINTVL) || defined(SIO_KEEPALIVE_VALS)
1749  if (interval == port->keepalives_interval)
1750  return STATUS_OK;
1751 
1752 #ifndef WIN32
1753  if (port->default_keepalives_interval <= 0)
1754  {
1755  if (pq_getkeepalivesinterval(port) < 0)
1756  {
1757  if (interval == 0)
1758  return STATUS_OK; /* default is set but unknown */
1759  else
1760  return STATUS_ERROR;
1761  }
1762  }
1763 
1764  if (interval == 0)
1765  interval = port->default_keepalives_interval;
1766 
1767  if (setsockopt(port->sock, IPPROTO_TCP, TCP_KEEPINTVL,
1768  (char *) &interval, sizeof(interval)) < 0)
1769  {
1770  ereport(LOG,
1771  (errmsg("%s(%s) failed: %m", "setsockopt", "TCP_KEEPINTVL")));
1772  return STATUS_ERROR;
1773  }
1774 
1775  port->keepalives_interval = interval;
1776 #else /* WIN32 */
1777  return pq_setkeepaliveswin32(port, port->keepalives_idle, interval);
1778 #endif
1779 #else
1780  if (interval != 0)
1781  {
1782  ereport(LOG,
1783  (errmsg("%s(%s) not supported", "setsockopt", "TCP_KEEPINTVL")));
1784  return STATUS_ERROR;
1785  }
1786 #endif
1787 
1788  return STATUS_OK;
1789 }
1790 
1791 int
1793 {
1794 #ifdef TCP_KEEPCNT
1795  if (port == NULL || port->laddr.addr.ss_family == AF_UNIX)
1796  return 0;
1797 
1798  if (port->keepalives_count != 0)
1799  return port->keepalives_count;
1800 
1801  if (port->default_keepalives_count == 0)
1802  {
1803  socklen_t size = sizeof(port->default_keepalives_count);
1804 
1805  if (getsockopt(port->sock, IPPROTO_TCP, TCP_KEEPCNT,
1806  (char *) &port->default_keepalives_count,
1807  &size) < 0)
1808  {
1809  ereport(LOG,
1810  (errmsg("%s(%s) failed: %m", "getsockopt", "TCP_KEEPCNT")));
1811  port->default_keepalives_count = -1; /* don't know */
1812  }
1813  }
1814 
1815  return port->default_keepalives_count;
1816 #else
1817  return 0;
1818 #endif
1819 }
1820 
1821 int
1823 {
1824  if (port == NULL || port->laddr.addr.ss_family == AF_UNIX)
1825  return STATUS_OK;
1826 
1827 #ifdef TCP_KEEPCNT
1828  if (count == port->keepalives_count)
1829  return STATUS_OK;
1830 
1831  if (port->default_keepalives_count <= 0)
1832  {
1833  if (pq_getkeepalivescount(port) < 0)
1834  {
1835  if (count == 0)
1836  return STATUS_OK; /* default is set but unknown */
1837  else
1838  return STATUS_ERROR;
1839  }
1840  }
1841 
1842  if (count == 0)
1843  count = port->default_keepalives_count;
1844 
1845  if (setsockopt(port->sock, IPPROTO_TCP, TCP_KEEPCNT,
1846  (char *) &count, sizeof(count)) < 0)
1847  {
1848  ereport(LOG,
1849  (errmsg("%s(%s) failed: %m", "setsockopt", "TCP_KEEPCNT")));
1850  return STATUS_ERROR;
1851  }
1852 
1853  port->keepalives_count = count;
1854 #else
1855  if (count != 0)
1856  {
1857  ereport(LOG,
1858  (errmsg("%s(%s) not supported", "setsockopt", "TCP_KEEPCNT")));
1859  return STATUS_ERROR;
1860  }
1861 #endif
1862 
1863  return STATUS_OK;
1864 }
1865 
1866 int
1868 {
1869 #ifdef TCP_USER_TIMEOUT
1870  if (port == NULL || port->laddr.addr.ss_family == AF_UNIX)
1871  return 0;
1872 
1873  if (port->tcp_user_timeout != 0)
1874  return port->tcp_user_timeout;
1875 
1876  if (port->default_tcp_user_timeout == 0)
1877  {
1878  socklen_t size = sizeof(port->default_tcp_user_timeout);
1879 
1880  if (getsockopt(port->sock, IPPROTO_TCP, TCP_USER_TIMEOUT,
1881  (char *) &port->default_tcp_user_timeout,
1882  &size) < 0)
1883  {
1884  ereport(LOG,
1885  (errmsg("%s(%s) failed: %m", "getsockopt", "TCP_USER_TIMEOUT")));
1886  port->default_tcp_user_timeout = -1; /* don't know */
1887  }
1888  }
1889 
1890  return port->default_tcp_user_timeout;
1891 #else
1892  return 0;
1893 #endif
1894 }
1895 
1896 int
1898 {
1899  if (port == NULL || port->laddr.addr.ss_family == AF_UNIX)
1900  return STATUS_OK;
1901 
1902 #ifdef TCP_USER_TIMEOUT
1903  if (timeout == port->tcp_user_timeout)
1904  return STATUS_OK;
1905 
1906  if (port->default_tcp_user_timeout <= 0)
1907  {
1908  if (pq_gettcpusertimeout(port) < 0)
1909  {
1910  if (timeout == 0)
1911  return STATUS_OK; /* default is set but unknown */
1912  else
1913  return STATUS_ERROR;
1914  }
1915  }
1916 
1917  if (timeout == 0)
1918  timeout = port->default_tcp_user_timeout;
1919 
1920  if (setsockopt(port->sock, IPPROTO_TCP, TCP_USER_TIMEOUT,
1921  (char *) &timeout, sizeof(timeout)) < 0)
1922  {
1923  ereport(LOG,
1924  (errmsg("%s(%s) failed: %m", "setsockopt", "TCP_USER_TIMEOUT")));
1925  return STATUS_ERROR;
1926  }
1927 
1928  port->tcp_user_timeout = timeout;
1929 #else
1930  if (timeout != 0)
1931  {
1932  ereport(LOG,
1933  (errmsg("%s(%s) not supported", "setsockopt", "TCP_USER_TIMEOUT")));
1934  return STATUS_ERROR;
1935  }
1936 #endif
1937 
1938  return STATUS_OK;
1939 }
1940 
1941 /*
1942  * Check if the client is still connected.
1943  */
1944 bool
1946 {
1947  WaitEvent events[FeBeWaitSetNEvents];
1948  int rc;
1949 
1950  /*
1951  * It's OK to modify the socket event filter without restoring, because
1952  * all FeBeWaitSet socket wait sites do the same.
1953  */
1955 
1956 retry:
1957  rc = WaitEventSetWait(FeBeWaitSet, 0, events, lengthof(events), 0);
1958  for (int i = 0; i < rc; ++i)
1959  {
1960  if (events[i].events & WL_SOCKET_CLOSED)
1961  return false;
1962  if (events[i].events & WL_LATCH_SET)
1963  {
1964  /*
1965  * A latch event might be preventing other events from being
1966  * reported. Reset it and poll again. No need to restore it
1967  * because no code should expect latches to survive across
1968  * CHECK_FOR_INTERRUPTS().
1969  */
1971  goto retry;
1972  }
1973  }
1974 
1975  return true;
1976 }
ssize_t secure_write(Port *port, void *ptr, size_t len)
Definition: be-secure.c:260
void secure_close(Port *port)
Definition: be-secure.c:135
ssize_t secure_read(Port *port, void *ptr, size_t len)
Definition: be-secure.c:147
unsigned int uint32
Definition: c.h:452
#define STATUS_OK
Definition: c.h:1178
signed int int32
Definition: c.h:440
#define lengthof(array)
Definition: c.h:745
#define MemSet(start, val, len)
Definition: c.h:1019
#define STATUS_ERROR
Definition: c.h:1179
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:166
int errcode_for_socket_access(void)
Definition: elog.c:787
int errcode_for_file_access(void)
Definition: elog.c:716
int errhint(const char *fmt,...)
Definition: elog.c:1151
int errcode(int sqlerrcode)
Definition: elog.c:693
int errmsg(const char *fmt,...)
Definition: elog.c:904
#define _(x)
Definition: elog.c:89
#define LOG
Definition: elog.h:25
#define PG_RE_THROW()
Definition: elog.h:340
#define COMMERROR
Definition: elog.h:27
#define PG_END_TRY()
Definition: elog.h:324
#define FATAL
Definition: elog.h:35
#define PG_TRY()
Definition: elog.h:299
#define WARNING
Definition: elog.h:30
#define ERROR
Definition: elog.h:33
#define PG_CATCH()
Definition: elog.h:309
#define ereport(elevel,...)
Definition: elog.h:143
#define gai_strerror
Definition: getaddrinfo.h:146
#define NI_NUMERICHOST
Definition: getaddrinfo.h:78
#define NI_MAXHOST
Definition: getaddrinfo.h:88
#define AI_PASSIVE
Definition: getaddrinfo.h:62
volatile sig_atomic_t InterruptPending
Definition: globals.c:30
volatile sig_atomic_t ClientConnectionLost
Definition: globals.c:34
int MaxBackends
Definition: globals.c:140
struct Port * MyProcPort
Definition: globals.c:47
struct Latch * MyLatch
Definition: globals.c:58
int tcp_keepalives_idle
Definition: guc.c:663
int tcp_keepalives_interval
Definition: guc.c:664
int tcp_keepalives_count
Definition: guc.c:665
int tcp_user_timeout
Definition: guc.c:666
long val
Definition: informix.c:664
void pg_freeaddrinfo_all(int hint_ai_family, struct addrinfo *ai)
Definition: ip.c:88
int pg_getnameinfo_all(const struct sockaddr_storage *addr, int salen, char *node, int nodelen, char *service, int servicelen, int flags)
Definition: ip.c:122
int pg_getaddrinfo_all(const char *hostname, const char *servname, const struct addrinfo *hintp, struct addrinfo **result)
Definition: ip.c:57
void on_proc_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:305
int i
Definition: isn.c:73
WaitEventSet * CreateWaitEventSet(MemoryContext context, int nevents)
Definition: latch.c:707
void ModifyWaitEvent(WaitEventSet *set, int pos, uint32 events, Latch *latch)
Definition: latch.c:972
int AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd, Latch *latch, void *user_data)
Definition: latch.c:886
int WaitEventSetWait(WaitEventSet *set, long timeout, WaitEvent *occurred_events, int nevents, uint32 wait_event_info)
Definition: latch.c:1345
void ResetLatch(Latch *latch)
Definition: latch.c:683
#define WL_SOCKET_CLOSED
Definition: latch.h:137
#define WL_LATCH_SET
Definition: latch.h:125
#define WL_POSTMASTER_DEATH
Definition: latch.h:129
#define WL_SOCKET_WRITEABLE
Definition: latch.h:127
#define pq_putmessage(msgtype, s, len)
Definition: libpq.h:49
#define FeBeWaitSetLatchPos
Definition: libpq.h:64
#define FeBeWaitSetNEvents
Definition: libpq.h:65
#define FeBeWaitSetSocketPos
Definition: libpq.h:63
Assert(fmt[strlen(fmt) - 1] !='\n')
List * lappend(List *list, void *datum)
Definition: list.c:338
char * pstrdup(const char *in)
Definition: mcxt.c:1305
MemoryContext TopMemoryContext
Definition: mcxt.c:48
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1188
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:863
void CreateSocketLockFile(const char *socketfile, bool amPostmaster, const char *socketDir)
Definition: miscinit.c:1303
void * arg
#define pg_ntoh32(x)
Definition: pg_bswap.h:125
#define pg_hton32(x)
Definition: pg_bswap.h:121
#define MAXPGPATH
#define PG_SOMAXCONN
const void size_t len
#define lfirst(lc)
Definition: pg_list.h:170
#define NIL
Definition: pg_list.h:66
static int port
Definition: pg_regress.c:92
bool pg_set_noblock(pgsocket sock)
Definition: noblock.c:25
int pgsocket
Definition: port.h:29
#define snprintf
Definition: port.h:225
unsigned int socklen_t
Definition: port.h:40
#define PGINVALID_SOCKET
Definition: port.h:31
#define closesocket
Definition: port.h:342
uintptr_t Datum
Definition: postgres.h:411
static pgsocket ListenSocket[MAXLISTEN]
Definition: postmaster.c:223
static int PqRecvLength
Definition: pqcomm.c:131
int pq_setkeepalivesinterval(int interval, Port *port)
Definition: pqcomm.c:1743
const PQcommMethods * PqCommMethods
Definition: pqcomm.c:166
static int pq_recvbuf(void)
Definition: pqcomm.c:933
static int PqSendStart
Definition: pqcomm.c:127
int Unix_socket_permissions
Definition: pqcomm.c:108
static int internal_flush(void)
Definition: pqcomm.c:1353
static void socket_set_nonblocking(bool nonblocking)
Definition: pqcomm.c:916
int pq_peekbyte(void)
Definition: pqcomm.c:1013
int pq_getbyte_if_available(unsigned char *c)
Definition: pqcomm.c:1034
static int socket_flush_if_writable(void)
Definition: pqcomm.c:1426
int pq_getkeepalivescount(Port *port)
Definition: pqcomm.c:1792
#define PQ_RECV_BUFFER_SIZE
Definition: pqcomm.c:122
int pq_getkeepalivesinterval(Port *port)
Definition: pqcomm.c:1708
static int PqSendPointer
Definition: pqcomm.c:126
static int pq_discardbytes(size_t len)
Definition: pqcomm.c:1122
int pq_settcpusertimeout(int timeout, Port *port)
Definition: pqcomm.c:1897
int pq_getmessage(StringInfo s, int maxlen)
Definition: pqcomm.c:1226
static const PQcommMethods PqCommSocketMethods
Definition: pqcomm.c:157
int pq_getbytes(char *s, size_t len)
Definition: pqcomm.c:1088
static bool PqCommReadingMsg
Definition: pqcomm.c:137
char * Unix_socket_group
Definition: pqcomm.c:109
void StreamClose(pgsocket sock)
Definition: pqcomm.c:850
static int socket_flush(void)
Definition: pqcomm.c:1331
#define PQ_SEND_BUFFER_SIZE
Definition: pqcomm.c:121
int pq_setkeepalivesidle(int idle, Port *port)
Definition: pqcomm.c:1658
int StreamServerPort(int family, const char *hostName, unsigned short portNumber, const char *unixSocketDir, pgsocket ListenSocket[], int MaxListen)
Definition: pqcomm.c:318
static void socket_comm_reset(void)
Definition: pqcomm.c:232
WaitEventSet * FeBeWaitSet
Definition: pqcomm.c:168
static char * PqSendBuffer
Definition: pqcomm.c:124
int pq_getkeepalivesidle(Port *port)
Definition: pqcomm.c:1623
void pq_endmsgread(void)
Definition: pqcomm.c:1188
static List * sock_paths
Definition: pqcomm.c:112
void TouchSocketFiles(void)
Definition: pqcomm.c:865
static bool PqCommBusy
Definition: pqcomm.c:136
int pq_getbyte(void)
Definition: pqcomm.c:994
static bool socket_is_send_pending(void)
Definition: pqcomm.c:1452
static int socket_putmessage(char msgtype, const char *s, size_t len)
Definition: pqcomm.c:1482
static void socket_putmessage_noblock(char msgtype, const char *s, size_t len)
Definition: pqcomm.c:1515
bool pq_buffer_has_data(void)
Definition: pqcomm.c:1151
static char PqRecvBuffer[PQ_RECV_BUFFER_SIZE]
Definition: pqcomm.c:129
static int PqRecvPointer
Definition: pqcomm.c:130
void pq_init(void)
Definition: pqcomm.c:176
static void socket_close(int code, Datum arg)
Definition: pqcomm.c:247
int pq_putmessage_v2(char msgtype, const char *s, size_t len)
Definition: pqcomm.c:1552
bool pq_is_reading_msg(void)
Definition: pqcomm.c:1204
void RemoveSocketFiles(void)
Definition: pqcomm.c:883
int pq_gettcpusertimeout(Port *port)
Definition: pqcomm.c:1867
bool pq_check_connection(void)
Definition: pqcomm.c:1945
static int PqSendBufferSize
Definition: pqcomm.c:125
void pq_startmsgread(void)
Definition: pqcomm.c:1164
int pq_setkeepalivescount(int count, Port *port)
Definition: pqcomm.c:1822
int StreamConnection(pgsocket server_fd, Port *port)
Definition: pqcomm.c:716
static int internal_putbytes(const char *s, size_t len)
Definition: pqcomm.c:1300
#define UNIXSOCK_PATH(path, port, sockdir)
Definition: pqcomm.h:70
#define UNIXSOCK_PATH_BUFLEN
Definition: pqcomm.h:86
char * c
static int fd(const char *x, int i)
Definition: preproc-init.c:105
void pg_usleep(long microsec)
Definition: signal.c:53
void resetStringInfo(StringInfo str)
Definition: stringinfo.c:75
void enlargeStringInfo(StringInfo str, int needed)
Definition: stringinfo.c:283
Definition: pg_list.h:52
Definition: libpq-be.h:125
void * gss
Definition: libpq-be.h:202
pgsocket sock
Definition: libpq-be.h:126
bool noblock
Definition: libpq-be.h:127
int ai_socktype
Definition: getaddrinfo.h:102
struct addrinfo * ai_next
Definition: getaddrinfo.h:107
int ai_flags
Definition: getaddrinfo.h:100
int ai_family
Definition: getaddrinfo.h:101
#define bind(s, addr, addrlen)
Definition: win32_port.h:470
#define EINTR
Definition: win32_port.h:351
#define EWOULDBLOCK
Definition: win32_port.h:357
#define EADDRINUSE
Definition: win32_port.h:377
int gid_t
Definition: win32_port.h:245
#define socket(af, type, protocol)
Definition: win32_port.h:469
#define accept(s, addr, addrlen)
Definition: win32_port.h:472
#define listen(s, backlog)
Definition: win32_port.h:471
#define EAGAIN
Definition: win32_port.h:349