PostgreSQL Source Code  git master
pqcomm.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * pqcomm.c
4  * Communication functions between the Frontend and the Backend
5  *
6  * These routines handle the low-level details of communication between
7  * frontend and backend. They just shove data across the communication
8  * channel, and are ignorant of the semantics of the data.
9  *
10  * To emit an outgoing message, use the routines in pqformat.c to construct
11  * the message in a buffer and then emit it in one call to pq_putmessage.
12  * There are no functions to send raw bytes or partial messages; this
13  * ensures that the channel will not be clogged by an incomplete message if
14  * execution is aborted by ereport(ERROR) partway through the message.
15  *
16  * At one time, libpq was shared between frontend and backend, but now
17  * the backend's "backend/libpq" is quite separate from "interfaces/libpq".
18  * All that remains is similarities of names to trap the unwary...
19  *
20  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
21  * Portions Copyright (c) 1994, Regents of the University of California
22  *
23  * src/backend/libpq/pqcomm.c
24  *
25  *-------------------------------------------------------------------------
26  */
27 
28 /*------------------------
29  * INTERFACE ROUTINES
30  *
31  * setup/teardown:
32  * StreamServerPort - Open postmaster's server port
33  * StreamConnection - Create new connection with client
34  * StreamClose - Close a client/backend connection
35  * TouchSocketFiles - Protect socket files against /tmp cleaners
36  * pq_init - initialize libpq at backend startup
37  * socket_comm_reset - reset libpq during error recovery
38  * socket_close - shutdown libpq at backend exit
39  *
40  * low-level I/O:
41  * pq_getbytes - get a known number of bytes from connection
42  * pq_getmessage - get a message with length word from connection
43  * pq_getbyte - get next byte from connection
44  * pq_peekbyte - peek at next byte from connection
45  * pq_flush - flush pending output
46  * pq_flush_if_writable - flush pending output if writable without blocking
47  * pq_getbyte_if_available - get a byte if available without blocking
48  *
49  * message-level I/O
50  * pq_putmessage - send a normal message (suppressed in COPY OUT mode)
51  * pq_putmessage_noblock - buffer a normal message (suppressed in COPY OUT)
52  *
53  *------------------------
54  */
55 #include "postgres.h"
56 
57 #ifdef HAVE_POLL_H
58 #include <poll.h>
59 #endif
60 #include <signal.h>
61 #include <fcntl.h>
62 #include <grp.h>
63 #include <unistd.h>
64 #include <sys/file.h>
65 #include <sys/socket.h>
66 #include <sys/stat.h>
67 #include <sys/time.h>
68 #include <netdb.h>
69 #include <netinet/in.h>
70 #ifdef HAVE_NETINET_TCP_H
71 #include <netinet/tcp.h>
72 #endif
73 #include <utime.h>
74 #ifdef _MSC_VER /* mstcpip.h is missing on mingw */
75 #include <mstcpip.h>
76 #endif
77 
78 #include "common/ip.h"
79 #include "libpq/libpq.h"
80 #include "miscadmin.h"
81 #include "port/pg_bswap.h"
82 #include "storage/ipc.h"
83 #include "utils/guc.h"
84 #include "utils/memutils.h"
85 
86 /*
87  * Cope with the various platform-specific ways to spell TCP keepalive socket
88  * options. This doesn't cover Windows, which as usual does its own thing.
89  */
90 #if defined(TCP_KEEPIDLE)
91 /* TCP_KEEPIDLE is the name of this option on Linux and *BSD */
92 #define PG_TCP_KEEPALIVE_IDLE TCP_KEEPIDLE
93 #define PG_TCP_KEEPALIVE_IDLE_STR "TCP_KEEPIDLE"
94 #elif defined(TCP_KEEPALIVE_THRESHOLD)
95 /* TCP_KEEPALIVE_THRESHOLD is the name of this option on Solaris >= 11 */
96 #define PG_TCP_KEEPALIVE_IDLE TCP_KEEPALIVE_THRESHOLD
97 #define PG_TCP_KEEPALIVE_IDLE_STR "TCP_KEEPALIVE_THRESHOLD"
98 #elif defined(TCP_KEEPALIVE) && defined(__darwin__)
99 /* TCP_KEEPALIVE is the name of this option on macOS */
100 /* Caution: Solaris has this symbol but it means something different */
101 #define PG_TCP_KEEPALIVE_IDLE TCP_KEEPALIVE
102 #define PG_TCP_KEEPALIVE_IDLE_STR "TCP_KEEPALIVE"
103 #endif
104 
105 /*
106  * Configuration options
107  */
110 
111 /* Where the Unix socket files are (list of palloc'd strings) */
112 static List *sock_paths = NIL;
113 
114 /*
115  * Buffers for low-level I/O.
116  *
117  * The receive buffer is fixed size. Send buffer is usually 8k, but can be
118  * enlarged by pq_putmessage_noblock() if the message doesn't fit otherwise.
119  */
120 
121 #define PQ_SEND_BUFFER_SIZE 8192
122 #define PQ_RECV_BUFFER_SIZE 8192
123 
124 static char *PqSendBuffer; /* Outgoing data buffer; PqSendBufferSize gives its current size */
125 static int PqSendBufferSize; /* Size of send buffer */
126 static int PqSendPointer; /* Next index to store a byte in PqSendBuffer */
127 static int PqSendStart; /* Next index to send a byte in PqSendBuffer */
128 
130 static int PqRecvPointer; /* Next index to read a byte from PqRecvBuffer */
131 static int PqRecvLength; /* End of data available in PqRecvBuffer */
132 
133 /*
134  * Message status
135  */
136 static bool PqCommBusy; /* busy sending data to the client */
137 static bool PqCommReadingMsg; /* in the middle of reading a message */
138 
139 
140 /* Internal functions */
141 static void socket_comm_reset(void);
142 static void socket_close(int code, Datum arg);
143 static void socket_set_nonblocking(bool nonblocking);
144 static int socket_flush(void);
145 static int socket_flush_if_writable(void);
146 static bool socket_is_send_pending(void);
147 static int socket_putmessage(char msgtype, const char *s, size_t len);
148 static void socket_putmessage_noblock(char msgtype, const char *s, size_t len);
149 static int internal_putbytes(const char *s, size_t len);
150 static int internal_flush(void);
151 
152 #ifdef HAVE_UNIX_SOCKETS
153 static int Lock_AF_UNIX(const char *unixSocketDir, const char *unixSocketPath);
154 static int Setup_AF_UNIX(const char *sock_path);
155 #endif /* HAVE_UNIX_SOCKETS */
156 
159  socket_flush,
164 };
165 
167 
169 
170 
171 /* --------------------------------
172  * pq_init - initialize libpq at backend startup
173  * --------------------------------
174  */
175 void
176 pq_init(void)
177 {
178  int socket_pos PG_USED_FOR_ASSERTS_ONLY;
179  int latch_pos PG_USED_FOR_ASSERTS_ONLY;
180 
181  /* initialize state variables */
185  PqCommBusy = false;
186  PqCommReadingMsg = false;
187 
188  /* set up process-exit hook to close the socket */
190 
191  /*
192  * In backends (as soon as forked) we operate the underlying socket in
193  * nonblocking mode and use latches to implement blocking semantics if
194  * needed. That allows us to provide safely interruptible reads and
195  * writes.
196  *
197  * Use COMMERROR on failure, because ERROR would try to send the error to
198  * the client, which might require changing the mode again, leading to
199  * infinite recursion.
200  */
201 #ifndef WIN32
204  (errmsg("could not set socket to nonblocking mode: %m")));
205 #endif
206 
207  FeBeWaitSet = CreateWaitEventSet(TopMemoryContext, 3);
208  socket_pos = AddWaitEventToSet(FeBeWaitSet, WL_SOCKET_WRITEABLE,
209  MyProcPort->sock, NULL, NULL);
210  latch_pos = AddWaitEventToSet(FeBeWaitSet, WL_LATCH_SET, PGINVALID_SOCKET,
211  MyLatch, NULL);
213  NULL, NULL);
214 
215  /*
216  * The event positions match the order we added them, but let's sanity
217  * check them to be sure.
218  */
219  Assert(socket_pos == FeBeWaitSetSocketPos);
220  Assert(latch_pos == FeBeWaitSetLatchPos);
221 }
222 
223 /* --------------------------------
224  * socket_comm_reset - reset libpq during error recovery
225  *
226  * This is called from error recovery at the outer idle loop. It's
227  * just to get us out of trouble if we somehow manage to elog() from
228  * inside a pqcomm.c routine (which ideally will never happen, but...)
229  * --------------------------------
230  */
231 static void
233 {
234  /* Do not throw away pending data, but do reset the busy flag */
235  PqCommBusy = false;
236 }
237 
238 /* --------------------------------
239  * socket_close - shutdown libpq at backend exit
240  *
241  * This is the one pg_on_exit_callback in place during BackendInitialize().
242  * That function's unusual signal handling constrains that this callback be
243  * safe to run at any instant.
244  * --------------------------------
245  */
246 static void
248 {
249  /* Nothing to do in a standalone backend, where MyProcPort is NULL. */
250  if (MyProcPort != NULL)
251  {
252 #ifdef ENABLE_GSS
253  /*
254  * Shutdown GSSAPI layer. This section does nothing when interrupting
255  * BackendInitialize(), because pg_GSS_recvauth() makes first use of
256  * "ctx" and "cred".
257  *
258  * Note that we don't bother to free MyProcPort->gss, since we're
259  * about to exit anyway.
260  */
261  if (MyProcPort->gss)
262  {
263  OM_uint32 min_s;
264 
265  if (MyProcPort->gss->ctx != GSS_C_NO_CONTEXT)
266  gss_delete_sec_context(&min_s, &MyProcPort->gss->ctx, NULL);
267 
268  if (MyProcPort->gss->cred != GSS_C_NO_CREDENTIAL)
269  gss_release_cred(&min_s, &MyProcPort->gss->cred);
270  }
271 #endif /* ENABLE_GSS */
272 
273  /*
274  * Cleanly shut down SSL layer. Nowhere else does a postmaster child
275  * call this, so this is safe when interrupting BackendInitialize().
276  */
278 
279  /*
280  * Formerly we did an explicit close() here, but it seems better to
281  * leave the socket open until the process dies. This allows clients
282  * to perform a "synchronous close" if they care --- wait till the
283  * transport layer reports connection closure, and you can be sure the
284  * backend has exited.
285  *
286  * We do set sock to PGINVALID_SOCKET to prevent any further I/O,
287  * though.
288  */
290  }
291 }
292 
293 
294 
295 /*
296  * Streams -- wrapper around Unix socket system calls
297  *
298  *
299  * Stream functions are used for vanilla TCP connection protocol.
300  */
301 
302 
303 /*
304  * StreamServerPort -- open a "listening" port to accept connections.
305  *
306  * family should be AF_UNIX or AF_UNSPEC; portNumber is the port number.
307  * For AF_UNIX ports, hostName should be NULL and unixSocketDir must be
308  * specified. For TCP ports, hostName is either NULL for all interfaces or
309  * the interface to listen on, and unixSocketDir is ignored (can be NULL).
310  *
311  * Successfully opened sockets are added to the ListenSocket[] array (of
312  * length MaxListen), at the first position that isn't PGINVALID_SOCKET.
313  *
314  * RETURNS: STATUS_OK or STATUS_ERROR
315  */
316 
317 int
318 StreamServerPort(int family, const char *hostName, unsigned short portNumber,
319  const char *unixSocketDir,
320  pgsocket ListenSocket[], int MaxListen)
321 {
322  pgsocket fd;
323  int err;
324  int maxconn;
325  int ret;
326  char portNumberStr[32];
327  const char *familyDesc;
328  char familyDescBuf[64];
329  const char *addrDesc;
330  char addrBuf[NI_MAXHOST];
331  char *service;
332  struct addrinfo *addrs = NULL,
333  *addr;
334  struct addrinfo hint;
335  int listen_index = 0;
336  int added = 0;
337 
338 #ifdef HAVE_UNIX_SOCKETS
339  char unixSocketPath[MAXPGPATH];
340 #endif
341 #if !defined(WIN32) || defined(IPV6_V6ONLY)
342  int one = 1;
343 #endif
344 
345  /* Initialize hint structure */
346  MemSet(&hint, 0, sizeof(hint));
347  hint.ai_family = family;
348  hint.ai_flags = AI_PASSIVE;
349  hint.ai_socktype = SOCK_STREAM;
350 
351 #ifdef HAVE_UNIX_SOCKETS
352  if (family == AF_UNIX)
353  {
354  /*
355  * Create unixSocketPath from portNumber and unixSocketDir and lock
356  * that file path
357  */
358  UNIXSOCK_PATH(unixSocketPath, portNumber, unixSocketDir);
359  if (strlen(unixSocketPath) >= UNIXSOCK_PATH_BUFLEN)
360  {
361  ereport(LOG,
362  (errmsg("Unix-domain socket path \"%s\" is too long (maximum %d bytes)",
363  unixSocketPath,
364  (int) (UNIXSOCK_PATH_BUFLEN - 1))));
365  return STATUS_ERROR;
366  }
367  if (Lock_AF_UNIX(unixSocketDir, unixSocketPath) != STATUS_OK)
368  return STATUS_ERROR;
369  service = unixSocketPath;
370  }
371  else
372 #endif /* HAVE_UNIX_SOCKETS */
373  {
374  snprintf(portNumberStr, sizeof(portNumberStr), "%d", portNumber);
375  service = portNumberStr;
376  }
377 
378  ret = pg_getaddrinfo_all(hostName, service, &hint, &addrs);
379  if (ret || !addrs)
380  {
381  if (hostName)
382  ereport(LOG,
383  (errmsg("could not translate host name \"%s\", service \"%s\" to address: %s",
384  hostName, service, gai_strerror(ret))));
385  else
386  ereport(LOG,
387  (errmsg("could not translate service \"%s\" to address: %s",
388  service, gai_strerror(ret))));
389  if (addrs)
390  pg_freeaddrinfo_all(hint.ai_family, addrs);
391  return STATUS_ERROR;
392  }
393 
394  for (addr = addrs; addr; addr = addr->ai_next)
395  {
396  if (!IS_AF_UNIX(family) && IS_AF_UNIX(addr->ai_family))
397  {
398  /*
399  * Only set up a unix domain socket when they really asked for it.
400  * The service/port is different in that case.
401  */
402  continue;
403  }
404 
405  /* See if there is still room to add 1 more socket. */
406  for (; listen_index < MaxListen; listen_index++)
407  {
408  if (ListenSocket[listen_index] == PGINVALID_SOCKET)
409  break;
410  }
411  if (listen_index >= MaxListen)
412  {
413  ereport(LOG,
414  (errmsg("could not bind to all requested addresses: MAXLISTEN (%d) exceeded",
415  MaxListen)));
416  break;
417  }
418 
419  /* set up address family name for log messages */
420  switch (addr->ai_family)
421  {
422  case AF_INET:
423  familyDesc = _("IPv4");
424  break;
425 #ifdef HAVE_IPV6
426  case AF_INET6:
427  familyDesc = _("IPv6");
428  break;
429 #endif
430 #ifdef HAVE_UNIX_SOCKETS
431  case AF_UNIX:
432  familyDesc = _("Unix");
433  break;
434 #endif
435  default:
436  snprintf(familyDescBuf, sizeof(familyDescBuf),
437  _("unrecognized address family %d"),
438  addr->ai_family);
439  familyDesc = familyDescBuf;
440  break;
441  }
442 
443  /* set up text form of address for log messages */
444 #ifdef HAVE_UNIX_SOCKETS
445  if (addr->ai_family == AF_UNIX)
446  addrDesc = unixSocketPath;
447  else
448 #endif
449  {
450  pg_getnameinfo_all((const struct sockaddr_storage *) addr->ai_addr,
451  addr->ai_addrlen,
452  addrBuf, sizeof(addrBuf),
453  NULL, 0,
455  addrDesc = addrBuf;
456  }
457 
458  if ((fd = socket(addr->ai_family, SOCK_STREAM, 0)) == PGINVALID_SOCKET)
459  {
460  ereport(LOG,
462  /* translator: first %s is IPv4, IPv6, or Unix */
463  errmsg("could not create %s socket for address \"%s\": %m",
464  familyDesc, addrDesc)));
465  continue;
466  }
467 
468 #ifndef WIN32
469 
470  /*
471  * Without the SO_REUSEADDR flag, a new postmaster can't be started
472  * right away after a stop or crash, giving "address already in use"
473  * error on TCP ports.
474  *
475  * On win32, however, this behavior only happens if the
476  * SO_EXCLUSIVEADDRUSE is set. With SO_REUSEADDR, win32 allows
477  * multiple servers to listen on the same address, resulting in
478  * unpredictable behavior. With no flags at all, win32 behaves as Unix
479  * with SO_REUSEADDR.
480  */
481  if (!IS_AF_UNIX(addr->ai_family))
482  {
483  if ((setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
484  (char *) &one, sizeof(one))) == -1)
485  {
486  ereport(LOG,
488  /* translator: third %s is IPv4, IPv6, or Unix */
489  errmsg("%s(%s) failed for %s address \"%s\": %m",
490  "setsockopt", "SO_REUSEADDR",
491  familyDesc, addrDesc)));
492  closesocket(fd);
493  continue;
494  }
495  }
496 #endif
497 
498 #ifdef IPV6_V6ONLY
499  if (addr->ai_family == AF_INET6)
500  {
501  if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY,
502  (char *) &one, sizeof(one)) == -1)
503  {
504  ereport(LOG,
506  /* translator: third %s is IPv4, IPv6, or Unix */
507  errmsg("%s(%s) failed for %s address \"%s\": %m",
508  "setsockopt", "IPV6_V6ONLY",
509  familyDesc, addrDesc)));
510  closesocket(fd);
511  continue;
512  }
513  }
514 #endif
515 
516  /*
517  * Note: This might fail on some OS's, like Linux older than
518  * 2.4.21-pre3, that don't have the IPV6_V6ONLY socket option, and map
519  * ipv4 addresses to ipv6. It will show ::ffff:ipv4 for all ipv4
520  * connections.
521  */
522  err = bind(fd, addr->ai_addr, addr->ai_addrlen);
523  if (err < 0)
524  {
525  int saved_errno = errno;
526 
527  ereport(LOG,
529  /* translator: first %s is IPv4, IPv6, or Unix */
530  errmsg("could not bind %s address \"%s\": %m",
531  familyDesc, addrDesc),
532  saved_errno == EADDRINUSE ?
533  (IS_AF_UNIX(addr->ai_family) ?
534  errhint("Is another postmaster already running on port %d?",
535  (int) portNumber) :
536  errhint("Is another postmaster already running on port %d?"
537  " If not, wait a few seconds and retry.",
538  (int) portNumber)) : 0));
539  closesocket(fd);
540  continue;
541  }
542 
543 #ifdef HAVE_UNIX_SOCKETS
544  if (addr->ai_family == AF_UNIX)
545  {
546  if (Setup_AF_UNIX(service) != STATUS_OK)
547  {
548  closesocket(fd);
549  break;
550  }
551  }
552 #endif
553 
554  /*
555  * Select appropriate accept-queue length limit. PG_SOMAXCONN is only
556  * intended to provide a clamp on the request on platforms where an
557  * overly large request provokes a kernel error (are there any?).
558  */
559  maxconn = MaxBackends * 2;
560  if (maxconn > PG_SOMAXCONN)
561  maxconn = PG_SOMAXCONN;
562 
563  err = listen(fd, maxconn);
564  if (err < 0)
565  {
566  ereport(LOG,
568  /* translator: first %s is IPv4, IPv6, or Unix */
569  errmsg("could not listen on %s address \"%s\": %m",
570  familyDesc, addrDesc)));
571  closesocket(fd);
572  continue;
573  }
574 
575 #ifdef HAVE_UNIX_SOCKETS
576  if (addr->ai_family == AF_UNIX)
577  ereport(LOG,
578  (errmsg("listening on Unix socket \"%s\"",
579  addrDesc)));
580  else
581 #endif
582  ereport(LOG,
583  /* translator: first %s is IPv4 or IPv6 */
584  (errmsg("listening on %s address \"%s\", port %d",
585  familyDesc, addrDesc, (int) portNumber)));
586 
587  ListenSocket[listen_index] = fd;
588  added++;
589  }
590 
591  pg_freeaddrinfo_all(hint.ai_family, addrs);
592 
593  if (!added)
594  return STATUS_ERROR;
595 
596  return STATUS_OK;
597 }
598 
599 
600 #ifdef HAVE_UNIX_SOCKETS
601 
602 /*
603  * Lock_AF_UNIX -- take the lock file for a Unix socket path and clear the way for bind()
     *
     * unixSocketPath is the socket file's path; unixSocketDir is used only as
     * an argument to CreateSocketLockFile().
     *
     * A path beginning with '@' (abstract socket) is skipped entirely: no lock
     * file is created, nothing is unlinked, and the path is not added to
     * sock_paths.
     *
     * For any other path this creates the lock file, unlinks whatever already
     * exists at unixSocketPath, and appends a pstrdup'd copy of the path to
     * sock_paths.
     *
     * RETURNS: STATUS_OK on every path that returns.
     * NOTE(review): nothing here returns STATUS_ERROR, so a lock-file failure
     * is presumably reported by CreateSocketLockFile() not returning -- confirm.
604  */
605 static int
606 Lock_AF_UNIX(const char *unixSocketDir, const char *unixSocketPath)
607 {
608  /* no lock file for abstract sockets */
609  if (unixSocketPath[0] == '@')
610  return STATUS_OK;
611 
612  /*
613  * Grab an interlock file associated with the socket file.
614  *
615  * Note: there are two reasons for using a socket lock file, rather than
616  * trying to interlock directly on the socket itself. First, it's a lot
617  * more portable, and second, it lets us remove any pre-existing socket
618  * file without race conditions.
619  */
620  CreateSocketLockFile(unixSocketPath, true, unixSocketDir);
621 
622  /*
623  * Once we have the interlock, we can safely delete any pre-existing
624  * socket file to avoid failure at bind() time.
     * The result of unlink() is deliberately ignored.
625  */
626  (void) unlink(unixSocketPath);
627 
628  /*
629  * Remember socket file pathnames for later maintenance.
     * (sock_paths is the list the touch/remove loops later in this file
     * iterate over.)
630  */
631  sock_paths = lappend(sock_paths, pstrdup(unixSocketPath));
632 
633  return STATUS_OK;
634 }
635 
636 
637 /*
638  * Setup_AF_UNIX -- configure unix socket permissions
639  */
640 static int
641 Setup_AF_UNIX(const char *sock_path)
642 {
643  /* no file system permissions for abstract sockets */
644  if (sock_path[0] == '@')
645  return STATUS_OK;
646 
647  /*
648  * Fix socket ownership/permission if requested. Note we must do this
649  * before we listen() to avoid a window where unwanted connections could
650  * get accepted.
651  */
653  if (Unix_socket_group[0] != '\0')
654  {
655 #ifdef WIN32
656  elog(WARNING, "configuration item unix_socket_group is not supported on this platform");
657 #else
658  char *endptr;
659  unsigned long val;
660  gid_t gid;
661 
662  val = strtoul(Unix_socket_group, &endptr, 10);
663  if (*endptr == '\0')
664  { /* numeric group id */
665  gid = val;
666  }
667  else
668  { /* convert group name to id */
669  struct group *gr;
670 
671  gr = getgrnam(Unix_socket_group);
672  if (!gr)
673  {
674  ereport(LOG,
675  (errmsg("group \"%s\" does not exist",
677  return STATUS_ERROR;
678  }
679  gid = gr->gr_gid;
680  }
681  if (chown(sock_path, -1, gid) == -1)
682  {
683  ereport(LOG,
685  errmsg("could not set group of file \"%s\": %m",
686  sock_path)));
687  return STATUS_ERROR;
688  }
689 #endif
690  }
691 
692  if (chmod(sock_path, Unix_socket_permissions) == -1)
693  {
694  ereport(LOG,
696  errmsg("could not set permissions of file \"%s\": %m",
697  sock_path)));
698  return STATUS_ERROR;
699  }
700  return STATUS_OK;
701 }
702 #endif /* HAVE_UNIX_SOCKETS */
703 
704 
705 /*
706  * StreamConnection -- create a new connection with client using
707  * server port. Set port->sock to the FD of the new connection.
708  *
709  * ASSUME: that this doesn't need to be non-blocking because
710  * the Postmaster uses select() to tell when the socket is ready for
711  * accept().
712  *
713  * RETURNS: STATUS_OK or STATUS_ERROR
714  */
715 int
717 {
718  /* accept connection and fill in the client (remote) address */
719  port->raddr.salen = sizeof(port->raddr.addr);
720  if ((port->sock = accept(server_fd,
721  (struct sockaddr *) &port->raddr.addr,
722  &port->raddr.salen)) == PGINVALID_SOCKET)
723  {
724  ereport(LOG,
726  errmsg("could not accept new connection: %m")));
727 
728  /*
729  * If accept() fails then postmaster.c will still see the server
730  * socket as read-ready, and will immediately try again. To avoid
731  * uselessly sucking lots of CPU, delay a bit before trying again.
732  * (The most likely reason for failure is being out of kernel file
733  * table slots; we can do little except hope some will get freed up.)
734  */
735  pg_usleep(100000L); /* wait 0.1 sec */
736  return STATUS_ERROR;
737  }
738 
739  /* fill in the server (local) address */
740  port->laddr.salen = sizeof(port->laddr.addr);
741  if (getsockname(port->sock,
742  (struct sockaddr *) &port->laddr.addr,
743  &port->laddr.salen) < 0)
744  {
745  ereport(LOG,
746  (errmsg("%s() failed: %m", "getsockname")));
747  return STATUS_ERROR;
748  }
749 
750  /* select NODELAY and KEEPALIVE options if it's a TCP connection */
751  if (!IS_AF_UNIX(port->laddr.addr.ss_family))
752  {
753  int on;
754 #ifdef WIN32
755  int oldopt;
756  int optlen;
757  int newopt;
758 #endif
759 
760 #ifdef TCP_NODELAY
761  on = 1;
762  if (setsockopt(port->sock, IPPROTO_TCP, TCP_NODELAY,
763  (char *) &on, sizeof(on)) < 0)
764  {
765  ereport(LOG,
766  (errmsg("%s(%s) failed: %m", "setsockopt", "TCP_NODELAY")));
767  return STATUS_ERROR;
768  }
769 #endif
770  on = 1;
771  if (setsockopt(port->sock, SOL_SOCKET, SO_KEEPALIVE,
772  (char *) &on, sizeof(on)) < 0)
773  {
774  ereport(LOG,
775  (errmsg("%s(%s) failed: %m", "setsockopt", "SO_KEEPALIVE")));
776  return STATUS_ERROR;
777  }
778 
779 #ifdef WIN32
780 
781  /*
782  * This is a Win32 socket optimization. The OS send buffer should be
783  * large enough to send the whole Postgres send buffer in one go, or
784  * performance suffers. The Postgres send buffer can be enlarged if a
785  * very large message needs to be sent, but we won't attempt to
786  * enlarge the OS buffer if that happens, so somewhat arbitrarily
787  * ensure that the OS buffer is at least PQ_SEND_BUFFER_SIZE * 4.
788  * (That's 32kB with the current default).
789  *
790  * The default OS buffer size used to be 8kB in earlier Windows
791  * versions, but was raised to 64kB in Windows 2012. So it shouldn't
792  * be necessary to change it in later versions anymore. Changing it
793  * unnecessarily can even reduce performance, because setting
794  * SO_SNDBUF in the application disables the "dynamic send buffering"
795  * feature that was introduced in Windows 7. So before fiddling with
796  * SO_SNDBUF, check if the current buffer size is already large enough
797  * and only increase it if necessary.
798  *
799  * See https://support.microsoft.com/kb/823764/EN-US/ and
800  * https://msdn.microsoft.com/en-us/library/bb736549%28v=vs.85%29.aspx
801  */
802  optlen = sizeof(oldopt);
803  if (getsockopt(port->sock, SOL_SOCKET, SO_SNDBUF, (char *) &oldopt,
804  &optlen) < 0)
805  {
806  ereport(LOG,
807  (errmsg("%s(%s) failed: %m", "getsockopt", "SO_SNDBUF")));
808  return STATUS_ERROR;
809  }
810  newopt = PQ_SEND_BUFFER_SIZE * 4;
811  if (oldopt < newopt)
812  {
813  if (setsockopt(port->sock, SOL_SOCKET, SO_SNDBUF, (char *) &newopt,
814  sizeof(newopt)) < 0)
815  {
816  ereport(LOG,
817  (errmsg("%s(%s) failed: %m", "setsockopt", "SO_SNDBUF")));
818  return STATUS_ERROR;
819  }
820  }
821 #endif
822 
823  /*
824  * Also apply the current keepalive parameters. If we fail to set a
825  * parameter, don't error out, because these aren't universally
826  * supported. (Note: you might think we need to reset the GUC
827  * variables to 0 in such a case, but it's not necessary because the
828  * show hooks for these variables report the truth anyway.)
829  */
834  }
835 
836  return STATUS_OK;
837 }
838 
839 /*
840  * StreamClose -- close a client/backend connection
841  *
842  * NOTE: this is NOT used to terminate a session; it is just used to release
843  * the file descriptor in a process that should no longer have the socket
844  * open. (For example, the postmaster calls this after passing ownership
845  * of the connection to a child process.) It is expected that someone else
846  * still has the socket open. So, we only want to close the descriptor,
847  * we do NOT want to send anything to the far end.
848  */
849 void
851 {
852  closesocket(sock);
853 }
854 
855 /*
856  * TouchSocketFiles -- mark socket files as recently accessed
857  *
858  * This routine should be called every so often to ensure that the socket
859  * files have a recent mod date (ordinary operations on sockets usually won't
860  * change the mod date). That saves them from being removed by
861  * overenthusiastic /tmp-directory-cleaner daemons. (Another reason we should
862  * never have put the socket file in /tmp...)
863  */
864 void
866 {
867  ListCell *l;
868 
869  /* Loop through all created sockets... */
870  foreach(l, sock_paths)
871  {
872  char *sock_path = (char *) lfirst(l);
873 
874  /* Ignore errors; there's no point in complaining */
875  (void) utime(sock_path, NULL);
876  }
877 }
878 
879 /*
880  * RemoveSocketFiles -- unlink socket files at postmaster shutdown
881  */
882 void
884 {
885  ListCell *l;
886 
887  /* Loop through all created sockets... */
888  foreach(l, sock_paths)
889  {
890  char *sock_path = (char *) lfirst(l);
891 
892  /* Ignore any error. */
893  (void) unlink(sock_path);
894  }
895  /* Since we're about to exit, no need to reclaim storage */
896  sock_paths = NIL;
897 }
898 
899 
900 /* --------------------------------
901  * Low-level I/O routines begin here.
902  *
903  * These routines communicate with a frontend client across a connection
904  * already established by the preceding routines.
905  * --------------------------------
906  */
907 
908 /* --------------------------------
909  * socket_set_nonblocking - set socket blocking/non-blocking
910  *
911  * Requests non-blocking mode if nonblocking is true, or blocking mode
912  * otherwise.
     *
     * This function only records the requested mode in MyProcPort->noblock;
     * it makes no system call on the socket itself.
     * NOTE(review): the flag is presumably consulted by the read/write layer
     * that performs the actual I/O -- confirm against the callers.
     *
     * Raises ERROR (ERRCODE_CONNECTION_DOES_NOT_EXIST) if MyProcPort is NULL,
     * i.e. there is no client connection to apply the mode to.
913  * --------------------------------
914  */
915 static void
916 socket_set_nonblocking(bool nonblocking)
917 {
918  if (MyProcPort == NULL)
919  ereport(ERROR,
920  (errcode(ERRCODE_CONNECTION_DOES_NOT_EXIST),
921  errmsg("there is no client connection")));
922 
923  MyProcPort->noblock = nonblocking; /* just remember the requested mode */
924 }
925 
926 /* --------------------------------
927  * pq_recvbuf - load some bytes into the input buffer
928  *
929  * returns 0 if OK, EOF if trouble
930  * --------------------------------
931  */
932 static int
934 {
935  if (PqRecvPointer > 0)
936  {
938  {
939  /* still some unread data, left-justify it in the buffer */
943  PqRecvPointer = 0;
944  }
945  else
947  }
948 
949  /* Ensure that we're in blocking mode */
950  socket_set_nonblocking(false);
951 
952  /* Can fill buffer from PqRecvLength and upwards */
953  for (;;)
954  {
955  int r;
956 
959 
960  if (r < 0)
961  {
962  if (errno == EINTR)
963  continue; /* Ok if interrupted */
964 
965  /*
966  * Careful: an ereport() that tries to write to the client would
967  * cause recursion to here, leading to stack overflow and core
968  * dump! This message must go *only* to the postmaster log.
969  */
972  errmsg("could not receive data from client: %m")));
973  return EOF;
974  }
975  if (r == 0)
976  {
977  /*
978  * EOF detected. We used to write a log message here, but it's
979  * better to expect the ultimate caller to do that.
980  */
981  return EOF;
982  }
983  /* r contains number of bytes read, so just incr length */
984  PqRecvLength += r;
985  return 0;
986  }
987 }
988 
989 /* --------------------------------
990  * pq_getbyte - get a single byte from connection, or return EOF
991  * --------------------------------
992  */
993 int
995 {
997 
998  while (PqRecvPointer >= PqRecvLength)
999  {
1000  if (pq_recvbuf()) /* If nothing in buffer, then recv some */
1001  return EOF; /* Failed to recv data */
1002  }
1003  return (unsigned char) PqRecvBuffer[PqRecvPointer++];
1004 }
1005 
1006 /* --------------------------------
1007  * pq_peekbyte - peek at next byte from connection
1008  *
1009  * Same as pq_getbyte() except we don't advance the pointer.
1010  * --------------------------------
1011  */
1012 int
1014 {
1016 
1017  while (PqRecvPointer >= PqRecvLength)
1018  {
1019  if (pq_recvbuf()) /* If nothing in buffer, then recv some */
1020  return EOF; /* Failed to recv data */
1021  }
1022  return (unsigned char) PqRecvBuffer[PqRecvPointer];
1023 }
1024 
1025 /* --------------------------------
1026  * pq_getbyte_if_available - get a single byte from connection,
1027  * if available
1028  *
1029  * The received byte is stored in *c. Returns 1 if a byte was read,
1030  * 0 if no data was available, or EOF if trouble.
1031  * --------------------------------
1032  */
1033 int
1035 {
1036  int r;
1037 
1039 
1041  {
1042  *c = PqRecvBuffer[PqRecvPointer++];
1043  return 1;
1044  }
1045 
1046  /* Put the socket into non-blocking mode */
1047  socket_set_nonblocking(true);
1048 
1049  r = secure_read(MyProcPort, c, 1);
1050  if (r < 0)
1051  {
1052  /*
1053  * Ok if no data available without blocking or interrupted (though
1054  * EINTR really shouldn't happen with a non-blocking socket). Report
1055  * other errors.
1056  */
1057  if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)
1058  r = 0;
1059  else
1060  {
1061  /*
1062  * Careful: an ereport() that tries to write to the client would
1063  * cause recursion to here, leading to stack overflow and core
1064  * dump! This message must go *only* to the postmaster log.
1065  */
1068  errmsg("could not receive data from client: %m")));
1069  r = EOF;
1070  }
1071  }
1072  else if (r == 0)
1073  {
1074  /* EOF detected */
1075  r = EOF;
1076  }
1077 
1078  return r;
1079 }
1080 
1081 /* --------------------------------
1082  * pq_getbytes - get a known number of bytes from connection
1083  *
      * Copies exactly len bytes into s, taking them from PqRecvBuffer and
      * calling pq_recvbuf() to refill the buffer whenever it has no unread
      * data. The caller must provide room for len bytes at s.
      *
      * If len is 0, returns 0 immediately without touching the buffer.
      *
      * If pq_recvbuf() reports failure partway through, EOF is returned; any
      * bytes copied before that point have already been consumed from the
      * receive buffer and remain in s.
      *
1084  * returns 0 if OK, EOF if trouble
1085  * --------------------------------
1086  */
1087 int
1088 pq_getbytes(char *s, size_t len)
1089 {
1090  size_t amount;
1091
1093
1094  while (len > 0)
1095  {
1096  while (PqRecvPointer >= PqRecvLength)
1097  {
1098  if (pq_recvbuf()) /* If nothing in buffer, then recv some */
1099  return EOF; /* Failed to recv data */
1100  }
1101  amount = PqRecvLength - PqRecvPointer; /* unread bytes currently buffered */
1102  if (amount > len)
1103  amount = len; /* never hand out more than the caller still wants */
1104  memcpy(s, PqRecvBuffer + PqRecvPointer, amount);
1105  PqRecvPointer += amount;
1106  s += amount;
1107  len -= amount;
1108  }
1109  return 0;
1110 }
1111 
1112 /* --------------------------------
1113  * pq_discardbytes - throw away a known number of bytes
1114  *
1115  * Same as pq_getbytes except we do not copy the data to anyplace.
1116  * This is used for resynchronizing after read errors.
1117  *
      * Advances PqRecvPointer past len bytes, calling pq_recvbuf() to refill
      * the buffer whenever it has no unread data. If len is 0, returns 0
      * immediately. If pq_recvbuf() reports failure, EOF is returned and the
      * bytes skipped so far stay consumed.
      *
1118  * returns 0 if OK, EOF if trouble
1119  * --------------------------------
1120  */
1121 static int
1122 pq_discardbytes(size_t len)
1123 {
1124  size_t amount;
1125
1127
1128  while (len > 0)
1129  {
1130  while (PqRecvPointer >= PqRecvLength)
1131  {
1132  if (pq_recvbuf()) /* If nothing in buffer, then recv some */
1133  return EOF; /* Failed to recv data */
1134  }
1135  amount = PqRecvLength - PqRecvPointer; /* unread bytes currently buffered */
1136  if (amount > len)
1137  amount = len; /* skip no more than was asked for */
1138  PqRecvPointer += amount;
1139  len -= amount;
1140  }
1141  return 0;
1142 }
1143 
1144 
1145 /* --------------------------------
1146  * pq_startmsgread - begin reading a message from the client.
1147  *
1148  * This must be called before any of the pq_get* functions.
1149  * --------------------------------
1150  */
1151 void
1153 {
1154  /*
1155  * There shouldn't be a read active already, but let's check just to be
1156  * sure.
1157  */
1158  if (PqCommReadingMsg)
1159  ereport(FATAL,
1160  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1161  errmsg("terminating connection because protocol synchronization was lost")));
1162 
1163  PqCommReadingMsg = true;
1164 }
1165 
1166 
1167 /* --------------------------------
1168  * pq_endmsgread - finish reading message.
1169  *
1170  * This must be called after reading a message with pq_getbytes()
1171  * and friends, to indicate that we have read the whole message.
1172  * pq_getmessage() does this implicitly.
1173  * --------------------------------
1174  */
1175 void
1177 {
1179 
1180  PqCommReadingMsg = false;
1181 }
1182 
1183 /* --------------------------------
1184  * pq_is_reading_msg - are we currently reading a message?
1185  *
1186  * This is used in error recovery at the outer idle loop to detect if we have
1187  * lost protocol sync, and need to terminate the connection. pq_startmsgread()
1188  * will check for that too, but it's nicer to detect it earlier.
1189  * --------------------------------
1190  */
1191 bool
1193 {
1194  return PqCommReadingMsg;
1195 }
1196 
1197 /* --------------------------------
1198  * pq_getmessage - get a message with length word from connection
1199  *
1200  * The return value is placed in an expansible StringInfo, which has
1201  * already been initialized by the caller.
1202  * Only the message body is placed in the StringInfo; the length word
1203  * is removed. Also, s->cursor is initialized to zero for convenience
1204  * in scanning the message contents.
1205  *
1206  * maxlen is the upper limit on the length of the
1207  * message we are willing to accept. We abort the connection (by
1208  * returning EOF) if client tries to send more than that.
1209  *
1210  * returns 0 if OK, EOF if trouble
1211  * --------------------------------
1212  */
1213 int
1215 {
1216  int32 len;
1217 
1219 
1220  resetStringInfo(s);
1221 
1222  /* Read message length word */
1223  if (pq_getbytes((char *) &len, 4) == EOF)
1224  {
1226  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1227  errmsg("unexpected EOF within message length word")));
1228  return EOF;
1229  }
1230 
1231  len = pg_ntoh32(len);
1232 
1233  if (len < 4 || len > maxlen)
1234  {
1236  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1237  errmsg("invalid message length")));
1238  return EOF;
1239  }
1240 
1241  len -= 4; /* discount length itself */
1242 
1243  if (len > 0)
1244  {
1245  /*
1246  * Allocate space for message. If we run out of room (ridiculously
1247  * large message), we will elog(ERROR), but we want to discard the
1248  * message body so as not to lose communication sync.
1249  */
1250  PG_TRY();
1251  {
1252  enlargeStringInfo(s, len);
1253  }
1254  PG_CATCH();
1255  {
1256  if (pq_discardbytes(len) == EOF)
1258  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1259  errmsg("incomplete message from client")));
1260 
1261  /* we discarded the rest of the message so we're back in sync. */
1262  PqCommReadingMsg = false;
1263  PG_RE_THROW();
1264  }
1265  PG_END_TRY();
1266 
1267  /* And grab the message */
1268  if (pq_getbytes(s->data, len) == EOF)
1269  {
1271  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1272  errmsg("incomplete message from client")));
1273  return EOF;
1274  }
1275  s->len = len;
1276  /* Place a trailing null per StringInfo convention */
1277  s->data[len] = '\0';
1278  }
1279 
1280  /* finished reading the message. */
1281  PqCommReadingMsg = false;
1282 
1283  return 0;
1284 }
1285 
1286 
1287 static int
1288 internal_putbytes(const char *s, size_t len)
1289 {
1290  size_t amount;
1291 
1292  while (len > 0)
1293  {
1294  /* If buffer is full, then flush it out */
1296  {
1297  socket_set_nonblocking(false);
1298  if (internal_flush())
1299  return EOF;
1300  }
1301  amount = PqSendBufferSize - PqSendPointer;
1302  if (amount > len)
1303  amount = len;
1304  memcpy(PqSendBuffer + PqSendPointer, s, amount);
1305  PqSendPointer += amount;
1306  s += amount;
1307  len -= amount;
1308  }
1309  return 0;
1310 }
1311 
1312 /* --------------------------------
1313  * socket_flush - flush pending output
1314  *
1315  * returns 0 if OK, EOF if trouble
1316  * --------------------------------
1317  */
1318 static int
1320 {
1321  int res;
1322 
1323  /* No-op if reentrant call */
1324  if (PqCommBusy)
1325  return 0;
1326  PqCommBusy = true;
1327  socket_set_nonblocking(false);
1328  res = internal_flush();
1329  PqCommBusy = false;
1330  return res;
1331 }
1332 
1333 /* --------------------------------
1334  * internal_flush - flush pending output
1335  *
1336  * Returns 0 if OK (meaning everything was sent, or operation would block
1337  * and the socket is in non-blocking mode), or EOF if trouble.
1338  * --------------------------------
1339  */
1340 static int
1342 {
1343  static int last_reported_send_errno = 0;
1344 
1345  char *bufptr = PqSendBuffer + PqSendStart;
1346  char *bufend = PqSendBuffer + PqSendPointer;
1347 
1348  while (bufptr < bufend)
1349  {
1350  int r;
1351 
1352  r = secure_write(MyProcPort, bufptr, bufend - bufptr);
1353 
1354  if (r <= 0)
1355  {
1356  if (errno == EINTR)
1357  continue; /* Ok if we were interrupted */
1358 
1359  /*
1360  * Ok if no data writable without blocking, and the socket is in
1361  * non-blocking mode.
1362  */
1363  if (errno == EAGAIN ||
1364  errno == EWOULDBLOCK)
1365  {
1366  return 0;
1367  }
1368 
1369  /*
1370  * Careful: an ereport() that tries to write to the client would
1371  * cause recursion to here, leading to stack overflow and core
1372  * dump! This message must go *only* to the postmaster log.
1373  *
1374  * If a client disconnects while we're in the midst of output, we
1375  * might write quite a bit of data before we get to a safe query
1376  * abort point. So, suppress duplicate log messages.
1377  */
1378  if (errno != last_reported_send_errno)
1379  {
1380  last_reported_send_errno = errno;
1383  errmsg("could not send data to client: %m")));
1384  }
1385 
1386  /*
1387  * We drop the buffered data anyway so that processing can
1388  * continue, even though we'll probably quit soon. We also set a
1389  * flag that'll cause the next CHECK_FOR_INTERRUPTS to terminate
1390  * the connection.
1391  */
1392  PqSendStart = PqSendPointer = 0;
1394  InterruptPending = 1;
1395  return EOF;
1396  }
1397 
1398  last_reported_send_errno = 0; /* reset after any successful send */
1399  bufptr += r;
1400  PqSendStart += r;
1401  }
1402 
1403  PqSendStart = PqSendPointer = 0;
1404  return 0;
1405 }
1406 
1407 /* --------------------------------
1408  * pq_flush_if_writable - flush pending output if writable without blocking
1409  *
1410  * Returns 0 if OK, or EOF if trouble.
1411  * --------------------------------
1412  */
1413 static int
1415 {
1416  int res;
1417 
1418  /* Quick exit if nothing to do */
1419  if (PqSendPointer == PqSendStart)
1420  return 0;
1421 
1422  /* No-op if reentrant call */
1423  if (PqCommBusy)
1424  return 0;
1425 
1426  /* Temporarily put the socket into non-blocking mode */
1427  socket_set_nonblocking(true);
1428 
1429  PqCommBusy = true;
1430  res = internal_flush();
1431  PqCommBusy = false;
1432  return res;
1433 }
1434 
1435 /* --------------------------------
1436  * socket_is_send_pending - is there any pending data in the output buffer?
1437  * --------------------------------
1438  */
1439 static bool
1441 {
1442  return (PqSendStart < PqSendPointer);
1443 }
1444 
1445 /* --------------------------------
1446  * Message-level I/O routines begin here.
1447  * --------------------------------
1448  */
1449 
1450 
1451 /* --------------------------------
1452  * socket_putmessage - send a normal message (suppressed in COPY OUT mode)
1453  *
1454  * msgtype is a message type code to place before the message body.
1455  *
1456  * len is the length of the message body data at *s. A message length
1457  * word (equal to len+4 because it counts itself too) is inserted by this
1458  * routine.
1459  *
1460  * We suppress messages generated while pqcomm.c is busy. This
1461  * avoids any possibility of messages being inserted within other
1462  * messages. The only known trouble case arises if SIGQUIT occurs
1463  * during a pqcomm.c routine --- quickdie() will try to send a warning
1464  * message, and the most reasonable approach seems to be to drop it.
1465  *
1466  * returns 0 if OK, EOF if trouble
1467  * --------------------------------
1468  */
1469 static int
1470 socket_putmessage(char msgtype, const char *s, size_t len)
1471 {
1472  uint32 n32;
1473 
1474  Assert(msgtype != 0);
1475 
1476  if (PqCommBusy)
1477  return 0;
1478  PqCommBusy = true;
1479  if (internal_putbytes(&msgtype, 1))
1480  goto fail;
1481 
1482  n32 = pg_hton32((uint32) (len + 4));
1483  if (internal_putbytes((char *) &n32, 4))
1484  goto fail;
1485 
1486  if (internal_putbytes(s, len))
1487  goto fail;
1488  PqCommBusy = false;
1489  return 0;
1490 
1491 fail:
1492  PqCommBusy = false;
1493  return EOF;
1494 }
1495 
1496 /* --------------------------------
1497  * pq_putmessage_noblock - like pq_putmessage, but never blocks
1498  *
1499  * If the output buffer is too small to hold the message, the buffer
1500  * is enlarged.
1501  */
1502 static void
1503 socket_putmessage_noblock(char msgtype, const char *s, size_t len)
1504 {
1505  int res PG_USED_FOR_ASSERTS_ONLY;
1506  int required;
1507 
1508  /*
1509  * Ensure we have enough space in the output buffer for the message header
1510  * as well as the message itself.
1511  */
1512  required = PqSendPointer + 1 + 4 + len;
1513  if (required > PqSendBufferSize)
1514  {
1515  PqSendBuffer = repalloc(PqSendBuffer, required);
1517  }
1518  res = pq_putmessage(msgtype, s, len);
1519  Assert(res == 0); /* should not fail when the message fits in
1520  * buffer */
1521 }
1522 
1523 /* --------------------------------
1524  * pq_putmessage_v2 - send a message in protocol version 2
1525  *
1526  * msgtype is a message type code to place before the message body.
1527  *
1528  * We no longer support protocol version 2, but we have kept this
1529  * function so that if a client tries to connect with protocol version 2,
1530  * as a courtesy we can still send the "unsupported protocol version"
1531  * error to the client in the old format.
1532  *
1533  * Like in pq_putmessage(), we suppress messages generated while
1534  * pqcomm.c is busy.
1535  *
1536  * returns 0 if OK, EOF if trouble
1537  * --------------------------------
1538  */
1539 int
1540 pq_putmessage_v2(char msgtype, const char *s, size_t len)
1541 {
1542  Assert(msgtype != 0);
1543 
1544  if (PqCommBusy)
1545  return 0;
1546  PqCommBusy = true;
1547  if (internal_putbytes(&msgtype, 1))
1548  goto fail;
1549 
1550  if (internal_putbytes(s, len))
1551  goto fail;
1552  PqCommBusy = false;
1553  return 0;
1554 
1555 fail:
1556  PqCommBusy = false;
1557  return EOF;
1558 }
1559 
1560 /*
1561  * Support for TCP Keepalive parameters
1562  */
1563 
/*
 * pq_setkeepaliveswin32 - set keepalive idle time and interval on Windows
 *
 * Windows requires the idle time and the interval to be set together.
 * There is also no way to reset them to the system default (a zero is
 * taken literally rather than as "default"), so non-positive inputs are
 * replaced by the out-of-the-box defaults instead.
 *
 * On success the values actually applied are recorded in *port.
 *
 * returns STATUS_OK, or STATUS_ERROR if WSAIoctl() fails (logged at LOG)
 */
#if defined(WIN32) && defined(SIO_KEEPALIVE_VALS)
static int
pq_setkeepaliveswin32(Port *port, int idle, int interval)
{
	struct tcp_keepalive settings;
	DWORD		bytes_returned;
	int			rc;

	if (idle <= 0)
		idle = 2 * 60 * 60;		/* default = 2 hours */
	if (interval <= 0)
		interval = 1;			/* default = 1 second */

	/* Inputs are in seconds; the struct presumably wants milliseconds */
	settings.onoff = 1;
	settings.keepalivetime = idle * 1000;
	settings.keepaliveinterval = interval * 1000;

	rc = WSAIoctl(port->sock, SIO_KEEPALIVE_VALS,
				  (LPVOID) &settings, sizeof(settings),
				  NULL, 0,
				  &bytes_returned,
				  NULL, NULL);
	if (rc != 0)
	{
		ereport(LOG,
				(errmsg("%s(%s) failed: error code %d",
						"WSAIoctl", "SIO_KEEPALIVE_VALS", WSAGetLastError())));
		return STATUS_ERROR;
	}

	/* Remember what is now in effect */
	port->keepalives_idle = idle;
	port->keepalives_interval = interval;
	return STATUS_OK;
}
#endif
1609 
1610 int
1612 {
1613 #if defined(PG_TCP_KEEPALIVE_IDLE) || defined(SIO_KEEPALIVE_VALS)
1614  if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family))
1615  return 0;
1616 
1617  if (port->keepalives_idle != 0)
1618  return port->keepalives_idle;
1619 
1620  if (port->default_keepalives_idle == 0)
1621  {
1622 #ifndef WIN32
1623  ACCEPT_TYPE_ARG3 size = sizeof(port->default_keepalives_idle);
1624 
1625  if (getsockopt(port->sock, IPPROTO_TCP, PG_TCP_KEEPALIVE_IDLE,
1626  (char *) &port->default_keepalives_idle,
1627  &size) < 0)
1628  {
1629  ereport(LOG,
1630  (errmsg("%s(%s) failed: %m", "getsockopt", PG_TCP_KEEPALIVE_IDLE_STR)));
1631  port->default_keepalives_idle = -1; /* don't know */
1632  }
1633 #else /* WIN32 */
1634  /* We can't get the defaults on Windows, so return "don't know" */
1635  port->default_keepalives_idle = -1;
1636 #endif /* WIN32 */
1637  }
1638 
1639  return port->default_keepalives_idle;
1640 #else
1641  return 0;
1642 #endif
1643 }
1644 
1645 int
1646 pq_setkeepalivesidle(int idle, Port *port)
1647 {
1648  if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family))
1649  return STATUS_OK;
1650 
1651 /* check SIO_KEEPALIVE_VALS here, not just WIN32, as some toolchains lack it */
1652 #if defined(PG_TCP_KEEPALIVE_IDLE) || defined(SIO_KEEPALIVE_VALS)
1653  if (idle == port->keepalives_idle)
1654  return STATUS_OK;
1655 
1656 #ifndef WIN32
1657  if (port->default_keepalives_idle <= 0)
1658  {
1659  if (pq_getkeepalivesidle(port) < 0)
1660  {
1661  if (idle == 0)
1662  return STATUS_OK; /* default is set but unknown */
1663  else
1664  return STATUS_ERROR;
1665  }
1666  }
1667 
1668  if (idle == 0)
1669  idle = port->default_keepalives_idle;
1670 
1671  if (setsockopt(port->sock, IPPROTO_TCP, PG_TCP_KEEPALIVE_IDLE,
1672  (char *) &idle, sizeof(idle)) < 0)
1673  {
1674  ereport(LOG,
1675  (errmsg("%s(%s) failed: %m", "setsockopt", PG_TCP_KEEPALIVE_IDLE_STR)));
1676  return STATUS_ERROR;
1677  }
1678 
1679  port->keepalives_idle = idle;
1680 #else /* WIN32 */
1681  return pq_setkeepaliveswin32(port, idle, port->keepalives_interval);
1682 #endif
1683 #else
1684  if (idle != 0)
1685  {
1686  ereport(LOG,
1687  (errmsg("setting the keepalive idle time is not supported")));
1688  return STATUS_ERROR;
1689  }
1690 #endif
1691 
1692  return STATUS_OK;
1693 }
1694 
1695 int
1697 {
1698 #if defined(TCP_KEEPINTVL) || defined(SIO_KEEPALIVE_VALS)
1699  if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family))
1700  return 0;
1701 
1702  if (port->keepalives_interval != 0)
1703  return port->keepalives_interval;
1704 
1705  if (port->default_keepalives_interval == 0)
1706  {
1707 #ifndef WIN32
1708  ACCEPT_TYPE_ARG3 size = sizeof(port->default_keepalives_interval);
1709 
1710  if (getsockopt(port->sock, IPPROTO_TCP, TCP_KEEPINTVL,
1711  (char *) &port->default_keepalives_interval,
1712  &size) < 0)
1713  {
1714  ereport(LOG,
1715  (errmsg("%s(%s) failed: %m", "getsockopt", "TCP_KEEPINTVL")));
1716  port->default_keepalives_interval = -1; /* don't know */
1717  }
1718 #else
1719  /* We can't get the defaults on Windows, so return "don't know" */
1720  port->default_keepalives_interval = -1;
1721 #endif /* WIN32 */
1722  }
1723 
1724  return port->default_keepalives_interval;
1725 #else
1726  return 0;
1727 #endif
1728 }
1729 
1730 int
1732 {
1733  if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family))
1734  return STATUS_OK;
1735 
1736 #if defined(TCP_KEEPINTVL) || defined(SIO_KEEPALIVE_VALS)
1737  if (interval == port->keepalives_interval)
1738  return STATUS_OK;
1739 
1740 #ifndef WIN32
1741  if (port->default_keepalives_interval <= 0)
1742  {
1743  if (pq_getkeepalivesinterval(port) < 0)
1744  {
1745  if (interval == 0)
1746  return STATUS_OK; /* default is set but unknown */
1747  else
1748  return STATUS_ERROR;
1749  }
1750  }
1751 
1752  if (interval == 0)
1753  interval = port->default_keepalives_interval;
1754 
1755  if (setsockopt(port->sock, IPPROTO_TCP, TCP_KEEPINTVL,
1756  (char *) &interval, sizeof(interval)) < 0)
1757  {
1758  ereport(LOG,
1759  (errmsg("%s(%s) failed: %m", "setsockopt", "TCP_KEEPINTVL")));
1760  return STATUS_ERROR;
1761  }
1762 
1763  port->keepalives_interval = interval;
1764 #else /* WIN32 */
1765  return pq_setkeepaliveswin32(port, port->keepalives_idle, interval);
1766 #endif
1767 #else
1768  if (interval != 0)
1769  {
1770  ereport(LOG,
1771  (errmsg("%s(%s) not supported", "setsockopt", "TCP_KEEPINTVL")));
1772  return STATUS_ERROR;
1773  }
1774 #endif
1775 
1776  return STATUS_OK;
1777 }
1778 
1779 int
1781 {
1782 #ifdef TCP_KEEPCNT
1783  if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family))
1784  return 0;
1785 
1786  if (port->keepalives_count != 0)
1787  return port->keepalives_count;
1788 
1789  if (port->default_keepalives_count == 0)
1790  {
1791  ACCEPT_TYPE_ARG3 size = sizeof(port->default_keepalives_count);
1792 
1793  if (getsockopt(port->sock, IPPROTO_TCP, TCP_KEEPCNT,
1794  (char *) &port->default_keepalives_count,
1795  &size) < 0)
1796  {
1797  ereport(LOG,
1798  (errmsg("%s(%s) failed: %m", "getsockopt", "TCP_KEEPCNT")));
1799  port->default_keepalives_count = -1; /* don't know */
1800  }
1801  }
1802 
1803  return port->default_keepalives_count;
1804 #else
1805  return 0;
1806 #endif
1807 }
1808 
1809 int
1810 pq_setkeepalivescount(int count, Port *port)
1811 {
1812  if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family))
1813  return STATUS_OK;
1814 
1815 #ifdef TCP_KEEPCNT
1816  if (count == port->keepalives_count)
1817  return STATUS_OK;
1818 
1819  if (port->default_keepalives_count <= 0)
1820  {
1821  if (pq_getkeepalivescount(port) < 0)
1822  {
1823  if (count == 0)
1824  return STATUS_OK; /* default is set but unknown */
1825  else
1826  return STATUS_ERROR;
1827  }
1828  }
1829 
1830  if (count == 0)
1831  count = port->default_keepalives_count;
1832 
1833  if (setsockopt(port->sock, IPPROTO_TCP, TCP_KEEPCNT,
1834  (char *) &count, sizeof(count)) < 0)
1835  {
1836  ereport(LOG,
1837  (errmsg("%s(%s) failed: %m", "setsockopt", "TCP_KEEPCNT")));
1838  return STATUS_ERROR;
1839  }
1840 
1841  port->keepalives_count = count;
1842 #else
1843  if (count != 0)
1844  {
1845  ereport(LOG,
1846  (errmsg("%s(%s) not supported", "setsockopt", "TCP_KEEPCNT")));
1847  return STATUS_ERROR;
1848  }
1849 #endif
1850 
1851  return STATUS_OK;
1852 }
1853 
1854 int
1856 {
1857 #ifdef TCP_USER_TIMEOUT
1858  if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family))
1859  return 0;
1860 
1861  if (port->tcp_user_timeout != 0)
1862  return port->tcp_user_timeout;
1863 
1864  if (port->default_tcp_user_timeout == 0)
1865  {
1866  ACCEPT_TYPE_ARG3 size = sizeof(port->default_tcp_user_timeout);
1867 
1868  if (getsockopt(port->sock, IPPROTO_TCP, TCP_USER_TIMEOUT,
1869  (char *) &port->default_tcp_user_timeout,
1870  &size) < 0)
1871  {
1872  ereport(LOG,
1873  (errmsg("%s(%s) failed: %m", "getsockopt", "TCP_USER_TIMEOUT")));
1874  port->default_tcp_user_timeout = -1; /* don't know */
1875  }
1876  }
1877 
1878  return port->default_tcp_user_timeout;
1879 #else
1880  return 0;
1881 #endif
1882 }
1883 
1884 int
1885 pq_settcpusertimeout(int timeout, Port *port)
1886 {
1887  if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family))
1888  return STATUS_OK;
1889 
1890 #ifdef TCP_USER_TIMEOUT
1891  if (timeout == port->tcp_user_timeout)
1892  return STATUS_OK;
1893 
1894  if (port->default_tcp_user_timeout <= 0)
1895  {
1896  if (pq_gettcpusertimeout(port) < 0)
1897  {
1898  if (timeout == 0)
1899  return STATUS_OK; /* default is set but unknown */
1900  else
1901  return STATUS_ERROR;
1902  }
1903  }
1904 
1905  if (timeout == 0)
1906  timeout = port->default_tcp_user_timeout;
1907 
1908  if (setsockopt(port->sock, IPPROTO_TCP, TCP_USER_TIMEOUT,
1909  (char *) &timeout, sizeof(timeout)) < 0)
1910  {
1911  ereport(LOG,
1912  (errmsg("%s(%s) failed: %m", "setsockopt", "TCP_USER_TIMEOUT")));
1913  return STATUS_ERROR;
1914  }
1915 
1916  port->tcp_user_timeout = timeout;
1917 #else
1918  if (timeout != 0)
1919  {
1920  ereport(LOG,
1921  (errmsg("%s(%s) not supported", "setsockopt", "TCP_USER_TIMEOUT")));
1922  return STATUS_ERROR;
1923  }
1924 #endif
1925 
1926  return STATUS_OK;
1927 }
1928 
/*
 * Check if the client is still connected.
 *
 * Returns false if poll() fails or reports that the socket was hung up or
 * closed by the peer.  Returns true otherwise, which includes every
 * platform without POLLRDHUP, where no check is made at all.
 */
bool
pq_check_connection(void)
{
#if defined(POLLRDHUP)
	/*
	 * POLLRDHUP is a Linux extension to poll(2) to detect sockets closed by
	 * the other end.  We don't have a portable way to do that without
	 * actually trying to read or write data on other systems.  We don't want
	 * to read because that would be confused by pipelined queries and COPY
	 * data. Perhaps in future we'll try to write a heartbeat message instead.
	 */
	struct pollfd pollfd;
	int			rc;

	pollfd.fd = MyProcPort->sock;
	pollfd.events = POLLOUT | POLLIN | POLLRDHUP;
	pollfd.revents = 0;

	/* Zero timeout: just sample the socket state without waiting */
	rc = poll(&pollfd, 1, 0);

	if (rc < 0)
	{
		ereport(COMMERROR,
				(errcode_for_socket_access(),
				 errmsg("could not poll socket: %m")));
		return false;
	}
	else if (rc == 1 && (pollfd.revents & (POLLHUP | POLLRDHUP)))
		return false;
#endif

	return true;
}
int StreamServerPort(int family, const char *hostName, unsigned short portNumber, const char *unixSocketDir, pgsocket ListenSocket[], int MaxListen)
Definition: pqcomm.c:318
int pq_putmessage_v2(char msgtype, const char *s, size_t len)
Definition: pqcomm.c:1540
#define NIL
Definition: pg_list.h:65
void CreateSocketLockFile(const char *socketfile, bool amPostmaster, const char *socketDir)
Definition: miscinit.c:1284
static List * sock_paths
Definition: pqcomm.c:112
#define FeBeWaitSetSocketPos
Definition: libpq.h:63
#define WL_SOCKET_WRITEABLE
Definition: latch.h:127
static char PqRecvBuffer[PQ_RECV_BUFFER_SIZE]
Definition: pqcomm.c:129
void pg_freeaddrinfo_all(int hint_ai_family, struct addrinfo *ai)
Definition: ip.c:88
#define UNIXSOCK_PATH(path, port, sockdir)
Definition: pqcomm.h:70
int errhint(const char *fmt,...)
Definition: elog.c:1156
#define accept(s, addr, addrlen)
Definition: win32_port.h:462
#define NI_NUMERICHOST
Definition: getaddrinfo.h:78
struct Port * MyProcPort
Definition: globals.c:46
int pq_peekbyte(void)
Definition: pqcomm.c:1013
void StreamClose(pgsocket sock)
Definition: pqcomm.c:850
int AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd, Latch *latch, void *user_data)
Definition: latch.c:862
int keepalives_idle
Definition: libpq-be.h:186
#define EAGAIN
Definition: win32_port.h:341
void on_proc_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:305
static char * PqSendBuffer
Definition: pqcomm.c:124
int pq_setkeepalivesinterval(int interval, Port *port)
Definition: pqcomm.c:1731
#define UNIXSOCK_PATH_BUFLEN
Definition: pqcomm.h:86
#define closesocket
Definition: port.h:332
static int socket_flush_if_writable(void)
Definition: pqcomm.c:1414
char * pstrdup(const char *in)
Definition: mcxt.c:1299
static int PqSendStart
Definition: pqcomm.c:127
static int pq_discardbytes(size_t len)
Definition: pqcomm.c:1122
int pq_getkeepalivesinterval(Port *port)
Definition: pqcomm.c:1696
struct sockaddr_storage addr
Definition: pqcomm.h:64
int tcp_user_timeout
Definition: guc.c:595
const PQcommMethods * PqCommMethods
Definition: pqcomm.c:166
#define FeBeWaitSetLatchPos
Definition: libpq.h:64
int errcode(int sqlerrcode)
Definition: elog.c:698
Definition: libpq-be.h:125
static bool socket_is_send_pending(void)
Definition: pqcomm.c:1440
#define STATUS_ERROR
Definition: c.h:1171
#define MemSet(start, val, len)
Definition: c.h:1008
ssize_t secure_read(Port *port, void *ptr, size_t len)
Definition: be-secure.c:147
WaitEventSet * FeBeWaitSet
Definition: pqcomm.c:168
#define LOG
Definition: elog.h:26
int StreamConnection(pgsocket server_fd, Port *port)
Definition: pqcomm.c:716
int default_keepalives_interval
Definition: libpq-be.h:183
#define bind(s, addr, addrlen)
Definition: win32_port.h:460
int default_keepalives_count
Definition: libpq-be.h:184
static int fd(const char *x, int i)
Definition: preproc-init.c:105
pgsocket sock
Definition: libpq-be.h:127
static void socket_comm_reset(void)
Definition: pqcomm.c:232
#define gai_strerror
Definition: getaddrinfo.h:146
int tcp_keepalives_idle
Definition: guc.c:592
signed int int32
Definition: c.h:429
int pg_getaddrinfo_all(const char *hostname, const char *servname, const struct addrinfo *hintp, struct addrinfo **result)
Definition: ip.c:57
WaitEventSet * CreateWaitEventSet(MemoryContext context, int nevents)
Definition: latch.c:684
static int internal_flush(void)
Definition: pqcomm.c:1341
int pq_gettcpusertimeout(Port *port)
Definition: pqcomm.c:1855
SockAddr raddr
Definition: libpq-be.h:131
static void socket_set_nonblocking(bool nonblocking)
Definition: pqcomm.c:916
void pg_usleep(long microsec)
Definition: signal.c:53
bool pq_check_connection(void)
Definition: pqcomm.c:1933
void pq_init(void)
Definition: pqcomm.c:176
static int PqRecvLength
Definition: pqcomm.c:131
int keepalives_count
Definition: libpq-be.h:188
#define pg_ntoh32(x)
Definition: pg_bswap.h:125
#define NI_MAXHOST
Definition: getaddrinfo.h:88
#define ERROR
Definition: elog.h:46
void pq_startmsgread(void)
Definition: pqcomm.c:1152
bool pq_is_reading_msg(void)
Definition: pqcomm.c:1192
int pq_setkeepalivesidle(int idle, Port *port)
Definition: pqcomm.c:1646
#define pg_hton32(x)
Definition: pg_bswap.h:121
#define IS_AF_UNIX(fam)
Definition: ip.h:24
#define FATAL
Definition: elog.h:49
#define MAXPGPATH
int tcp_user_timeout
Definition: libpq-be.h:189
static int pq_recvbuf(void)
Definition: pqcomm.c:933
int pq_getbyte_if_available(unsigned char *c)
Definition: pqcomm.c:1034
int MaxBackends
Definition: globals.c:139
static int PqSendPointer
Definition: pqcomm.c:126
char * c
int pq_getbytes(char *s, size_t len)
Definition: pqcomm.c:1088
#define AI_PASSIVE
Definition: getaddrinfo.h:62
#define COMMERROR
Definition: elog.h:30
int errcode_for_file_access(void)
Definition: elog.c:721
void enlargeStringInfo(StringInfo str, int needed)
Definition: stringinfo.c:283
volatile sig_atomic_t ClientConnectionLost
Definition: globals.c:34
void resetStringInfo(StringInfo str)
Definition: stringinfo.c:75
unsigned int uint32
Definition: c.h:441
int pgsocket
Definition: port.h:31
ACCEPT_TYPE_ARG3 salen
Definition: pqcomm.h:65
void TouchSocketFiles(void)
Definition: pqcomm.c:865
ssize_t secure_write(Port *port, void *ptr, size_t len)
Definition: be-secure.c:260
#define STATUS_OK
Definition: c.h:1170
int tcp_keepalives_interval
Definition: guc.c:593
MemoryContext TopMemoryContext
Definition: mcxt.c:48
int errcode_for_socket_access(void)
Definition: elog.c:792
SockAddr laddr
Definition: libpq-be.h:130
static int port
Definition: pg_regress.c:92
List * lappend(List *list, void *datum)
Definition: list.c:336
int default_keepalives_idle
Definition: libpq-be.h:182
static int PqSendBufferSize
Definition: pqcomm.c:125
static int internal_putbytes(const char *s, size_t len)
Definition: pqcomm.c:1288
#define WARNING
Definition: elog.h:40
static bool PqCommBusy
Definition: pqcomm.c:136
#define listen(s, backlog)
Definition: win32_port.h:461
int pq_getmessage(StringInfo s, int maxlen)
Definition: pqcomm.c:1214
#define WL_POSTMASTER_DEATH
Definition: latch.h:129
static pgsocket ListenSocket[MAXLISTEN]
Definition: postmaster.c:220
#define socket(af, type, protocol)
Definition: win32_port.h:459
uintptr_t Datum
Definition: postgres.h:411
#define PGINVALID_SOCKET
Definition: port.h:33
#define PG_SOMAXCONN
int pq_getbyte(void)
Definition: pqcomm.c:994
void pq_endmsgread(void)
Definition: pqcomm.c:1176
#define ereport(elevel,...)
Definition: elog.h:157
#define PG_CATCH()
Definition: elog.h:323
#define Assert(condition)
Definition: c.h:804
#define lfirst(lc)
Definition: pg_list.h:169
int pg_getnameinfo_all(const struct sockaddr_storage *addr, int salen, char *node, int nodelen, char *service, int servicelen, int flags)
Definition: ip.c:122
bool pg_set_noblock(pgsocket sock)
Definition: noblock.c:25
int pq_setkeepalivescount(int count, Port *port)
Definition: pqcomm.c:1810
int Unix_socket_permissions
Definition: pqcomm.c:108
int pq_getkeepalivescount(Port *port)
Definition: pqcomm.c:1780
int ai_socktype
Definition: getaddrinfo.h:102
void secure_close(Port *port)
Definition: be-secure.c:135
int pq_getkeepalivesidle(Port *port)
Definition: pqcomm.c:1611
#define EADDRINUSE
Definition: win32_port.h:369
bool noblock
Definition: libpq-be.h:128
volatile sig_atomic_t InterruptPending
Definition: globals.c:30
#define PG_RE_THROW()
Definition: elog.h:354
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1182
static const PQcommMethods PqCommSocketMethods
Definition: pqcomm.c:157
static int socket_putmessage(char msgtype, const char *s, size_t len)
Definition: pqcomm.c:1470
static void socket_close(int code, Datum arg)
Definition: pqcomm.c:247
static int socket_flush(void)
Definition: pqcomm.c:1319
struct addrinfo * ai_next
Definition: getaddrinfo.h:107
void * gss
Definition: libpq-be.h:203
static void socket_putmessage_noblock(char msgtype, const char *s, size_t len)
Definition: pqcomm.c:1503
int errmsg(const char *fmt,...)
Definition: elog.c:909
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:863
#define elog(elevel,...)
Definition: elog.h:232
int gid_t
Definition: win32_port.h:237
#define PQ_RECV_BUFFER_SIZE
Definition: pqcomm.c:122
int pq_settcpusertimeout(int timeout, Port *port)
Definition: pqcomm.c:1885
int tcp_keepalives_count
Definition: guc.c:594
void * arg
struct Latch * MyLatch
Definition: globals.c:57
static int PqRecvPointer
Definition: pqcomm.c:130
#define EWOULDBLOCK
Definition: win32_port.h:349
char * Unix_socket_group
Definition: pqcomm.c:109
int default_tcp_user_timeout
Definition: libpq-be.h:185
static bool PqCommReadingMsg
Definition: pqcomm.c:137
#define pq_putmessage(msgtype, s, len)
Definition: libpq.h:49
#define EINTR
Definition: win32_port.h:343
void RemoveSocketFiles(void)
Definition: pqcomm.c:883
int ai_flags
Definition: getaddrinfo.h:100
#define PG_TRY()
Definition: elog.h:313
Definition: pg_list.h:50
int keepalives_interval
Definition: libpq-be.h:187
#define snprintf
Definition: port.h:216
#define WL_LATCH_SET
Definition: latch.h:125
#define _(x)
Definition: elog.c:89
#define PQ_SEND_BUFFER_SIZE
Definition: pqcomm.c:121
long val
Definition: informix.c:664
#define PG_END_TRY()
Definition: elog.h:338
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:155
int ai_family
Definition: getaddrinfo.h:101