PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
pqcomm.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * pqcomm.c
4  * Communication functions between the Frontend and the Backend
5  *
6  * These routines handle the low-level details of communication between
7  * frontend and backend. They just shove data across the communication
8  * channel, and are ignorant of the semantics of the data --- or would be,
9  * except for major brain damage in the design of the old COPY OUT protocol.
10  * Unfortunately, COPY OUT was designed to commandeer the communication
11  * channel (it just transfers data without wrapping it into messages).
12  * No other messages can be sent while COPY OUT is in progress; and if the
13  * copy is aborted by an ereport(ERROR), we need to close out the copy so that
14  * the frontend gets back into sync. Therefore, these routines have to be
15  * aware of COPY OUT state. (New COPY-OUT is message-based and does *not*
16  * set the DoingCopyOut flag.)
17  *
18  * NOTE: generally, it's a bad idea to emit outgoing messages directly with
19  * pq_putbytes(), especially if the message would require multiple calls
20  * to send. Instead, use the routines in pqformat.c to construct the message
21  * in a buffer and then emit it in one call to pq_putmessage. This ensures
22  * that the channel will not be clogged by an incomplete message if execution
23  * is aborted by ereport(ERROR) partway through the message. The only
24  * non-libpq code that should call pq_putbytes directly is old-style COPY OUT.
25  *
26  * At one time, libpq was shared between frontend and backend, but now
27  * the backend's "backend/libpq" is quite separate from "interfaces/libpq".
28  * All that remains is similarities of names to trap the unwary...
29  *
30  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
31  * Portions Copyright (c) 1994, Regents of the University of California
32  *
33  * src/backend/libpq/pqcomm.c
34  *
35  *-------------------------------------------------------------------------
36  */
37 
38 /*------------------------
39  * INTERFACE ROUTINES
40  *
41  * setup/teardown:
42  * StreamServerPort - Open postmaster's server port
43  * StreamConnection - Create new connection with client
44  * StreamClose - Close a client/backend connection
45  * TouchSocketFiles - Protect socket files against /tmp cleaners
46  * pq_init - initialize libpq at backend startup
47  * pq_comm_reset - reset libpq during error recovery
48  * pq_close - shutdown libpq at backend exit
49  *
50  * low-level I/O:
51  * pq_getbytes - get a known number of bytes from connection
52  * pq_getstring - get a null terminated string from connection
53  * pq_getmessage - get a message with length word from connection
54  * pq_getbyte - get next byte from connection
55  * pq_peekbyte - peek at next byte from connection
56  * pq_putbytes - send bytes to connection (not flushed until pq_flush)
57  * pq_flush - flush pending output
58  * pq_flush_if_writable - flush pending output if writable without blocking
59  * pq_getbyte_if_available - get a byte if available without blocking
60  *
61  * message-level I/O (and old-style-COPY-OUT cruft):
62  * pq_putmessage - send a normal message (suppressed in COPY OUT mode)
63  * pq_putmessage_noblock - buffer a normal message (suppressed in COPY OUT)
64  * pq_startcopyout - inform libpq that a COPY OUT transfer is beginning
65  * pq_endcopyout - end a COPY OUT transfer
66  *
67  *------------------------
68  */
69 #include "postgres.h"
70 
71 #include <signal.h>
72 #include <fcntl.h>
73 #include <grp.h>
74 #include <unistd.h>
75 #include <sys/file.h>
76 #include <sys/socket.h>
77 #include <sys/stat.h>
78 #include <sys/time.h>
79 #include <netdb.h>
80 #include <netinet/in.h>
81 #ifdef HAVE_NETINET_TCP_H
82 #include <netinet/tcp.h>
83 #endif
84 #include <arpa/inet.h>
85 #ifdef HAVE_UTIME_H
86 #include <utime.h>
87 #endif
88 #ifdef WIN32_ONLY_COMPILER /* mstcpip.h is missing on mingw */
89 #include <mstcpip.h>
90 #endif
91 
92 #include "common/ip.h"
93 #include "libpq/libpq.h"
94 #include "miscadmin.h"
95 #include "storage/ipc.h"
96 #include "utils/guc.h"
97 #include "utils/memutils.h"
98 
99 /*
100  * Configuration options
101  */
104 
105 /* Where the Unix socket files are (list of palloc'd strings) */
106 static List *sock_paths = NIL;
107 
108 /*
109  * Buffers for low-level I/O.
110  *
111  * The receive buffer is fixed size. Send buffer is usually 8k, but can be
112  * enlarged by pq_putmessage_noblock() if the message doesn't fit otherwise.
113  */
114 
115 #define PQ_SEND_BUFFER_SIZE 8192
116 #define PQ_RECV_BUFFER_SIZE 8192
117 
118 static char *PqSendBuffer; /* Buffer holding outgoing bytes */
119 static int PqSendBufferSize; /* Current size of PqSendBuffer */
120 static int PqSendPointer; /* Next index to store a byte in PqSendBuffer */
121 static int PqSendStart; /* Next index to send a byte in PqSendBuffer */
122 
124 static int PqRecvPointer; /* Next index to read a byte from PqRecvBuffer */
125 static int PqRecvLength; /* End of data available in PqRecvBuffer */
126 
127 /*
128  * Message status
129  */
130 static bool PqCommBusy; /* busy sending data to the client */
131 static bool PqCommReadingMsg; /* in the middle of reading a message */
132 static bool DoingCopyOut; /* in old-protocol COPY OUT processing */
133 
134 
135 /* Internal functions */
136 static void socket_comm_reset(void);
137 static void socket_close(int code, Datum arg);
138 static void socket_set_nonblocking(bool nonblocking);
139 static int socket_flush(void);
140 static int socket_flush_if_writable(void);
141 static bool socket_is_send_pending(void);
142 static int socket_putmessage(char msgtype, const char *s, size_t len);
143 static void socket_putmessage_noblock(char msgtype, const char *s, size_t len);
144 static void socket_startcopyout(void);
145 static void socket_endcopyout(bool errorAbort);
146 static int internal_putbytes(const char *s, size_t len);
147 static int internal_flush(void);
148 
149 #ifdef HAVE_UNIX_SOCKETS
150 static int Lock_AF_UNIX(char *unixSocketDir, char *unixSocketPath);
151 static int Setup_AF_UNIX(char *sock_path);
152 #endif /* HAVE_UNIX_SOCKETS */
153 
156  socket_flush,
163 };
164 
166 
168 
169 
170 /* --------------------------------
171  * pq_init - initialize libpq at backend startup
172  * --------------------------------
173  */
174 void
175 pq_init(void)
176 {
177  /* initialize state variables */
181  PqCommBusy = false;
182  PqCommReadingMsg = false;
183  DoingCopyOut = false;
184 
185  /* set up process-exit hook to close the socket */
187 
188  /*
189  * In backends (as soon as forked) we operate the underlying socket in
190  * nonblocking mode and use latches to implement blocking semantics if
191  * needed. That allows us to provide safely interruptible reads and
192  * writes.
193  *
194  * Use COMMERROR on failure, because ERROR would try to send the error to
195  * the client, which might require changing the mode again, leading to
196  * infinite recursion.
197  */
198 #ifndef WIN32
201  (errmsg("could not set socket to nonblocking mode: %m")));
202 #endif
203 
204  FeBeWaitSet = CreateWaitEventSet(TopMemoryContext, 3);
206  NULL, NULL);
207  AddWaitEventToSet(FeBeWaitSet, WL_LATCH_SET, -1, MyLatch, NULL);
208  AddWaitEventToSet(FeBeWaitSet, WL_POSTMASTER_DEATH, -1, NULL, NULL);
209 }
210 
211 /* --------------------------------
212  * socket_comm_reset - reset libpq during error recovery
213  *
214  * This is called from error recovery at the outer idle loop. It's
215  * just to get us out of trouble if we somehow manage to elog() from
216  * inside a pqcomm.c routine (which ideally will never happen, but...)
217  * --------------------------------
218  */
219 static void
221 {
222  /* Do not throw away pending data, but do reset the busy flag */
223  PqCommBusy = false;
224  /* We can abort any old-style COPY OUT, too */
225  pq_endcopyout(true);
226 }
227 
228 /* --------------------------------
229  * socket_close - shutdown libpq at backend exit
230  *
231  * This is the one pg_on_exit_callback in place during BackendInitialize().
232  * That function's unusual signal handling constrains that this callback be
233  * safe to run at any instant.
234  * --------------------------------
235  */
236 static void
238 {
239  /* Nothing to do in a standalone backend, where MyProcPort is NULL. */
240  if (MyProcPort != NULL)
241  {
242 #if defined(ENABLE_GSS) || defined(ENABLE_SSPI)
243 #ifdef ENABLE_GSS
244  OM_uint32 min_s;
245 
246  /*
247  * Shutdown GSSAPI layer. This section does nothing when interrupting
248  * BackendInitialize(), because pg_GSS_recvauth() makes first use of
249  * "ctx" and "cred".
250  */
251  if (MyProcPort->gss->ctx != GSS_C_NO_CONTEXT)
252  gss_delete_sec_context(&min_s, &MyProcPort->gss->ctx, NULL);
253 
254  if (MyProcPort->gss->cred != GSS_C_NO_CREDENTIAL)
255  gss_release_cred(&min_s, &MyProcPort->gss->cred);
256 #endif /* ENABLE_GSS */
257 
258  /*
259  * GSS and SSPI share the port->gss struct. Since nowhere else does a
260  * postmaster child free this, doing so is safe when interrupting
261  * BackendInitialize().
262  */
263  free(MyProcPort->gss);
264 #endif /* ENABLE_GSS || ENABLE_SSPI */
265 
266  /*
267  * Cleanly shut down SSL layer. Nowhere else does a postmaster child
268  * call this, so this is safe when interrupting BackendInitialize().
269  */
271 
272  /*
273  * Formerly we did an explicit close() here, but it seems better to
274  * leave the socket open until the process dies. This allows clients
275  * to perform a "synchronous close" if they care --- wait till the
276  * transport layer reports connection closure, and you can be sure the
277  * backend has exited.
278  *
279  * We do set sock to PGINVALID_SOCKET to prevent any further I/O,
280  * though.
281  */
283  }
284 }
285 
286 
287 
288 /*
289  * Streams -- wrapper around Unix socket system calls
290  *
291  *
292  * Stream functions are used for vanilla TCP connection protocol.
293  */
294 
295 
296 /*
297  * StreamServerPort -- open a "listening" port to accept connections.
298  *
299  * family should be AF_UNIX or AF_UNSPEC; portNumber is the port number.
300  * For AF_UNIX ports, hostName should be NULL and unixSocketDir must be
301  * specified. For TCP ports, hostName is either NULL for all interfaces or
302  * the interface to listen on, and unixSocketDir is ignored (can be NULL).
303  *
304  * Successfully opened sockets are added to the ListenSocket[] array (of
305  * length MaxListen), at the first position that isn't PGINVALID_SOCKET.
306  *
307  * RETURNS: STATUS_OK or STATUS_ERROR
308  */
309 
310 int
311 StreamServerPort(int family, char *hostName, unsigned short portNumber,
312  char *unixSocketDir,
313  pgsocket ListenSocket[], int MaxListen)
314 {
315  pgsocket fd;
316  int err;
317  int maxconn;
318  int ret;
319  char portNumberStr[32];
320  const char *familyDesc;
321  char familyDescBuf[64];
322  char *service;
323  struct addrinfo *addrs = NULL,
324  *addr;
325  struct addrinfo hint;
326  int listen_index = 0;
327  int added = 0;
328 
329 #ifdef HAVE_UNIX_SOCKETS
330  char unixSocketPath[MAXPGPATH];
331 #endif
332 #if !defined(WIN32) || defined(IPV6_V6ONLY)
333  int one = 1;
334 #endif
335 
336  /* Initialize hint structure */
337  MemSet(&hint, 0, sizeof(hint));
338  hint.ai_family = family;
339  hint.ai_flags = AI_PASSIVE;
340  hint.ai_socktype = SOCK_STREAM;
341 
342 #ifdef HAVE_UNIX_SOCKETS
343  if (family == AF_UNIX)
344  {
345  /*
346  * Create unixSocketPath from portNumber and unixSocketDir and lock
347  * that file path
348  */
349  UNIXSOCK_PATH(unixSocketPath, portNumber, unixSocketDir);
350  if (strlen(unixSocketPath) >= UNIXSOCK_PATH_BUFLEN)
351  {
352  ereport(LOG,
353  (errmsg("Unix-domain socket path \"%s\" is too long (maximum %d bytes)",
354  unixSocketPath,
355  (int) (UNIXSOCK_PATH_BUFLEN - 1))));
356  return STATUS_ERROR;
357  }
358  if (Lock_AF_UNIX(unixSocketDir, unixSocketPath) != STATUS_OK)
359  return STATUS_ERROR;
360  service = unixSocketPath;
361  }
362  else
363 #endif /* HAVE_UNIX_SOCKETS */
364  {
365  snprintf(portNumberStr, sizeof(portNumberStr), "%d", portNumber);
366  service = portNumberStr;
367  }
368 
369  ret = pg_getaddrinfo_all(hostName, service, &hint, &addrs);
370  if (ret || !addrs)
371  {
372  if (hostName)
373  ereport(LOG,
374  (errmsg("could not translate host name \"%s\", service \"%s\" to address: %s",
375  hostName, service, gai_strerror(ret))));
376  else
377  ereport(LOG,
378  (errmsg("could not translate service \"%s\" to address: %s",
379  service, gai_strerror(ret))));
380  if (addrs)
381  pg_freeaddrinfo_all(hint.ai_family, addrs);
382  return STATUS_ERROR;
383  }
384 
385  for (addr = addrs; addr; addr = addr->ai_next)
386  {
387  if (!IS_AF_UNIX(family) && IS_AF_UNIX(addr->ai_family))
388  {
389  /*
390  * Only set up a unix domain socket when they really asked for it.
391  * The service/port is different in that case.
392  */
393  continue;
394  }
395 
396  /* See if there is still room to add 1 more socket. */
397  for (; listen_index < MaxListen; listen_index++)
398  {
399  if (ListenSocket[listen_index] == PGINVALID_SOCKET)
400  break;
401  }
402  if (listen_index >= MaxListen)
403  {
404  ereport(LOG,
405  (errmsg("could not bind to all requested addresses: MAXLISTEN (%d) exceeded",
406  MaxListen)));
407  break;
408  }
409 
410  /* set up family name for possible error messages */
411  switch (addr->ai_family)
412  {
413  case AF_INET:
414  familyDesc = _("IPv4");
415  break;
416 #ifdef HAVE_IPV6
417  case AF_INET6:
418  familyDesc = _("IPv6");
419  break;
420 #endif
421 #ifdef HAVE_UNIX_SOCKETS
422  case AF_UNIX:
423  familyDesc = _("Unix");
424  break;
425 #endif
426  default:
427  snprintf(familyDescBuf, sizeof(familyDescBuf),
428  _("unrecognized address family %d"),
429  addr->ai_family);
430  familyDesc = familyDescBuf;
431  break;
432  }
433 
434  if ((fd = socket(addr->ai_family, SOCK_STREAM, 0)) == PGINVALID_SOCKET)
435  {
436  ereport(LOG,
438  /* translator: %s is IPv4, IPv6, or Unix */
439  errmsg("could not create %s socket: %m",
440  familyDesc)));
441  continue;
442  }
443 
444 #ifndef WIN32
445 
446  /*
447  * Without the SO_REUSEADDR flag, a new postmaster can't be started
448  * right away after a stop or crash, giving "address already in use"
449  * error on TCP ports.
450  *
451  * On win32, however, this behavior only happens if the
452  * SO_EXCLUSIVEADDRUSE is set. With SO_REUSEADDR, win32 allows multiple
453  * servers to listen on the same address, resulting in unpredictable
454  * behavior. With no flags at all, win32 behaves as Unix with
455  * SO_REUSEADDR.
456  */
457  if (!IS_AF_UNIX(addr->ai_family))
458  {
459  if ((setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
460  (char *) &one, sizeof(one))) == -1)
461  {
462  ereport(LOG,
464  errmsg("setsockopt(SO_REUSEADDR) failed: %m")));
465  closesocket(fd);
466  continue;
467  }
468  }
469 #endif
470 
471 #ifdef IPV6_V6ONLY
472  if (addr->ai_family == AF_INET6)
473  {
474  if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY,
475  (char *) &one, sizeof(one)) == -1)
476  {
477  ereport(LOG,
479  errmsg("setsockopt(IPV6_V6ONLY) failed: %m")));
480  closesocket(fd);
481  continue;
482  }
483  }
484 #endif
485 
486  /*
487  * Note: This might fail on some OS's, like Linux older than
488  * 2.4.21-pre3, that don't have the IPV6_V6ONLY socket option, and map
489  * ipv4 addresses to ipv6. It will show ::ffff:ipv4 for all ipv4
490  * connections.
491  */
492  err = bind(fd, addr->ai_addr, addr->ai_addrlen);
493  if (err < 0)
494  {
495  ereport(LOG,
497  /* translator: %s is IPv4, IPv6, or Unix */
498  errmsg("could not bind %s socket: %m",
499  familyDesc),
500  (IS_AF_UNIX(addr->ai_family)) ?
501  errhint("Is another postmaster already running on port %d?"
502  " If not, remove socket file \"%s\" and retry.",
503  (int) portNumber, service) :
504  errhint("Is another postmaster already running on port %d?"
505  " If not, wait a few seconds and retry.",
506  (int) portNumber)));
507  closesocket(fd);
508  continue;
509  }
510 
511 #ifdef HAVE_UNIX_SOCKETS
512  if (addr->ai_family == AF_UNIX)
513  {
514  if (Setup_AF_UNIX(service) != STATUS_OK)
515  {
516  closesocket(fd);
517  break;
518  }
519  }
520 #endif
521 
522  /*
523  * Select appropriate accept-queue length limit. PG_SOMAXCONN is only
524  * intended to provide a clamp on the request on platforms where an
525  * overly large request provokes a kernel error (are there any?).
526  */
527  maxconn = MaxBackends * 2;
528  if (maxconn > PG_SOMAXCONN)
529  maxconn = PG_SOMAXCONN;
530 
531  err = listen(fd, maxconn);
532  if (err < 0)
533  {
534  ereport(LOG,
536  /* translator: %s is IPv4, IPv6, or Unix */
537  errmsg("could not listen on %s socket: %m",
538  familyDesc)));
539  closesocket(fd);
540  continue;
541  }
542  ListenSocket[listen_index] = fd;
543  added++;
544  }
545 
546  pg_freeaddrinfo_all(hint.ai_family, addrs);
547 
548  if (!added)
549  return STATUS_ERROR;
550 
551  return STATUS_OK;
552 }
553 
554 
555 #ifdef HAVE_UNIX_SOCKETS
556 
557 /*
558  * Lock_AF_UNIX -- configure unix socket file path
559  */
560 static int
561 Lock_AF_UNIX(char *unixSocketDir, char *unixSocketPath)
562 {
563  /*
564  * Grab an interlock file associated with the socket file.
565  *
566  * Note: there are two reasons for using a socket lock file, rather than
567  * trying to interlock directly on the socket itself. First, it's a lot
568  * more portable, and second, it lets us remove any pre-existing socket
569  * file without race conditions.
570  */
571  CreateSocketLockFile(unixSocketPath, true, unixSocketDir);
572 
573  /*
574  * Once we have the interlock, we can safely delete any pre-existing
575  * socket file to avoid failure at bind() time.
576  */
577  (void) unlink(unixSocketPath);
578 
579  /*
580  * Remember socket file pathnames for later maintenance.
581  */
582  sock_paths = lappend(sock_paths, pstrdup(unixSocketPath));
583 
584  return STATUS_OK;
585 }
586 
587 
588 /*
589  * Setup_AF_UNIX -- configure unix socket permissions
590  */
591 static int
592 Setup_AF_UNIX(char *sock_path)
593 {
594  /*
595  * Fix socket ownership/permission if requested. Note we must do this
596  * before we listen() to avoid a window where unwanted connections could
597  * get accepted.
598  */
600  if (Unix_socket_group[0] != '\0')
601  {
602 #ifdef WIN32
603  elog(WARNING, "configuration item unix_socket_group is not supported on this platform");
604 #else
605  char *endptr;
606  unsigned long val;
607  gid_t gid;
608 
609  val = strtoul(Unix_socket_group, &endptr, 10);
610  if (*endptr == '\0')
611  { /* numeric group id */
612  gid = val;
613  }
614  else
615  { /* convert group name to id */
616  struct group *gr;
617 
618  gr = getgrnam(Unix_socket_group);
619  if (!gr)
620  {
621  ereport(LOG,
622  (errmsg("group \"%s\" does not exist",
624  return STATUS_ERROR;
625  }
626  gid = gr->gr_gid;
627  }
628  if (chown(sock_path, -1, gid) == -1)
629  {
630  ereport(LOG,
632  errmsg("could not set group of file \"%s\": %m",
633  sock_path)));
634  return STATUS_ERROR;
635  }
636 #endif
637  }
638 
639  if (chmod(sock_path, Unix_socket_permissions) == -1)
640  {
641  ereport(LOG,
643  errmsg("could not set permissions of file \"%s\": %m",
644  sock_path)));
645  return STATUS_ERROR;
646  }
647  return STATUS_OK;
648 }
649 #endif /* HAVE_UNIX_SOCKETS */
650 
651 
652 /*
653  * StreamConnection -- create a new connection with client using
654  * server port. Set port->sock to the FD of the new connection.
655  *
656  * ASSUME: that this doesn't need to be non-blocking because
657  * the Postmaster uses select() to tell when the server master
658  * socket is ready for accept().
659  *
660  * RETURNS: STATUS_OK or STATUS_ERROR
661  */
662 int
664 {
665  /* accept connection and fill in the client (remote) address */
666  port->raddr.salen = sizeof(port->raddr.addr);
667  if ((port->sock = accept(server_fd,
668  (struct sockaddr *) & port->raddr.addr,
669  &port->raddr.salen)) == PGINVALID_SOCKET)
670  {
671  ereport(LOG,
673  errmsg("could not accept new connection: %m")));
674 
675  /*
676  * If accept() fails then postmaster.c will still see the server
677  * socket as read-ready, and will immediately try again. To avoid
678  * uselessly sucking lots of CPU, delay a bit before trying again.
679  * (The most likely reason for failure is being out of kernel file
680  * table slots; we can do little except hope some will get freed up.)
681  */
682  pg_usleep(100000L); /* wait 0.1 sec */
683  return STATUS_ERROR;
684  }
685 
686  /* fill in the server (local) address */
687  port->laddr.salen = sizeof(port->laddr.addr);
688  if (getsockname(port->sock,
689  (struct sockaddr *) & port->laddr.addr,
690  &port->laddr.salen) < 0)
691  {
692  elog(LOG, "getsockname() failed: %m");
693  return STATUS_ERROR;
694  }
695 
696  /* select NODELAY and KEEPALIVE options if it's a TCP connection */
697  if (!IS_AF_UNIX(port->laddr.addr.ss_family))
698  {
699  int on;
700 #ifdef WIN32
701  int oldopt;
702  int optlen;
703  int newopt;
704 #endif
705 
706 #ifdef TCP_NODELAY
707  on = 1;
708  if (setsockopt(port->sock, IPPROTO_TCP, TCP_NODELAY,
709  (char *) &on, sizeof(on)) < 0)
710  {
711  elog(LOG, "setsockopt(TCP_NODELAY) failed: %m");
712  return STATUS_ERROR;
713  }
714 #endif
715  on = 1;
716  if (setsockopt(port->sock, SOL_SOCKET, SO_KEEPALIVE,
717  (char *) &on, sizeof(on)) < 0)
718  {
719  elog(LOG, "setsockopt(SO_KEEPALIVE) failed: %m");
720  return STATUS_ERROR;
721  }
722 
723 #ifdef WIN32
724 
725  /*
726  * This is a Win32 socket optimization. The OS send buffer should be
727  * large enough to send the whole Postgres send buffer in one go, or
728  * performance suffers. The Postgres send buffer can be enlarged if a
729  * very large message needs to be sent, but we won't attempt to
730  * enlarge the OS buffer if that happens, so somewhat arbitrarily
731  * ensure that the OS buffer is at least PQ_SEND_BUFFER_SIZE * 4.
732  * (That's 32kB with the current default).
733  *
734  * The default OS buffer size used to be 8kB in earlier Windows
735  * versions, but was raised to 64kB in Windows 2012. So it shouldn't
736  * be necessary to change it in later versions anymore. Changing it
737  * unnecessarily can even reduce performance, because setting
738  * SO_SNDBUF in the application disables the "dynamic send buffering"
739  * feature that was introduced in Windows 7. So before fiddling with
740  * SO_SNDBUF, check if the current buffer size is already large enough
741  * and only increase it if necessary.
742  *
743  * See https://support.microsoft.com/kb/823764/EN-US/ and
744  * https://msdn.microsoft.com/en-us/library/bb736549%28v=vs.85%29.aspx
745  */
746  optlen = sizeof(oldopt);
747  if (getsockopt(port->sock, SOL_SOCKET, SO_SNDBUF, (char *) &oldopt,
748  &optlen) < 0)
749  {
750  elog(LOG, "getsockopt(SO_SNDBUF) failed: %m");
751  return STATUS_ERROR;
752  }
753  newopt = PQ_SEND_BUFFER_SIZE * 4;
754  if (oldopt < newopt)
755  {
756  if (setsockopt(port->sock, SOL_SOCKET, SO_SNDBUF, (char *) &newopt,
757  sizeof(newopt)) < 0)
758  {
759  elog(LOG, "setsockopt(SO_SNDBUF) failed: %m");
760  return STATUS_ERROR;
761  }
762  }
763 #endif
764 
765  /*
766  * Also apply the current keepalive parameters. If we fail to set a
767  * parameter, don't error out, because these aren't universally
768  * supported. (Note: you might think we need to reset the GUC
769  * variables to 0 in such a case, but it's not necessary because the
770  * show hooks for these variables report the truth anyway.)
771  */
775  }
776 
777  return STATUS_OK;
778 }
779 
780 /*
781  * StreamClose -- close a client/backend connection
782  *
783  * NOTE: this is NOT used to terminate a session; it is just used to release
784  * the file descriptor in a process that should no longer have the socket
785  * open. (For example, the postmaster calls this after passing ownership
786  * of the connection to a child process.) It is expected that someone else
787  * still has the socket open. So, we only want to close the descriptor,
788  * we do NOT want to send anything to the far end.
789  */
790 void
792 {
793  closesocket(sock);
794 }
795 
796 /*
797  * TouchSocketFiles -- mark socket files as recently accessed
798  *
799  * This routine should be called every so often to ensure that the socket
800  * files have a recent mod date (ordinary operations on sockets usually won't
801  * change the mod date). That saves them from being removed by
802  * overenthusiastic /tmp-directory-cleaner daemons. (Another reason we should
803  * never have put the socket file in /tmp...)
804  */
805 void
807 {
808  ListCell *l;
809 
810  /* Loop through all created sockets... */
811  foreach(l, sock_paths)
812  {
813  char *sock_path = (char *) lfirst(l);
814 
815  /*
816  * utime() is POSIX standard, utimes() is a common alternative. If we
817  * have neither, there's no way to affect the mod or access time of
818  * the socket :-(
819  *
820  * In either path, we ignore errors; there's no point in complaining.
821  */
822 #ifdef HAVE_UTIME
823  utime(sock_path, NULL);
824 #else /* !HAVE_UTIME */
825 #ifdef HAVE_UTIMES
826  utimes(sock_path, NULL);
827 #endif /* HAVE_UTIMES */
828 #endif /* HAVE_UTIME */
829  }
830 }
831 
832 /*
833  * RemoveSocketFiles -- unlink socket files at postmaster shutdown
834  */
835 void
837 {
838  ListCell *l;
839 
840  /* Loop through all created sockets... */
841  foreach(l, sock_paths)
842  {
843  char *sock_path = (char *) lfirst(l);
844 
845  /* Ignore any error. */
846  (void) unlink(sock_path);
847  }
848  /* Since we're about to exit, no need to reclaim storage */
849  sock_paths = NIL;
850 }
851 
852 
853 /* --------------------------------
854  * Low-level I/O routines begin here.
855  *
856  * These routines communicate with a frontend client across a connection
857  * already established by the preceding routines.
858  * --------------------------------
859  */
860 
861 /* --------------------------------
862  * socket_set_nonblocking - set socket blocking/non-blocking
863  *
864  * Sets the socket non-blocking if nonblocking is TRUE, or sets it
865  * blocking otherwise.
866  * --------------------------------
867  */
868 static void
869 socket_set_nonblocking(bool nonblocking)
870 {
871  if (MyProcPort == NULL)
872  ereport(ERROR,
873  (errcode(ERRCODE_CONNECTION_DOES_NOT_EXIST),
874  errmsg("there is no client connection")));
875 
876  MyProcPort->noblock = nonblocking;
877 }
878 
879 /* --------------------------------
880  * pq_recvbuf - load some bytes into the input buffer
881  *
882  * returns 0 if OK, EOF if trouble
883  * --------------------------------
884  */
885 static int
887 {
888  if (PqRecvPointer > 0)
889  {
891  {
892  /* still some unread data, left-justify it in the buffer */
896  PqRecvPointer = 0;
897  }
898  else
900  }
901 
902  /* Ensure that we're in blocking mode */
903  socket_set_nonblocking(false);
904 
905  /* Can fill buffer from PqRecvLength and upwards */
906  for (;;)
907  {
908  int r;
909 
912 
913  if (r < 0)
914  {
915  if (errno == EINTR)
916  continue; /* Ok if interrupted */
917 
918  /*
919  * Careful: an ereport() that tries to write to the client would
920  * cause recursion to here, leading to stack overflow and core
921  * dump! This message must go *only* to the postmaster log.
922  */
925  errmsg("could not receive data from client: %m")));
926  return EOF;
927  }
928  if (r == 0)
929  {
930  /*
931  * EOF detected. We used to write a log message here, but it's
932  * better to expect the ultimate caller to do that.
933  */
934  return EOF;
935  }
936  /* r contains number of bytes read, so just incr length */
937  PqRecvLength += r;
938  return 0;
939  }
940 }
941 
942 /* --------------------------------
943  * pq_getbyte - get a single byte from connection, or return EOF
944  * --------------------------------
945  */
946 int
948 {
950 
951  while (PqRecvPointer >= PqRecvLength)
952  {
953  if (pq_recvbuf()) /* If nothing in buffer, then recv some */
954  return EOF; /* Failed to recv data */
955  }
956  return (unsigned char) PqRecvBuffer[PqRecvPointer++];
957 }
958 
959 /* --------------------------------
960  * pq_peekbyte - peek at next byte from connection
961  *
962  * Same as pq_getbyte() except we don't advance the pointer.
963  * --------------------------------
964  */
965 int
967 {
969 
970  while (PqRecvPointer >= PqRecvLength)
971  {
972  if (pq_recvbuf()) /* If nothing in buffer, then recv some */
973  return EOF; /* Failed to recv data */
974  }
975  return (unsigned char) PqRecvBuffer[PqRecvPointer];
976 }
977 
978 /* --------------------------------
979  * pq_getbyte_if_available - get a single byte from connection,
980  * if available
981  *
982  * The received byte is stored in *c. Returns 1 if a byte was read,
983  * 0 if no data was available, or EOF if trouble.
984  * --------------------------------
985  */
986 int
987 pq_getbyte_if_available(unsigned char *c)
988 {
989  int r;
990 
992 
994  {
995  *c = PqRecvBuffer[PqRecvPointer++];
996  return 1;
997  }
998 
999  /* Put the socket into non-blocking mode */
1000  socket_set_nonblocking(true);
1001 
1002  r = secure_read(MyProcPort, c, 1);
1003  if (r < 0)
1004  {
1005  /*
1006  * Ok if no data available without blocking or interrupted (though
1007  * EINTR really shouldn't happen with a non-blocking socket). Report
1008  * other errors.
1009  */
1010  if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)
1011  r = 0;
1012  else
1013  {
1014  /*
1015  * Careful: an ereport() that tries to write to the client would
1016  * cause recursion to here, leading to stack overflow and core
1017  * dump! This message must go *only* to the postmaster log.
1018  */
1021  errmsg("could not receive data from client: %m")));
1022  r = EOF;
1023  }
1024  }
1025  else if (r == 0)
1026  {
1027  /* EOF detected */
1028  r = EOF;
1029  }
1030 
1031  return r;
1032 }
1033 
1034 /* --------------------------------
1035  * pq_getbytes - get a known number of bytes from connection
1036  *
1037  * returns 0 if OK, EOF if trouble
1038  * --------------------------------
1039  */
1040 int
1041 pq_getbytes(char *s, size_t len)
1042 {
1043  size_t amount;
1044 
1046 
1047  while (len > 0)
1048  {
1049  while (PqRecvPointer >= PqRecvLength)
1050  {
1051  if (pq_recvbuf()) /* If nothing in buffer, then recv some */
1052  return EOF; /* Failed to recv data */
1053  }
1054  amount = PqRecvLength - PqRecvPointer;
1055  if (amount > len)
1056  amount = len;
1057  memcpy(s, PqRecvBuffer + PqRecvPointer, amount);
1058  PqRecvPointer += amount;
1059  s += amount;
1060  len -= amount;
1061  }
1062  return 0;
1063 }
1064 
1065 /* --------------------------------
1066  * pq_discardbytes - throw away a known number of bytes
1067  *
1068  * same as pq_getbytes except we do not copy the data to anyplace.
1069  * this is used for resynchronizing after read errors.
1070  *
1071  * returns 0 if OK, EOF if trouble
1072  * --------------------------------
1073  */
1074 static int
1075 pq_discardbytes(size_t len)
1076 {
1077  size_t amount;
1078 
1080 
1081  while (len > 0)
1082  {
1083  while (PqRecvPointer >= PqRecvLength)
1084  {
1085  if (pq_recvbuf()) /* If nothing in buffer, then recv some */
1086  return EOF; /* Failed to recv data */
1087  }
1088  amount = PqRecvLength - PqRecvPointer;
1089  if (amount > len)
1090  amount = len;
1091  PqRecvPointer += amount;
1092  len -= amount;
1093  }
1094  return 0;
1095 }
1096 
1097 /* --------------------------------
1098  * pq_getstring - get a null terminated string from connection
1099  *
1100  * The return value is placed in an expansible StringInfo, which has
1101  * already been initialized by the caller.
1102  *
1103  * This is used only for dealing with old-protocol clients. The idea
1104  * is to produce a StringInfo that looks the same as we would get from
1105  * pq_getmessage() with a newer client; we will then process it with
1106  * pq_getmsgstring. Therefore, no character set conversion is done here,
1107  * even though this is presumably useful only for text.
1108  *
1109  * returns 0 if OK, EOF if trouble
1110  * --------------------------------
1111  */
1112 int
1114 {
1115  int i;
1116 
1118 
1119  resetStringInfo(s);
1120 
1121  /* Read until we get the terminating '\0' */
1122  for (;;)
1123  {
1124  while (PqRecvPointer >= PqRecvLength)
1125  {
1126  if (pq_recvbuf()) /* If nothing in buffer, then recv some */
1127  return EOF; /* Failed to recv data */
1128  }
1129 
1130  for (i = PqRecvPointer; i < PqRecvLength; i++)
1131  {
1132  if (PqRecvBuffer[i] == '\0')
1133  {
1134  /* include the '\0' in the copy */
1136  i - PqRecvPointer + 1);
1137  PqRecvPointer = i + 1; /* advance past \0 */
1138  return 0;
1139  }
1140  }
1141 
1142  /* If we're here we haven't got the \0 in the buffer yet. */
1144  PqRecvLength - PqRecvPointer);
1146  }
1147 }
1148 
1149 
1150 /* --------------------------------
1151  * pq_startmsgread - begin reading a message from the client.
1152  *
1153  * This must be called before any of the pq_get* functions.
1154  * --------------------------------
1155  */
1156 void
1158 {
1159  /*
1160  * There shouldn't be a read active already, but let's check just to be
1161  * sure.
1162  */
1163  if (PqCommReadingMsg)
1164  ereport(FATAL,
1165  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1166  errmsg("terminating connection because protocol synchronization was lost")));
1167 
1168  PqCommReadingMsg = true;
1169 }
1170 
1171 
1172 /* --------------------------------
1173  * pq_endmsgread - finish reading message.
1174  *
1175  * This must be called after reading a V2 protocol message with
1176  * pq_getstring() and friends, to indicate that we have read the whole
1177  * message. In V3 protocol, pq_getmessage() does this implicitly.
1178  * --------------------------------
1179  */
1180 void
1182 {
1184 
1185  PqCommReadingMsg = false;
1186 }
1187 
1188 /* --------------------------------
1189  * pq_is_reading_msg - are we currently reading a message?
1190  *
1191  * This is used in error recovery at the outer idle loop to detect if we have
1192  * lost protocol sync, and need to terminate the connection. pq_startmsgread()
1193  * will check for that too, but it's nicer to detect it earlier.
1194  * --------------------------------
1195  */
1196 bool
1198 {
1199  return PqCommReadingMsg;
1200 }
1201 
1202 /* --------------------------------
1203  * pq_getmessage - get a message with length word from connection
1204  *
1205  * The return value is placed in an expansible StringInfo, which has
1206  * already been initialized by the caller.
1207  * Only the message body is placed in the StringInfo; the length word
1208  * is removed. Also, s->cursor is initialized to zero for convenience
1209  * in scanning the message contents.
1210  *
1211  * If maxlen is not zero, it is an upper limit on the length of the
1212  * message we are willing to accept. We abort the connection (by
1213  * returning EOF) if client tries to send more than that.
1214  *
1215  * returns 0 if OK, EOF if trouble
1216  * --------------------------------
1217  */
1218 int
1220 {
1221  int32 len;
1222 
1224 
1225  resetStringInfo(s);
1226 
1227  /* Read message length word */
1228  if (pq_getbytes((char *) &len, 4) == EOF)
1229  {
1231  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1232  errmsg("unexpected EOF within message length word")));
1233  return EOF;
1234  }
1235 
1236  len = ntohl(len);
1237 
1238  if (len < 4 ||
1239  (maxlen > 0 && len > maxlen))
1240  {
1242  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1243  errmsg("invalid message length")));
1244  return EOF;
1245  }
1246 
1247  len -= 4; /* discount length itself */
1248 
1249  if (len > 0)
1250  {
1251  /*
1252  * Allocate space for message. If we run out of room (ridiculously
1253  * large message), we will elog(ERROR), but we want to discard the
1254  * message body so as not to lose communication sync.
1255  */
1256  PG_TRY();
1257  {
1258  enlargeStringInfo(s, len);
1259  }
1260  PG_CATCH();
1261  {
1262  if (pq_discardbytes(len) == EOF)
1264  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1265  errmsg("incomplete message from client")));
1266 
1267  /* we discarded the rest of the message so we're back in sync. */
1268  PqCommReadingMsg = false;
1269  PG_RE_THROW();
1270  }
1271  PG_END_TRY();
1272 
1273  /* And grab the message */
1274  if (pq_getbytes(s->data, len) == EOF)
1275  {
1277  (errcode(ERRCODE_PROTOCOL_VIOLATION),
1278  errmsg("incomplete message from client")));
1279  return EOF;
1280  }
1281  s->len = len;
1282  /* Place a trailing null per StringInfo convention */
1283  s->data[len] = '\0';
1284  }
1285 
1286  /* finished reading the message. */
1287  PqCommReadingMsg = false;
1288 
1289  return 0;
1290 }
1291 
1292 
1293 /* --------------------------------
1294  * pq_putbytes - send bytes to connection (not flushed until pq_flush)
1295  *
1296  * returns 0 if OK, EOF if trouble
1297  * --------------------------------
1298  */
1299 int
1300 pq_putbytes(const char *s, size_t len)
1301 {
1302  int res;
1303 
1304  /* Should only be called by old-style COPY OUT */
1306  /* No-op if reentrant call */
1307  if (PqCommBusy)
1308  return 0;
1309  PqCommBusy = true;
1310  res = internal_putbytes(s, len);
1311  PqCommBusy = false;
1312  return res;
1313 }
1314 
1315 static int
1316 internal_putbytes(const char *s, size_t len)
1317 {
1318  size_t amount;
1319 
1320  while (len > 0)
1321  {
1322  /* If buffer is full, then flush it out */
1324  {
1325  socket_set_nonblocking(false);
1326  if (internal_flush())
1327  return EOF;
1328  }
1329  amount = PqSendBufferSize - PqSendPointer;
1330  if (amount > len)
1331  amount = len;
1332  memcpy(PqSendBuffer + PqSendPointer, s, amount);
1333  PqSendPointer += amount;
1334  s += amount;
1335  len -= amount;
1336  }
1337  return 0;
1338 }
1339 
1340 /* --------------------------------
1341  * socket_flush - flush pending output
1342  *
1343  * returns 0 if OK, EOF if trouble
1344  * --------------------------------
1345  */
1346 static int
1348 {
1349  int res;
1350 
1351  /* No-op if reentrant call */
1352  if (PqCommBusy)
1353  return 0;
1354  PqCommBusy = true;
1355  socket_set_nonblocking(false);
1356  res = internal_flush();
1357  PqCommBusy = false;
1358  return res;
1359 }
1360 
1361 /* --------------------------------
1362  * internal_flush - flush pending output
1363  *
1364  * Returns 0 if OK (meaning everything was sent, or operation would block
1365  * and the socket is in non-blocking mode), or EOF if trouble.
1366  * --------------------------------
1367  */
1368 static int
1370 {
1371  static int last_reported_send_errno = 0;
1372 
1373  char *bufptr = PqSendBuffer + PqSendStart;
1374  char *bufend = PqSendBuffer + PqSendPointer;
1375 
1376  while (bufptr < bufend)
1377  {
1378  int r;
1379 
1380  r = secure_write(MyProcPort, bufptr, bufend - bufptr);
1381 
1382  if (r <= 0)
1383  {
1384  if (errno == EINTR)
1385  continue; /* Ok if we were interrupted */
1386 
1387  /*
1388  * Ok if no data writable without blocking, and the socket is in
1389  * non-blocking mode.
1390  */
1391  if (errno == EAGAIN ||
1392  errno == EWOULDBLOCK)
1393  {
1394  return 0;
1395  }
1396 
1397  /*
1398  * Careful: an ereport() that tries to write to the client would
1399  * cause recursion to here, leading to stack overflow and core
1400  * dump! This message must go *only* to the postmaster log.
1401  *
1402  * If a client disconnects while we're in the midst of output, we
1403  * might write quite a bit of data before we get to a safe query
1404  * abort point. So, suppress duplicate log messages.
1405  */
1406  if (errno != last_reported_send_errno)
1407  {
1408  last_reported_send_errno = errno;
1411  errmsg("could not send data to client: %m")));
1412  }
1413 
1414  /*
1415  * We drop the buffered data anyway so that processing can
1416  * continue, even though we'll probably quit soon. We also set a
1417  * flag that'll cause the next CHECK_FOR_INTERRUPTS to terminate
1418  * the connection.
1419  */
1420  PqSendStart = PqSendPointer = 0;
1422  InterruptPending = 1;
1423  return EOF;
1424  }
1425 
1426  last_reported_send_errno = 0; /* reset after any successful send */
1427  bufptr += r;
1428  PqSendStart += r;
1429  }
1430 
1431  PqSendStart = PqSendPointer = 0;
1432  return 0;
1433 }
1434 
1435 /* --------------------------------
1436  * pq_flush_if_writable - flush pending output if writable without blocking
1437  *
1438  * Returns 0 if OK, or EOF if trouble.
1439  * --------------------------------
1440  */
1441 static int
1443 {
1444  int res;
1445 
1446  /* Quick exit if nothing to do */
1447  if (PqSendPointer == PqSendStart)
1448  return 0;
1449 
1450  /* No-op if reentrant call */
1451  if (PqCommBusy)
1452  return 0;
1453 
1454  /* Temporarily put the socket into non-blocking mode */
1455  socket_set_nonblocking(true);
1456 
1457  PqCommBusy = true;
1458  res = internal_flush();
1459  PqCommBusy = false;
1460  return res;
1461 }
1462 
1463 /* --------------------------------
1464  * socket_is_send_pending - is there any pending data in the output buffer?
1465  * --------------------------------
1466  */
1467 static bool
1469 {
1470  return (PqSendStart < PqSendPointer);
1471 }
1472 
1473 /* --------------------------------
1474  * Message-level I/O routines begin here.
1475  *
1476  * These routines understand about the old-style COPY OUT protocol.
1477  * --------------------------------
1478  */
1479 
1480 
1481 /* --------------------------------
1482  * socket_putmessage - send a normal message (suppressed in COPY OUT mode)
1483  *
1484  * If msgtype is not '\0', it is a message type code to place before
1485  * the message body. If msgtype is '\0', then the message has no type
1486  * code (this is only valid in pre-3.0 protocols).
1487  *
1488  * len is the length of the message body data at *s. In protocol 3.0
1489  * and later, a message length word (equal to len+4 because it counts
1490  * itself too) is inserted by this routine.
1491  *
1492  * All normal messages are suppressed while old-style COPY OUT is in
1493  * progress. (In practice only a few notice messages might get emitted
1494  * then; dropping them is annoying, but at least they will still appear
1495  * in the postmaster log.)
1496  *
1497  * We also suppress messages generated while pqcomm.c is busy. This
1498  * avoids any possibility of messages being inserted within other
1499  * messages. The only known trouble case arises if SIGQUIT occurs
1500  * during a pqcomm.c routine --- quickdie() will try to send a warning
1501  * message, and the most reasonable approach seems to be to drop it.
1502  *
1503  * returns 0 if OK, EOF if trouble
1504  * --------------------------------
1505  */
1506 static int
1507 socket_putmessage(char msgtype, const char *s, size_t len)
1508 {
1509  if (DoingCopyOut || PqCommBusy)
1510  return 0;
1511  PqCommBusy = true;
1512  if (msgtype)
1513  if (internal_putbytes(&msgtype, 1))
1514  goto fail;
1516  {
1517  uint32 n32;
1518 
1519  n32 = htonl((uint32) (len + 4));
1520  if (internal_putbytes((char *) &n32, 4))
1521  goto fail;
1522  }
1523  if (internal_putbytes(s, len))
1524  goto fail;
1525  PqCommBusy = false;
1526  return 0;
1527 
1528 fail:
1529  PqCommBusy = false;
1530  return EOF;
1531 }
1532 
1533 /* --------------------------------
1534  * pq_putmessage_noblock - like pq_putmessage, but never blocks
1535  *
1536  * If the output buffer is too small to hold the message, the buffer
1537  * is enlarged.
1538  */
1539 static void
1540 socket_putmessage_noblock(char msgtype, const char *s, size_t len)
1541 {
1542  int res PG_USED_FOR_ASSERTS_ONLY;
1543  int required;
1544 
1545  /*
1546  * Ensure we have enough space in the output buffer for the message header
1547  * as well as the message itself.
1548  */
1549  required = PqSendPointer + 1 + 4 + len;
1550  if (required > PqSendBufferSize)
1551  {
1552  PqSendBuffer = repalloc(PqSendBuffer, required);
1553  PqSendBufferSize = required;
1554  }
1555  res = pq_putmessage(msgtype, s, len);
1556  Assert(res == 0); /* should not fail when the message fits in
1557  * buffer */
1558 }
1559 
1560 
1561 /* --------------------------------
1562  * socket_startcopyout - inform libpq that an old-style COPY OUT transfer
1563  * is beginning
1564  * --------------------------------
1565  */
1566 static void
1568 {
1569  DoingCopyOut = true;
1570 }
1571 
1572 /* --------------------------------
1573  * socket_endcopyout - end an old-style COPY OUT transfer
1574  *
1575  * If errorAbort is indicated, we are aborting a COPY OUT due to an error,
1576  * and must send a terminator line. Since a partial data line might have
1577  * been emitted, send a couple of newlines first (the first one could
1578  * get absorbed by a backslash...) Note that old-style COPY OUT does
1579  * not allow binary transfers, so a textual terminator is always correct.
1580  * --------------------------------
1581  */
1582 static void
1583 socket_endcopyout(bool errorAbort)
1584 {
1585  if (!DoingCopyOut)
1586  return;
1587  if (errorAbort)
1588  pq_putbytes("\n\n\\.\n", 5);
1589  /* in non-error case, copy.c will have emitted the terminator line */
1590  DoingCopyOut = false;
1591 }
1592 
1593 /*
1594  * Support for TCP Keepalive parameters
1595  */
1596 
/*
 * On Windows, we need to set both idle and interval at the same time.
 * We also cannot reset them to the default (setting to zero will
 * actually set them to zero, not default), therefore we fallback to
 * the out-of-the-box default instead.
 *
 * idle and interval are given in seconds; non-positive values select the
 * fallback defaults below.  On success the values actually applied are
 * recorded in the Port.
 *
 * Returns STATUS_OK on success, STATUS_ERROR if WSAIoctl() fails.
 */
#if defined(WIN32) && defined(SIO_KEEPALIVE_VALS)
static int
pq_setkeepaliveswin32(Port *port, int idle, int interval)
{
	struct tcp_keepalive ka;
	DWORD		retsize;

	if (idle <= 0)
		idle = 2 * 60 * 60;		/* default = 2 hours */
	if (interval <= 0)
		interval = 1;			/* default = 1 second */

	/* tcp_keepalive fields are filled in as seconds * 1000 */
	ka.onoff = 1;
	ka.keepalivetime = idle * 1000;
	ka.keepaliveinterval = interval * 1000;

	if (WSAIoctl(port->sock,
				 SIO_KEEPALIVE_VALS,
				 (LPVOID) &ka,
				 sizeof(ka),
				 NULL,
				 0,
				 &retsize,
				 NULL,
				 NULL)
		!= 0)
	{
		/* WSAGetLastError() returns int, so report it with %d */
		elog(LOG, "WSAIoctl(SIO_KEEPALIVE_VALS) failed: %d",
			 WSAGetLastError());
		return STATUS_ERROR;
	}

	/* Remember what is now in effect */
	port->keepalives_idle = idle;
	port->keepalives_interval = interval;
	return STATUS_OK;
}
#endif
1641 
1642 int
1644 {
1645 #if defined(TCP_KEEPIDLE) || defined(TCP_KEEPALIVE) || defined(WIN32)
1646  if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family))
1647  return 0;
1648 
1649  if (port->keepalives_idle != 0)
1650  return port->keepalives_idle;
1651 
1652  if (port->default_keepalives_idle == 0)
1653  {
1654 #ifndef WIN32
1655  ACCEPT_TYPE_ARG3 size = sizeof(port->default_keepalives_idle);
1656 
1657 #ifdef TCP_KEEPIDLE
1658  if (getsockopt(port->sock, IPPROTO_TCP, TCP_KEEPIDLE,
1659  (char *) &port->default_keepalives_idle,
1660  &size) < 0)
1661  {
1662  elog(LOG, "getsockopt(TCP_KEEPIDLE) failed: %m");
1663  port->default_keepalives_idle = -1; /* don't know */
1664  }
1665 #else
1666  if (getsockopt(port->sock, IPPROTO_TCP, TCP_KEEPALIVE,
1667  (char *) &port->default_keepalives_idle,
1668  &size) < 0)
1669  {
1670  elog(LOG, "getsockopt(TCP_KEEPALIVE) failed: %m");
1671  port->default_keepalives_idle = -1; /* don't know */
1672  }
1673 #endif /* TCP_KEEPIDLE */
1674 #else /* WIN32 */
1675  /* We can't get the defaults on Windows, so return "don't know" */
1676  port->default_keepalives_idle = -1;
1677 #endif /* WIN32 */
1678  }
1679 
1680  return port->default_keepalives_idle;
1681 #else
1682  return 0;
1683 #endif
1684 }
1685 
1686 int
1687 pq_setkeepalivesidle(int idle, Port *port)
1688 {
1689  if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family))
1690  return STATUS_OK;
1691 
1692 #if defined(TCP_KEEPIDLE) || defined(TCP_KEEPALIVE) || defined(SIO_KEEPALIVE_VALS)
1693  if (idle == port->keepalives_idle)
1694  return STATUS_OK;
1695 
1696 #ifndef WIN32
1697  if (port->default_keepalives_idle <= 0)
1698  {
1699  if (pq_getkeepalivesidle(port) < 0)
1700  {
1701  if (idle == 0)
1702  return STATUS_OK; /* default is set but unknown */
1703  else
1704  return STATUS_ERROR;
1705  }
1706  }
1707 
1708  if (idle == 0)
1709  idle = port->default_keepalives_idle;
1710 
1711 #ifdef TCP_KEEPIDLE
1712  if (setsockopt(port->sock, IPPROTO_TCP, TCP_KEEPIDLE,
1713  (char *) &idle, sizeof(idle)) < 0)
1714  {
1715  elog(LOG, "setsockopt(TCP_KEEPIDLE) failed: %m");
1716  return STATUS_ERROR;
1717  }
1718 #else
1719  if (setsockopt(port->sock, IPPROTO_TCP, TCP_KEEPALIVE,
1720  (char *) &idle, sizeof(idle)) < 0)
1721  {
1722  elog(LOG, "setsockopt(TCP_KEEPALIVE) failed: %m");
1723  return STATUS_ERROR;
1724  }
1725 #endif
1726 
1727  port->keepalives_idle = idle;
1728 #else /* WIN32 */
1729  return pq_setkeepaliveswin32(port, idle, port->keepalives_interval);
1730 #endif
1731 #else /* TCP_KEEPIDLE || SIO_KEEPALIVE_VALS */
1732  if (idle != 0)
1733  {
1734  elog(LOG, "setting the keepalive idle time is not supported");
1735  return STATUS_ERROR;
1736  }
1737 #endif
1738  return STATUS_OK;
1739 }
1740 
1741 int
1743 {
1744 #if defined(TCP_KEEPINTVL) || defined(SIO_KEEPALIVE_VALS)
1745  if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family))
1746  return 0;
1747 
1748  if (port->keepalives_interval != 0)
1749  return port->keepalives_interval;
1750 
1751  if (port->default_keepalives_interval == 0)
1752  {
1753 #ifndef WIN32
1754  ACCEPT_TYPE_ARG3 size = sizeof(port->default_keepalives_interval);
1755 
1756  if (getsockopt(port->sock, IPPROTO_TCP, TCP_KEEPINTVL,
1757  (char *) &port->default_keepalives_interval,
1758  &size) < 0)
1759  {
1760  elog(LOG, "getsockopt(TCP_KEEPINTVL) failed: %m");
1761  port->default_keepalives_interval = -1; /* don't know */
1762  }
1763 #else
1764  /* We can't get the defaults on Windows, so return "don't know" */
1765  port->default_keepalives_interval = -1;
1766 #endif /* WIN32 */
1767  }
1768 
1769  return port->default_keepalives_interval;
1770 #else
1771  return 0;
1772 #endif
1773 }
1774 
1775 int
1776 pq_setkeepalivesinterval(int interval, Port *port)
1777 {
1778  if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family))
1779  return STATUS_OK;
1780 
1781 #if defined(TCP_KEEPINTVL) || defined (SIO_KEEPALIVE_VALS)
1782  if (interval == port->keepalives_interval)
1783  return STATUS_OK;
1784 
1785 #ifndef WIN32
1786  if (port->default_keepalives_interval <= 0)
1787  {
1788  if (pq_getkeepalivesinterval(port) < 0)
1789  {
1790  if (interval == 0)
1791  return STATUS_OK; /* default is set but unknown */
1792  else
1793  return STATUS_ERROR;
1794  }
1795  }
1796 
1797  if (interval == 0)
1798  interval = port->default_keepalives_interval;
1799 
1800  if (setsockopt(port->sock, IPPROTO_TCP, TCP_KEEPINTVL,
1801  (char *) &interval, sizeof(interval)) < 0)
1802  {
1803  elog(LOG, "setsockopt(TCP_KEEPINTVL) failed: %m");
1804  return STATUS_ERROR;
1805  }
1806 
1807  port->keepalives_interval = interval;
1808 #else /* WIN32 */
1809  return pq_setkeepaliveswin32(port, port->keepalives_idle, interval);
1810 #endif
1811 #else
1812  if (interval != 0)
1813  {
1814  elog(LOG, "setsockopt(TCP_KEEPINTVL) not supported");
1815  return STATUS_ERROR;
1816  }
1817 #endif
1818 
1819  return STATUS_OK;
1820 }
1821 
1822 int
1824 {
1825 #ifdef TCP_KEEPCNT
1826  if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family))
1827  return 0;
1828 
1829  if (port->keepalives_count != 0)
1830  return port->keepalives_count;
1831 
1832  if (port->default_keepalives_count == 0)
1833  {
1834  ACCEPT_TYPE_ARG3 size = sizeof(port->default_keepalives_count);
1835 
1836  if (getsockopt(port->sock, IPPROTO_TCP, TCP_KEEPCNT,
1837  (char *) &port->default_keepalives_count,
1838  &size) < 0)
1839  {
1840  elog(LOG, "getsockopt(TCP_KEEPCNT) failed: %m");
1841  port->default_keepalives_count = -1; /* don't know */
1842  }
1843  }
1844 
1845  return port->default_keepalives_count;
1846 #else
1847  return 0;
1848 #endif
1849 }
1850 
1851 int
1852 pq_setkeepalivescount(int count, Port *port)
1853 {
1854  if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family))
1855  return STATUS_OK;
1856 
1857 #ifdef TCP_KEEPCNT
1858  if (count == port->keepalives_count)
1859  return STATUS_OK;
1860 
1861  if (port->default_keepalives_count <= 0)
1862  {
1863  if (pq_getkeepalivescount(port) < 0)
1864  {
1865  if (count == 0)
1866  return STATUS_OK; /* default is set but unknown */
1867  else
1868  return STATUS_ERROR;
1869  }
1870  }
1871 
1872  if (count == 0)
1873  count = port->default_keepalives_count;
1874 
1875  if (setsockopt(port->sock, IPPROTO_TCP, TCP_KEEPCNT,
1876  (char *) &count, sizeof(count)) < 0)
1877  {
1878  elog(LOG, "setsockopt(TCP_KEEPCNT) failed: %m");
1879  return STATUS_ERROR;
1880  }
1881 
1882  port->keepalives_count = count;
1883 #else
1884  if (count != 0)
1885  {
1886  elog(LOG, "setsockopt(TCP_KEEPCNT) not supported");
1887  return STATUS_ERROR;
1888  }
1889 #endif
1890 
1891  return STATUS_OK;
1892 }
#define EWOULDBLOCK
Definition: win32.h:301
#define NIL
Definition: pg_list.h:69
void CreateSocketLockFile(const char *socketfile, bool amPostmaster, const char *socketDir)
Definition: miscinit.c:1082
static List * sock_paths
Definition: pqcomm.c:106
#define accept(s, addr, addrlen)
Definition: win32.h:382
#define WL_SOCKET_WRITEABLE
Definition: latch.h:126
static char PqRecvBuffer[PQ_RECV_BUFFER_SIZE]
Definition: pqcomm.c:123
static bool DoingCopyOut
Definition: pqcomm.c:132
void pg_freeaddrinfo_all(int hint_ai_family, struct addrinfo *ai)
Definition: ip.c:89
#define UNIXSOCK_PATH(path, port, sockdir)
Definition: pqcomm.h:70
int errhint(const char *fmt,...)
Definition: elog.c:987
struct Port * MyProcPort
Definition: globals.c:40
PQcommMethods * PqCommMethods
Definition: pqcomm.c:165
int pq_peekbyte(void)
Definition: pqcomm.c:966
void StreamClose(pgsocket sock)
Definition: pqcomm.c:791
int AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd, Latch *latch, void *user_data)
Definition: latch.c:613
int keepalives_idle
Definition: libpq-be.h:165
int gid_t
Definition: win32.h:261
void on_proc_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:292
static char * PqSendBuffer
Definition: pqcomm.c:118
int pq_setkeepalivesinterval(int interval, Port *port)
Definition: pqcomm.c:1776
#define UNIXSOCK_PATH_BUFLEN
Definition: pqcomm.h:86
#define closesocket
Definition: port.h:328
static int socket_flush_if_writable(void)
Definition: pqcomm.c:1442
char * pstrdup(const char *in)
Definition: mcxt.c:1165
static int PqSendStart
Definition: pqcomm.c:121
static int pq_discardbytes(size_t len)
Definition: pqcomm.c:1075
int pq_getkeepalivesinterval(Port *port)
Definition: pqcomm.c:1742
struct sockaddr_storage addr
Definition: pqcomm.h:64
static PQcommMethods PqCommSocketMethods
Definition: pqcomm.c:154
#define socket(af, type, protocol)
Definition: win32.h:379
int errcode(int sqlerrcode)
Definition: elog.c:575
Definition: libpq-be.h:118
static bool socket_is_send_pending(void)
Definition: pqcomm.c:1468
#define STATUS_ERROR
Definition: c.h:971
#define MemSet(start, val, len)
Definition: c.h:852
ssize_t secure_read(Port *port, void *ptr, size_t len)
Definition: be-secure.c:137
int snprintf(char *str, size_t count, const char *fmt,...) pg_attribute_printf(3
WaitEventSet * FeBeWaitSet
Definition: pqcomm.c:167
#define LOG
Definition: elog.h:26
#define PG_PROTOCOL_MAJOR(v)
Definition: pqcomm.h:104
int StreamConnection(pgsocket server_fd, Port *port)
Definition: pqcomm.c:663
int default_keepalives_interval
Definition: libpq-be.h:163
int default_keepalives_count
Definition: libpq-be.h:164
static int fd(const char *x, int i)
Definition: preproc-init.c:105
pgsocket sock
Definition: libpq-be.h:120
static void socket_comm_reset(void)
Definition: pqcomm.c:220
#define gai_strerror
Definition: getaddrinfo.h:148
int tcp_keepalives_idle
Definition: guc.c:473
signed int int32
Definition: c.h:253
int pg_getaddrinfo_all(const char *hostname, const char *servname, const struct addrinfo *hintp, struct addrinfo **result)
Definition: ip.c:58
static void socket_startcopyout(void)
Definition: pqcomm.c:1567
#define EAGAIN
Definition: win32.h:293
WaitEventSet * CreateWaitEventSet(MemoryContext context, int nevents)
Definition: latch.c:485
volatile bool ClientConnectionLost
Definition: globals.c:32
static int internal_flush(void)
Definition: pqcomm.c:1369
SockAddr raddr
Definition: libpq-be.h:124
static void socket_set_nonblocking(bool nonblocking)
Definition: pqcomm.c:869
void pg_usleep(long microsec)
Definition: signal.c:53
void pq_init(void)
Definition: pqcomm.c:175
static int PqRecvLength
Definition: pqcomm.c:125
int keepalives_count
Definition: libpq-be.h:167
#define ERROR
Definition: elog.h:43
#define bind(s, addr, addrlen)
Definition: win32.h:380
void pq_startmsgread(void)
Definition: pqcomm.c:1157
bool pq_is_reading_msg(void)
Definition: pqcomm.c:1197
int pq_setkeepalivesidle(int idle, Port *port)
Definition: pqcomm.c:1687
#define IS_AF_UNIX(fam)
Definition: ip.h:24
#define FATAL
Definition: elog.h:52
#define MAXPGPATH
static int pq_recvbuf(void)
Definition: pqcomm.c:886
int pq_getbyte_if_available(unsigned char *c)
Definition: pqcomm.c:987
int MaxBackends
Definition: globals.c:126
static int PqSendPointer
Definition: pqcomm.c:120
char * c
int pq_getbytes(char *s, size_t len)
Definition: pqcomm.c:1041
#define AI_PASSIVE
Definition: getaddrinfo.h:64
#define memmove(d, s, c)
Definition: c.h:1057
#define COMMERROR
Definition: elog.h:30
int errcode_for_file_access(void)
Definition: elog.c:598
void enlargeStringInfo(StringInfo str, int needed)
Definition: stringinfo.c:277
void resetStringInfo(StringInfo str)
Definition: stringinfo.c:94
unsigned int uint32
Definition: c.h:265
int pgsocket
Definition: port.h:22
ACCEPT_TYPE_ARG3 salen
Definition: pqcomm.h:65
void TouchSocketFiles(void)
Definition: pqcomm.c:806
int StreamServerPort(int family, char *hostName, unsigned short portNumber, char *unixSocketDir, pgsocket ListenSocket[], int MaxListen)
Definition: pqcomm.c:311
ssize_t secure_write(Port *port, void *ptr, size_t len)
Definition: be-secure.c:240
int unlink(const char *filename)
#define ereport(elevel, rest)
Definition: elog.h:122
#define STATUS_OK
Definition: c.h:970
int tcp_keepalives_interval
Definition: guc.c:474
MemoryContext TopMemoryContext
Definition: mcxt.c:43
int errcode_for_socket_access(void)
Definition: elog.c:669
SockAddr laddr
Definition: libpq-be.h:123
static int port
Definition: pg_regress.c:87
List * lappend(List *list, void *datum)
Definition: list.c:128
int default_keepalives_idle
Definition: libpq-be.h:162
static int PqSendBufferSize
Definition: pqcomm.c:119
static int internal_putbytes(const char *s, size_t len)
Definition: pqcomm.c:1316
#define WARNING
Definition: elog.h:40
static bool PqCommBusy
Definition: pqcomm.c:130
int pq_getmessage(StringInfo s, int maxlen)
Definition: pqcomm.c:1219
static void socket_endcopyout(bool errorAbort)
Definition: pqcomm.c:1583
#define WL_POSTMASTER_DEATH
Definition: latch.h:128
static pgsocket ListenSocket[MAXLISTEN]
Definition: postmaster.c:215
#define listen(s, backlog)
Definition: win32.h:381
uintptr_t Datum
Definition: postgres.h:374
#define PGINVALID_SOCKET
Definition: port.h:24
#define EINTR
Definition: win32.h:295
#define PG_SOMAXCONN
int pq_getbyte(void)
Definition: pqcomm.c:947
void pq_endmsgread(void)
Definition: pqcomm.c:1181
#define free(a)
Definition: header.h:60
volatile bool InterruptPending
Definition: globals.c:29
#define PG_CATCH()
Definition: elog.h:293
#define NULL
Definition: c.h:226
#define Assert(condition)
Definition: c.h:670
#define lfirst(lc)
Definition: pg_list.h:106
bool pg_set_noblock(pgsocket sock)
Definition: noblock.c:21
int pq_setkeepalivescount(int count, Port *port)
Definition: pqcomm.c:1852
int Unix_socket_permissions
Definition: pqcomm.c:102
int pq_getkeepalivescount(Port *port)
Definition: pqcomm.c:1823
int ai_socktype
Definition: getaddrinfo.h:104
void secure_close(Port *port)
Definition: be-secure.c:125
int pq_getkeepalivesidle(Port *port)
Definition: pqcomm.c:1643
#define pq_endcopyout(errorAbort)
Definition: libpq.h:48
bool noblock
Definition: libpq-be.h:121
#define PG_RE_THROW()
Definition: elog.h:314
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1021
static int socket_putmessage(char msgtype, const char *s, size_t len)
Definition: pqcomm.c:1507
static void socket_close(int code, Datum arg)
Definition: pqcomm.c:237
static int socket_flush(void)
Definition: pqcomm.c:1347
struct addrinfo * ai_next
Definition: getaddrinfo.h:109
void * gss
Definition: libpq-be.h:177
static void socket_putmessage_noblock(char msgtype, const char *s, size_t len)
Definition: pqcomm.c:1540
int errmsg(const char *fmt,...)
Definition: elog.c:797
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:749
int i
#define PQ_RECV_BUFFER_SIZE
Definition: pqcomm.c:116
int pq_getstring(StringInfo s)
Definition: pqcomm.c:1113
int tcp_keepalives_count
Definition: guc.c:475
void * arg
struct Latch * MyLatch
Definition: globals.c:51
static int PqRecvPointer
Definition: pqcomm.c:124
char * Unix_socket_group
Definition: pqcomm.c:103
static bool PqCommReadingMsg
Definition: pqcomm.c:131
#define elog
Definition: elog.h:219
#define pq_putmessage(msgtype, s, len)
Definition: libpq.h:43
void RemoveSocketFiles(void)
Definition: pqcomm.c:836
int ai_flags
Definition: getaddrinfo.h:102
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:985
#define PG_TRY()
Definition: elog.h:284
Definition: pg_list.h:45
int keepalives_interval
Definition: libpq-be.h:166
#define WL_LATCH_SET
Definition: latch.h:124
#define _(x)
Definition: elog.c:84
#define PQ_SEND_BUFFER_SIZE
Definition: pqcomm.c:115
ProtocolVersion FrontendProtocol
Definition: globals.c:27
void appendBinaryStringInfo(StringInfo str, const char *data, int datalen)
Definition: stringinfo.c:240
long val
Definition: informix.c:689
#define PG_END_TRY()
Definition: elog.h:300
int ai_family
Definition: getaddrinfo.h:103
int pq_putbytes(const char *s, size_t len)
Definition: pqcomm.c:1300