/*
 * NOTE: this file was recovered from a Doxygen HTML listing of the
 * PostgreSQL source tree ("latch.c"); navigation chrome removed.
 */
/*-------------------------------------------------------------------------
 *
 * latch.c
 *	  Routines for inter-process latches
 *
 * The Unix implementation uses the so-called self-pipe trick to overcome the
 * race condition involved with poll() (or epoll_wait() on linux) and setting
 * a global flag in the signal handler. When a latch is set and the current
 * process is waiting for it, the signal handler wakes up the poll() in
 * WaitLatch by writing a byte to a pipe. A signal by itself doesn't interrupt
 * poll() on all platforms, and even on platforms where it does, a signal that
 * arrives just before the poll() call does not prevent poll() from entering
 * sleep. An incoming byte on a pipe however reliably interrupts the sleep,
 * and causes poll() to return immediately even if the signal arrives before
 * poll() begins.
 *
 * When SetLatch is called from the same process that owns the latch,
 * SetLatch writes the byte directly to the pipe. If it's owned by another
 * process, SIGUSR1 is sent and the signal handler in the waiting process
 * writes the byte to the pipe on behalf of the signaling process.
 *
 * The Windows implementation uses Windows events that are inherited by all
 * postmaster child processes. There's no need for the self-pipe trick there.
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/storage/ipc/latch.c
 *
 *-------------------------------------------------------------------------
 */
33 #include "postgres.h"
34 
35 #include <fcntl.h>
36 #include <limits.h>
37 #include <signal.h>
38 #include <unistd.h>
39 #ifdef HAVE_SYS_EPOLL_H
40 #include <sys/epoll.h>
41 #endif
42 #ifdef HAVE_POLL_H
43 #include <poll.h>
44 #endif
45 
46 #include "miscadmin.h"
47 #include "pgstat.h"
48 #include "port/atomics.h"
49 #include "portability/instr_time.h"
50 #include "postmaster/postmaster.h"
51 #include "storage/latch.h"
52 #include "storage/pmsignal.h"
53 #include "storage/shmem.h"
54 
55 /*
56  * Select the fd readiness primitive to use. Normally the "most modern"
57  * primitive supported by the OS will be used, but for testing it can be
58  * useful to manually specify the used primitive. If desired, just add a
59  * define somewhere before this block.
60  */
61 #if defined(WAIT_USE_EPOLL) || defined(WAIT_USE_POLL) || \
62  defined(WAIT_USE_WIN32)
63 /* don't overwrite manual choice */
64 #elif defined(HAVE_SYS_EPOLL_H)
65 #define WAIT_USE_EPOLL
66 #elif defined(HAVE_POLL)
67 #define WAIT_USE_POLL
68 #elif WIN32
69 #define WAIT_USE_WIN32
70 #else
71 #error "no wait set implementation available"
72 #endif
73 
74 /* typedef in latch.h */
76 {
77  int nevents; /* number of registered events */
78  int nevents_space; /* maximum number of events in this set */
79 
80  /*
81  * Array, of nevents_space length, storing the definition of events this
82  * set is waiting for.
83  */
85 
86  /*
87  * If WL_LATCH_SET is specified in any wait event, latch is a pointer to
88  * said latch, and latch_pos the offset in the ->events array. This is
89  * useful because we check the state of the latch before performing doing
90  * syscalls related to waiting.
91  */
93  int latch_pos;
94 
95 #if defined(WAIT_USE_EPOLL)
96  int epoll_fd;
97  /* epoll_wait returns events in a user provided arrays, allocate once */
98  struct epoll_event *epoll_ret_events;
99 #elif defined(WAIT_USE_POLL)
100  /* poll expects events to be waited on every poll() call, prepare once */
101  struct pollfd *pollfds;
102 #elif defined(WAIT_USE_WIN32)
103 
104  /*
105  * Array of windows events. The first element always contains
106  * pgwin32_signal_event, so the remaining elements are offset by one (i.e.
107  * event->pos + 1).
108  */
109  HANDLE *handles;
110 #endif
111 };
112 
113 #ifndef WIN32
114 /* Are we currently in WaitLatch? The signal handler would like to know. */
115 static volatile sig_atomic_t waiting = false;
116 
117 /* Read and write ends of the self-pipe */
118 static int selfpipe_readfd = -1;
119 static int selfpipe_writefd = -1;
120 
121 /* Process owning the self-pipe --- needed for checking purposes */
122 static int selfpipe_owner_pid = 0;
123 
124 /* Private function prototypes */
125 static void sendSelfPipeByte(void);
126 static void drainSelfPipe(void);
127 #endif /* WIN32 */
128 
129 #if defined(WAIT_USE_EPOLL)
130 static void WaitEventAdjustEpoll(WaitEventSet *set, WaitEvent *event, int action);
131 #elif defined(WAIT_USE_POLL)
132 static void WaitEventAdjustPoll(WaitEventSet *set, WaitEvent *event);
133 #elif defined(WAIT_USE_WIN32)
134 static void WaitEventAdjustWin32(WaitEventSet *set, WaitEvent *event);
135 #endif
136 
137 static inline int WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
138  WaitEvent *occurred_events, int nevents);
139 
140 /*
141  * Initialize the process-local latch infrastructure.
142  *
143  * This must be called once during startup of any process that can wait on
144  * latches, before it issues any InitLatch() or OwnLatch() calls.
145  */
146 void
148 {
149 #ifndef WIN32
150  int pipefd[2];
151 
152  if (IsUnderPostmaster)
153  {
154  /*
155  * We might have inherited connections to a self-pipe created by the
156  * postmaster. It's critical that child processes create their own
157  * self-pipes, of course, and we really want them to close the
158  * inherited FDs for safety's sake.
159  */
160  if (selfpipe_owner_pid != 0)
161  {
162  /* Assert we go through here but once in a child process */
164  /* Release postmaster's pipe FDs; ignore any error */
165  (void) close(selfpipe_readfd);
166  (void) close(selfpipe_writefd);
167  /* Clean up, just for safety's sake; we'll set these below */
169  selfpipe_owner_pid = 0;
170  }
171  else
172  {
173  /*
174  * Postmaster didn't create a self-pipe ... or else we're in an
175  * EXEC_BACKEND build, in which case it doesn't matter since the
176  * postmaster's pipe FDs were closed by the action of FD_CLOEXEC.
177  */
178  Assert(selfpipe_readfd == -1);
179  }
180  }
181  else
182  {
183  /* In postmaster or standalone backend, assert we do this but once */
184  Assert(selfpipe_readfd == -1);
186  }
187 
188  /*
189  * Set up the self-pipe that allows a signal handler to wake up the
190  * poll()/epoll_wait() in WaitLatch. Make the write-end non-blocking, so
191  * that SetLatch won't block if the event has already been set many times
192  * filling the kernel buffer. Make the read-end non-blocking too, so that
193  * we can easily clear the pipe by reading until EAGAIN or EWOULDBLOCK.
194  * Also, make both FDs close-on-exec, since we surely do not want any
195  * child processes messing with them.
196  */
197  if (pipe(pipefd) < 0)
198  elog(FATAL, "pipe() failed: %m");
199  if (fcntl(pipefd[0], F_SETFL, O_NONBLOCK) == -1)
200  elog(FATAL, "fcntl(F_SETFL) failed on read-end of self-pipe: %m");
201  if (fcntl(pipefd[1], F_SETFL, O_NONBLOCK) == -1)
202  elog(FATAL, "fcntl(F_SETFL) failed on write-end of self-pipe: %m");
203  if (fcntl(pipefd[0], F_SETFD, FD_CLOEXEC) == -1)
204  elog(FATAL, "fcntl(F_SETFD) failed on read-end of self-pipe: %m");
205  if (fcntl(pipefd[1], F_SETFD, FD_CLOEXEC) == -1)
206  elog(FATAL, "fcntl(F_SETFD) failed on write-end of self-pipe: %m");
207 
208  selfpipe_readfd = pipefd[0];
209  selfpipe_writefd = pipefd[1];
211 #else
212  /* currently, nothing to do here for Windows */
213 #endif
214 }
215 
216 /*
217  * Initialize a process-local latch.
218  */
219 void
220 InitLatch(volatile Latch *latch)
221 {
222  latch->is_set = false;
223  latch->owner_pid = MyProcPid;
224  latch->is_shared = false;
225 
226 #ifndef WIN32
227  /* Assert InitializeLatchSupport has been called in this process */
229 #else
230  latch->event = CreateEvent(NULL, TRUE, FALSE, NULL);
231  if (latch->event == NULL)
232  elog(ERROR, "CreateEvent failed: error code %lu", GetLastError());
233 #endif /* WIN32 */
234 }
235 
236 /*
237  * Initialize a shared latch that can be set from other processes. The latch
238  * is initially owned by no-one; use OwnLatch to associate it with the
239  * current process.
240  *
241  * InitSharedLatch needs to be called in postmaster before forking child
242  * processes, usually right after allocating the shared memory block
243  * containing the latch with ShmemInitStruct. (The Unix implementation
244  * doesn't actually require that, but the Windows one does.) Because of
245  * this restriction, we have no concurrency issues to worry about here.
246  *
247  * Note that other handles created in this module are never marked as
248  * inheritable. Thus we do not need to worry about cleaning up child
249  * process references to postmaster-private latches or WaitEventSets.
250  */
251 void
252 InitSharedLatch(volatile Latch *latch)
253 {
254 #ifdef WIN32
255  SECURITY_ATTRIBUTES sa;
256 
257  /*
258  * Set up security attributes to specify that the events are inherited.
259  */
260  ZeroMemory(&sa, sizeof(sa));
261  sa.nLength = sizeof(sa);
262  sa.bInheritHandle = TRUE;
263 
264  latch->event = CreateEvent(&sa, TRUE, FALSE, NULL);
265  if (latch->event == NULL)
266  elog(ERROR, "CreateEvent failed: error code %lu", GetLastError());
267 #endif
268 
269  latch->is_set = false;
270  latch->owner_pid = 0;
271  latch->is_shared = true;
272 }
273 
274 /*
275  * Associate a shared latch with the current process, allowing it to
276  * wait on the latch.
277  *
278  * Although there is a sanity check for latch-already-owned, we don't do
279  * any sort of locking here, meaning that we could fail to detect the error
280  * if two processes try to own the same latch at about the same time. If
281  * there is any risk of that, caller must provide an interlock to prevent it.
282  *
283  * In any process that calls OwnLatch(), make sure that
284  * latch_sigusr1_handler() is called from the SIGUSR1 signal handler,
285  * as shared latches use SIGUSR1 for inter-process communication.
286  */
287 void
288 OwnLatch(volatile Latch *latch)
289 {
290  /* Sanity checks */
291  Assert(latch->is_shared);
292 
293 #ifndef WIN32
294  /* Assert InitializeLatchSupport has been called in this process */
296 #endif
297 
298  if (latch->owner_pid != 0)
299  elog(ERROR, "latch already owned");
300 
301  latch->owner_pid = MyProcPid;
302 }
303 
304 /*
305  * Disown a shared latch currently owned by the current process.
306  */
307 void
308 DisownLatch(volatile Latch *latch)
309 {
310  Assert(latch->is_shared);
311  Assert(latch->owner_pid == MyProcPid);
312 
313  latch->owner_pid = 0;
314 }
315 
316 /*
317  * Wait for a given latch to be set, or for postmaster death, or until timeout
318  * is exceeded. 'wakeEvents' is a bitmask that specifies which of those events
319  * to wait for. If the latch is already set (and WL_LATCH_SET is given), the
320  * function returns immediately.
321  *
322  * The "timeout" is given in milliseconds. It must be >= 0 if WL_TIMEOUT flag
323  * is given. Although it is declared as "long", we don't actually support
324  * timeouts longer than INT_MAX milliseconds. Note that some extra overhead
325  * is incurred when WL_TIMEOUT is given, so avoid using a timeout if possible.
326  *
327  * The latch must be owned by the current process, ie. it must be a
328  * process-local latch initialized with InitLatch, or a shared latch
329  * associated with the current process by calling OwnLatch.
330  *
331  * Returns bit mask indicating which condition(s) caused the wake-up. Note
332  * that if multiple wake-up conditions are true, there is no guarantee that
333  * we return all of them in one call, but we will return at least one.
334  */
335 int
336 WaitLatch(volatile Latch *latch, int wakeEvents, long timeout,
337  uint32 wait_event_info)
338 {
339  return WaitLatchOrSocket(latch, wakeEvents, PGINVALID_SOCKET, timeout,
340  wait_event_info);
341 }
342 
343 /*
344  * Like WaitLatch, but with an extra socket argument for WL_SOCKET_*
345  * conditions.
346  *
347  * When waiting on a socket, EOF and error conditions always cause the socket
348  * to be reported as readable/writable/connected, so that the caller can deal
349  * with the condition.
350  *
351  * NB: These days this is just a wrapper around the WaitEventSet API. When
352  * using a latch very frequently, consider creating a longer living
353  * WaitEventSet instead; that's more efficient.
354  */
355 int
356 WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
357  long timeout, uint32 wait_event_info)
358 {
359  int ret = 0;
360  int rc;
361  WaitEvent event;
363 
364  if (wakeEvents & WL_TIMEOUT)
365  Assert(timeout >= 0);
366  else
367  timeout = -1;
368 
369  if (wakeEvents & WL_LATCH_SET)
370  AddWaitEventToSet(set, WL_LATCH_SET, PGINVALID_SOCKET,
371  (Latch *) latch, NULL);
372 
373  if (wakeEvents & WL_POSTMASTER_DEATH && IsUnderPostmaster)
375  NULL, NULL);
376 
377  if (wakeEvents & WL_SOCKET_MASK)
378  {
379  int ev;
380 
381  ev = wakeEvents & WL_SOCKET_MASK;
382  AddWaitEventToSet(set, ev, sock, NULL, NULL);
383  }
384 
385  rc = WaitEventSetWait(set, timeout, &event, 1, wait_event_info);
386 
387  if (rc == 0)
388  ret |= WL_TIMEOUT;
389  else
390  {
391  ret |= event.events & (WL_LATCH_SET |
394  }
395 
396  FreeWaitEventSet(set);
397 
398  return ret;
399 }
400 
401 /*
402  * Sets a latch and wakes up anyone waiting on it.
403  *
404  * This is cheap if the latch is already set, otherwise not so much.
405  *
406  * NB: when calling this in a signal handler, be sure to save and restore
407  * errno around it. (That's standard practice in most signal handlers, of
408  * course, but we used to omit it in handlers that only set a flag.)
409  *
410  * NB: this function is called from critical sections and signal handlers so
411  * throwing an error is not a good idea.
412  */
413 void
414 SetLatch(volatile Latch *latch)
415 {
416 #ifndef WIN32
417  pid_t owner_pid;
418 #else
419  HANDLE handle;
420 #endif
421 
422  /*
423  * The memory barrier has to be placed here to ensure that any flag
424  * variables possibly changed by this process have been flushed to main
425  * memory, before we check/set is_set.
426  */
428 
429  /* Quick exit if already set */
430  if (latch->is_set)
431  return;
432 
433  latch->is_set = true;
434 
435 #ifndef WIN32
436 
437  /*
438  * See if anyone's waiting for the latch. It can be the current process if
439  * we're in a signal handler. We use the self-pipe to wake up the
440  * poll()/epoll_wait() in that case. If it's another process, send a
441  * signal.
442  *
443  * Fetch owner_pid only once, in case the latch is concurrently getting
444  * owned or disowned. XXX: This assumes that pid_t is atomic, which isn't
445  * guaranteed to be true! In practice, the effective range of pid_t fits
446  * in a 32 bit integer, and so should be atomic. In the worst case, we
447  * might end up signaling the wrong process. Even then, you're very
448  * unlucky if a process with that bogus pid exists and belongs to
449  * Postgres; and PG database processes should handle excess SIGUSR1
450  * interrupts without a problem anyhow.
451  *
452  * Another sort of race condition that's possible here is for a new
453  * process to own the latch immediately after we look, so we don't signal
454  * it. This is okay so long as all callers of ResetLatch/WaitLatch follow
455  * the standard coding convention of waiting at the bottom of their loops,
456  * not the top, so that they'll correctly process latch-setting events
457  * that happen before they enter the loop.
458  */
459  owner_pid = latch->owner_pid;
460  if (owner_pid == 0)
461  return;
462  else if (owner_pid == MyProcPid)
463  {
464  if (waiting)
466  }
467  else
468  kill(owner_pid, SIGUSR1);
469 #else
470 
471  /*
472  * See if anyone's waiting for the latch. It can be the current process if
473  * we're in a signal handler.
474  *
475  * Use a local variable here just in case somebody changes the event field
476  * concurrently (which really should not happen).
477  */
478  handle = latch->event;
479  if (handle)
480  {
481  SetEvent(handle);
482 
483  /*
484  * Note that we silently ignore any errors. We might be in a signal
485  * handler or other critical path where it's not safe to call elog().
486  */
487  }
488 #endif
489 
490 }
491 
492 /*
493  * Clear the latch. Calling WaitLatch after this will sleep, unless
494  * the latch is set again before the WaitLatch call.
495  */
496 void
497 ResetLatch(volatile Latch *latch)
498 {
499  /* Only the owner should reset the latch */
500  Assert(latch->owner_pid == MyProcPid);
501 
502  latch->is_set = false;
503 
504  /*
505  * Ensure that the write to is_set gets flushed to main memory before we
506  * examine any flag variables. Otherwise a concurrent SetLatch might
507  * falsely conclude that it needn't signal us, even though we have missed
508  * seeing some flag updates that SetLatch was supposed to inform us of.
509  */
511 }
512 
513 /*
514  * Create a WaitEventSet with space for nevents different events to wait for.
515  *
516  * These events can then be efficiently waited upon together, using
517  * WaitEventSetWait().
518  */
519 WaitEventSet *
520 CreateWaitEventSet(MemoryContext context, int nevents)
521 {
522  WaitEventSet *set;
523  char *data;
524  Size sz = 0;
525 
526  /*
527  * Use MAXALIGN size/alignment to guarantee that later uses of memory are
528  * aligned correctly. E.g. epoll_event might need 8 byte alignment on some
529  * platforms, but earlier allocations like WaitEventSet and WaitEvent
530  * might not sized to guarantee that when purely using sizeof().
531  */
532  sz += MAXALIGN(sizeof(WaitEventSet));
533  sz += MAXALIGN(sizeof(WaitEvent) * nevents);
534 
535 #if defined(WAIT_USE_EPOLL)
536  sz += MAXALIGN(sizeof(struct epoll_event) * nevents);
537 #elif defined(WAIT_USE_POLL)
538  sz += MAXALIGN(sizeof(struct pollfd) * nevents);
539 #elif defined(WAIT_USE_WIN32)
540  /* need space for the pgwin32_signal_event */
541  sz += MAXALIGN(sizeof(HANDLE) * (nevents + 1));
542 #endif
543 
544  data = (char *) MemoryContextAllocZero(context, sz);
545 
546  set = (WaitEventSet *) data;
547  data += MAXALIGN(sizeof(WaitEventSet));
548 
549  set->events = (WaitEvent *) data;
550  data += MAXALIGN(sizeof(WaitEvent) * nevents);
551 
552 #if defined(WAIT_USE_EPOLL)
553  set->epoll_ret_events = (struct epoll_event *) data;
554  data += MAXALIGN(sizeof(struct epoll_event) * nevents);
555 #elif defined(WAIT_USE_POLL)
556  set->pollfds = (struct pollfd *) data;
557  data += MAXALIGN(sizeof(struct pollfd) * nevents);
558 #elif defined(WAIT_USE_WIN32)
559  set->handles = (HANDLE) data;
560  data += MAXALIGN(sizeof(HANDLE) * nevents);
561 #endif
562 
563  set->latch = NULL;
564  set->nevents_space = nevents;
565 
566 #if defined(WAIT_USE_EPOLL)
567 #ifdef EPOLL_CLOEXEC
568  set->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
569  if (set->epoll_fd < 0)
570  elog(ERROR, "epoll_create1 failed: %m");
571 #else
572  /* cope with ancient glibc lacking epoll_create1 (e.g., RHEL5) */
573  set->epoll_fd = epoll_create(nevents);
574  if (set->epoll_fd < 0)
575  elog(ERROR, "epoll_create failed: %m");
576  if (fcntl(set->epoll_fd, F_SETFD, FD_CLOEXEC) == -1)
577  elog(ERROR, "fcntl(F_SETFD) failed on epoll descriptor: %m");
578 #endif /* EPOLL_CLOEXEC */
579 #elif defined(WAIT_USE_WIN32)
580 
581  /*
582  * To handle signals while waiting, we need to add a win32 specific event.
583  * We accounted for the additional event at the top of this routine. See
584  * port/win32/signal.c for more details.
585  *
586  * Note: pgwin32_signal_event should be first to ensure that it will be
587  * reported when multiple events are set. We want to guarantee that
588  * pending signals are serviced.
589  */
590  set->handles[0] = pgwin32_signal_event;
591  StaticAssertStmt(WSA_INVALID_EVENT == NULL, "");
592 #endif
593 
594  return set;
595 }
596 
597 /*
598  * Free a previously created WaitEventSet.
599  *
600  * Note: preferably, this shouldn't have to free any resources that could be
601  * inherited across an exec(). If it did, we'd likely leak those resources in
602  * many scenarios. For the epoll case, we ensure that by setting FD_CLOEXEC
603  * when the FD is created. For the Windows case, we assume that the handles
604  * involved are non-inheritable.
605  */
606 void
608 {
609 #if defined(WAIT_USE_EPOLL)
610  close(set->epoll_fd);
611 #elif defined(WAIT_USE_WIN32)
612  WaitEvent *cur_event;
613 
614  for (cur_event = set->events;
615  cur_event < (set->events + set->nevents);
616  cur_event++)
617  {
618  if (cur_event->events & WL_LATCH_SET)
619  {
620  /* uses the latch's HANDLE */
621  }
622  else if (cur_event->events & WL_POSTMASTER_DEATH)
623  {
624  /* uses PostmasterHandle */
625  }
626  else
627  {
628  /* Clean up the event object we created for the socket */
629  WSAEventSelect(cur_event->fd, NULL, 0);
630  WSACloseEvent(set->handles[cur_event->pos + 1]);
631  }
632  }
633 #endif
634 
635  pfree(set);
636 }
637 
638 /* ---
639  * Add an event to the set. Possible events are:
640  * - WL_LATCH_SET: Wait for the latch to be set
641  * - WL_POSTMASTER_DEATH: Wait for postmaster to die
642  * - WL_SOCKET_READABLE: Wait for socket to become readable,
643  * can be combined in one event with other WL_SOCKET_* events
644  * - WL_SOCKET_WRITEABLE: Wait for socket to become writeable,
645  * can be combined with other WL_SOCKET_* events
646  * - WL_SOCKET_CONNECTED: Wait for socket connection to be established,
647  * can be combined with other WL_SOCKET_* events (on non-Windows
648  * platforms, this is the same as WL_SOCKET_WRITEABLE)
649  *
650  * Returns the offset in WaitEventSet->events (starting from 0), which can be
651  * used to modify previously added wait events using ModifyWaitEvent().
652  *
653  * In the WL_LATCH_SET case the latch must be owned by the current process,
654  * i.e. it must be a process-local latch initialized with InitLatch, or a
655  * shared latch associated with the current process by calling OwnLatch.
656  *
657  * In the WL_SOCKET_READABLE/WRITEABLE/CONNECTED cases, EOF and error
658  * conditions cause the socket to be reported as readable/writable/connected,
659  * so that the caller can deal with the condition.
660  *
661  * The user_data pointer specified here will be set for the events returned
662  * by WaitEventSetWait(), allowing to easily associate additional data with
663  * events.
664  */
665 int
667  void *user_data)
668 {
669  WaitEvent *event;
670 
671  /* not enough space */
672  Assert(set->nevents < set->nevents_space);
673 
674  if (latch)
675  {
676  if (latch->owner_pid != MyProcPid)
677  elog(ERROR, "cannot wait on a latch owned by another process");
678  if (set->latch)
679  elog(ERROR, "cannot wait on more than one latch");
680  if ((events & WL_LATCH_SET) != WL_LATCH_SET)
681  elog(ERROR, "latch events only support being set");
682  }
683  else
684  {
685  if (events & WL_LATCH_SET)
686  elog(ERROR, "cannot wait on latch without a specified latch");
687  }
688 
689  /* waiting for socket readiness without a socket indicates a bug */
690  if (fd == PGINVALID_SOCKET && (events & WL_SOCKET_MASK))
691  elog(ERROR, "cannot wait on socket event without a socket");
692 
693  event = &set->events[set->nevents];
694  event->pos = set->nevents++;
695  event->fd = fd;
696  event->events = events;
697  event->user_data = user_data;
698 #ifdef WIN32
699  event->reset = false;
700 #endif
701 
702  if (events == WL_LATCH_SET)
703  {
704  set->latch = latch;
705  set->latch_pos = event->pos;
706 #ifndef WIN32
707  event->fd = selfpipe_readfd;
708 #endif
709  }
710  else if (events == WL_POSTMASTER_DEATH)
711  {
712 #ifndef WIN32
714 #endif
715  }
716 
717  /* perform wait primitive specific initialization, if needed */
718 #if defined(WAIT_USE_EPOLL)
719  WaitEventAdjustEpoll(set, event, EPOLL_CTL_ADD);
720 #elif defined(WAIT_USE_POLL)
721  WaitEventAdjustPoll(set, event);
722 #elif defined(WAIT_USE_WIN32)
723  WaitEventAdjustWin32(set, event);
724 #endif
725 
726  return event->pos;
727 }
728 
729 /*
730  * Change the event mask and, in the WL_LATCH_SET case, the latch associated
731  * with the WaitEvent.
732  *
733  * 'pos' is the id returned by AddWaitEventToSet.
734  */
735 void
736 ModifyWaitEvent(WaitEventSet *set, int pos, uint32 events, Latch *latch)
737 {
738  WaitEvent *event;
739 
740  Assert(pos < set->nevents);
741 
742  event = &set->events[pos];
743 
744  /*
745  * If neither the event mask nor the associated latch changes, return
746  * early. That's an important optimization for some sockets, where
747  * ModifyWaitEvent is frequently used to switch from waiting for reads to
748  * waiting on writes.
749  */
750  if (events == event->events &&
751  (!(event->events & WL_LATCH_SET) || set->latch == latch))
752  return;
753 
754  if (event->events & WL_LATCH_SET &&
755  events != event->events)
756  {
757  /* we could allow to disable latch events for a while */
758  elog(ERROR, "cannot modify latch event");
759  }
760 
761  if (event->events & WL_POSTMASTER_DEATH)
762  {
763  elog(ERROR, "cannot modify postmaster death event");
764  }
765 
766  /* FIXME: validate event mask */
767  event->events = events;
768 
769  if (events == WL_LATCH_SET)
770  {
771  set->latch = latch;
772  }
773 
774 #if defined(WAIT_USE_EPOLL)
775  WaitEventAdjustEpoll(set, event, EPOLL_CTL_MOD);
776 #elif defined(WAIT_USE_POLL)
777  WaitEventAdjustPoll(set, event);
778 #elif defined(WAIT_USE_WIN32)
779  WaitEventAdjustWin32(set, event);
780 #endif
781 }
782 
#if defined(WAIT_USE_EPOLL)
/*
 * action can be one of EPOLL_CTL_ADD | EPOLL_CTL_MOD | EPOLL_CTL_DEL
 */
static void
WaitEventAdjustEpoll(WaitEventSet *set, WaitEvent *event, int action)
{
	struct epoll_event epoll_ev;
	int			rc;

	/* pointer to our event, returned by epoll_wait */
	epoll_ev.data.ptr = event;
	/* always wait for errors */
	epoll_ev.events = EPOLLERR | EPOLLHUP;

	/* prepare pollfd entry once */
	if (event->events == WL_LATCH_SET)
	{
		Assert(set->latch != NULL);
		epoll_ev.events |= EPOLLIN;
	}
	else if (event->events == WL_POSTMASTER_DEATH)
	{
		epoll_ev.events |= EPOLLIN;
	}
	else
	{
		Assert(event->fd != PGINVALID_SOCKET);
		Assert(event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE));

		if (event->events & WL_SOCKET_READABLE)
			epoll_ev.events |= EPOLLIN;
		if (event->events & WL_SOCKET_WRITEABLE)
			epoll_ev.events |= EPOLLOUT;
	}

	/*
	 * Even though unused, we also pass epoll_ev as the data argument if
	 * EPOLL_CTL_DEL is passed as action.  There used to be an epoll bug
	 * requiring that, and actually it makes the code simpler...
	 */
	rc = epoll_ctl(set->epoll_fd, action, event->fd, &epoll_ev);

	if (rc < 0)
		ereport(ERROR,
				(errcode_for_socket_access(),
				 errmsg("epoll_ctl() failed: %m")));
}
#endif
832 
#if defined(WAIT_USE_POLL)
static void
WaitEventAdjustPoll(WaitEventSet *set, WaitEvent *event)
{
	struct pollfd *pollfd = &set->pollfds[event->pos];

	pollfd->revents = 0;
	pollfd->fd = event->fd;

	/* prepare pollfd entry once */
	if (event->events == WL_LATCH_SET)
	{
		Assert(set->latch != NULL);
		pollfd->events = POLLIN;
	}
	else if (event->events == WL_POSTMASTER_DEATH)
	{
		pollfd->events = POLLIN;
	}
	else
	{
		Assert(event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE));
		pollfd->events = 0;
		if (event->events & WL_SOCKET_READABLE)
			pollfd->events |= POLLIN;
		if (event->events & WL_SOCKET_WRITEABLE)
			pollfd->events |= POLLOUT;
	}

	Assert(event->fd != PGINVALID_SOCKET);
}
#endif
865 
#if defined(WAIT_USE_WIN32)
static void
WaitEventAdjustWin32(WaitEventSet *set, WaitEvent *event)
{
	/* handles[0] is pgwin32_signal_event, hence the +1 offset */
	HANDLE	   *handle = &set->handles[event->pos + 1];

	if (event->events == WL_LATCH_SET)
	{
		Assert(set->latch != NULL);
		*handle = set->latch->event;
	}
	else if (event->events == WL_POSTMASTER_DEATH)
	{
		*handle = PostmasterHandle;
	}
	else
	{
		int			flags = FD_CLOSE;	/* always check for errors/EOF */

		if (event->events & WL_SOCKET_READABLE)
			flags |= FD_READ;
		if (event->events & WL_SOCKET_WRITEABLE)
			flags |= FD_WRITE;
		if (event->events & WL_SOCKET_CONNECTED)
			flags |= FD_CONNECT;

		if (*handle == WSA_INVALID_EVENT)
		{
			*handle = WSACreateEvent();
			if (*handle == WSA_INVALID_EVENT)
				elog(ERROR, "failed to create event for socket: error code %u",
					 WSAGetLastError());
		}
		if (WSAEventSelect(event->fd, *handle, flags) != 0)
			elog(ERROR, "failed to set up event for socket: error code %u",
				 WSAGetLastError());

		Assert(event->fd != PGINVALID_SOCKET);
	}
}
#endif
907 
908 /*
909  * Wait for events added to the set to happen, or until the timeout is
910  * reached. At most nevents occurred events are returned.
911  *
912  * If timeout = -1, block until an event occurs; if 0, check sockets for
913  * readiness, but don't block; if > 0, block for at most timeout milliseconds.
914  *
915  * Returns the number of events occurred, or 0 if the timeout was reached.
916  *
917  * Returned events will have the fd, pos, user_data fields set to the
918  * values associated with the registered event.
919  */
920 int
921 WaitEventSetWait(WaitEventSet *set, long timeout,
922  WaitEvent *occurred_events, int nevents,
923  uint32 wait_event_info)
924 {
925  int returned_events = 0;
927  instr_time cur_time;
928  long cur_timeout = -1;
929 
930  Assert(nevents > 0);
931 
932  /*
933  * Initialize timeout if requested. We must record the current time so
934  * that we can determine the remaining timeout if interrupted.
935  */
936  if (timeout >= 0)
937  {
938  INSTR_TIME_SET_CURRENT(start_time);
939  Assert(timeout >= 0 && timeout <= INT_MAX);
940  cur_timeout = timeout;
941  }
942 
943  pgstat_report_wait_start(wait_event_info);
944 
945 #ifndef WIN32
946  waiting = true;
947 #else
948  /* Ensure that signals are serviced even if latch is already set */
950 #endif
951  while (returned_events == 0)
952  {
953  int rc;
954 
955  /*
956  * Check if the latch is set already. If so, leave the loop
957  * immediately, avoid blocking again. We don't attempt to report any
958  * other events that might also be satisfied.
959  *
960  * If someone sets the latch between this and the
961  * WaitEventSetWaitBlock() below, the setter will write a byte to the
962  * pipe (or signal us and the signal handler will do that), and the
963  * readiness routine will return immediately.
964  *
965  * On unix, If there's a pending byte in the self pipe, we'll notice
966  * whenever blocking. Only clearing the pipe in that case avoids
967  * having to drain it every time WaitLatchOrSocket() is used. Should
968  * the pipe-buffer fill up we're still ok, because the pipe is in
969  * nonblocking mode. It's unlikely for that to happen, because the
970  * self pipe isn't filled unless we're blocking (waiting = true), or
971  * from inside a signal handler in latch_sigusr1_handler().
972  *
973  * On windows, we'll also notice if there's a pending event for the
974  * latch when blocking, but there's no danger of anything filling up,
975  * as "Setting an event that is already set has no effect.".
976  *
977  * Note: we assume that the kernel calls involved in latch management
978  * will provide adequate synchronization on machines with weak memory
979  * ordering, so that we cannot miss seeing is_set if a notification
980  * has already been queued.
981  */
982  if (set->latch && set->latch->is_set)
983  {
984  occurred_events->fd = PGINVALID_SOCKET;
985  occurred_events->pos = set->latch_pos;
986  occurred_events->user_data =
987  set->events[set->latch_pos].user_data;
988  occurred_events->events = WL_LATCH_SET;
989  occurred_events++;
990  returned_events++;
991 
992  break;
993  }
994 
995  /*
996  * Wait for events using the readiness primitive chosen at the top of
997  * this file. If -1 is returned, a timeout has occurred, if 0 we have
998  * to retry, everything >= 1 is the number of returned events.
999  */
1000  rc = WaitEventSetWaitBlock(set, cur_timeout,
1001  occurred_events, nevents);
1002 
1003  if (rc == -1)
1004  break; /* timeout occurred */
1005  else
1006  returned_events = rc;
1007 
1008  /* If we're not done, update cur_timeout for next iteration */
1009  if (returned_events == 0 && timeout >= 0)
1010  {
1011  INSTR_TIME_SET_CURRENT(cur_time);
1012  INSTR_TIME_SUBTRACT(cur_time, start_time);
1013  cur_timeout = timeout - (long) INSTR_TIME_GET_MILLISEC(cur_time);
1014  if (cur_timeout <= 0)
1015  break;
1016  }
1017  }
1018 #ifndef WIN32
1019  waiting = false;
1020 #endif
1021 
1023 
1024  return returned_events;
1025 }
1026 
1027 
1028 #if defined(WAIT_USE_EPOLL)
1029 
1030 /*
1031  * Wait using linux's epoll_wait(2).
1032  *
1033  * This is the preferrable wait method, as several readiness notifications are
1034  * delivered, without having to iterate through all of set->events. The return
1035  * epoll_event struct contain a pointer to our events, making association
1036  * easy.
1037  */
1038 static inline int
1039 WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
1040  WaitEvent *occurred_events, int nevents)
1041 {
1042  int returned_events = 0;
1043  int rc;
1044  WaitEvent *cur_event;
1045  struct epoll_event *cur_epoll_event;
1046 
1047  /* Sleep */
1048  rc = epoll_wait(set->epoll_fd, set->epoll_ret_events,
1049  nevents, cur_timeout);
1050 
1051  /* Check return code */
1052  if (rc < 0)
1053  {
1054  /* EINTR is okay, otherwise complain */
1055  if (errno != EINTR)
1056  {
1057  waiting = false;
1058  ereport(ERROR,
1060  errmsg("epoll_wait() failed: %m")));
1061  }
1062  return 0;
1063  }
1064  else if (rc == 0)
1065  {
1066  /* timeout exceeded */
1067  return -1;
1068  }
1069 
1070  /*
1071  * At least one event occurred, iterate over the returned epoll events
1072  * until they're either all processed, or we've returned all the events
1073  * the caller desired.
1074  */
1075  for (cur_epoll_event = set->epoll_ret_events;
1076  cur_epoll_event < (set->epoll_ret_events + rc) &&
1077  returned_events < nevents;
1078  cur_epoll_event++)
1079  {
1080  /* epoll's data pointer is set to the associated WaitEvent */
1081  cur_event = (WaitEvent *) cur_epoll_event->data.ptr;
1082 
1083  occurred_events->pos = cur_event->pos;
1084  occurred_events->user_data = cur_event->user_data;
1085  occurred_events->events = 0;
1086 
1087  if (cur_event->events == WL_LATCH_SET &&
1088  cur_epoll_event->events & (EPOLLIN | EPOLLERR | EPOLLHUP))
1089  {
1090  /* There's data in the self-pipe, clear it. */
1091  drainSelfPipe();
1092 
1093  if (set->latch->is_set)
1094  {
1095  occurred_events->fd = PGINVALID_SOCKET;
1096  occurred_events->events = WL_LATCH_SET;
1097  occurred_events++;
1098  returned_events++;
1099  }
1100  }
1101  else if (cur_event->events == WL_POSTMASTER_DEATH &&
1102  cur_epoll_event->events & (EPOLLIN | EPOLLERR | EPOLLHUP))
1103  {
1104  /*
1105  * We expect an EPOLLHUP when the remote end is closed, but
1106  * because we don't expect the pipe to become readable or to have
1107  * any errors either, treat those cases as postmaster death, too.
1108  *
1109  * Be paranoid about a spurious event signalling the postmaster as
1110  * being dead. There have been reports about that happening with
1111  * older primitives (select(2) to be specific), and a spurious
1112  * WL_POSTMASTER_DEATH event would be painful. Re-checking doesn't
1113  * cost much.
1114  */
1115  if (!PostmasterIsAlive())
1116  {
1117  occurred_events->fd = PGINVALID_SOCKET;
1118  occurred_events->events = WL_POSTMASTER_DEATH;
1119  occurred_events++;
1120  returned_events++;
1121  }
1122  }
1123  else if (cur_event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
1124  {
1125  Assert(cur_event->fd != PGINVALID_SOCKET);
1126 
1127  if ((cur_event->events & WL_SOCKET_READABLE) &&
1128  (cur_epoll_event->events & (EPOLLIN | EPOLLERR | EPOLLHUP)))
1129  {
1130  /* data available in socket, or EOF */
1131  occurred_events->events |= WL_SOCKET_READABLE;
1132  }
1133 
1134  if ((cur_event->events & WL_SOCKET_WRITEABLE) &&
1135  (cur_epoll_event->events & (EPOLLOUT | EPOLLERR | EPOLLHUP)))
1136  {
1137  /* writable, or EOF */
1138  occurred_events->events |= WL_SOCKET_WRITEABLE;
1139  }
1140 
1141  if (occurred_events->events != 0)
1142  {
1143  occurred_events->fd = cur_event->fd;
1144  occurred_events++;
1145  returned_events++;
1146  }
1147  }
1148  }
1149 
1150  return returned_events;
1151 }
1152 
1153 #elif defined(WAIT_USE_POLL)
1154 
1155 /*
1156  * Wait using poll(2).
1157  *
1158  * This allows to receive readiness notifications for several events at once,
1159  * but requires iterating through all of set->pollfds.
1160  */
1161 static inline int
1162 WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
1163  WaitEvent *occurred_events, int nevents)
1164 {
1165  int returned_events = 0;
1166  int rc;
1167  WaitEvent *cur_event;
1168  struct pollfd *cur_pollfd;
1169 
1170  /* Sleep */
1171  rc = poll(set->pollfds, set->nevents, (int) cur_timeout);
1172 
1173  /* Check return code */
1174  if (rc < 0)
1175  {
1176  /* EINTR is okay, otherwise complain */
1177  if (errno != EINTR)
1178  {
1179  waiting = false;
1180  ereport(ERROR,
1182  errmsg("poll() failed: %m")));
1183  }
1184  return 0;
1185  }
1186  else if (rc == 0)
1187  {
1188  /* timeout exceeded */
1189  return -1;
1190  }
1191 
1192  for (cur_event = set->events, cur_pollfd = set->pollfds;
1193  cur_event < (set->events + set->nevents) &&
1194  returned_events < nevents;
1195  cur_event++, cur_pollfd++)
1196  {
1197  /* no activity on this FD, skip */
1198  if (cur_pollfd->revents == 0)
1199  continue;
1200 
1201  occurred_events->pos = cur_event->pos;
1202  occurred_events->user_data = cur_event->user_data;
1203  occurred_events->events = 0;
1204 
1205  if (cur_event->events == WL_LATCH_SET &&
1206  (cur_pollfd->revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL)))
1207  {
1208  /* There's data in the self-pipe, clear it. */
1209  drainSelfPipe();
1210 
1211  if (set->latch->is_set)
1212  {
1213  occurred_events->fd = PGINVALID_SOCKET;
1214  occurred_events->events = WL_LATCH_SET;
1215  occurred_events++;
1216  returned_events++;
1217  }
1218  }
1219  else if (cur_event->events == WL_POSTMASTER_DEATH &&
1220  (cur_pollfd->revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL)))
1221  {
1222  /*
1223  * We expect an POLLHUP when the remote end is closed, but because
1224  * we don't expect the pipe to become readable or to have any
1225  * errors either, treat those cases as postmaster death, too.
1226  *
1227  * Be paranoid about a spurious event signalling the postmaster as
1228  * being dead. There have been reports about that happening with
1229  * older primitives (select(2) to be specific), and a spurious
1230  * WL_POSTMASTER_DEATH event would be painful. Re-checking doesn't
1231  * cost much.
1232  */
1233  if (!PostmasterIsAlive())
1234  {
1235  occurred_events->fd = PGINVALID_SOCKET;
1236  occurred_events->events = WL_POSTMASTER_DEATH;
1237  occurred_events++;
1238  returned_events++;
1239  }
1240  }
1241  else if (cur_event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
1242  {
1243  int errflags = POLLHUP | POLLERR | POLLNVAL;
1244 
1245  Assert(cur_event->fd >= PGINVALID_SOCKET);
1246 
1247  if ((cur_event->events & WL_SOCKET_READABLE) &&
1248  (cur_pollfd->revents & (POLLIN | errflags)))
1249  {
1250  /* data available in socket, or EOF */
1251  occurred_events->events |= WL_SOCKET_READABLE;
1252  }
1253 
1254  if ((cur_event->events & WL_SOCKET_WRITEABLE) &&
1255  (cur_pollfd->revents & (POLLOUT | errflags)))
1256  {
1257  /* writeable, or EOF */
1258  occurred_events->events |= WL_SOCKET_WRITEABLE;
1259  }
1260 
1261  if (occurred_events->events != 0)
1262  {
1263  occurred_events->fd = cur_event->fd;
1264  occurred_events++;
1265  returned_events++;
1266  }
1267  }
1268  }
1269  return returned_events;
1270 }
1271 
1272 #elif defined(WAIT_USE_WIN32)
1273 
1274 /*
1275  * Wait using Windows' WaitForMultipleObjects().
1276  *
1277  * Unfortunately this will only ever return a single readiness notification at
1278  * a time. Note that while the official documentation for
1279  * WaitForMultipleObjects is ambiguous about multiple events being "consumed"
1280  * with a single bWaitAll = FALSE call,
1281  * https://blogs.msdn.microsoft.com/oldnewthing/20150409-00/?p=44273 confirms
1282  * that only one event is "consumed".
1283  */
1284 static inline int
1285 WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
1286  WaitEvent *occurred_events, int nevents)
1287 {
1288  int returned_events = 0;
1289  DWORD rc;
1290  WaitEvent *cur_event;
1291 
1292  /* Reset any wait events that need it */
1293  for (cur_event = set->events;
1294  cur_event < (set->events + set->nevents);
1295  cur_event++)
1296  {
1297  if (cur_event->reset)
1298  {
1299  WaitEventAdjustWin32(set, cur_event);
1300  cur_event->reset = false;
1301  }
1302 
1303  /*
1304  * Windows does not guarantee to log an FD_WRITE network event
1305  * indicating that more data can be sent unless the previous send()
1306  * failed with WSAEWOULDBLOCK. While our caller might well have made
1307  * such a call, we cannot assume that here. Therefore, if waiting for
1308  * write-ready, force the issue by doing a dummy send(). If the dummy
1309  * send() succeeds, assume that the socket is in fact write-ready, and
1310  * return immediately. Also, if it fails with something other than
1311  * WSAEWOULDBLOCK, return a write-ready indication to let our caller
1312  * deal with the error condition.
1313  */
1314  if (cur_event->events & WL_SOCKET_WRITEABLE)
1315  {
1316  char c;
1317  WSABUF buf;
1318  DWORD sent;
1319  int r;
1320 
1321  buf.buf = &c;
1322  buf.len = 0;
1323 
1324  r = WSASend(cur_event->fd, &buf, 1, &sent, 0, NULL, NULL);
1325  if (r == 0 || WSAGetLastError() != WSAEWOULDBLOCK)
1326  {
1327  occurred_events->pos = cur_event->pos;
1328  occurred_events->user_data = cur_event->user_data;
1329  occurred_events->events = WL_SOCKET_WRITEABLE;
1330  occurred_events->fd = cur_event->fd;
1331  return 1;
1332  }
1333  }
1334  }
1335 
1336  /*
1337  * Sleep.
1338  *
1339  * Need to wait for ->nevents + 1, because signal handle is in [0].
1340  */
1341  rc = WaitForMultipleObjects(set->nevents + 1, set->handles, FALSE,
1342  cur_timeout);
1343 
1344  /* Check return code */
1345  if (rc == WAIT_FAILED)
1346  elog(ERROR, "WaitForMultipleObjects() failed: error code %lu",
1347  GetLastError());
1348  else if (rc == WAIT_TIMEOUT)
1349  {
1350  /* timeout exceeded */
1351  return -1;
1352  }
1353 
1354  if (rc == WAIT_OBJECT_0)
1355  {
1356  /* Service newly-arrived signals */
1358  return 0; /* retry */
1359  }
1360 
1361  /*
1362  * With an offset of one, due to the always present pgwin32_signal_event,
1363  * the handle offset directly corresponds to a wait event.
1364  */
1365  cur_event = (WaitEvent *) &set->events[rc - WAIT_OBJECT_0 - 1];
1366 
1367  occurred_events->pos = cur_event->pos;
1368  occurred_events->user_data = cur_event->user_data;
1369  occurred_events->events = 0;
1370 
1371  if (cur_event->events == WL_LATCH_SET)
1372  {
1373  if (!ResetEvent(set->latch->event))
1374  elog(ERROR, "ResetEvent failed: error code %lu", GetLastError());
1375 
1376  if (set->latch->is_set)
1377  {
1378  occurred_events->fd = PGINVALID_SOCKET;
1379  occurred_events->events = WL_LATCH_SET;
1380  occurred_events++;
1381  returned_events++;
1382  }
1383  }
1384  else if (cur_event->events == WL_POSTMASTER_DEATH)
1385  {
1386  /*
1387  * Postmaster apparently died. Since the consequences of falsely
1388  * returning WL_POSTMASTER_DEATH could be pretty unpleasant, we take
1389  * the trouble to positively verify this with PostmasterIsAlive(),
1390  * even though there is no known reason to think that the event could
1391  * be falsely set on Windows.
1392  */
1393  if (!PostmasterIsAlive())
1394  {
1395  occurred_events->fd = PGINVALID_SOCKET;
1396  occurred_events->events = WL_POSTMASTER_DEATH;
1397  occurred_events++;
1398  returned_events++;
1399  }
1400  }
1401  else if (cur_event->events & WL_SOCKET_MASK)
1402  {
1403  WSANETWORKEVENTS resEvents;
1404  HANDLE handle = set->handles[cur_event->pos + 1];
1405 
1406  Assert(cur_event->fd);
1407 
1408  occurred_events->fd = cur_event->fd;
1409 
1410  ZeroMemory(&resEvents, sizeof(resEvents));
1411  if (WSAEnumNetworkEvents(cur_event->fd, handle, &resEvents) != 0)
1412  elog(ERROR, "failed to enumerate network events: error code %u",
1413  WSAGetLastError());
1414  if ((cur_event->events & WL_SOCKET_READABLE) &&
1415  (resEvents.lNetworkEvents & FD_READ))
1416  {
1417  /* data available in socket */
1418  occurred_events->events |= WL_SOCKET_READABLE;
1419 
1420  /*------
1421  * WaitForMultipleObjects doesn't guarantee that a read event will
1422  * be returned if the latch is set at the same time. Even if it
1423  * did, the caller might drop that event expecting it to reoccur
1424  * on next call. So, we must force the event to be reset if this
1425  * WaitEventSet is used again in order to avoid an indefinite
1426  * hang. Refer https://msdn.microsoft.com/en-us/library/windows/desktop/ms741576(v=vs.85).aspx
1427  * for the behavior of socket events.
1428  *------
1429  */
1430  cur_event->reset = true;
1431  }
1432  if ((cur_event->events & WL_SOCKET_WRITEABLE) &&
1433  (resEvents.lNetworkEvents & FD_WRITE))
1434  {
1435  /* writeable */
1436  occurred_events->events |= WL_SOCKET_WRITEABLE;
1437  }
1438  if ((cur_event->events & WL_SOCKET_CONNECTED) &&
1439  (resEvents.lNetworkEvents & FD_CONNECT))
1440  {
1441  /* connected */
1442  occurred_events->events |= WL_SOCKET_CONNECTED;
1443  }
1444  if (resEvents.lNetworkEvents & FD_CLOSE)
1445  {
1446  /* EOF/error, so signal all caller-requested socket flags */
1447  occurred_events->events |= (cur_event->events & WL_SOCKET_MASK);
1448  }
1449 
1450  if (occurred_events->events != 0)
1451  {
1452  occurred_events++;
1453  returned_events++;
1454  }
1455  }
1456 
1457  return returned_events;
1458 }
1459 #endif
1460 
1461 /*
1462  * SetLatch uses SIGUSR1 to wake up the process waiting on the latch.
1463  *
1464  * Wake up WaitLatch, if we're waiting. (We might not be, since SIGUSR1 is
1465  * overloaded for multiple purposes; or we might not have reached WaitLatch
1466  * yet, in which case we don't need to fill the pipe either.)
1467  *
1468  * NB: when calling this in a signal handler, be sure to save and restore
1469  * errno around it.
1470  */
1471 #ifndef WIN32
1472 void
1474 {
1475  if (waiting)
1476  sendSelfPipeByte();
1477 }
1478 #endif /* !WIN32 */
1479 
1480 /* Send one byte to the self-pipe, to wake up WaitLatch */
1481 #ifndef WIN32
1482 static void
1484 {
1485  int rc;
1486  char dummy = 0;
1487 
1488 retry:
1489  rc = write(selfpipe_writefd, &dummy, 1);
1490  if (rc < 0)
1491  {
1492  /* If interrupted by signal, just retry */
1493  if (errno == EINTR)
1494  goto retry;
1495 
1496  /*
1497  * If the pipe is full, we don't need to retry, the data that's there
1498  * already is enough to wake up WaitLatch.
1499  */
1500  if (errno == EAGAIN || errno == EWOULDBLOCK)
1501  return;
1502 
1503  /*
1504  * Oops, the write() failed for some other reason. We might be in a
1505  * signal handler, so it's not safe to elog(). We have no choice but
1506  * silently ignore the error.
1507  */
1508  return;
1509  }
1510 }
1511 #endif /* !WIN32 */
1512 
1513 /*
1514  * Read all available data from the self-pipe
1515  *
1516  * Note: this is only called when waiting = true. If it fails and doesn't
1517  * return, it must reset that flag first (though ideally, this will never
1518  * happen).
1519  */
1520 #ifndef WIN32
1521 static void
1523 {
1524  /*
1525  * There shouldn't normally be more than one byte in the pipe, or maybe a
1526  * few bytes if multiple processes run SetLatch at the same instant.
1527  */
1528  char buf[16];
1529  int rc;
1530 
1531  for (;;)
1532  {
1533  rc = read(selfpipe_readfd, buf, sizeof(buf));
1534  if (rc < 0)
1535  {
1536  if (errno == EAGAIN || errno == EWOULDBLOCK)
1537  break; /* the pipe is empty */
1538  else if (errno == EINTR)
1539  continue; /* retry */
1540  else
1541  {
1542  waiting = false;
1543  elog(ERROR, "read() on self-pipe failed: %m");
1544  }
1545  }
1546  else if (rc == 0)
1547  {
1548  waiting = false;
1549  elog(ERROR, "unexpected EOF on self-pipe");
1550  }
1551  else if (rc < sizeof(buf))
1552  {
1553  /* we successfully drained the pipe; no need to read() again */
1554  break;
1555  }
1556  /* else buffer wasn't big enough, so read again */
1557  }
1558 }
1559 #endif /* !WIN32 */
#define EWOULDBLOCK
Definition: win32.h:291
int latch_pos
Definition: latch.c:93
#define SIGUSR1
Definition: win32.h:202
#define WL_SOCKET_WRITEABLE
Definition: latch.h:126
pgsocket fd
Definition: latch.h:144
int MyProcPid
Definition: globals.c:39
int pos
Definition: latch.h:142
void FreeWaitEventSet(WaitEventSet *set)
Definition: latch.c:607
static int selfpipe_writefd
Definition: latch.c:119
#define WL_TIMEOUT
Definition: latch.h:127
int AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd, Latch *latch, void *user_data)
Definition: latch.c:666
#define write(a, b, c)
Definition: win32.h:14
bool is_shared
Definition: latch.h:113
#define INSTR_TIME_GET_MILLISEC(t)
Definition: instr_time.h:199
struct timeval instr_time
Definition: instr_time.h:147
void ModifyWaitEvent(WaitEventSet *set, int pos, uint32 events, Latch *latch)
Definition: latch.c:736
static void drainSelfPipe(void)
Definition: latch.c:1522
#define WL_SOCKET_READABLE
Definition: latch.h:125
#define WL_SOCKET_MASK
Definition: latch.h:136
void ResetLatch(volatile Latch *latch)
Definition: latch.c:497
static int fd(const char *x, int i)
Definition: preproc-init.c:105
static time_t start_time
Definition: pg_ctl.c:103
#define EAGAIN
Definition: win32.h:283
WaitEventSet * CreateWaitEventSet(MemoryContext context, int nevents)
Definition: latch.c:520
#define StaticAssertStmt(condition, errmessage)
Definition: c.h:763
HANDLE pgwin32_signal_event
Definition: signal.c:27
int WaitLatch(volatile Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:336
void pfree(void *pointer)
Definition: mcxt.c:949
void pgwin32_dispatch_queued_signals(void)
Definition: signal.c:107
#define ERROR
Definition: elog.h:43
#define INSTR_TIME_SUBTRACT(x, y)
Definition: instr_time.h:167
#define FALSE
Definition: c.h:219
#define FATAL
Definition: elog.h:52
uint32 events
Definition: latch.h:143
static int selfpipe_readfd
Definition: latch.c:118
Definition: latch.h:110
static int WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout, WaitEvent *occurred_events, int nevents)
char * c
static char * buf
Definition: pg_test_fsync.c:67
bool IsUnderPostmaster
Definition: globals.c:101
bool PostmasterIsAlive(void)
Definition: pmsignal.c:272
unsigned int uint32
Definition: c.h:258
int pgsocket
Definition: port.h:22
static void pgstat_report_wait_end(void)
Definition: pgstat.h:1244
void OwnLatch(volatile Latch *latch)
Definition: latch.c:288
MemoryContext CurrentMemoryContext
Definition: mcxt.c:37
#define ereport(elevel, rest)
Definition: elog.h:122
int errcode_for_socket_access(void)
Definition: elog.c:669
int nevents
Definition: latch.c:77
int postmaster_alive_fds[2]
Definition: postmaster.c:562
static void sendSelfPipeByte(void)
Definition: latch.c:1483
#define WL_POSTMASTER_DEATH
Definition: latch.h:128
#define PGINVALID_SOCKET
Definition: port.h:24
#define EINTR
Definition: win32.h:285
void InitializeLatchSupport(void)
Definition: latch.c:147
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition: mcxt.c:741
#define pg_memory_barrier()
Definition: atomics.h:148
void SetLatch(volatile Latch *latch)
Definition: latch.c:414
#define Assert(condition)
Definition: c.h:681
int WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock, long timeout, uint32 wait_event_info)
Definition: latch.c:356
WaitEvent * events
Definition: latch.c:84
size_t Size
Definition: c.h:350
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: pgstat.h:1220
#define MAXALIGN(LEN)
Definition: c.h:576
void InitLatch(volatile Latch *latch)
Definition: latch.c:220
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:153
void * user_data
Definition: latch.h:145
int nevents_space
Definition: latch.c:78
int errmsg(const char *fmt,...)
Definition: elog.c:797
int owner_pid
Definition: latch.h:114
void DisownLatch(volatile Latch *latch)
Definition: latch.c:308
sig_atomic_t is_set
Definition: latch.h:112
#define TRUE
Definition: c.h:215
#define elog
Definition: elog.h:219
#define close(a)
Definition: win32.h:12
void latch_sigusr1_handler(void)
Definition: latch.c:1473
#define WL_SOCKET_CONNECTED
Definition: latch.h:133
void InitSharedLatch(volatile Latch *latch)
Definition: latch.c:252
Latch * latch
Definition: latch.c:92
#define WL_LATCH_SET
Definition: latch.h:124
static volatile sig_atomic_t waiting
Definition: latch.c:115
static int selfpipe_owner_pid
Definition: latch.c:122
#define POSTMASTER_FD_WATCH
Definition: postmaster.h:42
#define read(a, b, c)
Definition: win32.h:13
int WaitEventSetWait(WaitEventSet *set, long timeout, WaitEvent *occurred_events, int nevents, uint32 wait_event_info)
Definition: latch.c:921