PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
latch.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * latch.c
4  * Routines for inter-process latches
5  *
6  * The Unix implementation uses the so-called self-pipe trick to overcome the
7  * race condition involved with poll() (or epoll_wait() on linux) and setting
8  * a global flag in the signal handler. When a latch is set and the current
9  * process is waiting for it, the signal handler wakes up the poll() in
10  * WaitLatch by writing a byte to a pipe. A signal by itself doesn't interrupt
11  * poll() on all platforms, and even on platforms where it does, a signal that
12  * arrives just before the poll() call does not prevent poll() from entering
13  * sleep. An incoming byte on a pipe however reliably interrupts the sleep,
14  * and causes poll() to return immediately even if the signal arrives before
15  * poll() begins.
16  *
17  * When SetLatch is called from the same process that owns the latch,
18  * SetLatch writes the byte directly to the pipe. If it's owned by another
19  * process, SIGUSR1 is sent and the signal handler in the waiting process
20  * writes the byte to the pipe on behalf of the signaling process.
21  *
22  * The Windows implementation uses Windows events that are inherited by all
23  * postmaster child processes. There's no need for the self-pipe trick there.
24  *
25  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
26  * Portions Copyright (c) 1994, Regents of the University of California
27  *
28  * IDENTIFICATION
29  * src/backend/storage/ipc/latch.c
30  *
31  *-------------------------------------------------------------------------
32  */
33 #include "postgres.h"
34 
35 #include <fcntl.h>
36 #include <limits.h>
37 #include <signal.h>
38 #include <unistd.h>
39 #ifdef HAVE_SYS_EPOLL_H
40 #include <sys/epoll.h>
41 #endif
42 #ifdef HAVE_POLL_H
43 #include <poll.h>
44 #endif
45 
46 #include "miscadmin.h"
47 #include "pgstat.h"
48 #include "port/atomics.h"
49 #include "portability/instr_time.h"
50 #include "postmaster/postmaster.h"
51 #include "storage/latch.h"
52 #include "storage/pmsignal.h"
53 #include "storage/shmem.h"
54 
55 /*
56  * Select the fd readiness primitive to use. Normally the "most modern"
57  * primitive supported by the OS will be used, but for testing it can be
58  * useful to manually specify the used primitive. If desired, just add a
59  * define somewhere before this block.
60  */
61 #if defined(WAIT_USE_EPOLL) || defined(WAIT_USE_POLL) || \
62  defined(WAIT_USE_WIN32)
63 /* don't overwrite manual choice */
64 #elif defined(HAVE_SYS_EPOLL_H)
65 #define WAIT_USE_EPOLL
66 #elif defined(HAVE_POLL)
67 #define WAIT_USE_POLL
68 #elif WIN32
69 #define WAIT_USE_WIN32
70 #else
71 #error "no wait set implementation available"
72 #endif
73 
74 /* typedef in latch.h */
76 {
77  int nevents; /* number of registered events */
78  int nevents_space; /* maximum number of events in this set */
79 
80  /*
81  * Array, of nevents_space length, storing the definition of events this
82  * set is waiting for.
83  */
85 
86  /*
87  * If WL_LATCH_SET is specified in any wait event, latch is a pointer to
88  * said latch, and latch_pos the offset in the ->events array. This is
89  * useful because we check the state of the latch before performing doing
90  * syscalls related to waiting.
91  */
93  int latch_pos;
94 
95 #if defined(WAIT_USE_EPOLL)
96  int epoll_fd;
97  /* epoll_wait returns events in a user provided arrays, allocate once */
98  struct epoll_event *epoll_ret_events;
99 #elif defined(WAIT_USE_POLL)
100  /* poll expects events to be waited on every poll() call, prepare once */
101  struct pollfd *pollfds;
102 #elif defined(WAIT_USE_WIN32)
103 
104  /*
105  * Array of windows events. The first element always contains
106  * pgwin32_signal_event, so the remaining elements are offset by one (i.e.
107  * event->pos + 1).
108  */
109  HANDLE *handles;
110 #endif
111 };
112 
113 #ifndef WIN32
114 /* Are we currently in WaitLatch? The signal handler would like to know. */
115 static volatile sig_atomic_t waiting = false;
116 
117 /* Read and write ends of the self-pipe */
118 static int selfpipe_readfd = -1;
119 static int selfpipe_writefd = -1;
120 
121 /* Process owning the self-pipe --- needed for checking purposes */
122 static int selfpipe_owner_pid = 0;
123 
124 /* Private function prototypes */
125 static void sendSelfPipeByte(void);
126 static void drainSelfPipe(void);
127 #endif /* WIN32 */
128 
129 #if defined(WAIT_USE_EPOLL)
130 static void WaitEventAdjustEpoll(WaitEventSet *set, WaitEvent *event, int action);
131 #elif defined(WAIT_USE_POLL)
132 static void WaitEventAdjustPoll(WaitEventSet *set, WaitEvent *event);
133 #elif defined(WAIT_USE_WIN32)
134 static void WaitEventAdjustWin32(WaitEventSet *set, WaitEvent *event);
135 #endif
136 
137 static inline int WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
138  WaitEvent *occurred_events, int nevents);
139 
140 /*
141  * Initialize the process-local latch infrastructure.
142  *
143  * This must be called once during startup of any process that can wait on
144  * latches, before it issues any InitLatch() or OwnLatch() calls.
145  */
146 void
148 {
149 #ifndef WIN32
150  int pipefd[2];
151 
152  if (IsUnderPostmaster)
153  {
154  /*
155  * We might have inherited connections to a self-pipe created by the
156  * postmaster. It's critical that child processes create their own
157  * self-pipes, of course, and we really want them to close the
158  * inherited FDs for safety's sake.
159  */
160  if (selfpipe_owner_pid != 0)
161  {
162  /* Assert we go through here but once in a child process */
164  /* Release postmaster's pipe FDs; ignore any error */
165  (void) close(selfpipe_readfd);
166  (void) close(selfpipe_writefd);
167  /* Clean up, just for safety's sake; we'll set these below */
169  selfpipe_owner_pid = 0;
170  }
171  else
172  {
173  /*
174  * Postmaster didn't create a self-pipe ... or else we're in an
175  * EXEC_BACKEND build, in which case it doesn't matter since the
176  * postmaster's pipe FDs were closed by the action of FD_CLOEXEC.
177  */
178  Assert(selfpipe_readfd == -1);
179  }
180  }
181  else
182  {
183  /* In postmaster or standalone backend, assert we do this but once */
184  Assert(selfpipe_readfd == -1);
186  }
187 
188  /*
189  * Set up the self-pipe that allows a signal handler to wake up the
190  * poll()/epoll_wait() in WaitLatch. Make the write-end non-blocking, so
191  * that SetLatch won't block if the event has already been set many times
192  * filling the kernel buffer. Make the read-end non-blocking too, so that
193  * we can easily clear the pipe by reading until EAGAIN or EWOULDBLOCK.
194  * Also, make both FDs close-on-exec, since we surely do not want any
195  * child processes messing with them.
196  */
197  if (pipe(pipefd) < 0)
198  elog(FATAL, "pipe() failed: %m");
199  if (fcntl(pipefd[0], F_SETFL, O_NONBLOCK) == -1)
200  elog(FATAL, "fcntl(F_SETFL) failed on read-end of self-pipe: %m");
201  if (fcntl(pipefd[1], F_SETFL, O_NONBLOCK) == -1)
202  elog(FATAL, "fcntl(F_SETFL) failed on write-end of self-pipe: %m");
203  if (fcntl(pipefd[0], F_SETFD, FD_CLOEXEC) == -1)
204  elog(FATAL, "fcntl(F_SETFD) failed on read-end of self-pipe: %m");
205  if (fcntl(pipefd[1], F_SETFD, FD_CLOEXEC) == -1)
206  elog(FATAL, "fcntl(F_SETFD) failed on write-end of self-pipe: %m");
207 
208  selfpipe_readfd = pipefd[0];
209  selfpipe_writefd = pipefd[1];
211 #else
212  /* currently, nothing to do here for Windows */
213 #endif
214 }
215 
216 /*
217  * Initialize a process-local latch.
218  */
219 void
220 InitLatch(volatile Latch *latch)
221 {
222  latch->is_set = false;
223  latch->owner_pid = MyProcPid;
224  latch->is_shared = false;
225 
226 #ifndef WIN32
227  /* Assert InitializeLatchSupport has been called in this process */
229 #else
230  latch->event = CreateEvent(NULL, TRUE, FALSE, NULL);
231  if (latch->event == NULL)
232  elog(ERROR, "CreateEvent failed: error code %lu", GetLastError());
233 #endif /* WIN32 */
234 }
235 
236 /*
237  * Initialize a shared latch that can be set from other processes. The latch
238  * is initially owned by no-one; use OwnLatch to associate it with the
239  * current process.
240  *
241  * InitSharedLatch needs to be called in postmaster before forking child
242  * processes, usually right after allocating the shared memory block
243  * containing the latch with ShmemInitStruct. (The Unix implementation
244  * doesn't actually require that, but the Windows one does.) Because of
245  * this restriction, we have no concurrency issues to worry about here.
246  *
247  * Note that other handles created in this module are never marked as
248  * inheritable. Thus we do not need to worry about cleaning up child
249  * process references to postmaster-private latches or WaitEventSets.
250  */
251 void
252 InitSharedLatch(volatile Latch *latch)
253 {
254 #ifdef WIN32
255  SECURITY_ATTRIBUTES sa;
256 
257  /*
258  * Set up security attributes to specify that the events are inherited.
259  */
260  ZeroMemory(&sa, sizeof(sa));
261  sa.nLength = sizeof(sa);
262  sa.bInheritHandle = TRUE;
263 
264  latch->event = CreateEvent(&sa, TRUE, FALSE, NULL);
265  if (latch->event == NULL)
266  elog(ERROR, "CreateEvent failed: error code %lu", GetLastError());
267 #endif
268 
269  latch->is_set = false;
270  latch->owner_pid = 0;
271  latch->is_shared = true;
272 }
273 
274 /*
275  * Associate a shared latch with the current process, allowing it to
276  * wait on the latch.
277  *
278  * Although there is a sanity check for latch-already-owned, we don't do
279  * any sort of locking here, meaning that we could fail to detect the error
280  * if two processes try to own the same latch at about the same time. If
281  * there is any risk of that, caller must provide an interlock to prevent it.
282  *
283  * In any process that calls OwnLatch(), make sure that
284  * latch_sigusr1_handler() is called from the SIGUSR1 signal handler,
285  * as shared latches use SIGUSR1 for inter-process communication.
286  */
287 void
288 OwnLatch(volatile Latch *latch)
289 {
290  /* Sanity checks */
291  Assert(latch->is_shared);
292 
293 #ifndef WIN32
294  /* Assert InitializeLatchSupport has been called in this process */
296 #endif
297 
298  if (latch->owner_pid != 0)
299  elog(ERROR, "latch already owned");
300 
301  latch->owner_pid = MyProcPid;
302 }
303 
304 /*
305  * Disown a shared latch currently owned by the current process.
306  */
307 void
308 DisownLatch(volatile Latch *latch)
309 {
310  Assert(latch->is_shared);
311  Assert(latch->owner_pid == MyProcPid);
312 
313  latch->owner_pid = 0;
314 }
315 
316 /*
317  * Wait for a given latch to be set, or for postmaster death, or until timeout
318  * is exceeded. 'wakeEvents' is a bitmask that specifies which of those events
319  * to wait for. If the latch is already set (and WL_LATCH_SET is given), the
320  * function returns immediately.
321  *
322  * The "timeout" is given in milliseconds. It must be >= 0 if WL_TIMEOUT flag
323  * is given. Although it is declared as "long", we don't actually support
324  * timeouts longer than INT_MAX milliseconds. Note that some extra overhead
325  * is incurred when WL_TIMEOUT is given, so avoid using a timeout if possible.
326  *
327  * The latch must be owned by the current process, ie. it must be a
328  * process-local latch initialized with InitLatch, or a shared latch
329  * associated with the current process by calling OwnLatch.
330  *
331  * Returns bit mask indicating which condition(s) caused the wake-up. Note
332  * that if multiple wake-up conditions are true, there is no guarantee that
333  * we return all of them in one call, but we will return at least one.
334  */
335 int
336 WaitLatch(volatile Latch *latch, int wakeEvents, long timeout,
337  uint32 wait_event_info)
338 {
339  return WaitLatchOrSocket(latch, wakeEvents, PGINVALID_SOCKET, timeout,
340  wait_event_info);
341 }
342 
343 /*
344  * Like WaitLatch, but with an extra socket argument for WL_SOCKET_*
345  * conditions.
346  *
347  * When waiting on a socket, EOF and error conditions are reported by
348  * returning the socket as readable/writable or both, depending on
349  * WL_SOCKET_READABLE/WL_SOCKET_WRITEABLE being specified.
350  *
351  * NB: These days this is just a wrapper around the WaitEventSet API. When
352  * using a latch very frequently, consider creating a longer living
353  * WaitEventSet instead; that's more efficient.
354  */
355 int
356 WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
357  long timeout, uint32 wait_event_info)
358 {
359  int ret = 0;
360  int rc;
361  WaitEvent event;
363 
364  if (wakeEvents & WL_TIMEOUT)
365  Assert(timeout >= 0);
366  else
367  timeout = -1;
368 
369  if (wakeEvents & WL_LATCH_SET)
370  AddWaitEventToSet(set, WL_LATCH_SET, PGINVALID_SOCKET,
371  (Latch *) latch, NULL);
372 
373  if (wakeEvents & WL_POSTMASTER_DEATH)
374  AddWaitEventToSet(set, WL_POSTMASTER_DEATH, PGINVALID_SOCKET,
375  NULL, NULL);
376 
377  if (wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
378  {
379  int ev;
380 
381  ev = wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE);
382  AddWaitEventToSet(set, ev, sock, NULL, NULL);
383  }
384 
385  rc = WaitEventSetWait(set, timeout, &event, 1, wait_event_info);
386 
387  if (rc == 0)
388  ret |= WL_TIMEOUT;
389  else
390  {
391  ret |= event.events & (WL_LATCH_SET |
392  WL_POSTMASTER_DEATH |
395  }
396 
397  FreeWaitEventSet(set);
398 
399  return ret;
400 }
401 
402 /*
403  * Sets a latch and wakes up anyone waiting on it.
404  *
405  * This is cheap if the latch is already set, otherwise not so much.
406  *
407  * NB: when calling this in a signal handler, be sure to save and restore
408  * errno around it. (That's standard practice in most signal handlers, of
409  * course, but we used to omit it in handlers that only set a flag.)
410  *
411  * NB: this function is called from critical sections and signal handlers so
412  * throwing an error is not a good idea.
413  */
414 void
415 SetLatch(volatile Latch *latch)
416 {
417 #ifndef WIN32
418  pid_t owner_pid;
419 #else
420  HANDLE handle;
421 #endif
422 
423  /*
424  * The memory barrier has to be placed here to ensure that any flag
425  * variables possibly changed by this process have been flushed to main
426  * memory, before we check/set is_set.
427  */
429 
430  /* Quick exit if already set */
431  if (latch->is_set)
432  return;
433 
434  latch->is_set = true;
435 
436 #ifndef WIN32
437 
438  /*
439  * See if anyone's waiting for the latch. It can be the current process if
440  * we're in a signal handler. We use the self-pipe to wake up the
441  * poll()/epoll_wait() in that case. If it's another process, send a
442  * signal.
443  *
444  * Fetch owner_pid only once, in case the latch is concurrently getting
445  * owned or disowned. XXX: This assumes that pid_t is atomic, which isn't
446  * guaranteed to be true! In practice, the effective range of pid_t fits
447  * in a 32 bit integer, and so should be atomic. In the worst case, we
448  * might end up signaling the wrong process. Even then, you're very
449  * unlucky if a process with that bogus pid exists and belongs to
450  * Postgres; and PG database processes should handle excess SIGUSR1
451  * interrupts without a problem anyhow.
452  *
453  * Another sort of race condition that's possible here is for a new
454  * process to own the latch immediately after we look, so we don't signal
455  * it. This is okay so long as all callers of ResetLatch/WaitLatch follow
456  * the standard coding convention of waiting at the bottom of their loops,
457  * not the top, so that they'll correctly process latch-setting events
458  * that happen before they enter the loop.
459  */
460  owner_pid = latch->owner_pid;
461  if (owner_pid == 0)
462  return;
463  else if (owner_pid == MyProcPid)
464  {
465  if (waiting)
467  }
468  else
469  kill(owner_pid, SIGUSR1);
470 #else
471 
472  /*
473  * See if anyone's waiting for the latch. It can be the current process if
474  * we're in a signal handler.
475  *
476  * Use a local variable here just in case somebody changes the event field
477  * concurrently (which really should not happen).
478  */
479  handle = latch->event;
480  if (handle)
481  {
482  SetEvent(handle);
483 
484  /*
485  * Note that we silently ignore any errors. We might be in a signal
486  * handler or other critical path where it's not safe to call elog().
487  */
488  }
489 #endif
490 
491 }
492 
493 /*
494  * Clear the latch. Calling WaitLatch after this will sleep, unless
495  * the latch is set again before the WaitLatch call.
496  */
497 void
498 ResetLatch(volatile Latch *latch)
499 {
500  /* Only the owner should reset the latch */
501  Assert(latch->owner_pid == MyProcPid);
502 
503  latch->is_set = false;
504 
505  /*
506  * Ensure that the write to is_set gets flushed to main memory before we
507  * examine any flag variables. Otherwise a concurrent SetLatch might
508  * falsely conclude that it needn't signal us, even though we have missed
509  * seeing some flag updates that SetLatch was supposed to inform us of.
510  */
512 }
513 
514 /*
515  * Create a WaitEventSet with space for nevents different events to wait for.
516  *
517  * These events can then be efficiently waited upon together, using
518  * WaitEventSetWait().
519  */
520 WaitEventSet *
521 CreateWaitEventSet(MemoryContext context, int nevents)
522 {
523  WaitEventSet *set;
524  char *data;
525  Size sz = 0;
526 
527  /*
528  * Use MAXALIGN size/alignment to guarantee that later uses of memory are
529  * aligned correctly. E.g. epoll_event might need 8 byte alignment on some
530  * platforms, but earlier allocations like WaitEventSet and WaitEvent
531  * might not sized to guarantee that when purely using sizeof().
532  */
533  sz += MAXALIGN(sizeof(WaitEventSet));
534  sz += MAXALIGN(sizeof(WaitEvent) * nevents);
535 
536 #if defined(WAIT_USE_EPOLL)
537  sz += MAXALIGN(sizeof(struct epoll_event) * nevents);
538 #elif defined(WAIT_USE_POLL)
539  sz += MAXALIGN(sizeof(struct pollfd) * nevents);
540 #elif defined(WAIT_USE_WIN32)
541  /* need space for the pgwin32_signal_event */
542  sz += MAXALIGN(sizeof(HANDLE) * (nevents + 1));
543 #endif
544 
545  data = (char *) MemoryContextAllocZero(context, sz);
546 
547  set = (WaitEventSet *) data;
548  data += MAXALIGN(sizeof(WaitEventSet));
549 
550  set->events = (WaitEvent *) data;
551  data += MAXALIGN(sizeof(WaitEvent) * nevents);
552 
553 #if defined(WAIT_USE_EPOLL)
554  set->epoll_ret_events = (struct epoll_event *) data;
555  data += MAXALIGN(sizeof(struct epoll_event) * nevents);
556 #elif defined(WAIT_USE_POLL)
557  set->pollfds = (struct pollfd *) data;
558  data += MAXALIGN(sizeof(struct pollfd) * nevents);
559 #elif defined(WAIT_USE_WIN32)
560  set->handles = (HANDLE) data;
561  data += MAXALIGN(sizeof(HANDLE) * nevents);
562 #endif
563 
564  set->latch = NULL;
565  set->nevents_space = nevents;
566 
567 #if defined(WAIT_USE_EPOLL)
568 #ifdef EPOLL_CLOEXEC
569  set->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
570  if (set->epoll_fd < 0)
571  elog(ERROR, "epoll_create1 failed: %m");
572 #else
573  /* cope with ancient glibc lacking epoll_create1 (e.g., RHEL5) */
574  set->epoll_fd = epoll_create(nevents);
575  if (set->epoll_fd < 0)
576  elog(ERROR, "epoll_create failed: %m");
577  if (fcntl(set->epoll_fd, F_SETFD, FD_CLOEXEC) == -1)
578  elog(ERROR, "fcntl(F_SETFD) failed on epoll descriptor: %m");
579 #endif /* EPOLL_CLOEXEC */
580 #elif defined(WAIT_USE_WIN32)
581 
582  /*
583  * To handle signals while waiting, we need to add a win32 specific event.
584  * We accounted for the additional event at the top of this routine. See
585  * port/win32/signal.c for more details.
586  *
587  * Note: pgwin32_signal_event should be first to ensure that it will be
588  * reported when multiple events are set. We want to guarantee that
589  * pending signals are serviced.
590  */
591  set->handles[0] = pgwin32_signal_event;
592  StaticAssertStmt(WSA_INVALID_EVENT == NULL, "");
593 #endif
594 
595  return set;
596 }
597 
598 /*
599  * Free a previously created WaitEventSet.
600  *
601  * Note: preferably, this shouldn't have to free any resources that could be
602  * inherited across an exec(). If it did, we'd likely leak those resources in
603  * many scenarios. For the epoll case, we ensure that by setting FD_CLOEXEC
604  * when the FD is created. For the Windows case, we assume that the handles
605  * involved are non-inheritable.
606  */
607 void
609 {
610 #if defined(WAIT_USE_EPOLL)
611  close(set->epoll_fd);
612 #elif defined(WAIT_USE_WIN32)
613  WaitEvent *cur_event;
614 
615  for (cur_event = set->events;
616  cur_event < (set->events + set->nevents);
617  cur_event++)
618  {
619  if (cur_event->events & WL_LATCH_SET)
620  {
621  /* uses the latch's HANDLE */
622  }
623  else if (cur_event->events & WL_POSTMASTER_DEATH)
624  {
625  /* uses PostmasterHandle */
626  }
627  else
628  {
629  /* Clean up the event object we created for the socket */
630  WSAEventSelect(cur_event->fd, NULL, 0);
631  WSACloseEvent(set->handles[cur_event->pos + 1]);
632  }
633  }
634 #endif
635 
636  pfree(set);
637 }
638 
639 /* ---
640  * Add an event to the set. Possible events are:
641  * - WL_LATCH_SET: Wait for the latch to be set
642  * - WL_POSTMASTER_DEATH: Wait for postmaster to die
643  * - WL_SOCKET_READABLE: Wait for socket to become readable
644  * can be combined in one event with WL_SOCKET_WRITEABLE
645  * - WL_SOCKET_WRITEABLE: Wait for socket to become writeable
646  * can be combined with WL_SOCKET_READABLE
647  *
648  * Returns the offset in WaitEventSet->events (starting from 0), which can be
649  * used to modify previously added wait events using ModifyWaitEvent().
650  *
651  * In the WL_LATCH_SET case the latch must be owned by the current process,
652  * i.e. it must be a process-local latch initialized with InitLatch, or a
653  * shared latch associated with the current process by calling OwnLatch.
654  *
655  * In the WL_SOCKET_READABLE/WRITEABLE case, EOF and error conditions are
656  * reported by returning the socket as readable/writable or both, depending on
657  * WL_SOCKET_READABLE/WRITEABLE being specified.
658  *
659  * The user_data pointer specified here will be set for the events returned
660  * by WaitEventSetWait(), allowing to easily associate additional data with
661  * events.
662  */
663 int
665  void *user_data)
666 {
667  WaitEvent *event;
668 
669  /* not enough space */
670  Assert(set->nevents < set->nevents_space);
671 
672  if (latch)
673  {
674  if (latch->owner_pid != MyProcPid)
675  elog(ERROR, "cannot wait on a latch owned by another process");
676  if (set->latch)
677  elog(ERROR, "cannot wait on more than one latch");
678  if ((events & WL_LATCH_SET) != WL_LATCH_SET)
679  elog(ERROR, "latch events only support being set");
680  }
681  else
682  {
683  if (events & WL_LATCH_SET)
684  elog(ERROR, "cannot wait on latch without a specified latch");
685  }
686 
687  /* waiting for socket readiness without a socket indicates a bug */
688  if (fd == PGINVALID_SOCKET &&
690  elog(ERROR, "cannot wait on socket event without a socket");
691 
692  event = &set->events[set->nevents];
693  event->pos = set->nevents++;
694  event->fd = fd;
695  event->events = events;
696  event->user_data = user_data;
697 #ifdef WIN32
698  event->reset = false;
699 #endif
700 
701  if (events == WL_LATCH_SET)
702  {
703  set->latch = latch;
704  set->latch_pos = event->pos;
705 #ifndef WIN32
706  event->fd = selfpipe_readfd;
707 #endif
708  }
709  else if (events == WL_POSTMASTER_DEATH)
710  {
711 #ifndef WIN32
713 #endif
714  }
715 
716  /* perform wait primitive specific initialization, if needed */
717 #if defined(WAIT_USE_EPOLL)
718  WaitEventAdjustEpoll(set, event, EPOLL_CTL_ADD);
719 #elif defined(WAIT_USE_POLL)
720  WaitEventAdjustPoll(set, event);
721 #elif defined(WAIT_USE_WIN32)
722  WaitEventAdjustWin32(set, event);
723 #endif
724 
725  return event->pos;
726 }
727 
728 /*
729  * Change the event mask and, in the WL_LATCH_SET case, the latch associated
730  * with the WaitEvent.
731  *
732  * 'pos' is the id returned by AddWaitEventToSet.
733  */
734 void
735 ModifyWaitEvent(WaitEventSet *set, int pos, uint32 events, Latch *latch)
736 {
737  WaitEvent *event;
738 
739  Assert(pos < set->nevents);
740 
741  event = &set->events[pos];
742 
743  /*
744  * If neither the event mask nor the associated latch changes, return
745  * early. That's an important optimization for some sockets, where
746  * ModifyWaitEvent is frequently used to switch from waiting for reads to
747  * waiting on writes.
748  */
749  if (events == event->events &&
750  (!(event->events & WL_LATCH_SET) || set->latch == latch))
751  return;
752 
753  if (event->events & WL_LATCH_SET &&
754  events != event->events)
755  {
756  /* we could allow to disable latch events for a while */
757  elog(ERROR, "cannot modify latch event");
758  }
759 
760  if (event->events & WL_POSTMASTER_DEATH)
761  {
762  elog(ERROR, "cannot modify postmaster death event");
763  }
764 
765  /* FIXME: validate event mask */
766  event->events = events;
767 
768  if (events == WL_LATCH_SET)
769  {
770  set->latch = latch;
771  }
772 
773 #if defined(WAIT_USE_EPOLL)
774  WaitEventAdjustEpoll(set, event, EPOLL_CTL_MOD);
775 #elif defined(WAIT_USE_POLL)
776  WaitEventAdjustPoll(set, event);
777 #elif defined(WAIT_USE_WIN32)
778  WaitEventAdjustWin32(set, event);
779 #endif
780 }
781 
#if defined(WAIT_USE_EPOLL)
/*
 * action can be one of EPOLL_CTL_ADD | EPOLL_CTL_MOD | EPOLL_CTL_DEL
 */
static void
WaitEventAdjustEpoll(WaitEventSet *set, WaitEvent *event, int action)
{
	struct epoll_event epoll_ev;
	int			rc;

	/* pointer to our event, returned by epoll_wait */
	epoll_ev.data.ptr = event;
	/* always wait for errors */
	epoll_ev.events = EPOLLERR | EPOLLHUP;

	/* prepare pollfd entry once */
	if (event->events == WL_LATCH_SET)
	{
		Assert(set->latch != NULL);
		epoll_ev.events |= EPOLLIN;
	}
	else if (event->events == WL_POSTMASTER_DEATH)
	{
		epoll_ev.events |= EPOLLIN;
	}
	else
	{
		Assert(event->fd != PGINVALID_SOCKET);
		Assert(event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE));

		if (event->events & WL_SOCKET_READABLE)
			epoll_ev.events |= EPOLLIN;
		if (event->events & WL_SOCKET_WRITEABLE)
			epoll_ev.events |= EPOLLOUT;
	}

	/*
	 * Even though unused, we also pass epoll_ev as the data argument if
	 * EPOLL_CTL_DEL is passed as action.  There used to be an epoll bug
	 * requiring that, and actually it makes the code simpler...
	 */
	rc = epoll_ctl(set->epoll_fd, action, event->fd, &epoll_ev);

	if (rc < 0)
		ereport(ERROR,
				(errcode_for_socket_access(),
				 errmsg("epoll_ctl() failed: %m")));
}
#endif
831 
#if defined(WAIT_USE_POLL)
/*
 * Fill in the pollfd entry corresponding to 'event', translating our
 * event mask into poll(2) flags.
 */
static void
WaitEventAdjustPoll(WaitEventSet *set, WaitEvent *event)
{
	struct pollfd *pollfd = &set->pollfds[event->pos];

	pollfd->revents = 0;
	pollfd->fd = event->fd;

	/* prepare pollfd entry once */
	if (event->events == WL_LATCH_SET)
	{
		Assert(set->latch != NULL);
		pollfd->events = POLLIN;
	}
	else if (event->events == WL_POSTMASTER_DEATH)
	{
		pollfd->events = POLLIN;
	}
	else
	{
		Assert(event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE));
		pollfd->events = 0;
		if (event->events & WL_SOCKET_READABLE)
			pollfd->events |= POLLIN;
		if (event->events & WL_SOCKET_WRITEABLE)
			pollfd->events |= POLLOUT;
	}

	Assert(event->fd != PGINVALID_SOCKET);
}
#endif
864 
#if defined(WAIT_USE_WIN32)
/*
 * Install/refresh the Windows event handle corresponding to 'event'.
 *
 * Slot 0 of set->handles holds pgwin32_signal_event, so the handle for a
 * given event lives at index event->pos + 1.
 */
static void
WaitEventAdjustWin32(WaitEventSet *set, WaitEvent *event)
{
	HANDLE	   *handle = &set->handles[event->pos + 1];

	if (event->events == WL_LATCH_SET)
	{
		/* wait directly on the latch's own event handle */
		Assert(set->latch != NULL);
		*handle = set->latch->event;
	}
	else if (event->events == WL_POSTMASTER_DEATH)
	{
		/* postmaster death is signaled via its process handle */
		*handle = PostmasterHandle;
	}
	else
	{
		int			flags = FD_CLOSE;	/* always check for errors/EOF */

		if (event->events & WL_SOCKET_READABLE)
			flags |= FD_READ;
		if (event->events & WL_SOCKET_WRITEABLE)
			flags |= FD_WRITE;

		/* create the per-socket event object on first use */
		if (*handle == WSA_INVALID_EVENT)
		{
			*handle = WSACreateEvent();
			if (*handle == WSA_INVALID_EVENT)
				elog(ERROR, "failed to create event for socket: error code %u",
					 WSAGetLastError());
		}
		/* (re)associate the socket's readiness conditions with the event */
		if (WSAEventSelect(event->fd, *handle, flags) != 0)
			elog(ERROR, "failed to set up event for socket: error code %u",
				 WSAGetLastError());

		Assert(event->fd != PGINVALID_SOCKET);
	}
}
#endif
904 
905 /*
906  * Wait for events added to the set to happen, or until the timeout is
907  * reached. At most nevents occurred events are returned.
908  *
909  * If timeout = -1, block until an event occurs; if 0, check sockets for
910  * readiness, but don't block; if > 0, block for at most timeout milliseconds.
911  *
912  * Returns the number of events occurred, or 0 if the timeout was reached.
913  *
914  * Returned events will have the fd, pos, user_data fields set to the
915  * values associated with the registered event.
916  */
917 int
918 WaitEventSetWait(WaitEventSet *set, long timeout,
919  WaitEvent *occurred_events, int nevents,
920  uint32 wait_event_info)
921 {
922  int returned_events = 0;
924  instr_time cur_time;
925  long cur_timeout = -1;
926 
927  Assert(nevents > 0);
928 
929  /*
930  * Initialize timeout if requested. We must record the current time so
931  * that we can determine the remaining timeout if interrupted.
932  */
933  if (timeout >= 0)
934  {
935  INSTR_TIME_SET_CURRENT(start_time);
936  Assert(timeout >= 0 && timeout <= INT_MAX);
937  cur_timeout = timeout;
938  }
939 
940  pgstat_report_wait_start(wait_event_info);
941 
942 #ifndef WIN32
943  waiting = true;
944 #else
945  /* Ensure that signals are serviced even if latch is already set */
947 #endif
948  while (returned_events == 0)
949  {
950  int rc;
951 
952  /*
953  * Check if the latch is set already. If so, leave the loop
954  * immediately, avoid blocking again. We don't attempt to report any
955  * other events that might also be satisfied.
956  *
957  * If someone sets the latch between this and the
958  * WaitEventSetWaitBlock() below, the setter will write a byte to the
959  * pipe (or signal us and the signal handler will do that), and the
960  * readiness routine will return immediately.
961  *
962  * On unix, If there's a pending byte in the self pipe, we'll notice
963  * whenever blocking. Only clearing the pipe in that case avoids
964  * having to drain it every time WaitLatchOrSocket() is used. Should
965  * the pipe-buffer fill up we're still ok, because the pipe is in
966  * nonblocking mode. It's unlikely for that to happen, because the
967  * self pipe isn't filled unless we're blocking (waiting = true), or
968  * from inside a signal handler in latch_sigusr1_handler().
969  *
970  * On windows, we'll also notice if there's a pending event for the
971  * latch when blocking, but there's no danger of anything filling up,
972  * as "Setting an event that is already set has no effect.".
973  *
974  * Note: we assume that the kernel calls involved in latch management
975  * will provide adequate synchronization on machines with weak memory
976  * ordering, so that we cannot miss seeing is_set if a notification
977  * has already been queued.
978  */
979  if (set->latch && set->latch->is_set)
980  {
981  occurred_events->fd = PGINVALID_SOCKET;
982  occurred_events->pos = set->latch_pos;
983  occurred_events->user_data =
984  set->events[set->latch_pos].user_data;
985  occurred_events->events = WL_LATCH_SET;
986  occurred_events++;
987  returned_events++;
988 
989  break;
990  }
991 
992  /*
993  * Wait for events using the readiness primitive chosen at the top of
994  * this file. If -1 is returned, a timeout has occurred, if 0 we have
995  * to retry, everything >= 1 is the number of returned events.
996  */
997  rc = WaitEventSetWaitBlock(set, cur_timeout,
998  occurred_events, nevents);
999 
1000  if (rc == -1)
1001  break; /* timeout occurred */
1002  else
1003  returned_events = rc;
1004 
1005  /* If we're not done, update cur_timeout for next iteration */
1006  if (returned_events == 0 && timeout >= 0)
1007  {
1008  INSTR_TIME_SET_CURRENT(cur_time);
1009  INSTR_TIME_SUBTRACT(cur_time, start_time);
1010  cur_timeout = timeout - (long) INSTR_TIME_GET_MILLISEC(cur_time);
1011  if (cur_timeout <= 0)
1012  break;
1013  }
1014  }
1015 #ifndef WIN32
1016  waiting = false;
1017 #endif
1018 
1020 
1021  return returned_events;
1022 }
1023 
1024 
1025 #if defined(WAIT_USE_EPOLL)
1026 
1027 /*
1028  * Wait using linux's epoll_wait(2).
1029  *
1030  * This is the preferrable wait method, as several readiness notifications are
1031  * delivered, without having to iterate through all of set->events. The return
1032  * epoll_event struct contain a pointer to our events, making association
1033  * easy.
1034  */
1035 static inline int
1036 WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
1037  WaitEvent *occurred_events, int nevents)
1038 {
1039  int returned_events = 0;
1040  int rc;
1041  WaitEvent *cur_event;
1042  struct epoll_event *cur_epoll_event;
1043 
1044  /* Sleep */
1045  rc = epoll_wait(set->epoll_fd, set->epoll_ret_events,
1046  nevents, cur_timeout);
1047 
1048  /* Check return code */
1049  if (rc < 0)
1050  {
1051  /* EINTR is okay, otherwise complain */
1052  if (errno != EINTR)
1053  {
1054  waiting = false;
1055  ereport(ERROR,
1057  errmsg("epoll_wait() failed: %m")));
1058  }
1059  return 0;
1060  }
1061  else if (rc == 0)
1062  {
1063  /* timeout exceeded */
1064  return -1;
1065  }
1066 
1067  /*
1068  * At least one event occurred, iterate over the returned epoll events
1069  * until they're either all processed, or we've returned all the events
1070  * the caller desired.
1071  */
1072  for (cur_epoll_event = set->epoll_ret_events;
1073  cur_epoll_event < (set->epoll_ret_events + rc) &&
1074  returned_events < nevents;
1075  cur_epoll_event++)
1076  {
1077  /* epoll's data pointer is set to the associated WaitEvent */
1078  cur_event = (WaitEvent *) cur_epoll_event->data.ptr;
1079 
1080  occurred_events->pos = cur_event->pos;
1081  occurred_events->user_data = cur_event->user_data;
1082  occurred_events->events = 0;
1083 
1084  if (cur_event->events == WL_LATCH_SET &&
1085  cur_epoll_event->events & (EPOLLIN | EPOLLERR | EPOLLHUP))
1086  {
1087  /* There's data in the self-pipe, clear it. */
1088  drainSelfPipe();
1089 
1090  if (set->latch->is_set)
1091  {
1092  occurred_events->fd = PGINVALID_SOCKET;
1093  occurred_events->events = WL_LATCH_SET;
1094  occurred_events++;
1095  returned_events++;
1096  }
1097  }
1098  else if (cur_event->events == WL_POSTMASTER_DEATH &&
1099  cur_epoll_event->events & (EPOLLIN | EPOLLERR | EPOLLHUP))
1100  {
1101  /*
1102  * We expect an EPOLLHUP when the remote end is closed, but
1103  * because we don't expect the pipe to become readable or to have
1104  * any errors either, treat those cases as postmaster death, too.
1105  *
1106  * Be paranoid about a spurious event signalling the postmaster as
1107  * being dead. There have been reports about that happening with
1108  * older primitives (select(2) to be specific), and a spurious
1109  * WL_POSTMASTER_DEATH event would be painful. Re-checking doesn't
1110  * cost much.
1111  */
1112  if (!PostmasterIsAlive())
1113  {
1114  occurred_events->fd = PGINVALID_SOCKET;
1115  occurred_events->events = WL_POSTMASTER_DEATH;
1116  occurred_events++;
1117  returned_events++;
1118  }
1119  }
1120  else if (cur_event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
1121  {
1122  Assert(cur_event->fd != PGINVALID_SOCKET);
1123 
1124  if ((cur_event->events & WL_SOCKET_READABLE) &&
1125  (cur_epoll_event->events & (EPOLLIN | EPOLLERR | EPOLLHUP)))
1126  {
1127  /* data available in socket, or EOF */
1128  occurred_events->events |= WL_SOCKET_READABLE;
1129  }
1130 
1131  if ((cur_event->events & WL_SOCKET_WRITEABLE) &&
1132  (cur_epoll_event->events & (EPOLLOUT | EPOLLERR | EPOLLHUP)))
1133  {
1134  /* writable, or EOF */
1135  occurred_events->events |= WL_SOCKET_WRITEABLE;
1136  }
1137 
1138  if (occurred_events->events != 0)
1139  {
1140  occurred_events->fd = cur_event->fd;
1141  occurred_events++;
1142  returned_events++;
1143  }
1144  }
1145  }
1146 
1147  return returned_events;
1148 }
1149 
1150 #elif defined(WAIT_USE_POLL)
1151 
1152 /*
1153  * Wait using poll(2).
1154  *
1155  * This allows to receive readiness notifications for several events at once,
1156  * but requires iterating through all of set->pollfds.
1157  */
1158 static inline int
1159 WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
1160  WaitEvent *occurred_events, int nevents)
1161 {
1162  int returned_events = 0;
1163  int rc;
1164  WaitEvent *cur_event;
1165  struct pollfd *cur_pollfd;
1166 
1167  /* Sleep */
1168  rc = poll(set->pollfds, set->nevents, (int) cur_timeout);
1169 
1170  /* Check return code */
1171  if (rc < 0)
1172  {
1173  /* EINTR is okay, otherwise complain */
1174  if (errno != EINTR)
1175  {
1176  waiting = false;
1177  ereport(ERROR,
1179  errmsg("poll() failed: %m")));
1180  }
1181  return 0;
1182  }
1183  else if (rc == 0)
1184  {
1185  /* timeout exceeded */
1186  return -1;
1187  }
1188 
1189  for (cur_event = set->events, cur_pollfd = set->pollfds;
1190  cur_event < (set->events + set->nevents) &&
1191  returned_events < nevents;
1192  cur_event++, cur_pollfd++)
1193  {
1194  /* no activity on this FD, skip */
1195  if (cur_pollfd->revents == 0)
1196  continue;
1197 
1198  occurred_events->pos = cur_event->pos;
1199  occurred_events->user_data = cur_event->user_data;
1200  occurred_events->events = 0;
1201 
1202  if (cur_event->events == WL_LATCH_SET &&
1203  (cur_pollfd->revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL)))
1204  {
1205  /* There's data in the self-pipe, clear it. */
1206  drainSelfPipe();
1207 
1208  if (set->latch->is_set)
1209  {
1210  occurred_events->fd = PGINVALID_SOCKET;
1211  occurred_events->events = WL_LATCH_SET;
1212  occurred_events++;
1213  returned_events++;
1214  }
1215  }
1216  else if (cur_event->events == WL_POSTMASTER_DEATH &&
1217  (cur_pollfd->revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL)))
1218  {
1219  /*
1220  * We expect an POLLHUP when the remote end is closed, but because
1221  * we don't expect the pipe to become readable or to have any
1222  * errors either, treat those cases as postmaster death, too.
1223  *
1224  * Be paranoid about a spurious event signalling the postmaster as
1225  * being dead. There have been reports about that happening with
1226  * older primitives (select(2) to be specific), and a spurious
1227  * WL_POSTMASTER_DEATH event would be painful. Re-checking doesn't
1228  * cost much.
1229  */
1230  if (!PostmasterIsAlive())
1231  {
1232  occurred_events->fd = PGINVALID_SOCKET;
1233  occurred_events->events = WL_POSTMASTER_DEATH;
1234  occurred_events++;
1235  returned_events++;
1236  }
1237  }
1238  else if (cur_event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
1239  {
1240  int errflags = POLLHUP | POLLERR | POLLNVAL;
1241 
1242  Assert(cur_event->fd >= PGINVALID_SOCKET);
1243 
1244  if ((cur_event->events & WL_SOCKET_READABLE) &&
1245  (cur_pollfd->revents & (POLLIN | errflags)))
1246  {
1247  /* data available in socket, or EOF */
1248  occurred_events->events |= WL_SOCKET_READABLE;
1249  }
1250 
1251  if ((cur_event->events & WL_SOCKET_WRITEABLE) &&
1252  (cur_pollfd->revents & (POLLOUT | errflags)))
1253  {
1254  /* writeable, or EOF */
1255  occurred_events->events |= WL_SOCKET_WRITEABLE;
1256  }
1257 
1258  if (occurred_events->events != 0)
1259  {
1260  occurred_events->fd = cur_event->fd;
1261  occurred_events++;
1262  returned_events++;
1263  }
1264  }
1265  }
1266  return returned_events;
1267 }
1268 
1269 #elif defined(WAIT_USE_WIN32)
1270 
1271 /*
1272  * Wait using Windows' WaitForMultipleObjects().
1273  *
1274  * Unfortunately this will only ever return a single readiness notification at
1275  * a time. Note that while the official documentation for
1276  * WaitForMultipleObjects is ambiguous about multiple events being "consumed"
1277  * with a single bWaitAll = FALSE call,
1278  * https://blogs.msdn.microsoft.com/oldnewthing/20150409-00/?p=44273 confirms
1279  * that only one event is "consumed".
1280  */
1281 static inline int
1282 WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
1283  WaitEvent *occurred_events, int nevents)
1284 {
1285  int returned_events = 0;
1286  DWORD rc;
1287  WaitEvent *cur_event;
1288 
1289  /* Reset any wait events that need it */
1290  for (cur_event = set->events;
1291  cur_event < (set->events + set->nevents);
1292  cur_event++)
1293  {
1294  if (cur_event->reset)
1295  {
1296  WaitEventAdjustWin32(set, cur_event);
1297  cur_event->reset = false;
1298  }
1299 
1300  /*
1301  * Windows does not guarantee to log an FD_WRITE network event
1302  * indicating that more data can be sent unless the previous send()
1303  * failed with WSAEWOULDBLOCK. While our caller might well have made
1304  * such a call, we cannot assume that here. Therefore, if waiting for
1305  * write-ready, force the issue by doing a dummy send(). If the dummy
1306  * send() succeeds, assume that the socket is in fact write-ready, and
1307  * return immediately. Also, if it fails with something other than
1308  * WSAEWOULDBLOCK, return a write-ready indication to let our caller
1309  * deal with the error condition.
1310  */
1311  if (cur_event->events & WL_SOCKET_WRITEABLE)
1312  {
1313  char c;
1314  WSABUF buf;
1315  DWORD sent;
1316  int r;
1317 
1318  buf.buf = &c;
1319  buf.len = 0;
1320 
1321  r = WSASend(cur_event->fd, &buf, 1, &sent, 0, NULL, NULL);
1322  if (r == 0 || WSAGetLastError() != WSAEWOULDBLOCK)
1323  {
1324  occurred_events->pos = cur_event->pos;
1325  occurred_events->user_data = cur_event->user_data;
1326  occurred_events->events = WL_SOCKET_WRITEABLE;
1327  occurred_events->fd = cur_event->fd;
1328  return 1;
1329  }
1330  }
1331  }
1332 
1333  /*
1334  * Sleep.
1335  *
1336  * Need to wait for ->nevents + 1, because signal handle is in [0].
1337  */
1338  rc = WaitForMultipleObjects(set->nevents + 1, set->handles, FALSE,
1339  cur_timeout);
1340 
1341  /* Check return code */
1342  if (rc == WAIT_FAILED)
1343  elog(ERROR, "WaitForMultipleObjects() failed: error code %lu",
1344  GetLastError());
1345  else if (rc == WAIT_TIMEOUT)
1346  {
1347  /* timeout exceeded */
1348  return -1;
1349  }
1350 
1351  if (rc == WAIT_OBJECT_0)
1352  {
1353  /* Service newly-arrived signals */
1355  return 0; /* retry */
1356  }
1357 
1358  /*
1359  * With an offset of one, due to the always present pgwin32_signal_event,
1360  * the handle offset directly corresponds to a wait event.
1361  */
1362  cur_event = (WaitEvent *) &set->events[rc - WAIT_OBJECT_0 - 1];
1363 
1364  occurred_events->pos = cur_event->pos;
1365  occurred_events->user_data = cur_event->user_data;
1366  occurred_events->events = 0;
1367 
1368  if (cur_event->events == WL_LATCH_SET)
1369  {
1370  if (!ResetEvent(set->latch->event))
1371  elog(ERROR, "ResetEvent failed: error code %lu", GetLastError());
1372 
1373  if (set->latch->is_set)
1374  {
1375  occurred_events->fd = PGINVALID_SOCKET;
1376  occurred_events->events = WL_LATCH_SET;
1377  occurred_events++;
1378  returned_events++;
1379  }
1380  }
1381  else if (cur_event->events == WL_POSTMASTER_DEATH)
1382  {
1383  /*
1384  * Postmaster apparently died. Since the consequences of falsely
1385  * returning WL_POSTMASTER_DEATH could be pretty unpleasant, we take
1386  * the trouble to positively verify this with PostmasterIsAlive(),
1387  * even though there is no known reason to think that the event could
1388  * be falsely set on Windows.
1389  */
1390  if (!PostmasterIsAlive())
1391  {
1392  occurred_events->fd = PGINVALID_SOCKET;
1393  occurred_events->events = WL_POSTMASTER_DEATH;
1394  occurred_events++;
1395  returned_events++;
1396  }
1397  }
1398  else if (cur_event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
1399  {
1400  WSANETWORKEVENTS resEvents;
1401  HANDLE handle = set->handles[cur_event->pos + 1];
1402 
1403  Assert(cur_event->fd);
1404 
1405  occurred_events->fd = cur_event->fd;
1406 
1407  ZeroMemory(&resEvents, sizeof(resEvents));
1408  if (WSAEnumNetworkEvents(cur_event->fd, handle, &resEvents) != 0)
1409  elog(ERROR, "failed to enumerate network events: error code %u",
1410  WSAGetLastError());
1411  if ((cur_event->events & WL_SOCKET_READABLE) &&
1412  (resEvents.lNetworkEvents & FD_READ))
1413  {
1414  /* data available in socket */
1415  occurred_events->events |= WL_SOCKET_READABLE;
1416 
1417  /*------
1418  * WaitForMultipleObjects doesn't guarantee that a read event will
1419  * be returned if the latch is set at the same time. Even if it
1420  * did, the caller might drop that event expecting it to reoccur
1421  * on next call. So, we must force the event to be reset if this
1422  * WaitEventSet is used again in order to avoid an indefinite
1423  * hang. Refer https://msdn.microsoft.com/en-us/library/windows/desktop/ms741576(v=vs.85).aspx
1424  * for the behavior of socket events.
1425  *------
1426  */
1427  cur_event->reset = true;
1428  }
1429  if ((cur_event->events & WL_SOCKET_WRITEABLE) &&
1430  (resEvents.lNetworkEvents & FD_WRITE))
1431  {
1432  /* writeable */
1433  occurred_events->events |= WL_SOCKET_WRITEABLE;
1434  }
1435  if (resEvents.lNetworkEvents & FD_CLOSE)
1436  {
1437  /* EOF */
1438  if (cur_event->events & WL_SOCKET_READABLE)
1439  occurred_events->events |= WL_SOCKET_READABLE;
1440  if (cur_event->events & WL_SOCKET_WRITEABLE)
1441  occurred_events->events |= WL_SOCKET_WRITEABLE;
1442  }
1443 
1444  if (occurred_events->events != 0)
1445  {
1446  occurred_events++;
1447  returned_events++;
1448  }
1449  }
1450 
1451  return returned_events;
1452 }
1453 #endif
1454 
1455 /*
1456  * SetLatch uses SIGUSR1 to wake up the process waiting on the latch.
1457  *
1458  * Wake up WaitLatch, if we're waiting. (We might not be, since SIGUSR1 is
1459  * overloaded for multiple purposes; or we might not have reached WaitLatch
1460  * yet, in which case we don't need to fill the pipe either.)
1461  *
1462  * NB: when calling this in a signal handler, be sure to save and restore
1463  * errno around it.
1464  */
1465 #ifndef WIN32
1466 void
1468 {
1469  if (waiting)
1470  sendSelfPipeByte();
1471 }
1472 #endif /* !WIN32 */
1473 
1474 /* Send one byte to the self-pipe, to wake up WaitLatch */
1475 #ifndef WIN32
1476 static void
1478 {
1479  int rc;
1480  char dummy = 0;
1481 
1482 retry:
1483  rc = write(selfpipe_writefd, &dummy, 1);
1484  if (rc < 0)
1485  {
1486  /* If interrupted by signal, just retry */
1487  if (errno == EINTR)
1488  goto retry;
1489 
1490  /*
1491  * If the pipe is full, we don't need to retry, the data that's there
1492  * already is enough to wake up WaitLatch.
1493  */
1494  if (errno == EAGAIN || errno == EWOULDBLOCK)
1495  return;
1496 
1497  /*
1498  * Oops, the write() failed for some other reason. We might be in a
1499  * signal handler, so it's not safe to elog(). We have no choice but
1500  * silently ignore the error.
1501  */
1502  return;
1503  }
1504 }
1505 #endif /* !WIN32 */
1506 
1507 /*
1508  * Read all available data from the self-pipe
1509  *
1510  * Note: this is only called when waiting = true. If it fails and doesn't
1511  * return, it must reset that flag first (though ideally, this will never
1512  * happen).
1513  */
1514 #ifndef WIN32
1515 static void
1517 {
1518  /*
1519  * There shouldn't normally be more than one byte in the pipe, or maybe a
1520  * few bytes if multiple processes run SetLatch at the same instant.
1521  */
1522  char buf[16];
1523  int rc;
1524 
1525  for (;;)
1526  {
1527  rc = read(selfpipe_readfd, buf, sizeof(buf));
1528  if (rc < 0)
1529  {
1530  if (errno == EAGAIN || errno == EWOULDBLOCK)
1531  break; /* the pipe is empty */
1532  else if (errno == EINTR)
1533  continue; /* retry */
1534  else
1535  {
1536  waiting = false;
1537  elog(ERROR, "read() on self-pipe failed: %m");
1538  }
1539  }
1540  else if (rc == 0)
1541  {
1542  waiting = false;
1543  elog(ERROR, "unexpected EOF on self-pipe");
1544  }
1545  else if (rc < sizeof(buf))
1546  {
1547  /* we successfully drained the pipe; no need to read() again */
1548  break;
1549  }
1550  /* else buffer wasn't big enough, so read again */
1551  }
1552 }
1553 #endif /* !WIN32 */
#define EWOULDBLOCK
Definition: win32.h:291
int latch_pos
Definition: latch.c:93
#define SIGUSR1
Definition: win32.h:202
#define WL_SOCKET_WRITEABLE
Definition: latch.h:126
pgsocket fd
Definition: latch.h:134
int MyProcPid
Definition: globals.c:38
int pos
Definition: latch.h:132
void FreeWaitEventSet(WaitEventSet *set)
Definition: latch.c:608
static int selfpipe_writefd
Definition: latch.c:119
#define WL_TIMEOUT
Definition: latch.h:127
int AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd, Latch *latch, void *user_data)
Definition: latch.c:664
#define write(a, b, c)
Definition: win32.h:14
bool is_shared
Definition: latch.h:113
#define INSTR_TIME_GET_MILLISEC(t)
Definition: instr_time.h:199
struct timeval instr_time
Definition: instr_time.h:147
void ModifyWaitEvent(WaitEventSet *set, int pos, uint32 events, Latch *latch)
Definition: latch.c:735
static void drainSelfPipe(void)
Definition: latch.c:1516
#define WL_SOCKET_READABLE
Definition: latch.h:125
void ResetLatch(volatile Latch *latch)
Definition: latch.c:498
static int fd(const char *x, int i)
Definition: preproc-init.c:105
static time_t start_time
Definition: pg_ctl.c:91
#define EAGAIN
Definition: win32.h:283
WaitEventSet * CreateWaitEventSet(MemoryContext context, int nevents)
Definition: latch.c:521
#define StaticAssertStmt(condition, errmessage)
Definition: c.h:757
HANDLE pgwin32_signal_event
Definition: signal.c:27
int WaitLatch(volatile Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:336
void pfree(void *pointer)
Definition: mcxt.c:950
void pgwin32_dispatch_queued_signals(void)
Definition: signal.c:107
#define ERROR
Definition: elog.h:43
#define INSTR_TIME_SUBTRACT(x, y)
Definition: instr_time.h:167
#define FALSE
Definition: c.h:221
#define FATAL
Definition: elog.h:52
uint32 events
Definition: latch.h:133
static int selfpipe_readfd
Definition: latch.c:118
Definition: latch.h:110
static int WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout, WaitEvent *occurred_events, int nevents)
char * c
static char * buf
Definition: pg_test_fsync.c:66
bool IsUnderPostmaster
Definition: globals.c:100
bool PostmasterIsAlive(void)
Definition: pmsignal.c:272
unsigned int uint32
Definition: c.h:268
int pgsocket
Definition: port.h:22
static void pgstat_report_wait_end(void)
Definition: pgstat.h:1232
void OwnLatch(volatile Latch *latch)
Definition: latch.c:288
MemoryContext CurrentMemoryContext
Definition: mcxt.c:37
#define ereport(elevel, rest)
Definition: elog.h:122
int errcode_for_socket_access(void)
Definition: elog.c:669
int nevents
Definition: latch.c:77
int postmaster_alive_fds[2]
Definition: postmaster.c:557
static void sendSelfPipeByte(void)
Definition: latch.c:1477
#define WL_POSTMASTER_DEATH
Definition: latch.h:128
#define PGINVALID_SOCKET
Definition: port.h:24
#define EINTR
Definition: win32.h:285
void InitializeLatchSupport(void)
Definition: latch.c:147
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition: mcxt.c:742
#define pg_memory_barrier()
Definition: atomics.h:148
void SetLatch(volatile Latch *latch)
Definition: latch.c:415
#define NULL
Definition: c.h:229
#define Assert(condition)
Definition: c.h:675
int WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock, long timeout, uint32 wait_event_info)
Definition: latch.c:356
WaitEvent * events
Definition: latch.c:84
size_t Size
Definition: c.h:356
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: pgstat.h:1208
#define MAXALIGN(LEN)
Definition: c.h:588
void InitLatch(volatile Latch *latch)
Definition: latch.c:220
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:153
void * user_data
Definition: latch.h:135
int nevents_space
Definition: latch.c:78
int errmsg(const char *fmt,...)
Definition: elog.c:797
int owner_pid
Definition: latch.h:114
void DisownLatch(volatile Latch *latch)
Definition: latch.c:308
sig_atomic_t is_set
Definition: latch.h:112
#define TRUE
Definition: c.h:217
#define elog
Definition: elog.h:219
#define close(a)
Definition: win32.h:12
void latch_sigusr1_handler(void)
Definition: latch.c:1467
void InitSharedLatch(volatile Latch *latch)
Definition: latch.c:252
Latch * latch
Definition: latch.c:92
#define WL_LATCH_SET
Definition: latch.h:124
static volatile sig_atomic_t waiting
Definition: latch.c:115
static int selfpipe_owner_pid
Definition: latch.c:122
#define POSTMASTER_FD_WATCH
Definition: postmaster.h:42
#define read(a, b, c)
Definition: win32.h:13
int WaitEventSetWait(WaitEventSet *set, long timeout, WaitEvent *occurred_events, int nevents, uint32 wait_event_info)
Definition: latch.c:918