PostgreSQL Source Code  git master
async.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * async.c
4  * Asynchronous notification: NOTIFY, LISTEN, UNLISTEN
5  *
6  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * IDENTIFICATION
10  * src/backend/commands/async.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 
15 /*-------------------------------------------------------------------------
16  * Async Notification Model as of 9.0:
17  *
18  * 1. Multiple backends on same machine. Multiple backends listening on
19  * several channels. (Channels are also called "conditions" in other
20  * parts of the code.)
21  *
22  * 2. There is one central queue in disk-based storage (directory pg_notify/),
23  * with actively-used pages mapped into shared memory by the slru.c module.
24  * All notification messages are placed in the queue and later read out
25  * by listening backends.
26  *
27  * There is no central knowledge of which backend listens on which channel;
28  * every backend has its own list of interesting channels.
29  *
30  * Although there is only one queue, notifications are treated as being
31  * database-local; this is done by including the sender's database OID
32  * in each notification message. Listening backends ignore messages
33  * that don't match their database OID. This is important because it
34  * ensures senders and receivers have the same database encoding and won't
35  * misinterpret non-ASCII text in the channel name or payload string.
36  *
37  * Since notifications are not expected to survive database crashes,
38  * we can simply clean out the pg_notify data at any reboot, and there
39  * is no need for WAL support or fsync'ing.
40  *
41  * 3. Every backend that is listening on at least one channel registers by
42  * entering its PID into the array in AsyncQueueControl. It then scans all
43  * incoming notifications in the central queue and first compares the
44  * database OID of the notification with its own database OID and then
45  * compares the notified channel with the list of channels that it listens
46  * to. In case there is a match it delivers the notification event to its
47  * frontend. Non-matching events are simply skipped.
48  *
49  * 4. The NOTIFY statement (routine Async_Notify) stores the notification in
50  * a backend-local list which will not be processed until transaction end.
51  *
52  * Duplicate notifications from the same transaction are sent out as one
53  * notification only. This is done to save work when for example a trigger
54  * on a 2 million row table fires a notification for each row that has been
55  * changed. If the application needs to receive every single notification
56  * that has been sent, it can easily add some unique string into the extra
57  * payload parameter.
58  *
59  * When the transaction is ready to commit, PreCommit_Notify() adds the
60  * pending notifications to the head of the queue. The head pointer of the
61  * queue always points to the next free position and a position is just a
62  * page number and the offset in that page. This is done before marking the
63  * transaction as committed in clog. If we run into problems writing the
64  * notifications, we can still call elog(ERROR, ...) and the transaction
65  * will roll back.
66  *
67  * Once we have put all of the notifications into the queue, we return to
68  * CommitTransaction() which will then do the actual transaction commit.
69  *
70  * After commit we are called another time (AtCommit_Notify()). Here we
71  * make the actual updates to the effective listen state (listenChannels).
72  *
73  * Finally, after we are out of the transaction altogether, we check if
74  * we need to signal listening backends. In SignalBackends() we scan the
75  * list of listening backends and send a PROCSIG_NOTIFY_INTERRUPT signal
76  * to every listening backend (we don't know which backend is listening on
77  * which channel so we must signal them all). We can exclude backends that
78  * are already up to date, though, and we can also exclude backends that
79  * are in other databases (unless they are way behind and should be kicked
80  * to make them advance their pointers). We don't bother with a
81  * self-signal either, but just process the queue directly.
82  *
83  * 5. Upon receipt of a PROCSIG_NOTIFY_INTERRUPT signal, the signal handler
84  * sets the process's latch, which triggers the event to be processed
85  * immediately if this backend is idle (i.e., it is waiting for a frontend
86  * command and is not within a transaction block. Cf.
87  * ProcessClientReadInterrupt()). Otherwise the handler may only set a
88  * flag, which will cause the processing to occur just before we next go
89  * idle.
90  *
91  * Inbound-notify processing consists of reading all of the notifications
92  * that have arrived since scanning last time. We read every notification
93  * until we reach either a notification from an uncommitted transaction or
94  * the head pointer's position.
95  *
96  * 6. To avoid SLRU wraparound and limit disk space consumption, the tail
97  * pointer needs to be advanced so that old pages can be truncated.
98  * This is relatively expensive (notably, it requires an exclusive lock),
99  * so we don't want to do it often. We make sending backends do this work
100  * if they advanced the queue head into a new page, but only once every
101  * QUEUE_CLEANUP_DELAY pages.
102  *
103  * An application that listens on the same channel it notifies will get
104  * NOTIFY messages for its own NOTIFYs. These can be ignored, if not useful,
105  * by comparing be_pid in the NOTIFY message to the application's own backend's
106  * PID. (As of FE/BE protocol 2.0, the backend's PID is provided to the
107  * frontend during startup.) The above design guarantees that notifies from
108  * other backends will never be missed by ignoring self-notifies.
109  *
110  * The amount of shared memory used for notify management (NUM_ASYNC_BUFFERS)
111  * can be varied without affecting anything but performance. The maximum
112  * amount of notification data that can be queued at one time is determined
113  * by slru.c's wraparound limit; see QUEUE_MAX_PAGE below.
114  *-------------------------------------------------------------------------
115  */
116 
117 #include "postgres.h"
118 
119 #include <limits.h>
120 #include <unistd.h>
121 #include <signal.h>
122 
123 #include "access/parallel.h"
124 #include "access/slru.h"
125 #include "access/transam.h"
126 #include "access/xact.h"
127 #include "catalog/pg_database.h"
128 #include "commands/async.h"
129 #include "funcapi.h"
130 #include "libpq/libpq.h"
131 #include "libpq/pqformat.h"
132 #include "miscadmin.h"
133 #include "storage/ipc.h"
134 #include "storage/lmgr.h"
135 #include "storage/proc.h"
136 #include "storage/procarray.h"
137 #include "storage/procsignal.h"
138 #include "storage/sinval.h"
139 #include "tcop/tcopprot.h"
140 #include "utils/builtins.h"
141 #include "utils/hashutils.h"
142 #include "utils/memutils.h"
143 #include "utils/ps_status.h"
144 #include "utils/snapmgr.h"
145 #include "utils/timestamp.h"
146 
147 
148 /*
149  * Maximum size of a NOTIFY payload, including terminating NUL. This
150  * must be kept small enough so that a notification message fits on one
151  * SLRU page. The magic fudge factor here is noncritical as long as it's
152  * more than AsyncQueueEntryEmptySize --- we make it significantly bigger
153  * than that, so changes in that data structure won't affect user-visible
154  * restrictions.
155  */
156 #define NOTIFY_PAYLOAD_MAX_LENGTH (BLCKSZ - NAMEDATALEN - 128)
157 
158 /*
159  * Struct representing an entry in the global notify queue
160  *
161  * This struct declaration has the maximal length, but in a real queue entry
162  * the data area is only big enough for the actual channel and payload strings
163  * (each null-terminated). AsyncQueueEntryEmptySize is the minimum possible
164  * entry size, if both channel and payload strings are empty (but note it
165  * doesn't include alignment padding).
166  *
167  * The "length" field should always be rounded up to the next QUEUEALIGN
168  * multiple so that all fields are properly aligned.
169  */
170 typedef struct AsyncQueueEntry
171 {
172  int length; /* total allocated length of entry */
173  Oid dboid; /* sender's database OID */
174  TransactionId xid; /* sender's XID */
175  int32 srcPid; /* sender's PID */
178 
179 /* Currently, no field of AsyncQueueEntry requires more than int alignment */
180 #define QUEUEALIGN(len) INTALIGN(len)
181 
182 #define AsyncQueueEntryEmptySize (offsetof(AsyncQueueEntry, data) + 2)
183 
184 /*
185  * Struct describing a queue position, and assorted macros for working with it
186  */
187 typedef struct QueuePosition
188 {
189  int page; /* SLRU page number */
190  int offset; /* byte offset within page */
191 } QueuePosition;
192 
193 #define QUEUE_POS_PAGE(x) ((x).page)
194 #define QUEUE_POS_OFFSET(x) ((x).offset)
195 
196 #define SET_QUEUE_POS(x,y,z) \
197  do { \
198  (x).page = (y); \
199  (x).offset = (z); \
200  } while (0)
201 
202 #define QUEUE_POS_EQUAL(x,y) \
203  ((x).page == (y).page && (x).offset == (y).offset)
204 
205 /* choose logically smaller QueuePosition */
206 #define QUEUE_POS_MIN(x,y) \
207  (asyncQueuePagePrecedes((x).page, (y).page) ? (x) : \
208  (x).page != (y).page ? (y) : \
209  (x).offset < (y).offset ? (x) : (y))
210 
211 /* choose logically larger QueuePosition */
212 #define QUEUE_POS_MAX(x,y) \
213  (asyncQueuePagePrecedes((x).page, (y).page) ? (y) : \
214  (x).page != (y).page ? (x) : \
215  (x).offset > (y).offset ? (x) : (y))
216 
217 /*
218  * Parameter determining how often we try to advance the tail pointer:
219  * we do that after every QUEUE_CLEANUP_DELAY pages of NOTIFY data. This is
220  * also the distance by which a backend in another database needs to be
221  * behind before we'll decide we need to wake it up to advance its pointer.
222  *
223  * Resist the temptation to make this really large. While that would save
224  * work in some places, it would add cost in others. In particular, this
225  * should likely be less than NUM_ASYNC_BUFFERS, to ensure that backends
226  * catch up before the pages they'll need to read fall out of SLRU cache.
227  */
228 #define QUEUE_CLEANUP_DELAY 4
229 
230 /*
231  * Struct describing a listening backend's status
232  */
233 typedef struct QueueBackendStatus
234 {
235  int32 pid; /* either a PID or InvalidPid */
236  Oid dboid; /* backend's database OID, or InvalidOid */
237  BackendId nextListener; /* id of next listener, or InvalidBackendId */
238  QueuePosition pos; /* backend has read queue up to here */
240 
241 /*
242  * Shared memory state for LISTEN/NOTIFY (excluding its SLRU stuff)
243  *
244  * The AsyncQueueControl structure is protected by the AsyncQueueLock.
245  *
246  * When holding the lock in SHARED mode, backends may only inspect their own
247  * entries as well as the head and tail pointers. Consequently we can allow a
248  * backend to update its own record while holding only SHARED lock (since no
249  * other backend will inspect it).
250  *
251  * When holding the lock in EXCLUSIVE mode, backends can inspect the entries
252  * of other backends and also change the head and tail pointers.
253  *
254  * AsyncCtlLock is used as the control lock for the pg_notify SLRU buffers.
255  * In order to avoid deadlocks, whenever we need both locks, we always first
256  * get AsyncQueueLock and then AsyncCtlLock.
257  *
258  * Each backend uses the backend[] array entry with index equal to its
259  * BackendId (which can range from 1 to MaxBackends). We rely on this to make
260  * SendProcSignal fast.
261  *
262  * The backend[] array entries for actively-listening backends are threaded
263  * together using firstListener and the nextListener links, so that we can
264  * scan them without having to iterate over inactive entries. We keep this
265  * list in order by BackendId so that the scan is cache-friendly when there
266  * are many active entries.
267  */
268 typedef struct AsyncQueueControl
269 {
270  QueuePosition head; /* head points to the next free location */
271  QueuePosition tail; /* tail must be <= the queue position of every
272  * listening backend */
273  BackendId firstListener; /* id of first listener, or InvalidBackendId */
274  TimestampTz lastQueueFillWarn; /* time of last queue-full msg */
275  QueueBackendStatus backend[FLEXIBLE_ARRAY_MEMBER];
276  /* backend[0] is not used; used entries are from [1] to [MaxBackends] */
278 
280 
281 #define QUEUE_HEAD (asyncQueueControl->head)
282 #define QUEUE_TAIL (asyncQueueControl->tail)
283 #define QUEUE_FIRST_LISTENER (asyncQueueControl->firstListener)
284 #define QUEUE_BACKEND_PID(i) (asyncQueueControl->backend[i].pid)
285 #define QUEUE_BACKEND_DBOID(i) (asyncQueueControl->backend[i].dboid)
286 #define QUEUE_NEXT_LISTENER(i) (asyncQueueControl->backend[i].nextListener)
287 #define QUEUE_BACKEND_POS(i) (asyncQueueControl->backend[i].pos)
288 
289 /*
290  * The SLRU buffer area through which we access the notification queue
291  */
293 
294 #define AsyncCtl (&AsyncCtlData)
295 #define QUEUE_PAGESIZE BLCKSZ
296 #define QUEUE_FULL_WARN_INTERVAL 5000 /* warn at most once every 5s */
297 
298 /*
299  * slru.c currently assumes that all filenames are four characters of hex
300  * digits. That means that we can use segments 0000 through FFFF.
301  * Each segment contains SLRU_PAGES_PER_SEGMENT pages which gives us
302  * the pages from 0 to SLRU_PAGES_PER_SEGMENT * 0x10000 - 1.
303  *
304  * It's of course possible to enhance slru.c, but this gives us so much
305  * space already that it doesn't seem worth the trouble.
306  *
307  * The most data we can have in the queue at a time is QUEUE_MAX_PAGE/2
308  * pages, because more than that would confuse slru.c into thinking there
309  * was a wraparound condition. With the default BLCKSZ this means there
310  * can be up to 8GB of queued-and-not-read data.
311  *
312  * Note: it's possible to redefine QUEUE_MAX_PAGE with a smaller multiple of
313  * SLRU_PAGES_PER_SEGMENT, for easier testing of queue-full behaviour.
314  */
315 #define QUEUE_MAX_PAGE (SLRU_PAGES_PER_SEGMENT * 0x10000 - 1)
316 
317 /*
318  * listenChannels identifies the channels we are actually listening to
319  * (ie, have committed a LISTEN on). It is a simple list of channel names,
320  * allocated in TopMemoryContext.
321  */
322 static List *listenChannels = NIL; /* list of C strings */
323 
324 /*
325  * State for pending LISTEN/UNLISTEN actions consists of an ordered list of
326  * all actions requested in the current transaction. As explained above,
327  * we don't actually change listenChannels until we reach transaction commit.
328  *
329  * The list is kept in CurTransactionContext. In subtransactions, each
330  * subtransaction has its own list in its own CurTransactionContext, but
331  * successful subtransactions attach their lists to their parent's list.
332  * Failed subtransactions simply discard their lists.
333  */
334 typedef enum
335 {
340 
341 typedef struct
342 {
344  char channel[FLEXIBLE_ARRAY_MEMBER]; /* nul-terminated string */
345 } ListenAction;
346 
347 typedef struct ActionList
348 {
349  int nestingLevel; /* current transaction nesting depth */
350  List *actions; /* list of ListenAction structs */
351  struct ActionList *upper; /* details for upper transaction levels */
352 } ActionList;
353 
354 static ActionList *pendingActions = NULL;
355 
356 /*
357  * State for outbound notifies consists of a list of all channels+payloads
358  * NOTIFYed in the current transaction. We do not actually perform a NOTIFY
359  * until and unless the transaction commits. pendingNotifies is NULL if no
360  * NOTIFYs have been done in the current (sub) transaction.
361  *
362  * We discard duplicate notify events issued in the same transaction.
363  * Hence, in addition to the list proper (which we need to track the order
364  * of the events, since we guarantee to deliver them in order), we build a
365  * hash table which we can probe to detect duplicates. Since building the
366  * hash table is somewhat expensive, we do so only once we have at least
367  * MIN_HASHABLE_NOTIFIES events queued in the current (sub) transaction;
368  * before that we just scan the events linearly.
369  *
370  * The list is kept in CurTransactionContext. In subtransactions, each
371  * subtransaction has its own list in its own CurTransactionContext, but
372  * successful subtransactions add their entries to their parent's list.
373  * Failed subtransactions simply discard their lists. Since these lists
374  * are independent, there may be notify events in a subtransaction's list
375  * that duplicate events in some ancestor (sub) transaction; we get rid of
376  * the dups when merging the subtransaction's list into its parent's.
377  *
378  * Note: the action and notify lists do not interact within a transaction.
379  * In particular, if a transaction does NOTIFY and then LISTEN on the same
380  * condition name, it will get a self-notify at commit. This is a bit odd
381  * but is consistent with our historical behavior.
382  */
383 typedef struct Notification
384 {
385  uint16 channel_len; /* length of channel-name string */
386  uint16 payload_len; /* length of payload string */
387  /* null-terminated channel name, then null-terminated payload follow */
388  char data[FLEXIBLE_ARRAY_MEMBER];
389 } Notification;
390 
391 typedef struct NotificationList
392 {
393  int nestingLevel; /* current transaction nesting depth */
394  List *events; /* list of Notification structs */
395  HTAB *hashtab; /* hash of NotificationHash structs, or NULL */
396  struct NotificationList *upper; /* details for upper transaction levels */
398 
399 #define MIN_HASHABLE_NOTIFIES 16 /* threshold to build hashtab */
400 
401 typedef struct NotificationHash
402 {
403  Notification *event; /* => the actual Notification struct */
405 
407 
408 /*
409  * Inbound notifications are initially processed by HandleNotifyInterrupt(),
410  * called from inside a signal handler. That just sets the
411  * notifyInterruptPending flag and sets the process
412  * latch. ProcessNotifyInterrupt() will then be called whenever it's safe to
413  * actually deal with the interrupt.
414  */
415 volatile sig_atomic_t notifyInterruptPending = false;
416 
417 /* True if we've registered an on_shmem_exit cleanup */
418 static bool unlistenExitRegistered = false;
419 
420 /* True if we're currently registered as a listener in asyncQueueControl */
421 static bool amRegisteredListener = false;
422 
423 /* has this backend sent notifications in the current transaction? */
424 static bool backendHasSentNotifications = false;
425 
426 /* have we advanced to a page that's a multiple of QUEUE_CLEANUP_DELAY? */
427 static bool backendTryAdvanceTail = false;
428 
429 /* GUC parameter */
430 bool Trace_notify = false;
431 
432 /* local function prototypes */
433 static int asyncQueuePageDiff(int p, int q);
434 static bool asyncQueuePagePrecedes(int p, int q);
435 static void queue_listen(ListenActionKind action, const char *channel);
436 static void Async_UnlistenOnExit(int code, Datum arg);
437 static void Exec_ListenPreCommit(void);
438 static void Exec_ListenCommit(const char *channel);
439 static void Exec_UnlistenCommit(const char *channel);
440 static void Exec_UnlistenAllCommit(void);
441 static bool IsListeningOn(const char *channel);
442 static void asyncQueueUnregister(void);
443 static bool asyncQueueIsFull(void);
444 static bool asyncQueueAdvance(volatile QueuePosition *position, int entryLength);
446 static ListCell *asyncQueueAddEntries(ListCell *nextNotify);
447 static double asyncQueueUsage(void);
448 static void asyncQueueFillWarning(void);
449 static void SignalBackends(void);
450 static void asyncQueueReadAllNotifications(void);
451 static bool asyncQueueProcessPageEntries(volatile QueuePosition *current,
452  QueuePosition stop,
453  char *page_buffer,
454  Snapshot snapshot);
455 static void asyncQueueAdvanceTail(void);
456 static void ProcessIncomingNotify(void);
457 static bool AsyncExistsPendingNotify(Notification *n);
459 static uint32 notification_hash(const void *key, Size keysize);
460 static int notification_match(const void *key1, const void *key2, Size keysize);
461 static void ClearPendingActionsAndNotifies(void);
462 
463 /*
464  * Compute the difference between two queue page numbers (i.e., p - q),
465  * accounting for wraparound.
466  */
467 static int
468 asyncQueuePageDiff(int p, int q)
469 {
470  int diff;
471 
472  /*
473  * We have to compare modulo (QUEUE_MAX_PAGE+1)/2. Both inputs should be
474  * in the range 0..QUEUE_MAX_PAGE.
475  */
476  Assert(p >= 0 && p <= QUEUE_MAX_PAGE);
477  Assert(q >= 0 && q <= QUEUE_MAX_PAGE);
478 
479  diff = p - q;
480  if (diff >= ((QUEUE_MAX_PAGE + 1) / 2))
481  diff -= QUEUE_MAX_PAGE + 1;
482  else if (diff < -((QUEUE_MAX_PAGE + 1) / 2))
483  diff += QUEUE_MAX_PAGE + 1;
484  return diff;
485 }
486 
487 /* Is p < q, accounting for wraparound? */
488 static bool
490 {
491  return asyncQueuePageDiff(p, q) < 0;
492 }
493 
494 /*
495  * Report space needed for our shared memory area
496  */
497 Size
499 {
500  Size size;
501 
502  /* This had better match AsyncShmemInit */
503  size = mul_size(MaxBackends + 1, sizeof(QueueBackendStatus));
504  size = add_size(size, offsetof(AsyncQueueControl, backend));
505 
507 
508  return size;
509 }
510 
511 /*
512  * Initialize our shared memory area
513  */
514 void
516 {
517  bool found;
518  int slotno;
519  Size size;
520 
521  /*
522  * Create or attach to the AsyncQueueControl structure.
523  *
524  * The used entries in the backend[] array run from 1 to MaxBackends; the
525  * zero'th entry is unused but must be allocated.
526  */
527  size = mul_size(MaxBackends + 1, sizeof(QueueBackendStatus));
528  size = add_size(size, offsetof(AsyncQueueControl, backend));
529 
530  asyncQueueControl = (AsyncQueueControl *)
531  ShmemInitStruct("Async Queue Control", size, &found);
532 
533  if (!found)
534  {
535  /* First time through, so initialize it */
536  SET_QUEUE_POS(QUEUE_HEAD, 0, 0);
537  SET_QUEUE_POS(QUEUE_TAIL, 0, 0);
539  asyncQueueControl->lastQueueFillWarn = 0;
540  /* zero'th entry won't be used, but let's initialize it anyway */
541  for (int i = 0; i <= MaxBackends; i++)
542  {
547  }
548  }
549 
550  /*
551  * Set up SLRU management of the pg_notify data.
552  */
553  AsyncCtl->PagePrecedes = asyncQueuePagePrecedes;
555  AsyncCtlLock, "pg_notify", LWTRANCHE_ASYNC_BUFFERS);
556  /* Override default assumption that writes should be fsync'd */
557  AsyncCtl->do_fsync = false;
558 
559  if (!found)
560  {
561  /*
562  * During start or reboot, clean out the pg_notify directory.
563  */
565 
566  /* Now initialize page zero to empty */
567  LWLockAcquire(AsyncCtlLock, LW_EXCLUSIVE);
569  /* This write is just to verify that pg_notify/ is writable */
570  SimpleLruWritePage(AsyncCtl, slotno);
571  LWLockRelease(AsyncCtlLock);
572  }
573 }
574 
575 
576 /*
577  * pg_notify -
578  * SQL function to send a notification event
579  */
580 Datum
582 {
583  const char *channel;
584  const char *payload;
585 
586  if (PG_ARGISNULL(0))
587  channel = "";
588  else
589  channel = text_to_cstring(PG_GETARG_TEXT_PP(0));
590 
591  if (PG_ARGISNULL(1))
592  payload = "";
593  else
594  payload = text_to_cstring(PG_GETARG_TEXT_PP(1));
595 
596  /* For NOTIFY as a statement, this is checked in ProcessUtility */
598 
599  Async_Notify(channel, payload);
600 
601  PG_RETURN_VOID();
602 }
603 
604 
605 /*
606  * Async_Notify
607  *
608  * This is executed by the SQL notify command.
609  *
610  * Adds the message to the list of pending notifies.
611  * Actual notification happens during transaction commit.
612  * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
613  */
614 void
615 Async_Notify(const char *channel, const char *payload)
616 {
617  int my_level = GetCurrentTransactionNestLevel();
618  size_t channel_len;
619  size_t payload_len;
620  Notification *n;
621  MemoryContext oldcontext;
622 
623  if (IsParallelWorker())
624  elog(ERROR, "cannot send notifications from a parallel worker");
625 
626  if (Trace_notify)
627  elog(DEBUG1, "Async_Notify(%s)", channel);
628 
629  channel_len = channel ? strlen(channel) : 0;
630  payload_len = payload ? strlen(payload) : 0;
631 
632  /* a channel name must be specified */
633  if (channel_len == 0)
634  ereport(ERROR,
635  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
636  errmsg("channel name cannot be empty")));
637 
638  /* enforce length limits */
639  if (channel_len >= NAMEDATALEN)
640  ereport(ERROR,
641  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
642  errmsg("channel name too long")));
643 
644  if (payload_len >= NOTIFY_PAYLOAD_MAX_LENGTH)
645  ereport(ERROR,
646  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
647  errmsg("payload string too long")));
648 
649  /*
650  * We must construct the Notification entry, even if we end up not using
651  * it, in order to compare it cheaply to existing list entries.
652  *
653  * The notification list needs to live until end of transaction, so store
654  * it in the transaction context.
655  */
657 
659  channel_len + payload_len + 2);
660  n->channel_len = channel_len;
661  n->payload_len = payload_len;
662  strcpy(n->data, channel);
663  if (payload)
664  strcpy(n->data + channel_len + 1, payload);
665  else
666  n->data[channel_len + 1] = '\0';
667 
668  if (pendingNotifies == NULL || my_level > pendingNotifies->nestingLevel)
669  {
670  NotificationList *notifies;
671 
672  /*
673  * First notify event in current (sub)xact. Note that we allocate the
674  * NotificationList in TopTransactionContext; the nestingLevel might
675  * get changed later by AtSubCommit_Notify.
676  */
677  notifies = (NotificationList *)
679  sizeof(NotificationList));
680  notifies->nestingLevel = my_level;
681  notifies->events = list_make1(n);
682  /* We certainly don't need a hashtable yet */
683  notifies->hashtab = NULL;
684  notifies->upper = pendingNotifies;
685  pendingNotifies = notifies;
686  }
687  else
688  {
689  /* Now check for duplicates */
691  {
692  /* It's a dup, so forget it */
693  pfree(n);
694  MemoryContextSwitchTo(oldcontext);
695  return;
696  }
697 
698  /* Append more events to existing list */
700  }
701 
702  MemoryContextSwitchTo(oldcontext);
703 }
704 
705 /*
706  * queue_listen
707  * Common code for listen, unlisten, unlisten all commands.
708  *
709  * Adds the request to the list of pending actions.
710  * Actual update of the listenChannels list happens during transaction
711  * commit.
712  */
713 static void
714 queue_listen(ListenActionKind action, const char *channel)
715 {
716  MemoryContext oldcontext;
717  ListenAction *actrec;
718  int my_level = GetCurrentTransactionNestLevel();
719 
720  /*
721  * Unlike Async_Notify, we don't try to collapse out duplicates. It would
722  * be too complicated to ensure we get the right interactions of
723  * conflicting LISTEN/UNLISTEN/UNLISTEN_ALL, and it's unlikely that there
724  * would be any performance benefit anyway in sane applications.
725  */
727 
728  /* allocate the struct header plus the channel string and its terminating NUL */
729  actrec = (ListenAction *) palloc(offsetof(ListenAction, channel) +
730  strlen(channel) + 1);
731  actrec->action = action;
732  strcpy(actrec->channel, channel);
733 
734  if (pendingActions == NULL || my_level > pendingActions->nestingLevel)
735  {
736  ActionList *actions;
737 
738  /*
739  * First action in current (sub)xact. Note that we allocate the
740  * ActionList in TopTransactionContext; the nestingLevel might get
741  * changed later by AtSubCommit_Notify.
742  */
743  actions = (ActionList *)
745  actions->nestingLevel = my_level;
746  actions->actions = list_make1(actrec);
747  actions->upper = pendingActions;
748  pendingActions = actions;
749  }
750  else
751  pendingActions->actions = lappend(pendingActions->actions, actrec);
752 
753  MemoryContextSwitchTo(oldcontext);
754 }
755 
756 /*
757  * Async_Listen
758  *
759  * This is executed by the SQL listen command.
760  */
761 void
762 Async_Listen(const char *channel)
763 {
764  if (Trace_notify)
765  elog(DEBUG1, "Async_Listen(%s,%d)", channel, MyProcPid);
766 
767  queue_listen(LISTEN_LISTEN, channel);
768 }
769 
770 /*
771  * Async_Unlisten
772  *
773  * This is executed by the SQL unlisten command.
774  */
775 void
776 Async_Unlisten(const char *channel)
777 {
778  if (Trace_notify)
779  elog(DEBUG1, "Async_Unlisten(%s,%d)", channel, MyProcPid);
780 
781  /* If we couldn't possibly be listening, no need to queue anything */
782  if (pendingActions == NULL && !unlistenExitRegistered)
783  return;
784 
785  queue_listen(LISTEN_UNLISTEN, channel);
786 }
787 
788 /*
789  * Async_UnlistenAll
790  *
791  * This is invoked by UNLISTEN * command, and also at backend exit.
792  */
793 void
795 {
796  if (Trace_notify)
797  elog(DEBUG1, "Async_UnlistenAll(%d)", MyProcPid);
798 
799  /* If we couldn't possibly be listening, no need to queue anything */
800  if (pendingActions == NULL && !unlistenExitRegistered)
801  return;
802 
804 }
805 
806 /*
807  * SQL function: return a set of the channel names this backend is actively
808  * listening to.
809  *
810  * Note: this coding relies on the fact that the listenChannels list cannot
811  * change within a transaction.
812  */
813 Datum
815 {
816  FuncCallContext *funcctx;
817 
818  /* stuff done only on the first call of the function */
819  if (SRF_IS_FIRSTCALL())
820  {
821  /* create a function context for cross-call persistence */
822  funcctx = SRF_FIRSTCALL_INIT();
823  }
824 
825  /* stuff done on every call of the function */
826  funcctx = SRF_PERCALL_SETUP();
827 
828  if (funcctx->call_cntr < list_length(listenChannels))
829  {
830  char *channel = (char *) list_nth(listenChannels,
831  funcctx->call_cntr);
832 
833  SRF_RETURN_NEXT(funcctx, CStringGetTextDatum(channel));
834  }
835 
836  SRF_RETURN_DONE(funcctx);
837 }
838 
839 /*
840  * Async_UnlistenOnExit
841  *
842  * This is executed at backend exit if we have done any LISTENs in this
843  * backend. It might not be necessary anymore, if the user UNLISTENed
844  * everything, but we don't try to detect that case.
845  */
846 static void
848 {
851 }
852 
853 /*
854  * AtPrepare_Notify
855  *
856  * This is called at the prepare phase of a two-phase
857  * transaction. Save the state for possible commit later.
858  */
859 void
861 {
862  /* It's not allowed to have any pending LISTEN/UNLISTEN/NOTIFY actions */
863  if (pendingActions || pendingNotifies)
864  ereport(ERROR,
865  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
866  errmsg("cannot PREPARE a transaction that has executed LISTEN, UNLISTEN, or NOTIFY")));
867 }
868 
869 /*
870  * PreCommit_Notify
871  *
872  * This is called at transaction commit, before actually committing to
873  * clog.
874  *
875  * If there are pending LISTEN actions, make sure we are listed in the
876  * shared-memory listener array. This must happen before commit to
877  * ensure we don't miss any notifies from transactions that commit
878  * just after ours.
879  *
880  * If there are outbound notify requests in the pendingNotifies list,
881  * add them to the global queue. We do that before commit so that
882  * we can still throw error if we run out of queue space.
883  */
884 void
886 {
 /*
  * NOTE(review): this listing omits the source lines with embedded numbers
  * 885, 905, 947, 950 and 968: the declarator, the LISTEN_LISTEN case
  * statement, the tail of the LockSharedObject() call, and one statement
  * each before and inside the queue-filling loop. Restore them from
  * upstream before building.
  */
887  ListCell *p;
888 
 /* Fast path: neither LISTEN/UNLISTEN actions nor NOTIFY events are pending */
889  if (!pendingActions && !pendingNotifies)
890  return; /* no relevant statements in this xact */
891 
892  if (Trace_notify)
893  elog(DEBUG1, "PreCommit_Notify");
894 
895  /* Preflight for any pending listen/unlisten actions */
896  if (pendingActions != NULL)
897  {
898  foreach(p, pendingActions->actions)
899  {
900  ListenAction *actrec = (ListenAction *) lfirst(p);
901 
902  switch (actrec->action)
903  {
904  case LISTEN_LISTEN:
 /* NOTE(review): the statement for this case is missing here; presumably the Exec_ListenPreCommit call -- confirm upstream */
906  break;
907  case LISTEN_UNLISTEN:
908  /* there is no Exec_UnlistenPreCommit() */
909  break;
910  case LISTEN_UNLISTEN_ALL:
911  /* there is no Exec_UnlistenAllPreCommit() */
912  break;
913  }
914  }
915  }
916 
917  /* Queue any pending notifies (must happen after the above) */
918  if (pendingNotifies)
919  {
920  ListCell *nextNotify;
921 
922  /*
923  * Make sure that we have an XID assigned to the current transaction.
924  * GetCurrentTransactionId is cheap if we already have an XID, but not
925  * so cheap if we don't, and we'd prefer not to do that work while
926  * holding AsyncQueueLock.
927  */
928  (void) GetCurrentTransactionId();
929 
930  /*
931  * Serialize writers by acquiring a special lock that we hold till
932  * after commit. This ensures that queue entries appear in commit
933  * order, and in particular that there are never uncommitted queue
934  * entries ahead of committed ones, so an uncommitted transaction
935  * can't block delivery of deliverable notifications.
936  *
937  * We use a heavyweight lock so that it'll automatically be released
938  * after either commit or abort. This also allows deadlocks to be
939  * detected, though really a deadlock shouldn't be possible here.
940  *
941  * The lock is on "database 0", which is pretty ugly but it doesn't
942  * seem worth inventing a special locktag category just for this.
943  * (Historical note: before PG 9.0, a similar lock on "database 0" was
944  * used by the flatfiles mechanism.)
945  */
946  LockSharedObject(DatabaseRelationId, InvalidOid, 0,
948 
949  /* Now push the notifications into the queue */
951 
 /*
  * Each asyncQueueAddEntries() call returns the first cell it did not
  * write, so the loop ends once it hands back NULL.
  */
952  nextNotify = list_head(pendingNotifies->events);
953  while (nextNotify != NULL)
954  {
955  /*
956  * Add the pending notifications to the queue. We acquire and
957  * release AsyncQueueLock once per page, which might be overkill
958  * but it does allow readers to get in while we're doing this.
959  *
960  * A full queue is very uncommon and should really not happen,
961  * given that we have so much space available in the SLRU pages.
962  * Nevertheless we need to deal with this possibility. Note that
963  * when we get here we are in the process of committing our
964  * transaction, but we have not yet committed to clog, so at this
965  * point in time we can still roll the transaction back.
966  */
967  LWLockAcquire(AsyncQueueLock, LW_EXCLUSIVE);
969  if (asyncQueueIsFull())
970  ereport(ERROR,
971  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
972  errmsg("too many notifications in the NOTIFY queue")));
973  nextNotify = asyncQueueAddEntries(nextNotify);
974  LWLockRelease(AsyncQueueLock);
975  }
976  }
977 }
978 
979 /*
980  * AtCommit_Notify
981  *
982  * This is called at transaction commit, after committing to clog.
983  *
984  * Update listenChannels and clear transaction-local state.
985  */
986 void
988 {
 /*
  * NOTE(review): this listing omits the source lines with embedded numbers
  * 987, 1017, 1025 and 1028: the declarator, the LISTEN_UNLISTEN_ALL case
  * statement, the statement guarded by the "no longer listening" test, and
  * the final cleanup call. As shown, the trailing "if" has no body.
  */
989  ListCell *p;
990 
991  /*
992  * Allow transactions that have not executed LISTEN/UNLISTEN/NOTIFY to
993  * return as soon as possible
994  */
995  if (!pendingActions && !pendingNotifies)
996  return;
997 
998  if (Trace_notify)
999  elog(DEBUG1, "AtCommit_Notify");
1000 
1001  /* Perform any pending listen/unlisten actions */
1002  if (pendingActions != NULL)
1003  {
 /* Actions are replayed in list order against listenChannels */
1004  foreach(p, pendingActions->actions)
1005  {
1006  ListenAction *actrec = (ListenAction *) lfirst(p);
1007 
1008  switch (actrec->action)
1009  {
1010  case LISTEN_LISTEN:
1011  Exec_ListenCommit(actrec->channel);
1012  break;
1013  case LISTEN_UNLISTEN:
1014  Exec_UnlistenCommit(actrec->channel);
1015  break;
1016  case LISTEN_UNLISTEN_ALL:
 /* NOTE(review): statement missing here; presumably the Exec_UnlistenAllCommit call -- confirm upstream */
1018  break;
1019  }
1020  }
1021  }
1022 
1023  /* If no longer listening to anything, get out of listener array */
1024  if (amRegisteredListener && listenChannels == NIL)
1026 
1027  /* And clean up */
1029 }
1030 
1031 /*
1032  * Exec_ListenPreCommit --- subroutine for PreCommit_Notify
1033  *
1034  * This function must make sure we are ready to catch any incoming messages.
1035  */
1036 static void
1038 {
 /*
  * NOTE(review): this listing omits the source lines with embedded numbers
  * 1037, 1047, 1057, 1059, 1087, 1089, 1095-1097, 1101, 1106-1107 and 1125.
  * That covers the declarator, the early-return condition, the exit-callback
  * registration, the listener-scan loop header and its filter, the stores
  * into our own queue slot, the list-link updates, and the statement run
  * when max != head. The comments below describe only what is visible.
  */
1039  QueuePosition head;
1040  QueuePosition max;
1041  BackendId prevListener;
1042 
1043  /*
1044  * Nothing to do if we are already listening to something, nor if we
1045  * already ran this routine in this transaction.
1046  */
1048  return;
1049 
1050  if (Trace_notify)
1051  elog(DEBUG1, "Exec_ListenPreCommit(%d)", MyProcPid);
1052 
1053  /*
1054  * Before registering, make sure we will unlisten before dying. (Note:
1055  * this action does not get undone if we abort later.)
1056  */
1058  {
1060  unlistenExitRegistered = true;
1061  }
1062 
1063  /*
1064  * This is our first LISTEN, so establish our pointer.
1065  *
1066  * We set our pointer to the global tail pointer and then move it forward
1067  * over already-committed notifications. This ensures we cannot miss any
1068  * not-yet-committed notifications. We might get a few more but that
1069  * doesn't hurt.
1070  *
1071  * In some scenarios there might be a lot of committed notifications that
1072  * have not yet been pruned away (because some backend is being lazy about
1073  * reading them). To reduce our startup time, we can look at other
1074  * backends and adopt the maximum "pos" pointer of any backend that's in
1075  * our database; any notifications it's already advanced over are surely
1076  * committed and need not be re-examined by us. (We must consider only
1077  * backends connected to our DB, because others will not have bothered to
1078  * check committed-ness of notifications in our DB.)
1079  *
1080  * We need exclusive lock here so we can look at other backends' entries
1081  * and manipulate the list links.
1082  */
1083  LWLockAcquire(AsyncQueueLock, LW_EXCLUSIVE);
1084  head = QUEUE_HEAD;
1085  max = QUEUE_TAIL;
1086  prevListener = InvalidBackendId;
 /* "i" below is the loop variable of the (missing) listener-scan loop */
1088  {
1090  max = QUEUE_POS_MAX(max, QUEUE_BACKEND_POS(i));
1091  /* Also find last listening backend before this one */
1092  if (i < MyBackendId)
1093  prevListener = i;
1094  }
1098  /* Insert backend into list of listeners at correct position */
 /* assumes InvalidBackendId is not a positive value -- TODO confirm */
1099  if (prevListener > 0)
1100  {
1102  QUEUE_NEXT_LISTENER(prevListener) = MyBackendId;
1103  }
1104  else
1105  {
1108  }
1109  LWLockRelease(AsyncQueueLock);
1110 
1111  /* Now we are listed in the global array, so remember we're listening */
1112  amRegisteredListener = true;
1113 
1114  /*
1115  * Try to move our pointer forward as far as possible. This will skip over
1116  * already-committed notifications. Still, we could get notifications that
1117  * have already committed before we started to LISTEN.
1118  *
1119  * Note that we are not yet listening on anything, so we won't deliver any
1120  * notification to the frontend. Also, although our transaction might
1121  * have executed NOTIFY, those message(s) aren't queued yet so we can't
1122  * see them in the queue.
1123  */
1124  if (!QUEUE_POS_EQUAL(max, head))
1126 }
1127 
1128 /*
1129  * Exec_ListenCommit --- subroutine for AtCommit_Notify
1130  *
1131  * Add the channel to the list of channels we are listening on.
1132  */
1133 static void
1134 Exec_ListenCommit(const char *channel)
1135 {
1136  MemoryContext oldcontext;
1137 
1138  /* Do nothing if we are already listening on this channel */
1139  if (IsListeningOn(channel))
1140  return;
1141 
1142  /*
1143  * Add the new channel name to listenChannels.
1144  *
1145  * XXX It is theoretically possible to get an out-of-memory failure here,
1146  * which would be bad because we already committed. For the moment it
1147  * doesn't seem worth trying to guard against that, but maybe improve this
1148  * later.
1149  */
 /*
  * NOTE(review): the line that assigns oldcontext (embedded number 1150)
  * is absent from this listing; presumably it switches into a long-lived
  * memory context so the copied name outlives the transaction -- confirm
  * upstream. The list stores a private pstrdup() copy of the name.
  */
1151  listenChannels = lappend(listenChannels, pstrdup(channel));
1152  MemoryContextSwitchTo(oldcontext);
1153 }
1154 
1155 /*
1156  * Exec_UnlistenCommit --- subroutine for AtCommit_Notify
1157  *
1158  * Remove the specified channel name from listenChannels.
1159  */
1160 static void
1161 Exec_UnlistenCommit(const char *channel)
1162 {
1163  ListCell *q;
1164 
1165  if (Trace_notify)
1166  elog(DEBUG1, "Exec_UnlistenCommit(%s,%d)", channel, MyProcPid);
1167 
1168  foreach(q, listenChannels)
1169  {
1170  char *lchan = (char *) lfirst(q);
1171 
1172  if (strcmp(lchan, channel) == 0)
1173  {
1174  listenChannels = foreach_delete_current(listenChannels, q);
1175  pfree(lchan);
1176  break;
1177  }
1178  }
1179 
1180  /*
1181  * We do not complain about unlistening something not being listened;
1182  * should we?
1183  */
1184 }
1185 
1186 /*
1187  * Exec_UnlistenAllCommit --- subroutine for AtCommit_Notify
1188  *
1189  * Unlisten on all channels for this backend.
1190  */
1191 static void
1193 {
 /*
  * NOTE(review): the declarator line (embedded number 1192) is absent from
  * this listing; the header comment names the routine
  * Exec_UnlistenAllCommit.
  */
1194  if (Trace_notify)
1195  elog(DEBUG1, "Exec_UnlistenAllCommit(%d)", MyProcPid);
1196 
 /* Free the list together with the channel-name strings it holds, then reset it to empty */
1197  list_free_deep(listenChannels);
1198  listenChannels = NIL;
1199 }
1200 
1201 /*
1202  * ProcessCompletedNotifies --- send out signals and self-notifies
1203  *
1204  * This is called from postgres.c just before going idle at the completion
1205  * of a transaction. If we issued any notifications in the just-completed
1206  * transaction, send signals to other backends to process them, and also
1207  * process the queue ourselves to send messages to our own frontend.
1208  * Also, if we filled enough queue pages with new notifies, try to advance
1209  * the queue tail pointer.
1210  *
1211  * The reason that this is not done in AtCommit_Notify is that there is
1212  * a nonzero chance of errors here (for example, encoding conversion errors
1213  * while trying to format messages to our frontend). An error during
1214  * AtCommit_Notify would be a PANIC condition. The timing is also arranged
1215  * to ensure that a transaction's self-notifies are delivered to the frontend
1216  * before it gets the terminating ReadyForQuery message.
1217  *
1218  * Note that we send signals and process the queue even if the transaction
1219  * eventually aborted. This is because we need to clean out whatever got
1220  * added to the queue.
1221  *
1222  * NOTE: we are outside of any transaction here.
1223  */
1224 void
1226 {
 /*
  * NOTE(review): this listing omits the source lines with embedded numbers
  * 1225, 1230, 1238, 1253, 1261, 1267, 1270 and 1273. That covers the
  * declarator, the early-return condition, the flag reset, the statements
  * the comments describe as starting and finishing the transaction, the
  * queue-read call, and the tail-advance condition and call. As shown,
  * several comments and braces have no statement attached.
  */
1227  MemoryContext caller_context;
1228 
1229  /* Nothing to do if we didn't send any notifications */
1231  return;
1232 
1233  /*
1234  * We reset the flag immediately; otherwise, if any sort of error occurs
1235  * below, we'd be locked up in an infinite loop, because control will come
1236  * right back here after error cleanup.
1237  */
1239 
1240  /*
1241  * We must preserve the caller's memory context (probably MessageContext)
1242  * across the transaction we do here.
1243  */
1244  caller_context = CurrentMemoryContext;
1245 
1246  if (Trace_notify)
1247  elog(DEBUG1, "ProcessCompletedNotifies");
1248 
1249  /*
1250  * We must run asyncQueueReadAllNotifications inside a transaction, else
1251  * bad things happen if it gets an error.
1252  */
1254 
1255  /* Send signals to other backends */
1256  SignalBackends();
1257 
 /* Only a backend that is itself listening needs to read the queue */
1258  if (listenChannels != NIL)
1259  {
1260  /* Read the queue ourselves, and send relevant stuff to the frontend */
1262  }
1263 
1264  /*
1265  * If it's time to try to advance the global tail pointer, do that.
1266  */
1268  {
1269  backendTryAdvanceTail = false;
1271  }
1272 
1274 
 /* Restore the memory context saved at entry */
1275  MemoryContextSwitchTo(caller_context);
1276 
1277  /* We don't need pq_flush() here since postgres.c will do one shortly */
1278 }
1279 
1280 /*
1281  * Test whether we are actively listening on the given channel name.
1282  *
1283  * Note: this function is executed for every notification found in the queue.
1284  * Perhaps it is worth further optimization, eg convert the list to a sorted
1285  * array so we can binary-search it. In practice the list is likely to be
1286  * fairly short, though.
1287  */
1288 static bool
1289 IsListeningOn(const char *channel)
1290 {
1291  ListCell *p;
1292 
1293  foreach(p, listenChannels)
1294  {
1295  char *lchan = (char *) lfirst(p);
1296 
1297  if (strcmp(lchan, channel) == 0)
1298  return true;
1299  }
1300  return false;
1301 }
1302 
1303 /*
1304  * Remove our entry from the listeners array when we are no longer listening
1305  * on any channel. NB: must not fail if we're already not listening.
1306  */
1307 static void
1309 {
 /*
  * NOTE(review): this listing omits the source lines with embedded numbers
  * 1308, 1320-1321, 1323-1324, 1327, 1329, 1331 and 1336. That covers the
  * declarator, the stores that invalidate our slot, the first-listener
  * branch, the list-walk loop header and its match test and unlink
  * statement, and the final link reset. Only the locking skeleton is
  * visible, so the unlink logic cannot be reviewed from here.
  */
1310  Assert(listenChannels == NIL); /* else caller error */
1311 
1312  if (!amRegisteredListener) /* nothing to do */
1313  return;
1314 
1315  /*
1316  * Need exclusive lock here to manipulate list links.
1317  */
1318  LWLockAcquire(AsyncQueueLock, LW_EXCLUSIVE);
1319  /* Mark our entry as invalid */
1322  /* and remove it from the list */
1325  else
1326  {
1328  {
1330  {
1332  break;
1333  }
1334  }
1335  }
1337  LWLockRelease(AsyncQueueLock);
1338 
1339  /* mark ourselves as no longer listed in the global array */
1340  amRegisteredListener = false;
1341 }
1342 
1343 /*
1344  * Test whether there is room to insert more notification messages.
1345  *
1346  * Caller must hold at least shared AsyncQueueLock.
1347  */
1348 static bool
1350 {
 /*
  * NOTE(review): the declarator line (embedded number 1349) is absent from
  * this listing; callers elsewhere in this file refer to asyncQueueIsFull().
  *
  * Returns true when the page following the current head page would
  * logically precede the (segment-rounded) tail page.
  */
1351  int nexthead;
1352  int boundary;
1353 
1354  /*
1355  * The queue is full if creating a new head page would create a page that
1356  * logically precedes the current global tail pointer, ie, the head
1357  * pointer would wrap around compared to the tail. We cannot create such
1358  * a head page for fear of confusing slru.c. For safety we round the tail
1359  * pointer back to a segment boundary (compare the truncation logic in
1360  * asyncQueueAdvanceTail).
1361  *
1362  * Note that this test is *not* dependent on how much space there is on
1363  * the current head page. This is necessary because asyncQueueAddEntries
1364  * might try to create the next head page in any case.
1365  */
1366  nexthead = QUEUE_POS_PAGE(QUEUE_HEAD) + 1;
1367  if (nexthead > QUEUE_MAX_PAGE)
1368  nexthead = 0; /* wrap around */
1369  boundary = QUEUE_POS_PAGE(QUEUE_TAIL);
 /* Round the tail page down to a multiple of SLRU_PAGES_PER_SEGMENT */
1370  boundary -= boundary % SLRU_PAGES_PER_SEGMENT;
1371  return asyncQueuePagePrecedes(nexthead, boundary);
1372 }
1373 
1374 /*
1375  * Advance the QueuePosition to the next entry, assuming that the current
1376  * entry is of length entryLength. If we jump to a new page the function
1377  * returns true, else false.
1378  */
1379 static bool
1380 asyncQueueAdvance(volatile QueuePosition *position, int entryLength)
1381 {
 /*
  * position:    queue position to update in place (page number + offset).
  * entryLength: size in bytes of the entry just written or read.
  * Returns true if the position moved to offset 0 of the next page.
  */
1382  int pageno = QUEUE_POS_PAGE(*position);
1383  int offset = QUEUE_POS_OFFSET(*position);
1384  bool pageJump = false;
1385 
1386  /*
1387  * Move to the next writing position: First jump over what we have just
1388  * written or read.
1389  */
1390  offset += entryLength;
1391  Assert(offset <= QUEUE_PAGESIZE);
1392 
1393  /*
1394  * In a second step check if another entry can possibly be written to the
1395  * page. If so, stay here, we have reached the next position. If not, then
1396  * we need to move on to the next page.
1397  */
 /*
  * NOTE(review): the "if" condition guarding the block below (embedded
  * number 1398) is absent from this listing; as shown the page jump would
  * be unconditional. Restore the condition from upstream.
  */
1399  {
1400  pageno++;
1401  if (pageno > QUEUE_MAX_PAGE)
1402  pageno = 0; /* wrap around */
1403  offset = 0;
1404  pageJump = true;
1405  }
1406 
1407  SET_QUEUE_POS(*position, pageno, offset);
1408  return pageJump;
1409 }
1410 
1411 /*
1412  * Fill the AsyncQueueEntry at *qe with an outbound notification message.
1413  */
1414 static void
1416 {
 /*
  * NOTE(review): the declarator line (embedded number 1415) is absent from
  * this listing. The body reads a notification through "n" and fills the
  * queue entry pointed to by "qe".
  */
1417  size_t channellen = n->channel_len;
1418  size_t payloadlen = n->payload_len;
1419  int entryLength;
1420 
1421  Assert(channellen < NAMEDATALEN);
1422  Assert(payloadlen < NOTIFY_PAYLOAD_MAX_LENGTH);
1423 
1424  /* The terminators are already included in AsyncQueueEntryEmptySize */
1425  entryLength = AsyncQueueEntryEmptySize + payloadlen + channellen;
 /* Round the length up with QUEUEALIGN before recording it in the entry */
1426  entryLength = QUEUEALIGN(entryLength);
1427  qe->length = entryLength;
1428  qe->dboid = MyDatabaseId;
1429  qe->xid = GetCurrentTransactionId();
1430  qe->srcPid = MyProcPid;
 /* The "+ 2" presumably covers the two string terminators (channel and payload) -- confirm against the Notification layout */
1431  memcpy(qe->data, n->data, channellen + payloadlen + 2);
1432 }
1433 
1434 /*
1435  * Add pending notifications to the queue.
1436  *
1437  * We go page by page here, i.e. we stop once we have to go to a new page but
1438  * we will be called again and then fill that next page. If an entry does not
1439  * fit into the current page, we write a dummy entry with an InvalidOid as the
1440  * database OID in order to fill the page. So every page is always used up to
1441  * the last byte which simplifies reading the page later.
1442  *
1443  * We are passed the list cell (in pendingNotifies->events) containing the next
1444  * notification to write and return the first still-unwritten cell back.
1445  * Eventually we will return NULL indicating all is done.
1446  *
1447  * We are holding AsyncQueueLock already from the caller and grab AsyncCtlLock
1448  * locally in this function.
1449  */
1450 static ListCell *
1452 {
 /*
  * NOTE(review): this listing omits the source lines with embedded numbers
  * 1451 and 1486: the declarator, and the statement that fills "qe" from
  * "n" at the top of the loop. As shown, qe.length is read before anything
  * visible sets it.
  */
1453  AsyncQueueEntry qe;
1454  QueuePosition queue_head;
1455  int pageno;
1456  int offset;
1457  int slotno;
1458 
1459  /* We hold both AsyncQueueLock and AsyncCtlLock during this operation */
1460  LWLockAcquire(AsyncCtlLock, LW_EXCLUSIVE);
1461 
1462  /*
1463  * We work with a local copy of QUEUE_HEAD, which we write back to shared
1464  * memory upon exiting. The reason for this is that if we have to advance
1465  * to a new page, SimpleLruZeroPage might fail (out of disk space, for
1466  * instance), and we must not advance QUEUE_HEAD if it does. (Otherwise,
1467  * subsequent insertions would try to put entries into a page that slru.c
1468  * thinks doesn't exist yet.) So, use a local position variable. Note
1469  * that if we do fail, any already-inserted queue entries are forgotten;
1470  * this is okay, since they'd be useless anyway after our transaction
1471  * rolls back.
1472  */
1473  queue_head = QUEUE_HEAD;
1474 
1475  /* Fetch the current page */
1476  pageno = QUEUE_POS_PAGE(queue_head);
1477  slotno = SimpleLruReadPage(AsyncCtl, pageno, true, InvalidTransactionId);
1478  /* Note we mark the page dirty before writing in it */
1479  AsyncCtl->shared->page_dirty[slotno] = true;
1480 
1481  while (nextNotify != NULL)
1482  {
1483  Notification *n = (Notification *) lfirst(nextNotify);
1484 
1485  /* Construct a valid queue entry in local variable qe */
1487 
1488  offset = QUEUE_POS_OFFSET(queue_head);
1489 
1490  /* Check whether the entry really fits on the current page */
1491  if (offset + qe.length <= QUEUE_PAGESIZE)
1492  {
1493  /* OK, so advance nextNotify past this item */
1494  nextNotify = lnext(pendingNotifies->events, nextNotify);
1495  }
1496  else
1497  {
1498  /*
1499  * Write a dummy entry to fill up the page. Actually readers will
1500  * only check dboid and since it won't match any reader's database
1501  * OID, they will ignore this entry and move on.
1502  */
 /* nextNotify is left untouched here, so the real entry is retried on the next page */
1503  qe.length = QUEUE_PAGESIZE - offset;
1504  qe.dboid = InvalidOid;
1505  qe.data[0] = '\0'; /* empty channel */
1506  qe.data[1] = '\0'; /* empty payload */
1507  }
1508 
1509  /* Now copy qe into the shared buffer page */
1510  memcpy(AsyncCtl->shared->page_buffer[slotno] + offset,
1511  &qe,
1512  qe.length);
1513 
1514  /* Advance queue_head appropriately, and detect if page is full */
1515  if (asyncQueueAdvance(&(queue_head), qe.length))
1516  {
1517  /*
1518  * Page is full, so we're done here, but first fill the next page
1519  * with zeroes. The reason to do this is to ensure that slru.c's
1520  * idea of the head page is always the same as ours, which avoids
1521  * boundary problems in SimpleLruTruncate. The test in
1522  * asyncQueueIsFull() ensured that there is room to create this
1523  * page without overrunning the queue.
1524  */
1525  slotno = SimpleLruZeroPage(AsyncCtl, QUEUE_POS_PAGE(queue_head));
1526 
1527  /*
1528  * If the new page address is a multiple of QUEUE_CLEANUP_DELAY,
1529  * set flag to remember that we should try to advance the tail
1530  * pointer (we don't want to actually do that right here).
1531  */
1532  if (QUEUE_POS_PAGE(queue_head) % QUEUE_CLEANUP_DELAY == 0)
1533  backendTryAdvanceTail = true;
1534 
1535  /* And exit the loop */
1536  break;
1537  }
1538  }
1539 
1540  /* Success, so update the global QUEUE_HEAD */
1541  QUEUE_HEAD = queue_head;
1542 
1543  LWLockRelease(AsyncCtlLock);
1544 
1545  return nextNotify;
1546 }
1547 
1548 /*
1549  * SQL function to return the fraction of the notification queue currently
1550  * occupied.
1551  */
1552 Datum
1554 {
 /*
  * NOTE(review): the declarator line (embedded number 1553) is absent from
  * this listing.
  *
  * Samples asyncQueueUsage() under a shared AsyncQueueLock and returns the
  * value as a float8 Datum.
  */
1555  double usage;
1556 
1557  LWLockAcquire(AsyncQueueLock, LW_SHARED);
1558  usage = asyncQueueUsage();
1559  LWLockRelease(AsyncQueueLock);
1560 
1561  PG_RETURN_FLOAT8(usage);
1562 }
1563 
1564 /*
1565  * Return the fraction of the queue that is currently occupied.
1566  *
1567  * The caller must hold AsyncQueueLock in (at least) shared mode.
1568  */
1569 static double
1571 {
 /*
  * NOTE(review): the declarator line (embedded number 1570) is absent from
  * this listing; callers elsewhere in this file refer to asyncQueueUsage().
  */
1572  int headPage = QUEUE_POS_PAGE(QUEUE_HEAD);
1573  int tailPage = QUEUE_POS_PAGE(QUEUE_TAIL);
1574  int occupied;
1575 
 /* Distance in pages from tail to head; negative if the head page number has wrapped */
1576  occupied = headPage - tailPage;
1577 
1578  if (occupied == 0)
1579  return (double) 0; /* fast exit for common case */
1580 
1581  if (occupied < 0)
1582  {
1583  /* head has wrapped around, tail not yet */
1584  occupied += QUEUE_MAX_PAGE + 1;
1585  }
1586 
 /*
  * The denominator is half of the page-number space, so a result of 1.0
  * means half of all page numbers are in use.
  */
1587  return (double) occupied / (double) ((QUEUE_MAX_PAGE + 1) / 2);
1588 }
1589 
1590 /*
1591  * Check whether the queue is at least half full, and emit a warning if so.
1592  *
1593  * This is unlikely given the size of the queue, but possible.
1594  * The warnings show up at most once every QUEUE_FULL_WARN_INTERVAL.
1595  *
1596  * Caller must hold exclusive AsyncQueueLock.
1597  */
1598 static void
1600 {
 /*
  * NOTE(review): this listing omits the source lines with embedded numbers
  * 1599, 1611, 1616 and 1618: the declarator, the remaining arguments of
  * TimestampDifferenceExceeds(), the listener-scan loop header, and one
  * statement at the top of the loop body. "i" below is that missing loop's
  * variable.
  */
1601  double fillDegree;
1602  TimestampTz t;
1603 
 /* Stay silent while usage is under 0.5 */
1604  fillDegree = asyncQueueUsage();
1605  if (fillDegree < 0.5)
1606  return;
1607 
1608  t = GetCurrentTimestamp();
1609 
1610  if (TimestampDifferenceExceeds(asyncQueueControl->lastQueueFillWarn,
1612  {
1613  QueuePosition min = QUEUE_HEAD;
1614  int32 minPid = InvalidPid;
1615 
 /* Track the smallest position seen and the PID of a backend sitting at it */
1617  {
1619  min = QUEUE_POS_MIN(min, QUEUE_BACKEND_POS(i));
1620  if (QUEUE_POS_EQUAL(min, QUEUE_BACKEND_POS(i)))
1621  minPid = QUEUE_BACKEND_PID(i);
1622  }
1623 
 /* Detail and hint are attached only when a lagging backend was identified */
1624  ereport(WARNING,
1625  (errmsg("NOTIFY queue is %.0f%% full", fillDegree * 100),
1626  (minPid != InvalidPid ?
1627  errdetail("The server process with PID %d is among those with the oldest transactions.", minPid)
1628  : 0),
1629  (minPid != InvalidPid ?
1630  errhint("The NOTIFY queue cannot be emptied until that process ends its current transaction.")
1631  : 0)));
1632 
 /* Record when we warned so the next warning can be rate-limited */
1633  asyncQueueControl->lastQueueFillWarn = t;
1634  }
1635 }
1636 
1637 /*
1638  * Send signals to listening backends.
1639  *
1640  * We never signal our own process; that should be handled by our caller.
1641  *
1642  * Normally we signal only backends in our own database, since only those
1643  * backends could be interested in notifies we send. However, if there's
1644  * notify traffic in our database but no traffic in another database that
1645  * does have listener(s), those listeners will fall further and further
1646  * behind. Waken them anyway if they're far enough behind, so that they'll
1647  * advance their queue position pointers, allowing the global tail to advance.
1648  *
1649  * Since we know the BackendId and the Pid the signalling is quite cheap.
1650  */
1651 static void
1653 {
 /*
  * NOTE(review): this listing omits the source lines with embedded numbers
  * 1652, 1672, 1681 and 1696-1697: the declarator, the listener-scan loop
  * header, the own-database test, and the "far behind" condition for other
  * databases. "i" in the first loop is that missing loop's variable.
  */
1654  int32 *pids;
1655  BackendId *ids;
1656  int count;
1657 
1658  /*
1659  * Identify backends that we need to signal. We don't want to send
1660  * signals while holding the AsyncQueueLock, so this loop just builds a
1661  * list of target PIDs.
1662  *
1663  * XXX in principle these pallocs could fail, which would be bad. Maybe
1664  * preallocate the arrays? But in practice this is only run in trivial
1665  * transactions, so there should surely be space available.
1666  */
 /* Both arrays are sized for MaxBackends entries; "count" is how many are filled */
1667  pids = (int32 *) palloc(MaxBackends * sizeof(int32));
1668  ids = (BackendId *) palloc(MaxBackends * sizeof(BackendId));
1669  count = 0;
1670 
1671  LWLockAcquire(AsyncQueueLock, LW_EXCLUSIVE);
1673  {
1674  int32 pid = QUEUE_BACKEND_PID(i);
1675  QueuePosition pos;
1676 
1677  Assert(pid != InvalidPid);
1678  if (pid == MyProcPid)
1679  continue; /* never signal self */
1680  pos = QUEUE_BACKEND_POS(i);
1682  {
1683  /*
1684  * Always signal listeners in our own database, unless they're
1685  * already caught up (unlikely, but possible).
1686  */
1687  if (QUEUE_POS_EQUAL(pos, QUEUE_HEAD))
1688  continue;
1689  }
1690  else
1691  {
1692  /*
1693  * Listeners in other databases should be signaled only if they
1694  * are far behind.
1695  */
1698  continue;
1699  }
1700  /* OK, need to signal this one */
1701  pids[count] = pid;
1702  ids[count] = i;
1703  count++;
1704  }
1705  LWLockRelease(AsyncQueueLock);
1706 
1707  /* Now send signals */
1708  for (int i = 0; i < count; i++)
1709  {
1710  int32 pid = pids[i];
1711 
1712  /*
1713  * Note: assuming things aren't broken, a signal failure here could
1714  * only occur if the target backend exited since we released
1715  * AsyncQueueLock; which is unlikely but certainly possible. So we
1716  * just log a low-level debug message if it happens.
1717  */
1718  if (SendProcSignal(pid, PROCSIG_NOTIFY_INTERRUPT, ids[i]) < 0)
1719  elog(DEBUG3, "could not signal backend with PID %d: %m", pid);
1720  }
1721 
1722  pfree(pids);
1723  pfree(ids);
1724 }
1725 
1726 /*
1727  * AtAbort_Notify
1728  *
1729  * This is called at transaction abort.
1730  *
1731  * Gets rid of pending actions and outbound notifies that we would have
1732  * executed if the transaction got committed.
1733  */
1734 void
1736 {
 /*
  * NOTE(review): this listing omits the source lines with embedded numbers
  * 1735, 1743 and 1746: the declarator, the statement guarded by the "if"
  * below, and the cleanup call. As shown, the "if" has no body.
  */
1737  /*
1738  * If we LISTEN but then roll back the transaction after PreCommit_Notify,
1739  * we have registered as a listener but have not made any entry in
1740  * listenChannels. In that case, deregister again.
1741  */
1742  if (amRegisteredListener && listenChannels == NIL)
1744 
1745  /* And clean up */
1747 }
1748 
1749 /*
1750  * AtSubCommit_Notify() --- Take care of subtransaction commit.
1751  *
1752  * Reassign all items in the pending lists to the parent transaction.
1753  */
1754 void
1756 {
 /*
  * NOTE(review): the declarator line (embedded number 1755) is absent from
  * this listing; the header comment names the routine AtSubCommit_Notify.
  *
  * Both stacks (pendingActions, pendingNotifies) are handled the same way:
  * if the parent has no entry at level my_level - 1, the child's entry is
  * relabelled one level up; otherwise the child's items are merged into
  * the parent's entry and the child's list header is freed.
  */
1757  int my_level = GetCurrentTransactionNestLevel();
1758 
1759  /* If there are actions at our nesting level, we must reparent them. */
1760  if (pendingActions != NULL &&
1761  pendingActions->nestingLevel >= my_level)
1762  {
1763  if (pendingActions->upper == NULL ||
1764  pendingActions->upper->nestingLevel < my_level - 1)
1765  {
1766  /* nothing to merge; give the whole thing to the parent */
1767  --pendingActions->nestingLevel;
1768  }
1769  else
1770  {
1771  ActionList *childPendingActions = pendingActions;
1772 
 /* Pop the child so the parent's entry becomes the top of the stack */
1773  pendingActions = pendingActions->upper;
1774 
1775  /*
1776  * Mustn't try to eliminate duplicates here --- see queue_listen()
1777  */
1778  pendingActions->actions =
1779  list_concat(pendingActions->actions,
1780  childPendingActions->actions);
1781  pfree(childPendingActions);
1782  }
1783  }
1784 
1785  /* If there are notifies at our nesting level, we must reparent them. */
1786  if (pendingNotifies != NULL &&
1787  pendingNotifies->nestingLevel >= my_level)
1788  {
1789  Assert(pendingNotifies->nestingLevel == my_level);
1790 
1791  if (pendingNotifies->upper == NULL ||
1792  pendingNotifies->upper->nestingLevel < my_level - 1)
1793  {
1794  /* nothing to merge; give the whole thing to the parent */
1795  --pendingNotifies->nestingLevel;
1796  }
1797  else
1798  {
1799  /*
1800  * Formerly, we didn't bother to eliminate duplicates here, but
1801  * now we must, else we fall foul of "Assert(!found)", either here
1802  * or during a later attempt to build the parent-level hashtable.
1803  */
1804  NotificationList *childPendingNotifies = pendingNotifies;
1805  ListCell *l;
1806 
 /* Pop first: the helper calls below take no list argument, so presumably they act on the current pendingNotifies -- confirm */
1807  pendingNotifies = pendingNotifies->upper;
1808  /* Insert all the subxact's events into parent, except for dups */
1809  foreach(l, childPendingNotifies->events)
1810  {
1811  Notification *childn = (Notification *) lfirst(l);
1812 
1813  if (!AsyncExistsPendingNotify(childn))
1814  AddEventToPendingNotifies(childn);
1815  }
1816  pfree(childPendingNotifies);
1817  }
1818  }
1819 }
1820 
1821 /*
1822  * AtSubAbort_Notify() --- Take care of subtransaction abort.
1823  */
1824 void
1826 {
 /*
  * NOTE(review): the declarator line (embedded number 1825) is absent from
  * this listing; the header comment names the routine AtSubAbort_Notify.
  */
1827  int my_level = GetCurrentTransactionNestLevel();
1828 
1829  /*
1830  * All we have to do is pop the stack --- the actions/notifies made in
1831  * this subxact are no longer interesting, and the space will be freed
1832  * when CurTransactionContext is recycled. We still have to free the
1833  * ActionList and NotificationList objects themselves, though, because
1834  * those are allocated in TopTransactionContext.
1835  *
1836  * Note that there might be no entries at all, or no entries for the
1837  * current subtransaction level, either because none were ever created,
1838  * or because we reentered this routine due to trouble during subxact
1839  * abort.
1840  */
 /* Pop every entry at or below the aborting level; only the list header is freed here */
1841  while (pendingActions != NULL &&
1842  pendingActions->nestingLevel >= my_level)
1843  {
1844  ActionList *childPendingActions = pendingActions;
1845 
1846  pendingActions = pendingActions->upper;
1847  pfree(childPendingActions);
1848  }
1849 
 /* Same treatment for the notification stack */
1850  while (pendingNotifies != NULL &&
1851  pendingNotifies->nestingLevel >= my_level)
1852  {
1853  NotificationList *childPendingNotifies = pendingNotifies;
1854 
1855  pendingNotifies = pendingNotifies->upper;
1856  pfree(childPendingNotifies);
1857  }
1858 }
1859 
1860 /*
1861  * HandleNotifyInterrupt
1862  *
1863  * Signal handler portion of interrupt handling. Let the backend know
1864  * that there's a pending notify interrupt. If we're currently reading
1865  * from the client, this will interrupt the read and
1866  * ProcessClientReadInterrupt() will call ProcessNotifyInterrupt().
1867  */
1868 void
1870 {
 /*
  * NOTE(review): the declarator line (embedded number 1869) is absent from
  * this listing; the header comment names the routine
  * HandleNotifyInterrupt.
  */
1871  /*
1872  * Note: this is called by a SIGNAL HANDLER. You must be very wary what
1873  * you do here.
1874  */
1875 
 /* The handler only sets a flag and the latch; the notifies themselves are processed elsewhere */
1876  /* signal that work needs to be done */
1877  notifyInterruptPending = true;
1878 
1879  /* make sure the event is processed in due course */
1880  SetLatch(MyLatch);
1881 }
1882 
1883 /*
1884  * ProcessNotifyInterrupt
1885  *
1886  * This is called just after waiting for a frontend command. If an
1887  * interrupt arrives (via HandleNotifyInterrupt()) while reading, the
1888  * read will be interrupted via the process's latch, and this routine
1889  * will get called. If we are truly idle (ie, *not* inside a transaction
1890  * block), process the incoming notifies.
1891  */
1892 void
1894 {
 /*
  * NOTE(review): this listing omits the source lines with embedded numbers
  * 1893, 1895 and 1899: the declarator, the condition guarding the early
  * return, and the body of the while loop. As shown, the return is
  * unconditional and the loop has no body; restore from upstream.
  */
1896  return; /* not really idle */
1897 
 /* Loops for as long as the signal handler's flag stays set */
1898  while (notifyInterruptPending)
1900 }
1901 
1902 
1903 /*
1904  * Read all pending notifications from the queue, and deliver appropriate
1905  * ones to my frontend. Stop when we reach queue head or an uncommitted
1906  * notification.
1907  */
1908 static void
1910 {
1911  volatile QueuePosition pos;
1912  QueuePosition oldpos;
1913  QueuePosition head;
1914  Snapshot snapshot;
1915 
1916  /* page_buffer must be adequately aligned, so use a union */
1917  union
1918  {
1919  char buf[QUEUE_PAGESIZE];
1920  AsyncQueueEntry align;
1921  } page_buffer;
1922 
1923  /* Fetch current state */
1924  LWLockAcquire(AsyncQueueLock, LW_SHARED);
1925  /* Assert checks that we have a valid state entry */
1927  pos = oldpos = QUEUE_BACKEND_POS(MyBackendId);
1928  head = QUEUE_HEAD;
1929  LWLockRelease(AsyncQueueLock);
1930 
1931  if (QUEUE_POS_EQUAL(pos, head))
1932  {
1933  /* Nothing to do, we have read all notifications already. */
1934  return;
1935  }
1936 
1937  /* Get snapshot we'll use to decide which xacts are still in progress */
1938  snapshot = RegisterSnapshot(GetLatestSnapshot());
1939 
1940  /*----------
1941  * Note that we deliver everything that we see in the queue and that
1942  * matches our _current_ listening state.
1943  * Especially we do not take into account different commit times.
1944  * Consider the following example:
1945  *
1946  * Backend 1: Backend 2:
1947  *
1948  * transaction starts
1949  * NOTIFY foo;
1950  * commit starts
1951  * transaction starts
1952  * LISTEN foo;
1953  * commit starts
1954  * commit to clog
1955  * commit to clog
1956  *
1957  * It could happen that backend 2 sees the notification from backend 1 in
1958  * the queue. Even though the notifying transaction committed before
1959  * the listening transaction, we still deliver the notification.
1960  *
1961  * The idea is that an additional notification does not do any harm, we
1962  * just need to make sure that we do not miss a notification.
1963  *
1964  * It is possible that we fail while trying to send a message to our
1965  * frontend (for example, because of encoding conversion failure).
1966  * If that happens it is critical that we not try to send the same
1967  * message over and over again. Therefore, we place a PG_TRY block
1968  * here that will forcibly advance our backend position before we lose
1969  * control to an error. (We could alternatively retake AsyncQueueLock
1970  * and move the position before handling each individual message, but
1971  * that seems like too much lock traffic.)
1972  *----------
1973  */
1974  PG_TRY();
1975  {
1976  bool reachedStop;
1977 
1978  do
1979  {
1980  int curpage = QUEUE_POS_PAGE(pos);
1981  int curoffset = QUEUE_POS_OFFSET(pos);
1982  int slotno;
1983  int copysize;
1984 
1985  /*
1986  * We copy the data from SLRU into a local buffer, so as to avoid
1987  * holding the AsyncCtlLock while we are examining the entries and
1988  * possibly transmitting them to our frontend. Copy only the part
1989  * of the page we will actually inspect.
1990  */
1991  slotno = SimpleLruReadPage_ReadOnly(AsyncCtl, curpage,
1993  if (curpage == QUEUE_POS_PAGE(head))
1994  {
1995  /* we only want to read as far as head */
1996  copysize = QUEUE_POS_OFFSET(head) - curoffset;
1997  if (copysize < 0)
1998  copysize = 0; /* just for safety */
1999  }
2000  else
2001  {
2002  /* fetch all the rest of the page */
2003  copysize = QUEUE_PAGESIZE - curoffset;
2004  }
2005  memcpy(page_buffer.buf + curoffset,
2006  AsyncCtl->shared->page_buffer[slotno] + curoffset,
2007  copysize);
2008  /* Release lock that we got from SimpleLruReadPage_ReadOnly() */
2009  LWLockRelease(AsyncCtlLock);
2010 
2011  /*
2012  * Process messages up to the stop position, end of page, or an
2013  * uncommitted message.
2014  *
2015  * Our stop position is what we found to be the head's position
2016  * when we entered this function. It might have changed already.
2017  * But if it has, we will receive (or have already received and
2018  * queued) another signal and come here again.
2019  *
2020  * We are not holding AsyncQueueLock here! The queue can only
2021  * extend beyond the head pointer (see above) and we leave our
2022  * backend's pointer where it is so nobody will truncate or
2023  * rewrite pages under us. Especially we don't want to hold a lock
2024  * while sending the notifications to the frontend.
2025  */
2026  reachedStop = asyncQueueProcessPageEntries(&pos, head,
2027  page_buffer.buf,
2028  snapshot);
2029  } while (!reachedStop);
2030  }
2031  PG_CATCH();
2032  {
2033  /* Update shared state */
2034  LWLockAcquire(AsyncQueueLock, LW_SHARED);
2036  LWLockRelease(AsyncQueueLock);
2037 
2038  PG_RE_THROW();
2039  }
2040  PG_END_TRY();
2041 
2042  /* Update shared state */
2043  LWLockAcquire(AsyncQueueLock, LW_SHARED);
2045  LWLockRelease(AsyncQueueLock);
2046 
2047  /* Done with snapshot */
2048  UnregisterSnapshot(snapshot);
2049 }
2050 
2051 /*
2052  * Fetch notifications from the shared queue, beginning at position current,
2053  * and deliver relevant ones to my frontend.
2054  *
2055  * The current page must have been fetched into page_buffer from shared
2056  * memory. (We could access the page right in shared memory, but that
2057  * would imply holding the AsyncCtlLock throughout this routine.)
2058  *
2059  * We stop if we reach the "stop" position, or reach a notification from an
2060  * uncommitted transaction, or reach the end of the page.
2061  *
2062  * The function returns true once we have reached the stop position or an
2063  * uncommitted notification, and false if we have finished with the page.
2064  * In other words: once it returns true there is no need to look further.
2065  * The QueuePosition *current is advanced past all processed messages.
2066  */
2067 static bool
2069  QueuePosition stop,
2070  char *page_buffer,
2071  Snapshot snapshot)
2072 {
2073  bool reachedStop = false;
2074  bool reachedEndOfPage;
2075  AsyncQueueEntry *qe;
2076 
2077  do
2078  {
2079  QueuePosition thisentry = *current;
2080 
2081  if (QUEUE_POS_EQUAL(thisentry, stop))
2082  break;
2083 
2084  qe = (AsyncQueueEntry *) (page_buffer + QUEUE_POS_OFFSET(thisentry));
2085 
2086  /*
2087  * Advance *current over this message, possibly to the next page. As
2088  * noted in the comments for asyncQueueReadAllNotifications, we must
2089  * do this before possibly failing while processing the message.
2090  */
2091  reachedEndOfPage = asyncQueueAdvance(current, qe->length);
2092 
2093  /* Ignore messages destined for other databases */
2094  if (qe->dboid == MyDatabaseId)
2095  {
2096  if (XidInMVCCSnapshot(qe->xid, snapshot))
2097  {
2098  /*
2099  * The source transaction is still in progress, so we can't
2100  * process this message yet. Break out of the loop, but first
2101  * back up *current so we will reprocess the message next
2102  * time. (Note: it is unlikely but not impossible for
2103  * TransactionIdDidCommit to fail, so we can't really avoid
2104  * this advance-then-back-up behavior when dealing with an
2105  * uncommitted message.)
2106  *
2107  * Note that we must test XidInMVCCSnapshot before we test
2108  * TransactionIdDidCommit, else we might return a message from
2109  * a transaction that is not yet visible to snapshots; compare
2110  * the comments at the head of heapam_visibility.c.
2111  *
2112  * Also, while our own xact won't be listed in the snapshot,
2113  * we need not check for TransactionIdIsCurrentTransactionId
2114  * because our transaction cannot (yet) have queued any
2115  * messages.
2116  */
2117  *current = thisentry;
2118  reachedStop = true;
2119  break;
2120  }
2121  else if (TransactionIdDidCommit(qe->xid))
2122  {
2123  /* qe->data is the null-terminated channel name */
2124  char *channel = qe->data;
2125 
2126  if (IsListeningOn(channel))
2127  {
2128  /* payload follows channel name */
2129  char *payload = qe->data + strlen(channel) + 1;
2130 
2131  NotifyMyFrontEnd(channel, payload, qe->srcPid);
2132  }
2133  }
2134  else
2135  {
2136  /*
2137  * The source transaction aborted or crashed, so we just
2138  * ignore its notifications.
2139  */
2140  }
2141  }
2142 
2143  /* Loop back if we're not at end of page */
2144  } while (!reachedEndOfPage);
2145 
2146  if (QUEUE_POS_EQUAL(*current, stop))
2147  reachedStop = true;
2148 
2149  return reachedStop;
2150 }
2151 
2152 /*
2153  * Advance the shared queue tail variable to the minimum of all the
2154  * per-backend tail pointers. Truncate pg_notify space if possible.
2155  */
2156 static void
2158 {
2159  QueuePosition min;
2160  int oldtailpage;
2161  int newtailpage;
2162  int boundary;
2163 
2164  LWLockAcquire(AsyncQueueLock, LW_EXCLUSIVE);
2165  min = QUEUE_HEAD;
2167  {
2169  min = QUEUE_POS_MIN(min, QUEUE_BACKEND_POS(i));
2170  }
2171  oldtailpage = QUEUE_POS_PAGE(QUEUE_TAIL);
2172  QUEUE_TAIL = min;
2173  LWLockRelease(AsyncQueueLock);
2174 
2175  /*
2176  * We can truncate something if the global tail advanced across an SLRU
2177  * segment boundary.
2178  *
2179  * XXX it might be better to truncate only once every several segments, to
2180  * reduce the number of directory scans.
2181  */
2182  newtailpage = QUEUE_POS_PAGE(min);
2183  boundary = newtailpage - (newtailpage % SLRU_PAGES_PER_SEGMENT);
2184  if (asyncQueuePagePrecedes(oldtailpage, boundary))
2185  {
2186  /*
2187  * SimpleLruTruncate() will ask for AsyncCtlLock but will also release
2188  * the lock again.
2189  */
2190  SimpleLruTruncate(AsyncCtl, newtailpage);
2191  }
2192 }
2193 
2194 /*
2195  * ProcessIncomingNotify
2196  *
2197  * Deal with arriving NOTIFYs from other backends as soon as it's safe to
2198  * do so. This used to be called from the PROCSIG_NOTIFY_INTERRUPT
2199  * signal handler, but isn't anymore.
2200  *
2201  * Scan the queue for arriving notifications and report them to my front
2202  * end.
2203  *
2204  * NOTE: since we are outside any transaction, we must create our own.
2205  */
2206 static void
2208 {
2209  /* We *must* reset the flag */
2210  notifyInterruptPending = false;
2211 
2212  /* Do nothing else if we aren't actively listening */
2213  if (listenChannels == NIL)
2214  return;
2215 
2216  if (Trace_notify)
2217  elog(DEBUG1, "ProcessIncomingNotify");
2218 
2219  set_ps_display("notify interrupt", false);
2220 
2221  /*
2222  * We must run asyncQueueReadAllNotifications inside a transaction, else
2223  * bad things happen if it gets an error.
2224  */
2226 
2228 
2230 
2231  /*
2232  * Must flush the notify messages to ensure frontend gets them promptly.
2233  */
2234  pq_flush();
2235 
2236  set_ps_display("idle", false);
2237 
2238  if (Trace_notify)
2239  elog(DEBUG1, "ProcessIncomingNotify: done");
2240 }
2241 
2242 /*
2243  * Send NOTIFY message to my front end.
2244  */
2245 void
2246 NotifyMyFrontEnd(const char *channel, const char *payload, int32 srcPid)
2247 {
2249  {
2251 
2252  pq_beginmessage(&buf, 'A');
2253  pq_sendint32(&buf, srcPid);
2254  pq_sendstring(&buf, channel);
2256  pq_sendstring(&buf, payload);
2257  pq_endmessage(&buf);
2258 
2259  /*
2260  * NOTE: we do not do pq_flush() here. For a self-notify, it will
2261  * happen at the end of the transaction, and for incoming notifies
2262  * ProcessIncomingNotify will do it after finding all the notifies.
2263  */
2264  }
2265  else
2266  elog(INFO, "NOTIFY for \"%s\" payload \"%s\"", channel, payload);
2267 }
2268 
2269 /* Does pendingNotifies include a match for the given event? */
2270 static bool
2272 {
2273  if (pendingNotifies == NULL)
2274  return false;
2275 
2276  if (pendingNotifies->hashtab != NULL)
2277  {
2278  /* Use the hash table to probe for a match */
2279  if (hash_search(pendingNotifies->hashtab,
2280  &n,
2281  HASH_FIND,
2282  NULL))
2283  return true;
2284  }
2285  else
2286  {
2287  /* Must scan the event list */
2288  ListCell *l;
2289 
2290  foreach(l, pendingNotifies->events)
2291  {
2292  Notification *oldn = (Notification *) lfirst(l);
2293 
2294  if (n->channel_len == oldn->channel_len &&
2295  n->payload_len == oldn->payload_len &&
2296  memcmp(n->data, oldn->data,
2297  n->channel_len + n->payload_len + 2) == 0)
2298  return true;
2299  }
2300  }
2301 
2302  return false;
2303 }
2304 
2305 /*
2306  * Add a notification event to a pre-existing pendingNotifies list.
2307  *
2308  * Because pendingNotifies->events is already nonempty, this works
2309  * correctly no matter what CurrentMemoryContext is.
2310  */
2311 static void
2313 {
2314  Assert(pendingNotifies->events != NIL);
2315 
2316  /* Create the hash table if it's time to */
2317  if (list_length(pendingNotifies->events) >= MIN_HASHABLE_NOTIFIES &&
2318  pendingNotifies->hashtab == NULL)
2319  {
2320  HASHCTL hash_ctl;
2321  ListCell *l;
2322 
2323  /* Create the hash table */
2324  MemSet(&hash_ctl, 0, sizeof(hash_ctl));
2325  hash_ctl.keysize = sizeof(Notification *);
2326  hash_ctl.entrysize = sizeof(NotificationHash);
2327  hash_ctl.hash = notification_hash;
2328  hash_ctl.match = notification_match;
2329  hash_ctl.hcxt = CurTransactionContext;
2330  pendingNotifies->hashtab =
2331  hash_create("Pending Notifies",
2332  256L,
2333  &hash_ctl,
2335 
2336  /* Insert all the already-existing events */
2337  foreach(l, pendingNotifies->events)
2338  {
2339  Notification *oldn = (Notification *) lfirst(l);
2340  NotificationHash *hentry;
2341  bool found;
2342 
2343  hentry = (NotificationHash *) hash_search(pendingNotifies->hashtab,
2344  &oldn,
2345  HASH_ENTER,
2346  &found);
2347  Assert(!found);
2348  hentry->event = oldn;
2349  }
2350  }
2351 
2352  /* Add new event to the list, in order */
2353  pendingNotifies->events = lappend(pendingNotifies->events, n);
2354 
2355  /* Add event to the hash table if needed */
2356  if (pendingNotifies->hashtab != NULL)
2357  {
2358  NotificationHash *hentry;
2359  bool found;
2360 
2361  hentry = (NotificationHash *) hash_search(pendingNotifies->hashtab,
2362  &n,
2363  HASH_ENTER,
2364  &found);
2365  Assert(!found);
2366  hentry->event = n;
2367  }
2368 }
2369 
2370 /*
2371  * notification_hash: hash function for notification hash table
2372  *
2373  * The hash "keys" are pointers to Notification structs.
2374  */
2375 static uint32
2376 notification_hash(const void *key, Size keysize)
2377 {
2378  const Notification *k = *(const Notification *const *) key;
2379 
2380  Assert(keysize == sizeof(Notification *));
2381  /* We don't bother to include the payload's trailing null in the hash */
2382  return DatumGetUInt32(hash_any((const unsigned char *) k->data,
2383  k->channel_len + k->payload_len + 1));
2384 }
2385 
2386 /*
2387  * notification_match: match function to use with notification_hash
2388  */
2389 static int
2390 notification_match(const void *key1, const void *key2, Size keysize)
2391 {
2392  const Notification *k1 = *(const Notification *const *) key1;
2393  const Notification *k2 = *(const Notification *const *) key2;
2394 
2395  Assert(keysize == sizeof(Notification *));
2396  if (k1->channel_len == k2->channel_len &&
2397  k1->payload_len == k2->payload_len &&
2398  memcmp(k1->data, k2->data,
2399  k1->channel_len + k1->payload_len + 2) == 0)
2400  return 0; /* equal */
2401  return 1; /* not equal */
2402 }
2403 
2404 /* Clear the pendingActions and pendingNotifies lists. */
2405 static void
2407 {
2408  /*
2409  * Everything's allocated in either TopTransactionContext or the context
2410  * for the subtransaction to which it corresponds. So, there's nothing
2411  * to do here except rest the pointers; the space will be reclaimed when
2412  * the contexts are deleted.
2413  */
2414  pendingActions = NULL;
2415  pendingNotifies = NULL;
2416 }
uint64 call_cntr
Definition: funcapi.h:66
#define DatumGetUInt32(X)
Definition: postgres.h:486
struct QueueBackendStatus QueueBackendStatus
#define NIL
Definition: pg_list.h:65
static void usage(void)
Definition: pg_standby.c:593
#define QUEUE_TAIL
Definition: async.c:282
BackendId firstListener
Definition: async.c:273
char data[NAMEDATALEN+NOTIFY_PAYLOAD_MAX_LENGTH]
Definition: async.c:176
bool XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
Definition: snapmgr.c:2241
#define DEBUG1
Definition: elog.h:25
int MyProcPid
Definition: globals.c:40
int errhint(const char *fmt,...)
Definition: elog.c:974
static void queue_listen(ListenActionKind action, const char *channel)
Definition: async.c:714
static SlruCtlData AsyncCtlData
Definition: async.c:292
BackendId MyBackendId
Definition: globals.c:81
struct NotificationHash NotificationHash
List * events
Definition: async.c:394
#define pq_flush()
Definition: libpq.h:39
MemoryContext TopTransactionContext
Definition: mcxt.c:49
#define QUEUE_BACKEND_PID(i)
Definition: async.c:284
Datum hash_any(const unsigned char *k, int keylen)
Definition: hashfn.c:148
int page
Definition: async.c:189
#define HASH_CONTEXT
Definition: hsearch.h:93
#define HASH_ELEM
Definition: hsearch.h:87
uint32 TransactionId
Definition: c.h:507
Snapshot RegisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:865
static ListCell * lnext(const List *l, const ListCell *c)
Definition: pg_list.h:321
MemoryContext hcxt
Definition: hsearch.h:78
bool SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int segpage, void *data)
Definition: slru.c:1370
#define DEBUG3
Definition: elog.h:23
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1583
static ActionList * pendingActions
Definition: async.c:354
void AsyncShmemInit(void)
Definition: async.c:515
int64 TimestampTz
Definition: timestamp.h:39
#define SRF_IS_FIRSTCALL()
Definition: funcapi.h:283
struct NotificationList NotificationList
char * pstrdup(const char *in)
Definition: mcxt.c:1186
void CommitTransactionCommand(void)
Definition: xact.c:2895
void SimpleLruTruncate(SlruCtl ctl, int cutoffPage)
Definition: slru.c:1185
#define PG_RETURN_FLOAT8(x)
Definition: fmgr.h:356
char data[FLEXIBLE_ARRAY_MEMBER]
Definition: async.c:388
#define AsyncCtl
Definition: async.c:294
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
static void Exec_UnlistenAllCommit(void)
Definition: async.c:1192
int offset
Definition: async.c:190
void set_ps_display(const char *activity, bool force)
Definition: ps_status.c:331
Size entrysize
Definition: hsearch.h:73
MemoryContext CurTransactionContext
Definition: mcxt.c:50
static List * listenChannels
Definition: async.c:322
List * list_concat(List *list1, const List *list2)
Definition: list.c:516
void AtPrepare_Notify(void)
Definition: async.c:860
int errcode(int sqlerrcode)
Definition: elog.c:570
Datum pg_notification_queue_usage(PG_FUNCTION_ARGS)
Definition: async.c:1553
bool IsTransactionOrTransactionBlock(void)
Definition: xact.c:4651
#define MemSet(start, val, len)
Definition: c.h:955
#define INFO
Definition: elog.h:33
static double asyncQueueUsage(void)
Definition: async.c:1570
void pq_sendstring(StringInfo buf, const char *str)
Definition: pqformat.c:197
void Async_Listen(const char *channel)
Definition: async.c:762
static void ClearPendingActionsAndNotifies(void)
Definition: async.c:2406
static void asyncQueueNotificationToEntry(Notification *n, AsyncQueueEntry *qe)
Definition: async.c:1415
Notification * event
Definition: async.c:403
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:906
static bool asyncQueueAdvance(volatile QueuePosition *position, int entryLength)
Definition: async.c:1380
#define QUEUE_HEAD
Definition: async.c:281
bool TransactionIdDidCommit(TransactionId transactionId)
Definition: transam.c:125
static void Exec_UnlistenCommit(const char *channel)
Definition: async.c:1161
unsigned int Oid
Definition: postgres_ext.h:31
#define PG_PROTOCOL_MAJOR(v)
Definition: pqcomm.h:104
void SetLatch(Latch *latch)
Definition: latch.c:436
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1682
Size SimpleLruShmemSize(int nslots, int nlsns)
Definition: slru.c:145
void list_free_deep(List *list)
Definition: list.c:1391
static bool IsListeningOn(const char *channel)
Definition: async.c:1289
#define SRF_PERCALL_SETUP()
Definition: funcapi.h:287
static bool unlistenExitRegistered
Definition: async.c:418
static bool asyncQueueIsFull(void)
Definition: async.c:1349
void pq_beginmessage(StringInfo buf, char msgtype)
Definition: pqformat.c:87
static bool AsyncExistsPendingNotify(Notification *n)
Definition: async.c:2271
static void asyncQueueFillWarning(void)
Definition: async.c:1599
signed int int32
Definition: c.h:346
int nestingLevel
Definition: async.c:393
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:303
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1726
#define foreach_delete_current(lst, cell)
Definition: pg_list.h:368
#define list_make1(x1)
Definition: pg_list.h:227
QueuePosition pos
Definition: async.c:238
#define NAMEDATALEN
#define SRF_RETURN_NEXT(_funcctx, _result)
Definition: funcapi.h:289
static void pq_sendint32(StringInfo buf, uint32 i)
Definition: pqformat.h:145
static int asyncQueuePageDiff(int p, int q)
Definition: async.c:468
void PreCommit_Notify(void)
Definition: async.c:885
#define NUM_ASYNC_BUFFERS
Definition: async.h:21
#define QUEUE_POS_OFFSET(x)
Definition: async.c:194
Definition: dynahash.c:208
int SendProcSignal(pid_t pid, ProcSignalReason reason, BackendId backendId)
Definition: procsignal.c:180
unsigned short uint16
Definition: c.h:357
void pfree(void *pointer)
Definition: mcxt.c:1056
static NotificationList * pendingNotifies
Definition: async.c:406
#define AsyncQueueEntryEmptySize
Definition: async.c:182
#define ERROR
Definition: elog.h:43
void PreventCommandDuringRecovery(const char *cmdname)
Definition: utility.c:276
void ProcessNotifyInterrupt(void)
Definition: async.c:1893
struct ActionList ActionList
#define QUEUE_FIRST_LISTENER
Definition: async.c:283
static void * list_nth(const List *list, int n)
Definition: pg_list.h:277
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:372
int SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok, TransactionId xid)
Definition: slru.c:375
static void Async_UnlistenOnExit(int code, Datum arg)
Definition: async.c:847
BackendId nextListener
Definition: async.c:237
void AtSubCommit_Notify(void)
Definition: async.c:1755
static bool backendHasSentNotifications
Definition: async.c:424
int MaxBackends
Definition: globals.c:135
TransactionId GetCurrentTransactionId(void)
Definition: xact.c:423
static char * buf
Definition: pg_test_fsync.c:68
static bool backendTryAdvanceTail
Definition: async.c:427
#define QUEUE_PAGESIZE
Definition: async.c:295
#define SET_QUEUE_POS(x, y, z)
Definition: async.c:196
List * actions
Definition: async.c:350
static void AddEventToPendingNotifies(Notification *n)
Definition: async.c:2312
int errdetail(const char *fmt,...)
Definition: elog.c:860
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:333
#define InvalidTransactionId
Definition: transam.h:31
static ListCell * list_head(const List *l)
Definition: pg_list.h:125
unsigned int uint32
Definition: c.h:358
void SimpleLruWritePage(SlruCtl ctl, int slotno)
Definition: slru.c:578
MemoryContext CurrentMemoryContext
Definition: mcxt.c:38
#define NOTIFY_PAYLOAD_MAX_LENGTH
Definition: async.c:156
static bool asyncQueuePagePrecedes(int p, int q)
Definition: async.c:489
struct NotificationList * upper
Definition: async.c:396
static AsyncQueueControl * asyncQueueControl
Definition: async.c:279
static void asyncQueueAdvanceTail(void)
Definition: async.c:2157
static void Exec_ListenCommit(const char *channel)
Definition: async.c:1134
#define ereport(elevel, rest)
Definition: elog.h:141
#define IsParallelWorker()
Definition: parallel.h:60
static void SignalBackends(void)
Definition: async.c:1652
MemoryContext TopMemoryContext
Definition: mcxt.c:44
void UnregisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:907
#define QUEUE_FULL_WARN_INTERVAL
Definition: async.c:296
List * lappend(List *list, void *datum)
Definition: list.c:322
struct AsyncQueueControl AsyncQueueControl
#define WARNING
Definition: elog.h:40
#define MIN_HASHABLE_NOTIFIES
Definition: async.c:399
struct QueuePosition QueuePosition
Size mul_size(Size s1, Size s2)
Definition: shmem.c:492
void AtSubAbort_Notify(void)
Definition: async.c:1825
#define InvalidBackendId
Definition: backendid.h:23
ListenActionKind
Definition: async.c:334
int nestingLevel
Definition: async.c:349
static void ProcessIncomingNotify(void)
Definition: async.c:2207
HTAB * hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
Definition: dynahash.c:316
uintptr_t Datum
Definition: postgres.h:367
#define QUEUE_CLEANUP_DELAY
Definition: async.c:228
static void asyncQueueUnregister(void)
Definition: async.c:1308
static bool amRegisteredListener
Definition: async.c:421
void AtAbort_Notify(void)
Definition: async.c:1735
Size add_size(Size s1, Size s2)
Definition: shmem.c:475
int BackendId
Definition: backendid.h:21
#define QUEUEALIGN(len)
Definition: async.c:180
Oid MyDatabaseId
Definition: globals.c:85
int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid)
Definition: slru.c:467
Size keysize
Definition: hsearch.h:72
void LockSharedObject(Oid classid, Oid objid, uint16 objsubid, LOCKMODE lockmode)
Definition: lmgr.c:1002
HashCompareFunc match
Definition: hsearch.h:75
#define QUEUE_BACKEND_DBOID(i)
Definition: async.c:285
QueuePosition head
Definition: async.c:270
static bool asyncQueueProcessPageEntries(volatile QueuePosition *current, QueuePosition stop, char *page_buffer, Snapshot snapshot)
Definition: async.c:2068
#define InvalidOid
Definition: postgres_ext.h:36
static void asyncQueueReadAllNotifications(void)
Definition: async.c:1909
struct ActionList * upper
Definition: async.c:351
#define QUEUE_NEXT_LISTENER(i)
Definition: async.c:286
int GetCurrentTransactionNestLevel(void)
Definition: xact.c:842
#define PG_RETURN_VOID()
Definition: fmgr.h:339
Datum pg_notify(PG_FUNCTION_ARGS)
Definition: async.c:581
#define PG_CATCH()
Definition: elog.h:310
HTAB * hashtab
Definition: async.c:395
static int notification_match(const void *key1, const void *key2, Size keysize)
Definition: async.c:2390
#define PG_ARGISNULL(n)
Definition: fmgr.h:204
static ListCell * asyncQueueAddEntries(ListCell *nextNotify)
Definition: async.c:1451
#define Assert(condition)
Definition: c.h:732
void ProcessCompletedNotifies(void)
Definition: async.c:1225
#define lfirst(lc)
Definition: pg_list.h:190
bool SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data)
Definition: slru.c:1393
static uint32 notification_hash(const void *key, Size keysize)
Definition: async.c:2376
uint16 channel_len
Definition: async.c:385
static void Exec_ListenPreCommit(void)
Definition: async.c:1037
void StartTransactionCommand(void)
Definition: xact.c:2794
uint16 payload_len
Definition: async.c:386
#define HASH_COMPARE
Definition: hsearch.h:90
struct AsyncQueueEntry AsyncQueueEntry
Size AsyncShmemSize(void)
Definition: async.c:498
size_t Size
Definition: c.h:466
void Async_UnlistenAll(void)
Definition: async.c:794
static int list_length(const List *l)
Definition: pg_list.h:169
struct Notification Notification
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1122
#define PG_RE_THROW()
Definition: elog.h:331
bool Trace_notify
Definition: async.c:430
Snapshot GetLatestSnapshot(void)
Definition: snapmgr.c:381
void Async_Notify(const char *channel, const char *payload)
Definition: async.c:615
Datum pg_listening_channels(PG_FUNCTION_ARGS)
Definition: async.c:814
TransactionId xid
Definition: async.c:174
char * text_to_cstring(const text *t)
Definition: varlena.c:204
#define AccessExclusiveLock
Definition: lockdefs.h:45
void AtCommit_Notify(void)
Definition: async.c:987
void HandleNotifyInterrupt(void)
Definition: async.c:1869
void * palloc(Size size)
Definition: mcxt.c:949
int errmsg(const char *fmt,...)
Definition: elog.c:784
void pq_endmessage(StringInfo buf)
Definition: pqformat.c:298
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:796
#define elog(elevel,...)
Definition: elog.h:226
char channel[FLEXIBLE_ARRAY_MEMBER]
Definition: async.c:344
int i
ListenActionKind action
Definition: async.c:343
#define CStringGetTextDatum(s)
Definition: builtins.h:83
int32 srcPid
Definition: async.c:175
void * arg
struct Latch * MyLatch
Definition: globals.c:54
volatile sig_atomic_t notifyInterruptPending
Definition: async.c:415
#define PG_FUNCTION_ARGS
Definition: fmgr.h:188
#define SLRU_PAGES_PER_SEGMENT
Definition: slru.h:33
TimestampTz lastQueueFillWarn
Definition: async.c:274
CommandDest whereToSendOutput
Definition: postgres.c:90
#define QUEUE_MAX_PAGE
Definition: async.c:315
#define PG_TRY()
Definition: elog.h:301
QueuePosition tail
Definition: async.c:271
#define QUEUE_BACKEND_POS(i)
Definition: async.c:287
Definition: pg_list.h:50
ProtocolVersion FrontendProtocol
Definition: globals.c:28
int SimpleLruZeroPage(SlruCtl ctl, int pageno)
Definition: slru.c:263
#define PG_END_TRY()
Definition: elog.h:317
#define QUEUE_POS_PAGE(x)
Definition: async.c:193
#define QUEUE_POS_EQUAL(x, y)
Definition: async.c:202
#define offsetof(type, field)
Definition: c.h:655
#define QUEUE_POS_MIN(x, y)
Definition: async.c:206
void Async_Unlisten(const char *channel)
Definition: async.c:776
void NotifyMyFrontEnd(const char *channel, const char *payload, int32 srcPid)
Definition: async.c:2246
HashValueFunc hash
Definition: hsearch.h:74
#define SRF_RETURN_DONE(_funcctx)
Definition: funcapi.h:307
#define HASH_FUNCTION
Definition: hsearch.h:89
#define InvalidPid
Definition: miscadmin.h:32
void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, LWLock *ctllock, const char *subdir, int tranche_id)
Definition: slru.c:165
#define SRF_FIRSTCALL_INIT()
Definition: funcapi.h:285
#define QUEUE_POS_MAX(x, y)
Definition: async.c:212