PostgreSQL Source Code (git master)
dsm.c
1 /*-------------------------------------------------------------------------
2  *
3  * dsm.c
4  * manage dynamic shared memory segments
5  *
6  * This file provides a set of services to make programming with dynamic
7  * shared memory segments more convenient. Unlike the low-level
8  * facilities provided by dsm_impl.h and dsm_impl.c, mappings and segments
9  * created using this module will be cleaned up automatically. Mappings
10  * will be removed when the resource owner under which they were created
11  * is cleaned up, unless dsm_pin_mapping() is used, in which case they
12  * have session lifespan. Segments will be removed when there are no
13  * remaining mappings, or at postmaster shutdown in any case. After a
14  * hard postmaster crash, remaining segments will be removed, if they
15  * still exist, at the next postmaster startup.
16  *
17  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
18  * Portions Copyright (c) 1994, Regents of the University of California
19  *
20  *
21  * IDENTIFICATION
22  * src/backend/storage/ipc/dsm.c
23  *
24  *-------------------------------------------------------------------------
25  */
26 
27 #include "postgres.h"
28 
29 #include <fcntl.h>
30 #include <unistd.h>
31 #ifndef WIN32
32 #include <sys/mman.h>
33 #endif
34 #include <sys/stat.h>
35 
36 #include "lib/ilist.h"
37 #include "miscadmin.h"
38 #include "storage/dsm.h"
39 #include "storage/ipc.h"
40 #include "storage/lwlock.h"
41 #include "storage/pg_shmem.h"
42 #include "utils/guc.h"
43 #include "utils/memutils.h"
44 #include "utils/resowner_private.h"
45 
46 #define PG_DYNSHMEM_CONTROL_MAGIC 0x9a503d32
47 
48 #define PG_DYNSHMEM_FIXED_SLOTS 64
49 #define PG_DYNSHMEM_SLOTS_PER_BACKEND 5
50 
51 #define INVALID_CONTROL_SLOT ((uint32) -1)
52 
53 /* Backend-local tracking for on-detach callbacks. */
54 typedef struct dsm_segment_detach_callback
55 {
56  on_dsm_detach_callback function;
57  Datum arg;
58  slist_node node;
59 } dsm_segment_detach_callback;
60 
61 /* Backend-local state for a dynamic shared memory segment. */
62 struct dsm_segment
63 {
64  dlist_node node; /* List link in dsm_segment_list. */
65  ResourceOwner resowner; /* Resource owner. */
66  dsm_handle handle; /* Segment name. */
67  uint32 control_slot; /* Slot in control segment. */
68  void *impl_private; /* Implementation-specific private data. */
69  void *mapped_address; /* Mapping address, or NULL if unmapped. */
70  Size mapped_size; /* Size of our mapping. */
71  slist_head on_detach; /* On-detach callbacks. */
72 };
73 
74 /* Shared-memory state for a dynamic shared memory segment. */
75 typedef struct dsm_control_item
76 {
77  dsm_handle handle;
78  uint32 refcnt; /* 2+ = active, 1 = moribund, 0 = gone */
79  void *impl_private_pm_handle; /* only needed on Windows */
80  bool pinned;
81 } dsm_control_item;
82 
83 /* Layout of the dynamic shared memory control segment. */
84 typedef struct dsm_control_header
85 {
86  uint32 magic;
87  uint32 nitems;
88  uint32 maxitems;
89  dsm_control_item item[FLEXIBLE_ARRAY_MEMBER];
90 } dsm_control_header;
91 
92 static void dsm_cleanup_for_mmap(void);
93 static void dsm_postmaster_shutdown(int code, Datum arg);
94 static dsm_segment *dsm_create_descriptor(void);
95 static bool dsm_control_segment_sane(dsm_control_header *control,
96  Size mapped_size);
97 static uint64 dsm_control_bytes_needed(uint32 nitems);
98 
99 /* Has this backend initialized the dynamic shared memory system yet? */
100 static bool dsm_init_done = false;
101 
102 /*
103  * List of dynamic shared memory segments used by this backend.
104  *
105  * At process exit time, we must decrement the reference count of each
106  * segment we have attached; this list makes it possible to find all such
107  * segments.
108  *
109  * This list should always be empty in the postmaster. We could probably
110  * allow the postmaster to map dynamic shared memory segments before it
111  * begins to start child processes, provided that each process adjusted
112  * the reference counts for those segments in the control segment at
113  * startup time, but there's no obvious need for such a facility, which
114  * would also be complex to handle in the EXEC_BACKEND case. Once the
115  * postmaster has begun spawning children, there's an additional problem:
116  * each new mapping would require an update to the control segment,
117  * which requires locking, in which the postmaster must not be involved.
118  */
119 static dlist_head dsm_segment_list = DLIST_STATIC_INIT(dsm_segment_list);
120 
121 /*
122  * Control segment information.
123  *
124  * Unlike ordinary shared memory segments, the control segment is not
125  * reference counted; instead, it lasts for the postmaster's entire
126  * life cycle. For simplicity, it doesn't have a dsm_segment object either.
127  */
128 static dsm_handle dsm_control_handle;
129 static dsm_control_header *dsm_control;
130 static Size dsm_control_mapped_size = 0;
131 static void *dsm_control_impl_private = NULL;
131 static void *dsm_control_impl_private = NULL;
132 
133 /*
134  * Start up the dynamic shared memory system.
135  *
136  * This is called just once during each cluster lifetime, at postmaster
137  * startup time.
138  */
139 void
140 dsm_postmaster_startup(PGShmemHeader *shim)
141 {
142  void *dsm_control_address = NULL;
143  uint32 maxitems;
144  Size segsize;
145 
146  Assert(!IsUnderPostmaster);
147 
148  /*
149  * If we're using the mmap implementations, clean up any leftovers.
150  * Cleanup isn't needed on Windows, and happens earlier in startup for
151  * POSIX and System V shared memory, via a direct call to
152  * dsm_cleanup_using_control_segment.
153  */
154  if (dynamic_shared_memory_type == DSM_IMPL_MMAP)
155  dsm_cleanup_for_mmap();
156 
157  /* Determine size for new control segment. */
158  maxitems = PG_DYNSHMEM_FIXED_SLOTS
159  + PG_DYNSHMEM_SLOTS_PER_BACKEND * MaxBackends;
160  elog(DEBUG2, "dynamic shared memory system will support %u segments",
161  maxitems);
162  segsize = dsm_control_bytes_needed(maxitems);
163 
164  /*
165  * Loop until we find an unused identifier for the new control segment. We
166  * sometimes use 0 as a sentinel value indicating that no control segment
167  * is known to exist, so avoid using that value for a real control
168  * segment.
169  */
170  for (;;)
171  {
172  Assert(dsm_control_address == NULL);
173  Assert(dsm_control_mapped_size == 0);
174  dsm_control_handle = random();
175  if (dsm_control_handle == DSM_HANDLE_INVALID)
176  continue;
177  if (dsm_impl_op(DSM_OP_CREATE, dsm_control_handle, segsize,
178  &dsm_control_impl_private, &dsm_control_address,
179  &dsm_control_mapped_size, ERROR))
180  break;
181  }
182  dsm_control = dsm_control_address;
183  on_shmem_exit(dsm_postmaster_shutdown, PointerGetDatum(shim));
184  elog(DEBUG2,
185  "created dynamic shared memory control segment %u (%zu bytes)",
186  dsm_control_handle, segsize);
187  shim->dsm_control = dsm_control_handle;
188 
189  /* Initialize control segment. */
190  dsm_control->magic = PG_DYNSHMEM_CONTROL_MAGIC;
191  dsm_control->nitems = 0;
192  dsm_control->maxitems = maxitems;
193 }
194 
195 /*
196  * Determine whether the control segment from the previous postmaster
197  * invocation still exists. If so, remove the dynamic shared memory
198  * segments to which it refers, and then the control segment itself.
199  */
200 void
201 dsm_cleanup_using_control_segment(dsm_handle old_control_handle)
202 {
203  void *mapped_address = NULL;
204  void *junk_mapped_address = NULL;
205  void *impl_private = NULL;
206  void *junk_impl_private = NULL;
207  Size mapped_size = 0;
208  Size junk_mapped_size = 0;
209  uint32 nitems;
210  uint32 i;
211  dsm_control_header *old_control;
212 
213  /*
214  * Try to attach the segment. If this fails, it probably just means that
215  * the operating system has been rebooted and the segment no longer
216  * exists, or an unrelated process has used the same shm ID. So just fall
217  * out quietly.
218  */
219  if (!dsm_impl_op(DSM_OP_ATTACH, old_control_handle, 0, &impl_private,
220  &mapped_address, &mapped_size, DEBUG1))
221  return;
222 
223  /*
224  * We've managed to reattach it, but the contents might not be sane. If
225  * they aren't, we disregard the segment after all.
226  */
227  old_control = (dsm_control_header *) mapped_address;
228  if (!dsm_control_segment_sane(old_control, mapped_size))
229  {
230  dsm_impl_op(DSM_OP_DETACH, old_control_handle, 0, &impl_private,
231  &mapped_address, &mapped_size, LOG);
232  return;
233  }
234 
235  /*
236  * OK, the control segment looks basically valid, so we can use it to get
237  * a list of segments that need to be removed.
238  */
239  nitems = old_control->nitems;
240  for (i = 0; i < nitems; ++i)
241  {
242  dsm_handle handle;
243  uint32 refcnt;
244 
245  /* If the reference count is 0, the slot is actually unused. */
246  refcnt = old_control->item[i].refcnt;
247  if (refcnt == 0)
248  continue;
249 
250  /* Log debugging information. */
251  handle = old_control->item[i].handle;
252  elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u (reference count %u)",
253  handle, refcnt);
254 
255  /* Destroy the referenced segment. */
256  dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
257  &junk_mapped_address, &junk_mapped_size, LOG);
258  }
259 
260  /* Destroy the old control segment, too. */
261  elog(DEBUG2,
262  "cleaning up dynamic shared memory control segment with ID %u",
263  old_control_handle);
264  dsm_impl_op(DSM_OP_DESTROY, old_control_handle, 0, &impl_private,
265  &mapped_address, &mapped_size, LOG);
266 }
267 
268 /*
269  * When we're using the mmap shared memory implementation, "shared memory"
270  * segments might even manage to survive an operating system reboot.
271  * But there's no guarantee as to exactly what will survive: some segments
272  * may survive, and others may not, and the contents of some may be out
273  * of date. In particular, the control segment may be out of date, so we
274  * can't rely on it to figure out what to remove. However, since we know
275  * what directory contains the files we used as shared memory, we can simply
276  * scan the directory and blow everything away that shouldn't be there.
277  */
278 static void
279 dsm_cleanup_for_mmap(void)
280 {
281  DIR *dir;
282  struct dirent *dent;
283 
284  /* Scan the directory for something with a name of the correct format. */
285  dir = AllocateDir(PG_DYNSHMEM_DIR);
286 
287  while ((dent = ReadDir(dir, PG_DYNSHMEM_DIR)) != NULL)
288  {
289  if (strncmp(dent->d_name, PG_DYNSHMEM_MMAP_FILE_PREFIX,
290  strlen(PG_DYNSHMEM_MMAP_FILE_PREFIX)) == 0)
291  {
292  char buf[MAXPGPATH + sizeof(PG_DYNSHMEM_DIR)];
293 
294  snprintf(buf, sizeof(buf), PG_DYNSHMEM_DIR "/%s", dent->d_name);
295 
296  elog(DEBUG2, "removing file \"%s\"", buf);
297 
298  /* We found a matching file; so remove it. */
299  if (unlink(buf) != 0)
300  ereport(ERROR,
301  (errcode_for_file_access(),
302  errmsg("could not remove file \"%s\": %m", buf)));
303  }
304  }
305 
306  /* Cleanup complete. */
307  FreeDir(dir);
308 }
309 
310 /*
311  * At shutdown time, we iterate over the control segment and remove all
312  * remaining dynamic shared memory segments. We avoid throwing errors here;
313  * the postmaster is shutting down either way, and this is just non-critical
314  * resource cleanup.
315  */
316 static void
317 dsm_postmaster_shutdown(int code, Datum arg)
318 {
319  uint32 nitems;
320  uint32 i;
321  void *dsm_control_address;
322  void *junk_mapped_address = NULL;
323  void *junk_impl_private = NULL;
324  Size junk_mapped_size = 0;
325  PGShmemHeader *shim = (PGShmemHeader *) DatumGetPointer(arg);
326 
327  /*
328  * If some other backend exited uncleanly, it might have corrupted the
329  * control segment while it was dying. In that case, we warn and ignore
330  * the contents of the control segment. This may end up leaving behind
331  * stray shared memory segments, but there's not much we can do about that
332  * if the metadata is gone.
333  */
334  nitems = dsm_control->nitems;
335  if (!dsm_control_segment_sane(dsm_control, dsm_control_mapped_size))
336  {
337  ereport(LOG,
338  (errmsg("dynamic shared memory control segment is corrupt")));
339  return;
340  }
341 
342  /* Remove any remaining segments. */
343  for (i = 0; i < nitems; ++i)
344  {
345  dsm_handle handle;
346 
347  /* If the reference count is 0, the slot is actually unused. */
348  if (dsm_control->item[i].refcnt == 0)
349  continue;
350 
351  /* Log debugging information. */
352  handle = dsm_control->item[i].handle;
353  elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u",
354  handle);
355 
356  /* Destroy the segment. */
357  dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
358  &junk_mapped_address, &junk_mapped_size, LOG);
359  }
360 
361  /* Remove the control segment itself. */
362  elog(DEBUG2,
363  "cleaning up dynamic shared memory control segment with ID %u",
364  dsm_control_handle);
365  dsm_control_address = dsm_control;
366  dsm_impl_op(DSM_OP_DESTROY, dsm_control_handle, 0,
367  &dsm_control_impl_private, &dsm_control_address,
368  &dsm_control_mapped_size, LOG);
369  dsm_control = dsm_control_address;
370  shim->dsm_control = 0;
371 }
372 
373 /*
374  * Prepare this backend for dynamic shared memory usage. Under EXEC_BACKEND,
375  * we must reread the state file and map the control segment; in other cases,
376  * we'll have inherited the postmaster's mapping and global variables.
377  */
378 static void
379 dsm_backend_startup(void)
380 {
381 #ifdef EXEC_BACKEND
382  {
383  void *control_address = NULL;
384 
385  /* Attach control segment. */
386  Assert(dsm_control_handle != 0);
387  dsm_impl_op(DSM_OP_ATTACH, dsm_control_handle, 0,
388  &dsm_control_impl_private, &control_address,
389  &dsm_control_mapped_size, ERROR);
390  dsm_control = control_address;
391  /* If control segment doesn't look sane, something is badly wrong. */
392  if (!dsm_control_segment_sane(dsm_control, dsm_control_mapped_size))
393  {
394  dsm_impl_op(DSM_OP_DETACH, dsm_control_handle, 0,
395  &dsm_control_impl_private, &control_address,
396  &dsm_control_mapped_size, WARNING);
397  ereport(FATAL,
398  (errcode(ERRCODE_INTERNAL_ERROR),
399  errmsg("dynamic shared memory control segment is not valid")));
400  }
401  }
402 #endif
403 
404  dsm_init_done = true;
405 }
406 
407 #ifdef EXEC_BACKEND
408 /*
409  * When running under EXEC_BACKEND, we get a callback here when the main
410  * shared memory segment is re-attached, so that we can record the control
411  * handle retrieved from it.
412  */
413 void
414 dsm_set_control_handle(dsm_handle h)
415 {
416  Assert(dsm_control_handle == 0 && h != 0);
417  dsm_control_handle = h;
418 }
419 #endif
420 
421 /*
422  * Create a new dynamic shared memory segment.
423  *
424  * If there is a non-NULL CurrentResourceOwner, the new segment is associated
425  * with it and must be detached before the resource owner releases, or a
426  * warning will be logged. If CurrentResourceOwner is NULL, the segment
427  * remains attached until explicitly detached or the session ends.
428  * Creating with a NULL CurrentResourceOwner is equivalent to creating
429  * with a non-NULL CurrentResourceOwner and then calling dsm_pin_mapping.
430  */
431 dsm_segment *
432 dsm_create(Size size, int flags)
433 {
434  dsm_segment *seg;
435  uint32 i;
436  uint32 nitems;
437 
438  /* Unsafe in postmaster (and pointless in a stand-alone backend). */
439  Assert(IsUnderPostmaster);
440 
441  if (!dsm_init_done)
442  dsm_backend_startup();
443 
444  /* Create a new segment descriptor. */
445  seg = dsm_create_descriptor();
446 
447  /* Loop until we find an unused segment identifier. */
448  for (;;)
449  {
450  Assert(seg->mapped_address == NULL && seg->mapped_size == 0);
451  seg->handle = random();
452  if (seg->handle == DSM_HANDLE_INVALID) /* Reserve sentinel */
453  continue;
454  if (dsm_impl_op(DSM_OP_CREATE, seg->handle, size, &seg->impl_private,
455  &seg->mapped_address, &seg->mapped_size, ERROR))
456  break;
457  }
458 
459  /* Lock the control segment so we can register the new segment. */
460  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
461 
462  /* Search the control segment for an unused slot. */
463  nitems = dsm_control->nitems;
464  for (i = 0; i < nitems; ++i)
465  {
466  if (dsm_control->item[i].refcnt == 0)
467  {
468  dsm_control->item[i].handle = seg->handle;
469  /* refcnt of 1 triggers destruction, so start at 2 */
470  dsm_control->item[i].refcnt = 2;
471  dsm_control->item[i].impl_private_pm_handle = NULL;
472  dsm_control->item[i].pinned = false;
473  seg->control_slot = i;
474  LWLockRelease(DynamicSharedMemoryControlLock);
475  return seg;
476  }
477  }
478 
479  /* Verify that we can support an additional mapping. */
480  if (nitems >= dsm_control->maxitems)
481  {
482  LWLockRelease(DynamicSharedMemoryControlLock);
483  dsm_impl_op(DSM_OP_DESTROY, seg->handle, 0, &seg->impl_private,
484  &seg->mapped_address, &seg->mapped_size, WARNING);
485  if (seg->resowner != NULL)
486  ResourceOwnerForgetDSM(seg->resowner, seg);
487  dlist_delete(&seg->node);
488  pfree(seg);
489 
490  if ((flags & DSM_CREATE_NULL_IF_MAXSEGMENTS) != 0)
491  return NULL;
492  ereport(ERROR,
493  (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
494  errmsg("too many dynamic shared memory segments")));
495  }
496 
497  /* Enter the handle into a new array slot. */
498  dsm_control->item[nitems].handle = seg->handle;
499  /* refcnt of 1 triggers destruction, so start at 2 */
500  dsm_control->item[nitems].refcnt = 2;
501  dsm_control->item[nitems].impl_private_pm_handle = NULL;
502  dsm_control->item[nitems].pinned = false;
503  seg->control_slot = nitems;
504  dsm_control->nitems++;
505  LWLockRelease(DynamicSharedMemoryControlLock);
506 
507  return seg;
508 }
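/*
 * Illustrative sketch (not part of dsm.c): a typical caller of dsm_create().
 * "request_size" is a hypothetical variable, and the error text is only an
 * example of how a caller might react to the NULL return.
 *
 *     dsm_segment *seg;
 *
 *     seg = dsm_create(request_size, DSM_CREATE_NULL_IF_MAXSEGMENTS);
 *     if (seg == NULL)
 *         ereport(ERROR,
 *                 (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
 *                  errmsg("out of dynamic shared memory segment slots")));
 *
 * With a non-NULL CurrentResourceOwner the mapping is released along with
 * that resource owner unless dsm_pin_mapping() is called first.
 */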
509 
510 /*
511  * Attach a dynamic shared memory segment.
512  *
513  * See comments for dsm_segment_handle() for an explanation of how this
514  * is intended to be used.
515  *
516  * This function will return NULL if the segment isn't known to the system.
517  * This can happen if we're asked to attach the segment, but then everyone
518  * else detaches it (causing it to be destroyed) before we get around to
519  * attaching it.
520  *
521  * If there is a non-NULL CurrentResourceOwner, the attached segment is
522  * associated with it and must be detached before the resource owner releases,
523  * or a warning will be logged. Otherwise the segment remains attached until
524  * explicitly detached or the session ends. See the note atop dsm_create().
525  */
526 dsm_segment *
527 dsm_attach(dsm_handle h)
528 {
529  dsm_segment *seg;
530  dlist_iter iter;
531  uint32 i;
532  uint32 nitems;
533 
534  /* Unsafe in postmaster (and pointless in a stand-alone backend). */
535  Assert(IsUnderPostmaster);
536 
537  if (!dsm_init_done)
538  dsm_backend_startup();
539 
540  /*
541  * Since this is just a debugging cross-check, we could leave it out
542  * altogether, or include it only in assert-enabled builds. But since the
543  * list of attached segments should normally be very short, let's include
544  * it always for right now.
545  *
546  * If you're hitting this error, you probably want to attempt to find an
547  * existing mapping via dsm_find_mapping() before calling dsm_attach() to
548  * create a new one.
549  */
550  dlist_foreach(iter, &dsm_segment_list)
551  {
552  seg = dlist_container(dsm_segment, node, iter.cur);
553  if (seg->handle == h)
554  elog(ERROR, "can't attach the same segment more than once");
555  }
556 
557  /* Create a new segment descriptor. */
558  seg = dsm_create_descriptor();
559  seg->handle = h;
560 
561  /* Bump reference count for this segment in shared memory. */
562  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
563  nitems = dsm_control->nitems;
564  for (i = 0; i < nitems; ++i)
565  {
566  /*
567  * If the reference count is 0, the slot is actually unused. If the
568  * reference count is 1, the slot is still in use, but the segment is
569  * in the process of going away; even if the handle matches, another
570  * slot may already have started using the same handle value by
571  * coincidence so we have to keep searching.
572  */
573  if (dsm_control->item[i].refcnt <= 1)
574  continue;
575 
576  /* If the handle doesn't match, it's not the slot we want. */
577  if (dsm_control->item[i].handle != seg->handle)
578  continue;
579 
580  /* Otherwise we've found a match. */
581  dsm_control->item[i].refcnt++;
582  seg->control_slot = i;
583  break;
584  }
585  LWLockRelease(DynamicSharedMemoryControlLock);
586 
587  /*
588  * If we didn't find the handle we're looking for in the control segment,
589  * it probably means that everyone else who had it mapped, including the
590  * original creator, died before we got to this point. It's up to the
591  * caller to decide what to do about that.
592  */
593  if (seg->control_slot == INVALID_CONTROL_SLOT)
594  {
595  dsm_detach(seg);
596  return NULL;
597  }
598 
599  /* Here's where we actually try to map the segment. */
600  dsm_impl_op(DSM_OP_ATTACH, seg->handle, 0, &seg->impl_private,
601  &seg->mapped_address, &seg->mapped_size, ERROR);
602 
603  return seg;
604 }
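/*
 * Illustrative sketch (not part of dsm.c): attaching by handle.  "handle"
 * would have been obtained from the creating backend (see
 * dsm_segment_handle() below), and "my_shared_state" is a hypothetical
 * struct laid out by the creator.
 *
 *     dsm_segment *seg = dsm_attach(handle);
 *
 *     if (seg == NULL)
 *         ereport(ERROR,
 *                 (errmsg("could not map dynamic shared memory segment")));
 *     state = (my_shared_state *) dsm_segment_address(seg);
 */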
605 
606 /*
607  * At backend shutdown time, detach any segments that are still attached.
608  * (This is similar to dsm_detach_all, except that there's no reason to
609  * unmap the control segment before exiting, so we don't bother.)
610  */
611 void
612 dsm_backend_shutdown(void)
613 {
614  while (!dlist_is_empty(&dsm_segment_list))
615  {
616  dsm_segment *seg;
617 
618  seg = dlist_head_element(dsm_segment, node, &dsm_segment_list);
619  dsm_detach(seg);
620  }
621 }
622 
623 /*
624  * Detach all shared memory segments, including the control segments. This
625  * should be called, along with PGSharedMemoryDetach, in processes that
626  * might inherit mappings but are not intended to be connected to dynamic
627  * shared memory.
628  */
629 void
630 dsm_detach_all(void)
631 {
632  void *control_address = dsm_control;
633 
634  while (!dlist_is_empty(&dsm_segment_list))
635  {
636  dsm_segment *seg;
637 
638  seg = dlist_head_element(dsm_segment, node, &dsm_segment_list);
639  dsm_detach(seg);
640  }
641 
642  if (control_address != NULL)
643  dsm_impl_op(DSM_OP_DETACH, dsm_control_handle, 0,
644  &dsm_control_impl_private, &control_address,
645  &dsm_control_mapped_size, ERROR);
646 }
647 
648 /*
649  * Detach from a shared memory segment, destroying the segment if we
650  * remove the last reference.
651  *
652  * This function should never fail. It will often be invoked when aborting
653  * a transaction, and a further error won't serve any purpose. It's not a
654  * complete disaster if we fail to unmap or destroy the segment; it means a
655  * resource leak, but that doesn't necessarily preclude further operations.
656  */
657 void
658 dsm_detach(dsm_segment *seg)
659 {
660  /*
661  * Invoke registered callbacks. Just in case one of those callbacks
662  * throws a further error that brings us back here, pop the callback
663  * before invoking it, to avoid infinite error recursion.
664  */
665  while (!slist_is_empty(&seg->on_detach))
666  {
667  slist_node *node;
668  dsm_segment_detach_callback *cb;
669  on_dsm_detach_callback function;
670  Datum arg;
671 
672  node = slist_pop_head_node(&seg->on_detach);
673  cb = slist_container(dsm_segment_detach_callback, node, node);
674  function = cb->function;
675  arg = cb->arg;
676  pfree(cb);
677 
678  function(seg, arg);
679  }
680 
681  /*
682  * Try to remove the mapping, if one exists. Normally, there will be, but
683  * maybe not, if we failed partway through a create or attach operation.
684  * We remove the mapping before decrementing the reference count so that
685  * the process that sees a zero reference count can be certain that no
686  * remaining mappings exist. Even if this fails, we pretend that it
687  * works, because retrying is likely to fail in the same way.
688  */
689  if (seg->mapped_address != NULL)
690  {
691  dsm_impl_op(DSM_OP_DETACH, seg->handle, 0, &seg->impl_private,
692  &seg->mapped_address, &seg->mapped_size, WARNING);
693  seg->impl_private = NULL;
694  seg->mapped_address = NULL;
695  seg->mapped_size = 0;
696  }
697 
698  /* Reduce reference count, if we previously increased it. */
699  if (seg->control_slot != INVALID_CONTROL_SLOT)
700  {
701  uint32 refcnt;
702  uint32 control_slot = seg->control_slot;
703 
704  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
705  Assert(dsm_control->item[control_slot].handle == seg->handle);
706  Assert(dsm_control->item[control_slot].refcnt > 1);
707  refcnt = --dsm_control->item[control_slot].refcnt;
708  seg->control_slot = INVALID_CONTROL_SLOT;
709  LWLockRelease(DynamicSharedMemoryControlLock);
710 
711  /* If new reference count is 1, try to destroy the segment. */
712  if (refcnt == 1)
713  {
714  /* A pinned segment should never reach 1. */
715  Assert(!dsm_control->item[control_slot].pinned);
716 
717  /*
718  * If we fail to destroy the segment here, or are killed before we
719  * finish doing so, the reference count will remain at 1, which
720  * will mean that nobody else can attach to the segment. At
721  * postmaster shutdown time, or when a new postmaster is started
722  * after a hard kill, another attempt will be made to remove the
723  * segment.
724  *
725  * The main case we're worried about here is being killed by a
726  * signal before we can finish removing the segment. In that
727  * case, it's important to be sure that the segment still gets
728  * removed. If we actually fail to remove the segment for some
729  * other reason, the postmaster may not have any better luck than
730  * we did. There's not much we can do about that, though.
731  */
732  if (dsm_impl_op(DSM_OP_DESTROY, seg->handle, 0, &seg->impl_private,
733  &seg->mapped_address, &seg->mapped_size, WARNING))
734  {
735  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
736  Assert(dsm_control->item[control_slot].handle == seg->handle);
737  Assert(dsm_control->item[control_slot].refcnt == 1);
738  dsm_control->item[control_slot].refcnt = 0;
739  LWLockRelease(DynamicSharedMemoryControlLock);
740  }
741  }
742  }
743 
744  /* Clean up our remaining backend-private data structures. */
745  if (seg->resowner != NULL)
746  ResourceOwnerForgetDSM(seg->resowner, seg);
747  dlist_delete(&seg->node);
748  pfree(seg);
749 }
750 
751 /*
752  * Keep a dynamic shared memory mapping until end of session.
753  *
754  * By default, mappings are owned by the current resource owner, which
755  * typically means they stick around for the duration of the current query
756  * only.
757  */
758 void
759 dsm_pin_mapping(dsm_segment *seg)
760 {
761  if (seg->resowner != NULL)
762  {
763  ResourceOwnerForgetDSM(seg->resowner, seg);
764  seg->resowner = NULL;
765  }
766 }
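/*
 * Illustrative sketch (not part of dsm.c): a caller that wants a mapping
 * for the rest of the session rather than the current query might do
 *
 *     dsm_segment *seg = dsm_create(size, 0);
 *
 *     dsm_pin_mapping(seg);
 *
 * after which the mapping survives resource-owner cleanup.  The segment
 * itself still goes away once all mappings are gone; use dsm_pin_segment()
 * below to keep the segment alive with no mappings at all.
 */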
767 
768 /*
769  * Arrange to remove a dynamic shared memory mapping at cleanup time.
770  *
771  * dsm_pin_mapping() can be used to preserve a mapping for the entire
772  * lifetime of a process; this function reverses that decision, making
773  * the segment owned by the current resource owner. This may be useful
774  * just before performing some operation that will invalidate the segment
775  * for future use by this backend.
776  */
777 void
778 dsm_unpin_mapping(dsm_segment *seg)
779 {
780  Assert(seg->resowner == NULL);
781  ResourceOwnerEnlargeDSMs(CurrentResourceOwner);
782  seg->resowner = CurrentResourceOwner;
783  ResourceOwnerRememberDSM(seg->resowner, seg);
784 }
785 
786 /*
787  * Keep a dynamic shared memory segment until postmaster shutdown, or until
788  * dsm_unpin_segment is called.
789  *
790  * This function should not be called more than once per segment, unless the
791  * segment is explicitly unpinned with dsm_unpin_segment in between calls.
792  *
793  * Note that this function does not arrange for the current process to
794  * keep the segment mapped indefinitely; if that behavior is desired,
795  * dsm_pin_mapping() should be used from each process that needs to
796  * retain the mapping.
797  */
798 void
799 dsm_pin_segment(dsm_segment *seg)
800 {
801  void *handle;
802 
803  /*
804  * Bump reference count for this segment in shared memory. This will
805  * ensure that even if there is no session which is attached to this
806  * segment, it will remain until postmaster shutdown or an explicit call
807  * to unpin.
808  */
809  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
810  if (dsm_control->item[seg->control_slot].pinned)
811  elog(ERROR, "cannot pin a segment that is already pinned");
812  dsm_impl_pin_segment(seg->handle, seg->impl_private, &handle);
813  dsm_control->item[seg->control_slot].pinned = true;
814  dsm_control->item[seg->control_slot].refcnt++;
815  dsm_control->item[seg->control_slot].impl_private_pm_handle = handle;
816  LWLockRelease(DynamicSharedMemoryControlLock);
817 }
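/*
 * Illustrative sketch (not part of dsm.c): keeping a segment alive with no
 * attached backends and releasing it later by handle.  The control flow is
 * only an example.
 *
 *     dsm_segment *seg = dsm_create(size, 0);
 *     dsm_handle h = dsm_segment_handle(seg);
 *
 *     dsm_pin_segment(seg);
 *     dsm_detach(seg);
 *
 * The segment now persists with zero mappings until either postmaster
 * shutdown or a later
 *
 *     dsm_unpin_segment(h);
 *
 * which drops the pin's reference and, if that was the last one, destroys
 * the segment.
 */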
818 
819 /*
820  * Unpin a dynamic shared memory segment that was previously pinned with
821  * dsm_pin_segment. This function should not be called unless dsm_pin_segment
822  * was previously called for this segment.
823  *
824  * The argument is a dsm_handle rather than a dsm_segment in case you want
825  * to unpin a segment to which you haven't attached. This turns out to be
826  * useful if, for example, a reference to one shared memory segment is stored
827  * within another shared memory segment. You might want to unpin the
828  * referenced segment before destroying the referencing segment.
829  */
830 void
831 dsm_unpin_segment(dsm_handle handle)
832 {
833  uint32 control_slot = INVALID_CONTROL_SLOT;
834  bool destroy = false;
835  uint32 i;
836 
837  /* Find the control slot for the given handle. */
838  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
839  for (i = 0; i < dsm_control->nitems; ++i)
840  {
841  /* Skip unused slots and segments that are concurrently going away. */
842  if (dsm_control->item[i].refcnt <= 1)
843  continue;
844 
845  /* If we've found our handle, we can stop searching. */
846  if (dsm_control->item[i].handle == handle)
847  {
848  control_slot = i;
849  break;
850  }
851  }
852 
853  /*
854  * We should definitely have found the slot, and it should not already be
855  * in the process of going away, because this function should only be
856  * called on a segment which is pinned.
857  */
858  if (control_slot == INVALID_CONTROL_SLOT)
859  elog(ERROR, "cannot unpin unknown segment handle");
860  if (!dsm_control->item[control_slot].pinned)
861  elog(ERROR, "cannot unpin a segment that is not pinned");
862  Assert(dsm_control->item[control_slot].refcnt > 1);
863 
864  /*
865  * Allow implementation-specific code to run. We have to do this before
866  * releasing the lock, because impl_private_pm_handle may get modified by
867  * dsm_impl_unpin_segment.
868  */
869  dsm_impl_unpin_segment(handle,
870  &dsm_control->item[control_slot].impl_private_pm_handle);
871 
872  /* Note that 1 means no references (0 means unused slot). */
873  if (--dsm_control->item[control_slot].refcnt == 1)
874  destroy = true;
875  dsm_control->item[control_slot].pinned = false;
876 
877  /* Now we can release the lock. */
878  LWLockRelease(DynamicSharedMemoryControlLock);
879 
880  /* Clean up resources if that was the last reference. */
881  if (destroy)
882  {
883  void *junk_impl_private = NULL;
884  void *junk_mapped_address = NULL;
885  Size junk_mapped_size = 0;
886 
887  /*
888  * For an explanation of how error handling works in this case, see
889  * comments in dsm_detach. Note that if we reach this point, the
890  * current process certainly does not have the segment mapped, because
891  * if it did, the reference count would have still been greater than 1
892  * even after releasing the reference count held by the pin. The fact
893  * that there can't be a dsm_segment for this handle makes it OK to
894  * pass the mapped size, mapped address, and private data as NULL
895  * here.
896  */
897  if (dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
898  &junk_mapped_address, &junk_mapped_size, WARNING))
899  {
900  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
901  Assert(dsm_control->item[control_slot].handle == handle);
902  Assert(dsm_control->item[control_slot].refcnt == 1);
903  dsm_control->item[control_slot].refcnt = 0;
904  LWLockRelease(DynamicSharedMemoryControlLock);
905  }
906  }
907 }
908 
909 /*
910  * Find an existing mapping for a shared memory segment, if there is one.
911  */
912 dsm_segment *
913 dsm_find_mapping(dsm_handle h)
914 {
915  dlist_iter iter;
916  dsm_segment *seg;
917 
918  dlist_foreach(iter, &dsm_segment_list)
919  {
920  seg = dlist_container(dsm_segment, node, iter.cur);
921  if (seg->handle == h)
922  return seg;
923  }
924 
925  return NULL;
926 }
927 
928 /*
929  * Get the address at which a dynamic shared memory segment is mapped.
930  */
931 void *
932 dsm_segment_address(dsm_segment *seg)
933 {
934  Assert(seg->mapped_address != NULL);
935  return seg->mapped_address;
936 }
937 
938 /*
939  * Get the size of a mapping.
940  */
941 Size
942 dsm_segment_map_length(dsm_segment *seg)
943 {
944  Assert(seg->mapped_address != NULL);
945  return seg->mapped_size;
946 }
947 
948 /*
949  * Get a handle for a mapping.
950  *
951  * To establish communication via dynamic shared memory between two backends,
952  * one of them should first call dsm_create() to establish a new shared
953  * memory mapping. That process should then call dsm_segment_handle() to
954  * obtain a handle for the mapping, and pass that handle to the
955  * coordinating backend via some means (e.g. bgw_main_arg, or via the
956  * main shared memory segment). The recipient, once in possession of the
957  * handle, should call dsm_attach().
958  */
961 {
962  return seg->handle;
963 }
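/*
 * Illustrative sketch (not part of dsm.c) of the protocol described above,
 * using a background worker as the recipient.  UInt32GetDatum() and
 * DatumGetUInt32() are usable because dsm_handle is a uint32; "worker" is a
 * hypothetical BackgroundWorker being registered.
 *
 * In the creating backend:
 *     seg = dsm_create(size, 0);
 *     worker.bgw_main_arg = UInt32GetDatum(dsm_segment_handle(seg));
 *
 * In the worker's main function (which receives that Datum):
 *     seg = dsm_attach(DatumGetUInt32(main_arg));
 *     if (seg == NULL)
 *         ereport(ERROR,
 *                 (errmsg("could not map dynamic shared memory segment")));
 */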
964 
965 /*
966  * Register an on-detach callback for a dynamic shared memory segment.
967  */
968 void
969 on_dsm_detach(dsm_segment *seg, on_dsm_detach_callback function, Datum arg)
970 {
971  dsm_segment_detach_callback *cb;
972 
973  cb = MemoryContextAlloc(TopMemoryContext,
974  sizeof(dsm_segment_detach_callback));
975  cb->function = function;
976  cb->arg = arg;
977  slist_push_head(&seg->on_detach, &cb->node);
978 }
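/*
 * Illustrative sketch (not part of dsm.c): registering a cleanup callback.
 * "my_cleanup" is a hypothetical function matching on_dsm_detach_callback,
 * i.e. void my_cleanup(dsm_segment *seg, Datum arg).
 *
 *     on_dsm_detach(seg, my_cleanup, PointerGetDatum(my_private_state));
 *
 * Because callbacks are pushed onto the head of the list and popped from the
 * head in dsm_detach(), they run in reverse order of registration.
 */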
979 
980 /*
981  * Unregister an on-detach callback for a dynamic shared memory segment.
982  */
983 void
984 cancel_on_dsm_detach(dsm_segment *seg, on_dsm_detach_callback function,
985  Datum arg)
986 {
987  slist_mutable_iter iter;
988 
989  slist_foreach_modify(iter, &seg->on_detach)
990  {
991  dsm_segment_detach_callback *cb;
992 
993  cb = slist_container(dsm_segment_detach_callback, node, iter.cur);
994  if (cb->function == function && cb->arg == arg)
995  {
996  slist_delete_current(&iter);
997  pfree(cb);
998  break;
999  }
1000  }
1001 }
1002 
1003 /*
1004  * Discard all registered on-detach callbacks without executing them.
1005  */
1006 void
1007 reset_on_dsm_detach(void)
1008 {
1009  dlist_iter iter;
1010 
1011  dlist_foreach(iter, &dsm_segment_list)
1012  {
1013  dsm_segment *seg = dlist_container(dsm_segment, node, iter.cur);
1014 
1015  /* Throw away explicit on-detach actions one by one. */
1016  while (!slist_is_empty(&seg->on_detach))
1017  {
1018  slist_node *node;
1019  dsm_segment_detach_callback *cb;
1020 
1021  node = slist_pop_head_node(&seg->on_detach);
1022  cb = slist_container(dsm_segment_detach_callback, node, node);
1023  pfree(cb);
1024  }
1025 
1026  /*
1027  * Decrementing the reference count is a sort of implicit on-detach
1028  * action; make sure we don't do that, either.
1029  */
1030  seg->control_slot = INVALID_CONTROL_SLOT;
1031  }
1032 }
1033 
1034 /*
1035  * Create a segment descriptor.
1036  */
1037 static dsm_segment *
1038 dsm_create_descriptor(void)
1039 {
1040  dsm_segment *seg;
1041 
1042  if (CurrentResourceOwner)
1043  ResourceOwnerEnlargeDSMs(CurrentResourceOwner);
1044 
1045  seg = MemoryContextAlloc(TopMemoryContext, sizeof(dsm_segment));
1046  dlist_push_head(&dsm_segment_list, &seg->node);
1047 
1048  /* seg->handle must be initialized by the caller */
1049  seg->control_slot = INVALID_CONTROL_SLOT;
1050  seg->impl_private = NULL;
1051  seg->mapped_address = NULL;
1052  seg->mapped_size = 0;
1053 
1054  seg->resowner = CurrentResourceOwner;
1055  if (CurrentResourceOwner)
1056  ResourceOwnerRememberDSM(CurrentResourceOwner, seg);
1057 
1058  slist_init(&seg->on_detach);
1059 
1060  return seg;
1061 }
1062 
1063 /*
1064  * Sanity check a control segment.
1065  *
1066  * The goal here isn't to detect everything that could possibly be wrong with
1067  * the control segment; there's not enough information for that. Rather, the
1068  * goal is to make sure that someone can iterate over the items in the segment
1069  * without overrunning the end of the mapping and crashing. We also check
1070  * the magic number since, if that's messed up, this may not even be one of
1071  * our segments at all.
1072  */
1073 static bool
1074 dsm_control_segment_sane(dsm_control_header *control, Size mapped_size)
1075 {
1076  if (mapped_size < offsetof(dsm_control_header, item))
1077  return false; /* Mapped size too short to read header. */
1078  if (control->magic != PG_DYNSHMEM_CONTROL_MAGIC)
1079  return false; /* Magic number doesn't match. */
1080  if (dsm_control_bytes_needed(control->maxitems) > mapped_size)
1081  return false; /* Max item count won't fit in map. */
1082  if (control->nitems > control->maxitems)
1083  return false; /* Overfull. */
1084  return true;
1085 }
1086 
1087 /*
1088  * Compute the number of control-segment bytes needed to store a given
1089  * number of items.
1090  */
1091 static uint64
1092 dsm_control_bytes_needed(uint32 nitems)
1093 {
1094  return offsetof(dsm_control_header, item)
1095  + sizeof(dsm_control_item) * (uint64) nitems;
1096 }
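/*
 * Worked example (not part of dsm.c): with PG_DYNSHMEM_FIXED_SLOTS = 64 and
 * PG_DYNSHMEM_SLOTS_PER_BACKEND = 5, a cluster with MaxBackends = 100 gets
 * maxitems = 64 + 5 * 100 = 564, so the control segment is sized as
 * offsetof(dsm_control_header, item) + 564 * sizeof(dsm_control_item) bytes.
 */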