PostgreSQL Source Code  git master
dsm.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * dsm.c
4  * manage dynamic shared memory segments
5  *
6  * This file provides a set of services to make programming with dynamic
7  * shared memory segments more convenient. Unlike the low-level
8  * facilities provided by dsm_impl.h and dsm_impl.c, mappings and segments
9  * created using this module will be cleaned up automatically. Mappings
10  * will be removed when the resource owner under which they were created
11  * is cleaned up, unless dsm_pin_mapping() is used, in which case they
12  * have session lifespan. Segments will be removed when there are no
13  * remaining mappings, or at postmaster shutdown in any case. After a
14  * hard postmaster crash, remaining segments will be removed, if they
15  * still exist, at the next postmaster startup.
16  *
17  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
18  * Portions Copyright (c) 1994, Regents of the University of California
19  *
20  *
21  * IDENTIFICATION
22  * src/backend/storage/ipc/dsm.c
23  *
24  *-------------------------------------------------------------------------
25  */
26 
27 #include "postgres.h"
28 
29 #include <fcntl.h>
30 #include <unistd.h>
31 #ifndef WIN32
32 #include <sys/mman.h>
33 #endif
34 #include <sys/stat.h>
35 
36 #include "lib/ilist.h"
37 #include "miscadmin.h"
38 #include "storage/dsm.h"
39 #include "storage/ipc.h"
40 #include "storage/lwlock.h"
41 #include "storage/pg_shmem.h"
42 #include "utils/guc.h"
43 #include "utils/memutils.h"
44 #include "utils/resowner_private.h"
45 
46 #define PG_DYNSHMEM_CONTROL_MAGIC 0x9a503d32
47 
48 /*
49  * There's no point in getting too cheap here, because the minimum allocation
50  * is one OS page, which is probably at least 4KB and could easily be as high
51  * as 64KB. Each slot costs only sizeof(dsm_control_item) bytes, which is small by comparison.
52  */
53 #define PG_DYNSHMEM_FIXED_SLOTS 64
54 #define PG_DYNSHMEM_SLOTS_PER_BACKEND 2
55 
56 #define INVALID_CONTROL_SLOT ((uint32) -1)
57 
58 /* Backend-local tracking for on-detach callbacks. */
60 {
65 
66 /* Backend-local state for a dynamic shared memory segment. */
68 {
69  dlist_node node; /* List link in dsm_segment_list. */
70  ResourceOwner resowner; /* Resource owner. */
71  dsm_handle handle; /* Segment name. */
72  uint32 control_slot; /* Slot in control segment. */
73  void *impl_private; /* Implementation-specific private data. */
74  void *mapped_address; /* Mapping address, or NULL if unmapped. */
75  Size mapped_size; /* Size of our mapping. */
76  slist_head on_detach; /* On-detach callbacks. */
77 };
78 
79 /* Shared-memory state for a dynamic shared memory segment. */
80 typedef struct dsm_control_item
81 {
83  uint32 refcnt; /* 2+ = active, 1 = moribund, 0 = gone */
84  void *impl_private_pm_handle; /* only needed on Windows */
85  bool pinned;
87 
88 /* Layout of the dynamic shared memory control segment. */
89 typedef struct dsm_control_header
90 {
94  dsm_control_item item[FLEXIBLE_ARRAY_MEMBER];
96 
97 static void dsm_cleanup_for_mmap(void);
98 static void dsm_postmaster_shutdown(int code, Datum arg);
100 static bool dsm_control_segment_sane(dsm_control_header *control,
101  Size mapped_size);
102 static uint64 dsm_control_bytes_needed(uint32 nitems);
103 
104 /* Has this backend initialized the dynamic shared memory system yet? */
105 static bool dsm_init_done = false;
106 
107 /*
108  * List of dynamic shared memory segments used by this backend.
109  *
110  * At process exit time, we must decrement the reference count of each
111  * segment we have attached; this list makes it possible to find all such
112  * segments.
113  *
114  * This list should always be empty in the postmaster. We could probably
115  * allow the postmaster to map dynamic shared memory segments before it
116  * begins to start child processes, provided that each process adjusted
117  * the reference counts for those segments in the control segment at
118  * startup time, but there's no obvious need for such a facility, which
119  * would also be complex to handle in the EXEC_BACKEND case. Once the
120  * postmaster has begun spawning children, there's an additional problem:
121  * each new mapping would require an update to the control segment,
122  * which requires locking, in which the postmaster must not be involved.
123  */
124 static dlist_head dsm_segment_list = DLIST_STATIC_INIT(dsm_segment_list);
125 
126 /*
127  * Control segment information.
128  *
129  * Unlike ordinary shared memory segments, the control segment is not
130  * reference counted; instead, it lasts for the postmaster's entire
131  * life cycle. For simplicity, it doesn't have a dsm_segment object either.
132  */
136 static void *dsm_control_impl_private = NULL;
137 
138 /*
139  * Start up the dynamic shared memory system.
140  *
141  * This is called just once during each cluster lifetime, at postmaster
142  * startup time.
143  */
144 void
146 {
147  void *dsm_control_address = NULL;
148  uint32 maxitems;
149  Size segsize;
150 
152 
153  /* If dynamic shared memory is disabled, there's nothing to do. */
155  return;
156 
157  /*
158  * If we're using the mmap implementations, clean up any leftovers.
159  * Cleanup isn't needed on Windows, and happens earlier in startup for
160  * POSIX and System V shared memory, via a direct call to
161  * dsm_cleanup_using_control_segment.
162  */
165 
166  /* Determine size for new control segment. */
167  maxitems = PG_DYNSHMEM_FIXED_SLOTS
169  elog(DEBUG2, "dynamic shared memory system will support %u segments",
170  maxitems);
171  segsize = dsm_control_bytes_needed(maxitems);
172 
173  /*
174  * Loop until we find an unused identifier for the new control segment. We
175  * sometimes use 0 as a sentinel value indicating that no control segment
176  * is known to exist, so avoid using that value for a real control
177  * segment.
178  */
179  for (;;)
180  {
181  Assert(dsm_control_address == NULL);
185  continue;
187  &dsm_control_impl_private, &dsm_control_address,
189  break;
190  }
191  dsm_control = dsm_control_address;
193  elog(DEBUG2,
194  "created dynamic shared memory control segment %u (%zu bytes)",
195  dsm_control_handle, segsize);
197 
198  /* Initialize control segment. */
199  dsm_control->magic = PG_DYNSHMEM_CONTROL_MAGIC;
200  dsm_control->nitems = 0;
201  dsm_control->maxitems = maxitems;
202 }
203 
204 /*
205  * Determine whether the control segment from the previous postmaster
206  * invocation still exists. If so, remove the dynamic shared memory
207  * segments to which it refers, and then the control segment itself.
208  */
209 void
211 {
212  void *mapped_address = NULL;
213  void *junk_mapped_address = NULL;
214  void *impl_private = NULL;
215  void *junk_impl_private = NULL;
216  Size mapped_size = 0;
217  Size junk_mapped_size = 0;
218  uint32 nitems;
219  uint32 i;
220  dsm_control_header *old_control;
221 
222  /* If dynamic shared memory is disabled, there's nothing to do. */
224  return;
225 
226  /*
227  * Try to attach the segment. If this fails, it probably just means that
228  * the operating system has been rebooted and the segment no longer
229  * exists, or an unrelated process has used the same shm ID. So just fall
230  * out quietly.
231  */
232  if (!dsm_impl_op(DSM_OP_ATTACH, old_control_handle, 0, &impl_private,
233  &mapped_address, &mapped_size, DEBUG1))
234  return;
235 
236  /*
237  * We've managed to reattach it, but the contents might not be sane. If
238  * they aren't, we disregard the segment after all.
239  */
240  old_control = (dsm_control_header *) mapped_address;
241  if (!dsm_control_segment_sane(old_control, mapped_size))
242  {
243  dsm_impl_op(DSM_OP_DETACH, old_control_handle, 0, &impl_private,
244  &mapped_address, &mapped_size, LOG);
245  return;
246  }
247 
248  /*
249  * OK, the control segment looks basically valid, so we can use it to get
250  * a list of segments that need to be removed.
251  */
252  nitems = old_control->nitems;
253  for (i = 0; i < nitems; ++i)
254  {
255  dsm_handle handle;
256  uint32 refcnt;
257 
258  /* If the reference count is 0, the slot is actually unused. */
259  refcnt = old_control->item[i].refcnt;
260  if (refcnt == 0)
261  continue;
262 
263  /* Log debugging information. */
264  handle = old_control->item[i].handle;
265  elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u (reference count %u)",
266  handle, refcnt);
267 
268  /* Destroy the referenced segment. */
269  dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
270  &junk_mapped_address, &junk_mapped_size, LOG);
271  }
272 
273  /* Destroy the old control segment, too. */
274  elog(DEBUG2,
275  "cleaning up dynamic shared memory control segment with ID %u",
276  old_control_handle);
277  dsm_impl_op(DSM_OP_DESTROY, old_control_handle, 0, &impl_private,
278  &mapped_address, &mapped_size, LOG);
279 }
280 
281 /*
282  * When we're using the mmap shared memory implementation, "shared memory"
283  * segments might even manage to survive an operating system reboot.
284  * But there's no guarantee as to exactly what will survive: some segments
285  * may survive, and others may not, and the contents of some may be out
286  * of date. In particular, the control segment may be out of date, so we
287  * can't rely on it to figure out what to remove. However, since we know
288  * what directory contains the files we used as shared memory, we can simply
289  * scan the directory and blow everything away that shouldn't be there.
290  */
291 static void
293 {
294  DIR *dir;
295  struct dirent *dent;
296 
297  /* Scan the directory for something with a name of the correct format. */
299 
300  while ((dent = ReadDir(dir, PG_DYNSHMEM_DIR)) != NULL)
301  {
302  if (strncmp(dent->d_name, PG_DYNSHMEM_MMAP_FILE_PREFIX,
303  strlen(PG_DYNSHMEM_MMAP_FILE_PREFIX)) == 0)
304  {
305  char buf[MAXPGPATH + sizeof(PG_DYNSHMEM_DIR)];
306 
307  snprintf(buf, sizeof(buf), PG_DYNSHMEM_DIR "/%s", dent->d_name);
308 
309  elog(DEBUG2, "removing file \"%s\"", buf);
310 
311  /* We found a matching file; so remove it. */
312  if (unlink(buf) != 0)
313  ereport(ERROR,
315  errmsg("could not remove file \"%s\": %m", buf)));
316  }
317  }
318 
319  /* Cleanup complete. */
320  FreeDir(dir);
321 }
322 
323 /*
324  * At shutdown time, we iterate over the control segment and remove all
325  * remaining dynamic shared memory segments. We avoid throwing errors here;
326  * the postmaster is shutting down either way, and this is just non-critical
327  * resource cleanup.
328  */
329 static void
331 {
332  uint32 nitems;
333  uint32 i;
334  void *dsm_control_address;
335  void *junk_mapped_address = NULL;
336  void *junk_impl_private = NULL;
337  Size junk_mapped_size = 0;
339 
340  /*
341  * If some other backend exited uncleanly, it might have corrupted the
342  * control segment while it was dying. In that case, we warn and ignore
343  * the contents of the control segment. This may end up leaving behind
344  * stray shared memory segments, but there's not much we can do about that
345  * if the metadata is gone.
346  */
347  nitems = dsm_control->nitems;
349  {
350  ereport(LOG,
351  (errmsg("dynamic shared memory control segment is corrupt")));
352  return;
353  }
354 
355  /* Remove any remaining segments. */
356  for (i = 0; i < nitems; ++i)
357  {
358  dsm_handle handle;
359 
360  /* If the reference count is 0, the slot is actually unused. */
361  if (dsm_control->item[i].refcnt == 0)
362  continue;
363 
364  /* Log debugging information. */
365  handle = dsm_control->item[i].handle;
366  elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u",
367  handle);
368 
369  /* Destroy the segment. */
370  dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
371  &junk_mapped_address, &junk_mapped_size, LOG);
372  }
373 
374  /* Remove the control segment itself. */
375  elog(DEBUG2,
376  "cleaning up dynamic shared memory control segment with ID %u",
378  dsm_control_address = dsm_control;
380  &dsm_control_impl_private, &dsm_control_address,
382  dsm_control = dsm_control_address;
383  shim->dsm_control = 0;
384 }
385 
386 /*
387  * Prepare this backend for dynamic shared memory usage. Under EXEC_BACKEND,
388  * we must reread the state file and map the control segment; in other cases,
389  * we'll have inherited the postmaster's mapping and global variables.
390  */
391 static void
393 {
394  /* If dynamic shared memory is disabled, reject this. */
396  ereport(ERROR,
397  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
398  errmsg("dynamic shared memory is disabled"),
399  errhint("Set dynamic_shared_memory_type to a value other than \"none\".")));
400 
401 #ifdef EXEC_BACKEND
402  {
403  void *control_address = NULL;
404 
405  /* Attach control segment. */
408  &dsm_control_impl_private, &control_address,
410  dsm_control = control_address;
411  /* If control segment doesn't look sane, something is badly wrong. */
413  {
415  &dsm_control_impl_private, &control_address,
417  ereport(FATAL,
418  (errcode(ERRCODE_INTERNAL_ERROR),
419  errmsg("dynamic shared memory control segment is not valid")));
420  }
421  }
422 #endif
423 
424  dsm_init_done = true;
425 }
426 
427 #ifdef EXEC_BACKEND
428 /*
429  * When running under EXEC_BACKEND, we get a callback here when the main
430  * shared memory segment is re-attached, so that we can record the control
431  * handle retrieved from it.
432  */
433 void
434 dsm_set_control_handle(dsm_handle h)
435 {
436  Assert(dsm_control_handle == 0 && h != 0);
437  dsm_control_handle = h;
438 }
439 #endif
440 
441 /*
442  * Create a new dynamic shared memory segment.
443  *
444  * If there is a non-NULL CurrentResourceOwner, the new segment is associated
445  * with it and must be detached before the resource owner releases, or a
446  * warning will be logged. If CurrentResourceOwner is NULL, the segment
447  * remains attached until explicitly detached or the session ends.
448  * Creating with a NULL CurrentResourceOwner is equivalent to creating
449  * with a non-NULL CurrentResourceOwner and then calling dsm_pin_mapping.
450  */
451 dsm_segment *
452 dsm_create(Size size, int flags)
453 {
454  dsm_segment *seg;
455  uint32 i;
456  uint32 nitems;
457 
458  /* Unsafe in postmaster (and pointless in a stand-alone backend). */
460 
461  if (!dsm_init_done)
463 
464  /* Create a new segment descriptor. */
465  seg = dsm_create_descriptor();
466 
467  /* Loop until we find an unused segment identifier. */
468  for (;;)
469  {
470  Assert(seg->mapped_address == NULL && seg->mapped_size == 0);
471  seg->handle = random();
472  if (seg->handle == DSM_HANDLE_INVALID) /* Reserve sentinel */
473  continue;
474  if (dsm_impl_op(DSM_OP_CREATE, seg->handle, size, &seg->impl_private,
475  &seg->mapped_address, &seg->mapped_size, ERROR))
476  break;
477  }
478 
479  /* Lock the control segment so we can register the new segment. */
480  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
481 
482  /* Search the control segment for an unused slot. */
483  nitems = dsm_control->nitems;
484  for (i = 0; i < nitems; ++i)
485  {
486  if (dsm_control->item[i].refcnt == 0)
487  {
488  dsm_control->item[i].handle = seg->handle;
489  /* refcnt of 1 triggers destruction, so start at 2 */
490  dsm_control->item[i].refcnt = 2;
491  dsm_control->item[i].impl_private_pm_handle = NULL;
492  dsm_control->item[i].pinned = false;
493  seg->control_slot = i;
494  LWLockRelease(DynamicSharedMemoryControlLock);
495  return seg;
496  }
497  }
498 
499  /* Verify that we can support an additional mapping. */
500  if (nitems >= dsm_control->maxitems)
501  {
502  if ((flags & DSM_CREATE_NULL_IF_MAXSEGMENTS) != 0)
503  {
504  LWLockRelease(DynamicSharedMemoryControlLock);
506  &seg->mapped_address, &seg->mapped_size, WARNING);
507  if (seg->resowner != NULL)
508  ResourceOwnerForgetDSM(seg->resowner, seg);
509  dlist_delete(&seg->node);
510  pfree(seg);
511  return NULL;
512  }
513  ereport(ERROR,
514  (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
515  errmsg("too many dynamic shared memory segments")));
516  }
517 
518  /* Enter the handle into a new array slot. */
519  dsm_control->item[nitems].handle = seg->handle;
520  /* refcnt of 1 triggers destruction, so start at 2 */
521  dsm_control->item[nitems].refcnt = 2;
522  dsm_control->item[nitems].impl_private_pm_handle = NULL;
523  dsm_control->item[nitems].pinned = false;
524  seg->control_slot = nitems;
525  dsm_control->nitems++;
526  LWLockRelease(DynamicSharedMemoryControlLock);
527 
528  return seg;
529 }
530 
531 /*
532  * Attach a dynamic shared memory segment.
533  *
534  * See comments for dsm_segment_handle() for an explanation of how this
535  * is intended to be used.
536  *
537  * This function will return NULL if the segment isn't known to the system.
538  * This can happen if we're asked to attach the segment, but then everyone
539  * else detaches it (causing it to be destroyed) before we get around to
540  * attaching it.
541  *
542  * If there is a non-NULL CurrentResourceOwner, the attached segment is
543  * associated with it and must be detached before the resource owner releases,
544  * or a warning will be logged. Otherwise the segment remains attached until
545  * explicitly detached or the session ends. See the note atop dsm_create().
546  */
547 dsm_segment *
549 {
550  dsm_segment *seg;
551  dlist_iter iter;
552  uint32 i;
553  uint32 nitems;
554 
555  /* Unsafe in postmaster (and pointless in a stand-alone backend). */
557 
558  if (!dsm_init_done)
560 
561  /*
562  * Since this is just a debugging cross-check, we could leave it out
563  * altogether, or include it only in assert-enabled builds. But since the
564  * list of attached segments should normally be very short, let's include
565  * it always for right now.
566  *
567  * If you're hitting this error, you probably want to attempt to find an
568  * existing mapping via dsm_find_mapping() before calling dsm_attach() to
569  * create a new one.
570  */
571  dlist_foreach(iter, &dsm_segment_list)
572  {
573  seg = dlist_container(dsm_segment, node, iter.cur);
574  if (seg->handle == h)
575  elog(ERROR, "can't attach the same segment more than once");
576  }
577 
578  /* Create a new segment descriptor. */
579  seg = dsm_create_descriptor();
580  seg->handle = h;
581 
582  /* Bump reference count for this segment in shared memory. */
583  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
584  nitems = dsm_control->nitems;
585  for (i = 0; i < nitems; ++i)
586  {
587  /* If the reference count is 0, the slot is actually unused. */
588  if (dsm_control->item[i].refcnt == 0)
589  continue;
590 
591  /* If the handle doesn't match, it's not the slot we want. */
592  if (dsm_control->item[i].handle != seg->handle)
593  continue;
594 
595  /*
596  * If the reference count is 1, the slot is still in use, but the
597  * segment is in the process of going away. Treat that as if we
598  * didn't find a match.
599  */
600  if (dsm_control->item[i].refcnt == 1)
601  break;
602 
603  /* Otherwise we've found a match. */
604  dsm_control->item[i].refcnt++;
605  seg->control_slot = i;
606  break;
607  }
608  LWLockRelease(DynamicSharedMemoryControlLock);
609 
610  /*
611  * If we didn't find the handle we're looking for in the control segment,
612  * it probably means that everyone else who had it mapped, including the
613  * original creator, died before we got to this point. It's up to the
614  * caller to decide what to do about that.
615  */
617  {
618  dsm_detach(seg);
619  return NULL;
620  }
621 
622  /* Here's where we actually try to map the segment. */
624  &seg->mapped_address, &seg->mapped_size, ERROR);
625 
626  return seg;
627 }
628 
629 /*
630  * At backend shutdown time, detach any segments that are still attached.
631  * (This is similar to dsm_detach_all, except that there's no reason to
632  * unmap the control segment before exiting, so we don't bother.)
633  */
634 void
636 {
637  while (!dlist_is_empty(&dsm_segment_list))
638  {
639  dsm_segment *seg;
640 
641  seg = dlist_head_element(dsm_segment, node, &dsm_segment_list);
642  dsm_detach(seg);
643  }
644 }
645 
646 /*
647  * Detach all shared memory segments, including the control segments. This
648  * should be called, along with PGSharedMemoryDetach, in processes that
649  * might inherit mappings but are not intended to be connected to dynamic
650  * shared memory.
651  */
652 void
654 {
655  void *control_address = dsm_control;
656 
657  while (!dlist_is_empty(&dsm_segment_list))
658  {
659  dsm_segment *seg;
660 
661  seg = dlist_head_element(dsm_segment, node, &dsm_segment_list);
662  dsm_detach(seg);
663  }
664 
665  if (control_address != NULL)
667  &dsm_control_impl_private, &control_address,
669 }
670 
671 /*
672  * Resize an existing shared memory segment.
673  *
674  * This may cause the shared memory segment to be remapped at a different
675  * address. For the caller's convenience, we return the mapped address.
676  */
677 void *
679 {
681  dsm_impl_op(DSM_OP_RESIZE, seg->handle, size, &seg->impl_private,
682  &seg->mapped_address, &seg->mapped_size, ERROR);
683  return seg->mapped_address;
684 }
685 
686 /*
687  * Remap an existing shared memory segment.
688  *
689  * This is intended to be used when some other process has extended the
690  * mapping using dsm_resize(), but we've still only got the initial
691  * portion mapped. Since this might change the address at which the
692  * segment is mapped, we return the new mapped address.
693  */
694 void *
696 {
698  &seg->mapped_address, &seg->mapped_size, ERROR);
699 
700  return seg->mapped_address;
701 }
702 
703 /*
704  * Detach from a shared memory segment, destroying the segment if we
705  * remove the last reference.
706  *
707  * This function should never fail. It will often be invoked when aborting
708  * a transaction, and a further error won't serve any purpose. It's not a
709  * complete disaster if we fail to unmap or destroy the segment; it means a
710  * resource leak, but that doesn't necessarily preclude further operations.
711  */
712 void
714 {
715  /*
716  * Invoke registered callbacks. Just in case one of those callbacks
717  * throws a further error that brings us back here, pop the callback
718  * before invoking it, to avoid infinite error recursion.
719  */
720  while (!slist_is_empty(&seg->on_detach))
721  {
722  slist_node *node;
724  on_dsm_detach_callback function;
725  Datum arg;
726 
727  node = slist_pop_head_node(&seg->on_detach);
729  function = cb->function;
730  arg = cb->arg;
731  pfree(cb);
732 
733  function(seg, arg);
734  }
735 
736  /*
737  * Try to remove the mapping, if one exists. Normally, there will be, but
738  * maybe not, if we failed partway through a create or attach operation.
739  * We remove the mapping before decrementing the reference count so that
740  * the process that sees a zero reference count can be certain that no
741  * remaining mappings exist. Even if this fails, we pretend that it
742  * works, because retrying is likely to fail in the same way.
743  */
744  if (seg->mapped_address != NULL)
745  {
747  &seg->mapped_address, &seg->mapped_size, WARNING);
748  seg->impl_private = NULL;
749  seg->mapped_address = NULL;
750  seg->mapped_size = 0;
751  }
752 
753  /* Reduce reference count, if we previously increased it. */
755  {
756  uint32 refcnt;
757  uint32 control_slot = seg->control_slot;
758 
759  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
760  Assert(dsm_control->item[control_slot].handle == seg->handle);
761  Assert(dsm_control->item[control_slot].refcnt > 1);
762  refcnt = --dsm_control->item[control_slot].refcnt;
764  LWLockRelease(DynamicSharedMemoryControlLock);
765 
766  /* If new reference count is 1, try to destroy the segment. */
767  if (refcnt == 1)
768  {
769  /* A pinned segment should never reach 1. */
770  Assert(!dsm_control->item[control_slot].pinned);
771 
772  /*
773  * If we fail to destroy the segment here, or are killed before we
774  * finish doing so, the reference count will remain at 1, which
775  * will mean that nobody else can attach to the segment. At
776  * postmaster shutdown time, or when a new postmaster is started
777  * after a hard kill, another attempt will be made to remove the
778  * segment.
779  *
780  * The main case we're worried about here is being killed by a
781  * signal before we can finish removing the segment. In that
782  * case, it's important to be sure that the segment still gets
783  * removed. If we actually fail to remove the segment for some
784  * other reason, the postmaster may not have any better luck than
785  * we did. There's not much we can do about that, though.
786  */
787  if (dsm_impl_op(DSM_OP_DESTROY, seg->handle, 0, &seg->impl_private,
788  &seg->mapped_address, &seg->mapped_size, WARNING))
789  {
790  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
791  Assert(dsm_control->item[control_slot].handle == seg->handle);
792  Assert(dsm_control->item[control_slot].refcnt == 1);
793  dsm_control->item[control_slot].refcnt = 0;
794  LWLockRelease(DynamicSharedMemoryControlLock);
795  }
796  }
797  }
798 
799  /* Clean up our remaining backend-private data structures. */
800  if (seg->resowner != NULL)
801  ResourceOwnerForgetDSM(seg->resowner, seg);
802  dlist_delete(&seg->node);
803  pfree(seg);
804 }
805 
806 /*
807  * Keep a dynamic shared memory mapping until end of session.
808  *
809  * By default, mappings are owned by the current resource owner, which
810  * typically means they stick around for the duration of the current query
811  * only.
812  */
813 void
815 {
816  if (seg->resowner != NULL)
817  {
818  ResourceOwnerForgetDSM(seg->resowner, seg);
819  seg->resowner = NULL;
820  }
821 }
822 
823 /*
824  * Arrange to remove a dynamic shared memory mapping at cleanup time.
825  *
826  * dsm_pin_mapping() can be used to preserve a mapping for the entire
827  * lifetime of a process; this function reverses that decision, making
828  * the segment owned by the current resource owner. This may be useful
829  * just before performing some operation that will invalidate the segment
830  * for future use by this backend.
831  */
832 void
834 {
835  Assert(seg->resowner == NULL);
839 }
840 
841 /*
842  * Keep a dynamic shared memory segment until postmaster shutdown, or until
843  * dsm_unpin_segment is called.
844  *
845  * This function should not be called more than once per segment, unless the
846  * segment is explicitly unpinned with dsm_unpin_segment in between calls.
847  *
848  * Note that this function does not arrange for the current process to
849  * keep the segment mapped indefinitely; if that behavior is desired,
850  * dsm_pin_mapping() should be used from each process that needs to
851  * retain the mapping.
852  */
853 void
855 {
856  void *handle;
857 
858  /*
859  * Bump reference count for this segment in shared memory. This will
860  * ensure that even if there is no session which is attached to this
861  * segment, it will remain until postmaster shutdown or an explicit call
862  * to unpin.
863  */
864  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
865  if (dsm_control->item[seg->control_slot].pinned)
866  elog(ERROR, "cannot pin a segment that is already pinned");
867  dsm_impl_pin_segment(seg->handle, seg->impl_private, &handle);
868  dsm_control->item[seg->control_slot].pinned = true;
869  dsm_control->item[seg->control_slot].refcnt++;
870  dsm_control->item[seg->control_slot].impl_private_pm_handle = handle;
871  LWLockRelease(DynamicSharedMemoryControlLock);
872 }
873 
874 /*
875  * Unpin a dynamic shared memory segment that was previously pinned with
876  * dsm_pin_segment. This function should not be called unless dsm_pin_segment
877  * was previously called for this segment.
878  *
879  * The argument is a dsm_handle rather than a dsm_segment in case you want
880  * to unpin a segment to which you haven't attached. This turns out to be
881  * useful if, for example, a reference to one shared memory segment is stored
882  * within another shared memory segment. You might want to unpin the
883  * referenced segment before destroying the referencing segment.
884  */
885 void
887 {
888  uint32 control_slot = INVALID_CONTROL_SLOT;
889  bool destroy = false;
890  uint32 i;
891 
892  /* Find the control slot for the given handle. */
893  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
894  for (i = 0; i < dsm_control->nitems; ++i)
895  {
896  /* Skip unused slots. */
897  if (dsm_control->item[i].refcnt == 0)
898  continue;
899 
900  /* If we've found our handle, we can stop searching. */
901  if (dsm_control->item[i].handle == handle)
902  {
903  control_slot = i;
904  break;
905  }
906  }
907 
908  /*
909  * We should definitely have found the slot, and it should not already be
910  * in the process of going away, because this function should only be
911  * called on a segment which is pinned.
912  */
913  if (control_slot == INVALID_CONTROL_SLOT)
914  elog(ERROR, "cannot unpin unknown segment handle");
915  if (!dsm_control->item[control_slot].pinned)
916  elog(ERROR, "cannot unpin a segment that is not pinned");
917  Assert(dsm_control->item[control_slot].refcnt > 1);
918 
919  /*
920  * Allow implementation-specific code to run. We have to do this before
921  * releasing the lock, because impl_private_pm_handle may get modified by
922  * dsm_impl_unpin_segment.
923  */
924  dsm_impl_unpin_segment(handle,
925  &dsm_control->item[control_slot].impl_private_pm_handle);
926 
927  /* Note that 1 means no references (0 means unused slot). */
928  if (--dsm_control->item[control_slot].refcnt == 1)
929  destroy = true;
930  dsm_control->item[control_slot].pinned = false;
931 
932  /* Now we can release the lock. */
933  LWLockRelease(DynamicSharedMemoryControlLock);
934 
935  /* Clean up resources if that was the last reference. */
936  if (destroy)
937  {
938  void *junk_impl_private = NULL;
939  void *junk_mapped_address = NULL;
940  Size junk_mapped_size = 0;
941 
942  /*
943  * For an explanation of how error handling works in this case, see
944  * comments in dsm_detach. Note that if we reach this point, the
945  * current process certainly does not have the segment mapped, because
946  * if it did, the reference count would have still been greater than 1
947  * even after releasing the reference count held by the pin. The fact
948  * that there can't be a dsm_segment for this handle makes it OK to
949  * pass the mapped size, mapped address, and private data as NULL
950  * here.
951  */
952  if (dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
953  &junk_mapped_address, &junk_mapped_size, WARNING))
954  {
955  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
956  Assert(dsm_control->item[control_slot].handle == handle);
957  Assert(dsm_control->item[control_slot].refcnt == 1);
958  dsm_control->item[control_slot].refcnt = 0;
959  LWLockRelease(DynamicSharedMemoryControlLock);
960  }
961  }
962 }
963 
964 /*
965  * Find an existing mapping for a shared memory segment, if there is one.
966  */
967 dsm_segment *
969 {
970  dlist_iter iter;
971  dsm_segment *seg;
972 
973  dlist_foreach(iter, &dsm_segment_list)
974  {
975  seg = dlist_container(dsm_segment, node, iter.cur);
976  if (seg->handle == h)
977  return seg;
978  }
979 
980  return NULL;
981 }
982 
983 /*
984  * Get the address at which a dynamic shared memory segment is mapped.
985  */
986 void *
988 {
989  Assert(seg->mapped_address != NULL);
990  return seg->mapped_address;
991 }
992 
993 /*
994  * Get the size of a mapping.
995  */
996 Size
998 {
999  Assert(seg->mapped_address != NULL);
1000  return seg->mapped_size;
1001 }
1002 
1003 /*
1004  * Get a handle for a mapping.
1005  *
1006  * To establish communication via dynamic shared memory between two backends,
1007  * one of them should first call dsm_create() to establish a new shared
1008  * memory mapping. That process should then call dsm_segment_handle() to
1009  * obtain a handle for the mapping, and pass that handle to the
1010  * coordinating backend via some means (e.g. bgw_main_arg, or via the
1011  * main shared memory segment). The recipient, once in possession of the
1012  * handle, should call dsm_attach().
1013  */
1014 dsm_handle
1016 {
1017  return seg->handle;
1018 }
1019 
1020 /*
1021  * Register an on-detach callback for a dynamic shared memory segment.
1022  */
1023 void
1025 {
1027 
1029  sizeof(dsm_segment_detach_callback));
1030  cb->function = function;
1031  cb->arg = arg;
1032  slist_push_head(&seg->on_detach, &cb->node);
1033 }
1034 
1035 /*
1036  * Unregister an on-detach callback for a dynamic shared memory segment.
1037  */
1038 void
1040  Datum arg)
1041 {
1042  slist_mutable_iter iter;
1043 
1044  slist_foreach_modify(iter, &seg->on_detach)
1045  {
1047 
1049  if (cb->function == function && cb->arg == arg)
1050  {
1051  slist_delete_current(&iter);
1052  pfree(cb);
1053  break;
1054  }
1055  }
1056 }
1057 
1058 /*
1059  * Discard all registered on-detach callbacks without executing them.
1060  */
1061 void
1063 {
1064  dlist_iter iter;
1065 
1066  dlist_foreach(iter, &dsm_segment_list)
1067  {
1069 
1070  /* Throw away explicit on-detach actions one by one. */
1071  while (!slist_is_empty(&seg->on_detach))
1072  {
1073  slist_node *node;
1075 
1076  node = slist_pop_head_node(&seg->on_detach);
1077  cb = slist_container(dsm_segment_detach_callback, node, node);
1078  pfree(cb);
1079  }
1080 
1081  /*
1082  * Decrementing the reference count is a sort of implicit on-detach
1083  * action; make sure we don't do that, either.
1084  */
1086  }
1087 }
1088 
1089 /*
1090  * Create a segment descriptor.
1091  */
1092 static dsm_segment *
1094 {
1095  dsm_segment *seg;
1096 
1099 
1101  dlist_push_head(&dsm_segment_list, &seg->node);
1102 
1103  /* seg->handle must be initialized by the caller */
1105  seg->impl_private = NULL;
1106  seg->mapped_address = NULL;
1107  seg->mapped_size = 0;
1108 
1112 
1113  slist_init(&seg->on_detach);
1114 
1115  return seg;
1116 }
1117 
1118 /*
1119  * Sanity check a control segment.
1120  *
1121  * The goal here isn't to detect everything that could possibly be wrong with
1122  * the control segment; there's not enough information for that. Rather, the
1123  * goal is to make sure that someone can iterate over the items in the segment
1124  * without overrunning the end of the mapping and crashing. We also check
1125  * the magic number since, if that's messed up, this may not even be one of
1126  * our segments at all.
1127  */
1128 static bool
1130 {
1131  if (mapped_size < offsetof(dsm_control_header, item))
1132  return false; /* Mapped size too short to read header. */
1133  if (control->magic != PG_DYNSHMEM_CONTROL_MAGIC)
1134  return false; /* Magic number doesn't match. */
1135  if (dsm_control_bytes_needed(control->maxitems) > mapped_size)
1136  return false; /* Max item count won't fit in map. */
1137  if (control->nitems > control->maxitems)
1138  return false; /* Overfull. */
1139  return true;
1140 }
1141 
1142 /*
1143  * Compute the number of control-segment bytes needed to store a given
1144  * number of items.
1145  */
1146 static uint64
1148 {
1149  return offsetof(dsm_control_header, item)
1150  + sizeof(dsm_control_item) * (uint64) nitems;
1151 }
static void * dsm_control_impl_private
Definition: dsm.c:136
#define DSM_IMPL_MMAP
Definition: dsm_impl.h:21
void dsm_postmaster_startup(PGShmemHeader *shim)
Definition: dsm.c:145
dlist_node node
Definition: dsm.c:69
void ResourceOwnerRememberDSM(ResourceOwner owner, dsm_segment *seg)
Definition: resowner.c:1244
dsm_segment * dsm_find_mapping(dsm_handle h)
Definition: dsm.c:968
#define DEBUG1
Definition: elog.h:25
int errhint(const char *fmt,...)
Definition: elog.c:987
void dsm_impl_unpin_segment(dsm_handle handle, void **impl_private)
Definition: dsm_impl.c:1107
void reset_on_dsm_detach(void)
Definition: dsm.c:1062
uint32 maxitems
Definition: dsm.c:93
#define PG_DYNSHMEM_SLOTS_PER_BACKEND
Definition: dsm.c:54
uint32 dsm_handle
Definition: dsm_impl.h:55
#define PG_DYNSHMEM_DIR
Definition: dsm_impl.h:51
#define PG_DYNSHMEM_FIXED_SLOTS
Definition: dsm.c:53
static void dlist_push_head(dlist_head *head, dlist_node *node)
Definition: ilist.h:300
Size mapped_size
Definition: dsm.c:75
void dsm_cleanup_using_control_segment(dsm_handle old_control_handle)
Definition: dsm.c:210
dsm_handle dsm_control
Definition: pg_shmem.h:36
#define PointerGetDatum(X)
Definition: postgres.h:541
dsm_handle handle
Definition: dsm.c:82
long random(void)
Definition: random.c:22
void * dsm_resize(dsm_segment *seg, Size size)
Definition: dsm.c:678
#define dlist_foreach(iter, lhead)
Definition: ilist.h:507
ResourceOwner CurrentResourceOwner
Definition: resowner.c:140
struct dsm_control_header dsm_control_header
slist_node * cur
Definition: ilist.h:241
dsm_segment * dsm_attach(dsm_handle h)
Definition: dsm.c:548
static dsm_handle dsm_control_handle
Definition: dsm.c:133
static void slist_push_head(slist_head *head, slist_node *node)
Definition: ilist.h:574
dsm_handle dsm_segment_handle(dsm_segment *seg)
Definition: dsm.c:1015
int errcode(int sqlerrcode)
Definition: elog.c:575
int snprintf(char *str, size_t count, const char *fmt,...) pg_attribute_printf(3
static void dsm_backend_startup(void)
Definition: dsm.c:392
static dlist_head dsm_segment_list
Definition: dsm.c:124
#define LOG
Definition: elog.h:26
void on_dsm_detach(dsm_segment *seg, on_dsm_detach_callback function, Datum arg)
Definition: dsm.c:1024
static uint64 dsm_control_bytes_needed(uint32 nitems)
Definition: dsm.c:1147
Definition: dirent.h:9
uint32 nitems
Definition: dsm.c:92
bool pinned
Definition: dsm.c:85
#define slist_foreach_modify(iter, lhead)
Definition: ilist.h:716
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1725
void dsm_pin_segment(dsm_segment *seg)
Definition: dsm.c:854
#define DSM_HANDLE_INVALID
Definition: dsm.h:23
#define dlist_container(type, membername, ptr)
Definition: ilist.h:477
void pfree(void *pointer)
Definition: mcxt.c:1031
static void slist_init(slist_head *head)
Definition: ilist.h:554
Definition: dirent.c:25
#define ERROR
Definition: elog.h:43
uint32 magic
Definition: dsm.c:91
#define FATAL
Definition: elog.h:52
#define MAXPGPATH
int MaxBackends
Definition: globals.c:134
on_dsm_detach_callback function
Definition: dsm.c:61
#define DEBUG2
Definition: elog.h:24
void dsm_pin_mapping(dsm_segment *seg)
Definition: dsm.c:814
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:359
static char * buf
Definition: pg_test_fsync.c:67
#define DSM_CREATE_NULL_IF_MAXSEGMENTS
Definition: dsm.h:20
void ResourceOwnerEnlargeDSMs(ResourceOwner owner)
Definition: resowner.c:1233
bool IsUnderPostmaster
Definition: globals.c:108
dsm_control_item item[FLEXIBLE_ARRAY_MEMBER]
Definition: dsm.c:94
int errcode_for_file_access(void)
Definition: elog.c:598
static void dsm_postmaster_shutdown(int code, Datum arg)
Definition: dsm.c:330
int dynamic_shared_memory_type
Definition: dsm_impl.c:114
unsigned int uint32
Definition: c.h:325
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2590
void dsm_unpin_segment(dsm_handle handle)
Definition: dsm.c:886
void * mapped_address
Definition: dsm.c:74
static void dlist_delete(dlist_node *node)
Definition: ilist.h:358
#define ereport(elevel, rest)
Definition: elog.h:122
static bool dsm_control_segment_sane(dsm_control_header *control, Size mapped_size)
Definition: dsm.c:1129
MemoryContext TopMemoryContext
Definition: mcxt.c:44
void dsm_backend_shutdown(void)
Definition: dsm.c:635
static slist_node * slist_pop_head_node(slist_head *head)
Definition: ilist.h:596
slist_head on_detach
Definition: dsm.c:76
static bool slist_is_empty(slist_head *head)
Definition: ilist.h:563
#define DLIST_STATIC_INIT(name)
Definition: ilist.h:248
#define WARNING
Definition: elog.h:40
dsm_handle handle
Definition: dsm.c:71
#define dlist_head_element(type, membername, lhead)
Definition: ilist.h:487
#define slist_container(type, membername, ptr)
Definition: ilist.h:674
uintptr_t Datum
Definition: postgres.h:367
dsm_segment * dsm_create(Size size, int flags)
Definition: dsm.c:452
static void dsm_cleanup_for_mmap(void)
Definition: dsm.c:292
dlist_node * cur
Definition: ilist.h:161
void * dsm_remap(dsm_segment *seg)
Definition: dsm.c:695
void dsm_unpin_mapping(dsm_segment *seg)
Definition: dsm.c:833
static dsm_control_header * dsm_control
Definition: dsm.c:134
ResourceOwner resowner
Definition: dsm.c:70
void * dsm_segment_address(dsm_segment *seg)
Definition: dsm.c:987
#define Assert(condition)
Definition: c.h:699
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2656
void * impl_private
Definition: dsm.c:73
static bool dlist_is_empty(dlist_head *head)
Definition: ilist.h:289
#define DSM_IMPL_NONE
Definition: dsm_impl.h:17
void ResourceOwnerForgetDSM(ResourceOwner owner, dsm_segment *seg)
Definition: resowner.c:1253
size_t Size
Definition: c.h:433
uint32 control_slot
Definition: dsm.c:72
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1121
void dsm_detach_all(void)
Definition: dsm.c:653
#define DatumGetPointer(X)
Definition: postgres.h:534
static dsm_segment * dsm_create_descriptor(void)
Definition: dsm.c:1093
uint32 refcnt
Definition: dsm.c:83
static bool dsm_init_done
Definition: dsm.c:105
void dsm_impl_pin_segment(dsm_handle handle, void *impl_private, void **impl_private_pm_handle)
Definition: dsm_impl.c:1057
void dsm_detach(dsm_segment *seg)
Definition: dsm.c:713
int errmsg(const char *fmt,...)
Definition: elog.c:797
struct dsm_control_item dsm_control_item
static Size dsm_control_mapped_size
Definition: dsm.c:135
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:771
struct dsm_segment_detach_callback dsm_segment_detach_callback
#define PG_DYNSHMEM_MMAP_FILE_PREFIX
Definition: dsm_impl.h:52
int i
void * impl_private_pm_handle
Definition: dsm.c:84
static void slist_delete_current(slist_mutable_iter *iter)
Definition: ilist.h:652
char d_name[MAX_PATH]
Definition: dirent.h:14
#define INVALID_CONTROL_SLOT
Definition: dsm.c:56
#define elog
Definition: elog.h:219
void(* on_dsm_detach_callback)(dsm_segment *, Datum arg)
Definition: dsm.h:56
bool dsm_impl_op(dsm_op op, dsm_handle handle, Size request_size, void **impl_private, void **mapped_address, Size *mapped_size, int elevel)
Definition: dsm_impl.c:164
void cancel_on_dsm_detach(dsm_segment *seg, on_dsm_detach_callback function, Datum arg)
Definition: dsm.c:1039
Size dsm_segment_map_length(dsm_segment *seg)
Definition: dsm.c:997
#define PG_DYNSHMEM_CONTROL_MAGIC
Definition: dsm.c:46
int FreeDir(DIR *dir)
Definition: fd.c:2708
#define offsetof(type, field)
Definition: c.h:622