PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
dsm.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * dsm.c
4  * manage dynamic shared memory segments
5  *
6  * This file provides a set of services to make programming with dynamic
7  * shared memory segments more convenient. Unlike the low-level
8  * facilities provided by dsm_impl.h and dsm_impl.c, mappings and segments
9  * created using this module will be cleaned up automatically. Mappings
10  * will be removed when the resource owner under which they were created
11  * is cleaned up, unless dsm_pin_mapping() is used, in which case they
12  * have session lifespan. Segments will be removed when there are no
13  * remaining mappings, or at postmaster shutdown in any case. After a
14  * hard postmaster crash, remaining segments will be removed, if they
15  * still exist, at the next postmaster startup.
16  *
17  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
18  * Portions Copyright (c) 1994, Regents of the University of California
19  *
20  *
21  * IDENTIFICATION
22  * src/backend/storage/ipc/dsm.c
23  *
24  *-------------------------------------------------------------------------
25  */
26 
27 #include "postgres.h"
28 
29 #include <fcntl.h>
30 #include <unistd.h>
31 #ifndef WIN32
32 #include <sys/mman.h>
33 #endif
34 #include <sys/stat.h>
35 
36 #include "lib/ilist.h"
37 #include "miscadmin.h"
38 #include "storage/dsm.h"
39 #include "storage/ipc.h"
40 #include "storage/lwlock.h"
41 #include "storage/pg_shmem.h"
42 #include "utils/guc.h"
43 #include "utils/memutils.h"
44 #include "utils/resowner_private.h"
45 
46 #define PG_DYNSHMEM_CONTROL_MAGIC 0x9a503d32
47 
48 /*
49  * There's no point in getting too cheap here, because the minimum allocation
50  * is one OS page, which is probably at least 4KB and could easily be as high
51  * as 64KB. Each slot costs only sizeof(dsm_control_item) bytes.
52  */
53 #define PG_DYNSHMEM_FIXED_SLOTS 64
54 #define PG_DYNSHMEM_SLOTS_PER_BACKEND 2
55 
56 #define INVALID_CONTROL_SLOT ((uint32) -1)
57 
58 /* Backend-local tracking for on-detach callbacks. */
60 {
65 
66 /* Backend-local state for a dynamic shared memory segment. */
68 {
69  dlist_node node; /* List link in dsm_segment_list. */
70  ResourceOwner resowner; /* Resource owner. */
71  dsm_handle handle; /* Segment name. */
72  uint32 control_slot; /* Slot in control segment. */
73  void *impl_private; /* Implementation-specific private data. */
74  void *mapped_address; /* Mapping address, or NULL if unmapped. */
75  Size mapped_size; /* Size of our mapping. */
76  slist_head on_detach; /* On-detach callbacks. */
77 };
78 
79 /* Shared-memory state for a dynamic shared memory segment. */
80 typedef struct dsm_control_item
81 {
83  uint32 refcnt; /* 2+ = active, 1 = moribund, 0 = gone */
84  void *impl_private_pm_handle; /* only needed on Windows */
85  bool pinned;
87 
88 /* Layout of the dynamic shared memory control segment. */
89 typedef struct dsm_control_header
90 {
94  dsm_control_item item[FLEXIBLE_ARRAY_MEMBER];
96 
97 static void dsm_cleanup_for_mmap(void);
98 static void dsm_postmaster_shutdown(int code, Datum arg);
100 static bool dsm_control_segment_sane(dsm_control_header *control,
101  Size mapped_size);
102 static uint64 dsm_control_bytes_needed(uint32 nitems);
103 
104 /* Has this backend initialized the dynamic shared memory system yet? */
105 static bool dsm_init_done = false;
106 
107 /*
108  * List of dynamic shared memory segments used by this backend.
109  *
110  * At process exit time, we must decrement the reference count of each
111  * segment we have attached; this list makes it possible to find all such
112  * segments.
113  *
114  * This list should always be empty in the postmaster. We could probably
115  * allow the postmaster to map dynamic shared memory segments before it
116  * begins to start child processes, provided that each process adjusted
117  * the reference counts for those segments in the control segment at
118  * startup time, but there's no obvious need for such a facility, which
119  * would also be complex to handle in the EXEC_BACKEND case. Once the
120  * postmaster has begun spawning children, there's an additional problem:
121  * each new mapping would require an update to the control segment,
122  * which requires locking, in which the postmaster must not be involved.
123  */
124 static dlist_head dsm_segment_list = DLIST_STATIC_INIT(dsm_segment_list);
125 
126 /*
127  * Control segment information.
128  *
129  * Unlike ordinary shared memory segments, the control segment is not
130  * reference counted; instead, it lasts for the postmaster's entire
131  * life cycle. For simplicity, it doesn't have a dsm_segment object either.
132  */
137 
138 /*
139  * Start up the dynamic shared memory system.
140  *
141  * This is called just once during each cluster lifetime, at postmaster
142  * startup time.
143  */
144 void
146 {
147  void *dsm_control_address = NULL;
148  uint32 maxitems;
149  Size segsize;
150 
152 
153  /* If dynamic shared memory is disabled, there's nothing to do. */
155  return;
156 
157  /*
158  * If we're using the mmap implementations, clean up any leftovers.
159  * Cleanup isn't needed on Windows, and happens earlier in startup for
160  * POSIX and System V shared memory, via a direct call to
161  * dsm_cleanup_using_control_segment.
162  */
165 
166  /* Determine size for new control segment. */
167  maxitems = PG_DYNSHMEM_FIXED_SLOTS
169  elog(DEBUG2, "dynamic shared memory system will support %u segments",
170  maxitems);
171  segsize = dsm_control_bytes_needed(maxitems);
172 
173  /*
174  * Loop until we find an unused identifier for the new control segment. We
175  * sometimes use 0 as a sentinel value indicating that no control segment
176  * is known to exist, so avoid using that value for a real control
177  * segment.
178  */
179  for (;;)
180  {
181  Assert(dsm_control_address == NULL);
185  continue;
187  &dsm_control_impl_private, &dsm_control_address,
189  break;
190  }
191  dsm_control = dsm_control_address;
193  elog(DEBUG2,
194  "created dynamic shared memory control segment %u (%zu bytes)",
195  dsm_control_handle, segsize);
197 
198  /* Initialize control segment. */
199  dsm_control->magic = PG_DYNSHMEM_CONTROL_MAGIC;
200  dsm_control->nitems = 0;
201  dsm_control->maxitems = maxitems;
202 }
203 
204 /*
205  * Determine whether the control segment from the previous postmaster
206  * invocation still exists. If so, remove the dynamic shared memory
207  * segments to which it refers, and then the control segment itself.
208  */
209 void
211 {
212  void *mapped_address = NULL;
213  void *junk_mapped_address = NULL;
214  void *impl_private = NULL;
215  void *junk_impl_private = NULL;
216  Size mapped_size = 0;
217  Size junk_mapped_size = 0;
218  uint32 nitems;
219  uint32 i;
220  dsm_control_header *old_control;
221 
222  /* If dynamic shared memory is disabled, there's nothing to do. */
224  return;
225 
226  /*
227  * Try to attach the segment. If this fails, it probably just means that
228  * the operating system has been rebooted and the segment no longer
229  * exists, or an unrelated process has used the same shm ID. So just fall
230  * out quietly.
231  */
232  if (!dsm_impl_op(DSM_OP_ATTACH, old_control_handle, 0, &impl_private,
233  &mapped_address, &mapped_size, DEBUG1))
234  return;
235 
236  /*
237  * We've managed to reattach it, but the contents might not be sane. If
238  * they aren't, we disregard the segment after all.
239  */
240  old_control = (dsm_control_header *) mapped_address;
241  if (!dsm_control_segment_sane(old_control, mapped_size))
242  {
243  dsm_impl_op(DSM_OP_DETACH, old_control_handle, 0, &impl_private,
244  &mapped_address, &mapped_size, LOG);
245  return;
246  }
247 
248  /*
249  * OK, the control segment looks basically valid, so we can use it to get
250  * a list of segments that need to be removed.
251  */
252  nitems = old_control->nitems;
253  for (i = 0; i < nitems; ++i)
254  {
255  dsm_handle handle;
256  uint32 refcnt;
257 
258  /* If the reference count is 0, the slot is actually unused. */
259  refcnt = old_control->item[i].refcnt;
260  if (refcnt == 0)
261  continue;
262 
263  /* Log debugging information. */
264  handle = old_control->item[i].handle;
265  elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u (reference count %u)",
266  handle, refcnt);
267 
268  /* Destroy the referenced segment. */
269  dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
270  &junk_mapped_address, &junk_mapped_size, LOG);
271  }
272 
273  /* Destroy the old control segment, too. */
274  elog(DEBUG2,
275  "cleaning up dynamic shared memory control segment with ID %u",
276  old_control_handle);
277  dsm_impl_op(DSM_OP_DESTROY, old_control_handle, 0, &impl_private,
278  &mapped_address, &mapped_size, LOG);
279 }
280 
281 /*
282  * When we're using the mmap shared memory implementation, "shared memory"
283  * segments might even manage to survive an operating system reboot.
284  * But there's no guarantee as to exactly what will survive: some segments
285  * may survive, and others may not, and the contents of some may be out
286  * of date. In particular, the control segment may be out of date, so we
287  * can't rely on it to figure out what to remove. However, since we know
288  * what directory contains the files we used as shared memory, we can simply
289  * scan the directory and blow everything away that shouldn't be there.
290  */
291 static void
293 {
294  DIR *dir;
295  struct dirent *dent;
296 
297  /* Open the directory with AllocateDir; it is released with FreeDir below. */
298  if ((dir = AllocateDir(PG_DYNSHMEM_DIR)) == NULL)
299  ereport(ERROR,
301  errmsg("could not open directory \"%s\": %m",
302  PG_DYNSHMEM_DIR)));
303 
304  /* Scan for something with a name of the correct format. */
305  while ((dent = ReadDir(dir, PG_DYNSHMEM_DIR)) != NULL)
306  {
307  if (strncmp(dent->d_name, PG_DYNSHMEM_MMAP_FILE_PREFIX,
308  strlen(PG_DYNSHMEM_MMAP_FILE_PREFIX)) == 0)
309  {
310  char buf[MAXPGPATH + sizeof(PG_DYNSHMEM_DIR)];
311 
312  snprintf(buf, sizeof(buf), PG_DYNSHMEM_DIR "/%s", dent->d_name);
313 
314  elog(DEBUG2, "removing file \"%s\"", buf);
315 
316  /* We found a matching file; so remove it. */
317  if (unlink(buf) != 0)
318  {
319  int save_errno;
320 
321  save_errno = errno;
322  closedir(dir);
323  errno = save_errno;
324 
325  ereport(ERROR,
327  errmsg("could not remove file \"%s\": %m", buf)));
328  }
329  }
330  }
331 
332  /* Cleanup complete. */
333  FreeDir(dir);
334 }
335 
336 /*
337  * At shutdown time, we iterate over the control segment and remove all
338  * remaining dynamic shared memory segments. We avoid throwing errors here;
339  * the postmaster is shutting down either way, and this is just non-critical
340  * resource cleanup.
341  */
342 static void
344 {
345  uint32 nitems;
346  uint32 i;
347  void *dsm_control_address;
348  void *junk_mapped_address = NULL;
349  void *junk_impl_private = NULL;
350  Size junk_mapped_size = 0;
352 
353  /*
354  * If some other backend exited uncleanly, it might have corrupted the
355  * control segment while it was dying. In that case, we warn and ignore
356  * the contents of the control segment. This may end up leaving behind
357  * stray shared memory segments, but there's not much we can do about that
358  * if the metadata is gone.
359  */
360  nitems = dsm_control->nitems;
362  {
363  ereport(LOG,
364  (errmsg("dynamic shared memory control segment is corrupt")));
365  return;
366  }
367 
368  /* Remove any remaining segments. */
369  for (i = 0; i < nitems; ++i)
370  {
371  dsm_handle handle;
372 
373  /* If the reference count is 0, the slot is actually unused. */
374  if (dsm_control->item[i].refcnt == 0)
375  continue;
376 
377  /* Log debugging information. */
378  handle = dsm_control->item[i].handle;
379  elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u",
380  handle);
381 
382  /* Destroy the segment. */
383  dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
384  &junk_mapped_address, &junk_mapped_size, LOG);
385  }
386 
387  /* Remove the control segment itself. */
388  elog(DEBUG2,
389  "cleaning up dynamic shared memory control segment with ID %u",
391  dsm_control_address = dsm_control;
393  &dsm_control_impl_private, &dsm_control_address,
395  dsm_control = dsm_control_address;
396  shim->dsm_control = 0;
397 }
398 
399 /*
400  * Prepare this backend for dynamic shared memory usage. Under EXEC_BACKEND,
401  * we must reread the state file and map the control segment; in other cases,
402  * we'll have inherited the postmaster's mapping and global variables.
403  */
404 static void
406 {
407  /* If dynamic shared memory is disabled, reject this. */
409  ereport(ERROR,
410  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
411  errmsg("dynamic shared memory is disabled"),
412  errhint("Set dynamic_shared_memory_type to a value other than \"none\".")));
413 
414 #ifdef EXEC_BACKEND
415  {
416  void *control_address = NULL;
417 
418  /* Attach control segment. */
421  &dsm_control_impl_private, &control_address,
423  dsm_control = control_address;
424  /* If control segment doesn't look sane, something is badly wrong. */
426  {
428  &dsm_control_impl_private, &control_address,
430  ereport(FATAL,
431  (errcode(ERRCODE_INTERNAL_ERROR),
432  errmsg("dynamic shared memory control segment is not valid")));
433  }
434  }
435 #endif
436 
437  dsm_init_done = true;
438 }
439 
440 #ifdef EXEC_BACKEND
441 /*
442  * Under EXEC_BACKEND, this callback runs when the main shared memory segment
443  * is re-attached; it stores the control handle "h" retrieved from it in
444  * dsm_control_handle. We assert h is nonzero and no handle was stored before.
445  */
446 void
447 dsm_set_control_handle(dsm_handle h)
448 {
449  Assert(dsm_control_handle == 0 && h != 0);
450  dsm_control_handle = h;
451 }
452 #endif
453 
454 /*
455  * Create a new dynamic shared memory segment.
456  *
457  * If there is a non-NULL CurrentResourceOwner, the new segment is associated
458  * with it and must be detached before the resource owner releases, or a
459  * warning will be logged. If CurrentResourceOwner is NULL, the segment
460  * remains attached until explicitly detached or the session ends.
461  * Creating with a NULL CurrentResourceOwner is equivalent to creating
462  * with a non-NULL CurrentResourceOwner and then calling dsm_pin_mapping.
463  */
464 dsm_segment *
465 dsm_create(Size size, int flags)
466 {
467  dsm_segment *seg;
468  uint32 i;
469  uint32 nitems;
470 
471  /* Unsafe in postmaster (and pointless in a stand-alone backend). */
473 
474  if (!dsm_init_done)
476 
477  /* Create a new segment descriptor. */
478  seg = dsm_create_descriptor();
479 
480  /* Loop until we find an unused segment identifier. */
481  for (;;)
482  {
483  Assert(seg->mapped_address == NULL && seg->mapped_size == 0);
484  seg->handle = random();
485  if (seg->handle == DSM_HANDLE_INVALID) /* Reserve sentinel */
486  continue;
487  if (dsm_impl_op(DSM_OP_CREATE, seg->handle, size, &seg->impl_private,
488  &seg->mapped_address, &seg->mapped_size, ERROR))
489  break;
490  }
491 
492  /* Lock the control segment so we can register the new segment. */
493  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
494 
495  /* Search the control segment for an unused slot. */
496  nitems = dsm_control->nitems;
497  for (i = 0; i < nitems; ++i)
498  {
499  if (dsm_control->item[i].refcnt == 0)
500  {
501  dsm_control->item[i].handle = seg->handle;
502  /* refcnt of 1 triggers destruction, so start at 2 */
503  dsm_control->item[i].refcnt = 2;
504  dsm_control->item[i].impl_private_pm_handle = NULL;
505  dsm_control->item[i].pinned = false;
506  seg->control_slot = i;
507  LWLockRelease(DynamicSharedMemoryControlLock);
508  return seg;
509  }
510  }
511 
512  /* Verify that we can support an additional mapping. */
513  if (nitems >= dsm_control->maxitems)
514  {
515  if ((flags & DSM_CREATE_NULL_IF_MAXSEGMENTS) != 0)
516  {
517  LWLockRelease(DynamicSharedMemoryControlLock);
519  &seg->mapped_address, &seg->mapped_size, WARNING);
520  if (seg->resowner != NULL)
521  ResourceOwnerForgetDSM(seg->resowner, seg);
522  dlist_delete(&seg->node);
523  pfree(seg);
524  return NULL;
525  }
526  ereport(ERROR,
527  (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
528  errmsg("too many dynamic shared memory segments")));
529  }
530 
531  /* Enter the handle into a new array slot. */
532  dsm_control->item[nitems].handle = seg->handle;
533  /* refcnt of 1 triggers destruction, so start at 2 */
534  dsm_control->item[nitems].refcnt = 2;
535  dsm_control->item[nitems].impl_private_pm_handle = NULL;
536  dsm_control->item[nitems].pinned = false;
537  seg->control_slot = nitems;
538  dsm_control->nitems++;
539  LWLockRelease(DynamicSharedMemoryControlLock);
540 
541  return seg;
542 }
543 
544 /*
545  * Attach a dynamic shared memory segment.
546  *
547  * See comments for dsm_segment_handle() for an explanation of how this
548  * is intended to be used.
549  *
550  * This function will return NULL if the segment isn't known to the system.
551  * This can happen if we're asked to attach the segment, but then everyone
552  * else detaches it (causing it to be destroyed) before we get around to
553  * attaching it.
554  *
555  * If there is a non-NULL CurrentResourceOwner, the attached segment is
556  * associated with it and must be detached before the resource owner releases,
557  * or a warning will be logged. Otherwise the segment remains attached until
558  * explicitly detached or the session ends. See the note atop dsm_create().
559  */
560 dsm_segment *
562 {
563  dsm_segment *seg;
564  dlist_iter iter;
565  uint32 i;
566  uint32 nitems;
567 
568  /* Unsafe in postmaster (and pointless in a stand-alone backend). */
570 
571  if (!dsm_init_done)
573 
574  /*
575  * Since this is just a debugging cross-check, we could leave it out
576  * altogether, or include it only in assert-enabled builds. But since the
577  * list of attached segments should normally be very short, let's include
578  * it always for right now.
579  *
580  * If you're hitting this error, you probably want to attempt to find an
581  * existing mapping via dsm_find_mapping() before calling dsm_attach() to
582  * create a new one.
583  */
584  dlist_foreach(iter, &dsm_segment_list)
585  {
586  seg = dlist_container(dsm_segment, node, iter.cur);
587  if (seg->handle == h)
588  elog(ERROR, "can't attach the same segment more than once");
589  }
590 
591  /* Create a new segment descriptor. */
592  seg = dsm_create_descriptor();
593  seg->handle = h;
594 
595  /* Bump reference count for this segment in shared memory. */
596  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
597  nitems = dsm_control->nitems;
598  for (i = 0; i < nitems; ++i)
599  {
600  /* If the reference count is 0, the slot is actually unused. */
601  if (dsm_control->item[i].refcnt == 0)
602  continue;
603 
604  /* If the handle doesn't match, it's not the slot we want. */
605  if (dsm_control->item[i].handle != seg->handle)
606  continue;
607 
608  /*
609  * If the reference count is 1, the slot is still in use, but the
610  * segment is in the process of going away. Treat that as if we
611  * didn't find a match.
612  */
613  if (dsm_control->item[i].refcnt == 1)
614  break;
615 
616  /* Otherwise we've found a match. */
617  dsm_control->item[i].refcnt++;
618  seg->control_slot = i;
619  break;
620  }
621  LWLockRelease(DynamicSharedMemoryControlLock);
622 
623  /*
624  * If we didn't find the handle we're looking for in the control segment,
625  * it probably means that everyone else who had it mapped, including the
626  * original creator, died before we got to this point. It's up to the
627  * caller to decide what to do about that.
628  */
630  {
631  dsm_detach(seg);
632  return NULL;
633  }
634 
635  /* Here's where we actually try to map the segment. */
637  &seg->mapped_address, &seg->mapped_size, ERROR);
638 
639  return seg;
640 }
641 
642 /*
643  * At backend shutdown time, detach any segments that are still attached.
644  * (This is similar to dsm_detach_all, except that there's no reason to
645  * unmap the control segment before exiting, so we don't bother.)
646  */
647 void
649 {
650  while (!dlist_is_empty(&dsm_segment_list))
651  {
652  dsm_segment *seg;
653 
654  seg = dlist_head_element(dsm_segment, node, &dsm_segment_list);
655  dsm_detach(seg);
656  }
657 }
658 
659 /*
660  * Detach all shared memory segments, including the control segment. This
661  * should be called, along with PGSharedMemoryDetach, in processes that
662  * might inherit mappings but are not intended to be connected to dynamic
663  * shared memory.
664  */
665 void
667 {
668  void *control_address = dsm_control;
669 
670  while (!dlist_is_empty(&dsm_segment_list))
671  {
672  dsm_segment *seg;
673 
674  seg = dlist_head_element(dsm_segment, node, &dsm_segment_list);
675  dsm_detach(seg);
676  }
677 
678  if (control_address != NULL)
680  &dsm_control_impl_private, &control_address,
682 }
683 
684 /*
685  * Resize an existing shared memory segment.
686  *
687  * This may cause the shared memory segment to be remapped at a different
688  * address. For the caller's convenience, we return the mapped address.
689  */
690 void *
692 {
694  dsm_impl_op(DSM_OP_RESIZE, seg->handle, size, &seg->impl_private,
695  &seg->mapped_address, &seg->mapped_size, ERROR);
696  return seg->mapped_address;
697 }
698 
699 /*
700  * Remap an existing shared memory segment.
701  *
702  * This is intended to be used when some other process has extended the
703  * mapping using dsm_resize(), but we've still only got the initial
704  * portion mapped. Since this might change the address at which the
705  * segment is mapped, we return the new mapped address.
706  */
707 void *
709 {
711  &seg->mapped_address, &seg->mapped_size, ERROR);
712 
713  return seg->mapped_address;
714 }
715 
716 /*
717  * Detach from a shared memory segment, destroying the segment if we
718  * remove the last reference.
719  *
720  * This function should never fail. It will often be invoked when aborting
721  * a transaction, and a further error won't serve any purpose. It's not a
722  * complete disaster if we fail to unmap or destroy the segment; it means a
723  * resource leak, but that doesn't necessarily preclude further operations.
724  */
725 void
727 {
728  /*
729  * Invoke registered callbacks. Just in case one of those callbacks
730  * throws a further error that brings us back here, pop the callback
731  * before invoking it, to avoid infinite error recursion.
732  */
733  while (!slist_is_empty(&seg->on_detach))
734  {
735  slist_node *node;
737  on_dsm_detach_callback function;
738  Datum arg;
739 
740  node = slist_pop_head_node(&seg->on_detach);
742  function = cb->function;
743  arg = cb->arg;
744  pfree(cb);
745 
746  function(seg, arg);
747  }
748 
749  /*
750  * Try to remove the mapping, if one exists. Normally, there will be, but
751  * maybe not, if we failed partway through a create or attach operation.
752  * We remove the mapping before decrementing the reference count so that
753  * the process that sees a zero reference count can be certain that no
754  * remaining mappings exist. Even if this fails, we pretend that it
755  * works, because retrying is likely to fail in the same way.
756  */
757  if (seg->mapped_address != NULL)
758  {
760  &seg->mapped_address, &seg->mapped_size, WARNING);
761  seg->impl_private = NULL;
762  seg->mapped_address = NULL;
763  seg->mapped_size = 0;
764  }
765 
766  /* Reduce reference count, if we previously increased it. */
768  {
769  uint32 refcnt;
770  uint32 control_slot = seg->control_slot;
771 
772  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
773  Assert(dsm_control->item[control_slot].handle == seg->handle);
774  Assert(dsm_control->item[control_slot].refcnt > 1);
775  refcnt = --dsm_control->item[control_slot].refcnt;
777  LWLockRelease(DynamicSharedMemoryControlLock);
778 
779  /* If new reference count is 1, try to destroy the segment. */
780  if (refcnt == 1)
781  {
782  /* A pinned segment should never reach 1. */
783  Assert(!dsm_control->item[control_slot].pinned);
784 
785  /*
786  * If we fail to destroy the segment here, or are killed before we
787  * finish doing so, the reference count will remain at 1, which
788  * will mean that nobody else can attach to the segment. At
789  * postmaster shutdown time, or when a new postmaster is started
790  * after a hard kill, another attempt will be made to remove the
791  * segment.
792  *
793  * The main case we're worried about here is being killed by a
794  * signal before we can finish removing the segment. In that
795  * case, it's important to be sure that the segment still gets
796  * removed. If we actually fail to remove the segment for some
797  * other reason, the postmaster may not have any better luck than
798  * we did. There's not much we can do about that, though.
799  */
800  if (dsm_impl_op(DSM_OP_DESTROY, seg->handle, 0, &seg->impl_private,
801  &seg->mapped_address, &seg->mapped_size, WARNING))
802  {
803  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
804  Assert(dsm_control->item[control_slot].handle == seg->handle);
805  Assert(dsm_control->item[control_slot].refcnt == 1);
806  dsm_control->item[control_slot].refcnt = 0;
807  LWLockRelease(DynamicSharedMemoryControlLock);
808  }
809  }
810  }
811 
812  /* Clean up our remaining backend-private data structures. */
813  if (seg->resowner != NULL)
814  ResourceOwnerForgetDSM(seg->resowner, seg);
815  dlist_delete(&seg->node);
816  pfree(seg);
817 }
818 
819 /*
820  * Keep a dynamic shared memory mapping until end of session.
821  *
822  * By default, mappings are owned by the current resource owner, which
823  * typically means they stick around for the duration of the current query
824  * only.
825  */
826 void
828 {
829  if (seg->resowner != NULL)
830  {
831  ResourceOwnerForgetDSM(seg->resowner, seg);
832  seg->resowner = NULL;
833  }
834 }
835 
836 /*
837  * Arrange to remove a dynamic shared memory mapping at cleanup time.
838  *
839  * dsm_pin_mapping() can be used to preserve a mapping for the entire
840  * lifetime of a process; this function reverses that decision, making
841  * the segment owned by the current resource owner. This may be useful
842  * just before performing some operation that will invalidate the segment
843  * for future use by this backend.
844  */
845 void
847 {
848  Assert(seg->resowner == NULL);
852 }
853 
854 /*
855  * Keep a dynamic shared memory segment until postmaster shutdown, or until
856  * dsm_unpin_segment is called.
857  *
858  * This function should not be called more than once per segment, unless the
859  * segment is explicitly unpinned with dsm_unpin_segment in between calls.
860  *
861  * Note that this function does not arrange for the current process to
862  * keep the segment mapped indefinitely; if that behavior is desired,
863  * dsm_pin_mapping() should be used from each process that needs to
864  * retain the mapping.
865  */
866 void
868 {
869  void *handle;
870 
871  /*
872  * Bump reference count for this segment in shared memory. This will
873  * ensure that even if there is no session which is attached to this
874  * segment, it will remain until postmaster shutdown or an explicit call
875  * to unpin.
876  */
877  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
878  if (dsm_control->item[seg->control_slot].pinned)
879  elog(ERROR, "cannot pin a segment that is already pinned");
880  dsm_impl_pin_segment(seg->handle, seg->impl_private, &handle);
881  dsm_control->item[seg->control_slot].pinned = true;
882  dsm_control->item[seg->control_slot].refcnt++;
883  dsm_control->item[seg->control_slot].impl_private_pm_handle = handle;
884  LWLockRelease(DynamicSharedMemoryControlLock);
885 }
886 
887 /*
888  * Unpin a dynamic shared memory segment that was previously pinned with
889  * dsm_pin_segment. This function should not be called unless dsm_pin_segment
890  * was previously called for this segment.
891  *
892  * The argument is a dsm_handle rather than a dsm_segment in case you want
893  * to unpin a segment to which you haven't attached. This turns out to be
894  * useful if, for example, a reference to one shared memory segment is stored
895  * within another shared memory segment. You might want to unpin the
896  * referenced segment before destroying the referencing segment.
897  */
898 void
900 {
901  uint32 control_slot = INVALID_CONTROL_SLOT;
902  bool destroy = false;
903  uint32 i;
904 
905  /* Find the control slot for the given handle. */
906  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
907  for (i = 0; i < dsm_control->nitems; ++i)
908  {
909  /* Skip unused slots. */
910  if (dsm_control->item[i].refcnt == 0)
911  continue;
912 
913  /* If we've found our handle, we can stop searching. */
914  if (dsm_control->item[i].handle == handle)
915  {
916  control_slot = i;
917  break;
918  }
919  }
920 
921  /*
922  * We should definitely have found the slot, and it should not already be
923  * in the process of going away, because this function should only be
924  * called on a segment which is pinned.
925  */
926  if (control_slot == INVALID_CONTROL_SLOT)
927  elog(ERROR, "cannot unpin unknown segment handle");
928  if (!dsm_control->item[control_slot].pinned)
929  elog(ERROR, "cannot unpin a segment that is not pinned");
930  Assert(dsm_control->item[control_slot].refcnt > 1);
931 
932  /*
933  * Allow implementation-specific code to run. We have to do this before
934  * releasing the lock, because impl_private_pm_handle may get modified by
935  * dsm_impl_unpin_segment.
936  */
937  dsm_impl_unpin_segment(handle,
938  &dsm_control->item[control_slot].impl_private_pm_handle);
939 
940  /* Note that 1 means no references (0 means unused slot). */
941  if (--dsm_control->item[control_slot].refcnt == 1)
942  destroy = true;
943  dsm_control->item[control_slot].pinned = false;
944 
945  /* Now we can release the lock. */
946  LWLockRelease(DynamicSharedMemoryControlLock);
947 
948  /* Clean up resources if that was the last reference. */
949  if (destroy)
950  {
951  void *junk_impl_private = NULL;
952  void *junk_mapped_address = NULL;
953  Size junk_mapped_size = 0;
954 
955  /*
956  * For an explanation of how error handling works in this case, see
957  * comments in dsm_detach. Note that if we reach this point, the
958  * current process certainly does not have the segment mapped, because
959  * if it did, the reference count would have still been greater than 1
960  * even after releasing the reference count held by the pin. The fact
961  * that there can't be a dsm_segment for this handle makes it OK to
962  * pass the mapped size, mapped address, and private data as NULL
963  * here.
964  */
965  if (dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
966  &junk_mapped_address, &junk_mapped_size, WARNING))
967  {
968  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
969  Assert(dsm_control->item[control_slot].handle == handle);
970  Assert(dsm_control->item[control_slot].refcnt == 1);
971  dsm_control->item[control_slot].refcnt = 0;
972  LWLockRelease(DynamicSharedMemoryControlLock);
973  }
974  }
975 }
976 
977 /*
978  * Find an existing mapping for a shared memory segment, if there is one.
979  */
980 dsm_segment *
982 {
983  dlist_iter iter;
984  dsm_segment *seg;
985 
986  dlist_foreach(iter, &dsm_segment_list)
987  {
988  seg = dlist_container(dsm_segment, node, iter.cur);
989  if (seg->handle == h)
990  return seg;
991  }
992 
993  return NULL;
994 }
995 
996 /*
997  * Get the address at which a dynamic shared memory segment is mapped.
998  */
999 void *
1001 {
1002  Assert(seg->mapped_address != NULL);
1003  return seg->mapped_address;
1004 }
1005 
1006 /*
1007  * Get the size of a mapping.
1008  */
1009 Size
1011 {
1012  Assert(seg->mapped_address != NULL);
1013  return seg->mapped_size;
1014 }
1015 
1016 /*
1017  * Get a handle for a mapping.
1018  *
1019  * To establish communication via dynamic shared memory between two backends,
1020  * one of them should first call dsm_create() to establish a new shared
1021  * memory mapping. That process should then call dsm_segment_handle() to
1022  * obtain a handle for the mapping, and pass that handle to the
1023  * coordinating backend via some means (e.g. bgw_main_arg, or via the
1024  * main shared memory segment). The recipient, once in possession of the
1025  * handle, should call dsm_attach().
1026  */
1027 dsm_handle
1029 {
1030  return seg->handle;
1031 }
1032 
1033 /*
1034  * Register an on-detach callback for a dynamic shared memory segment.
1035  */
1036 void
1038 {
1040 
1042  sizeof(dsm_segment_detach_callback));
1043  cb->function = function;
1044  cb->arg = arg;
1045  slist_push_head(&seg->on_detach, &cb->node);
1046 }
1047 
1048 /*
1049  * Unregister an on-detach callback for a dynamic shared memory segment.
1050  */
1051 void
1053  Datum arg)
1054 {
1055  slist_mutable_iter iter;
1056 
1057  slist_foreach_modify(iter, &seg->on_detach)
1058  {
1060 
1062  if (cb->function == function && cb->arg == arg)
1063  {
1064  slist_delete_current(&iter);
1065  pfree(cb);
1066  break;
1067  }
1068  }
1069 }
1070 
1071 /*
1072  * Discard all registered on-detach callbacks without executing them.
1073  */
1074 void
1076 {
1077  dlist_iter iter;
1078 
1079  dlist_foreach(iter, &dsm_segment_list)
1080  {
1081  dsm_segment *seg = dlist_container(dsm_segment, node, iter.cur);
1082 
1083  /* Throw away explicit on-detach actions one by one. */
1084  while (!slist_is_empty(&seg->on_detach))
1085  {
1086  slist_node *node;
1088 
1089  node = slist_pop_head_node(&seg->on_detach);
1090  cb = slist_container(dsm_segment_detach_callback, node, node);
1091  pfree(cb);
1092  }
1093 
1094  /*
1095  * Decrementing the reference count is a sort of implicit on-detach
1096  * action; make sure we don't do that, either.
1097  */
1099  }
1100 }
1101 
1102 /*
1103  * Create a segment descriptor.
1104  */
1105 static dsm_segment *
1107 {
1108  dsm_segment *seg;
1109 
1112 
1114  dlist_push_head(&dsm_segment_list, &seg->node);
1115 
1116  /* seg->handle must be initialized by the caller */
1118  seg->impl_private = NULL;
1119  seg->mapped_address = NULL;
1120  seg->mapped_size = 0;
1121 
1125 
1126  slist_init(&seg->on_detach);
1127 
1128  return seg;
1129 }
1130 
1131 /*
1132  * Sanity check a control segment.
1133  *
1134  * The goal here isn't to detect everything that could possibly be wrong with
1135  * the control segment; there's not enough information for that. Rather, the
1136  * goal is to make sure that someone can iterate over the items in the segment
1137  * without overrunning the end of the mapping and crashing. We also check
1138  * the magic number since, if that's messed up, this may not even be one of
1139  * our segments at all.
1140  */
1141 static bool
1143 {
1144  if (mapped_size < offsetof(dsm_control_header, item))
1145  return false; /* Mapped size too short to read header. */
1146  if (control->magic != PG_DYNSHMEM_CONTROL_MAGIC)
1147  return false; /* Magic number doesn't match. */
1148  if (dsm_control_bytes_needed(control->maxitems) > mapped_size)
1149  return false; /* Max item count won't fit in map. */
1150  if (control->nitems > control->maxitems)
1151  return false; /* Overfull. */
1152  return true;
1153 }
1154 
1155 /*
1156  * Compute the number of control-segment bytes needed to store a given
1157  * number of items.
1158  */
1159 static uint64
1161 {
1162  return offsetof(dsm_control_header, item)
1163  +sizeof(dsm_control_item) * (uint64) nitems;
1164 }
static void * dsm_control_impl_private
Definition: dsm.c:136
#define DSM_IMPL_MMAP
Definition: dsm_impl.h:21
void dsm_postmaster_startup(PGShmemHeader *shim)
Definition: dsm.c:145
dlist_node node
Definition: dsm.c:69
void ResourceOwnerRememberDSM(ResourceOwner owner, dsm_segment *seg)
Definition: resowner.c:1245
dsm_segment * dsm_find_mapping(dsm_handle h)
Definition: dsm.c:981
#define DEBUG1
Definition: elog.h:25
int errhint(const char *fmt,...)
Definition: elog.c:987
void dsm_impl_unpin_segment(dsm_handle handle, void **impl_private)
Definition: dsm_impl.c:1059
void reset_on_dsm_detach(void)
Definition: dsm.c:1075
uint32 maxitems
Definition: dsm.c:93
#define PG_DYNSHMEM_SLOTS_PER_BACKEND
Definition: dsm.c:54
uint32 dsm_handle
Definition: dsm_impl.h:55
#define PG_DYNSHMEM_DIR
Definition: dsm_impl.h:51
#define PG_DYNSHMEM_FIXED_SLOTS
Definition: dsm.c:53
static void dlist_push_head(dlist_head *head, dlist_node *node)
Definition: ilist.h:300
Size mapped_size
Definition: dsm.c:75
void dsm_cleanup_using_control_segment(dsm_handle old_control_handle)
Definition: dsm.c:210
dsm_handle dsm_control
Definition: pg_shmem.h:36
#define PointerGetDatum(X)
Definition: postgres.h:562
dsm_handle handle
Definition: dsm.c:82
long random(void)
Definition: random.c:22
void * dsm_resize(dsm_segment *seg, Size size)
Definition: dsm.c:691
#define dlist_foreach(iter, lhead)
Definition: ilist.h:507
ResourceOwner CurrentResourceOwner
Definition: resowner.c:138
struct dsm_control_header dsm_control_header
slist_node * cur
Definition: ilist.h:241
dsm_segment * dsm_attach(dsm_handle h)
Definition: dsm.c:561
static dsm_handle dsm_control_handle
Definition: dsm.c:133
static void slist_push_head(slist_head *head, slist_node *node)
Definition: ilist.h:574
dsm_handle dsm_segment_handle(dsm_segment *seg)
Definition: dsm.c:1028
int closedir(DIR *)
Definition: dirent.c:113
int errcode(int sqlerrcode)
Definition: elog.c:575
int snprintf(char *str, size_t count, const char *fmt,...) pg_attribute_printf(3
void(* on_dsm_detach_callback)(dsm_segment *, Datum arg)
Definition: dsm.h:56
static void dsm_backend_startup(void)
Definition: dsm.c:405
static dlist_head dsm_segment_list
Definition: dsm.c:124
#define LOG
Definition: elog.h:26
void on_dsm_detach(dsm_segment *seg, on_dsm_detach_callback function, Datum arg)
Definition: dsm.c:1037
static uint64 dsm_control_bytes_needed(uint32 nitems)
Definition: dsm.c:1160
Definition: dirent.h:9
uint32 nitems
Definition: dsm.c:92
bool pinned
Definition: dsm.c:85
#define slist_foreach_modify(iter, lhead)
Definition: ilist.h:716
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1715
void dsm_pin_segment(dsm_segment *seg)
Definition: dsm.c:867
#define DSM_HANDLE_INVALID
Definition: dsm.h:23
#define dlist_container(type, membername, ptr)
Definition: ilist.h:477
void pfree(void *pointer)
Definition: mcxt.c:950
static void slist_init(slist_head *head)
Definition: ilist.h:554
Definition: dirent.c:25
#define ERROR
Definition: elog.h:43
uint32 magic
Definition: dsm.c:91
#define FATAL
Definition: elog.h:52
#define MAXPGPATH
int MaxBackends
Definition: globals.c:126
on_dsm_detach_callback function
Definition: dsm.c:61
#define DEBUG2
Definition: elog.h:24
void dsm_pin_mapping(dsm_segment *seg)
Definition: dsm.c:827
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:348
static char * buf
Definition: pg_test_fsync.c:66
#define DSM_CREATE_NULL_IF_MAXSEGMENTS
Definition: dsm.h:20
void ResourceOwnerEnlargeDSMs(ResourceOwner owner)
Definition: resowner.c:1234
bool IsUnderPostmaster
Definition: globals.c:100
dsm_control_item item[FLEXIBLE_ARRAY_MEMBER]
Definition: dsm.c:94
int errcode_for_file_access(void)
Definition: elog.c:598
static void dsm_postmaster_shutdown(int code, Datum arg)
Definition: dsm.c:343
int dynamic_shared_memory_type
Definition: dsm_impl.c:112
unsigned int uint32
Definition: c.h:268
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2335
void dsm_unpin_segment(dsm_handle handle)
Definition: dsm.c:899
void * mapped_address
Definition: dsm.c:74
static void dlist_delete(dlist_node *node)
Definition: ilist.h:358
int unlink(const char *filename)
#define ereport(elevel, rest)
Definition: elog.h:122
static bool dsm_control_segment_sane(dsm_control_header *control, Size mapped_size)
Definition: dsm.c:1142
MemoryContext TopMemoryContext
Definition: mcxt.c:43
void dsm_backend_shutdown(void)
Definition: dsm.c:648
static slist_node * slist_pop_head_node(slist_head *head)
Definition: ilist.h:596
slist_head on_detach
Definition: dsm.c:76
static bool slist_is_empty(slist_head *head)
Definition: ilist.h:563
#define DLIST_STATIC_INIT(name)
Definition: ilist.h:248
#define WARNING
Definition: elog.h:40
dsm_handle handle
Definition: dsm.c:71
#define dlist_head_element(type, membername, lhead)
Definition: ilist.h:487
#define slist_container(type, membername, ptr)
Definition: ilist.h:674
uintptr_t Datum
Definition: postgres.h:372
dsm_segment * dsm_create(Size size, int flags)
Definition: dsm.c:465
static void dsm_cleanup_for_mmap(void)
Definition: dsm.c:292
dlist_node * cur
Definition: ilist.h:161
void * dsm_remap(dsm_segment *seg)
Definition: dsm.c:708
void dsm_unpin_mapping(dsm_segment *seg)
Definition: dsm.c:846
static dsm_control_header * dsm_control
Definition: dsm.c:134
ResourceOwner resowner
Definition: dsm.c:70
#define NULL
Definition: c.h:229
void * dsm_segment_address(dsm_segment *seg)
Definition: dsm.c:1000
#define Assert(condition)
Definition: c.h:675
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2401
void * impl_private
Definition: dsm.c:73
static bool dlist_is_empty(dlist_head *head)
Definition: ilist.h:289
#define DSM_IMPL_NONE
Definition: dsm_impl.h:17
void ResourceOwnerForgetDSM(ResourceOwner owner, dsm_segment *seg)
Definition: resowner.c:1254
size_t Size
Definition: c.h:356
uint32 control_slot
Definition: dsm.c:72
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1111
void dsm_detach_all(void)
Definition: dsm.c:666
#define DatumGetPointer(X)
Definition: postgres.h:555
static dsm_segment * dsm_create_descriptor(void)
Definition: dsm.c:1106
uint32 refcnt
Definition: dsm.c:83
static bool dsm_init_done
Definition: dsm.c:105
void dsm_impl_pin_segment(dsm_handle handle, void *impl_private, void **impl_private_pm_handle)
Definition: dsm_impl.c:1009
void dsm_detach(dsm_segment *seg)
Definition: dsm.c:726
int errmsg(const char *fmt,...)
Definition: elog.c:797
struct dsm_control_item dsm_control_item
static Size dsm_control_mapped_size
Definition: dsm.c:135
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:707
struct dsm_segment_detach_callback dsm_segment_detach_callback
#define PG_DYNSHMEM_MMAP_FILE_PREFIX
Definition: dsm_impl.h:52
int i
void * arg
void * impl_private_pm_handle
Definition: dsm.c:84
static void slist_delete_current(slist_mutable_iter *iter)
Definition: ilist.h:652
char d_name[MAX_PATH]
Definition: dirent.h:14
#define INVALID_CONTROL_SLOT
Definition: dsm.c:56
#define elog
Definition: elog.h:219
bool dsm_impl_op(dsm_op op, dsm_handle handle, Size request_size, void **impl_private, void **mapped_address, Size *mapped_size, int elevel)
Definition: dsm_impl.c:162
void cancel_on_dsm_detach(dsm_segment *seg, on_dsm_detach_callback function, Datum arg)
Definition: dsm.c:1052
Size dsm_segment_map_length(dsm_segment *seg)
Definition: dsm.c:1010
#define PG_DYNSHMEM_CONTROL_MAGIC
Definition: dsm.c:46
int FreeDir(DIR *dir)
Definition: fd.c:2444
#define offsetof(type, field)
Definition: c.h:555