PostgreSQL Source Code  git master
dsm.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * dsm.c
4  * manage dynamic shared memory segments
5  *
6  * This file provides a set of services to make programming with dynamic
7  * shared memory segments more convenient. Unlike the low-level
8  * facilities provided by dsm_impl.h and dsm_impl.c, mappings and segments
9  * created using this module will be cleaned up automatically. Mappings
10  * will be removed when the resource owner under which they were created
11  * is cleaned up, unless dsm_pin_mapping() is used, in which case they
12  * have session lifespan. Segments will be removed when there are no
13  * remaining mappings, or at postmaster shutdown in any case. After a
14  * hard postmaster crash, remaining segments will be removed, if they
15  * still exist, at the next postmaster startup.
16  *
17  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
18  * Portions Copyright (c) 1994, Regents of the University of California
19  *
20  *
21  * IDENTIFICATION
22  * src/backend/storage/ipc/dsm.c
23  *
24  *-------------------------------------------------------------------------
25  */
26 
27 #include "postgres.h"
28 
29 #include <fcntl.h>
30 #include <unistd.h>
31 #ifndef WIN32
32 #include <sys/mman.h>
33 #endif
34 #include <sys/stat.h>
35 
36 #include "lib/ilist.h"
37 #include "miscadmin.h"
38 #include "storage/dsm.h"
39 #include "storage/ipc.h"
40 #include "storage/lwlock.h"
41 #include "storage/pg_shmem.h"
42 #include "utils/guc.h"
43 #include "utils/memutils.h"
44 #include "utils/resowner_private.h"
45 
46 #define PG_DYNSHMEM_CONTROL_MAGIC 0x9a503d32
47 
48 /*
49  * There's no point in getting too cheap here, because the minimum allocation
50  * is one OS page, which is probably at least 4KB and could easily be as high
51  * as 64KB. Each slot costs only sizeof(dsm_control_item), a few dozen bytes at most.
52  */
53 #define PG_DYNSHMEM_FIXED_SLOTS 64
54 #define PG_DYNSHMEM_SLOTS_PER_BACKEND 2
55 
56 #define INVALID_CONTROL_SLOT ((uint32) -1)
57 
58 /* Backend-local tracking for on-detach callbacks. */
60 {
65 
66 /* Backend-local state for a dynamic shared memory segment. */
68 {
69  dlist_node node; /* List link in dsm_segment_list. */
70  ResourceOwner resowner; /* Resource owner. */
71  dsm_handle handle; /* Segment name. */
72  uint32 control_slot; /* Slot in control segment. */
73  void *impl_private; /* Implementation-specific private data. */
74  void *mapped_address; /* Mapping address, or NULL if unmapped. */
75  Size mapped_size; /* Size of our mapping. */
76  slist_head on_detach; /* On-detach callbacks. */
77 };
78 
79 /* Shared-memory state for a dynamic shared memory segment. */
80 typedef struct dsm_control_item
81 {
83  uint32 refcnt; /* 2+ = active, 1 = moribund, 0 = gone */
84  void *impl_private_pm_handle; /* only needed on Windows */
85  bool pinned;
87 
88 /* Layout of the dynamic shared memory control segment. */
89 typedef struct dsm_control_header
90 {
94  dsm_control_item item[FLEXIBLE_ARRAY_MEMBER];
96 
97 static void dsm_cleanup_for_mmap(void);
98 static void dsm_postmaster_shutdown(int code, Datum arg);
100 static bool dsm_control_segment_sane(dsm_control_header *control,
101  Size mapped_size);
102 static uint64 dsm_control_bytes_needed(uint32 nitems);
103 
104 /* Has this backend initialized the dynamic shared memory system yet? */
105 static bool dsm_init_done = false;
106 
107 /*
108  * List of dynamic shared memory segments used by this backend.
109  *
110  * At process exit time, we must decrement the reference count of each
111  * segment we have attached; this list makes it possible to find all such
112  * segments.
113  *
114  * This list should always be empty in the postmaster. We could probably
115  * allow the postmaster to map dynamic shared memory segments before it
116  * begins to start child processes, provided that each process adjusted
117  * the reference counts for those segments in the control segment at
118  * startup time, but there's no obvious need for such a facility, which
119  * would also be complex to handle in the EXEC_BACKEND case. Once the
120  * postmaster has begun spawning children, there's an additional problem:
121  * each new mapping would require an update to the control segment,
122  * which requires locking, in which the postmaster must not be involved.
123  */
124 static dlist_head dsm_segment_list = DLIST_STATIC_INIT(dsm_segment_list);
125 
126 /*
127  * Control segment information.
128  *
129  * Unlike ordinary shared memory segments, the control segment is not
130  * reference counted; instead, it lasts for the postmaster's entire
131  * life cycle. For simplicity, it doesn't have a dsm_segment object either.
132  */
136 static void *dsm_control_impl_private = NULL;
137 
138 /*
139  * Start up the dynamic shared memory system.
140  *
141  * This is called just once during each cluster lifetime, at postmaster
142  * startup time.
143  */
144 void
146 {
147  void *dsm_control_address = NULL;
148  uint32 maxitems;
149  Size segsize;
150 
152 
153  /*
154  * If we're using the mmap implementations, clean up any leftovers.
155  * Cleanup isn't needed on Windows, and happens earlier in startup for
156  * POSIX and System V shared memory, via a direct call to
157  * dsm_cleanup_using_control_segment.
158  */
161 
162  /* Determine size for new control segment. */
163  maxitems = PG_DYNSHMEM_FIXED_SLOTS
165  elog(DEBUG2, "dynamic shared memory system will support %u segments",
166  maxitems);
167  segsize = dsm_control_bytes_needed(maxitems);
168 
169  /*
170  * Loop until we find an unused identifier for the new control segment. We
171  * sometimes use 0 as a sentinel value indicating that no control segment
172  * is known to exist, so avoid using that value for a real control
173  * segment.
174  */
175  for (;;)
176  {
177  Assert(dsm_control_address == NULL);
181  continue;
183  &dsm_control_impl_private, &dsm_control_address,
185  break;
186  }
187  dsm_control = dsm_control_address;
189  elog(DEBUG2,
190  "created dynamic shared memory control segment %u (%zu bytes)",
191  dsm_control_handle, segsize);
193 
194  /* Initialize control segment. */
195  dsm_control->magic = PG_DYNSHMEM_CONTROL_MAGIC;
196  dsm_control->nitems = 0;
197  dsm_control->maxitems = maxitems;
198 }
199 
200 /*
201  * Determine whether the control segment from the previous postmaster
202  * invocation still exists. If so, remove the dynamic shared memory
203  * segments to which it refers, and then the control segment itself.
204  */
205 void
207 {
208  void *mapped_address = NULL;
209  void *junk_mapped_address = NULL;
210  void *impl_private = NULL;
211  void *junk_impl_private = NULL;
212  Size mapped_size = 0;
213  Size junk_mapped_size = 0;
214  uint32 nitems;
215  uint32 i;
216  dsm_control_header *old_control;
217 
218  /*
219  * Try to attach the segment. If this fails, it probably just means that
220  * the operating system has been rebooted and the segment no longer
221  * exists, or an unrelated process has used the same shm ID. So just fall
222  * out quietly.
223  */
224  if (!dsm_impl_op(DSM_OP_ATTACH, old_control_handle, 0, &impl_private,
225  &mapped_address, &mapped_size, DEBUG1))
226  return;
227 
228  /*
229  * We've managed to reattach it, but the contents might not be sane. If
230  * they aren't, we disregard the segment after all.
231  */
232  old_control = (dsm_control_header *) mapped_address;
233  if (!dsm_control_segment_sane(old_control, mapped_size))
234  {
235  dsm_impl_op(DSM_OP_DETACH, old_control_handle, 0, &impl_private,
236  &mapped_address, &mapped_size, LOG);
237  return;
238  }
239 
240  /*
241  * OK, the control segment looks basically valid, so we can use it to get
242  * a list of segments that need to be removed.
243  */
244  nitems = old_control->nitems;
245  for (i = 0; i < nitems; ++i)
246  {
247  dsm_handle handle;
248  uint32 refcnt;
249 
250  /* If the reference count is 0, the slot is actually unused. */
251  refcnt = old_control->item[i].refcnt;
252  if (refcnt == 0)
253  continue;
254 
255  /* Log debugging information. */
256  handle = old_control->item[i].handle;
257  elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u (reference count %u)",
258  handle, refcnt);
259 
260  /* Destroy the referenced segment. */
261  dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
262  &junk_mapped_address, &junk_mapped_size, LOG);
263  }
264 
265  /* Destroy the old control segment, too. */
266  elog(DEBUG2,
267  "cleaning up dynamic shared memory control segment with ID %u",
268  old_control_handle);
269  dsm_impl_op(DSM_OP_DESTROY, old_control_handle, 0, &impl_private,
270  &mapped_address, &mapped_size, LOG);
271 }
272 
273 /*
274  * When we're using the mmap shared memory implementation, "shared memory"
275  * segments might even manage to survive an operating system reboot.
276  * But there's no guarantee as to exactly what will survive: some segments
277  * may survive, and others may not, and the contents of some may be out
278  * of date. In particular, the control segment may be out of date, so we
279  * can't rely on it to figure out what to remove. However, since we know
280  * what directory contains the files we used as shared memory, we can simply
281  * scan the directory and blow everything away that shouldn't be there.
282  */
283 static void
285 {
286  DIR *dir;
287  struct dirent *dent;
288 
289  /* Scan the directory for something with a name of the correct format. */
291 
292  while ((dent = ReadDir(dir, PG_DYNSHMEM_DIR)) != NULL)
293  {
294  if (strncmp(dent->d_name, PG_DYNSHMEM_MMAP_FILE_PREFIX,
295  strlen(PG_DYNSHMEM_MMAP_FILE_PREFIX)) == 0)
296  {
297  char buf[MAXPGPATH + sizeof(PG_DYNSHMEM_DIR)];
298 
299  snprintf(buf, sizeof(buf), PG_DYNSHMEM_DIR "/%s", dent->d_name);
300 
301  elog(DEBUG2, "removing file \"%s\"", buf);
302 
303  /* We found a matching file; so remove it. */
304  if (unlink(buf) != 0)
305  ereport(ERROR,
307  errmsg("could not remove file \"%s\": %m", buf)));
308  }
309  }
310 
311  /* Cleanup complete. */
312  FreeDir(dir);
313 }
314 
315 /*
316  * At shutdown time, we iterate over the control segment and remove all
317  * remaining dynamic shared memory segments. We avoid throwing errors here;
318  * the postmaster is shutting down either way, and this is just non-critical
319  * resource cleanup.
320  */
321 static void
323 {
324  uint32 nitems;
325  uint32 i;
326  void *dsm_control_address;
327  void *junk_mapped_address = NULL;
328  void *junk_impl_private = NULL;
329  Size junk_mapped_size = 0;
331 
332  /*
333  * If some other backend exited uncleanly, it might have corrupted the
334  * control segment while it was dying. In that case, we warn and ignore
335  * the contents of the control segment. This may end up leaving behind
336  * stray shared memory segments, but there's not much we can do about that
337  * if the metadata is gone.
338  */
339  nitems = dsm_control->nitems;
341  {
342  ereport(LOG,
343  (errmsg("dynamic shared memory control segment is corrupt")));
344  return;
345  }
346 
347  /* Remove any remaining segments. */
348  for (i = 0; i < nitems; ++i)
349  {
350  dsm_handle handle;
351 
352  /* If the reference count is 0, the slot is actually unused. */
353  if (dsm_control->item[i].refcnt == 0)
354  continue;
355 
356  /* Log debugging information. */
357  handle = dsm_control->item[i].handle;
358  elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u",
359  handle);
360 
361  /* Destroy the segment. */
362  dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
363  &junk_mapped_address, &junk_mapped_size, LOG);
364  }
365 
366  /* Remove the control segment itself. */
367  elog(DEBUG2,
368  "cleaning up dynamic shared memory control segment with ID %u",
370  dsm_control_address = dsm_control;
372  &dsm_control_impl_private, &dsm_control_address,
374  dsm_control = dsm_control_address;
375  shim->dsm_control = 0;
376 }
377 
378 /*
379  * Prepare this backend for dynamic shared memory usage. Under EXEC_BACKEND,
380  * we must reread the state file and map the control segment; in other cases,
381  * we'll have inherited the postmaster's mapping and global variables.
382  */
383 static void
385 {
386 #ifdef EXEC_BACKEND
387  {
388  void *control_address = NULL;
389 
390  /* Attach control segment. */
393  &dsm_control_impl_private, &control_address,
395  dsm_control = control_address;
396  /* If control segment doesn't look sane, something is badly wrong. */
398  {
400  &dsm_control_impl_private, &control_address,
402  ereport(FATAL,
403  (errcode(ERRCODE_INTERNAL_ERROR),
404  errmsg("dynamic shared memory control segment is not valid")));
405  }
406  }
407 #endif
408 
409  dsm_init_done = true;
410 }
411 
412 #ifdef EXEC_BACKEND
413 /*
414  * When running under EXEC_BACKEND, we get a callback here when the main
415  * shared memory segment is re-attached, so that we can record the control
416  * handle retrieved from it.
417  */
418 void
419 dsm_set_control_handle(dsm_handle h)
420 {
421  Assert(dsm_control_handle == 0 && h != 0);
422  dsm_control_handle = h;
423 }
424 #endif
425 
426 /*
427  * Create a new dynamic shared memory segment.
428  *
429  * If there is a non-NULL CurrentResourceOwner, the new segment is associated
430  * with it and must be detached before the resource owner releases, or a
431  * warning will be logged. If CurrentResourceOwner is NULL, the segment
432  * remains attached until explicitly detached or the session ends.
433  * Creating with a NULL CurrentResourceOwner is equivalent to creating
434  * with a non-NULL CurrentResourceOwner and then calling dsm_pin_mapping.
435  */
436 dsm_segment *
437 dsm_create(Size size, int flags)
438 {
439  dsm_segment *seg;
440  uint32 i;
441  uint32 nitems;
442 
443  /* Unsafe in postmaster (and pointless in a stand-alone backend). */
445 
446  if (!dsm_init_done)
448 
449  /* Create a new segment descriptor. */
450  seg = dsm_create_descriptor();
451 
452  /* Loop until we find an unused segment identifier. */
453  for (;;)
454  {
455  Assert(seg->mapped_address == NULL && seg->mapped_size == 0);
456  seg->handle = random();
457  if (seg->handle == DSM_HANDLE_INVALID) /* Reserve sentinel */
458  continue;
459  if (dsm_impl_op(DSM_OP_CREATE, seg->handle, size, &seg->impl_private,
460  &seg->mapped_address, &seg->mapped_size, ERROR))
461  break;
462  }
463 
464  /* Lock the control segment so we can register the new segment. */
465  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
466 
467  /* Search the control segment for an unused slot. */
468  nitems = dsm_control->nitems;
469  for (i = 0; i < nitems; ++i)
470  {
471  if (dsm_control->item[i].refcnt == 0)
472  {
473  dsm_control->item[i].handle = seg->handle;
474  /* refcnt of 1 triggers destruction, so start at 2 */
475  dsm_control->item[i].refcnt = 2;
476  dsm_control->item[i].impl_private_pm_handle = NULL;
477  dsm_control->item[i].pinned = false;
478  seg->control_slot = i;
479  LWLockRelease(DynamicSharedMemoryControlLock);
480  return seg;
481  }
482  }
483 
484  /* Verify that we can support an additional mapping. */
485  if (nitems >= dsm_control->maxitems)
486  {
487  if ((flags & DSM_CREATE_NULL_IF_MAXSEGMENTS) != 0)
488  {
489  LWLockRelease(DynamicSharedMemoryControlLock);
491  &seg->mapped_address, &seg->mapped_size, WARNING);
492  if (seg->resowner != NULL)
493  ResourceOwnerForgetDSM(seg->resowner, seg);
494  dlist_delete(&seg->node);
495  pfree(seg);
496  return NULL;
497  }
498  ereport(ERROR,
499  (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
500  errmsg("too many dynamic shared memory segments")));
501  }
502 
503  /* Enter the handle into a new array slot. */
504  dsm_control->item[nitems].handle = seg->handle;
505  /* refcnt of 1 triggers destruction, so start at 2 */
506  dsm_control->item[nitems].refcnt = 2;
507  dsm_control->item[nitems].impl_private_pm_handle = NULL;
508  dsm_control->item[nitems].pinned = false;
509  seg->control_slot = nitems;
510  dsm_control->nitems++;
511  LWLockRelease(DynamicSharedMemoryControlLock);
512 
513  return seg;
514 }
515 
516 /*
517  * Attach a dynamic shared memory segment.
518  *
519  * See comments for dsm_segment_handle() for an explanation of how this
520  * is intended to be used.
521  *
522  * This function will return NULL if the segment isn't known to the system.
523  * This can happen if we're asked to attach the segment, but then everyone
524  * else detaches it (causing it to be destroyed) before we get around to
525  * attaching it.
526  *
527  * If there is a non-NULL CurrentResourceOwner, the attached segment is
528  * associated with it and must be detached before the resource owner releases,
529  * or a warning will be logged. Otherwise the segment remains attached until
530  * explicitly detached or the session ends. See the note atop dsm_create().
531  */
532 dsm_segment *
534 {
535  dsm_segment *seg;
536  dlist_iter iter;
537  uint32 i;
538  uint32 nitems;
539 
540  /* Unsafe in postmaster (and pointless in a stand-alone backend). */
542 
543  if (!dsm_init_done)
545 
546  /*
547  * Since this is just a debugging cross-check, we could leave it out
548  * altogether, or include it only in assert-enabled builds. But since the
549  * list of attached segments should normally be very short, let's include
550  * it always for right now.
551  *
552  * If you're hitting this error, you probably want to attempt to find an
553  * existing mapping via dsm_find_mapping() before calling dsm_attach() to
554  * create a new one.
555  */
556  dlist_foreach(iter, &dsm_segment_list)
557  {
558  seg = dlist_container(dsm_segment, node, iter.cur);
559  if (seg->handle == h)
560  elog(ERROR, "can't attach the same segment more than once");
561  }
562 
563  /* Create a new segment descriptor. */
564  seg = dsm_create_descriptor();
565  seg->handle = h;
566 
567  /* Bump reference count for this segment in shared memory. */
568  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
569  nitems = dsm_control->nitems;
570  for (i = 0; i < nitems; ++i)
571  {
572  /*
573  * If the reference count is 0, the slot is actually unused. If the
574  * reference count is 1, the slot is still in use, but the segment is
575  * in the process of going away; even if the handle matches, another
576  * slot may already have started using the same handle value by
577  * coincidence so we have to keep searching.
578  */
579  if (dsm_control->item[i].refcnt <= 1)
580  continue;
581 
582  /* If the handle doesn't match, it's not the slot we want. */
583  if (dsm_control->item[i].handle != seg->handle)
584  continue;
585 
586  /* Otherwise we've found a match. */
587  dsm_control->item[i].refcnt++;
588  seg->control_slot = i;
589  break;
590  }
591  LWLockRelease(DynamicSharedMemoryControlLock);
592 
593  /*
594  * If we didn't find the handle we're looking for in the control segment,
595  * it probably means that everyone else who had it mapped, including the
596  * original creator, died before we got to this point. It's up to the
597  * caller to decide what to do about that.
598  */
600  {
601  dsm_detach(seg);
602  return NULL;
603  }
604 
605  /* Here's where we actually try to map the segment. */
607  &seg->mapped_address, &seg->mapped_size, ERROR);
608 
609  return seg;
610 }
611 
612 /*
613  * At backend shutdown time, detach any segments that are still attached.
614  * (This is similar to dsm_detach_all, except that there's no reason to
615  * unmap the control segment before exiting, so we don't bother.)
616  */
617 void
619 {
620  while (!dlist_is_empty(&dsm_segment_list))
621  {
622  dsm_segment *seg;
623 
624  seg = dlist_head_element(dsm_segment, node, &dsm_segment_list);
625  dsm_detach(seg);
626  }
627 }
628 
629 /*
630  * Detach all shared memory segments, including the control segments. This
631  * should be called, along with PGSharedMemoryDetach, in processes that
632  * might inherit mappings but are not intended to be connected to dynamic
633  * shared memory.
634  */
635 void
637 {
638  void *control_address = dsm_control;
639 
640  while (!dlist_is_empty(&dsm_segment_list))
641  {
642  dsm_segment *seg;
643 
644  seg = dlist_head_element(dsm_segment, node, &dsm_segment_list);
645  dsm_detach(seg);
646  }
647 
648  if (control_address != NULL)
650  &dsm_control_impl_private, &control_address,
652 }
653 
654 /*
655  * Detach from a shared memory segment, destroying the segment if we
656  * remove the last reference.
657  *
658  * This function should never fail. It will often be invoked when aborting
659  * a transaction, and a further error won't serve any purpose. It's not a
660  * complete disaster if we fail to unmap or destroy the segment; it means a
661  * resource leak, but that doesn't necessarily preclude further operations.
662  */
663 void
665 {
666  /*
667  * Invoke registered callbacks. Just in case one of those callbacks
668  * throws a further error that brings us back here, pop the callback
669  * before invoking it, to avoid infinite error recursion.
670  */
671  while (!slist_is_empty(&seg->on_detach))
672  {
673  slist_node *node;
675  on_dsm_detach_callback function;
676  Datum arg;
677 
678  node = slist_pop_head_node(&seg->on_detach);
680  function = cb->function;
681  arg = cb->arg;
682  pfree(cb);
683 
684  function(seg, arg);
685  }
686 
687  /*
688  * Try to remove the mapping, if one exists. Normally, there will be, but
689  * maybe not, if we failed partway through a create or attach operation.
690  * We remove the mapping before decrementing the reference count so that
691  * the process that sees a zero reference count can be certain that no
692  * remaining mappings exist. Even if this fails, we pretend that it
693  * works, because retrying is likely to fail in the same way.
694  */
695  if (seg->mapped_address != NULL)
696  {
698  &seg->mapped_address, &seg->mapped_size, WARNING);
699  seg->impl_private = NULL;
700  seg->mapped_address = NULL;
701  seg->mapped_size = 0;
702  }
703 
704  /* Reduce reference count, if we previously increased it. */
706  {
707  uint32 refcnt;
708  uint32 control_slot = seg->control_slot;
709 
710  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
711  Assert(dsm_control->item[control_slot].handle == seg->handle);
712  Assert(dsm_control->item[control_slot].refcnt > 1);
713  refcnt = --dsm_control->item[control_slot].refcnt;
715  LWLockRelease(DynamicSharedMemoryControlLock);
716 
717  /* If new reference count is 1, try to destroy the segment. */
718  if (refcnt == 1)
719  {
720  /* A pinned segment should never reach 1. */
721  Assert(!dsm_control->item[control_slot].pinned);
722 
723  /*
724  * If we fail to destroy the segment here, or are killed before we
725  * finish doing so, the reference count will remain at 1, which
726  * will mean that nobody else can attach to the segment. At
727  * postmaster shutdown time, or when a new postmaster is started
728  * after a hard kill, another attempt will be made to remove the
729  * segment.
730  *
731  * The main case we're worried about here is being killed by a
732  * signal before we can finish removing the segment. In that
733  * case, it's important to be sure that the segment still gets
734  * removed. If we actually fail to remove the segment for some
735  * other reason, the postmaster may not have any better luck than
736  * we did. There's not much we can do about that, though.
737  */
738  if (dsm_impl_op(DSM_OP_DESTROY, seg->handle, 0, &seg->impl_private,
739  &seg->mapped_address, &seg->mapped_size, WARNING))
740  {
741  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
742  Assert(dsm_control->item[control_slot].handle == seg->handle);
743  Assert(dsm_control->item[control_slot].refcnt == 1);
744  dsm_control->item[control_slot].refcnt = 0;
745  LWLockRelease(DynamicSharedMemoryControlLock);
746  }
747  }
748  }
749 
750  /* Clean up our remaining backend-private data structures. */
751  if (seg->resowner != NULL)
752  ResourceOwnerForgetDSM(seg->resowner, seg);
753  dlist_delete(&seg->node);
754  pfree(seg);
755 }
756 
757 /*
758  * Keep a dynamic shared memory mapping until end of session.
759  *
760  * By default, mappings are owned by the current resource owner, which
761  * typically means they stick around for the duration of the current query
762  * only.
763  */
764 void
766 {
767  if (seg->resowner != NULL)
768  {
769  ResourceOwnerForgetDSM(seg->resowner, seg);
770  seg->resowner = NULL;
771  }
772 }
773 
774 /*
775  * Arrange to remove a dynamic shared memory mapping at cleanup time.
776  *
777  * dsm_pin_mapping() can be used to preserve a mapping for the entire
778  * lifetime of a process; this function reverses that decision, making
779  * the segment owned by the current resource owner. This may be useful
780  * just before performing some operation that will invalidate the segment
781  * for future use by this backend.
782  */
783 void
785 {
786  Assert(seg->resowner == NULL);
790 }
791 
792 /*
793  * Keep a dynamic shared memory segment until postmaster shutdown, or until
794  * dsm_unpin_segment is called.
795  *
796  * This function should not be called more than once per segment, unless the
797  * segment is explicitly unpinned with dsm_unpin_segment in between calls.
798  *
799  * Note that this function does not arrange for the current process to
800  * keep the segment mapped indefinitely; if that behavior is desired,
801  * dsm_pin_mapping() should be used from each process that needs to
802  * retain the mapping.
803  */
804 void
806 {
807  void *handle;
808 
809  /*
810  * Bump reference count for this segment in shared memory. This will
811  * ensure that even if there is no session which is attached to this
812  * segment, it will remain until postmaster shutdown or an explicit call
813  * to unpin.
814  */
815  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
816  if (dsm_control->item[seg->control_slot].pinned)
817  elog(ERROR, "cannot pin a segment that is already pinned");
818  dsm_impl_pin_segment(seg->handle, seg->impl_private, &handle);
819  dsm_control->item[seg->control_slot].pinned = true;
820  dsm_control->item[seg->control_slot].refcnt++;
821  dsm_control->item[seg->control_slot].impl_private_pm_handle = handle;
822  LWLockRelease(DynamicSharedMemoryControlLock);
823 }
824 
825 /*
826  * Unpin a dynamic shared memory segment that was previously pinned with
827  * dsm_pin_segment. This function should not be called unless dsm_pin_segment
828  * was previously called for this segment.
829  *
830  * The argument is a dsm_handle rather than a dsm_segment in case you want
831  * to unpin a segment to which you haven't attached. This turns out to be
832  * useful if, for example, a reference to one shared memory segment is stored
833  * within another shared memory segment. You might want to unpin the
834  * referenced segment before destroying the referencing segment.
835  */
836 void
838 {
839  uint32 control_slot = INVALID_CONTROL_SLOT;
840  bool destroy = false;
841  uint32 i;
842 
843  /* Find the control slot for the given handle. */
844  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
845  for (i = 0; i < dsm_control->nitems; ++i)
846  {
847  /* Skip unused slots and segments that are concurrently going away. */
848  if (dsm_control->item[i].refcnt <= 1)
849  continue;
850 
851  /* If we've found our handle, we can stop searching. */
852  if (dsm_control->item[i].handle == handle)
853  {
854  control_slot = i;
855  break;
856  }
857  }
858 
859  /*
860  * We should definitely have found the slot, and it should not already be
861  * in the process of going away, because this function should only be
862  * called on a segment which is pinned.
863  */
864  if (control_slot == INVALID_CONTROL_SLOT)
865  elog(ERROR, "cannot unpin unknown segment handle");
866  if (!dsm_control->item[control_slot].pinned)
867  elog(ERROR, "cannot unpin a segment that is not pinned");
868  Assert(dsm_control->item[control_slot].refcnt > 1);
869 
870  /*
871  * Allow implementation-specific code to run. We have to do this before
872  * releasing the lock, because impl_private_pm_handle may get modified by
873  * dsm_impl_unpin_segment.
874  */
875  dsm_impl_unpin_segment(handle,
876  &dsm_control->item[control_slot].impl_private_pm_handle);
877 
878  /* Note that 1 means no references (0 means unused slot). */
879  if (--dsm_control->item[control_slot].refcnt == 1)
880  destroy = true;
881  dsm_control->item[control_slot].pinned = false;
882 
883  /* Now we can release the lock. */
884  LWLockRelease(DynamicSharedMemoryControlLock);
885 
886  /* Clean up resources if that was the last reference. */
887  if (destroy)
888  {
889  void *junk_impl_private = NULL;
890  void *junk_mapped_address = NULL;
891  Size junk_mapped_size = 0;
892 
893  /*
894  * For an explanation of how error handling works in this case, see
895  * comments in dsm_detach. Note that if we reach this point, the
896  * current process certainly does not have the segment mapped, because
897  * if it did, the reference count would have still been greater than 1
898  * even after releasing the reference count held by the pin. The fact
899  * that there can't be a dsm_segment for this handle makes it OK to
900  * pass the mapped size, mapped address, and private data as NULL
901  * here.
902  */
903  if (dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
904  &junk_mapped_address, &junk_mapped_size, WARNING))
905  {
906  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
907  Assert(dsm_control->item[control_slot].handle == handle);
908  Assert(dsm_control->item[control_slot].refcnt == 1);
909  dsm_control->item[control_slot].refcnt = 0;
910  LWLockRelease(DynamicSharedMemoryControlLock);
911  }
912  }
913 }
914 
915 /*
916  * Find an existing mapping for a shared memory segment, if there is one.
917  */
918 dsm_segment *
920 {
921  dlist_iter iter;
922  dsm_segment *seg;
923 
924  dlist_foreach(iter, &dsm_segment_list)
925  {
926  seg = dlist_container(dsm_segment, node, iter.cur);
927  if (seg->handle == h)
928  return seg;
929  }
930 
931  return NULL;
932 }
933 
934 /*
935  * Get the address at which a dynamic shared memory segment is mapped.
936  */
937 void *
939 {
940  Assert(seg->mapped_address != NULL);
941  return seg->mapped_address;
942 }
943 
944 /*
945  * Get the size of a mapping.
946  */
947 Size
949 {
950  Assert(seg->mapped_address != NULL);
951  return seg->mapped_size;
952 }
953 
954 /*
955  * Get a handle for a mapping.
956  *
957  * To establish communication via dynamic shared memory between two backends,
958  * one of them should first call dsm_create() to establish a new shared
959  * memory mapping. That process should then call dsm_segment_handle() to
960  * obtain a handle for the mapping, and pass that handle to the
961  * coordinating backend via some means (e.g. bgw_main_arg, or via the
962  * main shared memory segment). The recipient, once in possession of the
963  * handle, should call dsm_attach().
964  */
967 {
968  return seg->handle;
969 }
970 
971 /*
972  * Register an on-detach callback for a dynamic shared memory segment.
973  */
974 void
976 {
978 
981  cb->function = function;
982  cb->arg = arg;
983  slist_push_head(&seg->on_detach, &cb->node);
984 }
985 
986 /*
987  * Unregister an on-detach callback for a dynamic shared memory segment.
988  */
989 void
991  Datum arg)
992 {
993  slist_mutable_iter iter;
994 
995  slist_foreach_modify(iter, &seg->on_detach)
996  {
998 
1000  if (cb->function == function && cb->arg == arg)
1001  {
1002  slist_delete_current(&iter);
1003  pfree(cb);
1004  break;
1005  }
1006  }
1007 }
1008 
1009 /*
1010  * Discard all registered on-detach callbacks without executing them.
1011  */
1012 void
1014 {
1015  dlist_iter iter;
1016 
1017  dlist_foreach(iter, &dsm_segment_list)
1018  {
1020 
1021  /* Throw away explicit on-detach actions one by one. */
1022  while (!slist_is_empty(&seg->on_detach))
1023  {
1024  slist_node *node;
1026 
1027  node = slist_pop_head_node(&seg->on_detach);
1028  cb = slist_container(dsm_segment_detach_callback, node, node);
1029  pfree(cb);
1030  }
1031 
1032  /*
1033  * Decrementing the reference count is a sort of implicit on-detach
1034  * action; make sure we don't do that, either.
1035  */
1037  }
1038 }
1039 
1040 /*
1041  * Create a segment descriptor.
1042  */
1043 static dsm_segment *
1045 {
1046  dsm_segment *seg;
1047 
1050 
1052  dlist_push_head(&dsm_segment_list, &seg->node);
1053 
1054  /* seg->handle must be initialized by the caller */
1056  seg->impl_private = NULL;
1057  seg->mapped_address = NULL;
1058  seg->mapped_size = 0;
1059 
1063 
1064  slist_init(&seg->on_detach);
1065 
1066  return seg;
1067 }
1068 
1069 /*
1070  * Sanity check a control segment.
1071  *
1072  * The goal here isn't to detect everything that could possibly be wrong with
1073  * the control segment; there's not enough information for that. Rather, the
1074  * goal is to make sure that someone can iterate over the items in the segment
1075  * without overrunning the end of the mapping and crashing. We also check
1076  * the magic number since, if that's messed up, this may not even be one of
1077  * our segments at all.
1078  */
1079 static bool
1081 {
1082  if (mapped_size < offsetof(dsm_control_header, item))
1083  return false; /* Mapped size too short to read header. */
1084  if (control->magic != PG_DYNSHMEM_CONTROL_MAGIC)
1085  return false; /* Magic number doesn't match. */
1086  if (dsm_control_bytes_needed(control->maxitems) > mapped_size)
1087  return false; /* Max item count won't fit in map. */
1088  if (control->nitems > control->maxitems)
1089  return false; /* Overfull. */
1090  return true;
1091 }
1092 
1093 /*
1094  * Compute the number of control-segment bytes needed to store a given
1095  * number of items.
1096  */
1097 static uint64
1099 {
1100  return offsetof(dsm_control_header, item)
1101  + sizeof(dsm_control_item) * (uint64) nitems;
1102 }
static void * dsm_control_impl_private
Definition: dsm.c:136
#define DSM_IMPL_MMAP
Definition: dsm_impl.h:20
void dsm_postmaster_startup(PGShmemHeader *shim)
Definition: dsm.c:145
dlist_node node
Definition: dsm.c:69
void ResourceOwnerRememberDSM(ResourceOwner owner, dsm_segment *seg)
Definition: resowner.c:1289
dsm_segment * dsm_find_mapping(dsm_handle h)
Definition: dsm.c:919
#define DEBUG1
Definition: elog.h:25
void dsm_impl_unpin_segment(dsm_handle handle, void **impl_private)
Definition: dsm_impl.c:993
void reset_on_dsm_detach(void)
Definition: dsm.c:1013
uint32 maxitems
Definition: dsm.c:93
#define PG_DYNSHMEM_SLOTS_PER_BACKEND
Definition: dsm.c:54
uint32 dsm_handle
Definition: dsm_impl.h:54
#define PG_DYNSHMEM_DIR
Definition: dsm_impl.h:50
#define PG_DYNSHMEM_FIXED_SLOTS
Definition: dsm.c:53
static void dlist_push_head(dlist_head *head, dlist_node *node)
Definition: ilist.h:300
Size mapped_size
Definition: dsm.c:75
void dsm_cleanup_using_control_segment(dsm_handle old_control_handle)
Definition: dsm.c:206
dsm_handle dsm_control
Definition: pg_shmem.h:36
#define PointerGetDatum(X)
Definition: postgres.h:556
dsm_handle handle
Definition: dsm.c:82
long random(void)
Definition: random.c:22
#define dlist_foreach(iter, lhead)
Definition: ilist.h:507
ResourceOwner CurrentResourceOwner
Definition: resowner.c:142
struct dsm_control_header dsm_control_header
slist_node * cur
Definition: ilist.h:241
dsm_segment * dsm_attach(dsm_handle h)
Definition: dsm.c:533
static dsm_handle dsm_control_handle
Definition: dsm.c:133
static void slist_push_head(slist_head *head, slist_node *node)
Definition: ilist.h:574
dsm_handle dsm_segment_handle(dsm_segment *seg)
Definition: dsm.c:966
int errcode(int sqlerrcode)
Definition: elog.c:570
static void dsm_backend_startup(void)
Definition: dsm.c:384
static dlist_head dsm_segment_list
Definition: dsm.c:124
#define LOG
Definition: elog.h:26
void on_dsm_detach(dsm_segment *seg, on_dsm_detach_callback function, Datum arg)
Definition: dsm.c:975
static uint64 dsm_control_bytes_needed(uint32 nitems)
Definition: dsm.c:1098
Definition: dirent.h:9
uint32 nitems
Definition: dsm.c:92
bool pinned
Definition: dsm.c:85
#define slist_foreach_modify(iter, lhead)
Definition: ilist.h:716
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1726
void dsm_pin_segment(dsm_segment *seg)
Definition: dsm.c:805
#define DSM_HANDLE_INVALID
Definition: dsm.h:23
#define dlist_container(type, membername, ptr)
Definition: ilist.h:477
void pfree(void *pointer)
Definition: mcxt.c:1031
static void slist_init(slist_head *head)
Definition: ilist.h:554
Definition: dirent.c:25
#define ERROR
Definition: elog.h:43
uint32 magic
Definition: dsm.c:91
#define FATAL
Definition: elog.h:52
#define MAXPGPATH
int MaxBackends
Definition: globals.c:135
on_dsm_detach_callback function
Definition: dsm.c:61
#define DEBUG2
Definition: elog.h:24
void dsm_pin_mapping(dsm_segment *seg)
Definition: dsm.c:765
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:361
static char * buf
Definition: pg_test_fsync.c:68
#define DSM_CREATE_NULL_IF_MAXSEGMENTS
Definition: dsm.h:20
void ResourceOwnerEnlargeDSMs(ResourceOwner owner)
Definition: resowner.c:1278
bool IsUnderPostmaster
Definition: globals.c:109
dsm_control_item item[FLEXIBLE_ARRAY_MEMBER]
Definition: dsm.c:94
int errcode_for_file_access(void)
Definition: elog.c:593
static void dsm_postmaster_shutdown(int code, Datum arg)
Definition: dsm.c:322
int dynamic_shared_memory_type
Definition: dsm_impl.c:114
unsigned int uint32
Definition: c.h:358
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2468
void dsm_unpin_segment(dsm_handle handle)
Definition: dsm.c:837
void * mapped_address
Definition: dsm.c:74
static void dlist_delete(dlist_node *node)
Definition: ilist.h:358
#define ereport(elevel, rest)
Definition: elog.h:141
static bool dsm_control_segment_sane(dsm_control_header *control, Size mapped_size)
Definition: dsm.c:1080
MemoryContext TopMemoryContext
Definition: mcxt.c:44
void dsm_backend_shutdown(void)
Definition: dsm.c:618
static slist_node * slist_pop_head_node(slist_head *head)
Definition: ilist.h:596
slist_head on_detach
Definition: dsm.c:76
static bool slist_is_empty(slist_head *head)
Definition: ilist.h:563
#define DLIST_STATIC_INIT(name)
Definition: ilist.h:248
#define WARNING
Definition: elog.h:40
dsm_handle handle
Definition: dsm.c:71
#define dlist_head_element(type, membername, lhead)
Definition: ilist.h:487
#define slist_container(type, membername, ptr)
Definition: ilist.h:674
uintptr_t Datum
Definition: postgres.h:367
dsm_segment * dsm_create(Size size, int flags)
Definition: dsm.c:437
static void dsm_cleanup_for_mmap(void)
Definition: dsm.c:284
dlist_node * cur
Definition: ilist.h:161
void dsm_unpin_mapping(dsm_segment *seg)
Definition: dsm.c:784
static dsm_control_header * dsm_control
Definition: dsm.c:134
ResourceOwner resowner
Definition: dsm.c:70
void * dsm_segment_address(dsm_segment *seg)
Definition: dsm.c:938
#define Assert(condition)
Definition: c.h:732
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2534
void * impl_private
Definition: dsm.c:73
static bool dlist_is_empty(dlist_head *head)
Definition: ilist.h:289
void ResourceOwnerForgetDSM(ResourceOwner owner, dsm_segment *seg)
Definition: resowner.c:1298
size_t Size
Definition: c.h:466
uint32 control_slot
Definition: dsm.c:72
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1122
void dsm_detach_all(void)
Definition: dsm.c:636
#define DatumGetPointer(X)
Definition: postgres.h:549
static dsm_segment * dsm_create_descriptor(void)
Definition: dsm.c:1044
uint32 refcnt
Definition: dsm.c:83
static bool dsm_init_done
Definition: dsm.c:105
void dsm_impl_pin_segment(dsm_handle handle, void *impl_private, void **impl_private_pm_handle)
Definition: dsm_impl.c:943
void dsm_detach(dsm_segment *seg)
Definition: dsm.c:664
int errmsg(const char *fmt,...)
Definition: elog.c:784
struct dsm_control_item dsm_control_item
static Size dsm_control_mapped_size
Definition: dsm.c:135
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:771
struct dsm_segment_detach_callback dsm_segment_detach_callback
#define elog(elevel,...)
Definition: elog.h:226
#define PG_DYNSHMEM_MMAP_FILE_PREFIX
Definition: dsm_impl.h:51
int i
void * impl_private_pm_handle
Definition: dsm.c:84
static void slist_delete_current(slist_mutable_iter *iter)
Definition: ilist.h:652
char d_name[MAX_PATH]
Definition: dirent.h:14
#define INVALID_CONTROL_SLOT
Definition: dsm.c:56
void(* on_dsm_detach_callback)(dsm_segment *, Datum arg)
Definition: dsm.h:54
bool dsm_impl_op(dsm_op op, dsm_handle handle, Size request_size, void **impl_private, void **mapped_address, Size *mapped_size, int elevel)
Definition: dsm_impl.c:158
void cancel_on_dsm_detach(dsm_segment *seg, on_dsm_detach_callback function, Datum arg)
Definition: dsm.c:990
Size dsm_segment_map_length(dsm_segment *seg)
Definition: dsm.c:948
#define PG_DYNSHMEM_CONTROL_MAGIC
Definition: dsm.c:46
#define snprintf
Definition: port.h:192
int FreeDir(DIR *dir)
Definition: fd.c:2586
#define offsetof(type, field)
Definition: c.h:655