PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
dsm.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * dsm.c
4  * manage dynamic shared memory segments
5  *
6  * This file provides a set of services to make programming with dynamic
7  * shared memory segments more convenient. Unlike the low-level
8  * facilities provided by dsm_impl.h and dsm_impl.c, mappings and segments
9  * created using this module will be cleaned up automatically. Mappings
10  * will be removed when the resource owner under which they were created
11  * is cleaned up, unless dsm_pin_mapping() is used, in which case they
12  * have session lifespan. Segments will be removed when there are no
13  * remaining mappings, or at postmaster shutdown in any case. After a
14  * hard postmaster crash, remaining segments will be removed, if they
15  * still exist, at the next postmaster startup.
16  *
17  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
18  * Portions Copyright (c) 1994, Regents of the University of California
19  *
20  *
21  * IDENTIFICATION
22  * src/backend/storage/ipc/dsm.c
23  *
24  *-------------------------------------------------------------------------
25  */
26 
27 #include "postgres.h"
28 
29 #include <fcntl.h>
30 #include <string.h>
31 #include <unistd.h>
32 #ifndef WIN32
33 #include <sys/mman.h>
34 #endif
35 #include <sys/stat.h>
36 
37 #include "lib/ilist.h"
38 #include "miscadmin.h"
39 #include "storage/dsm.h"
40 #include "storage/ipc.h"
41 #include "storage/lwlock.h"
42 #include "storage/pg_shmem.h"
43 #include "utils/guc.h"
44 #include "utils/memutils.h"
45 #include "utils/resowner_private.h"
46 
#define PG_DYNSHMEM_CONTROL_MAGIC		0x9a503d32

/*
 * There's no point in getting too cheap here, because the minimum allocation
 * is one OS page, which is probably at least 4KB and could easily be as high
 * as 64KB.  Each currently sizeof(dsm_control_item), currently 8 bytes.
 */
#define PG_DYNSHMEM_FIXED_SLOTS			64
#define PG_DYNSHMEM_SLOTS_PER_BACKEND	2

/* Sentinel meaning "this backend-local segment has no control-segment slot". */
#define INVALID_CONTROL_SLOT		((uint32) -1)
58 
59 /* Backend-local tracking for on-detach callbacks. */
61 {
66 
67 /* Backend-local state for a dynamic shared memory segment. */
69 {
70  dlist_node node; /* List link in dsm_segment_list. */
71  ResourceOwner resowner; /* Resource owner. */
72  dsm_handle handle; /* Segment name. */
73  uint32 control_slot; /* Slot in control segment. */
74  void *impl_private; /* Implementation-specific private data. */
75  void *mapped_address; /* Mapping address, or NULL if unmapped. */
76  Size mapped_size; /* Size of our mapping. */
77  slist_head on_detach; /* On-detach callbacks. */
78 };
79 
80 /* Shared-memory state for a dynamic shared memory segment. */
81 typedef struct dsm_control_item
82 {
84  uint32 refcnt; /* 2+ = active, 1 = moribund, 0 = gone */
85  void *impl_private_pm_handle; /* only needed on Windows */
86  bool pinned;
88 
89 /* Layout of the dynamic shared memory control segment. */
90 typedef struct dsm_control_header
91 {
95  dsm_control_item item[FLEXIBLE_ARRAY_MEMBER];
97 
98 static void dsm_cleanup_for_mmap(void);
99 static void dsm_postmaster_shutdown(int code, Datum arg);
100 static dsm_segment *dsm_create_descriptor(void);
101 static bool dsm_control_segment_sane(dsm_control_header *control,
102  Size mapped_size);
103 static uint64 dsm_control_bytes_needed(uint32 nitems);
104 
105 /* Has this backend initialized the dynamic shared memory system yet? */
106 static bool dsm_init_done = false;
107 
108 /*
109  * List of dynamic shared memory segments used by this backend.
110  *
111  * At process exit time, we must decrement the reference count of each
112  * segment we have attached; this list makes it possible to find all such
113  * segments.
114  *
115  * This list should always be empty in the postmaster. We could probably
116  * allow the postmaster to map dynamic shared memory segments before it
117  * begins to start child processes, provided that each process adjusted
118  * the reference counts for those segments in the control segment at
119  * startup time, but there's no obvious need for such a facility, which
120  * would also be complex to handle in the EXEC_BACKEND case. Once the
121  * postmaster has begun spawning children, there's an additional problem:
122  * each new mapping would require an update to the control segment,
123  * which requires locking, in which the postmaster must not be involved.
124  */
125 static dlist_head dsm_segment_list = DLIST_STATIC_INIT(dsm_segment_list);
126 
127 /*
128  * Control segment information.
129  *
130  * Unlike ordinary shared memory segments, the control segment is not
131  * reference counted; instead, it lasts for the postmaster's entire
132  * life cycle. For simplicity, it doesn't have a dsm_segment object either.
133  */
138 
139 /*
140  * Start up the dynamic shared memory system.
141  *
142  * This is called just once during each cluster lifetime, at postmaster
143  * startup time.
144  */
145 void
147 {
148  void *dsm_control_address = NULL;
149  uint32 maxitems;
150  Size segsize;
151 
153 
154  /* If dynamic shared memory is disabled, there's nothing to do. */
156  return;
157 
158  /*
159  * If we're using the mmap implementations, clean up any leftovers.
160  * Cleanup isn't needed on Windows, and happens earlier in startup for
161  * POSIX and System V shared memory, via a direct call to
162  * dsm_cleanup_using_control_segment.
163  */
166 
167  /* Determine size for new control segment. */
168  maxitems = PG_DYNSHMEM_FIXED_SLOTS
170  elog(DEBUG2, "dynamic shared memory system will support %u segments",
171  maxitems);
172  segsize = dsm_control_bytes_needed(maxitems);
173 
174  /*
175  * Loop until we find an unused identifier for the new control segment. We
176  * sometimes use 0 as a sentinel value indicating that no control segment
177  * is known to exist, so avoid using that value for a real control
178  * segment.
179  */
180  for (;;)
181  {
182  Assert(dsm_control_address == NULL);
186  continue;
188  &dsm_control_impl_private, &dsm_control_address,
190  break;
191  }
192  dsm_control = dsm_control_address;
194  elog(DEBUG2,
195  "created dynamic shared memory control segment %u (%zu bytes)",
196  dsm_control_handle, segsize);
198 
199  /* Initialize control segment. */
200  dsm_control->magic = PG_DYNSHMEM_CONTROL_MAGIC;
201  dsm_control->nitems = 0;
202  dsm_control->maxitems = maxitems;
203 }
204 
205 /*
206  * Determine whether the control segment from the previous postmaster
207  * invocation still exists. If so, remove the dynamic shared memory
208  * segments to which it refers, and then the control segment itself.
209  */
210 void
212 {
213  void *mapped_address = NULL;
214  void *junk_mapped_address = NULL;
215  void *impl_private = NULL;
216  void *junk_impl_private = NULL;
217  Size mapped_size = 0;
218  Size junk_mapped_size = 0;
219  uint32 nitems;
220  uint32 i;
221  dsm_control_header *old_control;
222 
223  /* If dynamic shared memory is disabled, there's nothing to do. */
225  return;
226 
227  /*
228  * Try to attach the segment. If this fails, it probably just means that
229  * the operating system has been rebooted and the segment no longer
230  * exists, or an unrelated process has used the same shm ID. So just fall
231  * out quietly.
232  */
233  if (!dsm_impl_op(DSM_OP_ATTACH, old_control_handle, 0, &impl_private,
234  &mapped_address, &mapped_size, DEBUG1))
235  return;
236 
237  /*
238  * We've managed to reattach it, but the contents might not be sane. If
239  * they aren't, we disregard the segment after all.
240  */
241  old_control = (dsm_control_header *) mapped_address;
242  if (!dsm_control_segment_sane(old_control, mapped_size))
243  {
244  dsm_impl_op(DSM_OP_DETACH, old_control_handle, 0, &impl_private,
245  &mapped_address, &mapped_size, LOG);
246  return;
247  }
248 
249  /*
250  * OK, the control segment looks basically valid, so we can use it to get
251  * a list of segments that need to be removed.
252  */
253  nitems = old_control->nitems;
254  for (i = 0; i < nitems; ++i)
255  {
256  dsm_handle handle;
257  uint32 refcnt;
258 
259  /* If the reference count is 0, the slot is actually unused. */
260  refcnt = old_control->item[i].refcnt;
261  if (refcnt == 0)
262  continue;
263 
264  /* Log debugging information. */
265  handle = old_control->item[i].handle;
266  elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u (reference count %u)",
267  handle, refcnt);
268 
269  /* Destroy the referenced segment. */
270  dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
271  &junk_mapped_address, &junk_mapped_size, LOG);
272  }
273 
274  /* Destroy the old control segment, too. */
275  elog(DEBUG2,
276  "cleaning up dynamic shared memory control segment with ID %u",
277  old_control_handle);
278  dsm_impl_op(DSM_OP_DESTROY, old_control_handle, 0, &impl_private,
279  &mapped_address, &mapped_size, LOG);
280 }
281 
282 /*
283  * When we're using the mmap shared memory implementation, "shared memory"
284  * segments might even manage to survive an operating system reboot.
285  * But there's no guarantee as to exactly what will survive: some segments
286  * may survive, and others may not, and the contents of some may be out
287  * of date. In particular, the control segment may be out of date, so we
288  * can't rely on it to figure out what to remove. However, since we know
289  * what directory contains the files we used as shared memory, we can simply
290  * scan the directory and blow everything away that shouldn't be there.
291  */
292 static void
294 {
295  DIR *dir;
296  struct dirent *dent;
297 
298  /* Open the directory; can't use AllocateDir in postmaster. */
299  if ((dir = AllocateDir(PG_DYNSHMEM_DIR)) == NULL)
300  ereport(ERROR,
302  errmsg("could not open directory \"%s\": %m",
303  PG_DYNSHMEM_DIR)));
304 
305  /* Scan for something with a name of the correct format. */
306  while ((dent = ReadDir(dir, PG_DYNSHMEM_DIR)) != NULL)
307  {
308  if (strncmp(dent->d_name, PG_DYNSHMEM_MMAP_FILE_PREFIX,
309  strlen(PG_DYNSHMEM_MMAP_FILE_PREFIX)) == 0)
310  {
311  char buf[MAXPGPATH];
312 
313  snprintf(buf, MAXPGPATH, PG_DYNSHMEM_DIR "/%s", dent->d_name);
314 
315  elog(DEBUG2, "removing file \"%s\"", buf);
316 
317  /* We found a matching file; so remove it. */
318  if (unlink(buf) != 0)
319  {
320  int save_errno;
321 
322  save_errno = errno;
323  closedir(dir);
324  errno = save_errno;
325 
326  ereport(ERROR,
328  errmsg("could not remove file \"%s\": %m", buf)));
329  }
330  }
331  }
332 
333  /* Cleanup complete. */
334  FreeDir(dir);
335 }
336 
337 /*
338  * At shutdown time, we iterate over the control segment and remove all
339  * remaining dynamic shared memory segments. We avoid throwing errors here;
340  * the postmaster is shutting down either way, and this is just non-critical
341  * resource cleanup.
342  */
343 static void
345 {
346  uint32 nitems;
347  uint32 i;
348  void *dsm_control_address;
349  void *junk_mapped_address = NULL;
350  void *junk_impl_private = NULL;
351  Size junk_mapped_size = 0;
353 
354  /*
355  * If some other backend exited uncleanly, it might have corrupted the
356  * control segment while it was dying. In that case, we warn and ignore
357  * the contents of the control segment. This may end up leaving behind
358  * stray shared memory segments, but there's not much we can do about that
359  * if the metadata is gone.
360  */
361  nitems = dsm_control->nitems;
363  {
364  ereport(LOG,
365  (errmsg("dynamic shared memory control segment is corrupt")));
366  return;
367  }
368 
369  /* Remove any remaining segments. */
370  for (i = 0; i < nitems; ++i)
371  {
372  dsm_handle handle;
373 
374  /* If the reference count is 0, the slot is actually unused. */
375  if (dsm_control->item[i].refcnt == 0)
376  continue;
377 
378  /* Log debugging information. */
379  handle = dsm_control->item[i].handle;
380  elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u",
381  handle);
382 
383  /* Destroy the segment. */
384  dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
385  &junk_mapped_address, &junk_mapped_size, LOG);
386  }
387 
388  /* Remove the control segment itself. */
389  elog(DEBUG2,
390  "cleaning up dynamic shared memory control segment with ID %u",
392  dsm_control_address = dsm_control;
394  &dsm_control_impl_private, &dsm_control_address,
396  dsm_control = dsm_control_address;
397  shim->dsm_control = 0;
398 }
399 
400 /*
401  * Prepare this backend for dynamic shared memory usage. Under EXEC_BACKEND,
402  * we must reread the state file and map the control segment; in other cases,
403  * we'll have inherited the postmaster's mapping and global variables.
404  */
405 static void
407 {
408  /* If dynamic shared memory is disabled, reject this. */
410  ereport(ERROR,
411  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
412  errmsg("dynamic shared memory is disabled"),
413  errhint("Set dynamic_shared_memory_type to a value other than \"none\".")));
414 
415 #ifdef EXEC_BACKEND
416  {
417  void *control_address = NULL;
418 
419  /* Attach control segment. */
422  &dsm_control_impl_private, &control_address,
424  dsm_control = control_address;
425  /* If control segment doesn't look sane, something is badly wrong. */
427  {
429  &dsm_control_impl_private, &control_address,
431  ereport(FATAL,
432  (errcode(ERRCODE_INTERNAL_ERROR),
433  errmsg("dynamic shared memory control segment is not valid")));
434  }
435  }
436 #endif
437 
438  dsm_init_done = true;
439 }
440 
441 #ifdef EXEC_BACKEND
442 /*
443  * When running under EXEC_BACKEND, we get a callback here when the main
444  * shared memory segment is re-attached, so that we can record the control
445  * handle retrieved from it.
446  */
447 void
448 dsm_set_control_handle(dsm_handle h)
449 {
450  Assert(dsm_control_handle == 0 && h != 0);
451  dsm_control_handle = h;
452 }
453 #endif
454 
455 /*
456  * Create a new dynamic shared memory segment.
457  */
458 dsm_segment *
459 dsm_create(Size size, int flags)
460 {
461  dsm_segment *seg;
462  uint32 i;
463  uint32 nitems;
464 
465  /* Unsafe in postmaster (and pointless in a stand-alone backend). */
467 
468  if (!dsm_init_done)
470 
471  /* Create a new segment descriptor. */
472  seg = dsm_create_descriptor();
473 
474  /* Loop until we find an unused segment identifier. */
475  for (;;)
476  {
477  Assert(seg->mapped_address == NULL && seg->mapped_size == 0);
478  seg->handle = random();
479  if (seg->handle == DSM_HANDLE_INVALID) /* Reserve sentinel */
480  continue;
481  if (dsm_impl_op(DSM_OP_CREATE, seg->handle, size, &seg->impl_private,
482  &seg->mapped_address, &seg->mapped_size, ERROR))
483  break;
484  }
485 
486  /* Lock the control segment so we can register the new segment. */
487  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
488 
489  /* Search the control segment for an unused slot. */
490  nitems = dsm_control->nitems;
491  for (i = 0; i < nitems; ++i)
492  {
493  if (dsm_control->item[i].refcnt == 0)
494  {
495  dsm_control->item[i].handle = seg->handle;
496  /* refcnt of 1 triggers destruction, so start at 2 */
497  dsm_control->item[i].refcnt = 2;
498  dsm_control->item[i].impl_private_pm_handle = NULL;
499  dsm_control->item[i].pinned = false;
500  seg->control_slot = i;
501  LWLockRelease(DynamicSharedMemoryControlLock);
502  return seg;
503  }
504  }
505 
506  /* Verify that we can support an additional mapping. */
507  if (nitems >= dsm_control->maxitems)
508  {
509  if ((flags & DSM_CREATE_NULL_IF_MAXSEGMENTS) != 0)
510  {
511  LWLockRelease(DynamicSharedMemoryControlLock);
513  &seg->mapped_address, &seg->mapped_size, WARNING);
514  if (seg->resowner != NULL)
515  ResourceOwnerForgetDSM(seg->resowner, seg);
516  dlist_delete(&seg->node);
517  pfree(seg);
518  return NULL;
519  }
520  ereport(ERROR,
521  (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
522  errmsg("too many dynamic shared memory segments")));
523  }
524 
525  /* Enter the handle into a new array slot. */
526  dsm_control->item[nitems].handle = seg->handle;
527  /* refcnt of 1 triggers destruction, so start at 2 */
528  dsm_control->item[nitems].refcnt = 2;
529  dsm_control->item[nitems].impl_private_pm_handle = NULL;
530  dsm_control->item[nitems].pinned = false;
531  seg->control_slot = nitems;
532  dsm_control->nitems++;
533  LWLockRelease(DynamicSharedMemoryControlLock);
534 
535  return seg;
536 }
537 
538 /*
539  * Attach a dynamic shared memory segment.
540  *
541  * See comments for dsm_segment_handle() for an explanation of how this
542  * is intended to be used.
543  *
544  * This function will return NULL if the segment isn't known to the system.
545  * This can happen if we're asked to attach the segment, but then everyone
546  * else detaches it (causing it to be destroyed) before we get around to
547  * attaching it.
548  */
549 dsm_segment *
551 {
552  dsm_segment *seg;
553  dlist_iter iter;
554  uint32 i;
555  uint32 nitems;
556 
557  /* Unsafe in postmaster (and pointless in a stand-alone backend). */
559 
560  if (!dsm_init_done)
562 
563  /*
564  * Since this is just a debugging cross-check, we could leave it out
565  * altogether, or include it only in assert-enabled builds. But since the
566  * list of attached segments should normally be very short, let's include
567  * it always for right now.
568  *
569  * If you're hitting this error, you probably want to attempt to find an
570  * existing mapping via dsm_find_mapping() before calling dsm_attach() to
571  * create a new one.
572  */
573  dlist_foreach(iter, &dsm_segment_list)
574  {
575  seg = dlist_container(dsm_segment, node, iter.cur);
576  if (seg->handle == h)
577  elog(ERROR, "can't attach the same segment more than once");
578  }
579 
580  /* Create a new segment descriptor. */
581  seg = dsm_create_descriptor();
582  seg->handle = h;
583 
584  /* Bump reference count for this segment in shared memory. */
585  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
586  nitems = dsm_control->nitems;
587  for (i = 0; i < nitems; ++i)
588  {
589  /* If the reference count is 0, the slot is actually unused. */
590  if (dsm_control->item[i].refcnt == 0)
591  continue;
592 
593  /* If the handle doesn't match, it's not the slot we want. */
594  if (dsm_control->item[i].handle != seg->handle)
595  continue;
596 
597  /*
598  * If the reference count is 1, the slot is still in use, but the
599  * segment is in the process of going away. Treat that as if we
600  * didn't find a match.
601  */
602  if (dsm_control->item[i].refcnt == 1)
603  break;
604 
605  /* Otherwise we've found a match. */
606  dsm_control->item[i].refcnt++;
607  seg->control_slot = i;
608  break;
609  }
610  LWLockRelease(DynamicSharedMemoryControlLock);
611 
612  /*
613  * If we didn't find the handle we're looking for in the control segment,
614  * it probably means that everyone else who had it mapped, including the
615  * original creator, died before we got to this point. It's up to the
616  * caller to decide what to do about that.
617  */
619  {
620  dsm_detach(seg);
621  return NULL;
622  }
623 
624  /* Here's where we actually try to map the segment. */
626  &seg->mapped_address, &seg->mapped_size, ERROR);
627 
628  return seg;
629 }
630 
631 /*
632  * At backend shutdown time, detach any segments that are still attached.
633  * (This is similar to dsm_detach_all, except that there's no reason to
634  * unmap the control segment before exiting, so we don't bother.)
635  */
636 void
638 {
639  while (!dlist_is_empty(&dsm_segment_list))
640  {
641  dsm_segment *seg;
642 
643  seg = dlist_head_element(dsm_segment, node, &dsm_segment_list);
644  dsm_detach(seg);
645  }
646 }
647 
648 /*
649  * Detach all shared memory segments, including the control segments. This
650  * should be called, along with PGSharedMemoryDetach, in processes that
651  * might inherit mappings but are not intended to be connected to dynamic
652  * shared memory.
653  */
654 void
656 {
657  void *control_address = dsm_control;
658 
659  while (!dlist_is_empty(&dsm_segment_list))
660  {
661  dsm_segment *seg;
662 
663  seg = dlist_head_element(dsm_segment, node, &dsm_segment_list);
664  dsm_detach(seg);
665  }
666 
667  if (control_address != NULL)
669  &dsm_control_impl_private, &control_address,
671 }
672 
673 /*
674  * Resize an existing shared memory segment.
675  *
676  * This may cause the shared memory segment to be remapped at a different
677  * address. For the caller's convenience, we return the mapped address.
678  */
679 void *
681 {
683  dsm_impl_op(DSM_OP_RESIZE, seg->handle, size, &seg->impl_private,
684  &seg->mapped_address, &seg->mapped_size, ERROR);
685  return seg->mapped_address;
686 }
687 
688 /*
689  * Remap an existing shared memory segment.
690  *
691  * This is intended to be used when some other process has extended the
692  * mapping using dsm_resize(), but we've still only got the initial
693  * portion mapped. Since this might change the address at which the
694  * segment is mapped, we return the new mapped address.
695  */
696 void *
698 {
700  &seg->mapped_address, &seg->mapped_size, ERROR);
701 
702  return seg->mapped_address;
703 }
704 
705 /*
706  * Detach from a shared memory segment, destroying the segment if we
707  * remove the last reference.
708  *
709  * This function should never fail. It will often be invoked when aborting
710  * a transaction, and a further error won't serve any purpose. It's not a
711  * complete disaster if we fail to unmap or destroy the segment; it means a
712  * resource leak, but that doesn't necessarily preclude further operations.
713  */
714 void
716 {
717  /*
718  * Invoke registered callbacks. Just in case one of those callbacks
719  * throws a further error that brings us back here, pop the callback
720  * before invoking it, to avoid infinite error recursion.
721  */
722  while (!slist_is_empty(&seg->on_detach))
723  {
724  slist_node *node;
726  on_dsm_detach_callback function;
727  Datum arg;
728 
729  node = slist_pop_head_node(&seg->on_detach);
731  function = cb->function;
732  arg = cb->arg;
733  pfree(cb);
734 
735  function(seg, arg);
736  }
737 
738  /*
739  * Try to remove the mapping, if one exists. Normally, there will be, but
740  * maybe not, if we failed partway through a create or attach operation.
741  * We remove the mapping before decrementing the reference count so that
742  * the process that sees a zero reference count can be certain that no
743  * remaining mappings exist. Even if this fails, we pretend that it
744  * works, because retrying is likely to fail in the same way.
745  */
746  if (seg->mapped_address != NULL)
747  {
749  &seg->mapped_address, &seg->mapped_size, WARNING);
750  seg->impl_private = NULL;
751  seg->mapped_address = NULL;
752  seg->mapped_size = 0;
753  }
754 
755  /* Reduce reference count, if we previously increased it. */
757  {
758  uint32 refcnt;
759  uint32 control_slot = seg->control_slot;
760 
761  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
762  Assert(dsm_control->item[control_slot].handle == seg->handle);
763  Assert(dsm_control->item[control_slot].refcnt > 1);
764  refcnt = --dsm_control->item[control_slot].refcnt;
766  LWLockRelease(DynamicSharedMemoryControlLock);
767 
768  /* If new reference count is 1, try to destroy the segment. */
769  if (refcnt == 1)
770  {
771  /* A pinned segment should never reach 1. */
772  Assert(!dsm_control->item[control_slot].pinned);
773 
774  /*
775  * If we fail to destroy the segment here, or are killed before we
776  * finish doing so, the reference count will remain at 1, which
777  * will mean that nobody else can attach to the segment. At
778  * postmaster shutdown time, or when a new postmaster is started
779  * after a hard kill, another attempt will be made to remove the
780  * segment.
781  *
782  * The main case we're worried about here is being killed by a
783  * signal before we can finish removing the segment. In that
784  * case, it's important to be sure that the segment still gets
785  * removed. If we actually fail to remove the segment for some
786  * other reason, the postmaster may not have any better luck than
787  * we did. There's not much we can do about that, though.
788  */
789  if (dsm_impl_op(DSM_OP_DESTROY, seg->handle, 0, &seg->impl_private,
790  &seg->mapped_address, &seg->mapped_size, WARNING))
791  {
792  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
793  Assert(dsm_control->item[control_slot].handle == seg->handle);
794  Assert(dsm_control->item[control_slot].refcnt == 1);
795  dsm_control->item[control_slot].refcnt = 0;
796  LWLockRelease(DynamicSharedMemoryControlLock);
797  }
798  }
799  }
800 
801  /* Clean up our remaining backend-private data structures. */
802  if (seg->resowner != NULL)
803  ResourceOwnerForgetDSM(seg->resowner, seg);
804  dlist_delete(&seg->node);
805  pfree(seg);
806 }
807 
808 /*
809  * Keep a dynamic shared memory mapping until end of session.
810  *
811  * By default, mappings are owned by the current resource owner, which
812  * typically means they stick around for the duration of the current query
813  * only.
814  */
815 void
817 {
818  if (seg->resowner != NULL)
819  {
820  ResourceOwnerForgetDSM(seg->resowner, seg);
821  seg->resowner = NULL;
822  }
823 }
824 
825 /*
826  * Arrange to remove a dynamic shared memory mapping at cleanup time.
827  *
828  * dsm_pin_mapping() can be used to preserve a mapping for the entire
829  * lifetime of a process; this function reverses that decision, making
830  * the segment owned by the current resource owner. This may be useful
831  * just before performing some operation that will invalidate the segment
832  * for future use by this backend.
833  */
834 void
836 {
837  Assert(seg->resowner == NULL);
841 }
842 
843 /*
844  * Keep a dynamic shared memory segment until postmaster shutdown, or until
845  * dsm_unpin_segment is called.
846  *
847  * This function should not be called more than once per segment, unless the
848  * segment is explicitly unpinned with dsm_unpin_segment in between calls.
849  *
850  * Note that this function does not arrange for the current process to
851  * keep the segment mapped indefinitely; if that behavior is desired,
852  * dsm_pin_mapping() should be used from each process that needs to
853  * retain the mapping.
854  */
855 void
857 {
858  void *handle;
859 
860  /*
861  * Bump reference count for this segment in shared memory. This will
862  * ensure that even if there is no session which is attached to this
863  * segment, it will remain until postmaster shutdown or an explicit call
864  * to unpin.
865  */
866  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
867  if (dsm_control->item[seg->control_slot].pinned)
868  elog(ERROR, "cannot pin a segment that is already pinned");
869  dsm_impl_pin_segment(seg->handle, seg->impl_private, &handle);
870  dsm_control->item[seg->control_slot].pinned = true;
871  dsm_control->item[seg->control_slot].refcnt++;
872  dsm_control->item[seg->control_slot].impl_private_pm_handle = handle;
873  LWLockRelease(DynamicSharedMemoryControlLock);
874 }
875 
876 /*
877  * Unpin a dynamic shared memory segment that was previously pinned with
878  * dsm_pin_segment. This function should not be called unless dsm_pin_segment
879  * was previously called for this segment.
880  *
881  * The argument is a dsm_handle rather than a dsm_segment in case you want
882  * to unpin a segment to which you haven't attached. This turns out to be
883  * useful if, for example, a reference to one shared memory segment is stored
884  * within another shared memory segment. You might want to unpin the
885  * referenced segment before destroying the referencing segment.
886  */
887 void
889 {
890  uint32 control_slot = INVALID_CONTROL_SLOT;
891  bool destroy = false;
892  uint32 i;
893 
894  /* Find the control slot for the given handle. */
895  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
896  for (i = 0; i < dsm_control->nitems; ++i)
897  {
898  /* Skip unused slots. */
899  if (dsm_control->item[i].refcnt == 0)
900  continue;
901 
902  /* If we've found our handle, we can stop searching. */
903  if (dsm_control->item[i].handle == handle)
904  {
905  control_slot = i;
906  break;
907  }
908  }
909 
910  /*
911  * We should definitely have found the slot, and it should not already be
912  * in the process of going away, because this function should only be
913  * called on a segment which is pinned.
914  */
915  if (control_slot == INVALID_CONTROL_SLOT)
916  elog(ERROR, "cannot unpin unknown segment handle");
917  if (!dsm_control->item[control_slot].pinned)
918  elog(ERROR, "cannot unpin a segment that is not pinned");
919  Assert(dsm_control->item[control_slot].refcnt > 1);
920 
921  /*
922  * Allow implementation-specific code to run. We have to do this before
923  * releasing the lock, because impl_private_pm_handle may get modified by
924  * dsm_impl_unpin_segment.
925  */
926  dsm_impl_unpin_segment(handle,
927  &dsm_control->item[control_slot].impl_private_pm_handle);
928 
929  /* Note that 1 means no references (0 means unused slot). */
930  if (--dsm_control->item[control_slot].refcnt == 1)
931  destroy = true;
932  dsm_control->item[control_slot].pinned = false;
933 
934  /* Now we can release the lock. */
935  LWLockRelease(DynamicSharedMemoryControlLock);
936 
937  /* Clean up resources if that was the last reference. */
938  if (destroy)
939  {
940  void *junk_impl_private = NULL;
941  void *junk_mapped_address = NULL;
942  Size junk_mapped_size = 0;
943 
944  /*
945  * For an explanation of how error handling works in this case, see
946  * comments in dsm_detach. Note that if we reach this point, the
947  * current process certainly does not have the segment mapped, because
948  * if it did, the reference count would have still been greater than 1
949  * even after releasing the reference count held by the pin. The fact
950  * that there can't be a dsm_segment for this handle makes it OK to
951  * pass the mapped size, mapped address, and private data as NULL
952  * here.
953  */
954  if (dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
955  &junk_mapped_address, &junk_mapped_size, WARNING))
956  {
957  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
958  Assert(dsm_control->item[control_slot].handle == handle);
959  Assert(dsm_control->item[control_slot].refcnt == 1);
960  dsm_control->item[control_slot].refcnt = 0;
961  LWLockRelease(DynamicSharedMemoryControlLock);
962  }
963  }
964 }
965 
966 /*
967  * Find an existing mapping for a shared memory segment, if there is one.
968  */
969 dsm_segment *
971 {
972  dlist_iter iter;
973  dsm_segment *seg;
974 
975  dlist_foreach(iter, &dsm_segment_list)
976  {
977  seg = dlist_container(dsm_segment, node, iter.cur);
978  if (seg->handle == h)
979  return seg;
980  }
981 
982  return NULL;
983 }
984 
985 /*
986  * Get the address at which a dynamic shared memory segment is mapped.
987  */
988 void *
990 {
991  Assert(seg->mapped_address != NULL);
992  return seg->mapped_address;
993 }
994 
995 /*
996  * Get the size of a mapping.
997  */
998 Size
1000 {
1001  Assert(seg->mapped_address != NULL);
1002  return seg->mapped_size;
1003 }
1004 
1005 /*
1006  * Get a handle for a mapping.
1007  *
1008  * To establish communication via dynamic shared memory between two backends,
1009  * one of them should first call dsm_create() to establish a new shared
1010  * memory mapping. That process should then call dsm_segment_handle() to
1011  * obtain a handle for the mapping, and pass that handle to the
1012  * coordinating backend via some means (e.g. bgw_main_arg, or via the
1013  * main shared memory segment). The recipient, once in possession of the
1014  * handle, should call dsm_attach().
1015  */
1016 dsm_handle
1018 {
1019  return seg->handle;
1020 }
1021 
1022 /*
1023  * Register an on-detach callback for a dynamic shared memory segment.
1024  */
1025 void
1027 {
1029 
1031  sizeof(dsm_segment_detach_callback));
1032  cb->function = function;
1033  cb->arg = arg;
1034  slist_push_head(&seg->on_detach, &cb->node);
1035 }
1036 
1037 /*
1038  * Unregister an on-detach callback for a dynamic shared memory segment.
1039  */
1040 void
1042  Datum arg)
1043 {
1044  slist_mutable_iter iter;
1045 
1046  slist_foreach_modify(iter, &seg->on_detach)
1047  {
1049 
1051  if (cb->function == function && cb->arg == arg)
1052  {
1053  slist_delete_current(&iter);
1054  pfree(cb);
1055  break;
1056  }
1057  }
1058 }
1059 
1060 /*
1061  * Discard all registered on-detach callbacks without executing them.
1062  */
1063 void
1065 {
1066  dlist_iter iter;
1067 
1068  dlist_foreach(iter, &dsm_segment_list)
1069  {
1070  dsm_segment *seg = dlist_container(dsm_segment, node, iter.cur);
1071 
1072  /* Throw away explicit on-detach actions one by one. */
1073  while (!slist_is_empty(&seg->on_detach))
1074  {
1075  slist_node *node;
1077 
1078  node = slist_pop_head_node(&seg->on_detach);
1079  cb = slist_container(dsm_segment_detach_callback, node, node);
1080  pfree(cb);
1081  }
1082 
1083  /*
1084  * Decrementing the reference count is a sort of implicit on-detach
1085  * action; make sure we don't do that, either.
1086  */
1088  }
1089 }
1090 
1091 /*
1092  * Create a segment descriptor.
1093  */
1094 static dsm_segment *
1096 {
1097  dsm_segment *seg;
1098 
1100 
1102  dlist_push_head(&dsm_segment_list, &seg->node);
1103 
1104  /* seg->handle must be initialized by the caller */
1106  seg->impl_private = NULL;
1107  seg->mapped_address = NULL;
1108  seg->mapped_size = 0;
1109 
1112 
1113  slist_init(&seg->on_detach);
1114 
1115  return seg;
1116 }
1117 
1118 /*
1119  * Sanity check a control segment.
1120  *
1121  * The goal here isn't to detect everything that could possibly be wrong with
1122  * the control segment; there's not enough information for that. Rather, the
1123  * goal is to make sure that someone can iterate over the items in the segment
1124  * without overrunning the end of the mapping and crashing. We also check
1125  * the magic number since, if that's messed up, this may not even be one of
1126  * our segments at all.
1127  */
1128 static bool
1130 {
1131  if (mapped_size < offsetof(dsm_control_header, item))
1132  return false; /* Mapped size too short to read header. */
1133  if (control->magic != PG_DYNSHMEM_CONTROL_MAGIC)
1134  return false; /* Magic number doesn't match. */
1135  if (dsm_control_bytes_needed(control->maxitems) > mapped_size)
1136  return false; /* Max item count won't fit in map. */
1137  if (control->nitems > control->maxitems)
1138  return false; /* Overfull. */
1139  return true;
1140 }
1141 
1142 /*
1143  * Compute the number of control-segment bytes needed to store a given
1144  * number of items.
1145  */
1146 static uint64
1148 {
1149  return offsetof(dsm_control_header, item)
1150  +sizeof(dsm_control_item) * (uint64) nitems;
1151 }
static void * dsm_control_impl_private
Definition: dsm.c:137
#define DSM_IMPL_MMAP
Definition: dsm_impl.h:21
void dsm_postmaster_startup(PGShmemHeader *shim)
Definition: dsm.c:146
dlist_node node
Definition: dsm.c:70
void ResourceOwnerRememberDSM(ResourceOwner owner, dsm_segment *seg)
Definition: resowner.c:1245
dsm_segment * dsm_find_mapping(dsm_handle h)
Definition: dsm.c:970
#define DEBUG1
Definition: elog.h:25
int errhint(const char *fmt,...)
Definition: elog.c:987
void dsm_impl_unpin_segment(dsm_handle handle, void **impl_private)
Definition: dsm_impl.c:1057
void reset_on_dsm_detach(void)
Definition: dsm.c:1064
uint32 maxitems
Definition: dsm.c:94
#define PG_DYNSHMEM_SLOTS_PER_BACKEND
Definition: dsm.c:55
uint32 dsm_handle
Definition: dsm_impl.h:55
#define PG_DYNSHMEM_DIR
Definition: dsm_impl.h:51
#define PG_DYNSHMEM_FIXED_SLOTS
Definition: dsm.c:54
static void dlist_push_head(dlist_head *head, dlist_node *node)
Definition: ilist.h:300
Size mapped_size
Definition: dsm.c:76
void dsm_cleanup_using_control_segment(dsm_handle old_control_handle)
Definition: dsm.c:211
dsm_handle dsm_control
Definition: pg_shmem.h:36
#define PointerGetDatum(X)
Definition: postgres.h:564
dsm_handle handle
Definition: dsm.c:83
long random(void)
Definition: random.c:22
void * dsm_resize(dsm_segment *seg, Size size)
Definition: dsm.c:680
#define dlist_foreach(iter, lhead)
Definition: ilist.h:507
ResourceOwner CurrentResourceOwner
Definition: resowner.c:138
struct dsm_control_header dsm_control_header
slist_node * cur
Definition: ilist.h:241
dsm_segment * dsm_attach(dsm_handle h)
Definition: dsm.c:550
static dsm_handle dsm_control_handle
Definition: dsm.c:134
static void slist_push_head(slist_head *head, slist_node *node)
Definition: ilist.h:574
dsm_handle dsm_segment_handle(dsm_segment *seg)
Definition: dsm.c:1017
int closedir(DIR *)
Definition: dirent.c:113
int errcode(int sqlerrcode)
Definition: elog.c:575
int snprintf(char *str, size_t count, const char *fmt,...) pg_attribute_printf(3
void(* on_dsm_detach_callback)(dsm_segment *, Datum arg)
Definition: dsm.h:56
static void dsm_backend_startup(void)
Definition: dsm.c:406
static dlist_head dsm_segment_list
Definition: dsm.c:125
#define LOG
Definition: elog.h:26
void on_dsm_detach(dsm_segment *seg, on_dsm_detach_callback function, Datum arg)
Definition: dsm.c:1026
static uint64 dsm_control_bytes_needed(uint32 nitems)
Definition: dsm.c:1147
Definition: dirent.h:9
uint32 nitems
Definition: dsm.c:93
bool pinned
Definition: dsm.c:86
#define slist_foreach_modify(iter, lhead)
Definition: ilist.h:716
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1714
void dsm_pin_segment(dsm_segment *seg)
Definition: dsm.c:856
#define DSM_HANDLE_INVALID
Definition: dsm.h:23
#define dlist_container(type, membername, ptr)
Definition: ilist.h:477
void pfree(void *pointer)
Definition: mcxt.c:992
static void slist_init(slist_head *head)
Definition: ilist.h:554
Definition: dirent.c:25
#define ERROR
Definition: elog.h:43
uint32 magic
Definition: dsm.c:92
#define FATAL
Definition: elog.h:52
#define MAXPGPATH
int MaxBackends
Definition: globals.c:126
on_dsm_detach_callback function
Definition: dsm.c:62
#define DEBUG2
Definition: elog.h:24
void dsm_pin_mapping(dsm_segment *seg)
Definition: dsm.c:816
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:348
static char * buf
Definition: pg_test_fsync.c:65
#define DSM_CREATE_NULL_IF_MAXSEGMENTS
Definition: dsm.h:20
void ResourceOwnerEnlargeDSMs(ResourceOwner owner)
Definition: resowner.c:1234
bool IsUnderPostmaster
Definition: globals.c:100
dsm_control_item item[FLEXIBLE_ARRAY_MEMBER]
Definition: dsm.c:95
int errcode_for_file_access(void)
Definition: elog.c:598
static void dsm_postmaster_shutdown(int code, Datum arg)
Definition: dsm.c:344
int dynamic_shared_memory_type
Definition: dsm_impl.c:112
unsigned int uint32
Definition: c.h:265
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2284
void dsm_unpin_segment(dsm_handle handle)
Definition: dsm.c:888
void * mapped_address
Definition: dsm.c:75
static void dlist_delete(dlist_node *node)
Definition: ilist.h:358
int unlink(const char *filename)
#define ereport(elevel, rest)
Definition: elog.h:122
static bool dsm_control_segment_sane(dsm_control_header *control, Size mapped_size)
Definition: dsm.c:1129
MemoryContext TopMemoryContext
Definition: mcxt.c:43
void dsm_backend_shutdown(void)
Definition: dsm.c:637
static slist_node * slist_pop_head_node(slist_head *head)
Definition: ilist.h:596
slist_head on_detach
Definition: dsm.c:77
static bool slist_is_empty(slist_head *head)
Definition: ilist.h:563
#define DLIST_STATIC_INIT(name)
Definition: ilist.h:248
#define WARNING
Definition: elog.h:40
dsm_handle handle
Definition: dsm.c:72
#define dlist_head_element(type, membername, lhead)
Definition: ilist.h:487
#define slist_container(type, membername, ptr)
Definition: ilist.h:674
uintptr_t Datum
Definition: postgres.h:374
dsm_segment * dsm_create(Size size, int flags)
Definition: dsm.c:459
static void dsm_cleanup_for_mmap(void)
Definition: dsm.c:293
dlist_node * cur
Definition: ilist.h:161
void * dsm_remap(dsm_segment *seg)
Definition: dsm.c:697
void dsm_unpin_mapping(dsm_segment *seg)
Definition: dsm.c:835
static dsm_control_header * dsm_control
Definition: dsm.c:135
ResourceOwner resowner
Definition: dsm.c:71
#define NULL
Definition: c.h:226
void * dsm_segment_address(dsm_segment *seg)
Definition: dsm.c:989
#define Assert(condition)
Definition: c.h:671
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2350
void * impl_private
Definition: dsm.c:74
static bool dlist_is_empty(dlist_head *head)
Definition: ilist.h:289
#define DSM_IMPL_NONE
Definition: dsm_impl.h:17
void ResourceOwnerForgetDSM(ResourceOwner owner, dsm_segment *seg)
Definition: resowner.c:1254
size_t Size
Definition: c.h:353
uint32 control_slot
Definition: dsm.c:73
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1110
void dsm_detach_all(void)
Definition: dsm.c:655
#define DatumGetPointer(X)
Definition: postgres.h:557
static dsm_segment * dsm_create_descriptor(void)
Definition: dsm.c:1095
uint32 refcnt
Definition: dsm.c:84
static bool dsm_init_done
Definition: dsm.c:106
void dsm_impl_pin_segment(dsm_handle handle, void *impl_private, void **impl_private_pm_handle)
Definition: dsm_impl.c:1007
void dsm_detach(dsm_segment *seg)
Definition: dsm.c:715
int errmsg(const char *fmt,...)
Definition: elog.c:797
struct dsm_control_item dsm_control_item
static Size dsm_control_mapped_size
Definition: dsm.c:136
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:749
struct dsm_segment_detach_callback dsm_segment_detach_callback
#define PG_DYNSHMEM_MMAP_FILE_PREFIX
Definition: dsm_impl.h:52
int i
void * arg
void * impl_private_pm_handle
Definition: dsm.c:85
static void slist_delete_current(slist_mutable_iter *iter)
Definition: ilist.h:652
char d_name[MAX_PATH]
Definition: dirent.h:14
#define INVALID_CONTROL_SLOT
Definition: dsm.c:57
#define elog
Definition: elog.h:219
bool dsm_impl_op(dsm_op op, dsm_handle handle, Size request_size, void **impl_private, void **mapped_address, Size *mapped_size, int elevel)
Definition: dsm_impl.c:162
void cancel_on_dsm_detach(dsm_segment *seg, on_dsm_detach_callback function, Datum arg)
Definition: dsm.c:1041
Size dsm_segment_map_length(dsm_segment *seg)
Definition: dsm.c:999
#define PG_DYNSHMEM_CONTROL_MAGIC
Definition: dsm.c:47
int FreeDir(DIR *dir)
Definition: fd.c:2393
#define offsetof(type, field)
Definition: c.h:551