PostgreSQL Source Code  git master
dsm.c
1 /*-------------------------------------------------------------------------
2  *
3  * dsm.c
4  * manage dynamic shared memory segments
5  *
6  * This file provides a set of services to make programming with dynamic
7  * shared memory segments more convenient. Unlike the low-level
8  * facilities provided by dsm_impl.h and dsm_impl.c, mappings and segments
9  * created using this module will be cleaned up automatically. Mappings
10  * will be removed when the resource owner under which they were created
11  * is cleaned up, unless dsm_pin_mapping() is used, in which case they
12  * have session lifespan. Segments will be removed when there are no
13  * remaining mappings, or at postmaster shutdown in any case. After a
14  * hard postmaster crash, remaining segments will be removed, if they
15  * still exist, at the next postmaster startup.
16  *
17  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
18  * Portions Copyright (c) 1994, Regents of the University of California
19  *
20  *
21  * IDENTIFICATION
22  * src/backend/storage/ipc/dsm.c
23  *
24  *-------------------------------------------------------------------------
25  */
26 
27 #include "postgres.h"
28 
29 #include <fcntl.h>
30 #include <unistd.h>
31 #ifndef WIN32
32 #include <sys/mman.h>
33 #endif
34 #include <sys/stat.h>
35 
36 #include "common/pg_prng.h"
37 #include "lib/ilist.h"
38 #include "miscadmin.h"
39 #include "port/pg_bitutils.h"
40 #include "storage/dsm.h"
41 #include "storage/ipc.h"
42 #include "storage/lwlock.h"
43 #include "storage/pg_shmem.h"
44 #include "utils/freepage.h"
45 #include "utils/guc.h"
46 #include "utils/memutils.h"
47 #include "utils/resowner_private.h"
48 
49 #define PG_DYNSHMEM_CONTROL_MAGIC 0x9a503d32
50 
51 #define PG_DYNSHMEM_FIXED_SLOTS 64
52 #define PG_DYNSHMEM_SLOTS_PER_BACKEND 5
53 
54 #define INVALID_CONTROL_SLOT ((uint32) -1)
55 
56 /* Backend-local tracking for on-detach callbacks. */
57 typedef struct dsm_segment_detach_callback
58 {
59  on_dsm_detach_callback function;
60  Datum arg;
61  slist_node node;
62 } dsm_segment_detach_callback;
63 
64 /* Backend-local state for a dynamic shared memory segment. */
65 struct dsm_segment
66 {
67  dlist_node node; /* List link in dsm_segment_list. */
68  ResourceOwner resowner; /* Resource owner. */
69  dsm_handle handle; /* Segment name. */
70  uint32 control_slot; /* Slot in control segment. */
71  void *impl_private; /* Implementation-specific private data. */
72  void *mapped_address; /* Mapping address, or NULL if unmapped. */
73  Size mapped_size; /* Size of our mapping. */
74  slist_head on_detach; /* On-detach callbacks. */
75 };
76 
77 /* Shared-memory state for a dynamic shared memory segment. */
78 typedef struct dsm_control_item
79 {
80  dsm_handle handle;
81  uint32 refcnt; /* 2+ = active, 1 = moribund, 0 = gone */
82  size_t first_page;
83  size_t npages;
84  void *impl_private_pm_handle; /* only needed on Windows */
85  bool pinned;
86 } dsm_control_item;
87 
88 /* Layout of the dynamic shared memory control segment. */
89 typedef struct dsm_control_header
90 {
91  uint32 magic;
92  uint32 nitems;
93  uint32 maxitems;
94  dsm_control_item item[FLEXIBLE_ARRAY_MEMBER];
95 } dsm_control_header;
96 
97 static void dsm_cleanup_for_mmap(void);
98 static void dsm_postmaster_shutdown(int code, Datum arg);
99 static dsm_segment *dsm_create_descriptor(void);
100 static bool dsm_control_segment_sane(dsm_control_header *control,
101  Size mapped_size);
102 static uint64 dsm_control_bytes_needed(uint32 nitems);
103 static inline dsm_handle make_main_region_dsm_handle(int slot);
104 static inline bool is_main_region_dsm_handle(dsm_handle handle);
105 
106 /* Has this backend initialized the dynamic shared memory system yet? */
107 static bool dsm_init_done = false;
108 
109 /* Preallocated DSM space in the main shared memory region. */
110 static void *dsm_main_space_begin = NULL;
111 
112 /*
113  * List of dynamic shared memory segments used by this backend.
114  *
115  * At process exit time, we must decrement the reference count of each
116  * segment we have attached; this list makes it possible to find all such
117  * segments.
118  *
119  * This list should always be empty in the postmaster. We could probably
120  * allow the postmaster to map dynamic shared memory segments before it
121  * begins to start child processes, provided that each process adjusted
122  * the reference counts for those segments in the control segment at
123  * startup time, but there's no obvious need for such a facility, which
124  * would also be complex to handle in the EXEC_BACKEND case. Once the
125  * postmaster has begun spawning children, there's an additional problem:
126  * each new mapping would require an update to the control segment,
127  * which requires locking, in which the postmaster must not be involved.
128  */
129 static dlist_head dsm_segment_list = DLIST_STATIC_INIT(dsm_segment_list);
130 
131 /*
132  * Control segment information.
133  *
134  * Unlike ordinary shared memory segments, the control segment is not
135  * reference counted; instead, it lasts for the postmaster's entire
136  * life cycle. For simplicity, it doesn't have a dsm_segment object either.
137  */
138 static dsm_handle dsm_control_handle;
139 static dsm_control_header *dsm_control;
140 static Size dsm_control_mapped_size = 0;
141 static void *dsm_control_impl_private = NULL;
142 
143 /*
144  * Start up the dynamic shared memory system.
145  *
146  * This is called just once during each cluster lifetime, at postmaster
147  * startup time.
148  */
149 void
150 dsm_postmaster_startup(PGShmemHeader *shim)
151 {
152  void *dsm_control_address = NULL;
153  uint32 maxitems;
154  Size segsize;
155 
156  Assert(!IsUnderPostmaster);
157 
158  /*
159  * If we're using the mmap implementations, clean up any leftovers.
160  * Cleanup isn't needed on Windows, and happens earlier in startup for
161  * POSIX and System V shared memory, via a direct call to
162  * dsm_cleanup_using_control_segment.
163  */
164  if (dynamic_shared_memory_type == DSM_IMPL_MMAP)
165  dsm_cleanup_for_mmap();
166 
167  /* Determine size for new control segment. */
168  maxitems = PG_DYNSHMEM_FIXED_SLOTS
169  + PG_DYNSHMEM_SLOTS_PER_BACKEND * MaxBackends;
170  elog(DEBUG2, "dynamic shared memory system will support %u segments",
171  maxitems);
172  segsize = dsm_control_bytes_needed(maxitems);
173 
174  /*
175  * Loop until we find an unused identifier for the new control segment. We
176  * sometimes use DSM_HANDLE_INVALID as a sentinel value indicating "no
177  * control segment", so avoid generating that value for a real handle.
178  */
179  for (;;)
180  {
181  Assert(dsm_control_address == NULL);
182  Assert(dsm_control_mapped_size == 0);
183  /* Use even numbers only */
184  dsm_control_handle = pg_prng_uint32(&pg_global_prng_state) << 1;
185  if (dsm_control_handle == DSM_HANDLE_INVALID)
186  continue;
187  if (dsm_impl_op(DSM_OP_CREATE, dsm_control_handle, segsize,
188  &dsm_control_impl_private, &dsm_control_address,
189  &dsm_control_mapped_size, ERROR))
190  break;
191  }
192  dsm_control = dsm_control_address;
193  on_shmem_exit(dsm_postmaster_shutdown, PointerGetDatum(shim));
194  elog(DEBUG2,
195  "created dynamic shared memory control segment %u (%zu bytes)",
196  dsm_control_handle, segsize);
197  shim->dsm_control = dsm_control_handle;
198 
199  /* Initialize control segment. */
200  dsm_control->magic = PG_DYNSHMEM_CONTROL_MAGIC;
201  dsm_control->nitems = 0;
202  dsm_control->maxitems = maxitems;
203 }
204 
205 /*
206  * Determine whether the control segment from the previous postmaster
207  * invocation still exists. If so, remove the dynamic shared memory
208  * segments to which it refers, and then the control segment itself.
209  */
210 void
211 dsm_cleanup_using_control_segment(dsm_handle old_control_handle)
212 {
213  void *mapped_address = NULL;
214  void *junk_mapped_address = NULL;
215  void *impl_private = NULL;
216  void *junk_impl_private = NULL;
217  Size mapped_size = 0;
218  Size junk_mapped_size = 0;
219  uint32 nitems;
220  uint32 i;
221  dsm_control_header *old_control;
222 
223  /*
224  * Try to attach the segment. If this fails, it probably just means that
225  * the operating system has been rebooted and the segment no longer
226  * exists, or an unrelated process has used the same shm ID. So just fall
227  * out quietly.
228  */
229  if (!dsm_impl_op(DSM_OP_ATTACH, old_control_handle, 0, &impl_private,
230  &mapped_address, &mapped_size, DEBUG1))
231  return;
232 
233  /*
234  * We've managed to reattach it, but the contents might not be sane. If
235  * they aren't, we disregard the segment after all.
236  */
237  old_control = (dsm_control_header *) mapped_address;
238  if (!dsm_control_segment_sane(old_control, mapped_size))
239  {
240  dsm_impl_op(DSM_OP_DETACH, old_control_handle, 0, &impl_private,
241  &mapped_address, &mapped_size, LOG);
242  return;
243  }
244 
245  /*
246  * OK, the control segment looks basically valid, so we can use it to get
247  * a list of segments that need to be removed.
248  */
249  nitems = old_control->nitems;
250  for (i = 0; i < nitems; ++i)
251  {
252  dsm_handle handle;
253  uint32 refcnt;
254 
255  /* If the reference count is 0, the slot is actually unused. */
256  refcnt = old_control->item[i].refcnt;
257  if (refcnt == 0)
258  continue;
259 
260  /* If it was using the main shmem area, there is nothing to do. */
261  handle = old_control->item[i].handle;
262  if (is_main_region_dsm_handle(handle))
263  continue;
264 
265  /* Log debugging information. */
266  elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u (reference count %u)",
267  handle, refcnt);
268 
269  /* Destroy the referenced segment. */
270  dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
271  &junk_mapped_address, &junk_mapped_size, LOG);
272  }
273 
274  /* Destroy the old control segment, too. */
275  elog(DEBUG2,
276  "cleaning up dynamic shared memory control segment with ID %u",
277  old_control_handle);
278  dsm_impl_op(DSM_OP_DESTROY, old_control_handle, 0, &impl_private,
279  &mapped_address, &mapped_size, LOG);
280 }
281 
282 /*
283  * When we're using the mmap shared memory implementation, "shared memory"
284  * segments might even manage to survive an operating system reboot.
285  * But there's no guarantee as to exactly what will survive: some segments
286  * may survive, and others may not, and the contents of some may be out
287  * of date. In particular, the control segment may be out of date, so we
288  * can't rely on it to figure out what to remove. However, since we know
289  * what directory contains the files we used as shared memory, we can simply
290  * scan the directory and blow everything away that shouldn't be there.
291  */
292 static void
293 dsm_cleanup_for_mmap(void)
294 {
295  DIR *dir;
296  struct dirent *dent;
297 
298  /* Scan the directory for something with a name of the correct format. */
299  dir = AllocateDir(PG_DYNSHMEM_DIR);
300 
301  while ((dent = ReadDir(dir, PG_DYNSHMEM_DIR)) != NULL)
302  {
303  if (strncmp(dent->d_name, PG_DYNSHMEM_MMAP_FILE_PREFIX,
304  strlen(PG_DYNSHMEM_MMAP_FILE_PREFIX)) == 0)
305  {
306  char buf[MAXPGPATH + sizeof(PG_DYNSHMEM_DIR)];
307 
308  snprintf(buf, sizeof(buf), PG_DYNSHMEM_DIR "/%s", dent->d_name);
309 
310  elog(DEBUG2, "removing file \"%s\"", buf);
311 
312  /* We found a matching file; so remove it. */
313  if (unlink(buf) != 0)
314  ereport(ERROR,
315  (errcode_for_file_access(),
316  errmsg("could not remove file \"%s\": %m", buf)));
317  }
318  }
319 
320  /* Cleanup complete. */
321  FreeDir(dir);
322 }
323 
324 /*
325  * At shutdown time, we iterate over the control segment and remove all
326  * remaining dynamic shared memory segments. We avoid throwing errors here;
327  * the postmaster is shutting down either way, and this is just non-critical
328  * resource cleanup.
329  */
330 static void
331 dsm_postmaster_shutdown(int code, Datum arg)
332 {
333  uint32 nitems;
334  uint32 i;
335  void *dsm_control_address;
336  void *junk_mapped_address = NULL;
337  void *junk_impl_private = NULL;
338  Size junk_mapped_size = 0;
339  PGShmemHeader *shim = (PGShmemHeader *) DatumGetPointer(arg);
340 
341  /*
342  * If some other backend exited uncleanly, it might have corrupted the
343  * control segment while it was dying. In that case, we warn and ignore
344  * the contents of the control segment. This may end up leaving behind
345  * stray shared memory segments, but there's not much we can do about that
346  * if the metadata is gone.
347  */
348  nitems = dsm_control->nitems;
349  if (!dsm_control_segment_sane(dsm_control, dsm_control_mapped_size))
350  {
351  ereport(LOG,
352  (errmsg("dynamic shared memory control segment is corrupt")));
353  return;
354  }
355 
356  /* Remove any remaining segments. */
357  for (i = 0; i < nitems; ++i)
358  {
359  dsm_handle handle;
360 
361  /* If the reference count is 0, the slot is actually unused. */
362  if (dsm_control->item[i].refcnt == 0)
363  continue;
364 
365  handle = dsm_control->item[i].handle;
366  if (is_main_region_dsm_handle(handle))
367  continue;
368 
369  /* Log debugging information. */
370  elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u",
371  handle);
372 
373  /* Destroy the segment. */
374  dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
375  &junk_mapped_address, &junk_mapped_size, LOG);
376  }
377 
378  /* Remove the control segment itself. */
379  elog(DEBUG2,
380  "cleaning up dynamic shared memory control segment with ID %u",
381  dsm_control_handle);
382  dsm_control_address = dsm_control;
383  dsm_impl_op(DSM_OP_DESTROY, dsm_control_handle, 0,
384  &dsm_control_impl_private, &dsm_control_address,
385  &dsm_control_mapped_size, LOG);
386  dsm_control = dsm_control_address;
387  shim->dsm_control = 0;
388 }
389 
390 /*
391  * Prepare this backend for dynamic shared memory usage. Under EXEC_BACKEND,
392  * we must reread the state file and map the control segment; in other cases,
393  * we'll have inherited the postmaster's mapping and global variables.
394  */
395 static void
396 dsm_backend_startup(void)
397 {
398 #ifdef EXEC_BACKEND
399  if (IsUnderPostmaster)
400  {
401  void *control_address = NULL;
402 
403  /* Attach control segment. */
404  Assert(dsm_control_handle != 0);
405  dsm_impl_op(DSM_OP_ATTACH, dsm_control_handle, 0,
406  &dsm_control_impl_private, &control_address,
407  &dsm_control_mapped_size, ERROR);
408  dsm_control = control_address;
409  /* If control segment doesn't look sane, something is badly wrong. */
410  if (!dsm_control_segment_sane(dsm_control, dsm_control_mapped_size))
411  {
412  dsm_impl_op(DSM_OP_DETACH, dsm_control_handle, 0,
413  &dsm_control_impl_private, &control_address,
414  &dsm_control_mapped_size, WARNING);
415  ereport(FATAL,
416  (errcode(ERRCODE_INTERNAL_ERROR),
417  errmsg("dynamic shared memory control segment is not valid")));
418  }
419  }
420 #endif
421 
422  dsm_init_done = true;
423 }
424 
425 #ifdef EXEC_BACKEND
426 /*
427  * When running under EXEC_BACKEND, we get a callback here when the main
428  * shared memory segment is re-attached, so that we can record the control
429  * handle retrieved from it.
430  */
431 void
432 dsm_set_control_handle(dsm_handle h)
433 {
434  Assert(dsm_control_handle == 0 && h != 0);
435  dsm_control_handle = h;
436 }
437 #endif
438 
439 /*
440  * Reserve some space in the main shared memory segment for DSM segments.
441  */
442 size_t
443 dsm_estimate_size(void)
444 {
445  return 1024 * 1024 * (size_t) min_dynamic_shared_memory;
446 }
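/*
 * Editorial note (not part of dsm.c): the reservation computed above is
 * driven by the min_dynamic_shared_memory GUC, which is expressed in
 * megabytes.  For example, a hypothetical postgresql.conf entry of
 *
 *     min_dynamic_shared_memory = 64MB
 *
 * makes dsm_estimate_size() return 64 * 1024 * 1024 bytes, which
 * dsm_shmem_init() below then carves out of the main shared memory area
 * and hands to a FreePageManager.
 */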
447 
448 /*
449  * Initialize space in the main shared memory segment for DSM segments.
450  */
451 void
452 dsm_shmem_init(void)
453 {
454  size_t size = dsm_estimate_size();
455  bool found;
456 
457  if (size == 0)
458  return;
459 
460  dsm_main_space_begin = ShmemInitStruct("Preallocated DSM", size, &found);
461  if (!found)
462  {
463  FreePageManager *fpm = (FreePageManager *) dsm_main_space_begin;
464  size_t first_page = 0;
465  size_t pages;
466 
467  /* Reserve space for the FreePageManager. */
468  while (first_page * FPM_PAGE_SIZE < sizeof(FreePageManager))
469  ++first_page;
470 
471  /* Initialize it and give it all the rest of the space. */
472  FreePageManagerInitialize(fpm, dsm_main_space_begin);
473  pages = (size / FPM_PAGE_SIZE) - first_page;
474  FreePageManagerPut(fpm, first_page, pages);
475  }
476 }
477 
478 /*
479  * Create a new dynamic shared memory segment.
480  *
481  * If there is a non-NULL CurrentResourceOwner, the new segment is associated
482  * with it and must be detached before the resource owner releases, or a
483  * warning will be logged. If CurrentResourceOwner is NULL, the segment
484  * remains attached until explicitly detached or the session ends.
485  * Creating with a NULL CurrentResourceOwner is equivalent to creating
486  * with a non-NULL CurrentResourceOwner and then calling dsm_pin_mapping.
487  */
488 dsm_segment *
489 dsm_create(Size size, int flags)
490 {
491  dsm_segment *seg;
492  uint32 i;
493  uint32 nitems;
494  size_t npages = 0;
495  size_t first_page = 0;
496  FreePageManager *dsm_main_space_fpm = dsm_main_space_begin;
497  bool using_main_dsm_region = false;
498 
499  /*
500  * Unsafe in postmaster. It might seem pointless to allow use of dsm in
501  * single user mode, but otherwise some subsystems will need dedicated
502  * single user mode code paths.
503  */
504  Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
505 
506  if (!dsm_init_done)
507  dsm_backend_startup();
508 
509  /* Create a new segment descriptor. */
510  seg = dsm_create_descriptor();
511 
512  /*
513  * Lock the control segment while we try to allocate from the main shared
514  * memory area, if configured.
515  */
516  if (dsm_main_space_fpm)
517  {
518  npages = size / FPM_PAGE_SIZE;
519  if (size % FPM_PAGE_SIZE > 0)
520  ++npages;
521 
522  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
523  if (FreePageManagerGet(dsm_main_space_fpm, npages, &first_page))
524  {
525  /* We can carve out a piece of the main shared memory segment. */
526  seg->mapped_address = (char *) dsm_main_space_begin +
527  first_page * FPM_PAGE_SIZE;
528  seg->mapped_size = npages * FPM_PAGE_SIZE;
529  using_main_dsm_region = true;
530  /* We'll choose a handle below. */
531  }
532  }
533 
534  if (!using_main_dsm_region)
535  {
536  /*
537  * We need to create a new memory segment. Loop until we find an
538  * unused segment identifier.
539  */
540  if (dsm_main_space_fpm)
541  LWLockRelease(DynamicSharedMemoryControlLock);
542  for (;;)
543  {
544  Assert(seg->mapped_address == NULL && seg->mapped_size == 0);
545  /* Use even numbers only */
546  seg->handle = pg_prng_uint32(&pg_global_prng_state) << 1;
547  if (seg->handle == DSM_HANDLE_INVALID) /* Reserve sentinel */
548  continue;
549  if (dsm_impl_op(DSM_OP_CREATE, seg->handle, size, &seg->impl_private,
550  &seg->mapped_address, &seg->mapped_size, ERROR))
551  break;
552  }
553  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
554  }
555 
556  /* Search the control segment for an unused slot. */
557  nitems = dsm_control->nitems;
558  for (i = 0; i < nitems; ++i)
559  {
560  if (dsm_control->item[i].refcnt == 0)
561  {
562  if (using_main_dsm_region)
563  {
564  seg->handle = make_main_region_dsm_handle(i);
565  dsm_control->item[i].first_page = first_page;
566  dsm_control->item[i].npages = npages;
567  }
568  else
569  Assert(seg->handle != DSM_HANDLE_INVALID);
570  dsm_control->item[i].handle = seg->handle;
571  /* refcnt of 1 triggers destruction, so start at 2 */
572  dsm_control->item[i].refcnt = 2;
573  dsm_control->item[i].impl_private_pm_handle = NULL;
574  dsm_control->item[i].pinned = false;
575  seg->control_slot = i;
576  LWLockRelease(DynamicSharedMemoryControlLock);
577  return seg;
578  }
579  }
580 
581  /* Verify that we can support an additional mapping. */
582  if (nitems >= dsm_control->maxitems)
583  {
584  if (using_main_dsm_region)
585  FreePageManagerPut(dsm_main_space_fpm, first_page, npages);
586  LWLockRelease(DynamicSharedMemoryControlLock);
587  if (!using_main_dsm_region)
588  dsm_impl_op(DSM_OP_DESTROY, seg->handle, 0, &seg->impl_private,
589  &seg->mapped_address, &seg->mapped_size, WARNING);
590  if (seg->resowner != NULL)
591  ResourceOwnerForgetDSM(seg->resowner, seg);
592  dlist_delete(&seg->node);
593  pfree(seg);
594 
595  if ((flags & DSM_CREATE_NULL_IF_MAXSEGMENTS) != 0)
596  return NULL;
597  ereport(ERROR,
598  (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
599  errmsg("too many dynamic shared memory segments")));
600  }
601 
602  /* Enter the handle into a new array slot. */
603  if (using_main_dsm_region)
604  {
605  seg->handle = make_main_region_dsm_handle(nitems);
606  dsm_control->item[i].first_page = first_page;
607  dsm_control->item[i].npages = npages;
608  }
609  dsm_control->item[nitems].handle = seg->handle;
610  /* refcnt of 1 triggers destruction, so start at 2 */
611  dsm_control->item[nitems].refcnt = 2;
612  dsm_control->item[nitems].impl_private_pm_handle = NULL;
613  dsm_control->item[nitems].pinned = false;
614  seg->control_slot = nitems;
615  dsm_control->nitems++;
616  LWLockRelease(DynamicSharedMemoryControlLock);
617 
618  return seg;
619 }
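/*
 * Illustrative sketch (not part of dsm.c): how a caller might use
 * dsm_create().  The "worker_state" struct and the function name are
 * hypothetical; real callers often layer a shm_toc on top of the segment
 * instead of mapping a bare struct.
 */
typedef struct worker_state
{
	int			nworkers_attached;
	bool		done;
} worker_state;

static dsm_segment *
example_create_segment(void)
{
	dsm_segment *seg;
	worker_state *state;

	/* Request NULL instead of ERROR if all control-segment slots are used. */
	seg = dsm_create(sizeof(worker_state), DSM_CREATE_NULL_IF_MAXSEGMENTS);
	if (seg == NULL)
		return NULL;

	/* Initialize the shared contents before publishing the handle. */
	state = (worker_state *) dsm_segment_address(seg);
	state->nworkers_attached = 0;
	state->done = false;

	return seg;
}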
620 
621 /*
622  * Attach a dynamic shared memory segment.
623  *
624  * See comments for dsm_segment_handle() for an explanation of how this
625  * is intended to be used.
626  *
627  * This function will return NULL if the segment isn't known to the system.
628  * This can happen if we're asked to attach the segment, but then everyone
629  * else detaches it (causing it to be destroyed) before we get around to
630  * attaching it.
631  *
632  * If there is a non-NULL CurrentResourceOwner, the attached segment is
633  * associated with it and must be detached before the resource owner releases,
634  * or a warning will be logged. Otherwise the segment remains attached until
635  * explicitly detached or the session ends. See the note atop dsm_create().
636  */
637 dsm_segment *
638 dsm_attach(dsm_handle h)
639 {
640  dsm_segment *seg;
641  dlist_iter iter;
642  uint32 i;
643  uint32 nitems;
644 
645  /* Unsafe in postmaster (and pointless in a stand-alone backend). */
646  Assert(IsUnderPostmaster);
647 
648  if (!dsm_init_done)
649  dsm_backend_startup();
650 
651  /*
652  * Since this is just a debugging cross-check, we could leave it out
653  * altogether, or include it only in assert-enabled builds. But since the
654  * list of attached segments should normally be very short, let's include
655  * it always for right now.
656  *
657  * If you're hitting this error, you probably want to attempt to find an
658  * existing mapping via dsm_find_mapping() before calling dsm_attach() to
659  * create a new one.
660  */
661  dlist_foreach(iter, &dsm_segment_list)
662  {
663  seg = dlist_container(dsm_segment, node, iter.cur);
664  if (seg->handle == h)
665  elog(ERROR, "can't attach the same segment more than once");
666  }
667 
668  /* Create a new segment descriptor. */
669  seg = dsm_create_descriptor();
670  seg->handle = h;
671 
672  /* Bump reference count for this segment in shared memory. */
673  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
674  nitems = dsm_control->nitems;
675  for (i = 0; i < nitems; ++i)
676  {
677  /*
678  * If the reference count is 0, the slot is actually unused. If the
679  * reference count is 1, the slot is still in use, but the segment is
680  * in the process of going away; even if the handle matches, another
681  * slot may already have started using the same handle value by
682  * coincidence so we have to keep searching.
683  */
684  if (dsm_control->item[i].refcnt <= 1)
685  continue;
686 
687  /* If the handle doesn't match, it's not the slot we want. */
688  if (dsm_control->item[i].handle != seg->handle)
689  continue;
690 
691  /* Otherwise we've found a match. */
692  dsm_control->item[i].refcnt++;
693  seg->control_slot = i;
694  if (is_main_region_dsm_handle(seg->handle))
695  {
696  seg->mapped_address = (char *) dsm_main_space_begin +
697  dsm_control->item[i].first_page * FPM_PAGE_SIZE;
698  seg->mapped_size = dsm_control->item[i].npages * FPM_PAGE_SIZE;
699  }
700  break;
701  }
702  LWLockRelease(DynamicSharedMemoryControlLock);
703 
704  /*
705  * If we didn't find the handle we're looking for in the control segment,
706  * it probably means that everyone else who had it mapped, including the
707  * original creator, died before we got to this point. It's up to the
708  * caller to decide what to do about that.
709  */
710  if (seg->control_slot == INVALID_CONTROL_SLOT)
711  {
712  dsm_detach(seg);
713  return NULL;
714  }
715 
716  /* Here's where we actually try to map the segment. */
717  if (!is_main_region_dsm_handle(seg->handle))
718  dsm_impl_op(DSM_OP_ATTACH, seg->handle, 0, &seg->impl_private,
719  &seg->mapped_address, &seg->mapped_size, ERROR);
720 
721  return seg;
722 }
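/*
 * Illustrative sketch (not part of dsm.c): the receiving side.  The handle
 * would have been passed by the creator (see the sketch after
 * dsm_segment_handle() below); "worker_state" is the hypothetical struct
 * from the dsm_create() sketch, and real code would synchronize access to
 * it, e.g. with an LWLock stored inside the segment.
 */
static void
example_attach_and_work(dsm_handle handle)
{
	dsm_segment *seg;
	worker_state *state;

	seg = dsm_attach(handle);
	if (seg == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("could not attach to dynamic shared memory segment")));

	state = (worker_state *) dsm_segment_address(seg);
	/* ... read and update *state here ... */
	(void) state;

	dsm_detach(seg);
}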
723 
724 /*
725  * At backend shutdown time, detach any segments that are still attached.
726  * (This is similar to dsm_detach_all, except that there's no reason to
727  * unmap the control segment before exiting, so we don't bother.)
728  */
729 void
730 dsm_backend_shutdown(void)
731 {
732  while (!dlist_is_empty(&dsm_segment_list))
733  {
734  dsm_segment *seg;
735 
736  seg = dlist_head_element(dsm_segment, node, &dsm_segment_list);
737  dsm_detach(seg);
738  }
739 }
740 
741 /*
742  * Detach all shared memory segments, including the control segments. This
743  * should be called, along with PGSharedMemoryDetach, in processes that
744  * might inherit mappings but are not intended to be connected to dynamic
745  * shared memory.
746  */
747 void
748 dsm_detach_all(void)
749 {
750  void *control_address = dsm_control;
751 
752  while (!dlist_is_empty(&dsm_segment_list))
753  {
754  dsm_segment *seg;
755 
756  seg = dlist_head_element(dsm_segment, node, &dsm_segment_list);
757  dsm_detach(seg);
758  }
759 
760  if (control_address != NULL)
761  dsm_impl_op(DSM_OP_DETACH, dsm_control_handle, 0,
762  &dsm_control_impl_private, &control_address,
763  &dsm_control_mapped_size, LOG);
764 }
765 
766 /*
767  * Detach from a shared memory segment, destroying the segment if we
768  * remove the last reference.
769  *
770  * This function should never fail. It will often be invoked when aborting
771  * a transaction, and a further error won't serve any purpose. It's not a
772  * complete disaster if we fail to unmap or destroy the segment; it means a
773  * resource leak, but that doesn't necessarily preclude further operations.
774  */
775 void
776 dsm_detach(dsm_segment *seg)
777 {
778  /*
779  * Invoke registered callbacks. Just in case one of those callbacks
780  * throws a further error that brings us back here, pop the callback
781  * before invoking it, to avoid infinite error recursion. Don't allow
782  * interrupts while running the individual callbacks in non-error code
783  * paths, to avoid leaving cleanup work unfinished if we're interrupted by
784  * a statement timeout or similar.
785  */
786  HOLD_INTERRUPTS();
787  while (!slist_is_empty(&seg->on_detach))
788  {
789  slist_node *node;
790  dsm_segment_detach_callback *cb;
791  on_dsm_detach_callback function;
792  Datum arg;
793 
794  node = slist_pop_head_node(&seg->on_detach);
795  cb = slist_container(dsm_segment_detach_callback, node, node);
796  function = cb->function;
797  arg = cb->arg;
798  pfree(cb);
799 
800  function(seg, arg);
801  }
802  RESUME_INTERRUPTS();
803 
804  /*
805  * Try to remove the mapping, if one exists. Normally, there will be, but
806  * maybe not, if we failed partway through a create or attach operation.
807  * We remove the mapping before decrementing the reference count so that
808  * the process that sees a zero reference count can be certain that no
809  * remaining mappings exist. Even if this fails, we pretend that it
810  * works, because retrying is likely to fail in the same way.
811  */
812  if (seg->mapped_address != NULL)
813  {
814  if (!is_main_region_dsm_handle(seg->handle))
815  dsm_impl_op(DSM_OP_DETACH, seg->handle, 0, &seg->impl_private,
816  &seg->mapped_address, &seg->mapped_size, WARNING);
817  seg->impl_private = NULL;
818  seg->mapped_address = NULL;
819  seg->mapped_size = 0;
820  }
821 
822  /* Reduce reference count, if we previously increased it. */
823  if (seg->control_slot != INVALID_CONTROL_SLOT)
824  {
825  uint32 refcnt;
826  uint32 control_slot = seg->control_slot;
827 
828  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
829  Assert(dsm_control->item[control_slot].handle == seg->handle);
830  Assert(dsm_control->item[control_slot].refcnt > 1);
831  refcnt = --dsm_control->item[control_slot].refcnt;
832  seg->control_slot = INVALID_CONTROL_SLOT;
833  LWLockRelease(DynamicSharedMemoryControlLock);
834 
835  /* If new reference count is 1, try to destroy the segment. */
836  if (refcnt == 1)
837  {
838  /* A pinned segment should never reach 1. */
839  Assert(!dsm_control->item[control_slot].pinned);
840 
841  /*
842  * If we fail to destroy the segment here, or are killed before we
843  * finish doing so, the reference count will remain at 1, which
844  * will mean that nobody else can attach to the segment. At
845  * postmaster shutdown time, or when a new postmaster is started
846  * after a hard kill, another attempt will be made to remove the
847  * segment.
848  *
849  * The main case we're worried about here is being killed by a
850  * signal before we can finish removing the segment. In that
851  * case, it's important to be sure that the segment still gets
852  * removed. If we actually fail to remove the segment for some
853  * other reason, the postmaster may not have any better luck than
854  * we did. There's not much we can do about that, though.
855  */
856  if (is_main_region_dsm_handle(seg->handle) ||
857  dsm_impl_op(DSM_OP_DESTROY, seg->handle, 0, &seg->impl_private,
858  &seg->mapped_address, &seg->mapped_size, WARNING))
859  {
860  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
861  if (is_main_region_dsm_handle(seg->handle))
862  FreePageManagerPut((FreePageManager *) dsm_main_space_begin,
863  dsm_control->item[control_slot].first_page,
864  dsm_control->item[control_slot].npages);
865  Assert(dsm_control->item[control_slot].handle == seg->handle);
866  Assert(dsm_control->item[control_slot].refcnt == 1);
867  dsm_control->item[control_slot].refcnt = 0;
868  LWLockRelease(DynamicSharedMemoryControlLock);
869  }
870  }
871  }
872 
873  /* Clean up our remaining backend-private data structures. */
874  if (seg->resowner != NULL)
875  ResourceOwnerForgetDSM(seg->resowner, seg);
876  dlist_delete(&seg->node);
877  pfree(seg);
878 }
879 
880 /*
881  * Keep a dynamic shared memory mapping until end of session.
882  *
883  * By default, mappings are owned by the current resource owner, which
884  * typically means they stick around for the duration of the current query
885  * only.
886  */
887 void
888 dsm_pin_mapping(dsm_segment *seg)
889 {
890  if (seg->resowner != NULL)
891  {
892  ResourceOwnerForgetDSM(seg->resowner, seg);
893  seg->resowner = NULL;
894  }
895 }
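/*
 * Illustrative sketch (not part of dsm.c): attaching a segment for the rest
 * of the session.  Without dsm_pin_mapping(), a segment attached under a
 * transaction's resource owner is detached (with a leak warning) when that
 * resource owner is released.
 */
static dsm_segment *
example_attach_for_session(dsm_handle handle)
{
	dsm_segment *seg = dsm_attach(handle);

	if (seg != NULL)
		dsm_pin_mapping(seg);	/* give the mapping session lifespan */
	return seg;
}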
896 
897 /*
898  * Arrange to remove a dynamic shared memory mapping at cleanup time.
899  *
900  * dsm_pin_mapping() can be used to preserve a mapping for the entire
901  * lifetime of a process; this function reverses that decision, making
902  * the segment owned by the current resource owner. This may be useful
903  * just before performing some operation that will invalidate the segment
904  * for future use by this backend.
905  */
906 void
907 dsm_unpin_mapping(dsm_segment *seg)
908 {
909  Assert(seg->resowner == NULL);
910  ResourceOwnerEnlargeDSMs(CurrentResourceOwner);
911  seg->resowner = CurrentResourceOwner;
912  ResourceOwnerRememberDSM(seg->resowner, seg);
913 }
914 
915 /*
916  * Keep a dynamic shared memory segment until postmaster shutdown, or until
917  * dsm_unpin_segment is called.
918  *
919  * This function should not be called more than once per segment, unless the
920  * segment is explicitly unpinned with dsm_unpin_segment in between calls.
921  *
922  * Note that this function does not arrange for the current process to
923  * keep the segment mapped indefinitely; if that behavior is desired,
924  * dsm_pin_mapping() should be used from each process that needs to
925  * retain the mapping.
926  */
927 void
928 dsm_pin_segment(dsm_segment *seg)
929 {
930  void *handle;
931 
932  /*
933  * Bump reference count for this segment in shared memory. This will
934  * ensure that even if there is no session which is attached to this
935  * segment, it will remain until postmaster shutdown or an explicit call
936  * to unpin.
937  */
938  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
939  if (dsm_control->item[seg->control_slot].pinned)
940  elog(ERROR, "cannot pin a segment that is already pinned");
941  dsm_impl_pin_segment(seg->handle, seg->impl_private, &handle);
942  dsm_control->item[seg->control_slot].pinned = true;
943  dsm_control->item[seg->control_slot].refcnt++;
944  dsm_control->item[seg->control_slot].impl_private_pm_handle = handle;
945  LWLockRelease(DynamicSharedMemoryControlLock);
946 }
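/*
 * Illustrative sketch (not part of dsm.c): pinning a segment so that it
 * survives after its creator detaches, and publishing the handle for other
 * sessions.  "registry_slot" stands in for a hypothetical location in the
 * main shared memory segment; a later dsm_unpin_segment() on the published
 * handle is what ultimately allows the segment to be destroyed.
 */
static void
example_publish_pinned_segment(dsm_segment *seg, dsm_handle *registry_slot)
{
	dsm_pin_segment(seg);		/* keep the segment until shutdown or unpin */
	*registry_slot = dsm_segment_handle(seg);
	dsm_detach(seg);			/* our mapping goes away; the segment stays */
}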
947 
948 /*
949  * Unpin a dynamic shared memory segment that was previously pinned with
950  * dsm_pin_segment. This function should not be called unless dsm_pin_segment
951  * was previously called for this segment.
952  *
953  * The argument is a dsm_handle rather than a dsm_segment in case you want
954  * to unpin a segment to which you haven't attached. This turns out to be
955  * useful if, for example, a reference to one shared memory segment is stored
956  * within another shared memory segment. You might want to unpin the
957  * referenced segment before destroying the referencing segment.
958  */
959 void
960 dsm_unpin_segment(dsm_handle handle)
961 {
962  uint32 control_slot = INVALID_CONTROL_SLOT;
963  bool destroy = false;
964  uint32 i;
965 
966  /* Find the control slot for the given handle. */
967  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
968  for (i = 0; i < dsm_control->nitems; ++i)
969  {
970  /* Skip unused slots and segments that are concurrently going away. */
971  if (dsm_control->item[i].refcnt <= 1)
972  continue;
973 
974  /* If we've found our handle, we can stop searching. */
975  if (dsm_control->item[i].handle == handle)
976  {
977  control_slot = i;
978  break;
979  }
980  }
981 
982  /*
983  * We should definitely have found the slot, and it should not already be
984  * in the process of going away, because this function should only be
985  * called on a segment which is pinned.
986  */
987  if (control_slot == INVALID_CONTROL_SLOT)
988  elog(ERROR, "cannot unpin unknown segment handle");
989  if (!dsm_control->item[control_slot].pinned)
990  elog(ERROR, "cannot unpin a segment that is not pinned");
991  Assert(dsm_control->item[control_slot].refcnt > 1);
992 
993  /*
994  * Allow implementation-specific code to run. We have to do this before
995  * releasing the lock, because impl_private_pm_handle may get modified by
996  * dsm_impl_unpin_segment.
997  */
998  dsm_impl_unpin_segment(handle,
999  &dsm_control->item[control_slot].impl_private_pm_handle);
1000 
1001  /* Note that 1 means no references (0 means unused slot). */
1002  if (--dsm_control->item[control_slot].refcnt == 1)
1003  destroy = true;
1004  dsm_control->item[control_slot].pinned = false;
1005 
1006  /* Now we can release the lock. */
1007  LWLockRelease(DynamicSharedMemoryControlLock);
1008 
1009  /* Clean up resources if that was the last reference. */
1010  if (destroy)
1011  {
1012  void *junk_impl_private = NULL;
1013  void *junk_mapped_address = NULL;
1014  Size junk_mapped_size = 0;
1015 
1016  /*
1017  * For an explanation of how error handling works in this case, see
1018  * comments in dsm_detach. Note that if we reach this point, the
1019  * current process certainly does not have the segment mapped, because
1020  * if it did, the reference count would have still been greater than 1
1021  * even after releasing the reference count held by the pin. The fact
1022  * that there can't be a dsm_segment for this handle makes it OK to
1023  * pass the mapped size, mapped address, and private data as NULL
1024  * here.
1025  */
1026  if (is_main_region_dsm_handle(handle) ||
1027  dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
1028  &junk_mapped_address, &junk_mapped_size, WARNING))
1029  {
1030  LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
1031  if (is_main_region_dsm_handle(handle))
1032  FreePageManagerPut((FreePageManager *) dsm_main_space_begin,
1033  dsm_control->item[control_slot].first_page,
1034  dsm_control->item[control_slot].npages);
1035  Assert(dsm_control->item[control_slot].handle == handle);
1036  Assert(dsm_control->item[control_slot].refcnt == 1);
1037  dsm_control->item[control_slot].refcnt = 0;
1038  LWLockRelease(DynamicSharedMemoryControlLock);
1039  }
1040  }
1041 }
1042 
1043 /*
1044  * Find an existing mapping for a shared memory segment, if there is one.
1045  */
1046 dsm_segment *
1047 dsm_find_mapping(dsm_handle handle)
1048 {
1049  dlist_iter iter;
1050  dsm_segment *seg;
1051 
1052  dlist_foreach(iter, &dsm_segment_list)
1053  {
1054  seg = dlist_container(dsm_segment, node, iter.cur);
1055  if (seg->handle == handle)
1056  return seg;
1057  }
1058 
1059  return NULL;
1060 }
1061 
1062 /*
1063  * Get the address at which a dynamic shared memory segment is mapped.
1064  */
1065 void *
1066 dsm_segment_address(dsm_segment *seg)
1067 {
1068  Assert(seg->mapped_address != NULL);
1069  return seg->mapped_address;
1070 }
1071 
1072 /*
1073  * Get the size of a mapping.
1074  */
1075 Size
1076 dsm_segment_map_length(dsm_segment *seg)
1077 {
1078  Assert(seg->mapped_address != NULL);
1079  return seg->mapped_size;
1080 }
1081 
1082 /*
1083  * Get a handle for a mapping.
1084  *
1085  * To establish communication via dynamic shared memory between two backends,
1086  * one of them should first call dsm_create() to establish a new shared
1087  * memory mapping. That process should then call dsm_segment_handle() to
1088  * obtain a handle for the mapping, and pass that handle to the
1089  * coordinating backend via some means (e.g. bgw_main_arg, or via the
1090  * main shared memory segment). The recipient, once in possession of the
1091  * handle, should call dsm_attach().
1092  */
1093 dsm_handle
1094 dsm_segment_handle(dsm_segment *seg)
1095 {
1096  return seg->handle;
1097 }
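/*
 * Illustrative sketch (not part of dsm.c) of the hand-off described above:
 * the creating backend passes the handle to a dynamic background worker
 * through bgw_main_arg.  Function and library names are hypothetical, the
 * BackgroundWorker setup is abbreviated, and postmaster/bgworker.h is
 * assumed to be included.  The creator must keep the segment attached (or
 * pinned) until the worker has attached, or the segment may vanish.
 */
static void
example_launch_worker(dsm_segment *seg)
{
	BackgroundWorker worker;
	BackgroundWorkerHandle *bgwhandle;

	MemSet(&worker, 0, sizeof(worker));
	worker.bgw_flags = BGWORKER_SHMEM_ACCESS;
	worker.bgw_start_time = BgWorkerStart_ConsistentState;
	worker.bgw_restart_time = BGW_NEVER_RESTART;
	snprintf(worker.bgw_name, BGW_MAXLEN, "example dsm worker");
	snprintf(worker.bgw_library_name, sizeof(worker.bgw_library_name),
			 "my_extension");
	snprintf(worker.bgw_function_name, BGW_MAXLEN, "example_worker_main");
	worker.bgw_main_arg = UInt32GetDatum(dsm_segment_handle(seg));

	if (!RegisterDynamicBackgroundWorker(&worker, &bgwhandle))
		elog(WARNING, "could not register background worker");
}

/* In the worker, the Datum is converted back into a handle and attached. */
void
example_worker_main(Datum main_arg)
{
	dsm_segment *seg = dsm_attach(DatumGetUInt32(main_arg));

	if (seg == NULL)
		elog(ERROR, "could not attach to dynamic shared memory segment");
	/* ... communicate through dsm_segment_address(seg) ... */
	dsm_detach(seg);
}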
1098 
1099 /*
1100  * Register an on-detach callback for a dynamic shared memory segment.
1101  */
1102 void
1103 on_dsm_detach(dsm_segment *seg, on_dsm_detach_callback function, Datum arg)
1104 {
1105  dsm_segment_detach_callback *cb;
1106 
1107  cb = MemoryContextAlloc(TopMemoryContext,
1108  sizeof(dsm_segment_detach_callback));
1109  cb->function = function;
1110  cb->arg = arg;
1111  slist_push_head(&seg->on_detach, &cb->node);
1112 }
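/*
 * Illustrative sketch (not part of dsm.c): registering a cleanup action that
 * runs whenever the segment is detached, whether explicitly, at resource
 * owner cleanup, or at backend exit.  The callback and its Datum payload are
 * hypothetical.
 */
static void
example_cleanup_callback(dsm_segment *seg, Datum arg)
{
	(void) arg;					/* unused in this sketch */
	elog(DEBUG1, "detaching from segment %u", dsm_segment_handle(seg));
}

static void
example_register_cleanup(dsm_segment *seg)
{
	on_dsm_detach(seg, example_cleanup_callback, (Datum) 0);
}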
1113 
1114 /*
1115  * Unregister an on-detach callback for a dynamic shared memory segment.
1116  */
1117 void
1118 cancel_on_dsm_detach(dsm_segment *seg, on_dsm_detach_callback function,
1119  Datum arg)
1120 {
1121  slist_mutable_iter iter;
1122 
1123  slist_foreach_modify(iter, &seg->on_detach)
1124  {
1125  dsm_segment_detach_callback *cb;
1126 
1127  cb = slist_container(dsm_segment_detach_callback, node, iter.cur);
1128  if (cb->function == function && cb->arg == arg)
1129  {
1130  slist_delete_current(&iter);
1131  pfree(cb);
1132  break;
1133  }
1134  }
1135 }
1136 
1137 /*
1138  * Discard all registered on-detach callbacks without executing them.
1139  */
1140 void
1141 reset_on_dsm_detach(void)
1142 {
1143  dlist_iter iter;
1144 
1145  dlist_foreach(iter, &dsm_segment_list)
1146  {
1147  dsm_segment *seg = dlist_container(dsm_segment, node, iter.cur);
1148 
1149  /* Throw away explicit on-detach actions one by one. */
1150  while (!slist_is_empty(&seg->on_detach))
1151  {
1152  slist_node *node;
1153  dsm_segment_detach_callback *cb;
1154 
1155  node = slist_pop_head_node(&seg->on_detach);
1156  cb = slist_container(dsm_segment_detach_callback, node, node);
1157  pfree(cb);
1158  }
1159 
1160  /*
1161  * Decrementing the reference count is a sort of implicit on-detach
1162  * action; make sure we don't do that, either.
1163  */
1164  seg->control_slot = INVALID_CONTROL_SLOT;
1165  }
1166 }
1167 
1168 /*
1169  * Create a segment descriptor.
1170  */
1171 static dsm_segment *
1172 dsm_create_descriptor(void)
1173 {
1174  dsm_segment *seg;
1175 
1176  if (CurrentResourceOwner)
1177  ResourceOwnerEnlargeDSMs(CurrentResourceOwner);
1178 
1179  seg = MemoryContextAlloc(TopMemoryContext, sizeof(dsm_segment));
1180  dlist_push_head(&dsm_segment_list, &seg->node);
1181 
1182  /* seg->handle must be initialized by the caller */
1183  seg->control_slot = INVALID_CONTROL_SLOT;
1184  seg->impl_private = NULL;
1185  seg->mapped_address = NULL;
1186  seg->mapped_size = 0;
1187 
1188  seg->resowner = CurrentResourceOwner;
1189  if (CurrentResourceOwner)
1190  ResourceOwnerRememberDSM(CurrentResourceOwner, seg);
1191 
1192  slist_init(&seg->on_detach);
1193 
1194  return seg;
1195 }
1196 
1197 /*
1198  * Sanity check a control segment.
1199  *
1200  * The goal here isn't to detect everything that could possibly be wrong with
1201  * the control segment; there's not enough information for that. Rather, the
1202  * goal is to make sure that someone can iterate over the items in the segment
1203  * without overrunning the end of the mapping and crashing. We also check
1204  * the magic number since, if that's messed up, this may not even be one of
1205  * our segments at all.
1206  */
1207 static bool
1208 dsm_control_segment_sane(dsm_control_header *control, Size mapped_size)
1209 {
1210  if (mapped_size < offsetof(dsm_control_header, item))
1211  return false; /* Mapped size too short to read header. */
1212  if (control->magic != PG_DYNSHMEM_CONTROL_MAGIC)
1213  return false; /* Magic number doesn't match. */
1214  if (dsm_control_bytes_needed(control->maxitems) > mapped_size)
1215  return false; /* Max item count won't fit in map. */
1216  if (control->nitems > control->maxitems)
1217  return false; /* Overfull. */
1218  return true;
1219 }
1220 
1221 /*
1222  * Compute the number of control-segment bytes needed to store a given
1223  * number of items.
1224  */
1225 static uint64
1226 dsm_control_bytes_needed(uint32 nitems)
1227 {
1228  return offsetof(dsm_control_header, item)
1229  + sizeof(dsm_control_item) * (uint64) nitems;
1230 }
1231 
1232 static inline dsm_handle
1233 make_main_region_dsm_handle(int slot)
1234 {
1235  dsm_handle handle;
1236 
1237  /*
1238  * We need to create a handle that doesn't collide with any existing extra
1239  * segment created by dsm_impl_op(), so we'll make it odd. It also
1240  * mustn't collide with any other main area pseudo-segment, so we'll
1241  * include the slot number in some of the bits. We also want to make an
1242  * effort to avoid newly created and recently destroyed handles from being
1243  * confused, so we'll make the rest of the bits random.
1244  */
1245  handle = 1;
1246  handle |= slot << 1;
1247  handle |= pg_prng_uint32(&pg_global_prng_state) << (pg_leftmost_one_pos32(dsm_control->maxitems) + 1);
1248  return handle;
1249 }
1250 
1251 static inline bool
1252 is_main_region_dsm_handle(dsm_handle handle)
1253 {
1254  return handle & 1;
1255 }