PostgreSQL Source Code git master
Loading...
Searching...
No Matches
dsm.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * dsm.c
4 * manage dynamic shared memory segments
5 *
6 * This file provides a set of services to make programming with dynamic
7 * shared memory segments more convenient. Unlike the low-level
8 * facilities provided by dsm_impl.h and dsm_impl.c, mappings and segments
9 * created using this module will be cleaned up automatically. Mappings
10 * will be removed when the resource owner under which they were created
11 * is cleaned up, unless dsm_pin_mapping() is used, in which case they
12 * have session lifespan. Segments will be removed when there are no
13 * remaining mappings, or at postmaster shutdown in any case. After a
14 * hard postmaster crash, remaining segments will be removed, if they
15 * still exist, at the next postmaster startup.
16 *
17 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
18 * Portions Copyright (c) 1994, Regents of the University of California
19 *
20 *
21 * IDENTIFICATION
22 * src/backend/storage/ipc/dsm.c
23 *
24 *-------------------------------------------------------------------------
25 */
26
27#include "postgres.h"
28
29#include <fcntl.h>
30#include <unistd.h>
31#ifndef WIN32
32#include <sys/mman.h>
33#endif
34#include <sys/stat.h>
35
36#include "common/pg_prng.h"
37#include "lib/ilist.h"
38#include "miscadmin.h"
39#include "port/pg_bitutils.h"
40#include "storage/dsm.h"
41#include "storage/fd.h"
42#include "storage/ipc.h"
43#include "storage/lwlock.h"
44#include "storage/pg_shmem.h"
45#include "storage/shmem.h"
46#include "storage/subsystems.h"
47#include "utils/freepage.h"
48#include "utils/memutils.h"
49#include "utils/resowner.h"
50
51#define PG_DYNSHMEM_CONTROL_MAGIC 0x9a503d32
52
53#define PG_DYNSHMEM_FIXED_SLOTS 64
54#define PG_DYNSHMEM_SLOTS_PER_BACKEND 5
55
56#define INVALID_CONTROL_SLOT ((uint32) -1)
57
58/* Backend-local tracking for on-detach callbacks. */
65
66/* Backend-local state for a dynamic shared memory segment. */
68{
69 dlist_node node; /* List link in dsm_segment_list. */
70 ResourceOwner resowner; /* Resource owner. */
71 dsm_handle handle; /* Segment name. */
72 uint32 control_slot; /* Slot in control segment. */
73 void *impl_private; /* Implementation-specific private data. */
74 void *mapped_address; /* Mapping address, or NULL if unmapped. */
75 Size mapped_size; /* Size of our mapping. */
76 slist_head on_detach; /* On-detach callbacks. */
77};
78
79/* Shared-memory state for a dynamic shared memory segment. */
80typedef struct dsm_control_item
81{
83 uint32 refcnt; /* 2+ = active, 1 = moribund, 0 = gone */
84 size_t first_page;
85 size_t npages;
86 void *impl_private_pm_handle; /* only needed on Windows */
87 bool pinned;
89
90/* Layout of the dynamic shared memory control segment. */
98
99static void dsm_cleanup_for_mmap(void);
100static void dsm_postmaster_shutdown(int code, Datum arg);
103 Size mapped_size);
105static inline dsm_handle make_main_region_dsm_handle(int slot);
106static inline bool is_main_region_dsm_handle(dsm_handle handle);
107
108/* Has this backend initialized the dynamic shared memory system yet? */
109static bool dsm_init_done = false;
110
111/* Preallocated DSM space in the main shared memory region. */
114
115static void dsm_main_space_request(void *arg);
116static void dsm_main_space_init(void *arg);
117
122
123/*
124 * List of dynamic shared memory segments used by this backend.
125 *
126 * At process exit time, we must decrement the reference count of each
127 * segment we have attached; this list makes it possible to find all such
128 * segments.
129 *
130 * This list should always be empty in the postmaster. We could probably
131 * allow the postmaster to map dynamic shared memory segments before it
132 * begins to start child processes, provided that each process adjusted
133 * the reference counts for those segments in the control segment at
134 * startup time, but there's no obvious need for such a facility, which
135 * would also be complex to handle in the EXEC_BACKEND case. Once the
136 * postmaster has begun spawning children, there's an additional problem:
137 * each new mapping would require an update to the control segment,
138 * which requires locking, in which the postmaster must not be involved.
139 */
141
142/*
143 * Control segment information.
144 *
145 * Unlike ordinary shared memory segments, the control segment is not
146 * reference counted; instead, it lasts for the postmaster's entire
147 * life cycle. For simplicity, it doesn't have a dsm_segment object either.
148 */
153
154
155/* ResourceOwner callbacks to hold DSM segments */
156static void ResOwnerReleaseDSM(Datum res);
157static char *ResOwnerPrintDSM(Datum res);
158
160{
161 .name = "dynamic shared memory segment",
162 .release_phase = RESOURCE_RELEASE_BEFORE_LOCKS,
163 .release_priority = RELEASE_PRIO_DSMS,
164 .ReleaseResource = ResOwnerReleaseDSM,
165 .DebugPrint = ResOwnerPrintDSM
166};
167
168/* Convenience wrappers over ResourceOwnerRemember/Forget */
169static inline void
174static inline void
179
180/*
181 * Start up the dynamic shared memory system.
182 *
183 * This is called just once during each cluster lifetime, at postmaster
184 * startup time.
185 */
186void
188{
190 uint32 maxitems;
191 Size segsize;
192
194
195 /*
196 * If we're using the mmap implementation, clean up any leftovers.
197 * Cleanup isn't needed on Windows, and happens earlier in startup for
198 * POSIX and System V shared memory, via a direct call to
199 * dsm_cleanup_using_control_segment.
200 */
203
204 /* Determine size for new control segment. */
205 maxitems = PG_DYNSHMEM_FIXED_SLOTS
207 elog(DEBUG2, "dynamic shared memory system will support %u segments",
208 maxitems);
209 segsize = dsm_control_bytes_needed(maxitems);
210
211 /*
212 * Loop until we find an unused identifier for the new control segment. We
213 * sometimes use DSM_HANDLE_INVALID as a sentinel value indicating "no
214 * control segment", so avoid generating that value for a real handle.
215 */
216 for (;;)
217 {
220 /* Use even numbers only */
223 continue;
227 break;
228 }
231 elog(DEBUG2,
232 "created dynamic shared memory control segment %u (%zu bytes)",
233 dsm_control_handle, segsize);
234 shim->dsm_control = dsm_control_handle;
235
236 /* Initialize control segment. */
238 dsm_control->nitems = 0;
239 dsm_control->maxitems = maxitems;
240}
241
242/*
243 * Determine whether the control segment from the previous postmaster
244 * invocation still exists. If so, remove the dynamic shared memory
245 * segments to which it refers, and then the control segment itself.
246 */
247void
249{
250 void *mapped_address = NULL;
252 void *impl_private = NULL;
253 void *junk_impl_private = NULL;
254 Size mapped_size = 0;
257 uint32 i;
259
260 /*
261 * Try to attach the segment. If this fails, it probably just means that
262 * the operating system has been rebooted and the segment no longer
263 * exists, or an unrelated process has used the same shm ID. So just fall
264 * out quietly.
265 */
266 if (!dsm_impl_op(DSM_OP_ATTACH, old_control_handle, 0, &impl_private,
267 &mapped_address, &mapped_size, DEBUG1))
268 return;
269
270 /*
271 * We've managed to reattach it, but the contents might not be sane. If
272 * they aren't, we disregard the segment after all.
273 */
274 old_control = (dsm_control_header *) mapped_address;
275 if (!dsm_control_segment_sane(old_control, mapped_size))
276 {
278 &mapped_address, &mapped_size, LOG);
279 return;
280 }
281
282 /*
283 * OK, the control segment looks basically valid, so we can use it to get
284 * a list of segments that need to be removed.
285 */
286 nitems = old_control->nitems;
287 for (i = 0; i < nitems; ++i)
288 {
289 dsm_handle handle;
290 uint32 refcnt;
291
292 /* If the reference count is 0, the slot is actually unused. */
293 refcnt = old_control->item[i].refcnt;
294 if (refcnt == 0)
295 continue;
296
297 /* If it was using the main shmem area, there is nothing to do. */
298 handle = old_control->item[i].handle;
299 if (is_main_region_dsm_handle(handle))
300 continue;
301
302 /* Log debugging information. */
303 elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u (reference count %u)",
304 handle, refcnt);
305
306 /* Destroy the referenced segment. */
309 }
310
311 /* Destroy the old control segment, too. */
312 elog(DEBUG2,
313 "cleaning up dynamic shared memory control segment with ID %u",
316 &mapped_address, &mapped_size, LOG);
317}
318
319/*
320 * When we're using the mmap shared memory implementation, "shared memory"
321 * segments might even manage to survive an operating system reboot.
322 * But there's no guarantee as to exactly what will survive: some segments
323 * may survive, and others may not, and the contents of some may be out
324 * of date. In particular, the control segment may be out of date, so we
325 * can't rely on it to figure out what to remove. However, since we know
326 * what directory contains the files we used as shared memory, we can simply
327 * scan the directory and blow everything away that shouldn't be there.
328 */
329static void
331{
332 DIR *dir;
333 struct dirent *dent;
334
335 /* Scan the directory for something with a name of the correct format. */
337
338 while ((dent = ReadDir(dir, PG_DYNSHMEM_DIR)) != NULL)
339 {
342 {
343 char buf[MAXPGPATH + sizeof(PG_DYNSHMEM_DIR)];
344
345 snprintf(buf, sizeof(buf), PG_DYNSHMEM_DIR "/%s", dent->d_name);
346
347 elog(DEBUG2, "removing file \"%s\"", buf);
348
349 /* We found a matching file; so remove it. */
350 if (unlink(buf) != 0)
353 errmsg("could not remove file \"%s\": %m", buf)));
354 }
355 }
356
357 /* Cleanup complete. */
358 FreeDir(dir);
359}
360
361/*
362 * At shutdown time, we iterate over the control segment and remove all
363 * remaining dynamic shared memory segments. We avoid throwing errors here;
364 * the postmaster is shutting down either way, and this is just non-critical
365 * resource cleanup.
366 */
367static void
369{
371 uint32 i;
374 void *junk_impl_private = NULL;
377
378 /*
379 * If some other backend exited uncleanly, it might have corrupted the
380 * control segment while it was dying. In that case, we warn and ignore
381 * the contents of the control segment. This may end up leaving behind
382 * stray shared memory segments, but there's not much we can do about that
383 * if the metadata is gone.
384 */
387 {
388 ereport(LOG,
389 (errmsg("dynamic shared memory control segment is corrupt")));
390 return;
391 }
392
393 /* Remove any remaining segments. */
394 for (i = 0; i < nitems; ++i)
395 {
396 dsm_handle handle;
397
398 /* If the reference count is 0, the slot is actually unused. */
399 if (dsm_control->item[i].refcnt == 0)
400 continue;
401
402 handle = dsm_control->item[i].handle;
403 if (is_main_region_dsm_handle(handle))
404 continue;
405
406 /* Log debugging information. */
407 elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u",
408 handle);
409
410 /* Destroy the segment. */
413 }
414
415 /* Remove the control segment itself. */
416 elog(DEBUG2,
417 "cleaning up dynamic shared memory control segment with ID %u",
424 shim->dsm_control = 0;
425}
426
427/*
428 * Prepare this backend for dynamic shared memory usage. Under EXEC_BACKEND,
429 * we must map the control segment using the saved handle; in other cases,
430 * we'll have inherited the postmaster's mapping and global variables.
431 */
432static void
434{
435#ifdef EXEC_BACKEND
437 {
438 void *control_address = NULL;
439
440 /* Attach control segment. */
446 /* If control segment doesn't look sane, something is badly wrong. */
448 {
454 errmsg("dynamic shared memory control segment is not valid")));
455 }
456 }
457#endif
458
459 dsm_init_done = true;
460}
461
462#ifdef EXEC_BACKEND
463/*
464 * When running under EXEC_BACKEND, we get a callback here when the main
465 * shared memory segment is re-attached, so that we can record the control
466 * handle retrieved from it.
467 */
468void
470{
471 Assert(dsm_control_handle == 0 && h != 0);
473}
474#endif
475
476/*
477 * Reserve space in the main shared memory segment for DSM segments.
478 */
479static void
481{
483
484 if (dsm_main_space_size == 0)
485 return;
486
487 ShmemRequestStruct(.name = "Preallocated DSM",
488 .size = dsm_main_space_size,
489 .ptr = &dsm_main_space_begin,
490 );
491}
492
493static void
495{
497 size_t first_page = 0;
498 size_t pages;
499
500 if (dsm_main_space_size == 0)
501 return;
502
503 /* Reserve space for the FreePageManager. */
504 while (first_page * FPM_PAGE_SIZE < sizeof(FreePageManager))
505 ++first_page;
506
507 /* Initialize it and give it all the rest of the space. */
509 pages = (dsm_main_space_size / FPM_PAGE_SIZE) - first_page;
510 FreePageManagerPut(fpm, first_page, pages);
511}
512
513/*
514 * Create a new dynamic shared memory segment.
515 *
516 * If there is a non-NULL CurrentResourceOwner, the new segment is associated
517 * with it and must be detached before the resource owner releases, or a
518 * warning will be logged. If CurrentResourceOwner is NULL, the segment
519 * remains attached until explicitly detached or the session ends.
520 * Creating with a NULL CurrentResourceOwner is equivalent to creating
521 * with a non-NULL CurrentResourceOwner and then calling dsm_pin_mapping.
522 */
524dsm_create(Size size, int flags)
525{
526 dsm_segment *seg;
527 uint32 i;
529 size_t npages = 0;
530 size_t first_page = 0;
532 bool using_main_dsm_region = false;
533
534 /*
535 * Unsafe in postmaster. It might seem pointless to allow use of dsm in
536 * single user mode, but otherwise some subsystems will need dedicated
537 * single user mode code paths.
538 */
540
541 if (!dsm_init_done)
543
544 /* Create a new segment descriptor. */
545 seg = dsm_create_descriptor();
546
547 /*
548 * Lock the control segment while we try to allocate from the main shared
549 * memory area, if configured.
550 */
552 {
553 npages = size / FPM_PAGE_SIZE;
554 if (size % FPM_PAGE_SIZE > 0)
555 ++npages;
556
558 if (FreePageManagerGet(dsm_main_space_fpm, npages, &first_page))
559 {
560 /* We can carve out a piece of the main shared memory segment. */
561 seg->mapped_address = (char *) dsm_main_space_begin +
562 first_page * FPM_PAGE_SIZE;
563 seg->mapped_size = npages * FPM_PAGE_SIZE;
565 /* We'll choose a handle below. */
566 }
567 }
568
570 {
571 /*
572 * We need to create a new memory segment. Loop until we find an
573 * unused segment identifier.
574 */
577 for (;;)
578 {
579 Assert(seg->mapped_address == NULL && seg->mapped_size == 0);
580 /* Use even numbers only */
582 if (seg->handle == DSM_HANDLE_INVALID) /* Reserve sentinel */
583 continue;
584 if (dsm_impl_op(DSM_OP_CREATE, seg->handle, size, &seg->impl_private,
585 &seg->mapped_address, &seg->mapped_size, ERROR))
586 break;
587 }
589 }
590
591 /* Search the control segment for an unused slot. */
593 for (i = 0; i < nitems; ++i)
594 {
595 if (dsm_control->item[i].refcnt == 0)
596 {
598 {
600 dsm_control->item[i].first_page = first_page;
601 dsm_control->item[i].npages = npages;
602 }
603 else
605 dsm_control->item[i].handle = seg->handle;
606 /* refcnt of 1 triggers destruction, so start at 2 */
607 dsm_control->item[i].refcnt = 2;
609 dsm_control->item[i].pinned = false;
610 seg->control_slot = i;
612 return seg;
613 }
614 }
615
616 /* Verify that we can support an additional mapping. */
618 {
620 FreePageManagerPut(dsm_main_space_fpm, first_page, npages);
624 &seg->mapped_address, &seg->mapped_size, WARNING);
625 if (seg->resowner != NULL)
627 dlist_delete(&seg->node);
628 pfree(seg);
629
630 if ((flags & DSM_CREATE_NULL_IF_MAXSEGMENTS) != 0)
631 return NULL;
634 errmsg("too many dynamic shared memory segments")));
635 }
636
637 /* Enter the handle into a new array slot. */
639 {
641 dsm_control->item[i].first_page = first_page;
642 dsm_control->item[i].npages = npages;
643 }
645 /* refcnt of 1 triggers destruction, so start at 2 */
648 dsm_control->item[nitems].pinned = false;
649 seg->control_slot = nitems;
652
653 return seg;
654}
655
656/*
657 * Attach a dynamic shared memory segment.
658 *
659 * See comments for dsm_segment_handle() for an explanation of how this
660 * is intended to be used.
661 *
662 * This function will return NULL if the segment isn't known to the system.
663 * This can happen if we're asked to attach the segment, but then everyone
664 * else detaches it (causing it to be destroyed) before we get around to
665 * attaching it.
666 *
667 * If there is a non-NULL CurrentResourceOwner, the attached segment is
668 * associated with it and must be detached before the resource owner releases,
669 * or a warning will be logged. Otherwise the segment remains attached until
670 * explicitly detached or the session ends. See the note atop dsm_create().
671 */
674{
675 dsm_segment *seg;
676 dlist_iter iter;
677 uint32 i;
679
680 /* Unsafe in postmaster (and pointless in a stand-alone backend). */
682
683 if (!dsm_init_done)
685
686 /*
687 * Since this is just a debugging cross-check, we could leave it out
688 * altogether, or include it only in assert-enabled builds. But since the
689 * list of attached segments should normally be very short, let's include
690 * it always for right now.
691 *
692 * If you're hitting this error, you probably want to attempt to find an
693 * existing mapping via dsm_find_mapping() before calling dsm_attach() to
694 * create a new one.
695 */
697 {
698 seg = dlist_container(dsm_segment, node, iter.cur);
699 if (seg->handle == h)
700 elog(ERROR, "can't attach the same segment more than once");
701 }
702
703 /* Create a new segment descriptor. */
704 seg = dsm_create_descriptor();
705 seg->handle = h;
706
707 /* Bump reference count for this segment in shared memory. */
710 for (i = 0; i < nitems; ++i)
711 {
712 /*
713 * If the reference count is 0, the slot is actually unused. If the
714 * reference count is 1, the slot is still in use, but the segment is
715 * in the process of going away; even if the handle matches, another
716 * slot may already have started using the same handle value by
717 * coincidence so we have to keep searching.
718 */
719 if (dsm_control->item[i].refcnt <= 1)
720 continue;
721
722 /* If the handle doesn't match, it's not the slot we want. */
723 if (dsm_control->item[i].handle != seg->handle)
724 continue;
725
726 /* Otherwise we've found a match. */
728 seg->control_slot = i;
730 {
731 seg->mapped_address = (char *) dsm_main_space_begin +
734 }
735 break;
736 }
738
739 /*
740 * If we didn't find the handle we're looking for in the control segment,
741 * it probably means that everyone else who had it mapped, including the
742 * original creator, died before we got to this point. It's up to the
743 * caller to decide what to do about that.
744 */
746 {
747 dsm_detach(seg);
748 return NULL;
749 }
750
751 /* Here's where we actually try to map the segment. */
754 &seg->mapped_address, &seg->mapped_size, ERROR);
755
756 return seg;
757}
758
759/*
760 * At backend shutdown time, detach any segments that are still attached.
761 * (This is similar to dsm_detach_all, except that there's no reason to
762 * unmap the control segment before exiting, so we don't bother.)
763 */
764void
766{
768 {
769 dsm_segment *seg;
770
772 dsm_detach(seg);
773 }
774}
775
776/*
777 * Detach all shared memory segments, including the control segment. This
778 * should be called, along with PGSharedMemoryDetach, in processes that
779 * might inherit mappings but are not intended to be connected to dynamic
780 * shared memory.
781 */
782void
800
801/*
802 * Detach from a shared memory segment, destroying the segment if we
803 * remove the last reference.
804 *
805 * This function should never fail. It will often be invoked when aborting
806 * a transaction, and a further error won't serve any purpose. It's not a
807 * complete disaster if we fail to unmap or destroy the segment; it means a
808 * resource leak, but that doesn't necessarily preclude further operations.
809 */
810void
812{
813 /*
814 * Invoke registered callbacks. Just in case one of those callbacks
815 * throws a further error that brings us back here, pop the callback
816 * before invoking it, to avoid infinite error recursion. Don't allow
817 * interrupts while running the individual callbacks in non-error code
818 * paths, to avoid leaving cleanup work unfinished if we're interrupted by
819 * a statement timeout or similar.
820 */
822 while (!slist_is_empty(&seg->on_detach))
823 {
824 slist_node *node;
827 Datum arg;
828
829 node = slist_pop_head_node(&seg->on_detach);
831 function = cb->function;
832 arg = cb->arg;
833 pfree(cb);
834
835 function(seg, arg);
836 }
838
839 /*
840 * Try to remove the mapping, if one exists. Normally, there will be, but
841 * maybe not, if we failed partway through a create or attach operation.
842 * We remove the mapping before decrementing the reference count so that
843 * the process that sees a zero reference count can be certain that no
844 * remaining mappings exist. Even if this fails, we pretend that it
845 * works, because retrying is likely to fail in the same way.
846 */
847 if (seg->mapped_address != NULL)
848 {
851 &seg->mapped_address, &seg->mapped_size, WARNING);
852 seg->impl_private = NULL;
853 seg->mapped_address = NULL;
854 seg->mapped_size = 0;
855 }
856
857 /* Reduce reference count, if we previously increased it. */
859 {
860 uint32 refcnt;
861 uint32 control_slot = seg->control_slot;
862
864 Assert(dsm_control->item[control_slot].handle == seg->handle);
865 Assert(dsm_control->item[control_slot].refcnt > 1);
866 refcnt = --dsm_control->item[control_slot].refcnt;
869
870 /* If new reference count is 1, try to destroy the segment. */
871 if (refcnt == 1)
872 {
873 /* A pinned segment should never reach 1. */
874 Assert(!dsm_control->item[control_slot].pinned);
875
876 /*
877 * If we fail to destroy the segment here, or are killed before we
878 * finish doing so, the reference count will remain at 1, which
879 * will mean that nobody else can attach to the segment. At
880 * postmaster shutdown time, or when a new postmaster is started
881 * after a hard kill, another attempt will be made to remove the
882 * segment.
883 *
884 * The main case we're worried about here is being killed by a
885 * signal before we can finish removing the segment. In that
886 * case, it's important to be sure that the segment still gets
887 * removed. If we actually fail to remove the segment for some
888 * other reason, the postmaster may not have any better luck than
889 * we did. There's not much we can do about that, though.
890 */
893 &seg->mapped_address, &seg->mapped_size, WARNING))
894 {
898 dsm_control->item[control_slot].first_page,
899 dsm_control->item[control_slot].npages);
900 Assert(dsm_control->item[control_slot].handle == seg->handle);
901 Assert(dsm_control->item[control_slot].refcnt == 1);
902 dsm_control->item[control_slot].refcnt = 0;
904 }
905 }
906 }
907
908 /* Clean up our remaining backend-private data structures. */
909 if (seg->resowner != NULL)
911 dlist_delete(&seg->node);
912 pfree(seg);
913}
914
915/*
916 * Keep a dynamic shared memory mapping until end of session.
917 *
918 * By default, mappings are owned by the current resource owner, which
919 * typically means they stick around for the duration of the current query
920 * only.
921 */
922void
924{
925 if (seg->resowner != NULL)
926 {
928 seg->resowner = NULL;
929 }
930}
931
932/*
933 * Arrange to remove a dynamic shared memory mapping at cleanup time.
934 *
935 * dsm_pin_mapping() can be used to preserve a mapping for the entire
936 * lifetime of a process; this function reverses that decision, making
937 * the segment owned by the current resource owner. This may be useful
938 * just before performing some operation that will invalidate the segment
939 * for future use by this backend.
940 */
941void
949
950/*
951 * Keep a dynamic shared memory segment until postmaster shutdown, or until
952 * dsm_unpin_segment is called.
953 *
954 * This function should not be called more than once per segment, unless the
955 * segment is explicitly unpinned with dsm_unpin_segment in between calls.
956 *
957 * Note that this function does not arrange for the current process to
958 * keep the segment mapped indefinitely; if that behavior is desired,
959 * dsm_pin_mapping() should be used from each process that needs to
960 * retain the mapping.
961 */
962void
964{
965 void *handle = NULL;
966
967 /*
968 * Bump reference count for this segment in shared memory. This will
969 * ensure that even if there is no session which is attached to this
970 * segment, it will remain until postmaster shutdown or an explicit call
971 * to unpin.
972 */
975 elog(ERROR, "cannot pin a segment that is already pinned");
977 dsm_impl_pin_segment(seg->handle, seg->impl_private, &handle);
978 dsm_control->item[seg->control_slot].pinned = true;
982}
983
984/*
985 * Unpin a dynamic shared memory segment that was previously pinned with
986 * dsm_pin_segment. This function should not be called unless dsm_pin_segment
987 * was previously called for this segment.
988 *
989 * The argument is a dsm_handle rather than a dsm_segment in case you want
990 * to unpin a segment to which you haven't attached. This turns out to be
991 * useful if, for example, a reference to one shared memory segment is stored
992 * within another shared memory segment. You might want to unpin the
993 * referenced segment before destroying the referencing segment.
994 */
995void
997{
998 uint32 control_slot = INVALID_CONTROL_SLOT;
999 bool destroy = false;
1000 uint32 i;
1001
1002 /* Find the control slot for the given handle. */
1004 for (i = 0; i < dsm_control->nitems; ++i)
1005 {
1006 /* Skip unused slots and segments that are concurrently going away. */
1007 if (dsm_control->item[i].refcnt <= 1)
1008 continue;
1009
1010 /* If we've found our handle, we can stop searching. */
1011 if (dsm_control->item[i].handle == handle)
1012 {
1013 control_slot = i;
1014 break;
1015 }
1016 }
1017
1018 /*
1019 * We should definitely have found the slot, and it should not already be
1020 * in the process of going away, because this function should only be
1021 * called on a segment which is pinned.
1022 */
1023 if (control_slot == INVALID_CONTROL_SLOT)
1024 elog(ERROR, "cannot unpin unknown segment handle");
1025 if (!dsm_control->item[control_slot].pinned)
1026 elog(ERROR, "cannot unpin a segment that is not pinned");
1027 Assert(dsm_control->item[control_slot].refcnt > 1);
1028
1029 /*
1030 * Allow implementation-specific code to run. We have to do this before
1031 * releasing the lock, because impl_private_pm_handle may get modified by
1032 * dsm_impl_unpin_segment.
1033 */
1034 if (!is_main_region_dsm_handle(handle))
1036 &dsm_control->item[control_slot].impl_private_pm_handle);
1037
1038 /* Note that 1 means no references (0 means unused slot). */
1039 if (--dsm_control->item[control_slot].refcnt == 1)
1040 destroy = true;
1041 dsm_control->item[control_slot].pinned = false;
1042
1043 /* Now we can release the lock. */
1045
1046 /* Clean up resources if that was the last reference. */
1047 if (destroy)
1048 {
1049 void *junk_impl_private = NULL;
1050 void *junk_mapped_address = NULL;
1052
1053 /*
1054 * For an explanation of how error handling works in this case, see
1055 * comments in dsm_detach. Note that if we reach this point, the
1056 * current process certainly does not have the segment mapped, because
1057 * if it did, the reference count would have still been greater than 1
1058 * even after releasing the reference count held by the pin. The fact
1059 * that there can't be a dsm_segment for this handle makes it OK to
1060 * pass the mapped size, mapped address, and private data as NULL
1061 * here.
1062 */
1063 if (is_main_region_dsm_handle(handle) ||
1066 {
1068 if (is_main_region_dsm_handle(handle))
1070 dsm_control->item[control_slot].first_page,
1071 dsm_control->item[control_slot].npages);
1072 Assert(dsm_control->item[control_slot].handle == handle);
1073 Assert(dsm_control->item[control_slot].refcnt == 1);
1074 dsm_control->item[control_slot].refcnt = 0;
1076 }
1077 }
1078}
1079
1080/*
1081 * Find an existing mapping for a shared memory segment, if there is one.
1082 */
1085{
1086 dlist_iter iter;
1087 dsm_segment *seg;
1088
1090 {
1091 seg = dlist_container(dsm_segment, node, iter.cur);
1092 if (seg->handle == handle)
1093 return seg;
1094 }
1095
1096 return NULL;
1097}
1098
1099/*
1100 * Get the address at which a dynamic shared memory segment is mapped.
1101 */
1102void *
1104{
1105 Assert(seg->mapped_address != NULL);
1106 return seg->mapped_address;
1107}
1108
1109/*
1110 * Get the size of a mapping.
1111 */
1112Size
1114{
1115 Assert(seg->mapped_address != NULL);
1116 return seg->mapped_size;
1117}
1118
1119/*
1120 * Get a handle for a mapping.
1121 *
1122 * To establish communication via dynamic shared memory between two backends,
1123 * one of them should first call dsm_create() to establish a new shared
1124 * memory mapping. That process should then call dsm_segment_handle() to
1125 * obtain a handle for the mapping, and pass that handle to the
1126 * coordinating backend via some means (e.g. bgw_main_arg, or via the
1127 * main shared memory segment). The recipient, once in possession of the
1128 * handle, should call dsm_attach().
1129 */
1132{
1133 return seg->handle;
1134}
1135
1136/*
1137 * Register an on-detach callback for a dynamic shared memory segment.
1138 */
1139void
1150
1151/*
1152 * Unregister an on-detach callback for a dynamic shared memory segment.
1153 */
1154void
1156 Datum arg)
1157{
1158 slist_mutable_iter iter;
1159
1160 slist_foreach_modify(iter, &seg->on_detach)
1161 {
1163
1165 if (cb->function == function && cb->arg == arg)
1166 {
1167 slist_delete_current(&iter);
1168 pfree(cb);
1169 break;
1170 }
1171 }
1172}
1173
1174/*
1175 * Discard all registered on-detach callbacks without executing them.
1176 */
1177void
1179{
1180 dlist_iter iter;
1181
1183 {
1184 dsm_segment *seg = dlist_container(dsm_segment, node, iter.cur);
1185
1186 /* Throw away explicit on-detach actions one by one. */
1187 while (!slist_is_empty(&seg->on_detach))
1188 {
1189 slist_node *node;
1191
1192 node = slist_pop_head_node(&seg->on_detach);
1194 pfree(cb);
1195 }
1196
1197 /*
1198 * Decrementing the reference count is a sort of implicit on-detach
1199 * action; make sure we don't do that, either.
1200 */
1202 }
1203}
1204
1205/*
1206 * Create a segment descriptor.
1207 */
1208static dsm_segment *
1210{
1211 dsm_segment *seg;
1212
1215
1218
1219 /* seg->handle must be initialized by the caller */
1221 seg->impl_private = NULL;
1222 seg->mapped_address = NULL;
1223 seg->mapped_size = 0;
1224
1228
1229 slist_init(&seg->on_detach);
1230
1231 return seg;
1232}
1233
1234/*
1235 * Sanity check a control segment.
1236 *
1237 * The goal here isn't to detect everything that could possibly be wrong with
1238 * the control segment; there's not enough information for that. Rather, the
1239 * goal is to make sure that someone can iterate over the items in the segment
1240 * without overrunning the end of the mapping and crashing. We also check
1241 * the magic number since, if that's messed up, this may not even be one of
1242 * our segments at all.
1243 */
1244static bool
1246{
1247 if (mapped_size < offsetof(dsm_control_header, item))
1248 return false; /* Mapped size too short to read header. */
1249 if (control->magic != PG_DYNSHMEM_CONTROL_MAGIC)
1250 return false; /* Magic number doesn't match. */
1251 if (dsm_control_bytes_needed(control->maxitems) > mapped_size)
1252 return false; /* Max item count won't fit in map. */
1253 if (control->nitems > control->maxitems)
1254 return false; /* Overfull. */
1255 return true;
1256}
1257
1258/*
1259 * Compute the number of control-segment bytes needed to store a given
1260 * number of items.
1261 */
1262static uint64
1268
1269static inline dsm_handle
1271{
1272 dsm_handle handle;
1273
1274 /*
1275 * We need to create a handle that doesn't collide with any existing extra
1276 * segment created by dsm_impl_op(), so we'll make it odd. It also
1277 * mustn't collide with any other main area pseudo-segment, so we'll
1278 * include the slot number in some of the bits. We also want to make an
1279 * effort to keep newly created and recently destroyed handles from being
1280 * confused, so we'll make the rest of the bits random.
1281 */
1282 handle = 1;
1283 handle |= slot << 1;
1285 return handle;
1286}
1287
1288static inline bool
1290{
1291 return handle & 1;
1292}
1293
1294/* ResourceOwner callbacks */
1295
1296static void
1298{
1299 dsm_segment *seg = (dsm_segment *) DatumGetPointer(res);
1300
1301 seg->resowner = NULL;
1302 dsm_detach(seg);
1303}
1304static char *
1306{
1307 dsm_segment *seg = (dsm_segment *) DatumGetPointer(res);
1308
1309 return psprintf("dynamic shared memory segment %u",
1310 dsm_segment_handle(seg));
1311}
#define Assert(condition)
Definition c.h:943
#define FLEXIBLE_ARRAY_MEMBER
Definition c.h:558
uint64_t uint64
Definition c.h:625
uint32_t uint32
Definition c.h:624
size_t Size
Definition c.h:689
dsm_handle dsm_segment_handle(dsm_segment *seg)
Definition dsm.c:1131
static void dsm_backend_startup(void)
Definition dsm.c:433
const ShmemCallbacks dsm_shmem_callbacks
Definition dsm.c:118
static void * dsm_main_space_begin
Definition dsm.c:112
void dsm_detach(dsm_segment *seg)
Definition dsm.c:811
void on_dsm_detach(dsm_segment *seg, on_dsm_detach_callback function, Datum arg)
Definition dsm.c:1140
static dsm_handle dsm_control_handle
Definition dsm.c:149
void dsm_pin_mapping(dsm_segment *seg)
Definition dsm.c:923
static dlist_head dsm_segment_list
Definition dsm.c:140
static char * ResOwnerPrintDSM(Datum res)
Definition dsm.c:1305
static void dsm_postmaster_shutdown(int code, Datum arg)
Definition dsm.c:368
void dsm_unpin_segment(dsm_handle handle)
Definition dsm.c:996
void dsm_pin_segment(dsm_segment *seg)
Definition dsm.c:963
static void ResourceOwnerForgetDSM(ResourceOwner owner, dsm_segment *seg)
Definition dsm.c:175
void dsm_detach_all(void)
Definition dsm.c:783
void * dsm_segment_address(dsm_segment *seg)
Definition dsm.c:1103
static void ResourceOwnerRememberDSM(ResourceOwner owner, dsm_segment *seg)
Definition dsm.c:170
static dsm_handle make_main_region_dsm_handle(int slot)
Definition dsm.c:1270
static const ResourceOwnerDesc dsm_resowner_desc
Definition dsm.c:159
static bool dsm_init_done
Definition dsm.c:109
void dsm_cleanup_using_control_segment(dsm_handle old_control_handle)
Definition dsm.c:248
dsm_segment * dsm_create(Size size, int flags)
Definition dsm.c:524
void dsm_postmaster_startup(PGShmemHeader *shim)
Definition dsm.c:187
#define PG_DYNSHMEM_CONTROL_MAGIC
Definition dsm.c:51
static dsm_control_header * dsm_control
Definition dsm.c:150
static Size dsm_control_mapped_size
Definition dsm.c:151
static size_t dsm_main_space_size
Definition dsm.c:113
static dsm_segment * dsm_create_descriptor(void)
Definition dsm.c:1209
static uint64 dsm_control_bytes_needed(uint32 nitems)
Definition dsm.c:1263
dsm_segment * dsm_attach(dsm_handle h)
Definition dsm.c:673
#define PG_DYNSHMEM_SLOTS_PER_BACKEND
Definition dsm.c:54
#define PG_DYNSHMEM_FIXED_SLOTS
Definition dsm.c:53
void dsm_backend_shutdown(void)
Definition dsm.c:765
dsm_segment * dsm_find_mapping(dsm_handle handle)
Definition dsm.c:1084
void cancel_on_dsm_detach(dsm_segment *seg, on_dsm_detach_callback function, Datum arg)
Definition dsm.c:1155
void reset_on_dsm_detach(void)
Definition dsm.c:1178
static void dsm_main_space_init(void *arg)
Definition dsm.c:494
static void dsm_cleanup_for_mmap(void)
Definition dsm.c:330
static void ResOwnerReleaseDSM(Datum res)
Definition dsm.c:1297
Size dsm_segment_map_length(dsm_segment *seg)
Definition dsm.c:1113
void dsm_unpin_mapping(dsm_segment *seg)
Definition dsm.c:942
static void dsm_main_space_request(void *arg)
Definition dsm.c:480
static bool dsm_control_segment_sane(dsm_control_header *control, Size mapped_size)
Definition dsm.c:1245
#define INVALID_CONTROL_SLOT
Definition dsm.c:56
static void * dsm_control_impl_private
Definition dsm.c:152
static bool is_main_region_dsm_handle(dsm_handle handle)
Definition dsm.c:1289
#define DSM_CREATE_NULL_IF_MAXSEGMENTS
Definition dsm.h:20
void(* on_dsm_detach_callback)(dsm_segment *, Datum arg)
Definition dsm.h:51
void dsm_impl_pin_segment(dsm_handle handle, void *impl_private, void **impl_private_pm_handle)
Definition dsm_impl.c:964
int min_dynamic_shared_memory
Definition dsm_impl.c:116
void dsm_impl_unpin_segment(dsm_handle handle, void **impl_private)
Definition dsm_impl.c:1015
bool dsm_impl_op(dsm_op op, dsm_handle handle, Size request_size, void **impl_private, void **mapped_address, Size *mapped_size, int elevel)
Definition dsm_impl.c:160
int dynamic_shared_memory_type
Definition dsm_impl.c:113
uint32 dsm_handle
Definition dsm_impl.h:55
@ DSM_OP_DETACH
Definition dsm_impl.h:65
@ DSM_OP_CREATE
Definition dsm_impl.h:63
@ DSM_OP_DESTROY
Definition dsm_impl.h:66
@ DSM_OP_ATTACH
Definition dsm_impl.h:64
#define PG_DYNSHMEM_MMAP_FILE_PREFIX
Definition dsm_impl.h:52
#define PG_DYNSHMEM_DIR
Definition dsm_impl.h:51
#define DSM_HANDLE_INVALID
Definition dsm_impl.h:58
#define DSM_IMPL_MMAP
Definition dsm_impl.h:20
Datum arg
Definition elog.c:1322
int errcode_for_file_access(void)
Definition elog.c:897
int errcode(int sqlerrcode)
Definition elog.c:874
#define LOG
Definition elog.h:32
#define FATAL
Definition elog.h:42
#define WARNING
Definition elog.h:37
#define DEBUG2
Definition elog.h:30
#define DEBUG1
Definition elog.h:31
#define ERROR
Definition elog.h:40
#define elog(elevel,...)
Definition elog.h:228
#define ereport(elevel,...)
Definition elog.h:152
int FreeDir(DIR *dir)
Definition fd.c:3009
DIR * AllocateDir(const char *dirname)
Definition fd.c:2891
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition fd.c:2957
bool FreePageManagerGet(FreePageManager *fpm, Size npages, Size *first_page)
Definition freepage.c:210
void FreePageManagerPut(FreePageManager *fpm, Size first_page, Size npages)
Definition freepage.c:379
void FreePageManagerInitialize(FreePageManager *fpm, char *base)
Definition freepage.c:183
#define FPM_PAGE_SIZE
Definition freepage.h:30
bool IsUnderPostmaster
Definition globals.c:122
int MaxBackends
Definition globals.c:149
bool IsPostmasterEnvironment
Definition globals.c:121
static void slist_delete_current(slist_mutable_iter *iter)
Definition ilist.h:1084
#define dlist_foreach(iter, lhead)
Definition ilist.h:623
#define dlist_head_element(type, membername, lhead)
Definition ilist.h:603
static void dlist_delete(dlist_node *node)
Definition ilist.h:405
#define slist_foreach_modify(iter, lhead)
Definition ilist.h:1148
static void slist_init(slist_head *head)
Definition ilist.h:986
static bool slist_is_empty(const slist_head *head)
Definition ilist.h:995
static void dlist_push_head(dlist_head *head, dlist_node *node)
Definition ilist.h:347
static bool dlist_is_empty(const dlist_head *head)
Definition ilist.h:336
static void slist_push_head(slist_head *head, slist_node *node)
Definition ilist.h:1006
#define slist_container(type, membername, ptr)
Definition ilist.h:1106
static slist_node * slist_pop_head_node(slist_head *head)
Definition ilist.h:1028
#define DLIST_STATIC_INIT(name)
Definition ilist.h:281
#define dlist_container(type, membername, ptr)
Definition ilist.h:593
#define nitems(x)
Definition indent.h:31
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition ipc.c:372
int i
Definition isn.c:77
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1150
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1767
@ LW_EXCLUSIVE
Definition lwlock.h:104
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition mcxt.c:1232
void pfree(void *pointer)
Definition mcxt.c:1616
MemoryContext TopMemoryContext
Definition mcxt.c:166
#define RESUME_INTERRUPTS()
Definition miscadmin.h:138
#define HOLD_INTERRUPTS()
Definition miscadmin.h:136
static char * errmsg
on_exit_nicely_callback function
static int pg_leftmost_one_pos32(uint32 word)
Definition pg_bitutils.h:41
#define MAXPGPATH
uint32 pg_prng_uint32(pg_prng_state *state)
Definition pg_prng.c:227
pg_prng_state pg_global_prng_state
Definition pg_prng.c:34
static char buf[DEFAULT_XLOG_SEG_SIZE]
#define snprintf
Definition port.h:260
static Datum PointerGetDatum(const void *X)
Definition postgres.h:342
uint64_t Datum
Definition postgres.h:70
static Pointer DatumGetPointer(Datum X)
Definition postgres.h:332
static int fb(int x)
char * psprintf(const char *fmt,...)
Definition psprintf.c:43
ResourceOwner CurrentResourceOwner
Definition resowner.c:173
void ResourceOwnerForget(ResourceOwner owner, Datum value, const ResourceOwnerDesc *kind)
Definition resowner.c:561
void ResourceOwnerRemember(ResourceOwner owner, Datum value, const ResourceOwnerDesc *kind)
Definition resowner.c:521
void ResourceOwnerEnlarge(ResourceOwner owner)
Definition resowner.c:449
@ RESOURCE_RELEASE_BEFORE_LOCKS
Definition resowner.h:54
#define RELEASE_PRIO_DSMS
Definition resowner.h:65
#define ShmemRequestStruct(...)
Definition shmem.h:176
Definition dirent.c:26
const char * name
Definition resowner.h:93
ShmemRequestCallback request_fn
Definition shmem.h:133
dlist_node * cur
Definition ilist.h:179
uint32 maxitems
Definition dsm.c:95
uint32 nitems
Definition dsm.c:94
uint32 magic
Definition dsm.c:93
dsm_control_item item[FLEXIBLE_ARRAY_MEMBER]
Definition dsm.c:96
size_t npages
Definition dsm.c:85
dsm_handle handle
Definition dsm.c:82
size_t first_page
Definition dsm.c:84
bool pinned
Definition dsm.c:87
void * impl_private_pm_handle
Definition dsm.c:86
uint32 refcnt
Definition dsm.c:83
on_dsm_detach_callback function
Definition dsm.c:61
uint32 control_slot
Definition dsm.c:72
dsm_handle handle
Definition dsm.c:71
Size mapped_size
Definition dsm.c:75
void * impl_private
Definition dsm.c:73
slist_head on_detach
Definition dsm.c:76
dlist_node node
Definition dsm.c:69
ResourceOwner resowner
Definition dsm.c:70
void * mapped_address
Definition dsm.c:74
slist_node * cur
Definition ilist.h:274
const char * name