PostgreSQL Source Code git master
dsm.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * dsm.c
4 * manage dynamic shared memory segments
5 *
6 * This file provides a set of services to make programming with dynamic
7 * shared memory segments more convenient. Unlike the low-level
8 * facilities provided by dsm_impl.h and dsm_impl.c, mappings and segments
9 * created using this module will be cleaned up automatically. Mappings
10 * will be removed when the resource owner under which they were created
11 * is cleaned up, unless dsm_pin_mapping() is used, in which case they
12 * have session lifespan. Segments will be removed when there are no
13 * remaining mappings, or at postmaster shutdown in any case. After a
14 * hard postmaster crash, remaining segments will be removed, if they
15 * still exist, at the next postmaster startup.
16 *
17 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
18 * Portions Copyright (c) 1994, Regents of the University of California
19 *
20 *
21 * IDENTIFICATION
22 * src/backend/storage/ipc/dsm.c
23 *
24 *-------------------------------------------------------------------------
25 */
26
27#include "postgres.h"
28
29#include <fcntl.h>
30#include <unistd.h>
31#ifndef WIN32
32#include <sys/mman.h>
33#endif
34#include <sys/stat.h>
35
36#include "common/pg_prng.h"
37#include "lib/ilist.h"
38#include "miscadmin.h"
39#include "port/pg_bitutils.h"
40#include "storage/dsm.h"
41#include "storage/fd.h"
42#include "storage/ipc.h"
43#include "storage/lwlock.h"
44#include "storage/pg_shmem.h"
45#include "storage/shmem.h"
46#include "utils/freepage.h"
47#include "utils/memutils.h"
48#include "utils/resowner.h"
49
50#define PG_DYNSHMEM_CONTROL_MAGIC 0x9a503d32
51
52#define PG_DYNSHMEM_FIXED_SLOTS 64
53#define PG_DYNSHMEM_SLOTS_PER_BACKEND 5
54
55#define INVALID_CONTROL_SLOT ((uint32) -1)
56
57/* Backend-local tracking for on-detach callbacks. */
59{
64
65/* Backend-local state for a dynamic shared memory segment. */
67{
68 dlist_node node; /* List link in dsm_segment_list. */
69 ResourceOwner resowner; /* Resource owner. */
70 dsm_handle handle; /* Segment name. */
71 uint32 control_slot; /* Slot in control segment. */
72 void *impl_private; /* Implementation-specific private data. */
73 void *mapped_address; /* Mapping address, or NULL if unmapped. */
74 Size mapped_size; /* Size of our mapping. */
75 slist_head on_detach; /* On-detach callbacks. */
76};
77
78/* Shared-memory state for a dynamic shared memory segment. */
79typedef struct dsm_control_item
80{
82 uint32 refcnt; /* 2+ = active, 1 = moribund, 0 = gone */
83 size_t first_page;
84 size_t npages;
85 void *impl_private_pm_handle; /* only needed on Windows */
86 bool pinned;
88
89/* Layout of the dynamic shared memory control segment. */
90typedef struct dsm_control_header
91{
97
98static void dsm_cleanup_for_mmap(void);
99static void dsm_postmaster_shutdown(int code, Datum arg);
102 Size mapped_size);
104static inline dsm_handle make_main_region_dsm_handle(int slot);
105static inline bool is_main_region_dsm_handle(dsm_handle handle);
106
107/* Has this backend initialized the dynamic shared memory system yet? */
108static bool dsm_init_done = false;
109
110/* Preallocated DSM space in the main shared memory region. */
111static void *dsm_main_space_begin = NULL;
112
113/*
114 * List of dynamic shared memory segments used by this backend.
115 *
116 * At process exit time, we must decrement the reference count of each
117 * segment we have attached; this list makes it possible to find all such
118 * segments.
119 *
120 * This list should always be empty in the postmaster. We could probably
121 * allow the postmaster to map dynamic shared memory segments before it
122 * begins to start child processes, provided that each process adjusted
123 * the reference counts for those segments in the control segment at
124 * startup time, but there's no obvious need for such a facility, which
125 * would also be complex to handle in the EXEC_BACKEND case. Once the
126 * postmaster has begun spawning children, there's an additional problem:
127 * each new mapping would require an update to the control segment,
128 * which requires locking, in which the postmaster must not be involved.
129 */
131
132/*
133 * Control segment information.
134 *
135 * Unlike ordinary shared memory segments, the control segment is not
136 * reference counted; instead, it lasts for the postmaster's entire
137 * life cycle. For simplicity, it doesn't have a dsm_segment object either.
138 */
142static void *dsm_control_impl_private = NULL;
143
144
145/* ResourceOwner callbacks to hold DSM segments */
146static void ResOwnerReleaseDSM(Datum res);
147static char *ResOwnerPrintDSM(Datum res);
148
150{
151 .name = "dynamic shared memory segment",
152 .release_phase = RESOURCE_RELEASE_BEFORE_LOCKS,
153 .release_priority = RELEASE_PRIO_DSMS,
154 .ReleaseResource = ResOwnerReleaseDSM,
155 .DebugPrint = ResOwnerPrintDSM
156};
157
158/* Convenience wrappers over ResourceOwnerRemember/Forget */
159static inline void
161{
163}
164static inline void
166{
168}
169
170/*
171 * Start up the dynamic shared memory system by creating a new, empty control segment.
172 *
173 * This is called just once during each cluster lifetime, at postmaster
174 * startup time.
175 */
176void /* NOTE(review): the name/parameter line is missing from this listing */
178{
179 void *dsm_control_address = NULL; /* receives the control segment's mapping address */
180 uint32 maxitems; /* number of slots the new control segment will have */
181 Size segsize; /* bytes needed for a control segment with maxitems slots */
182
184
185 /*
186 * If we're using the mmap implementation, clean up any leftovers.
187 * Cleanup isn't needed on Windows, and happens earlier in startup for
188 * POSIX and System V shared memory, via a direct call to
189 * dsm_cleanup_using_control_segment.
190 */
193
194 /* Determine size for new control segment. */
195 maxitems = PG_DYNSHMEM_FIXED_SLOTS /* NOTE(review): the rest of this expression is missing from this listing */
197 elog(DEBUG2, "dynamic shared memory system will support %u segments",
198 maxitems);
199 segsize = dsm_control_bytes_needed(maxitems);
200
201 /*
202 * Loop until we find an unused identifier for the new control segment. We
203 * sometimes use DSM_HANDLE_INVALID as a sentinel value indicating "no
204 * control segment", so avoid generating that value for a real handle.
205 */
206 for (;;)
207 {
208 Assert(dsm_control_address == NULL); /* nothing may be mapped yet on any iteration */
210 /* Use even numbers only */
213 continue; /* skip this candidate and try again */
215 &dsm_control_impl_private, &dsm_control_address,
217 break; /* NOTE(review): the condition guarding this break is not visible here */
218 }
219 dsm_control = dsm_control_address; /* publish the mapping through the file-level pointer */
221 elog(DEBUG2,
222 "created dynamic shared memory control segment %u (%zu bytes)",
223 dsm_control_handle, segsize);
225
226 /* Initialize control segment. */
228 dsm_control->nitems = 0; /* no slots handed out yet */
229 dsm_control->maxitems = maxitems;
230}
231
232/*
233 * Determine whether the control segment from the previous postmaster
234 * invocation still exists. If so, remove the dynamic shared memory
235 * segments to which it refers, and then the control segment itself.
236 * If it cannot be attached or fails the sanity check, nothing is removed.
237void /* NOTE(review): listing line 236 (comment close) is shown as " */" in the original; see next line */
239{
240 void *mapped_address = NULL;
241 void *junk_mapped_address = NULL;
242 void *impl_private = NULL;
243 void *junk_impl_private = NULL;
244 Size mapped_size = 0;
245 Size junk_mapped_size = 0;
247 uint32 i;
248 dsm_control_header *old_control;
249
250 /*
251 * Try to attach the segment. If this fails, it probably just means that
252 * the operating system has been rebooted and the segment no longer
253 * exists, or an unrelated process has used the same shm ID. So just fall
254 * out quietly.
255 */
256 if (!dsm_impl_op(DSM_OP_ATTACH, old_control_handle, 0, &impl_private,
257 &mapped_address, &mapped_size, DEBUG1))
258 return;
259
260 /*
261 * We've managed to reattach it, but the contents might not be sane. If
262 * they aren't, we disregard the segment after all.
263 */
264 old_control = (dsm_control_header *) mapped_address;
265 if (!dsm_control_segment_sane(old_control, mapped_size))
266 {
267 dsm_impl_op(DSM_OP_DETACH, old_control_handle, 0, &impl_private,
268 &mapped_address, &mapped_size, LOG);
269 return;
270 }
271
272 /*
273 * OK, the control segment looks basically valid, so we can use it to get
274 * a list of segments that need to be removed.
275 */
276 nitems = old_control->nitems;
277 for (i = 0; i < nitems; ++i)
278 {
279 dsm_handle handle;
280 uint32 refcnt;
281
282 /* If the reference count is 0, the slot is actually unused. */
283 refcnt = old_control->item[i].refcnt;
284 if (refcnt == 0)
285 continue;
286
287 /* If it was using the main shmem area, there is nothing to do. */
288 handle = old_control->item[i].handle;
289 if (is_main_region_dsm_handle(handle))
290 continue;
291
292 /* Log debugging information. */
293 elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u (reference count %u)",
294 handle, refcnt);
295
296 /* Destroy the referenced segment. */
297 dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
298 &junk_mapped_address, &junk_mapped_size, LOG);
299 }
300
301 /* Destroy the old control segment, too. */
302 elog(DEBUG2,
303 "cleaning up dynamic shared memory control segment with ID %u",
304 old_control_handle);
305 dsm_impl_op(DSM_OP_DESTROY, old_control_handle, 0, &impl_private,
306 &mapped_address, &mapped_size, LOG);
307}
308
309/*
310 * When we're using the mmap shared memory implementation, "shared memory"
311 * segments might even manage to survive an operating system reboot.
312 * But there's no guarantee as to exactly what will survive: some segments
313 * may survive, and others may not, and the contents of some may be out
314 * of date. In particular, the control segment may be out of date, so we
315 * can't rely on it to figure out what to remove. However, since we know
316 * what directory contains the files we used as shared memory, we can simply
317 * scan the directory and remove every file whose name starts with PG_DYNSHMEM_MMAP_FILE_PREFIX.
318 */
319static void /* NOTE(review): the name/parameter line is missing from this listing */
321{
322 DIR *dir;
323 struct dirent *dent;
324
325 /* Scan the directory for something with a name of the correct format. */
327
328 while ((dent = ReadDir(dir, PG_DYNSHMEM_DIR)) != NULL)
329 {
330 if (strncmp(dent->d_name, PG_DYNSHMEM_MMAP_FILE_PREFIX,
331 strlen(PG_DYNSHMEM_MMAP_FILE_PREFIX)) == 0) /* prefix match only */
332 {
333 char buf[MAXPGPATH + sizeof(PG_DYNSHMEM_DIR)]; /* room for directory, separator and file name */
334
335 snprintf(buf, sizeof(buf), PG_DYNSHMEM_DIR "/%s", dent->d_name);
336
337 elog(DEBUG2, "removing file \"%s\"", buf);
338
339 /* We found a matching file, so remove it. */
340 if (unlink(buf) != 0)
343 errmsg("could not remove file \"%s\": %m", buf))); /* NOTE(review): the report call head and its severity are not visible here */
344 }
345 }
346
347 /* Cleanup complete. */
348 FreeDir(dir);
349}
350
351/*
352 * At shutdown time, we iterate over the control segment and remove all
353 * remaining dynamic shared memory segments. We avoid throwing errors here
354 * (failures are reported at LOG level only); the postmaster is shutting
355 * down either way, and this is just non-critical resource cleanup.
356 */
357static void /* NOTE(review): the name/parameter line is missing from this listing */
359{
361 uint32 i;
362 void *dsm_control_address;
363 void *junk_mapped_address = NULL; /* junk_*: scratch outputs for destroying segments we have not mapped */
364 void *junk_impl_private = NULL;
365 Size junk_mapped_size = 0;
367
368 /*
369 * If some other backend exited uncleanly, it might have corrupted the
370 * control segment while it was dying. In that case, we log a message and
371 * ignore the contents of the control segment. This may end up leaving behind
372 * stray shared memory segments, but there's not much we can do about that
373 * if the metadata is gone.
374 */
377 {
378 ereport(LOG,
379 (errmsg("dynamic shared memory control segment is corrupt")));
380 return; /* neither the segments nor the control segment are removed on this path */
381 }
382
383 /* Remove any remaining segments. */
384 for (i = 0; i < nitems; ++i)
385 {
386 dsm_handle handle;
387
388 /* If the reference count is 0, the slot is actually unused. */
389 if (dsm_control->item[i].refcnt == 0)
390 continue;
391
392 handle = dsm_control->item[i].handle;
393 if (is_main_region_dsm_handle(handle)) /* main-region segments have no separate OS object to destroy */
394 continue;
395
396 /* Log debugging information. */
397 elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u",
398 handle);
399
400 /* Destroy the segment. */
401 dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
402 &junk_mapped_address, &junk_mapped_size, LOG);
403 }
404
405 /* Remove the control segment itself. */
406 elog(DEBUG2,
407 "cleaning up dynamic shared memory control segment with ID %u",
409 dsm_control_address = dsm_control; /* pass the mapping through a local the implementation can update */
411 &dsm_control_impl_private, &dsm_control_address,
413 dsm_control = dsm_control_address; /* copy back whatever address the operation left */
414 shim->dsm_control = 0; /* clear the control handle recorded in the shim header */
415}
416
417/*
418 * Prepare this backend for dynamic shared memory usage. Under EXEC_BACKEND,
419 * we must reread the state file and map the control segment; in other cases,
420 * we'll have inherited the postmaster's mapping and global variables.
421 */
422static void /* NOTE(review): the name/parameter line is missing from this listing */
424{
425#ifdef EXEC_BACKEND
427 {
428 void *control_address = NULL;
429
430 /* Attach control segment. */
433 &dsm_control_impl_private, &control_address,
435 dsm_control = control_address;
436 /* If control segment doesn't look sane, something is badly wrong. */
438 {
440 &dsm_control_impl_private, &control_address,
443 (errcode(ERRCODE_INTERNAL_ERROR),
444 errmsg("dynamic shared memory control segment is not valid")));
445 }
446 }
447#endif
448
449 dsm_init_done = true; /* other code in this file tests this flag (if (!dsm_init_done)) before using DSM */
450}
451
452#ifdef EXEC_BACKEND
453/*
454 * When running under EXEC_BACKEND, we get a callback here when the main
455 * shared memory segment is re-attached, so that we can record the control
456 * handle retrieved from it.
457 */
458void
459dsm_set_control_handle(dsm_handle h)
460{
461 Assert(dsm_control_handle == 0 && h != 0); /* expects no handle recorded yet, and a nonzero h */
463} /* NOTE(review): listing line 462 is missing, so the statement that stores h is not visible here */
464#endif
465
466/*
467 * Reserve some space in the main shared memory segment for DSM segments.
468 * Returns min_dynamic_shared_memory scaled by 1024 * 1024, as a size_t.
469size_t /* NOTE(review): listing line 468 is " */" in the original; the name/parameter line is missing here */
471{
472 return 1024 * 1024 * (size_t) min_dynamic_shared_memory; /* cast keeps the multiplication in size_t */
473}
474
475/*
476 * Initialize space in the main shared memory segment for DSM segments.
477 */
478void /* NOTE(review): the name/parameter line is missing from this listing */
480{
481 size_t size = dsm_estimate_size();
482 bool found;
483
484 if (size == 0)
485 return; /* nothing to preallocate; dsm_main_space_begin is not assigned */
486
487 dsm_main_space_begin = ShmemInitStruct("Preallocated DSM", size, &found);
488 if (!found) /* initialize the contents only when ShmemInitStruct reports them as not found */
489 {
491 size_t first_page = 0;
492 size_t pages;
493
494 /* Reserve space for the FreePageManager. */
495 while (first_page * FPM_PAGE_SIZE < sizeof(FreePageManager)) /* smallest page count covering the manager itself */
496 ++first_page;
497
498 /* Initialize it and give it all the rest of the space. */
500 pages = (size / FPM_PAGE_SIZE) - first_page; /* whole pages only; a trailing partial page is not handed out */
501 FreePageManagerPut(fpm, first_page, pages);
502 }
503}
504
505/*
506 * Create a new dynamic shared memory segment.
507 *
508 * If there is a non-NULL CurrentResourceOwner, the new segment is associated
509 * with it and must be detached before the resource owner releases, or a
510 * warning will be logged. If CurrentResourceOwner is NULL, the segment
511 * remains attached until explicitly detached or the session ends.
512 * Creating with a NULL CurrentResourceOwner is equivalent to creating
513 * with a non-NULL CurrentResourceOwner and then calling dsm_pin_mapping.
514 * If no control slot is free, returns NULL when flags has DSM_CREATE_NULL_IF_MAXSEGMENTS set. */
517{
518 dsm_segment *seg;
519 uint32 i;
521 size_t npages = 0;
522 size_t first_page = 0;
523 FreePageManager *dsm_main_space_fpm = dsm_main_space_begin; /* NULL when no main-region space was set up */
524 bool using_main_dsm_region = false;
525
526 /*
527 * Unsafe in postmaster. It might seem pointless to allow use of dsm in
528 * single user mode, but otherwise some subsystems will need dedicated
529 * single user mode code paths.
530 */
532
533 if (!dsm_init_done)
535
536 /* Create a new segment descriptor. */
537 seg = dsm_create_descriptor();
538
539 /*
540 * Lock the control segment while we try to allocate from the main shared
541 * memory area, if configured.
542 */
543 if (dsm_main_space_fpm)
544 {
545 npages = size / FPM_PAGE_SIZE; /* round the request up to whole pages */
546 if (size % FPM_PAGE_SIZE > 0)
547 ++npages;
548
549 LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
550 if (FreePageManagerGet(dsm_main_space_fpm, npages, &first_page))
551 {
552 /* We can carve out a piece of the main shared memory segment. */
553 seg->mapped_address = (char *) dsm_main_space_begin +
554 first_page * FPM_PAGE_SIZE;
555 seg->mapped_size = npages * FPM_PAGE_SIZE;
556 using_main_dsm_region = true;
557 /* We'll choose a handle below. */
558 }
559 }
560
561 if (!using_main_dsm_region)
562 {
563 /*
564 * We need to create a new memory segment. Loop until we find an
565 * unused segment identifier.
566 */
567 if (dsm_main_space_fpm)
568 LWLockRelease(DynamicSharedMemoryControlLock); /* the lock was taken above only in this case */
569 for (;;)
570 {
571 Assert(seg->mapped_address == NULL && seg->mapped_size == 0);
572 /* Use even numbers only */
574 if (seg->handle == DSM_HANDLE_INVALID) /* Reserve sentinel */
575 continue;
577 &seg->mapped_address, &seg->mapped_size, ERROR))
578 break;
579 }
580 LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE); /* both paths hold the lock from here on */
581 }
582
583 /* Search the control segment for an unused slot. */
585 for (i = 0; i < nitems; ++i)
586 {
587 if (dsm_control->item[i].refcnt == 0)
588 {
589 if (using_main_dsm_region)
590 {
592 dsm_control->item[i].first_page = first_page;
593 dsm_control->item[i].npages = npages;
594 }
595 else /* NOTE(review): listing line 596 is missing, so the statement this else governs is not visible */
597 dsm_control->item[i].handle = seg->handle;
598 /* refcnt of 1 triggers destruction, so start at 2 */
599 dsm_control->item[i].refcnt = 2;
601 dsm_control->item[i].pinned = false;
602 seg->control_slot = i;
603 LWLockRelease(DynamicSharedMemoryControlLock);
604 return seg;
605 }
606 }
607
608 /* Verify that we can support an additional mapping. */
610 {
611 if (using_main_dsm_region)
612 FreePageManagerPut(dsm_main_space_fpm, first_page, npages); /* give the pages back to the main region */
613 LWLockRelease(DynamicSharedMemoryControlLock);
614 if (!using_main_dsm_region)
616 &seg->mapped_address, &seg->mapped_size, WARNING);
617 if (seg->resowner != NULL)
619 dlist_delete(&seg->node);
620 pfree(seg);
621
622 if ((flags & DSM_CREATE_NULL_IF_MAXSEGMENTS) != 0)
623 return NULL; /* caller asked for NULL instead of an error report */
625 (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
626 errmsg("too many dynamic shared memory segments")));
627 }
628
629 /* Enter the handle into a new array slot. */
630 if (using_main_dsm_region)
631 {
633 dsm_control->item[i].first_page = first_page; /* i == nitems here: the search loop above ran to completion */
634 dsm_control->item[i].npages = npages;
635 }
637 /* refcnt of 1 triggers destruction, so start at 2 */
640 dsm_control->item[nitems].pinned = false;
641 seg->control_slot = nitems;
643 LWLockRelease(DynamicSharedMemoryControlLock);
644
645 return seg;
646}
647
648/*
649 * Attach a dynamic shared memory segment.
650 *
651 * See comments for dsm_segment_handle() for an explanation of how this
652 * is intended to be used.
653 *
654 * This function will return NULL if the segment isn't known to the system.
655 * This can happen if we're asked to attach the segment, but then everyone
656 * else detaches it (causing it to be destroyed) before we get around to
657 * attaching it.
658 *
659 * If there is a non-NULL CurrentResourceOwner, the attached segment is
660 * associated with it and must be detached before the resource owner releases,
661 * or a warning will be logged. Otherwise the segment remains attached until
662 * explicitly detached or the session ends. See the note atop dsm_create().
663 */
666{
667 dsm_segment *seg;
668 dlist_iter iter;
669 uint32 i;
671
672 /* Unsafe in postmaster (and pointless in a stand-alone backend). */
674
675 if (!dsm_init_done)
677
678 /*
679 * Since this is just a debugging cross-check, we could leave it out
680 * altogether, or include it only in assert-enabled builds. But since the
681 * list of attached segments should normally be very short, let's always
682 * include it for now.
683 *
684 * If you're hitting this error, you probably want to attempt to find an
685 * existing mapping via dsm_find_mapping() before calling dsm_attach() to
686 * create a new one.
687 */
689 {
690 seg = dlist_container(dsm_segment, node, iter.cur);
691 if (seg->handle == h)
692 elog(ERROR, "can't attach the same segment more than once");
693 }
694
695 /* Create a new segment descriptor. */
696 seg = dsm_create_descriptor();
697 seg->handle = h;
698
699 /* Bump reference count for this segment in shared memory. */
700 LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
702 for (i = 0; i < nitems; ++i)
703 {
704 /*
705 * If the reference count is 0, the slot is actually unused. If the
706 * reference count is 1, the slot is still in use, but the segment is
707 * in the process of going away; even if the handle matches, another
708 * slot may already have started using the same handle value by
709 * coincidence so we have to keep searching.
710 */
711 if (dsm_control->item[i].refcnt <= 1)
712 continue;
713
714 /* If the handle doesn't match, it's not the slot we want. */
715 if (dsm_control->item[i].handle != seg->handle)
716 continue;
717
718 /* Otherwise we've found a match. */
720 seg->control_slot = i; /* NOTE(review): listing line 719 is missing; the refcnt increment is not visible here */
722 {
723 seg->mapped_address = (char *) dsm_main_space_begin +
726 }
727 break;
728 }
729 LWLockRelease(DynamicSharedMemoryControlLock);
730
731 /*
732 * If we didn't find the handle we're looking for in the control segment,
733 * it probably means that everyone else who had it mapped, including the
734 * original creator, died before we got to this point. It's up to the
735 * caller to decide what to do about that.
736 */
738 {
739 dsm_detach(seg); /* release the descriptor created above before reporting failure */
740 return NULL;
741 }
742
743 /* Here's where we actually try to map the segment. */
746 &seg->mapped_address, &seg->mapped_size, ERROR);
747
748 return seg;
749}
750
751/*
752 * At backend shutdown time, detach any segments that are still attached.
753 * (This is similar to dsm_detach_all, except that there's no reason to
754 * unmap the control segment before exiting, so we don't bother.)
755 */
756void /* NOTE(review): the name/parameter line is missing from this listing */
758{
760 { /* NOTE(review): the loop header for this body is missing from this listing */
761 dsm_segment *seg;
762
764 dsm_detach(seg); /* dsm_detach also frees seg and unlinks it from the list */
765 }
766}
767
768/*
769 * Detach all shared memory segments, including the control segment. This
770 * should be called, along with PGSharedMemoryDetach, in processes that
771 * might inherit mappings but are not intended to be connected to dynamic
772 * shared memory.
773 */
774void /* NOTE(review): the name/parameter line is missing from this listing */
776{
777 void *control_address = dsm_control; /* local copy handed to the detach operation below */
778
780 { /* NOTE(review): the loop header for this body is missing from this listing */
781 dsm_segment *seg;
782
784 dsm_detach(seg);
785 }
786
787 if (control_address != NULL) /* only act on the control segment if it is mapped */
789 &dsm_control_impl_private, &control_address,
791}
792
793/*
794 * Detach from a shared memory segment, destroying the segment if we
795 * remove the last reference. The descriptor itself is freed before returning.
796 *
797 * This function should never fail. It will often be invoked when aborting
798 * a transaction, and a further error won't serve any purpose. It's not a
799 * complete disaster if we fail to unmap or destroy the segment; it means a
800 * resource leak, but that doesn't necessarily preclude further operations.
801 */
802void /* NOTE(review): the name/parameter line is missing from this listing */
804{
805 /*
806 * Invoke registered callbacks. Just in case one of those callbacks
807 * throws a further error that brings us back here, pop the callback
808 * before invoking it, to avoid infinite error recursion. Don't allow
809 * interrupts while running the individual callbacks in non-error code
810 * paths, to avoid leaving cleanup work unfinished if we're interrupted by
811 * a statement timeout or similar.
812 */
814 while (!slist_is_empty(&seg->on_detach))
815 {
816 slist_node *node;
819 Datum arg;
820
821 node = slist_pop_head_node(&seg->on_detach);
823 function = cb->function; /* copy both fields out before cb is freed */
824 arg = cb->arg;
825 pfree(cb);
826
827 function(seg, arg);
828 }
830
831 /*
832 * Try to remove the mapping, if one exists. Normally, there will be, but
833 * maybe not, if we failed partway through a create or attach operation.
834 * We remove the mapping before decrementing the reference count so that
835 * the process that sees a zero reference count can be certain that no
836 * remaining mappings exist. Even if this fails, we pretend that it
837 * works, because retrying is likely to fail in the same way.
838 */
839 if (seg->mapped_address != NULL)
840 {
843 &seg->mapped_address, &seg->mapped_size, WARNING);
844 seg->impl_private = NULL; /* reset unconditionally, whatever the operation above reported */
845 seg->mapped_address = NULL;
846 seg->mapped_size = 0;
847 }
848
849 /* Reduce reference count, if we previously increased it. */
851 {
852 uint32 refcnt;
853 uint32 control_slot = seg->control_slot;
854
855 LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
856 Assert(dsm_control->item[control_slot].handle == seg->handle);
857 Assert(dsm_control->item[control_slot].refcnt > 1);
858 refcnt = --dsm_control->item[control_slot].refcnt; /* remember the new value for use after unlocking */
860 LWLockRelease(DynamicSharedMemoryControlLock);
861
862 /* If new reference count is 1, try to destroy the segment. */
863 if (refcnt == 1)
864 {
865 /* A pinned segment should never reach 1. */
866 Assert(!dsm_control->item[control_slot].pinned);
867
868 /*
869 * If we fail to destroy the segment here, or are killed before we
870 * finish doing so, the reference count will remain at 1, which
871 * will mean that nobody else can attach to the segment. At
872 * postmaster shutdown time, or when a new postmaster is started
873 * after a hard kill, another attempt will be made to remove the
874 * segment.
875 *
876 * The main case we're worried about here is being killed by a
877 * signal before we can finish removing the segment. In that
878 * case, it's important to be sure that the segment still gets
879 * removed. If we actually fail to remove the segment for some
880 * other reason, the postmaster may not have any better luck than
881 * we did. There's not much we can do about that, though.
882 */
885 &seg->mapped_address, &seg->mapped_size, WARNING))
886 {
887 LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
890 dsm_control->item[control_slot].first_page,
891 dsm_control->item[control_slot].npages);
892 Assert(dsm_control->item[control_slot].handle == seg->handle);
893 Assert(dsm_control->item[control_slot].refcnt == 1);
894 dsm_control->item[control_slot].refcnt = 0; /* 0 marks the slot unused, so it can be handed out again */
895 LWLockRelease(DynamicSharedMemoryControlLock);
896 }
897 }
898 }
899
900 /* Clean up our remaining backend-private data structures. */
901 if (seg->resowner != NULL)
903 dlist_delete(&seg->node);
904 pfree(seg); /* seg must not be used by the caller after this */
905}
906
907/*
908 * Keep a dynamic shared memory mapping until end of session.
909 *
910 * By default, mappings are owned by the current resource owner, which
911 * typically means they stick around for the duration of the current query
912 * only. Calling this on a mapping that has no resource owner does nothing.
913 */
914void /* NOTE(review): the name/parameter line is missing from this listing */
916{
917 if (seg->resowner != NULL)
918 {
920 seg->resowner = NULL; /* NOTE(review): listing line 919 is missing; only this reset is visible */
921 }
922}
923
924/*
925 * Arrange to remove a dynamic shared memory mapping at cleanup time.
926 *
927 * dsm_pin_mapping() can be used to preserve a mapping for the entire
928 * lifetime of a process; this function reverses that decision, making
929 * the segment owned by the current resource owner. This may be useful
930 * just before performing some operation that will invalidate the segment
931 * for future use by this backend.
932 */
933void /* NOTE(review): the name/parameter line is missing from this listing */
935{
936 Assert(seg->resowner == NULL); /* only valid for a mapping that currently has no resource owner */
940} /* NOTE(review): listing lines 937-939 are missing, so the re-association itself is not visible */
941
942/*
943 * Keep a dynamic shared memory segment until postmaster shutdown, or until
944 * dsm_unpin_segment is called.
945 *
946 * This function should not be called more than once per segment, unless the
947 * segment is explicitly unpinned with dsm_unpin_segment in between calls.
948 *
949 * Note that this function does not arrange for the current process to
950 * keep the segment mapped indefinitely; if that behavior is desired,
951 * dsm_pin_mapping() should be used from each process that needs to
952 * retain the mapping.
953 */
954void /* NOTE(review): the name/parameter line is missing from this listing */
956{
957 void *handle = NULL; /* output of dsm_impl_pin_segment below */
958
959 /*
960 * Bump reference count for this segment in shared memory. This will
961 * ensure that even if there is no session which is attached to this
962 * segment, it will remain until postmaster shutdown or an explicit call
963 * to unpin.
964 */
965 LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
967 elog(ERROR, "cannot pin a segment that is already pinned"); /* NOTE(review): the guarding condition is missing from this listing */
969 dsm_impl_pin_segment(seg->handle, seg->impl_private, &handle);
970 dsm_control->item[seg->control_slot].pinned = true;
973 LWLockRelease(DynamicSharedMemoryControlLock);
974}
975
976/*
977 * Unpin a dynamic shared memory segment that was previously pinned with
978 * dsm_pin_segment. This function should not be called unless dsm_pin_segment
979 * was previously called for this segment; an unknown or unpinned handle raises an error.
980 *
981 * The argument is a dsm_handle rather than a dsm_segment in case you want
982 * to unpin a segment to which you haven't attached. This turns out to be
983 * useful if, for example, a reference to one shared memory segment is stored
984 * within another shared memory segment. You might want to unpin the
985 * referenced segment before destroying the referencing segment.
986 */
987void /* NOTE(review): the name/parameter line is missing from this listing */
989{
990 uint32 control_slot = INVALID_CONTROL_SLOT;
991 bool destroy = false;
992 uint32 i;
993
994 /* Find the control slot for the given handle. */
995 LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
996 for (i = 0; i < dsm_control->nitems; ++i)
997 {
998 /* Skip unused slots and segments that are concurrently going away. */
999 if (dsm_control->item[i].refcnt <= 1)
1000 continue;
1001
1002 /* If we've found our handle, we can stop searching. */
1003 if (dsm_control->item[i].handle == handle)
1004 {
1005 control_slot = i;
1006 break;
1007 }
1008 }
1009
1010 /*
1011 * We should definitely have found the slot, and it should not already be
1012 * in the process of going away, because this function should only be
1013 * called on a segment which is pinned.
1014 */
1015 if (control_slot == INVALID_CONTROL_SLOT)
1016 elog(ERROR, "cannot unpin unknown segment handle");
1017 if (!dsm_control->item[control_slot].pinned)
1018 elog(ERROR, "cannot unpin a segment that is not pinned");
1019 Assert(dsm_control->item[control_slot].refcnt > 1);
1020
1021 /*
1022 * Allow implementation-specific code to run. We have to do this before
1023 * releasing the lock, because impl_private_pm_handle may get modified by
1024 * dsm_impl_unpin_segment.
1025 */
1026 if (!is_main_region_dsm_handle(handle))
1028 &dsm_control->item[control_slot].impl_private_pm_handle);
1029
1030 /* Note that 1 means no references (0 means unused slot). */
1031 if (--dsm_control->item[control_slot].refcnt == 1)
1032 destroy = true; /* decided under the lock, acted on after it is released */
1033 dsm_control->item[control_slot].pinned = false;
1034
1035 /* Now we can release the lock. */
1036 LWLockRelease(DynamicSharedMemoryControlLock);
1037
1038 /* Clean up resources if that was the last reference. */
1039 if (destroy)
1040 {
1041 void *junk_impl_private = NULL;
1042 void *junk_mapped_address = NULL;
1043 Size junk_mapped_size = 0;
1044
1045 /*
1046 * For an explanation of how error handling works in this case, see
1047 * comments in dsm_detach. Note that if we reach this point, the
1048 * current process certainly does not have the segment mapped, because
1049 * if it did, the reference count would have still been greater than 1
1050 * even after releasing the reference count held by the pin. The fact
1051 * that there can't be a dsm_segment for this handle makes it OK to
1052 * pass junk variables holding NULL/0 for the mapped size, mapped
1053 * address, and private data here.
1054 */
1055 if (is_main_region_dsm_handle(handle) ||
1056 dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
1057 &junk_mapped_address, &junk_mapped_size, WARNING))
1058 {
1059 LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
1060 if (is_main_region_dsm_handle(handle))
1062 dsm_control->item[control_slot].first_page,
1063 dsm_control->item[control_slot].npages);
1064 Assert(dsm_control->item[control_slot].handle == handle);
1065 Assert(dsm_control->item[control_slot].refcnt == 1);
1066 dsm_control->item[control_slot].refcnt = 0; /* 0 marks the slot unused */
1067 LWLockRelease(DynamicSharedMemoryControlLock);
1068 }
1069 }
1070}
1071
1072/*
1073 * Find an existing mapping for a shared memory segment, if there is one.
1074 * Returns the descriptor whose handle equals the given handle, or NULL if none matches. */
1077{
1078 dlist_iter iter;
1079 dsm_segment *seg;
1080
1082 { /* NOTE(review): the loop header for this body is missing from this listing */
1083 seg = dlist_container(dsm_segment, node, iter.cur);
1084 if (seg->handle == handle)
1085 return seg; /* first match wins */
1086 }
1087
1088 return NULL;
1089}
1090
1091/*
1092 * Get the address at which a dynamic shared memory segment is mapped.
1093 * The segment must currently be mapped; this is checked only by an assertion. */
1094void * /* NOTE(review): the name/parameter line is missing from this listing */
1096{
1097 Assert(seg->mapped_address != NULL);
1098 return seg->mapped_address;
1099}
1100
1101/*
1102 * Get the size of a mapping (the descriptor's mapped_size field).
1103 * The segment must currently be mapped; this is checked only by an assertion. */
1104Size /* NOTE(review): the name/parameter line is missing from this listing */
1106{
1107 Assert(seg->mapped_address != NULL);
1108 return seg->mapped_size;
1109}
1110
1111/*
1112 * Get a handle for a mapping (the descriptor's handle field).
1113 *
1114 * To establish communication via dynamic shared memory between two backends,
1115 * one of them should first call dsm_create() to establish a new shared
1116 * memory mapping. That process should then call dsm_segment_handle() to
1117 * obtain a handle for the mapping, and pass that handle to the
1118 * coordinating backend via some means (e.g. bgw_main_arg, or via the
1119 * main shared memory segment). The recipient, once in possession of the
1120 * handle, should call dsm_attach().
1121 */
1124{
1125 return seg->handle;
1126}
1127
1128/*
1129 * Register an on-detach callback for a dynamic shared memory segment.
1130 */
1131void
1133{
1135
1138 cb->function = function;
1139 cb->arg = arg;
1140 slist_push_head(&seg->on_detach, &cb->node);
1141}
1142
1143/*
1144 * Unregister an on-detach callback for a dynamic shared memory segment.
1145 */
1146void
1148 Datum arg)
1149{
1150 slist_mutable_iter iter;
1151
1152 slist_foreach_modify(iter, &seg->on_detach)
1153 {
1155
1157 if (cb->function == function && cb->arg == arg)
1158 {
1159 slist_delete_current(&iter);
1160 pfree(cb);
1161 break;
1162 }
1163 }
1164}
1165
1166/*
1167 * Discard all registered on-detach callbacks without executing them.
1168 */
1169void
1171{
1172 dlist_iter iter;
1173
1175 {
1176 dsm_segment *seg = dlist_container(dsm_segment, node, iter.cur);
1177
1178 /* Throw away explicit on-detach actions one by one. */
1179 while (!slist_is_empty(&seg->on_detach))
1180 {
1181 slist_node *node;
1183
1184 node = slist_pop_head_node(&seg->on_detach);
1186 pfree(cb);
1187 }
1188
1189 /*
1190 * Decrementing the reference count is a sort of implicit on-detach
1191 * action; make sure we don't do that, either.
1192 */
1194 }
1195}
1196
1197/*
1198 * Create a segment descriptor.
1199 */
1200static dsm_segment *
1202{
1203 dsm_segment *seg;
1204
1207
1210
1211 /* seg->handle must be initialized by the caller */
1213 seg->impl_private = NULL;
1214 seg->mapped_address = NULL;
1215 seg->mapped_size = 0;
1216
1220
1221 slist_init(&seg->on_detach);
1222
1223 return seg;
1224}
1225
1226/*
1227 * Sanity check a control segment.
1228 *
1229 * The goal here isn't to detect everything that could possibly be wrong with
1230 * the control segment; there's not enough information for that. Rather, the
1231 * goal is to make sure that someone can iterate over the items in the segment
1232 * without overrunning the end of the mapping and crashing. We also check
1233 * the magic number since, if that's messed up, this may not even be one of
1234 * our segments at all.
1235 */
1236static bool
1238{
1239 if (mapped_size < offsetof(dsm_control_header, item))
1240 return false; /* Mapped size too short to read header. */
1241 if (control->magic != PG_DYNSHMEM_CONTROL_MAGIC)
1242 return false; /* Magic number doesn't match. */
1243 if (dsm_control_bytes_needed(control->maxitems) > mapped_size)
1244 return false; /* Max item count won't fit in map. */
1245 if (control->nitems > control->maxitems)
1246 return false; /* Overfull. */
1247 return true;
1248}
1249
1250/*
1251 * Compute the number of control-segment bytes needed to store a given
1252 * number of items.
1253 */
1254static uint64
1256{
1257 return offsetof(dsm_control_header, item)
1258 + sizeof(dsm_control_item) * (uint64) nitems;
1259}
1260
1261static inline dsm_handle
1263{
1264 dsm_handle handle;
1265
1266 /*
1267 * We need to create a handle that doesn't collide with any existing extra
1268 * segment created by dsm_impl_op(), so we'll make it odd. It also
1269 * mustn't collide with any other main area pseudo-segment, so we'll
1270 * include the slot number in some of the bits. We also want to make an
1271 * effort to avoid newly created and recently destroyed handles from being
1272 * confused, so we'll make the rest of the bits random.
1273 */
1274 handle = 1;
1275 handle |= slot << 1;
1277 return handle;
1278}
1279
1280static inline bool
1282{
1283 return handle & 1;
1284}
1285
1286/* ResourceOwner callbacks */
1287
1288static void
1290{
1292
1293 seg->resowner = NULL;
1294 dsm_detach(seg);
1295}
1296static char *
1298{
1300
1301 return psprintf("dynamic shared memory segment %u",
1302 dsm_segment_handle(seg));
1303}
#define Assert(condition)
Definition: c.h:815
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:420
uint64_t uint64
Definition: c.h:489
uint32_t uint32
Definition: c.h:488
size_t Size
Definition: c.h:562
dsm_handle dsm_segment_handle(dsm_segment *seg)
Definition: dsm.c:1123
size_t dsm_estimate_size(void)
Definition: dsm.c:470
static void dsm_backend_startup(void)
Definition: dsm.c:423
static void * dsm_main_space_begin
Definition: dsm.c:111
void dsm_detach(dsm_segment *seg)
Definition: dsm.c:803
void on_dsm_detach(dsm_segment *seg, on_dsm_detach_callback function, Datum arg)
Definition: dsm.c:1132
static dsm_handle dsm_control_handle
Definition: dsm.c:139
void dsm_pin_mapping(dsm_segment *seg)
Definition: dsm.c:915
static dlist_head dsm_segment_list
Definition: dsm.c:130
static char * ResOwnerPrintDSM(Datum res)
Definition: dsm.c:1297
static void dsm_postmaster_shutdown(int code, Datum arg)
Definition: dsm.c:358
void dsm_unpin_segment(dsm_handle handle)
Definition: dsm.c:988
void dsm_pin_segment(dsm_segment *seg)
Definition: dsm.c:955
static void ResourceOwnerForgetDSM(ResourceOwner owner, dsm_segment *seg)
Definition: dsm.c:165
void dsm_detach_all(void)
Definition: dsm.c:775
void * dsm_segment_address(dsm_segment *seg)
Definition: dsm.c:1095
static void ResourceOwnerRememberDSM(ResourceOwner owner, dsm_segment *seg)
Definition: dsm.c:160
static dsm_handle make_main_region_dsm_handle(int slot)
Definition: dsm.c:1262
static const ResourceOwnerDesc dsm_resowner_desc
Definition: dsm.c:149
static bool dsm_init_done
Definition: dsm.c:108
void dsm_cleanup_using_control_segment(dsm_handle old_control_handle)
Definition: dsm.c:238
dsm_segment * dsm_create(Size size, int flags)
Definition: dsm.c:516
void dsm_postmaster_startup(PGShmemHeader *shim)
Definition: dsm.c:177
#define PG_DYNSHMEM_CONTROL_MAGIC
Definition: dsm.c:50
static dsm_control_header * dsm_control
Definition: dsm.c:140
static Size dsm_control_mapped_size
Definition: dsm.c:141
static dsm_segment * dsm_create_descriptor(void)
Definition: dsm.c:1201
static uint64 dsm_control_bytes_needed(uint32 nitems)
Definition: dsm.c:1255
dsm_segment * dsm_attach(dsm_handle h)
Definition: dsm.c:665
void dsm_shmem_init(void)
Definition: dsm.c:479
#define PG_DYNSHMEM_SLOTS_PER_BACKEND
Definition: dsm.c:53
struct dsm_control_item dsm_control_item
#define PG_DYNSHMEM_FIXED_SLOTS
Definition: dsm.c:52
void dsm_backend_shutdown(void)
Definition: dsm.c:757
dsm_segment * dsm_find_mapping(dsm_handle handle)
Definition: dsm.c:1076
struct dsm_segment_detach_callback dsm_segment_detach_callback
void cancel_on_dsm_detach(dsm_segment *seg, on_dsm_detach_callback function, Datum arg)
Definition: dsm.c:1147
void reset_on_dsm_detach(void)
Definition: dsm.c:1170
static void dsm_cleanup_for_mmap(void)
Definition: dsm.c:320
static void ResOwnerReleaseDSM(Datum res)
Definition: dsm.c:1289
Size dsm_segment_map_length(dsm_segment *seg)
Definition: dsm.c:1105
void dsm_unpin_mapping(dsm_segment *seg)
Definition: dsm.c:934
struct dsm_control_header dsm_control_header
static bool dsm_control_segment_sane(dsm_control_header *control, Size mapped_size)
Definition: dsm.c:1237
#define INVALID_CONTROL_SLOT
Definition: dsm.c:55
static void * dsm_control_impl_private
Definition: dsm.c:142
static bool is_main_region_dsm_handle(dsm_handle handle)
Definition: dsm.c:1281
#define DSM_CREATE_NULL_IF_MAXSEGMENTS
Definition: dsm.h:20
void(* on_dsm_detach_callback)(dsm_segment *, Datum arg)
Definition: dsm.h:54
void dsm_impl_pin_segment(dsm_handle handle, void *impl_private, void **impl_private_pm_handle)
Definition: dsm_impl.c:963
int min_dynamic_shared_memory
Definition: dsm_impl.c:115
void dsm_impl_unpin_segment(dsm_handle handle, void **impl_private)
Definition: dsm_impl.c:1014
bool dsm_impl_op(dsm_op op, dsm_handle handle, Size request_size, void **impl_private, void **mapped_address, Size *mapped_size, int elevel)
Definition: dsm_impl.c:159
int dynamic_shared_memory_type
Definition: dsm_impl.c:112
uint32 dsm_handle
Definition: dsm_impl.h:55
@ DSM_OP_DETACH
Definition: dsm_impl.h:65
@ DSM_OP_CREATE
Definition: dsm_impl.h:63
@ DSM_OP_DESTROY
Definition: dsm_impl.h:66
@ DSM_OP_ATTACH
Definition: dsm_impl.h:64
#define PG_DYNSHMEM_MMAP_FILE_PREFIX
Definition: dsm_impl.h:52
#define PG_DYNSHMEM_DIR
Definition: dsm_impl.h:51
#define DSM_HANDLE_INVALID
Definition: dsm_impl.h:58
#define DSM_IMPL_MMAP
Definition: dsm_impl.h:20
int errcode_for_file_access(void)
Definition: elog.c:876
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define LOG
Definition: elog.h:31
#define FATAL
Definition: elog.h:41
#define WARNING
Definition: elog.h:36
#define DEBUG2
Definition: elog.h:29
#define DEBUG1
Definition: elog.h:30
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
int FreeDir(DIR *dir)
Definition: fd.c:2983
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2865
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2931
bool FreePageManagerGet(FreePageManager *fpm, Size npages, Size *first_page)
Definition: freepage.c:210
void FreePageManagerPut(FreePageManager *fpm, Size first_page, Size npages)
Definition: freepage.c:379
void FreePageManagerInitialize(FreePageManager *fpm, char *base)
Definition: freepage.c:183
#define FPM_PAGE_SIZE
Definition: freepage.h:30
bool IsUnderPostmaster
Definition: globals.c:119
int MaxBackends
Definition: globals.c:145
bool IsPostmasterEnvironment
Definition: globals.c:118
static void slist_delete_current(slist_mutable_iter *iter)
Definition: ilist.h:1084
#define dlist_foreach(iter, lhead)
Definition: ilist.h:623
#define dlist_head_element(type, membername, lhead)
Definition: ilist.h:603
static void dlist_delete(dlist_node *node)
Definition: ilist.h:405
#define slist_foreach_modify(iter, lhead)
Definition: ilist.h:1148
static void slist_init(slist_head *head)
Definition: ilist.h:986
static bool slist_is_empty(const slist_head *head)
Definition: ilist.h:995
static void dlist_push_head(dlist_head *head, dlist_node *node)
Definition: ilist.h:347
static bool dlist_is_empty(const dlist_head *head)
Definition: ilist.h:336
static void slist_push_head(slist_head *head, slist_node *node)
Definition: ilist.h:1006
#define slist_container(type, membername, ptr)
Definition: ilist.h:1106
static slist_node * slist_pop_head_node(slist_head *head)
Definition: ilist.h:1028
#define DLIST_STATIC_INIT(name)
Definition: ilist.h:281
#define dlist_container(type, membername, ptr)
Definition: ilist.h:593
#define nitems(x)
Definition: indent.h:31
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:365
int i
Definition: isn.c:72
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1168
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1781
@ LW_EXCLUSIVE
Definition: lwlock.h:114
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:1181
void pfree(void *pointer)
Definition: mcxt.c:1521
MemoryContext TopMemoryContext
Definition: mcxt.c:149
#define RESUME_INTERRUPTS()
Definition: miscadmin.h:135
#define HOLD_INTERRUPTS()
Definition: miscadmin.h:133
on_exit_nicely_callback function
void * arg
static int pg_leftmost_one_pos32(uint32 word)
Definition: pg_bitutils.h:41
#define MAXPGPATH
uint32 pg_prng_uint32(pg_prng_state *state)
Definition: pg_prng.c:227
pg_prng_state pg_global_prng_state
Definition: pg_prng.c:34
static char * buf
Definition: pg_test_fsync.c:72
#define snprintf
Definition: port.h:238
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:327
uintptr_t Datum
Definition: postgres.h:69
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:317
char * psprintf(const char *fmt,...)
Definition: psprintf.c:43
ResourceOwner CurrentResourceOwner
Definition: resowner.c:165
void ResourceOwnerForget(ResourceOwner owner, Datum value, const ResourceOwnerDesc *kind)
Definition: resowner.c:554
void ResourceOwnerRemember(ResourceOwner owner, Datum value, const ResourceOwnerDesc *kind)
Definition: resowner.c:514
void ResourceOwnerEnlarge(ResourceOwner owner)
Definition: resowner.c:442
@ RESOURCE_RELEASE_BEFORE_LOCKS
Definition: resowner.h:54
#define RELEASE_PRIO_DSMS
Definition: resowner.h:65
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:382
static pg_noinline void Size size
Definition: slab.c:607
Definition: dirent.c:26
dsm_handle dsm_control
Definition: pg_shmem.h:36
const char * name
Definition: resowner.h:93
Definition: dirent.h:10
char d_name[MAX_PATH]
Definition: dirent.h:15
dlist_node * cur
Definition: ilist.h:179
uint32 maxitems
Definition: dsm.c:94
uint32 nitems
Definition: dsm.c:93
uint32 magic
Definition: dsm.c:92
dsm_control_item item[FLEXIBLE_ARRAY_MEMBER]
Definition: dsm.c:95
size_t npages
Definition: dsm.c:84
dsm_handle handle
Definition: dsm.c:81
size_t first_page
Definition: dsm.c:83
bool pinned
Definition: dsm.c:86
void * impl_private_pm_handle
Definition: dsm.c:85
uint32 refcnt
Definition: dsm.c:82
on_dsm_detach_callback function
Definition: dsm.c:60
uint32 control_slot
Definition: dsm.c:71
dsm_handle handle
Definition: dsm.c:70
Size mapped_size
Definition: dsm.c:74
void * impl_private
Definition: dsm.c:72
slist_head on_detach
Definition: dsm.c:75
dlist_node node
Definition: dsm.c:68
ResourceOwner resowner
Definition: dsm.c:69
void * mapped_address
Definition: dsm.c:73
slist_node * cur
Definition: ilist.h:274