PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
dsm_impl.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * dsm_impl.c
4  * manage dynamic shared memory segments
5  *
6  * This file provides low-level APIs for creating and destroying shared
7  * memory segments using several different possible techniques. We refer
8  * to these segments as dynamic because they can be created, altered, and
9  * destroyed at any point during the server life cycle. This is unlike
10  * the main shared memory segment, of which there is always exactly one
11  * and which is always mapped at a fixed address in every PostgreSQL
12  * background process.
13  *
14  * Because not all systems provide the same primitives in this area, nor
15  * do all primitives behave the same way on all systems, we provide
16  * several implementations of this facility. Many systems implement
17  * POSIX shared memory (shm_open etc.), which is well-suited to our needs
18  * in this area, with the exception that shared memory identifiers live
19  * in a flat system-wide namespace, raising the uncomfortable prospect of
20  * name collisions with other processes (including other copies of
21  * PostgreSQL) running on the same system. Some systems only support
22  * the older System V shared memory interface (shmget etc.) which is
23  * also usable; however, the default allocation limits are often quite
24  * small, and the namespace is even more restricted.
25  *
26  * We also provide an mmap-based shared memory implementation. This may
27  * be useful on systems that provide shared memory via a special-purpose
28  * filesystem; by opting for this implementation, the user can even
29  * control precisely where their shared memory segments are placed. It
30  * can also be used as a fallback for systems where shm_open and shmget
31  * are not available or can't be used for some reason. Of course,
32  * mapping a file residing on an actual spinning disk is a fairly poor
33  * approximation for shared memory because writeback may hurt performance
34  * substantially, but there should be few systems where we must make do
35  * with such poor tools.
36  *
37  * As ever, Windows requires its own implementation.
38  *
39  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
40  * Portions Copyright (c) 1994, Regents of the University of California
41  *
42  *
43  * IDENTIFICATION
44  * src/backend/storage/ipc/dsm_impl.c
45  *
46  *-------------------------------------------------------------------------
47  */
48 
49 #include "postgres.h"
50 
51 #include <fcntl.h>
52 #include <unistd.h>
53 #ifndef WIN32
54 #include <sys/mman.h>
55 #endif
56 #include <sys/stat.h>
57 #ifdef HAVE_SYS_IPC_H
58 #include <sys/ipc.h>
59 #endif
60 #ifdef HAVE_SYS_SHM_H
61 #include <sys/shm.h>
62 #endif
63 #include "pgstat.h"
64 
65 #include "portability/mem.h"
66 #include "storage/dsm_impl.h"
67 #include "storage/fd.h"
68 #include "utils/guc.h"
69 #include "utils/memutils.h"
70 #include "postmaster/postmaster.h"
71 
72 #ifdef USE_DSM_POSIX
73 static bool dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
74  void **impl_private, void **mapped_address,
75  Size *mapped_size, int elevel);
76 #endif
77 #ifdef USE_DSM_SYSV
78 static bool dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
79  void **impl_private, void **mapped_address,
80  Size *mapped_size, int elevel);
81 #endif
82 #ifdef USE_DSM_WINDOWS
83 static bool dsm_impl_windows(dsm_op op, dsm_handle handle, Size request_size,
84  void **impl_private, void **mapped_address,
85  Size *mapped_size, int elevel);
86 #endif
87 #ifdef USE_DSM_MMAP
88 static bool dsm_impl_mmap(dsm_op op, dsm_handle handle, Size request_size,
89  void **impl_private, void **mapped_address,
90  Size *mapped_size, int elevel);
91 #endif
92 static int errcode_for_dynamic_shared_memory(void);
93 
95 #ifdef USE_DSM_POSIX
96  {"posix", DSM_IMPL_POSIX, false},
97 #endif
98 #ifdef USE_DSM_SYSV
99  {"sysv", DSM_IMPL_SYSV, false},
100 #endif
101 #ifdef USE_DSM_WINDOWS
102  {"windows", DSM_IMPL_WINDOWS, false},
103 #endif
104 #ifdef USE_DSM_MMAP
105  {"mmap", DSM_IMPL_MMAP, false},
106 #endif
107  {"none", DSM_IMPL_NONE, false},
108  {NULL, 0, false}
109 };
110 
111 /* Implementation selector. */
113 
114 /* Size of buffer to be used for zero-filling. */
115 #define ZBUFFER_SIZE 8192
116 
117 #define SEGMENT_NAME_PREFIX "Global/PostgreSQL"
118 
119 /*------
120  * Perform a low-level shared memory operation in a platform-specific way,
121  * as dictated by the selected implementation. Each implementation is
122  * required to implement the following primitives.
123  *
124  * DSM_OP_CREATE. Create a segment whose size is the request_size and
125  * map it.
126  *
127  * DSM_OP_ATTACH. Map the segment, whose size must be the request_size.
128  * The segment may already be mapped; any existing mapping should be removed
129  * before creating a new one.
130  *
131  * DSM_OP_DETACH. Unmap the segment.
132  *
133  * DSM_OP_RESIZE. Resize the segment to the given request_size and
134  * remap the segment at that new size.
135  *
136  * DSM_OP_DESTROY. Unmap the segment, if it is mapped. Destroy the
137  * segment.
138  *
139  * Arguments:
140  * op: The operation to be performed.
141  * handle: The handle of an existing object, or for DSM_OP_CREATE, the
142  * a new handle the caller wants created.
143  * request_size: For DSM_OP_CREATE, the requested size. For DSM_OP_RESIZE,
144  * the new size. Otherwise, 0.
145  * impl_private: Private, implementation-specific data. Will be a pointer
146  * to NULL for the first operation on a shared memory segment within this
147  * backend; thereafter, it will point to the value to which it was set
148  * on the previous call.
149  * mapped_address: Pointer to start of current mapping; pointer to NULL
150  * if none. Updated with new mapping address.
151  * mapped_size: Pointer to size of current mapping; pointer to 0 if none.
152  * Updated with new mapped size.
153  * elevel: Level at which to log errors.
154  *
155  * Return value: true on success, false on failure. When false is returned,
156  * a message should first be logged at the specified elevel, except in the
157  * case where DSM_OP_CREATE experiences a name collision, which should
158  * silently return false.
159  *-----
160  */
161 bool
162 dsm_impl_op(dsm_op op, dsm_handle handle, Size request_size,
163  void **impl_private, void **mapped_address, Size *mapped_size,
164  int elevel)
165 {
166  Assert(op == DSM_OP_CREATE || op == DSM_OP_RESIZE || request_size == 0);
167  Assert((op != DSM_OP_CREATE && op != DSM_OP_ATTACH) ||
168  (*mapped_address == NULL && *mapped_size == 0));
169 
171  {
172 #ifdef USE_DSM_POSIX
173  case DSM_IMPL_POSIX:
174  return dsm_impl_posix(op, handle, request_size, impl_private,
175  mapped_address, mapped_size, elevel);
176 #endif
177 #ifdef USE_DSM_SYSV
178  case DSM_IMPL_SYSV:
179  return dsm_impl_sysv(op, handle, request_size, impl_private,
180  mapped_address, mapped_size, elevel);
181 #endif
182 #ifdef USE_DSM_WINDOWS
183  case DSM_IMPL_WINDOWS:
184  return dsm_impl_windows(op, handle, request_size, impl_private,
185  mapped_address, mapped_size, elevel);
186 #endif
187 #ifdef USE_DSM_MMAP
188  case DSM_IMPL_MMAP:
189  return dsm_impl_mmap(op, handle, request_size, impl_private,
190  mapped_address, mapped_size, elevel);
191 #endif
192  default:
193  elog(ERROR, "unexpected dynamic shared memory type: %d",
195  return false;
196  }
197 }
198 
199 /*
200  * Does the current dynamic shared memory implementation support resizing
201  * segments? (The answer here could be platform-dependent in the future,
202  * since AIX allows shmctl(shmid, SHM_RESIZE, &buffer), though you apparently
203  * can't resize segments to anything larger than 256MB that way. For now,
204  * we keep it simple.)
205  */
206 bool
208 {
210  {
211  case DSM_IMPL_NONE:
212  return false;
213  case DSM_IMPL_POSIX:
214  return true;
215  case DSM_IMPL_SYSV:
216  return false;
217  case DSM_IMPL_WINDOWS:
218  return false;
219  case DSM_IMPL_MMAP:
220  return true;
221  default:
222  return false; /* should not happen */
223  }
224 }
225 
226 #ifdef USE_DSM_POSIX
227 /*
228  * Operating system primitives to support POSIX shared memory.
229  *
230  * POSIX shared memory segments are created and attached using shm_open()
231  * and shm_unlink(); other operations, such as sizing or mapping the
232  * segment, are performed as if the shared memory segments were files.
233  *
234  * Indeed, on some platforms, they may be implemented that way. While
235  * POSIX shared memory segments seem intended to exist in a flat namespace,
236  * some operating systems may implement them as files, even going so far
237  * to treat a request for /xyz as a request to create a file by that name
238  * in the root directory. Users of such broken platforms should select
239  * a different shared memory implementation.
240  */
241 static bool
242 dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
243  void **impl_private, void **mapped_address, Size *mapped_size,
244  int elevel)
245 {
246  char name[64];
247  int flags;
248  int fd;
249  char *address;
250 
251  snprintf(name, 64, "/PostgreSQL.%u", handle);
252 
253  /* Handle teardown cases. */
254  if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
255  {
256  if (*mapped_address != NULL
257  && munmap(*mapped_address, *mapped_size) != 0)
258  {
259  ereport(elevel,
261  errmsg("could not unmap shared memory segment \"%s\": %m",
262  name)));
263  return false;
264  }
265  *mapped_address = NULL;
266  *mapped_size = 0;
267  if (op == DSM_OP_DESTROY && shm_unlink(name) != 0)
268  {
269  ereport(elevel,
271  errmsg("could not remove shared memory segment \"%s\": %m",
272  name)));
273  return false;
274  }
275  return true;
276  }
277 
278  /*
279  * Create new segment or open an existing one for attach or resize.
280  *
281  * Even though we're not going through fd.c, we should be safe against
282  * running out of file descriptors, because of NUM_RESERVED_FDS. We're
283  * only opening one extra descriptor here, and we'll close it before
284  * returning.
285  */
286  flags = O_RDWR | (op == DSM_OP_CREATE ? O_CREAT | O_EXCL : 0);
287  if ((fd = shm_open(name, flags, 0600)) == -1)
288  {
289  if (errno != EEXIST)
290  ereport(elevel,
292  errmsg("could not open shared memory segment \"%s\": %m",
293  name)));
294  return false;
295  }
296 
297  /*
298  * If we're attaching the segment, determine the current size; if we are
299  * creating or resizing the segment, set the size to the requested value.
300  */
301  if (op == DSM_OP_ATTACH)
302  {
303  struct stat st;
304 
305  if (fstat(fd, &st) != 0)
306  {
307  int save_errno;
308 
309  /* Back out what's already been done. */
310  save_errno = errno;
311  close(fd);
312  errno = save_errno;
313 
314  ereport(elevel,
316  errmsg("could not stat shared memory segment \"%s\": %m",
317  name)));
318  return false;
319  }
320  request_size = st.st_size;
321  }
322  else if (*mapped_size != request_size && ftruncate(fd, request_size))
323  {
324  int save_errno;
325 
326  /* Back out what's already been done. */
327  save_errno = errno;
328  close(fd);
329  if (op == DSM_OP_CREATE)
330  shm_unlink(name);
331  errno = save_errno;
332 
333  ereport(elevel,
335  errmsg("could not resize shared memory segment \"%s\" to %zu bytes: %m",
336  name, request_size)));
337  return false;
338  }
339 
340  /*
341  * If we're reattaching or resizing, we must remove any existing mapping,
342  * unless we've already got the right thing mapped.
343  */
344  if (*mapped_address != NULL)
345  {
346  if (*mapped_size == request_size)
347  return true;
348  if (munmap(*mapped_address, *mapped_size) != 0)
349  {
350  int save_errno;
351 
352  /* Back out what's already been done. */
353  save_errno = errno;
354  close(fd);
355  if (op == DSM_OP_CREATE)
356  shm_unlink(name);
357  errno = save_errno;
358 
359  ereport(elevel,
361  errmsg("could not unmap shared memory segment \"%s\": %m",
362  name)));
363  return false;
364  }
365  *mapped_address = NULL;
366  *mapped_size = 0;
367  }
368 
369  /* Map it. */
370  address = mmap(NULL, request_size, PROT_READ | PROT_WRITE,
371  MAP_SHARED | MAP_HASSEMAPHORE | MAP_NOSYNC, fd, 0);
372  if (address == MAP_FAILED)
373  {
374  int save_errno;
375 
376  /* Back out what's already been done. */
377  save_errno = errno;
378  close(fd);
379  if (op == DSM_OP_CREATE)
380  shm_unlink(name);
381  errno = save_errno;
382 
383  ereport(elevel,
385  errmsg("could not map shared memory segment \"%s\": %m",
386  name)));
387  return false;
388  }
389  *mapped_address = address;
390  *mapped_size = request_size;
391  close(fd);
392 
393  return true;
394 }
395 #endif
396 
397 #ifdef USE_DSM_SYSV
398 /*
399  * Operating system primitives to support System V shared memory.
400  *
401  * System V shared memory segments are manipulated using shmget(), shmat(),
402  * shmdt(), and shmctl(). There's no portable way to resize such
403  * segments. As the default allocation limits for System V shared memory
404  * are usually quite low, the POSIX facilities may be preferable; but
405  * those are not supported everywhere.
406  */
407 static bool
408 dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
409  void **impl_private, void **mapped_address, Size *mapped_size,
410  int elevel)
411 {
412  key_t key;
413  int ident;
414  char *address;
415  char name[64];
416  int *ident_cache;
417 
418  /* Resize is not supported for System V shared memory. */
419  if (op == DSM_OP_RESIZE)
420  {
421  elog(elevel, "System V shared memory segments cannot be resized");
422  return false;
423  }
424 
425  /* Since resize isn't supported, reattach is a no-op. */
426  if (op == DSM_OP_ATTACH && *mapped_address != NULL)
427  return true;
428 
429  /*
430  * POSIX shared memory and mmap-based shared memory identify segments with
431  * names. To avoid needless error message variation, we use the handle as
432  * the name.
433  */
434  snprintf(name, 64, "%u", handle);
435 
436  /*
437  * The System V shared memory namespace is very restricted; names are of
438  * type key_t, which is expected to be some sort of integer data type, but
439  * not necessarily the same one as dsm_handle. Since we use dsm_handle to
440  * identify shared memory segments across processes, this might seem like
441  * a problem, but it's really not. If dsm_handle is bigger than key_t,
442  * the cast below might truncate away some bits from the handle the
443  * user-provided, but it'll truncate exactly the same bits away in exactly
444  * the same fashion every time we use that handle, which is all that
445  * really matters. Conversely, if dsm_handle is smaller than key_t, we
446  * won't use the full range of available key space, but that's no big deal
447  * either.
448  *
449  * We do make sure that the key isn't negative, because that might not be
450  * portable.
451  */
452  key = (key_t) handle;
453  if (key < 1) /* avoid compiler warning if type is unsigned */
454  key = -key;
455 
456  /*
457  * There's one special key, IPC_PRIVATE, which can't be used. If we end
458  * up with that value by chance during a create operation, just pretend it
459  * already exists, so that caller will retry. If we run into it anywhere
460  * else, the caller has passed a handle that doesn't correspond to
461  * anything we ever created, which should not happen.
462  */
463  if (key == IPC_PRIVATE)
464  {
465  if (op != DSM_OP_CREATE)
466  elog(DEBUG4, "System V shared memory key may not be IPC_PRIVATE");
467  errno = EEXIST;
468  return false;
469  }
470 
471  /*
472  * Before we can do anything with a shared memory segment, we have to map
473  * the shared memory key to a shared memory identifier using shmget(). To
474  * avoid repeated lookups, we store the key using impl_private.
475  */
476  if (*impl_private != NULL)
477  {
478  ident_cache = *impl_private;
479  ident = *ident_cache;
480  }
481  else
482  {
483  int flags = IPCProtection;
484  size_t segsize;
485 
486  /*
487  * Allocate the memory BEFORE acquiring the resource, so that we don't
488  * leak the resource if memory allocation fails.
489  */
490  ident_cache = MemoryContextAlloc(TopMemoryContext, sizeof(int));
491 
492  /*
493  * When using shmget to find an existing segment, we must pass the
494  * size as 0. Passing a non-zero size which is greater than the
495  * actual size will result in EINVAL.
496  */
497  segsize = 0;
498 
499  if (op == DSM_OP_CREATE)
500  {
501  flags |= IPC_CREAT | IPC_EXCL;
502  segsize = request_size;
503  }
504 
505  if ((ident = shmget(key, segsize, flags)) == -1)
506  {
507  if (errno != EEXIST)
508  {
509  int save_errno = errno;
510 
511  pfree(ident_cache);
512  errno = save_errno;
513  ereport(elevel,
515  errmsg("could not get shared memory segment: %m")));
516  }
517  return false;
518  }
519 
520  *ident_cache = ident;
521  *impl_private = ident_cache;
522  }
523 
524  /* Handle teardown cases. */
525  if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
526  {
527  pfree(ident_cache);
528  *impl_private = NULL;
529  if (*mapped_address != NULL && shmdt(*mapped_address) != 0)
530  {
531  ereport(elevel,
533  errmsg("could not unmap shared memory segment \"%s\": %m",
534  name)));
535  return false;
536  }
537  *mapped_address = NULL;
538  *mapped_size = 0;
539  if (op == DSM_OP_DESTROY && shmctl(ident, IPC_RMID, NULL) < 0)
540  {
541  ereport(elevel,
543  errmsg("could not remove shared memory segment \"%s\": %m",
544  name)));
545  return false;
546  }
547  return true;
548  }
549 
550  /* If we're attaching it, we must use IPC_STAT to determine the size. */
551  if (op == DSM_OP_ATTACH)
552  {
553  struct shmid_ds shm;
554 
555  if (shmctl(ident, IPC_STAT, &shm) != 0)
556  {
557  ereport(elevel,
559  errmsg("could not stat shared memory segment \"%s\": %m",
560  name)));
561  return false;
562  }
563  request_size = shm.shm_segsz;
564  }
565 
566  /* Map it. */
567  address = shmat(ident, NULL, PG_SHMAT_FLAGS);
568  if (address == (void *) -1)
569  {
570  int save_errno;
571 
572  /* Back out what's already been done. */
573  save_errno = errno;
574  if (op == DSM_OP_CREATE)
575  shmctl(ident, IPC_RMID, NULL);
576  errno = save_errno;
577 
578  ereport(elevel,
580  errmsg("could not map shared memory segment \"%s\": %m",
581  name)));
582  return false;
583  }
584  *mapped_address = address;
585  *mapped_size = request_size;
586 
587  return true;
588 }
589 #endif
590 
591 #ifdef USE_DSM_WINDOWS
592 /*
593  * Operating system primitives to support Windows shared memory.
594  *
595  * Windows shared memory implementation is done using file mapping
596  * which can be backed by either physical file or system paging file.
597  * Current implementation uses system paging file as other effects
598  * like performance are not clear for physical file and it is used in similar
599  * way for main shared memory in windows.
600  *
601  * A memory mapping object is a kernel object - they always get deleted when
602  * the last reference to them goes away, either explicitly via a CloseHandle or
603  * when the process containing the reference exits.
604  */
605 static bool
606 dsm_impl_windows(dsm_op op, dsm_handle handle, Size request_size,
607  void **impl_private, void **mapped_address,
608  Size *mapped_size, int elevel)
609 {
610  char *address;
611  HANDLE hmap;
612  char name[64];
613  MEMORY_BASIC_INFORMATION info;
614 
615  /* Resize is not supported for Windows shared memory. */
616  if (op == DSM_OP_RESIZE)
617  {
618  elog(elevel, "Windows shared memory segments cannot be resized");
619  return false;
620  }
621 
622  /* Since resize isn't supported, reattach is a no-op. */
623  if (op == DSM_OP_ATTACH && *mapped_address != NULL)
624  return true;
625 
626  /*
627  * Storing the shared memory segment in the Global\ namespace, can allow
628  * any process running in any session to access that file mapping object
629  * provided that the caller has the required access rights. But to avoid
630  * issues faced in main shared memory, we are using the naming convention
631  * similar to main shared memory. We can change here once issue mentioned
632  * in GetSharedMemName is resolved.
633  */
634  snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
635 
636  /*
637  * Handle teardown cases. Since Windows automatically destroys the object
638  * when no references reamin, we can treat it the same as detach.
639  */
640  if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
641  {
642  if (*mapped_address != NULL
643  && UnmapViewOfFile(*mapped_address) == 0)
644  {
645  _dosmaperr(GetLastError());
646  ereport(elevel,
648  errmsg("could not unmap shared memory segment \"%s\": %m",
649  name)));
650  return false;
651  }
652  if (*impl_private != NULL
653  && CloseHandle(*impl_private) == 0)
654  {
655  _dosmaperr(GetLastError());
656  ereport(elevel,
658  errmsg("could not remove shared memory segment \"%s\": %m",
659  name)));
660  return false;
661  }
662 
663  *impl_private = NULL;
664  *mapped_address = NULL;
665  *mapped_size = 0;
666  return true;
667  }
668 
669  /* Create new segment or open an existing one for attach. */
670  if (op == DSM_OP_CREATE)
671  {
672  DWORD size_high;
673  DWORD size_low;
674  DWORD errcode;
675 
676  /* Shifts >= the width of the type are undefined. */
677 #ifdef _WIN64
678  size_high = request_size >> 32;
679 #else
680  size_high = 0;
681 #endif
682  size_low = (DWORD) request_size;
683 
684  /* CreateFileMapping might not clear the error code on success */
685  SetLastError(0);
686 
687  hmap = CreateFileMapping(INVALID_HANDLE_VALUE, /* Use the pagefile */
688  NULL, /* Default security attrs */
689  PAGE_READWRITE, /* Memory is read/write */
690  size_high, /* Upper 32 bits of size */
691  size_low, /* Lower 32 bits of size */
692  name);
693 
694  errcode = GetLastError();
695  if (errcode == ERROR_ALREADY_EXISTS || errcode == ERROR_ACCESS_DENIED)
696  {
697  /*
698  * On Windows, when the segment already exists, a handle for the
699  * existing segment is returned. We must close it before
700  * returning. However, if the existing segment is created by a
701  * service, then it returns ERROR_ACCESS_DENIED. We don't do
702  * _dosmaperr here, so errno won't be modified.
703  */
704  if (hmap)
705  CloseHandle(hmap);
706  return false;
707  }
708 
709  if (!hmap)
710  {
711  _dosmaperr(errcode);
712  ereport(elevel,
714  errmsg("could not create shared memory segment \"%s\": %m",
715  name)));
716  return false;
717  }
718  }
719  else
720  {
721  hmap = OpenFileMapping(FILE_MAP_WRITE | FILE_MAP_READ,
722  FALSE, /* do not inherit the name */
723  name); /* name of mapping object */
724  if (!hmap)
725  {
726  _dosmaperr(GetLastError());
727  ereport(elevel,
729  errmsg("could not open shared memory segment \"%s\": %m",
730  name)));
731  return false;
732  }
733  }
734 
735  /* Map it. */
736  address = MapViewOfFile(hmap, FILE_MAP_WRITE | FILE_MAP_READ,
737  0, 0, 0);
738  if (!address)
739  {
740  int save_errno;
741 
742  _dosmaperr(GetLastError());
743  /* Back out what's already been done. */
744  save_errno = errno;
745  CloseHandle(hmap);
746  errno = save_errno;
747 
748  ereport(elevel,
750  errmsg("could not map shared memory segment \"%s\": %m",
751  name)));
752  return false;
753  }
754 
755  /*
756  * VirtualQuery gives size in page_size units, which is 4K for Windows. We
757  * need size only when we are attaching, but it's better to get the size
758  * when creating new segment to keep size consistent both for
759  * DSM_OP_CREATE and DSM_OP_ATTACH.
760  */
761  if (VirtualQuery(address, &info, sizeof(info)) == 0)
762  {
763  int save_errno;
764 
765  _dosmaperr(GetLastError());
766  /* Back out what's already been done. */
767  save_errno = errno;
768  UnmapViewOfFile(address);
769  CloseHandle(hmap);
770  errno = save_errno;
771 
772  ereport(elevel,
774  errmsg("could not stat shared memory segment \"%s\": %m",
775  name)));
776  return false;
777  }
778 
779  *mapped_address = address;
780  *mapped_size = info.RegionSize;
781  *impl_private = hmap;
782 
783  return true;
784 }
785 #endif
786 
787 #ifdef USE_DSM_MMAP
788 /*
789  * Operating system primitives to support mmap-based shared memory.
790  *
791  * Calling this "shared memory" is somewhat of a misnomer, because what
792  * we're really doing is creating a bunch of files and mapping them into
793  * our address space. The operating system may feel obliged to
794  * synchronize the contents to disk even if nothing is being paged out,
795  * which will not serve us well. The user can relocate the pg_dynshmem
796  * directory to a ramdisk to avoid this problem, if available.
797  */
798 static bool
799 dsm_impl_mmap(dsm_op op, dsm_handle handle, Size request_size,
800  void **impl_private, void **mapped_address, Size *mapped_size,
801  int elevel)
802 {
803  char name[64];
804  int flags;
805  int fd;
806  char *address;
807 
809  handle);
810 
811  /* Handle teardown cases. */
812  if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
813  {
814  if (*mapped_address != NULL
815  && munmap(*mapped_address, *mapped_size) != 0)
816  {
817  ereport(elevel,
819  errmsg("could not unmap shared memory segment \"%s\": %m",
820  name)));
821  return false;
822  }
823  *mapped_address = NULL;
824  *mapped_size = 0;
825  if (op == DSM_OP_DESTROY && unlink(name) != 0)
826  {
827  ereport(elevel,
829  errmsg("could not remove shared memory segment \"%s\": %m",
830  name)));
831  return false;
832  }
833  return true;
834  }
835 
836  /* Create new segment or open an existing one for attach or resize. */
837  flags = O_RDWR | (op == DSM_OP_CREATE ? O_CREAT | O_EXCL : 0);
838  if ((fd = OpenTransientFile(name, flags, 0600)) == -1)
839  {
840  if (errno != EEXIST)
841  ereport(elevel,
843  errmsg("could not open shared memory segment \"%s\": %m",
844  name)));
845  return false;
846  }
847 
848  /*
849  * If we're attaching the segment, determine the current size; if we are
850  * creating or resizing the segment, set the size to the requested value.
851  */
852  if (op == DSM_OP_ATTACH)
853  {
854  struct stat st;
855 
856  if (fstat(fd, &st) != 0)
857  {
858  int save_errno;
859 
860  /* Back out what's already been done. */
861  save_errno = errno;
862  CloseTransientFile(fd);
863  errno = save_errno;
864 
865  ereport(elevel,
867  errmsg("could not stat shared memory segment \"%s\": %m",
868  name)));
869  return false;
870  }
871  request_size = st.st_size;
872  }
873  else if (*mapped_size > request_size && ftruncate(fd, request_size))
874  {
875  int save_errno;
876 
877  /* Back out what's already been done. */
878  save_errno = errno;
879  close(fd);
880  if (op == DSM_OP_CREATE)
881  unlink(name);
882  errno = save_errno;
883 
884  ereport(elevel,
886  errmsg("could not resize shared memory segment \"%s\" to %zu bytes: %m",
887  name, request_size)));
888  return false;
889  }
890  else if (*mapped_size < request_size)
891  {
892  /*
893  * Allocate a buffer full of zeros.
894  *
895  * Note: palloc zbuffer, instead of just using a local char array, to
896  * ensure it is reasonably well-aligned; this may save a few cycles
897  * transferring data to the kernel.
898  */
899  char *zbuffer = (char *) palloc0(ZBUFFER_SIZE);
900  uint32 remaining = request_size;
901  bool success = true;
902 
903  /*
904  * Zero-fill the file. We have to do this the hard way to ensure that
905  * all the file space has really been allocated, so that we don't
906  * later seg fault when accessing the memory mapping. This is pretty
907  * pessimal.
908  */
909  while (success && remaining > 0)
910  {
911  Size goal = remaining;
912 
913  if (goal > ZBUFFER_SIZE)
914  goal = ZBUFFER_SIZE;
916  if (write(fd, zbuffer, goal) == goal)
917  remaining -= goal;
918  else
919  success = false;
921  }
922 
923  if (!success)
924  {
925  int save_errno;
926 
927  /* Back out what's already been done. */
928  save_errno = errno;
929  CloseTransientFile(fd);
930  if (op == DSM_OP_CREATE)
931  unlink(name);
932  errno = save_errno ? save_errno : ENOSPC;
933 
934  ereport(elevel,
936  errmsg("could not resize shared memory segment \"%s\" to %zu bytes: %m",
937  name, request_size)));
938  return false;
939  }
940  }
941 
942  /*
943  * If we're reattaching or resizing, we must remove any existing mapping,
944  * unless we've already got the right thing mapped.
945  */
946  if (*mapped_address != NULL)
947  {
948  if (*mapped_size == request_size)
949  return true;
950  if (munmap(*mapped_address, *mapped_size) != 0)
951  {
952  int save_errno;
953 
954  /* Back out what's already been done. */
955  save_errno = errno;
956  CloseTransientFile(fd);
957  if (op == DSM_OP_CREATE)
958  unlink(name);
959  errno = save_errno;
960 
961  ereport(elevel,
963  errmsg("could not unmap shared memory segment \"%s\": %m",
964  name)));
965  return false;
966  }
967  *mapped_address = NULL;
968  *mapped_size = 0;
969  }
970 
971  /* Map it. */
972  address = mmap(NULL, request_size, PROT_READ | PROT_WRITE,
973  MAP_SHARED | MAP_HASSEMAPHORE | MAP_NOSYNC, fd, 0);
974  if (address == MAP_FAILED)
975  {
976  int save_errno;
977 
978  /* Back out what's already been done. */
979  save_errno = errno;
980  CloseTransientFile(fd);
981  if (op == DSM_OP_CREATE)
982  unlink(name);
983  errno = save_errno;
984 
985  ereport(elevel,
987  errmsg("could not map shared memory segment \"%s\": %m",
988  name)));
989  return false;
990  }
991  *mapped_address = address;
992  *mapped_size = request_size;
993  CloseTransientFile(fd);
994 
995  return true;
996 }
997 #endif
998 
999 /*
1000  * Implementation-specific actions that must be performed when a segment is to
1001  * be preserved even when no backend has it attached.
1002  *
1003  * Except on Windows, we don't need to do anything at all. But since Windows
1004  * cleans up segments automatically when no references remain, we duplicate
1005  * the segment handle into the postmaster process. The postmaster needn't
1006  * do anything to receive the handle; Windows transfers it automatically.
1007  */
1008 void
1009 dsm_impl_pin_segment(dsm_handle handle, void *impl_private,
1010  void **impl_private_pm_handle)
1011 {
1013  {
1014 #ifdef USE_DSM_WINDOWS
1015  case DSM_IMPL_WINDOWS:
1016  {
1017  HANDLE hmap;
1018 
1019  if (!DuplicateHandle(GetCurrentProcess(), impl_private,
1020  PostmasterHandle, &hmap, 0, FALSE,
1021  DUPLICATE_SAME_ACCESS))
1022  {
1023  char name[64];
1024 
1025  snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
1026  _dosmaperr(GetLastError());
1027  ereport(ERROR,
1029  errmsg("could not duplicate handle for \"%s\": %m",
1030  name)));
1031  }
1032 
1033  /*
1034  * Here, we remember the handle that we created in the
1035  * postmaster process. This handle isn't actually usable in
1036  * any process other than the postmaster, but that doesn't
1037  * matter. We're just holding onto it so that, if the segment
1038  * is unpinned, dsm_impl_unpin_segment can close it.
1039  */
1040  *impl_private_pm_handle = hmap;
1041  break;
1042  }
1043 #endif
1044  default:
1045  break;
1046  }
1047 }
1048 
1049 /*
1050  * Implementation-specific actions that must be performed when a segment is no
1051  * longer to be preserved, so that it will be cleaned up when all backends
1052  * have detached from it.
1053  *
1054  * Except on Windows, we don't need to do anything at all. For Windows, we
1055  * close the extra handle that dsm_impl_pin_segment created in the
1056  * postmaster's process space.
1057  */
1058 void
1059 dsm_impl_unpin_segment(dsm_handle handle, void **impl_private)
1060 {
1062  {
1063 #ifdef USE_DSM_WINDOWS
1064  case DSM_IMPL_WINDOWS:
1065  {
1066  if (*impl_private &&
1067  !DuplicateHandle(PostmasterHandle, *impl_private,
1068  NULL, NULL, 0, FALSE,
1069  DUPLICATE_CLOSE_SOURCE))
1070  {
1071  char name[64];
1072 
1073  snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
1074  _dosmaperr(GetLastError());
1075  ereport(ERROR,
1077  errmsg("could not duplicate handle for \"%s\": %m",
1078  name)));
1079  }
1080 
1081  *impl_private = NULL;
1082  break;
1083  }
1084 #endif
1085  default:
1086  break;
1087  }
1088 }
1089 
1090 static int
1092 {
1093  if (errno == EFBIG || errno == ENOMEM)
1094  return errcode(ERRCODE_OUT_OF_MEMORY);
1095  else
1096  return errcode_for_file_access();
1097 }
int remaining
Definition: informix.c:692
#define DSM_IMPL_MMAP
Definition: dsm_impl.h:21
#define DSM_IMPL_SYSV
Definition: dsm_impl.h:19
#define IPC_CREAT
Definition: win32.h:107
#define MAP_HASSEMAPHORE
Definition: mem.h:30
#define IPC_EXCL
Definition: win32.h:108
#define MAP_FAILED
Definition: mem.h:45
void dsm_impl_unpin_segment(dsm_handle handle, void **impl_private)
Definition: dsm_impl.c:1059
uint32 dsm_handle
Definition: dsm_impl.h:55
#define PG_DYNSHMEM_DIR
Definition: dsm_impl.h:51
config_enum_entry
Definition: guc.h:164
#define write(a, b, c)
Definition: win32.h:14
#define IPCProtection
Definition: posix_sema.c:52
#define PG_SHMAT_FLAGS
Definition: mem.h:20
#define IPC_STAT
Definition: win32.h:111
#define MAP_NOSYNC
Definition: mem.h:38
int errcode(int sqlerrcode)
Definition: elog.c:575
int snprintf(char *str, size_t count, const char *fmt,...) pg_attribute_printf(3
#define DSM_IMPL_WINDOWS
Definition: dsm_impl.h:20
static bool dsm_impl_mmap(dsm_op op, dsm_handle handle, Size request_size, void **impl_private, void **mapped_address, Size *mapped_size, int elevel)
Definition: dsm_impl.c:799
#define DEBUG4
Definition: elog.h:22
static int fd(const char *x, int i)
Definition: preproc-init.c:105
static int errcode_for_dynamic_shared_memory(void)
Definition: dsm_impl.c:1091
long key_t
Definition: win32.h:253
void pfree(void *pointer)
Definition: mcxt.c:950
const struct config_enum_entry dynamic_shared_memory_options[]
Definition: dsm_impl.c:94
#define ERROR
Definition: elog.h:43
#define DSM_IMPL_POSIX
Definition: dsm_impl.h:18
#define FALSE
Definition: c.h:221
static bool success
Definition: pg_basebackup.c:96
#define SEGMENT_NAME_PREFIX
Definition: dsm_impl.c:117
int OpenTransientFile(FileName fileName, int fileFlags, int fileMode)
Definition: fd.c:2144
int errcode_for_file_access(void)
Definition: elog.c:598
#define IPC_PRIVATE
Definition: win32.h:109
int dynamic_shared_memory_type
Definition: dsm_impl.c:112
unsigned int uint32
Definition: c.h:268
static void pgstat_report_wait_end(void)
Definition: pgstat.h:1232
int unlink(const char *filename)
#define ereport(elevel, rest)
Definition: elog.h:122
MemoryContext TopMemoryContext
Definition: mcxt.c:43
int CloseTransientFile(int fd)
Definition: fd.c:2305
static int elevel
Definition: vacuumlazy.c:137
void * palloc0(Size size)
Definition: mcxt.c:878
static bool dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size, void **impl_private, void **mapped_address, Size *mapped_size, int elevel)
Definition: dsm_impl.c:408
#define IPC_RMID
Definition: win32.h:106
#define ftruncate(a, b)
Definition: win32.h:59
#define NULL
Definition: c.h:229
#define Assert(condition)
Definition: c.h:675
void _dosmaperr(unsigned long)
Definition: win32error.c:171
#define DSM_IMPL_NONE
Definition: dsm_impl.h:17
size_t Size
Definition: c.h:356
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: pgstat.h:1208
const char * name
Definition: encode.c:521
void dsm_impl_pin_segment(dsm_handle handle, void *impl_private, void **impl_private_pm_handle)
Definition: dsm_impl.c:1009
int errmsg(const char *fmt,...)
Definition: elog.c:797
dsm_op
Definition: dsm_impl.h:58
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:707
#define PG_DYNSHMEM_MMAP_FILE_PREFIX
Definition: dsm_impl.h:52
#define ZBUFFER_SIZE
Definition: dsm_impl.c:115
bool dsm_impl_can_resize(void)
Definition: dsm_impl.c:207
#define elog
Definition: elog.h:219
#define close(a)
Definition: win32.h:12
bool dsm_impl_op(dsm_op op, dsm_handle handle, Size request_size, void **impl_private, void **mapped_address, Size *mapped_size, int elevel)
Definition: dsm_impl.c:162