PostgreSQL Source Code  git master
sysv_shmem.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * sysv_shmem.c
4  * Implement shared memory using SysV facilities
5  *
6  * These routines used to be a fairly thin layer on top of SysV shared
7  * memory functionality. With the addition of anonymous-shmem logic,
8  * they're a bit fatter now. We still require a SysV shmem block to
9  * exist, though, because mmap'd shmem provides no way to find out how
10  * many processes are attached, which we need for interlocking purposes.
11  *
12  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
13  * Portions Copyright (c) 1994, Regents of the University of California
14  *
15  * IDENTIFICATION
16  * src/backend/port/sysv_shmem.c
17  *
18  *-------------------------------------------------------------------------
19  */
20 #include "postgres.h"
21 
22 #include <signal.h>
23 #include <unistd.h>
24 #include <sys/file.h>
25 #include <sys/mman.h>
26 #include <sys/stat.h>
27 #ifdef HAVE_SYS_IPC_H
28 #include <sys/ipc.h>
29 #endif
30 #ifdef HAVE_SYS_SHM_H
31 #include <sys/shm.h>
32 #endif
33 
34 #include "miscadmin.h"
35 #include "port/pg_bitutils.h"
36 #include "portability/mem.h"
37 #include "storage/dsm.h"
38 #include "storage/fd.h"
39 #include "storage/ipc.h"
40 #include "storage/pg_shmem.h"
41 #include "utils/guc.h"
42 #include "utils/pidfile.h"
43 
44 
45 /*
46  * As of PostgreSQL 9.3, we normally allocate only a very small amount of
47  * System V shared memory, and only for the purposes of providing an
48  * interlock to protect the data directory. The real shared memory block
49  * is allocated using mmap(). This works around the problem that many
50  * systems have very low limits on the amount of System V shared memory
51  * that can be allocated. Even a limit of a few megabytes will be enough
52  * to run many copies of PostgreSQL without needing to adjust system settings.
53  *
54  * We assume that no one will attempt to run PostgreSQL 9.3 or later on
55  * systems that are ancient enough that anonymous shared memory is not
56  * supported, such as pre-2.4 versions of Linux. If that turns out to be
57  * false, we might need to add compile and/or run-time tests here and do this
58  * only if the running kernel supports it.
59  *
60  * However, we must always disable this logic in the EXEC_BACKEND case, and
61  * fall back to the old method of allocating the entire segment using System V
62  * shared memory, because there's no way to attach an anonymous mmap'd segment
63  * to a process after exec(). Since EXEC_BACKEND is intended only for
64  * developer use, this shouldn't be a big problem. Because of this, we do
65  * not worry about supporting anonymous shmem in the EXEC_BACKEND cases below.
66  *
67  * As of PostgreSQL 12, we regained the ability to use a large System V shared
68  * memory region even in non-EXEC_BACKEND builds, if shared_memory_type is set
69  * to sysv (though this is not the default).
70  */
71 
72 
/* Local spellings for the SysV shared memory key and identifier types. */
typedef key_t IpcMemoryKey;     /* shared memory key passed to shmget(2) */
typedef int IpcMemoryId;        /* shared memory ID returned by shmget(2) */
75 
/*
 * How does a given IpcMemoryId relate to this PostgreSQL process?
 *
 * One could recycle unattached segments of different data directories if we
 * distinguished that case from other SHMSTATE_FOREIGN cases.  Doing so would
 * cause us to visit less of the key space, making us less likely to detect a
 * SHMSTATE_ATTACHED key.  It would also complicate the concurrency analysis,
 * in that postmasters of different data directories could simultaneously
 * attempt to recycle a given key.  We'll waste keys longer in some cases, but
 * avoiding the problems of the alternative justifies that loss.
 */
typedef enum
{
    SHMSTATE_ANALYSIS_FAILURE,  /* unexpected failure to analyze the ID */
    SHMSTATE_ATTACHED,          /* pertinent to DataDir, has attached PIDs */
    SHMSTATE_ENOENT,            /* no segment of that ID */
    SHMSTATE_FOREIGN,           /* exists, but not pertinent to DataDir */
    SHMSTATE_UNATTACHED         /* pertinent to DataDir, no attached PIDs */
} IpcMemoryState;
95 
96 
97 unsigned long UsedShmemSegID = 0;
98 void *UsedShmemSegAddr = NULL;
99 
101 static void *AnonymousShmem = NULL;
102 
103 static void *InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size);
104 static void IpcMemoryDetach(int status, Datum shmaddr);
105 static void IpcMemoryDelete(int status, Datum shmId);
107  void *attachAt,
108  PGShmemHeader **addr);
109 
110 
111 /*
112  * InternalIpcMemoryCreate(memKey, size)
113  *
114  * Attempt to create a new shared memory segment with the specified key.
115  * Will fail (return NULL) if such a segment already exists. If successful,
116  * attach the segment to the current process and return its attached address.
117  * On success, callbacks are registered with on_shmem_exit to detach and
118  * delete the segment when on_shmem_exit is called.
119  *
120  * If we fail with a failure code other than collision-with-existing-segment,
121  * print out an error and abort. Other types of errors are not recoverable.
122  */
123 static void *
125 {
126  IpcMemoryId shmid;
127  void *requestedAddress = NULL;
128  void *memAddress;
129 
130  /*
131  * Normally we just pass requestedAddress = NULL to shmat(), allowing the
132  * system to choose where the segment gets mapped. But in an EXEC_BACKEND
133  * build, it's possible for whatever is chosen in the postmaster to not
134  * work for backends, due to variations in address space layout. As a
135  * rather klugy workaround, allow the user to specify the address to use
136  * via setting the environment variable PG_SHMEM_ADDR. (If this were of
137  * interest for anything except debugging, we'd probably create a cleaner
138  * and better-documented way to set it, such as a GUC.)
139  */
140 #ifdef EXEC_BACKEND
141  {
142  char *pg_shmem_addr = getenv("PG_SHMEM_ADDR");
143 
144  if (pg_shmem_addr)
145  requestedAddress = (void *) strtoul(pg_shmem_addr, NULL, 0);
146  }
147 #endif
148 
149  shmid = shmget(memKey, size, IPC_CREAT | IPC_EXCL | IPCProtection);
150 
151  if (shmid < 0)
152  {
153  int shmget_errno = errno;
154 
155  /*
156  * Fail quietly if error indicates a collision with existing segment.
157  * One would expect EEXIST, given that we said IPC_EXCL, but perhaps
158  * we could get a permission violation instead? Also, EIDRM might
159  * occur if an old seg is slated for destruction but not gone yet.
160  */
161  if (shmget_errno == EEXIST || shmget_errno == EACCES
162 #ifdef EIDRM
163  || shmget_errno == EIDRM
164 #endif
165  )
166  return NULL;
167 
168  /*
169  * Some BSD-derived kernels are known to return EINVAL, not EEXIST, if
170  * there is an existing segment but it's smaller than "size" (this is
171  * a result of poorly-thought-out ordering of error tests). To
172  * distinguish between collision and invalid size in such cases, we
173  * make a second try with size = 0. These kernels do not test size
174  * against SHMMIN in the preexisting-segment case, so we will not get
175  * EINVAL a second time if there is such a segment.
176  */
177  if (shmget_errno == EINVAL)
178  {
179  shmid = shmget(memKey, 0, IPC_CREAT | IPC_EXCL | IPCProtection);
180 
181  if (shmid < 0)
182  {
183  /* As above, fail quietly if we verify a collision */
184  if (errno == EEXIST || errno == EACCES
185 #ifdef EIDRM
186  || errno == EIDRM
187 #endif
188  )
189  return NULL;
190  /* Otherwise, fall through to report the original error */
191  }
192  else
193  {
194  /*
195  * On most platforms we cannot get here because SHMMIN is
196  * greater than zero. However, if we do succeed in creating a
197  * zero-size segment, free it and then fall through to report
198  * the original error.
199  */
200  if (shmctl(shmid, IPC_RMID, NULL) < 0)
201  elog(LOG, "shmctl(%d, %d, 0) failed: %m",
202  (int) shmid, IPC_RMID);
203  }
204  }
205 
206  /*
207  * Else complain and abort.
208  *
209  * Note: at this point EINVAL should mean that either SHMMIN or SHMMAX
210  * is violated. SHMALL violation might be reported as either ENOMEM
211  * (BSDen) or ENOSPC (Linux); the Single Unix Spec fails to say which
212  * it should be. SHMMNI violation is ENOSPC, per spec. Just plain
213  * not-enough-RAM is ENOMEM.
214  */
215  errno = shmget_errno;
216  ereport(FATAL,
217  (errmsg("could not create shared memory segment: %m"),
218  errdetail("Failed system call was shmget(key=%lu, size=%zu, 0%o).",
219  (unsigned long) memKey, size,
221  (shmget_errno == EINVAL) ?
222  errhint("This error usually means that PostgreSQL's request for a shared memory "
223  "segment exceeded your kernel's SHMMAX parameter, or possibly that "
224  "it is less than "
225  "your kernel's SHMMIN parameter.\n"
226  "The PostgreSQL documentation contains more information about shared "
227  "memory configuration.") : 0,
228  (shmget_errno == ENOMEM) ?
229  errhint("This error usually means that PostgreSQL's request for a shared "
230  "memory segment exceeded your kernel's SHMALL parameter. You might need "
231  "to reconfigure the kernel with larger SHMALL.\n"
232  "The PostgreSQL documentation contains more information about shared "
233  "memory configuration.") : 0,
234  (shmget_errno == ENOSPC) ?
235  errhint("This error does *not* mean that you have run out of disk space. "
236  "It occurs either if all available shared memory IDs have been taken, "
237  "in which case you need to raise the SHMMNI parameter in your kernel, "
238  "or because the system's overall limit for shared memory has been "
239  "reached.\n"
240  "The PostgreSQL documentation contains more information about shared "
241  "memory configuration.") : 0));
242  }
243 
244  /* Register on-exit routine to delete the new segment */
246 
247  /* OK, should be able to attach to the segment */
248  memAddress = shmat(shmid, requestedAddress, PG_SHMAT_FLAGS);
249 
250  if (memAddress == (void *) -1)
251  elog(FATAL, "shmat(id=%d, addr=%p, flags=0x%x) failed: %m",
252  shmid, requestedAddress, PG_SHMAT_FLAGS);
253 
254  /* Register on-exit routine to detach new segment before deleting */
256 
257  /*
258  * Store shmem key and ID in data directory lockfile. Format to try to
259  * keep it the same length always (trailing junk in the lockfile won't
260  * hurt, but might confuse humans).
261  */
262  {
263  char line[64];
264 
265  sprintf(line, "%9lu %9lu",
266  (unsigned long) memKey, (unsigned long) shmid);
268  }
269 
270  return memAddress;
271 }
272 
273 /****************************************************************************/
274 /* IpcMemoryDetach(status, shmaddr) removes a shared memory segment */
275 /* from process' address space */
276 /* (called as an on_shmem_exit callback, hence funny argument list) */
277 /****************************************************************************/
278 static void
280 {
281  /* Detach System V shared memory block. */
282  if (shmdt(DatumGetPointer(shmaddr)) < 0)
283  elog(LOG, "shmdt(%p) failed: %m", DatumGetPointer(shmaddr));
284 }
285 
286 /****************************************************************************/
287 /* IpcMemoryDelete(status, shmId) deletes a shared memory segment */
288 /* (called as an on_shmem_exit callback, hence funny argument list) */
289 /****************************************************************************/
290 static void
292 {
293  if (shmctl(DatumGetInt32(shmId), IPC_RMID, NULL) < 0)
294  elog(LOG, "shmctl(%d, %d, 0) failed: %m",
295  DatumGetInt32(shmId), IPC_RMID);
296 }
297 
298 /*
299  * PGSharedMemoryIsInUse
300  *
301  * Is a previously-existing shmem segment still existing and in use?
302  *
303  * The point of this exercise is to detect the case where a prior postmaster
304  * crashed, but it left child backends that are still running. Therefore
305  * we only care about shmem segments that are associated with the intended
306  * DataDir. This is an important consideration since accidental matches of
307  * shmem segment IDs are reasonably common.
308  */
309 bool
310 PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
311 {
312  PGShmemHeader *memAddress;
314 
315  state = PGSharedMemoryAttach((IpcMemoryId) id2, NULL, &memAddress);
316  if (memAddress && shmdt(memAddress) < 0)
317  elog(LOG, "shmdt(%p) failed: %m", memAddress);
318  switch (state)
319  {
320  case SHMSTATE_ENOENT:
321  case SHMSTATE_FOREIGN:
322  case SHMSTATE_UNATTACHED:
323  return false;
325  case SHMSTATE_ATTACHED:
326  return true;
327  }
328  return true;
329 }
330 
331 /*
332  * Test for a segment with id shmId; see comment at IpcMemoryState.
333  *
334  * If the segment exists, we'll attempt to attach to it, using attachAt
335  * if that's not NULL (but it's best to pass NULL if possible).
336  *
337  * *addr is set to the segment memory address if we attached to it, else NULL.
338  */
339 static IpcMemoryState
341  void *attachAt,
342  PGShmemHeader **addr)
343 {
344  struct shmid_ds shmStat;
345  struct stat statbuf;
346  PGShmemHeader *hdr;
347 
348  *addr = NULL;
349 
350  /*
351  * First, try to stat the shm segment ID, to see if it exists at all.
352  */
353  if (shmctl(shmId, IPC_STAT, &shmStat) < 0)
354  {
355  /*
356  * EINVAL actually has multiple possible causes documented in the
357  * shmctl man page, but we assume it must mean the segment no longer
358  * exists.
359  */
360  if (errno == EINVAL)
361  return SHMSTATE_ENOENT;
362 
363  /*
364  * EACCES implies we have no read permission, which means it is not a
365  * Postgres shmem segment (or at least, not one that is relevant to
366  * our data directory).
367  */
368  if (errno == EACCES)
369  return SHMSTATE_FOREIGN;
370 
371  /*
372  * Some Linux kernel versions (in fact, all of them as of July 2007)
373  * sometimes return EIDRM when EINVAL is correct. The Linux kernel
374  * actually does not have any internal state that would justify
375  * returning EIDRM, so we can get away with assuming that EIDRM is
376  * equivalent to EINVAL on that platform.
377  */
378 #ifdef HAVE_LINUX_EIDRM_BUG
379  if (errno == EIDRM)
380  return SHMSTATE_ENOENT;
381 #endif
382 
383  /*
384  * Otherwise, we had better assume that the segment is in use. The
385  * only likely case is (non-Linux, assumed spec-compliant) EIDRM,
386  * which implies that the segment has been IPC_RMID'd but there are
387  * still processes attached to it.
388  */
390  }
391 
392  /*
393  * Try to attach to the segment and see if it matches our data directory.
394  * This avoids any risk of duplicate-shmem-key conflicts on machines that
395  * are running several postmasters under the same userid.
396  *
397  * (When we're called from PGSharedMemoryCreate, this stat call is
398  * duplicative; but since this isn't a high-traffic case it's not worth
399  * trying to optimize.)
400  */
401  if (stat(DataDir, &statbuf) < 0)
402  return SHMSTATE_ANALYSIS_FAILURE; /* can't stat; be conservative */
403 
404  hdr = (PGShmemHeader *) shmat(shmId, attachAt, PG_SHMAT_FLAGS);
405  if (hdr == (PGShmemHeader *) -1)
406  {
407  /*
408  * Attachment failed. The cases we're interested in are the same as
409  * for the shmctl() call above. In particular, note that the owning
410  * postmaster could have terminated and removed the segment between
411  * shmctl() and shmat().
412  *
413  * If attachAt isn't NULL, it's possible that EINVAL reflects a
414  * problem with that address not a vanished segment, so it's best to
415  * pass NULL when probing for conflicting segments.
416  */
417  if (errno == EINVAL)
418  return SHMSTATE_ENOENT; /* segment disappeared */
419  if (errno == EACCES)
420  return SHMSTATE_FOREIGN; /* must be non-Postgres */
421 #ifdef HAVE_LINUX_EIDRM_BUG
422  if (errno == EIDRM)
423  return SHMSTATE_ENOENT; /* segment disappeared */
424 #endif
425  /* Otherwise, be conservative. */
427  }
428  *addr = hdr;
429 
430  if (hdr->magic != PGShmemMagic ||
431  hdr->device != statbuf.st_dev ||
432  hdr->inode != statbuf.st_ino)
433  {
434  /*
435  * It's either not a Postgres segment, or not one for my data
436  * directory.
437  */
438  return SHMSTATE_FOREIGN;
439  }
440 
441  /*
442  * It does match our data directory, so now test whether any processes are
443  * still attached to it. (We are, now, but the shm_nattch result is from
444  * before we attached to it.)
445  */
446  return shmStat.shm_nattch == 0 ? SHMSTATE_UNATTACHED : SHMSTATE_ATTACHED;
447 }
448 
#ifdef MAP_HUGETLB

/*
 * Identify the huge page size to use, and compute the related mmap flags.
 *
 * Some Linux kernel versions have a bug causing mmap() to fail on requests
 * that are not a multiple of the hugepage size.  Versions without that bug
 * instead silently round the request up to the next hugepage multiple ---
 * and then munmap() fails when we give it a size different from that.
 * So we have to round our request up to a multiple of the actual hugepage
 * size to avoid trouble.
 *
 * Doing the round-up ourselves also lets us make use of the extra memory,
 * rather than just wasting it.  Currently, we just increase the available
 * space recorded in the shmem header, which will make the extra usable for
 * purposes such as additional locktable entries.  Someday, for very large
 * hugepage sizes, we might want to think about more invasive strategies,
 * such as increasing shared_buffers to absorb the extra space.
 *
 * Returns the (real, assumed or config provided) page size into *hugepagesize,
 * and the hugepage-related mmap flags to use into *mmap_flags.  Both output
 * pointers are written unconditionally, so neither may be NULL.
 */
static void
GetHugePageSize(Size *hugepagesize, int *mmap_flags)
{
    Size        default_hugepagesize = 0;

    /*
     * System-dependent code to find out the default huge page size.
     *
     * On Linux, read /proc/meminfo looking for a line like "Hugepagesize:
     * nnnn kB".  Ignore any failures, falling back to the preset default.
     */
#ifdef __linux__

    {
        FILE       *fp = AllocateFile("/proc/meminfo", "r");
        char        buf[128];
        unsigned int sz;
        char        ch;

        if (fp)
        {
            while (fgets(buf, sizeof(buf), fp))
            {
                if (sscanf(buf, "Hugepagesize: %u %c", &sz, &ch) == 2)
                {
                    if (ch == 'k')
                    {
                        /* Widen before multiplying to avoid 32-bit overflow. */
                        default_hugepagesize = sz * (Size) 1024;
                        break;
                    }
                    /* We could accept other units besides kB, if needed */
                }
            }
            FreeFile(fp);
        }
    }
#endif                          /* __linux__ */

    if (huge_page_size != 0)
    {
        /* If huge page size is requested explicitly, use that. */
        *hugepagesize = (Size) huge_page_size * 1024;
    }
    else if (default_hugepagesize != 0)
    {
        /* Otherwise use the system default, if we have it. */
        *hugepagesize = default_hugepagesize;
    }
    else
    {
        /*
         * If we fail to find out the system's default huge page size, or no
         * huge page size is requested explicitly, assume it is 2MB.  This will
         * work fine when the actual size is less.  If it's more, we might get
         * mmap() or munmap() failures due to unaligned requests; but at this
         * writing, there are no reports of any non-Linux systems being picky
         * about that.
         */
        *hugepagesize = 2 * 1024 * 1024;
    }

    *mmap_flags = MAP_HUGETLB;

    /*
     * On recent enough Linux, also include the explicit page size, if
     * necessary.
     */
#if defined(MAP_HUGE_MASK) && defined(MAP_HUGE_SHIFT)
    if (*hugepagesize != default_hugepagesize)
    {
        /* The page size is passed to mmap() as its base-2 logarithm. */
        int         shift = pg_ceil_log2_64(*hugepagesize);

        *mmap_flags |= (shift & MAP_HUGE_MASK) << MAP_HUGE_SHIFT;
    }
#endif
}

#endif                          /* MAP_HUGETLB */
549 
550 /*
551  * Creates an anonymous mmap()ed shared memory segment.
552  *
553  * Pass the requested size in *size. This function will modify *size to the
554  * actual size of the allocation, if it ends up allocating a segment that is
555  * larger than requested.
556  */
557 static void *
559 {
560  Size allocsize = *size;
561  void *ptr = MAP_FAILED;
562  int mmap_errno = 0;
563 
564 #ifndef MAP_HUGETLB
565  /* PGSharedMemoryCreate should have dealt with this case */
567 #else
569  {
570  /*
571  * Round up the request size to a suitable large value.
572  */
573  Size hugepagesize;
574  int mmap_flags;
575 
576  GetHugePageSize(&hugepagesize, &mmap_flags);
577 
578  if (allocsize % hugepagesize != 0)
579  allocsize += hugepagesize - (allocsize % hugepagesize);
580 
581  ptr = mmap(NULL, allocsize, PROT_READ | PROT_WRITE,
582  PG_MMAP_FLAGS | mmap_flags, -1, 0);
583  mmap_errno = errno;
584  if (huge_pages == HUGE_PAGES_TRY && ptr == MAP_FAILED)
585  elog(DEBUG1, "mmap(%zu) with MAP_HUGETLB failed, huge pages disabled: %m",
586  allocsize);
587  }
588 #endif
589 
590  if (ptr == MAP_FAILED && huge_pages != HUGE_PAGES_ON)
591  {
592  /*
593  * Use the original size, not the rounded-up value, when falling back
594  * to non-huge pages.
595  */
596  allocsize = *size;
597  ptr = mmap(NULL, allocsize, PROT_READ | PROT_WRITE,
598  PG_MMAP_FLAGS, -1, 0);
599  mmap_errno = errno;
600  }
601 
602  if (ptr == MAP_FAILED)
603  {
604  errno = mmap_errno;
605  ereport(FATAL,
606  (errmsg("could not map anonymous shared memory: %m"),
607  (mmap_errno == ENOMEM) ?
608  errhint("This error usually means that PostgreSQL's request "
609  "for a shared memory segment exceeded available memory, "
610  "swap space, or huge pages. To reduce the request size "
611  "(currently %zu bytes), reduce PostgreSQL's shared "
612  "memory usage, perhaps by reducing shared_buffers or "
613  "max_connections.",
614  allocsize) : 0));
615  }
616 
617  *size = allocsize;
618  return ptr;
619 }
620 
621 /*
622  * AnonymousShmemDetach --- detach from an anonymous mmap'd block
623  * (called as an on_shmem_exit callback, hence funny argument list)
624  */
625 static void
627 {
628  /* Release anonymous shared memory block, if any. */
629  if (AnonymousShmem != NULL)
630  {
631  if (munmap(AnonymousShmem, AnonymousShmemSize) < 0)
632  elog(LOG, "munmap(%p, %zu) failed: %m",
634  AnonymousShmem = NULL;
635  }
636 }
637 
638 /*
639  * PGSharedMemoryCreate
640  *
641  * Create a shared memory segment of the given size and initialize its
642  * standard header. Also, register an on_shmem_exit callback to release
643  * the storage.
644  *
645  * Dead Postgres segments pertinent to this DataDir are recycled if found, but
646  * we do not fail upon collision with foreign shmem segments. The idea here
647  * is to detect and re-use keys that may have been assigned by a crashed
648  * postmaster or backend.
649  */
652  PGShmemHeader **shim)
653 {
654  IpcMemoryKey NextShmemSegID;
655  void *memAddress;
656  PGShmemHeader *hdr;
657  struct stat statbuf;
658  Size sysvsize;
659 
660  /*
661  * We use the data directory's ID info (inode and device numbers) to
662  * positively identify shmem segments associated with this data dir, and
663  * also as seeds for searching for a free shmem key.
664  */
665  if (stat(DataDir, &statbuf) < 0)
666  ereport(FATAL,
668  errmsg("could not stat data directory \"%s\": %m",
669  DataDir)));
670 
671  /* Complain if hugepages demanded but we can't possibly support them */
672 #if !defined(MAP_HUGETLB)
673  if (huge_pages == HUGE_PAGES_ON)
674  ereport(ERROR,
675  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
676  errmsg("huge pages not supported on this platform")));
677 #endif
678 
679  /* Room for a header? */
680  Assert(size > MAXALIGN(sizeof(PGShmemHeader)));
681 
683  {
685  AnonymousShmemSize = size;
686 
687  /* Register on-exit routine to unmap the anonymous segment */
689 
690  /* Now we need only allocate a minimal-sized SysV shmem block. */
691  sysvsize = sizeof(PGShmemHeader);
692  }
693  else
694  sysvsize = size;
695 
696  /*
697  * Loop till we find a free IPC key. Trust CreateDataDirLockFile() to
698  * ensure no more than one postmaster per data directory can enter this
699  * loop simultaneously. (CreateDataDirLockFile() does not entirely ensure
700  * that, but prefer fixing it over coping here.)
701  */
702  NextShmemSegID = statbuf.st_ino;
703 
704  for (;;)
705  {
706  IpcMemoryId shmid;
707  PGShmemHeader *oldhdr;
709 
710  /* Try to create new segment */
711  memAddress = InternalIpcMemoryCreate(NextShmemSegID, sysvsize);
712  if (memAddress)
713  break; /* successful create and attach */
714 
715  /* Check shared memory and possibly remove and recreate */
716 
717  /*
718  * shmget() failure is typically EACCES, hence SHMSTATE_FOREIGN.
719  * ENOENT, a narrow possibility, implies SHMSTATE_ENOENT, but one can
720  * safely treat SHMSTATE_ENOENT like SHMSTATE_FOREIGN.
721  */
722  shmid = shmget(NextShmemSegID, sizeof(PGShmemHeader), 0);
723  if (shmid < 0)
724  {
725  oldhdr = NULL;
726  state = SHMSTATE_FOREIGN;
727  }
728  else
729  state = PGSharedMemoryAttach(shmid, NULL, &oldhdr);
730 
731  switch (state)
732  {
734  case SHMSTATE_ATTACHED:
735  ereport(FATAL,
736  (errcode(ERRCODE_LOCK_FILE_EXISTS),
737  errmsg("pre-existing shared memory block (key %lu, ID %lu) is still in use",
738  (unsigned long) NextShmemSegID,
739  (unsigned long) shmid),
740  errhint("Terminate any old server processes associated with data directory \"%s\".",
741  DataDir)));
742  break;
743  case SHMSTATE_ENOENT:
744 
745  /*
746  * To our surprise, some other process deleted since our last
747  * InternalIpcMemoryCreate(). Moments earlier, we would have
748  * seen SHMSTATE_FOREIGN. Try that same ID again.
749  */
750  elog(LOG,
751  "shared memory block (key %lu, ID %lu) deleted during startup",
752  (unsigned long) NextShmemSegID,
753  (unsigned long) shmid);
754  break;
755  case SHMSTATE_FOREIGN:
756  NextShmemSegID++;
757  break;
758  case SHMSTATE_UNATTACHED:
759 
760  /*
761  * The segment pertains to DataDir, and every process that had
762  * used it has died or detached. Zap it, if possible, and any
763  * associated dynamic shared memory segments, as well. This
764  * shouldn't fail, but if it does, assume the segment belongs
765  * to someone else after all, and try the next candidate.
766  * Otherwise, try again to create the segment. That may fail
767  * if some other process creates the same shmem key before we
768  * do, in which case we'll try the next key.
769  */
770  if (oldhdr->dsm_control != 0)
772  if (shmctl(shmid, IPC_RMID, NULL) < 0)
773  NextShmemSegID++;
774  break;
775  }
776 
777  if (oldhdr && shmdt(oldhdr) < 0)
778  elog(LOG, "shmdt(%p) failed: %m", oldhdr);
779  }
780 
781  /* Initialize new segment. */
782  hdr = (PGShmemHeader *) memAddress;
783  hdr->creatorPID = getpid();
784  hdr->magic = PGShmemMagic;
785  hdr->dsm_control = 0;
786 
787  /* Fill in the data directory ID info, too */
788  hdr->device = statbuf.st_dev;
789  hdr->inode = statbuf.st_ino;
790 
791  /*
792  * Initialize space allocation status for segment.
793  */
794  hdr->totalsize = size;
795  hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
796  *shim = hdr;
797 
798  /* Save info for possible future use */
799  UsedShmemSegAddr = memAddress;
800  UsedShmemSegID = (unsigned long) NextShmemSegID;
801 
802  /*
803  * If AnonymousShmem is NULL here, then we're not using anonymous shared
804  * memory, and should return a pointer to the System V shared memory
805  * block. Otherwise, the System V shared memory block is only a shim, and
806  * we must return a pointer to the real block.
807  */
808  if (AnonymousShmem == NULL)
809  return hdr;
810  memcpy(AnonymousShmem, hdr, sizeof(PGShmemHeader));
811  return (PGShmemHeader *) AnonymousShmem;
812 }
813 
814 #ifdef EXEC_BACKEND
815 
816 /*
817  * PGSharedMemoryReAttach
818  *
819  * This is called during startup of a postmaster child process to re-attach to
820  * an already existing shared memory segment. This is needed only in the
821  * EXEC_BACKEND case; otherwise postmaster children inherit the shared memory
822  * segment attachment via fork().
823  *
824  * UsedShmemSegID and UsedShmemSegAddr are implicit parameters to this
825  * routine. The caller must have already restored them to the postmaster's
826  * values.
827  */
828 void
830 {
831  IpcMemoryId shmid;
832  PGShmemHeader *hdr;
834  void *origUsedShmemSegAddr = UsedShmemSegAddr;
835 
836  Assert(UsedShmemSegAddr != NULL);
838 
839 #ifdef __CYGWIN__
840  /* cygipc (currently) appears to not detach on exec. */
842  UsedShmemSegAddr = origUsedShmemSegAddr;
843 #endif
844 
845  elog(DEBUG3, "attaching to %p", UsedShmemSegAddr);
846  shmid = shmget(UsedShmemSegID, sizeof(PGShmemHeader), 0);
847  if (shmid < 0)
848  state = SHMSTATE_FOREIGN;
849  else
850  state = PGSharedMemoryAttach(shmid, UsedShmemSegAddr, &hdr);
851  if (state != SHMSTATE_ATTACHED)
852  elog(FATAL, "could not reattach to shared memory (key=%d, addr=%p): %m",
854  if (hdr != origUsedShmemSegAddr)
855  elog(FATAL, "reattaching to shared memory returned unexpected address (got %p, expected %p)",
856  hdr, origUsedShmemSegAddr);
857  dsm_set_control_handle(hdr->dsm_control);
858 
859  UsedShmemSegAddr = hdr; /* probably redundant */
860 }
861 
862 /*
863  * PGSharedMemoryNoReAttach
864  *
865  * This is called during startup of a postmaster child process when we choose
866  * *not* to re-attach to the existing shared memory segment. We must clean up
867  * to leave things in the appropriate state. This is not used in the non
868  * EXEC_BACKEND case, either.
869  *
870  * The child process startup logic might or might not call PGSharedMemoryDetach
871  * after this; make sure that it will be a no-op if called.
872  *
873  * UsedShmemSegID and UsedShmemSegAddr are implicit parameters to this
874  * routine. The caller must have already restored them to the postmaster's
875  * values.
876  */
877 void
879 {
880  Assert(UsedShmemSegAddr != NULL);
882 
883 #ifdef __CYGWIN__
884  /* cygipc (currently) appears to not detach on exec. */
886 #endif
887 
888  /* For cleanliness, reset UsedShmemSegAddr to show we're not attached. */
889  UsedShmemSegAddr = NULL;
890  /* And the same for UsedShmemSegID. */
891  UsedShmemSegID = 0;
892 }
893 
894 #endif /* EXEC_BACKEND */
895 
896 /*
897  * PGSharedMemoryDetach
898  *
899  * Detach from the shared memory segment, if still attached. This is not
900  * intended to be called explicitly by the process that originally created the
901  * segment (it will have on_shmem_exit callback(s) registered to do that).
902  * Rather, this is for subprocesses that have inherited an attachment and want
903  * to get rid of it.
904  *
905  * UsedShmemSegID and UsedShmemSegAddr are implicit parameters to this
906  * routine, also AnonymousShmem and AnonymousShmemSize.
907  */
908 void
910 {
911  if (UsedShmemSegAddr != NULL)
912  {
913  if ((shmdt(UsedShmemSegAddr) < 0)
914 #if defined(EXEC_BACKEND) && defined(__CYGWIN__)
915  /* Work-around for cygipc exec bug */
916  && shmdt(NULL) < 0
917 #endif
918  )
919  elog(LOG, "shmdt(%p) failed: %m", UsedShmemSegAddr);
920  UsedShmemSegAddr = NULL;
921  }
922 
923  if (AnonymousShmem != NULL)
924  {
925  if (munmap(AnonymousShmem, AnonymousShmemSize) < 0)
926  elog(LOG, "munmap(%p, %zu) failed: %m",
928  AnonymousShmem = NULL;
929  }
930 }
IpcMemoryState
Definition: sysv_shmem.c:87
PGShmemHeader * PGSharedMemoryCreate(Size size, PGShmemHeader **shim)
Definition: sysv_shmem.c:651
pid_t creatorPID
Definition: pg_shmem.h:33
#define MAP_FAILED
Definition: mem.h:45
#define DEBUG1
Definition: elog.h:25
int errhint(const char *fmt,...)
Definition: elog.c:1149
static void IpcMemoryDetach(int status, Datum shmaddr)
Definition: sysv_shmem.c:279
#define DatumGetInt32(X)
Definition: postgres.h:472
#define IPC_CREAT
Definition: win32_port.h:87
#define DEBUG3
Definition: elog.h:23
void dsm_cleanup_using_control_segment(dsm_handle old_control_handle)
Definition: dsm.c:210
dsm_handle dsm_control
Definition: pg_shmem.h:36
void AddToDataDirLockFile(int target_line, const char *str)
Definition: miscinit.c:1329
#define PointerGetDatum(X)
Definition: postgres.h:556
#define IPCProtection
Definition: posix_sema.c:59
#define LOCK_FILE_LINE_SHMEM_KEY
Definition: pidfile.h:43
#define PG_SHMAT_FLAGS
Definition: mem.h:20
struct PGShmemHeader PGShmemHeader
int shared_memory_type
Definition: ipci.c:51
static Size AnonymousShmemSize
Definition: sysv_shmem.c:100
void PGSharedMemoryNoReAttach(void)
Definition: win32_shmem.c:449
int errcode(int sqlerrcode)
Definition: elog.c:691
#define LOG
Definition: elog.h:26
_ino_t st_ino
Definition: win32_port.h:259
void PGSharedMemoryDetach(void)
Definition: sysv_shmem.c:909
static uint64 pg_ceil_log2_64(uint64 num)
Definition: pg_bitutils.h:202
int huge_page_size
Definition: guc.c:583
#define sprintf
Definition: port.h:217
static void * InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size)
Definition: sysv_shmem.c:124
#define ERROR
Definition: elog.h:43
#define FATAL
Definition: elog.h:52
_dev_t st_dev
Definition: win32_port.h:258
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:361
static char * buf
Definition: pg_test_fsync.c:68
#define EIDRM
Definition: win32_port.h:95
#define PG_MMAP_FLAGS
Definition: mem.h:41
key_t IpcMemoryKey
Definition: sysv_shmem.c:73
bool IsUnderPostmaster
Definition: globals.c:109
int errdetail(const char *fmt,...)
Definition: elog.c:1035
static IpcMemoryState PGSharedMemoryAttach(IpcMemoryId shmId, void *attachAt, PGShmemHeader **addr)
Definition: sysv_shmem.c:340
void PGSharedMemoryReAttach(void)
Definition: win32_shmem.c:401
int errcode_for_file_access(void)
Definition: elog.c:714
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2349
void * UsedShmemSegAddr
Definition: sysv_shmem.c:98
static void AnonymousShmemDetach(int status, Datum arg)
Definition: sysv_shmem.c:626
static void IpcMemoryDelete(int status, Datum shmId)
Definition: sysv_shmem.c:291
int32 magic
Definition: pg_shmem.h:31
static void * CreateAnonymousSegment(Size *size)
Definition: sysv_shmem.c:558
Size totalsize
Definition: pg_shmem.h:34
uintptr_t Datum
Definition: postgres.h:367
unsigned long UsedShmemSegID
Definition: sysv_shmem.c:97
#define IPC_RMID
Definition: win32_port.h:86
static void * AnonymousShmem
Definition: sysv_shmem.c:101
ino_t inode
Definition: pg_shmem.h:40
#define ereport(elevel,...)
Definition: elog.h:155
dev_t device
Definition: pg_shmem.h:39
#define Assert(condition)
Definition: c.h:800
Definition: regguts.h:298
long key_t
Definition: win32_port.h:239
#define PGShmemMagic
Definition: pg_shmem.h:32
Size freeoffset
Definition: pg_shmem.h:35
size_t Size
Definition: c.h:528
#define MAXALIGN(LEN)
Definition: c.h:753
int IpcMemoryId
Definition: sysv_shmem.c:74
int huge_pages
Definition: guc.c:582
#define IPC_EXCL
Definition: win32_port.h:88
#define DatumGetPointer(X)
Definition: postgres.h:549
bool PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
Definition: sysv_shmem.c:310
int FreeFile(FILE *file)
Definition: fd.c:2548
#define Int32GetDatum(X)
Definition: postgres.h:479
int errmsg(const char *fmt,...)
Definition: elog.c:902
#define elog(elevel,...)
Definition: elog.h:228
void * arg
char * DataDir
Definition: globals.c:62
static void static void status(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:227
#define IPC_STAT
Definition: win32_port.h:91
#define stat
Definition: win32_port.h:275