PostgreSQL Source Code  git master
sysv_shmem.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * sysv_shmem.c
4  * Implement shared memory using SysV facilities
5  *
6  * These routines used to be a fairly thin layer on top of SysV shared
7  * memory functionality. With the addition of anonymous-shmem logic,
8  * they're a bit fatter now. We still require a SysV shmem block to
9  * exist, though, because mmap'd shmem provides no way to find out how
10  * many processes are attached, which we need for interlocking purposes.
11  *
12  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
13  * Portions Copyright (c) 1994, Regents of the University of California
14  *
15  * IDENTIFICATION
16  * src/backend/port/sysv_shmem.c
17  *
18  *-------------------------------------------------------------------------
19  */
20 #include "postgres.h"
21 
22 #include <signal.h>
23 #include <unistd.h>
24 #include <sys/file.h>
25 #include <sys/mman.h>
26 #include <sys/stat.h>
27 #ifdef HAVE_SYS_IPC_H
28 #include <sys/ipc.h>
29 #endif
30 #ifdef HAVE_SYS_SHM_H
31 #include <sys/shm.h>
32 #endif
33 
34 #include "miscadmin.h"
35 #include "portability/mem.h"
36 #include "storage/dsm.h"
37 #include "storage/fd.h"
38 #include "storage/ipc.h"
39 #include "storage/pg_shmem.h"
40 #include "utils/guc.h"
41 #include "utils/pidfile.h"
42 
43 
44 /*
45  * As of PostgreSQL 9.3, we normally allocate only a very small amount of
46  * System V shared memory, and only for the purposes of providing an
47  * interlock to protect the data directory. The real shared memory block
48  * is allocated using mmap(). This works around the problem that many
49  * systems have very low limits on the amount of System V shared memory
50  * that can be allocated. Even a limit of a few megabytes will be enough
51  * to run many copies of PostgreSQL without needing to adjust system settings.
52  *
53  * We assume that no one will attempt to run PostgreSQL 9.3 or later on
54  * systems that are ancient enough that anonymous shared memory is not
55  * supported, such as pre-2.4 versions of Linux. If that turns out to be
56  * false, we might need to add compile and/or run-time tests here and do this
57  * only if the running kernel supports it.
58  *
59  * However, we must always disable this logic in the EXEC_BACKEND case, and
60  * fall back to the old method of allocating the entire segment using System V
61  * shared memory, because there's no way to attach an anonymous mmap'd segment
62  * to a process after exec(). Since EXEC_BACKEND is intended only for
63  * developer use, this shouldn't be a big problem. Because of this, we do
64  * not worry about supporting anonymous shmem in the EXEC_BACKEND cases below.
65  *
66  * As of PostgreSQL 12, we regained the ability to use a large System V shared
67  * memory region even in non-EXEC_BACKEND builds, if shared_memory_type is set
68  * to sysv (though this is not the default).
69  */
70 
71 
typedef key_t IpcMemoryKey;		/* shared memory key passed to shmget(2) */
typedef int IpcMemoryId;		/* shared memory ID returned by shmget(2) */

/*
 * How does a given IpcMemoryId relate to this PostgreSQL process?
 *
 * One could recycle unattached segments of different data directories if we
 * distinguished that case from other SHMSTATE_FOREIGN cases.  Doing so would
 * cause us to visit less of the key space, making us less likely to detect a
 * SHMSTATE_ATTACHED key.  It would also complicate the concurrency analysis,
 * in that postmasters of different data directories could simultaneously
 * attempt to recycle a given key.  We'll waste keys longer in some cases, but
 * avoiding the problems of the alternative justifies that loss.
 */
typedef enum
{
	SHMSTATE_ANALYSIS_FAILURE,	/* unexpected failure to analyze the ID */
	SHMSTATE_ATTACHED,			/* pertinent to DataDir, has attached PIDs */
	SHMSTATE_ENOENT,			/* no segment of that ID */
	SHMSTATE_FOREIGN,			/* exists, but not pertinent to DataDir */
	SHMSTATE_UNATTACHED			/* pertinent to DataDir, no attached PIDs */
} IpcMemoryState;
94 
95 
96 unsigned long UsedShmemSegID = 0;
97 void *UsedShmemSegAddr = NULL;
98 
100 static void *AnonymousShmem = NULL;
101 
102 static void *InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size);
103 static void IpcMemoryDetach(int status, Datum shmaddr);
104 static void IpcMemoryDelete(int status, Datum shmId);
106  void *attachAt,
107  PGShmemHeader **addr);
108 
109 
110 /*
111  * InternalIpcMemoryCreate(memKey, size)
112  *
113  * Attempt to create a new shared memory segment with the specified key.
114  * Will fail (return NULL) if such a segment already exists. If successful,
115  * attach the segment to the current process and return its attached address.
116  * On success, callbacks are registered with on_shmem_exit to detach and
117  * delete the segment when on_shmem_exit is called.
118  *
119  * If we fail with a failure code other than collision-with-existing-segment,
120  * print out an error and abort. Other types of errors are not recoverable.
121  */
122 static void *
124 {
125  IpcMemoryId shmid;
126  void *requestedAddress = NULL;
127  void *memAddress;
128 
129  /*
130  * Normally we just pass requestedAddress = NULL to shmat(), allowing the
131  * system to choose where the segment gets mapped. But in an EXEC_BACKEND
132  * build, it's possible for whatever is chosen in the postmaster to not
133  * work for backends, due to variations in address space layout. As a
134  * rather klugy workaround, allow the user to specify the address to use
135  * via setting the environment variable PG_SHMEM_ADDR. (If this were of
136  * interest for anything except debugging, we'd probably create a cleaner
137  * and better-documented way to set it, such as a GUC.)
138  */
139 #ifdef EXEC_BACKEND
140  {
141  char *pg_shmem_addr = getenv("PG_SHMEM_ADDR");
142 
143  if (pg_shmem_addr)
144  requestedAddress = (void *) strtoul(pg_shmem_addr, NULL, 0);
145  }
146 #endif
147 
148  shmid = shmget(memKey, size, IPC_CREAT | IPC_EXCL | IPCProtection);
149 
150  if (shmid < 0)
151  {
152  int shmget_errno = errno;
153 
154  /*
155  * Fail quietly if error indicates a collision with existing segment.
156  * One would expect EEXIST, given that we said IPC_EXCL, but perhaps
157  * we could get a permission violation instead? Also, EIDRM might
158  * occur if an old seg is slated for destruction but not gone yet.
159  */
160  if (shmget_errno == EEXIST || shmget_errno == EACCES
161 #ifdef EIDRM
162  || shmget_errno == EIDRM
163 #endif
164  )
165  return NULL;
166 
167  /*
168  * Some BSD-derived kernels are known to return EINVAL, not EEXIST, if
169  * there is an existing segment but it's smaller than "size" (this is
170  * a result of poorly-thought-out ordering of error tests). To
171  * distinguish between collision and invalid size in such cases, we
172  * make a second try with size = 0. These kernels do not test size
173  * against SHMMIN in the preexisting-segment case, so we will not get
174  * EINVAL a second time if there is such a segment.
175  */
176  if (shmget_errno == EINVAL)
177  {
178  shmid = shmget(memKey, 0, IPC_CREAT | IPC_EXCL | IPCProtection);
179 
180  if (shmid < 0)
181  {
182  /* As above, fail quietly if we verify a collision */
183  if (errno == EEXIST || errno == EACCES
184 #ifdef EIDRM
185  || errno == EIDRM
186 #endif
187  )
188  return NULL;
189  /* Otherwise, fall through to report the original error */
190  }
191  else
192  {
193  /*
194  * On most platforms we cannot get here because SHMMIN is
195  * greater than zero. However, if we do succeed in creating a
196  * zero-size segment, free it and then fall through to report
197  * the original error.
198  */
199  if (shmctl(shmid, IPC_RMID, NULL) < 0)
200  elog(LOG, "shmctl(%d, %d, 0) failed: %m",
201  (int) shmid, IPC_RMID);
202  }
203  }
204 
205  /*
206  * Else complain and abort.
207  *
208  * Note: at this point EINVAL should mean that either SHMMIN or SHMMAX
209  * is violated. SHMALL violation might be reported as either ENOMEM
210  * (BSDen) or ENOSPC (Linux); the Single Unix Spec fails to say which
211  * it should be. SHMMNI violation is ENOSPC, per spec. Just plain
212  * not-enough-RAM is ENOMEM.
213  */
214  errno = shmget_errno;
215  ereport(FATAL,
216  (errmsg("could not create shared memory segment: %m"),
217  errdetail("Failed system call was shmget(key=%lu, size=%zu, 0%o).",
218  (unsigned long) memKey, size,
220  (shmget_errno == EINVAL) ?
221  errhint("This error usually means that PostgreSQL's request for a shared memory "
222  "segment exceeded your kernel's SHMMAX parameter, or possibly that "
223  "it is less than "
224  "your kernel's SHMMIN parameter.\n"
225  "The PostgreSQL documentation contains more information about shared "
226  "memory configuration.") : 0,
227  (shmget_errno == ENOMEM) ?
228  errhint("This error usually means that PostgreSQL's request for a shared "
229  "memory segment exceeded your kernel's SHMALL parameter. You might need "
230  "to reconfigure the kernel with larger SHMALL.\n"
231  "The PostgreSQL documentation contains more information about shared "
232  "memory configuration.") : 0,
233  (shmget_errno == ENOSPC) ?
234  errhint("This error does *not* mean that you have run out of disk space. "
235  "It occurs either if all available shared memory IDs have been taken, "
236  "in which case you need to raise the SHMMNI parameter in your kernel, "
237  "or because the system's overall limit for shared memory has been "
238  "reached.\n"
239  "The PostgreSQL documentation contains more information about shared "
240  "memory configuration.") : 0));
241  }
242 
243  /* Register on-exit routine to delete the new segment */
245 
246  /* OK, should be able to attach to the segment */
247  memAddress = shmat(shmid, requestedAddress, PG_SHMAT_FLAGS);
248 
249  if (memAddress == (void *) -1)
250  elog(FATAL, "shmat(id=%d, addr=%p, flags=0x%x) failed: %m",
251  shmid, requestedAddress, PG_SHMAT_FLAGS);
252 
253  /* Register on-exit routine to detach new segment before deleting */
255 
256  /*
257  * Store shmem key and ID in data directory lockfile. Format to try to
258  * keep it the same length always (trailing junk in the lockfile won't
259  * hurt, but might confuse humans).
260  */
261  {
262  char line[64];
263 
264  sprintf(line, "%9lu %9lu",
265  (unsigned long) memKey, (unsigned long) shmid);
267  }
268 
269  return memAddress;
270 }
271 
272 /****************************************************************************/
273 /* IpcMemoryDetach(status, shmaddr) removes a shared memory segment */
274 /* from process' address space */
275 /* (called as an on_shmem_exit callback, hence funny argument list) */
276 /****************************************************************************/
277 static void
279 {
280  /* Detach System V shared memory block. */
281  if (shmdt(DatumGetPointer(shmaddr)) < 0)
282  elog(LOG, "shmdt(%p) failed: %m", DatumGetPointer(shmaddr));
283 }
284 
285 /****************************************************************************/
286 /* IpcMemoryDelete(status, shmId) deletes a shared memory segment */
287 /* (called as an on_shmem_exit callback, hence funny argument list) */
288 /****************************************************************************/
289 static void
291 {
292  if (shmctl(DatumGetInt32(shmId), IPC_RMID, NULL) < 0)
293  elog(LOG, "shmctl(%d, %d, 0) failed: %m",
294  DatumGetInt32(shmId), IPC_RMID);
295 }
296 
297 /*
298  * PGSharedMemoryIsInUse
299  *
300  * Is a previously-existing shmem segment still existing and in use?
301  *
302  * The point of this exercise is to detect the case where a prior postmaster
303  * crashed, but it left child backends that are still running. Therefore
304  * we only care about shmem segments that are associated with the intended
305  * DataDir. This is an important consideration since accidental matches of
306  * shmem segment IDs are reasonably common.
307  */
308 bool
309 PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
310 {
311  PGShmemHeader *memAddress;
313 
314  state = PGSharedMemoryAttach((IpcMemoryId) id2, NULL, &memAddress);
315  if (memAddress && shmdt(memAddress) < 0)
316  elog(LOG, "shmdt(%p) failed: %m", memAddress);
317  switch (state)
318  {
319  case SHMSTATE_ENOENT:
320  case SHMSTATE_FOREIGN:
321  case SHMSTATE_UNATTACHED:
322  return false;
324  case SHMSTATE_ATTACHED:
325  return true;
326  }
327  return true;
328 }
329 
330 /*
331  * Test for a segment with id shmId; see comment at IpcMemoryState.
332  *
333  * If the segment exists, we'll attempt to attach to it, using attachAt
334  * if that's not NULL (but it's best to pass NULL if possible).
335  *
336  * *addr is set to the segment memory address if we attached to it, else NULL.
337  */
338 static IpcMemoryState
340  void *attachAt,
341  PGShmemHeader **addr)
342 {
343  struct shmid_ds shmStat;
344  struct stat statbuf;
345  PGShmemHeader *hdr;
346 
347  *addr = NULL;
348 
349  /*
350  * First, try to stat the shm segment ID, to see if it exists at all.
351  */
352  if (shmctl(shmId, IPC_STAT, &shmStat) < 0)
353  {
354  /*
355  * EINVAL actually has multiple possible causes documented in the
356  * shmctl man page, but we assume it must mean the segment no longer
357  * exists.
358  */
359  if (errno == EINVAL)
360  return SHMSTATE_ENOENT;
361 
362  /*
363  * EACCES implies we have no read permission, which means it is not a
364  * Postgres shmem segment (or at least, not one that is relevant to
365  * our data directory).
366  */
367  if (errno == EACCES)
368  return SHMSTATE_FOREIGN;
369 
370  /*
371  * Some Linux kernel versions (in fact, all of them as of July 2007)
372  * sometimes return EIDRM when EINVAL is correct. The Linux kernel
373  * actually does not have any internal state that would justify
374  * returning EIDRM, so we can get away with assuming that EIDRM is
375  * equivalent to EINVAL on that platform.
376  */
377 #ifdef HAVE_LINUX_EIDRM_BUG
378  if (errno == EIDRM)
379  return SHMSTATE_ENOENT;
380 #endif
381 
382  /*
383  * Otherwise, we had better assume that the segment is in use. The
384  * only likely case is (non-Linux, assumed spec-compliant) EIDRM,
385  * which implies that the segment has been IPC_RMID'd but there are
386  * still processes attached to it.
387  */
389  }
390 
391  /*
392  * Try to attach to the segment and see if it matches our data directory.
393  * This avoids any risk of duplicate-shmem-key conflicts on machines that
394  * are running several postmasters under the same userid.
395  *
396  * (When we're called from PGSharedMemoryCreate, this stat call is
397  * duplicative; but since this isn't a high-traffic case it's not worth
398  * trying to optimize.)
399  */
400  if (stat(DataDir, &statbuf) < 0)
401  return SHMSTATE_ANALYSIS_FAILURE; /* can't stat; be conservative */
402 
403  hdr = (PGShmemHeader *) shmat(shmId, attachAt, PG_SHMAT_FLAGS);
404  if (hdr == (PGShmemHeader *) -1)
405  {
406  /*
407  * Attachment failed. The cases we're interested in are the same as
408  * for the shmctl() call above. In particular, note that the owning
409  * postmaster could have terminated and removed the segment between
410  * shmctl() and shmat().
411  *
412  * If attachAt isn't NULL, it's possible that EINVAL reflects a
413  * problem with that address not a vanished segment, so it's best to
414  * pass NULL when probing for conflicting segments.
415  */
416  if (errno == EINVAL)
417  return SHMSTATE_ENOENT; /* segment disappeared */
418  if (errno == EACCES)
419  return SHMSTATE_FOREIGN; /* must be non-Postgres */
420 #ifdef HAVE_LINUX_EIDRM_BUG
421  if (errno == EIDRM)
422  return SHMSTATE_ENOENT; /* segment disappeared */
423 #endif
424  /* Otherwise, be conservative. */
426  }
427  *addr = hdr;
428 
429  if (hdr->magic != PGShmemMagic ||
430  hdr->device != statbuf.st_dev ||
431  hdr->inode != statbuf.st_ino)
432  {
433  /*
434  * It's either not a Postgres segment, or not one for my data
435  * directory.
436  */
437  return SHMSTATE_FOREIGN;
438  }
439 
440  /*
441  * It does match our data directory, so now test whether any processes are
442  * still attached to it. (We are, now, but the shm_nattch result is from
443  * before we attached to it.)
444  */
445  return shmStat.shm_nattch == 0 ? SHMSTATE_UNATTACHED : SHMSTATE_ATTACHED;
446 }
447 
448 #ifdef MAP_HUGETLB
449 
450 /*
451  * Identify the huge page size to use.
452  *
453  * Some Linux kernel versions have a bug causing mmap() to fail on requests
454  * that are not a multiple of the hugepage size. Versions without that bug
455  * instead silently round the request up to the next hugepage multiple ---
456  * and then munmap() fails when we give it a size different from that.
457  * So we have to round our request up to a multiple of the actual hugepage
458  * size to avoid trouble.
459  *
460  * Doing the round-up ourselves also lets us make use of the extra memory,
461  * rather than just wasting it. Currently, we just increase the available
462  * space recorded in the shmem header, which will make the extra usable for
463  * purposes such as additional locktable entries. Someday, for very large
464  * hugepage sizes, we might want to think about more invasive strategies,
465  * such as increasing shared_buffers to absorb the extra space.
466  *
467  * Returns the (real or assumed) page size into *hugepagesize,
468  * and the hugepage-related mmap flags to use into *mmap_flags.
469  *
470  * Currently *mmap_flags is always just MAP_HUGETLB. Someday, on systems
471  * that support it, we might OR in additional bits to specify a particular
472  * non-default huge page size.
473  */
474 static void
475 GetHugePageSize(Size *hugepagesize, int *mmap_flags)
476 {
477  /*
478  * If we fail to find out the system's default huge page size, assume it
479  * is 2MB. This will work fine when the actual size is less. If it's
480  * more, we might get mmap() or munmap() failures due to unaligned
481  * requests; but at this writing, there are no reports of any non-Linux
482  * systems being picky about that.
483  */
484  *hugepagesize = 2 * 1024 * 1024;
485  *mmap_flags = MAP_HUGETLB;
486 
487  /*
488  * System-dependent code to find out the default huge page size.
489  *
490  * On Linux, read /proc/meminfo looking for a line like "Hugepagesize:
491  * nnnn kB". Ignore any failures, falling back to the preset default.
492  */
493 #ifdef __linux__
494  {
495  FILE *fp = AllocateFile("/proc/meminfo", "r");
496  char buf[128];
497  unsigned int sz;
498  char ch;
499 
500  if (fp)
501  {
502  while (fgets(buf, sizeof(buf), fp))
503  {
504  if (sscanf(buf, "Hugepagesize: %u %c", &sz, &ch) == 2)
505  {
506  if (ch == 'k')
507  {
508  *hugepagesize = sz * (Size) 1024;
509  break;
510  }
511  /* We could accept other units besides kB, if needed */
512  }
513  }
514  FreeFile(fp);
515  }
516  }
517 #endif /* __linux__ */
518 }
519 
520 #endif /* MAP_HUGETLB */
521 
522 /*
523  * Creates an anonymous mmap()ed shared memory segment.
524  *
525  * Pass the requested size in *size. This function will modify *size to the
526  * actual size of the allocation, if it ends up allocating a segment that is
527  * larger than requested.
528  */
529 static void *
531 {
532  Size allocsize = *size;
533  void *ptr = MAP_FAILED;
534  int mmap_errno = 0;
535 
536 #ifndef MAP_HUGETLB
537  /* PGSharedMemoryCreate should have dealt with this case */
539 #else
541  {
542  /*
543  * Round up the request size to a suitable large value.
544  */
545  Size hugepagesize;
546  int mmap_flags;
547 
548  GetHugePageSize(&hugepagesize, &mmap_flags);
549 
550  if (allocsize % hugepagesize != 0)
551  allocsize += hugepagesize - (allocsize % hugepagesize);
552 
553  ptr = mmap(NULL, allocsize, PROT_READ | PROT_WRITE,
554  PG_MMAP_FLAGS | mmap_flags, -1, 0);
555  mmap_errno = errno;
556  if (huge_pages == HUGE_PAGES_TRY && ptr == MAP_FAILED)
557  elog(DEBUG1, "mmap(%zu) with MAP_HUGETLB failed, huge pages disabled: %m",
558  allocsize);
559  }
560 #endif
561 
562  if (ptr == MAP_FAILED && huge_pages != HUGE_PAGES_ON)
563  {
564  /*
565  * Use the original size, not the rounded-up value, when falling back
566  * to non-huge pages.
567  */
568  allocsize = *size;
569  ptr = mmap(NULL, allocsize, PROT_READ | PROT_WRITE,
570  PG_MMAP_FLAGS, -1, 0);
571  mmap_errno = errno;
572  }
573 
574  if (ptr == MAP_FAILED)
575  {
576  errno = mmap_errno;
577  ereport(FATAL,
578  (errmsg("could not map anonymous shared memory: %m"),
579  (mmap_errno == ENOMEM) ?
580  errhint("This error usually means that PostgreSQL's request "
581  "for a shared memory segment exceeded available memory, "
582  "swap space, or huge pages. To reduce the request size "
583  "(currently %zu bytes), reduce PostgreSQL's shared "
584  "memory usage, perhaps by reducing shared_buffers or "
585  "max_connections.",
586  *size) : 0));
587  }
588 
589  *size = allocsize;
590  return ptr;
591 }
592 
593 /*
594  * AnonymousShmemDetach --- detach from an anonymous mmap'd block
595  * (called as an on_shmem_exit callback, hence funny argument list)
596  */
597 static void
599 {
600  /* Release anonymous shared memory block, if any. */
601  if (AnonymousShmem != NULL)
602  {
603  if (munmap(AnonymousShmem, AnonymousShmemSize) < 0)
604  elog(LOG, "munmap(%p, %zu) failed: %m",
606  AnonymousShmem = NULL;
607  }
608 }
609 
610 /*
611  * PGSharedMemoryCreate
612  *
613  * Create a shared memory segment of the given size and initialize its
614  * standard header. Also, register an on_shmem_exit callback to release
615  * the storage.
616  *
617  * Dead Postgres segments pertinent to this DataDir are recycled if found, but
618  * we do not fail upon collision with foreign shmem segments. The idea here
619  * is to detect and re-use keys that may have been assigned by a crashed
620  * postmaster or backend.
621  */
624  PGShmemHeader **shim)
625 {
626  IpcMemoryKey NextShmemSegID;
627  void *memAddress;
628  PGShmemHeader *hdr;
629  struct stat statbuf;
630  Size sysvsize;
631 
632  /*
633  * We use the data directory's ID info (inode and device numbers) to
634  * positively identify shmem segments associated with this data dir, and
635  * also as seeds for searching for a free shmem key.
636  */
637  if (stat(DataDir, &statbuf) < 0)
638  ereport(FATAL,
640  errmsg("could not stat data directory \"%s\": %m",
641  DataDir)));
642 
643  /* Complain if hugepages demanded but we can't possibly support them */
644 #if !defined(MAP_HUGETLB)
645  if (huge_pages == HUGE_PAGES_ON)
646  ereport(ERROR,
647  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
648  errmsg("huge pages not supported on this platform")));
649 #endif
650 
651  /* Room for a header? */
652  Assert(size > MAXALIGN(sizeof(PGShmemHeader)));
653 
655  {
657  AnonymousShmemSize = size;
658 
659  /* Register on-exit routine to unmap the anonymous segment */
661 
662  /* Now we need only allocate a minimal-sized SysV shmem block. */
663  sysvsize = sizeof(PGShmemHeader);
664  }
665  else
666  sysvsize = size;
667 
668  /*
669  * Loop till we find a free IPC key. Trust CreateDataDirLockFile() to
670  * ensure no more than one postmaster per data directory can enter this
671  * loop simultaneously. (CreateDataDirLockFile() does not entirely ensure
672  * that, but prefer fixing it over coping here.)
673  */
674  NextShmemSegID = statbuf.st_ino;
675 
676  for (;;)
677  {
678  IpcMemoryId shmid;
679  PGShmemHeader *oldhdr;
681 
682  /* Try to create new segment */
683  memAddress = InternalIpcMemoryCreate(NextShmemSegID, sysvsize);
684  if (memAddress)
685  break; /* successful create and attach */
686 
687  /* Check shared memory and possibly remove and recreate */
688 
689  /*
690  * shmget() failure is typically EACCES, hence SHMSTATE_FOREIGN.
691  * ENOENT, a narrow possibility, implies SHMSTATE_ENOENT, but one can
692  * safely treat SHMSTATE_ENOENT like SHMSTATE_FOREIGN.
693  */
694  shmid = shmget(NextShmemSegID, sizeof(PGShmemHeader), 0);
695  if (shmid < 0)
696  {
697  oldhdr = NULL;
698  state = SHMSTATE_FOREIGN;
699  }
700  else
701  state = PGSharedMemoryAttach(shmid, NULL, &oldhdr);
702 
703  switch (state)
704  {
706  case SHMSTATE_ATTACHED:
707  ereport(FATAL,
708  (errcode(ERRCODE_LOCK_FILE_EXISTS),
709  errmsg("pre-existing shared memory block (key %lu, ID %lu) is still in use",
710  (unsigned long) NextShmemSegID,
711  (unsigned long) shmid),
712  errhint("Terminate any old server processes associated with data directory \"%s\".",
713  DataDir)));
714  break;
715  case SHMSTATE_ENOENT:
716 
717  /*
718  * To our surprise, some other process deleted since our last
719  * InternalIpcMemoryCreate(). Moments earlier, we would have
720  * seen SHMSTATE_FOREIGN. Try that same ID again.
721  */
722  elog(LOG,
723  "shared memory block (key %lu, ID %lu) deleted during startup",
724  (unsigned long) NextShmemSegID,
725  (unsigned long) shmid);
726  break;
727  case SHMSTATE_FOREIGN:
728  NextShmemSegID++;
729  break;
730  case SHMSTATE_UNATTACHED:
731 
732  /*
733  * The segment pertains to DataDir, and every process that had
734  * used it has died or detached. Zap it, if possible, and any
735  * associated dynamic shared memory segments, as well. This
736  * shouldn't fail, but if it does, assume the segment belongs
737  * to someone else after all, and try the next candidate.
738  * Otherwise, try again to create the segment. That may fail
739  * if some other process creates the same shmem key before we
740  * do, in which case we'll try the next key.
741  */
742  if (oldhdr->dsm_control != 0)
744  if (shmctl(shmid, IPC_RMID, NULL) < 0)
745  NextShmemSegID++;
746  break;
747  }
748 
749  if (oldhdr && shmdt(oldhdr) < 0)
750  elog(LOG, "shmdt(%p) failed: %m", oldhdr);
751  }
752 
753  /* Initialize new segment. */
754  hdr = (PGShmemHeader *) memAddress;
755  hdr->creatorPID = getpid();
756  hdr->magic = PGShmemMagic;
757  hdr->dsm_control = 0;
758 
759  /* Fill in the data directory ID info, too */
760  hdr->device = statbuf.st_dev;
761  hdr->inode = statbuf.st_ino;
762 
763  /*
764  * Initialize space allocation status for segment.
765  */
766  hdr->totalsize = size;
767  hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
768  *shim = hdr;
769 
770  /* Save info for possible future use */
771  UsedShmemSegAddr = memAddress;
772  UsedShmemSegID = (unsigned long) NextShmemSegID;
773 
774  /*
775  * If AnonymousShmem is NULL here, then we're not using anonymous shared
776  * memory, and should return a pointer to the System V shared memory
777  * block. Otherwise, the System V shared memory block is only a shim, and
778  * we must return a pointer to the real block.
779  */
780  if (AnonymousShmem == NULL)
781  return hdr;
782  memcpy(AnonymousShmem, hdr, sizeof(PGShmemHeader));
783  return (PGShmemHeader *) AnonymousShmem;
784 }
785 
786 #ifdef EXEC_BACKEND
787 
788 /*
789  * PGSharedMemoryReAttach
790  *
791  * This is called during startup of a postmaster child process to re-attach to
792  * an already existing shared memory segment. This is needed only in the
793  * EXEC_BACKEND case; otherwise postmaster children inherit the shared memory
794  * segment attachment via fork().
795  *
796  * UsedShmemSegID and UsedShmemSegAddr are implicit parameters to this
797  * routine. The caller must have already restored them to the postmaster's
798  * values.
799  */
800 void
802 {
803  IpcMemoryId shmid;
804  PGShmemHeader *hdr;
806  void *origUsedShmemSegAddr = UsedShmemSegAddr;
807 
808  Assert(UsedShmemSegAddr != NULL);
810 
811 #ifdef __CYGWIN__
812  /* cygipc (currently) appears to not detach on exec. */
814  UsedShmemSegAddr = origUsedShmemSegAddr;
815 #endif
816 
817  elog(DEBUG3, "attaching to %p", UsedShmemSegAddr);
818  shmid = shmget(UsedShmemSegID, sizeof(PGShmemHeader), 0);
819  if (shmid < 0)
820  state = SHMSTATE_FOREIGN;
821  else
822  state = PGSharedMemoryAttach(shmid, UsedShmemSegAddr, &hdr);
823  if (state != SHMSTATE_ATTACHED)
824  elog(FATAL, "could not reattach to shared memory (key=%d, addr=%p): %m",
826  if (hdr != origUsedShmemSegAddr)
827  elog(FATAL, "reattaching to shared memory returned unexpected address (got %p, expected %p)",
828  hdr, origUsedShmemSegAddr);
829  dsm_set_control_handle(hdr->dsm_control);
830 
831  UsedShmemSegAddr = hdr; /* probably redundant */
832 }
833 
834 /*
835  * PGSharedMemoryNoReAttach
836  *
837  * This is called during startup of a postmaster child process when we choose
838  * *not* to re-attach to the existing shared memory segment. We must clean up
839  * to leave things in the appropriate state. This is not used in the non
840  * EXEC_BACKEND case, either.
841  *
842  * The child process startup logic might or might not call PGSharedMemoryDetach
843  * after this; make sure that it will be a no-op if called.
844  *
845  * UsedShmemSegID and UsedShmemSegAddr are implicit parameters to this
846  * routine. The caller must have already restored them to the postmaster's
847  * values.
848  */
849 void
851 {
852  Assert(UsedShmemSegAddr != NULL);
854 
855 #ifdef __CYGWIN__
856  /* cygipc (currently) appears to not detach on exec. */
858 #endif
859 
860  /* For cleanliness, reset UsedShmemSegAddr to show we're not attached. */
861  UsedShmemSegAddr = NULL;
862  /* And the same for UsedShmemSegID. */
863  UsedShmemSegID = 0;
864 }
865 
866 #endif /* EXEC_BACKEND */
867 
868 /*
869  * PGSharedMemoryDetach
870  *
871  * Detach from the shared memory segment, if still attached. This is not
872  * intended to be called explicitly by the process that originally created the
873  * segment (it will have on_shmem_exit callback(s) registered to do that).
874  * Rather, this is for subprocesses that have inherited an attachment and want
875  * to get rid of it.
876  *
877  * UsedShmemSegID and UsedShmemSegAddr are implicit parameters to this
878  * routine, also AnonymousShmem and AnonymousShmemSize.
879  */
880 void
882 {
883  if (UsedShmemSegAddr != NULL)
884  {
885  if ((shmdt(UsedShmemSegAddr) < 0)
886 #if defined(EXEC_BACKEND) && defined(__CYGWIN__)
887  /* Work-around for cygipc exec bug */
888  && shmdt(NULL) < 0
889 #endif
890  )
891  elog(LOG, "shmdt(%p) failed: %m", UsedShmemSegAddr);
892  UsedShmemSegAddr = NULL;
893  }
894 
895  if (AnonymousShmem != NULL)
896  {
897  if (munmap(AnonymousShmem, AnonymousShmemSize) < 0)
898  elog(LOG, "munmap(%p, %zu) failed: %m",
900  AnonymousShmem = NULL;
901  }
902 }
IpcMemoryState
Definition: sysv_shmem.c:86
PGShmemHeader * PGSharedMemoryCreate(Size size, PGShmemHeader **shim)
Definition: sysv_shmem.c:623
pid_t creatorPID
Definition: pg_shmem.h:33
#define MAP_FAILED
Definition: mem.h:45
#define DEBUG1
Definition: elog.h:25
int errhint(const char *fmt,...)
Definition: elog.c:1069
static void IpcMemoryDetach(int status, Datum shmaddr)
Definition: sysv_shmem.c:278
#define DatumGetInt32(X)
Definition: postgres.h:472
#define IPC_CREAT
Definition: win32_port.h:82
#define DEBUG3
Definition: elog.h:23
void dsm_cleanup_using_control_segment(dsm_handle old_control_handle)
Definition: dsm.c:206
dsm_handle dsm_control
Definition: pg_shmem.h:36
void AddToDataDirLockFile(int target_line, const char *str)
Definition: miscinit.c:1254
#define PointerGetDatum(X)
Definition: postgres.h:556
#define IPCProtection
Definition: posix_sema.c:59
#define LOCK_FILE_LINE_SHMEM_KEY
Definition: pidfile.h:43
#define PG_SHMAT_FLAGS
Definition: mem.h:20
struct PGShmemHeader PGShmemHeader
int shared_memory_type
Definition: ipci.c:50
static Size AnonymousShmemSize
Definition: sysv_shmem.c:99
void PGSharedMemoryNoReAttach(void)
Definition: win32_shmem.c:449
int errcode(int sqlerrcode)
Definition: elog.c:608
#define LOG
Definition: elog.h:26
void PGSharedMemoryDetach(void)
Definition: sysv_shmem.c:881
#define sprintf
Definition: port.h:194
static void * InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size)
Definition: sysv_shmem.c:123
#define ERROR
Definition: elog.h:43
#define FATAL
Definition: elog.h:52
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:361
static char * buf
Definition: pg_test_fsync.c:67
#define EIDRM
Definition: win32_port.h:90
#define PG_MMAP_FLAGS
Definition: mem.h:41
key_t IpcMemoryKey
Definition: sysv_shmem.c:72
bool IsUnderPostmaster
Definition: globals.c:109
int errdetail(const char *fmt,...)
Definition: elog.c:955
static IpcMemoryState PGSharedMemoryAttach(IpcMemoryId shmId, void *attachAt, PGShmemHeader **addr)
Definition: sysv_shmem.c:339
void PGSharedMemoryReAttach(void)
Definition: win32_shmem.c:401
int errcode_for_file_access(void)
Definition: elog.c:631
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2242
void * UsedShmemSegAddr
Definition: sysv_shmem.c:97
static void AnonymousShmemDetach(int status, Datum arg)
Definition: sysv_shmem.c:598
#define ereport(elevel, rest)
Definition: elog.h:141
static void IpcMemoryDelete(int status, Datum shmId)
Definition: sysv_shmem.c:290
int32 magic
Definition: pg_shmem.h:31
static void * CreateAnonymousSegment(Size *size)
Definition: sysv_shmem.c:530
#define stat(a, b)
Definition: win32_port.h:255
Size totalsize
Definition: pg_shmem.h:34
uintptr_t Datum
Definition: postgres.h:367
unsigned long UsedShmemSegID
Definition: sysv_shmem.c:96
#define IPC_RMID
Definition: win32_port.h:81
static void * AnonymousShmem
Definition: sysv_shmem.c:100
ino_t inode
Definition: pg_shmem.h:40
dev_t device
Definition: pg_shmem.h:39
#define Assert(condition)
Definition: c.h:739
Definition: regguts.h:298
long key_t
Definition: win32_port.h:233
#define PGShmemMagic
Definition: pg_shmem.h:32
Size freeoffset
Definition: pg_shmem.h:35
size_t Size
Definition: c.h:467
#define MAXALIGN(LEN)
Definition: c.h:692
int IpcMemoryId
Definition: sysv_shmem.c:73
int huge_pages
Definition: guc.c:555
#define IPC_EXCL
Definition: win32_port.h:83
#define DatumGetPointer(X)
Definition: postgres.h:549
bool PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
Definition: sysv_shmem.c:309
int FreeFile(FILE *file)
Definition: fd.c:2441
#define Int32GetDatum(X)
Definition: postgres.h:479
int errmsg(const char *fmt,...)
Definition: elog.c:822
#define elog(elevel,...)
Definition: elog.h:228
void * arg
char * DataDir
Definition: globals.c:62
static void static void status(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:226
#define IPC_STAT
Definition: win32_port.h:86