PostgreSQL Source Code  git master
sysv_shmem.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * sysv_shmem.c
4  * Implement shared memory using SysV facilities
5  *
6  * These routines used to be a fairly thin layer on top of SysV shared
7  * memory functionality. With the addition of anonymous-shmem logic,
8  * they're a bit fatter now. We still require a SysV shmem block to
9  * exist, though, because mmap'd shmem provides no way to find out how
10  * many processes are attached, which we need for interlocking purposes.
11  *
12  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
13  * Portions Copyright (c) 1994, Regents of the University of California
14  *
15  * IDENTIFICATION
16  * src/backend/port/sysv_shmem.c
17  *
18  *-------------------------------------------------------------------------
19  */
20 #include "postgres.h"
21 
22 #include <signal.h>
23 #include <unistd.h>
24 #include <sys/file.h>
25 #include <sys/mman.h>
26 #include <sys/stat.h>
27 #ifdef HAVE_SYS_IPC_H
28 #include <sys/ipc.h>
29 #endif
30 #ifdef HAVE_SYS_SHM_H
31 #include <sys/shm.h>
32 #endif
33 
34 #include "miscadmin.h"
35 #include "portability/mem.h"
36 #include "storage/dsm.h"
37 #include "storage/fd.h"
38 #include "storage/ipc.h"
39 #include "storage/pg_shmem.h"
40 #include "utils/guc.h"
41 #include "utils/pidfile.h"
42 
43 
44 /*
45  * As of PostgreSQL 9.3, we normally allocate only a very small amount of
46  * System V shared memory, and only for the purposes of providing an
47  * interlock to protect the data directory. The real shared memory block
48  * is allocated using mmap(). This works around the problem that many
49  * systems have very low limits on the amount of System V shared memory
50  * that can be allocated. Even a limit of a few megabytes will be enough
51  * to run many copies of PostgreSQL without needing to adjust system settings.
52  *
53  * We assume that no one will attempt to run PostgreSQL 9.3 or later on
54  * systems that are ancient enough that anonymous shared memory is not
55  * supported, such as pre-2.4 versions of Linux. If that turns out to be
56  * false, we might need to add compile and/or run-time tests here and do this
57  * only if the running kernel supports it.
58  *
59  * However, we must always disable this logic in the EXEC_BACKEND case, and
60  * fall back to the old method of allocating the entire segment using System V
61  * shared memory, because there's no way to attach an anonymous mmap'd segment
62  * to a process after exec(). Since EXEC_BACKEND is intended only for
63  * developer use, this shouldn't be a big problem. Because of this, we do
64  * not worry about supporting anonymous shmem in the EXEC_BACKEND cases below.
65  *
66  * As of PostgreSQL 12, we regained the ability to use a large System V shared
67  * memory region even in non-EXEC_BACKEND builds, if shared_memory_type is set
68  * to sysv (though this is not the default).
69  */
70 
71 
/*
 * Local names for the key and identifier types of the System V shared
 * memory interface.
 */
typedef key_t IpcMemoryKey;		/* shared memory key passed to shmget(2) */
typedef int IpcMemoryId;		/* shared memory ID returned by shmget(2) */
74 
/*
 * How does a given IpcMemoryId relate to this PostgreSQL process?
 *
 * One could recycle unattached segments of different data directories if we
 * distinguished that case from other SHMSTATE_FOREIGN cases.  Doing so would
 * cause us to visit less of the key space, making us less likely to detect a
 * SHMSTATE_ATTACHED key.  It would also complicate the concurrency analysis,
 * in that postmasters of different data directories could simultaneously
 * attempt to recycle a given key.  We'll waste keys longer in some cases, but
 * avoiding the problems of the alternative justifies that loss.
 */
typedef enum
{
	SHMSTATE_ANALYSIS_FAILURE,	/* unexpected failure to analyze the ID */
	SHMSTATE_ATTACHED,			/* pertinent to DataDir, has attached PIDs */
	SHMSTATE_ENOENT,			/* no segment of that ID */
	SHMSTATE_FOREIGN,			/* exists, but not pertinent to DataDir */
	SHMSTATE_UNATTACHED			/* pertinent to DataDir, no attached PIDs */
} IpcMemoryState;
94 
95 
96 unsigned long UsedShmemSegID = 0;
97 void *UsedShmemSegAddr = NULL;
98 
100 static void *AnonymousShmem = NULL;
101 
102 static void *InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size);
103 static void IpcMemoryDetach(int status, Datum shmaddr);
104 static void IpcMemoryDelete(int status, Datum shmId);
106  void *attachAt,
107  PGShmemHeader **addr);
108 
109 
110 /*
111  * InternalIpcMemoryCreate(memKey, size)
112  *
113  * Attempt to create a new shared memory segment with the specified key.
114  * Will fail (return NULL) if such a segment already exists. If successful,
115  * attach the segment to the current process and return its attached address.
116  * On success, callbacks are registered with on_shmem_exit to detach and
117  * delete the segment when on_shmem_exit is called.
118  *
119  * If we fail with a failure code other than collision-with-existing-segment,
120  * print out an error and abort. Other types of errors are not recoverable.
121  */
122 static void *
124 {
125  IpcMemoryId shmid;
126  void *requestedAddress = NULL;
127  void *memAddress;
128 
129  /*
130  * Normally we just pass requestedAddress = NULL to shmat(), allowing the
131  * system to choose where the segment gets mapped. But in an EXEC_BACKEND
132  * build, it's possible for whatever is chosen in the postmaster to not
133  * work for backends, due to variations in address space layout. As a
134  * rather klugy workaround, allow the user to specify the address to use
135  * via setting the environment variable PG_SHMEM_ADDR. (If this were of
136  * interest for anything except debugging, we'd probably create a cleaner
137  * and better-documented way to set it, such as a GUC.)
138  */
139 #ifdef EXEC_BACKEND
140  {
141  char *pg_shmem_addr = getenv("PG_SHMEM_ADDR");
142 
143  if (pg_shmem_addr)
144  requestedAddress = (void *) strtoul(pg_shmem_addr, NULL, 0);
145  }
146 #endif
147 
148  shmid = shmget(memKey, size, IPC_CREAT | IPC_EXCL | IPCProtection);
149 
150  if (shmid < 0)
151  {
152  int shmget_errno = errno;
153 
154  /*
155  * Fail quietly if error indicates a collision with existing segment.
156  * One would expect EEXIST, given that we said IPC_EXCL, but perhaps
157  * we could get a permission violation instead? Also, EIDRM might
158  * occur if an old seg is slated for destruction but not gone yet.
159  */
160  if (shmget_errno == EEXIST || shmget_errno == EACCES
161 #ifdef EIDRM
162  || shmget_errno == EIDRM
163 #endif
164  )
165  return NULL;
166 
167  /*
168  * Some BSD-derived kernels are known to return EINVAL, not EEXIST, if
169  * there is an existing segment but it's smaller than "size" (this is
170  * a result of poorly-thought-out ordering of error tests). To
171  * distinguish between collision and invalid size in such cases, we
172  * make a second try with size = 0. These kernels do not test size
173  * against SHMMIN in the preexisting-segment case, so we will not get
174  * EINVAL a second time if there is such a segment.
175  */
176  if (shmget_errno == EINVAL)
177  {
178  shmid = shmget(memKey, 0, IPC_CREAT | IPC_EXCL | IPCProtection);
179 
180  if (shmid < 0)
181  {
182  /* As above, fail quietly if we verify a collision */
183  if (errno == EEXIST || errno == EACCES
184 #ifdef EIDRM
185  || errno == EIDRM
186 #endif
187  )
188  return NULL;
189  /* Otherwise, fall through to report the original error */
190  }
191  else
192  {
193  /*
194  * On most platforms we cannot get here because SHMMIN is
195  * greater than zero. However, if we do succeed in creating a
196  * zero-size segment, free it and then fall through to report
197  * the original error.
198  */
199  if (shmctl(shmid, IPC_RMID, NULL) < 0)
200  elog(LOG, "shmctl(%d, %d, 0) failed: %m",
201  (int) shmid, IPC_RMID);
202  }
203  }
204 
205  /*
206  * Else complain and abort.
207  *
208  * Note: at this point EINVAL should mean that either SHMMIN or SHMMAX
209  * is violated. SHMALL violation might be reported as either ENOMEM
210  * (BSDen) or ENOSPC (Linux); the Single Unix Spec fails to say which
211  * it should be. SHMMNI violation is ENOSPC, per spec. Just plain
212  * not-enough-RAM is ENOMEM.
213  */
214  errno = shmget_errno;
215  ereport(FATAL,
216  (errmsg("could not create shared memory segment: %m"),
217  errdetail("Failed system call was shmget(key=%lu, size=%zu, 0%o).",
218  (unsigned long) memKey, size,
220  (shmget_errno == EINVAL) ?
221  errhint("This error usually means that PostgreSQL's request for a shared memory "
222  "segment exceeded your kernel's SHMMAX parameter, or possibly that "
223  "it is less than "
224  "your kernel's SHMMIN parameter.\n"
225  "The PostgreSQL documentation contains more information about shared "
226  "memory configuration.") : 0,
227  (shmget_errno == ENOMEM) ?
228  errhint("This error usually means that PostgreSQL's request for a shared "
229  "memory segment exceeded your kernel's SHMALL parameter. You might need "
230  "to reconfigure the kernel with larger SHMALL.\n"
231  "The PostgreSQL documentation contains more information about shared "
232  "memory configuration.") : 0,
233  (shmget_errno == ENOSPC) ?
234  errhint("This error does *not* mean that you have run out of disk space. "
235  "It occurs either if all available shared memory IDs have been taken, "
236  "in which case you need to raise the SHMMNI parameter in your kernel, "
237  "or because the system's overall limit for shared memory has been "
238  "reached.\n"
239  "The PostgreSQL documentation contains more information about shared "
240  "memory configuration.") : 0));
241  }
242 
243  /* Register on-exit routine to delete the new segment */
245 
246  /* OK, should be able to attach to the segment */
247  memAddress = shmat(shmid, requestedAddress, PG_SHMAT_FLAGS);
248 
249  if (memAddress == (void *) -1)
250  elog(FATAL, "shmat(id=%d, addr=%p, flags=0x%x) failed: %m",
251  shmid, requestedAddress, PG_SHMAT_FLAGS);
252 
253  /* Register on-exit routine to detach new segment before deleting */
255 
256  /*
257  * Store shmem key and ID in data directory lockfile. Format to try to
258  * keep it the same length always (trailing junk in the lockfile won't
259  * hurt, but might confuse humans).
260  */
261  {
262  char line[64];
263 
264  sprintf(line, "%9lu %9lu",
265  (unsigned long) memKey, (unsigned long) shmid);
267  }
268 
269  return memAddress;
270 }
271 
272 /****************************************************************************/
273 /* IpcMemoryDetach(status, shmaddr) removes a shared memory segment */
274 /* from process' address space */
275 /* (called as an on_shmem_exit callback, hence funny argument list) */
276 /****************************************************************************/
277 static void
279 {
280  /* Detach System V shared memory block. */
281  if (shmdt(DatumGetPointer(shmaddr)) < 0)
282  elog(LOG, "shmdt(%p) failed: %m", DatumGetPointer(shmaddr));
283 }
284 
285 /****************************************************************************/
286 /* IpcMemoryDelete(status, shmId) deletes a shared memory segment */
287 /* (called as an on_shmem_exit callback, hence funny argument list) */
288 /****************************************************************************/
289 static void
291 {
292  if (shmctl(DatumGetInt32(shmId), IPC_RMID, NULL) < 0)
293  elog(LOG, "shmctl(%d, %d, 0) failed: %m",
294  DatumGetInt32(shmId), IPC_RMID);
295 }
296 
297 /*
298  * PGSharedMemoryIsInUse
299  *
300  * Is a previously-existing shmem segment still existing and in use?
301  *
302  * The point of this exercise is to detect the case where a prior postmaster
303  * crashed, but it left child backends that are still running. Therefore
304  * we only care about shmem segments that are associated with the intended
305  * DataDir. This is an important consideration since accidental matches of
306  * shmem segment IDs are reasonably common.
307  */
308 bool
309 PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
310 {
311  PGShmemHeader *memAddress;
313 
314  state = PGSharedMemoryAttach((IpcMemoryId) id2, NULL, &memAddress);
315  if (memAddress && shmdt(memAddress) < 0)
316  elog(LOG, "shmdt(%p) failed: %m", memAddress);
317  switch (state)
318  {
319  case SHMSTATE_ENOENT:
320  case SHMSTATE_FOREIGN:
321  case SHMSTATE_UNATTACHED:
322  return false;
324  case SHMSTATE_ATTACHED:
325  return true;
326  }
327  return true;
328 }
329 
330 /*
331  * Test for a segment with id shmId; see comment at IpcMemoryState.
332  *
333  * If the segment exists, we'll attempt to attach to it, using attachAt
334  * if that's not NULL (but it's best to pass NULL if possible).
335  *
336  * *addr is set to the segment memory address if we attached to it, else NULL.
337  */
338 static IpcMemoryState
340  void *attachAt,
341  PGShmemHeader **addr)
342 {
343  struct shmid_ds shmStat;
344  struct stat statbuf;
345  PGShmemHeader *hdr;
346 
347  *addr = NULL;
348 
349  /*
350  * First, try to stat the shm segment ID, to see if it exists at all.
351  */
352  if (shmctl(shmId, IPC_STAT, &shmStat) < 0)
353  {
354  /*
355  * EINVAL actually has multiple possible causes documented in the
356  * shmctl man page, but we assume it must mean the segment no longer
357  * exists.
358  */
359  if (errno == EINVAL)
360  return SHMSTATE_ENOENT;
361 
362  /*
363  * EACCES implies we have no read permission, which means it is not a
364  * Postgres shmem segment (or at least, not one that is relevant to
365  * our data directory).
366  */
367  if (errno == EACCES)
368  return SHMSTATE_FOREIGN;
369 
370  /*
371  * Some Linux kernel versions (in fact, all of them as of July 2007)
372  * sometimes return EIDRM when EINVAL is correct. The Linux kernel
373  * actually does not have any internal state that would justify
374  * returning EIDRM, so we can get away with assuming that EIDRM is
375  * equivalent to EINVAL on that platform.
376  */
377 #ifdef HAVE_LINUX_EIDRM_BUG
378  if (errno == EIDRM)
379  return SHMSTATE_ENOENT;
380 #endif
381 
382  /*
383  * Otherwise, we had better assume that the segment is in use. The
384  * only likely case is (non-Linux, assumed spec-compliant) EIDRM,
385  * which implies that the segment has been IPC_RMID'd but there are
386  * still processes attached to it.
387  */
389  }
390 
391  /*
392  * Try to attach to the segment and see if it matches our data directory.
393  * This avoids key-conflict problems on machines that are running several
394  * postmasters under the same userid and port number. (That would not
395  * ordinarily happen in production, but it can happen during parallel
396  * testing. Since our test setups don't open any TCP ports on Unix, such
397  * cases don't conflict otherwise.)
398  */
399  if (stat(DataDir, &statbuf) < 0)
400  return SHMSTATE_ANALYSIS_FAILURE; /* can't stat; be conservative */
401 
402  hdr = (PGShmemHeader *) shmat(shmId, attachAt, PG_SHMAT_FLAGS);
403  if (hdr == (PGShmemHeader *) -1)
404  {
405  /*
406  * Attachment failed. The cases we're interested in are the same as
407  * for the shmctl() call above. In particular, note that the owning
408  * postmaster could have terminated and removed the segment between
409  * shmctl() and shmat().
410  *
411  * If attachAt isn't NULL, it's possible that EINVAL reflects a
412  * problem with that address not a vanished segment, so it's best to
413  * pass NULL when probing for conflicting segments.
414  */
415  if (errno == EINVAL)
416  return SHMSTATE_ENOENT; /* segment disappeared */
417  if (errno == EACCES)
418  return SHMSTATE_FOREIGN; /* must be non-Postgres */
419 #ifdef HAVE_LINUX_EIDRM_BUG
420  if (errno == EIDRM)
421  return SHMSTATE_ENOENT; /* segment disappeared */
422 #endif
423  /* Otherwise, be conservative. */
425  }
426  *addr = hdr;
427 
428  if (hdr->magic != PGShmemMagic ||
429  hdr->device != statbuf.st_dev ||
430  hdr->inode != statbuf.st_ino)
431  {
432  /*
433  * It's either not a Postgres segment, or not one for my data
434  * directory.
435  */
436  return SHMSTATE_FOREIGN;
437  }
438 
439  /*
440  * It does match our data directory, so now test whether any processes are
441  * still attached to it. (We are, now, but the shm_nattch result is from
442  * before we attached to it.)
443  */
444  return shmStat.shm_nattch == 0 ? SHMSTATE_UNATTACHED : SHMSTATE_ATTACHED;
445 }
446 
447 #ifdef MAP_HUGETLB
448 
449 /*
450  * Identify the huge page size to use.
451  *
452  * Some Linux kernel versions have a bug causing mmap() to fail on requests
453  * that are not a multiple of the hugepage size. Versions without that bug
454  * instead silently round the request up to the next hugepage multiple ---
455  * and then munmap() fails when we give it a size different from that.
456  * So we have to round our request up to a multiple of the actual hugepage
457  * size to avoid trouble.
458  *
459  * Doing the round-up ourselves also lets us make use of the extra memory,
460  * rather than just wasting it. Currently, we just increase the available
461  * space recorded in the shmem header, which will make the extra usable for
462  * purposes such as additional locktable entries. Someday, for very large
463  * hugepage sizes, we might want to think about more invasive strategies,
464  * such as increasing shared_buffers to absorb the extra space.
465  *
466  * Returns the (real or assumed) page size into *hugepagesize,
467  * and the hugepage-related mmap flags to use into *mmap_flags.
468  *
469  * Currently *mmap_flags is always just MAP_HUGETLB. Someday, on systems
470  * that support it, we might OR in additional bits to specify a particular
471  * non-default huge page size.
472  */
473 static void
474 GetHugePageSize(Size *hugepagesize, int *mmap_flags)
475 {
476  /*
477  * If we fail to find out the system's default huge page size, assume it
478  * is 2MB. This will work fine when the actual size is less. If it's
479  * more, we might get mmap() or munmap() failures due to unaligned
480  * requests; but at this writing, there are no reports of any non-Linux
481  * systems being picky about that.
482  */
483  *hugepagesize = 2 * 1024 * 1024;
484  *mmap_flags = MAP_HUGETLB;
485 
486  /*
487  * System-dependent code to find out the default huge page size.
488  *
489  * On Linux, read /proc/meminfo looking for a line like "Hugepagesize:
490  * nnnn kB". Ignore any failures, falling back to the preset default.
491  */
492 #ifdef __linux__
493  {
494  FILE *fp = AllocateFile("/proc/meminfo", "r");
495  char buf[128];
496  unsigned int sz;
497  char ch;
498 
499  if (fp)
500  {
501  while (fgets(buf, sizeof(buf), fp))
502  {
503  if (sscanf(buf, "Hugepagesize: %u %c", &sz, &ch) == 2)
504  {
505  if (ch == 'k')
506  {
507  *hugepagesize = sz * (Size) 1024;
508  break;
509  }
510  /* We could accept other units besides kB, if needed */
511  }
512  }
513  FreeFile(fp);
514  }
515  }
516 #endif /* __linux__ */
517 }
518 
519 #endif /* MAP_HUGETLB */
520 
521 /*
522  * Creates an anonymous mmap()ed shared memory segment.
523  *
524  * Pass the requested size in *size. This function will modify *size to the
525  * actual size of the allocation, if it ends up allocating a segment that is
526  * larger than requested.
527  */
528 static void *
530 {
531  Size allocsize = *size;
532  void *ptr = MAP_FAILED;
533  int mmap_errno = 0;
534 
535 #ifndef MAP_HUGETLB
536  /* PGSharedMemoryCreate should have dealt with this case */
538 #else
540  {
541  /*
542  * Round up the request size to a suitable large value.
543  */
544  Size hugepagesize;
545  int mmap_flags;
546 
547  GetHugePageSize(&hugepagesize, &mmap_flags);
548 
549  if (allocsize % hugepagesize != 0)
550  allocsize += hugepagesize - (allocsize % hugepagesize);
551 
552  ptr = mmap(NULL, allocsize, PROT_READ | PROT_WRITE,
553  PG_MMAP_FLAGS | mmap_flags, -1, 0);
554  mmap_errno = errno;
555  if (huge_pages == HUGE_PAGES_TRY && ptr == MAP_FAILED)
556  elog(DEBUG1, "mmap(%zu) with MAP_HUGETLB failed, huge pages disabled: %m",
557  allocsize);
558  }
559 #endif
560 
561  if (ptr == MAP_FAILED && huge_pages != HUGE_PAGES_ON)
562  {
563  /*
564  * Use the original size, not the rounded-up value, when falling back
565  * to non-huge pages.
566  */
567  allocsize = *size;
568  ptr = mmap(NULL, allocsize, PROT_READ | PROT_WRITE,
569  PG_MMAP_FLAGS, -1, 0);
570  mmap_errno = errno;
571  }
572 
573  if (ptr == MAP_FAILED)
574  {
575  errno = mmap_errno;
576  ereport(FATAL,
577  (errmsg("could not map anonymous shared memory: %m"),
578  (mmap_errno == ENOMEM) ?
579  errhint("This error usually means that PostgreSQL's request "
580  "for a shared memory segment exceeded available memory, "
581  "swap space, or huge pages. To reduce the request size "
582  "(currently %zu bytes), reduce PostgreSQL's shared "
583  "memory usage, perhaps by reducing shared_buffers or "
584  "max_connections.",
585  *size) : 0));
586  }
587 
588  *size = allocsize;
589  return ptr;
590 }
591 
592 /*
593  * AnonymousShmemDetach --- detach from an anonymous mmap'd block
594  * (called as an on_shmem_exit callback, hence funny argument list)
595  */
596 static void
598 {
599  /* Release anonymous shared memory block, if any. */
600  if (AnonymousShmem != NULL)
601  {
602  if (munmap(AnonymousShmem, AnonymousShmemSize) < 0)
603  elog(LOG, "munmap(%p, %zu) failed: %m",
605  AnonymousShmem = NULL;
606  }
607 }
608 
609 /*
610  * PGSharedMemoryCreate
611  *
612  * Create a shared memory segment of the given size and initialize its
613  * standard header. Also, register an on_shmem_exit callback to release
614  * the storage.
615  *
616  * Dead Postgres segments pertinent to this DataDir are recycled if found, but
617  * we do not fail upon collision with foreign shmem segments. The idea here
618  * is to detect and re-use keys that may have been assigned by a crashed
619  * postmaster or backend.
620  *
621  * The port number is passed for possible use as a key (for SysV, we use
622  * it to generate the starting shmem key).
623  */
626  PGShmemHeader **shim)
627 {
628  IpcMemoryKey NextShmemSegID;
629  void *memAddress;
630  PGShmemHeader *hdr;
631  struct stat statbuf;
632  Size sysvsize;
633 
634  /* Complain if hugepages demanded but we can't possibly support them */
635 #if !defined(MAP_HUGETLB)
636  if (huge_pages == HUGE_PAGES_ON)
637  ereport(ERROR,
638  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
639  errmsg("huge pages not supported on this platform")));
640 #endif
641 
642  /* Room for a header? */
643  Assert(size > MAXALIGN(sizeof(PGShmemHeader)));
644 
646  {
648  AnonymousShmemSize = size;
649 
650  /* Register on-exit routine to unmap the anonymous segment */
652 
653  /* Now we need only allocate a minimal-sized SysV shmem block. */
654  sysvsize = sizeof(PGShmemHeader);
655  }
656  else
657  sysvsize = size;
658 
659  /*
660  * Loop till we find a free IPC key. Trust CreateDataDirLockFile() to
661  * ensure no more than one postmaster per data directory can enter this
662  * loop simultaneously. (CreateDataDirLockFile() does not ensure that,
663  * but prefer fixing it over coping here.)
664  */
665  NextShmemSegID = 1 + port * 1000;
666 
667  for (;;)
668  {
669  IpcMemoryId shmid;
670  PGShmemHeader *oldhdr;
672 
673  /* Try to create new segment */
674  memAddress = InternalIpcMemoryCreate(NextShmemSegID, sysvsize);
675  if (memAddress)
676  break; /* successful create and attach */
677 
678  /* Check shared memory and possibly remove and recreate */
679 
680  /*
681  * shmget() failure is typically EACCES, hence SHMSTATE_FOREIGN.
682  * ENOENT, a narrow possibility, implies SHMSTATE_ENOENT, but one can
683  * safely treat SHMSTATE_ENOENT like SHMSTATE_FOREIGN.
684  */
685  shmid = shmget(NextShmemSegID, sizeof(PGShmemHeader), 0);
686  if (shmid < 0)
687  {
688  oldhdr = NULL;
689  state = SHMSTATE_FOREIGN;
690  }
691  else
692  state = PGSharedMemoryAttach(shmid, NULL, &oldhdr);
693 
694  switch (state)
695  {
697  case SHMSTATE_ATTACHED:
698  ereport(FATAL,
699  (errcode(ERRCODE_LOCK_FILE_EXISTS),
700  errmsg("pre-existing shared memory block (key %lu, ID %lu) is still in use",
701  (unsigned long) NextShmemSegID,
702  (unsigned long) shmid),
703  errhint("Terminate any old server processes associated with data directory \"%s\".",
704  DataDir)));
705  break;
706  case SHMSTATE_ENOENT:
707 
708  /*
709  * To our surprise, some other process deleted since our last
710  * InternalIpcMemoryCreate(). Moments earlier, we would have
711  * seen SHMSTATE_FOREIGN. Try that same ID again.
712  */
713  elog(LOG,
714  "shared memory block (key %lu, ID %lu) deleted during startup",
715  (unsigned long) NextShmemSegID,
716  (unsigned long) shmid);
717  break;
718  case SHMSTATE_FOREIGN:
719  NextShmemSegID++;
720  break;
721  case SHMSTATE_UNATTACHED:
722 
723  /*
724  * The segment pertains to DataDir, and every process that had
725  * used it has died or detached. Zap it, if possible, and any
726  * associated dynamic shared memory segments, as well. This
727  * shouldn't fail, but if it does, assume the segment belongs
728  * to someone else after all, and try the next candidate.
729  * Otherwise, try again to create the segment. That may fail
730  * if some other process creates the same shmem key before we
731  * do, in which case we'll try the next key.
732  */
733  if (oldhdr->dsm_control != 0)
735  if (shmctl(shmid, IPC_RMID, NULL) < 0)
736  NextShmemSegID++;
737  break;
738  }
739 
740  if (oldhdr && shmdt(oldhdr) < 0)
741  elog(LOG, "shmdt(%p) failed: %m", oldhdr);
742  }
743 
744  /* Initialize new segment. */
745  hdr = (PGShmemHeader *) memAddress;
746  hdr->creatorPID = getpid();
747  hdr->magic = PGShmemMagic;
748  hdr->dsm_control = 0;
749 
750  /* Fill in the data directory ID info, too */
751  if (stat(DataDir, &statbuf) < 0)
752  ereport(FATAL,
754  errmsg("could not stat data directory \"%s\": %m",
755  DataDir)));
756  hdr->device = statbuf.st_dev;
757  hdr->inode = statbuf.st_ino;
758 
759  /*
760  * Initialize space allocation status for segment.
761  */
762  hdr->totalsize = size;
763  hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
764  *shim = hdr;
765 
766  /* Save info for possible future use */
767  UsedShmemSegAddr = memAddress;
768  UsedShmemSegID = (unsigned long) NextShmemSegID;
769 
770  /*
771  * If AnonymousShmem is NULL here, then we're not using anonymous shared
772  * memory, and should return a pointer to the System V shared memory
773  * block. Otherwise, the System V shared memory block is only a shim, and
774  * we must return a pointer to the real block.
775  */
776  if (AnonymousShmem == NULL)
777  return hdr;
778  memcpy(AnonymousShmem, hdr, sizeof(PGShmemHeader));
779  return (PGShmemHeader *) AnonymousShmem;
780 }
781 
782 #ifdef EXEC_BACKEND
783 
784 /*
785  * PGSharedMemoryReAttach
786  *
787  * This is called during startup of a postmaster child process to re-attach to
788  * an already existing shared memory segment. This is needed only in the
789  * EXEC_BACKEND case; otherwise postmaster children inherit the shared memory
790  * segment attachment via fork().
791  *
792  * UsedShmemSegID and UsedShmemSegAddr are implicit parameters to this
793  * routine. The caller must have already restored them to the postmaster's
794  * values.
795  */
796 void
798 {
799  IpcMemoryId shmid;
800  PGShmemHeader *hdr;
802  void *origUsedShmemSegAddr = UsedShmemSegAddr;
803 
804  Assert(UsedShmemSegAddr != NULL);
806 
807 #ifdef __CYGWIN__
808  /* cygipc (currently) appears to not detach on exec. */
810  UsedShmemSegAddr = origUsedShmemSegAddr;
811 #endif
812 
813  elog(DEBUG3, "attaching to %p", UsedShmemSegAddr);
814  shmid = shmget(UsedShmemSegID, sizeof(PGShmemHeader), 0);
815  if (shmid < 0)
816  state = SHMSTATE_FOREIGN;
817  else
818  state = PGSharedMemoryAttach(shmid, UsedShmemSegAddr, &hdr);
819  if (state != SHMSTATE_ATTACHED)
820  elog(FATAL, "could not reattach to shared memory (key=%d, addr=%p): %m",
822  if (hdr != origUsedShmemSegAddr)
823  elog(FATAL, "reattaching to shared memory returned unexpected address (got %p, expected %p)",
824  hdr, origUsedShmemSegAddr);
825  dsm_set_control_handle(hdr->dsm_control);
826 
827  UsedShmemSegAddr = hdr; /* probably redundant */
828 }
829 
830 /*
831  * PGSharedMemoryNoReAttach
832  *
833  * This is called during startup of a postmaster child process when we choose
834  * *not* to re-attach to the existing shared memory segment. We must clean up
835  * to leave things in the appropriate state. This is not used in the non
836  * EXEC_BACKEND case, either.
837  *
838  * The child process startup logic might or might not call PGSharedMemoryDetach
839  * after this; make sure that it will be a no-op if called.
840  *
841  * UsedShmemSegID and UsedShmemSegAddr are implicit parameters to this
842  * routine. The caller must have already restored them to the postmaster's
843  * values.
844  */
845 void
847 {
848  Assert(UsedShmemSegAddr != NULL);
850 
851 #ifdef __CYGWIN__
852  /* cygipc (currently) appears to not detach on exec. */
854 #endif
855 
856  /* For cleanliness, reset UsedShmemSegAddr to show we're not attached. */
857  UsedShmemSegAddr = NULL;
858  /* And the same for UsedShmemSegID. */
859  UsedShmemSegID = 0;
860 }
861 
862 #endif /* EXEC_BACKEND */
863 
864 /*
865  * PGSharedMemoryDetach
866  *
867  * Detach from the shared memory segment, if still attached. This is not
868  * intended to be called explicitly by the process that originally created the
869  * segment (it will have on_shmem_exit callback(s) registered to do that).
870  * Rather, this is for subprocesses that have inherited an attachment and want
871  * to get rid of it.
872  *
873  * UsedShmemSegID and UsedShmemSegAddr are implicit parameters to this
874  * routine, also AnonymousShmem and AnonymousShmemSize.
875  */
876 void
878 {
879  if (UsedShmemSegAddr != NULL)
880  {
881  if ((shmdt(UsedShmemSegAddr) < 0)
882 #if defined(EXEC_BACKEND) && defined(__CYGWIN__)
883  /* Work-around for cygipc exec bug */
884  && shmdt(NULL) < 0
885 #endif
886  )
887  elog(LOG, "shmdt(%p) failed: %m", UsedShmemSegAddr);
888  UsedShmemSegAddr = NULL;
889  }
890 
891  if (AnonymousShmem != NULL)
892  {
893  if (munmap(AnonymousShmem, AnonymousShmemSize) < 0)
894  elog(LOG, "munmap(%p, %zu) failed: %m",
896  AnonymousShmem = NULL;
897  }
898 }
IpcMemoryState
Definition: sysv_shmem.c:86
pid_t creatorPID
Definition: pg_shmem.h:33
#define MAP_FAILED
Definition: mem.h:45
#define DEBUG1
Definition: elog.h:25
int errhint(const char *fmt,...)
Definition: elog.c:974
static void IpcMemoryDetach(int status, Datum shmaddr)
Definition: sysv_shmem.c:278
#define DatumGetInt32(X)
Definition: postgres.h:472
#define IPC_CREAT
Definition: win32_port.h:82
#define DEBUG3
Definition: elog.h:23
void dsm_cleanup_using_control_segment(dsm_handle old_control_handle)
Definition: dsm.c:206
dsm_handle dsm_control
Definition: pg_shmem.h:36
void AddToDataDirLockFile(int target_line, const char *str)
Definition: miscinit.c:1254
#define PointerGetDatum(X)
Definition: postgres.h:556
#define IPCProtection
Definition: posix_sema.c:58
#define LOCK_FILE_LINE_SHMEM_KEY
Definition: pidfile.h:42
#define PG_SHMAT_FLAGS
Definition: mem.h:20
struct PGShmemHeader PGShmemHeader
int shared_memory_type
Definition: ipci.c:50
PGShmemHeader * PGSharedMemoryCreate(Size size, int port, PGShmemHeader **shim)
Definition: sysv_shmem.c:625
static Size AnonymousShmemSize
Definition: sysv_shmem.c:99
void PGSharedMemoryNoReAttach(void)
Definition: win32_shmem.c:449
int errcode(int sqlerrcode)
Definition: elog.c:570
#define LOG
Definition: elog.h:26
void PGSharedMemoryDetach(void)
Definition: sysv_shmem.c:877
#define sprintf
Definition: port.h:194
static void * InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size)
Definition: sysv_shmem.c:123
#define ERROR
Definition: elog.h:43
#define FATAL
Definition: elog.h:52
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:361
static char * buf
Definition: pg_test_fsync.c:68
#define EIDRM
Definition: win32_port.h:90
#define PG_MMAP_FLAGS
Definition: mem.h:41
key_t IpcMemoryKey
Definition: sysv_shmem.c:72
bool IsUnderPostmaster
Definition: globals.c:109
int errdetail(const char *fmt,...)
Definition: elog.c:860
static IpcMemoryState PGSharedMemoryAttach(IpcMemoryId shmId, void *attachAt, PGShmemHeader **addr)
Definition: sysv_shmem.c:339
void PGSharedMemoryReAttach(void)
Definition: win32_shmem.c:401
int errcode_for_file_access(void)
Definition: elog.c:593
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2207
void * UsedShmemSegAddr
Definition: sysv_shmem.c:97
static void AnonymousShmemDetach(int status, Datum arg)
Definition: sysv_shmem.c:597
#define ereport(elevel, rest)
Definition: elog.h:141
static void IpcMemoryDelete(int status, Datum shmId)
Definition: sysv_shmem.c:290
static int port
Definition: pg_regress.c:92
int32 magic
Definition: pg_shmem.h:31
static void * CreateAnonymousSegment(Size *size)
Definition: sysv_shmem.c:529
#define stat(a, b)
Definition: win32_port.h:264
Size totalsize
Definition: pg_shmem.h:34
uintptr_t Datum
Definition: postgres.h:367
unsigned long UsedShmemSegID
Definition: sysv_shmem.c:96
#define IPC_RMID
Definition: win32_port.h:81
static void * AnonymousShmem
Definition: sysv_shmem.c:100
ino_t inode
Definition: pg_shmem.h:40
dev_t device
Definition: pg_shmem.h:39
#define Assert(condition)
Definition: c.h:732
Definition: regguts.h:298
long key_t
Definition: win32_port.h:242
#define PGShmemMagic
Definition: pg_shmem.h:32
Size freeoffset
Definition: pg_shmem.h:35
size_t Size
Definition: c.h:466
#define MAXALIGN(LEN)
Definition: c.h:685
int IpcMemoryId
Definition: sysv_shmem.c:73
int huge_pages
Definition: guc.c:548
#define IPC_EXCL
Definition: win32_port.h:83
#define DatumGetPointer(X)
Definition: postgres.h:549
bool PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
Definition: sysv_shmem.c:309
int FreeFile(FILE *file)
Definition: fd.c:2406
#define Int32GetDatum(X)
Definition: postgres.h:479
int errmsg(const char *fmt,...)
Definition: elog.c:784
#define elog(elevel,...)
Definition: elog.h:226
void * arg
char * DataDir
Definition: globals.c:62
static void static void status(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:227
#define IPC_STAT
Definition: win32_port.h:86