PostgreSQL Source Code  git master
sysv_sema.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * sysv_sema.c
4  * Implement PGSemaphores using SysV semaphore facilities
5  *
6  *
7  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  * src/backend/port/sysv_sema.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include <signal.h>
18 #include <unistd.h>
19 #include <sys/file.h>
20 #include <sys/stat.h>
21 #ifdef HAVE_SYS_IPC_H
22 #include <sys/ipc.h>
23 #endif
24 #ifdef HAVE_SYS_SEM_H
25 #include <sys/sem.h>
26 #endif
27 
28 #include "miscadmin.h"
29 #include "storage/ipc.h"
30 #include "storage/pg_sema.h"
31 #include "storage/shmem.h"
32 
33 
/*
 * PGSemaphoreData
 *
 * Identifies a single SysV semaphore: the semaphore set it belongs to and
 * its index within that set.
 */
typedef struct PGSemaphoreData
{
	int			semId;			/* semaphore set identifier */
	int			semNum;			/* semaphore number within set */
} PGSemaphoreData;
39 
/*
 * Argument union for semctl().  It is declared here only when the build
 * configuration has not defined HAVE_UNION_SEMUN.
 */
#ifndef HAVE_UNION_SEMUN
union semun
{
	int			val;			/* integer argument (used with SETVAL here) */
	struct semid_ds *buf;		/* pointer to a semid_ds structure */
	unsigned short *array;		/* pointer to an array of unsigned shorts */
};
#endif
48 
49 typedef key_t IpcSemaphoreKey; /* semaphore key passed to semget(2) */
50 typedef int IpcSemaphoreId; /* semaphore ID returned by semget(2) */
51 
52 /*
53  * SEMAS_PER_SET is the number of useful semaphores in each semaphore set
54  * we allocate. It must be *less than* your kernel's SEMMSL (max semaphores
55  * per set) parameter, which is often around 25. (Less than, because we
56  * allocate one extra sema in each set for identification purposes.)
57  */
58 #define SEMAS_PER_SET 16
59 
60 #define IPCProtection (0600) /* access/modify by user only */
61 
62 #define PGSemaMagic 537 /* must be less than SEMVMX */
63 
64 
65 static PGSemaphore sharedSemas; /* array of PGSemaphoreData in shared memory */
66 static int numSharedSemas; /* number of PGSemaphoreDatas used so far */
67 static int maxSharedSemas; /* allocated size of PGSemaphoreData array */
68 static IpcSemaphoreId *mySemaSets; /* IDs of sema sets acquired so far */
69 static int numSemaSets; /* number of sema sets acquired so far */
70 static int maxSemaSets; /* allocated size of mySemaSets array */
71 static IpcSemaphoreKey nextSemaKey; /* next key to try using */
72 static int nextSemaNumber; /* next free sem num in last sema set */
73 
74 
76  int numSems);
78  int value);
83 static void ReleaseSemaphores(int status, Datum arg);
84 
85 
86 /*
87  * InternalIpcSemaphoreCreate
88  *
89  * Attempt to create a new semaphore set with the specified key.
90  * Will fail (return -1) if such a set already exists.
91  *
92  * If we fail with a failure code other than collision-with-existing-set,
93  * print out an error and abort. Other types of errors suggest nonrecoverable
94  * problems.
95  */
96 static IpcSemaphoreId
98 {
99  int semId;
100 
101  semId = semget(semKey, numSems, IPC_CREAT | IPC_EXCL | IPCProtection);
102 
103  if (semId < 0)
104  {
105  int saved_errno = errno;
106 
107  /*
108  * Fail quietly if error indicates a collision with existing set. One
109  * would expect EEXIST, given that we said IPC_EXCL, but perhaps we
110  * could get a permission violation instead? Also, EIDRM might occur
111  * if an old set is slated for destruction but not gone yet.
112  */
113  if (saved_errno == EEXIST || saved_errno == EACCES
114 #ifdef EIDRM
115  || saved_errno == EIDRM
116 #endif
117  )
118  return -1;
119 
120  /*
121  * Else complain and abort
122  */
123  ereport(FATAL,
124  (errmsg("could not create semaphores: %m"),
125  errdetail("Failed system call was semget(%lu, %d, 0%o).",
126  (unsigned long) semKey, numSems,
128  (saved_errno == ENOSPC) ?
129  errhint("This error does *not* mean that you have run out of disk space. "
130  "It occurs when either the system limit for the maximum number of "
131  "semaphore sets (SEMMNI), or the system wide maximum number of "
132  "semaphores (SEMMNS), would be exceeded. You need to raise the "
133  "respective kernel parameter. Alternatively, reduce PostgreSQL's "
134  "consumption of semaphores by reducing its max_connections parameter.\n"
135  "The PostgreSQL documentation contains more information about "
136  "configuring your system for PostgreSQL.") : 0));
137  }
138 
139  return semId;
140 }
141 
142 /*
143  * Initialize a semaphore to the specified value.
144  */
145 static void
147 {
148  union semun semun;
149 
150  semun.val = value;
151  if (semctl(semId, semNum, SETVAL, semun) < 0)
152  {
153  int saved_errno = errno;
154 
155  ereport(FATAL,
156  (errmsg_internal("semctl(%d, %d, SETVAL, %d) failed: %m",
157  semId, semNum, value),
158  (saved_errno == ERANGE) ?
159  errhint("You possibly need to raise your kernel's SEMVMX value to be at least "
160  "%d. Look into the PostgreSQL documentation for details.",
161  value) : 0));
162  }
163 }
164 
165 /*
166  * IpcSemaphoreKill(semId) - removes a semaphore set
167  */
168 static void
170 {
171  union semun semun;
172 
173  semun.val = 0; /* unused, but keep compiler quiet */
174 
175  if (semctl(semId, 0, IPC_RMID, semun) < 0)
176  elog(LOG, "semctl(%d, 0, IPC_RMID, ...) failed: %m", semId);
177 }
178 
179 /* Get the current value (semval) of the semaphore */
180 static int
182 {
183  union semun dummy; /* for Solaris */
184 
185  dummy.val = 0; /* unused */
186 
187  return semctl(semId, semNum, GETVAL, dummy);
188 }
189 
190 /* Get the PID of the last process to do semop() on the semaphore */
191 static pid_t
193 {
194  union semun dummy; /* for Solaris */
195 
196  dummy.val = 0; /* unused */
197 
198  return semctl(semId, semNum, GETPID, dummy);
199 }
200 
201 
202 /*
203  * Create a semaphore set with the given number of useful semaphores
204  * (an additional sema is actually allocated to serve as identifier).
205  * Dead Postgres sema sets are recycled if found, but we do not fail
206  * upon collision with non-Postgres sema sets.
207  *
208  * The idea here is to detect and re-use keys that may have been assigned
209  * by a crashed postmaster or backend.
210  */
211 static IpcSemaphoreId
213 {
215  union semun semun;
216  PGSemaphoreData mysema;
217 
218  /* Loop till we find a free IPC key */
219  for (nextSemaKey++;; nextSemaKey++)
220  {
221  pid_t creatorPID;
222 
223  /* Try to create new semaphore set */
224  semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1);
225  if (semId >= 0)
226  break; /* successful create */
227 
228  /* See if it looks to be leftover from a dead Postgres process */
229  semId = semget(nextSemaKey, numSems + 1, 0);
230  if (semId < 0)
231  continue; /* failed: must be some other app's */
232  if (IpcSemaphoreGetValue(semId, numSems) != PGSemaMagic)
233  continue; /* sema belongs to a non-Postgres app */
234 
235  /*
236  * If the creator PID is my own PID or does not belong to any extant
237  * process, it's safe to zap it.
238  */
239  creatorPID = IpcSemaphoreGetLastPID(semId, numSems);
240  if (creatorPID <= 0)
241  continue; /* oops, GETPID failed */
242  if (creatorPID != getpid())
243  {
244  if (kill(creatorPID, 0) == 0 || errno != ESRCH)
245  continue; /* sema belongs to a live process */
246  }
247 
248  /*
249  * The sema set appears to be from a dead Postgres process, or from a
250  * previous cycle of life in this same process. Zap it, if possible.
251  * This probably shouldn't fail, but if it does, assume the sema set
252  * belongs to someone else after all, and continue quietly.
253  */
254  semun.val = 0; /* unused, but keep compiler quiet */
255  if (semctl(semId, 0, IPC_RMID, semun) < 0)
256  continue;
257 
258  /*
259  * Now try again to create the sema set.
260  */
261  semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1);
262  if (semId >= 0)
263  break; /* successful create */
264 
265  /*
266  * Can only get here if some other process managed to create the same
267  * sema key before we did. Let him have that one, loop around to try
268  * next key.
269  */
270  }
271 
272  /*
273  * OK, we created a new sema set. Mark it as created by this process. We
274  * do this by setting the spare semaphore to PGSemaMagic-1 and then
275  * incrementing it with semop(). That leaves it with value PGSemaMagic
276  * and sempid referencing this process.
277  */
278  IpcSemaphoreInitialize(semId, numSems, PGSemaMagic - 1);
279  mysema.semId = semId;
280  mysema.semNum = numSems;
281  PGSemaphoreUnlock(&mysema);
282 
283  return semId;
284 }
285 
286 
287 /*
288  * Report amount of shared memory needed for semaphores
289  */
290 Size
291 PGSemaphoreShmemSize(int maxSemas)
292 {
293  return mul_size(maxSemas, sizeof(PGSemaphoreData));
294 }
295 
296 /*
297  * PGReserveSemaphores --- initialize semaphore support
298  *
299  * This is called during postmaster start or shared memory reinitialization.
300  * It should do whatever is needed to be able to support up to maxSemas
301  * subsequent PGSemaphoreCreate calls. Also, if any system resources
302  * are acquired here or in PGSemaphoreCreate, register an on_shmem_exit
303  * callback to release them.
304  *
305  * In the SysV implementation, we acquire semaphore sets on-demand; the
306  * maxSemas parameter is just used to size the arrays. There is an array
307  * of PGSemaphoreData structs in shared memory, and a postmaster-local array
308  * with one entry per SysV semaphore set, which we use for releasing the
309  * semaphore sets when done. (This design ensures that postmaster shutdown
310  * doesn't rely on the contents of shared memory, which a failed backend might
311  * have clobbered.)
312  */
313 void
314 PGReserveSemaphores(int maxSemas)
315 {
316  struct stat statbuf;
317 
318  /*
319  * We use the data directory's inode number to seed the search for free
320  * semaphore keys. This minimizes the odds of collision with other
321  * postmasters, while maximizing the odds that we will detect and clean up
322  * semaphores left over from a crashed postmaster in our own directory.
323  */
324  if (stat(DataDir, &statbuf) < 0)
325  ereport(FATAL,
327  errmsg("could not stat data directory \"%s\": %m",
328  DataDir)));
329 
330  /*
331  * We must use ShmemAllocUnlocked(), since the spinlock protecting
332  * ShmemAlloc() won't be ready yet. (This ordering is necessary when we
333  * are emulating spinlocks with semaphores.)
334  */
335  sharedSemas = (PGSemaphore)
337  numSharedSemas = 0;
338  maxSharedSemas = maxSemas;
339 
340  maxSemaSets = (maxSemas + SEMAS_PER_SET - 1) / SEMAS_PER_SET;
342  malloc(maxSemaSets * sizeof(IpcSemaphoreId));
343  if (mySemaSets == NULL)
344  elog(PANIC, "out of memory");
345  numSemaSets = 0;
346  nextSemaKey = statbuf.st_ino;
347  nextSemaNumber = SEMAS_PER_SET; /* force sema set alloc on 1st call */
348 
350 }
351 
352 /*
353  * Release semaphores at shutdown or shmem reinitialization
354  *
355  * (called as an on_shmem_exit callback, hence funny argument list)
356  */
357 static void
359 {
360  int i;
361 
362  for (i = 0; i < numSemaSets; i++)
364  free(mySemaSets);
365 }
366 
367 /*
368  * PGSemaphoreCreate
369  *
370  * Allocate a PGSemaphore structure with initial count 1
371  */
374 {
375  PGSemaphore sema;
376 
377  /* Can't do this in a backend, because static state is postmaster's */
379 
381  {
382  /* Time to allocate another semaphore set */
383  if (numSemaSets >= maxSemaSets)
384  elog(PANIC, "too many semaphores created");
386  numSemaSets++;
387  nextSemaNumber = 0;
388  }
389  /* Use the next shared PGSemaphoreData */
391  elog(PANIC, "too many semaphores created");
392  sema = &sharedSemas[numSharedSemas++];
393  /* Assign the next free semaphore in the current set */
394  sema->semId = mySemaSets[numSemaSets - 1];
395  sema->semNum = nextSemaNumber++;
396  /* Initialize it to count 1 */
397  IpcSemaphoreInitialize(sema->semId, sema->semNum, 1);
398 
399  return sema;
400 }
401 
402 /*
403  * PGSemaphoreReset
404  *
405  * Reset a previously-initialized PGSemaphore to have count 0
406  */
407 void
409 {
410  IpcSemaphoreInitialize(sema->semId, sema->semNum, 0);
411 }
412 
413 /*
414  * PGSemaphoreLock
415  *
416  * Lock a semaphore (decrement count), blocking if count would be < 0
417  */
418 void
420 {
421  int errStatus;
422  struct sembuf sops;
423 
424  sops.sem_op = -1; /* decrement */
425  sops.sem_flg = 0;
426  sops.sem_num = sema->semNum;
427 
428  /*
429  * Note: if errStatus is -1 and errno == EINTR then it means we returned
430  * from the operation prematurely because we were sent a signal. So we
431  * try and lock the semaphore again.
432  *
433  * We used to check interrupts here, but that required servicing
434  * interrupts directly from signal handlers. Which is hard to do safely
435  * and portably.
436  */
437  do
438  {
439  errStatus = semop(sema->semId, &sops, 1);
440  } while (errStatus < 0 && errno == EINTR);
441 
442  if (errStatus < 0)
443  elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
444 }
445 
446 /*
447  * PGSemaphoreUnlock
448  *
449  * Unlock a semaphore (increment count)
450  */
451 void
453 {
454  int errStatus;
455  struct sembuf sops;
456 
457  sops.sem_op = 1; /* increment */
458  sops.sem_flg = 0;
459  sops.sem_num = sema->semNum;
460 
461  /*
462  * Note: if errStatus is -1 and errno == EINTR then it means we returned
463  * from the operation prematurely because we were sent a signal. So we
464  * try and unlock the semaphore again. Not clear this can really happen,
465  * but might as well cope.
466  */
467  do
468  {
469  errStatus = semop(sema->semId, &sops, 1);
470  } while (errStatus < 0 && errno == EINTR);
471 
472  if (errStatus < 0)
473  elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
474 }
475 
476 /*
477  * PGSemaphoreTryLock
478  *
479  * Lock a semaphore only if able to do so without blocking
480  */
481 bool
483 {
484  int errStatus;
485  struct sembuf sops;
486 
487  sops.sem_op = -1; /* decrement */
488  sops.sem_flg = IPC_NOWAIT; /* but don't block */
489  sops.sem_num = sema->semNum;
490 
491  /*
492  * Note: if errStatus is -1 and errno == EINTR then it means we returned
493  * from the operation prematurely because we were sent a signal. So we
494  * try and lock the semaphore again.
495  */
496  do
497  {
498  errStatus = semop(sema->semId, &sops, 1);
499  } while (errStatus < 0 && errno == EINTR);
500 
501  if (errStatus < 0)
502  {
503  /* Expect EAGAIN or EWOULDBLOCK (platform-dependent) */
504 #ifdef EAGAIN
505  if (errno == EAGAIN)
506  return false; /* failed to lock it */
507 #endif
508 #if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
509  if (errno == EWOULDBLOCK)
510  return false; /* failed to lock it */
511 #endif
512  /* Otherwise we got trouble */
513  elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
514  }
515 
516  return true;
517 }
key_t IpcSemaphoreKey
Definition: sysv_sema.c:49
void PGSemaphoreUnlock(PGSemaphore sema)
Definition: sysv_sema.c:452
static void IpcSemaphoreKill(IpcSemaphoreId semId)
Definition: sysv_sema.c:169
int errhint(const char *fmt,...)
Definition: elog.c:1071
static int maxSemaSets
Definition: sysv_sema.c:70
#define IPC_CREAT
Definition: win32_port.h:82
static void ReleaseSemaphores(int status, Datum arg)
Definition: sysv_sema.c:358
#define EAGAIN
Definition: win32_port.h:321
#define SEMAS_PER_SET
Definition: sysv_sema.c:58
bool PGSemaphoreTryLock(PGSemaphore sema)
Definition: sysv_sema.c:482
void PGSemaphoreReset(PGSemaphore sema)
Definition: sysv_sema.c:408
void PGReserveSemaphores(int maxSemas)
Definition: sysv_sema.c:314
#define GETVAL
Definition: win32_port.h:95
static int numSems
Definition: posix_sema.c:66
static struct @145 value
#define kill(pid, sig)
Definition: win32_port.h:426
#define LOG
Definition: elog.h:26
static int maxSharedSemas
Definition: sysv_sema.c:67
static PGSemaphore sharedSemas
Definition: sysv_sema.c:65
#define PANIC
Definition: elog.h:53
int IpcSemaphoreId
Definition: sysv_sema.c:50
#define malloc(a)
Definition: header.h:50
static int nextSemaNumber
Definition: sysv_sema.c:72
#define GETPID
Definition: win32_port.h:97
static void IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum, int value)
Definition: sysv_sema.c:146
#define FATAL
Definition: elog.h:52
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:361
#define EIDRM
Definition: win32_port.h:90
bool IsUnderPostmaster
Definition: globals.c:109
int errdetail(const char *fmt,...)
Definition: elog.c:957
int errcode_for_file_access(void)
Definition: elog.c:631
#define PGSemaMagic
Definition: sysv_sema.c:62
int val
Definition: sysv_sema.c:43
#define ereport(elevel, rest)
Definition: elog.h:141
#define stat(a, b)
Definition: win32_port.h:255
Size mul_size(Size s1, Size s2)
Definition: shmem.c:492
void PGSemaphoreLock(PGSemaphore sema)
Definition: sysv_sema.c:419
static int numSharedSemas
Definition: sysv_sema.c:66
uintptr_t Datum
Definition: postgres.h:367
void * ShmemAllocUnlocked(Size size)
Definition: shmem.c:227
#define IPC_RMID
Definition: win32_port.h:81
static IpcSemaphoreId * mySemaSets
Definition: sysv_sema.c:68
static int IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum)
Definition: sysv_sema.c:181
#define free(a)
Definition: header.h:65
int errmsg_internal(const char *fmt,...)
Definition: elog.c:911
struct PGSemaphoreData PGSemaphoreData
#define Assert(condition)
Definition: c.h:733
#define IPC_NOWAIT
Definition: win32_port.h:85
long key_t
Definition: win32_port.h:233
size_t Size
Definition: c.h:467
#define IPC_EXCL
Definition: win32_port.h:83
struct semid_ds * buf
Definition: sysv_sema.c:44
#define IPCProtection
Definition: sysv_sema.c:60
int errmsg(const char *fmt,...)
Definition: elog.c:822
Size PGSemaphoreShmemSize(int maxSemas)
Definition: sysv_sema.c:291
struct PGSemaphoreData * PGSemaphore
Definition: pg_sema.h:34
#define elog(elevel,...)
Definition: elog.h:228
int i
static IpcSemaphoreId InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey, int numSems)
Definition: sysv_sema.c:97
void * arg
char * DataDir
Definition: globals.c:62
static IpcSemaphoreKey nextSemaKey
Definition: sysv_sema.c:71
#define EWOULDBLOCK
Definition: win32_port.h:329
static IpcSemaphoreId IpcSemaphoreCreate(int numSems)
Definition: sysv_sema.c:212
PGSemaphore PGSemaphoreCreate(void)
Definition: sysv_sema.c:373
#define EINTR
Definition: win32_port.h:323
static void static void status(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:226
unsigned short * array
Definition: sysv_sema.c:45
#define SETVAL
Definition: win32_port.h:96
static int numSemaSets
Definition: sysv_sema.c:69
static pid_t IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum)
Definition: sysv_sema.c:192