PostgreSQL Source Code  git master
sysv_sema.c
Go to the documentation of this file.
/*-------------------------------------------------------------------------
 *
 * sysv_sema.c
 *    Implement PGSemaphores using SysV semaphore facilities
 *
 *
 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *    src/backend/port/sysv_sema.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <signal.h>
#include <unistd.h>
#include <sys/file.h>
#include <sys/ipc.h>
#include <sys/sem.h>
#include <sys/stat.h>

#include "miscadmin.h"
#include "storage/ipc.h"
#include "storage/pg_sema.h"
#include "storage/shmem.h"
28 
29 
/*
 * One PGSemaphore: identifies a single SysV semaphore as a
 * (set identifier, index within set) pair.
 */
typedef struct PGSemaphoreData
{
    int         semId;          /* semaphore set identifier */
    int         semNum;         /* semaphore number within set */
} PGSemaphoreData;
35 
/*
 * Argument union for semctl(2).  Declared here only when the platform
 * headers do not already provide it (HAVE_UNION_SEMUN undefined).
 */
#ifndef HAVE_UNION_SEMUN
union semun
{
    int         val;
    struct semid_ds *buf;
    unsigned short *array;
};
#endif
44 
45 typedef key_t IpcSemaphoreKey; /* semaphore key passed to semget(2) */
46 typedef int IpcSemaphoreId; /* semaphore ID returned by semget(2) */
47 
/*
 * SEMAS_PER_SET is the number of useful semaphores in each semaphore set
 * we allocate.  It must be *less than* your kernel's SEMMSL (max semaphores
 * per set) parameter, which is often around 25.  (Less than, because we
 * allocate one extra sema in each set for identification purposes.)
 */
#define SEMAS_PER_SET   16

#define IPCProtection   (0600)  /* access/modify by user only */

#define PGSemaMagic     537     /* must be less than SEMVMX */
59 
60 
61 static PGSemaphore sharedSemas; /* array of PGSemaphoreData in shared memory */
62 static int numSharedSemas; /* number of PGSemaphoreDatas used so far */
63 static int maxSharedSemas; /* allocated size of PGSemaphoreData array */
64 static IpcSemaphoreId *mySemaSets; /* IDs of sema sets acquired so far */
65 static int numSemaSets; /* number of sema sets acquired so far */
66 static int maxSemaSets; /* allocated size of mySemaSets array */
67 static IpcSemaphoreKey nextSemaKey; /* next key to try using */
68 static int nextSemaNumber; /* next free sem num in last sema set */
69 
70 
72  int numSems);
73 static void IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum,
74  int value);
75 static void IpcSemaphoreKill(IpcSemaphoreId semId);
76 static int IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum);
77 static pid_t IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum);
79 static void ReleaseSemaphores(int status, Datum arg);
80 
81 
82 /*
83  * InternalIpcSemaphoreCreate
84  *
85  * Attempt to create a new semaphore set with the specified key.
86  * Will fail (return -1) if such a set already exists.
87  *
88  * If we fail with a failure code other than collision-with-existing-set,
89  * print out an error and abort. Other types of errors suggest nonrecoverable
90  * problems.
91  */
92 static IpcSemaphoreId
94 {
95  int semId;
96 
97  semId = semget(semKey, numSems, IPC_CREAT | IPC_EXCL | IPCProtection);
98 
99  if (semId < 0)
100  {
101  int saved_errno = errno;
102 
103  /*
104  * Fail quietly if error indicates a collision with existing set. One
105  * would expect EEXIST, given that we said IPC_EXCL, but perhaps we
106  * could get a permission violation instead? Also, EIDRM might occur
107  * if an old set is slated for destruction but not gone yet.
108  */
109  if (saved_errno == EEXIST || saved_errno == EACCES
110 #ifdef EIDRM
111  || saved_errno == EIDRM
112 #endif
113  )
114  return -1;
115 
116  /*
117  * Else complain and abort
118  */
119  ereport(FATAL,
120  (errmsg("could not create semaphores: %m"),
121  errdetail("Failed system call was semget(%lu, %d, 0%o).",
122  (unsigned long) semKey, numSems,
124  (saved_errno == ENOSPC) ?
125  errhint("This error does *not* mean that you have run out of disk space. "
126  "It occurs when either the system limit for the maximum number of "
127  "semaphore sets (SEMMNI), or the system wide maximum number of "
128  "semaphores (SEMMNS), would be exceeded. You need to raise the "
129  "respective kernel parameter. Alternatively, reduce PostgreSQL's "
130  "consumption of semaphores by reducing its max_connections parameter.\n"
131  "The PostgreSQL documentation contains more information about "
132  "configuring your system for PostgreSQL.") : 0));
133  }
134 
135  return semId;
136 }
137 
138 /*
139  * Initialize a semaphore to the specified value.
140  */
141 static void
143 {
144  union semun semun;
145 
146  semun.val = value;
147  if (semctl(semId, semNum, SETVAL, semun) < 0)
148  {
149  int saved_errno = errno;
150 
151  ereport(FATAL,
152  (errmsg_internal("semctl(%d, %d, SETVAL, %d) failed: %m",
153  semId, semNum, value),
154  (saved_errno == ERANGE) ?
155  errhint("You possibly need to raise your kernel's SEMVMX value to be at least "
156  "%d. Look into the PostgreSQL documentation for details.",
157  value) : 0));
158  }
159 }
160 
161 /*
162  * IpcSemaphoreKill(semId) - removes a semaphore set
163  */
164 static void
166 {
167  union semun semun;
168 
169  semun.val = 0; /* unused, but keep compiler quiet */
170 
171  if (semctl(semId, 0, IPC_RMID, semun) < 0)
172  elog(LOG, "semctl(%d, 0, IPC_RMID, ...) failed: %m", semId);
173 }
174 
175 /* Get the current value (semval) of the semaphore */
176 static int
178 {
179  union semun dummy; /* for Solaris */
180 
181  dummy.val = 0; /* unused */
182 
183  return semctl(semId, semNum, GETVAL, dummy);
184 }
185 
186 /* Get the PID of the last process to do semop() on the semaphore */
187 static pid_t
189 {
190  union semun dummy; /* for Solaris */
191 
192  dummy.val = 0; /* unused */
193 
194  return semctl(semId, semNum, GETPID, dummy);
195 }
196 
197 
198 /*
199  * Create a semaphore set with the given number of useful semaphores
200  * (an additional sema is actually allocated to serve as identifier).
201  * Dead Postgres sema sets are recycled if found, but we do not fail
202  * upon collision with non-Postgres sema sets.
203  *
204  * The idea here is to detect and re-use keys that may have been assigned
205  * by a crashed postmaster or backend.
206  */
207 static IpcSemaphoreId
209 {
210  IpcSemaphoreId semId;
211  union semun semun;
212  PGSemaphoreData mysema;
213 
214  /* Loop till we find a free IPC key */
215  for (nextSemaKey++;; nextSemaKey++)
216  {
217  pid_t creatorPID;
218 
219  /* Try to create new semaphore set */
221  if (semId >= 0)
222  break; /* successful create */
223 
224  /* See if it looks to be leftover from a dead Postgres process */
225  semId = semget(nextSemaKey, numSems + 1, 0);
226  if (semId < 0)
227  continue; /* failed: must be some other app's */
229  continue; /* sema belongs to a non-Postgres app */
230 
231  /*
232  * If the creator PID is my own PID or does not belong to any extant
233  * process, it's safe to zap it.
234  */
235  creatorPID = IpcSemaphoreGetLastPID(semId, numSems);
236  if (creatorPID <= 0)
237  continue; /* oops, GETPID failed */
238  if (creatorPID != getpid())
239  {
240  if (kill(creatorPID, 0) == 0 || errno != ESRCH)
241  continue; /* sema belongs to a live process */
242  }
243 
244  /*
245  * The sema set appears to be from a dead Postgres process, or from a
246  * previous cycle of life in this same process. Zap it, if possible.
247  * This probably shouldn't fail, but if it does, assume the sema set
248  * belongs to someone else after all, and continue quietly.
249  */
250  semun.val = 0; /* unused, but keep compiler quiet */
251  if (semctl(semId, 0, IPC_RMID, semun) < 0)
252  continue;
253 
254  /*
255  * Now try again to create the sema set.
256  */
258  if (semId >= 0)
259  break; /* successful create */
260 
261  /*
262  * Can only get here if some other process managed to create the same
263  * sema key before we did. Let him have that one, loop around to try
264  * next key.
265  */
266  }
267 
268  /*
269  * OK, we created a new sema set. Mark it as created by this process. We
270  * do this by setting the spare semaphore to PGSemaMagic-1 and then
271  * incrementing it with semop(). That leaves it with value PGSemaMagic
272  * and sempid referencing this process.
273  */
275  mysema.semId = semId;
276  mysema.semNum = numSems;
277  PGSemaphoreUnlock(&mysema);
278 
279  return semId;
280 }
281 
282 
283 /*
284  * Report amount of shared memory needed for semaphores
285  */
286 Size
287 PGSemaphoreShmemSize(int maxSemas)
288 {
289  return mul_size(maxSemas, sizeof(PGSemaphoreData));
290 }
291 
292 /*
293  * PGReserveSemaphores --- initialize semaphore support
294  *
295  * This is called during postmaster start or shared memory reinitialization.
296  * It should do whatever is needed to be able to support up to maxSemas
297  * subsequent PGSemaphoreCreate calls. Also, if any system resources
298  * are acquired here or in PGSemaphoreCreate, register an on_shmem_exit
299  * callback to release them.
300  *
301  * In the SysV implementation, we acquire semaphore sets on-demand; the
302  * maxSemas parameter is just used to size the arrays. There is an array
303  * of PGSemaphoreData structs in shared memory, and a postmaster-local array
304  * with one entry per SysV semaphore set, which we use for releasing the
305  * semaphore sets when done. (This design ensures that postmaster shutdown
306  * doesn't rely on the contents of shared memory, which a failed backend might
307  * have clobbered.)
308  */
309 void
310 PGReserveSemaphores(int maxSemas)
311 {
312  struct stat statbuf;
313 
314  /*
315  * We use the data directory's inode number to seed the search for free
316  * semaphore keys. This minimizes the odds of collision with other
317  * postmasters, while maximizing the odds that we will detect and clean up
318  * semaphores left over from a crashed postmaster in our own directory.
319  */
320  if (stat(DataDir, &statbuf) < 0)
321  ereport(FATAL,
323  errmsg("could not stat data directory \"%s\": %m",
324  DataDir)));
325 
326  /*
327  * We must use ShmemAllocUnlocked(), since the spinlock protecting
328  * ShmemAlloc() won't be ready yet. (This ordering is necessary when we
329  * are emulating spinlocks with semaphores.)
330  */
333  numSharedSemas = 0;
334  maxSharedSemas = maxSemas;
335 
336  maxSemaSets = (maxSemas + SEMAS_PER_SET - 1) / SEMAS_PER_SET;
338  malloc(maxSemaSets * sizeof(IpcSemaphoreId));
339  if (mySemaSets == NULL)
340  elog(PANIC, "out of memory");
341  numSemaSets = 0;
342  nextSemaKey = statbuf.st_ino;
343  nextSemaNumber = SEMAS_PER_SET; /* force sema set alloc on 1st call */
344 
346 }
347 
348 /*
349  * Release semaphores at shutdown or shmem reinitialization
350  *
351  * (called as an on_shmem_exit callback, hence funny argument list)
352  */
353 static void
355 {
356  int i;
357 
358  for (i = 0; i < numSemaSets; i++)
360  free(mySemaSets);
361 }
362 
363 /*
364  * PGSemaphoreCreate
365  *
366  * Allocate a PGSemaphore structure with initial count 1
367  */
370 {
371  PGSemaphore sema;
372 
373  /* Can't do this in a backend, because static state is postmaster's */
375 
377  {
378  /* Time to allocate another semaphore set */
379  if (numSemaSets >= maxSemaSets)
380  elog(PANIC, "too many semaphores created");
382  numSemaSets++;
383  nextSemaNumber = 0;
384  }
385  /* Use the next shared PGSemaphoreData */
387  elog(PANIC, "too many semaphores created");
388  sema = &sharedSemas[numSharedSemas++];
389  /* Assign the next free semaphore in the current set */
390  sema->semId = mySemaSets[numSemaSets - 1];
391  sema->semNum = nextSemaNumber++;
392  /* Initialize it to count 1 */
393  IpcSemaphoreInitialize(sema->semId, sema->semNum, 1);
394 
395  return sema;
396 }
397 
398 /*
399  * PGSemaphoreReset
400  *
401  * Reset a previously-initialized PGSemaphore to have count 0
402  */
403 void
405 {
406  IpcSemaphoreInitialize(sema->semId, sema->semNum, 0);
407 }
408 
409 /*
410  * PGSemaphoreLock
411  *
412  * Lock a semaphore (decrement count), blocking if count would be < 0
413  */
414 void
416 {
417  int errStatus;
418  struct sembuf sops;
419 
420  sops.sem_op = -1; /* decrement */
421  sops.sem_flg = 0;
422  sops.sem_num = sema->semNum;
423 
424  /*
425  * Note: if errStatus is -1 and errno == EINTR then it means we returned
426  * from the operation prematurely because we were sent a signal. So we
427  * try and lock the semaphore again.
428  *
429  * We used to check interrupts here, but that required servicing
430  * interrupts directly from signal handlers. Which is hard to do safely
431  * and portably.
432  */
433  do
434  {
435  errStatus = semop(sema->semId, &sops, 1);
436  } while (errStatus < 0 && errno == EINTR);
437 
438  if (errStatus < 0)
439  elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
440 }
441 
442 /*
443  * PGSemaphoreUnlock
444  *
445  * Unlock a semaphore (increment count)
446  */
447 void
449 {
450  int errStatus;
451  struct sembuf sops;
452 
453  sops.sem_op = 1; /* increment */
454  sops.sem_flg = 0;
455  sops.sem_num = sema->semNum;
456 
457  /*
458  * Note: if errStatus is -1 and errno == EINTR then it means we returned
459  * from the operation prematurely because we were sent a signal. So we
460  * try and unlock the semaphore again. Not clear this can really happen,
461  * but might as well cope.
462  */
463  do
464  {
465  errStatus = semop(sema->semId, &sops, 1);
466  } while (errStatus < 0 && errno == EINTR);
467 
468  if (errStatus < 0)
469  elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
470 }
471 
472 /*
473  * PGSemaphoreTryLock
474  *
475  * Lock a semaphore only if able to do so without blocking
476  */
477 bool
479 {
480  int errStatus;
481  struct sembuf sops;
482 
483  sops.sem_op = -1; /* decrement */
484  sops.sem_flg = IPC_NOWAIT; /* but don't block */
485  sops.sem_num = sema->semNum;
486 
487  /*
488  * Note: if errStatus is -1 and errno == EINTR then it means we returned
489  * from the operation prematurely because we were sent a signal. So we
490  * try and lock the semaphore again.
491  */
492  do
493  {
494  errStatus = semop(sema->semId, &sops, 1);
495  } while (errStatus < 0 && errno == EINTR);
496 
497  if (errStatus < 0)
498  {
499  /* Expect EAGAIN or EWOULDBLOCK (platform-dependent) */
500 #ifdef EAGAIN
501  if (errno == EAGAIN)
502  return false; /* failed to lock it */
503 #endif
504 #if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
505  if (errno == EWOULDBLOCK)
506  return false; /* failed to lock it */
507 #endif
508  /* Otherwise we got trouble */
509  elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
510  }
511 
512  return true;
513 }
size_t Size
Definition: c.h:592
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1159
int errcode_for_file_access(void)
Definition: elog.c:882
int errdetail(const char *fmt,...)
Definition: elog.c:1205
int errhint(const char *fmt,...)
Definition: elog.c:1319
int errmsg(const char *fmt,...)
Definition: elog.c:1072
#define LOG
Definition: elog.h:31
#define FATAL
Definition: elog.h:41
#define PANIC
Definition: elog.h:42
#define elog(elevel,...)
Definition: elog.h:224
#define ereport(elevel,...)
Definition: elog.h:149
bool IsUnderPostmaster
Definition: globals.c:117
char * DataDir
Definition: globals.c:68
#define free(a)
Definition: header.h:65
#define malloc(a)
Definition: header.h:50
static struct @150 value
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:365
int i
Definition: isn.c:73
Assert(fmt[strlen(fmt) - 1] !='\n')
void * arg
struct PGSemaphoreData * PGSemaphore
Definition: pg_sema.h:34
static int numSems
Definition: posix_sema.c:66
uintptr_t Datum
Definition: postgres.h:64
Size mul_size(Size s1, Size s2)
Definition: shmem.c:510
void * ShmemAllocUnlocked(Size size)
Definition: shmem.c:238
_ino_t st_ino
Definition: win32_port.h:267
Size PGSemaphoreShmemSize(int maxSemas)
Definition: sysv_sema.c:287
#define PGSemaMagic
Definition: sysv_sema.c:58
void PGSemaphoreUnlock(PGSemaphore sema)
Definition: sysv_sema.c:448
struct PGSemaphoreData PGSemaphoreData
static pid_t IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum)
Definition: sysv_sema.c:188
static int IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum)
Definition: sysv_sema.c:177
key_t IpcSemaphoreKey
Definition: sysv_sema.c:45
int IpcSemaphoreId
Definition: sysv_sema.c:46
void PGReserveSemaphores(int maxSemas)
Definition: sysv_sema.c:310
static void IpcSemaphoreKill(IpcSemaphoreId semId)
Definition: sysv_sema.c:165
static int maxSharedSemas
Definition: sysv_sema.c:63
#define SEMAS_PER_SET
Definition: sysv_sema.c:54
void PGSemaphoreReset(PGSemaphore sema)
Definition: sysv_sema.c:404
void PGSemaphoreLock(PGSemaphore sema)
Definition: sysv_sema.c:415
#define IPCProtection
Definition: sysv_sema.c:56
static IpcSemaphoreKey nextSemaKey
Definition: sysv_sema.c:67
static int numSemaSets
Definition: sysv_sema.c:65
bool PGSemaphoreTryLock(PGSemaphore sema)
Definition: sysv_sema.c:478
static int nextSemaNumber
Definition: sysv_sema.c:68
static PGSemaphore sharedSemas
Definition: sysv_sema.c:61
static IpcSemaphoreId InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey, int numSems)
Definition: sysv_sema.c:93
static int maxSemaSets
Definition: sysv_sema.c:66
PGSemaphore PGSemaphoreCreate(void)
Definition: sysv_sema.c:369
static int numSharedSemas
Definition: sysv_sema.c:62
static void ReleaseSemaphores(int status, Datum arg)
Definition: sysv_sema.c:354
static IpcSemaphoreId * mySemaSets
Definition: sysv_sema.c:64
static IpcSemaphoreId IpcSemaphoreCreate(int numSems)
Definition: sysv_sema.c:208
static void IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum, int value)
Definition: sysv_sema.c:142
int val
Definition: sysv_sema.c:39
struct semid_ds * buf
Definition: sysv_sema.c:40
unsigned short * array
Definition: sysv_sema.c:41
#define stat
Definition: win32_port.h:284
#define SETVAL
Definition: win32_port.h:110
#define EINTR
Definition: win32_port.h:374
#define EWOULDBLOCK
Definition: win32_port.h:380
#define IPC_NOWAIT
Definition: win32_port.h:99
#define IPC_RMID
Definition: win32_port.h:95
#define GETPID
Definition: win32_port.h:111
#define kill(pid, sig)
Definition: win32_port.h:485
long key_t
Definition: win32_port.h:247
#define IPC_EXCL
Definition: win32_port.h:97
#define IPC_CREAT
Definition: win32_port.h:96
#define EIDRM
Definition: win32_port.h:104
#define GETVAL
Definition: win32_port.h:109
#define EAGAIN
Definition: win32_port.h:372