PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
sysv_sema.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * sysv_sema.c
4 * Implement PGSemaphores using SysV semaphore facilities
5 *
6 *
7 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
9 *
10 * IDENTIFICATION
11 * src/backend/port/sysv_sema.c
12 *
13 *-------------------------------------------------------------------------
14 */
15#include "postgres.h"
16
17#include <signal.h>
18#include <unistd.h>
19#include <sys/file.h>
20#include <sys/ipc.h>
21#include <sys/sem.h>
22#include <sys/stat.h>
23
24#include "miscadmin.h"
25#include "storage/ipc.h"
26#include "storage/pg_sema.h"
27#include "storage/shmem.h"
28
29
/*
 * PGSemaphoreData names one SysV semaphore: the semaphore set it belongs to
 * and its index within that set.
 */
typedef struct PGSemaphoreData
{
    int         semId;          /* semaphore set identifier */
    int         semNum;         /* semaphore number within set */
} PGSemaphoreData;
35
/*
 * Fallback declaration of union semun, the type of the fourth argument this
 * file passes to semctl().  It is only compiled when HAVE_UNION_SEMUN is not
 * defined (presumably a build-time probe sets that macro when the system
 * headers already declare the union -- confirm against the build system).
 */
#ifndef HAVE_UNION_SEMUN
union semun
{
    int         val;
    struct semid_ds *buf;
    unsigned short *array;
};
#endif
44
typedef key_t IpcSemaphoreKey;  /* semaphore key passed to semget(2) */
typedef int IpcSemaphoreId;     /* semaphore ID returned by semget(2) */
47
/*
 * SEMAS_PER_SET is the number of useful semaphores in each semaphore set
 * we allocate. It must be *less than* your kernel's SEMMSL (max semaphores
 * per set) parameter, which is often around 25. (Less than, because we
 * allocate one extra sema in each set for identification purposes.)
 *
 * The present value of 19 is chosen with one eye on NetBSD/OpenBSD's default
 * SEMMNS setting of 60. Remembering the extra sema per set, this lets us
 * allocate three sets with 57 useful semaphores before exceeding that, which
 * is enough to run our core regression tests. Users of those systems will
 * still want to raise SEMMNS for any sort of production work, though.
 */
#define SEMAS_PER_SET   19

#define IPCProtection   (0600)  /* access/modify by user only */

#define PGSemaMagic     537     /* must be less than SEMVMX */
65
66
67static PGSemaphore sharedSemas; /* array of PGSemaphoreData in shared memory */
68static int numSharedSemas; /* number of PGSemaphoreDatas used so far */
69static int maxSharedSemas; /* allocated size of PGSemaphoreData array */
70static IpcSemaphoreId *mySemaSets; /* IDs of sema sets acquired so far */
71static int numSemaSets; /* number of sema sets acquired so far */
72static int maxSemaSets; /* allocated size of mySemaSets array */
73static IpcSemaphoreKey nextSemaKey; /* next key to try using */
74static int nextSemaNumber; /* next free sem num in last sema set */
75
76
78 int numSems);
79static void IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum,
80 int value);
81static void IpcSemaphoreKill(IpcSemaphoreId semId);
82static int IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum);
83static pid_t IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum);
85static void ReleaseSemaphores(int status, Datum arg);
86
87
88/*
89 * InternalIpcSemaphoreCreate
90 *
91 * Attempt to create a new semaphore set with the specified key.
92 * Will fail (return -1) if such a set already exists.
93 *
94 * If we fail with a failure code other than collision-with-existing-set,
95 * print out an error and abort. Other types of errors suggest nonrecoverable
96 * problems.
97 */
98static IpcSemaphoreId
100{
101 int semId;
102
103 semId = semget(semKey, numSems, IPC_CREAT | IPC_EXCL | IPCProtection);
104
105 if (semId < 0)
106 {
107 int saved_errno = errno;
108
109 /*
110 * Fail quietly if error indicates a collision with existing set. One
111 * would expect EEXIST, given that we said IPC_EXCL, but perhaps we
112 * could get a permission violation instead? Also, EIDRM might occur
113 * if an old set is slated for destruction but not gone yet.
114 */
115 if (saved_errno == EEXIST || saved_errno == EACCES
116#ifdef EIDRM
117 || saved_errno == EIDRM
118#endif
119 )
120 return -1;
121
122 /*
123 * Else complain and abort
124 */
126 (errmsg("could not create semaphores: %m"),
127 errdetail("Failed system call was semget(%lu, %d, 0%o).",
128 (unsigned long) semKey, numSems,
130 (saved_errno == ENOSPC) ?
131 errhint("This error does *not* mean that you have run out of disk space. "
132 "It occurs when either the system limit for the maximum number of "
133 "semaphore sets (SEMMNI), or the system wide maximum number of "
134 "semaphores (SEMMNS), would be exceeded. You need to raise the "
135 "respective kernel parameter. Alternatively, reduce PostgreSQL's "
136 "consumption of semaphores by reducing its \"max_connections\" parameter.\n"
137 "The PostgreSQL documentation contains more information about "
138 "configuring your system for PostgreSQL.") : 0));
139 }
140
141 return semId;
142}
143
144/*
145 * Initialize a semaphore to the specified value.
146 */
147static void
149{
150 union semun semun;
151
152 semun.val = value;
153 if (semctl(semId, semNum, SETVAL, semun) < 0)
154 {
155 int saved_errno = errno;
156
158 (errmsg_internal("semctl(%d, %d, SETVAL, %d) failed: %m",
159 semId, semNum, value),
160 (saved_errno == ERANGE) ?
161 errhint("You possibly need to raise your kernel's SEMVMX value to be at least "
162 "%d. Look into the PostgreSQL documentation for details.",
163 value) : 0));
164 }
165}
166
167/*
168 * IpcSemaphoreKill(semId) - removes a semaphore set
169 */
170static void
172{
173 union semun semun;
174
175 semun.val = 0; /* unused, but keep compiler quiet */
176
177 if (semctl(semId, 0, IPC_RMID, semun) < 0)
178 elog(LOG, "semctl(%d, 0, IPC_RMID, ...) failed: %m", semId);
179}
180
181/* Get the current value (semval) of the semaphore */
182static int
184{
185 union semun dummy; /* for Solaris */
186
187 dummy.val = 0; /* unused */
188
189 return semctl(semId, semNum, GETVAL, dummy);
190}
191
192/* Get the PID of the last process to do semop() on the semaphore */
193static pid_t
195{
196 union semun dummy; /* for Solaris */
197
198 dummy.val = 0; /* unused */
199
200 return semctl(semId, semNum, GETPID, dummy);
201}
202
203
204/*
205 * Create a semaphore set with the given number of useful semaphores
206 * (an additional sema is actually allocated to serve as identifier).
207 * Dead Postgres sema sets are recycled if found, but we do not fail
208 * upon collision with non-Postgres sema sets.
209 *
210 * The idea here is to detect and re-use keys that may have been assigned
211 * by a crashed postmaster or backend.
212 */
213static IpcSemaphoreId
215{
216 IpcSemaphoreId semId;
217 union semun semun;
218 PGSemaphoreData mysema;
219
220 /* Loop till we find a free IPC key */
221 for (nextSemaKey++;; nextSemaKey++)
222 {
223 pid_t creatorPID;
224
225 /* Try to create new semaphore set */
227 if (semId >= 0)
228 break; /* successful create */
229
230 /* See if it looks to be leftover from a dead Postgres process */
231 semId = semget(nextSemaKey, numSems + 1, 0);
232 if (semId < 0)
233 continue; /* failed: must be some other app's */
235 continue; /* sema belongs to a non-Postgres app */
236
237 /*
238 * If the creator PID is my own PID or does not belong to any extant
239 * process, it's safe to zap it.
240 */
241 creatorPID = IpcSemaphoreGetLastPID(semId, numSems);
242 if (creatorPID <= 0)
243 continue; /* oops, GETPID failed */
244 if (creatorPID != getpid())
245 {
246 if (kill(creatorPID, 0) == 0 || errno != ESRCH)
247 continue; /* sema belongs to a live process */
248 }
249
250 /*
251 * The sema set appears to be from a dead Postgres process, or from a
252 * previous cycle of life in this same process. Zap it, if possible.
253 * This probably shouldn't fail, but if it does, assume the sema set
254 * belongs to someone else after all, and continue quietly.
255 */
256 semun.val = 0; /* unused, but keep compiler quiet */
257 if (semctl(semId, 0, IPC_RMID, semun) < 0)
258 continue;
259
260 /*
261 * Now try again to create the sema set.
262 */
264 if (semId >= 0)
265 break; /* successful create */
266
267 /*
268 * Can only get here if some other process managed to create the same
269 * sema key before we did. Let him have that one, loop around to try
270 * next key.
271 */
272 }
273
274 /*
275 * OK, we created a new sema set. Mark it as created by this process. We
276 * do this by setting the spare semaphore to PGSemaMagic-1 and then
277 * incrementing it with semop(). That leaves it with value PGSemaMagic
278 * and sempid referencing this process.
279 */
281 mysema.semId = semId;
282 mysema.semNum = numSems;
283 PGSemaphoreUnlock(&mysema);
284
285 return semId;
286}
287
288
289/*
290 * Report amount of shared memory needed for semaphores
291 */
292Size
294{
295 return mul_size(maxSemas, sizeof(PGSemaphoreData));
296}
297
298/*
299 * PGReserveSemaphores --- initialize semaphore support
300 *
301 * This is called during postmaster start or shared memory reinitialization.
302 * It should do whatever is needed to be able to support up to maxSemas
303 * subsequent PGSemaphoreCreate calls. Also, if any system resources
304 * are acquired here or in PGSemaphoreCreate, register an on_shmem_exit
305 * callback to release them.
306 *
307 * In the SysV implementation, we acquire semaphore sets on-demand; the
308 * maxSemas parameter is just used to size the arrays. There is an array
309 * of PGSemaphoreData structs in shared memory, and a postmaster-local array
310 * with one entry per SysV semaphore set, which we use for releasing the
311 * semaphore sets when done. (This design ensures that postmaster shutdown
312 * doesn't rely on the contents of shared memory, which a failed backend might
313 * have clobbered.)
314 */
315void
317{
318 struct stat statbuf;
319
320 /*
321 * We use the data directory's inode number to seed the search for free
322 * semaphore keys. This minimizes the odds of collision with other
323 * postmasters, while maximizing the odds that we will detect and clean up
324 * semaphores left over from a crashed postmaster in our own directory.
325 */
326 if (stat(DataDir, &statbuf) < 0)
329 errmsg("could not stat data directory \"%s\": %m",
330 DataDir)));
331
332 /*
333 * We must use ShmemAllocUnlocked(), since the spinlock protecting
334 * ShmemAlloc() won't be ready yet.
335 */
338 numSharedSemas = 0;
339 maxSharedSemas = maxSemas;
340
341 maxSemaSets = (maxSemas + SEMAS_PER_SET - 1) / SEMAS_PER_SET;
344 if (mySemaSets == NULL)
345 elog(PANIC, "out of memory");
346 numSemaSets = 0;
347 nextSemaKey = statbuf.st_ino;
348 nextSemaNumber = SEMAS_PER_SET; /* force sema set alloc on 1st call */
349
351}
352
353/*
354 * Release semaphores at shutdown or shmem reinitialization
355 *
356 * (called as an on_shmem_exit callback, hence funny argument list)
357 */
358static void
360{
361 int i;
362
363 for (i = 0; i < numSemaSets; i++)
366}
367
368/*
369 * PGSemaphoreCreate
370 *
371 * Allocate a PGSemaphore structure with initial count 1
372 */
375{
376 PGSemaphore sema;
377
378 /* Can't do this in a backend, because static state is postmaster's */
380
382 {
383 /* Time to allocate another semaphore set */
385 elog(PANIC, "too many semaphores created");
387 numSemaSets++;
388 nextSemaNumber = 0;
389 }
390 /* Use the next shared PGSemaphoreData */
392 elog(PANIC, "too many semaphores created");
393 sema = &sharedSemas[numSharedSemas++];
394 /* Assign the next free semaphore in the current set */
395 sema->semId = mySemaSets[numSemaSets - 1];
396 sema->semNum = nextSemaNumber++;
397 /* Initialize it to count 1 */
398 IpcSemaphoreInitialize(sema->semId, sema->semNum, 1);
399
400 return sema;
401}
402
403/*
404 * PGSemaphoreReset
405 *
406 * Reset a previously-initialized PGSemaphore to have count 0
407 */
408void
410{
411 IpcSemaphoreInitialize(sema->semId, sema->semNum, 0);
412}
413
414/*
415 * PGSemaphoreLock
416 *
417 * Lock a semaphore (decrement count), blocking if count would be < 0
418 */
419void
421{
422 int errStatus;
423 struct sembuf sops;
424
425 sops.sem_op = -1; /* decrement */
426 sops.sem_flg = 0;
427 sops.sem_num = sema->semNum;
428
429 /*
430 * Note: if errStatus is -1 and errno == EINTR then it means we returned
431 * from the operation prematurely because we were sent a signal. So we
432 * try and lock the semaphore again.
433 *
434 * We used to check interrupts here, but that required servicing
435 * interrupts directly from signal handlers. Which is hard to do safely
436 * and portably.
437 */
438 do
439 {
440 errStatus = semop(sema->semId, &sops, 1);
441 } while (errStatus < 0 && errno == EINTR);
442
443 if (errStatus < 0)
444 elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
445}
446
447/*
448 * PGSemaphoreUnlock
449 *
450 * Unlock a semaphore (increment count)
451 */
452void
454{
455 int errStatus;
456 struct sembuf sops;
457
458 sops.sem_op = 1; /* increment */
459 sops.sem_flg = 0;
460 sops.sem_num = sema->semNum;
461
462 /*
463 * Note: if errStatus is -1 and errno == EINTR then it means we returned
464 * from the operation prematurely because we were sent a signal. So we
465 * try and unlock the semaphore again. Not clear this can really happen,
466 * but might as well cope.
467 */
468 do
469 {
470 errStatus = semop(sema->semId, &sops, 1);
471 } while (errStatus < 0 && errno == EINTR);
472
473 if (errStatus < 0)
474 elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
475}
476
477/*
478 * PGSemaphoreTryLock
479 *
480 * Lock a semaphore only if able to do so without blocking
481 */
482bool
484{
485 int errStatus;
486 struct sembuf sops;
487
488 sops.sem_op = -1; /* decrement */
489 sops.sem_flg = IPC_NOWAIT; /* but don't block */
490 sops.sem_num = sema->semNum;
491
492 /*
493 * Note: if errStatus is -1 and errno == EINTR then it means we returned
494 * from the operation prematurely because we were sent a signal. So we
495 * try and lock the semaphore again.
496 */
497 do
498 {
499 errStatus = semop(sema->semId, &sops, 1);
500 } while (errStatus < 0 && errno == EINTR);
501
502 if (errStatus < 0)
503 {
504 /* Expect EAGAIN or EWOULDBLOCK (platform-dependent) */
505#ifdef EAGAIN
506 if (errno == EAGAIN)
507 return false; /* failed to lock it */
508#endif
509#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
510 if (errno == EWOULDBLOCK)
511 return false; /* failed to lock it */
512#endif
513 /* Otherwise we got trouble */
514 elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
515 }
516
517 return true;
518}
size_t Size
Definition: c.h:576
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1157
int errcode_for_file_access(void)
Definition: elog.c:876
int errdetail(const char *fmt,...)
Definition: elog.c:1203
int errhint(const char *fmt,...)
Definition: elog.c:1317
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define LOG
Definition: elog.h:31
#define FATAL
Definition: elog.h:41
#define PANIC
Definition: elog.h:42
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
bool IsUnderPostmaster
Definition: globals.c:119
char * DataDir
Definition: globals.c:70
Assert(PointerIsAligned(start, uint64))
#define free(a)
Definition: header.h:65
#define malloc(a)
Definition: header.h:50
static struct @162 value
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:365
int i
Definition: isn.c:72
void * arg
struct PGSemaphoreData * PGSemaphore
Definition: pg_sema.h:34
static int numSems
Definition: posix_sema.c:66
uintptr_t Datum
Definition: postgres.h:69
void * ShmemAllocUnlocked(Size size)
Definition: shmem.c:233
Size mul_size(Size s1, Size s2)
Definition: shmem.c:505
_ino_t st_ino
Definition: win32_port.h:257
Size PGSemaphoreShmemSize(int maxSemas)
Definition: sysv_sema.c:293
#define PGSemaMagic
Definition: sysv_sema.c:64
void PGSemaphoreUnlock(PGSemaphore sema)
Definition: sysv_sema.c:453
struct PGSemaphoreData PGSemaphoreData
static pid_t IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum)
Definition: sysv_sema.c:194
static int IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum)
Definition: sysv_sema.c:183
key_t IpcSemaphoreKey
Definition: sysv_sema.c:45
int IpcSemaphoreId
Definition: sysv_sema.c:46
void PGReserveSemaphores(int maxSemas)
Definition: sysv_sema.c:316
static void IpcSemaphoreKill(IpcSemaphoreId semId)
Definition: sysv_sema.c:171
static int maxSharedSemas
Definition: sysv_sema.c:69
#define SEMAS_PER_SET
Definition: sysv_sema.c:60
void PGSemaphoreReset(PGSemaphore sema)
Definition: sysv_sema.c:409
void PGSemaphoreLock(PGSemaphore sema)
Definition: sysv_sema.c:420
#define IPCProtection
Definition: sysv_sema.c:62
static IpcSemaphoreKey nextSemaKey
Definition: sysv_sema.c:73
static int numSemaSets
Definition: sysv_sema.c:71
bool PGSemaphoreTryLock(PGSemaphore sema)
Definition: sysv_sema.c:483
static int nextSemaNumber
Definition: sysv_sema.c:74
static PGSemaphore sharedSemas
Definition: sysv_sema.c:67
static IpcSemaphoreId InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey, int numSems)
Definition: sysv_sema.c:99
static int maxSemaSets
Definition: sysv_sema.c:72
PGSemaphore PGSemaphoreCreate(void)
Definition: sysv_sema.c:374
static int numSharedSemas
Definition: sysv_sema.c:68
static void ReleaseSemaphores(int status, Datum arg)
Definition: sysv_sema.c:359
static IpcSemaphoreId * mySemaSets
Definition: sysv_sema.c:70
static IpcSemaphoreId IpcSemaphoreCreate(int numSems)
Definition: sysv_sema.c:214
static void IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum, int value)
Definition: sysv_sema.c:148
int val
Definition: sysv_sema.c:39
struct semid_ds * buf
Definition: sysv_sema.c:40
unsigned short * array
Definition: sysv_sema.c:41
#define stat
Definition: win32_port.h:274
#define SETVAL
Definition: win32_port.h:108
#define EINTR
Definition: win32_port.h:364
#define EWOULDBLOCK
Definition: win32_port.h:370
#define IPC_NOWAIT
Definition: win32_port.h:97
#define IPC_RMID
Definition: win32_port.h:93
#define GETPID
Definition: win32_port.h:109
#define kill(pid, sig)
Definition: win32_port.h:493
long key_t
Definition: win32_port.h:237
#define IPC_EXCL
Definition: win32_port.h:95
#define IPC_CREAT
Definition: win32_port.h:94
#define EIDRM
Definition: win32_port.h:102
#define GETVAL
Definition: win32_port.h:107
#define EAGAIN
Definition: win32_port.h:362