PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
sysv_sema.c
Go to the documentation of this file.
/*-------------------------------------------------------------------------
 *
 * sysv_sema.c
 *	  Implement PGSemaphores using SysV semaphore facilities
 *
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/port/sysv_sema.c
 *
 *-------------------------------------------------------------------------
 */
15#include "postgres.h"
16
17#include <signal.h>
18#include <unistd.h>
19#include <sys/file.h>
20#include <sys/ipc.h>
21#include <sys/sem.h>
22#include <sys/stat.h>
23
24#include "miscadmin.h"
25#include "storage/ipc.h"
26#include "storage/pg_sema.h"
27#include "storage/shmem.h"
28
29
/*
 * One PGSemaphore: identifies a single SysV semaphore by the ID of the
 * semaphore set it lives in plus its index within that set.
 */
typedef struct PGSemaphoreData
{
	int			semId;			/* semaphore set identifier */
	int			semNum;			/* semaphore number within set */
} PGSemaphoreData;
35
/*
 * Argument union passed as the fourth parameter of semctl().  It is only
 * declared here when HAVE_UNION_SEMUN is not defined.
 */
#ifndef HAVE_UNION_SEMUN
union semun
{
	int			val;
	struct semid_ds *buf;
	unsigned short *array;
};
#endif
44
typedef key_t IpcSemaphoreKey;	/* semaphore key passed to semget(2) */
typedef int IpcSemaphoreId;		/* semaphore ID returned by semget(2) */
47
/*
 * SEMAS_PER_SET is the number of useful semaphores in each semaphore set
 * we allocate. It must be *less than* your kernel's SEMMSL (max semaphores
 * per set) parameter, which is often around 25. (Less than, because we
 * allocate one extra sema in each set for identification purposes.)
 */
#define SEMAS_PER_SET	16

#define IPCProtection	(0600)	/* access/modify by user only */

#define PGSemaMagic		537		/* must be less than SEMVMX */
59
60
61static PGSemaphore sharedSemas; /* array of PGSemaphoreData in shared memory */
62static int numSharedSemas; /* number of PGSemaphoreDatas used so far */
63static int maxSharedSemas; /* allocated size of PGSemaphoreData array */
64static IpcSemaphoreId *mySemaSets; /* IDs of sema sets acquired so far */
65static int numSemaSets; /* number of sema sets acquired so far */
66static int maxSemaSets; /* allocated size of mySemaSets array */
67static IpcSemaphoreKey nextSemaKey; /* next key to try using */
68static int nextSemaNumber; /* next free sem num in last sema set */
69
70
72 int numSems);
73static void IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum,
74 int value);
75static void IpcSemaphoreKill(IpcSemaphoreId semId);
76static int IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum);
77static pid_t IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum);
79static void ReleaseSemaphores(int status, Datum arg);
80
81
82/*
83 * InternalIpcSemaphoreCreate
84 *
85 * Attempt to create a new semaphore set with the specified key.
86 * Will fail (return -1) if such a set already exists.
87 *
88 * If we fail with a failure code other than collision-with-existing-set,
89 * print out an error and abort. Other types of errors suggest nonrecoverable
90 * problems.
91 */
92static IpcSemaphoreId
94{
95 int semId;
96
97 semId = semget(semKey, numSems, IPC_CREAT | IPC_EXCL | IPCProtection);
98
99 if (semId < 0)
100 {
101 int saved_errno = errno;
102
103 /*
104 * Fail quietly if error indicates a collision with existing set. One
105 * would expect EEXIST, given that we said IPC_EXCL, but perhaps we
106 * could get a permission violation instead? Also, EIDRM might occur
107 * if an old set is slated for destruction but not gone yet.
108 */
109 if (saved_errno == EEXIST || saved_errno == EACCES
110#ifdef EIDRM
111 || saved_errno == EIDRM
112#endif
113 )
114 return -1;
115
116 /*
117 * Else complain and abort
118 */
120 (errmsg("could not create semaphores: %m"),
121 errdetail("Failed system call was semget(%lu, %d, 0%o).",
122 (unsigned long) semKey, numSems,
124 (saved_errno == ENOSPC) ?
125 errhint("This error does *not* mean that you have run out of disk space. "
126 "It occurs when either the system limit for the maximum number of "
127 "semaphore sets (SEMMNI), or the system wide maximum number of "
128 "semaphores (SEMMNS), would be exceeded. You need to raise the "
129 "respective kernel parameter. Alternatively, reduce PostgreSQL's "
130 "consumption of semaphores by reducing its \"max_connections\" parameter.\n"
131 "The PostgreSQL documentation contains more information about "
132 "configuring your system for PostgreSQL.") : 0));
133 }
134
135 return semId;
136}
137
138/*
139 * Initialize a semaphore to the specified value.
140 */
141static void
143{
144 union semun semun;
145
146 semun.val = value;
147 if (semctl(semId, semNum, SETVAL, semun) < 0)
148 {
149 int saved_errno = errno;
150
152 (errmsg_internal("semctl(%d, %d, SETVAL, %d) failed: %m",
153 semId, semNum, value),
154 (saved_errno == ERANGE) ?
155 errhint("You possibly need to raise your kernel's SEMVMX value to be at least "
156 "%d. Look into the PostgreSQL documentation for details.",
157 value) : 0));
158 }
159}
160
161/*
162 * IpcSemaphoreKill(semId) - removes a semaphore set
163 */
164static void
166{
167 union semun semun;
168
169 semun.val = 0; /* unused, but keep compiler quiet */
170
171 if (semctl(semId, 0, IPC_RMID, semun) < 0)
172 elog(LOG, "semctl(%d, 0, IPC_RMID, ...) failed: %m", semId);
173}
174
175/* Get the current value (semval) of the semaphore */
176static int
178{
179 union semun dummy; /* for Solaris */
180
181 dummy.val = 0; /* unused */
182
183 return semctl(semId, semNum, GETVAL, dummy);
184}
185
186/* Get the PID of the last process to do semop() on the semaphore */
187static pid_t
189{
190 union semun dummy; /* for Solaris */
191
192 dummy.val = 0; /* unused */
193
194 return semctl(semId, semNum, GETPID, dummy);
195}
196
197
198/*
199 * Create a semaphore set with the given number of useful semaphores
200 * (an additional sema is actually allocated to serve as identifier).
201 * Dead Postgres sema sets are recycled if found, but we do not fail
202 * upon collision with non-Postgres sema sets.
203 *
204 * The idea here is to detect and re-use keys that may have been assigned
205 * by a crashed postmaster or backend.
206 */
207static IpcSemaphoreId
209{
210 IpcSemaphoreId semId;
211 union semun semun;
212 PGSemaphoreData mysema;
213
214 /* Loop till we find a free IPC key */
215 for (nextSemaKey++;; nextSemaKey++)
216 {
217 pid_t creatorPID;
218
219 /* Try to create new semaphore set */
221 if (semId >= 0)
222 break; /* successful create */
223
224 /* See if it looks to be leftover from a dead Postgres process */
225 semId = semget(nextSemaKey, numSems + 1, 0);
226 if (semId < 0)
227 continue; /* failed: must be some other app's */
229 continue; /* sema belongs to a non-Postgres app */
230
231 /*
232 * If the creator PID is my own PID or does not belong to any extant
233 * process, it's safe to zap it.
234 */
235 creatorPID = IpcSemaphoreGetLastPID(semId, numSems);
236 if (creatorPID <= 0)
237 continue; /* oops, GETPID failed */
238 if (creatorPID != getpid())
239 {
240 if (kill(creatorPID, 0) == 0 || errno != ESRCH)
241 continue; /* sema belongs to a live process */
242 }
243
244 /*
245 * The sema set appears to be from a dead Postgres process, or from a
246 * previous cycle of life in this same process. Zap it, if possible.
247 * This probably shouldn't fail, but if it does, assume the sema set
248 * belongs to someone else after all, and continue quietly.
249 */
250 semun.val = 0; /* unused, but keep compiler quiet */
251 if (semctl(semId, 0, IPC_RMID, semun) < 0)
252 continue;
253
254 /*
255 * Now try again to create the sema set.
256 */
258 if (semId >= 0)
259 break; /* successful create */
260
261 /*
262 * Can only get here if some other process managed to create the same
263 * sema key before we did. Let him have that one, loop around to try
264 * next key.
265 */
266 }
267
268 /*
269 * OK, we created a new sema set. Mark it as created by this process. We
270 * do this by setting the spare semaphore to PGSemaMagic-1 and then
271 * incrementing it with semop(). That leaves it with value PGSemaMagic
272 * and sempid referencing this process.
273 */
275 mysema.semId = semId;
276 mysema.semNum = numSems;
277 PGSemaphoreUnlock(&mysema);
278
279 return semId;
280}
281
282
283/*
284 * Report amount of shared memory needed for semaphores
285 */
286Size
288{
289 return mul_size(maxSemas, sizeof(PGSemaphoreData));
290}
291
292/*
293 * PGReserveSemaphores --- initialize semaphore support
294 *
295 * This is called during postmaster start or shared memory reinitialization.
296 * It should do whatever is needed to be able to support up to maxSemas
297 * subsequent PGSemaphoreCreate calls. Also, if any system resources
298 * are acquired here or in PGSemaphoreCreate, register an on_shmem_exit
299 * callback to release them.
300 *
301 * In the SysV implementation, we acquire semaphore sets on-demand; the
302 * maxSemas parameter is just used to size the arrays. There is an array
303 * of PGSemaphoreData structs in shared memory, and a postmaster-local array
304 * with one entry per SysV semaphore set, which we use for releasing the
305 * semaphore sets when done. (This design ensures that postmaster shutdown
306 * doesn't rely on the contents of shared memory, which a failed backend might
307 * have clobbered.)
308 */
309void
311{
312 struct stat statbuf;
313
314 /*
315 * We use the data directory's inode number to seed the search for free
316 * semaphore keys. This minimizes the odds of collision with other
317 * postmasters, while maximizing the odds that we will detect and clean up
318 * semaphores left over from a crashed postmaster in our own directory.
319 */
320 if (stat(DataDir, &statbuf) < 0)
323 errmsg("could not stat data directory \"%s\": %m",
324 DataDir)));
325
326 /*
327 * We must use ShmemAllocUnlocked(), since the spinlock protecting
328 * ShmemAlloc() won't be ready yet.
329 */
332 numSharedSemas = 0;
333 maxSharedSemas = maxSemas;
334
335 maxSemaSets = (maxSemas + SEMAS_PER_SET - 1) / SEMAS_PER_SET;
338 if (mySemaSets == NULL)
339 elog(PANIC, "out of memory");
340 numSemaSets = 0;
341 nextSemaKey = statbuf.st_ino;
342 nextSemaNumber = SEMAS_PER_SET; /* force sema set alloc on 1st call */
343
345}
346
347/*
348 * Release semaphores at shutdown or shmem reinitialization
349 *
350 * (called as an on_shmem_exit callback, hence funny argument list)
351 */
352static void
354{
355 int i;
356
357 for (i = 0; i < numSemaSets; i++)
360}
361
362/*
363 * PGSemaphoreCreate
364 *
365 * Allocate a PGSemaphore structure with initial count 1
366 */
369{
370 PGSemaphore sema;
371
372 /* Can't do this in a backend, because static state is postmaster's */
374
376 {
377 /* Time to allocate another semaphore set */
379 elog(PANIC, "too many semaphores created");
381 numSemaSets++;
382 nextSemaNumber = 0;
383 }
384 /* Use the next shared PGSemaphoreData */
386 elog(PANIC, "too many semaphores created");
387 sema = &sharedSemas[numSharedSemas++];
388 /* Assign the next free semaphore in the current set */
389 sema->semId = mySemaSets[numSemaSets - 1];
390 sema->semNum = nextSemaNumber++;
391 /* Initialize it to count 1 */
392 IpcSemaphoreInitialize(sema->semId, sema->semNum, 1);
393
394 return sema;
395}
396
397/*
398 * PGSemaphoreReset
399 *
400 * Reset a previously-initialized PGSemaphore to have count 0
401 */
402void
404{
405 IpcSemaphoreInitialize(sema->semId, sema->semNum, 0);
406}
407
408/*
409 * PGSemaphoreLock
410 *
411 * Lock a semaphore (decrement count), blocking if count would be < 0
412 */
413void
415{
416 int errStatus;
417 struct sembuf sops;
418
419 sops.sem_op = -1; /* decrement */
420 sops.sem_flg = 0;
421 sops.sem_num = sema->semNum;
422
423 /*
424 * Note: if errStatus is -1 and errno == EINTR then it means we returned
425 * from the operation prematurely because we were sent a signal. So we
426 * try and lock the semaphore again.
427 *
428 * We used to check interrupts here, but that required servicing
429 * interrupts directly from signal handlers. Which is hard to do safely
430 * and portably.
431 */
432 do
433 {
434 errStatus = semop(sema->semId, &sops, 1);
435 } while (errStatus < 0 && errno == EINTR);
436
437 if (errStatus < 0)
438 elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
439}
440
441/*
442 * PGSemaphoreUnlock
443 *
444 * Unlock a semaphore (increment count)
445 */
446void
448{
449 int errStatus;
450 struct sembuf sops;
451
452 sops.sem_op = 1; /* increment */
453 sops.sem_flg = 0;
454 sops.sem_num = sema->semNum;
455
456 /*
457 * Note: if errStatus is -1 and errno == EINTR then it means we returned
458 * from the operation prematurely because we were sent a signal. So we
459 * try and unlock the semaphore again. Not clear this can really happen,
460 * but might as well cope.
461 */
462 do
463 {
464 errStatus = semop(sema->semId, &sops, 1);
465 } while (errStatus < 0 && errno == EINTR);
466
467 if (errStatus < 0)
468 elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
469}
470
471/*
472 * PGSemaphoreTryLock
473 *
474 * Lock a semaphore only if able to do so without blocking
475 */
476bool
478{
479 int errStatus;
480 struct sembuf sops;
481
482 sops.sem_op = -1; /* decrement */
483 sops.sem_flg = IPC_NOWAIT; /* but don't block */
484 sops.sem_num = sema->semNum;
485
486 /*
487 * Note: if errStatus is -1 and errno == EINTR then it means we returned
488 * from the operation prematurely because we were sent a signal. So we
489 * try and lock the semaphore again.
490 */
491 do
492 {
493 errStatus = semop(sema->semId, &sops, 1);
494 } while (errStatus < 0 && errno == EINTR);
495
496 if (errStatus < 0)
497 {
498 /* Expect EAGAIN or EWOULDBLOCK (platform-dependent) */
499#ifdef EAGAIN
500 if (errno == EAGAIN)
501 return false; /* failed to lock it */
502#endif
503#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
504 if (errno == EWOULDBLOCK)
505 return false; /* failed to lock it */
506#endif
507 /* Otherwise we got trouble */
508 elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
509 }
510
511 return true;
512}
size_t Size
Definition: c.h:576
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1158
int errcode_for_file_access(void)
Definition: elog.c:877
int errdetail(const char *fmt,...)
Definition: elog.c:1204
int errhint(const char *fmt,...)
Definition: elog.c:1318
int errmsg(const char *fmt,...)
Definition: elog.c:1071
#define LOG
Definition: elog.h:31
#define FATAL
Definition: elog.h:41
#define PANIC
Definition: elog.h:42
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
bool IsUnderPostmaster
Definition: globals.c:121
char * DataDir
Definition: globals.c:72
Assert(PointerIsAligned(start, uint64))
#define free(a)
Definition: header.h:65
#define malloc(a)
Definition: header.h:50
static struct @165 value
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:365
int i
Definition: isn.c:77
void * arg
struct PGSemaphoreData * PGSemaphore
Definition: pg_sema.h:34
static int numSems
Definition: posix_sema.c:66
uintptr_t Datum
Definition: postgres.h:69
void * ShmemAllocUnlocked(Size size)
Definition: shmem.c:238
Size mul_size(Size s1, Size s2)
Definition: shmem.c:510
_ino_t st_ino
Definition: win32_port.h:257
Size PGSemaphoreShmemSize(int maxSemas)
Definition: sysv_sema.c:287
#define PGSemaMagic
Definition: sysv_sema.c:58
void PGSemaphoreUnlock(PGSemaphore sema)
Definition: sysv_sema.c:447
struct PGSemaphoreData PGSemaphoreData
static pid_t IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum)
Definition: sysv_sema.c:188
static int IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum)
Definition: sysv_sema.c:177
key_t IpcSemaphoreKey
Definition: sysv_sema.c:45
int IpcSemaphoreId
Definition: sysv_sema.c:46
void PGReserveSemaphores(int maxSemas)
Definition: sysv_sema.c:310
static void IpcSemaphoreKill(IpcSemaphoreId semId)
Definition: sysv_sema.c:165
static int maxSharedSemas
Definition: sysv_sema.c:63
#define SEMAS_PER_SET
Definition: sysv_sema.c:54
void PGSemaphoreReset(PGSemaphore sema)
Definition: sysv_sema.c:403
void PGSemaphoreLock(PGSemaphore sema)
Definition: sysv_sema.c:414
#define IPCProtection
Definition: sysv_sema.c:56
static IpcSemaphoreKey nextSemaKey
Definition: sysv_sema.c:67
static int numSemaSets
Definition: sysv_sema.c:65
bool PGSemaphoreTryLock(PGSemaphore sema)
Definition: sysv_sema.c:477
static int nextSemaNumber
Definition: sysv_sema.c:68
static PGSemaphore sharedSemas
Definition: sysv_sema.c:61
static IpcSemaphoreId InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey, int numSems)
Definition: sysv_sema.c:93
static int maxSemaSets
Definition: sysv_sema.c:66
PGSemaphore PGSemaphoreCreate(void)
Definition: sysv_sema.c:368
static int numSharedSemas
Definition: sysv_sema.c:62
static void ReleaseSemaphores(int status, Datum arg)
Definition: sysv_sema.c:353
static IpcSemaphoreId * mySemaSets
Definition: sysv_sema.c:64
static IpcSemaphoreId IpcSemaphoreCreate(int numSems)
Definition: sysv_sema.c:208
static void IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum, int value)
Definition: sysv_sema.c:142
int val
Definition: sysv_sema.c:39
struct semid_ds * buf
Definition: sysv_sema.c:40
unsigned short * array
Definition: sysv_sema.c:41
#define stat
Definition: win32_port.h:274
#define SETVAL
Definition: win32_port.h:108
#define EINTR
Definition: win32_port.h:364
#define EWOULDBLOCK
Definition: win32_port.h:370
#define IPC_NOWAIT
Definition: win32_port.h:97
#define IPC_RMID
Definition: win32_port.h:93
#define GETPID
Definition: win32_port.h:109
#define kill(pid, sig)
Definition: win32_port.h:493
long key_t
Definition: win32_port.h:237
#define IPC_EXCL
Definition: win32_port.h:95
#define IPC_CREAT
Definition: win32_port.h:94
#define EIDRM
Definition: win32_port.h:102
#define GETVAL
Definition: win32_port.h:107
#define EAGAIN
Definition: win32_port.h:362