PostgreSQL Source Code git master
Loading...
Searching...
No Matches
sysv_sema.c
Go to the documentation of this file.
/*-------------------------------------------------------------------------
 *
 * sysv_sema.c
 *	  Implement PGSemaphores using SysV semaphore facilities
 *
 *
 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/port/sysv_sema.c
 *
 *-------------------------------------------------------------------------
 */
15#include "postgres.h"
16
17#include <signal.h>
18#include <unistd.h>
19#include <sys/file.h>
20#include <sys/ipc.h>
21#include <sys/sem.h>
22#include <sys/stat.h>
23
24#include "miscadmin.h"
25#include "storage/ipc.h"
26#include "storage/pg_sema.h"
27#include "storage/shmem.h"
28
29
/*
 * A PGSemaphoreData names one SysV semaphore: the semaphore set it lives in
 * plus its index within that set.  These are the two values handed to
 * semop()/semctl() by the routines below.
 */
typedef struct PGSemaphoreData
{
	int			semId;			/* semaphore set identifier */
	int			semNum;			/* semaphore number within set */
} PGSemaphoreData;
35
/*
 * Argument union passed as the fourth parameter of semctl().  It is declared
 * here only when HAVE_UNION_SEMUN is not defined.
 */
#ifndef HAVE_UNION_SEMUN
union semun
{
	int			val;
	struct semid_ds *buf;
	unsigned short *array;
};
#endif
44
typedef key_t IpcSemaphoreKey;	/* semaphore key passed to semget(2) */
typedef int IpcSemaphoreId;		/* semaphore ID returned by semget(2) */
47
/*
 * SEMAS_PER_SET is the number of useful semaphores in each semaphore set
 * we allocate.  It must be *less than* your kernel's SEMMSL (max semaphores
 * per set) parameter, which is often around 25.  (Less than, because we
 * allocate one extra sema in each set for identification purposes.)
 */
#define SEMAS_PER_SET	16

#define IPCProtection	(0600)	/* access/modify by user only */

#define PGSemaMagic		537		/* must be less than SEMVMX */
59
60
61static PGSemaphore sharedSemas; /* array of PGSemaphoreData in shared memory */
62static int numSharedSemas; /* number of PGSemaphoreDatas used so far */
63static int maxSharedSemas; /* allocated size of PGSemaphoreData array */
64static IpcSemaphoreId *mySemaSets; /* IDs of sema sets acquired so far */
65static int numSemaSets; /* number of sema sets acquired so far */
66static int maxSemaSets; /* allocated size of mySemaSets array */
67static IpcSemaphoreKey nextSemaKey; /* next key to try using */
68static int nextSemaNumber; /* next free sem num in last sema set */
69
70
72 int numSems, bool retry_ok);
73static void IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum,
74 int value);
75static void IpcSemaphoreKill(IpcSemaphoreId semId);
76static int IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum);
77static pid_t IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum);
79static void ReleaseSemaphores(int status, Datum arg);
80
81
82/*
83 * InternalIpcSemaphoreCreate
84 *
85 * Attempt to create a new semaphore set with the specified key.
86 * Will fail (return -1) if such a set already exists.
87 *
88 * If we fail with a failure code other than collision-with-existing-set,
89 * print out an error and abort. Other types of errors suggest nonrecoverable
90 * problems.
91 *
92 * Unfortunately, it's sometimes hard to tell whether errors are
93 * nonrecoverable. Our caller keeps track of whether continuing to retry
94 * is sane or not; if not, we abort on failure regardless of the errno.
95 */
96static IpcSemaphoreId
98{
99 int semId;
100
102
103 if (semId < 0)
104 {
105 int saved_errno = errno;
106
107 /*
108 * Fail quietly if error suggests a collision with an existing set and
109 * our caller has not lost patience.
110 *
111 * One would expect EEXIST, given that we said IPC_EXCL, but perhaps
112 * we could get a permission violation instead. On some platforms
113 * EINVAL will be reported if the existing set has too few semaphores.
114 * Also, EIDRM might occur if an old set is slated for destruction but
115 * not gone yet.
116 *
117 * EINVAL is the key reason why we need the caller-level loop limit,
118 * as it can also mean that the platform's SEMMSL is less than
119 * numSems, and that condition can't be fixed by trying another key.
120 */
121 if (retry_ok &&
122 (saved_errno == EEXIST
123 || saved_errno == EACCES
124 || saved_errno == EINVAL
126 || saved_errno == EIDRM
127#endif
128 ))
129 return -1;
130
131 /*
132 * Else complain and abort
133 */
135 (errmsg("could not create semaphores: %m"),
136 errdetail("Failed system call was semget(%lu, %d, 0%o).",
137 (unsigned long) semKey, numSems,
139 (saved_errno == ENOSPC) ?
140 errhint("This error does *not* mean that you have run out of disk space. "
141 "It occurs when either the system limit for the maximum number of "
142 "semaphore sets (SEMMNI), or the system wide maximum number of "
143 "semaphores (SEMMNS), would be exceeded. You need to raise the "
144 "respective kernel parameter. Alternatively, reduce PostgreSQL's "
145 "consumption of semaphores by reducing its \"max_connections\" parameter.\n"
146 "The PostgreSQL documentation contains more information about "
147 "configuring your system for PostgreSQL.") : 0));
148 }
149
150 return semId;
151}
152
153/*
154 * Initialize a semaphore to the specified value.
155 */
156static void
158{
159 union semun semun;
160
161 semun.val = value;
162 if (semctl(semId, semNum, SETVAL, semun) < 0)
163 {
164 int saved_errno = errno;
165
167 (errmsg_internal("semctl(%d, %d, SETVAL, %d) failed: %m",
168 semId, semNum, value),
169 (saved_errno == ERANGE) ?
170 errhint("You possibly need to raise your kernel's SEMVMX value to be at least "
171 "%d. Look into the PostgreSQL documentation for details.",
172 value) : 0));
173 }
174}
175
176/*
177 * IpcSemaphoreKill(semId) - removes a semaphore set
178 */
179static void
181{
182 union semun semun;
183
184 semun.val = 0; /* unused, but keep compiler quiet */
185
186 if (semctl(semId, 0, IPC_RMID, semun) < 0)
187 elog(LOG, "semctl(%d, 0, IPC_RMID, ...) failed: %m", semId);
188}
189
190/* Get the current value (semval) of the semaphore */
191static int
193{
194 union semun dummy; /* for Solaris */
195
196 dummy.val = 0; /* unused */
197
198 return semctl(semId, semNum, GETVAL, dummy);
199}
200
201/* Get the PID of the last process to do semop() on the semaphore */
202static pid_t
204{
205 union semun dummy; /* for Solaris */
206
207 dummy.val = 0; /* unused */
208
209 return semctl(semId, semNum, GETPID, dummy);
210}
211
212
213/*
214 * Create a semaphore set with the given number of useful semaphores
215 * (an additional sema is actually allocated to serve as identifier).
216 * Dead Postgres sema sets are recycled if found, but we do not fail
217 * upon collision with non-Postgres sema sets.
218 *
219 * The idea here is to detect and re-use keys that may have been assigned
220 * by a crashed postmaster or backend.
221 */
222static IpcSemaphoreId
224{
225 int num_tries = 0;
226 IpcSemaphoreId semId;
227 union semun semun;
229
230 /* Loop till we find a free IPC key */
231 for (nextSemaKey++;; nextSemaKey++, num_tries++)
232 {
233 pid_t creatorPID;
234
235 /*
236 * Try to create new semaphore set. Give up after trying 1000
237 * distinct IPC keys.
238 */
240 num_tries < 1000);
241 if (semId >= 0)
242 break; /* successful create */
243
244 /* See if it looks to be leftover from a dead Postgres process */
245 semId = semget(nextSemaKey, numSems + 1, 0);
246 if (semId < 0)
247 continue; /* failed: must be some other app's */
249 continue; /* sema belongs to a non-Postgres app */
250
251 /*
252 * If the creator PID is my own PID or does not belong to any extant
253 * process, it's safe to zap it.
254 */
255 creatorPID = IpcSemaphoreGetLastPID(semId, numSems);
256 if (creatorPID <= 0)
257 continue; /* oops, GETPID failed */
258 if (creatorPID != getpid())
259 {
260 if (kill(creatorPID, 0) == 0 || errno != ESRCH)
261 continue; /* sema belongs to a live process */
262 }
263
264 /*
265 * The sema set appears to be from a dead Postgres process, or from a
266 * previous cycle of life in this same process. Zap it, if possible.
267 * This probably shouldn't fail, but if it does, assume the sema set
268 * belongs to someone else after all, and continue quietly.
269 */
270 semun.val = 0; /* unused, but keep compiler quiet */
271 if (semctl(semId, 0, IPC_RMID, semun) < 0)
272 continue;
273
274 /*
275 * Now try again to create the sema set.
276 */
278 if (semId >= 0)
279 break; /* successful create */
280
281 /*
282 * Can only get here if some other process managed to create the same
283 * sema key before we did. Let him have that one, loop around to try
284 * next key.
285 */
286 }
287
288 /*
289 * OK, we created a new sema set. Mark it as created by this process. We
290 * do this by setting the spare semaphore to PGSemaMagic-1 and then
291 * incrementing it with semop(). That leaves it with value PGSemaMagic
292 * and sempid referencing this process.
293 */
295 mysema.semId = semId;
296 mysema.semNum = numSems;
298
299 return semId;
300}
301
302
303/*
304 * Request shared memory needed for semaphores
305 */
306void
308{
309 /* Need a PGSemaphoreData per semaphore */
310 ShmemRequestStruct(.name = "Semaphores",
311 .size = mul_size(maxSemas, sizeof(PGSemaphoreData)),
312 .ptr = (void **) &sharedSemas,
313 );
314}
315
316/*
317 * PGSemaphoreInit --- initialize semaphore support
318 *
319 * This is called during postmaster start or shared memory reinitialization.
320 * It should do whatever is needed to be able to support up to maxSemas
321 * subsequent PGSemaphoreCreate calls. Also, if any system resources
322 * are acquired here or in PGSemaphoreCreate, register an on_shmem_exit
323 * callback to release them.
324 *
325 * In the SysV implementation, we acquire semaphore sets on-demand; the
326 * maxSemas parameter is just used to size the arrays. There is an array
327 * of PGSemaphoreData structs in shared memory, and a postmaster-local array
328 * with one entry per SysV semaphore set, which we use for releasing the
329 * semaphore sets when done. (This design ensures that postmaster shutdown
330 * doesn't rely on the contents of shared memory, which a failed backend might
331 * have clobbered.)
332 */
333void
335{
336 struct stat statbuf;
337
338 /*
339 * We use the data directory's inode number to seed the search for free
340 * semaphore keys. This minimizes the odds of collision with other
341 * postmasters, while maximizing the odds that we will detect and clean up
342 * semaphores left over from a crashed postmaster in our own directory.
343 */
344 if (stat(DataDir, &statbuf) < 0)
347 errmsg("could not stat data directory \"%s\": %m",
348 DataDir)));
349
350 numSharedSemas = 0;
352
356 if (mySemaSets == NULL)
357 elog(PANIC, "out of memory");
358 numSemaSets = 0;
359 nextSemaKey = statbuf.st_ino;
360 nextSemaNumber = SEMAS_PER_SET; /* force sema set alloc on 1st call */
361
363}
364
365/*
366 * Release semaphores at shutdown or shmem reinitialization
367 *
368 * (called as an on_shmem_exit callback, hence funny argument list)
369 */
370static void
372{
373 int i;
374
375 for (i = 0; i < numSemaSets; i++)
378}
379
380/*
381 * PGSemaphoreCreate
382 *
383 * Allocate a PGSemaphore structure with initial count 1
384 */
387{
388 PGSemaphore sema;
389
390 /* Can't do this in a backend, because static state is postmaster's */
392
394 {
395 /* Time to allocate another semaphore set */
397 elog(PANIC, "too many semaphores created");
399 numSemaSets++;
400 nextSemaNumber = 0;
401 }
402 /* Use the next shared PGSemaphoreData */
404 elog(PANIC, "too many semaphores created");
405 sema = &sharedSemas[numSharedSemas++];
406 /* Assign the next free semaphore in the current set */
407 sema->semId = mySemaSets[numSemaSets - 1];
408 sema->semNum = nextSemaNumber++;
409 /* Initialize it to count 1 */
410 IpcSemaphoreInitialize(sema->semId, sema->semNum, 1);
411
412 return sema;
413}
414
415/*
416 * PGSemaphoreReset
417 *
418 * Reset a previously-initialized PGSemaphore to have count 0
419 */
420void
422{
423 IpcSemaphoreInitialize(sema->semId, sema->semNum, 0);
424}
425
426/*
427 * PGSemaphoreLock
428 *
429 * Lock a semaphore (decrement count), blocking if count would be < 0
430 */
431void
433{
434 int errStatus;
435 struct sembuf sops;
436
437 sops.sem_op = -1; /* decrement */
438 sops.sem_flg = 0;
439 sops.sem_num = sema->semNum;
440
441 /*
442 * Note: if errStatus is -1 and errno == EINTR then it means we returned
443 * from the operation prematurely because we were sent a signal. So we
444 * try and lock the semaphore again.
445 *
446 * We used to check interrupts here, but that required servicing
447 * interrupts directly from signal handlers. Which is hard to do safely
448 * and portably.
449 */
450 do
451 {
452 errStatus = semop(sema->semId, &sops, 1);
453 } while (errStatus < 0 && errno == EINTR);
454
455 if (errStatus < 0)
456 elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
457}
458
459/*
460 * PGSemaphoreUnlock
461 *
462 * Unlock a semaphore (increment count)
463 */
464void
466{
467 int errStatus;
468 struct sembuf sops;
469
470 sops.sem_op = 1; /* increment */
471 sops.sem_flg = 0;
472 sops.sem_num = sema->semNum;
473
474 /*
475 * Note: if errStatus is -1 and errno == EINTR then it means we returned
476 * from the operation prematurely because we were sent a signal. So we
477 * try and unlock the semaphore again. Not clear this can really happen,
478 * but might as well cope.
479 */
480 do
481 {
482 errStatus = semop(sema->semId, &sops, 1);
483 } while (errStatus < 0 && errno == EINTR);
484
485 if (errStatus < 0)
486 elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
487}
488
489/*
490 * PGSemaphoreTryLock
491 *
492 * Lock a semaphore only if able to do so without blocking
493 */
494bool
496{
497 int errStatus;
498 struct sembuf sops;
499
500 sops.sem_op = -1; /* decrement */
501 sops.sem_flg = IPC_NOWAIT; /* but don't block */
502 sops.sem_num = sema->semNum;
503
504 /*
505 * Note: if errStatus is -1 and errno == EINTR then it means we returned
506 * from the operation prematurely because we were sent a signal. So we
507 * try and lock the semaphore again.
508 */
509 do
510 {
511 errStatus = semop(sema->semId, &sops, 1);
512 } while (errStatus < 0 && errno == EINTR);
513
514 if (errStatus < 0)
515 {
516 /* Expect EAGAIN or EWOULDBLOCK (platform-dependent) */
517#ifdef EAGAIN
518 if (errno == EAGAIN)
519 return false; /* failed to lock it */
520#endif
521#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
522 if (errno == EWOULDBLOCK)
523 return false; /* failed to lock it */
524#endif
525 /* Otherwise we got trouble */
526 elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
527 }
528
529 return true;
530}
#define Assert(condition)
Definition c.h:943
Datum arg
Definition elog.c:1322
int errcode_for_file_access(void)
Definition elog.c:897
#define LOG
Definition elog.h:32
int errhint(const char *fmt,...) pg_attribute_printf(1
int errdetail(const char *fmt,...) pg_attribute_printf(1
#define FATAL
Definition elog.h:42
int int errmsg_internal(const char *fmt,...) pg_attribute_printf(1
#define PANIC
Definition elog.h:44
#define elog(elevel,...)
Definition elog.h:228
#define ereport(elevel,...)
Definition elog.h:152
bool IsUnderPostmaster
Definition globals.c:122
char * DataDir
Definition globals.c:73
static struct @177 value
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition ipc.c:372
int i
Definition isn.c:77
static char * errmsg
static int numSems
Definition posix_sema.c:66
uint64_t Datum
Definition postgres.h:70
static int fb(int x)
Size mul_size(Size s1, Size s2)
Definition shmem.c:1063
#define ShmemRequestStruct(...)
Definition shmem.h:176
#define free(a)
#define malloc(a)
#define PGSemaMagic
Definition sysv_sema.c:58
static IpcSemaphoreId InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey, int numSems, bool retry_ok)
Definition sysv_sema.c:97
void PGSemaphoreUnlock(PGSemaphore sema)
Definition sysv_sema.c:465
static pid_t IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum)
Definition sysv_sema.c:203
static int IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum)
Definition sysv_sema.c:192
key_t IpcSemaphoreKey
Definition sysv_sema.c:45
void PGSemaphoreShmemRequest(int maxSemas)
Definition sysv_sema.c:307
int IpcSemaphoreId
Definition sysv_sema.c:46
static void IpcSemaphoreKill(IpcSemaphoreId semId)
Definition sysv_sema.c:180
static int maxSharedSemas
Definition sysv_sema.c:63
#define SEMAS_PER_SET
Definition sysv_sema.c:54
void PGSemaphoreReset(PGSemaphore sema)
Definition sysv_sema.c:421
void PGSemaphoreLock(PGSemaphore sema)
Definition sysv_sema.c:432
void PGSemaphoreInit(int maxSemas)
Definition sysv_sema.c:334
#define IPCProtection
Definition sysv_sema.c:56
static IpcSemaphoreKey nextSemaKey
Definition sysv_sema.c:67
static int numSemaSets
Definition sysv_sema.c:65
bool PGSemaphoreTryLock(PGSemaphore sema)
Definition sysv_sema.c:495
static int nextSemaNumber
Definition sysv_sema.c:68
static PGSemaphore sharedSemas
Definition sysv_sema.c:61
static int maxSemaSets
Definition sysv_sema.c:66
PGSemaphore PGSemaphoreCreate(void)
Definition sysv_sema.c:386
static int numSharedSemas
Definition sysv_sema.c:62
static void ReleaseSemaphores(int status, Datum arg)
Definition sysv_sema.c:371
static IpcSemaphoreId * mySemaSets
Definition sysv_sema.c:64
static IpcSemaphoreId IpcSemaphoreCreate(int numSems)
Definition sysv_sema.c:223
static void IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum, int value)
Definition sysv_sema.c:157
int val
Definition sysv_sema.c:39
struct semid_ds * buf
Definition sysv_sema.c:40
unsigned short * array
Definition sysv_sema.c:41
const char * name
#define stat
Definition win32_port.h:74
#define SETVAL
Definition win32_port.h:108
#define EINTR
Definition win32_port.h:361
#define EWOULDBLOCK
Definition win32_port.h:367
#define IPC_NOWAIT
Definition win32_port.h:97
#define IPC_RMID
Definition win32_port.h:93
#define GETPID
Definition win32_port.h:109
#define kill(pid, sig)
Definition win32_port.h:490
long key_t
Definition win32_port.h:237
#define IPC_EXCL
Definition win32_port.h:95
#define IPC_CREAT
Definition win32_port.h:94
#define EIDRM
Definition win32_port.h:102
#define GETVAL
Definition win32_port.h:107
#define EAGAIN
Definition win32_port.h:359