PostgreSQL Source Code  git master
shmem.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * shmem.c
4  * create shared memory and initialize shared memory data structures.
5  *
6  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/storage/ipc/shmem.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 /*
16  * POSTGRES processes share one or more regions of shared memory.
17  * The shared memory is created by a postmaster and is inherited
18  * by each backend via fork() (or, in some ports, via other OS-specific
19  * methods). The routines in this file are used for allocating and
20  * binding to shared memory data structures.
21  *
22  * NOTES:
23  * (a) There are three kinds of shared memory data structures
24  * available to POSTGRES: fixed-size structures, queues and hash
25  * tables. Fixed-size structures contain things like global variables
26  * for a module and should never be allocated after the shared memory
27  * initialization phase. Hash tables have a fixed maximum size, but
28  * their actual size can vary dynamically. When entries are added
29  * to the table, more space is allocated. Queues link data structures
30  * that have been allocated either within fixed-size structures or as hash
31  * buckets. Each shared data structure has a string name to identify
32  * it (assigned in the module that declares it).
33  *
34  * (b) During initialization, each module looks for its
35  * shared data structures in a hash table called the "Shmem Index".
36  * If the data structure is not present, the caller can allocate
37  * a new one and initialize it. If the data structure is present,
38  * the caller "attaches" to the structure by initializing a pointer
39  * in the local address space.
40  * The shmem index has two purposes: first, it gives us
41  * a simple model of how the world looks when a backend process
42  * initializes. If something is present in the shmem index,
43  * it is initialized. If it is not, it is uninitialized. Second,
44  * the shmem index allows us to allocate shared memory on demand
45  * instead of trying to preallocate structures and hard-wire the
46  * sizes and locations in header files. If you are using a lot
47  * of shared memory in a lot of different places (and changing
48  * things during development), this is important.
49  *
50  * (c) In standard Unix-ish environments, individual backends do not
51  * need to re-establish their local pointers into shared memory, because
52  * they inherit correct values of those variables via fork() from the
53  * postmaster. However, this does not work in the EXEC_BACKEND case.
54  * In ports using EXEC_BACKEND, new backends have to set up their local
55  * pointers using the method described in (b) above.
56  *
57  * (d) memory allocation model: shared memory can never be
58  * freed, once allocated. Each hash table has its own free list,
59  * so hash buckets can be reused when an item is deleted. However,
60  * if one hash table grows very large and then shrinks, its space
61  * cannot be redistributed to other tables. We could build a simple
62  * hash bucket garbage collector if need be. Right now, it seems
63  * unnecessary.
64  */
65 
66 #include "postgres.h"
67 
68 #include "fmgr.h"
69 #include "funcapi.h"
70 #include "miscadmin.h"
71 #include "storage/lwlock.h"
72 #include "storage/pg_shmem.h"
73 #include "storage/shmem.h"
74 #include "storage/spin.h"
75 #include "utils/builtins.h"
76 
77 static void *ShmemAllocRaw(Size size, Size *allocated_size);
78 
79 /* shared memory global variables */
80 
81 static PGShmemHeader *ShmemSegHdr; /* shared mem segment header */
82 
83 static void *ShmemBase; /* start address of shared memory */
84 
85 static void *ShmemEnd; /* end+1 address of shared memory */
86 
87 slock_t *ShmemLock; /* spinlock for shared memory and LWLock
88  * allocation */
89 
90 static HTAB *ShmemIndex = NULL; /* primary index hashtable for shmem */
91 
92 
93 /*
94  * InitShmemAccess() --- set up basic pointers to shared memory.
95  *
96  * Note: the argument should be declared "PGShmemHeader *seghdr",
97  * but we use void to avoid having to include ipc.h in shmem.h.
98  */
99 void
100 InitShmemAccess(void *seghdr)
101 {
102  PGShmemHeader *shmhdr = (PGShmemHeader *) seghdr;
103 
104  ShmemSegHdr = shmhdr;
105  ShmemBase = (void *) shmhdr;
106  ShmemEnd = (char *) ShmemBase + shmhdr->totalsize;
107 }
108 
109 /*
110  * InitShmemAllocation() --- set up shared-memory space allocation.
111  *
112  * This should be called only in the postmaster or a standalone backend.
113  */
114 void
116 {
117  PGShmemHeader *shmhdr = ShmemSegHdr;
118  char *aligned;
119 
120  Assert(shmhdr != NULL);
121 
122  /*
123  * Initialize the spinlock used by ShmemAlloc. We must use
124  * ShmemAllocUnlocked, since obviously ShmemAlloc can't be called yet.
125  */
126  ShmemLock = (slock_t *) ShmemAllocUnlocked(sizeof(slock_t));
127 
129 
130  /*
131  * Allocations after this point should go through ShmemAlloc, which
132  * expects to allocate everything on cache line boundaries. Make sure the
133  * first allocation begins on a cache line boundary.
134  */
135  aligned = (char *)
136  (CACHELINEALIGN((((char *) shmhdr) + shmhdr->freeoffset)));
137  shmhdr->freeoffset = aligned - (char *) shmhdr;
138 
139  /* ShmemIndex can't be set up yet (need LWLocks first) */
140  shmhdr->index = NULL;
141  ShmemIndex = (HTAB *) NULL;
142 }
143 
144 /*
145  * ShmemAlloc -- allocate max-aligned chunk from shared memory
146  *
147  * Throws error if request cannot be satisfied.
148  *
149  * Assumes ShmemLock and ShmemSegHdr are initialized.
150  */
151 void *
153 {
154  void *newSpace;
155  Size allocated_size;
156 
157  newSpace = ShmemAllocRaw(size, &allocated_size);
158  if (!newSpace)
159  ereport(ERROR,
160  (errcode(ERRCODE_OUT_OF_MEMORY),
161  errmsg("out of shared memory (%zu bytes requested)",
162  size)));
163  return newSpace;
164 }
165 
166 /*
167  * ShmemAllocNoError -- allocate max-aligned chunk from shared memory
168  *
169  * As ShmemAlloc, but returns NULL if out of space, rather than erroring.
170  */
171 void *
173 {
174  Size allocated_size;
175 
176  return ShmemAllocRaw(size, &allocated_size);
177 }
178 
179 /*
180  * ShmemAllocRaw -- allocate align chunk and return allocated size
181  *
182  * Also sets *allocated_size to the number of bytes allocated, which will
183  * be equal to the number requested plus any padding we choose to add.
184  */
185 static void *
186 ShmemAllocRaw(Size size, Size *allocated_size)
187 {
188  Size newStart;
189  Size newFree;
190  void *newSpace;
191 
192  /*
193  * Ensure all space is adequately aligned. We used to only MAXALIGN this
194  * space but experience has proved that on modern systems that is not good
195  * enough. Many parts of the system are very sensitive to critical data
196  * structures getting split across cache line boundaries. To avoid that,
197  * attempt to align the beginning of the allocation to a cache line
198  * boundary. The calling code will still need to be careful about how it
199  * uses the allocated space - e.g. by padding each element in an array of
200  * structures out to a power-of-two size - but without this, even that
201  * won't be sufficient.
202  */
204  *allocated_size = size;
205 
206  Assert(ShmemSegHdr != NULL);
207 
209 
210  newStart = ShmemSegHdr->freeoffset;
211 
212  newFree = newStart + size;
213  if (newFree <= ShmemSegHdr->totalsize)
214  {
215  newSpace = (void *) ((char *) ShmemBase + newStart);
216  ShmemSegHdr->freeoffset = newFree;
217  }
218  else
219  newSpace = NULL;
220 
222 
223  /* note this assert is okay with newSpace == NULL */
224  Assert(newSpace == (void *) CACHELINEALIGN(newSpace));
225 
226  return newSpace;
227 }
228 
229 /*
230  * ShmemAllocUnlocked -- allocate max-aligned chunk from shared memory
231  *
232  * Allocate space without locking ShmemLock. This should be used for,
233  * and only for, allocations that must happen before ShmemLock is ready.
234  *
235  * We consider maxalign, rather than cachealign, sufficient here.
236  */
237 void *
239 {
240  Size newStart;
241  Size newFree;
242  void *newSpace;
243 
244  /*
245  * Ensure allocated space is adequately aligned.
246  */
247  size = MAXALIGN(size);
248 
249  Assert(ShmemSegHdr != NULL);
250 
251  newStart = ShmemSegHdr->freeoffset;
252 
253  newFree = newStart + size;
254  if (newFree > ShmemSegHdr->totalsize)
255  ereport(ERROR,
256  (errcode(ERRCODE_OUT_OF_MEMORY),
257  errmsg("out of shared memory (%zu bytes requested)",
258  size)));
259  ShmemSegHdr->freeoffset = newFree;
260 
261  newSpace = (void *) ((char *) ShmemBase + newStart);
262 
263  Assert(newSpace == (void *) MAXALIGN(newSpace));
264 
265  return newSpace;
266 }
267 
268 /*
269  * ShmemAddrIsValid -- test if an address refers to shared memory
270  *
271  * Returns true if the pointer points within the shared memory segment.
272  */
273 bool
274 ShmemAddrIsValid(const void *addr)
275 {
276  return (addr >= ShmemBase) && (addr < ShmemEnd);
277 }
278 
279 /*
280  * InitShmemIndex() --- set up or attach to shmem index table.
281  */
282 void
284 {
285  HASHCTL info;
286 
287  /*
288  * Create the shared memory shmem index.
289  *
290  * Since ShmemInitHash calls ShmemInitStruct, which expects the ShmemIndex
291  * hashtable to exist already, we have a bit of a circularity problem in
292  * initializing the ShmemIndex itself. The special "ShmemIndex" hash
293  * table name will tell ShmemInitStruct to fake it.
294  */
296  info.entrysize = sizeof(ShmemIndexEnt);
297 
298  ShmemIndex = ShmemInitHash("ShmemIndex",
300  &info,
302 }
303 
304 /*
305  * ShmemInitHash -- Create and initialize, or attach to, a
306  * shared memory hash table.
307  *
308  * We assume caller is doing some kind of synchronization
309  * so that two processes don't try to create/initialize the same
310  * table at once. (In practice, all creations are done in the postmaster
311  * process; child processes should always be attaching to existing tables.)
312  *
313  * max_size is the estimated maximum number of hashtable entries. This is
314  * not a hard limit, but the access efficiency will degrade if it is
315  * exceeded substantially (since it's used to compute directory size and
316  * the hash table buckets will get overfull).
317  *
318  * init_size is the number of hashtable entries to preallocate. For a table
319  * whose maximum size is certain, this should be equal to max_size; that
320  * ensures that no run-time out-of-shared-memory failures can occur.
321  *
322  * *infoP and hash_flags must specify at least the entry sizes and key
323  * comparison semantics (see hash_create()). Flag bits and values specific
324  * to shared-memory hash tables are added here, except that callers may
325  * choose to specify HASH_PARTITION and/or HASH_FIXED_SIZE.
326  *
327  * Note: before Postgres 9.0, this function returned NULL for some failure
328  * cases. Now, it always throws error instead, so callers need not check
329  * for NULL.
330  */
331 HTAB *
332 ShmemInitHash(const char *name, /* table string name for shmem index */
333  long init_size, /* initial table size */
334  long max_size, /* max size of the table */
335  HASHCTL *infoP, /* info about key and bucket size */
336  int hash_flags) /* info about infoP */
337 {
338  bool found;
339  void *location;
340 
341  /*
342  * Hash tables allocated in shared memory have a fixed directory; it can't
343  * grow or other backends wouldn't be able to find it. So, make sure we
344  * make it big enough to start with.
345  *
346  * The shared memory allocator must be specified too.
347  */
348  infoP->dsize = infoP->max_dsize = hash_select_dirsize(max_size);
349  infoP->alloc = ShmemAllocNoError;
350  hash_flags |= HASH_SHARED_MEM | HASH_ALLOC | HASH_DIRSIZE;
351 
352  /* look it up in the shmem index */
353  location = ShmemInitStruct(name,
354  hash_get_shared_size(infoP, hash_flags),
355  &found);
356 
357  /*
358  * if it already exists, attach to it rather than allocate and initialize
359  * new space
360  */
361  if (found)
362  hash_flags |= HASH_ATTACH;
363 
364  /* Pass location of hashtable header to hash_create */
365  infoP->hctl = (HASHHDR *) location;
366 
367  return hash_create(name, init_size, infoP, hash_flags);
368 }
369 
370 /*
371  * ShmemInitStruct -- Create/attach to a structure in shared memory.
372  *
373  * This is called during initialization to find or allocate
374  * a data structure in shared memory. If no other process
375  * has created the structure, this routine allocates space
376  * for it. If it exists already, a pointer to the existing
377  * structure is returned.
378  *
379  * Returns: pointer to the object. *foundPtr is set true if the object was
380  * already in the shmem index (hence, already initialized).
381  *
382  * Note: before Postgres 9.0, this function returned NULL for some failure
383  * cases. Now, it always throws error instead, so callers need not check
384  * for NULL.
385  */
386 void *
387 ShmemInitStruct(const char *name, Size size, bool *foundPtr)
388 {
389  ShmemIndexEnt *result;
390  void *structPtr;
391 
392  LWLockAcquire(ShmemIndexLock, LW_EXCLUSIVE);
393 
394  if (!ShmemIndex)
395  {
396  PGShmemHeader *shmemseghdr = ShmemSegHdr;
397 
398  /* Must be trying to create/attach to ShmemIndex itself */
399  Assert(strcmp(name, "ShmemIndex") == 0);
400 
401  if (IsUnderPostmaster)
402  {
403  /* Must be initializing a (non-standalone) backend */
404  Assert(shmemseghdr->index != NULL);
405  structPtr = shmemseghdr->index;
406  *foundPtr = true;
407  }
408  else
409  {
410  /*
411  * If the shmem index doesn't exist, we are bootstrapping: we must
412  * be trying to init the shmem index itself.
413  *
414  * Notice that the ShmemIndexLock is released before the shmem
415  * index has been initialized. This should be OK because no other
416  * process can be accessing shared memory yet.
417  */
418  Assert(shmemseghdr->index == NULL);
419  structPtr = ShmemAlloc(size);
420  shmemseghdr->index = structPtr;
421  *foundPtr = false;
422  }
423  LWLockRelease(ShmemIndexLock);
424  return structPtr;
425  }
426 
427  /* look it up in the shmem index */
428  result = (ShmemIndexEnt *)
430 
431  if (!result)
432  {
433  LWLockRelease(ShmemIndexLock);
434  ereport(ERROR,
435  (errcode(ERRCODE_OUT_OF_MEMORY),
436  errmsg("could not create ShmemIndex entry for data structure \"%s\"",
437  name)));
438  }
439 
440  if (*foundPtr)
441  {
442  /*
443  * Structure is in the shmem index so someone else has allocated it
444  * already. The size better be the same as the size we are trying to
445  * initialize to, or there is a name conflict (or worse).
446  */
447  if (result->size != size)
448  {
449  LWLockRelease(ShmemIndexLock);
450  ereport(ERROR,
451  (errmsg("ShmemIndex entry size is wrong for data structure"
452  " \"%s\": expected %zu, actual %zu",
453  name, size, result->size)));
454  }
455  structPtr = result->location;
456  }
457  else
458  {
459  Size allocated_size;
460 
461  /* It isn't in the table yet. allocate and initialize it */
462  structPtr = ShmemAllocRaw(size, &allocated_size);
463  if (structPtr == NULL)
464  {
465  /* out of memory; remove the failed ShmemIndex entry */
467  LWLockRelease(ShmemIndexLock);
468  ereport(ERROR,
469  (errcode(ERRCODE_OUT_OF_MEMORY),
470  errmsg("not enough shared memory for data structure"
471  " \"%s\" (%zu bytes requested)",
472  name, size)));
473  }
474  result->size = size;
475  result->allocated_size = allocated_size;
476  result->location = structPtr;
477  }
478 
479  LWLockRelease(ShmemIndexLock);
480 
481  Assert(ShmemAddrIsValid(structPtr));
482 
483  Assert(structPtr == (void *) CACHELINEALIGN(structPtr));
484 
485  return structPtr;
486 }
487 
488 
489 /*
490  * Add two Size values, checking for overflow
491  */
492 Size
494 {
495  Size result;
496 
497  result = s1 + s2;
498  /* We are assuming Size is an unsigned type here... */
499  if (result < s1 || result < s2)
500  ereport(ERROR,
501  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
502  errmsg("requested shared memory size overflows size_t")));
503  return result;
504 }
505 
506 /*
507  * Multiply two Size values, checking for overflow
508  */
509 Size
511 {
512  Size result;
513 
514  if (s1 == 0 || s2 == 0)
515  return 0;
516  result = s1 * s2;
517  /* We are assuming Size is an unsigned type here... */
518  if (result / s2 != s1)
519  ereport(ERROR,
520  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
521  errmsg("requested shared memory size overflows size_t")));
522  return result;
523 }
524 
525 /* SQL SRF showing allocated shared memory */
526 Datum
528 {
529 #define PG_GET_SHMEM_SIZES_COLS 4
530  ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
531  HASH_SEQ_STATUS hstat;
532  ShmemIndexEnt *ent;
533  Size named_allocated = 0;
535  bool nulls[PG_GET_SHMEM_SIZES_COLS];
536 
537  InitMaterializedSRF(fcinfo, 0);
538 
539  LWLockAcquire(ShmemIndexLock, LW_SHARED);
540 
541  hash_seq_init(&hstat, ShmemIndex);
542 
543  /* output all allocated entries */
544  memset(nulls, 0, sizeof(nulls));
545  while ((ent = (ShmemIndexEnt *) hash_seq_search(&hstat)) != NULL)
546  {
547  values[0] = CStringGetTextDatum(ent->key);
548  values[1] = Int64GetDatum((char *) ent->location - (char *) ShmemSegHdr);
549  values[2] = Int64GetDatum(ent->size);
550  values[3] = Int64GetDatum(ent->allocated_size);
551  named_allocated += ent->allocated_size;
552 
553  tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
554  values, nulls);
555  }
556 
557  /* output shared memory allocated but not counted via the shmem index */
558  values[0] = CStringGetTextDatum("<anonymous>");
559  nulls[1] = true;
560  values[2] = Int64GetDatum(ShmemSegHdr->freeoffset - named_allocated);
561  values[3] = values[2];
562  tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
563 
564  /* output as-of-yet unused shared memory */
565  nulls[0] = true;
567  nulls[1] = false;
569  values[3] = values[2];
570  tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
571 
572  LWLockRelease(ShmemIndexLock);
573 
574  return (Datum) 0;
575 }
static Datum values[MAXATTR]
Definition: bootstrap.c:150
#define CStringGetTextDatum(s)
Definition: builtins.h:97
#define CACHELINEALIGN(LEN)
Definition: c.h:814
#define MAXALIGN(LEN)
Definition: c.h:811
#define Assert(condition)
Definition: c.h:858
size_t Size
Definition: c.h:605
Size hash_get_shared_size(HASHCTL *info, int flags)
Definition: dynahash.c:854
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:955
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
Definition: dynahash.c:352
long hash_select_dirsize(long num_entries)
Definition: dynahash.c:830
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1420
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1385
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
Datum Int64GetDatum(int64 X)
Definition: fmgr.c:1807
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
void InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags)
Definition: funcapi.c:76
bool IsUnderPostmaster
Definition: globals.c:119
#define HASH_STRINGS
Definition: hsearch.h:96
@ HASH_REMOVE
Definition: hsearch.h:115
@ HASH_ENTER_NULL
Definition: hsearch.h:116
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_ALLOC
Definition: hsearch.h:101
#define HASH_DIRSIZE
Definition: hsearch.h:94
#define HASH_ATTACH
Definition: hsearch.h:104
#define HASH_SHARED_MEM
Definition: hsearch.h:103
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1168
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1781
@ LW_SHARED
Definition: lwlock.h:115
@ LW_EXCLUSIVE
Definition: lwlock.h:114
uintptr_t Datum
Definition: postgres.h:64
char * s1
char * s2
bool ShmemAddrIsValid(const void *addr)
Definition: shmem.c:274
void * ShmemAlloc(Size size)
Definition: shmem.c:152
void * ShmemAllocNoError(Size size)
Definition: shmem.c:172
static void * ShmemBase
Definition: shmem.c:83
Datum pg_get_shmem_allocations(PG_FUNCTION_ARGS)
Definition: shmem.c:527
void InitShmemIndex(void)
Definition: shmem.c:283
static void * ShmemEnd
Definition: shmem.c:85
Size add_size(Size s1, Size s2)
Definition: shmem.c:493
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:387
Size mul_size(Size s1, Size s2)
Definition: shmem.c:510
slock_t * ShmemLock
Definition: shmem.c:87
#define PG_GET_SHMEM_SIZES_COLS
void InitShmemAllocation(void)
Definition: shmem.c:115
static PGShmemHeader * ShmemSegHdr
Definition: shmem.c:81
static void * ShmemAllocRaw(Size size, Size *allocated_size)
Definition: shmem.c:186
HTAB * ShmemInitHash(const char *name, long init_size, long max_size, HASHCTL *infoP, int hash_flags)
Definition: shmem.c:332
static HTAB * ShmemIndex
Definition: shmem.c:90
void * ShmemAllocUnlocked(Size size)
Definition: shmem.c:238
void InitShmemAccess(void *seghdr)
Definition: shmem.c:100
#define SHMEM_INDEX_SIZE
Definition: shmem.h:50
#define SHMEM_INDEX_KEYSIZE
Definition: shmem.h:48
static pg_noinline void Size size
Definition: slab.c:607
#define SpinLockInit(lock)
Definition: spin.h:57
#define SpinLockRelease(lock)
Definition: spin.h:61
#define SpinLockAcquire(lock)
Definition: spin.h:59
HashAllocFunc alloc
Definition: hsearch.h:84
Size keysize
Definition: hsearch.h:75
Size entrysize
Definition: hsearch.h:76
long dsize
Definition: hsearch.h:72
HASHHDR * hctl
Definition: hsearch.h:88
long max_dsize
Definition: hsearch.h:73
Definition: dynahash.c:220
Size freeoffset
Definition: pg_shmem.h:35
void * index
Definition: pg_shmem.h:37
Size totalsize
Definition: pg_shmem.h:34
TupleDesc setDesc
Definition: execnodes.h:343
Tuplestorestate * setResult
Definition: execnodes.h:342
void * location
Definition: shmem.h:56
Size size
Definition: shmem.h:57
Size allocated_size
Definition: shmem.h:58
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, const Datum *values, const bool *isnull)
Definition: tuplestore.c:784
const char * name