PostgreSQL Source Code  git master
shmem.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * shmem.c
4  * create shared memory and initialize shared memory data structures.
5  *
6  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/storage/ipc/shmem.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 /*
16  * POSTGRES processes share one or more regions of shared memory.
17  * The shared memory is created by a postmaster and is inherited
18  * by each backend via fork() (or, in some ports, via other OS-specific
19  * methods). The routines in this file are used for allocating and
20  * binding to shared memory data structures.
21  *
22  * NOTES:
23  * (a) There are three kinds of shared memory data structures
24  * available to POSTGRES: fixed-size structures, queues and hash
25  * tables. Fixed-size structures contain things like global variables
26  * for a module and should never be allocated after the shared memory
27  * initialization phase. Hash tables have a fixed maximum size, but
28  * their actual size can vary dynamically. When entries are added
29  * to the table, more space is allocated. Queues link data structures
30  * that have been allocated either within fixed-size structures or as hash
31  * buckets. Each shared data structure has a string name to identify
32  * it (assigned in the module that declares it).
33  *
34  * (b) During initialization, each module looks for its
35  * shared data structures in a hash table called the "Shmem Index".
36  * If the data structure is not present, the caller can allocate
37  * a new one and initialize it. If the data structure is present,
38  * the caller "attaches" to the structure by initializing a pointer
39  * in the local address space.
40  * The shmem index has two purposes: first, it gives us
41  * a simple model of how the world looks when a backend process
42  * initializes. If something is present in the shmem index,
43  * it is initialized. If it is not, it is uninitialized. Second,
44  * the shmem index allows us to allocate shared memory on demand
45  * instead of trying to preallocate structures and hard-wire the
46  * sizes and locations in header files. If you are using a lot
47  * of shared memory in a lot of different places (and changing
48  * things during development), this is important.
49  *
50  * (c) In standard Unix-ish environments, individual backends do not
51  * need to re-establish their local pointers into shared memory, because
52  * they inherit correct values of those variables via fork() from the
53  * postmaster. However, this does not work in the EXEC_BACKEND case.
54  * In ports using EXEC_BACKEND, new backends have to set up their local
55  * pointers using the method described in (b) above.
56  *
57  * (d) memory allocation model: shared memory can never be
58  * freed, once allocated. Each hash table has its own free list,
59  * so hash buckets can be reused when an item is deleted. However,
60  * if one hash table grows very large and then shrinks, its space
61  * cannot be redistributed to other tables. We could build a simple
62  * hash bucket garbage collector if need be. Right now, it seems
63  * unnecessary.
64  */
65 
66 #include "postgres.h"
67 
68 #include "access/transam.h"
69 #include "fmgr.h"
70 #include "funcapi.h"
71 #include "miscadmin.h"
72 #include "storage/lwlock.h"
73 #include "storage/pg_shmem.h"
74 #include "storage/shmem.h"
75 #include "storage/spin.h"
76 #include "utils/builtins.h"
77 
78 static void *ShmemAllocRaw(Size size, Size *allocated_size);
79 
80 /* shared memory global variables */
81 
82 static PGShmemHeader *ShmemSegHdr; /* shared mem segment header */
83 
84 static void *ShmemBase; /* start address of shared memory */
85 
86 static void *ShmemEnd; /* end+1 address of shared memory */
87 
88 slock_t *ShmemLock; /* spinlock for shared memory and LWLock
89  * allocation */
90 
91 static HTAB *ShmemIndex = NULL; /* primary index hashtable for shmem */
92 
93 
94 /*
95  * InitShmemAccess() --- set up basic pointers to shared memory.
96  *
97  * Note: the argument should be declared "PGShmemHeader *seghdr",
98  * but we use void to avoid having to include ipc.h in shmem.h.
99  */
100 void
101 InitShmemAccess(void *seghdr)
102 {
103  PGShmemHeader *shmhdr = (PGShmemHeader *) seghdr;
104 
105  ShmemSegHdr = shmhdr;
106  ShmemBase = (void *) shmhdr;
107  ShmemEnd = (char *) ShmemBase + shmhdr->totalsize;
108 }
109 
110 /*
111  * InitShmemAllocation() --- set up shared-memory space allocation.
112  *
113  * This should be called only in the postmaster or a standalone backend.
114  */
115 void
117 {
118  PGShmemHeader *shmhdr = ShmemSegHdr;
119  char *aligned;
120 
121  Assert(shmhdr != NULL);
122 
123  /*
124  * Initialize the spinlock used by ShmemAlloc. We must use
125  * ShmemAllocUnlocked, since obviously ShmemAlloc can't be called yet.
126  */
128 
130 
131  /*
132  * Allocations after this point should go through ShmemAlloc, which
133  * expects to allocate everything on cache line boundaries. Make sure the
134  * first allocation begins on a cache line boundary.
135  */
136  aligned = (char *)
137  (CACHELINEALIGN((((char *) shmhdr) + shmhdr->freeoffset)));
138  shmhdr->freeoffset = aligned - (char *) shmhdr;
139 
140  /* ShmemIndex can't be set up yet (need LWLocks first) */
141  shmhdr->index = NULL;
142  ShmemIndex = (HTAB *) NULL;
143 }
144 
145 /*
146  * ShmemAlloc -- allocate max-aligned chunk from shared memory
147  *
148  * Throws error if request cannot be satisfied.
149  *
150  * Assumes ShmemLock and ShmemSegHdr are initialized.
151  */
152 void *
154 {
155  void *newSpace;
156  Size allocated_size;
157 
158  newSpace = ShmemAllocRaw(size, &allocated_size);
159  if (!newSpace)
160  ereport(ERROR,
161  (errcode(ERRCODE_OUT_OF_MEMORY),
162  errmsg("out of shared memory (%zu bytes requested)",
163  size)));
164  return newSpace;
165 }
166 
167 /*
168  * ShmemAllocNoError -- allocate max-aligned chunk from shared memory
169  *
170  * As ShmemAlloc, but returns NULL if out of space, rather than erroring.
171  */
172 void *
174 {
175  Size allocated_size;
176 
177  return ShmemAllocRaw(size, &allocated_size);
178 }
179 
180 /*
181  * ShmemAllocRaw -- allocate align chunk and return allocated size
182  *
183  * Also sets *allocated_size to the number of bytes allocated, which will
184  * be equal to the number requested plus any padding we choose to add.
185  */
186 static void *
187 ShmemAllocRaw(Size size, Size *allocated_size)
188 {
189  Size newStart;
190  Size newFree;
191  void *newSpace;
192 
193  /*
194  * Ensure all space is adequately aligned. We used to only MAXALIGN this
195  * space but experience has proved that on modern systems that is not good
196  * enough. Many parts of the system are very sensitive to critical data
197  * structures getting split across cache line boundaries. To avoid that,
198  * attempt to align the beginning of the allocation to a cache line
199  * boundary. The calling code will still need to be careful about how it
200  * uses the allocated space - e.g. by padding each element in an array of
201  * structures out to a power-of-two size - but without this, even that
202  * won't be sufficient.
203  */
204  size = CACHELINEALIGN(size);
205  *allocated_size = size;
206 
207  Assert(ShmemSegHdr != NULL);
208 
210 
211  newStart = ShmemSegHdr->freeoffset;
212 
213  newFree = newStart + size;
214  if (newFree <= ShmemSegHdr->totalsize)
215  {
216  newSpace = (void *) ((char *) ShmemBase + newStart);
217  ShmemSegHdr->freeoffset = newFree;
218  }
219  else
220  newSpace = NULL;
221 
223 
224  /* note this assert is okay with newSpace == NULL */
225  Assert(newSpace == (void *) CACHELINEALIGN(newSpace));
226 
227  return newSpace;
228 }
229 
230 /*
231  * ShmemAllocUnlocked -- allocate max-aligned chunk from shared memory
232  *
233  * Allocate space without locking ShmemLock. This should be used for,
234  * and only for, allocations that must happen before ShmemLock is ready.
235  *
236  * We consider maxalign, rather than cachealign, sufficient here.
237  */
238 void *
240 {
241  Size newStart;
242  Size newFree;
243  void *newSpace;
244 
245  /*
246  * Ensure allocated space is adequately aligned.
247  */
248  size = MAXALIGN(size);
249 
250  Assert(ShmemSegHdr != NULL);
251 
252  newStart = ShmemSegHdr->freeoffset;
253 
254  newFree = newStart + size;
255  if (newFree > ShmemSegHdr->totalsize)
256  ereport(ERROR,
257  (errcode(ERRCODE_OUT_OF_MEMORY),
258  errmsg("out of shared memory (%zu bytes requested)",
259  size)));
260  ShmemSegHdr->freeoffset = newFree;
261 
262  newSpace = (void *) ((char *) ShmemBase + newStart);
263 
264  Assert(newSpace == (void *) MAXALIGN(newSpace));
265 
266  return newSpace;
267 }
268 
269 /*
270  * ShmemAddrIsValid -- test if an address refers to shared memory
271  *
272  * Returns true if the pointer points within the shared memory segment.
273  */
274 bool
275 ShmemAddrIsValid(const void *addr)
276 {
277  return (addr >= ShmemBase) && (addr < ShmemEnd);
278 }
279 
280 /*
281  * InitShmemIndex() --- set up or attach to shmem index table.
282  */
283 void
285 {
286  HASHCTL info;
287 
288  /*
289  * Create the shared memory shmem index.
290  *
291  * Since ShmemInitHash calls ShmemInitStruct, which expects the ShmemIndex
292  * hashtable to exist already, we have a bit of a circularity problem in
293  * initializing the ShmemIndex itself. The special "ShmemIndex" hash
294  * table name will tell ShmemInitStruct to fake it.
295  */
297  info.entrysize = sizeof(ShmemIndexEnt);
298 
299  ShmemIndex = ShmemInitHash("ShmemIndex",
301  &info,
303 }
304 
305 /*
306  * ShmemInitHash -- Create and initialize, or attach to, a
307  * shared memory hash table.
308  *
309  * We assume caller is doing some kind of synchronization
310  * so that two processes don't try to create/initialize the same
311  * table at once. (In practice, all creations are done in the postmaster
312  * process; child processes should always be attaching to existing tables.)
313  *
314  * max_size is the estimated maximum number of hashtable entries. This is
315  * not a hard limit, but the access efficiency will degrade if it is
316  * exceeded substantially (since it's used to compute directory size and
317  * the hash table buckets will get overfull).
318  *
319  * init_size is the number of hashtable entries to preallocate. For a table
320  * whose maximum size is certain, this should be equal to max_size; that
321  * ensures that no run-time out-of-shared-memory failures can occur.
322  *
323  * *infoP and hash_flags must specify at least the entry sizes and key
324  * comparison semantics (see hash_create()). Flag bits and values specific
325  * to shared-memory hash tables are added here, except that callers may
326  * choose to specify HASH_PARTITION and/or HASH_FIXED_SIZE.
327  *
328  * Note: before Postgres 9.0, this function returned NULL for some failure
329  * cases. Now, it always throws error instead, so callers need not check
330  * for NULL.
331  */
332 HTAB *
333 ShmemInitHash(const char *name, /* table string name for shmem index */
334  long init_size, /* initial table size */
335  long max_size, /* max size of the table */
336  HASHCTL *infoP, /* info about key and bucket size */
337  int hash_flags) /* info about infoP */
338 {
339  bool found;
340  void *location;
341 
342  /*
343  * Hash tables allocated in shared memory have a fixed directory; it can't
344  * grow or other backends wouldn't be able to find it. So, make sure we
345  * make it big enough to start with.
346  *
347  * The shared memory allocator must be specified too.
348  */
349  infoP->dsize = infoP->max_dsize = hash_select_dirsize(max_size);
350  infoP->alloc = ShmemAllocNoError;
351  hash_flags |= HASH_SHARED_MEM | HASH_ALLOC | HASH_DIRSIZE;
352 
353  /* look it up in the shmem index */
354  location = ShmemInitStruct(name,
355  hash_get_shared_size(infoP, hash_flags),
356  &found);
357 
358  /*
359  * if it already exists, attach to it rather than allocate and initialize
360  * new space
361  */
362  if (found)
363  hash_flags |= HASH_ATTACH;
364 
365  /* Pass location of hashtable header to hash_create */
366  infoP->hctl = (HASHHDR *) location;
367 
368  return hash_create(name, init_size, infoP, hash_flags);
369 }
370 
371 /*
372  * ShmemInitStruct -- Create/attach to a structure in shared memory.
373  *
374  * This is called during initialization to find or allocate
375  * a data structure in shared memory. If no other process
376  * has created the structure, this routine allocates space
377  * for it. If it exists already, a pointer to the existing
378  * structure is returned.
379  *
380  * Returns: pointer to the object. *foundPtr is set true if the object was
381  * already in the shmem index (hence, already initialized).
382  *
383  * Note: before Postgres 9.0, this function returned NULL for some failure
384  * cases. Now, it always throws error instead, so callers need not check
385  * for NULL.
386  */
387 void *
388 ShmemInitStruct(const char *name, Size size, bool *foundPtr)
389 {
390  ShmemIndexEnt *result;
391  void *structPtr;
392 
393  LWLockAcquire(ShmemIndexLock, LW_EXCLUSIVE);
394 
395  if (!ShmemIndex)
396  {
397  PGShmemHeader *shmemseghdr = ShmemSegHdr;
398 
399  /* Must be trying to create/attach to ShmemIndex itself */
400  Assert(strcmp(name, "ShmemIndex") == 0);
401 
402  if (IsUnderPostmaster)
403  {
404  /* Must be initializing a (non-standalone) backend */
405  Assert(shmemseghdr->index != NULL);
406  structPtr = shmemseghdr->index;
407  *foundPtr = true;
408  }
409  else
410  {
411  /*
412  * If the shmem index doesn't exist, we are bootstrapping: we must
413  * be trying to init the shmem index itself.
414  *
415  * Notice that the ShmemIndexLock is released before the shmem
416  * index has been initialized. This should be OK because no other
417  * process can be accessing shared memory yet.
418  */
419  Assert(shmemseghdr->index == NULL);
420  structPtr = ShmemAlloc(size);
421  shmemseghdr->index = structPtr;
422  *foundPtr = false;
423  }
424  LWLockRelease(ShmemIndexLock);
425  return structPtr;
426  }
427 
428  /* look it up in the shmem index */
429  result = (ShmemIndexEnt *)
431 
432  if (!result)
433  {
434  LWLockRelease(ShmemIndexLock);
435  ereport(ERROR,
436  (errcode(ERRCODE_OUT_OF_MEMORY),
437  errmsg("could not create ShmemIndex entry for data structure \"%s\"",
438  name)));
439  }
440 
441  if (*foundPtr)
442  {
443  /*
444  * Structure is in the shmem index so someone else has allocated it
445  * already. The size better be the same as the size we are trying to
446  * initialize to, or there is a name conflict (or worse).
447  */
448  if (result->size != size)
449  {
450  LWLockRelease(ShmemIndexLock);
451  ereport(ERROR,
452  (errmsg("ShmemIndex entry size is wrong for data structure"
453  " \"%s\": expected %zu, actual %zu",
454  name, size, result->size)));
455  }
456  structPtr = result->location;
457  }
458  else
459  {
460  Size allocated_size;
461 
462  /* It isn't in the table yet. allocate and initialize it */
463  structPtr = ShmemAllocRaw(size, &allocated_size);
464  if (structPtr == NULL)
465  {
466  /* out of memory; remove the failed ShmemIndex entry */
468  LWLockRelease(ShmemIndexLock);
469  ereport(ERROR,
470  (errcode(ERRCODE_OUT_OF_MEMORY),
471  errmsg("not enough shared memory for data structure"
472  " \"%s\" (%zu bytes requested)",
473  name, size)));
474  }
475  result->size = size;
476  result->allocated_size = allocated_size;
477  result->location = structPtr;
478  }
479 
480  LWLockRelease(ShmemIndexLock);
481 
482  Assert(ShmemAddrIsValid(structPtr));
483 
484  Assert(structPtr == (void *) CACHELINEALIGN(structPtr));
485 
486  return structPtr;
487 }
488 
489 
490 /*
491  * Add two Size values, checking for overflow
492  */
493 Size
495 {
496  Size result;
497 
498  result = s1 + s2;
499  /* We are assuming Size is an unsigned type here... */
500  if (result < s1 || result < s2)
501  ereport(ERROR,
502  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
503  errmsg("requested shared memory size overflows size_t")));
504  return result;
505 }
506 
507 /*
508  * Multiply two Size values, checking for overflow
509  */
510 Size
512 {
513  Size result;
514 
515  if (s1 == 0 || s2 == 0)
516  return 0;
517  result = s1 * s2;
518  /* We are assuming Size is an unsigned type here... */
519  if (result / s2 != s1)
520  ereport(ERROR,
521  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
522  errmsg("requested shared memory size overflows size_t")));
523  return result;
524 }
525 
526 /* SQL SRF showing allocated shared memory */
527 Datum
529 {
530 #define PG_GET_SHMEM_SIZES_COLS 4
531  ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
532  HASH_SEQ_STATUS hstat;
533  ShmemIndexEnt *ent;
534  Size named_allocated = 0;
536  bool nulls[PG_GET_SHMEM_SIZES_COLS];
537 
538  InitMaterializedSRF(fcinfo, 0);
539 
540  LWLockAcquire(ShmemIndexLock, LW_SHARED);
541 
542  hash_seq_init(&hstat, ShmemIndex);
543 
544  /* output all allocated entries */
545  memset(nulls, 0, sizeof(nulls));
546  while ((ent = (ShmemIndexEnt *) hash_seq_search(&hstat)) != NULL)
547  {
548  values[0] = CStringGetTextDatum(ent->key);
549  values[1] = Int64GetDatum((char *) ent->location - (char *) ShmemSegHdr);
550  values[2] = Int64GetDatum(ent->size);
551  values[3] = Int64GetDatum(ent->allocated_size);
552  named_allocated += ent->allocated_size;
553 
554  tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
555  values, nulls);
556  }
557 
558  /* output shared memory allocated but not counted via the shmem index */
559  values[0] = CStringGetTextDatum("<anonymous>");
560  nulls[1] = true;
561  values[2] = Int64GetDatum(ShmemSegHdr->freeoffset - named_allocated);
562  values[3] = values[2];
563  tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
564 
565  /* output as-of-yet unused shared memory */
566  nulls[0] = true;
568  nulls[1] = false;
570  values[3] = values[2];
571  tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
572 
573  LWLockRelease(ShmemIndexLock);
574 
575  return (Datum) 0;
576 }
static Datum values[MAXATTR]
Definition: bootstrap.c:156
#define CStringGetTextDatum(s)
Definition: builtins.h:97
#define CACHELINEALIGN(LEN)
Definition: c.h:803
#define MAXALIGN(LEN)
Definition: c.h:800
size_t Size
Definition: c.h:594
Size hash_get_shared_size(HASHCTL *info, int flags)
Definition: dynahash.c:852
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:953
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
Definition: dynahash.c:350
long hash_select_dirsize(long num_entries)
Definition: dynahash.c:828
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1431
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1421
int errcode(int sqlerrcode)
Definition: elog.c:860
int errmsg(const char *fmt,...)
Definition: elog.c:1075
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
Datum Int64GetDatum(int64 X)
Definition: fmgr.c:1807
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
void InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags)
Definition: funcapi.c:76
bool IsUnderPostmaster
Definition: globals.c:116
#define HASH_STRINGS
Definition: hsearch.h:96
@ HASH_REMOVE
Definition: hsearch.h:115
@ HASH_ENTER_NULL
Definition: hsearch.h:116
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_ALLOC
Definition: hsearch.h:101
#define HASH_DIRSIZE
Definition: hsearch.h:94
#define HASH_ATTACH
Definition: hsearch.h:104
#define HASH_SHARED_MEM
Definition: hsearch.h:103
Assert(fmt[strlen(fmt) - 1] !='\n')
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1168
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1781
@ LW_SHARED
Definition: lwlock.h:117
@ LW_EXCLUSIVE
Definition: lwlock.h:116
uintptr_t Datum
Definition: postgres.h:64
char * s1
char * s2
int slock_t
Definition: s_lock.h:754
bool ShmemAddrIsValid(const void *addr)
Definition: shmem.c:275
void * ShmemAlloc(Size size)
Definition: shmem.c:153
void * ShmemAllocNoError(Size size)
Definition: shmem.c:173
static void * ShmemBase
Definition: shmem.c:84
Datum pg_get_shmem_allocations(PG_FUNCTION_ARGS)
Definition: shmem.c:528
void InitShmemIndex(void)
Definition: shmem.c:284
static void * ShmemEnd
Definition: shmem.c:86
Size add_size(Size s1, Size s2)
Definition: shmem.c:494
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:388
Size mul_size(Size s1, Size s2)
Definition: shmem.c:511
slock_t * ShmemLock
Definition: shmem.c:88
#define PG_GET_SHMEM_SIZES_COLS
void InitShmemAllocation(void)
Definition: shmem.c:116
static PGShmemHeader * ShmemSegHdr
Definition: shmem.c:82
static void * ShmemAllocRaw(Size size, Size *allocated_size)
Definition: shmem.c:187
HTAB * ShmemInitHash(const char *name, long init_size, long max_size, HASHCTL *infoP, int hash_flags)
Definition: shmem.c:333
static HTAB * ShmemIndex
Definition: shmem.c:91
void * ShmemAllocUnlocked(Size size)
Definition: shmem.c:239
void InitShmemAccess(void *seghdr)
Definition: shmem.c:101
#define SHMEM_INDEX_SIZE
Definition: shmem.h:48
#define SHMEM_INDEX_KEYSIZE
Definition: shmem.h:46
#define SpinLockInit(lock)
Definition: spin.h:60
#define SpinLockRelease(lock)
Definition: spin.h:64
#define SpinLockAcquire(lock)
Definition: spin.h:62
HashAllocFunc alloc
Definition: hsearch.h:84
Size keysize
Definition: hsearch.h:75
Size entrysize
Definition: hsearch.h:76
long dsize
Definition: hsearch.h:72
HASHHDR * hctl
Definition: hsearch.h:88
long max_dsize
Definition: hsearch.h:73
Definition: dynahash.c:220
Size freeoffset
Definition: pg_shmem.h:35
void * index
Definition: pg_shmem.h:37
Size totalsize
Definition: pg_shmem.h:34
TupleDesc setDesc
Definition: execnodes.h:340
Tuplestorestate * setResult
Definition: execnodes.h:339
void * location
Definition: shmem.h:54
Size size
Definition: shmem.h:55
Size allocated_size
Definition: shmem.h:56
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, const Datum *values, const bool *isnull)
Definition: tuplestore.c:750
const char * name