PostgreSQL Source Code  git master
win32_shmem.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * win32_shmem.c
4  * Implement shared memory using win32 facilities
5  *
6  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * src/backend/port/win32_shmem.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14 
15 #include "miscadmin.h"
16 #include "storage/dsm.h"
17 #include "storage/ipc.h"
18 #include "storage/pg_shmem.h"
19 
20 /*
21  * Early in a process's life, Windows asynchronously creates threads for the
22  * process's "default thread pool"
23  * (https://docs.microsoft.com/en-us/windows/desktop/ProcThread/thread-pools).
24  * Occasionally, thread creation allocates a stack after
25  * PGSharedMemoryReAttach() has released UsedShmemSegAddr and before it has
26  * mapped shared memory at UsedShmemSegAddr. This would cause mapping to fail
27  * if the allocator preferred the just-released region for allocating the new
28  * thread stack. We observed such failures in some Windows Server 2016
29  * configurations. To give the system another region to prefer, reserve and
30  * release an additional, protective region immediately before reserving or
31  * releasing shared memory. The idea is that, if the allocator handed out
32  * REGION1 pages before REGION2 pages at one occasion, it will do so whenever
33  * both regions are free. Windows Server 2016 exhibits that behavior, and a
34  * system behaving differently would have less need to protect
35  * UsedShmemSegAddr. The protective region must be at least large enough for
36  * one thread stack. However, ten times as much is less than 2% of the 32-bit
37  * address space and is negligible relative to the 64-bit address space.
38  */
39 #define PROTECTIVE_REGION_SIZE (10 * WIN32_STACK_RLIMIT)
40 void *ShmemProtectiveRegion = NULL;
41 
42 HANDLE UsedShmemSegID = INVALID_HANDLE_VALUE;
43 void *UsedShmemSegAddr = NULL;
45 
46 static bool EnableLockPagesPrivilege(int elevel);
47 static void pgwin32_SharedMemoryDelete(int status, Datum shmId);
48 
49 /*
50  * Generate shared memory segment name. Expand the data directory, to generate
51  * an identifier unique for this data directory. Then replace all backslashes
52  * with forward slashes, since backslashes aren't permitted in global object names.
53  *
54  * Store the shared memory segment in the Global\ namespace (requires NT2 TSE or
55  * 2000, but that's all we support for other reasons as well), to make sure you can't
56  * open two postmasters in different sessions against the same data directory.
57  *
58  * XXX: What happens with junctions? It's only someone breaking things on purpose,
59  * and this is still better than before, but we might want to do something about
60  * that sometime in the future.
61  */
62 static char *
64 {
65  char *retptr;
66  DWORD bufsize;
67  DWORD r;
68  char *cp;
69 
70  bufsize = GetFullPathName(DataDir, 0, NULL, NULL);
71  if (bufsize == 0)
72  elog(FATAL, "could not get size for full pathname of datadir %s: error code %lu",
73  DataDir, GetLastError());
74 
75  retptr = malloc(bufsize + 18); /* 18 for Global\PostgreSQL: */
76  if (retptr == NULL)
77  elog(FATAL, "could not allocate memory for shared memory name");
78 
79  strcpy(retptr, "Global\\PostgreSQL:");
80  r = GetFullPathName(DataDir, bufsize, retptr + 18, NULL);
81  if (r == 0 || r > bufsize)
82  elog(FATAL, "could not generate full pathname for datadir %s: error code %lu",
83  DataDir, GetLastError());
84 
85  /*
86  * XXX: Intentionally overwriting the Global\ part here. This was not the
87  * original approach, but putting it in the actual Global\ namespace
88  * causes permission errors in a lot of cases, so we leave it in the
89  * default namespace for now.
90  */
91  for (cp = retptr; *cp; cp++)
92  if (*cp == '\\')
93  *cp = '/';
94 
95  return retptr;
96 }
97 
98 
99 /*
100  * PGSharedMemoryIsInUse
101  *
102  * Is a previously-existing shmem segment still existing and in use?
103  *
104  * The point of this exercise is to detect the case where a prior postmaster
105  * crashed, but it left child backends that are still running. Therefore
106  * we only care about shmem segments that are associated with the intended
107  * DataDir. This is an important consideration since accidental matches of
108  * shmem segment IDs are reasonably common.
109  */
110 bool
111 PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
112 {
113  char *szShareMem;
114  HANDLE hmap;
115 
116  szShareMem = GetSharedMemName();
117 
118  hmap = OpenFileMapping(FILE_MAP_READ, FALSE, szShareMem);
119 
120  free(szShareMem);
121 
122  if (hmap == NULL)
123  return false;
124 
125  CloseHandle(hmap);
126  return true;
127 }
128 
129 /*
130  * EnableLockPagesPrivilege
131  *
132  * Try to acquire SeLockMemoryPrivilege so we can use large pages.
133  */
134 static bool
136 {
137  HANDLE hToken;
138  TOKEN_PRIVILEGES tp;
139  LUID luid;
140 
141  if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hToken))
142  {
143  ereport(elevel,
144  (errmsg("could not enable user right \"%s\": error code %lu",
145 
146  /*
147  * translator: This is a term from Windows and should be translated to
148  * match the Windows localization.
149  */
150  _("Lock pages in memory"),
151  GetLastError()),
152  errdetail("Failed system call was %s.", "OpenProcessToken")));
153  return FALSE;
154  }
155 
156  if (!LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &luid))
157  {
158  ereport(elevel,
159  (errmsg("could not enable user right \"%s\": error code %lu", _("Lock pages in memory"), GetLastError()),
160  errdetail("Failed system call was %s.", "LookupPrivilegeValue")));
161  CloseHandle(hToken);
162  return FALSE;
163  }
164  tp.PrivilegeCount = 1;
165  tp.Privileges[0].Luid = luid;
166  tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
167 
168  if (!AdjustTokenPrivileges(hToken, FALSE, &tp, 0, NULL, NULL))
169  {
170  ereport(elevel,
171  (errmsg("could not enable user right \"%s\": error code %lu", _("Lock pages in memory"), GetLastError()),
172  errdetail("Failed system call was %s.", "AdjustTokenPrivileges")));
173  CloseHandle(hToken);
174  return FALSE;
175  }
176 
177  if (GetLastError() != ERROR_SUCCESS)
178  {
179  if (GetLastError() == ERROR_NOT_ALL_ASSIGNED)
180  ereport(elevel,
181  (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
182  errmsg("could not enable user right \"%s\"", _("Lock pages in memory")),
183  errhint("Assign user right \"%s\" to the Windows user account which runs PostgreSQL.",
184  _("Lock pages in memory"))));
185  else
186  ereport(elevel,
187  (errmsg("could not enable user right \"%s\": error code %lu", _("Lock pages in memory"), GetLastError()),
188  errdetail("Failed system call was %s.", "AdjustTokenPrivileges")));
189  CloseHandle(hToken);
190  return FALSE;
191  }
192 
193  CloseHandle(hToken);
194 
195  return TRUE;
196 }
197 
198 /*
199  * PGSharedMemoryCreate
200  *
201  * Create a shared memory segment of the given size and initialize its
202  * standard header.
203  */
206  PGShmemHeader **shim)
207 {
208  void *memAddress;
209  PGShmemHeader *hdr;
210  HANDLE hmap,
211  hmap2;
212  char *szShareMem;
213  int i;
214  DWORD size_high;
215  DWORD size_low;
216  SIZE_T largePageSize = 0;
217  Size orig_size = size;
218  DWORD flProtect = PAGE_READWRITE;
219 
220  ShmemProtectiveRegion = VirtualAlloc(NULL, PROTECTIVE_REGION_SIZE,
221  MEM_RESERVE, PAGE_NOACCESS);
222  if (ShmemProtectiveRegion == NULL)
223  elog(FATAL, "could not reserve memory region: error code %lu",
224  GetLastError());
225 
226  /* Room for a header? */
227  Assert(size > MAXALIGN(sizeof(PGShmemHeader)));
228 
229  szShareMem = GetSharedMemName();
230 
231  UsedShmemSegAddr = NULL;
232 
234  {
235  /* Does the processor support large pages? */
236  largePageSize = GetLargePageMinimum();
237  if (largePageSize == 0)
238  {
240  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
241  errmsg("the processor does not support large pages")));
242  ereport(DEBUG1,
243  (errmsg_internal("disabling huge pages")));
244  }
246  {
247  ereport(DEBUG1,
248  (errmsg_internal("disabling huge pages")));
249  }
250  else
251  {
252  /* Huge pages available and privilege enabled, so turn on */
253  flProtect = PAGE_READWRITE | SEC_COMMIT | SEC_LARGE_PAGES;
254 
255  /* Round size up as appropriate. */
256  if (size % largePageSize != 0)
257  size += largePageSize - (size % largePageSize);
258  }
259  }
260 
261 retry:
262 #ifdef _WIN64
263  size_high = size >> 32;
264 #else
265  size_high = 0;
266 #endif
267  size_low = (DWORD) size;
268 
269  /*
270  * When recycling a shared memory segment, it may take a short while
271  * before it gets dropped from the global namespace. So re-try after
272  * sleeping for a second, and continue retrying 10 times. (both the 1
273  * second time and the 10 retries are completely arbitrary)
274  */
275  for (i = 0; i < 10; i++)
276  {
277  /*
278  * In case CreateFileMapping() doesn't set the error code to 0 on
279  * success
280  */
281  SetLastError(0);
282 
283  hmap = CreateFileMapping(INVALID_HANDLE_VALUE, /* Use the pagefile */
284  NULL, /* Default security attrs */
285  flProtect,
286  size_high, /* Size Upper 32 Bits */
287  size_low, /* Size Lower 32 bits */
288  szShareMem);
289 
290  if (!hmap)
291  {
292  if (GetLastError() == ERROR_NO_SYSTEM_RESOURCES &&
294  (flProtect & SEC_LARGE_PAGES) != 0)
295  {
296  elog(DEBUG1, "CreateFileMapping(%zu) with SEC_LARGE_PAGES failed, "
297  "huge pages disabled",
298  size);
299 
300  /*
301  * Use the original size, not the rounded-up value, when
302  * falling back to non-huge pages.
303  */
304  size = orig_size;
305  flProtect = PAGE_READWRITE;
306  goto retry;
307  }
308  else
309  ereport(FATAL,
310  (errmsg("could not create shared memory segment: error code %lu", GetLastError()),
311  errdetail("Failed system call was CreateFileMapping(size=%zu, name=%s).",
312  size, szShareMem)));
313  }
314 
315  /*
316  * If the segment already existed, CreateFileMapping() will return a
317  * handle to the existing one and set ERROR_ALREADY_EXISTS.
318  */
319  if (GetLastError() == ERROR_ALREADY_EXISTS)
320  {
321  CloseHandle(hmap); /* Close the handle, since we got a valid one
322  * to the previous segment. */
323  hmap = NULL;
324  Sleep(1000);
325  continue;
326  }
327  break;
328  }
329 
330  /*
331  * If the last call in the loop still returned ERROR_ALREADY_EXISTS, this
332  * shared memory segment exists and we assume it belongs to somebody else.
333  */
334  if (!hmap)
335  ereport(FATAL,
336  (errmsg("pre-existing shared memory block is still in use"),
337  errhint("Check if there are any old server processes still running, and terminate them.")));
338 
339  free(szShareMem);
340 
341  /*
342  * Make the handle inheritable
343  */
344  if (!DuplicateHandle(GetCurrentProcess(), hmap, GetCurrentProcess(), &hmap2, 0, TRUE, DUPLICATE_SAME_ACCESS))
345  ereport(FATAL,
346  (errmsg("could not create shared memory segment: error code %lu", GetLastError()),
347  errdetail("Failed system call was DuplicateHandle.")));
348 
349  /*
350  * Close the old, non-inheritable handle. If this fails we don't really
351  * care.
352  */
353  if (!CloseHandle(hmap))
354  elog(LOG, "could not close handle to shared memory: error code %lu", GetLastError());
355 
356 
357  /*
358  * Get a pointer to the new shared memory segment. Map the whole segment
359  * at once, and let the system decide on the initial address.
360  */
361  memAddress = MapViewOfFileEx(hmap2, FILE_MAP_WRITE | FILE_MAP_READ, 0, 0, 0, NULL);
362  if (!memAddress)
363  ereport(FATAL,
364  (errmsg("could not create shared memory segment: error code %lu", GetLastError()),
365  errdetail("Failed system call was MapViewOfFileEx.")));
366 
367 
368 
369  /*
370  * OK, we created a new segment. Mark it as created by this process. The
371  * order of assignments here is critical so that another Postgres process
372  * can't see the header as valid but belonging to an invalid PID!
373  */
374  hdr = (PGShmemHeader *) memAddress;
375  hdr->creatorPID = getpid();
376  hdr->magic = PGShmemMagic;
377 
378  /*
379  * Initialize space allocation status for segment.
380  */
381  hdr->totalsize = size;
382  hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
383  hdr->dsm_control = 0;
384 
385  /* Save info for possible future use */
386  UsedShmemSegAddr = memAddress;
387  UsedShmemSegSize = size;
388  UsedShmemSegID = hmap2;
389 
390  /* Register on-exit routine to delete the new segment */
392 
393  *shim = hdr;
394  return hdr;
395 }
396 
397 /*
398  * PGSharedMemoryReAttach
399  *
400  * This is called during startup of a postmaster child process to re-attach to
401  * an already existing shared memory segment, using the handle inherited from
402  * the postmaster.
403  *
404  * ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
405  * parameters to this routine. The caller must have already restored them to
406  * the postmaster's values.
407  */
408 void
410 {
411  PGShmemHeader *hdr;
412  void *origUsedShmemSegAddr = UsedShmemSegAddr;
413 
414  Assert(ShmemProtectiveRegion != NULL);
415  Assert(UsedShmemSegAddr != NULL);
417 
418  /*
419  * Release memory region reservations made by the postmaster
420  */
421  if (VirtualFree(ShmemProtectiveRegion, 0, MEM_RELEASE) == 0)
422  elog(FATAL, "failed to release reserved memory region (addr=%p): error code %lu",
423  ShmemProtectiveRegion, GetLastError());
424  if (VirtualFree(UsedShmemSegAddr, 0, MEM_RELEASE) == 0)
425  elog(FATAL, "failed to release reserved memory region (addr=%p): error code %lu",
426  UsedShmemSegAddr, GetLastError());
427 
428  hdr = (PGShmemHeader *) MapViewOfFileEx(UsedShmemSegID, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, 0, UsedShmemSegAddr);
429  if (!hdr)
430  elog(FATAL, "could not reattach to shared memory (key=%p, addr=%p): error code %lu",
431  UsedShmemSegID, UsedShmemSegAddr, GetLastError());
432  if (hdr != origUsedShmemSegAddr)
433  elog(FATAL, "reattaching to shared memory returned unexpected address (got %p, expected %p)",
434  hdr, origUsedShmemSegAddr);
435  if (hdr->magic != PGShmemMagic)
436  elog(FATAL, "reattaching to shared memory returned non-PostgreSQL memory");
437  dsm_set_control_handle(hdr->dsm_control);
438 
439  UsedShmemSegAddr = hdr; /* probably redundant */
440 }
441 
442 /*
443  * PGSharedMemoryNoReAttach
444  *
445  * This is called during startup of a postmaster child process when we choose
446  * *not* to re-attach to the existing shared memory segment. We must clean up
447  * to leave things in the appropriate state.
448  *
449  * The child process startup logic might or might not call PGSharedMemoryDetach
450  * after this; make sure that it will be a no-op if called.
451  *
452  * ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
453  * parameters to this routine. The caller must have already restored them to
454  * the postmaster's values.
455  */
456 void
458 {
459  Assert(ShmemProtectiveRegion != NULL);
460  Assert(UsedShmemSegAddr != NULL);
462 
463  /*
464  * Under Windows we will not have mapped the segment, so we don't need to
465  * un-map it. Just reset UsedShmemSegAddr to show we're not attached.
466  */
467  UsedShmemSegAddr = NULL;
468 
469  /*
470  * We *must* close the inherited shmem segment handle, else Windows will
471  * consider the existence of this process to mean it can't release the
472  * shmem segment yet. We can now use PGSharedMemoryDetach to do that.
473  */
475 }
476 
477 /*
478  * PGSharedMemoryDetach
479  *
480  * Detach from the shared memory segment, if still attached. This is not
481  * intended to be called explicitly by the process that originally created the
482  * segment (it will have an on_shmem_exit callback registered to do that).
483  * Rather, this is for subprocesses that have inherited an attachment and want
484  * to get rid of it.
485  *
486  * ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
487  * parameters to this routine.
488  */
489 void
491 {
492  /*
493  * Releasing the protective region liberates an unimportant quantity of
494  * address space, but be tidy.
495  */
496  if (ShmemProtectiveRegion != NULL)
497  {
498  if (VirtualFree(ShmemProtectiveRegion, 0, MEM_RELEASE) == 0)
499  elog(LOG, "failed to release reserved memory region (addr=%p): error code %lu",
500  ShmemProtectiveRegion, GetLastError());
501 
502  ShmemProtectiveRegion = NULL;
503  }
504 
505  /* Unmap the view, if it's mapped */
506  if (UsedShmemSegAddr != NULL)
507  {
508  if (!UnmapViewOfFile(UsedShmemSegAddr))
509  elog(LOG, "could not unmap view of shared memory: error code %lu",
510  GetLastError());
511 
512  UsedShmemSegAddr = NULL;
513  }
514 
515  /* And close the shmem handle, if we have one */
516  if (UsedShmemSegID != INVALID_HANDLE_VALUE)
517  {
518  if (!CloseHandle(UsedShmemSegID))
519  elog(LOG, "could not close handle to shared memory: error code %lu",
520  GetLastError());
521 
522  UsedShmemSegID = INVALID_HANDLE_VALUE;
523  }
524 }
525 
526 
527 /*
528  * pgwin32_SharedMemoryDelete
529  *
530  * Detach from and delete the shared memory segment
531  * (called as an on_shmem_exit callback, hence funny argument list)
532  */
533 static void
535 {
538 }
539 
540 /*
541  * pgwin32_ReserveSharedMemoryRegion(hChild)
542  *
543  * Reserve the memory region that will be used for shared memory in a child
544  * process. It is called before the child process starts, to make sure the
545  * memory is available.
546  *
547  * Once the child starts, DLLs loading in different order or threads getting
548  * scheduled differently may allocate memory which can conflict with the
549  * address space we need for our shared memory. By reserving the shared
550  * memory region before the child starts, and freeing it only just before we
551  * attempt to get access to the shared memory forces these allocations to
552  * be given different address ranges that don't conflict.
553  *
554  * NOTE! This function executes in the postmaster, and should for this
555  * reason not use elog(FATAL) since that would take down the postmaster.
556  */
557 int
559 {
560  void *address;
561 
562  Assert(ShmemProtectiveRegion != NULL);
563  Assert(UsedShmemSegAddr != NULL);
564  Assert(UsedShmemSegSize != 0);
565 
566  /* ShmemProtectiveRegion */
567  address = VirtualAllocEx(hChild, ShmemProtectiveRegion,
569  MEM_RESERVE, PAGE_NOACCESS);
570  if (address == NULL)
571  {
572  /* Don't use FATAL since we're running in the postmaster */
573  elog(LOG, "could not reserve shared memory region (addr=%p) for child %p: error code %lu",
574  ShmemProtectiveRegion, hChild, GetLastError());
575  return false;
576  }
577  if (address != ShmemProtectiveRegion)
578  {
579  /*
580  * Should never happen - in theory if allocation granularity causes
581  * strange effects it could, so check just in case.
582  *
583  * Don't use FATAL since we're running in the postmaster.
584  */
585  elog(LOG, "reserved shared memory region got incorrect address %p, expected %p",
586  address, ShmemProtectiveRegion);
587  return false;
588  }
589 
590  /* UsedShmemSegAddr */
591  address = VirtualAllocEx(hChild, UsedShmemSegAddr, UsedShmemSegSize,
592  MEM_RESERVE, PAGE_READWRITE);
593  if (address == NULL)
594  {
595  elog(LOG, "could not reserve shared memory region (addr=%p) for child %p: error code %lu",
596  UsedShmemSegAddr, hChild, GetLastError());
597  return false;
598  }
599  if (address != UsedShmemSegAddr)
600  {
601  elog(LOG, "reserved shared memory region got incorrect address %p, expected %p",
602  address, UsedShmemSegAddr);
603  return false;
604  }
605 
606  return true;
607 }
pid_t creatorPID
Definition: pg_shmem.h:33
void PGSharedMemoryDetach(void)
Definition: win32_shmem.c:490
#define DEBUG1
Definition: elog.h:25
int errhint(const char *fmt,...)
Definition: elog.c:1156
PGShmemHeader * PGSharedMemoryCreate(Size size, PGShmemHeader **shim)
Definition: win32_shmem.c:205
int pgwin32_ReserveSharedMemoryRegion(HANDLE hChild)
Definition: win32_shmem.c:558
#define PROTECTIVE_REGION_SIZE
Definition: win32_shmem.c:39
dsm_handle dsm_control
Definition: pg_shmem.h:36
#define PointerGetDatum(X)
Definition: postgres.h:600
static bool EnableLockPagesPrivilege(int elevel)
Definition: win32_shmem.c:135
void PGSharedMemoryNoReAttach(void)
Definition: win32_shmem.c:457
int errcode(int sqlerrcode)
Definition: elog.c:698
void * ShmemProtectiveRegion
Definition: win32_shmem.c:40
#define LOG
Definition: elog.h:26
#define malloc(a)
Definition: header.h:50
#define FATAL
Definition: elog.h:49
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:361
bool IsUnderPostmaster
Definition: globals.c:112
HANDLE UsedShmemSegID
Definition: win32_shmem.c:42
int errdetail(const char *fmt,...)
Definition: elog.c:1042
void PGSharedMemoryReAttach(void)
Definition: win32_shmem.c:409
static void pgwin32_SharedMemoryDelete(int status, Datum shmId)
Definition: win32_shmem.c:534
int32 magic
Definition: pg_shmem.h:31
static Size UsedShmemSegSize
Definition: win32_shmem.c:44
static int elevel
Definition: vacuumlazy.c:400
Size totalsize
Definition: pg_shmem.h:34
uintptr_t Datum
Definition: postgres.h:411
static char * GetSharedMemName(void)
Definition: win32_shmem.c:63
#define ereport(elevel,...)
Definition: elog.h:157
#define free(a)
Definition: header.h:65
int errmsg_internal(const char *fmt,...)
Definition: elog.c:996
#define Assert(condition)
Definition: c.h:804
#define PGShmemMagic
Definition: pg_shmem.h:32
Size freeoffset
Definition: pg_shmem.h:35
size_t Size
Definition: c.h:540
#define MAXALIGN(LEN)
Definition: c.h:757
int huge_pages
Definition: guc.c:624
#define DatumGetPointer(X)
Definition: postgres.h:593
bool PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
Definition: win32_shmem.c:111
int errmsg(const char *fmt,...)
Definition: elog.c:909
#define elog(elevel,...)
Definition: elog.h:232
int i
char * DataDir
Definition: globals.c:65
static void static void status(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:227
#define _(x)
Definition: elog.c:89
void * UsedShmemSegAddr
Definition: win32_shmem.c:43