PostgreSQL Source Code  git master
win32_shmem.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * win32_shmem.c
4  * Implement shared memory using win32 facilities
5  *
6  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * src/backend/port/win32_shmem.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14 
15 #include "miscadmin.h"
16 #include "storage/dsm.h"
17 #include "storage/ipc.h"
18 #include "storage/pg_shmem.h"
19 
20 /*
21  * Early in a process's life, Windows asynchronously creates threads for the
22  * process's "default thread pool"
23  * (https://docs.microsoft.com/en-us/windows/desktop/ProcThread/thread-pools).
24  * Occasionally, thread creation allocates a stack after
25  * PGSharedMemoryReAttach() has released UsedShmemSegAddr and before it has
26  * mapped shared memory at UsedShmemSegAddr. This would cause mapping to fail
27  * if the allocator preferred the just-released region for allocating the new
28  * thread stack. We observed such failures in some Windows Server 2016
29  * configurations. To give the system another region to prefer, reserve and
30  * release an additional, protective region immediately before reserving or
31  * releasing shared memory. The idea is that, if the allocator handed out
32  * REGION1 pages before REGION2 pages at one occasion, it will do so whenever
33  * both regions are free. Windows Server 2016 exhibits that behavior, and a
34  * system behaving differently would have less need to protect
35  * UsedShmemSegAddr. The protective region must be at least large enough for
36  * one thread stack. However, ten times as much is less than 2% of the 32-bit
37  * address space and is negligible relative to the 64-bit address space.
38  */
/* Size of the protective region: ten times WIN32_STACK_RLIMIT. */
39 #define PROTECTIVE_REGION_SIZE (10 * WIN32_STACK_RLIMIT)
/*
 * Base address of the protective region.  PGSharedMemoryCreate() sets it
 * from VirtualAlloc(); PGSharedMemoryDetach() resets it to NULL after
 * releasing the region.
 */
40 void *ShmemProtectiveRegion = NULL;
41 
/*
 * Handle of the shared memory mapping object.  INVALID_HANDLE_VALUE means
 * "no handle held" (that is the value PGSharedMemoryDetach() tests for).
 */
42 HANDLE UsedShmemSegID = INVALID_HANDLE_VALUE;
/* Address of the shared memory segment; NULL when not attached. */
43 void *UsedShmemSegAddr = NULL;
/*
 * NOTE(review): listing line 44 is absent from this extract.  The symbol
 * index at the end of the page lists "static Size UsedShmemSegSize" as being
 * defined there, and later code reads and writes that variable -- confirm
 * the declaration (and its initializer) against the real file.
 */
45 
/* Forward declarations of the file-local helpers defined further down. */
46 static bool EnableLockPagesPrivilege(int elevel);
47 static void pgwin32_SharedMemoryDelete(int status, Datum shmId);
48 
49 /*
50  * Generate shared memory segment name. Expand the data directory, to generate
51  * an identifier unique for this data directory. Then replace all backslashes
52  * with forward slashes, since backslashes aren't permitted in global object names.
53  *
54  * Store the shared memory segment in the Global\ namespace (requires NT2 TSE or
55  * 2000, but that's all we support for other reasons as well), to make sure you can't
56  * open two postmasters in different sessions against the same data directory.
57  *
58  * XXX: What happens with junctions? It's only someone breaking things on purpose,
59  * and this is still better than before, but we might want to do something about
60  * that sometime in the future.
61  */
62 static char *
64 {
65  char *retptr;
66  DWORD bufsize;
67  DWORD r;
68  char *cp;
69 
70  bufsize = GetFullPathName(DataDir, 0, NULL, NULL);
71  if (bufsize == 0)
72  elog(FATAL, "could not get size for full pathname of datadir %s: error code %lu",
73  DataDir, GetLastError());
74 
75  retptr = malloc(bufsize + 18); /* 18 for Global\PostgreSQL: */
76  if (retptr == NULL)
77  elog(FATAL, "could not allocate memory for shared memory name");
78 
79  strcpy(retptr, "Global\\PostgreSQL:");
80  r = GetFullPathName(DataDir, bufsize, retptr + 18, NULL);
81  if (r == 0 || r > bufsize)
82  elog(FATAL, "could not generate full pathname for datadir %s: error code %lu",
83  DataDir, GetLastError());
84 
85  /*
86  * XXX: Intentionally overwriting the Global\ part here. This was not the
87  * original approach, but putting it in the actual Global\ namespace
88  * causes permission errors in a lot of cases, so we leave it in the
89  * default namespace for now.
90  */
91  for (cp = retptr; *cp; cp++)
92  if (*cp == '\\')
93  *cp = '/';
94 
95  return retptr;
96 }
97 
98 
99 /*
100  * PGSharedMemoryIsInUse
101  *
102  * Is a previously-existing shmem segment still existing and in use?
103  *
104  * The point of this exercise is to detect the case where a prior postmaster
105  * crashed, but it left child backends that are still running. Therefore
106  * we only care about shmem segments that are associated with the intended
107  * DataDir. This is an important consideration since accidental matches of
108  * shmem segment IDs are reasonably common.
109  */
110 bool
111 PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
112 {
113  char *szShareMem;
114  HANDLE hmap;
115 
116  szShareMem = GetSharedMemName();
117 
118  hmap = OpenFileMapping(FILE_MAP_READ, FALSE, szShareMem);
119 
120  free(szShareMem);
121 
122  if (hmap == NULL)
123  return false;
124 
125  CloseHandle(hmap);
126  return true;
127 }
128 
129 /*
130  * EnableLockPagesPrivilege
131  *
132  * Try to acquire SeLockMemoryPrivilege so we can use large pages.
133  */
134 static bool
136 {
137  HANDLE hToken;
138  TOKEN_PRIVILEGES tp;
139  LUID luid;
140 
141  if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hToken))
142  {
143  ereport(elevel,
144  (errmsg("could not enable Lock Pages in Memory user right: error code %lu", GetLastError()),
145  errdetail("Failed system call was %s.", "OpenProcessToken")));
146  return FALSE;
147  }
148 
149  if (!LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &luid))
150  {
151  ereport(elevel,
152  (errmsg("could not enable Lock Pages in Memory user right: error code %lu", GetLastError()),
153  errdetail("Failed system call was %s.", "LookupPrivilegeValue")));
154  CloseHandle(hToken);
155  return FALSE;
156  }
157  tp.PrivilegeCount = 1;
158  tp.Privileges[0].Luid = luid;
159  tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
160 
161  if (!AdjustTokenPrivileges(hToken, FALSE, &tp, 0, NULL, NULL))
162  {
163  ereport(elevel,
164  (errmsg("could not enable Lock Pages in Memory user right: error code %lu", GetLastError()),
165  errdetail("Failed system call was %s.", "AdjustTokenPrivileges")));
166  CloseHandle(hToken);
167  return FALSE;
168  }
169 
170  if (GetLastError() != ERROR_SUCCESS)
171  {
172  if (GetLastError() == ERROR_NOT_ALL_ASSIGNED)
173  ereport(elevel,
174  (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
175  errmsg("could not enable Lock Pages in Memory user right"),
176  errhint("Assign Lock Pages in Memory user right to the Windows user account which runs PostgreSQL.")));
177  else
178  ereport(elevel,
179  (errmsg("could not enable Lock Pages in Memory user right: error code %lu", GetLastError()),
180  errdetail("Failed system call was %s.", "AdjustTokenPrivileges")));
181  CloseHandle(hToken);
182  return FALSE;
183  }
184 
185  CloseHandle(hToken);
186 
187  return TRUE;
188 }
189 
190 /*
191  * PGSharedMemoryCreate
192  *
193  * Create a shared memory segment of the given size and initialize its
194  * standard header.
195  */
/*
 * NOTE(review): this is a line-numbered listing, and the two lines carrying
 * the return type and the function name (listing lines 196-197) are absent.
 * The symbol index at the end of the page gives the signature as
 * "PGShmemHeader * PGSharedMemoryCreate(Size size, PGShmemHeader **shim)".
 *
 * Visible behavior: reserve the protective region, build the segment name,
 * optionally switch to large pages, create the file mapping (retrying up to
 * ten times, one second apart, while the name already exists), duplicate the
 * handle as inheritable, map a view, initialize the PGShmemHeader at its
 * start, record address/size/handle in the file-level variables, store the
 * header pointer in *shim and return it.  Most failures are reported at
 * FATAL; a failed CloseHandle() of the original handle is only logged.
 */
198  PGShmemHeader **shim)
199 {
200  void *memAddress;
201  PGShmemHeader *hdr;
202  HANDLE hmap,
203  hmap2;
204  char *szShareMem;
205  int i;
206  DWORD size_high;
207  DWORD size_low;
208  SIZE_T largePageSize = 0;
209  Size orig_size = size;
210  DWORD flProtect = PAGE_READWRITE;
211 
/* Reserve (not commit) the protective region; the system picks the address. */
212  ShmemProtectiveRegion = VirtualAlloc(NULL, PROTECTIVE_REGION_SIZE,
213  MEM_RESERVE, PAGE_NOACCESS);
214  if (ShmemProtectiveRegion == NULL)
215  elog(FATAL, "could not reserve memory region: error code %lu",
216  GetLastError());
217 
218  /* Room for a header? */
219  Assert(size > MAXALIGN(sizeof(PGShmemHeader)));
220 
221  szShareMem = GetSharedMemName();
222 
223  UsedShmemSegAddr = NULL;
224 
/*
 * NOTE(review): listing line 225, the statement that opens the block below,
 * is absent.  Presumably it tests the huge_pages setting (the symbol index
 * lists huge_pages as referenced from this file) -- confirm.
 */
226  {
227  /* Does the processor support large pages? */
228  largePageSize = GetLargePageMinimum();
229  if (largePageSize == 0)
230  {
/*
 * NOTE(review): listing line 231, the first line of this report (the call
 * and its level argument), is absent.
 */
232  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
233  errmsg("the processor does not support large pages")));
234  ereport(DEBUG1,
235  (errmsg("disabling huge pages")));
236  }
/*
 * NOTE(review): listing line 237, the condition guarding the next block, is
 * absent.  Presumably an "else if" on the result of
 * EnableLockPagesPrivilege() -- confirm.
 */
238  {
239  ereport(DEBUG1,
240  (errmsg("disabling huge pages")));
241  }
242  else
243  {
244  /* Huge pages available and privilege enabled, so turn on */
245  flProtect = PAGE_READWRITE | SEC_COMMIT | SEC_LARGE_PAGES;
246 
247  /* Round size up as appropriate. */
248  if (size % largePageSize != 0)
249  size += largePageSize - (size % largePageSize);
250  }
251  }
252 
/*
 * Re-entered via "goto retry" after a large-page attempt failed, so the
 * high/low halves are recomputed from the (possibly reset) size.
 */
253 retry:
254 #ifdef _WIN64
255  size_high = size >> 32;
256 #else
257  size_high = 0;
258 #endif
259  size_low = (DWORD) size;
260 
261  /*
262  * When recycling a shared memory segment, it may take a short while
263  * before it gets dropped from the global namespace. So re-try after
264  * sleeping for a second, and continue retrying 10 times. (both the 1
265  * second time and the 10 retries are completely arbitrary)
266  */
267  for (i = 0; i < 10; i++)
268  {
269  /*
270  * In case CreateFileMapping() doesn't set the error code to 0 on
271  * success
272  */
273  SetLastError(0);
274 
275  hmap = CreateFileMapping(INVALID_HANDLE_VALUE, /* Use the pagefile */
276  NULL, /* Default security attrs */
277  flProtect,
278  size_high, /* Size Upper 32 Bits */
279  size_low, /* Size Lower 32 bits */
280  szShareMem);
281 
282  if (!hmap)
283  {
/*
 * NOTE(review): listing line 285, one operand of the condition below, is
 * absent (it sat between the two visible operands).
 */
284  if (GetLastError() == ERROR_NO_SYSTEM_RESOURCES &&
286  (flProtect & SEC_LARGE_PAGES) != 0)
287  {
288  elog(DEBUG1, "CreateFileMapping(%zu) with SEC_LARGE_PAGES failed, "
289  "huge pages disabled",
290  size);
291 
292  /*
293  * Use the original size, not the rounded-up value, when
294  * falling back to non-huge pages.
295  */
296  size = orig_size;
297  flProtect = PAGE_READWRITE;
298  goto retry;
299  }
300  else
301  ereport(FATAL,
302  (errmsg("could not create shared memory segment: error code %lu", GetLastError()),
303  errdetail("Failed system call was CreateFileMapping(size=%zu, name=%s).",
304  size, szShareMem)));
305  }
306 
307  /*
308  * If the segment already existed, CreateFileMapping() will return a
309  * handle to the existing one and set ERROR_ALREADY_EXISTS.
310  */
311  if (GetLastError() == ERROR_ALREADY_EXISTS)
312  {
313  CloseHandle(hmap); /* Close the handle, since we got a valid one
314  * to the previous segment. */
315  hmap = NULL;
316  Sleep(1000);
317  continue;
318  }
319  break;
320  }
321 
322  /*
323  * If the last call in the loop still returned ERROR_ALREADY_EXISTS, this
324  * shared memory segment exists and we assume it belongs to somebody else.
325  */
326  if (!hmap)
327  ereport(FATAL,
328  (errmsg("pre-existing shared memory block is still in use"),
329  errhint("Check if there are any old server processes still running, and terminate them.")));
330 
331  free(szShareMem);
332 
333  /*
334  * Make the handle inheritable
335  */
336  if (!DuplicateHandle(GetCurrentProcess(), hmap, GetCurrentProcess(), &hmap2, 0, TRUE, DUPLICATE_SAME_ACCESS))
337  ereport(FATAL,
338  (errmsg("could not create shared memory segment: error code %lu", GetLastError()),
339  errdetail("Failed system call was DuplicateHandle.")));
340 
341  /*
342  * Close the old, non-inheritable handle. If this fails we don't really
343  * care.
344  */
345  if (!CloseHandle(hmap))
346  elog(LOG, "could not close handle to shared memory: error code %lu", GetLastError());
347 
348 
349  /*
350  * Get a pointer to the new shared memory segment. Map the whole segment
351  * at once, and let the system decide on the initial address.
352  */
353  memAddress = MapViewOfFileEx(hmap2, FILE_MAP_WRITE | FILE_MAP_READ, 0, 0, 0, NULL);
354  if (!memAddress)
355  ereport(FATAL,
356  (errmsg("could not create shared memory segment: error code %lu", GetLastError()),
357  errdetail("Failed system call was MapViewOfFileEx.")));
358 
359 
360 
361  /*
362  * OK, we created a new segment. Mark it as created by this process. The
363  * order of assignments here is critical so that another Postgres process
364  * can't see the header as valid but belonging to an invalid PID!
365  */
366  hdr = (PGShmemHeader *) memAddress;
367  hdr->creatorPID = getpid();
368  hdr->magic = PGShmemMagic;
369 
370  /*
371  * Initialize space allocation status for segment.
372  */
373  hdr->totalsize = size;
374  hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
375  hdr->dsm_control = 0;
376 
377  /* Save info for possible future use */
378  UsedShmemSegAddr = memAddress;
379  UsedShmemSegSize = size;
380  UsedShmemSegID = hmap2;
381 
382  /* Register on-exit routine to delete the new segment */
/*
 * NOTE(review): listing line 383, the registration call announced by the
 * comment above, is absent.  Presumably an on_shmem_exit() registration of
 * pgwin32_SharedMemoryDelete -- confirm.
 */
384 
385  *shim = hdr;
386  return hdr;
387 }
388 
389 /*
390  * PGSharedMemoryReAttach
391  *
392  * This is called during startup of a postmaster child process to re-attach to
393  * an already existing shared memory segment, using the handle inherited from
394  * the postmaster.
395  *
396  * ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
397  * parameters to this routine. The caller must have already restored them to
398  * the postmaster's values.
399  */
/*
 * NOTE(review): this is a line-numbered listing, and the line carrying the
 * function name (listing line 401) is absent.  The symbol index at the end of
 * the page gives it as "void PGSharedMemoryReAttach(void)".
 *
 * Visible behavior: release the two address-space reservations, map the
 * segment at the address held in UsedShmemSegAddr, verify the address and
 * the header magic, and pass the header's dsm_control value to
 * dsm_set_control_handle().  Every failure is reported at FATAL.
 */
400 void
402 {
403  PGShmemHeader *hdr;
/* Remember the requested address so the mapping result can be checked. */
404  void *origUsedShmemSegAddr = UsedShmemSegAddr;
405 
406  Assert(ShmemProtectiveRegion != NULL);
407  Assert(UsedShmemSegAddr != NULL);
/*
 * NOTE(review): listing line 408 is absent here; given its position it is
 * presumably one more Assert -- confirm against the real file.
 */
409 
410  /*
411  * Release memory region reservations made by the postmaster
412  */
413  if (VirtualFree(ShmemProtectiveRegion, 0, MEM_RELEASE) == 0)
414  elog(FATAL, "failed to release reserved memory region (addr=%p): error code %lu",
415  ShmemProtectiveRegion, GetLastError());
416  if (VirtualFree(UsedShmemSegAddr, 0, MEM_RELEASE) == 0)
417  elog(FATAL, "failed to release reserved memory region (addr=%p): error code %lu",
418  UsedShmemSegAddr, GetLastError());
419 
/* Map the segment at the address that was just released. */
420  hdr = (PGShmemHeader *) MapViewOfFileEx(UsedShmemSegID, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, 0, UsedShmemSegAddr);
421  if (!hdr)
422  elog(FATAL, "could not reattach to shared memory (key=%p, addr=%p): error code %lu",
423  UsedShmemSegID, UsedShmemSegAddr, GetLastError());
/* Sanity checks: the mapping landed where asked and holds our header. */
424  if (hdr != origUsedShmemSegAddr)
425  elog(FATAL, "reattaching to shared memory returned unexpected address (got %p, expected %p)",
426  hdr, origUsedShmemSegAddr);
427  if (hdr->magic != PGShmemMagic)
428  elog(FATAL, "reattaching to shared memory returned non-PostgreSQL memory");
429  dsm_set_control_handle(hdr->dsm_control);
430 
431  UsedShmemSegAddr = hdr; /* probably redundant */
432 }
433 
434 /*
435  * PGSharedMemoryNoReAttach
436  *
437  * This is called during startup of a postmaster child process when we choose
438  * *not* to re-attach to the existing shared memory segment. We must clean up
439  * to leave things in the appropriate state.
440  *
441  * The child process startup logic might or might not call PGSharedMemoryDetach
442  * after this; make sure that it will be a no-op if called.
443  *
444  * ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
445  * parameters to this routine. The caller must have already restored them to
446  * the postmaster's values.
447  */
/*
 * NOTE(review): this is a line-numbered listing, and three lines of this
 * function are absent: the line carrying the function name (listing line
 * 449; the symbol index at the end of the page gives it as
 * "void PGSharedMemoryNoReAttach(void)"), listing line 453 and listing line
 * 466.
 */
448 void
450 {
451  Assert(ShmemProtectiveRegion != NULL);
452  Assert(UsedShmemSegAddr != NULL);
/*
 * NOTE(review): listing line 453 is absent here; given its position it is
 * presumably one more Assert -- confirm against the real file.
 */
454 
455  /*
456  * Under Windows we will not have mapped the segment, so we don't need to
457  * un-map it. Just reset UsedShmemSegAddr to show we're not attached.
458  */
459  UsedShmemSegAddr = NULL;
460 
461  /*
462  * We *must* close the inherited shmem segment handle, else Windows will
463  * consider the existence of this process to mean it can't release the
464  * shmem segment yet. We can now use PGSharedMemoryDetach to do that.
465  */
/*
 * NOTE(review): listing line 466, the statement the comment above refers to,
 * is absent.  Presumably a call to PGSharedMemoryDetach() -- confirm.
 */
467 }
468 
469 /*
470  * PGSharedMemoryDetach
471  *
472  * Detach from the shared memory segment, if still attached. This is not
473  * intended to be called explicitly by the process that originally created the
474  * segment (it will have an on_shmem_exit callback registered to do that).
475  * Rather, this is for subprocesses that have inherited an attachment and want
476  * to get rid of it.
477  *
478  * ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
479  * parameters to this routine.
480  */
481 void
483 {
484  /*
485  * Releasing the protective region liberates an unimportant quantity of
486  * address space, but be tidy.
487  */
488  if (ShmemProtectiveRegion != NULL)
489  {
490  if (VirtualFree(ShmemProtectiveRegion, 0, MEM_RELEASE) == 0)
491  elog(LOG, "failed to release reserved memory region (addr=%p): error code %lu",
492  ShmemProtectiveRegion, GetLastError());
493 
494  ShmemProtectiveRegion = NULL;
495  }
496 
497  /* Unmap the view, if it's mapped */
498  if (UsedShmemSegAddr != NULL)
499  {
500  if (!UnmapViewOfFile(UsedShmemSegAddr))
501  elog(LOG, "could not unmap view of shared memory: error code %lu",
502  GetLastError());
503 
504  UsedShmemSegAddr = NULL;
505  }
506 
507  /* And close the shmem handle, if we have one */
508  if (UsedShmemSegID != INVALID_HANDLE_VALUE)
509  {
510  if (!CloseHandle(UsedShmemSegID))
511  elog(LOG, "could not close handle to shared memory: error code %lu",
512  GetLastError());
513 
514  UsedShmemSegID = INVALID_HANDLE_VALUE;
515  }
516 }
517 
518 
519 /*
520  * pgwin32_SharedMemoryDelete
521  *
522  * Detach from and delete the shared memory segment
523  * (called as an on_shmem_exit callback, hence funny argument list)
524  */
/*
 * NOTE(review): this is a line-numbered listing, and most of this function
 * is absent: the line carrying the name and parameters (listing line 526;
 * the prototype near the top of the file reads
 * "static void pgwin32_SharedMemoryDelete(int status, Datum shmId)") and
 * both body lines (listing lines 528-529).  Nothing about what the body does
 * can be stated from this extract -- consult the real file.
 */
525 static void
527 {
530 }
531 
532 /*
533  * pgwin32_ReserveSharedMemoryRegion(hChild)
534  *
535  * Reserve the memory region that will be used for shared memory in a child
536  * process. It is called before the child process starts, to make sure the
537  * memory is available.
538  *
539  * Once the child starts, DLLs loading in different order or threads getting
540  * scheduled differently may allocate memory which can conflict with the
541  * address space we need for our shared memory. By reserving the shared
542  * memory region before the child starts, and freeing it only just before we
543  * attempt to get access to the shared memory forces these allocations to
544  * be given different address ranges that don't conflict.
545  *
546  * NOTE! This function executes in the postmaster, and should for this
547  * reason not use elog(FATAL) since that would take down the postmaster.
548  */
549 int
551 {
552  void *address;
553 
554  Assert(ShmemProtectiveRegion != NULL);
555  Assert(UsedShmemSegAddr != NULL);
556  Assert(UsedShmemSegSize != 0);
557 
558  /* ShmemProtectiveRegion */
559  address = VirtualAllocEx(hChild, ShmemProtectiveRegion,
561  MEM_RESERVE, PAGE_NOACCESS);
562  if (address == NULL)
563  {
564  /* Don't use FATAL since we're running in the postmaster */
565  elog(LOG, "could not reserve shared memory region (addr=%p) for child %p: error code %lu",
566  ShmemProtectiveRegion, hChild, GetLastError());
567  return false;
568  }
569  if (address != ShmemProtectiveRegion)
570  {
571  /*
572  * Should never happen - in theory if allocation granularity causes
573  * strange effects it could, so check just in case.
574  *
575  * Don't use FATAL since we're running in the postmaster.
576  */
577  elog(LOG, "reserved shared memory region got incorrect address %p, expected %p",
578  address, ShmemProtectiveRegion);
579  return false;
580  }
581 
582  /* UsedShmemSegAddr */
583  address = VirtualAllocEx(hChild, UsedShmemSegAddr, UsedShmemSegSize,
584  MEM_RESERVE, PAGE_READWRITE);
585  if (address == NULL)
586  {
587  elog(LOG, "could not reserve shared memory region (addr=%p) for child %p: error code %lu",
588  UsedShmemSegAddr, hChild, GetLastError());
589  return false;
590  }
591  if (address != UsedShmemSegAddr)
592  {
593  elog(LOG, "reserved shared memory region got incorrect address %p, expected %p",
594  address, UsedShmemSegAddr);
595  return false;
596  }
597 
598  return true;
599 }
#define TRUE
Definition: ecpglib.h:35
pid_t creatorPID
Definition: pg_shmem.h:33
void PGSharedMemoryDetach(void)
Definition: win32_shmem.c:482
#define DEBUG1
Definition: elog.h:25
int errhint(const char *fmt,...)
Definition: elog.c:974
PGShmemHeader * PGSharedMemoryCreate(Size size, PGShmemHeader **shim)
Definition: win32_shmem.c:197
int pgwin32_ReserveSharedMemoryRegion(HANDLE hChild)
Definition: win32_shmem.c:550
#define PROTECTIVE_REGION_SIZE
Definition: win32_shmem.c:39
dsm_handle dsm_control
Definition: pg_shmem.h:36
#define PointerGetDatum(X)
Definition: postgres.h:556
#define FALSE
Definition: ecpglib.h:39
static bool EnableLockPagesPrivilege(int elevel)
Definition: win32_shmem.c:135
void PGSharedMemoryNoReAttach(void)
Definition: win32_shmem.c:449
int errcode(int sqlerrcode)
Definition: elog.c:570
void * ShmemProtectiveRegion
Definition: win32_shmem.c:40
#define LOG
Definition: elog.h:26
#define malloc(a)
Definition: header.h:50
#define FATAL
Definition: elog.h:52
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:361
bool IsUnderPostmaster
Definition: globals.c:109
HANDLE UsedShmemSegID
Definition: win32_shmem.c:42
int errdetail(const char *fmt,...)
Definition: elog.c:860
void PGSharedMemoryReAttach(void)
Definition: win32_shmem.c:401
static void pgwin32_SharedMemoryDelete(int status, Datum shmId)
Definition: win32_shmem.c:526
#define ereport(elevel, rest)
Definition: elog.h:141
int32 magic
Definition: pg_shmem.h:31
static Size UsedShmemSegSize
Definition: win32_shmem.c:44
static int elevel
Definition: vacuumlazy.c:143
Size totalsize
Definition: pg_shmem.h:34
uintptr_t Datum
Definition: postgres.h:367
static char * GetSharedMemName(void)
Definition: win32_shmem.c:63
#define free(a)
Definition: header.h:65
#define Assert(condition)
Definition: c.h:732
#define PGShmemMagic
Definition: pg_shmem.h:32
Size freeoffset
Definition: pg_shmem.h:35
size_t Size
Definition: c.h:466
#define MAXALIGN(LEN)
Definition: c.h:685
int huge_pages
Definition: guc.c:548
#define DatumGetPointer(X)
Definition: postgres.h:549
bool PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
Definition: win32_shmem.c:111
int errmsg(const char *fmt,...)
Definition: elog.c:784
#define elog(elevel,...)
Definition: elog.h:226
int i
char * DataDir
Definition: globals.c:62
static void static void status(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:227
void * UsedShmemSegAddr
Definition: win32_shmem.c:43