PostgreSQL Source Code  git master
dsm_impl.c File Reference
#include "postgres.h"
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include "common/file_perm.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "portability/mem.h"
#include "postmaster/postmaster.h"
#include "storage/dsm_impl.h"
#include "storage/fd.h"
#include "utils/guc.h"
#include "utils/memutils.h"
Include dependency graph for dsm_impl.c:

Go to the source code of this file.

Macros

#define ZBUFFER_SIZE   8192
 
#define SEGMENT_NAME_PREFIX   "Global/PostgreSQL"
 

Functions

static bool dsm_impl_sysv (dsm_op op, dsm_handle handle, Size request_size, void **impl_private, void **mapped_address, Size *mapped_size, int elevel)
 
static bool dsm_impl_mmap (dsm_op op, dsm_handle handle, Size request_size, void **impl_private, void **mapped_address, Size *mapped_size, int elevel)
 
static int errcode_for_dynamic_shared_memory (void)
 
bool dsm_impl_op (dsm_op op, dsm_handle handle, Size request_size, void **impl_private, void **mapped_address, Size *mapped_size, int elevel)
 
void dsm_impl_pin_segment (dsm_handle handle, void *impl_private, void **impl_private_pm_handle)
 
void dsm_impl_unpin_segment (dsm_handle handle, void **impl_private)
 

Variables

const struct config_enum_entry dynamic_shared_memory_options []
 
int dynamic_shared_memory_type
 

Macro Definition Documentation

◆ SEGMENT_NAME_PREFIX

#define SEGMENT_NAME_PREFIX   "Global/PostgreSQL"

Definition at line 119 of file dsm_impl.c.

Referenced by dsm_impl_pin_segment(), dsm_impl_sysv(), and dsm_impl_unpin_segment().

◆ ZBUFFER_SIZE

#define ZBUFFER_SIZE   8192

Definition at line 117 of file dsm_impl.c.

Referenced by dsm_impl_mmap().

Function Documentation

◆ dsm_impl_mmap()

static bool dsm_impl_mmap ( dsm_op  op,
dsm_handle  handle,
Size  request_size,
void **  impl_private,
void **  mapped_address,
Size *  mapped_size,
int  elevel 
)
static

Definition at line 781 of file dsm_impl.c.

References CloseTransientFile(), DSM_OP_ATTACH, DSM_OP_CREATE, DSM_OP_DESTROY, DSM_OP_DETACH, ereport, errcode_for_dynamic_shared_memory(), errcode_for_file_access(), errmsg(), fd(), MAP_FAILED, MAP_HASSEMAPHORE, MAP_NOSYNC, name, OpenTransientFile(), palloc0(), PG_DYNSHMEM_DIR, PG_DYNSHMEM_MMAP_FILE_PREFIX, pgstat_report_wait_end(), pgstat_report_wait_start(), remaining, snprintf, stat, success, WAIT_EVENT_DSM_FILL_ZERO_WRITE, write, and ZBUFFER_SIZE.

Referenced by dsm_impl_op().

784 {
785  char name[64];
786  int flags;
787  int fd;
788  char *address;
789 
791  handle);
792 
793  /* Handle teardown cases. */
794  if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
795  {
796  if (*mapped_address != NULL
797  && munmap(*mapped_address, *mapped_size) != 0)
798  {
799  ereport(elevel,
801  errmsg("could not unmap shared memory segment \"%s\": %m",
802  name)));
803  return false;
804  }
805  *mapped_address = NULL;
806  *mapped_size = 0;
807  if (op == DSM_OP_DESTROY && unlink(name) != 0)
808  {
809  ereport(elevel,
811  errmsg("could not remove shared memory segment \"%s\": %m",
812  name)));
813  return false;
814  }
815  return true;
816  }
817 
818  /* Create new segment or open an existing one for attach. */
819  flags = O_RDWR | (op == DSM_OP_CREATE ? O_CREAT | O_EXCL : 0);
820  if ((fd = OpenTransientFile(name, flags)) == -1)
821  {
822  if (errno != EEXIST)
823  ereport(elevel,
825  errmsg("could not open shared memory segment \"%s\": %m",
826  name)));
827  return false;
828  }
829 
830  /*
831  * If we're attaching the segment, determine the current size; if we are
832  * creating the segment, set the size to the requested value.
833  */
834  if (op == DSM_OP_ATTACH)
835  {
836  struct stat st;
837 
838  if (fstat(fd, &st) != 0)
839  {
840  int save_errno;
841 
842  /* Back out what's already been done. */
843  save_errno = errno;
844  CloseTransientFile(fd);
845  errno = save_errno;
846 
847  ereport(elevel,
849  errmsg("could not stat shared memory segment \"%s\": %m",
850  name)));
851  return false;
852  }
853  request_size = st.st_size;
854  }
855  else
856  {
857  /*
858  * Allocate a buffer full of zeros.
859  *
860  * Note: palloc zbuffer, instead of just using a local char array, to
861  * ensure it is reasonably well-aligned; this may save a few cycles
862  * transferring data to the kernel.
863  */
864  char *zbuffer = (char *) palloc0(ZBUFFER_SIZE);
865  uint32 remaining = request_size;
866  bool success = true;
867 
868  /*
869  * Zero-fill the file. We have to do this the hard way to ensure that
870  * all the file space has really been allocated, so that we don't
871  * later seg fault when accessing the memory mapping. This is pretty
872  * pessimal.
873  */
874  while (success && remaining > 0)
875  {
876  Size goal = remaining;
877 
878  if (goal > ZBUFFER_SIZE)
879  goal = ZBUFFER_SIZE;
881  if (write(fd, zbuffer, goal) == goal)
882  remaining -= goal;
883  else
884  success = false;
886  }
887 
888  if (!success)
889  {
890  int save_errno;
891 
892  /* Back out what's already been done. */
893  save_errno = errno;
894  CloseTransientFile(fd);
895  unlink(name);
896  errno = save_errno ? save_errno : ENOSPC;
897 
898  ereport(elevel,
900  errmsg("could not resize shared memory segment \"%s\" to %zu bytes: %m",
901  name, request_size)));
902  return false;
903  }
904  }
905 
906  /* Map it. */
907  address = mmap(NULL, request_size, PROT_READ | PROT_WRITE,
908  MAP_SHARED | MAP_HASSEMAPHORE | MAP_NOSYNC, fd, 0);
909  if (address == MAP_FAILED)
910  {
911  int save_errno;
912 
913  /* Back out what's already been done. */
914  save_errno = errno;
915  CloseTransientFile(fd);
916  if (op == DSM_OP_CREATE)
917  unlink(name);
918  errno = save_errno;
919 
920  ereport(elevel,
922  errmsg("could not map shared memory segment \"%s\": %m",
923  name)));
924  return false;
925  }
926  *mapped_address = address;
927  *mapped_size = request_size;
928 
929  if (CloseTransientFile(fd) != 0)
930  {
931  ereport(elevel,
933  errmsg("could not close shared memory segment \"%s\": %m",
934  name)));
935  return false;
936  }
937 
938  return true;
939 }
int remaining
Definition: informix.c:667
#define MAP_HASSEMAPHORE
Definition: mem.h:30
#define MAP_FAILED
Definition: mem.h:45
#define PG_DYNSHMEM_DIR
Definition: dsm_impl.h:50
#define write(a, b, c)
Definition: win32.h:14
#define MAP_NOSYNC
Definition: mem.h:38
static int fd(const char *x, int i)
Definition: preproc-init.c:105
static int errcode_for_dynamic_shared_memory(void)
Definition: dsm_impl.c:1034
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2370
int errcode_for_file_access(void)
Definition: elog.c:633
unsigned int uint32
Definition: c.h:367
static void pgstat_report_wait_end(void)
Definition: pgstat.h:1380
int CloseTransientFile(int fd)
Definition: fd.c:2547
#define stat(a, b)
Definition: win32_port.h:255
static int elevel
Definition: vacuumlazy.c:323
void * palloc0(Size size)
Definition: mcxt.c:980
#define ereport(elevel,...)
Definition: elog.h:144
size_t Size
Definition: c.h:466
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: pgstat.h:1356
const char * name
Definition: encode.c:555
int errmsg(const char *fmt,...)
Definition: elog.c:824
#define PG_DYNSHMEM_MMAP_FILE_PREFIX
Definition: dsm_impl.h:51
#define ZBUFFER_SIZE
Definition: dsm_impl.c:117
static bool success
Definition: initdb.c:161
#define snprintf
Definition: port.h:193

◆ dsm_impl_op()

bool dsm_impl_op ( dsm_op  op,
dsm_handle  handle,
Size  request_size,
void **  impl_private,
void **  mapped_address,
Size *  mapped_size,
int  elevel 
)

Definition at line 158 of file dsm_impl.c.

References Assert, CHECK_FOR_INTERRUPTS, close, DSM_IMPL_MMAP, dsm_impl_mmap(), DSM_IMPL_POSIX, DSM_IMPL_SYSV, dsm_impl_sysv(), DSM_IMPL_WINDOWS, DSM_OP_ATTACH, DSM_OP_CREATE, DSM_OP_DESTROY, DSM_OP_DETACH, dynamic_shared_memory_type, EINTR, elevel, elog, ereport, errcode_for_dynamic_shared_memory(), errmsg(), ERROR, fd(), ftruncate, MAP_FAILED, MAP_HASSEMAPHORE, MAP_NOSYNC, name, PG_FILE_MODE_OWNER, pgstat_report_wait_end(), pgstat_report_wait_start(), ProcDiePending, QueryCancelPending, ReleaseExternalFD(), ReserveExternalFD(), snprintf, stat, and WAIT_EVENT_DSM_FILL_ZERO_WRITE.

Referenced by dsm_attach(), dsm_backend_startup(), dsm_cleanup_using_control_segment(), dsm_create(), dsm_detach(), dsm_detach_all(), dsm_postmaster_shutdown(), dsm_postmaster_startup(), and dsm_unpin_segment().

161 {
162  Assert(op == DSM_OP_CREATE || request_size == 0);
163  Assert((op != DSM_OP_CREATE && op != DSM_OP_ATTACH) ||
164  (*mapped_address == NULL && *mapped_size == 0));
165 
167  {
168 #ifdef USE_DSM_POSIX
169  case DSM_IMPL_POSIX:
170  return dsm_impl_posix(op, handle, request_size, impl_private,
171  mapped_address, mapped_size, elevel);
172 #endif
173 #ifdef USE_DSM_SYSV
174  case DSM_IMPL_SYSV:
175  return dsm_impl_sysv(op, handle, request_size, impl_private,
176  mapped_address, mapped_size, elevel);
177 #endif
178 #ifdef USE_DSM_WINDOWS
179  case DSM_IMPL_WINDOWS:
180  return dsm_impl_windows(op, handle, request_size, impl_private,
181  mapped_address, mapped_size, elevel);
182 #endif
183 #ifdef USE_DSM_MMAP
184  case DSM_IMPL_MMAP:
185  return dsm_impl_mmap(op, handle, request_size, impl_private,
186  mapped_address, mapped_size, elevel);
187 #endif
188  default:
189  elog(ERROR, "unexpected dynamic shared memory type: %d",
191  return false;
192  }
193 }
#define DSM_IMPL_MMAP
Definition: dsm_impl.h:20
#define DSM_IMPL_SYSV
Definition: dsm_impl.h:18
#define DSM_IMPL_WINDOWS
Definition: dsm_impl.h:19
static bool dsm_impl_mmap(dsm_op op, dsm_handle handle, Size request_size, void **impl_private, void **mapped_address, Size *mapped_size, int elevel)
Definition: dsm_impl.c:781
#define ERROR
Definition: elog.h:43
#define DSM_IMPL_POSIX
Definition: dsm_impl.h:17
int dynamic_shared_memory_type
Definition: dsm_impl.c:114
static int elevel
Definition: vacuumlazy.c:323
static bool dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size, void **impl_private, void **mapped_address, Size *mapped_size, int elevel)
Definition: dsm_impl.c:412
#define Assert(condition)
Definition: c.h:738
#define elog(elevel,...)
Definition: elog.h:214

◆ dsm_impl_pin_segment()

void dsm_impl_pin_segment ( dsm_handle  handle,
void *  impl_private,
void **  impl_private_pm_handle 
)

Definition at line 952 of file dsm_impl.c.

References _dosmaperr(), DSM_IMPL_WINDOWS, dynamic_shared_memory_type, ereport, errcode_for_dynamic_shared_memory(), errmsg(), ERROR, name, SEGMENT_NAME_PREFIX, and snprintf.

Referenced by dsm_pin_segment().

954 {
956  {
957 #ifdef USE_DSM_WINDOWS
958  case DSM_IMPL_WINDOWS:
959  {
960  HANDLE hmap;
961 
962  if (!DuplicateHandle(GetCurrentProcess(), impl_private,
963  PostmasterHandle, &hmap, 0, FALSE,
964  DUPLICATE_SAME_ACCESS))
965  {
966  char name[64];
967 
968  snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
969  _dosmaperr(GetLastError());
970  ereport(ERROR,
972  errmsg("could not duplicate handle for \"%s\": %m",
973  name)));
974  }
975 
976  /*
977  * Here, we remember the handle that we created in the
978  * postmaster process. This handle isn't actually usable in
979  * any process other than the postmaster, but that doesn't
980  * matter. We're just holding onto it so that, if the segment
981  * is unpinned, dsm_impl_unpin_segment can close it.
982  */
983  *impl_private_pm_handle = hmap;
984  break;
985  }
986 #endif
987  default:
988  break;
989  }
990 }
void _dosmaperr(unsigned long)
Definition: win32error.c:171
#define DSM_IMPL_WINDOWS
Definition: dsm_impl.h:19
static int errcode_for_dynamic_shared_memory(void)
Definition: dsm_impl.c:1034
#define ERROR
Definition: elog.h:43
#define SEGMENT_NAME_PREFIX
Definition: dsm_impl.c:119
int dynamic_shared_memory_type
Definition: dsm_impl.c:114
#define ereport(elevel,...)
Definition: elog.h:144
const char * name
Definition: encode.c:555
int errmsg(const char *fmt,...)
Definition: elog.c:824
#define snprintf
Definition: port.h:193

◆ dsm_impl_sysv()

static bool dsm_impl_sysv ( dsm_op  op,
dsm_handle  handle,
Size  request_size,
void **  impl_private,
void **  mapped_address,
Size *  mapped_size,
int  elevel 
)
static

Definition at line 412 of file dsm_impl.c.

References _dosmaperr(), DEBUG4, DSM_OP_ATTACH, DSM_OP_CREATE, DSM_OP_DESTROY, DSM_OP_DETACH, elevel, elog, ereport, errcode(), errcode_for_dynamic_shared_memory(), errmsg(), IPC_CREAT, IPC_EXCL, IPC_PRIVATE, IPC_RMID, IPC_STAT, IPCProtection, sort-test::key, MemoryContextAlloc(), name, pfree(), PG_SHMAT_FLAGS, SEGMENT_NAME_PREFIX, snprintf, and TopMemoryContext.

Referenced by dsm_impl_op().

415 {
416  key_t key;
417  int ident;
418  char *address;
419  char name[64];
420  int *ident_cache;
421 
422  /*
423  * POSIX shared memory and mmap-based shared memory identify segments with
424  * names. To avoid needless error message variation, we use the handle as
425  * the name.
426  */
427  snprintf(name, 64, "%u", handle);
428 
429  /*
430  * The System V shared memory namespace is very restricted; names are of
431  * type key_t, which is expected to be some sort of integer data type, but
432  * not necessarily the same one as dsm_handle. Since we use dsm_handle to
433  * identify shared memory segments across processes, this might seem like
434  * a problem, but it's really not. If dsm_handle is bigger than key_t,
435  * the cast below might truncate away some bits from the user-provided
436  * handle, but it'll truncate exactly the same bits away in exactly
437  * the same fashion every time we use that handle, which is all that
438  * really matters. Conversely, if dsm_handle is smaller than key_t, we
439  * won't use the full range of available key space, but that's no big deal
440  * either.
441  *
442  * We do make sure that the key isn't negative, because that might not be
443  * portable.
444  */
445  key = (key_t) handle;
446  if (key < 1) /* avoid compiler warning if type is unsigned */
447  key = -key;
448 
449  /*
450  * There's one special key, IPC_PRIVATE, which can't be used. If we end
451  * up with that value by chance during a create operation, just pretend it
452  * already exists, so that caller will retry. If we run into it anywhere
453  * else, the caller has passed a handle that doesn't correspond to
454  * anything we ever created, which should not happen.
455  */
456  if (key == IPC_PRIVATE)
457  {
458  if (op != DSM_OP_CREATE)
459  elog(DEBUG4, "System V shared memory key may not be IPC_PRIVATE");
460  errno = EEXIST;
461  return false;
462  }
463 
464  /*
465  * Before we can do anything with a shared memory segment, we have to map
466  * the shared memory key to a shared memory identifier using shmget(). To
467  * avoid repeated lookups, we store the identifier using impl_private.
468  */
469  if (*impl_private != NULL)
470  {
471  ident_cache = *impl_private;
472  ident = *ident_cache;
473  }
474  else
475  {
476  int flags = IPCProtection;
477  size_t segsize;
478 
479  /*
480  * Allocate the memory BEFORE acquiring the resource, so that we don't
481  * leak the resource if memory allocation fails.
482  */
483  ident_cache = MemoryContextAlloc(TopMemoryContext, sizeof(int));
484 
485  /*
486  * When using shmget to find an existing segment, we must pass the
487  * size as 0. Passing a non-zero size which is greater than the
488  * actual size will result in EINVAL.
489  */
490  segsize = 0;
491 
492  if (op == DSM_OP_CREATE)
493  {
494  flags |= IPC_CREAT | IPC_EXCL;
495  segsize = request_size;
496  }
497 
498  if ((ident = shmget(key, segsize, flags)) == -1)
499  {
500  if (errno != EEXIST)
501  {
502  int save_errno = errno;
503 
504  pfree(ident_cache);
505  errno = save_errno;
506  ereport(elevel,
508  errmsg("could not get shared memory segment: %m")));
509  }
510  return false;
511  }
512 
513  *ident_cache = ident;
514  *impl_private = ident_cache;
515  }
516 
517  /* Handle teardown cases. */
518  if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
519  {
520  pfree(ident_cache);
521  *impl_private = NULL;
522  if (*mapped_address != NULL && shmdt(*mapped_address) != 0)
523  {
524  ereport(elevel,
526  errmsg("could not unmap shared memory segment \"%s\": %m",
527  name)));
528  return false;
529  }
530  *mapped_address = NULL;
531  *mapped_size = 0;
532  if (op == DSM_OP_DESTROY && shmctl(ident, IPC_RMID, NULL) < 0)
533  {
534  ereport(elevel,
536  errmsg("could not remove shared memory segment \"%s\": %m",
537  name)));
538  return false;
539  }
540  return true;
541  }
542 
543  /* If we're attaching it, we must use IPC_STAT to determine the size. */
544  if (op == DSM_OP_ATTACH)
545  {
546  struct shmid_ds shm;
547 
548  if (shmctl(ident, IPC_STAT, &shm) != 0)
549  {
550  ereport(elevel,
552  errmsg("could not stat shared memory segment \"%s\": %m",
553  name)));
554  return false;
555  }
556  request_size = shm.shm_segsz;
557  }
558 
559  /* Map it. */
560  address = shmat(ident, NULL, PG_SHMAT_FLAGS);
561  if (address == (void *) -1)
562  {
563  int save_errno;
564 
565  /* Back out what's already been done. */
566  save_errno = errno;
567  if (op == DSM_OP_CREATE)
568  shmctl(ident, IPC_RMID, NULL);
569  errno = save_errno;
570 
571  ereport(elevel,
573  errmsg("could not map shared memory segment \"%s\": %m",
574  name)));
575  return false;
576  }
577  *mapped_address = address;
578  *mapped_size = request_size;
579 
580  return true;
581 }
#define IPC_CREAT
Definition: win32_port.h:81
#define IPCProtection
Definition: posix_sema.c:59
#define PG_SHMAT_FLAGS
Definition: mem.h:20
#define DEBUG4
Definition: elog.h:22
static int errcode_for_dynamic_shared_memory(void)
Definition: dsm_impl.c:1034
void pfree(void *pointer)
Definition: mcxt.c:1056
#define IPC_PRIVATE
Definition: win32_port.h:83
MemoryContext TopMemoryContext
Definition: mcxt.c:44
static int elevel
Definition: vacuumlazy.c:323
#define IPC_RMID
Definition: win32_port.h:80
#define ereport(elevel,...)
Definition: elog.h:144
long key_t
Definition: win32_port.h:233
#define IPC_EXCL
Definition: win32_port.h:82
const char * name
Definition: encode.c:555
int errmsg(const char *fmt,...)
Definition: elog.c:824
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:796
#define elog(elevel,...)
Definition: elog.h:214
#define snprintf
Definition: port.h:193
#define IPC_STAT
Definition: win32_port.h:85

◆ dsm_impl_unpin_segment()

void dsm_impl_unpin_segment ( dsm_handle  handle,
void **  impl_private 
)

Definition at line 1002 of file dsm_impl.c.

References _dosmaperr(), DSM_IMPL_WINDOWS, dynamic_shared_memory_type, ereport, errcode_for_dynamic_shared_memory(), errmsg(), ERROR, name, SEGMENT_NAME_PREFIX, and snprintf.

Referenced by dsm_unpin_segment().

1003 {
1005  {
1006 #ifdef USE_DSM_WINDOWS
1007  case DSM_IMPL_WINDOWS:
1008  {
1009  if (*impl_private &&
1010  !DuplicateHandle(PostmasterHandle, *impl_private,
1011  NULL, NULL, 0, FALSE,
1012  DUPLICATE_CLOSE_SOURCE))
1013  {
1014  char name[64];
1015 
1016  snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
1017  _dosmaperr(GetLastError());
1018  ereport(ERROR,
1020  errmsg("could not duplicate handle for \"%s\": %m",
1021  name)));
1022  }
1023 
1024  *impl_private = NULL;
1025  break;
1026  }
1027 #endif
1028  default:
1029  break;
1030  }
1031 }
void _dosmaperr(unsigned long)
Definition: win32error.c:171
#define DSM_IMPL_WINDOWS
Definition: dsm_impl.h:19
static int errcode_for_dynamic_shared_memory(void)
Definition: dsm_impl.c:1034
#define ERROR
Definition: elog.h:43
#define SEGMENT_NAME_PREFIX
Definition: dsm_impl.c:119
int dynamic_shared_memory_type
Definition: dsm_impl.c:114
#define ereport(elevel,...)
Definition: elog.h:144
const char * name
Definition: encode.c:555
int errmsg(const char *fmt,...)
Definition: elog.c:824
#define snprintf
Definition: port.h:193

◆ errcode_for_dynamic_shared_memory()

static int errcode_for_dynamic_shared_memory ( void  )
static

Definition at line 1034 of file dsm_impl.c.

References errcode(), and errcode_for_file_access().

Referenced by dsm_impl_mmap(), dsm_impl_op(), dsm_impl_pin_segment(), dsm_impl_sysv(), and dsm_impl_unpin_segment().

1035 {
1036  if (errno == EFBIG || errno == ENOMEM)
1037  return errcode(ERRCODE_OUT_OF_MEMORY);
1038  else
1039  return errcode_for_file_access();
1040 }
int errcode(int sqlerrcode)
Definition: elog.c:610
int errcode_for_file_access(void)
Definition: elog.c:633

Variable Documentation

◆ dynamic_shared_memory_options

const struct config_enum_entry dynamic_shared_memory_options[]
Initial value:
= {
{"sysv", DSM_IMPL_SYSV, false},
{"mmap", DSM_IMPL_MMAP, false},
{NULL, 0, false}
}
#define DSM_IMPL_MMAP
Definition: dsm_impl.h:20
#define DSM_IMPL_SYSV
Definition: dsm_impl.h:18

Definition at line 97 of file dsm_impl.c.

◆ dynamic_shared_memory_type

int dynamic_shared_memory_type