PostgreSQL Source Code  git master
dsm_impl.c File Reference
#include "postgres.h"
#include "miscadmin.h"
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include "common/file_perm.h"
#include "pgstat.h"
#include "portability/mem.h"
#include "storage/dsm_impl.h"
#include "storage/fd.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "postmaster/postmaster.h"
Include dependency graph for dsm_impl.c:

Go to the source code of this file.

Macros

#define ZBUFFER_SIZE   8192
 
#define SEGMENT_NAME_PREFIX   "Global/PostgreSQL"
 

Functions

static bool dsm_impl_sysv (dsm_op op, dsm_handle handle, Size request_size, void **impl_private, void **mapped_address, Size *mapped_size, int elevel)
 
static bool dsm_impl_mmap (dsm_op op, dsm_handle handle, Size request_size, void **impl_private, void **mapped_address, Size *mapped_size, int elevel)
 
static int errcode_for_dynamic_shared_memory (void)
 
bool dsm_impl_op (dsm_op op, dsm_handle handle, Size request_size, void **impl_private, void **mapped_address, Size *mapped_size, int elevel)
 
void dsm_impl_pin_segment (dsm_handle handle, void *impl_private, void **impl_private_pm_handle)
 
void dsm_impl_unpin_segment (dsm_handle handle, void **impl_private)
 

Variables

const struct config_enum_entry dynamic_shared_memory_options []
 
int dynamic_shared_memory_type
 

Macro Definition Documentation

◆ SEGMENT_NAME_PREFIX

#define SEGMENT_NAME_PREFIX   "Global/PostgreSQL"

Definition at line 119 of file dsm_impl.c.

Referenced by dsm_impl_pin_segment(), dsm_impl_sysv(), and dsm_impl_unpin_segment().

◆ ZBUFFER_SIZE

#define ZBUFFER_SIZE   8192

Definition at line 117 of file dsm_impl.c.

Referenced by dsm_impl_mmap().

Function Documentation

◆ dsm_impl_mmap()

static bool dsm_impl_mmap ( dsm_op  op,
dsm_handle  handle,
Size  request_size,
void **  impl_private,
void **  mapped_address,
Size *  mapped_size,
int  elevel 
)
static

Definition at line 772 of file dsm_impl.c.

References CloseTransientFile(), DSM_OP_ATTACH, DSM_OP_CREATE, DSM_OP_DESTROY, DSM_OP_DETACH, ereport, errcode_for_dynamic_shared_memory(), errcode_for_file_access(), errmsg(), fd(), MAP_FAILED, MAP_HASSEMAPHORE, MAP_NOSYNC, name, OpenTransientFile(), palloc0(), PG_DYNSHMEM_DIR, PG_DYNSHMEM_MMAP_FILE_PREFIX, pgstat_report_wait_end(), pgstat_report_wait_start(), remaining, snprintf, stat, success, WAIT_EVENT_DSM_FILL_ZERO_WRITE, write, and ZBUFFER_SIZE.

Referenced by dsm_impl_op().

775 {
776  char name[64];
777  int flags;
778  int fd;
779  char *address;
780 
782  handle);
783 
784  /* Handle teardown cases. */
785  if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
786  {
787  if (*mapped_address != NULL
788  && munmap(*mapped_address, *mapped_size) != 0)
789  {
790  ereport(elevel,
792  errmsg("could not unmap shared memory segment \"%s\": %m",
793  name)));
794  return false;
795  }
796  *mapped_address = NULL;
797  *mapped_size = 0;
798  if (op == DSM_OP_DESTROY && unlink(name) != 0)
799  {
800  ereport(elevel,
802  errmsg("could not remove shared memory segment \"%s\": %m",
803  name)));
804  return false;
805  }
806  return true;
807  }
808 
809  /* Create new segment or open an existing one for attach. */
810  flags = O_RDWR | (op == DSM_OP_CREATE ? O_CREAT | O_EXCL : 0);
811  if ((fd = OpenTransientFile(name, flags)) == -1)
812  {
813  if (errno != EEXIST)
814  ereport(elevel,
816  errmsg("could not open shared memory segment \"%s\": %m",
817  name)));
818  return false;
819  }
820 
821  /*
822  * If we're attaching the segment, determine the current size; if we are
823  * creating the segment, set the size to the requested value.
824  */
825  if (op == DSM_OP_ATTACH)
826  {
827  struct stat st;
828 
829  if (fstat(fd, &st) != 0)
830  {
831  int save_errno;
832 
833  /* Back out what's already been done. */
834  save_errno = errno;
835  CloseTransientFile(fd);
836  errno = save_errno;
837 
838  ereport(elevel,
840  errmsg("could not stat shared memory segment \"%s\": %m",
841  name)));
842  return false;
843  }
844  request_size = st.st_size;
845  }
846  else
847  {
848  /*
849  * Allocate a buffer full of zeros.
850  *
851  * Note: palloc zbuffer, instead of just using a local char array, to
852  * ensure it is reasonably well-aligned; this may save a few cycles
853  * transferring data to the kernel.
854  */
855  char *zbuffer = (char *) palloc0(ZBUFFER_SIZE);
856  uint32 remaining = request_size;
857  bool success = true;
858 
859  /*
860  * Zero-fill the file. We have to do this the hard way to ensure that
861  * all the file space has really been allocated, so that we don't
862  * later seg fault when accessing the memory mapping. This is pretty
863  * pessimal.
864  */
865  while (success && remaining > 0)
866  {
867  Size goal = remaining;
868 
869  if (goal > ZBUFFER_SIZE)
870  goal = ZBUFFER_SIZE;
872  if (write(fd, zbuffer, goal) == goal)
873  remaining -= goal;
874  else
875  success = false;
877  }
878 
879  if (!success)
880  {
881  int save_errno;
882 
883  /* Back out what's already been done. */
884  save_errno = errno;
885  CloseTransientFile(fd);
886  unlink(name);
887  errno = save_errno ? save_errno : ENOSPC;
888 
889  ereport(elevel,
891  errmsg("could not resize shared memory segment \"%s\" to %zu bytes: %m",
892  name, request_size)));
893  return false;
894  }
895  }
896 
897  /* Map it. */
898  address = mmap(NULL, request_size, PROT_READ | PROT_WRITE,
899  MAP_SHARED | MAP_HASSEMAPHORE | MAP_NOSYNC, fd, 0);
900  if (address == MAP_FAILED)
901  {
902  int save_errno;
903 
904  /* Back out what's already been done. */
905  save_errno = errno;
906  CloseTransientFile(fd);
907  if (op == DSM_OP_CREATE)
908  unlink(name);
909  errno = save_errno;
910 
911  ereport(elevel,
913  errmsg("could not map shared memory segment \"%s\": %m",
914  name)));
915  return false;
916  }
917  *mapped_address = address;
918  *mapped_size = request_size;
919 
920  if (CloseTransientFile(fd) != 0)
921  {
922  ereport(elevel,
924  errmsg("could not close shared memory segment \"%s\": %m",
925  name)));
926  return false;
927  }
928 
929  return true;
930 }
int remaining
Definition: informix.c:687
#define MAP_HASSEMAPHORE
Definition: mem.h:30
#define MAP_FAILED
Definition: mem.h:45
#define PG_DYNSHMEM_DIR
Definition: dsm_impl.h:50
#define write(a, b, c)
Definition: win32.h:14
#define MAP_NOSYNC
Definition: mem.h:38
static int fd(const char *x, int i)
Definition: preproc-init.c:105
static int errcode_for_dynamic_shared_memory(void)
Definition: dsm_impl.c:1025
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2257
int errcode_for_file_access(void)
Definition: elog.c:593
unsigned int uint32
Definition: c.h:358
static void pgstat_report_wait_end(void)
Definition: pgstat.h:1342
#define ereport(elevel, rest)
Definition: elog.h:141
int CloseTransientFile(int fd)
Definition: fd.c:2434
#define stat(a, b)
Definition: win32_port.h:264
static int elevel
Definition: vacuumlazy.c:143
void * palloc0(Size size)
Definition: mcxt.c:955
size_t Size
Definition: c.h:466
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: pgstat.h:1318
const char * name
Definition: encode.c:521
int errmsg(const char *fmt,...)
Definition: elog.c:784
#define PG_DYNSHMEM_MMAP_FILE_PREFIX
Definition: dsm_impl.h:51
#define ZBUFFER_SIZE
Definition: dsm_impl.c:117
static bool success
Definition: initdb.c:163
#define snprintf
Definition: port.h:192

◆ dsm_impl_op()

bool dsm_impl_op ( dsm_op  op,
dsm_handle  handle,
Size  request_size,
void **  impl_private,
void **  mapped_address,
Size *  mapped_size,
int  elevel 
)

Definition at line 158 of file dsm_impl.c.

References Assert, CHECK_FOR_INTERRUPTS, close, DSM_IMPL_MMAP, dsm_impl_mmap(), DSM_IMPL_POSIX, DSM_IMPL_SYSV, dsm_impl_sysv(), DSM_IMPL_WINDOWS, DSM_OP_ATTACH, DSM_OP_CREATE, DSM_OP_DESTROY, DSM_OP_DETACH, dynamic_shared_memory_type, EINTR, elevel, elog, ereport, errcode_for_dynamic_shared_memory(), errmsg(), ERROR, fd(), ftruncate, MAP_FAILED, MAP_HASSEMAPHORE, MAP_NOSYNC, name, PG_FILE_MODE_OWNER, ProcDiePending, QueryCancelPending, snprintf, and stat.

Referenced by dsm_attach(), dsm_backend_startup(), dsm_cleanup_using_control_segment(), dsm_create(), dsm_detach(), dsm_detach_all(), dsm_postmaster_shutdown(), dsm_postmaster_startup(), and dsm_unpin_segment().

161 {
162  Assert(op == DSM_OP_CREATE || request_size == 0);
163  Assert((op != DSM_OP_CREATE && op != DSM_OP_ATTACH) ||
164  (*mapped_address == NULL && *mapped_size == 0));
165 
167  {
168 #ifdef USE_DSM_POSIX
169  case DSM_IMPL_POSIX:
170  return dsm_impl_posix(op, handle, request_size, impl_private,
171  mapped_address, mapped_size, elevel);
172 #endif
173 #ifdef USE_DSM_SYSV
174  case DSM_IMPL_SYSV:
175  return dsm_impl_sysv(op, handle, request_size, impl_private,
176  mapped_address, mapped_size, elevel);
177 #endif
178 #ifdef USE_DSM_WINDOWS
179  case DSM_IMPL_WINDOWS:
180  return dsm_impl_windows(op, handle, request_size, impl_private,
181  mapped_address, mapped_size, elevel);
182 #endif
183 #ifdef USE_DSM_MMAP
184  case DSM_IMPL_MMAP:
185  return dsm_impl_mmap(op, handle, request_size, impl_private,
186  mapped_address, mapped_size, elevel);
187 #endif
188  default:
189  elog(ERROR, "unexpected dynamic shared memory type: %d",
191  return false;
192  }
193 }
#define DSM_IMPL_MMAP
Definition: dsm_impl.h:20
#define DSM_IMPL_SYSV
Definition: dsm_impl.h:18
#define DSM_IMPL_WINDOWS
Definition: dsm_impl.h:19
static bool dsm_impl_mmap(dsm_op op, dsm_handle handle, Size request_size, void **impl_private, void **mapped_address, Size *mapped_size, int elevel)
Definition: dsm_impl.c:772
#define ERROR
Definition: elog.h:43
#define DSM_IMPL_POSIX
Definition: dsm_impl.h:17
int dynamic_shared_memory_type
Definition: dsm_impl.c:114
static int elevel
Definition: vacuumlazy.c:143
static bool dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size, void **impl_private, void **mapped_address, Size *mapped_size, int elevel)
Definition: dsm_impl.c:403
#define Assert(condition)
Definition: c.h:732
#define elog(elevel,...)
Definition: elog.h:226

◆ dsm_impl_pin_segment()

void dsm_impl_pin_segment ( dsm_handle  handle,
void *  impl_private,
void **  impl_private_pm_handle 
)

Definition at line 943 of file dsm_impl.c.

References _dosmaperr(), DSM_IMPL_WINDOWS, dynamic_shared_memory_type, ereport, errcode_for_dynamic_shared_memory(), errmsg(), ERROR, FALSE, name, SEGMENT_NAME_PREFIX, and snprintf.

Referenced by dsm_pin_segment().

945 {
947  {
948 #ifdef USE_DSM_WINDOWS
949  case DSM_IMPL_WINDOWS:
950  {
951  HANDLE hmap;
952 
953  if (!DuplicateHandle(GetCurrentProcess(), impl_private,
954  PostmasterHandle, &hmap, 0, FALSE,
955  DUPLICATE_SAME_ACCESS))
956  {
957  char name[64];
958 
959  snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
960  _dosmaperr(GetLastError());
961  ereport(ERROR,
963  errmsg("could not duplicate handle for \"%s\": %m",
964  name)));
965  }
966 
967  /*
968  * Here, we remember the handle that we created in the
969  * postmaster process. This handle isn't actually usable in
970  * any process other than the postmaster, but that doesn't
971  * matter. We're just holding onto it so that, if the segment
972  * is unpinned, dsm_impl_unpin_segment can close it.
973  */
974  *impl_private_pm_handle = hmap;
975  break;
976  }
977 #endif
978  default:
979  break;
980  }
981 }
#define FALSE
Definition: ecpglib.h:39
void _dosmaperr(unsigned long)
Definition: win32error.c:171
#define DSM_IMPL_WINDOWS
Definition: dsm_impl.h:19
static int errcode_for_dynamic_shared_memory(void)
Definition: dsm_impl.c:1025
#define ERROR
Definition: elog.h:43
#define SEGMENT_NAME_PREFIX
Definition: dsm_impl.c:119
int dynamic_shared_memory_type
Definition: dsm_impl.c:114
#define ereport(elevel, rest)
Definition: elog.h:141
const char * name
Definition: encode.c:521
int errmsg(const char *fmt,...)
Definition: elog.c:784
#define snprintf
Definition: port.h:192

◆ dsm_impl_sysv()

static bool dsm_impl_sysv ( dsm_op  op,
dsm_handle  handle,
Size  request_size,
void **  impl_private,
void **  mapped_address,
Size *  mapped_size,
int  elevel 
)
static

Definition at line 403 of file dsm_impl.c.

References _dosmaperr(), DEBUG4, DSM_OP_ATTACH, DSM_OP_CREATE, DSM_OP_DESTROY, DSM_OP_DETACH, elevel, elog, ereport, errcode(), errcode_for_dynamic_shared_memory(), errmsg(), FALSE, IPC_CREAT, IPC_EXCL, IPC_PRIVATE, IPC_RMID, IPC_STAT, IPCProtection, sort-test::key, MemoryContextAlloc(), name, pfree(), PG_SHMAT_FLAGS, SEGMENT_NAME_PREFIX, snprintf, and TopMemoryContext.

Referenced by dsm_impl_op().

406 {
407  key_t key;
408  int ident;
409  char *address;
410  char name[64];
411  int *ident_cache;
412 
413  /*
414  * POSIX shared memory and mmap-based shared memory identify segments with
415  * names. To avoid needless error message variation, we use the handle as
416  * the name.
417  */
418  snprintf(name, 64, "%u", handle);
419 
420  /*
421  * The System V shared memory namespace is very restricted; names are of
422  * type key_t, which is expected to be some sort of integer data type, but
423  * not necessarily the same one as dsm_handle. Since we use dsm_handle to
424  * identify shared memory segments across processes, this might seem like
425  * a problem, but it's really not. If dsm_handle is bigger than key_t,
426  * the cast below might truncate away some bits from the handle the
427  * user-provided, but it'll truncate exactly the same bits away in exactly
428  * the same fashion every time we use that handle, which is all that
429  * really matters. Conversely, if dsm_handle is smaller than key_t, we
430  * won't use the full range of available key space, but that's no big deal
431  * either.
432  *
433  * We do make sure that the key isn't negative, because that might not be
434  * portable.
435  */
436  key = (key_t) handle;
437  if (key < 1) /* avoid compiler warning if type is unsigned */
438  key = -key;
439 
440  /*
441  * There's one special key, IPC_PRIVATE, which can't be used. If we end
442  * up with that value by chance during a create operation, just pretend it
443  * already exists, so that caller will retry. If we run into it anywhere
444  * else, the caller has passed a handle that doesn't correspond to
445  * anything we ever created, which should not happen.
446  */
447  if (key == IPC_PRIVATE)
448  {
449  if (op != DSM_OP_CREATE)
450  elog(DEBUG4, "System V shared memory key may not be IPC_PRIVATE");
451  errno = EEXIST;
452  return false;
453  }
454 
455  /*
456  * Before we can do anything with a shared memory segment, we have to map
457  * the shared memory key to a shared memory identifier using shmget(). To
458  * avoid repeated lookups, we store the key using impl_private.
459  */
460  if (*impl_private != NULL)
461  {
462  ident_cache = *impl_private;
463  ident = *ident_cache;
464  }
465  else
466  {
467  int flags = IPCProtection;
468  size_t segsize;
469 
470  /*
471  * Allocate the memory BEFORE acquiring the resource, so that we don't
472  * leak the resource if memory allocation fails.
473  */
474  ident_cache = MemoryContextAlloc(TopMemoryContext, sizeof(int));
475 
476  /*
477  * When using shmget to find an existing segment, we must pass the
478  * size as 0. Passing a non-zero size which is greater than the
479  * actual size will result in EINVAL.
480  */
481  segsize = 0;
482 
483  if (op == DSM_OP_CREATE)
484  {
485  flags |= IPC_CREAT | IPC_EXCL;
486  segsize = request_size;
487  }
488 
489  if ((ident = shmget(key, segsize, flags)) == -1)
490  {
491  if (errno != EEXIST)
492  {
493  int save_errno = errno;
494 
495  pfree(ident_cache);
496  errno = save_errno;
497  ereport(elevel,
499  errmsg("could not get shared memory segment: %m")));
500  }
501  return false;
502  }
503 
504  *ident_cache = ident;
505  *impl_private = ident_cache;
506  }
507 
508  /* Handle teardown cases. */
509  if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
510  {
511  pfree(ident_cache);
512  *impl_private = NULL;
513  if (*mapped_address != NULL && shmdt(*mapped_address) != 0)
514  {
515  ereport(elevel,
517  errmsg("could not unmap shared memory segment \"%s\": %m",
518  name)));
519  return false;
520  }
521  *mapped_address = NULL;
522  *mapped_size = 0;
523  if (op == DSM_OP_DESTROY && shmctl(ident, IPC_RMID, NULL) < 0)
524  {
525  ereport(elevel,
527  errmsg("could not remove shared memory segment \"%s\": %m",
528  name)));
529  return false;
530  }
531  return true;
532  }
533 
534  /* If we're attaching it, we must use IPC_STAT to determine the size. */
535  if (op == DSM_OP_ATTACH)
536  {
537  struct shmid_ds shm;
538 
539  if (shmctl(ident, IPC_STAT, &shm) != 0)
540  {
541  ereport(elevel,
543  errmsg("could not stat shared memory segment \"%s\": %m",
544  name)));
545  return false;
546  }
547  request_size = shm.shm_segsz;
548  }
549 
550  /* Map it. */
551  address = shmat(ident, NULL, PG_SHMAT_FLAGS);
552  if (address == (void *) -1)
553  {
554  int save_errno;
555 
556  /* Back out what's already been done. */
557  save_errno = errno;
558  if (op == DSM_OP_CREATE)
559  shmctl(ident, IPC_RMID, NULL);
560  errno = save_errno;
561 
562  ereport(elevel,
564  errmsg("could not map shared memory segment \"%s\": %m",
565  name)));
566  return false;
567  }
568  *mapped_address = address;
569  *mapped_size = request_size;
570 
571  return true;
572 }
#define IPC_CREAT
Definition: win32_port.h:82
#define IPCProtection
Definition: posix_sema.c:58
#define PG_SHMAT_FLAGS
Definition: mem.h:20
#define DEBUG4
Definition: elog.h:22
static int errcode_for_dynamic_shared_memory(void)
Definition: dsm_impl.c:1025
void pfree(void *pointer)
Definition: mcxt.c:1031
#define ereport(elevel, rest)
Definition: elog.h:141
#define IPC_PRIVATE
Definition: win32_port.h:84
MemoryContext TopMemoryContext
Definition: mcxt.c:44
static int elevel
Definition: vacuumlazy.c:143
#define IPC_RMID
Definition: win32_port.h:81
long key_t
Definition: win32_port.h:242
#define IPC_EXCL
Definition: win32_port.h:83
const char * name
Definition: encode.c:521
int errmsg(const char *fmt,...)
Definition: elog.c:784
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:771
#define elog(elevel,...)
Definition: elog.h:226
#define snprintf
Definition: port.h:192
#define IPC_STAT
Definition: win32_port.h:86

◆ dsm_impl_unpin_segment()

void dsm_impl_unpin_segment ( dsm_handle  handle,
void **  impl_private 
)

Definition at line 993 of file dsm_impl.c.

References _dosmaperr(), DSM_IMPL_WINDOWS, dynamic_shared_memory_type, ereport, errcode_for_dynamic_shared_memory(), errmsg(), ERROR, FALSE, name, SEGMENT_NAME_PREFIX, and snprintf.

Referenced by dsm_unpin_segment().

994 {
996  {
997 #ifdef USE_DSM_WINDOWS
998  case DSM_IMPL_WINDOWS:
999  {
1000  if (*impl_private &&
1001  !DuplicateHandle(PostmasterHandle, *impl_private,
1002  NULL, NULL, 0, FALSE,
1003  DUPLICATE_CLOSE_SOURCE))
1004  {
1005  char name[64];
1006 
1007  snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
1008  _dosmaperr(GetLastError());
1009  ereport(ERROR,
1011  errmsg("could not duplicate handle for \"%s\": %m",
1012  name)));
1013  }
1014 
1015  *impl_private = NULL;
1016  break;
1017  }
1018 #endif
1019  default:
1020  break;
1021  }
1022 }
#define FALSE
Definition: ecpglib.h:39
void _dosmaperr(unsigned long)
Definition: win32error.c:171
#define DSM_IMPL_WINDOWS
Definition: dsm_impl.h:19
static int errcode_for_dynamic_shared_memory(void)
Definition: dsm_impl.c:1025
#define ERROR
Definition: elog.h:43
#define SEGMENT_NAME_PREFIX
Definition: dsm_impl.c:119
int dynamic_shared_memory_type
Definition: dsm_impl.c:114
#define ereport(elevel, rest)
Definition: elog.h:141
const char * name
Definition: encode.c:521
int errmsg(const char *fmt,...)
Definition: elog.c:784
#define snprintf
Definition: port.h:192

◆ errcode_for_dynamic_shared_memory()

static int errcode_for_dynamic_shared_memory ( void  )
static

Definition at line 1025 of file dsm_impl.c.

References errcode(), and errcode_for_file_access().

Referenced by dsm_impl_mmap(), dsm_impl_op(), dsm_impl_pin_segment(), dsm_impl_sysv(), and dsm_impl_unpin_segment().

1026 {
1027  if (errno == EFBIG || errno == ENOMEM)
1028  return errcode(ERRCODE_OUT_OF_MEMORY);
1029  else
1030  return errcode_for_file_access();
1031 }
int errcode(int sqlerrcode)
Definition: elog.c:570
int errcode_for_file_access(void)
Definition: elog.c:593

Variable Documentation

◆ dynamic_shared_memory_options

const struct config_enum_entry dynamic_shared_memory_options[]
Initial value:
= {
{"sysv", DSM_IMPL_SYSV, false},
{"mmap", DSM_IMPL_MMAP, false},
{NULL, 0, false}
}
#define DSM_IMPL_MMAP
Definition: dsm_impl.h:20
#define DSM_IMPL_SYSV
Definition: dsm_impl.h:18

Definition at line 97 of file dsm_impl.c.

◆ dynamic_shared_memory_type

int dynamic_shared_memory_type