PostgreSQL Source Code  git master
md.c File Reference
#include "postgres.h"
#include <unistd.h>
#include <fcntl.h>
#include <sys/file.h>
#include "miscadmin.h"
#include "access/xlogutils.h"
#include "access/xlog.h"
#include "commands/tablespace.h"
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "storage/fd.h"
#include "storage/bufmgr.h"
#include "storage/md.h"
#include "storage/relfilenode.h"
#include "storage/smgr.h"
#include "storage/sync.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"
#include "pg_trace.h"
Include dependency graph for md.c:

Go to the source code of this file.

Data Structures

struct  _MdfdVec
 

Macros

#define INIT_MD_FILETAG(a, xx_rnode, xx_forknum, xx_segno)
 
#define EXTENSION_FAIL   (1 << 0)
 
#define EXTENSION_RETURN_NULL   (1 << 1)
 
#define EXTENSION_CREATE   (1 << 2)
 
#define EXTENSION_CREATE_RECOVERY   (1 << 3)
 
#define EXTENSION_DONT_CHECK_SIZE   (1 << 4)
 

Typedefs

typedef struct _MdfdVec MdfdVec
 

Functions

static void mdunlinkfork (RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo)
 
static MdfdVec * mdopenfork (SMgrRelation reln, ForkNumber forknum, int behavior)
 
static void register_dirty_segment (SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
 
static void register_unlink_segment (RelFileNodeBackend rnode, ForkNumber forknum, BlockNumber segno)
 
static void register_forget_request (RelFileNodeBackend rnode, ForkNumber forknum, BlockNumber segno)
 
static void _fdvec_resize (SMgrRelation reln, ForkNumber forknum, int nseg)
 
static char * _mdfd_segpath (SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
 
static MdfdVec * _mdfd_openseg (SMgrRelation reln, ForkNumber forkno, BlockNumber segno, int oflags)
 
static MdfdVec * _mdfd_getseg (SMgrRelation reln, ForkNumber forkno, BlockNumber blkno, bool skipFsync, int behavior)
 
static BlockNumber _mdnblocks (SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
 
void mdinit (void)
 
bool mdexists (SMgrRelation reln, ForkNumber forkNum)
 
void mdcreate (SMgrRelation reln, ForkNumber forkNum, bool isRedo)
 
void mdunlink (RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo)
 
void mdextend (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync)
 
void mdopen (SMgrRelation reln)
 
void mdclose (SMgrRelation reln, ForkNumber forknum)
 
void mdprefetch (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
 
void mdwriteback (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
 
void mdread (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer)
 
void mdwrite (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync)
 
BlockNumber mdnblocks (SMgrRelation reln, ForkNumber forknum)
 
void mdtruncate (SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
 
void mdimmedsync (SMgrRelation reln, ForkNumber forknum)
 
void ForgetDatabaseSyncRequests (Oid dbid)
 
void DropRelationFiles (RelFileNode *delrels, int ndelrels, bool isRedo)
 
int mdsyncfiletag (const FileTag *ftag, char *path)
 
int mdunlinkfiletag (const FileTag *ftag, char *path)
 
bool mdfiletagmatches (const FileTag *ftag, const FileTag *candidate)
 

Variables

static MemoryContext MdCxt
 

Macro Definition Documentation

◆ EXTENSION_CREATE

#define EXTENSION_CREATE   (1 << 2)

Definition at line 108 of file md.c.

Referenced by _mdfd_getseg(), and mdextend().

◆ EXTENSION_CREATE_RECOVERY

#define EXTENSION_CREATE_RECOVERY   (1 << 3)

Definition at line 110 of file md.c.

Referenced by _mdfd_getseg(), mdread(), and mdwrite().

◆ EXTENSION_DONT_CHECK_SIZE

#define EXTENSION_DONT_CHECK_SIZE   (1 << 4)

Definition at line 118 of file md.c.

Referenced by _mdfd_getseg(), and mdsyncfiletag().

◆ EXTENSION_FAIL

#define EXTENSION_FAIL   (1 << 0)

Definition at line 104 of file md.c.

Referenced by _mdfd_getseg(), mdnblocks(), mdprefetch(), mdread(), and mdwrite().

◆ EXTENSION_RETURN_NULL

#define EXTENSION_RETURN_NULL   (1 << 1)

Definition at line 106 of file md.c.

Referenced by _mdfd_getseg(), mdexists(), mdopenfork(), mdsyncfiletag(), and mdwriteback().

◆ INIT_MD_FILETAG

#define INIT_MD_FILETAG (   a,
  xx_rnode,
  xx_forknum,
  xx_segno 
)
Value:
( \
memset(&(a), 0, sizeof(FileTag)), \
(a).handler = SYNC_HANDLER_MD, \
(a).rnode = (xx_rnode), \
(a).forknum = (xx_forknum), \
(a).segno = (xx_segno) \
)
Definition: sync.h:45

Definition at line 92 of file md.c.

Referenced by ForgetDatabaseSyncRequests(), register_dirty_segment(), register_forget_request(), and register_unlink_segment().

Typedef Documentation

◆ MdfdVec

typedef struct _MdfdVec MdfdVec

Function Documentation

◆ _fdvec_resize()

static void _fdvec_resize ( SMgrRelation  reln,
ForkNumber  forknum,
int  nseg 
)
static

Definition at line 1033 of file md.c.

References SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, MemoryContextAlloc(), pfree(), and repalloc().

Referenced by _mdfd_openseg(), mdclose(), mdcreate(), mdopenfork(), and mdtruncate().

1036 {
1037  if (nseg == 0)
1038  {
1039  if (reln->md_num_open_segs[forknum] > 0)
1040  {
1041  pfree(reln->md_seg_fds[forknum]);
1042  reln->md_seg_fds[forknum] = NULL;
1043  }
1044  }
1045  else if (reln->md_num_open_segs[forknum] == 0)
1046  {
1047  reln->md_seg_fds[forknum] =
1048  MemoryContextAlloc(MdCxt, sizeof(MdfdVec) * nseg);
1049  }
1050  else
1051  {
1052  /*
1053  * It doesn't seem worthwhile complicating the code by having a more
1054  * aggressive growth strategy here; the number of segments doesn't
1055  * grow that fast, and the memory context internally will sometimes
1056  * avoid doing an actual reallocation.
1057  */
1058  reln->md_seg_fds[forknum] =
1059  repalloc(reln->md_seg_fds[forknum],
1060  sizeof(MdfdVec) * nseg);
1061  }
1062 
1063  reln->md_num_open_segs[forknum] = nseg;
1064 }
static MemoryContext MdCxt
Definition: md.c:88
void pfree(void *pointer)
Definition: mcxt.c:1031
Definition: md.c:82
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1044
int md_num_open_segs[MAX_FORKNUM+1]
Definition: smgr.h:70
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:771
struct _MdfdVec * md_seg_fds[MAX_FORKNUM+1]
Definition: smgr.h:71

◆ _mdfd_getseg()

static MdfdVec * _mdfd_getseg ( SMgrRelation  reln,
ForkNumber  forkno,
BlockNumber  blkno,
bool  skipFsync,
int  behavior 
)
static

Definition at line 1134 of file md.c.

References _mdfd_openseg(), _mdfd_segpath(), _mdnblocks(), Assert, elog, ereport, errcode_for_file_access(), errmsg(), ERROR, EXTENSION_CREATE, EXTENSION_CREATE_RECOVERY, EXTENSION_DONT_CHECK_SIZE, EXTENSION_FAIL, EXTENSION_RETURN_NULL, FATAL, FILE_POSSIBLY_DELETED, InRecovery, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, mdextend(), _MdfdVec::mdfd_segno, mdopenfork(), palloc0(), and pfree().

Referenced by mdextend(), mdprefetch(), mdread(), mdsyncfiletag(), mdwrite(), and mdwriteback().

1136 {
1137  MdfdVec *v;
1138  BlockNumber targetseg;
1139  BlockNumber nextsegno;
1140 
1141  /* some way to handle non-existent segments needs to be specified */
1142  Assert(behavior &
1143  (EXTENSION_FAIL | EXTENSION_CREATE | EXTENSION_RETURN_NULL));
1144 
1145  targetseg = blkno / ((BlockNumber) RELSEG_SIZE);
1146 
1147  /* if an existing and opened segment, we're done */
1148  if (targetseg < reln->md_num_open_segs[forknum])
1149  {
1150  v = &reln->md_seg_fds[forknum][targetseg];
1151  return v;
1152  }
1153 
1154  /*
1155  * The target segment is not yet open. Iterate over all the segments
1156  * between the last opened and the target segment. This way missing
1157  * segments either raise an error, or get created (according to
1158  * 'behavior'). Start with either the last opened, or the first segment if
1159  * none was opened before.
1160  */
1161  if (reln->md_num_open_segs[forknum] > 0)
1162  v = &reln->md_seg_fds[forknum][reln->md_num_open_segs[forknum] - 1];
1163  else
1164  {
1165  v = mdopenfork(reln, forknum, behavior);
1166  if (!v)
1167  return NULL; /* if behavior & EXTENSION_RETURN_NULL */
1168  }
1169 
1170  for (nextsegno = reln->md_num_open_segs[forknum];
1171  nextsegno <= targetseg; nextsegno++)
1172  {
1173  BlockNumber nblocks = _mdnblocks(reln, forknum, v);
1174  int flags = 0;
1175 
1176  Assert(nextsegno == v->mdfd_segno + 1);
1177 
1178  if (nblocks > ((BlockNumber) RELSEG_SIZE))
1179  elog(FATAL, "segment too big");
1180 
1181  if ((behavior & EXTENSION_CREATE) ||
1182  (InRecovery && (behavior & EXTENSION_CREATE_RECOVERY)))
1183  {
1184  /*
1185  * Normally we will create new segments only if authorized by the
1186  * caller (i.e., we are doing mdextend()). But when doing WAL
1187  * recovery, create segments anyway; this allows cases such as
1188  * replaying WAL data that has a write into a high-numbered
1189  * segment of a relation that was later deleted. We want to go
1190  * ahead and create the segments so we can finish out the replay.
1191  *
1192  * We have to maintain the invariant that segments before the last
1193  * active segment are of size RELSEG_SIZE; therefore, if
1194  * extending, pad them out with zeroes if needed. (This only
1195  * matters if in recovery, or if the caller is extending the
1196  * relation discontiguously, but that can happen in hash indexes.)
1197  */
1198  if (nblocks < ((BlockNumber) RELSEG_SIZE))
1199  {
1200  char *zerobuf = palloc0(BLCKSZ);
1201 
1202  mdextend(reln, forknum,
1203  nextsegno * ((BlockNumber) RELSEG_SIZE) - 1,
1204  zerobuf, skipFsync);
1205  pfree(zerobuf);
1206  }
1207  flags = O_CREAT;
1208  }
1209  else if (!(behavior & EXTENSION_DONT_CHECK_SIZE) &&
1210  nblocks < ((BlockNumber) RELSEG_SIZE))
1211  {
1212  /*
1213  * When not extending (or explicitly including truncated
1214  * segments), only open the next segment if the current one is
1215  * exactly RELSEG_SIZE. If not (this branch), either return NULL
1216  * or fail.
1217  */
1218  if (behavior & EXTENSION_RETURN_NULL)
1219  {
1220  /*
1221  * Some callers discern between reasons for _mdfd_getseg()
1222  * returning NULL based on errno. As there's no failing
1223  * syscall involved in this case, explicitly set errno to
1224  * ENOENT, as that seems the closest interpretation.
1225  */
1226  errno = ENOENT;
1227  return NULL;
1228  }
1229 
1230  ereport(ERROR,
1231  (errcode_for_file_access(),
1232  errmsg("could not open file \"%s\" (target block %u): previous segment is only %u blocks",
1233  _mdfd_segpath(reln, forknum, nextsegno),
1234  blkno, nblocks)));
1235  }
1236 
1237  v = _mdfd_openseg(reln, forknum, nextsegno, flags);
1238 
1239  if (v == NULL)
1240  {
1241  if ((behavior & EXTENSION_RETURN_NULL) &&
1242  FILE_POSSIBLY_DELETED(errno))
1243  return NULL;
1244  ereport(ERROR,
1245  (errcode_for_file_access(),
1246  errmsg("could not open file \"%s\" (target block %u): %m",
1247  _mdfd_segpath(reln, forknum, nextsegno),
1248  blkno)));
1249  }
1250  }
1251 
1252  return v;
1253 }
#define EXTENSION_DONT_CHECK_SIZE
Definition: md.c:118
BlockNumber mdfd_segno
Definition: md.c:85
bool InRecovery
Definition: xlog.c:200
uint32 BlockNumber
Definition: block.h:31
#define EXTENSION_FAIL
Definition: md.c:104
void mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync)
Definition: md.c:388
void pfree(void *pointer)
Definition: mcxt.c:1031
static MdfdVec * mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
Definition: md.c:452
#define ERROR
Definition: elog.h:43
#define EXTENSION_RETURN_NULL
Definition: md.c:106
#define FATAL
Definition: elog.h:52
int errcode_for_file_access(void)
Definition: elog.c:593
#define EXTENSION_CREATE_RECOVERY
Definition: md.c:110
#define ereport(elevel, rest)
Definition: elog.h:141
void * palloc0(Size size)
Definition: mcxt.c:955
Definition: md.c:82
static MdfdVec * _mdfd_openseg(SMgrRelation reln, ForkNumber forkno, BlockNumber segno, int oflags)
Definition: md.c:1094
#define EXTENSION_CREATE
Definition: md.c:108
#define Assert(condition)
Definition: c.h:732
static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:1259
int md_num_open_segs[MAX_FORKNUM+1]
Definition: smgr.h:70
int errmsg(const char *fmt,...)
Definition: elog.c:784
#define elog(elevel,...)
Definition: elog.h:226
struct _MdfdVec * md_seg_fds[MAX_FORKNUM+1]
Definition: smgr.h:71
#define FILE_POSSIBLY_DELETED(err)
Definition: fd.h:65
static char * _mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
Definition: md.c:1071

◆ _mdfd_openseg()

static MdfdVec * _mdfd_openseg ( SMgrRelation  reln,
ForkNumber  forkno,
BlockNumber  segno,
int  oflags 
)
static

Definition at line 1094 of file md.c.

References _fdvec_resize(), _mdfd_segpath(), _mdnblocks(), Assert, fd(), SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, PathNameOpenFile(), pfree(), and PG_BINARY.

Referenced by _mdfd_getseg(), and mdnblocks().

1096 {
1097  MdfdVec *v;
1098  int fd;
1099  char *fullpath;
1100 
1101  fullpath = _mdfd_segpath(reln, forknum, segno);
1102 
1103  /* open the file */
1104  fd = PathNameOpenFile(fullpath, O_RDWR | PG_BINARY | oflags);
1105 
1106  pfree(fullpath);
1107 
1108  if (fd < 0)
1109  return NULL;
1110 
1111  if (segno <= reln->md_num_open_segs[forknum])
1112  _fdvec_resize(reln, forknum, segno + 1);
1113 
1114  /* fill the entry */
1115  v = &reln->md_seg_fds[forknum][segno];
1116  v->mdfd_vfd = fd;
1117  v->mdfd_segno = segno;
1118 
1119  Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
1120 
1121  /* all done */
1122  return v;
1123 }
File PathNameOpenFile(const char *fileName, int fileFlags)
Definition: fd.c:1321
BlockNumber mdfd_segno
Definition: md.c:85
uint32 BlockNumber
Definition: block.h:31
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define PG_BINARY
Definition: c.h:1191
void pfree(void *pointer)
Definition: mcxt.c:1031
static void _fdvec_resize(SMgrRelation reln, ForkNumber forknum, int nseg)
Definition: md.c:1033
Definition: md.c:82
#define Assert(condition)
Definition: c.h:732
static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:1259
struct _MdfdVec * md_seg_fds[MAX_FORKNUM+1]
Definition: smgr.h:71
File mdfd_vfd
Definition: md.c:84
static char * _mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
Definition: md.c:1071

◆ _mdfd_segpath()

static char * _mdfd_segpath ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  segno 
)
static

Definition at line 1071 of file md.c.

References pfree(), psprintf(), relpath, and SMgrRelationData::smgr_rnode.

Referenced by _mdfd_getseg(), _mdfd_openseg(), and mdsyncfiletag().

1072 {
1073  char *path,
1074  *fullpath;
1075 
1076  path = relpath(reln->smgr_rnode, forknum);
1077 
1078  if (segno > 0)
1079  {
1080  fullpath = psprintf("%s.%u", path, segno);
1081  pfree(path);
1082  }
1083  else
1084  fullpath = path;
1085 
1086  return fullpath;
1087 }
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
void pfree(void *pointer)
Definition: mcxt.c:1031
RelFileNodeBackend smgr_rnode
Definition: smgr.h:42
#define relpath(rnode, forknum)
Definition: relpath.h:87

◆ _mdnblocks()

static BlockNumber _mdnblocks ( SMgrRelation  reln,
ForkNumber  forknum,
MdfdVec *  seg 
)
static

Definition at line 1259 of file md.c.

References ereport, errcode_for_file_access(), errmsg(), ERROR, FilePathName(), FileSize(), and _MdfdVec::mdfd_vfd.

Referenced by _mdfd_getseg(), _mdfd_openseg(), mdextend(), mdnblocks(), and mdopenfork().

1260 {
1261  off_t len;
1262 
1263  len = FileSize(seg->mdfd_vfd);
1264  if (len < 0)
1265  ereport(ERROR,
1266  (errcode_for_file_access(),
1267  errmsg("could not seek to end of file \"%s\": %m",
1268  FilePathName(seg->mdfd_vfd))));
1269  /* note that this calculation will ignore any partial block at EOF */
1270  return (BlockNumber) (len / BLCKSZ);
1271 }
off_t FileSize(File file)
Definition: fd.c:2033
uint32 BlockNumber
Definition: block.h:31
char * FilePathName(File file)
Definition: fd.c:2085
#define ERROR
Definition: elog.h:43
int errcode_for_file_access(void)
Definition: elog.c:593
#define ereport(elevel, rest)
Definition: elog.h:141
int errmsg(const char *fmt,...)
Definition: elog.c:784
File mdfd_vfd
Definition: md.c:84

◆ DropRelationFiles()

void DropRelationFiles ( RelFileNode *  delrels,
int  ndelrels,
bool  isRedo 
)

Definition at line 1001 of file md.c.

References i, InvalidBackendId, MAX_FORKNUM, palloc(), pfree(), smgrclose(), smgrdounlinkall(), smgropen(), and XLogDropRelation().

Referenced by FinishPreparedTransaction(), xact_redo_abort(), and xact_redo_commit().

1002 {
1003  SMgrRelation *srels;
1004  int i;
1005 
1006  srels = palloc(sizeof(SMgrRelation) * ndelrels);
1007  for (i = 0; i < ndelrels; i++)
1008  {
1009  SMgrRelation srel = smgropen(delrels[i], InvalidBackendId);
1010 
1011  if (isRedo)
1012  {
1013  ForkNumber fork;
1014 
1015  for (fork = 0; fork <= MAX_FORKNUM; fork++)
1016  XLogDropRelation(delrels[i], fork);
1017  }
1018  srels[i] = srel;
1019  }
1020 
1021  smgrdounlinkall(srels, ndelrels, isRedo);
1022 
1023  for (i = 0; i < ndelrels; i++)
1024  smgrclose(srels[i]);
1025  pfree(srels);
1026 }
void smgrclose(SMgrRelation reln)
Definition: smgr.c:256
void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
Definition: smgr.c:405
void pfree(void *pointer)
Definition: mcxt.c:1031
SMgrRelation smgropen(RelFileNode rnode, BackendId backend)
Definition: smgr.c:145
ForkNumber
Definition: relpath.h:40
#define InvalidBackendId
Definition: backendid.h:23
#define MAX_FORKNUM
Definition: relpath.h:55
void XLogDropRelation(RelFileNode rnode, ForkNumber forknum)
Definition: xlogutils.c:606
void * palloc(Size size)
Definition: mcxt.c:924
int i

◆ ForgetDatabaseSyncRequests()

void ForgetDatabaseSyncRequests ( Oid  dbid)

Definition at line 983 of file md.c.

References RelFileNode::dbNode, INIT_MD_FILETAG, InvalidBlockNumber, InvalidForkNumber, RegisterSyncRequest(), RelFileNode::relNode, RelFileNode::spcNode, and SYNC_FILTER_REQUEST.

Referenced by dbase_redo(), and dropdb().

984 {
985  FileTag tag;
986  RelFileNode rnode;
987 
988  rnode.dbNode = dbid;
989  rnode.spcNode = 0;
990  rnode.relNode = 0;
991 
992  INIT_MD_FILETAG(tag, rnode, InvalidForkNumber, InvalidBlockNumber);
993 
994  RegisterSyncRequest(&tag, SYNC_FILTER_REQUEST, true /* retryOnError */ );
995 }
#define INIT_MD_FILETAG(a, xx_rnode, xx_forknum, xx_segno)
Definition: md.c:92
bool RegisterSyncRequest(const FileTag *ftag, SyncRequestType type, bool retryOnError)
Definition: sync.c:530
#define InvalidBlockNumber
Definition: block.h:33
Definition: sync.h:45

◆ mdclose()

void mdclose ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 506 of file md.c.

References _fdvec_resize(), FileClose(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, and _MdfdVec::mdfd_vfd.

Referenced by mdexists().

507 {
508  int nopensegs = reln->md_num_open_segs[forknum];
509 
510  /* No work if already closed */
511  if (nopensegs == 0)
512  return;
513 
514  /* close segments starting from the end */
515  while (nopensegs > 0)
516  {
517  MdfdVec *v = &reln->md_seg_fds[forknum][nopensegs - 1];
518 
519  /* if not closed already */
520  if (v->mdfd_vfd >= 0)
521  {
522  FileClose(v->mdfd_vfd);
523  v->mdfd_vfd = -1;
524  }
525 
526  nopensegs--;
527  }
528 
529  /* resize just once, avoids pointless reallocations */
530  _fdvec_resize(reln, forknum, 0);
531 }
static void _fdvec_resize(SMgrRelation reln, ForkNumber forknum, int nseg)
Definition: md.c:1033
Definition: md.c:82
void FileClose(File file)
Definition: fd.c:1711
int md_num_open_segs[MAX_FORKNUM+1]
Definition: smgr.h:70
struct _MdfdVec * md_seg_fds[MAX_FORKNUM+1]
Definition: smgr.h:71
File mdfd_vfd
Definition: md.c:84

◆ mdcreate()

void mdcreate ( SMgrRelation  reln,
ForkNumber  forkNum,
bool  isRedo 
)

Definition at line 178 of file md.c.

References _fdvec_resize(), Assert, RelFileNode::dbNode, ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, RelFileNodeBackend::node, PathNameOpenFile(), pfree(), PG_BINARY, relpath, SMgrRelationData::smgr_rnode, RelFileNode::spcNode, and TablespaceCreateDbspace().

179 {
180  MdfdVec *mdfd;
181  char *path;
182  File fd;
183 
184  if (isRedo && reln->md_num_open_segs[forkNum] > 0)
185  return; /* created and opened already... */
186 
187  Assert(reln->md_num_open_segs[forkNum] == 0);
188 
189  /*
190  * We may be using the target table space for the first time in this
191  * database, so create a per-database subdirectory if needed.
192  *
193  * XXX this is a fairly ugly violation of module layering, but this seems
194  * to be the best place to put the check. Maybe TablespaceCreateDbspace
195  * should be here and not in commands/tablespace.c? But that would imply
196  * importing a lot of stuff that smgr.c oughtn't know, either.
197  */
198  TablespaceCreateDbspace(reln->smgr_rnode.node.spcNode,
199  reln->smgr_rnode.node.dbNode,
200  isRedo);
201 
202  path = relpath(reln->smgr_rnode, forkNum);
203 
204  fd = PathNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY);
205 
206  if (fd < 0)
207  {
208  int save_errno = errno;
209 
210  if (isRedo)
211  fd = PathNameOpenFile(path, O_RDWR | PG_BINARY);
212  if (fd < 0)
213  {
214  /* be sure to report the error reported by create, not open */
215  errno = save_errno;
216  ereport(ERROR,
217  (errcode_for_file_access(),
218  errmsg("could not create file \"%s\": %m", path)));
219  }
220  }
221 
222  pfree(path);
223 
224  _fdvec_resize(reln, forkNum, 1);
225  mdfd = &reln->md_seg_fds[forkNum][0];
226  mdfd->mdfd_vfd = fd;
227  mdfd->mdfd_segno = 0;
228 }
File PathNameOpenFile(const char *fileName, int fileFlags)
Definition: fd.c:1321
BlockNumber mdfd_segno
Definition: md.c:85
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define PG_BINARY
Definition: c.h:1191
void pfree(void *pointer)
Definition: mcxt.c:1031
#define ERROR
Definition: elog.h:43
RelFileNodeBackend smgr_rnode
Definition: smgr.h:42
int errcode_for_file_access(void)
Definition: elog.c:593
#define ereport(elevel, rest)
Definition: elog.h:141
static void _fdvec_resize(SMgrRelation reln, ForkNumber forknum, int nseg)
Definition: md.c:1033
Definition: md.c:82
RelFileNode node
Definition: relfilenode.h:74
#define Assert(condition)
Definition: c.h:732
void TablespaceCreateDbspace(Oid spcNode, Oid dbNode, bool isRedo)
Definition: tablespace.c:116
int md_num_open_segs[MAX_FORKNUM+1]
Definition: smgr.h:70
int errmsg(const char *fmt,...)
Definition: elog.c:784
#define relpath(rnode, forknum)
Definition: relpath.h:87
struct _MdfdVec * md_seg_fds[MAX_FORKNUM+1]
Definition: smgr.h:71
File mdfd_vfd
Definition: md.c:84
int File
Definition: fd.h:45

◆ mdexists()

bool mdexists ( SMgrRelation  reln,
ForkNumber  forkNum 
)

Definition at line 161 of file md.c.

References EXTENSION_RETURN_NULL, mdclose(), and mdopenfork().

162 {
163  /*
164  * Close it first, to ensure that we notice if the fork has been unlinked
165  * since we opened it.
166  */
167  mdclose(reln, forkNum);
168 
169  return (mdopenfork(reln, forkNum, EXTENSION_RETURN_NULL) != NULL);
170 }
static MdfdVec * mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
Definition: md.c:452
#define EXTENSION_RETURN_NULL
Definition: md.c:106
void mdclose(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:506

◆ mdextend()

void mdextend ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
char *  buffer,
bool  skipFsync 
)

Definition at line 388 of file md.c.

References _mdfd_getseg(), _mdnblocks(), Assert, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXTENSION_CREATE, FilePathName(), FileWrite(), InvalidBlockNumber, _MdfdVec::mdfd_vfd, mdnblocks(), register_dirty_segment(), relpath, SMgrRelationData::smgr_rnode, SmgrIsTemp, and WAIT_EVENT_DATA_FILE_EXTEND.

Referenced by _mdfd_getseg().

390 {
391  off_t seekpos;
392  int nbytes;
393  MdfdVec *v;
394 
395  /* This assert is too expensive to have on normally ... */
396 #ifdef CHECK_WRITE_VS_EXTEND
397  Assert(blocknum >= mdnblocks(reln, forknum));
398 #endif
399 
400  /*
401  * If a relation manages to grow to 2^32-1 blocks, refuse to extend it any
402  * more --- we mustn't create a block whose number actually is
403  * InvalidBlockNumber.
404  */
405  if (blocknum == InvalidBlockNumber)
406  ereport(ERROR,
407  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
408  errmsg("cannot extend file \"%s\" beyond %u blocks",
409  relpath(reln->smgr_rnode, forknum),
410  InvalidBlockNumber)));
411 
412  v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE);
413 
414  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
415 
416  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
417 
418  if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ)
419  {
420  if (nbytes < 0)
421  ereport(ERROR,
422  (errcode_for_file_access(),
423  errmsg("could not extend file \"%s\": %m",
424  FilePathName(v->mdfd_vfd)),
425  errhint("Check free disk space.")));
426  /* short write: complain appropriately */
427  ereport(ERROR,
428  (errcode(ERRCODE_DISK_FULL),
429  errmsg("could not extend file \"%s\": wrote only %d of %d bytes at block %u",
430  FilePathName(v->mdfd_vfd),
431  nbytes, BLCKSZ, blocknum),
432  errhint("Check free disk space.")));
433  }
434 
435  if (!skipFsync && !SmgrIsTemp(reln))
436  register_dirty_segment(reln, forknum, v);
437 
438  Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
439 }
static MdfdVec * _mdfd_getseg(SMgrRelation reln, ForkNumber forkno, BlockNumber blkno, bool skipFsync, int behavior)
Definition: md.c:1134
int errhint(const char *fmt,...)
Definition: elog.c:974
BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:739
int errcode(int sqlerrcode)
Definition: elog.c:570
uint32 BlockNumber
Definition: block.h:31
char * FilePathName(File file)
Definition: fd.c:2085
#define SmgrIsTemp(smgr)
Definition: smgr.h:79
#define ERROR
Definition: elog.h:43
RelFileNodeBackend smgr_rnode
Definition: smgr.h:42
int errcode_for_file_access(void)
Definition: elog.c:593
#define ereport(elevel, rest)
Definition: elog.h:141
int FileWrite(File file, char *buffer, int amount, off_t offset, uint32 wait_event_info)
Definition: fd.c:1914
Definition: md.c:82
#define EXTENSION_CREATE
Definition: md.c:108
#define Assert(condition)
Definition: c.h:732
static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:1259
#define InvalidBlockNumber
Definition: block.h:33
int errmsg(const char *fmt,...)
Definition: elog.c:784
#define relpath(rnode, forknum)
Definition: relpath.h:87
File mdfd_vfd
Definition: md.c:84
static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:926

◆ mdfiletagmatches()

bool mdfiletagmatches ( const FileTag *  ftag,
const FileTag *  candidate 
)

Definition at line 1330 of file md.c.

References RelFileNode::dbNode, and FileTag::rnode.

1331 {
1332  /*
1333  * For now we only use filter requests as a way to drop all scheduled
1334  * callbacks relating to a given database, when dropping the database.
1335  * We'll return true for all candidates that have the same database OID as
1336  * the ftag from the SYNC_FILTER_REQUEST request, so they're forgotten.
1337  */
1338  return ftag->rnode.dbNode == candidate->rnode.dbNode;
1339 }
RelFileNode rnode
Definition: sync.h:49

◆ mdimmedsync()

void mdimmedsync ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 891 of file md.c.

References data_sync_elevel(), ereport, errcode_for_file_access(), errmsg(), ERROR, FilePathName(), FileSync(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, mdnblocks(), and WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC.

892 {
893  int segno;
894 
895  /*
896  * NOTE: mdnblocks makes sure we have opened all active segments, so that
897  * fsync loop will get them all!
898  */
899  mdnblocks(reln, forknum);
900 
901  segno = reln->md_num_open_segs[forknum];
902 
903  while (segno > 0)
904  {
905  MdfdVec *v = &reln->md_seg_fds[forknum][segno - 1];
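906 
907  if (FileSync(v->mdfd_vfd, WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC) < 0)
908  ereport(data_sync_elevel(ERROR),
909  (errcode_for_file_access(),
910  errmsg("could not fsync file \"%s\": %m",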
911  FilePathName(v->mdfd_vfd))));
912  segno--;
913  }
914 }
BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:739
char * FilePathName(File file)
Definition: fd.c:2085
#define ERROR
Definition: elog.h:43
int FileSync(File file, uint32 wait_event_info)
Definition: fd.c:2012
int errcode_for_file_access(void)
Definition: elog.c:593
#define ereport(elevel, rest)
Definition: elog.h:141
int data_sync_elevel(int elevel)
Definition: fd.c:3482
Definition: md.c:82
int md_num_open_segs[MAX_FORKNUM+1]
Definition: smgr.h:70
int errmsg(const char *fmt,...)
Definition: elog.c:784
struct _MdfdVec * md_seg_fds[MAX_FORKNUM+1]
Definition: smgr.h:71
File mdfd_vfd
Definition: md.c:84

◆ mdinit()

void mdinit ( void  )

Definition at line 148 of file md.c.

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, and TopMemoryContext.

149 {
150  MdCxt = AllocSetContextCreate(TopMemoryContext,
151  "MdSmgr",
152  ALLOCSET_DEFAULT_SIZES);
153 }
#define AllocSetContextCreate
Definition: memutils.h:169
static MemoryContext MdCxt
Definition: md.c:88
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:191
MemoryContext TopMemoryContext
Definition: mcxt.c:44

◆ mdnblocks()

BlockNumber mdnblocks ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 739 of file md.c.

References _mdfd_openseg(), _mdnblocks(), Assert, elog, EXTENSION_FAIL, FATAL, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, and mdopenfork().

Referenced by mdextend(), mdimmedsync(), mdtruncate(), and mdwrite().

740 {
741  MdfdVec *v = mdopenfork(reln, forknum, EXTENSION_FAIL);
742  BlockNumber nblocks;
743  BlockNumber segno = 0;
744 
745  /* mdopen has opened the first segment */
746  Assert(reln->md_num_open_segs[forknum] > 0);
747 
748  /*
749  * Start from the last open segments, to avoid redundant seeks. We have
750  * previously verified that these segments are exactly RELSEG_SIZE long,
751  * and it's useless to recheck that each time.
752  *
753  * NOTE: this assumption could only be wrong if another backend has
754  * truncated the relation. We rely on higher code levels to handle that
755  * scenario by closing and re-opening the md fd, which is handled via
756  * relcache flush. (Since the checkpointer doesn't participate in
757  * relcache flush, it could have segment entries for inactive segments;
758  * that's OK because the checkpointer never needs to compute relation
759  * size.)
760  */
761  segno = reln->md_num_open_segs[forknum] - 1;
762  v = &reln->md_seg_fds[forknum][segno];
763 
764  for (;;)
765  {
766  nblocks = _mdnblocks(reln, forknum, v);
767  if (nblocks > ((BlockNumber) RELSEG_SIZE))
768  elog(FATAL, "segment too big");
769  if (nblocks < ((BlockNumber) RELSEG_SIZE))
770  return (segno * ((BlockNumber) RELSEG_SIZE)) + nblocks;
771 
772  /*
773  * If segment is exactly RELSEG_SIZE, advance to next one.
774  */
775  segno++;
776 
777  /*
778  * We used to pass O_CREAT here, but that has the disadvantage that it
779  * might create a segment which has vanished through some operating
780  * system misadventure. In such a case, creating the segment here
781  * undermines _mdfd_getseg's attempts to notice and report an error
782  * upon access to a missing segment.
783  */
784  v = _mdfd_openseg(reln, forknum, segno, 0);
785  if (v == NULL)
786  return segno * ((BlockNumber) RELSEG_SIZE);
787  }
788 }
uint32 BlockNumber
Definition: block.h:31
#define EXTENSION_FAIL
Definition: md.c:104
static MdfdVec * mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
Definition: md.c:452
#define FATAL
Definition: elog.h:52
Definition: md.c:82
static MdfdVec * _mdfd_openseg(SMgrRelation reln, ForkNumber forkno, BlockNumber segno, int oflags)
Definition: md.c:1094
#define Assert(condition)
Definition: c.h:732
static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:1259
int md_num_open_segs[MAX_FORKNUM+1]
Definition: smgr.h:70
#define elog(elevel,...)
Definition: elog.h:226
struct _MdfdVec * md_seg_fds[MAX_FORKNUM+1]
Definition: smgr.h:71

◆ mdopen()

void mdopen ( SMgrRelation  reln)

Definition at line 495 of file md.c.

References MAX_FORKNUM, and SMgrRelationData::md_num_open_segs.

496 {
497  /* mark it not open */
498  for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)
499  reln->md_num_open_segs[forknum] = 0;
500 }
#define MAX_FORKNUM
Definition: relpath.h:55
int md_num_open_segs[MAX_FORKNUM+1]
Definition: smgr.h:70

◆ mdopenfork()

static MdfdVec * mdopenfork ( SMgrRelation  reln,
ForkNumber  forknum,
int  behavior 
)
static

Definition at line 452 of file md.c.

References _fdvec_resize(), _mdnblocks(), Assert, ereport, errcode_for_file_access(), errmsg(), ERROR, EXTENSION_RETURN_NULL, fd(), FILE_POSSIBLY_DELETED, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, PathNameOpenFile(), pfree(), PG_BINARY, relpath, and SMgrRelationData::smgr_rnode.

Referenced by _mdfd_getseg(), mdexists(), and mdnblocks().

453 {
454  MdfdVec *mdfd;
455  char *path;
456  File fd;
457 
458  /* No work if already open */
459  if (reln->md_num_open_segs[forknum] > 0)
460  return &reln->md_seg_fds[forknum][0];
461 
462  path = relpath(reln->smgr_rnode, forknum);
463 
464  fd = PathNameOpenFile(path, O_RDWR | PG_BINARY);
465 
466  if (fd < 0)
467  {
468  if ((behavior & EXTENSION_RETURN_NULL) &&
469  FILE_POSSIBLY_DELETED(errno))
470  {
471  pfree(path);
472  return NULL;
473  }
474  ereport(ERROR,
475  (errcode_for_file_access(),
476  errmsg("could not open file \"%s\": %m", path)));
477  }
478 
479  pfree(path);
480 
481  _fdvec_resize(reln, forknum, 1);
482  mdfd = &reln->md_seg_fds[forknum][0];
483  mdfd->mdfd_vfd = fd;
484  mdfd->mdfd_segno = 0;
485 
486  Assert(_mdnblocks(reln, forknum, mdfd) <= ((BlockNumber) RELSEG_SIZE));
487 
488  return mdfd;
489 }
File PathNameOpenFile(const char *fileName, int fileFlags)
Definition: fd.c:1321
BlockNumber mdfd_segno
Definition: md.c:85
uint32 BlockNumber
Definition: block.h:31
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define PG_BINARY
Definition: c.h:1191
void pfree(void *pointer)
Definition: mcxt.c:1031
#define ERROR
Definition: elog.h:43
#define EXTENSION_RETURN_NULL
Definition: md.c:106
RelFileNodeBackend smgr_rnode
Definition: smgr.h:42
int errcode_for_file_access(void)
Definition: elog.c:593
#define ereport(elevel, rest)
Definition: elog.h:141
static void _fdvec_resize(SMgrRelation reln, ForkNumber forknum, int nseg)
Definition: md.c:1033
Definition: md.c:82
#define Assert(condition)
Definition: c.h:732
static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:1259
int md_num_open_segs[MAX_FORKNUM+1]
Definition: smgr.h:70
int errmsg(const char *fmt,...)
Definition: elog.c:784
#define relpath(rnode, forknum)
Definition: relpath.h:87
struct _MdfdVec * md_seg_fds[MAX_FORKNUM+1]
Definition: smgr.h:71
File mdfd_vfd
Definition: md.c:84
int File
Definition: fd.h:45
#define FILE_POSSIBLY_DELETED(err)
Definition: fd.h:65

◆ mdprefetch()

void mdprefetch ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum 
)

Definition at line 537 of file md.c.

References _mdfd_getseg(), Assert, EXTENSION_FAIL, FilePrefetch(), _MdfdVec::mdfd_vfd, and WAIT_EVENT_DATA_FILE_PREFETCH.

538 {
539 #ifdef USE_PREFETCH
540  off_t seekpos;
541  MdfdVec *v;
542 
543  v = _mdfd_getseg(reln, forknum, blocknum, false, EXTENSION_FAIL);
544 
545  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
546 
547  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
548 
549  (void) FilePrefetch(v->mdfd_vfd, seekpos, BLCKSZ, WAIT_EVENT_DATA_FILE_PREFETCH);
550 #endif /* USE_PREFETCH */
551 }
static MdfdVec * _mdfd_getseg(SMgrRelation reln, ForkNumber forkno, BlockNumber blkno, bool skipFsync, int behavior)
Definition: md.c:1134
uint32 BlockNumber
Definition: block.h:31
#define EXTENSION_FAIL
Definition: md.c:104
Definition: md.c:82
int FilePrefetch(File file, off_t offset, int amount, uint32 wait_event_info)
Definition: fd.c:1807
#define Assert(condition)
Definition: c.h:732
File mdfd_vfd
Definition: md.c:84

◆ mdread()

void mdread ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
char *  buffer 
)

Definition at line 609 of file md.c.

References _mdfd_getseg(), Assert, RelFileNodeBackend::backend, RelFileNode::dbNode, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errmsg(), ERROR, EXTENSION_CREATE_RECOVERY, EXTENSION_FAIL, FilePathName(), FileRead(), InRecovery, _MdfdVec::mdfd_vfd, MemSet, RelFileNodeBackend::node, RelFileNode::relNode, SMgrRelationData::smgr_rnode, RelFileNode::spcNode, WAIT_EVENT_DATA_FILE_READ, and zero_damaged_pages.

611 {
612  off_t seekpos;
613  int nbytes;
614  MdfdVec *v;
615 
616  TRACE_POSTGRESQL_SMGR_MD_READ_START(forknum, blocknum,
617  reln->smgr_rnode.node.spcNode,
618  reln->smgr_rnode.node.dbNode,
619  reln->smgr_rnode.node.relNode,
620  reln->smgr_rnode.backend);
621 
622  v = _mdfd_getseg(reln, forknum, blocknum, false,
623  EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
624 
625  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
626 
627  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
628 
629  nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_READ);
630 
631  TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum,
632  reln->smgr_rnode.node.spcNode,
633  reln->smgr_rnode.node.dbNode,
634  reln->smgr_rnode.node.relNode,
635  reln->smgr_rnode.backend,
636  nbytes,
637  BLCKSZ);
638 
639  if (nbytes != BLCKSZ)
640  {
641  if (nbytes < 0)
642  ereport(ERROR,
643  (errcode_for_file_access(),
644  errmsg("could not read block %u in file \"%s\": %m",
645  blocknum, FilePathName(v->mdfd_vfd))));
646 
647  /*
648  * Short read: we are at or past EOF, or we read a partial block at
649  * EOF. Normally this is an error; upper levels should never try to
650  * read a nonexistent block. However, if zero_damaged_pages is ON or
651  * we are InRecovery, we should instead return zeroes without
652  * complaining. This allows, for example, the case of trying to
653  * update a block that was later truncated away.
654  */
655  if (zero_damaged_pages || InRecovery)
656  MemSet(buffer, 0, BLCKSZ);
657  else
658  ereport(ERROR,
659  (errcode(ERRCODE_DATA_CORRUPTED),
660  errmsg("could not read block %u in file \"%s\": read only %d of %d bytes",
661  blocknum, FilePathName(v->mdfd_vfd),
662  nbytes, BLCKSZ)));
663  }
664 }
static MdfdVec * _mdfd_getseg(SMgrRelation reln, ForkNumber forkno, BlockNumber blkno, bool skipFsync, int behavior)
Definition: md.c:1134
bool InRecovery
Definition: xlog.c:200
int errcode(int sqlerrcode)
Definition: elog.c:570
#define MemSet(start, val, len)
Definition: c.h:955
uint32 BlockNumber
Definition: block.h:31
char * FilePathName(File file)
Definition: fd.c:2085
#define EXTENSION_FAIL
Definition: md.c:104
#define ERROR
Definition: elog.h:43
RelFileNodeBackend smgr_rnode
Definition: smgr.h:42
int errcode_for_file_access(void)
Definition: elog.c:593
#define EXTENSION_CREATE_RECOVERY
Definition: md.c:110
#define ereport(elevel, rest)
Definition: elog.h:141
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:44
Definition: md.c:82
RelFileNode node
Definition: relfilenode.h:74
#define Assert(condition)
Definition: c.h:732
BackendId backend
Definition: relfilenode.h:75
int errmsg(const char *fmt,...)
Definition: elog.c:784
int FileRead(File file, char *buffer, int amount, off_t offset, uint32 wait_event_info)
Definition: fd.c:1858
File mdfd_vfd
Definition: md.c:84
bool zero_damaged_pages
Definition: bufmgr.c:109

◆ mdsyncfiletag()

int mdsyncfiletag ( const FileTag ftag,
char *  path 
)

Definition at line 1280 of file md.c.

References _mdfd_getseg(), _mdfd_segpath(), EXTENSION_DONT_CHECK_SIZE, EXTENSION_RETURN_NULL, FileSync(), FileTag::forknum, InvalidBackendId, MAXPGPATH, _MdfdVec::mdfd_vfd, pfree(), FileTag::rnode, FileTag::segno, smgropen(), strlcpy(), and WAIT_EVENT_DATA_FILE_SYNC.

1281 {
1282  SMgrRelation reln = smgropen(ftag->rnode, InvalidBackendId);
1283  MdfdVec *v;
1284  char *p;
1285 
1286  /* Provide the path for informational messages. */
1287  p = _mdfd_segpath(reln, ftag->forknum, ftag->segno);
1288  strlcpy(path, p, MAXPGPATH);
1289  pfree(p);
1290 
1291  /* Try to open the requested segment. */
1292  v = _mdfd_getseg(reln,
1293  ftag->forknum,
1294  ftag->segno * (BlockNumber) RELSEG_SIZE,
1295  false,
1296  EXTENSION_RETURN_NULL | EXTENSION_DONT_CHECK_SIZE);
1297  if (v == NULL)
1298  return -1;
1299 
1300  /* Try to fsync the file. */
1301  return FileSync(v->mdfd_vfd, WAIT_EVENT_DATA_FILE_SYNC);
1302 }
uint32 segno
Definition: sync.h:50
static MdfdVec * _mdfd_getseg(SMgrRelation reln, ForkNumber forkno, BlockNumber blkno, bool skipFsync, int behavior)
Definition: md.c:1134
#define EXTENSION_DONT_CHECK_SIZE
Definition: md.c:118
int16 forknum
Definition: sync.h:48
RelFileNode rnode
Definition: sync.h:49
uint32 BlockNumber
Definition: block.h:31
void pfree(void *pointer)
Definition: mcxt.c:1031
#define EXTENSION_RETURN_NULL
Definition: md.c:106
#define MAXPGPATH
int FileSync(File file, uint32 wait_event_info)
Definition: fd.c:2012
SMgrRelation smgropen(RelFileNode rnode, BackendId backend)
Definition: smgr.c:145
#define InvalidBackendId
Definition: backendid.h:23
Definition: md.c:82
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
File mdfd_vfd
Definition: md.c:84
static char * _mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
Definition: md.c:1071

◆ mdtruncate()

void mdtruncate ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  nblocks 
)

Definition at line 794 of file md.c.

References _fdvec_resize(), Assert, ereport, errcode_for_file_access(), errmsg(), ERROR, FileClose(), FilePathName(), FileTruncate(), InRecovery, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, mdnblocks(), register_dirty_segment(), relpath, SMgrRelationData::smgr_rnode, SmgrIsTemp, and WAIT_EVENT_DATA_FILE_TRUNCATE.

795 {
796  BlockNumber curnblk;
797  BlockNumber priorblocks;
798  int curopensegs;
799 
800  /*
801  * NOTE: mdnblocks makes sure we have opened all active segments, so that
802  * truncation loop will get them all!
803  */
804  curnblk = mdnblocks(reln, forknum);
805  if (nblocks > curnblk)
806  {
807  /* Bogus request ... but no complaint if InRecovery */
808  if (InRecovery)
809  return;
810  ereport(ERROR,
811  (errmsg("could not truncate file \"%s\" to %u blocks: it's only %u blocks now",
812  relpath(reln->smgr_rnode, forknum),
813  nblocks, curnblk)));
814  }
815  if (nblocks == curnblk)
816  return; /* no work */
817 
818  /*
819  * Truncate segments, starting at the last one. Starting at the end makes
820  * managing the memory for the fd array easier, should there be errors.
821  */
822  curopensegs = reln->md_num_open_segs[forknum];
823  while (curopensegs > 0)
824  {
825  MdfdVec *v;
826 
827  priorblocks = (curopensegs - 1) * RELSEG_SIZE;
828 
829  v = &reln->md_seg_fds[forknum][curopensegs - 1];
830 
831  if (priorblocks > nblocks)
832  {
833  /*
834  * This segment is no longer active. We truncate the file, but do
835  * not delete it, for reasons explained in the header comments.
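836  */
837  if (FileTruncate(v->mdfd_vfd, 0, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0)
838  ereport(ERROR,
839  (errcode_for_file_access(),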
840  errmsg("could not truncate file \"%s\": %m",
841  FilePathName(v->mdfd_vfd))));
842 
843  if (!SmgrIsTemp(reln))
844  register_dirty_segment(reln, forknum, v);
845 
846  /* we never drop the 1st segment */
847  Assert(v != &reln->md_seg_fds[forknum][0]);
848 
849  FileClose(v->mdfd_vfd);
850  _fdvec_resize(reln, forknum, curopensegs - 1);
851  }
852  else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks)
853  {
854  /*
855  * This is the last segment we want to keep. Truncate the file to
856  * the right length. NOTE: if nblocks is exactly a multiple K of
857  * RELSEG_SIZE, we will truncate the K+1st segment to 0 length but
858  * keep it. This adheres to the invariant given in the header
859  * comments.
860  */
861  BlockNumber lastsegblocks = nblocks - priorblocks;
862 
863  if (FileTruncate(v->mdfd_vfd, (off_t) lastsegblocks * BLCKSZ, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0)
864  ereport(ERROR,
865  (errcode_for_file_access(),
866  errmsg("could not truncate file \"%s\" to %u blocks: %m",
867  FilePathName(v->mdfd_vfd),
868  nblocks)));
869  if (!SmgrIsTemp(reln))
870  register_dirty_segment(reln, forknum, v);
871  }
872  else
873  {
874  /*
875  * We still need this segment, so nothing to do for this and any
876  * earlier segment.
877  */
878  break;
879  }
880  curopensegs--;
881  }
882 }
BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:739
bool InRecovery
Definition: xlog.c:200
uint32 BlockNumber
Definition: block.h:31
char * FilePathName(File file)
Definition: fd.c:2085
#define SmgrIsTemp(smgr)
Definition: smgr.h:79
#define ERROR
Definition: elog.h:43
RelFileNodeBackend smgr_rnode
Definition: smgr.h:42
int errcode_for_file_access(void)
Definition: elog.c:593
#define ereport(elevel, rest)
Definition: elog.h:141
static void _fdvec_resize(SMgrRelation reln, ForkNumber forknum, int nseg)
Definition: md.c:1033
Definition: md.c:82
void FileClose(File file)
Definition: fd.c:1711
#define Assert(condition)
Definition: c.h:732
int md_num_open_segs[MAX_FORKNUM+1]
Definition: smgr.h:70
int errmsg(const char *fmt,...)
Definition: elog.c:784
#define relpath(rnode, forknum)
Definition: relpath.h:87
struct _MdfdVec * md_seg_fds[MAX_FORKNUM+1]
Definition: smgr.h:71
int FileTruncate(File file, off_t offset, uint32 wait_event_info)
Definition: fd.c:2050
File mdfd_vfd
Definition: md.c:84
static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:926

◆ mdunlink()

void mdunlink ( RelFileNodeBackend  rnode,
ForkNumber  forkNum,
bool  isRedo 
)

Definition at line 278 of file md.c.

References InvalidForkNumber, MAX_FORKNUM, and mdunlinkfork().

279 {
280  /* Now do the per-fork work */
281  if (forkNum == InvalidForkNumber)
282  {
283  for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++)
284  mdunlinkfork(rnode, forkNum, isRedo);
285  }
286  else
287  mdunlinkfork(rnode, forkNum, isRedo);
288 }
static void mdunlinkfork(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo)
Definition: md.c:291
#define MAX_FORKNUM
Definition: relpath.h:55

◆ mdunlinkfiletag()

int mdunlinkfiletag ( const FileTag ftag,
char *  path 
)

Definition at line 1311 of file md.c.

References MAIN_FORKNUM, MAXPGPATH, pfree(), relpathperm, FileTag::rnode, and strlcpy().

1312 {
1313  char *p;
1314 
1315  /* Compute the path. */
1316  p = relpathperm(ftag->rnode, MAIN_FORKNUM);
1317  strlcpy(path, p, MAXPGPATH);
1318  pfree(p);
1319 
1320  /* Try to unlink the file. */
1321  return unlink(path);
1322 }
#define relpathperm(rnode, forknum)
Definition: relpath.h:83
RelFileNode rnode
Definition: sync.h:49
void pfree(void *pointer)
Definition: mcxt.c:1031
#define MAXPGPATH
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45

◆ mdunlinkfork()

static void mdunlinkfork ( RelFileNodeBackend  rnode,
ForkNumber  forkNum,
bool  isRedo 
)
static

Definition at line 291 of file md.c.

References CloseTransientFile(), ereport, errcode_for_file_access(), errmsg(), fd(), ftruncate, MAIN_FORKNUM, OpenTransientFile(), palloc(), pfree(), PG_BINARY, register_forget_request(), register_unlink_segment(), RelFileNodeBackendIsTemp, relpath, sprintf, and WARNING.

Referenced by mdunlink().

292 {
293  char *path;
294  int ret;
295 
296  path = relpath(rnode, forkNum);
297 
298  /*
299  * Delete or truncate the first segment.
300  */
301  if (isRedo || forkNum != MAIN_FORKNUM || RelFileNodeBackendIsTemp(rnode))
302  {
303  /* First, forget any pending sync requests for the first segment */
304  if (!RelFileNodeBackendIsTemp(rnode))
305  register_forget_request(rnode, forkNum, 0 /* first seg */ );
306 
307  /* Next unlink the file */
308  ret = unlink(path);
309  if (ret < 0 && errno != ENOENT)
310  ereport(WARNING,
311  (errcode_for_file_access(),
312  errmsg("could not remove file \"%s\": %m", path)));
313  }
314  else
315  {
316  /* truncate(2) would be easier here, but Windows hasn't got it */
317  int fd;
318 
319  fd = OpenTransientFile(path, O_RDWR | PG_BINARY);
320  if (fd >= 0)
321  {
322  int save_errno;
323 
324  ret = ftruncate(fd, 0);
325  save_errno = errno;
326  CloseTransientFile(fd);
327  errno = save_errno;
328  }
329  else
330  ret = -1;
331  if (ret < 0 && errno != ENOENT)
332  ereport(WARNING,
333  (errcode_for_file_access(),
334  errmsg("could not truncate file \"%s\": %m", path)));
335 
336  /* Register request to unlink first segment later */
337  register_unlink_segment(rnode, forkNum, 0 /* first seg */ );
338  }
339 
340  /*
341  * Delete any additional segments.
342  */
343  if (ret >= 0)
344  {
345  char *segpath = (char *) palloc(strlen(path) + 12);
346  BlockNumber segno;
347 
348  /*
349  * Note that because we loop until getting ENOENT, we will correctly
350  * remove all inactive segments as well as active ones.
351  */
352  for (segno = 1;; segno++)
353  {
354  /*
355  * Forget any pending sync requests for this segment before we try
356  * to unlink.
357  */
358  if (!RelFileNodeBackendIsTemp(rnode))
359  register_forget_request(rnode, forkNum, segno);
360 
361  sprintf(segpath, "%s.%u", path, segno);
362  if (unlink(segpath) < 0)
363  {
364  /* ENOENT is expected after the last segment... */
365  if (errno != ENOENT)
366  ereport(WARNING,
367  (errcode_for_file_access(),
368  errmsg("could not remove file \"%s\": %m", segpath)));
369  break;
370  }
371  }
372  pfree(segpath);
373  }
374 
375  pfree(path);
376 }
#define RelFileNodeBackendIsTemp(rnode)
Definition: relfilenode.h:78
uint32 BlockNumber
Definition: block.h:31
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define PG_BINARY
Definition: c.h:1191
static void register_forget_request(RelFileNodeBackend rnode, ForkNumber forknum, BlockNumber segno)
Definition: md.c:969
#define sprintf
Definition: port.h:194
void pfree(void *pointer)
Definition: mcxt.c:1031
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2255
static void register_unlink_segment(RelFileNodeBackend rnode, ForkNumber forknum, BlockNumber segno)
Definition: md.c:952
int errcode_for_file_access(void)
Definition: elog.c:593
#define ereport(elevel, rest)
Definition: elog.h:141
int CloseTransientFile(int fd)
Definition: fd.c:2432
#define WARNING
Definition: elog.h:40
void * palloc(Size size)
Definition: mcxt.c:924
int errmsg(const char *fmt,...)
Definition: elog.c:784
#define relpath(rnode, forknum)
Definition: relpath.h:87
#define ftruncate(a, b)
Definition: win32_port.h:60

◆ mdwrite()

void mdwrite ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
char *  buffer,
bool  skipFsync 
)

Definition at line 674 of file md.c.

References _mdfd_getseg(), Assert, RelFileNodeBackend::backend, RelFileNode::dbNode, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXTENSION_CREATE_RECOVERY, EXTENSION_FAIL, FilePathName(), FileWrite(), _MdfdVec::mdfd_vfd, mdnblocks(), RelFileNodeBackend::node, register_dirty_segment(), RelFileNode::relNode, SMgrRelationData::smgr_rnode, SmgrIsTemp, RelFileNode::spcNode, and WAIT_EVENT_DATA_FILE_WRITE.

676 {
677  off_t seekpos;
678  int nbytes;
679  MdfdVec *v;
680 
681  /* This assert is too expensive to have on normally ... */
682 #ifdef CHECK_WRITE_VS_EXTEND
683  Assert(blocknum < mdnblocks(reln, forknum));
684 #endif
685 
686  TRACE_POSTGRESQL_SMGR_MD_WRITE_START(forknum, blocknum,
687  reln->smgr_rnode.node.spcNode,
688  reln->smgr_rnode.node.dbNode,
689  reln->smgr_rnode.node.relNode,
690  reln->smgr_rnode.backend);
691 
692  v = _mdfd_getseg(reln, forknum, blocknum, skipFsync,
693  EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
694 
695  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
696 
697  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
698 
699  nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_WRITE);
700 
701  TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum,
702  reln->smgr_rnode.node.spcNode,
703  reln->smgr_rnode.node.dbNode,
704  reln->smgr_rnode.node.relNode,
705  reln->smgr_rnode.backend,
706  nbytes,
707  BLCKSZ);
708 
709  if (nbytes != BLCKSZ)
710  {
711  if (nbytes < 0)
712  ereport(ERROR,
713  (errcode_for_file_access(),
714  errmsg("could not write block %u in file \"%s\": %m",
715  blocknum, FilePathName(v->mdfd_vfd))));
716  /* short write: complain appropriately */
717  ereport(ERROR,
718  (errcode(ERRCODE_DISK_FULL),
719  errmsg("could not write block %u in file \"%s\": wrote only %d of %d bytes",
720  blocknum,
721  FilePathName(v->mdfd_vfd),
722  nbytes, BLCKSZ),
723  errhint("Check free disk space.")));
724  }
725 
726  if (!skipFsync && !SmgrIsTemp(reln))
727  register_dirty_segment(reln, forknum, v);
728 }
static MdfdVec * _mdfd_getseg(SMgrRelation reln, ForkNumber forkno, BlockNumber blkno, bool skipFsync, int behavior)
Definition: md.c:1134
int errhint(const char *fmt,...)
Definition: elog.c:974
BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:739
int errcode(int sqlerrcode)
Definition: elog.c:570
uint32 BlockNumber
Definition: block.h:31
char * FilePathName(File file)
Definition: fd.c:2085
#define EXTENSION_FAIL
Definition: md.c:104
#define SmgrIsTemp(smgr)
Definition: smgr.h:79
#define ERROR
Definition: elog.h:43
RelFileNodeBackend smgr_rnode
Definition: smgr.h:42
int errcode_for_file_access(void)
Definition: elog.c:593
#define EXTENSION_CREATE_RECOVERY
Definition: md.c:110
#define ereport(elevel, rest)
Definition: elog.h:141
int FileWrite(File file, char *buffer, int amount, off_t offset, uint32 wait_event_info)
Definition: fd.c:1914
Definition: md.c:82
RelFileNode node
Definition: relfilenode.h:74
#define Assert(condition)
Definition: c.h:732
BackendId backend
Definition: relfilenode.h:75
int errmsg(const char *fmt,...)
Definition: elog.c:784
File mdfd_vfd
Definition: md.c:84
static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:926

◆ mdwriteback()

void mdwriteback ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
BlockNumber  nblocks 
)

Definition at line 560 of file md.c.

References _mdfd_getseg(), Assert, EXTENSION_RETURN_NULL, FileWriteback(), _MdfdVec::mdfd_vfd, and WAIT_EVENT_DATA_FILE_FLUSH.

562 {
563  /*
564  * Issue flush requests in as few requests as possible; have to split at
565  * segment boundaries though, since those are actually separate files.
566  */
567  while (nblocks > 0)
568  {
569  BlockNumber nflush = nblocks;
570  off_t seekpos;
571  MdfdVec *v;
572  int segnum_start,
573  segnum_end;
574 
575  v = _mdfd_getseg(reln, forknum, blocknum, true /* not used */ ,
576  EXTENSION_RETURN_NULL);
577 
578  /*
579  * We might be flushing buffers of already removed relations, that's
580  * ok, just ignore that case.
581  */
582  if (!v)
583  return;
584 
585  /* compute offset inside the current segment */
586  segnum_start = blocknum / RELSEG_SIZE;
587 
588  /* compute number of desired writes within the current segment */
589  segnum_end = (blocknum + nblocks - 1) / RELSEG_SIZE;
590  if (segnum_start != segnum_end)
591  nflush = RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE));
592 
593  Assert(nflush >= 1);
594  Assert(nflush <= nblocks);
595 
596  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
597 
598  FileWriteback(v->mdfd_vfd, seekpos, (off_t) BLCKSZ * nflush, WAIT_EVENT_DATA_FILE_FLUSH);
599 
600  nblocks -= nflush;
601  blocknum += nflush;
602  }
603 }
static MdfdVec * _mdfd_getseg(SMgrRelation reln, ForkNumber forkno, BlockNumber blkno, bool skipFsync, int behavior)
Definition: md.c:1134
uint32 BlockNumber
Definition: block.h:31
#define EXTENSION_RETURN_NULL
Definition: md.c:106
Definition: md.c:82
#define Assert(condition)
Definition: c.h:732
void FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info)
Definition: fd.c:1835
File mdfd_vfd
Definition: md.c:84

◆ register_dirty_segment()

static void register_dirty_segment ( SMgrRelation  reln,
ForkNumber  forknum,
MdfdVec seg 
)
static

Definition at line 926 of file md.c.

References Assert, data_sync_elevel(), DEBUG1, ereport, errcode_for_file_access(), errmsg(), ERROR, FilePathName(), FileSync(), INIT_MD_FILETAG, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, RelFileNodeBackend::node, RegisterSyncRequest(), SMgrRelationData::smgr_rnode, SmgrIsTemp, SYNC_REQUEST, and WAIT_EVENT_DATA_FILE_SYNC.

Referenced by mdextend(), mdtruncate(), and mdwrite().

927 {
928  FileTag tag;
929 
930  INIT_MD_FILETAG(tag, reln->smgr_rnode.node, forknum, seg->mdfd_segno);
931 
932  /* Temp relations should never be fsync'd */
933  Assert(!SmgrIsTemp(reln));
934 
935  if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false /* retryOnError */ ))
936  {
937  ereport(DEBUG1,
938  (errmsg("could not forward fsync request because request queue is full")));
939 
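940  if (FileSync(seg->mdfd_vfd, WAIT_EVENT_DATA_FILE_SYNC) < 0)
941  ereport(data_sync_elevel(ERROR),
942  (errcode_for_file_access(),
943  errmsg("could not fsync file \"%s\": %m",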
944  FilePathName(seg->mdfd_vfd))));
945  }
946 }
#define DEBUG1
Definition: elog.h:25
BlockNumber mdfd_segno
Definition: md.c:85
#define INIT_MD_FILETAG(a, xx_rnode, xx_forknum, xx_segno)
Definition: md.c:92
char * FilePathName(File file)
Definition: fd.c:2085
#define SmgrIsTemp(smgr)
Definition: smgr.h:79
#define ERROR
Definition: elog.h:43
RelFileNodeBackend smgr_rnode
Definition: smgr.h:42
int FileSync(File file, uint32 wait_event_info)
Definition: fd.c:2012
int errcode_for_file_access(void)
Definition: elog.c:593
#define ereport(elevel, rest)
Definition: elog.h:141
int data_sync_elevel(int elevel)
Definition: fd.c:3482
bool RegisterSyncRequest(const FileTag *ftag, SyncRequestType type, bool retryOnError)
Definition: sync.c:530
RelFileNode node
Definition: relfilenode.h:74
#define Assert(condition)
Definition: c.h:732
int errmsg(const char *fmt,...)
Definition: elog.c:784
File mdfd_vfd
Definition: md.c:84
Definition: sync.h:45

◆ register_forget_request()

static void register_forget_request ( RelFileNodeBackend  rnode,
ForkNumber  forknum,
BlockNumber  segno 
)
static

Definition at line 969 of file md.c.

References INIT_MD_FILETAG, RelFileNodeBackend::node, RegisterSyncRequest(), and SYNC_FORGET_REQUEST.

Referenced by mdunlinkfork().

971 {
972  FileTag tag;
973 
974  INIT_MD_FILETAG(tag, rnode.node, forknum, segno);
975 
976  RegisterSyncRequest(&tag, SYNC_FORGET_REQUEST, true /* retryOnError */ );
977 }
#define INIT_MD_FILETAG(a, xx_rnode, xx_forknum, xx_segno)
Definition: md.c:92
bool RegisterSyncRequest(const FileTag *ftag, SyncRequestType type, bool retryOnError)
Definition: sync.c:530
RelFileNode node
Definition: relfilenode.h:74
Definition: sync.h:45

◆ register_unlink_segment()

static void register_unlink_segment ( RelFileNodeBackend  rnode,
ForkNumber  forknum,
BlockNumber  segno 
)
static

Definition at line 952 of file md.c.

References Assert, INIT_MD_FILETAG, RelFileNodeBackend::node, RegisterSyncRequest(), RelFileNodeBackendIsTemp, and SYNC_UNLINK_REQUEST.

Referenced by mdunlinkfork().

954 {
955  FileTag tag;
956 
957  INIT_MD_FILETAG(tag, rnode.node, forknum, segno);
958 
959  /* Should never be used with temp relations */
960  Assert(!RelFileNodeBackendIsTemp(rnode));
961 
962  RegisterSyncRequest(&tag, SYNC_UNLINK_REQUEST, true /* retryOnError */ );
963 }
#define RelFileNodeBackendIsTemp(rnode)
Definition: relfilenode.h:78
#define INIT_MD_FILETAG(a, xx_rnode, xx_forknum, xx_segno)
Definition: md.c:92
bool RegisterSyncRequest(const FileTag *ftag, SyncRequestType type, bool retryOnError)
Definition: sync.c:530
RelFileNode node
Definition: relfilenode.h:74
#define Assert(condition)
Definition: c.h:732
Definition: sync.h:45

Variable Documentation

◆ MdCxt

MemoryContext MdCxt
static

Definition at line 88 of file md.c.