PostgreSQL Source Code  git master
md.c File Reference
#include "postgres.h"
#include <unistd.h>
#include <fcntl.h>
#include <sys/file.h>
#include "access/xlog.h"
#include "access/xlogutils.h"
#include "commands/tablespace.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/md.h"
#include "storage/relfilenode.h"
#include "storage/smgr.h"
#include "storage/sync.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"
Include dependency graph for md.c:

Go to the source code of this file.

Data Structures

struct  _MdfdVec
 

Macros

#define INIT_MD_FILETAG(a, xx_rnode, xx_forknum, xx_segno)
 
#define EXTENSION_FAIL   (1 << 0)
 
#define EXTENSION_RETURN_NULL   (1 << 1)
 
#define EXTENSION_CREATE   (1 << 2)
 
#define EXTENSION_CREATE_RECOVERY   (1 << 3)
 
#define EXTENSION_DONT_CHECK_SIZE   (1 << 4)
 
#define EXTENSION_DONT_OPEN   (1 << 5)
 

Typedefs

typedef struct _MdfdVec MdfdVec
 

Functions

static void mdunlinkfork (RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo)
 
static MdfdVec * mdopenfork (SMgrRelation reln, ForkNumber forknum, int behavior)
 
static void register_dirty_segment (SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
 
static void register_unlink_segment (RelFileNodeBackend rnode, ForkNumber forknum, BlockNumber segno)
 
static void register_forget_request (RelFileNodeBackend rnode, ForkNumber forknum, BlockNumber segno)
 
static void _fdvec_resize (SMgrRelation reln, ForkNumber forknum, int nseg)
 
static char * _mdfd_segpath (SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
 
static MdfdVec * _mdfd_openseg (SMgrRelation reln, ForkNumber forkno, BlockNumber segno, int oflags)
 
static MdfdVec * _mdfd_getseg (SMgrRelation reln, ForkNumber forkno, BlockNumber blkno, bool skipFsync, int behavior)
 
static BlockNumber _mdnblocks (SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
 
void mdinit (void)
 
bool mdexists (SMgrRelation reln, ForkNumber forkNum)
 
void mdcreate (SMgrRelation reln, ForkNumber forkNum, bool isRedo)
 
void mdunlink (RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo)
 
static int do_truncate (const char *path)
 
void mdextend (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync)
 
void mdopen (SMgrRelation reln)
 
void mdclose (SMgrRelation reln, ForkNumber forknum)
 
bool mdprefetch (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
 
void mdwriteback (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
 
void mdread (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer)
 
void mdwrite (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync)
 
BlockNumber mdnblocks (SMgrRelation reln, ForkNumber forknum)
 
void mdtruncate (SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
 
void mdimmedsync (SMgrRelation reln, ForkNumber forknum)
 
void ForgetDatabaseSyncRequests (Oid dbid)
 
void DropRelationFiles (RelFileNode *delrels, int ndelrels, bool isRedo)
 
int mdsyncfiletag (const FileTag *ftag, char *path)
 
int mdunlinkfiletag (const FileTag *ftag, char *path)
 
bool mdfiletagmatches (const FileTag *ftag, const FileTag *candidate)
 

Variables

static MemoryContext MdCxt
 

Macro Definition Documentation

◆ EXTENSION_CREATE

#define EXTENSION_CREATE   (1 << 2)

Definition at line 108 of file md.c.

◆ EXTENSION_CREATE_RECOVERY

#define EXTENSION_CREATE_RECOVERY   (1 << 3)

Definition at line 110 of file md.c.

◆ EXTENSION_DONT_CHECK_SIZE

#define EXTENSION_DONT_CHECK_SIZE   (1 << 4)

Definition at line 118 of file md.c.

◆ EXTENSION_DONT_OPEN

#define EXTENSION_DONT_OPEN   (1 << 5)

Definition at line 120 of file md.c.

◆ EXTENSION_FAIL

#define EXTENSION_FAIL   (1 << 0)

Definition at line 104 of file md.c.

◆ EXTENSION_RETURN_NULL

#define EXTENSION_RETURN_NULL   (1 << 1)

Definition at line 106 of file md.c.

◆ INIT_MD_FILETAG

#define INIT_MD_FILETAG (   a,
  xx_rnode,
  xx_forknum,
  xx_segno 
)
Value:
( \
memset(&(a), 0, sizeof(FileTag)), \
(a).handler = SYNC_HANDLER_MD, \
(a).rnode = (xx_rnode), \
(a).forknum = (xx_forknum), \
(a).segno = (xx_segno) \
)
int a
Definition: isn.c:69
FileTag
Definition: sync.h:51
@ SYNC_HANDLER_MD
Definition: sync.h:37

Definition at line 92 of file md.c.

Typedef Documentation

◆ MdfdVec

typedef struct _MdfdVec MdfdVec

Function Documentation

◆ _fdvec_resize()

static void _fdvec_resize ( SMgrRelation  reln,
ForkNumber  forknum,
int  nseg 
)
static

Definition at line 1089 of file md.c.

1092 {
1093  if (nseg == 0)
1094  {
1095  if (reln->md_num_open_segs[forknum] > 0)
1096  {
1097  pfree(reln->md_seg_fds[forknum]);
1098  reln->md_seg_fds[forknum] = NULL;
1099  }
1100  }
1101  else if (reln->md_num_open_segs[forknum] == 0)
1102  {
1103  reln->md_seg_fds[forknum] =
1104  MemoryContextAlloc(MdCxt, sizeof(MdfdVec) * nseg);
1105  }
1106  else
1107  {
1108  /*
1109  * It doesn't seem worthwhile complicating the code to amortize
1110  * repalloc() calls. Those are far faster than PathNameOpenFile() or
1111  * FileClose(), and the memory context internally will sometimes avoid
1112  * doing an actual reallocation.
1113  */
1114  reln->md_seg_fds[forknum] =
1115  repalloc(reln->md_seg_fds[forknum],
1116  sizeof(MdfdVec) * nseg);
1117  }
1118 
1119  reln->md_num_open_segs[forknum] = nseg;
1120 }
void pfree(void *pointer)
Definition: mcxt.c:1175
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1188
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:863
static MemoryContext MdCxt
Definition: md.c:88
int md_num_open_segs[MAX_FORKNUM+1]
Definition: smgr.h:68
struct _MdfdVec * md_seg_fds[MAX_FORKNUM+1]
Definition: smgr.h:69
_MdfdVec
Definition: md.c:83

References SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, MdCxt, MemoryContextAlloc(), pfree(), and repalloc().

Referenced by _mdfd_openseg(), mdclose(), mdcreate(), mdimmedsync(), mdopenfork(), and mdtruncate().

◆ _mdfd_getseg()

static MdfdVec * _mdfd_getseg ( SMgrRelation  reln,
ForkNumber  forkno,
BlockNumber  blkno,
bool  skipFsync,
int  behavior 
)
static

Definition at line 1195 of file md.c.

1197 {
1198  MdfdVec *v;
1199  BlockNumber targetseg;
1200  BlockNumber nextsegno;
1201 
1202  /* some way to handle non-existent segments needs to be specified */
1203  Assert(behavior &
1204  (EXTENSION_FAIL | EXTENSION_CREATE | EXTENSION_RETURN_NULL |
1205  EXTENSION_DONT_OPEN));
1206 
1207  targetseg = blkno / ((BlockNumber) RELSEG_SIZE);
1208 
1209  /* if an existing and opened segment, we're done */
1210  if (targetseg < reln->md_num_open_segs[forknum])
1211  {
1212  v = &reln->md_seg_fds[forknum][targetseg];
1213  return v;
1214  }
1215 
1216  /* The caller only wants the segment if we already had it open. */
1217  if (behavior & EXTENSION_DONT_OPEN)
1218  return NULL;
1219 
1220  /*
1221  * The target segment is not yet open. Iterate over all the segments
1222  * between the last opened and the target segment. This way missing
1223  * segments either raise an error, or get created (according to
1224  * 'behavior'). Start with either the last opened, or the first segment if
1225  * none was opened before.
1226  */
1227  if (reln->md_num_open_segs[forknum] > 0)
1228  v = &reln->md_seg_fds[forknum][reln->md_num_open_segs[forknum] - 1];
1229  else
1230  {
1231  v = mdopenfork(reln, forknum, behavior);
1232  if (!v)
1233  return NULL; /* if behavior & EXTENSION_RETURN_NULL */
1234  }
1235 
1236  for (nextsegno = reln->md_num_open_segs[forknum];
1237  nextsegno <= targetseg; nextsegno++)
1238  {
1239  BlockNumber nblocks = _mdnblocks(reln, forknum, v);
1240  int flags = 0;
1241 
1242  Assert(nextsegno == v->mdfd_segno + 1);
1243 
1244  if (nblocks > ((BlockNumber) RELSEG_SIZE))
1245  elog(FATAL, "segment too big");
1246 
1247  if ((behavior & EXTENSION_CREATE) ||
1248  (InRecovery && (behavior & EXTENSION_CREATE_RECOVERY)))
1249  {
1250  /*
1251  * Normally we will create new segments only if authorized by the
1252  * caller (i.e., we are doing mdextend()). But when doing WAL
1253  * recovery, create segments anyway; this allows cases such as
1254  * replaying WAL data that has a write into a high-numbered
1255  * segment of a relation that was later deleted. We want to go
1256  * ahead and create the segments so we can finish out the replay.
1257  *
1258  * We have to maintain the invariant that segments before the last
1259  * active segment are of size RELSEG_SIZE; therefore, if
1260  * extending, pad them out with zeroes if needed. (This only
1261  * matters if in recovery, or if the caller is extending the
1262  * relation discontiguously, but that can happen in hash indexes.)
1263  */
1264  if (nblocks < ((BlockNumber) RELSEG_SIZE))
1265  {
1266  char *zerobuf = palloc0(BLCKSZ);
1267 
1268  mdextend(reln, forknum,
1269  nextsegno * ((BlockNumber) RELSEG_SIZE) - 1,
1270  zerobuf, skipFsync);
1271  pfree(zerobuf);
1272  }
1273  flags = O_CREAT;
1274  }
1275  else if (!(behavior & EXTENSION_DONT_CHECK_SIZE) &&
1276  nblocks < ((BlockNumber) RELSEG_SIZE))
1277  {
1278  /*
1279  * When not extending (or explicitly including truncated
1280  * segments), only open the next segment if the current one is
1281  * exactly RELSEG_SIZE. If not (this branch), either return NULL
1282  * or fail.
1283  */
1284  if (behavior & EXTENSION_RETURN_NULL)
1285  {
1286  /*
1287  * Some callers discern between reasons for _mdfd_getseg()
1288  * returning NULL based on errno. As there's no failing
1289  * syscall involved in this case, explicitly set errno to
1290  * ENOENT, as that seems the closest interpretation.
1291  */
1292  errno = ENOENT;
1293  return NULL;
1294  }
1295 
1296  ereport(ERROR,
1297  (errcode_for_file_access(),
1298  errmsg("could not open file \"%s\" (target block %u): previous segment is only %u blocks",
1299  _mdfd_segpath(reln, forknum, nextsegno),
1300  blkno, nblocks)));
1301  }
1302 
1303  v = _mdfd_openseg(reln, forknum, nextsegno, flags);
1304 
1305  if (v == NULL)
1306  {
1307  if ((behavior & EXTENSION_RETURN_NULL) &&
1308  FILE_POSSIBLY_DELETED(errno))
1309  return NULL;
1310  ereport(ERROR,
1311  (errcode_for_file_access(),
1312  errmsg("could not open file \"%s\" (target block %u): %m",
1313  _mdfd_segpath(reln, forknum, nextsegno),
1314  blkno)));
1315  }
1316  }
1317 
1318  return v;
1319 }
uint32 BlockNumber
Definition: block.h:31
int errcode_for_file_access(void)
Definition: elog.c:716
int errmsg(const char *fmt,...)
Definition: elog.c:904
#define FATAL
Definition: elog.h:35
#define ERROR
Definition: elog.h:33
#define elog(elevel,...)
Definition: elog.h:218
#define ereport(elevel,...)
Definition: elog.h:143
#define FILE_POSSIBLY_DELETED(err)
Definition: fd.h:77
Assert(fmt[strlen(fmt) - 1] !='\n')
void * palloc0(Size size)
Definition: mcxt.c:1099
#define EXTENSION_CREATE_RECOVERY
Definition: md.c:110
static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:1325
static MdfdVec * _mdfd_openseg(SMgrRelation reln, ForkNumber forkno, BlockNumber segno, int oflags)
Definition: md.c:1150
#define EXTENSION_DONT_OPEN
Definition: md.c:120
void mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync)
Definition: md.c:418
#define EXTENSION_RETURN_NULL
Definition: md.c:106
static char * _mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
Definition: md.c:1127
#define EXTENSION_CREATE
Definition: md.c:108
#define EXTENSION_DONT_CHECK_SIZE
Definition: md.c:118
#define EXTENSION_FAIL
Definition: md.c:104
static MdfdVec * mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
Definition: md.c:483
BlockNumber mdfd_segno
Definition: md.c:85
bool InRecovery
Definition: xlogutils.c:53

References _mdfd_openseg(), _mdfd_segpath(), _mdnblocks(), Assert(), elog, ereport, errcode_for_file_access(), errmsg(), ERROR, EXTENSION_CREATE, EXTENSION_CREATE_RECOVERY, EXTENSION_DONT_CHECK_SIZE, EXTENSION_DONT_OPEN, EXTENSION_FAIL, EXTENSION_RETURN_NULL, FATAL, FILE_POSSIBLY_DELETED, InRecovery, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, mdextend(), _MdfdVec::mdfd_segno, mdopenfork(), palloc0(), and pfree().

Referenced by mdextend(), mdprefetch(), mdread(), mdwrite(), and mdwriteback().

◆ _mdfd_openseg()

static MdfdVec * _mdfd_openseg ( SMgrRelation  reln,
ForkNumber  forkno,
BlockNumber  segno,
int  oflags 
)
static

Definition at line 1150 of file md.c.

1152 {
1153  MdfdVec *v;
1154  File fd;
1155  char *fullpath;
1156 
1157  fullpath = _mdfd_segpath(reln, forknum, segno);
1158 
1159  /* open the file */
1160  fd = PathNameOpenFile(fullpath, O_RDWR | PG_BINARY | oflags);
1161 
1162  pfree(fullpath);
1163 
1164  if (fd < 0)
1165  return NULL;
1166 
1167  /*
1168  * Segments are always opened in order from lowest to highest, so we must
1169  * be adding a new one at the end.
1170  */
1171  Assert(segno == reln->md_num_open_segs[forknum]);
1172 
1173  _fdvec_resize(reln, forknum, segno + 1);
1174 
1175  /* fill the entry */
1176  v = &reln->md_seg_fds[forknum][segno];
1177  v->mdfd_vfd = fd;
1178  v->mdfd_segno = segno;
1179 
1180  Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
1181 
1182  /* all done */
1183  return v;
1184 }
#define PG_BINARY
Definition: c.h:1268
File PathNameOpenFile(const char *fileName, int fileFlags)
Definition: fd.c:1566
int File
Definition: fd.h:54
static void _fdvec_resize(SMgrRelation reln, ForkNumber forknum, int nseg)
Definition: md.c:1089
static int fd(const char *x, int i)
Definition: preproc-init.c:105
File mdfd_vfd
Definition: md.c:84

References _fdvec_resize(), _mdfd_segpath(), _mdnblocks(), Assert(), fd(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, PathNameOpenFile(), pfree(), and PG_BINARY.

Referenced by _mdfd_getseg(), mdimmedsync(), and mdnblocks().

◆ _mdfd_segpath()

static char * _mdfd_segpath ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  segno 
)
static

Definition at line 1127 of file md.c.

1128 {
1129  char *path,
1130  *fullpath;
1131 
1132  path = relpath(reln->smgr_rnode, forknum);
1133 
1134  if (segno > 0)
1135  {
1136  fullpath = psprintf("%s.%u", path, segno);
1137  pfree(path);
1138  }
1139  else
1140  fullpath = path;
1141 
1142  return fullpath;
1143 }
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
#define relpath(rnode, forknum)
Definition: relpath.h:87
RelFileNodeBackend smgr_rnode
Definition: smgr.h:42

References pfree(), psprintf(), relpath, and SMgrRelationData::smgr_rnode.

Referenced by _mdfd_getseg(), _mdfd_openseg(), and mdsyncfiletag().

◆ _mdnblocks()

static BlockNumber _mdnblocks ( SMgrRelation  reln,
ForkNumber  forknum,
MdfdVec *  seg 
)
static

Definition at line 1325 of file md.c.

1326 {
1327  off_t len;
1328 
1329  len = FileSize(seg->mdfd_vfd);
1330  if (len < 0)
1331  ereport(ERROR,
1332  (errcode_for_file_access(),
1333  errmsg("could not seek to end of file \"%s\": %m",
1334  FilePathName(seg->mdfd_vfd))));
1335  /* note that this calculation will ignore any partial block at EOF */
1336  return (BlockNumber) (len / BLCKSZ);
1337 }
char * FilePathName(File file)
Definition: fd.c:2339
off_t FileSize(File file)
Definition: fd.c:2287
const void size_t len

References ereport, errcode_for_file_access(), errmsg(), ERROR, FilePathName(), FileSize(), len, and _MdfdVec::mdfd_vfd.

Referenced by _mdfd_getseg(), _mdfd_openseg(), mdextend(), mdnblocks(), and mdopenfork().

◆ do_truncate()

static int do_truncate ( const char *  path)
static

Definition at line 297 of file md.c.

298 {
299  int save_errno;
300  int ret;
301 
302  ret = pg_truncate(path, 0);
303 
304  /* Log a warning here to avoid repetition in callers. */
305  if (ret < 0 && errno != ENOENT)
306  {
307  save_errno = errno;
308  ereport(WARNING,
309  (errcode_for_file_access(),
310  errmsg("could not truncate file \"%s\": %m", path)));
311  errno = save_errno;
312  }
313 
314  return ret;
315 }
#define WARNING
Definition: elog.h:30
int pg_truncate(const char *path, off_t length)
Definition: fd.c:642

References ereport, errcode_for_file_access(), errmsg(), pg_truncate(), and WARNING.

Referenced by mdunlinkfork().

◆ DropRelationFiles()

void DropRelationFiles ( RelFileNode *  delrels,
int  ndelrels,
bool  isRedo 
)

Definition at line 1057 of file md.c.

1058 {
1059  SMgrRelation *srels;
1060  int i;
1061 
1062  srels = palloc(sizeof(SMgrRelation) * ndelrels);
1063  for (i = 0; i < ndelrels; i++)
1064  {
1065  SMgrRelation srel = smgropen(delrels[i], InvalidBackendId);
1066 
1067  if (isRedo)
1068  {
1069  ForkNumber fork;
1070 
1071  for (fork = 0; fork <= MAX_FORKNUM; fork++)
1072  XLogDropRelation(delrels[i], fork);
1073  }
1074  srels[i] = srel;
1075  }
1076 
1077  smgrdounlinkall(srels, ndelrels, isRedo);
1078 
1079  for (i = 0; i < ndelrels; i++)
1080  smgrclose(srels[i]);
1081  pfree(srels);
1082 }
#define InvalidBackendId
Definition: backendid.h:23
int i
Definition: isn.c:73
void * palloc(Size size)
Definition: mcxt.c:1068
ForkNumber
Definition: relpath.h:41
#define MAX_FORKNUM
Definition: relpath.h:55
void smgrclose(SMgrRelation reln)
Definition: smgr.c:256
SMgrRelation smgropen(RelFileNode rnode, BackendId backend)
Definition: smgr.c:146
void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
Definition: smgr.c:420
void XLogDropRelation(RelFileNode rnode, ForkNumber forknum)
Definition: xlogutils.c:655

References i, InvalidBackendId, MAX_FORKNUM, palloc(), pfree(), smgrclose(), smgrdounlinkall(), smgropen(), and XLogDropRelation().

Referenced by FinishPreparedTransaction(), xact_redo_abort(), and xact_redo_commit().

◆ ForgetDatabaseSyncRequests()

void ForgetDatabaseSyncRequests ( Oid  dbid)

Definition at line 1039 of file md.c.

1040 {
1041  FileTag tag;
1042  RelFileNode rnode;
1043 
1044  rnode.dbNode = dbid;
1045  rnode.spcNode = 0;
1046  rnode.relNode = 0;
1047 
1048  INIT_MD_FILETAG(tag, rnode, InvalidForkNumber, InvalidBlockNumber);
1049 
1050  RegisterSyncRequest(&tag, SYNC_FILTER_REQUEST, true /* retryOnError */ );
1051 }
#define InvalidBlockNumber
Definition: block.h:33
#define INIT_MD_FILETAG(a, xx_rnode, xx_forknum, xx_segno)
Definition: md.c:92
@ InvalidForkNumber
Definition: relpath.h:42
bool RegisterSyncRequest(const FileTag *ftag, SyncRequestType type, bool retryOnError)
Definition: sync.c:587
@ SYNC_FILTER_REQUEST
Definition: sync.h:28

References RelFileNode::dbNode, INIT_MD_FILETAG, InvalidBlockNumber, InvalidForkNumber, RegisterSyncRequest(), RelFileNode::relNode, RelFileNode::spcNode, and SYNC_FILTER_REQUEST.

Referenced by createdb_failure_callback(), dbase_redo(), and dropdb().

◆ mdclose()

void mdclose ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 537 of file md.c.

538 {
539  int nopensegs = reln->md_num_open_segs[forknum];
540 
541  /* No work if already closed */
542  if (nopensegs == 0)
543  return;
544 
545  /* close segments starting from the end */
546  while (nopensegs > 0)
547  {
548  MdfdVec *v = &reln->md_seg_fds[forknum][nopensegs - 1];
549 
550  FileClose(v->mdfd_vfd);
551  _fdvec_resize(reln, forknum, nopensegs - 1);
552  nopensegs--;
553  }
554 }
void FileClose(File file)
Definition: fd.c:1961

References _fdvec_resize(), FileClose(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, and _MdfdVec::mdfd_vfd.

Referenced by mdexists().

◆ mdcreate()

void mdcreate ( SMgrRelation  reln,
ForkNumber  forkNum,
bool  isRedo 
)

Definition at line 182 of file md.c.

183 {
184  MdfdVec *mdfd;
185  char *path;
186  File fd;
187 
188  if (isRedo && reln->md_num_open_segs[forkNum] > 0)
189  return; /* created and opened already... */
190 
191  Assert(reln->md_num_open_segs[forkNum] == 0);
192 
193  /*
194  * We may be using the target table space for the first time in this
195  * database, so create a per-database subdirectory if needed.
196  *
197  * XXX this is a fairly ugly violation of module layering, but this seems
198  * to be the best place to put the check. Maybe TablespaceCreateDbspace
199  * should be here and not in commands/tablespace.c? But that would imply
200  * importing a lot of stuff that smgr.c oughtn't know, either.
201  */
202  TablespaceCreateDbspace(reln->smgr_rnode.node.spcNode,
203  reln->smgr_rnode.node.dbNode,
204  isRedo);
205 
206  path = relpath(reln->smgr_rnode, forkNum);
207 
208  fd = PathNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY);
209 
210  if (fd < 0)
211  {
212  int save_errno = errno;
213 
214  if (isRedo)
215  fd = PathNameOpenFile(path, O_RDWR | PG_BINARY);
216  if (fd < 0)
217  {
218  /* be sure to report the error reported by create, not open */
219  errno = save_errno;
220  ereport(ERROR,
221  (errcode_for_file_access(),
222  errmsg("could not create file \"%s\": %m", path)));
223  }
224  }
225 
226  pfree(path);
227 
228  _fdvec_resize(reln, forkNum, 1);
229  mdfd = &reln->md_seg_fds[forkNum][0];
230  mdfd->mdfd_vfd = fd;
231  mdfd->mdfd_segno = 0;
232 }
void TablespaceCreateDbspace(Oid spcNode, Oid dbNode, bool isRedo)
Definition: tablespace.c:118
RelFileNode node
Definition: relfilenode.h:74

References _fdvec_resize(), Assert(), RelFileNode::dbNode, ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, RelFileNodeBackend::node, PathNameOpenFile(), pfree(), PG_BINARY, relpath, SMgrRelationData::smgr_rnode, RelFileNode::spcNode, and TablespaceCreateDbspace().

◆ mdexists()

bool mdexists ( SMgrRelation  reln,
ForkNumber  forkNum 
)

Definition at line 163 of file md.c.

164 {
165  /*
166  * Close it first, to ensure that we notice if the fork has been unlinked
167  * since we opened it. As an optimization, we can skip that in recovery,
168  * which already closes relations when dropping them.
169  */
170  if (!InRecovery)
171  mdclose(reln, forkNum);
172 
173  return (mdopenfork(reln, forkNum, EXTENSION_RETURN_NULL) != NULL);
174 }
void mdclose(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:537

References EXTENSION_RETURN_NULL, InRecovery, mdclose(), and mdopenfork().

◆ mdextend()

void mdextend ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
char *  buffer,
bool  skipFsync 
)

Definition at line 418 of file md.c.

420 {
421  off_t seekpos;
422  int nbytes;
423  MdfdVec *v;
424 
425  /* This assert is too expensive to have on normally ... */
426 #ifdef CHECK_WRITE_VS_EXTEND
427  Assert(blocknum >= mdnblocks(reln, forknum));
428 #endif
429 
430  /*
431  * If a relation manages to grow to 2^32-1 blocks, refuse to extend it any
432  * more --- we mustn't create a block whose number actually is
433  * InvalidBlockNumber. (Note that this failure should be unreachable
434  * because of upstream checks in bufmgr.c.)
435  */
436  if (blocknum == InvalidBlockNumber)
437  ereport(ERROR,
438  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
439  errmsg("cannot extend file \"%s\" beyond %u blocks",
440  relpath(reln->smgr_rnode, forknum),
441  InvalidBlockNumber)));
442 
443  v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE);
444 
445  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
446 
447  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
448 
449  if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ)
450  {
451  if (nbytes < 0)
452  ereport(ERROR,
453  (errcode_for_file_access(),
454  errmsg("could not extend file \"%s\": %m",
455  FilePathName(v->mdfd_vfd)),
456  errhint("Check free disk space.")));
457  /* short write: complain appropriately */
458  ereport(ERROR,
459  (errcode(ERRCODE_DISK_FULL),
460  errmsg("could not extend file \"%s\": wrote only %d of %d bytes at block %u",
461  FilePathName(v->mdfd_vfd),
462  nbytes, BLCKSZ, blocknum),
463  errhint("Check free disk space.")));
464  }
465 
466  if (!skipFsync && !SmgrIsTemp(reln))
467  register_dirty_segment(reln, forknum, v);
468 
469  Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
470 }
int errhint(const char *fmt,...)
Definition: elog.c:1151
int errcode(int sqlerrcode)
Definition: elog.c:693
int FileWrite(File file, char *buffer, int amount, off_t offset, uint32 wait_event_info)
Definition: fd.c:2168
BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:770
static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:982
static MdfdVec * _mdfd_getseg(SMgrRelation reln, ForkNumber forkno, BlockNumber blkno, bool skipFsync, int behavior)
Definition: md.c:1195
#define SmgrIsTemp(smgr)
Definition: smgr.h:77
@ WAIT_EVENT_DATA_FILE_EXTEND
Definition: wait_event.h:173

References _mdfd_getseg(), _mdnblocks(), Assert(), ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXTENSION_CREATE, FilePathName(), FileWrite(), InvalidBlockNumber, _MdfdVec::mdfd_vfd, mdnblocks(), register_dirty_segment(), relpath, SMgrRelationData::smgr_rnode, SmgrIsTemp, and WAIT_EVENT_DATA_FILE_EXTEND.

Referenced by _mdfd_getseg().

◆ mdfiletagmatches()

bool mdfiletagmatches ( const FileTag *  ftag,
const FileTag *  candidate 
)

Definition at line 1412 of file md.c.

1413 {
1414  /*
1415  * For now we only use filter requests as a way to drop all scheduled
1416  * callbacks relating to a given database, when dropping the database.
1417  * We'll return true for all candidates that have the same database OID as
1418  * the ftag from the SYNC_FILTER_REQUEST request, so they're forgotten.
1419  */
1420  return ftag->rnode.dbNode == candidate->rnode.dbNode;
1421 }
RelFileNode rnode
Definition: sync.h:54

References RelFileNode::dbNode, and FileTag::rnode.

◆ mdimmedsync()

void mdimmedsync ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 929 of file md.c.

930 {
931  int segno;
932  int min_inactive_seg;
933 
934  /*
935  * NOTE: mdnblocks makes sure we have opened all active segments, so that
936  * fsync loop will get them all!
937  */
938  mdnblocks(reln, forknum);
939 
940  min_inactive_seg = segno = reln->md_num_open_segs[forknum];
941 
942  /*
943  * Temporarily open inactive segments, then close them after sync. There
944  * may be some inactive segments left opened after fsync() error, but that
945  * is harmless. We don't bother to clean them up and take a risk of
946  * further trouble. The next mdclose() will soon close them.
947  */
948  while (_mdfd_openseg(reln, forknum, segno, 0) != NULL)
949  segno++;
950 
951  while (segno > 0)
952  {
953  MdfdVec *v = &reln->md_seg_fds[forknum][segno - 1];
954 
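955  if (FileSync(v->mdfd_vfd, WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC) < 0)
956  ereport(data_sync_elevel(ERROR),
957  (errcode_for_file_access(),
958  errmsg("could not fsync file \"%s\": %m",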
959  FilePathName(v->mdfd_vfd))));
960 
961  /* Close inactive segments immediately */
962  if (segno > min_inactive_seg)
963  {
964  FileClose(v->mdfd_vfd);
965  _fdvec_resize(reln, forknum, segno - 1);
966  }
967 
968  segno--;
969  }
970 }
int FileSync(File file, uint32 wait_event_info)
Definition: fd.c:2266
int data_sync_elevel(int elevel)
Definition: fd.c:3826
@ WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC
Definition: wait_event.h:175

References _fdvec_resize(), _mdfd_openseg(), data_sync_elevel(), ereport, errcode_for_file_access(), errmsg(), ERROR, FileClose(), FilePathName(), FileSync(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, mdnblocks(), and WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC.

◆ mdinit()

void mdinit ( void  )

Definition at line 150 of file md.c.

151 {
152  MdCxt = AllocSetContextCreate(TopMemoryContext,
153  "MdSmgr",
154  ALLOCSET_DEFAULT_SIZES);
155 }
MemoryContext TopMemoryContext
Definition: mcxt.c:48
#define AllocSetContextCreate
Definition: memutils.h:173
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:197

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, MdCxt, and TopMemoryContext.

◆ mdnblocks()

BlockNumber mdnblocks ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 770 of file md.c.

771 {
772  MdfdVec *v;
773  BlockNumber nblocks;
774  BlockNumber segno;
775 
776  mdopenfork(reln, forknum, EXTENSION_FAIL);
777 
778  /* mdopen has opened the first segment */
779  Assert(reln->md_num_open_segs[forknum] > 0);
780 
781  /*
782  * Start from the last open segments, to avoid redundant seeks. We have
783  * previously verified that these segments are exactly RELSEG_SIZE long,
784  * and it's useless to recheck that each time.
785  *
786  * NOTE: this assumption could only be wrong if another backend has
787  * truncated the relation. We rely on higher code levels to handle that
788  * scenario by closing and re-opening the md fd, which is handled via
789  * relcache flush. (Since the checkpointer doesn't participate in
790  * relcache flush, it could have segment entries for inactive segments;
791  * that's OK because the checkpointer never needs to compute relation
792  * size.)
793  */
794  segno = reln->md_num_open_segs[forknum] - 1;
795  v = &reln->md_seg_fds[forknum][segno];
796 
797  for (;;)
798  {
799  nblocks = _mdnblocks(reln, forknum, v);
800  if (nblocks > ((BlockNumber) RELSEG_SIZE))
801  elog(FATAL, "segment too big");
802  if (nblocks < ((BlockNumber) RELSEG_SIZE))
803  return (segno * ((BlockNumber) RELSEG_SIZE)) + nblocks;
804 
805  /*
806  * If segment is exactly RELSEG_SIZE, advance to next one.
807  */
808  segno++;
809 
810  /*
811  * We used to pass O_CREAT here, but that has the disadvantage that it
812  * might create a segment which has vanished through some operating
813  * system misadventure. In such a case, creating the segment here
814  * undermines _mdfd_getseg's attempts to notice and report an error
815  * upon access to a missing segment.
816  */
817  v = _mdfd_openseg(reln, forknum, segno, 0);
818  if (v == NULL)
819  return segno * ((BlockNumber) RELSEG_SIZE);
820  }
821 }

References _mdfd_openseg(), _mdnblocks(), Assert(), elog, EXTENSION_FAIL, FATAL, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, and mdopenfork().

Referenced by mdextend(), mdimmedsync(), mdtruncate(), and mdwrite().

◆ mdopen()

void mdopen ( SMgrRelation  reln)

Definition at line 526 of file md.c.

527 {
528  /* mark it not open */
529  for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)
530  reln->md_num_open_segs[forknum] = 0;
531 }

References MAX_FORKNUM, and SMgrRelationData::md_num_open_segs.

◆ mdopenfork()

static MdfdVec * mdopenfork ( SMgrRelation  reln,
ForkNumber  forknum,
int  behavior 
)
static

Definition at line 483 of file md.c.

484 {
485  MdfdVec *mdfd;
486  char *path;
487  File fd;
488 
489  /* No work if already open */
490  if (reln->md_num_open_segs[forknum] > 0)
491  return &reln->md_seg_fds[forknum][0];
492 
493  path = relpath(reln->smgr_rnode, forknum);
494 
495  fd = PathNameOpenFile(path, O_RDWR | PG_BINARY);
496 
497  if (fd < 0)
498  {
499  if ((behavior & EXTENSION_RETURN_NULL) &&
500  FILE_POSSIBLY_DELETED(errno))
501  {
502  pfree(path);
503  return NULL;
504  }
505  ereport(ERROR,
506  (errcode_for_file_access(),
507  errmsg("could not open file \"%s\": %m", path)));
508  }
509 
510  pfree(path);
511 
512  _fdvec_resize(reln, forknum, 1);
513  mdfd = &reln->md_seg_fds[forknum][0];
514  mdfd->mdfd_vfd = fd;
515  mdfd->mdfd_segno = 0;
516 
517  Assert(_mdnblocks(reln, forknum, mdfd) <= ((BlockNumber) RELSEG_SIZE));
518 
519  return mdfd;
520 }

References _fdvec_resize(), _mdnblocks(), Assert(), ereport, errcode_for_file_access(), errmsg(), ERROR, EXTENSION_RETURN_NULL, fd(), FILE_POSSIBLY_DELETED, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, PathNameOpenFile(), pfree(), PG_BINARY, relpath, and SMgrRelationData::smgr_rnode.

Referenced by _mdfd_getseg(), mdexists(), and mdnblocks().

◆ mdprefetch()

bool mdprefetch ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum 
)

Definition at line 560 of file md.c.

561 {
562 #ifdef USE_PREFETCH
563  off_t seekpos;
564  MdfdVec *v;
565 
566  v = _mdfd_getseg(reln, forknum, blocknum, false,
567  InRecovery ? EXTENSION_RETURN_NULL : EXTENSION_FAIL);
568  if (v == NULL)
569  return false;
570 
571  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
572 
573  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
574 
575  (void) FilePrefetch(v->mdfd_vfd, seekpos, BLCKSZ, WAIT_EVENT_DATA_FILE_PREFETCH);
576 #endif /* USE_PREFETCH */
577 
578  return true;
579 }
int FilePrefetch(File file, off_t offset, int amount, uint32 wait_event_info)
Definition: fd.c:2061
@ WAIT_EVENT_DATA_FILE_PREFETCH
Definition: wait_event.h:176

References _mdfd_getseg(), Assert(), EXTENSION_FAIL, EXTENSION_RETURN_NULL, FilePrefetch(), InRecovery, _MdfdVec::mdfd_vfd, and WAIT_EVENT_DATA_FILE_PREFETCH.

◆ mdread()

void mdread ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
char *  buffer 
)

Definition at line 640 of file md.c.

642 {
643  off_t seekpos;
644  int nbytes;
645  MdfdVec *v;
646 
647  TRACE_POSTGRESQL_SMGR_MD_READ_START(forknum, blocknum,
648  reln->smgr_rnode.node.spcNode,
649  reln->smgr_rnode.node.dbNode,
650  reln->smgr_rnode.node.relNode,
651  reln->smgr_rnode.backend);
652 
653  v = _mdfd_getseg(reln, forknum, blocknum, false,
654  EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
655 
656  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
657 
658  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
659 
660  nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_READ);
661 
662  TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum,
663  reln->smgr_rnode.node.spcNode,
664  reln->smgr_rnode.node.dbNode,
665  reln->smgr_rnode.node.relNode,
666  reln->smgr_rnode.backend,
667  nbytes,
668  BLCKSZ);
669 
670  if (nbytes != BLCKSZ)
671  {
672  if (nbytes < 0)
673  ereport(ERROR,
674  (errcode_for_file_access(),
675  errmsg("could not read block %u in file \"%s\": %m",
676  blocknum, FilePathName(v->mdfd_vfd))));
677 
678  /*
679  * Short read: we are at or past EOF, or we read a partial block at
680  * EOF. Normally this is an error; upper levels should never try to
681  * read a nonexistent block. However, if zero_damaged_pages is ON or
682  * we are InRecovery, we should instead return zeroes without
683  * complaining. This allows, for example, the case of trying to
684  * update a block that was later truncated away.
685  */
686  if (zero_damaged_pages || InRecovery)
687  MemSet(buffer, 0, BLCKSZ);
688  else
689  ereport(ERROR,
690  (errcode(ERRCODE_DATA_CORRUPTED),
691  errmsg("could not read block %u in file \"%s\": read only %d of %d bytes",
692  blocknum, FilePathName(v->mdfd_vfd),
693  nbytes, BLCKSZ)));
694  }
695 }
bool zero_damaged_pages
Definition: bufmgr.c:134
#define MemSet(start, val, len)
Definition: c.h:1008
int FileRead(File file, char *buffer, int amount, off_t offset, uint32 wait_event_info)
Definition: fd.c:2112
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:43
BackendId backend
Definition: relfilenode.h:75
@ WAIT_EVENT_DATA_FILE_READ
Definition: wait_event.h:177

References _mdfd_getseg(), Assert(), RelFileNodeBackend::backend, RelFileNode::dbNode, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errmsg(), ERROR, EXTENSION_CREATE_RECOVERY, EXTENSION_FAIL, FilePathName(), FileRead(), InRecovery, _MdfdVec::mdfd_vfd, MemSet, RelFileNodeBackend::node, RelFileNode::relNode, SMgrRelationData::smgr_rnode, RelFileNode::spcNode, WAIT_EVENT_DATA_FILE_READ, and zero_damaged_pages.

◆ mdsyncfiletag()

int mdsyncfiletag ( const FileTag *  ftag,
char *  path 
)

Definition at line 1346 of file md.c.

1347 {
1348  SMgrRelation reln = smgropen(ftag->rnode, InvalidBackendId);
1349  File file;
1350  bool need_to_close;
1351  int result,
1352  save_errno;
1353 
1354  /* See if we already have the file open, or need to open it. */
1355  if (ftag->segno < reln->md_num_open_segs[ftag->forknum])
1356  {
1357  file = reln->md_seg_fds[ftag->forknum][ftag->segno].mdfd_vfd;
1358  strlcpy(path, FilePathName(file), MAXPGPATH);
1359  need_to_close = false;
1360  }
1361  else
1362  {
1363  char *p;
1364 
1365  p = _mdfd_segpath(reln, ftag->forknum, ftag->segno);
1366  strlcpy(path, p, MAXPGPATH);
1367  pfree(p);
1368 
1369  file = PathNameOpenFile(path, O_RDWR | PG_BINARY);
1370  if (file < 0)
1371  return -1;
1372  need_to_close = true;
1373  }
1374 
1375  /* Sync the file. */
1376  result = FileSync(file, WAIT_EVENT_DATA_FILE_SYNC);
1377  save_errno = errno;
1378 
1379  if (need_to_close)
1380  FileClose(file);
1381 
1382  errno = save_errno;
1383  return result;
1384 }
#define MAXPGPATH
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
int16 forknum
Definition: sync.h:53
uint32 segno
Definition: sync.h:55
@ WAIT_EVENT_DATA_FILE_SYNC
Definition: wait_event.h:178

References _mdfd_segpath(), FileClose(), FilePathName(), FileSync(), FileTag::forknum, InvalidBackendId, MAXPGPATH, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, PathNameOpenFile(), pfree(), PG_BINARY, FileTag::rnode, FileTag::segno, smgropen(), strlcpy(), and WAIT_EVENT_DATA_FILE_SYNC.

◆ mdtruncate()

void mdtruncate ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  nblocks 
)

Definition at line 827 of file md.c.

828 {
829  BlockNumber curnblk;
830  BlockNumber priorblocks;
831  int curopensegs;
832 
833  /*
834  * NOTE: mdnblocks makes sure we have opened all active segments, so that
835  * truncation loop will get them all!
836  */
837  curnblk = mdnblocks(reln, forknum);
838  if (nblocks > curnblk)
839  {
840  /* Bogus request ... but no complaint if InRecovery */
841  if (InRecovery)
842  return;
843  ereport(ERROR,
844  (errmsg("could not truncate file \"%s\" to %u blocks: it's only %u blocks now",
845  relpath(reln->smgr_rnode, forknum),
846  nblocks, curnblk)));
847  }
848  if (nblocks == curnblk)
849  return; /* no work */
850 
851  /*
852  * Truncate segments, starting at the last one. Starting at the end makes
853  * managing the memory for the fd array easier, should there be errors.
854  */
855  curopensegs = reln->md_num_open_segs[forknum];
856  while (curopensegs > 0)
857  {
858  MdfdVec *v;
859 
860  priorblocks = (curopensegs - 1) * RELSEG_SIZE;
861 
862  v = &reln->md_seg_fds[forknum][curopensegs - 1];
863 
864  if (priorblocks > nblocks)
865  {
866  /*
867  * This segment is no longer active. We truncate the file, but do
868  * not delete it, for reasons explained in the header comments.
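869  */
870  if (FileTruncate(v->mdfd_vfd, 0, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0)
871  ereport(ERROR,
872  (errcode_for_file_access(),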
873  errmsg("could not truncate file \"%s\": %m",
874  FilePathName(v->mdfd_vfd))));
875 
876  if (!SmgrIsTemp(reln))
877  register_dirty_segment(reln, forknum, v);
878 
879  /* we never drop the 1st segment */
880  Assert(v != &reln->md_seg_fds[forknum][0]);
881 
882  FileClose(v->mdfd_vfd);
883  _fdvec_resize(reln, forknum, curopensegs - 1);
884  }
885  else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks)
886  {
887  /*
888  * This is the last segment we want to keep. Truncate the file to
889  * the right length. NOTE: if nblocks is exactly a multiple K of
890  * RELSEG_SIZE, we will truncate the K+1st segment to 0 length but
891  * keep it. This adheres to the invariant given in the header
892  * comments.
893  */
894  BlockNumber lastsegblocks = nblocks - priorblocks;
895 
896  if (FileTruncate(v->mdfd_vfd, (off_t) lastsegblocks * BLCKSZ, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0)
897  ereport(ERROR,
898  (errcode_for_file_access(),
899  errmsg("could not truncate file \"%s\" to %u blocks: %m",
900  FilePathName(v->mdfd_vfd),
901  nblocks)));
902  if (!SmgrIsTemp(reln))
903  register_dirty_segment(reln, forknum, v);
904  }
905  else
906  {
907  /*
908  * We still need this segment, so nothing to do for this and any
909  * earlier segment.
910  */
911  break;
912  }
913  curopensegs--;
914  }
915 }
int FileTruncate(File file, off_t offset, uint32 wait_event_info)
Definition: fd.c:2304
@ WAIT_EVENT_DATA_FILE_TRUNCATE
Definition: wait_event.h:179

References _fdvec_resize(), Assert(), ereport, errcode_for_file_access(), errmsg(), ERROR, FileClose(), FilePathName(), FileTruncate(), InRecovery, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, mdnblocks(), register_dirty_segment(), relpath, SMgrRelationData::smgr_rnode, SmgrIsTemp, and WAIT_EVENT_DATA_FILE_TRUNCATE.

◆ mdunlink()

void mdunlink ( RelFileNodeBackend  rnode,
ForkNumber  forkNum,
bool  isRedo 
)

Definition at line 281 of file md.c.

282 {
283  /* Now do the per-fork work */
284  if (forkNum == InvalidForkNumber)
285  {
286  for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++)
287  mdunlinkfork(rnode, forkNum, isRedo);
288  }
289  else
290  mdunlinkfork(rnode, forkNum, isRedo);
291 }
static void mdunlinkfork(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo)
Definition: md.c:318

References InvalidForkNumber, MAX_FORKNUM, and mdunlinkfork().

◆ mdunlinkfiletag()

int mdunlinkfiletag ( const FileTag *  ftag,
char *  path 
)

Definition at line 1393 of file md.c.

1394 {
1395  char *p;
1396 
1397  /* Compute the path. */
1398  p = relpathperm(ftag->rnode, MAIN_FORKNUM);
1399  strlcpy(path, p, MAXPGPATH);
1400  pfree(p);
1401 
1402  /* Try to unlink the file. */
1403  return unlink(path);
1404 }
@ MAIN_FORKNUM
Definition: relpath.h:43
#define relpathperm(rnode, forknum)
Definition: relpath.h:83

References MAIN_FORKNUM, MAXPGPATH, pfree(), relpathperm, FileTag::rnode, and strlcpy().

◆ mdunlinkfork()

static void mdunlinkfork ( RelFileNodeBackend  rnode,
ForkNumber  forkNum,
bool  isRedo 
)
static

Definition at line 318 of file md.c.

319 {
320  char *path;
321  int ret;
322 
323  path = relpath(rnode, forkNum);
324 
325  /*
326  * Delete or truncate the first segment.
327  */
328  if (isRedo || forkNum != MAIN_FORKNUM || RelFileNodeBackendIsTemp(rnode))
329  {
330  if (!RelFileNodeBackendIsTemp(rnode))
331  {
332  /* Prevent other backends' fds from holding on to the disk space */
333  ret = do_truncate(path);
334 
335  /* Forget any pending sync requests for the first segment */
336  register_forget_request(rnode, forkNum, 0 /* first seg */ );
337  }
338  else
339  ret = 0;
340 
341  /* Next unlink the file, unless it was already found to be missing */
342  if (ret == 0 || errno != ENOENT)
343  {
344  ret = unlink(path);
345  if (ret < 0 && errno != ENOENT)
346  ereport(WARNING,
347  (errcode_for_file_access(),
348  errmsg("could not remove file \"%s\": %m", path)));
349  }
350  }
351  else
352  {
353  /* Prevent other backends' fds from holding on to the disk space */
354  ret = do_truncate(path);
355 
356  /* Register request to unlink first segment later */
357  register_unlink_segment(rnode, forkNum, 0 /* first seg */ );
358  }
359 
360  /*
361  * Delete any additional segments.
362  */
363  if (ret >= 0)
364  {
365  char *segpath = (char *) palloc(strlen(path) + 12);
366  BlockNumber segno;
367 
368  /*
369  * Note that because we loop until getting ENOENT, we will correctly
370  * remove all inactive segments as well as active ones.
371  */
372  for (segno = 1;; segno++)
373  {
374  sprintf(segpath, "%s.%u", path, segno);
375 
376  if (!RelFileNodeBackendIsTemp(rnode))
377  {
378  /*
379  * Prevent other backends' fds from holding on to the disk
380  * space.
381  */
382  if (do_truncate(segpath) < 0 && errno == ENOENT)
383  break;
384 
385  /*
386  * Forget any pending sync requests for this segment before we
387  * try to unlink.
388  */
389  register_forget_request(rnode, forkNum, segno);
390  }
391 
392  if (unlink(segpath) < 0)
393  {
394  /* ENOENT is expected after the last segment... */
395  if (errno != ENOENT)
396  ereport(WARNING,
397  (errcode_for_file_access(),
398  errmsg("could not remove file \"%s\": %m", segpath)));
399  break;
400  }
401  }
402  pfree(segpath);
403  }
404 
405  pfree(path);
406 }
static void register_unlink_segment(RelFileNodeBackend rnode, ForkNumber forknum, BlockNumber segno)
Definition: md.c:1008
static int do_truncate(const char *path)
Definition: md.c:297
static void register_forget_request(RelFileNodeBackend rnode, ForkNumber forknum, BlockNumber segno)
Definition: md.c:1025
#define sprintf
Definition: port.h:227
#define RelFileNodeBackendIsTemp(rnode)
Definition: relfilenode.h:78

References do_truncate(), ereport, errcode_for_file_access(), errmsg(), MAIN_FORKNUM, palloc(), pfree(), register_forget_request(), register_unlink_segment(), RelFileNodeBackendIsTemp, relpath, sprintf, and WARNING.

Referenced by mdunlink().

◆ mdwrite()

void mdwrite ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
char *  buffer,
bool  skipFsync 
)

Definition at line 705 of file md.c.

707 {
708  off_t seekpos;
709  int nbytes;
710  MdfdVec *v;
711 
712  /* This assert is too expensive to have on normally ... */
713 #ifdef CHECK_WRITE_VS_EXTEND
714  Assert(blocknum < mdnblocks(reln, forknum));
715 #endif
716 
717  TRACE_POSTGRESQL_SMGR_MD_WRITE_START(forknum, blocknum,
718  reln->smgr_rnode.node.spcNode,
719  reln->smgr_rnode.node.dbNode,
720  reln->smgr_rnode.node.relNode,
721  reln->smgr_rnode.backend);
722 
723  v = _mdfd_getseg(reln, forknum, blocknum, skipFsync,
724  EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
725 
726  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
727 
728  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
729 
730  nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_WRITE);
731 
732  TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum,
733  reln->smgr_rnode.node.spcNode,
734  reln->smgr_rnode.node.dbNode,
735  reln->smgr_rnode.node.relNode,
736  reln->smgr_rnode.backend,
737  nbytes,
738  BLCKSZ);
739 
740  if (nbytes != BLCKSZ)
741  {
742  if (nbytes < 0)
743  ereport(ERROR,
744  (errcode_for_file_access(),
745  errmsg("could not write block %u in file \"%s\": %m",
746  blocknum, FilePathName(v->mdfd_vfd))));
747  /* short write: complain appropriately */
748  ereport(ERROR,
749  (errcode(ERRCODE_DISK_FULL),
750  errmsg("could not write block %u in file \"%s\": wrote only %d of %d bytes",
751  blocknum,
752  FilePathName(v->mdfd_vfd),
753  nbytes, BLCKSZ),
754  errhint("Check free disk space.")));
755  }
756 
757  if (!skipFsync && !SmgrIsTemp(reln))
758  register_dirty_segment(reln, forknum, v);
759 }
@ WAIT_EVENT_DATA_FILE_WRITE
Definition: wait_event.h:180

References _mdfd_getseg(), Assert(), RelFileNodeBackend::backend, RelFileNode::dbNode, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXTENSION_CREATE_RECOVERY, EXTENSION_FAIL, FilePathName(), FileWrite(), _MdfdVec::mdfd_vfd, mdnblocks(), RelFileNodeBackend::node, register_dirty_segment(), RelFileNode::relNode, SMgrRelationData::smgr_rnode, SmgrIsTemp, RelFileNode::spcNode, and WAIT_EVENT_DATA_FILE_WRITE.

◆ mdwriteback()

void mdwriteback ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
BlockNumber  nblocks 
)

Definition at line 588 of file md.c.

590 {
591  /*
592  * Issue flush requests in as few requests as possible; have to split at
593  * segment boundaries though, since those are actually separate files.
594  */
595  while (nblocks > 0)
596  {
597  BlockNumber nflush = nblocks;
598  off_t seekpos;
599  MdfdVec *v;
600  int segnum_start,
601  segnum_end;
602 
603  v = _mdfd_getseg(reln, forknum, blocknum, true /* not used */ ,
604  EXTENSION_DONT_OPEN);
605 
606  /*
607  * We might be flushing buffers of already removed relations, that's
608  * ok, just ignore that case. If the segment file wasn't open already
609  * (ie from a recent mdwrite()), then we don't want to re-open it, to
610  * avoid a race with PROCSIGNAL_BARRIER_SMGRRELEASE that might leave
611  * us with a descriptor to a file that is about to be unlinked.
612  */
613  if (!v)
614  return;
615 
616  /* compute offset inside the current segment */
617  segnum_start = blocknum / RELSEG_SIZE;
618 
619  /* compute number of desired writes within the current segment */
620  segnum_end = (blocknum + nblocks - 1) / RELSEG_SIZE;
621  if (segnum_start != segnum_end)
622  nflush = RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE));
623 
624  Assert(nflush >= 1);
625  Assert(nflush <= nblocks);
626 
627  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
628 
629  FileWriteback(v->mdfd_vfd, seekpos, (off_t) BLCKSZ * nflush, WAIT_EVENT_DATA_FILE_FLUSH);
630 
631  nblocks -= nflush;
632  blocknum += nflush;
633  }
634 }
void FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info)
Definition: fd.c:2089
@ WAIT_EVENT_DATA_FILE_FLUSH
Definition: wait_event.h:174

References _mdfd_getseg(), Assert(), EXTENSION_DONT_OPEN, FileWriteback(), _MdfdVec::mdfd_vfd, and WAIT_EVENT_DATA_FILE_FLUSH.

◆ register_dirty_segment()

static void register_dirty_segment ( SMgrRelation  reln,
ForkNumber  forknum,
MdfdVec *  seg 
)
static

Definition at line 982 of file md.c.

983 {
984  FileTag tag;
985 
986  INIT_MD_FILETAG(tag, reln->smgr_rnode.node, forknum, seg->mdfd_segno);
987 
988  /* Temp relations should never be fsync'd */
989  Assert(!SmgrIsTemp(reln));
990 
991  if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false /* retryOnError */ ))
992  {
993  ereport(DEBUG1,
994  (errmsg_internal("could not forward fsync request because request queue is full")));
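995 
996  if (FileSync(seg->mdfd_vfd, WAIT_EVENT_DATA_FILE_SYNC) < 0)
997  ereport(data_sync_elevel(ERROR),
998  (errcode_for_file_access(),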
999  errmsg("could not fsync file \"%s\": %m",
1000  FilePathName(seg->mdfd_vfd))));
1001  }
1002 }
int errmsg_internal(const char *fmt,...)
Definition: elog.c:991
#define DEBUG1
Definition: elog.h:24
@ SYNC_REQUEST
Definition: sync.h:25

References Assert(), data_sync_elevel(), DEBUG1, ereport, errcode_for_file_access(), errmsg(), errmsg_internal(), ERROR, FilePathName(), FileSync(), INIT_MD_FILETAG, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, RelFileNodeBackend::node, RegisterSyncRequest(), SMgrRelationData::smgr_rnode, SmgrIsTemp, SYNC_REQUEST, and WAIT_EVENT_DATA_FILE_SYNC.

Referenced by mdextend(), mdtruncate(), and mdwrite().

◆ register_forget_request()

static void register_forget_request ( RelFileNodeBackend  rnode,
ForkNumber  forknum,
BlockNumber  segno 
)
static

Definition at line 1025 of file md.c.

1027 {
1028  FileTag tag;
1029 
1030  INIT_MD_FILETAG(tag, rnode.node, forknum, segno);
1031 
1032  RegisterSyncRequest(&tag, SYNC_FORGET_REQUEST, true /* retryOnError */ );
1033 }
@ SYNC_FORGET_REQUEST
Definition: sync.h:27

References INIT_MD_FILETAG, RelFileNodeBackend::node, RegisterSyncRequest(), and SYNC_FORGET_REQUEST.

Referenced by mdunlinkfork().

◆ register_unlink_segment()

static void register_unlink_segment ( RelFileNodeBackend  rnode,
ForkNumber  forknum,
BlockNumber  segno 
)
static

Definition at line 1008 of file md.c.

1010 {
1011  FileTag tag;
1012 
1013  INIT_MD_FILETAG(tag, rnode.node, forknum, segno);
1014 
1015  /* Should never be used with temp relations */
1016  Assert(!RelFileNodeBackendIsTemp(rnode));
1017 
1018  RegisterSyncRequest(&tag, SYNC_UNLINK_REQUEST, true /* retryOnError */ );
1019 }
@ SYNC_UNLINK_REQUEST
Definition: sync.h:26

References Assert(), INIT_MD_FILETAG, RelFileNodeBackend::node, RegisterSyncRequest(), RelFileNodeBackendIsTemp, and SYNC_UNLINK_REQUEST.

Referenced by mdunlinkfork().

Variable Documentation

◆ MdCxt

MemoryContext MdCxt
static

Definition at line 88 of file md.c.

Referenced by _fdvec_resize(), and mdinit().