PostgreSQL Source Code  git master
md.c File Reference
#include "postgres.h"
#include <unistd.h>
#include <fcntl.h>
#include <sys/file.h>
#include "access/xlog.h"
#include "access/xlogutils.h"
#include "commands/tablespace.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/md.h"
#include "storage/relfilelocator.h"
#include "storage/smgr.h"
#include "storage/sync.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"
Include dependency graph for md.c:

Go to the source code of this file.

Data Structures

struct  _MdfdVec
 

Macros

#define INIT_MD_FILETAG(a, xx_rlocator, xx_forknum, xx_segno)
 
#define EXTENSION_FAIL   (1 << 0)
 
#define EXTENSION_RETURN_NULL   (1 << 1)
 
#define EXTENSION_CREATE   (1 << 2)
 
#define EXTENSION_CREATE_RECOVERY   (1 << 3)
 
#define EXTENSION_DONT_CHECK_SIZE   (1 << 4)
 
#define EXTENSION_DONT_OPEN   (1 << 5)
 

Typedefs

typedef struct _MdfdVec MdfdVec
 

Functions

static void mdunlinkfork (RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
 
static MdfdVec * mdopenfork (SMgrRelation reln, ForkNumber forknum, int behavior)
 
static void register_dirty_segment (SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
 
static void register_unlink_segment (RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno)
 
static void register_forget_request (RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno)
 
static void _fdvec_resize (SMgrRelation reln, ForkNumber forknum, int nseg)
 
static char * _mdfd_segpath (SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
 
static MdfdVec * _mdfd_openseg (SMgrRelation reln, ForkNumber forknum, BlockNumber segno, int oflags)
 
static MdfdVec * _mdfd_getseg (SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, bool skipFsync, int behavior)
 
static BlockNumber _mdnblocks (SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
 
void mdinit (void)
 
bool mdexists (SMgrRelation reln, ForkNumber forknum)
 
void mdcreate (SMgrRelation reln, ForkNumber forknum, bool isRedo)
 
void mdunlink (RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
 
static int do_truncate (const char *path)
 
void mdextend (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
 
void mdopen (SMgrRelation reln)
 
void mdclose (SMgrRelation reln, ForkNumber forknum)
 
bool mdprefetch (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
 
void mdwriteback (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
 
void mdread (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void *buffer)
 
void mdwrite (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
 
BlockNumber mdnblocks (SMgrRelation reln, ForkNumber forknum)
 
void mdtruncate (SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
 
void mdimmedsync (SMgrRelation reln, ForkNumber forknum)
 
void ForgetDatabaseSyncRequests (Oid dbid)
 
void DropRelationFiles (RelFileLocator *delrels, int ndelrels, bool isRedo)
 
int mdsyncfiletag (const FileTag *ftag, char *path)
 
int mdunlinkfiletag (const FileTag *ftag, char *path)
 
bool mdfiletagmatches (const FileTag *ftag, const FileTag *candidate)
 

Variables

static MemoryContext MdCxt
 

Macro Definition Documentation

◆ EXTENSION_CREATE

#define EXTENSION_CREATE   (1 << 2)

Definition at line 108 of file md.c.

◆ EXTENSION_CREATE_RECOVERY

#define EXTENSION_CREATE_RECOVERY   (1 << 3)

Definition at line 110 of file md.c.

◆ EXTENSION_DONT_CHECK_SIZE

#define EXTENSION_DONT_CHECK_SIZE   (1 << 4)

Definition at line 118 of file md.c.

◆ EXTENSION_DONT_OPEN

#define EXTENSION_DONT_OPEN   (1 << 5)

Definition at line 120 of file md.c.

◆ EXTENSION_FAIL

#define EXTENSION_FAIL   (1 << 0)

Definition at line 104 of file md.c.

◆ EXTENSION_RETURN_NULL

#define EXTENSION_RETURN_NULL   (1 << 1)

Definition at line 106 of file md.c.

◆ INIT_MD_FILETAG

#define INIT_MD_FILETAG (   a,
  xx_rlocator,
  xx_forknum,
  xx_segno 
)
Value:
( \
memset(&(a), 0, sizeof(FileTag)), \
(a).handler = SYNC_HANDLER_MD, \
(a).rlocator = (xx_rlocator), \
(a).forknum = (xx_forknum), \
(a).segno = (xx_segno) \
)
int a
Definition: isn.c:69
FileTag
Definition: sync.h:51
@ SYNC_HANDLER_MD
Definition: sync.h:37

Definition at line 92 of file md.c.

Typedef Documentation

◆ MdfdVec

typedef struct _MdfdVec MdfdVec

Function Documentation

◆ _fdvec_resize()

static void _fdvec_resize ( SMgrRelation  reln,
ForkNumber  forknum,
int  nseg 
)
static

Definition at line 1142 of file md.c.

1145 {
1146  if (nseg == 0)
1147  {
1148  if (reln->md_num_open_segs[forknum] > 0)
1149  {
1150  pfree(reln->md_seg_fds[forknum]);
1151  reln->md_seg_fds[forknum] = NULL;
1152  }
1153  }
1154  else if (reln->md_num_open_segs[forknum] == 0)
1155  {
1156  reln->md_seg_fds[forknum] =
1157  MemoryContextAlloc(MdCxt, sizeof(MdfdVec) * nseg);
1158  }
1159  else
1160  {
1161  /*
1162  * It doesn't seem worthwhile complicating the code to amortize
1163  * repalloc() calls. Those are far faster than PathNameOpenFile() or
1164  * FileClose(), and the memory context internally will sometimes avoid
1165  * doing an actual reallocation.
1166  */
1167  reln->md_seg_fds[forknum] =
1168  repalloc(reln->md_seg_fds[forknum],
1169  sizeof(MdfdVec) * nseg);
1170  }
1171 
1172  reln->md_num_open_segs[forknum] = nseg;
1173 }
void pfree(void *pointer)
Definition: mcxt.c:1436
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1456
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:1005
static MemoryContext MdCxt
Definition: md.c:88
int md_num_open_segs[MAX_FORKNUM+1]
Definition: smgr.h:68
struct _MdfdVec * md_seg_fds[MAX_FORKNUM+1]
Definition: smgr.h:69
_MdfdVec
Definition: md.c:83

References SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, MdCxt, MemoryContextAlloc(), pfree(), and repalloc().

Referenced by _mdfd_openseg(), mdclose(), mdcreate(), mdimmedsync(), mdopenfork(), and mdtruncate().

◆ _mdfd_getseg()

static MdfdVec * _mdfd_getseg ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blkno,
bool  skipFsync,
int  behavior 
)
static

Definition at line 1248 of file md.c.

1250 {
1251  MdfdVec *v;
1252  BlockNumber targetseg;
1253  BlockNumber nextsegno;
1254 
1255  /* some way to handle non-existent segments needs to be specified */
1256  Assert(behavior &
1257  (EXTENSION_FAIL | EXTENSION_CREATE | EXTENSION_RETURN_NULL |
1258  EXTENSION_DONT_OPEN));
1259 
1260  targetseg = blkno / ((BlockNumber) RELSEG_SIZE);
1261 
1262  /* if an existing and opened segment, we're done */
1263  if (targetseg < reln->md_num_open_segs[forknum])
1264  {
1265  v = &reln->md_seg_fds[forknum][targetseg];
1266  return v;
1267  }
1268 
1269  /* The caller only wants the segment if we already had it open. */
1270  if (behavior & EXTENSION_DONT_OPEN)
1271  return NULL;
1272 
1273  /*
1274  * The target segment is not yet open. Iterate over all the segments
1275  * between the last opened and the target segment. This way missing
1276  * segments either raise an error, or get created (according to
1277  * 'behavior'). Start with either the last opened, or the first segment if
1278  * none was opened before.
1279  */
1280  if (reln->md_num_open_segs[forknum] > 0)
1281  v = &reln->md_seg_fds[forknum][reln->md_num_open_segs[forknum] - 1];
1282  else
1283  {
1284  v = mdopenfork(reln, forknum, behavior);
1285  if (!v)
1286  return NULL; /* if behavior & EXTENSION_RETURN_NULL */
1287  }
1288 
1289  for (nextsegno = reln->md_num_open_segs[forknum];
1290  nextsegno <= targetseg; nextsegno++)
1291  {
1292  BlockNumber nblocks = _mdnblocks(reln, forknum, v);
1293  int flags = 0;
1294 
1295  Assert(nextsegno == v->mdfd_segno + 1);
1296 
1297  if (nblocks > ((BlockNumber) RELSEG_SIZE))
1298  elog(FATAL, "segment too big");
1299 
1300  if ((behavior & EXTENSION_CREATE) ||
1301  (InRecovery && (behavior & EXTENSION_CREATE_RECOVERY)))
1302  {
1303  /*
1304  * Normally we will create new segments only if authorized by the
1305  * caller (i.e., we are doing mdextend()). But when doing WAL
1306  * recovery, create segments anyway; this allows cases such as
1307  * replaying WAL data that has a write into a high-numbered
1308  * segment of a relation that was later deleted. We want to go
1309  * ahead and create the segments so we can finish out the replay.
1310  *
1311  * We have to maintain the invariant that segments before the last
1312  * active segment are of size RELSEG_SIZE; therefore, if
1313  * extending, pad them out with zeroes if needed. (This only
1314  * matters if in recovery, or if the caller is extending the
1315  * relation discontiguously, but that can happen in hash indexes.)
1316  */
1317  if (nblocks < ((BlockNumber) RELSEG_SIZE))
1318  {
1319  char *zerobuf = palloc0(BLCKSZ);
1320 
1321  mdextend(reln, forknum,
1322  nextsegno * ((BlockNumber) RELSEG_SIZE) - 1,
1323  zerobuf, skipFsync);
1324  pfree(zerobuf);
1325  }
1326  flags = O_CREAT;
1327  }
1328  else if (!(behavior & EXTENSION_DONT_CHECK_SIZE) &&
1329  nblocks < ((BlockNumber) RELSEG_SIZE))
1330  {
1331  /*
1332  * When not extending (or explicitly including truncated
1333  * segments), only open the next segment if the current one is
1334  * exactly RELSEG_SIZE. If not (this branch), either return NULL
1335  * or fail.
1336  */
1337  if (behavior & EXTENSION_RETURN_NULL)
1338  {
1339  /*
1340  * Some callers discern between reasons for _mdfd_getseg()
1341  * returning NULL based on errno. As there's no failing
1342  * syscall involved in this case, explicitly set errno to
1343  * ENOENT, as that seems the closest interpretation.
1344  */
1345  errno = ENOENT;
1346  return NULL;
1347  }
1348 
1349  ereport(ERROR,
1350  (errcode_for_file_access(),
1351  errmsg("could not open file \"%s\" (target block %u): previous segment is only %u blocks",
1352  _mdfd_segpath(reln, forknum, nextsegno),
1353  blkno, nblocks)));
1354  }
1355 
1356  v = _mdfd_openseg(reln, forknum, nextsegno, flags);
1357 
1358  if (v == NULL)
1359  {
1360  if ((behavior & EXTENSION_RETURN_NULL) &&
1361  FILE_POSSIBLY_DELETED(errno))
1362  return NULL;
1363  ereport(ERROR,
1364  (errcode_for_file_access(),
1365  errmsg("could not open file \"%s\" (target block %u): %m",
1366  _mdfd_segpath(reln, forknum, nextsegno),
1367  blkno)));
1368  }
1369  }
1370 
1371  return v;
1372 }
uint32 BlockNumber
Definition: block.h:31
int errcode_for_file_access(void)
Definition: elog.c:881
int errmsg(const char *fmt,...)
Definition: elog.c:1069
#define FATAL
Definition: elog.h:41
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
#define FILE_POSSIBLY_DELETED(err)
Definition: fd.h:75
Assert(fmt[strlen(fmt) - 1] !='\n')
void * palloc0(Size size)
Definition: mcxt.c:1241
#define EXTENSION_CREATE_RECOVERY
Definition: md.c:110
static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:1378
#define EXTENSION_DONT_OPEN
Definition: md.c:120
void mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
Definition: md.c:449
static MdfdVec * _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno, int oflags)
Definition: md.c:1203
#define EXTENSION_RETURN_NULL
Definition: md.c:106
static char * _mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
Definition: md.c:1180
#define EXTENSION_CREATE
Definition: md.c:108
#define EXTENSION_DONT_CHECK_SIZE
Definition: md.c:118
#define EXTENSION_FAIL
Definition: md.c:104
static MdfdVec * mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
Definition: md.c:514
BlockNumber mdfd_segno
Definition: md.c:85
bool InRecovery
Definition: xlogutils.c:53

References _mdfd_openseg(), _mdfd_segpath(), _mdnblocks(), Assert(), elog(), ereport, errcode_for_file_access(), errmsg(), ERROR, EXTENSION_CREATE, EXTENSION_CREATE_RECOVERY, EXTENSION_DONT_CHECK_SIZE, EXTENSION_DONT_OPEN, EXTENSION_FAIL, EXTENSION_RETURN_NULL, FATAL, FILE_POSSIBLY_DELETED, InRecovery, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, mdextend(), _MdfdVec::mdfd_segno, mdopenfork(), palloc0(), and pfree().

Referenced by mdextend(), mdprefetch(), mdread(), mdwrite(), and mdwriteback().

◆ _mdfd_openseg()

static MdfdVec * _mdfd_openseg ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  segno,
int  oflags 
)
static

Definition at line 1203 of file md.c.

1205 {
1206  MdfdVec *v;
1207  File fd;
1208  char *fullpath;
1209 
1210  fullpath = _mdfd_segpath(reln, forknum, segno);
1211 
1212  /* open the file */
1213  fd = PathNameOpenFile(fullpath, O_RDWR | PG_BINARY | oflags);
1214 
1215  pfree(fullpath);
1216 
1217  if (fd < 0)
1218  return NULL;
1219 
1220  /*
1221  * Segments are always opened in order from lowest to highest, so we must
1222  * be adding a new one at the end.
1223  */
1224  Assert(segno == reln->md_num_open_segs[forknum]);
1225 
1226  _fdvec_resize(reln, forknum, segno + 1);
1227 
1228  /* fill the entry */
1229  v = &reln->md_seg_fds[forknum][segno];
1230  v->mdfd_vfd = fd;
1231  v->mdfd_segno = segno;
1232 
1233  Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
1234 
1235  /* all done */
1236  return v;
1237 }
#define PG_BINARY
Definition: c.h:1260
File PathNameOpenFile(const char *fileName, int fileFlags)
Definition: fd.c:1481
int File
Definition: fd.h:54
static void _fdvec_resize(SMgrRelation reln, ForkNumber forknum, int nseg)
Definition: md.c:1142
static int fd(const char *x, int i)
Definition: preproc-init.c:105
File mdfd_vfd
Definition: md.c:84

References _fdvec_resize(), _mdfd_segpath(), _mdnblocks(), Assert(), fd(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, PathNameOpenFile(), pfree(), and PG_BINARY.

Referenced by _mdfd_getseg(), mdimmedsync(), and mdnblocks().

◆ _mdfd_segpath()

static char * _mdfd_segpath ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  segno 
)
static

Definition at line 1180 of file md.c.

1181 {
1182  char *path,
1183  *fullpath;
1184 
1185  path = relpath(reln->smgr_rlocator, forknum);
1186 
1187  if (segno > 0)
1188  {
1189  fullpath = psprintf("%s.%u", path, segno);
1190  pfree(path);
1191  }
1192  else
1193  fullpath = path;
1194 
1195  return fullpath;
1196 }
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
#define relpath(rlocator, forknum)
Definition: relpath.h:94
RelFileLocatorBackend smgr_rlocator
Definition: smgr.h:42

References pfree(), psprintf(), relpath, and SMgrRelationData::smgr_rlocator.

Referenced by _mdfd_getseg(), _mdfd_openseg(), and mdsyncfiletag().

◆ _mdnblocks()

static BlockNumber _mdnblocks ( SMgrRelation  reln,
ForkNumber  forknum,
MdfdVec *  seg 
)
static

Definition at line 1378 of file md.c.

1379 {
1380  off_t len;
1381 
1382  len = FileSize(seg->mdfd_vfd);
1383  if (len < 0)
1384  ereport(ERROR,
1385  (errcode_for_file_access(),
1386  errmsg("could not seek to end of file \"%s\": %m",
1387  FilePathName(seg->mdfd_vfd))));
1388  /* note that this calculation will ignore any partial block at EOF */
1389  return (BlockNumber) (len / BLCKSZ);
1390 }
char * FilePathName(File file)
Definition: fd.c:2262
off_t FileSize(File file)
Definition: fd.c:2210
const void size_t len

References ereport, errcode_for_file_access(), errmsg(), ERROR, FilePathName(), FileSize(), len, and _MdfdVec::mdfd_vfd.

Referenced by _mdfd_getseg(), _mdfd_openseg(), mdextend(), mdnblocks(), and mdopenfork().

◆ do_truncate()

static int do_truncate ( const char *  path)
static

Definition at line 312 of file md.c.

313 {
314  int save_errno;
315  int ret;
316 
317  ret = pg_truncate(path, 0);
318 
319  /* Log a warning here to avoid repetition in callers. */
320  if (ret < 0 && errno != ENOENT)
321  {
322  save_errno = errno;
323  ereport(WARNING,
324  (errcode_for_file_access(),
325  errmsg("could not truncate file \"%s\": %m", path)));
326  errno = save_errno;
327  }
328 
329  return ret;
330 }
#define WARNING
Definition: elog.h:36
int pg_truncate(const char *path, off_t length)
Definition: fd.c:631

References ereport, errcode_for_file_access(), errmsg(), pg_truncate(), and WARNING.

Referenced by mdunlinkfork().

◆ DropRelationFiles()

void DropRelationFiles ( RelFileLocator *  delrels,
int  ndelrels,
bool  isRedo 
)

Definition at line 1110 of file md.c.

1111 {
1112  SMgrRelation *srels;
1113  int i;
1114 
1115  srels = palloc(sizeof(SMgrRelation) * ndelrels);
1116  for (i = 0; i < ndelrels; i++)
1117  {
1118  SMgrRelation srel = smgropen(delrels[i], InvalidBackendId);
1119 
1120  if (isRedo)
1121  {
1122  ForkNumber fork;
1123 
1124  for (fork = 0; fork <= MAX_FORKNUM; fork++)
1125  XLogDropRelation(delrels[i], fork);
1126  }
1127  srels[i] = srel;
1128  }
1129 
1130  smgrdounlinkall(srels, ndelrels, isRedo);
1131 
1132  for (i = 0; i < ndelrels; i++)
1133  smgrclose(srels[i]);
1134  pfree(srels);
1135 }
#define InvalidBackendId
Definition: backendid.h:23
int i
Definition: isn.c:73
void * palloc(Size size)
Definition: mcxt.c:1210
ForkNumber
Definition: relpath.h:48
#define MAX_FORKNUM
Definition: relpath.h:62
void smgrclose(SMgrRelation reln)
Definition: smgr.c:256
SMgrRelation smgropen(RelFileLocator rlocator, BackendId backend)
Definition: smgr.c:146
void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
Definition: smgr.c:420
void XLogDropRelation(RelFileLocator rlocator, ForkNumber forknum)
Definition: xlogutils.c:658

References i, InvalidBackendId, MAX_FORKNUM, palloc(), pfree(), smgrclose(), smgrdounlinkall(), smgropen(), and XLogDropRelation().

Referenced by FinishPreparedTransaction(), xact_redo_abort(), and xact_redo_commit().

◆ ForgetDatabaseSyncRequests()

void ForgetDatabaseSyncRequests ( Oid  dbid)

Definition at line 1092 of file md.c.

1093 {
1094  FileTag tag;
1095  RelFileLocator rlocator;
1096 
1097  rlocator.dbOid = dbid;
1098  rlocator.spcOid = 0;
1099  rlocator.relNumber = 0;
1100 
1101  INIT_MD_FILETAG(tag, rlocator, InvalidForkNumber, InvalidBlockNumber);
1102 
1103  RegisterSyncRequest(&tag, SYNC_FILTER_REQUEST, true /* retryOnError */ );
1104 }
#define InvalidBlockNumber
Definition: block.h:33
#define INIT_MD_FILETAG(a, xx_rlocator, xx_forknum, xx_segno)
Definition: md.c:92
@ InvalidForkNumber
Definition: relpath.h:49
RelFileNumber relNumber
bool RegisterSyncRequest(const FileTag *ftag, SyncRequestType type, bool retryOnError)
Definition: sync.c:587
@ SYNC_FILTER_REQUEST
Definition: sync.h:28

References RelFileLocator::dbOid, INIT_MD_FILETAG, InvalidBlockNumber, InvalidForkNumber, RegisterSyncRequest(), RelFileLocator::relNumber, RelFileLocator::spcOid, and SYNC_FILTER_REQUEST.

Referenced by createdb_failure_callback(), dbase_redo(), and dropdb().

◆ mdclose()

void mdclose ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 568 of file md.c.

569 {
570  int nopensegs = reln->md_num_open_segs[forknum];
571 
572  /* No work if already closed */
573  if (nopensegs == 0)
574  return;
575 
576  /* close segments starting from the end */
577  while (nopensegs > 0)
578  {
579  MdfdVec *v = &reln->md_seg_fds[forknum][nopensegs - 1];
580 
581  FileClose(v->mdfd_vfd);
582  _fdvec_resize(reln, forknum, nopensegs - 1);
583  nopensegs--;
584  }
585 }
void FileClose(File file)
Definition: fd.c:1884

References _fdvec_resize(), FileClose(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, and _MdfdVec::mdfd_vfd.

Referenced by mdexists().

◆ mdcreate()

void mdcreate ( SMgrRelation  reln,
ForkNumber  forknum,
bool  isRedo 
)

Definition at line 182 of file md.c.

183 {
184  MdfdVec *mdfd;
185  char *path;
186  File fd;
187 
188  if (isRedo && reln->md_num_open_segs[forknum] > 0)
189  return; /* created and opened already... */
190 
191  Assert(reln->md_num_open_segs[forknum] == 0);
192 
193  /*
194  * We may be using the target table space for the first time in this
195  * database, so create a per-database subdirectory if needed.
196  *
197  * XXX this is a fairly ugly violation of module layering, but this seems
198  * to be the best place to put the check. Maybe TablespaceCreateDbspace
199  * should be here and not in commands/tablespace.c? But that would imply
200  * importing a lot of stuff that smgr.c oughtn't know, either.
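201  */
202  TablespaceCreateDbspace(reln->smgr_rlocator.locator.spcOid,
203  reln->smgr_rlocator.locator.dbOid,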
204  isRedo);
205 
206  path = relpath(reln->smgr_rlocator, forknum);
207 
208  fd = PathNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY);
209 
210  if (fd < 0)
211  {
212  int save_errno = errno;
213 
214  if (isRedo)
215  fd = PathNameOpenFile(path, O_RDWR | PG_BINARY);
216  if (fd < 0)
217  {
218  /* be sure to report the error reported by create, not open */
219  errno = save_errno;
220  ereport(ERROR,
221  (errcode_for_file_access(),
222  errmsg("could not create file \"%s\": %m", path)));
223  }
224  }
225 
226  pfree(path);
227 
228  _fdvec_resize(reln, forknum, 1);
229  mdfd = &reln->md_seg_fds[forknum][0];
230  mdfd->mdfd_vfd = fd;
231  mdfd->mdfd_segno = 0;
232 }
void TablespaceCreateDbspace(Oid spcOid, Oid dbOid, bool isRedo)
Definition: tablespace.c:118
RelFileLocator locator

References _fdvec_resize(), Assert(), RelFileLocator::dbOid, ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), RelFileLocatorBackend::locator, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, PathNameOpenFile(), pfree(), PG_BINARY, relpath, SMgrRelationData::smgr_rlocator, RelFileLocator::spcOid, and TablespaceCreateDbspace().

◆ mdexists()

bool mdexists ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 163 of file md.c.

164 {
165  /*
166  * Close it first, to ensure that we notice if the fork has been unlinked
167  * since we opened it. As an optimization, we can skip that in recovery,
168  * which already closes relations when dropping them.
169  */
170  if (!InRecovery)
171  mdclose(reln, forknum);
172 
173  return (mdopenfork(reln, forknum, EXTENSION_RETURN_NULL) != NULL);
174 }
void mdclose(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:568

References EXTENSION_RETURN_NULL, InRecovery, mdclose(), and mdopenfork().

◆ mdextend()

void mdextend ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
const void *  buffer,
bool  skipFsync 
)

Definition at line 449 of file md.c.

451 {
452  off_t seekpos;
453  int nbytes;
454  MdfdVec *v;
455 
456  /* This assert is too expensive to have on normally ... */
457 #ifdef CHECK_WRITE_VS_EXTEND
458  Assert(blocknum >= mdnblocks(reln, forknum));
459 #endif
460 
461  /*
462  * If a relation manages to grow to 2^32-1 blocks, refuse to extend it any
463  * more --- we mustn't create a block whose number actually is
464  * InvalidBlockNumber. (Note that this failure should be unreachable
465  * because of upstream checks in bufmgr.c.)
466  */
467  if (blocknum == InvalidBlockNumber)
468  ereport(ERROR,
469  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
470  errmsg("cannot extend file \"%s\" beyond %u blocks",
471  relpath(reln->smgr_rlocator, forknum),
472  InvalidBlockNumber)));
473 
474  v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE);
475 
476  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
477 
478  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
479 
480  if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ)
481  {
482  if (nbytes < 0)
483  ereport(ERROR,
484  (errcode_for_file_access(),
485  errmsg("could not extend file \"%s\": %m",
486  FilePathName(v->mdfd_vfd)),
487  errhint("Check free disk space.")));
488  /* short write: complain appropriately */
489  ereport(ERROR,
490  (errcode(ERRCODE_DISK_FULL),
491  errmsg("could not extend file \"%s\": wrote only %d of %d bytes at block %u",
492  FilePathName(v->mdfd_vfd),
493  nbytes, BLCKSZ, blocknum),
494  errhint("Check free disk space.")));
495  }
496 
497  if (!skipFsync && !SmgrIsTemp(reln))
498  register_dirty_segment(reln, forknum, v);
499 
500  Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
501 }
int errhint(const char *fmt,...)
Definition: elog.c:1316
int errcode(int sqlerrcode)
Definition: elog.c:858
int FileWrite(File file, const void *buffer, size_t amount, off_t offset, uint32 wait_event_info)
Definition: fd.c:2091
BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:801
static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:1022
static MdfdVec * _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, bool skipFsync, int behavior)
Definition: md.c:1248
#define SmgrIsTemp(smgr)
Definition: smgr.h:77
@ WAIT_EVENT_DATA_FILE_EXTEND
Definition: wait_event.h:177

References _mdfd_getseg(), _mdnblocks(), Assert(), ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXTENSION_CREATE, FilePathName(), FileWrite(), InvalidBlockNumber, _MdfdVec::mdfd_vfd, mdnblocks(), register_dirty_segment(), relpath, SMgrRelationData::smgr_rlocator, SmgrIsTemp, and WAIT_EVENT_DATA_FILE_EXTEND.

Referenced by _mdfd_getseg().

◆ mdfiletagmatches()

bool mdfiletagmatches ( const FileTag *  ftag,
const FileTag *  candidate 
)

Definition at line 1467 of file md.c.

1468 {
1469  /*
1470  * For now we only use filter requests as a way to drop all scheduled
1471  * callbacks relating to a given database, when dropping the database.
1472  * We'll return true for all candidates that have the same database OID as
1473  * the ftag from the SYNC_FILTER_REQUEST request, so they're forgotten.
1474  */
1475  return ftag->rlocator.dbOid == candidate->rlocator.dbOid;
1476 }
RelFileLocator rlocator
Definition: sync.h:54

References RelFileLocator::dbOid, and FileTag::rlocator.

◆ mdimmedsync()

void mdimmedsync ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 960 of file md.c.

961 {
962  int segno;
963  int min_inactive_seg;
964 
965  /*
966  * NOTE: mdnblocks makes sure we have opened all active segments, so that
967  * fsync loop will get them all!
968  */
969  mdnblocks(reln, forknum);
970 
971  min_inactive_seg = segno = reln->md_num_open_segs[forknum];
972 
973  /*
974  * Temporarily open inactive segments, then close them after sync. There
975  * may be some inactive segments left opened after fsync() error, but that
976  * is harmless. We don't bother to clean them up and take a risk of
977  * further trouble. The next mdclose() will soon close them.
978  */
979  while (_mdfd_openseg(reln, forknum, segno, 0) != NULL)
980  segno++;
981 
982  while (segno > 0)
983  {
984  MdfdVec *v = &reln->md_seg_fds[forknum][segno - 1];
985 
986  /*
987  * fsyncs done through mdimmedsync() should be tracked in a separate
988  * IOContext than those done through mdsyncfiletag() to differentiate
989  * between unavoidable client backend fsyncs (e.g. those done during
990  * index build) and those which ideally would have been done by the
991  * checkpointer. Since other IO operations bypassing the buffer
992  * manager could also be tracked in such an IOContext, wait until
993  * these are also tracked to track immediate fsyncs.
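994  */
995  if (FileSync(v->mdfd_vfd, WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC) < 0)
996  ereport(data_sync_elevel(ERROR),
997  (errcode_for_file_access(),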
998  errmsg("could not fsync file \"%s\": %m",
999  FilePathName(v->mdfd_vfd))));
1000 
1001  /* Close inactive segments immediately */
1002  if (segno > min_inactive_seg)
1003  {
1004  FileClose(v->mdfd_vfd);
1005  _fdvec_resize(reln, forknum, segno - 1);
1006  }
1007 
1008  segno--;
1009  }
1010 }
int FileSync(File file, uint32 wait_event_info)
Definition: fd.c:2189
int data_sync_elevel(int elevel)
Definition: fd.c:3737
@ WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC
Definition: wait_event.h:179

References _fdvec_resize(), _mdfd_openseg(), data_sync_elevel(), ereport, errcode_for_file_access(), errmsg(), ERROR, FileClose(), FilePathName(), FileSync(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, mdnblocks(), and WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC.

◆ mdinit()

void mdinit ( void  )

Definition at line 150 of file md.c.

151 {
152  MdCxt = AllocSetContextCreate(TopMemoryContext,
153  "MdSmgr",
154  ALLOCSET_DEFAULT_SIZES);
155 }
MemoryContext TopMemoryContext
Definition: mcxt.c:141
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:153

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, MdCxt, and TopMemoryContext.

◆ mdnblocks()

BlockNumber mdnblocks ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 801 of file md.c.

802 {
803  MdfdVec *v;
804  BlockNumber nblocks;
805  BlockNumber segno;
806 
807  mdopenfork(reln, forknum, EXTENSION_FAIL);
808 
809  /* mdopen has opened the first segment */
810  Assert(reln->md_num_open_segs[forknum] > 0);
811 
812  /*
813  * Start from the last open segments, to avoid redundant seeks. We have
814  * previously verified that these segments are exactly RELSEG_SIZE long,
815  * and it's useless to recheck that each time.
816  *
817  * NOTE: this assumption could only be wrong if another backend has
818  * truncated the relation. We rely on higher code levels to handle that
819  * scenario by closing and re-opening the md fd, which is handled via
820  * relcache flush. (Since the checkpointer doesn't participate in
821  * relcache flush, it could have segment entries for inactive segments;
822  * that's OK because the checkpointer never needs to compute relation
823  * size.)
824  */
825  segno = reln->md_num_open_segs[forknum] - 1;
826  v = &reln->md_seg_fds[forknum][segno];
827 
828  for (;;)
829  {
830  nblocks = _mdnblocks(reln, forknum, v);
831  if (nblocks > ((BlockNumber) RELSEG_SIZE))
832  elog(FATAL, "segment too big");
833  if (nblocks < ((BlockNumber) RELSEG_SIZE))
834  return (segno * ((BlockNumber) RELSEG_SIZE)) + nblocks;
835 
836  /*
837  * If segment is exactly RELSEG_SIZE, advance to next one.
838  */
839  segno++;
840 
841  /*
842  * We used to pass O_CREAT here, but that has the disadvantage that it
843  * might create a segment which has vanished through some operating
844  * system misadventure. In such a case, creating the segment here
845  * undermines _mdfd_getseg's attempts to notice and report an error
846  * upon access to a missing segment.
847  */
848  v = _mdfd_openseg(reln, forknum, segno, 0);
849  if (v == NULL)
850  return segno * ((BlockNumber) RELSEG_SIZE);
851  }
852 }

References _mdfd_openseg(), _mdnblocks(), Assert(), elog(), EXTENSION_FAIL, FATAL, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, and mdopenfork().

Referenced by mdextend(), mdimmedsync(), mdtruncate(), and mdwrite().

◆ mdopen()

void mdopen ( SMgrRelation  reln)

Definition at line 557 of file md.c.

558 {
559  /* mark it not open */
560  for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)
561  reln->md_num_open_segs[forknum] = 0;
562 }

References MAX_FORKNUM, and SMgrRelationData::md_num_open_segs.

◆ mdopenfork()

static MdfdVec * mdopenfork ( SMgrRelation  reln,
ForkNumber  forknum,
int  behavior 
)
static

Definition at line 514 of file md.c.

515 {
516  MdfdVec *mdfd;
517  char *path;
518  File fd;
519 
520  /* No work if already open */
521  if (reln->md_num_open_segs[forknum] > 0)
522  return &reln->md_seg_fds[forknum][0];
523 
524  path = relpath(reln->smgr_rlocator, forknum);
525 
526  fd = PathNameOpenFile(path, O_RDWR | PG_BINARY);
527 
528  if (fd < 0)
529  {
530  if ((behavior & EXTENSION_RETURN_NULL) &&
531  FILE_POSSIBLY_DELETED(errno))
532  {
533  pfree(path);
534  return NULL;
535  }
536  ereport(ERROR,
537  (errcode_for_file_access(),
538  errmsg("could not open file \"%s\": %m", path)));
539  }
540 
541  pfree(path);
542 
543  _fdvec_resize(reln, forknum, 1);
544  mdfd = &reln->md_seg_fds[forknum][0];
545  mdfd->mdfd_vfd = fd;
546  mdfd->mdfd_segno = 0;
547 
548  Assert(_mdnblocks(reln, forknum, mdfd) <= ((BlockNumber) RELSEG_SIZE));
549 
550  return mdfd;
551 }

References _fdvec_resize(), _mdnblocks(), Assert(), ereport, errcode_for_file_access(), errmsg(), ERROR, EXTENSION_RETURN_NULL, fd(), FILE_POSSIBLY_DELETED, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, PathNameOpenFile(), pfree(), PG_BINARY, relpath, and SMgrRelationData::smgr_rlocator.

Referenced by _mdfd_getseg(), mdexists(), and mdnblocks().

◆ mdprefetch()

bool mdprefetch ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum 
)

Definition at line 591 of file md.c.

592 {
593 #ifdef USE_PREFETCH
594  off_t seekpos;
595  MdfdVec *v;
596 
597  v = _mdfd_getseg(reln, forknum, blocknum, false,
598  InRecovery ? EXTENSION_RETURN_NULL : EXTENSION_FAIL);
599  if (v == NULL)
600  return false;
601 
602  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
603 
604  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
605 
606  (void) FilePrefetch(v->mdfd_vfd, seekpos, BLCKSZ, WAIT_EVENT_DATA_FILE_PREFETCH);
607 #endif /* USE_PREFETCH */
608 
609  return true;
610 }
int FilePrefetch(File file, off_t offset, off_t amount, uint32 wait_event_info)
Definition: fd.c:1984
@ WAIT_EVENT_DATA_FILE_PREFETCH
Definition: wait_event.h:180

References _mdfd_getseg(), Assert(), EXTENSION_FAIL, EXTENSION_RETURN_NULL, FilePrefetch(), InRecovery, _MdfdVec::mdfd_vfd, and WAIT_EVENT_DATA_FILE_PREFETCH.

◆ mdread()

void mdread ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
void *  buffer 
)

Definition at line 671 of file md.c.

673 {
674  off_t seekpos;
675  int nbytes;
676  MdfdVec *v;
677 
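678  TRACE_POSTGRESQL_SMGR_MD_READ_START(forknum, blocknum,
679  reln->smgr_rlocator.locator.spcOid,
680  reln->smgr_rlocator.locator.dbOid,
681  reln->smgr_rlocator.locator.relNumber,
682  reln->smgr_rlocator.backend);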
683 
684  v = _mdfd_getseg(reln, forknum, blocknum, false,
685  EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
686 
687  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
688 
689  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
690 
691  nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_READ);
692 
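693  TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum,
694  reln->smgr_rlocator.locator.spcOid,
695  reln->smgr_rlocator.locator.dbOid,
696  reln->smgr_rlocator.locator.relNumber,
697  reln->smgr_rlocator.backend,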
698  nbytes,
699  BLCKSZ);
700 
701  if (nbytes != BLCKSZ)
702  {
703  if (nbytes < 0)
704  ereport(ERROR,
705  (errcode_for_file_access(),
706  errmsg("could not read block %u in file \"%s\": %m",
707  blocknum, FilePathName(v->mdfd_vfd))));
708 
709  /*
710  * Short read: we are at or past EOF, or we read a partial block at
711  * EOF. Normally this is an error; upper levels should never try to
712  * read a nonexistent block. However, if zero_damaged_pages is ON or
713  * we are InRecovery, we should instead return zeroes without
714  * complaining. This allows, for example, the case of trying to
715  * update a block that was later truncated away.
716  */
717  if (zero_damaged_pages || InRecovery)
718  MemSet(buffer, 0, BLCKSZ);
719  else
720  ereport(ERROR,
721  (errcode(ERRCODE_DATA_CORRUPTED),
722  errmsg("could not read block %u in file \"%s\": read only %d of %d bytes",
723  blocknum, FilePathName(v->mdfd_vfd),
724  nbytes, BLCKSZ)));
725  }
726 }
bool zero_damaged_pages
Definition: bufmgr.c:134
#define MemSet(start, val, len)
Definition: c.h:1004
int FileRead(File file, void *buffer, size_t amount, off_t offset, uint32 wait_event_info)
Definition: fd.c:2035
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
@ WAIT_EVENT_DATA_FILE_READ
Definition: wait_event.h:181

References _mdfd_getseg(), Assert(), RelFileLocatorBackend::backend, RelFileLocator::dbOid, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errmsg(), ERROR, EXTENSION_CREATE_RECOVERY, EXTENSION_FAIL, FilePathName(), FileRead(), InRecovery, RelFileLocatorBackend::locator, _MdfdVec::mdfd_vfd, MemSet, RelFileLocator::relNumber, SMgrRelationData::smgr_rlocator, RelFileLocator::spcOid, WAIT_EVENT_DATA_FILE_READ, and zero_damaged_pages.

◆ mdsyncfiletag()

int mdsyncfiletag ( const FileTag *  ftag,
char *  path 
)

Definition at line 1399 of file md.c.

1400 {
1401  SMgrRelation reln = smgropen(ftag->rlocator, InvalidBackendId);
1402  File file;
1403  bool need_to_close;
1404  int result,
1405  save_errno;
1406 
1407  /* See if we already have the file open, or need to open it. */
1408  if (ftag->segno < reln->md_num_open_segs[ftag->forknum])
1409  {
1410  file = reln->md_seg_fds[ftag->forknum][ftag->segno].mdfd_vfd;
1411  strlcpy(path, FilePathName(file), MAXPGPATH);
1412  need_to_close = false;
1413  }
1414  else
1415  {
1416  char *p;
1417 
1418  p = _mdfd_segpath(reln, ftag->forknum, ftag->segno);
1419  strlcpy(path, p, MAXPGPATH);
1420  pfree(p);
1421 
1422  file = PathNameOpenFile(path, O_RDWR | PG_BINARY);
1423  if (file < 0)
1424  return -1;
1425  need_to_close = true;
1426  }
1427 
1428  /* Sync the file. */
1429  result = FileSync(file, WAIT_EVENT_DATA_FILE_SYNC);
1430  save_errno = errno;
1431 
1432  if (need_to_close)
1433  FileClose(file);
1434 
1435  pgstat_count_io_op(IOOBJECT_RELATION, IOCONTEXT_NORMAL, IOOP_FSYNC);
1436 
1437  errno = save_errno;
1438  return result;
1439 }
#define MAXPGPATH
@ IOOBJECT_RELATION
Definition: pgstat.h:278
@ IOCONTEXT_NORMAL
Definition: pgstat.h:288
@ IOOP_FSYNC
Definition: pgstat.h:298
void pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op)
Definition: pgstat_io.c:66
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
int16 forknum
Definition: sync.h:53
uint32 segno
Definition: sync.h:55
@ WAIT_EVENT_DATA_FILE_SYNC
Definition: wait_event.h:182

References _mdfd_segpath(), FileClose(), FilePathName(), FileSync(), FileTag::forknum, InvalidBackendId, IOCONTEXT_NORMAL, IOOBJECT_RELATION, IOOP_FSYNC, MAXPGPATH, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, PathNameOpenFile(), pfree(), PG_BINARY, pgstat_count_io_op(), FileTag::rlocator, FileTag::segno, smgropen(), strlcpy(), and WAIT_EVENT_DATA_FILE_SYNC.

◆ mdtruncate()

void mdtruncate ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  nblocks 
)

Definition at line 858 of file md.c.

859 {
860  BlockNumber curnblk;
861  BlockNumber priorblocks;
862  int curopensegs;
863 
864  /*
865  * NOTE: mdnblocks makes sure we have opened all active segments, so that
866  * truncation loop will get them all!
867  */
868  curnblk = mdnblocks(reln, forknum);
869  if (nblocks > curnblk)
870  {
871  /* Bogus request ... but no complaint if InRecovery */
872  if (InRecovery)
873  return;
874  ereport(ERROR,
875  (errmsg("could not truncate file \"%s\" to %u blocks: it's only %u blocks now",
876  relpath(reln->smgr_rlocator, forknum),
877  nblocks, curnblk)));
878  }
879  if (nblocks == curnblk)
880  return; /* no work */
881 
882  /*
883  * Truncate segments, starting at the last one. Starting at the end makes
884  * managing the memory for the fd array easier, should there be errors.
885  */
886  curopensegs = reln->md_num_open_segs[forknum];
887  while (curopensegs > 0)
888  {
889  MdfdVec *v;
890 
891  priorblocks = (curopensegs - 1) * RELSEG_SIZE;
892 
893  v = &reln->md_seg_fds[forknum][curopensegs - 1];
894 
895  if (priorblocks > nblocks)
896  {
897  /*
898  * This segment is no longer active. We truncate the file, but do
899  * not delete it, for reasons explained in the header comments.
900  */
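901  if (FileTruncate(v->mdfd_vfd, 0, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0)
902  ereport(ERROR,
903  (errcode_for_file_access(),
904  errmsg("could not truncate file \"%s\": %m",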
905  FilePathName(v->mdfd_vfd))));
906 
907  if (!SmgrIsTemp(reln))
908  register_dirty_segment(reln, forknum, v);
909 
910  /* we never drop the 1st segment */
911  Assert(v != &reln->md_seg_fds[forknum][0]);
912 
913  FileClose(v->mdfd_vfd);
914  _fdvec_resize(reln, forknum, curopensegs - 1);
915  }
916  else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks)
917  {
918  /*
919  * This is the last segment we want to keep. Truncate the file to
920  * the right length. NOTE: if nblocks is exactly a multiple K of
921  * RELSEG_SIZE, we will truncate the K+1st segment to 0 length but
922  * keep it. This adheres to the invariant given in the header
923  * comments.
924  */
925  BlockNumber lastsegblocks = nblocks - priorblocks;
926 
927  if (FileTruncate(v->mdfd_vfd, (off_t) lastsegblocks * BLCKSZ, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0)
928  ereport(ERROR,
929  (errcode_for_file_access(),
930  errmsg("could not truncate file \"%s\" to %u blocks: %m",
931  FilePathName(v->mdfd_vfd),
932  nblocks)));
933  if (!SmgrIsTemp(reln))
934  register_dirty_segment(reln, forknum, v);
935  }
936  else
937  {
938  /*
939  * We still need this segment, so nothing to do for this and any
940  * earlier segment.
941  */
942  break;
943  }
944  curopensegs--;
945  }
946 }
int FileTruncate(File file, off_t offset, uint32 wait_event_info)
Definition: fd.c:2227
@ WAIT_EVENT_DATA_FILE_TRUNCATE
Definition: wait_event.h:183

References _fdvec_resize(), Assert(), ereport, errcode_for_file_access(), errmsg(), ERROR, FileClose(), FilePathName(), FileTruncate(), InRecovery, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, mdnblocks(), register_dirty_segment(), relpath, SMgrRelationData::smgr_rlocator, SmgrIsTemp, and WAIT_EVENT_DATA_FILE_TRUNCATE.

◆ mdunlink()

void mdunlink ( RelFileLocatorBackend  rlocator,
ForkNumber  forknum,
bool  isRedo 
)

Definition at line 296 of file md.c.

297 {
298  /* Now do the per-fork work */
299  if (forknum == InvalidForkNumber)
300  {
301  for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
302  mdunlinkfork(rlocator, forknum, isRedo);
303  }
304  else
305  mdunlinkfork(rlocator, forknum, isRedo);
306 }
static void mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
Definition: md.c:333

References InvalidForkNumber, MAX_FORKNUM, and mdunlinkfork().

◆ mdunlinkfiletag()

int mdunlinkfiletag ( const FileTag *  ftag,
char *  path 
)

Definition at line 1448 of file md.c.

1449 {
1450  char *p;
1451 
1452  /* Compute the path. */
1453  p = relpathperm(ftag->rlocator, MAIN_FORKNUM);
1454  strlcpy(path, p, MAXPGPATH);
1455  pfree(p);
1456 
1457  /* Try to unlink the file. */
1458  return unlink(path);
1459 }
@ MAIN_FORKNUM
Definition: relpath.h:50
#define relpathperm(rlocator, forknum)
Definition: relpath.h:90

References MAIN_FORKNUM, MAXPGPATH, pfree(), relpathperm, FileTag::rlocator, and strlcpy().

◆ mdunlinkfork()

static void mdunlinkfork ( RelFileLocatorBackend  rlocator,
ForkNumber  forknum,
bool  isRedo 
)
static

Definition at line 333 of file md.c.

334 {
335  char *path;
336  int ret;
337  int save_errno;
338 
339  path = relpath(rlocator, forknum);
340 
341  /*
342  * Truncate and then unlink the first segment, or just register a request
343  * to unlink it later, as described in the comments for mdunlink().
344  */
345  if (isRedo || IsBinaryUpgrade || forknum != MAIN_FORKNUM ||
346  RelFileLocatorBackendIsTemp(rlocator))
347  {
348  if (!RelFileLocatorBackendIsTemp(rlocator))
349  {
350  /* Prevent other backends' fds from holding on to the disk space */
351  ret = do_truncate(path);
352 
353  /* Forget any pending sync requests for the first segment */
354  save_errno = errno;
355  register_forget_request(rlocator, forknum, 0 /* first seg */ );
356  errno = save_errno;
357  }
358  else
359  ret = 0;
360 
361  /* Next unlink the file, unless it was already found to be missing */
362  if (ret >= 0 || errno != ENOENT)
363  {
364  ret = unlink(path);
365  if (ret < 0 && errno != ENOENT)
366  {
367  save_errno = errno;
368  ereport(WARNING,
369  (errcode_for_file_access(),
370  errmsg("could not remove file \"%s\": %m", path)));
371  errno = save_errno;
372  }
373  }
374  }
375  else
376  {
377  /* Prevent other backends' fds from holding on to the disk space */
378  ret = do_truncate(path);
379 
380  /* Register request to unlink first segment later */
381  save_errno = errno;
382  register_unlink_segment(rlocator, forknum, 0 /* first seg */ );
383  errno = save_errno;
384  }
385 
386  /*
387  * Delete any additional segments.
388  *
389  * Note that because we loop until getting ENOENT, we will correctly
390  * remove all inactive segments as well as active ones. Ideally we'd
391  * continue the loop until getting exactly that errno, but that risks an
392  * infinite loop if the problem is directory-wide (for instance, if we
393  * suddenly can't read the data directory itself). We compromise by
394  * continuing after a non-ENOENT truncate error, but stopping after any
395  * unlink error. If there is indeed a directory-wide problem, additional
396  * unlink attempts wouldn't work anyway.
397  */
398  if (ret >= 0 || errno != ENOENT)
399  {
400  char *segpath = (char *) palloc(strlen(path) + 12);
401  BlockNumber segno;
402 
403  for (segno = 1;; segno++)
404  {
405  sprintf(segpath, "%s.%u", path, segno);
406 
407  if (!RelFileLocatorBackendIsTemp(rlocator))
408  {
409  /*
410  * Prevent other backends' fds from holding on to the disk
411  * space. We're done if we see ENOENT, though.
412  */
413  if (do_truncate(segpath) < 0 && errno == ENOENT)
414  break;
415 
416  /*
417  * Forget any pending sync requests for this segment before we
418  * try to unlink.
419  */
420  register_forget_request(rlocator, forknum, segno);
421  }
422 
423  if (unlink(segpath) < 0)
424  {
425  /* ENOENT is expected after the last segment... */
426  if (errno != ENOENT)
427  ereport(WARNING,
428  (errcode_for_file_access(),
429  errmsg("could not remove file \"%s\": %m", segpath)));
430  break;
431  }
432  }
433  pfree(segpath);
434  }
435 
436  pfree(path);
437 }
bool IsBinaryUpgrade
Definition: globals.c:114
static void register_forget_request(RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno)
Definition: md.c:1078
static void register_unlink_segment(RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno)
Definition: md.c:1061
static int do_truncate(const char *path)
Definition: md.c:312
#define sprintf
Definition: port.h:240
#define RelFileLocatorBackendIsTemp(rlocator)

References do_truncate(), ereport, errcode_for_file_access(), errmsg(), IsBinaryUpgrade, MAIN_FORKNUM, palloc(), pfree(), register_forget_request(), register_unlink_segment(), RelFileLocatorBackendIsTemp, relpath, sprintf, and WARNING.

Referenced by mdunlink().

◆ mdwrite()

void mdwrite ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
const void *  buffer,
bool  skipFsync 
)

Definition at line 736 of file md.c.

738 {
739  off_t seekpos;
740  int nbytes;
741  MdfdVec *v;
742 
743  /* This assert is too expensive to have on normally ... */
744 #ifdef CHECK_WRITE_VS_EXTEND
745  Assert(blocknum < mdnblocks(reln, forknum));
746 #endif
747 
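748  TRACE_POSTGRESQL_SMGR_MD_WRITE_START(forknum, blocknum,
749  reln->smgr_rlocator.locator.spcOid,
750  reln->smgr_rlocator.locator.dbOid,
751  reln->smgr_rlocator.locator.relNumber,
752  reln->smgr_rlocator.backend);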
753 
754  v = _mdfd_getseg(reln, forknum, blocknum, skipFsync,
755  EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
756 
757  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
758 
759  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
760 
761  nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_WRITE);
762 
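763  TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum,
764  reln->smgr_rlocator.locator.spcOid,
765  reln->smgr_rlocator.locator.dbOid,
766  reln->smgr_rlocator.locator.relNumber,
767  reln->smgr_rlocator.backend,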
768  nbytes,
769  BLCKSZ);
770 
771  if (nbytes != BLCKSZ)
772  {
773  if (nbytes < 0)
774  ereport(ERROR,
775  (errcode_for_file_access(),
776  errmsg("could not write block %u in file \"%s\": %m",
777  blocknum, FilePathName(v->mdfd_vfd))));
778  /* short write: complain appropriately */
779  ereport(ERROR,
780  (errcode(ERRCODE_DISK_FULL),
781  errmsg("could not write block %u in file \"%s\": wrote only %d of %d bytes",
782  blocknum,
783  FilePathName(v->mdfd_vfd),
784  nbytes, BLCKSZ),
785  errhint("Check free disk space.")));
786  }
787 
788  if (!skipFsync && !SmgrIsTemp(reln))
789  register_dirty_segment(reln, forknum, v);
790 }
@ WAIT_EVENT_DATA_FILE_WRITE
Definition: wait_event.h:184

References _mdfd_getseg(), Assert(), RelFileLocatorBackend::backend, RelFileLocator::dbOid, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXTENSION_CREATE_RECOVERY, EXTENSION_FAIL, FilePathName(), FileWrite(), RelFileLocatorBackend::locator, _MdfdVec::mdfd_vfd, mdnblocks(), register_dirty_segment(), RelFileLocator::relNumber, SMgrRelationData::smgr_rlocator, SmgrIsTemp, RelFileLocator::spcOid, and WAIT_EVENT_DATA_FILE_WRITE.

◆ mdwriteback()

void mdwriteback ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
BlockNumber  nblocks 
)

Definition at line 619 of file md.c.

621 {
622  /*
623  * Issue flush requests in as few requests as possible; have to split at
624  * segment boundaries though, since those are actually separate files.
625  */
626  while (nblocks > 0)
627  {
628  BlockNumber nflush = nblocks;
629  off_t seekpos;
630  MdfdVec *v;
631  int segnum_start,
632  segnum_end;
633 
634  v = _mdfd_getseg(reln, forknum, blocknum, true /* not used */ ,
635  EXTENSION_DONT_OPEN);
636 
637  /*
638  * We might be flushing buffers of already removed relations, that's
639  * ok, just ignore that case. If the segment file wasn't open already
640  * (ie from a recent mdwrite()), then we don't want to re-open it, to
641  * avoid a race with PROCSIGNAL_BARRIER_SMGRRELEASE that might leave
642  * us with a descriptor to a file that is about to be unlinked.
643  */
644  if (!v)
645  return;
646 
647  /* compute offset inside the current segment */
648  segnum_start = blocknum / RELSEG_SIZE;
649 
650  /* compute number of desired writes within the current segment */
651  segnum_end = (blocknum + nblocks - 1) / RELSEG_SIZE;
652  if (segnum_start != segnum_end)
653  nflush = RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE));
654 
655  Assert(nflush >= 1);
656  Assert(nflush <= nblocks);
657 
658  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
659 
660  FileWriteback(v->mdfd_vfd, seekpos, (off_t) BLCKSZ * nflush, WAIT_EVENT_DATA_FILE_FLUSH);
661 
662  nblocks -= nflush;
663  blocknum += nflush;
664  }
665 }
void FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info)
Definition: fd.c:2012
@ WAIT_EVENT_DATA_FILE_FLUSH
Definition: wait_event.h:178

References _mdfd_getseg(), Assert(), EXTENSION_DONT_OPEN, FileWriteback(), _MdfdVec::mdfd_vfd, and WAIT_EVENT_DATA_FILE_FLUSH.

◆ register_dirty_segment()

static void register_dirty_segment ( SMgrRelation  reln,
ForkNumber  forknum,
MdfdVec *  seg 
)
static

Definition at line 1022 of file md.c.

1023 {
1024  FileTag tag;
1025 
1026  INIT_MD_FILETAG(tag, reln->smgr_rlocator.locator, forknum, seg->mdfd_segno);
1027 
1028  /* Temp relations should never be fsync'd */
1029  Assert(!SmgrIsTemp(reln));
1030 
1031  if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false /* retryOnError */ ))
1032  {
1033  /*
1034  * We have no way of knowing if the current IOContext is
1035  * IOCONTEXT_NORMAL or IOCONTEXT_[BULKREAD, BULKWRITE, VACUUM] at this
1036  * point, so count the fsync as being in the IOCONTEXT_NORMAL
1037  * IOContext. This is probably okay, because the number of backend
1038  * fsyncs doesn't say anything about the efficacy of the
1039  * BufferAccessStrategy. And counting both fsyncs done in
1040  * IOCONTEXT_NORMAL and IOCONTEXT_[BULKREAD, BULKWRITE, VACUUM] under
1041  * IOCONTEXT_NORMAL is likely clearer when investigating the number of
1042  * backend fsyncs.
1043  */
1044  pgstat_count_io_op(IOOBJECT_RELATION, IOCONTEXT_NORMAL, IOOP_FSYNC);
1045 
1046  ereport(DEBUG1,
1047  (errmsg_internal("could not forward fsync request because request queue is full")));
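1048 
1049  if (FileSync(seg->mdfd_vfd, WAIT_EVENT_DATA_FILE_SYNC) < 0)
1050  ereport(data_sync_elevel(ERROR),
1051  (errcode_for_file_access(),
1052  errmsg("could not fsync file \"%s\": %m",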
1053  FilePathName(seg->mdfd_vfd))));
1054  }
1055 }
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1156
#define DEBUG1
Definition: elog.h:30
@ SYNC_REQUEST
Definition: sync.h:25

References Assert(), data_sync_elevel(), DEBUG1, ereport, errcode_for_file_access(), errmsg(), errmsg_internal(), ERROR, FilePathName(), FileSync(), INIT_MD_FILETAG, IOCONTEXT_NORMAL, IOOBJECT_RELATION, IOOP_FSYNC, RelFileLocatorBackend::locator, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, pgstat_count_io_op(), RegisterSyncRequest(), SMgrRelationData::smgr_rlocator, SmgrIsTemp, SYNC_REQUEST, and WAIT_EVENT_DATA_FILE_SYNC.

Referenced by mdextend(), mdtruncate(), and mdwrite().

◆ register_forget_request()

static void register_forget_request ( RelFileLocatorBackend  rlocator,
ForkNumber  forknum,
BlockNumber  segno 
)
static

Definition at line 1078 of file md.c.

1080 {
1081  FileTag tag;
1082 
1083  INIT_MD_FILETAG(tag, rlocator.locator, forknum, segno);
1084 
1085  RegisterSyncRequest(&tag, SYNC_FORGET_REQUEST, true /* retryOnError */ );
1086 }
@ SYNC_FORGET_REQUEST
Definition: sync.h:27

References INIT_MD_FILETAG, RelFileLocatorBackend::locator, RegisterSyncRequest(), and SYNC_FORGET_REQUEST.

Referenced by mdunlinkfork().

◆ register_unlink_segment()

static void register_unlink_segment ( RelFileLocatorBackend  rlocator,
ForkNumber  forknum,
BlockNumber  segno 
)
static

Definition at line 1061 of file md.c.

1063 {
1064  FileTag tag;
1065 
1066  INIT_MD_FILETAG(tag, rlocator.locator, forknum, segno);
1067 
1068  /* Should never be used with temp relations */
1069  Assert(!RelFileLocatorBackendIsTemp(rlocator));
1070 
1071  RegisterSyncRequest(&tag, SYNC_UNLINK_REQUEST, true /* retryOnError */ );
1072 }
@ SYNC_UNLINK_REQUEST
Definition: sync.h:26

References Assert(), INIT_MD_FILETAG, RelFileLocatorBackend::locator, RegisterSyncRequest(), RelFileLocatorBackendIsTemp, and SYNC_UNLINK_REQUEST.

Referenced by mdunlinkfork().

Variable Documentation

◆ MdCxt

MemoryContext MdCxt
static

Definition at line 88 of file md.c.

Referenced by _fdvec_resize(), and mdinit().