PostgreSQL Source Code  git master
md.c File Reference
#include "postgres.h"
#include <unistd.h>
#include <fcntl.h>
#include <sys/file.h>
#include "access/xlog.h"
#include "access/xlogutils.h"
#include "commands/tablespace.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/md.h"
#include "storage/relfilelocator.h"
#include "storage/smgr.h"
#include "storage/sync.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"
Include dependency graph for md.c:

Go to the source code of this file.

Data Structures

struct  _MdfdVec
 

Macros

#define INIT_MD_FILETAG(a, xx_rlocator, xx_forknum, xx_segno)
 
#define EXTENSION_FAIL   (1 << 0)
 
#define EXTENSION_RETURN_NULL   (1 << 1)
 
#define EXTENSION_CREATE   (1 << 2)
 
#define EXTENSION_CREATE_RECOVERY   (1 << 3)
 
#define EXTENSION_DONT_CHECK_SIZE   (1 << 4)
 
#define EXTENSION_DONT_OPEN   (1 << 5)
 

Typedefs

typedef struct _MdfdVec MdfdVec
 

Functions

static void mdunlinkfork (RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
 
static MdfdVec * mdopenfork (SMgrRelation reln, ForkNumber forknum, int behavior)
 
static void register_dirty_segment (SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
 
static void register_unlink_segment (RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno)
 
static void register_forget_request (RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno)
 
static void _fdvec_resize (SMgrRelation reln, ForkNumber forknum, int nseg)
 
static char * _mdfd_segpath (SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
 
static MdfdVec * _mdfd_openseg (SMgrRelation reln, ForkNumber forknum, BlockNumber segno, int oflags)
 
static MdfdVec * _mdfd_getseg (SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, bool skipFsync, int behavior)
 
static BlockNumber _mdnblocks (SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
 
void mdinit (void)
 
bool mdexists (SMgrRelation reln, ForkNumber forknum)
 
void mdcreate (SMgrRelation reln, ForkNumber forknum, bool isRedo)
 
void mdunlink (RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
 
static int do_truncate (const char *path)
 
void mdextend (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync)
 
void mdopen (SMgrRelation reln)
 
void mdclose (SMgrRelation reln, ForkNumber forknum)
 
bool mdprefetch (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
 
void mdwriteback (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
 
void mdread (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer)
 
void mdwrite (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync)
 
BlockNumber mdnblocks (SMgrRelation reln, ForkNumber forknum)
 
void mdtruncate (SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
 
void mdimmedsync (SMgrRelation reln, ForkNumber forknum)
 
void ForgetDatabaseSyncRequests (Oid dbid)
 
void DropRelationFiles (RelFileLocator *delrels, int ndelrels, bool isRedo)
 
int mdsyncfiletag (const FileTag *ftag, char *path)
 
int mdunlinkfiletag (const FileTag *ftag, char *path)
 
bool mdfiletagmatches (const FileTag *ftag, const FileTag *candidate)
 

Variables

static MemoryContext MdCxt
 

Macro Definition Documentation

◆ EXTENSION_CREATE

#define EXTENSION_CREATE   (1 << 2)

Definition at line 108 of file md.c.

◆ EXTENSION_CREATE_RECOVERY

#define EXTENSION_CREATE_RECOVERY   (1 << 3)

Definition at line 110 of file md.c.

◆ EXTENSION_DONT_CHECK_SIZE

#define EXTENSION_DONT_CHECK_SIZE   (1 << 4)

Definition at line 118 of file md.c.

◆ EXTENSION_DONT_OPEN

#define EXTENSION_DONT_OPEN   (1 << 5)

Definition at line 120 of file md.c.

◆ EXTENSION_FAIL

#define EXTENSION_FAIL   (1 << 0)

Definition at line 104 of file md.c.

◆ EXTENSION_RETURN_NULL

#define EXTENSION_RETURN_NULL   (1 << 1)

Definition at line 106 of file md.c.

◆ INIT_MD_FILETAG

#define INIT_MD_FILETAG (   a,
  xx_rlocator,
  xx_forknum,
  xx_segno 
)
Value:
( \
memset(&(a), 0, sizeof(FileTag)), \
(a).handler = SYNC_HANDLER_MD, \
(a).rlocator = (xx_rlocator), \
(a).forknum = (xx_forknum), \
(a).segno = (xx_segno) \
)
int a
Definition: isn.c:69
Definition: sync.h:51
@ SYNC_HANDLER_MD
Definition: sync.h:37

Definition at line 92 of file md.c.

Typedef Documentation

◆ MdfdVec

typedef struct _MdfdVec MdfdVec

Function Documentation

◆ _fdvec_resize()

static void _fdvec_resize ( SMgrRelation  reln,
ForkNumber  forknum,
int  nseg 
)
static

Definition at line 1120 of file md.c.

1123 {
1124  if (nseg == 0)
1125  {
1126  if (reln->md_num_open_segs[forknum] > 0)
1127  {
1128  pfree(reln->md_seg_fds[forknum]);
1129  reln->md_seg_fds[forknum] = NULL;
1130  }
1131  }
1132  else if (reln->md_num_open_segs[forknum] == 0)
1133  {
1134  reln->md_seg_fds[forknum] =
1135  MemoryContextAlloc(MdCxt, sizeof(MdfdVec) * nseg);
1136  }
1137  else
1138  {
1139  /*
1140  * It doesn't seem worthwhile complicating the code to amortize
1141  * repalloc() calls. Those are far faster than PathNameOpenFile() or
1142  * FileClose(), and the memory context internally will sometimes avoid
1143  * doing an actual reallocation.
1144  */
1145  reln->md_seg_fds[forknum] =
1146  repalloc(reln->md_seg_fds[forknum],
1147  sizeof(MdfdVec) * nseg);
1148  }
1149 
1150  reln->md_num_open_segs[forknum] = nseg;
1151 }
void pfree(void *pointer)
Definition: mcxt.c:1306
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1321
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:994
static MemoryContext MdCxt
Definition: md.c:88
int md_num_open_segs[MAX_FORKNUM+1]
Definition: smgr.h:68
struct _MdfdVec * md_seg_fds[MAX_FORKNUM+1]
Definition: smgr.h:69
Definition: md.c:83

References SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, MdCxt, MemoryContextAlloc(), pfree(), and repalloc().

Referenced by _mdfd_openseg(), mdclose(), mdcreate(), mdimmedsync(), mdopenfork(), and mdtruncate().

◆ _mdfd_getseg()

static MdfdVec * _mdfd_getseg ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blkno,
bool  skipFsync,
int  behavior 
)
static

Definition at line 1226 of file md.c.

1228 {
1229  MdfdVec *v;
1230  BlockNumber targetseg;
1231  BlockNumber nextsegno;
1232 
1233  /* some way to handle non-existent segments needs to be specified */
1234  Assert(behavior &
1235  (EXTENSION_FAIL | EXTENSION_CREATE | EXTENSION_RETURN_NULL |
1236  EXTENSION_DONT_OPEN));
1237 
1238  targetseg = blkno / ((BlockNumber) RELSEG_SIZE);
1239 
1240  /* if an existing and opened segment, we're done */
1241  if (targetseg < reln->md_num_open_segs[forknum])
1242  {
1243  v = &reln->md_seg_fds[forknum][targetseg];
1244  return v;
1245  }
1246 
1247  /* The caller only wants the segment if we already had it open. */
1248  if (behavior & EXTENSION_DONT_OPEN)
1249  return NULL;
1250 
1251  /*
1252  * The target segment is not yet open. Iterate over all the segments
1253  * between the last opened and the target segment. This way missing
1254  * segments either raise an error, or get created (according to
1255  * 'behavior'). Start with either the last opened, or the first segment if
1256  * none was opened before.
1257  */
1258  if (reln->md_num_open_segs[forknum] > 0)
1259  v = &reln->md_seg_fds[forknum][reln->md_num_open_segs[forknum] - 1];
1260  else
1261  {
1262  v = mdopenfork(reln, forknum, behavior);
1263  if (!v)
1264  return NULL; /* if behavior & EXTENSION_RETURN_NULL */
1265  }
1266 
1267  for (nextsegno = reln->md_num_open_segs[forknum];
1268  nextsegno <= targetseg; nextsegno++)
1269  {
1270  BlockNumber nblocks = _mdnblocks(reln, forknum, v);
1271  int flags = 0;
1272 
1273  Assert(nextsegno == v->mdfd_segno + 1);
1274 
1275  if (nblocks > ((BlockNumber) RELSEG_SIZE))
1276  elog(FATAL, "segment too big");
1277 
1278  if ((behavior & EXTENSION_CREATE) ||
1279  (InRecovery && (behavior & EXTENSION_CREATE_RECOVERY)))
1280  {
1281  /*
1282  * Normally we will create new segments only if authorized by the
1283  * caller (i.e., we are doing mdextend()). But when doing WAL
1284  * recovery, create segments anyway; this allows cases such as
1285  * replaying WAL data that has a write into a high-numbered
1286  * segment of a relation that was later deleted. We want to go
1287  * ahead and create the segments so we can finish out the replay.
1288  *
1289  * We have to maintain the invariant that segments before the last
1290  * active segment are of size RELSEG_SIZE; therefore, if
1291  * extending, pad them out with zeroes if needed. (This only
1292  * matters if in recovery, or if the caller is extending the
1293  * relation discontiguously, but that can happen in hash indexes.)
1294  */
1295  if (nblocks < ((BlockNumber) RELSEG_SIZE))
1296  {
1297  char *zerobuf = palloc0(BLCKSZ);
1298 
1299  mdextend(reln, forknum,
1300  nextsegno * ((BlockNumber) RELSEG_SIZE) - 1,
1301  zerobuf, skipFsync);
1302  pfree(zerobuf);
1303  }
1304  flags = O_CREAT;
1305  }
1306  else if (!(behavior & EXTENSION_DONT_CHECK_SIZE) &&
1307  nblocks < ((BlockNumber) RELSEG_SIZE))
1308  {
1309  /*
1310  * When not extending (or explicitly including truncated
1311  * segments), only open the next segment if the current one is
1312  * exactly RELSEG_SIZE. If not (this branch), either return NULL
1313  * or fail.
1314  */
1315  if (behavior & EXTENSION_RETURN_NULL)
1316  {
1317  /*
1318  * Some callers discern between reasons for _mdfd_getseg()
1319  * returning NULL based on errno. As there's no failing
1320  * syscall involved in this case, explicitly set errno to
1321  * ENOENT, as that seems the closest interpretation.
1322  */
1323  errno = ENOENT;
1324  return NULL;
1325  }
1326 
1327  ereport(ERROR,
1328  (errcode_for_file_access(),
1329  errmsg("could not open file \"%s\" (target block %u): previous segment is only %u blocks",
1330  _mdfd_segpath(reln, forknum, nextsegno),
1331  blkno, nblocks)));
1332  }
1333 
1334  v = _mdfd_openseg(reln, forknum, nextsegno, flags);
1335 
1336  if (v == NULL)
1337  {
1338  if ((behavior & EXTENSION_RETURN_NULL) &&
1339  FILE_POSSIBLY_DELETED(errno))
1340  return NULL;
1341  ereport(ERROR,
1342  (errcode_for_file_access(),
1343  errmsg("could not open file \"%s\" (target block %u): %m",
1344  _mdfd_segpath(reln, forknum, nextsegno),
1345  blkno)));
1346  }
1347  }
1348 
1349  return v;
1350 }
uint32 BlockNumber
Definition: block.h:31
int errcode_for_file_access(void)
Definition: elog.c:718
int errmsg(const char *fmt,...)
Definition: elog.c:906
#define FATAL
Definition: elog.h:37
#define ERROR
Definition: elog.h:35
#define ereport(elevel,...)
Definition: elog.h:145
#define FILE_POSSIBLY_DELETED(err)
Definition: fd.h:75
Assert(fmt[strlen(fmt) - 1] !='\n')
void * palloc0(Size size)
Definition: mcxt.c:1230
#define EXTENSION_CREATE_RECOVERY
Definition: md.c:110
static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:1356
#define EXTENSION_DONT_OPEN
Definition: md.c:120
void mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync)
Definition: md.c:449
static MdfdVec * _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno, int oflags)
Definition: md.c:1181
#define EXTENSION_RETURN_NULL
Definition: md.c:106
static char * _mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
Definition: md.c:1158
#define EXTENSION_CREATE
Definition: md.c:108
#define EXTENSION_DONT_CHECK_SIZE
Definition: md.c:118
#define EXTENSION_FAIL
Definition: md.c:104
static MdfdVec * mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
Definition: md.c:514
BlockNumber mdfd_segno
Definition: md.c:85
bool InRecovery
Definition: xlogutils.c:53

References _mdfd_openseg(), _mdfd_segpath(), _mdnblocks(), Assert(), elog(), ereport, errcode_for_file_access(), errmsg(), ERROR, EXTENSION_CREATE, EXTENSION_CREATE_RECOVERY, EXTENSION_DONT_CHECK_SIZE, EXTENSION_DONT_OPEN, EXTENSION_FAIL, EXTENSION_RETURN_NULL, FATAL, FILE_POSSIBLY_DELETED, InRecovery, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, mdextend(), _MdfdVec::mdfd_segno, mdopenfork(), palloc0(), and pfree().

Referenced by mdextend(), mdprefetch(), mdread(), mdwrite(), and mdwriteback().

◆ _mdfd_openseg()

static MdfdVec * _mdfd_openseg ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  segno,
int  oflags 
)
static

Definition at line 1181 of file md.c.

1183 {
1184  MdfdVec *v;
1185  File fd;
1186  char *fullpath;
1187 
1188  fullpath = _mdfd_segpath(reln, forknum, segno);
1189 
1190  /* open the file */
1191  fd = PathNameOpenFile(fullpath, O_RDWR | PG_BINARY | oflags);
1192 
1193  pfree(fullpath);
1194 
1195  if (fd < 0)
1196  return NULL;
1197 
1198  /*
1199  * Segments are always opened in order from lowest to highest, so we must
1200  * be adding a new one at the end.
1201  */
1202  Assert(segno == reln->md_num_open_segs[forknum]);
1203 
1204  _fdvec_resize(reln, forknum, segno + 1);
1205 
1206  /* fill the entry */
1207  v = &reln->md_seg_fds[forknum][segno];
1208  v->mdfd_vfd = fd;
1209  v->mdfd_segno = segno;
1210 
1211  Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
1212 
1213  /* all done */
1214  return v;
1215 }
#define PG_BINARY
Definition: c.h:1209
File PathNameOpenFile(const char *fileName, int fileFlags)
Definition: fd.c:1488
int File
Definition: fd.h:54
static void _fdvec_resize(SMgrRelation reln, ForkNumber forknum, int nseg)
Definition: md.c:1120
static int fd(const char *x, int i)
Definition: preproc-init.c:105
File mdfd_vfd
Definition: md.c:84

References _fdvec_resize(), _mdfd_segpath(), _mdnblocks(), Assert(), fd(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, PathNameOpenFile(), pfree(), and PG_BINARY.

Referenced by _mdfd_getseg(), mdimmedsync(), and mdnblocks().

◆ _mdfd_segpath()

static char * _mdfd_segpath ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  segno 
)
static

Definition at line 1158 of file md.c.

1159 {
1160  char *path,
1161  *fullpath;
1162 
1163  path = relpath(reln->smgr_rlocator, forknum);
1164 
1165  if (segno > 0)
1166  {
1167  fullpath = psprintf("%s.%u", path, segno);
1168  pfree(path);
1169  }
1170  else
1171  fullpath = path;
1172 
1173  return fullpath;
1174 }
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
#define relpath(rlocator, forknum)
Definition: relpath.h:94
RelFileLocatorBackend smgr_rlocator
Definition: smgr.h:42

References pfree(), psprintf(), relpath, and SMgrRelationData::smgr_rlocator.

Referenced by _mdfd_getseg(), _mdfd_openseg(), and mdsyncfiletag().

◆ _mdnblocks()

static BlockNumber _mdnblocks ( SMgrRelation  reln,
ForkNumber  forknum,
MdfdVec *  seg 
)
static

Definition at line 1356 of file md.c.

1357 {
1358  off_t len;
1359 
1360  len = FileSize(seg->mdfd_vfd);
1361  if (len < 0)
1362  ereport(ERROR,
1363  (errcode_for_file_access(),
1364  errmsg("could not seek to end of file \"%s\": %m",
1365  FilePathName(seg->mdfd_vfd))));
1366  /* note that this calculation will ignore any partial block at EOF */
1367  return (BlockNumber) (len / BLCKSZ);
1368 }
char * FilePathName(File file)
Definition: fd.c:2261
off_t FileSize(File file)
Definition: fd.c:2209
const void size_t len

References ereport, errcode_for_file_access(), errmsg(), ERROR, FilePathName(), FileSize(), len, and _MdfdVec::mdfd_vfd.

Referenced by _mdfd_getseg(), _mdfd_openseg(), mdextend(), mdnblocks(), and mdopenfork().

◆ do_truncate()

static int do_truncate ( const char *  path)
static

Definition at line 312 of file md.c.

313 {
314  int save_errno;
315  int ret;
316 
317  ret = pg_truncate(path, 0);
318 
319  /* Log a warning here to avoid repetition in callers. */
320  if (ret < 0 && errno != ENOENT)
321  {
322  save_errno = errno;
323  ereport(WARNING,
324  (errcode_for_file_access(),
325  errmsg("could not truncate file \"%s\": %m", path)));
326  errno = save_errno;
327  }
328 
329  return ret;
330 }
#define WARNING
Definition: elog.h:32
int pg_truncate(const char *path, off_t length)
Definition: fd.c:631

References ereport, errcode_for_file_access(), errmsg(), pg_truncate(), and WARNING.

Referenced by mdunlinkfork().

◆ DropRelationFiles()

void DropRelationFiles ( RelFileLocator *  delrels,
int  ndelrels,
bool  isRedo 
)

Definition at line 1088 of file md.c.

1089 {
1090  SMgrRelation *srels;
1091  int i;
1092 
1093  srels = palloc(sizeof(SMgrRelation) * ndelrels);
1094  for (i = 0; i < ndelrels; i++)
1095  {
1096  SMgrRelation srel = smgropen(delrels[i], InvalidBackendId);
1097 
1098  if (isRedo)
1099  {
1100  ForkNumber fork;
1101 
1102  for (fork = 0; fork <= MAX_FORKNUM; fork++)
1103  XLogDropRelation(delrels[i], fork);
1104  }
1105  srels[i] = srel;
1106  }
1107 
1108  smgrdounlinkall(srels, ndelrels, isRedo);
1109 
1110  for (i = 0; i < ndelrels; i++)
1111  smgrclose(srels[i]);
1112  pfree(srels);
1113 }
#define InvalidBackendId
Definition: backendid.h:23
int i
Definition: isn.c:73
void * palloc(Size size)
Definition: mcxt.c:1199
ForkNumber
Definition: relpath.h:48
#define MAX_FORKNUM
Definition: relpath.h:62
void smgrclose(SMgrRelation reln)
Definition: smgr.c:256
SMgrRelation smgropen(RelFileLocator rlocator, BackendId backend)
Definition: smgr.c:146
void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
Definition: smgr.c:420
void XLogDropRelation(RelFileLocator rlocator, ForkNumber forknum)
Definition: xlogutils.c:658

References i, InvalidBackendId, MAX_FORKNUM, palloc(), pfree(), smgrclose(), smgrdounlinkall(), smgropen(), and XLogDropRelation().

Referenced by FinishPreparedTransaction(), xact_redo_abort(), and xact_redo_commit().

◆ ForgetDatabaseSyncRequests()

void ForgetDatabaseSyncRequests ( Oid  dbid)

Definition at line 1070 of file md.c.

1071 {
1072  FileTag tag;
1073  RelFileLocator rlocator;
1074 
1075  rlocator.dbOid = dbid;
1076  rlocator.spcOid = 0;
1077  rlocator.relNumber = 0;
1078 
1079  INIT_MD_FILETAG(tag, rlocator, InvalidForkNumber, InvalidBlockNumber);
1080 
1081  RegisterSyncRequest(&tag, SYNC_FILTER_REQUEST, true /* retryOnError */ );
1082 }
#define InvalidBlockNumber
Definition: block.h:33
#define INIT_MD_FILETAG(a, xx_rlocator, xx_forknum, xx_segno)
Definition: md.c:92
@ InvalidForkNumber
Definition: relpath.h:49
RelFileNumber relNumber
bool RegisterSyncRequest(const FileTag *ftag, SyncRequestType type, bool retryOnError)
Definition: sync.c:587
@ SYNC_FILTER_REQUEST
Definition: sync.h:28

References RelFileLocator::dbOid, INIT_MD_FILETAG, InvalidBlockNumber, InvalidForkNumber, RegisterSyncRequest(), RelFileLocator::relNumber, RelFileLocator::spcOid, and SYNC_FILTER_REQUEST.

Referenced by createdb_failure_callback(), dbase_redo(), and dropdb().

◆ mdclose()

void mdclose ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 568 of file md.c.

569 {
570  int nopensegs = reln->md_num_open_segs[forknum];
571 
572  /* No work if already closed */
573  if (nopensegs == 0)
574  return;
575 
576  /* close segments starting from the end */
577  while (nopensegs > 0)
578  {
579  MdfdVec *v = &reln->md_seg_fds[forknum][nopensegs - 1];
580 
581  FileClose(v->mdfd_vfd);
582  _fdvec_resize(reln, forknum, nopensegs - 1);
583  nopensegs--;
584  }
585 }
void FileClose(File file)
Definition: fd.c:1883

References _fdvec_resize(), FileClose(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, and _MdfdVec::mdfd_vfd.

Referenced by mdexists().

◆ mdcreate()

void mdcreate ( SMgrRelation  reln,
ForkNumber  forknum,
bool  isRedo 
)

Definition at line 182 of file md.c.

183 {
184  MdfdVec *mdfd;
185  char *path;
186  File fd;
187 
188  if (isRedo && reln->md_num_open_segs[forknum] > 0)
189  return; /* created and opened already... */
190 
191  Assert(reln->md_num_open_segs[forknum] == 0);
192 
193  /*
194  * We may be using the target table space for the first time in this
195  * database, so create a per-database subdirectory if needed.
196  *
197  * XXX this is a fairly ugly violation of module layering, but this seems
198  * to be the best place to put the check. Maybe TablespaceCreateDbspace
199  * should be here and not in commands/tablespace.c? But that would imply
200  * importing a lot of stuff that smgr.c oughtn't know, either.
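201  */
202  TablespaceCreateDbspace(reln->smgr_rlocator.locator.spcOid,
203  reln->smgr_rlocator.locator.dbOid,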
204  isRedo);
205 
206  path = relpath(reln->smgr_rlocator, forknum);
207 
208  fd = PathNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY);
209 
210  if (fd < 0)
211  {
212  int save_errno = errno;
213 
214  if (isRedo)
215  fd = PathNameOpenFile(path, O_RDWR | PG_BINARY);
216  if (fd < 0)
217  {
218  /* be sure to report the error reported by create, not open */
219  errno = save_errno;
220  ereport(ERROR,
221  (errcode_for_file_access(),
222  errmsg("could not create file \"%s\": %m", path)));
223  }
224  }
225 
226  pfree(path);
227 
228  _fdvec_resize(reln, forknum, 1);
229  mdfd = &reln->md_seg_fds[forknum][0];
230  mdfd->mdfd_vfd = fd;
231  mdfd->mdfd_segno = 0;
232 }
void TablespaceCreateDbspace(Oid spcOid, Oid dbOid, bool isRedo)
Definition: tablespace.c:118
RelFileLocator locator

References _fdvec_resize(), Assert(), RelFileLocator::dbOid, ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), RelFileLocatorBackend::locator, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, PathNameOpenFile(), pfree(), PG_BINARY, relpath, SMgrRelationData::smgr_rlocator, RelFileLocator::spcOid, and TablespaceCreateDbspace().

◆ mdexists()

bool mdexists ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 163 of file md.c.

164 {
165  /*
166  * Close it first, to ensure that we notice if the fork has been unlinked
167  * since we opened it. As an optimization, we can skip that in recovery,
168  * which already closes relations when dropping them.
169  */
170  if (!InRecovery)
171  mdclose(reln, forknum);
172 
173  return (mdopenfork(reln, forknum, EXTENSION_RETURN_NULL) != NULL);
174 }
void mdclose(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:568

References EXTENSION_RETURN_NULL, InRecovery, mdclose(), and mdopenfork().

◆ mdextend()

void mdextend ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
char *  buffer,
bool  skipFsync 
)

Definition at line 449 of file md.c.

451 {
452  off_t seekpos;
453  int nbytes;
454  MdfdVec *v;
455 
456  /* This assert is too expensive to have on normally ... */
457 #ifdef CHECK_WRITE_VS_EXTEND
458  Assert(blocknum >= mdnblocks(reln, forknum));
459 #endif
460 
461  /*
462  * If a relation manages to grow to 2^32-1 blocks, refuse to extend it any
463  * more --- we mustn't create a block whose number actually is
464  * InvalidBlockNumber. (Note that this failure should be unreachable
465  * because of upstream checks in bufmgr.c.)
466  */
467  if (blocknum == InvalidBlockNumber)
468  ereport(ERROR,
469  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
470  errmsg("cannot extend file \"%s\" beyond %u blocks",
471  relpath(reln->smgr_rlocator, forknum),
472  InvalidBlockNumber)));
473 
474  v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE);
475 
476  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
477 
478  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
479 
480  if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ)
481  {
482  if (nbytes < 0)
483  ereport(ERROR,
484  (errcode_for_file_access(),
485  errmsg("could not extend file \"%s\": %m",
486  FilePathName(v->mdfd_vfd)),
487  errhint("Check free disk space.")));
488  /* short write: complain appropriately */
489  ereport(ERROR,
490  (errcode(ERRCODE_DISK_FULL),
491  errmsg("could not extend file \"%s\": wrote only %d of %d bytes at block %u",
492  FilePathName(v->mdfd_vfd),
493  nbytes, BLCKSZ, blocknum),
494  errhint("Check free disk space.")));
495  }
496 
497  if (!skipFsync && !SmgrIsTemp(reln))
498  register_dirty_segment(reln, forknum, v);
499 
500  Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
501 }
int errhint(const char *fmt,...)
Definition: elog.c:1153
int errcode(int sqlerrcode)
Definition: elog.c:695
int FileWrite(File file, char *buffer, int amount, off_t offset, uint32 wait_event_info)
Definition: fd.c:2090
BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:801
static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:1013
static MdfdVec * _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, bool skipFsync, int behavior)
Definition: md.c:1226
#define SmgrIsTemp(smgr)
Definition: smgr.h:77
@ WAIT_EVENT_DATA_FILE_EXTEND
Definition: wait_event.h:174

References _mdfd_getseg(), _mdnblocks(), Assert(), ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXTENSION_CREATE, FilePathName(), FileWrite(), InvalidBlockNumber, _MdfdVec::mdfd_vfd, mdnblocks(), register_dirty_segment(), relpath, SMgrRelationData::smgr_rlocator, SmgrIsTemp, and WAIT_EVENT_DATA_FILE_EXTEND.

Referenced by _mdfd_getseg().

◆ mdfiletagmatches()

bool mdfiletagmatches ( const FileTag *  ftag,
const FileTag *  candidate 
)

Definition at line 1443 of file md.c.

1444 {
1445  /*
1446  * For now we only use filter requests as a way to drop all scheduled
1447  * callbacks relating to a given database, when dropping the database.
1448  * We'll return true for all candidates that have the same database OID as
1449  * the ftag from the SYNC_FILTER_REQUEST request, so they're forgotten.
1450  */
1451  return ftag->rlocator.dbOid == candidate->rlocator.dbOid;
1452 }
RelFileLocator rlocator
Definition: sync.h:54

References RelFileLocator::dbOid, and FileTag::rlocator.

◆ mdimmedsync()

void mdimmedsync ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 960 of file md.c.

961 {
962  int segno;
963  int min_inactive_seg;
964 
965  /*
966  * NOTE: mdnblocks makes sure we have opened all active segments, so that
967  * fsync loop will get them all!
968  */
969  mdnblocks(reln, forknum);
970 
971  min_inactive_seg = segno = reln->md_num_open_segs[forknum];
972 
973  /*
974  * Temporarily open inactive segments, then close them after sync. There
975  * may be some inactive segments left opened after fsync() error, but that
976  * is harmless. We don't bother to clean them up and take a risk of
977  * further trouble. The next mdclose() will soon close them.
978  */
979  while (_mdfd_openseg(reln, forknum, segno, 0) != NULL)
980  segno++;
981 
982  while (segno > 0)
983  {
984  MdfdVec *v = &reln->md_seg_fds[forknum][segno - 1];
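985 
986  if (FileSync(v->mdfd_vfd, WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC) < 0)
987  ereport(data_sync_elevel(ERROR),
988  (errcode_for_file_access(),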
989  errmsg("could not fsync file \"%s\": %m",
990  FilePathName(v->mdfd_vfd))));
991 
992  /* Close inactive segments immediately */
993  if (segno > min_inactive_seg)
994  {
995  FileClose(v->mdfd_vfd);
996  _fdvec_resize(reln, forknum, segno - 1);
997  }
998 
999  segno--;
1000  }
1001 }
int FileSync(File file, uint32 wait_event_info)
Definition: fd.c:2188
int data_sync_elevel(int elevel)
Definition: fd.c:3736
@ WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC
Definition: wait_event.h:176

References _fdvec_resize(), _mdfd_openseg(), data_sync_elevel(), ereport, errcode_for_file_access(), errmsg(), ERROR, FileClose(), FilePathName(), FileSync(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, mdnblocks(), and WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC.

◆ mdinit()

void mdinit ( void  )

Definition at line 150 of file md.c.

151 {
152  MdCxt = AllocSetContextCreate(TopMemoryContext,
153  "MdSmgr",
154  ALLOCSET_DEFAULT_SIZES);
155 }
MemoryContext TopMemoryContext
Definition: mcxt.c:130
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:153

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, MdCxt, and TopMemoryContext.

◆ mdnblocks()

BlockNumber mdnblocks ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 801 of file md.c.

802 {
803  MdfdVec *v;
804  BlockNumber nblocks;
805  BlockNumber segno;
806 
807  mdopenfork(reln, forknum, EXTENSION_FAIL);
808 
809  /* mdopen has opened the first segment */
810  Assert(reln->md_num_open_segs[forknum] > 0);
811 
812  /*
813  * Start from the last open segments, to avoid redundant seeks. We have
814  * previously verified that these segments are exactly RELSEG_SIZE long,
815  * and it's useless to recheck that each time.
816  *
817  * NOTE: this assumption could only be wrong if another backend has
818  * truncated the relation. We rely on higher code levels to handle that
819  * scenario by closing and re-opening the md fd, which is handled via
820  * relcache flush. (Since the checkpointer doesn't participate in
821  * relcache flush, it could have segment entries for inactive segments;
822  * that's OK because the checkpointer never needs to compute relation
823  * size.)
824  */
825  segno = reln->md_num_open_segs[forknum] - 1;
826  v = &reln->md_seg_fds[forknum][segno];
827 
828  for (;;)
829  {
830  nblocks = _mdnblocks(reln, forknum, v);
831  if (nblocks > ((BlockNumber) RELSEG_SIZE))
832  elog(FATAL, "segment too big");
833  if (nblocks < ((BlockNumber) RELSEG_SIZE))
834  return (segno * ((BlockNumber) RELSEG_SIZE)) + nblocks;
835 
836  /*
837  * If segment is exactly RELSEG_SIZE, advance to next one.
838  */
839  segno++;
840 
841  /*
842  * We used to pass O_CREAT here, but that has the disadvantage that it
843  * might create a segment which has vanished through some operating
844  * system misadventure. In such a case, creating the segment here
845  * undermines _mdfd_getseg's attempts to notice and report an error
846  * upon access to a missing segment.
847  */
848  v = _mdfd_openseg(reln, forknum, segno, 0);
849  if (v == NULL)
850  return segno * ((BlockNumber) RELSEG_SIZE);
851  }
852 }

References _mdfd_openseg(), _mdnblocks(), Assert(), elog(), EXTENSION_FAIL, FATAL, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, and mdopenfork().

Referenced by mdextend(), mdimmedsync(), mdtruncate(), and mdwrite().

◆ mdopen()

void mdopen ( SMgrRelation  reln)

Definition at line 557 of file md.c.

558 {
559  /* mark it not open */
560  for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)
561  reln->md_num_open_segs[forknum] = 0;
562 }

References MAX_FORKNUM, and SMgrRelationData::md_num_open_segs.

◆ mdopenfork()

static MdfdVec * mdopenfork ( SMgrRelation  reln,
ForkNumber  forknum,
int  behavior 
)
static

Definition at line 514 of file md.c.

515 {
516  MdfdVec *mdfd;
517  char *path;
518  File fd;
519 
520  /* No work if already open */
521  if (reln->md_num_open_segs[forknum] > 0)
522  return &reln->md_seg_fds[forknum][0];
523 
524  path = relpath(reln->smgr_rlocator, forknum);
525 
526  fd = PathNameOpenFile(path, O_RDWR | PG_BINARY);
527 
528  if (fd < 0)
529  {
530  if ((behavior & EXTENSION_RETURN_NULL) &&
531  FILE_POSSIBLY_DELETED(errno))
532  {
533  pfree(path);
534  return NULL;
535  }
536  ereport(ERROR,
537  (errcode_for_file_access(),
538  errmsg("could not open file \"%s\": %m", path)));
539  }
540 
541  pfree(path);
542 
543  _fdvec_resize(reln, forknum, 1);
544  mdfd = &reln->md_seg_fds[forknum][0];
545  mdfd->mdfd_vfd = fd;
546  mdfd->mdfd_segno = 0;
547 
548  Assert(_mdnblocks(reln, forknum, mdfd) <= ((BlockNumber) RELSEG_SIZE));
549 
550  return mdfd;
551 }

References _fdvec_resize(), _mdnblocks(), Assert(), ereport, errcode_for_file_access(), errmsg(), ERROR, EXTENSION_RETURN_NULL, fd(), FILE_POSSIBLY_DELETED, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, PathNameOpenFile(), pfree(), PG_BINARY, relpath, and SMgrRelationData::smgr_rlocator.

Referenced by _mdfd_getseg(), mdexists(), and mdnblocks().

◆ mdprefetch()

bool mdprefetch ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum 
)

Definition at line 591 of file md.c.

592 {
593 #ifdef USE_PREFETCH
594  off_t seekpos;
595  MdfdVec *v;
596 
597  v = _mdfd_getseg(reln, forknum, blocknum, false,
598  InRecovery ? EXTENSION_RETURN_NULL : EXTENSION_FAIL);
599  if (v == NULL)
600  return false;
601 
602  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
603 
604  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
605 
606  (void) FilePrefetch(v->mdfd_vfd, seekpos, BLCKSZ, WAIT_EVENT_DATA_FILE_PREFETCH);
607 #endif /* USE_PREFETCH */
608 
609  return true;
610 }
int FilePrefetch(File file, off_t offset, int amount, uint32 wait_event_info)
Definition: fd.c:1983
@ WAIT_EVENT_DATA_FILE_PREFETCH
Definition: wait_event.h:177

References _mdfd_getseg(), Assert(), EXTENSION_FAIL, EXTENSION_RETURN_NULL, FilePrefetch(), InRecovery, _MdfdVec::mdfd_vfd, and WAIT_EVENT_DATA_FILE_PREFETCH.

◆ mdread()

void mdread ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
char *  buffer 
)

Definition at line 671 of file md.c.

673 {
674  off_t seekpos;
675  int nbytes;
676  MdfdVec *v;
677 
678  TRACE_POSTGRESQL_SMGR_MD_READ_START(forknum, blocknum,
682  reln->smgr_rlocator.backend);
683 
684  v = _mdfd_getseg(reln, forknum, blocknum, false,
685  EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
686 
687  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
688 
689  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
690 
691  nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_READ);
692 
693  TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum,
697  reln->smgr_rlocator.backend,
698  nbytes,
699  BLCKSZ);
700 
701  if (nbytes != BLCKSZ)
702  {
703  if (nbytes < 0)
704  ereport(ERROR,
705  (errcode_for_file_access(),
706  errmsg("could not read block %u in file \"%s\": %m",
707  blocknum, FilePathName(v->mdfd_vfd))));
708 
709  /*
710  * Short read: we are at or past EOF, or we read a partial block at
711  * EOF. Normally this is an error; upper levels should never try to
712  * read a nonexistent block. However, if zero_damaged_pages is ON or
713  * we are InRecovery, we should instead return zeroes without
714  * complaining. This allows, for example, the case of trying to
715  * update a block that was later truncated away.
716  */
717  if (zero_damaged_pages || InRecovery)
718  MemSet(buffer, 0, BLCKSZ);
719  else
720  ereport(ERROR,
721  (errcode(ERRCODE_DATA_CORRUPTED),
722  errmsg("could not read block %u in file \"%s\": read only %d of %d bytes",
723  blocknum, FilePathName(v->mdfd_vfd),
724  nbytes, BLCKSZ)));
725  }
726 }
bool zero_damaged_pages
Definition: bufmgr.c:134
#define MemSet(start, val, len)
Definition: c.h:953
int FileRead(File file, char *buffer, int amount, off_t offset, uint32 wait_event_info)
Definition: fd.c:2034
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
@ WAIT_EVENT_DATA_FILE_READ
Definition: wait_event.h:178

References _mdfd_getseg(), Assert(), RelFileLocatorBackend::backend, RelFileLocator::dbOid, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errmsg(), ERROR, EXTENSION_CREATE_RECOVERY, EXTENSION_FAIL, FilePathName(), FileRead(), InRecovery, RelFileLocatorBackend::locator, _MdfdVec::mdfd_vfd, MemSet, RelFileLocator::relNumber, SMgrRelationData::smgr_rlocator, RelFileLocator::spcOid, WAIT_EVENT_DATA_FILE_READ, and zero_damaged_pages.

◆ mdsyncfiletag()

int mdsyncfiletag ( const FileTag *  ftag,
char *  path 
)

Definition at line 1377 of file md.c.

1378 {
1379  SMgrRelation reln = smgropen(ftag->rlocator, InvalidBackendId);
1380  File file;
1381  bool need_to_close;
1382  int result,
1383  save_errno;
1384 
1385  /* See if we already have the file open, or need to open it. */
1386  if (ftag->segno < reln->md_num_open_segs[ftag->forknum])
1387  {
1388  file = reln->md_seg_fds[ftag->forknum][ftag->segno].mdfd_vfd;
1389  strlcpy(path, FilePathName(file), MAXPGPATH);
1390  need_to_close = false;
1391  }
1392  else
1393  {
1394  char *p;
1395 
1396  p = _mdfd_segpath(reln, ftag->forknum, ftag->segno);
1397  strlcpy(path, p, MAXPGPATH);
1398  pfree(p);
1399 
1400  file = PathNameOpenFile(path, O_RDWR | PG_BINARY);
1401  if (file < 0)
1402  return -1;
1403  need_to_close = true;
1404  }
1405 
1406  /* Sync the file. */
1407  result = FileSync(file, WAIT_EVENT_DATA_FILE_SYNC);
1408  save_errno = errno;
1409 
1410  if (need_to_close)
1411  FileClose(file);
1412 
1413  errno = save_errno;
1414  return result;
1415 }
#define MAXPGPATH
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
int16 forknum
Definition: sync.h:53
uint32 segno
Definition: sync.h:55
@ WAIT_EVENT_DATA_FILE_SYNC
Definition: wait_event.h:179

References _mdfd_segpath(), FileClose(), FilePathName(), FileSync(), FileTag::forknum, InvalidBackendId, MAXPGPATH, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, PathNameOpenFile(), pfree(), PG_BINARY, FileTag::rlocator, FileTag::segno, smgropen(), strlcpy(), and WAIT_EVENT_DATA_FILE_SYNC.

◆ mdtruncate()

void mdtruncate ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  nblocks 
)

Definition at line 858 of file md.c.

859 {
860  BlockNumber curnblk;
861  BlockNumber priorblocks;
862  int curopensegs;
863 
864  /*
865  * NOTE: mdnblocks makes sure we have opened all active segments, so that
866  * truncation loop will get them all!
867  */
868  curnblk = mdnblocks(reln, forknum);
869  if (nblocks > curnblk)
870  {
871  /* Bogus request ... but no complaint if InRecovery */
872  if (InRecovery)
873  return;
874  ereport(ERROR,
875  (errmsg("could not truncate file \"%s\" to %u blocks: it's only %u blocks now",
876  relpath(reln->smgr_rlocator, forknum),
877  nblocks, curnblk)));
878  }
879  if (nblocks == curnblk)
880  return; /* no work */
881 
882  /*
883  * Truncate segments, starting at the last one. Starting at the end makes
884  * managing the memory for the fd array easier, should there be errors.
885  */
886  curopensegs = reln->md_num_open_segs[forknum];
887  while (curopensegs > 0)
888  {
889  MdfdVec *v;
890 
891  priorblocks = (curopensegs - 1) * RELSEG_SIZE;
892 
893  v = &reln->md_seg_fds[forknum][curopensegs - 1];
894 
895  if (priorblocks > nblocks)
896  {
897  /*
898  * This segment is no longer active. We truncate the file, but do
899  * not delete it, for reasons explained in the header comments.
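900  */
901  if (FileTruncate(v->mdfd_vfd, 0, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0)
902  ereport(ERROR,
903  (errcode_for_file_access(),
904  errmsg("could not truncate file \"%s\": %m",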
905  FilePathName(v->mdfd_vfd))));
906 
907  if (!SmgrIsTemp(reln))
908  register_dirty_segment(reln, forknum, v);
909 
910  /* we never drop the 1st segment */
911  Assert(v != &reln->md_seg_fds[forknum][0]);
912 
913  FileClose(v->mdfd_vfd);
914  _fdvec_resize(reln, forknum, curopensegs - 1);
915  }
916  else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks)
917  {
918  /*
919  * This is the last segment we want to keep. Truncate the file to
920  * the right length. NOTE: if nblocks is exactly a multiple K of
921  * RELSEG_SIZE, we will truncate the K+1st segment to 0 length but
922  * keep it. This adheres to the invariant given in the header
923  * comments.
924  */
925  BlockNumber lastsegblocks = nblocks - priorblocks;
926 
927  if (FileTruncate(v->mdfd_vfd, (off_t) lastsegblocks * BLCKSZ, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0)
928  ereport(ERROR,
929  (errcode_for_file_access(),
930  errmsg("could not truncate file \"%s\" to %u blocks: %m",
931  FilePathName(v->mdfd_vfd),
932  nblocks)));
933  if (!SmgrIsTemp(reln))
934  register_dirty_segment(reln, forknum, v);
935  }
936  else
937  {
938  /*
939  * We still need this segment, so nothing to do for this and any
940  * earlier segment.
941  */
942  break;
943  }
944  curopensegs--;
945  }
946 }
int FileTruncate(File file, off_t offset, uint32 wait_event_info)
Definition: fd.c:2226
@ WAIT_EVENT_DATA_FILE_TRUNCATE
Definition: wait_event.h:180

References _fdvec_resize(), Assert(), ereport, errcode_for_file_access(), errmsg(), ERROR, FileClose(), FilePathName(), FileTruncate(), InRecovery, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, mdnblocks(), register_dirty_segment(), relpath, SMgrRelationData::smgr_rlocator, SmgrIsTemp, and WAIT_EVENT_DATA_FILE_TRUNCATE.

◆ mdunlink()

void mdunlink ( RelFileLocatorBackend  rlocator,
ForkNumber  forknum,
bool  isRedo 
)

Definition at line 296 of file md.c.

297 {
298  /* Now do the per-fork work */
299  if (forknum == InvalidForkNumber)
300  {
301  for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
302  mdunlinkfork(rlocator, forknum, isRedo);
303  }
304  else
305  mdunlinkfork(rlocator, forknum, isRedo);
306 }
static void mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
Definition: md.c:333

References InvalidForkNumber, MAX_FORKNUM, and mdunlinkfork().

◆ mdunlinkfiletag()

int mdunlinkfiletag ( const FileTag *  ftag,
char *  path 
)

Definition at line 1424 of file md.c.

1425 {
1426  char *p;
1427 
1428  /* Compute the path. */
1429  p = relpathperm(ftag->rlocator, MAIN_FORKNUM);
1430  strlcpy(path, p, MAXPGPATH);
1431  pfree(p);
1432 
1433  /* Try to unlink the file. */
1434  return unlink(path);
1435 }
@ MAIN_FORKNUM
Definition: relpath.h:50
#define relpathperm(rlocator, forknum)
Definition: relpath.h:90

References MAIN_FORKNUM, MAXPGPATH, pfree(), relpathperm, FileTag::rlocator, and strlcpy().

◆ mdunlinkfork()

static void mdunlinkfork ( RelFileLocatorBackend  rlocator,
ForkNumber  forknum,
bool  isRedo 
)
static

Definition at line 333 of file md.c.

334 {
335  char *path;
336  int ret;
337  int save_errno;
338 
339  path = relpath(rlocator, forknum);
340 
341  /*
342  * Truncate and then unlink the first segment, or just register a request
343  * to unlink it later, as described in the comments for mdunlink().
344  */
345  if (isRedo || IsBinaryUpgrade || forknum != MAIN_FORKNUM ||
346  RelFileLocatorBackendIsTemp(rlocator))
347  {
348  if (!RelFileLocatorBackendIsTemp(rlocator))
349  {
350  /* Prevent other backends' fds from holding on to the disk space */
351  ret = do_truncate(path);
352 
353  /* Forget any pending sync requests for the first segment */
354  save_errno = errno;
355  register_forget_request(rlocator, forknum, 0 /* first seg */ );
356  errno = save_errno;
357  }
358  else
359  ret = 0;
360 
361  /* Next unlink the file, unless it was already found to be missing */
362  if (ret >= 0 || errno != ENOENT)
363  {
364  ret = unlink(path);
365  if (ret < 0 && errno != ENOENT)
366  {
367  save_errno = errno;
368  ereport(WARNING,
369  (errcode_for_file_access(),
370  errmsg("could not remove file \"%s\": %m", path)));
371  errno = save_errno;
372  }
373  }
374  }
375  else
376  {
377  /* Prevent other backends' fds from holding on to the disk space */
378  ret = do_truncate(path);
379 
380  /* Register request to unlink first segment later */
381  save_errno = errno;
382  register_unlink_segment(rlocator, forknum, 0 /* first seg */ );
383  errno = save_errno;
384  }
385 
386  /*
387  * Delete any additional segments.
388  *
389  * Note that because we loop until getting ENOENT, we will correctly
390  * remove all inactive segments as well as active ones. Ideally we'd
391  * continue the loop until getting exactly that errno, but that risks an
392  * infinite loop if the problem is directory-wide (for instance, if we
393  * suddenly can't read the data directory itself). We compromise by
394  * continuing after a non-ENOENT truncate error, but stopping after any
395  * unlink error. If there is indeed a directory-wide problem, additional
396  * unlink attempts wouldn't work anyway.
397  */
398  if (ret >= 0 || errno != ENOENT)
399  {
400  char *segpath = (char *) palloc(strlen(path) + 12);
401  BlockNumber segno;
402 
403  for (segno = 1;; segno++)
404  {
405  sprintf(segpath, "%s.%u", path, segno);
406 
407  if (!RelFileLocatorBackendIsTemp(rlocator))
408  {
409  /*
410  * Prevent other backends' fds from holding on to the disk
411  * space. We're done if we see ENOENT, though.
412  */
413  if (do_truncate(segpath) < 0 && errno == ENOENT)
414  break;
415 
416  /*
417  * Forget any pending sync requests for this segment before we
418  * try to unlink.
419  */
420  register_forget_request(rlocator, forknum, segno);
421  }
422 
423  if (unlink(segpath) < 0)
424  {
425  /* ENOENT is expected after the last segment... */
426  if (errno != ENOENT)
427  ereport(WARNING,
428  (errcode_for_file_access(),
429  errmsg("could not remove file \"%s\": %m", segpath)));
430  break;
431  }
432  }
433  pfree(segpath);
434  }
435 
436  pfree(path);
437 }
bool IsBinaryUpgrade
Definition: globals.c:114
static void register_forget_request(RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno)
Definition: md.c:1056
static void register_unlink_segment(RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno)
Definition: md.c:1039
static int do_truncate(const char *path)
Definition: md.c:312
#define sprintf
Definition: port.h:240
#define RelFileLocatorBackendIsTemp(rlocator)

References do_truncate(), ereport, errcode_for_file_access(), errmsg(), IsBinaryUpgrade, MAIN_FORKNUM, palloc(), pfree(), register_forget_request(), register_unlink_segment(), RelFileLocatorBackendIsTemp, relpath, sprintf, and WARNING.

Referenced by mdunlink().

◆ mdwrite()

void mdwrite ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
char *  buffer,
bool  skipFsync 
)

Definition at line 736 of file md.c.

738 {
739  off_t seekpos;
740  int nbytes;
741  MdfdVec *v;
742 
743  /* This assert is too expensive to have on normally ... */
744 #ifdef CHECK_WRITE_VS_EXTEND
745  Assert(blocknum < mdnblocks(reln, forknum));
746 #endif
747 
748  TRACE_POSTGRESQL_SMGR_MD_WRITE_START(forknum, blocknum,
752  reln->smgr_rlocator.backend);
753 
754  v = _mdfd_getseg(reln, forknum, blocknum, skipFsync,
755  EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
756 
757  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
758 
759  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
760 
761  nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_WRITE);
762 
763  TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum,
767  reln->smgr_rlocator.backend,
768  nbytes,
769  BLCKSZ);
770 
771  if (nbytes != BLCKSZ)
772  {
773  if (nbytes < 0)
774  ereport(ERROR,
775  (errcode_for_file_access(),
776  errmsg("could not write block %u in file \"%s\": %m",
777  blocknum, FilePathName(v->mdfd_vfd))));
778  /* short write: complain appropriately */
779  ereport(ERROR,
780  (errcode(ERRCODE_DISK_FULL),
781  errmsg("could not write block %u in file \"%s\": wrote only %d of %d bytes",
782  blocknum,
783  FilePathName(v->mdfd_vfd),
784  nbytes, BLCKSZ),
785  errhint("Check free disk space.")));
786  }
787 
788  if (!skipFsync && !SmgrIsTemp(reln))
789  register_dirty_segment(reln, forknum, v);
790 }
@ WAIT_EVENT_DATA_FILE_WRITE
Definition: wait_event.h:181

References _mdfd_getseg(), Assert(), RelFileLocatorBackend::backend, RelFileLocator::dbOid, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXTENSION_CREATE_RECOVERY, EXTENSION_FAIL, FilePathName(), FileWrite(), RelFileLocatorBackend::locator, _MdfdVec::mdfd_vfd, mdnblocks(), register_dirty_segment(), RelFileLocator::relNumber, SMgrRelationData::smgr_rlocator, SmgrIsTemp, RelFileLocator::spcOid, and WAIT_EVENT_DATA_FILE_WRITE.

◆ mdwriteback()

void mdwriteback ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
BlockNumber  nblocks 
)

Definition at line 619 of file md.c.

621 {
622  /*
623  * Issue flush requests in as few requests as possible; have to split at
624  * segment boundaries though, since those are actually separate files.
625  */
626  while (nblocks > 0)
627  {
628  BlockNumber nflush = nblocks;
629  off_t seekpos;
630  MdfdVec *v;
631  int segnum_start,
632  segnum_end;
633 
634  v = _mdfd_getseg(reln, forknum, blocknum, true /* not used */ ,
635  EXTENSION_DONT_OPEN);
636 
637  /*
638  * We might be flushing buffers of already removed relations, that's
639  * ok, just ignore that case. If the segment file wasn't open already
640  * (ie from a recent mdwrite()), then we don't want to re-open it, to
641  * avoid a race with PROCSIGNAL_BARRIER_SMGRRELEASE that might leave
642  * us with a descriptor to a file that is about to be unlinked.
643  */
644  if (!v)
645  return;
646 
647  /* compute offset inside the current segment */
648  segnum_start = blocknum / RELSEG_SIZE;
649 
650  /* compute number of desired writes within the current segment */
651  segnum_end = (blocknum + nblocks - 1) / RELSEG_SIZE;
652  if (segnum_start != segnum_end)
653  nflush = RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE));
654 
655  Assert(nflush >= 1);
656  Assert(nflush <= nblocks);
657 
658  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
659 
660  FileWriteback(v->mdfd_vfd, seekpos, (off_t) BLCKSZ * nflush, WAIT_EVENT_DATA_FILE_FLUSH);
661 
662  nblocks -= nflush;
663  blocknum += nflush;
664  }
665 }
void FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info)
Definition: fd.c:2011
@ WAIT_EVENT_DATA_FILE_FLUSH
Definition: wait_event.h:175

References _mdfd_getseg(), Assert(), EXTENSION_DONT_OPEN, FileWriteback(), _MdfdVec::mdfd_vfd, and WAIT_EVENT_DATA_FILE_FLUSH.

◆ register_dirty_segment()

static void register_dirty_segment ( SMgrRelation  reln,
ForkNumber  forknum,
MdfdVec *  seg 
)
static

Definition at line 1013 of file md.c.

1014 {
1015  FileTag tag;
1016 
1017  INIT_MD_FILETAG(tag, reln->smgr_rlocator.locator, forknum, seg->mdfd_segno);
1018 
1019  /* Temp relations should never be fsync'd */
1020  Assert(!SmgrIsTemp(reln));
1021 
1022  if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false /* retryOnError */ ))
1023  {
1024  ereport(DEBUG1,
1025  (errmsg_internal("could not forward fsync request because request queue is full")));
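1026 
1027  if (FileSync(seg->mdfd_vfd, WAIT_EVENT_DATA_FILE_SYNC) < 0)
1028  ereport(data_sync_elevel(ERROR),
1029  (errcode_for_file_access(),
1030  errmsg("could not fsync file \"%s\": %m",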
1031  FilePathName(seg->mdfd_vfd))));
1032  }
1033 }
int errmsg_internal(const char *fmt,...)
Definition: elog.c:993
#define DEBUG1
Definition: elog.h:26
@ SYNC_REQUEST
Definition: sync.h:25

References Assert(), data_sync_elevel(), DEBUG1, ereport, errcode_for_file_access(), errmsg(), errmsg_internal(), ERROR, FilePathName(), FileSync(), INIT_MD_FILETAG, RelFileLocatorBackend::locator, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, RegisterSyncRequest(), SMgrRelationData::smgr_rlocator, SmgrIsTemp, SYNC_REQUEST, and WAIT_EVENT_DATA_FILE_SYNC.

Referenced by mdextend(), mdtruncate(), and mdwrite().

◆ register_forget_request()

static void register_forget_request ( RelFileLocatorBackend  rlocator,
ForkNumber  forknum,
BlockNumber  segno 
)
static

Definition at line 1056 of file md.c.

1058 {
1059  FileTag tag;
1060 
1061  INIT_MD_FILETAG(tag, rlocator.locator, forknum, segno);
1062 
1063  RegisterSyncRequest(&tag, SYNC_FORGET_REQUEST, true /* retryOnError */ );
1064 }
@ SYNC_FORGET_REQUEST
Definition: sync.h:27

References INIT_MD_FILETAG, RelFileLocatorBackend::locator, RegisterSyncRequest(), and SYNC_FORGET_REQUEST.

Referenced by mdunlinkfork().

◆ register_unlink_segment()

static void register_unlink_segment ( RelFileLocatorBackend  rlocator,
ForkNumber  forknum,
BlockNumber  segno 
)
static

Definition at line 1039 of file md.c.

1041 {
1042  FileTag tag;
1043 
1044  INIT_MD_FILETAG(tag, rlocator.locator, forknum, segno);
1045 
1046  /* Should never be used with temp relations */
1047  Assert(!RelFileLocatorBackendIsTemp(rlocator));
1048 
1049  RegisterSyncRequest(&tag, SYNC_UNLINK_REQUEST, true /* retryOnError */ );
1050 }
@ SYNC_UNLINK_REQUEST
Definition: sync.h:26

References Assert(), INIT_MD_FILETAG, RelFileLocatorBackend::locator, RegisterSyncRequest(), RelFileLocatorBackendIsTemp, and SYNC_UNLINK_REQUEST.

Referenced by mdunlinkfork().

Variable Documentation

◆ MdCxt

MemoryContext MdCxt
static

Definition at line 88 of file md.c.

Referenced by _fdvec_resize(), and mdinit().