PostgreSQL Source Code  git master
md.c File Reference
#include "postgres.h"
#include <unistd.h>
#include <fcntl.h>
#include <sys/file.h>
#include "access/xlog.h"
#include "access/xlogutils.h"
#include "commands/tablespace.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/md.h"
#include "storage/relfilelocator.h"
#include "storage/smgr.h"
#include "storage/sync.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"
Include dependency graph for md.c:

Go to the source code of this file.

Data Structures

struct  _MdfdVec
 

Macros

#define INIT_MD_FILETAG(a, xx_rlocator, xx_forknum, xx_segno)
 
#define EXTENSION_FAIL   (1 << 0)
 
#define EXTENSION_RETURN_NULL   (1 << 1)
 
#define EXTENSION_CREATE   (1 << 2)
 
#define EXTENSION_CREATE_RECOVERY   (1 << 3)
 
#define EXTENSION_DONT_CHECK_SIZE   (1 << 4)
 
#define EXTENSION_DONT_OPEN   (1 << 5)
 

Typedefs

typedef struct _MdfdVec MdfdVec
 

Functions

static void mdunlinkfork (RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
 
static MdfdVec * mdopenfork (SMgrRelation reln, ForkNumber forknum, int behavior)
 
static void register_dirty_segment (SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
 
static void register_unlink_segment (RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno)
 
static void register_forget_request (RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno)
 
static void _fdvec_resize (SMgrRelation reln, ForkNumber forknum, int nseg)
 
static char * _mdfd_segpath (SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
 
static MdfdVec * _mdfd_openseg (SMgrRelation reln, ForkNumber forknum, BlockNumber segno, int oflags)
 
static MdfdVec * _mdfd_getseg (SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, bool skipFsync, int behavior)
 
static BlockNumber _mdnblocks (SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
 
static int _mdfd_open_flags (void)
 
void mdinit (void)
 
bool mdexists (SMgrRelation reln, ForkNumber forknum)
 
void mdcreate (SMgrRelation reln, ForkNumber forknum, bool isRedo)
 
void mdunlink (RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
 
static int do_truncate (const char *path)
 
void mdextend (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
 
void mdzeroextend (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync)
 
void mdopen (SMgrRelation reln)
 
void mdclose (SMgrRelation reln, ForkNumber forknum)
 
bool mdprefetch (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
 
void mdread (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void *buffer)
 
void mdwrite (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
 
void mdwriteback (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
 
BlockNumber mdnblocks (SMgrRelation reln, ForkNumber forknum)
 
void mdtruncate (SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
 
void mdimmedsync (SMgrRelation reln, ForkNumber forknum)
 
void ForgetDatabaseSyncRequests (Oid dbid)
 
void DropRelationFiles (RelFileLocator *delrels, int ndelrels, bool isRedo)
 
int mdsyncfiletag (const FileTag *ftag, char *path)
 
int mdunlinkfiletag (const FileTag *ftag, char *path)
 
bool mdfiletagmatches (const FileTag *ftag, const FileTag *candidate)
 

Variables

static MemoryContext MdCxt
 

Macro Definition Documentation

◆ EXTENSION_CREATE

#define EXTENSION_CREATE   (1 << 2)

Definition at line 108 of file md.c.

◆ EXTENSION_CREATE_RECOVERY

#define EXTENSION_CREATE_RECOVERY   (1 << 3)

Definition at line 110 of file md.c.

◆ EXTENSION_DONT_CHECK_SIZE

#define EXTENSION_DONT_CHECK_SIZE   (1 << 4)

Definition at line 118 of file md.c.

◆ EXTENSION_DONT_OPEN

#define EXTENSION_DONT_OPEN   (1 << 5)

Definition at line 120 of file md.c.

◆ EXTENSION_FAIL

#define EXTENSION_FAIL   (1 << 0)

Definition at line 104 of file md.c.

◆ EXTENSION_RETURN_NULL

#define EXTENSION_RETURN_NULL   (1 << 1)

Definition at line 106 of file md.c.

◆ INIT_MD_FILETAG

#define INIT_MD_FILETAG (   a,
  xx_rlocator,
  xx_forknum,
  xx_segno 
)
Value:
( \
memset(&(a), 0, sizeof(FileTag)), \
(a).handler = SYNC_HANDLER_MD, \
(a).rlocator = (xx_rlocator), \
(a).forknum = (xx_forknum), \
(a).segno = (xx_segno) \
)
int a
Definition: isn.c:69
Definition: sync.h:51
@ SYNC_HANDLER_MD
Definition: sync.h:37

Definition at line 92 of file md.c.

Typedef Documentation

◆ MdfdVec

typedef struct _MdfdVec MdfdVec

Function Documentation

◆ _fdvec_resize()

static void _fdvec_resize ( SMgrRelation  reln,
ForkNumber  forknum,
int  nseg 
)
static

Definition at line 1284 of file md.c.

1287 {
1288  if (nseg == 0)
1289  {
1290  if (reln->md_num_open_segs[forknum] > 0)
1291  {
1292  pfree(reln->md_seg_fds[forknum]);
1293  reln->md_seg_fds[forknum] = NULL;
1294  }
1295  }
1296  else if (reln->md_num_open_segs[forknum] == 0)
1297  {
1298  reln->md_seg_fds[forknum] =
1299  MemoryContextAlloc(MdCxt, sizeof(MdfdVec) * nseg);
1300  }
1301  else
1302  {
1303  /*
1304  * It doesn't seem worthwhile complicating the code to amortize
1305  * repalloc() calls. Those are far faster than PathNameOpenFile() or
1306  * FileClose(), and the memory context internally will sometimes avoid
1307  * doing an actual reallocation.
1308  */
1309  reln->md_seg_fds[forknum] =
1310  repalloc(reln->md_seg_fds[forknum],
1311  sizeof(MdfdVec) * nseg);
1312  }
1313 
1314  reln->md_num_open_segs[forknum] = nseg;
1315 }
void pfree(void *pointer)
Definition: mcxt.c:1456
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1476
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:1021
static MemoryContext MdCxt
Definition: md.c:88
int md_num_open_segs[MAX_FORKNUM+1]
Definition: smgr.h:68
struct _MdfdVec * md_seg_fds[MAX_FORKNUM+1]
Definition: smgr.h:69
Definition: md.c:83

References SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, MdCxt, MemoryContextAlloc(), pfree(), and repalloc().

Referenced by _mdfd_openseg(), mdclose(), mdcreate(), mdimmedsync(), mdopenfork(), and mdtruncate().

◆ _mdfd_getseg()

static MdfdVec * _mdfd_getseg ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blkno,
bool  skipFsync,
int  behavior 
)
static

Definition at line 1390 of file md.c.

1392 {
1393  MdfdVec *v;
1394  BlockNumber targetseg;
1395  BlockNumber nextsegno;
1396 
1397  /* some way to handle non-existent segments needs to be specified */
1398  Assert(behavior &
1399  (EXTENSION_FAIL | EXTENSION_CREATE | EXTENSION_RETURN_NULL |
1400  EXTENSION_DONT_OPEN));
1401 
1402  targetseg = blkno / ((BlockNumber) RELSEG_SIZE);
1403 
1404  /* if an existing and opened segment, we're done */
1405  if (targetseg < reln->md_num_open_segs[forknum])
1406  {
1407  v = &reln->md_seg_fds[forknum][targetseg];
1408  return v;
1409  }
1410 
1411  /* The caller only wants the segment if we already had it open. */
1412  if (behavior & EXTENSION_DONT_OPEN)
1413  return NULL;
1414 
1415  /*
1416  * The target segment is not yet open. Iterate over all the segments
1417  * between the last opened and the target segment. This way missing
1418  * segments either raise an error, or get created (according to
1419  * 'behavior'). Start with either the last opened, or the first segment if
1420  * none was opened before.
1421  */
1422  if (reln->md_num_open_segs[forknum] > 0)
1423  v = &reln->md_seg_fds[forknum][reln->md_num_open_segs[forknum] - 1];
1424  else
1425  {
1426  v = mdopenfork(reln, forknum, behavior);
1427  if (!v)
1428  return NULL; /* if behavior & EXTENSION_RETURN_NULL */
1429  }
1430 
1431  for (nextsegno = reln->md_num_open_segs[forknum];
1432  nextsegno <= targetseg; nextsegno++)
1433  {
1434  BlockNumber nblocks = _mdnblocks(reln, forknum, v);
1435  int flags = 0;
1436 
1437  Assert(nextsegno == v->mdfd_segno + 1);
1438 
1439  if (nblocks > ((BlockNumber) RELSEG_SIZE))
1440  elog(FATAL, "segment too big");
1441 
1442  if ((behavior & EXTENSION_CREATE) ||
1443  (InRecovery && (behavior & EXTENSION_CREATE_RECOVERY)))
1444  {
1445  /*
1446  * Normally we will create new segments only if authorized by the
1447  * caller (i.e., we are doing mdextend()). But when doing WAL
1448  * recovery, create segments anyway; this allows cases such as
1449  * replaying WAL data that has a write into a high-numbered
1450  * segment of a relation that was later deleted. We want to go
1451  * ahead and create the segments so we can finish out the replay.
1452  *
1453  * We have to maintain the invariant that segments before the last
1454  * active segment are of size RELSEG_SIZE; therefore, if
1455  * extending, pad them out with zeroes if needed. (This only
1456  * matters if in recovery, or if the caller is extending the
1457  * relation discontiguously, but that can happen in hash indexes.)
1458  */
1459  if (nblocks < ((BlockNumber) RELSEG_SIZE))
1460  {
1461  char *zerobuf = palloc_aligned(BLCKSZ, PG_IO_ALIGN_SIZE,
1462  MCXT_ALLOC_ZERO);
1463 
1464  mdextend(reln, forknum,
1465  nextsegno * ((BlockNumber) RELSEG_SIZE) - 1,
1466  zerobuf, skipFsync);
1467  pfree(zerobuf);
1468  }
1469  flags = O_CREAT;
1470  }
1471  else if (!(behavior & EXTENSION_DONT_CHECK_SIZE) &&
1472  nblocks < ((BlockNumber) RELSEG_SIZE))
1473  {
1474  /*
1475  * When not extending (or explicitly including truncated
1476  * segments), only open the next segment if the current one is
1477  * exactly RELSEG_SIZE. If not (this branch), either return NULL
1478  * or fail.
1479  */
1480  if (behavior & EXTENSION_RETURN_NULL)
1481  {
1482  /*
1483  * Some callers discern between reasons for _mdfd_getseg()
1484  * returning NULL based on errno. As there's no failing
1485  * syscall involved in this case, explicitly set errno to
1486  * ENOENT, as that seems the closest interpretation.
1487  */
1488  errno = ENOENT;
1489  return NULL;
1490  }
1491 
1492  ereport(ERROR,
1493  (errcode_for_file_access(),
1494  errmsg("could not open file \"%s\" (target block %u): previous segment is only %u blocks",
1495  _mdfd_segpath(reln, forknum, nextsegno),
1496  blkno, nblocks)));
1497  }
1498 
1499  v = _mdfd_openseg(reln, forknum, nextsegno, flags);
1500 
1501  if (v == NULL)
1502  {
1503  if ((behavior & EXTENSION_RETURN_NULL) &&
1504  FILE_POSSIBLY_DELETED(errno))
1505  return NULL;
1506  ereport(ERROR,
1507  (errcode_for_file_access(),
1508  errmsg("could not open file \"%s\" (target block %u): %m",
1509  _mdfd_segpath(reln, forknum, nextsegno),
1510  blkno)));
1511  }
1512  }
1513 
1514  return v;
1515 }
uint32 BlockNumber
Definition: block.h:31
int errcode_for_file_access(void)
Definition: elog.c:881
int errmsg(const char *fmt,...)
Definition: elog.c:1069
#define FATAL
Definition: elog.h:41
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
#define FILE_POSSIBLY_DELETED(err)
Definition: fd.h:76
#define MCXT_ALLOC_ZERO
Definition: fe_memutils.h:18
Assert(fmt[strlen(fmt) - 1] !='\n')
void * palloc_aligned(Size size, Size alignto, int flags)
Definition: mcxt.c:1446
#define EXTENSION_CREATE_RECOVERY
Definition: md.c:110
static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:1521
#define EXTENSION_DONT_OPEN
Definition: md.c:120
void mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
Definition: md.c:462
static MdfdVec * _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno, int oflags)
Definition: md.c:1345
#define EXTENSION_RETURN_NULL
Definition: md.c:106
static char * _mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
Definition: md.c:1322
#define EXTENSION_CREATE
Definition: md.c:108
#define EXTENSION_DONT_CHECK_SIZE
Definition: md.c:118
#define EXTENSION_FAIL
Definition: md.c:104
static MdfdVec * mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
Definition: md.c:639
#define PG_IO_ALIGN_SIZE
BlockNumber mdfd_segno
Definition: md.c:85
bool InRecovery
Definition: xlogutils.c:53

References _mdfd_openseg(), _mdfd_segpath(), _mdnblocks(), Assert(), elog(), ereport, errcode_for_file_access(), errmsg(), ERROR, EXTENSION_CREATE, EXTENSION_CREATE_RECOVERY, EXTENSION_DONT_CHECK_SIZE, EXTENSION_DONT_OPEN, EXTENSION_FAIL, EXTENSION_RETURN_NULL, FATAL, FILE_POSSIBLY_DELETED, InRecovery, MCXT_ALLOC_ZERO, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, mdextend(), _MdfdVec::mdfd_segno, mdopenfork(), palloc_aligned(), pfree(), and PG_IO_ALIGN_SIZE.

Referenced by mdextend(), mdprefetch(), mdread(), mdwrite(), mdwriteback(), and mdzeroextend().

◆ _mdfd_open_flags()

static int _mdfd_open_flags ( void  )
inline static

Definition at line 146 of file md.c.

147 {
148  int flags = O_RDWR | PG_BINARY;
149 
150  if (io_direct_flags & IO_DIRECT_DATA)
151  flags |= PG_O_DIRECT;
152 
153  return flags;
154 }
#define PG_BINARY
Definition: c.h:1283
int io_direct_flags
Definition: fd.c:168
#define IO_DIRECT_DATA
Definition: fd.h:52
#define PG_O_DIRECT
Definition: fd.h:95

References IO_DIRECT_DATA, io_direct_flags, PG_BINARY, and PG_O_DIRECT.

Referenced by _mdfd_openseg(), mdcreate(), mdopenfork(), and mdsyncfiletag().

◆ _mdfd_openseg()

static MdfdVec * _mdfd_openseg ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  segno,
int  oflags 
)
static

Definition at line 1345 of file md.c.

1347 {
1348  MdfdVec *v;
1349  File fd;
1350  char *fullpath;
1351 
1352  fullpath = _mdfd_segpath(reln, forknum, segno);
1353 
1354  /* open the file */
1355  fd = PathNameOpenFile(fullpath, _mdfd_open_flags() | oflags);
1356 
1357  pfree(fullpath);
1358 
1359  if (fd < 0)
1360  return NULL;
1361 
1362  /*
1363  * Segments are always opened in order from lowest to highest, so we must
1364  * be adding a new one at the end.
1365  */
1366  Assert(segno == reln->md_num_open_segs[forknum]);
1367 
1368  _fdvec_resize(reln, forknum, segno + 1);
1369 
1370  /* fill the entry */
1371  v = &reln->md_seg_fds[forknum][segno];
1372  v->mdfd_vfd = fd;
1373  v->mdfd_segno = segno;
1374 
1375  Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
1376 
1377  /* all done */
1378  return v;
1379 }
File PathNameOpenFile(const char *fileName, int fileFlags)
Definition: fd.c:1527
int File
Definition: fd.h:49
static int _mdfd_open_flags(void)
Definition: md.c:146
static void _fdvec_resize(SMgrRelation reln, ForkNumber forknum, int nseg)
Definition: md.c:1284
static int fd(const char *x, int i)
Definition: preproc-init.c:105
File mdfd_vfd
Definition: md.c:84

References _fdvec_resize(), _mdfd_open_flags(), _mdfd_segpath(), _mdnblocks(), Assert(), fd(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, PathNameOpenFile(), and pfree().

Referenced by _mdfd_getseg(), mdimmedsync(), and mdnblocks().

◆ _mdfd_segpath()

static char * _mdfd_segpath ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  segno 
)
static

Definition at line 1322 of file md.c.

1323 {
1324  char *path,
1325  *fullpath;
1326 
1327  path = relpath(reln->smgr_rlocator, forknum);
1328 
1329  if (segno > 0)
1330  {
1331  fullpath = psprintf("%s.%u", path, segno);
1332  pfree(path);
1333  }
1334  else
1335  fullpath = path;
1336 
1337  return fullpath;
1338 }
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
#define relpath(rlocator, forknum)
Definition: relpath.h:94
RelFileLocatorBackend smgr_rlocator
Definition: smgr.h:42

References pfree(), psprintf(), relpath, and SMgrRelationData::smgr_rlocator.

Referenced by _mdfd_getseg(), _mdfd_openseg(), and mdsyncfiletag().

◆ _mdnblocks()

static BlockNumber _mdnblocks ( SMgrRelation  reln,
ForkNumber  forknum,
MdfdVec *  seg 
)
static

Definition at line 1521 of file md.c.

1522 {
1523  off_t len;
1524 
1525  len = FileSize(seg->mdfd_vfd);
1526  if (len < 0)
1527  ereport(ERROR,
1528  (errcode_for_file_access(),
1529  errmsg("could not seek to end of file \"%s\": %m",
1530  FilePathName(seg->mdfd_vfd))));
1531  /* note that this calculation will ignore any partial block at EOF */
1532  return (BlockNumber) (len / BLCKSZ);
1533 }
char * FilePathName(File file)
Definition: fd.c:2406
off_t FileSize(File file)
Definition: fd.c:2354
const void size_t len

References ereport, errcode_for_file_access(), errmsg(), ERROR, FilePathName(), FileSize(), len, and _MdfdVec::mdfd_vfd.

Referenced by _mdfd_getseg(), _mdfd_openseg(), mdextend(), mdnblocks(), mdopenfork(), and mdzeroextend().

◆ do_truncate()

static int do_truncate ( const char *  path)
static

Definition at line 325 of file md.c.

326 {
327  int save_errno;
328  int ret;
329 
330  ret = pg_truncate(path, 0);
331 
332  /* Log a warning here to avoid repetition in callers. */
333  if (ret < 0 && errno != ENOENT)
334  {
335  save_errno = errno;
336  ereport(WARNING,
337  (errcode_for_file_access(),
338  errmsg("could not truncate file \"%s\": %m", path)));
339  errno = save_errno;
340  }
341 
342  return ret;
343 }
#define WARNING
Definition: elog.h:36
int pg_truncate(const char *path, off_t length)
Definition: fd.c:672

References ereport, errcode_for_file_access(), errmsg(), pg_truncate(), and WARNING.

Referenced by mdunlinkfork().

◆ DropRelationFiles()

void DropRelationFiles ( RelFileLocator *  delrels,
int  ndelrels,
bool  isRedo 
)

Definition at line 1252 of file md.c.

1253 {
1254  SMgrRelation *srels;
1255  int i;
1256 
1257  srels = palloc(sizeof(SMgrRelation) * ndelrels);
1258  for (i = 0; i < ndelrels; i++)
1259  {
1260  SMgrRelation srel = smgropen(delrels[i], InvalidBackendId);
1261 
1262  if (isRedo)
1263  {
1264  ForkNumber fork;
1265 
1266  for (fork = 0; fork <= MAX_FORKNUM; fork++)
1267  XLogDropRelation(delrels[i], fork);
1268  }
1269  srels[i] = srel;
1270  }
1271 
1272  smgrdounlinkall(srels, ndelrels, isRedo);
1273 
1274  for (i = 0; i < ndelrels; i++)
1275  smgrclose(srels[i]);
1276  pfree(srels);
1277 }
#define InvalidBackendId
Definition: backendid.h:23
int i
Definition: isn.c:73
void * palloc(Size size)
Definition: mcxt.c:1226
ForkNumber
Definition: relpath.h:48
#define MAX_FORKNUM
Definition: relpath.h:62
void smgrclose(SMgrRelation reln)
Definition: smgr.c:260
SMgrRelation smgropen(RelFileLocator rlocator, BackendId backend)
Definition: smgr.c:150
void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
Definition: smgr.c:425
void XLogDropRelation(RelFileLocator rlocator, ForkNumber forknum)
Definition: xlogutils.c:643

References i, InvalidBackendId, MAX_FORKNUM, palloc(), pfree(), smgrclose(), smgrdounlinkall(), smgropen(), and XLogDropRelation().

Referenced by FinishPreparedTransaction(), xact_redo_abort(), and xact_redo_commit().

◆ ForgetDatabaseSyncRequests()

void ForgetDatabaseSyncRequests ( Oid  dbid)

Definition at line 1234 of file md.c.

1235 {
1236  FileTag tag;
1237  RelFileLocator rlocator;
1238 
1239  rlocator.dbOid = dbid;
1240  rlocator.spcOid = 0;
1241  rlocator.relNumber = 0;
1242 
1243  INIT_MD_FILETAG(tag, rlocator, InvalidForkNumber, InvalidBlockNumber);
1244 
1245  RegisterSyncRequest(&tag, SYNC_FILTER_REQUEST, true /* retryOnError */ );
1246 }
#define InvalidBlockNumber
Definition: block.h:33
#define INIT_MD_FILETAG(a, xx_rlocator, xx_forknum, xx_segno)
Definition: md.c:92
@ InvalidForkNumber
Definition: relpath.h:49
RelFileNumber relNumber
bool RegisterSyncRequest(const FileTag *ftag, SyncRequestType type, bool retryOnError)
Definition: sync.c:585
@ SYNC_FILTER_REQUEST
Definition: sync.h:28

References RelFileLocator::dbOid, INIT_MD_FILETAG, InvalidBlockNumber, InvalidForkNumber, RegisterSyncRequest(), RelFileLocator::relNumber, RelFileLocator::spcOid, and SYNC_FILTER_REQUEST.

Referenced by createdb_failure_callback(), dbase_redo(), and dropdb().

◆ mdclose()

void mdclose ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 693 of file md.c.

694 {
695  int nopensegs = reln->md_num_open_segs[forknum];
696 
697  /* No work if already closed */
698  if (nopensegs == 0)
699  return;
700 
701  /* close segments starting from the end */
702  while (nopensegs > 0)
703  {
704  MdfdVec *v = &reln->md_seg_fds[forknum][nopensegs - 1];
705 
706  FileClose(v->mdfd_vfd);
707  _fdvec_resize(reln, forknum, nopensegs - 1);
708  nopensegs--;
709  }
710 }
void FileClose(File file)
Definition: fd.c:1930

References _fdvec_resize(), FileClose(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, and _MdfdVec::mdfd_vfd.

Referenced by mdexists().

◆ mdcreate()

void mdcreate ( SMgrRelation  reln,
ForkNumber  forknum,
bool  isRedo 
)

Definition at line 192 of file md.c.

193 {
194  MdfdVec *mdfd;
195  char *path;
196  File fd;
197 
198  if (isRedo && reln->md_num_open_segs[forknum] > 0)
199  return; /* created and opened already... */
200 
201  Assert(reln->md_num_open_segs[forknum] == 0);
202 
203  /*
204  * We may be using the target table space for the first time in this
205  * database, so create a per-database subdirectory if needed.
206  *
207  * XXX this is a fairly ugly violation of module layering, but this seems
208  * to be the best place to put the check. Maybe TablespaceCreateDbspace
209  * should be here and not in commands/tablespace.c? But that would imply
210  * importing a lot of stuff that smgr.c oughtn't know, either.
211  */
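212  TablespaceCreateDbspace(reln->smgr_rlocator.locator.spcOid,
213  reln->smgr_rlocator.locator.dbOid,
214  isRedo);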
215 
216  path = relpath(reln->smgr_rlocator, forknum);
217 
218  fd = PathNameOpenFile(path, _mdfd_open_flags() | O_CREAT | O_EXCL);
219 
220  if (fd < 0)
221  {
222  int save_errno = errno;
223 
224  if (isRedo)
225  fd = PathNameOpenFile(path, _mdfd_open_flags());
226  if (fd < 0)
227  {
228  /* be sure to report the error reported by create, not open */
229  errno = save_errno;
230  ereport(ERROR,
231  (errcode_for_file_access(),
232  errmsg("could not create file \"%s\": %m", path)));
233  }
234  }
235 
236  pfree(path);
237 
238  _fdvec_resize(reln, forknum, 1);
239  mdfd = &reln->md_seg_fds[forknum][0];
240  mdfd->mdfd_vfd = fd;
241  mdfd->mdfd_segno = 0;
242 
243  if (!SmgrIsTemp(reln))
244  register_dirty_segment(reln, forknum, mdfd);
245 }
void TablespaceCreateDbspace(Oid spcOid, Oid dbOid, bool isRedo)
Definition: tablespace.c:118
static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:1159
#define SmgrIsTemp(smgr)
Definition: smgr.h:77
RelFileLocator locator

References _fdvec_resize(), _mdfd_open_flags(), Assert(), RelFileLocator::dbOid, ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), RelFileLocatorBackend::locator, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, PathNameOpenFile(), pfree(), register_dirty_segment(), relpath, SMgrRelationData::smgr_rlocator, SmgrIsTemp, RelFileLocator::spcOid, and TablespaceCreateDbspace().

◆ mdexists()

bool mdexists ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 173 of file md.c.

174 {
175  /*
176  * Close it first, to ensure that we notice if the fork has been unlinked
177  * since we opened it. As an optimization, we can skip that in recovery,
178  * which already closes relations when dropping them.
179  */
180  if (!InRecovery)
181  mdclose(reln, forknum);
182 
183  return (mdopenfork(reln, forknum, EXTENSION_RETURN_NULL) != NULL);
184 }
void mdclose(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:693

References EXTENSION_RETURN_NULL, InRecovery, mdclose(), and mdopenfork().

◆ mdextend()

void mdextend ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
const void *  buffer,
bool  skipFsync 
)

Definition at line 462 of file md.c.

464 {
465  off_t seekpos;
466  int nbytes;
467  MdfdVec *v;
468 
469  /* If this build supports direct I/O, the buffer must be I/O aligned. */
470  if (PG_O_DIRECT != 0 && PG_IO_ALIGN_SIZE <= BLCKSZ)
471  Assert((uintptr_t) buffer == TYPEALIGN(PG_IO_ALIGN_SIZE, buffer));
472 
473  /* This assert is too expensive to have on normally ... */
474 #ifdef CHECK_WRITE_VS_EXTEND
475  Assert(blocknum >= mdnblocks(reln, forknum));
476 #endif
477 
478  /*
479  * If a relation manages to grow to 2^32-1 blocks, refuse to extend it any
480  * more --- we mustn't create a block whose number actually is
481  * InvalidBlockNumber. (Note that this failure should be unreachable
482  * because of upstream checks in bufmgr.c.)
483  */
484  if (blocknum == InvalidBlockNumber)
485  ereport(ERROR,
486  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
487  errmsg("cannot extend file \"%s\" beyond %u blocks",
488  relpath(reln->smgr_rlocator, forknum),
489  InvalidBlockNumber)));
490 
491  v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE);
492 
493  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
494 
495  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
496 
497  if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ)
498  {
499  if (nbytes < 0)
500  ereport(ERROR,
501  (errcode_for_file_access(),
502  errmsg("could not extend file \"%s\": %m",
503  FilePathName(v->mdfd_vfd)),
504  errhint("Check free disk space.")));
505  /* short write: complain appropriately */
506  ereport(ERROR,
507  (errcode(ERRCODE_DISK_FULL),
508  errmsg("could not extend file \"%s\": wrote only %d of %d bytes at block %u",
509  FilePathName(v->mdfd_vfd),
510  nbytes, BLCKSZ, blocknum),
511  errhint("Check free disk space.")));
512  }
513 
514  if (!skipFsync && !SmgrIsTemp(reln))
515  register_dirty_segment(reln, forknum, v);
516 
517  Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
518 }
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:793
int errhint(const char *fmt,...)
Definition: elog.c:1316
int errcode(int sqlerrcode)
Definition: elog.c:858
int FileWrite(File file, const void *buffer, size_t amount, off_t offset, uint32 wait_event_info)
Definition: fd.c:2144
BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:938
static MdfdVec * _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, bool skipFsync, int behavior)
Definition: md.c:1390

References _mdfd_getseg(), _mdnblocks(), Assert(), ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXTENSION_CREATE, FilePathName(), FileWrite(), InvalidBlockNumber, _MdfdVec::mdfd_vfd, mdnblocks(), PG_IO_ALIGN_SIZE, PG_O_DIRECT, register_dirty_segment(), relpath, SMgrRelationData::smgr_rlocator, SmgrIsTemp, and TYPEALIGN.

Referenced by _mdfd_getseg().

◆ mdfiletagmatches()

bool mdfiletagmatches ( const FileTag *  ftag,
const FileTag *  candidate 
)

Definition at line 1614 of file md.c.

1615 {
1616  /*
1617  * For now we only use filter requests as a way to drop all scheduled
1618  * callbacks relating to a given database, when dropping the database.
1619  * We'll return true for all candidates that have the same database OID as
1620  * the ftag from the SYNC_FILTER_REQUEST request, so they're forgotten.
1621  */
1622  return ftag->rlocator.dbOid == candidate->rlocator.dbOid;
1623 }
RelFileLocator rlocator
Definition: sync.h:54

References RelFileLocator::dbOid, and FileTag::rlocator.

◆ mdimmedsync()

void mdimmedsync ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 1097 of file md.c.

1098 {
1099  int segno;
1100  int min_inactive_seg;
1101 
1102  /*
1103  * NOTE: mdnblocks makes sure we have opened all active segments, so that
1104  * fsync loop will get them all!
1105  */
1106  mdnblocks(reln, forknum);
1107 
1108  min_inactive_seg = segno = reln->md_num_open_segs[forknum];
1109 
1110  /*
1111  * Temporarily open inactive segments, then close them after sync. There
1112  * may be some inactive segments left opened after fsync() error, but that
1113  * is harmless. We don't bother to clean them up and take a risk of
1114  * further trouble. The next mdclose() will soon close them.
1115  */
1116  while (_mdfd_openseg(reln, forknum, segno, 0) != NULL)
1117  segno++;
1118 
1119  while (segno > 0)
1120  {
1121  MdfdVec *v = &reln->md_seg_fds[forknum][segno - 1];
1122 
1123  /*
1124  * fsyncs done through mdimmedsync() should be tracked in a separate
1125  * IOContext than those done through mdsyncfiletag() to differentiate
1126  * between unavoidable client backend fsyncs (e.g. those done during
1127  * index build) and those which ideally would have been done by the
1128  * checkpointer. Since other IO operations bypassing the buffer
1129  * manager could also be tracked in such an IOContext, wait until
1130  * these are also tracked to track immediate fsyncs.
1131  */
1132  if (FileSync(v->mdfd_vfd, WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC) < 0)
1133  ereport(data_sync_elevel(ERROR),
1134  (errcode_for_file_access(),
1135  errmsg("could not fsync file \"%s\": %m",
1136  FilePathName(v->mdfd_vfd))));
1137 
1138  /* Close inactive segments immediately */
1139  if (segno > min_inactive_seg)
1140  {
1141  FileClose(v->mdfd_vfd);
1142  _fdvec_resize(reln, forknum, segno - 1);
1143  }
1144 
1145  segno--;
1146  }
1147 }
int FileSync(File file, uint32 wait_event_info)
Definition: fd.c:2242
int data_sync_elevel(int elevel)
Definition: fd.c:3881

References _fdvec_resize(), _mdfd_openseg(), data_sync_elevel(), ereport, errcode_for_file_access(), errmsg(), ERROR, FileClose(), FilePathName(), FileSync(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, and mdnblocks().

◆ mdinit()

void mdinit ( void  )

Definition at line 160 of file md.c.

161 {
162  MdCxt = AllocSetContextCreate(TopMemoryContext,
163  "MdSmgr",
164  ALLOCSET_DEFAULT_SIZES);
165 }
MemoryContext TopMemoryContext
Definition: mcxt.c:141
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:153

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, MdCxt, and TopMemoryContext.

◆ mdnblocks()

BlockNumber mdnblocks ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 938 of file md.c.

939 {
940  MdfdVec *v;
941  BlockNumber nblocks;
942  BlockNumber segno;
943 
944  mdopenfork(reln, forknum, EXTENSION_FAIL);
945 
946  /* mdopen has opened the first segment */
947  Assert(reln->md_num_open_segs[forknum] > 0);
948 
949  /*
950  * Start from the last open segments, to avoid redundant seeks. We have
951  * previously verified that these segments are exactly RELSEG_SIZE long,
952  * and it's useless to recheck that each time.
953  *
954  * NOTE: this assumption could only be wrong if another backend has
955  * truncated the relation. We rely on higher code levels to handle that
956  * scenario by closing and re-opening the md fd, which is handled via
957  * relcache flush. (Since the checkpointer doesn't participate in
958  * relcache flush, it could have segment entries for inactive segments;
959  * that's OK because the checkpointer never needs to compute relation
960  * size.)
961  */
962  segno = reln->md_num_open_segs[forknum] - 1;
963  v = &reln->md_seg_fds[forknum][segno];
964 
965  for (;;)
966  {
967  nblocks = _mdnblocks(reln, forknum, v);
968  if (nblocks > ((BlockNumber) RELSEG_SIZE))
969  elog(FATAL, "segment too big");
970  if (nblocks < ((BlockNumber) RELSEG_SIZE))
971  return (segno * ((BlockNumber) RELSEG_SIZE)) + nblocks;
972 
973  /*
974  * If segment is exactly RELSEG_SIZE, advance to next one.
975  */
976  segno++;
977 
978  /*
979  * We used to pass O_CREAT here, but that has the disadvantage that it
980  * might create a segment which has vanished through some operating
981  * system misadventure. In such a case, creating the segment here
982  * undermines _mdfd_getseg's attempts to notice and report an error
983  * upon access to a missing segment.
984  */
985  v = _mdfd_openseg(reln, forknum, segno, 0);
986  if (v == NULL)
987  return segno * ((BlockNumber) RELSEG_SIZE);
988  }
989 }

References _mdfd_openseg(), _mdnblocks(), Assert(), elog(), EXTENSION_FAIL, FATAL, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, and mdopenfork().

Referenced by mdextend(), mdimmedsync(), mdtruncate(), mdwrite(), and mdzeroextend().

◆ mdopen()

void mdopen ( SMgrRelation  reln)

Definition at line 682 of file md.c.

683 {
684  /* mark it not open */
685  for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)
686  reln->md_num_open_segs[forknum] = 0;
687 }

References MAX_FORKNUM, and SMgrRelationData::md_num_open_segs.

◆ mdopenfork()

/*
 * mdopenfork() -- Open the first segment of one fork of a relation.
 *
 * If the fork already has at least one open segment, the existing entry for
 * segment 0 is returned without touching the filesystem.  Otherwise the
 * fork's first segment file is opened and recorded as segment 0.
 *
 * If the open fails, NULL is returned when "behavior" includes
 * EXTENSION_RETURN_NULL and errno indicates the file may have been deleted
 * (FILE_POSSIBLY_DELETED); in every other failure case an ERROR is raised.
 *
 * Definition at line 639 of file md.c.
 */
static MdfdVec *
mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
{
	MdfdVec    *mdfd;
	char	   *path;
	File		fd;

	/* No work if already open */
	if (reln->md_num_open_segs[forknum] > 0)
		return &reln->md_seg_fds[forknum][0];

	path = relpath(reln->smgr_rlocator, forknum);

	fd = PathNameOpenFile(path, _mdfd_open_flags());

	if (fd < 0)
	{
		if ((behavior & EXTENSION_RETURN_NULL) &&
			FILE_POSSIBLY_DELETED(errno))
		{
			pfree(path);
			return NULL;
		}
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not open file \"%s\": %m", path)));
	}

	pfree(path);

	/* Record the newly opened file as segment 0 of this fork */
	_fdvec_resize(reln, forknum, 1);
	mdfd = &reln->md_seg_fds[forknum][0];
	mdfd->mdfd_vfd = fd;
	mdfd->mdfd_segno = 0;

	Assert(_mdnblocks(reln, forknum, mdfd) <= ((BlockNumber) RELSEG_SIZE));

	return mdfd;
}

References _fdvec_resize(), _mdfd_open_flags(), _mdnblocks(), Assert(), ereport, errcode_for_file_access(), errmsg(), ERROR, EXTENSION_RETURN_NULL, fd(), FILE_POSSIBLY_DELETED, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, PathNameOpenFile(), pfree(), relpath, and SMgrRelationData::smgr_rlocator.

Referenced by _mdfd_getseg(), mdexists(), and mdnblocks().

◆ mdprefetch()

/*
 * mdprefetch() -- Issue a prefetch request for one block of a relation.
 *
 * In builds with USE_PREFETCH, locates the segment containing "blocknum" and
 * calls FilePrefetch() for that block; the result of FilePrefetch() is
 * deliberately ignored.  During recovery a missing segment is tolerated
 * (EXTENSION_RETURN_NULL) and reported by returning false; outside recovery
 * _mdfd_getseg() is asked to fail instead (EXTENSION_FAIL).
 *
 * Returns true in all other cases, including builds without USE_PREFETCH,
 * where the function does nothing.
 *
 * Definition at line 716 of file md.c.
 */
bool
mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
{
#ifdef USE_PREFETCH
	off_t		seekpos;
	MdfdVec    *v;

	/* Prefetching is not expected to be requested with direct I/O for data */
	Assert((io_direct_flags & IO_DIRECT_DATA) == 0);

	v = _mdfd_getseg(reln, forknum, blocknum, false,
					 InRecovery ? EXTENSION_RETURN_NULL : EXTENSION_FAIL);
	if (v == NULL)
		return false;

	/* byte offset of the block within its segment file */
	seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));

	Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);

	(void) FilePrefetch(v->mdfd_vfd, seekpos, BLCKSZ, WAIT_EVENT_DATA_FILE_PREFETCH);
#endif							/* USE_PREFETCH */

	return true;
}
int FilePrefetch(File file, off_t offset, off_t amount, uint32 wait_event_info)
Definition: fd.c:2030

References _mdfd_getseg(), Assert(), EXTENSION_FAIL, EXTENSION_RETURN_NULL, FilePrefetch(), InRecovery, IO_DIRECT_DATA, io_direct_flags, and _MdfdVec::mdfd_vfd.

◆ mdread()

/*
 * mdread() -- Read one block of a relation into "buffer".
 *
 * Locates the segment containing "blocknum" and reads BLCKSZ bytes at the
 * block's offset within that segment.
 *
 * A failed read (negative byte count) raises an ERROR.  A short read raises
 * an ERROR with ERRCODE_DATA_CORRUPTED unless zero_damaged_pages is set or
 * we are InRecovery, in which case the whole buffer is zero-filled instead.
 *
 * Definition at line 743 of file md.c.
 */
void
mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
	   void *buffer)
{
	off_t		seekpos;
	int			nbytes;
	MdfdVec    *v;

	/* If this build supports direct I/O, the buffer must be I/O aligned. */
	if (PG_O_DIRECT != 0 && PG_IO_ALIGN_SIZE <= BLCKSZ)
		Assert((uintptr_t) buffer == TYPEALIGN(PG_IO_ALIGN_SIZE, buffer));

	TRACE_POSTGRESQL_SMGR_MD_READ_START(forknum, blocknum,
										reln->smgr_rlocator.locator.spcOid,
										reln->smgr_rlocator.locator.dbOid,
										reln->smgr_rlocator.locator.relNumber,
										reln->smgr_rlocator.backend);

	v = _mdfd_getseg(reln, forknum, blocknum, false,
					 EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);

	/* byte offset of the block within its segment file */
	seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));

	Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);

	nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_READ);

	TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum,
									   reln->smgr_rlocator.locator.spcOid,
									   reln->smgr_rlocator.locator.dbOid,
									   reln->smgr_rlocator.locator.relNumber,
									   reln->smgr_rlocator.backend,
									   nbytes,
									   BLCKSZ);

	if (nbytes != BLCKSZ)
	{
		if (nbytes < 0)
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not read block %u in file \"%s\": %m",
							blocknum, FilePathName(v->mdfd_vfd))));

		/*
		 * Short read: we are at or past EOF, or we read a partial block at
		 * EOF.  Normally this is an error; upper levels should never try to
		 * read a nonexistent block.  However, if zero_damaged_pages is ON or
		 * we are InRecovery, we should instead return zeroes without
		 * complaining.  This allows, for example, the case of trying to
		 * update a block that was later truncated away.
		 */
		if (zero_damaged_pages || InRecovery)
			MemSet(buffer, 0, BLCKSZ);
		else
			ereport(ERROR,
					(errcode(ERRCODE_DATA_CORRUPTED),
					 errmsg("could not read block %u in file \"%s\": read only %d of %d bytes",
							blocknum, FilePathName(v->mdfd_vfd),
							nbytes, BLCKSZ)));
	}
}
bool zero_damaged_pages
Definition: bufmgr.c:135
#define MemSet(start, val, len)
Definition: c.h:1009
int FileRead(File file, void *buffer, size_t amount, off_t offset, uint32 wait_event_info)
Definition: fd.c:2088
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41

References _mdfd_getseg(), Assert(), RelFileLocatorBackend::backend, RelFileLocator::dbOid, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errmsg(), ERROR, EXTENSION_CREATE_RECOVERY, EXTENSION_FAIL, FilePathName(), FileRead(), InRecovery, RelFileLocatorBackend::locator, _MdfdVec::mdfd_vfd, MemSet, PG_IO_ALIGN_SIZE, PG_O_DIRECT, RelFileLocator::relNumber, SMgrRelationData::smgr_rlocator, RelFileLocator::spcOid, TYPEALIGN, and zero_damaged_pages.

◆ mdsyncfiletag()

int mdsyncfiletag ( const FileTag ftag,
char *  path 
)

Definition at line 1542 of file md.c.

1543 {
1545  File file;
1546  instr_time io_start;
1547  bool need_to_close;
1548  int result,
1549  save_errno;
1550 
1551  /* See if we already have the file open, or need to open it. */
1552  if (ftag->segno < reln->md_num_open_segs[ftag->forknum])
1553  {
1554  file = reln->md_seg_fds[ftag->forknum][ftag->segno].mdfd_vfd;
1555  strlcpy(path, FilePathName(file), MAXPGPATH);
1556  need_to_close = false;
1557  }
1558  else
1559  {
1560  char *p;
1561 
1562  p = _mdfd_segpath(reln, ftag->forknum, ftag->segno);
1563  strlcpy(path, p, MAXPGPATH);
1564  pfree(p);
1565 
1566  file = PathNameOpenFile(path, _mdfd_open_flags());
1567  if (file < 0)
1568  return -1;
1569  need_to_close = true;
1570  }
1571 
1572  io_start = pgstat_prepare_io_time();
1573 
1574  /* Sync the file. */
1575  result = FileSync(file, WAIT_EVENT_DATA_FILE_SYNC);
1576  save_errno = errno;
1577 
1578  if (need_to_close)
1579  FileClose(file);
1580 
1582  IOOP_FSYNC, io_start, 1);
1583 
1584  errno = save_errno;
1585  return result;
1586 }
#define MAXPGPATH
@ IOOBJECT_RELATION
Definition: pgstat.h:278
@ IOCONTEXT_NORMAL
Definition: pgstat.h:288
@ IOOP_FSYNC
Definition: pgstat.h:298
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt)
Definition: pgstat_io.c:112
instr_time pgstat_prepare_io_time(void)
Definition: pgstat_io.c:96
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
int16 forknum
Definition: sync.h:53
uint32 segno
Definition: sync.h:55

References _mdfd_open_flags(), _mdfd_segpath(), FileClose(), FilePathName(), FileSync(), FileTag::forknum, InvalidBackendId, IOCONTEXT_NORMAL, IOOBJECT_RELATION, IOOP_FSYNC, MAXPGPATH, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, PathNameOpenFile(), pfree(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), FileTag::rlocator, FileTag::segno, smgropen(), and strlcpy().

◆ mdtruncate()

/*
 * mdtruncate() -- Truncate one fork of a relation to "nblocks" blocks.
 *
 * Asking for more blocks than the fork currently has is an ERROR, except
 * during recovery, where the request is silently ignored.  Asking for the
 * current size is a no-op.
 *
 * Otherwise the open segments are walked from last to first: segments lying
 * wholly beyond the new end are truncated to zero length and closed (but not
 * unlinked), and the segment containing the new end is truncated to the
 * right length.  For non-temp relations every truncated segment is
 * registered as dirty.
 *
 * Definition at line 995 of file md.c.
 */
void
mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
{
	BlockNumber curnblk;
	BlockNumber priorblocks;
	int			curopensegs;

	/*
	 * NOTE: mdnblocks makes sure we have opened all active segments, so that
	 * truncation loop will get them all!
	 */
	curnblk = mdnblocks(reln, forknum);
	if (nblocks > curnblk)
	{
		/* Bogus request ... but no complaint if InRecovery */
		if (InRecovery)
			return;
		ereport(ERROR,
				(errmsg("could not truncate file \"%s\" to %u blocks: it's only %u blocks now",
						relpath(reln->smgr_rlocator, forknum),
						nblocks, curnblk)));
	}
	if (nblocks == curnblk)
		return;					/* no work */

	/*
	 * Truncate segments, starting at the last one. Starting at the end makes
	 * managing the memory for the fd array easier, should there be errors.
	 */
	curopensegs = reln->md_num_open_segs[forknum];
	while (curopensegs > 0)
	{
		MdfdVec    *v;

		/* number of blocks in all segments before this one */
		priorblocks = (curopensegs - 1) * RELSEG_SIZE;

		v = &reln->md_seg_fds[forknum][curopensegs - 1];

		if (priorblocks > nblocks)
		{
			/*
			 * This segment is no longer active. We truncate the file, but do
			 * not delete it, for reasons explained in the header comments.
			 */
			if (FileTruncate(v->mdfd_vfd, 0, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0)
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not truncate file \"%s\": %m",
								FilePathName(v->mdfd_vfd))));

			if (!SmgrIsTemp(reln))
				register_dirty_segment(reln, forknum, v);

			/* we never drop the 1st segment */
			Assert(v != &reln->md_seg_fds[forknum][0]);

			FileClose(v->mdfd_vfd);
			_fdvec_resize(reln, forknum, curopensegs - 1);
		}
		else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks)
		{
			/*
			 * This is the last segment we want to keep. Truncate the file to
			 * the right length. NOTE: if nblocks is exactly a multiple K of
			 * RELSEG_SIZE, we will truncate the K+1st segment to 0 length but
			 * keep it. This adheres to the invariant given in the header
			 * comments.
			 */
			BlockNumber lastsegblocks = nblocks - priorblocks;

			if (FileTruncate(v->mdfd_vfd, (off_t) lastsegblocks * BLCKSZ, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0)
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not truncate file \"%s\" to %u blocks: %m",
								FilePathName(v->mdfd_vfd),
								nblocks)));
			if (!SmgrIsTemp(reln))
				register_dirty_segment(reln, forknum, v);
		}
		else
		{
			/*
			 * We still need this segment, so nothing to do for this and any
			 * earlier segment.
			 */
			break;
		}
		curopensegs--;
	}
}
int FileTruncate(File file, off_t offset, uint32 wait_event_info)
Definition: fd.c:2371

References _fdvec_resize(), Assert(), ereport, errcode_for_file_access(), errmsg(), ERROR, FileClose(), FilePathName(), FileTruncate(), InRecovery, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, mdnblocks(), register_dirty_segment(), relpath, SMgrRelationData::smgr_rlocator, and SmgrIsTemp.

◆ mdunlink()

/*
 * mdunlink() -- Unlink the files of a relation.
 *
 * If "forknum" is InvalidForkNumber, every fork from 0 through MAX_FORKNUM
 * is processed; otherwise only the given fork is.  The per-fork work is
 * delegated to mdunlinkfork(), to which "isRedo" is passed through unchanged.
 *
 * Definition at line 309 of file md.c.
 */
void
mdunlink(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
{
	/* Now do the per-fork work */
	if (forknum == InvalidForkNumber)
	{
		for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
			mdunlinkfork(rlocator, forknum, isRedo);
	}
	else
		mdunlinkfork(rlocator, forknum, isRedo);
}
static void mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
Definition: md.c:346

References InvalidForkNumber, MAX_FORKNUM, and mdunlinkfork().

◆ mdunlinkfiletag()

int mdunlinkfiletag ( const FileTag ftag,
char *  path 
)

Definition at line 1595 of file md.c.

1596 {
1597  char *p;
1598 
1599  /* Compute the path. */
1600  p = relpathperm(ftag->rlocator, MAIN_FORKNUM);
1601  strlcpy(path, p, MAXPGPATH);
1602  pfree(p);
1603 
1604  /* Try to unlink the file. */
1605  return unlink(path);
1606 }
@ MAIN_FORKNUM
Definition: relpath.h:50
#define relpathperm(rlocator, forknum)
Definition: relpath.h:90

References MAIN_FORKNUM, MAXPGPATH, pfree(), relpathperm, FileTag::rlocator, and strlcpy().

◆ mdunlinkfork()

/*
 * mdunlinkfork() -- Remove all segment files of one fork of a relation.
 *
 * The first segment is unlinked immediately when in redo, in binary upgrade,
 * for non-main forks, or for temp relations; otherwise it is only truncated
 * and an unlink request is registered so that it is removed later.  For
 * non-temp relations each file is truncated before being unlinked and any
 * pending sync request for it is forgotten.
 *
 * Failures to unlink are reported as WARNINGs, not errors.
 *
 * Definition at line 346 of file md.c.
 */
static void
mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
{
	char	   *path;
	int			ret;
	int			save_errno;

	path = relpath(rlocator, forknum);

	/*
	 * Truncate and then unlink the first segment, or just register a request
	 * to unlink it later, as described in the comments for mdunlink().
	 */
	if (isRedo || IsBinaryUpgrade || forknum != MAIN_FORKNUM ||
		RelFileLocatorBackendIsTemp(rlocator))
	{
		if (!RelFileLocatorBackendIsTemp(rlocator))
		{
			/* Prevent other backends' fds from holding on to the disk space */
			ret = do_truncate(path);

			/* Forget any pending sync requests for the first segment */
			save_errno = errno;
			register_forget_request(rlocator, forknum, 0 /* first seg */ );
			errno = save_errno;
		}
		else
			ret = 0;

		/* Next unlink the file, unless it was already found to be missing */
		if (ret >= 0 || errno != ENOENT)
		{
			ret = unlink(path);
			if (ret < 0 && errno != ENOENT)
			{
				/* keep unlink()'s errno intact for the test further down */
				save_errno = errno;
				ereport(WARNING,
						(errcode_for_file_access(),
						 errmsg("could not remove file \"%s\": %m", path)));
				errno = save_errno;
			}
		}
	}
	else
	{
		/* Prevent other backends' fds from holding on to the disk space */
		ret = do_truncate(path);

		/* Register request to unlink first segment later */
		save_errno = errno;
		register_unlink_segment(rlocator, forknum, 0 /* first seg */ );
		errno = save_errno;
	}

	/*
	 * Delete any additional segments.
	 *
	 * Note that because we loop until getting ENOENT, we will correctly
	 * remove all inactive segments as well as active ones.  Ideally we'd
	 * continue the loop until getting exactly that errno, but that risks an
	 * infinite loop if the problem is directory-wide (for instance, if we
	 * suddenly can't read the data directory itself).  We compromise by
	 * continuing after a non-ENOENT truncate error, but stopping after any
	 * unlink error.  If there is indeed a directory-wide problem, additional
	 * unlink attempts wouldn't work anyway.
	 */
	if (ret >= 0 || errno != ENOENT)
	{
		/* room for path, '.', up to 10 digits of segment number, and NUL */
		char	   *segpath = (char *) palloc(strlen(path) + 12);
		BlockNumber segno;

		for (segno = 1;; segno++)
		{
			sprintf(segpath, "%s.%u", path, segno);

			if (!RelFileLocatorBackendIsTemp(rlocator))
			{
				/*
				 * Prevent other backends' fds from holding on to the disk
				 * space.  We're done if we see ENOENT, though.
				 */
				if (do_truncate(segpath) < 0 && errno == ENOENT)
					break;

				/*
				 * Forget any pending sync requests for this segment before we
				 * try to unlink.
				 */
				register_forget_request(rlocator, forknum, segno);
			}

			if (unlink(segpath) < 0)
			{
				/* ENOENT is expected after the last segment... */
				if (errno != ENOENT)
					ereport(WARNING,
							(errcode_for_file_access(),
							 errmsg("could not remove file \"%s\": %m", segpath)));
				break;
			}
		}
		pfree(segpath);
	}

	pfree(path);
}
bool IsBinaryUpgrade
Definition: globals.c:114
static void register_forget_request(RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno)
Definition: md.c:1220
static void register_unlink_segment(RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno)
Definition: md.c:1203
static int do_truncate(const char *path)
Definition: md.c:325
#define sprintf
Definition: port.h:240
#define RelFileLocatorBackendIsTemp(rlocator)

References do_truncate(), ereport, errcode_for_file_access(), errmsg(), IsBinaryUpgrade, MAIN_FORKNUM, palloc(), pfree(), register_forget_request(), register_unlink_segment(), RelFileLocatorBackendIsTemp, relpath, sprintf, and WARNING.

Referenced by mdunlink().

◆ mdwrite()

/*
 * mdwrite() -- Write one block of a relation from "buffer".
 *
 * Locates the segment containing "blocknum" and writes BLCKSZ bytes at the
 * block's offset within that segment.  A failed write raises an ERROR; a
 * short write raises an ERROR with ERRCODE_DISK_FULL.
 *
 * Unless "skipFsync" is set or the relation is temporary, the segment is
 * registered as dirty after a successful write.
 *
 * Definition at line 812 of file md.c.
 */
void
mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
		const void *buffer, bool skipFsync)
{
	off_t		seekpos;
	int			nbytes;
	MdfdVec    *v;

	/* If this build supports direct I/O, the buffer must be I/O aligned. */
	if (PG_O_DIRECT != 0 && PG_IO_ALIGN_SIZE <= BLCKSZ)
		Assert((uintptr_t) buffer == TYPEALIGN(PG_IO_ALIGN_SIZE, buffer));

	/* This assert is too expensive to have on normally ... */
#ifdef CHECK_WRITE_VS_EXTEND
	Assert(blocknum < mdnblocks(reln, forknum));
#endif

	TRACE_POSTGRESQL_SMGR_MD_WRITE_START(forknum, blocknum,
										 reln->smgr_rlocator.locator.spcOid,
										 reln->smgr_rlocator.locator.dbOid,
										 reln->smgr_rlocator.locator.relNumber,
										 reln->smgr_rlocator.backend);

	v = _mdfd_getseg(reln, forknum, blocknum, skipFsync,
					 EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);

	/* byte offset of the block within its segment file */
	seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));

	Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);

	nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_WRITE);

	TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum,
										reln->smgr_rlocator.locator.spcOid,
										reln->smgr_rlocator.locator.dbOid,
										reln->smgr_rlocator.locator.relNumber,
										reln->smgr_rlocator.backend,
										nbytes,
										BLCKSZ);

	if (nbytes != BLCKSZ)
	{
		if (nbytes < 0)
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not write block %u in file \"%s\": %m",
							blocknum, FilePathName(v->mdfd_vfd))));
		/* short write: complain appropriately */
		ereport(ERROR,
				(errcode(ERRCODE_DISK_FULL),
				 errmsg("could not write block %u in file \"%s\": wrote only %d of %d bytes",
						blocknum,
						FilePathName(v->mdfd_vfd),
						nbytes, BLCKSZ),
				 errhint("Check free disk space.")));
	}

	if (!skipFsync && !SmgrIsTemp(reln))
		register_dirty_segment(reln, forknum, v);
}

References _mdfd_getseg(), Assert(), RelFileLocatorBackend::backend, RelFileLocator::dbOid, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXTENSION_CREATE_RECOVERY, EXTENSION_FAIL, FilePathName(), FileWrite(), RelFileLocatorBackend::locator, _MdfdVec::mdfd_vfd, mdnblocks(), PG_IO_ALIGN_SIZE, PG_O_DIRECT, register_dirty_segment(), RelFileLocator::relNumber, SMgrRelationData::smgr_rlocator, SmgrIsTemp, RelFileLocator::spcOid, and TYPEALIGN.

◆ mdwriteback()

/*
 * mdwriteback() -- Ask the kernel to write back a range of blocks.
 *
 * Issues FileWriteback() requests covering "nblocks" blocks starting at
 * "blocknum", one request per segment file touched.  Segments that are not
 * already open are not opened (EXTENSION_DONT_OPEN); on reaching such a
 * segment the function returns without processing the rest of the range.
 *
 * Definition at line 879 of file md.c.
 */
void
mdwriteback(SMgrRelation reln, ForkNumber forknum,
			BlockNumber blocknum, BlockNumber nblocks)
{
	/* Writeback is not expected to be requested with direct I/O for data */
	Assert((io_direct_flags & IO_DIRECT_DATA) == 0);

	/*
	 * Issue flush requests in as few requests as possible; have to split at
	 * segment boundaries though, since those are actually separate files.
	 */
	while (nblocks > 0)
	{
		BlockNumber nflush = nblocks;
		off_t		seekpos;
		MdfdVec    *v;
		int			segnum_start,
					segnum_end;

		v = _mdfd_getseg(reln, forknum, blocknum, true /* not used */ ,
						 EXTENSION_DONT_OPEN);

		/*
		 * We might be flushing buffers of already removed relations, that's
		 * ok, just ignore that case.  If the segment file wasn't open already
		 * (ie from a recent mdwrite()), then we don't want to re-open it, to
		 * avoid a race with PROCSIGNAL_BARRIER_SMGRRELEASE that might leave
		 * us with a descriptor to a file that is about to be unlinked.
		 */
		if (!v)
			return;

		/* segment number containing the first block of the range */
		segnum_start = blocknum / RELSEG_SIZE;

		/* segment number containing the last block of the range */
		segnum_end = (blocknum + nblocks - 1) / RELSEG_SIZE;

		/* if the range crosses a boundary, flush only up to the segment end */
		if (segnum_start != segnum_end)
			nflush = RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE));

		Assert(nflush >= 1);
		Assert(nflush <= nblocks);

		seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));

		FileWriteback(v->mdfd_vfd, seekpos, (off_t) BLCKSZ * nflush, WAIT_EVENT_DATA_FILE_FLUSH);

		nblocks -= nflush;
		blocknum += nflush;
	}
}
void FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info)
Definition: fd.c:2062

References _mdfd_getseg(), Assert(), EXTENSION_DONT_OPEN, FileWriteback(), IO_DIRECT_DATA, io_direct_flags, and _MdfdVec::mdfd_vfd.

◆ mdzeroextend()

/*
 * mdzeroextend() -- Extend a relation fork by "nblocks" blocks.
 *
 * The new blocks start at "blocknum".  The extension is split at segment
 * boundaries; within each segment, runs of more than 8 blocks are allocated
 * with FileFallocate() and shorter runs are written with FileZero().  Any
 * failure raises an ERROR.
 *
 * Extending to or beyond InvalidBlockNumber is refused with
 * ERRCODE_PROGRAM_LIMIT_EXCEEDED.  Unless "skipFsync" is set or the relation
 * is temporary, every segment touched is registered as dirty.
 *
 * Definition at line 527 of file md.c.
 */
void
mdzeroextend(SMgrRelation reln, ForkNumber forknum,
			 BlockNumber blocknum, int nblocks, bool skipFsync)
{
	MdfdVec    *v;
	BlockNumber curblocknum = blocknum;
	int			remblocks = nblocks;

	Assert(nblocks > 0);

	/* This assert is too expensive to have on normally ... */
#ifdef CHECK_WRITE_VS_EXTEND
	Assert(blocknum >= mdnblocks(reln, forknum));
#endif

	/*
	 * If a relation manages to grow to 2^32-1 blocks, refuse to extend it any
	 * more --- we mustn't create a block whose number actually is
	 * InvalidBlockNumber or larger.
	 */
	if ((uint64) blocknum + nblocks >= (uint64) InvalidBlockNumber)
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("cannot extend file \"%s\" beyond %u blocks",
						relpath(reln->smgr_rlocator, forknum),
						InvalidBlockNumber)));

	while (remblocks > 0)
	{
		BlockNumber segstartblock = curblocknum % ((BlockNumber) RELSEG_SIZE);
		off_t		seekpos = (off_t) BLCKSZ * segstartblock;
		int			numblocks;

		/* clamp this round to the end of the current segment */
		if (segstartblock + remblocks > RELSEG_SIZE)
			numblocks = RELSEG_SIZE - segstartblock;
		else
			numblocks = remblocks;

		v = _mdfd_getseg(reln, forknum, curblocknum, skipFsync, EXTENSION_CREATE);

		Assert(segstartblock < RELSEG_SIZE);
		Assert(segstartblock + numblocks <= RELSEG_SIZE);

		/*
		 * If available and useful, use posix_fallocate() (via
		 * FileFallocate()) to extend the relation. That's often more
		 * efficient than using write(), as it commonly won't cause the kernel
		 * to allocate page cache space for the extended pages.
		 *
		 * However, we don't use FileFallocate() for small extensions, as it
		 * defeats delayed allocation on some filesystems. Not clear where
		 * that decision should be made though? For now just use a cutoff of
		 * 8, anything between 4 and 8 worked OK in some local testing.
		 */
		if (numblocks > 8)
		{
			int			ret;

			ret = FileFallocate(v->mdfd_vfd,
								seekpos, (off_t) BLCKSZ * numblocks,
								WAIT_EVENT_DATA_FILE_EXTEND);
			if (ret != 0)
			{
				ereport(ERROR,
						errcode_for_file_access(),
						errmsg("could not extend file \"%s\" with FileFallocate(): %m",
							   FilePathName(v->mdfd_vfd)),
						errhint("Check free disk space."));
			}
		}
		else
		{
			int			ret;

			/*
			 * Even if we don't want to use fallocate, we can still extend a
			 * bit more efficiently than writing each 8kB block individually.
			 * pg_pwrite_zeros() (via FileZero()) uses pg_pwritev_with_retry()
			 * to avoid multiple writes or needing a zeroed buffer for the
			 * whole length of the extension.
			 */
			ret = FileZero(v->mdfd_vfd,
						   seekpos, (off_t) BLCKSZ * numblocks,
						   WAIT_EVENT_DATA_FILE_EXTEND);
			if (ret < 0)
				ereport(ERROR,
						errcode_for_file_access(),
						errmsg("could not extend file \"%s\": %m",
							   FilePathName(v->mdfd_vfd)),
						errhint("Check free disk space."));
		}

		if (!skipFsync && !SmgrIsTemp(reln))
			register_dirty_segment(reln, forknum, v);

		Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));

		remblocks -= numblocks;
		curblocknum += numblocks;
	}
}
int FileFallocate(File file, off_t offset, off_t amount, uint32 wait_event_info)
Definition: fd.c:2314
int FileZero(File file, off_t offset, off_t amount, uint32 wait_event_info)
Definition: fd.c:2269

References _mdfd_getseg(), _mdnblocks(), Assert(), ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXTENSION_CREATE, FileFallocate(), FilePathName(), FileZero(), InvalidBlockNumber, _MdfdVec::mdfd_vfd, mdnblocks(), register_dirty_segment(), relpath, SMgrRelationData::smgr_rlocator, and SmgrIsTemp.

◆ register_dirty_segment()

/*
 * register_dirty_segment() -- Mark a relation segment as needing fsync.
 *
 * Builds a file tag for the segment and submits a SYNC_REQUEST without
 * retrying.  If the request is not accepted, the segment is fsync'd right
 * here instead, and that fsync is counted in the I/O statistics.  A failure
 * of that fsync is reported at the level chosen by data_sync_elevel(ERROR).
 *
 * Must not be called for temp relations (asserted).
 *
 * Definition at line 1159 of file md.c.
 */
static void
register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
{
	FileTag		tag;

	INIT_MD_FILETAG(tag, reln->smgr_rlocator.locator, forknum, seg->mdfd_segno);

	/* Temp relations should never be fsync'd */
	Assert(!SmgrIsTemp(reln));

	if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false /* retryOnError */ ))
	{
		instr_time	io_start;

		ereport(DEBUG1,
				(errmsg_internal("could not forward fsync request because request queue is full")));

		io_start = pgstat_prepare_io_time();

		if (FileSync(seg->mdfd_vfd, WAIT_EVENT_DATA_FILE_SYNC) < 0)
			ereport(data_sync_elevel(ERROR),
					(errcode_for_file_access(),
					 errmsg("could not fsync file \"%s\": %m",
							FilePathName(seg->mdfd_vfd))));

		/*
		 * We have no way of knowing if the current IOContext is
		 * IOCONTEXT_NORMAL or IOCONTEXT_[BULKREAD, BULKWRITE, VACUUM] at this
		 * point, so count the fsync as being in the IOCONTEXT_NORMAL
		 * IOContext. This is probably okay, because the number of backend
		 * fsyncs doesn't say anything about the efficacy of the
		 * BufferAccessStrategy. And counting both fsyncs done in
		 * IOCONTEXT_NORMAL and IOCONTEXT_[BULKREAD, BULKWRITE, VACUUM] under
		 * IOCONTEXT_NORMAL is likely clearer when investigating the number of
		 * backend fsyncs.
		 */
		pgstat_count_io_op_time(IOOBJECT_RELATION, IOCONTEXT_NORMAL,
								IOOP_FSYNC, io_start, 1);
	}
}
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1156
#define DEBUG1
Definition: elog.h:30
@ SYNC_REQUEST
Definition: sync.h:25

References Assert(), data_sync_elevel(), DEBUG1, ereport, errcode_for_file_access(), errmsg(), errmsg_internal(), ERROR, FilePathName(), FileSync(), INIT_MD_FILETAG, IOCONTEXT_NORMAL, IOOBJECT_RELATION, IOOP_FSYNC, RelFileLocatorBackend::locator, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, pgstat_count_io_op_time(), pgstat_prepare_io_time(), RegisterSyncRequest(), SMgrRelationData::smgr_rlocator, SmgrIsTemp, and SYNC_REQUEST.

Referenced by mdcreate(), mdextend(), mdtruncate(), mdwrite(), and mdzeroextend().

◆ register_forget_request()

/*
 * register_forget_request() -- Cancel pending sync requests for a segment.
 *
 * Builds a file tag for the given relation fork and segment number and
 * submits a SYNC_FORGET_REQUEST with retryOnError set.
 *
 * Definition at line 1220 of file md.c.
 */
static void
register_forget_request(RelFileLocatorBackend rlocator, ForkNumber forknum,
						BlockNumber segno)
{
	FileTag		tag;

	INIT_MD_FILETAG(tag, rlocator.locator, forknum, segno);

	RegisterSyncRequest(&tag, SYNC_FORGET_REQUEST, true /* retryOnError */ );
}
@ SYNC_FORGET_REQUEST
Definition: sync.h:27

References INIT_MD_FILETAG, RelFileLocatorBackend::locator, RegisterSyncRequest(), and SYNC_FORGET_REQUEST.

Referenced by mdunlinkfork().

◆ register_unlink_segment()

/*
 * register_unlink_segment() -- Schedule a segment file to be unlinked later.
 *
 * Builds a file tag for the given relation fork and segment number and
 * submits a SYNC_UNLINK_REQUEST with retryOnError set.
 *
 * Must not be called for temp relations (asserted).
 *
 * Definition at line 1203 of file md.c.
 */
static void
register_unlink_segment(RelFileLocatorBackend rlocator, ForkNumber forknum,
						BlockNumber segno)
{
	FileTag		tag;

	INIT_MD_FILETAG(tag, rlocator.locator, forknum, segno);

	/* Should never be used with temp relations */
	Assert(!RelFileLocatorBackendIsTemp(rlocator));

	RegisterSyncRequest(&tag, SYNC_UNLINK_REQUEST, true /* retryOnError */ );
}
@ SYNC_UNLINK_REQUEST
Definition: sync.h:26

References Assert(), INIT_MD_FILETAG, RelFileLocatorBackend::locator, RegisterSyncRequest(), RelFileLocatorBackendIsTemp, and SYNC_UNLINK_REQUEST.

Referenced by mdunlinkfork().

Variable Documentation

◆ MdCxt

MemoryContext MdCxt
static

Definition at line 88 of file md.c.

Referenced by _fdvec_resize(), and mdinit().