PostgreSQL Source Code  git master
md.c File Reference
#include "postgres.h"
#include <unistd.h>
#include <fcntl.h>
#include <sys/file.h>
#include "access/xlogutils.h"
#include "commands/tablespace.h"
#include "common/file_utils.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/md.h"
#include "storage/relfilelocator.h"
#include "storage/smgr.h"
#include "storage/sync.h"
#include "utils/memutils.h"
Include dependency graph for md.c:

Go to the source code of this file.

Data Structures

struct  _MdfdVec
 

Macros

#define INIT_MD_FILETAG(a, xx_rlocator, xx_forknum, xx_segno)
 
#define EXTENSION_FAIL   (1 << 0)
 
#define EXTENSION_RETURN_NULL   (1 << 1)
 
#define EXTENSION_CREATE   (1 << 2)
 
#define EXTENSION_CREATE_RECOVERY   (1 << 3)
 
#define EXTENSION_DONT_CHECK_SIZE   (1 << 4)
 
#define EXTENSION_DONT_OPEN   (1 << 5)
 

Typedefs

typedef struct _MdfdVec MdfdVec
 

Functions

static void mdunlinkfork (RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
 
static MdfdVec * mdopenfork (SMgrRelation reln, ForkNumber forknum, int behavior)
 
static void register_dirty_segment (SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
 
static void register_unlink_segment (RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno)
 
static void register_forget_request (RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno)
 
static void _fdvec_resize (SMgrRelation reln, ForkNumber forknum, int nseg)
 
static char * _mdfd_segpath (SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
 
static MdfdVec * _mdfd_openseg (SMgrRelation reln, ForkNumber forknum, BlockNumber segno, int oflags)
 
static MdfdVec * _mdfd_getseg (SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, bool skipFsync, int behavior)
 
static BlockNumber _mdnblocks (SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
 
static int _mdfd_open_flags (void)
 
void mdinit (void)
 
bool mdexists (SMgrRelation reln, ForkNumber forknum)
 
void mdcreate (SMgrRelation reln, ForkNumber forknum, bool isRedo)
 
void mdunlink (RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
 
static int do_truncate (const char *path)
 
void mdextend (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
 
void mdzeroextend (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync)
 
void mdopen (SMgrRelation reln)
 
void mdclose (SMgrRelation reln, ForkNumber forknum)
 
bool mdprefetch (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks)
 
static int buffers_to_iovec (struct iovec *iov, void **buffers, int nblocks)
 
void mdreadv (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks)
 
void mdwritev (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void **buffers, BlockNumber nblocks, bool skipFsync)
 
void mdwriteback (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
 
BlockNumber mdnblocks (SMgrRelation reln, ForkNumber forknum)
 
void mdtruncate (SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
 
void mdregistersync (SMgrRelation reln, ForkNumber forknum)
 
void mdimmedsync (SMgrRelation reln, ForkNumber forknum)
 
void ForgetDatabaseSyncRequests (Oid dbid)
 
void DropRelationFiles (RelFileLocator *delrels, int ndelrels, bool isRedo)
 
int mdsyncfiletag (const FileTag *ftag, char *path)
 
int mdunlinkfiletag (const FileTag *ftag, char *path)
 
bool mdfiletagmatches (const FileTag *ftag, const FileTag *candidate)
 

Variables

static MemoryContext MdCxt
 

Macro Definition Documentation

◆ EXTENSION_CREATE

#define EXTENSION_CREATE   (1 << 2)

Definition at line 106 of file md.c.

◆ EXTENSION_CREATE_RECOVERY

#define EXTENSION_CREATE_RECOVERY   (1 << 3)

Definition at line 108 of file md.c.

◆ EXTENSION_DONT_CHECK_SIZE

#define EXTENSION_DONT_CHECK_SIZE   (1 << 4)

Definition at line 116 of file md.c.

◆ EXTENSION_DONT_OPEN

#define EXTENSION_DONT_OPEN   (1 << 5)

Definition at line 118 of file md.c.

◆ EXTENSION_FAIL

#define EXTENSION_FAIL   (1 << 0)

Definition at line 102 of file md.c.

◆ EXTENSION_RETURN_NULL

#define EXTENSION_RETURN_NULL   (1 << 1)

Definition at line 104 of file md.c.

◆ INIT_MD_FILETAG

#define INIT_MD_FILETAG (   a,
  xx_rlocator,
  xx_forknum,
  xx_segno 
)
Value:
( \
memset(&(a), 0, sizeof(FileTag)), \
(a).handler = SYNC_HANDLER_MD, \
(a).rlocator = (xx_rlocator), \
(a).forknum = (xx_forknum), \
(a).segno = (xx_segno) \
)
int a
Definition: isn.c:69
Definition: sync.h:51
@ SYNC_HANDLER_MD
Definition: sync.h:37

Definition at line 90 of file md.c.

Typedef Documentation

◆ MdfdVec

typedef struct _MdfdVec MdfdVec

Function Documentation

◆ _fdvec_resize()

static void _fdvec_resize ( SMgrRelation  reln,
ForkNumber  forknum,
int  nseg 
)
static

Definition at line 1478 of file md.c.

1481 {
1482  if (nseg == 0)
1483  {
1484  if (reln->md_num_open_segs[forknum] > 0)
1485  {
1486  pfree(reln->md_seg_fds[forknum]);
1487  reln->md_seg_fds[forknum] = NULL;
1488  }
1489  }
1490  else if (reln->md_num_open_segs[forknum] == 0)
1491  {
1492  reln->md_seg_fds[forknum] =
1493  MemoryContextAlloc(MdCxt, sizeof(MdfdVec) * nseg);
1494  }
1495  else
1496  {
1497  /*
1498  * It doesn't seem worthwhile complicating the code to amortize
1499  * repalloc() calls. Those are far faster than PathNameOpenFile() or
1500  * FileClose(), and the memory context internally will sometimes avoid
1501  * doing an actual reallocation.
1502  */
1503  reln->md_seg_fds[forknum] =
1504  repalloc(reln->md_seg_fds[forknum],
1505  sizeof(MdfdVec) * nseg);
1506  }
1507 
1508  reln->md_num_open_segs[forknum] = nseg;
1509 }
void pfree(void *pointer)
Definition: mcxt.c:1520
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1540
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:1180
static MemoryContext MdCxt
Definition: md.c:86
int md_num_open_segs[MAX_FORKNUM+1]
Definition: smgr.h:60
struct _MdfdVec * md_seg_fds[MAX_FORKNUM+1]
Definition: smgr.h:61
Definition: md.c:81

References SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, MdCxt, MemoryContextAlloc(), pfree(), and repalloc().

Referenced by _mdfd_openseg(), mdclose(), mdcreate(), mdimmedsync(), mdopenfork(), mdregistersync(), and mdtruncate().

◆ _mdfd_getseg()

static MdfdVec * _mdfd_getseg ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blkno,
bool  skipFsync,
int  behavior 
)
static

Definition at line 1584 of file md.c.

1586 {
1587  MdfdVec *v;
1588  BlockNumber targetseg;
1589  BlockNumber nextsegno;
1590 
1591  /* some way to handle non-existent segments needs to be specified */
1592  Assert(behavior &
1593  (EXTENSION_FAIL | EXTENSION_CREATE | EXTENSION_RETURN_NULL |
1594  EXTENSION_DONT_OPEN));
1595 
1596  targetseg = blkno / ((BlockNumber) RELSEG_SIZE);
1597 
1598  /* if an existing and opened segment, we're done */
1599  if (targetseg < reln->md_num_open_segs[forknum])
1600  {
1601  v = &reln->md_seg_fds[forknum][targetseg];
1602  return v;
1603  }
1604 
1605  /* The caller only wants the segment if we already had it open. */
1606  if (behavior & EXTENSION_DONT_OPEN)
1607  return NULL;
1608 
1609  /*
1610  * The target segment is not yet open. Iterate over all the segments
1611  * between the last opened and the target segment. This way missing
1612  * segments either raise an error, or get created (according to
1613  * 'behavior'). Start with either the last opened, or the first segment if
1614  * none was opened before.
1615  */
1616  if (reln->md_num_open_segs[forknum] > 0)
1617  v = &reln->md_seg_fds[forknum][reln->md_num_open_segs[forknum] - 1];
1618  else
1619  {
1620  v = mdopenfork(reln, forknum, behavior);
1621  if (!v)
1622  return NULL; /* if behavior & EXTENSION_RETURN_NULL */
1623  }
1624 
1625  for (nextsegno = reln->md_num_open_segs[forknum];
1626  nextsegno <= targetseg; nextsegno++)
1627  {
1628  BlockNumber nblocks = _mdnblocks(reln, forknum, v);
1629  int flags = 0;
1630 
1631  Assert(nextsegno == v->mdfd_segno + 1);
1632 
1633  if (nblocks > ((BlockNumber) RELSEG_SIZE))
1634  elog(FATAL, "segment too big");
1635 
1636  if ((behavior & EXTENSION_CREATE) ||
1637  (InRecovery && (behavior & EXTENSION_CREATE_RECOVERY)))
1638  {
1639  /*
1640  * Normally we will create new segments only if authorized by the
1641  * caller (i.e., we are doing mdextend()). But when doing WAL
1642  * recovery, create segments anyway; this allows cases such as
1643  * replaying WAL data that has a write into a high-numbered
1644  * segment of a relation that was later deleted. We want to go
1645  * ahead and create the segments so we can finish out the replay.
1646  *
1647  * We have to maintain the invariant that segments before the last
1648  * active segment are of size RELSEG_SIZE; therefore, if
1649  * extending, pad them out with zeroes if needed. (This only
1650  * matters if in recovery, or if the caller is extending the
1651  * relation discontiguously, but that can happen in hash indexes.)
1652  */
1653  if (nblocks < ((BlockNumber) RELSEG_SIZE))
1654  {
1655  char *zerobuf = palloc_aligned(BLCKSZ, PG_IO_ALIGN_SIZE,
1656  MCXT_ALLOC_ZERO);
1657 
1658  mdextend(reln, forknum,
1659  nextsegno * ((BlockNumber) RELSEG_SIZE) - 1,
1660  zerobuf, skipFsync);
1661  pfree(zerobuf);
1662  }
1663  flags = O_CREAT;
1664  }
1665  else if (!(behavior & EXTENSION_DONT_CHECK_SIZE) &&
1666  nblocks < ((BlockNumber) RELSEG_SIZE))
1667  {
1668  /*
1669  * When not extending (or explicitly including truncated
1670  * segments), only open the next segment if the current one is
1671  * exactly RELSEG_SIZE. If not (this branch), either return NULL
1672  * or fail.
1673  */
1674  if (behavior & EXTENSION_RETURN_NULL)
1675  {
1676  /*
1677  * Some callers discern between reasons for _mdfd_getseg()
1678  * returning NULL based on errno. As there's no failing
1679  * syscall involved in this case, explicitly set errno to
1680  * ENOENT, as that seems the closest interpretation.
1681  */
1682  errno = ENOENT;
1683  return NULL;
1684  }
1685 
1686  ereport(ERROR,
1687  (errcode_for_file_access(),
1688  errmsg("could not open file \"%s\" (target block %u): previous segment is only %u blocks",
1689  _mdfd_segpath(reln, forknum, nextsegno),
1690  blkno, nblocks)));
1691  }
1692 
1693  v = _mdfd_openseg(reln, forknum, nextsegno, flags);
1694 
1695  if (v == NULL)
1696  {
1697  if ((behavior & EXTENSION_RETURN_NULL) &&
1698  FILE_POSSIBLY_DELETED(errno))
1699  return NULL;
1700  ereport(ERROR,
1701  (errcode_for_file_access(),
1702  errmsg("could not open file \"%s\" (target block %u): %m",
1703  _mdfd_segpath(reln, forknum, nextsegno),
1704  blkno)));
1705  }
1706  }
1707 
1708  return v;
1709 }
uint32 BlockNumber
Definition: block.h:31
#define Assert(condition)
Definition: c.h:858
int errcode_for_file_access(void)
Definition: elog.c:882
int errmsg(const char *fmt,...)
Definition: elog.c:1072
#define FATAL
Definition: elog.h:41
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
#define ereport(elevel,...)
Definition: elog.h:149
#define FILE_POSSIBLY_DELETED(err)
Definition: fd.h:78
#define MCXT_ALLOC_ZERO
Definition: fe_memutils.h:18
void * palloc_aligned(Size size, Size alignto, int flags)
Definition: mcxt.c:1510
#define EXTENSION_CREATE_RECOVERY
Definition: md.c:108
static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:1715
#define EXTENSION_DONT_OPEN
Definition: md.c:118
void mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
Definition: md.c:460
static MdfdVec * _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno, int oflags)
Definition: md.c:1539
#define EXTENSION_RETURN_NULL
Definition: md.c:104
static char * _mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
Definition: md.c:1516
#define EXTENSION_CREATE
Definition: md.c:106
#define EXTENSION_DONT_CHECK_SIZE
Definition: md.c:116
#define EXTENSION_FAIL
Definition: md.c:102
static MdfdVec * mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
Definition: md.c:637
#define PG_IO_ALIGN_SIZE
BlockNumber mdfd_segno
Definition: md.c:83
bool InRecovery
Definition: xlogutils.c:50

References _mdfd_openseg(), _mdfd_segpath(), _mdnblocks(), Assert, elog, ereport, errcode_for_file_access(), errmsg(), ERROR, EXTENSION_CREATE, EXTENSION_CREATE_RECOVERY, EXTENSION_DONT_CHECK_SIZE, EXTENSION_DONT_OPEN, EXTENSION_FAIL, EXTENSION_RETURN_NULL, FATAL, FILE_POSSIBLY_DELETED, InRecovery, MCXT_ALLOC_ZERO, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, mdextend(), _MdfdVec::mdfd_segno, mdopenfork(), palloc_aligned(), pfree(), and PG_IO_ALIGN_SIZE.

Referenced by mdextend(), mdprefetch(), mdreadv(), mdwriteback(), mdwritev(), and mdzeroextend().

◆ _mdfd_open_flags()

static int _mdfd_open_flags ( void  )
inlinestatic

Definition at line 144 of file md.c.

145 {
146  int flags = O_RDWR | PG_BINARY;
147 
148  if (io_direct_flags & IO_DIRECT_DATA)
149  flags |= PG_O_DIRECT;
150 
151  return flags;
152 }
#define PG_BINARY
Definition: c.h:1273
int io_direct_flags
Definition: fd.c:168
#define IO_DIRECT_DATA
Definition: fd.h:54
#define PG_O_DIRECT
Definition: fd.h:97

References IO_DIRECT_DATA, io_direct_flags, PG_BINARY, and PG_O_DIRECT.

Referenced by _mdfd_openseg(), mdcreate(), mdopenfork(), and mdsyncfiletag().

◆ _mdfd_openseg()

static MdfdVec * _mdfd_openseg ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  segno,
int  oflags 
)
static

Definition at line 1539 of file md.c.

1541 {
1542  MdfdVec *v;
1543  File fd;
1544  char *fullpath;
1545 
1546  fullpath = _mdfd_segpath(reln, forknum, segno);
1547 
1548  /* open the file */
1549  fd = PathNameOpenFile(fullpath, _mdfd_open_flags() | oflags);
1550 
1551  pfree(fullpath);
1552 
1553  if (fd < 0)
1554  return NULL;
1555 
1556  /*
1557  * Segments are always opened in order from lowest to highest, so we must
1558  * be adding a new one at the end.
1559  */
1560  Assert(segno == reln->md_num_open_segs[forknum]);
1561 
1562  _fdvec_resize(reln, forknum, segno + 1);
1563 
1564  /* fill the entry */
1565  v = &reln->md_seg_fds[forknum][segno];
1566  v->mdfd_vfd = fd;
1567  v->mdfd_segno = segno;
1568 
1569  Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
1570 
1571  /* all done */
1572  return v;
1573 }
File PathNameOpenFile(const char *fileName, int fileFlags)
Definition: fd.c:1575
int File
Definition: fd.h:51
static int _mdfd_open_flags(void)
Definition: md.c:144
static void _fdvec_resize(SMgrRelation reln, ForkNumber forknum, int nseg)
Definition: md.c:1478
static int fd(const char *x, int i)
Definition: preproc-init.c:105
File mdfd_vfd
Definition: md.c:82

References _fdvec_resize(), _mdfd_open_flags(), _mdfd_segpath(), _mdnblocks(), Assert, fd(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, PathNameOpenFile(), and pfree().

Referenced by _mdfd_getseg(), mdimmedsync(), mdnblocks(), and mdregistersync().

◆ _mdfd_segpath()

static char * _mdfd_segpath ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  segno 
)
static

Definition at line 1516 of file md.c.

1517 {
1518  char *path,
1519  *fullpath;
1520 
1521  path = relpath(reln->smgr_rlocator, forknum);
1522 
1523  if (segno > 0)
1524  {
1525  fullpath = psprintf("%s.%u", path, segno);
1526  pfree(path);
1527  }
1528  else
1529  fullpath = path;
1530 
1531  return fullpath;
1532 }
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
#define relpath(rlocator, forknum)
Definition: relpath.h:94
RelFileLocatorBackend smgr_rlocator
Definition: smgr.h:37

References pfree(), psprintf(), relpath, and SMgrRelationData::smgr_rlocator.

Referenced by _mdfd_getseg(), _mdfd_openseg(), and mdsyncfiletag().

◆ _mdnblocks()

static BlockNumber _mdnblocks ( SMgrRelation  reln,
ForkNumber  forknum,
MdfdVec *  seg 
)
static

Definition at line 1715 of file md.c.

1716 {
1717  off_t len;
1718 
1719  len = FileSize(seg->mdfd_vfd);
1720  if (len < 0)
1721  ereport(ERROR,
1722  (errcode_for_file_access(),
1723  errmsg("could not seek to end of file \"%s\": %m",
1724  FilePathName(seg->mdfd_vfd))));
1725  /* note that this calculation will ignore any partial block at EOF */
1726  return (BlockNumber) (len / BLCKSZ);
1727 }
char * FilePathName(File file)
Definition: fd.c:2461
off_t FileSize(File file)
Definition: fd.c:2409
const void size_t len

References ereport, errcode_for_file_access(), errmsg(), ERROR, FilePathName(), FileSize(), len, and _MdfdVec::mdfd_vfd.

Referenced by _mdfd_getseg(), _mdfd_openseg(), mdextend(), mdnblocks(), mdopenfork(), and mdzeroextend().

◆ buffers_to_iovec()

static int buffers_to_iovec ( struct iovec *  iov,
void **  buffers,
int  nblocks 
)
static

Definition at line 762 of file md.c.

763 {
764  struct iovec *iovp;
765  int iovcnt;
766 
767  Assert(nblocks >= 1);
768 
769  /* If this build supports direct I/O, buffers must be I/O aligned. */
770  for (int i = 0; i < nblocks; ++i)
771  {
772  if (PG_O_DIRECT != 0 && PG_IO_ALIGN_SIZE <= BLCKSZ)
773  Assert((uintptr_t) buffers[i] ==
774  TYPEALIGN(PG_IO_ALIGN_SIZE, buffers[i]));
775  }
776 
777  /* Start the first iovec off with the first buffer. */
778  iovp = &iov[0];
779  iovp->iov_base = buffers[0];
780  iovp->iov_len = BLCKSZ;
781  iovcnt = 1;
782 
783  /* Try to merge the rest. */
784  for (int i = 1; i < nblocks; ++i)
785  {
786  void *buffer = buffers[i];
787 
788  if (((char *) iovp->iov_base + iovp->iov_len) == buffer)
789  {
790  /* Contiguous with the last iovec. */
791  iovp->iov_len += BLCKSZ;
792  }
793  else
794  {
795  /* Need a new iovec. */
796  iovp++;
797  iovp->iov_base = buffer;
798  iovp->iov_len = BLCKSZ;
799  iovcnt++;
800  }
801  }
802 
803  return iovcnt;
804 }
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:804
int i
Definition: isn.c:73

References Assert, i, PG_IO_ALIGN_SIZE, PG_O_DIRECT, and TYPEALIGN.

Referenced by mdreadv(), and mdwritev().

◆ do_truncate()

static int do_truncate ( const char *  path)
static

Definition at line 323 of file md.c.

324 {
325  int save_errno;
326  int ret;
327 
328  ret = pg_truncate(path, 0);
329 
330  /* Log a warning here to avoid repetition in callers. */
331  if (ret < 0 && errno != ENOENT)
332  {
333  save_errno = errno;
334  ereport(WARNING,
335  (errcode_for_file_access(),
336  errmsg("could not truncate file \"%s\": %m", path)));
337  errno = save_errno;
338  }
339 
340  return ret;
341 }
#define WARNING
Definition: elog.h:36
int pg_truncate(const char *path, off_t length)
Definition: fd.c:720

References ereport, errcode_for_file_access(), errmsg(), pg_truncate(), and WARNING.

Referenced by mdunlinkfork().

◆ DropRelationFiles()

void DropRelationFiles ( RelFileLocator *  delrels,
int  ndelrels,
bool  isRedo 
)

Definition at line 1446 of file md.c.

1447 {
1448  SMgrRelation *srels;
1449  int i;
1450 
1451  srels = palloc(sizeof(SMgrRelation) * ndelrels);
1452  for (i = 0; i < ndelrels; i++)
1453  {
1454  SMgrRelation srel = smgropen(delrels[i], INVALID_PROC_NUMBER);
1455 
1456  if (isRedo)
1457  {
1458  ForkNumber fork;
1459 
1460  for (fork = 0; fork <= MAX_FORKNUM; fork++)
1461  XLogDropRelation(delrels[i], fork);
1462  }
1463  srels[i] = srel;
1464  }
1465 
1466  smgrdounlinkall(srels, ndelrels, isRedo);
1467 
1468  for (i = 0; i < ndelrels; i++)
1469  smgrclose(srels[i]);
1470  pfree(srels);
1471 }
void * palloc(Size size)
Definition: mcxt.c:1316
#define INVALID_PROC_NUMBER
Definition: procnumber.h:26
ForkNumber
Definition: relpath.h:48
#define MAX_FORKNUM
Definition: relpath.h:62
SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber backend)
Definition: smgr.c:198
void smgrclose(SMgrRelation reln)
Definition: smgr.c:320
void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
Definition: smgr.c:462
void XLogDropRelation(RelFileLocator rlocator, ForkNumber forknum)
Definition: xlogutils.c:641

References i, INVALID_PROC_NUMBER, MAX_FORKNUM, palloc(), pfree(), smgrclose(), smgrdounlinkall(), smgropen(), and XLogDropRelation().

Referenced by FinishPreparedTransaction(), xact_redo_abort(), and xact_redo_commit().

◆ ForgetDatabaseSyncRequests()

void ForgetDatabaseSyncRequests ( Oid  dbid)

Definition at line 1428 of file md.c.

1429 {
1430  FileTag tag;
1431  RelFileLocator rlocator;
1432 
1433  rlocator.dbOid = dbid;
1434  rlocator.spcOid = 0;
1435  rlocator.relNumber = 0;
1436 
1437  INIT_MD_FILETAG(tag, rlocator, InvalidForkNumber, InvalidBlockNumber);
1438 
1439  RegisterSyncRequest(&tag, SYNC_FILTER_REQUEST, true /* retryOnError */ );
1440 }
#define InvalidBlockNumber
Definition: block.h:33
#define INIT_MD_FILETAG(a, xx_rlocator, xx_forknum, xx_segno)
Definition: md.c:90
@ InvalidForkNumber
Definition: relpath.h:49
RelFileNumber relNumber
bool RegisterSyncRequest(const FileTag *ftag, SyncRequestType type, bool retryOnError)
Definition: sync.c:580
@ SYNC_FILTER_REQUEST
Definition: sync.h:28

References RelFileLocator::dbOid, INIT_MD_FILETAG, InvalidBlockNumber, InvalidForkNumber, RegisterSyncRequest(), RelFileLocator::relNumber, RelFileLocator::spcOid, and SYNC_FILTER_REQUEST.

Referenced by createdb_failure_callback(), dbase_redo(), and dropdb().

◆ mdclose()

void mdclose ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 691 of file md.c.

692 {
693  int nopensegs = reln->md_num_open_segs[forknum];
694 
695  /* No work if already closed */
696  if (nopensegs == 0)
697  return;
698 
699  /* close segments starting from the end */
700  while (nopensegs > 0)
701  {
702  MdfdVec *v = &reln->md_seg_fds[forknum][nopensegs - 1];
703 
704  FileClose(v->mdfd_vfd);
705  _fdvec_resize(reln, forknum, nopensegs - 1);
706  nopensegs--;
707  }
708 }
void FileClose(File file)
Definition: fd.c:1978

References _fdvec_resize(), FileClose(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, and _MdfdVec::mdfd_vfd.

Referenced by mdexists().

◆ mdcreate()

void mdcreate ( SMgrRelation  reln,
ForkNumber  forknum,
bool  isRedo 
)

Definition at line 190 of file md.c.

191 {
192  MdfdVec *mdfd;
193  char *path;
194  File fd;
195 
196  if (isRedo && reln->md_num_open_segs[forknum] > 0)
197  return; /* created and opened already... */
198 
199  Assert(reln->md_num_open_segs[forknum] == 0);
200 
201  /*
202  * We may be using the target table space for the first time in this
203  * database, so create a per-database subdirectory if needed.
204  *
205  * XXX this is a fairly ugly violation of module layering, but this seems
206  * to be the best place to put the check. Maybe TablespaceCreateDbspace
207  * should be here and not in commands/tablespace.c? But that would imply
208  * importing a lot of stuff that smgr.c oughtn't know, either.
209  */
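210  TablespaceCreateDbspace(reln->smgr_rlocator.locator.spcOid,
211  reln->smgr_rlocator.locator.dbOid,
212  isRedo);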
213 
214  path = relpath(reln->smgr_rlocator, forknum);
215 
216  fd = PathNameOpenFile(path, _mdfd_open_flags() | O_CREAT | O_EXCL);
217 
218  if (fd < 0)
219  {
220  int save_errno = errno;
221 
222  if (isRedo)
223  fd = PathNameOpenFile(path, _mdfd_open_flags());
224  if (fd < 0)
225  {
226  /* be sure to report the error reported by create, not open */
227  errno = save_errno;
228  ereport(ERROR,
229  (errcode_for_file_access(),
230  errmsg("could not create file \"%s\": %m", path)));
231  }
232  }
233 
234  pfree(path);
235 
236  _fdvec_resize(reln, forknum, 1);
237  mdfd = &reln->md_seg_fds[forknum][0];
238  mdfd->mdfd_vfd = fd;
239  mdfd->mdfd_segno = 0;
240 
241  if (!SmgrIsTemp(reln))
242  register_dirty_segment(reln, forknum, mdfd);
243 }
void TablespaceCreateDbspace(Oid spcOid, Oid dbOid, bool isRedo)
Definition: tablespace.c:112
static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:1353
#define SmgrIsTemp(smgr)
Definition: smgr.h:73
RelFileLocator locator

References _fdvec_resize(), _mdfd_open_flags(), Assert, RelFileLocator::dbOid, ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), RelFileLocatorBackend::locator, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, PathNameOpenFile(), pfree(), register_dirty_segment(), relpath, SMgrRelationData::smgr_rlocator, SmgrIsTemp, RelFileLocator::spcOid, and TablespaceCreateDbspace().

◆ mdexists()

bool mdexists ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 171 of file md.c.

172 {
173  /*
174  * Close it first, to ensure that we notice if the fork has been unlinked
175  * since we opened it. As an optimization, we can skip that in recovery,
176  * which already closes relations when dropping them.
177  */
178  if (!InRecovery)
179  mdclose(reln, forknum);
180 
181  return (mdopenfork(reln, forknum, EXTENSION_RETURN_NULL) != NULL);
182 }
void mdclose(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:691

References EXTENSION_RETURN_NULL, InRecovery, mdclose(), and mdopenfork().

◆ mdextend()

void mdextend ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
const void *  buffer,
bool  skipFsync 
)

Definition at line 460 of file md.c.

462 {
463  off_t seekpos;
464  int nbytes;
465  MdfdVec *v;
466 
467  /* If this build supports direct I/O, the buffer must be I/O aligned. */
468  if (PG_O_DIRECT != 0 && PG_IO_ALIGN_SIZE <= BLCKSZ)
469  Assert((uintptr_t) buffer == TYPEALIGN(PG_IO_ALIGN_SIZE, buffer));
470 
471  /* This assert is too expensive to have on normally ... */
472 #ifdef CHECK_WRITE_VS_EXTEND
473  Assert(blocknum >= mdnblocks(reln, forknum));
474 #endif
475 
476  /*
477  * If a relation manages to grow to 2^32-1 blocks, refuse to extend it any
478  * more --- we mustn't create a block whose number actually is
479  * InvalidBlockNumber. (Note that this failure should be unreachable
480  * because of upstream checks in bufmgr.c.)
481  */
482  if (blocknum == InvalidBlockNumber)
483  ereport(ERROR,
484  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
485  errmsg("cannot extend file \"%s\" beyond %u blocks",
486  relpath(reln->smgr_rlocator, forknum),
487  InvalidBlockNumber)));
488 
489  v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE);
490 
491  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
492 
493  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
494 
495  if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ)
496  {
497  if (nbytes < 0)
498  ereport(ERROR,
499  (errcode_for_file_access(),
500  errmsg("could not extend file \"%s\": %m",
501  FilePathName(v->mdfd_vfd)),
502  errhint("Check free disk space.")));
503  /* short write: complain appropriately */
504  ereport(ERROR,
505  (errcode(ERRCODE_DISK_FULL),
506  errmsg("could not extend file \"%s\": wrote only %d of %d bytes at block %u",
507  FilePathName(v->mdfd_vfd),
508  nbytes, BLCKSZ, blocknum),
509  errhint("Check free disk space.")));
510  }
511 
512  if (!skipFsync && !SmgrIsTemp(reln))
513  register_dirty_segment(reln, forknum, v);
514 
515  Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
516 }
int errhint(const char *fmt,...)
Definition: elog.c:1319
int errcode(int sqlerrcode)
Definition: elog.c:859
static ssize_t FileWrite(File file, const void *buffer, size_t amount, off_t offset, uint32 wait_event_info)
Definition: fd.h:208
BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:1089
static MdfdVec * _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, bool skipFsync, int behavior)
Definition: md.c:1584

References _mdfd_getseg(), _mdnblocks(), Assert, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXTENSION_CREATE, FilePathName(), FileWrite(), InvalidBlockNumber, _MdfdVec::mdfd_vfd, mdnblocks(), PG_IO_ALIGN_SIZE, PG_O_DIRECT, register_dirty_segment(), relpath, SMgrRelationData::smgr_rlocator, SmgrIsTemp, and TYPEALIGN.

Referenced by _mdfd_getseg().

◆ mdfiletagmatches()

bool mdfiletagmatches ( const FileTag *  ftag,
const FileTag *  candidate 
)

Definition at line 1808 of file md.c.

1809 {
1810  /*
1811  * For now we only use filter requests as a way to drop all scheduled
1812  * callbacks relating to a given database, when dropping the database.
1813  * We'll return true for all candidates that have the same database OID as
1814  * the ftag from the SYNC_FILTER_REQUEST request, so they're forgotten.
1815  */
1816  return ftag->rlocator.dbOid == candidate->rlocator.dbOid;
1817 }
RelFileLocator rlocator
Definition: sync.h:54

References RelFileLocator::dbOid, and FileTag::rlocator.

◆ mdimmedsync()

void mdimmedsync ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 1291 of file md.c.

1292 {
1293  int segno;
1294  int min_inactive_seg;
1295 
1296  /*
1297  * NOTE: mdnblocks makes sure we have opened all active segments, so that
1298  * the loop below will get them all!
1299  */
1300  mdnblocks(reln, forknum);
1301 
1302  min_inactive_seg = segno = reln->md_num_open_segs[forknum];
1303 
1304  /*
1305  * Temporarily open inactive segments, then close them after sync. There
1306  * may be some inactive segments left opened after fsync() error, but that
1307  * is harmless. We don't bother to clean them up and take a risk of
1308  * further trouble. The next mdclose() will soon close them.
1309  */
1310  while (_mdfd_openseg(reln, forknum, segno, 0) != NULL)
1311  segno++;
1312 
1313  while (segno > 0)
1314  {
1315  MdfdVec *v = &reln->md_seg_fds[forknum][segno - 1];
1316 
1317  /*
1318  * fsyncs done through mdimmedsync() should be tracked in a separate
1319  * IOContext than those done through mdsyncfiletag() to differentiate
1320  * between unavoidable client backend fsyncs (e.g. those done during
1321  * index build) and those which ideally would have been done by the
1322  * checkpointer. Since other IO operations bypassing the buffer
1323  * manager could also be tracked in such an IOContext, wait until
1324  * these are also tracked to track immediate fsyncs.
1325  */
1326  if (FileSync(v->mdfd_vfd, WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC) < 0)
1327  ereport(data_sync_elevel(ERROR),
1328  (errcode_for_file_access(),
1329  errmsg("could not fsync file \"%s\": %m",
1330  FilePathName(v->mdfd_vfd))));
1331 
1332  /* Close inactive segments immediately */
1333  if (segno > min_inactive_seg)
1334  {
1335  FileClose(v->mdfd_vfd);
1336  _fdvec_resize(reln, forknum, segno - 1);
1337  }
1338 
1339  segno--;
1340  }
1341 }
int FileSync(File file, uint32 wait_event_info)
Definition: fd.c:2297
int data_sync_elevel(int elevel)
Definition: fd.c:3936

References _fdvec_resize(), _mdfd_openseg(), data_sync_elevel(), ereport, errcode_for_file_access(), errmsg(), ERROR, FileClose(), FilePathName(), FileSync(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, and mdnblocks().

◆ mdinit()

void mdinit ( void  )

Definition at line 158 of file md.c.

159 {
160  MdCxt = AllocSetContextCreate(TopMemoryContext,
161  "MdSmgr",
162  ALLOCSET_DEFAULT_SIZES);
163 }
MemoryContext TopMemoryContext
Definition: mcxt.c:149
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, MdCxt, and TopMemoryContext.

◆ mdnblocks()

BlockNumber mdnblocks ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 1089 of file md.c.

1090 {
1091  MdfdVec *v;
1092  BlockNumber nblocks;
1093  BlockNumber segno;
1094 
1095  mdopenfork(reln, forknum, EXTENSION_FAIL);
1096 
1097  /* mdopen has opened the first segment */
1098  Assert(reln->md_num_open_segs[forknum] > 0);
1099 
1100  /*
1101  * Start from the last open segments, to avoid redundant seeks. We have
1102  * previously verified that these segments are exactly RELSEG_SIZE long,
1103  * and it's useless to recheck that each time.
1104  *
1105  * NOTE: this assumption could only be wrong if another backend has
1106  * truncated the relation. We rely on higher code levels to handle that
1107  * scenario by closing and re-opening the md fd, which is handled via
1108  * relcache flush. (Since the checkpointer doesn't participate in
1109  * relcache flush, it could have segment entries for inactive segments;
1110  * that's OK because the checkpointer never needs to compute relation
1111  * size.)
1112  */
1113  segno = reln->md_num_open_segs[forknum] - 1;
1114  v = &reln->md_seg_fds[forknum][segno];
1115 
1116  for (;;)
1117  {
1118  nblocks = _mdnblocks(reln, forknum, v);
1119  if (nblocks > ((BlockNumber) RELSEG_SIZE))
1120  elog(FATAL, "segment too big");
1121  if (nblocks < ((BlockNumber) RELSEG_SIZE))
1122  return (segno * ((BlockNumber) RELSEG_SIZE)) + nblocks;
1123 
1124  /*
1125  * If segment is exactly RELSEG_SIZE, advance to next one.
1126  */
1127  segno++;
1128 
1129  /*
1130  * We used to pass O_CREAT here, but that has the disadvantage that it
1131  * might create a segment which has vanished through some operating
1132  * system misadventure. In such a case, creating the segment here
1133  * undermines _mdfd_getseg's attempts to notice and report an error
1134  * upon access to a missing segment.
1135  */
1136  v = _mdfd_openseg(reln, forknum, segno, 0);
1137  if (v == NULL)
1138  return segno * ((BlockNumber) RELSEG_SIZE);
1139  }
1140 }

References _mdfd_openseg(), _mdnblocks(), Assert, elog, EXTENSION_FAIL, FATAL, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, and mdopenfork().

Referenced by mdextend(), mdimmedsync(), mdregistersync(), mdtruncate(), mdwritev(), and mdzeroextend().

◆ mdopen()

void mdopen ( SMgrRelation  reln)

Definition at line 680 of file md.c.

681 {
682  /* mark it not open: record zero open segments for every fork number 0..MAX_FORKNUM */
683  for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)
684  reln->md_num_open_segs[forknum] = 0;
685 }

References MAX_FORKNUM, and SMgrRelationData::md_num_open_segs.

◆ mdopenfork()

static MdfdVec * mdopenfork ( SMgrRelation  reln,
ForkNumber  forknum,
int  behavior 
)
static

Definition at line 637 of file md.c.

638 {
639  MdfdVec *mdfd;
640  char *path;
641  File fd;
642 
643  /* No work if already open: hand back the entry for the first segment */
644  if (reln->md_num_open_segs[forknum] > 0)
645  return &reln->md_seg_fds[forknum][0];
646 
647  path = relpath(reln->smgr_rlocator, forknum);
648 
649  fd = PathNameOpenFile(path, _mdfd_open_flags());
650 
651  if (fd < 0)
652  {
653  if ((behavior & EXTENSION_RETURN_NULL) &&
654  FILE_POSSIBLY_DELETED(errno))
655  {
656  pfree(path);
657  return NULL;
658  }
659  ereport(ERROR,
660  (errcode_for_file_access(),
661  errmsg("could not open file \"%s\": %m", path)));
662  }
663 
664  pfree(path);
665 
666  _fdvec_resize(reln, forknum, 1);
667  mdfd = &reln->md_seg_fds[forknum][0];
668  mdfd->mdfd_vfd = fd;
669  mdfd->mdfd_segno = 0;
670 
671  Assert(_mdnblocks(reln, forknum, mdfd) <= ((BlockNumber) RELSEG_SIZE));
672 
673  return mdfd;
674 }

References _fdvec_resize(), _mdfd_open_flags(), _mdnblocks(), Assert, ereport, errcode_for_file_access(), errmsg(), ERROR, EXTENSION_RETURN_NULL, fd(), FILE_POSSIBLY_DELETED, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, PathNameOpenFile(), pfree(), relpath, and SMgrRelationData::smgr_rlocator.

Referenced by _mdfd_getseg(), mdexists(), and mdnblocks().

◆ mdprefetch()

bool mdprefetch ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
int  nblocks 
)

Definition at line 714 of file md.c.

716 {
717 #ifdef USE_PREFETCH
718 
719  Assert((io_direct_flags & IO_DIRECT_DATA) == 0);
720 
721  if ((uint64) blocknum + nblocks > (uint64) MaxBlockNumber + 1)
722  return false;
723 
724  while (nblocks > 0)
725  {
726  off_t seekpos;
727  MdfdVec *v;
728  int nblocks_this_segment;
729 
730  v = _mdfd_getseg(reln, forknum, blocknum, false,
731  InRecovery ? EXTENSION_RETURN_NULL : EXTENSION_FAIL);
732  if (v == NULL)
733  return false;
734 
735  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
736 
737  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
738 
739  nblocks_this_segment =
740  Min(nblocks,
741  RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE)));
742 
743  (void) FilePrefetch(v->mdfd_vfd, seekpos, BLCKSZ * nblocks_this_segment,
744  WAIT_EVENT_DATA_FILE_PREFETCH);
745 
746  blocknum += nblocks_this_segment;
747  nblocks -= nblocks_this_segment;
748  }
749 #endif /* USE_PREFETCH */
750 
751  return true;
752 }
#define MaxBlockNumber
Definition: block.h:35
#define Min(x, y)
Definition: c.h:1004
int FilePrefetch(File file, off_t offset, off_t amount, uint32 wait_event_info)
Definition: fd.c:2078

References _mdfd_getseg(), Assert, EXTENSION_FAIL, EXTENSION_RETURN_NULL, FilePrefetch(), InRecovery, IO_DIRECT_DATA, io_direct_flags, MaxBlockNumber, _MdfdVec::mdfd_vfd, and Min.

◆ mdreadv()

void mdreadv ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
void **  buffers,
BlockNumber  nblocks 
)

Definition at line 810 of file md.c.

812 {
813  while (nblocks > 0)
814  {
815  struct iovec iov[PG_IOV_MAX];
816  int iovcnt;
817  off_t seekpos;
818  int nbytes;
819  MdfdVec *v;
820  BlockNumber nblocks_this_segment;
821  size_t transferred_this_segment;
822  size_t size_this_segment;
823 
824  v = _mdfd_getseg(reln, forknum, blocknum, false,
825  EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
826 
827  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
828 
829  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
830 
831  nblocks_this_segment =
832  Min(nblocks,
833  RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE)));
834  nblocks_this_segment = Min(nblocks_this_segment, lengthof(iov));
835 
836  iovcnt = buffers_to_iovec(iov, buffers, nblocks_this_segment);
837  size_this_segment = nblocks_this_segment * BLCKSZ;
838  transferred_this_segment = 0;
839 
840  /*
841  * Inner loop to continue after a short read. We'll keep going until
842  * we hit EOF rather than assuming that a short read means we hit the
843  * end.
844  */
845  for (;;)
846  {
847  TRACE_POSTGRESQL_SMGR_MD_READ_START(forknum, blocknum,
848  reln->smgr_rlocator.locator.spcOid,
849  reln->smgr_rlocator.locator.dbOid,
850  reln->smgr_rlocator.locator.relNumber,
851  reln->smgr_rlocator.backend);
852  nbytes = FileReadV(v->mdfd_vfd, iov, iovcnt, seekpos,
853  WAIT_EVENT_DATA_FILE_READ);
854  TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum,
855  reln->smgr_rlocator.locator.spcOid,
856  reln->smgr_rlocator.locator.dbOid,
857  reln->smgr_rlocator.locator.relNumber,
858  reln->smgr_rlocator.backend,
859  nbytes,
860  size_this_segment - transferred_this_segment);
861 
862 #ifdef SIMULATE_SHORT_READ
863  nbytes = Min(nbytes, 4096);
864 #endif
865 
866  if (nbytes < 0)
867  ereport(ERROR,
868  (errcode_for_file_access(),
869  errmsg("could not read blocks %u..%u in file \"%s\": %m",
870  blocknum,
871  blocknum + nblocks_this_segment - 1,
872  FilePathName(v->mdfd_vfd))));
873 
874  if (nbytes == 0)
875  {
876  /*
877  * We are at or past EOF, or we read a partial block at EOF.
878  * Normally this is an error; upper levels should never try to
879  * read a nonexistent block. However, if zero_damaged_pages
880  * is ON or we are InRecovery, we should instead return zeroes
881  * without complaining. This allows, for example, the case of
882  * trying to update a block that was later truncated away.
883  */
884  if (zero_damaged_pages || InRecovery)
885  {
886  for (BlockNumber i = transferred_this_segment / BLCKSZ;
887  i < nblocks_this_segment;
888  ++i)
889  memset(buffers[i], 0, BLCKSZ);
890  break;
891  }
892  else
893  ereport(ERROR,
894  (errcode(ERRCODE_DATA_CORRUPTED),
895  errmsg("could not read blocks %u..%u in file \"%s\": read only %zu of %zu bytes",
896  blocknum,
897  blocknum + nblocks_this_segment - 1,
898  FilePathName(v->mdfd_vfd),
899  transferred_this_segment,
900  size_this_segment)));
901  }
902 
903  /* One loop should usually be enough. */
904  transferred_this_segment += nbytes;
905  Assert(transferred_this_segment <= size_this_segment);
906  if (transferred_this_segment == size_this_segment)
907  break;
908 
909  /* Adjust position and vectors after a short read. */
910  seekpos += nbytes;
911  iovcnt = compute_remaining_iovec(iov, iov, iovcnt, nbytes);
912  }
913 
914  nblocks -= nblocks_this_segment;
915  buffers += nblocks_this_segment;
916  blocknum += nblocks_this_segment;
917  }
918 }
bool zero_damaged_pages
Definition: bufmgr.c:139
#define lengthof(array)
Definition: c.h:788
ssize_t FileReadV(File file, const struct iovec *iov, int iovcnt, off_t offset, uint32 wait_event_info)
Definition: fd.c:2136
int compute_remaining_iovec(struct iovec *destination, const struct iovec *source, int iovcnt, size_t transferred)
Definition: file_utils.c:592
static int buffers_to_iovec(struct iovec *iov, void **buffers, int nblocks)
Definition: md.c:762
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
#define PG_IOV_MAX
Definition: pg_iovec.h:37

References _mdfd_getseg(), Assert, RelFileLocatorBackend::backend, buffers_to_iovec(), compute_remaining_iovec(), RelFileLocator::dbOid, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errmsg(), ERROR, EXTENSION_CREATE_RECOVERY, EXTENSION_FAIL, FilePathName(), FileReadV(), i, InRecovery, lengthof, RelFileLocatorBackend::locator, _MdfdVec::mdfd_vfd, Min, PG_IOV_MAX, RelFileLocator::relNumber, SMgrRelationData::smgr_rlocator, RelFileLocator::spcOid, and zero_damaged_pages.

◆ mdregistersync()

void mdregistersync ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 1240 of file md.c.

1241 {
1242  int segno;
1243  int min_inactive_seg;
1244 
1245  /*
1246  * NOTE: mdnblocks makes sure we have opened all active segments, so that
1247  * the loop below will get them all! Its return value is not needed here.
1248  */
1249  mdnblocks(reln, forknum);
1250 
1251  min_inactive_seg = segno = reln->md_num_open_segs[forknum];
1252 
1253  /*
1254  * Temporarily open inactive segments, then close them after sync. There
1255  * may be some inactive segments left opened after error, but that is
1256  * harmless. We don't bother to clean them up and take a risk of further
1257  * trouble. The next mdclose() will soon close them.
1258  */
1259  while (_mdfd_openseg(reln, forknum, segno, 0) != NULL)
1260  segno++;
1261 
1262  while (segno > 0)
1263  {
1264  MdfdVec *v = &reln->md_seg_fds[forknum][segno - 1];
1265 
1266  register_dirty_segment(reln, forknum, v);
1267 
1268  /* Close inactive segments immediately: those past the count recorded before the open loop above */
1269  if (segno > min_inactive_seg)
1270  {
1271  FileClose(v->mdfd_vfd);
1272  _fdvec_resize(reln, forknum, segno - 1);
1273  }
1274 
1275  segno--;
1276  }
1277 }

References _fdvec_resize(), _mdfd_openseg(), FileClose(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, mdnblocks(), and register_dirty_segment().

◆ mdsyncfiletag()

int mdsyncfiletag ( const FileTag ftag,
char *  path 
)

Definition at line 1736 of file md.c.

1737 {
1738  SMgrRelation reln = smgropen(ftag->rlocator, INVALID_PROC_NUMBER);
1739  File file;
1740  instr_time io_start;
1741  bool need_to_close;
1742  int result,
1743  save_errno;
1744 
1745  /* See if we already have the file open, or need to open it. */
1746  if (ftag->segno < reln->md_num_open_segs[ftag->forknum])
1747  {
1748  file = reln->md_seg_fds[ftag->forknum][ftag->segno].mdfd_vfd;
1749  strlcpy(path, FilePathName(file), MAXPGPATH);
1750  need_to_close = false;
1751  }
1752  else
1753  {
1754  char *p;
1755 
1756  p = _mdfd_segpath(reln, ftag->forknum, ftag->segno);
1757  strlcpy(path, p, MAXPGPATH);
1758  pfree(p);
1759 
1760  file = PathNameOpenFile(path, _mdfd_open_flags());
1761  if (file < 0)
1762  return -1;
1763  need_to_close = true;
1764  }
1765 
1766  io_start = pgstat_prepare_io_time(track_io_timing);
1767 
1768  /* Sync the file; keep errno so closing and stats accounting cannot clobber it. */
1769  result = FileSync(file, WAIT_EVENT_DATA_FILE_SYNC);
1770  save_errno = errno;
1771 
1772  if (need_to_close)
1773  FileClose(file);
1774 
1775  pgstat_count_io_op_time(IOOBJECT_RELATION, IOCONTEXT_NORMAL,
1776  IOOP_FSYNC, io_start, 1);
1777 
1778  errno = save_errno;
1779  return result;
1780 }
bool track_io_timing
Definition: bufmgr.c:142
#define MAXPGPATH
@ IOOBJECT_RELATION
Definition: pgstat.h:280
@ IOCONTEXT_NORMAL
Definition: pgstat.h:290
@ IOOP_FSYNC
Definition: pgstat.h:300
instr_time pgstat_prepare_io_time(bool track_io_guc)
Definition: pgstat_io.c:100
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt)
Definition: pgstat_io.c:122
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
int16 forknum
Definition: sync.h:53
uint64 segno
Definition: sync.h:55

References _mdfd_open_flags(), _mdfd_segpath(), FileClose(), FilePathName(), FileSync(), FileTag::forknum, INVALID_PROC_NUMBER, IOCONTEXT_NORMAL, IOOBJECT_RELATION, IOOP_FSYNC, MAXPGPATH, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, PathNameOpenFile(), pfree(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), FileTag::rlocator, FileTag::segno, smgropen(), strlcpy(), and track_io_timing.

◆ mdtruncate()

void mdtruncate ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  nblocks 
)

Definition at line 1146 of file md.c.

1147 {
1148  BlockNumber curnblk;
1149  BlockNumber priorblocks;
1150  int curopensegs;
1151 
1152  /*
1153  * NOTE: mdnblocks makes sure we have opened all active segments, so that
1154  * truncation loop will get them all!
1155  */
1156  curnblk = mdnblocks(reln, forknum);
1157  if (nblocks > curnblk)
1158  {
1159  /* Bogus request ... but no complaint if InRecovery */
1160  if (InRecovery)
1161  return;
1162  ereport(ERROR,
1163  (errmsg("could not truncate file \"%s\" to %u blocks: it's only %u blocks now",
1164  relpath(reln->smgr_rlocator, forknum),
1165  nblocks, curnblk)));
1166  }
1167  if (nblocks == curnblk)
1168  return; /* no work */
1169 
1170  /*
1171  * Truncate segments, starting at the last one. Starting at the end makes
1172  * managing the memory for the fd array easier, should there be errors.
1173  */
1174  curopensegs = reln->md_num_open_segs[forknum];
1175  while (curopensegs > 0)
1176  {
1177  MdfdVec *v;
1178 
1179  priorblocks = (curopensegs - 1) * RELSEG_SIZE;
1180 
1181  v = &reln->md_seg_fds[forknum][curopensegs - 1];
1182 
1183  if (priorblocks > nblocks)
1184  {
1185  /*
1186  * This segment is no longer active. We truncate the file, but do
1187  * not delete it, for reasons explained in the header comments.
1188  */
1189  if (FileTruncate(v->mdfd_vfd, 0, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0)
1190  ereport(ERROR,
1191  (errcode_for_file_access(),
1192  errmsg("could not truncate file \"%s\": %m",
1193  FilePathName(v->mdfd_vfd))));
1194 
1195  if (!SmgrIsTemp(reln))
1196  register_dirty_segment(reln, forknum, v);
1197 
1198  /* we never drop the 1st segment */
1199  Assert(v != &reln->md_seg_fds[forknum][0]);
1200 
1201  FileClose(v->mdfd_vfd);
1202  _fdvec_resize(reln, forknum, curopensegs - 1);
1203  }
1204  else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks)
1205  {
1206  /*
1207  * This is the last segment we want to keep. Truncate the file to
1208  * the right length. NOTE: if nblocks is exactly a multiple K of
1209  * RELSEG_SIZE, we will truncate the K+1st segment to 0 length but
1210  * keep it. This adheres to the invariant given in the header
1211  * comments.
1212  */
1213  BlockNumber lastsegblocks = nblocks - priorblocks;
1214 
1215  if (FileTruncate(v->mdfd_vfd, (off_t) lastsegblocks * BLCKSZ, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0)
1216  ereport(ERROR,
1217  (errcode_for_file_access(),
1218  errmsg("could not truncate file \"%s\" to %u blocks: %m",
1219  FilePathName(v->mdfd_vfd),
1220  nblocks)));
1221  if (!SmgrIsTemp(reln))
1222  register_dirty_segment(reln, forknum, v);
1223  }
1224  else
1225  {
1226  /*
1227  * We still need this segment, so nothing to do for this and any
1228  * earlier segment.
1229  */
1230  break;
1231  }
1232  curopensegs--;
1233  }
1234 }
int FileTruncate(File file, off_t offset, uint32 wait_event_info)
Definition: fd.c:2426

References _fdvec_resize(), Assert, ereport, errcode_for_file_access(), errmsg(), ERROR, FileClose(), FilePathName(), FileTruncate(), InRecovery, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, mdnblocks(), register_dirty_segment(), relpath, SMgrRelationData::smgr_rlocator, and SmgrIsTemp.

◆ mdunlink()

void mdunlink ( RelFileLocatorBackend  rlocator,
ForkNumber  forknum,
bool  isRedo 
)

Definition at line 307 of file md.c.

308 {
309  /* Now do the per-fork work: InvalidForkNumber selects every fork, otherwise only the one given */
310  if (forknum == InvalidForkNumber)
311  {
312  for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
313  mdunlinkfork(rlocator, forknum, isRedo);
314  }
315  else
316  mdunlinkfork(rlocator, forknum, isRedo);
317 }
static void mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
Definition: md.c:344

References InvalidForkNumber, MAX_FORKNUM, and mdunlinkfork().

◆ mdunlinkfiletag()

int mdunlinkfiletag ( const FileTag ftag,
char *  path 
)

Definition at line 1789 of file md.c.

1790 {
1791  char *p;
1792 
1793  /* Compute the path; it is always built for MAIN_FORKNUM, and copied into the caller's buffer bounded by MAXPGPATH. */
1794  p = relpathperm(ftag->rlocator, MAIN_FORKNUM);
1795  strlcpy(path, p, MAXPGPATH);
1796  pfree(p);
1797 
1798  /* Try to unlink the file; unlink()'s result is returned to the caller unchanged. */
1799  return unlink(path);
1800 }
@ MAIN_FORKNUM
Definition: relpath.h:50
#define relpathperm(rlocator, forknum)
Definition: relpath.h:90

References MAIN_FORKNUM, MAXPGPATH, pfree(), relpathperm, FileTag::rlocator, and strlcpy().

◆ mdunlinkfork()

static void mdunlinkfork ( RelFileLocatorBackend  rlocator,
ForkNumber  forknum,
bool  isRedo 
)
static

Definition at line 344 of file md.c.

345 {
346  char *path;
347  int ret;
348  int save_errno;
349 
350  path = relpath(rlocator, forknum);
351 
352  /*
353  * Truncate and then unlink the first segment, or just register a request
354  * to unlink it later, as described in the comments for mdunlink().
355  */
356  if (isRedo || IsBinaryUpgrade || forknum != MAIN_FORKNUM ||
357  RelFileLocatorBackendIsTemp(rlocator))
358  {
359  if (!RelFileLocatorBackendIsTemp(rlocator))
360  {
361  /* Prevent other backends' fds from holding on to the disk space */
362  ret = do_truncate(path);
363 
364  /* Forget any pending sync requests for the first segment */
365  save_errno = errno;
366  register_forget_request(rlocator, forknum, 0 /* first seg */ );
367  errno = save_errno;
368  }
369  else
370  ret = 0;
371 
372  /* Next unlink the file, unless it was already found to be missing */
373  if (ret >= 0 || errno != ENOENT)
374  {
375  ret = unlink(path);
376  if (ret < 0 && errno != ENOENT)
377  {
378  save_errno = errno;
379  ereport(WARNING,
380  (errcode_for_file_access(),
381  errmsg("could not remove file \"%s\": %m", path)));
382  errno = save_errno;
383  }
384  }
385  }
386  else
387  {
388  /* Prevent other backends' fds from holding on to the disk space */
389  ret = do_truncate(path);
390 
391  /* Register request to unlink first segment later */
392  save_errno = errno;
393  register_unlink_segment(rlocator, forknum, 0 /* first seg */ );
394  errno = save_errno;
395  }
396 
397  /*
398  * Delete any additional segments.
399  *
400  * Note that because we loop until getting ENOENT, we will correctly
401  * remove all inactive segments as well as active ones. Ideally we'd
402  * continue the loop until getting exactly that errno, but that risks an
403  * infinite loop if the problem is directory-wide (for instance, if we
404  * suddenly can't read the data directory itself). We compromise by
405  * continuing after a non-ENOENT truncate error, but stopping after any
406  * unlink error. If there is indeed a directory-wide problem, additional
407  * unlink attempts wouldn't work anyway.
408  */
409  if (ret >= 0 || errno != ENOENT)
410  {
411  char *segpath = (char *) palloc(strlen(path) + 12);
412  BlockNumber segno;
413 
414  for (segno = 1;; segno++)
415  {
416  sprintf(segpath, "%s.%u", path, segno);
417 
418  if (!RelFileLocatorBackendIsTemp(rlocator))
419  {
420  /*
421  * Prevent other backends' fds from holding on to the disk
422  * space. We're done if we see ENOENT, though.
423  */
424  if (do_truncate(segpath) < 0 && errno == ENOENT)
425  break;
426 
427  /*
428  * Forget any pending sync requests for this segment before we
429  * try to unlink.
430  */
431  register_forget_request(rlocator, forknum, segno);
432  }
433 
434  if (unlink(segpath) < 0)
435  {
436  /* ENOENT is expected after the last segment... */
437  if (errno != ENOENT)
438  ereport(WARNING,
439  (errcode_for_file_access(),
440  errmsg("could not remove file \"%s\": %m", segpath)));
441  break;
442  }
443  }
444  pfree(segpath);
445  }
446 
447  pfree(path);
448 }
bool IsBinaryUpgrade
Definition: globals.c:118
static void register_forget_request(RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno)
Definition: md.c:1414
static void register_unlink_segment(RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno)
Definition: md.c:1397
static int do_truncate(const char *path)
Definition: md.c:323
#define sprintf
Definition: port.h:240
#define RelFileLocatorBackendIsTemp(rlocator)

References do_truncate(), ereport, errcode_for_file_access(), errmsg(), IsBinaryUpgrade, MAIN_FORKNUM, palloc(), pfree(), register_forget_request(), register_unlink_segment(), RelFileLocatorBackendIsTemp, relpath, sprintf, and WARNING.

Referenced by mdunlink().

◆ mdwriteback()

void mdwriteback ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
BlockNumber  nblocks 
)

Definition at line 1030 of file md.c.

1032 {
1033  Assert((io_direct_flags & IO_DIRECT_DATA) == 0);
1034 
1035  /*
1036  * Issue flush requests in as few requests as possible; have to split at
1037  * segment boundaries though, since those are actually separate files.
1038  */
1039  while (nblocks > 0)
1040  {
1041  BlockNumber nflush = nblocks;
1042  off_t seekpos;
1043  MdfdVec *v;
1044  int segnum_start,
1045  segnum_end;
1046 
1047  v = _mdfd_getseg(reln, forknum, blocknum, true /* not used */ ,
1048  EXTENSION_DONT_OPEN);
1049 
1050  /*
1051  * We might be flushing buffers of already removed relations, that's
1052  * ok, just ignore that case. If the segment file wasn't open already
1053  * (ie from a recent mdwrite()), then we don't want to re-open it, to
1054  * avoid a race with PROCSIGNAL_BARRIER_SMGRRELEASE that might leave
1055  * us with a descriptor to a file that is about to be unlinked.
1056  */
1057  if (!v)
1058  return;
1059 
1060  /* compute offset inside the current segment */
1061  segnum_start = blocknum / RELSEG_SIZE;
1062 
1063  /* compute number of desired writes within the current segment */
1064  segnum_end = (blocknum + nblocks - 1) / RELSEG_SIZE;
1065  if (segnum_start != segnum_end)
1066  nflush = RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE));
1067 
1068  Assert(nflush >= 1);
1069  Assert(nflush <= nblocks);
1070 
1071  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
1072 
1073  FileWriteback(v->mdfd_vfd, seekpos, (off_t) BLCKSZ * nflush, WAIT_EVENT_DATA_FILE_FLUSH);
1074 
1075  nblocks -= nflush;
1076  blocknum += nflush;
1077  }
1078 }
void FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info)
Definition: fd.c:2110

References _mdfd_getseg(), Assert, EXTENSION_DONT_OPEN, FileWriteback(), IO_DIRECT_DATA, io_direct_flags, and _MdfdVec::mdfd_vfd.

◆ mdwritev()

void mdwritev ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
const void **  buffers,
BlockNumber  nblocks,
bool  skipFsync 
)

Definition at line 928 of file md.c.

930 {
931  /* This assert is too expensive to have on normally ... */
932 #ifdef CHECK_WRITE_VS_EXTEND
933  Assert(blocknum < mdnblocks(reln, forknum));
934 #endif
935 
936  while (nblocks > 0)
937  {
938  struct iovec iov[PG_IOV_MAX];
939  int iovcnt;
940  off_t seekpos;
941  int nbytes;
942  MdfdVec *v;
943  BlockNumber nblocks_this_segment;
944  size_t transferred_this_segment;
945  size_t size_this_segment;
946 
947  v = _mdfd_getseg(reln, forknum, blocknum, skipFsync,
948  EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
949 
950  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
951 
952  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
953 
954  nblocks_this_segment =
955  Min(nblocks,
956  RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE)));
957  nblocks_this_segment = Min(nblocks_this_segment, lengthof(iov));
958 
959  iovcnt = buffers_to_iovec(iov, (void **) buffers, nblocks_this_segment);
960  size_this_segment = nblocks_this_segment * BLCKSZ;
961  transferred_this_segment = 0;
962 
963  /*
964  * Inner loop to continue after a short write. If the reason is that
965  * we're out of disk space, a future attempt should get an ENOSPC
966  * error from the kernel.
967  */
968  for (;;)
969  {
970  TRACE_POSTGRESQL_SMGR_MD_WRITE_START(forknum, blocknum,
971  reln->smgr_rlocator.locator.spcOid,
972  reln->smgr_rlocator.locator.dbOid,
973  reln->smgr_rlocator.locator.relNumber,
974  reln->smgr_rlocator.backend);
975  nbytes = FileWriteV(v->mdfd_vfd, iov, iovcnt, seekpos,
976  WAIT_EVENT_DATA_FILE_WRITE);
977  TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum,
978  reln->smgr_rlocator.locator.spcOid,
979  reln->smgr_rlocator.locator.dbOid,
980  reln->smgr_rlocator.locator.relNumber,
981  reln->smgr_rlocator.backend,
982  nbytes,
983  size_this_segment - transferred_this_segment);
984 
985 #ifdef SIMULATE_SHORT_WRITE
986  nbytes = Min(nbytes, 4096);
987 #endif
988 
989  if (nbytes < 0)
990  {
991  bool enospc = errno == ENOSPC;
992 
993  ereport(ERROR,
994  (errcode_for_file_access(),
995  errmsg("could not write blocks %u..%u in file \"%s\": %m",
996  blocknum,
997  blocknum + nblocks_this_segment - 1,
998  FilePathName(v->mdfd_vfd)),
999  enospc ? errhint("Check free disk space.") : 0));
1000  }
1001 
1002  /* One loop should usually be enough. */
1003  transferred_this_segment += nbytes;
1004  Assert(transferred_this_segment <= size_this_segment);
1005  if (transferred_this_segment == size_this_segment)
1006  break;
1007 
1008  /* Adjust position and iovecs after a short write. */
1009  seekpos += nbytes;
1010  iovcnt = compute_remaining_iovec(iov, iov, iovcnt, nbytes);
1011  }
1012 
1013  if (!skipFsync && !SmgrIsTemp(reln))
1014  register_dirty_segment(reln, forknum, v);
1015 
1016  nblocks -= nblocks_this_segment;
1017  buffers += nblocks_this_segment;
1018  blocknum += nblocks_this_segment;
1019  }
1020 }
ssize_t FileWriteV(File file, const struct iovec *iov, int iovcnt, off_t offset, uint32 wait_event_info)
Definition: fd.c:2192

References _mdfd_getseg(), Assert, RelFileLocatorBackend::backend, buffers_to_iovec(), compute_remaining_iovec(), RelFileLocator::dbOid, ereport, errcode_for_file_access(), errhint(), errmsg(), ERROR, EXTENSION_CREATE_RECOVERY, EXTENSION_FAIL, FilePathName(), FileWriteV(), lengthof, RelFileLocatorBackend::locator, _MdfdVec::mdfd_vfd, mdnblocks(), Min, PG_IOV_MAX, register_dirty_segment(), RelFileLocator::relNumber, SMgrRelationData::smgr_rlocator, SmgrIsTemp, and RelFileLocator::spcOid.

◆ mdzeroextend()

void mdzeroextend ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
int  nblocks,
bool  skipFsync 
)

Definition at line 525 of file md.c.

527 {
528  MdfdVec *v;
529  BlockNumber curblocknum = blocknum;
530  int remblocks = nblocks;
531 
532  Assert(nblocks > 0);
533 
534  /* This assert is too expensive to have on normally ... */
535 #ifdef CHECK_WRITE_VS_EXTEND
536  Assert(blocknum >= mdnblocks(reln, forknum));
537 #endif
538 
539  /*
540  * If a relation manages to grow to 2^32-1 blocks, refuse to extend it any
541  * more --- we mustn't create a block whose number actually is
542  * InvalidBlockNumber or larger.
543  */
544  if ((uint64) blocknum + nblocks >= (uint64) InvalidBlockNumber)
545  ereport(ERROR,
546  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
547  errmsg("cannot extend file \"%s\" beyond %u blocks",
548  relpath(reln->smgr_rlocator, forknum),
549  InvalidBlockNumber)));
550 
551  while (remblocks > 0)
552  {
553  BlockNumber segstartblock = curblocknum % ((BlockNumber) RELSEG_SIZE);
554  off_t seekpos = (off_t) BLCKSZ * segstartblock;
555  int numblocks;
556 
557  if (segstartblock + remblocks > RELSEG_SIZE)
558  numblocks = RELSEG_SIZE - segstartblock;
559  else
560  numblocks = remblocks;
561 
562  v = _mdfd_getseg(reln, forknum, curblocknum, skipFsync, EXTENSION_CREATE);
563 
564  Assert(segstartblock < RELSEG_SIZE);
565  Assert(segstartblock + numblocks <= RELSEG_SIZE);
566 
567  /*
568  * If available and useful, use posix_fallocate() (via
569  * FileFallocate()) to extend the relation. That's often more
570  * efficient than using write(), as it commonly won't cause the kernel
571  * to allocate page cache space for the extended pages.
572  *
573  * However, we don't use FileFallocate() for small extensions, as it
574  * defeats delayed allocation on some filesystems. Not clear where
575  * that decision should be made though? For now just use a cutoff of
576  * 8, anything between 4 and 8 worked OK in some local testing.
577  */
578  if (numblocks > 8)
579  {
580  int ret;
581 
582  ret = FileFallocate(v->mdfd_vfd,
583  seekpos, (off_t) BLCKSZ * numblocks,
584  WAIT_EVENT_DATA_FILE_EXTEND);
585  if (ret != 0)
586  {
587  ereport(ERROR,
588  errcode_for_file_access(),
589  errmsg("could not extend file \"%s\" with FileFallocate(): %m",
590  FilePathName(v->mdfd_vfd)),
591  errhint("Check free disk space."));
592  }
593  }
594  else
595  {
596  int ret;
597 
598  /*
599  * Even if we don't want to use fallocate, we can still extend a
600  * bit more efficiently than writing each 8kB block individually.
601  * pg_pwrite_zeros() (via FileZero()) uses pg_pwritev_with_retry()
602  * to avoid multiple writes or needing a zeroed buffer for the
603  * whole length of the extension.
604  */
605  ret = FileZero(v->mdfd_vfd,
606  seekpos, (off_t) BLCKSZ * numblocks,
607  WAIT_EVENT_DATA_FILE_EXTEND);
608  if (ret < 0)
609  ereport(ERROR,
610  errcode_for_file_access(),
611  errmsg("could not extend file \"%s\": %m",
612  FilePathName(v->mdfd_vfd)),
613  errhint("Check free disk space."));
614  }
615 
616  if (!skipFsync && !SmgrIsTemp(reln))
617  register_dirty_segment(reln, forknum, v);
618 
619  Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
620 
621  remblocks -= numblocks;
622  curblocknum += numblocks;
623  }
624 }
int FileFallocate(File file, off_t offset, off_t amount, uint32 wait_event_info)
Definition: fd.c:2369
int FileZero(File file, off_t offset, off_t amount, uint32 wait_event_info)
Definition: fd.c:2324

References _mdfd_getseg(), _mdnblocks(), Assert, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXTENSION_CREATE, FileFallocate(), FilePathName(), FileZero(), InvalidBlockNumber, _MdfdVec::mdfd_vfd, mdnblocks(), register_dirty_segment(), relpath, SMgrRelationData::smgr_rlocator, and SmgrIsTemp.

◆ register_dirty_segment()

static void register_dirty_segment ( SMgrRelation  reln,
ForkNumber  forknum,
MdfdVec seg 
)
static

Definition at line 1353 of file md.c.

1354 {
1355  FileTag tag;
1356 
1357  INIT_MD_FILETAG(tag, reln->smgr_rlocator.locator, forknum, seg->mdfd_segno);
1358 
1359  /* Temp relations should never be fsync'd */
1360  Assert(!SmgrIsTemp(reln));
1361 
1362  if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false /* retryOnError */ ))
1363  {
1364  instr_time io_start;
1365 
1366  ereport(DEBUG1,
1367  (errmsg_internal("could not forward fsync request because request queue is full")));
1368 
1369  io_start = pgstat_prepare_io_time(track_io_timing);
1370 
1371  if (FileSync(seg->mdfd_vfd, WAIT_EVENT_DATA_FILE_SYNC) < 0)
1372  ereport(data_sync_elevel(ERROR),
1373  (errcode_for_file_access(),
1374  errmsg("could not fsync file \"%s\": %m",
1375  FilePathName(seg->mdfd_vfd))));
1376 
1377  /*
1378  * We have no way of knowing if the current IOContext is
1379  * IOCONTEXT_NORMAL or IOCONTEXT_[BULKREAD, BULKWRITE, VACUUM] at this
1380  * point, so count the fsync as being in the IOCONTEXT_NORMAL
1381  * IOContext. This is probably okay, because the number of backend
1382  * fsyncs doesn't say anything about the efficacy of the
1383  * BufferAccessStrategy. And counting both fsyncs done in
1384  * IOCONTEXT_NORMAL and IOCONTEXT_[BULKREAD, BULKWRITE, VACUUM] under
1385  * IOCONTEXT_NORMAL is likely clearer when investigating the number of
1386  * backend fsyncs.
1387  */
1388  pgstat_count_io_op_time(IOOBJECT_RELATION, IOCONTEXT_NORMAL,
1389  IOOP_FSYNC, io_start, 1);
1390  }
1391 }
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1159
#define DEBUG1
Definition: elog.h:30
@ SYNC_REQUEST
Definition: sync.h:25

References Assert, data_sync_elevel(), DEBUG1, ereport, errcode_for_file_access(), errmsg(), errmsg_internal(), ERROR, FilePathName(), FileSync(), INIT_MD_FILETAG, IOCONTEXT_NORMAL, IOOBJECT_RELATION, IOOP_FSYNC, RelFileLocatorBackend::locator, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, pgstat_count_io_op_time(), pgstat_prepare_io_time(), RegisterSyncRequest(), SMgrRelationData::smgr_rlocator, SmgrIsTemp, SYNC_REQUEST, and track_io_timing.

Referenced by mdcreate(), mdextend(), mdregistersync(), mdtruncate(), mdwritev(), and mdzeroextend().

◆ register_forget_request()

static void register_forget_request ( RelFileLocatorBackend  rlocator,
ForkNumber  forknum,
BlockNumber  segno 
)
static

Definition at line 1414 of file md.c.

1416 {
1417  FileTag tag;
1418  /* Fill in the tag from the locator, fork number and segment number supplied by the caller. */
1419  INIT_MD_FILETAG(tag, rlocator.locator, forknum, segno);
1420  /* Hand the tag to RegisterSyncRequest() as a SYNC_FORGET_REQUEST; its return value is not checked. */
1421  RegisterSyncRequest(&tag, SYNC_FORGET_REQUEST, true /* retryOnError */ );
1422 }
@ SYNC_FORGET_REQUEST
Definition: sync.h:27

References INIT_MD_FILETAG, RelFileLocatorBackend::locator, RegisterSyncRequest(), and SYNC_FORGET_REQUEST.

Referenced by mdunlinkfork().

◆ register_unlink_segment()

static void register_unlink_segment ( RelFileLocatorBackend  rlocator,
ForkNumber  forknum,
BlockNumber  segno 
)
static

Definition at line 1397 of file md.c.

1399 {
1400  FileTag tag;
1401  /* Fill in the tag from the locator, fork number and segment number supplied by the caller. */
1402  INIT_MD_FILETAG(tag, rlocator.locator, forknum, segno);
1403 
1404  /* Should never be used with temp relations */
1405  Assert(!RelFileLocatorBackendIsTemp(rlocator));
1406  /* Hand the tag to RegisterSyncRequest() as a SYNC_UNLINK_REQUEST; its return value is not checked. */
1407  RegisterSyncRequest(&tag, SYNC_UNLINK_REQUEST, true /* retryOnError */ );
1408 }
@ SYNC_UNLINK_REQUEST
Definition: sync.h:26

References Assert, INIT_MD_FILETAG, RelFileLocatorBackend::locator, RegisterSyncRequest(), RelFileLocatorBackendIsTemp, and SYNC_UNLINK_REQUEST.

Referenced by mdunlinkfork().

Variable Documentation

◆ MdCxt

MemoryContext MdCxt
static

Definition at line 86 of file md.c.

Referenced by _fdvec_resize(), and mdinit().