PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
md.c File Reference
#include "postgres.h"
#include <unistd.h>
#include <fcntl.h>
#include <sys/file.h>
#include "access/xlogutils.h"
#include "commands/tablespace.h"
#include "common/file_utils.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/md.h"
#include "storage/relfilelocator.h"
#include "storage/smgr.h"
#include "storage/sync.h"
#include "utils/memutils.h"
Include dependency graph for md.c:

Go to the source code of this file.

Data Structures

struct  _MdfdVec
 
struct  MdPathStr
 

Macros

#define INIT_MD_FILETAG(a, xx_rlocator, xx_forknum, xx_segno)
 
#define EXTENSION_FAIL   (1 << 0)
 
#define EXTENSION_RETURN_NULL   (1 << 1)
 
#define EXTENSION_CREATE   (1 << 2)
 
#define EXTENSION_CREATE_RECOVERY   (1 << 3)
 
#define EXTENSION_DONT_OPEN   (1 << 5)
 
#define SEGMENT_CHARS   OIDCHARS
 
#define MD_PATH_STR_MAXLEN
 

Typedefs

typedef struct _MdfdVec MdfdVec
 
typedef struct MdPathStr MdPathStr
 

Functions

static void mdunlinkfork (RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
 
static MdfdVec * mdopenfork (SMgrRelation reln, ForkNumber forknum, int behavior)
 
static void register_dirty_segment (SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
 
static void register_unlink_segment (RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno)
 
static void register_forget_request (RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno)
 
static void _fdvec_resize (SMgrRelation reln, ForkNumber forknum, int nseg)
 
static MdPathStr _mdfd_segpath (SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
 
static MdfdVec * _mdfd_openseg (SMgrRelation reln, ForkNumber forknum, BlockNumber segno, int oflags)
 
static MdfdVec * _mdfd_getseg (SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, bool skipFsync, int behavior)
 
static BlockNumber _mdnblocks (SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
 
static int _mdfd_open_flags (void)
 
void mdinit (void)
 
bool mdexists (SMgrRelation reln, ForkNumber forknum)
 
void mdcreate (SMgrRelation reln, ForkNumber forknum, bool isRedo)
 
void mdunlink (RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
 
static int do_truncate (const char *path)
 
void mdextend (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
 
void mdzeroextend (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync)
 
void mdopen (SMgrRelation reln)
 
void mdclose (SMgrRelation reln, ForkNumber forknum)
 
bool mdprefetch (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks)
 
static int buffers_to_iovec (struct iovec *iov, void **buffers, int nblocks)
 
uint32 mdmaxcombine (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
 
void mdreadv (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks)
 
void mdwritev (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void **buffers, BlockNumber nblocks, bool skipFsync)
 
void mdwriteback (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
 
BlockNumber mdnblocks (SMgrRelation reln, ForkNumber forknum)
 
void mdtruncate (SMgrRelation reln, ForkNumber forknum, BlockNumber curnblk, BlockNumber nblocks)
 
void mdregistersync (SMgrRelation reln, ForkNumber forknum)
 
void mdimmedsync (SMgrRelation reln, ForkNumber forknum)
 
void ForgetDatabaseSyncRequests (Oid dbid)
 
void DropRelationFiles (RelFileLocator *delrels, int ndelrels, bool isRedo)
 
int mdsyncfiletag (const FileTag *ftag, char *path)
 
int mdunlinkfiletag (const FileTag *ftag, char *path)
 
bool mdfiletagmatches (const FileTag *ftag, const FileTag *candidate)
 

Variables

static MemoryContext MdCxt
 

Macro Definition Documentation

◆ EXTENSION_CREATE

#define EXTENSION_CREATE   (1 << 2)

Definition at line 106 of file md.c.

◆ EXTENSION_CREATE_RECOVERY

#define EXTENSION_CREATE_RECOVERY   (1 << 3)

Definition at line 108 of file md.c.

◆ EXTENSION_DONT_OPEN

#define EXTENSION_DONT_OPEN   (1 << 5)

Definition at line 110 of file md.c.

◆ EXTENSION_FAIL

#define EXTENSION_FAIL   (1 << 0)

Definition at line 102 of file md.c.

◆ EXTENSION_RETURN_NULL

#define EXTENSION_RETURN_NULL   (1 << 1)

Definition at line 104 of file md.c.

◆ INIT_MD_FILETAG

#define INIT_MD_FILETAG (   a,
  xx_rlocator,
  xx_forknum,
  xx_segno 
)
Value:
( \
memset(&(a), 0, sizeof(FileTag)), \
(a).handler = SYNC_HANDLER_MD, \
(a).rlocator = (xx_rlocator), \
(a).forknum = (xx_forknum), \
(a).segno = (xx_segno) \
)
int a
Definition: isn.c:70
Definition: sync.h:51
@ SYNC_HANDLER_MD
Definition: sync.h:37

Definition at line 90 of file md.c.

◆ MD_PATH_STR_MAXLEN

#define MD_PATH_STR_MAXLEN
Value:
(\
REL_PATH_STR_MAXLEN \
+ sizeof((char)'.') \
+ SEGMENT_CHARS \
)
#define SEGMENT_CHARS
Definition: md.c:120

Definition at line 121 of file md.c.

◆ SEGMENT_CHARS

#define SEGMENT_CHARS   OIDCHARS

Definition at line 120 of file md.c.

Typedef Documentation

◆ MdfdVec

typedef struct _MdfdVec MdfdVec

◆ MdPathStr

typedef struct MdPathStr MdPathStr

Function Documentation

◆ _fdvec_resize()

static void _fdvec_resize ( SMgrRelation  reln,
ForkNumber  forknum,
int  nseg 
)
static

Definition at line 1503 of file md.c.

1506{
1507 if (nseg == 0)
1508 {
1509 if (reln->md_num_open_segs[forknum] > 0)
1510 {
1511 pfree(reln->md_seg_fds[forknum]);
1512 reln->md_seg_fds[forknum] = NULL;
1513 }
1514 }
1515 else if (reln->md_num_open_segs[forknum] == 0)
1516 {
1517 reln->md_seg_fds[forknum] =
1518 MemoryContextAlloc(MdCxt, sizeof(MdfdVec) * nseg);
1519 }
1520 else if (nseg > reln->md_num_open_segs[forknum])
1521 {
1522 /*
1523 * It doesn't seem worthwhile complicating the code to amortize
1524 * repalloc() calls. Those are far faster than PathNameOpenFile() or
1525 * FileClose(), and the memory context internally will sometimes avoid
1526 * doing an actual reallocation.
1527 */
1528 reln->md_seg_fds[forknum] =
1529 repalloc(reln->md_seg_fds[forknum],
1530 sizeof(MdfdVec) * nseg);
1531 }
1532 else
1533 {
1534 /*
1535 * We don't reallocate a smaller array, because we want mdtruncate()
1536 * to be able to promise that it won't allocate memory, so that it is
1537 * allowed in a critical section. This means that a bit of space in
1538 * the array is now wasted, until the next time we add a segment and
1539 * reallocate.
1540 */
1541 }
1542
1543 reln->md_num_open_segs[forknum] = nseg;
1544}
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:1181
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1544
void pfree(void *pointer)
Definition: mcxt.c:1524
static MemoryContext MdCxt
Definition: md.c:86
int md_num_open_segs[MAX_FORKNUM+1]
Definition: smgr.h:60
struct _MdfdVec * md_seg_fds[MAX_FORKNUM+1]
Definition: smgr.h:61
Definition: md.c:81

References SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, MdCxt, MemoryContextAlloc(), pfree(), and repalloc().

Referenced by _mdfd_openseg(), mdclose(), mdcreate(), mdimmedsync(), mdopenfork(), mdregistersync(), and mdtruncate().

◆ _mdfd_getseg()

static MdfdVec * _mdfd_getseg ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blkno,
bool  skipFsync,
int  behavior 
)
static

Definition at line 1614 of file md.c.

1616{
1617 MdfdVec *v;
1618 BlockNumber targetseg;
1619 BlockNumber nextsegno;
1620
1621 /* some way to handle non-existent segments needs to be specified */
1622 Assert(behavior &
1623 (EXTENSION_FAIL | EXTENSION_CREATE | EXTENSION_RETURN_NULL |
1624 EXTENSION_DONT_OPEN));
1625
1626 targetseg = blkno / ((BlockNumber) RELSEG_SIZE);
1627
1628 /* if an existing and opened segment, we're done */
1629 if (targetseg < reln->md_num_open_segs[forknum])
1630 {
1631 v = &reln->md_seg_fds[forknum][targetseg];
1632 return v;
1633 }
1634
1635 /* The caller only wants the segment if we already had it open. */
1636 if (behavior & EXTENSION_DONT_OPEN)
1637 return NULL;
1638
1639 /*
1640 * The target segment is not yet open. Iterate over all the segments
1641 * between the last opened and the target segment. This way missing
1642 * segments either raise an error, or get created (according to
1643 * 'behavior'). Start with either the last opened, or the first segment if
1644 * none was opened before.
1645 */
1646 if (reln->md_num_open_segs[forknum] > 0)
1647 v = &reln->md_seg_fds[forknum][reln->md_num_open_segs[forknum] - 1];
1648 else
1649 {
1650 v = mdopenfork(reln, forknum, behavior);
1651 if (!v)
1652 return NULL; /* if behavior & EXTENSION_RETURN_NULL */
1653 }
1654
1655 for (nextsegno = reln->md_num_open_segs[forknum];
1656 nextsegno <= targetseg; nextsegno++)
1657 {
1658 BlockNumber nblocks = _mdnblocks(reln, forknum, v);
1659 int flags = 0;
1660
1661 Assert(nextsegno == v->mdfd_segno + 1);
1662
1663 if (nblocks > ((BlockNumber) RELSEG_SIZE))
1664 elog(FATAL, "segment too big");
1665
1666 if ((behavior & EXTENSION_CREATE) ||
1667 (InRecovery && (behavior & EXTENSION_CREATE_RECOVERY)))
1668 {
1669 /*
1670 * Normally we will create new segments only if authorized by the
1671 * caller (i.e., we are doing mdextend()). But when doing WAL
1672 * recovery, create segments anyway; this allows cases such as
1673 * replaying WAL data that has a write into a high-numbered
1674 * segment of a relation that was later deleted. We want to go
1675 * ahead and create the segments so we can finish out the replay.
1676 *
1677 * We have to maintain the invariant that segments before the last
1678 * active segment are of size RELSEG_SIZE; therefore, if
1679 * extending, pad them out with zeroes if needed. (This only
1680 * matters if in recovery, or if the caller is extending the
1681 * relation discontiguously, but that can happen in hash indexes.)
1682 */
1683 if (nblocks < ((BlockNumber) RELSEG_SIZE))
1684 {
1685 char *zerobuf = palloc_aligned(BLCKSZ, PG_IO_ALIGN_SIZE,
1686 MCXT_ALLOC_ZERO);
1687
1688 mdextend(reln, forknum,
1689 nextsegno * ((BlockNumber) RELSEG_SIZE) - 1,
1690 zerobuf, skipFsync);
1691 pfree(zerobuf);
1692 }
1693 flags = O_CREAT;
1694 }
1695 else if (nblocks < ((BlockNumber) RELSEG_SIZE))
1696 {
1697 /*
1698 * When not extending, only open the next segment if the current
1699 * one is exactly RELSEG_SIZE. If not (this branch), either
1700 * return NULL or fail.
1701 */
1702 if (behavior & EXTENSION_RETURN_NULL)
1703 {
1704 /*
1705 * Some callers discern between reasons for _mdfd_getseg()
1706 * returning NULL based on errno. As there's no failing
1707 * syscall involved in this case, explicitly set errno to
1708 * ENOENT, as that seems the closest interpretation.
1709 */
1710 errno = ENOENT;
1711 return NULL;
1712 }
1713
1714 ereport(ERROR,
1715 (errcode_for_file_access(),
1716 errmsg("could not open file \"%s\" (target block %u): previous segment is only %u blocks",
1717 _mdfd_segpath(reln, forknum, nextsegno).str,
1718 blkno, nblocks)));
1719 }
1720
1721 v = _mdfd_openseg(reln, forknum, nextsegno, flags);
1722
1723 if (v == NULL)
1724 {
1725 if ((behavior & EXTENSION_RETURN_NULL) &&
1726 FILE_POSSIBLY_DELETED(errno))
1727 return NULL;
1728 ereport(ERROR,
1729 (errcode_for_file_access(),
1730 errmsg("could not open file \"%s\" (target block %u): %m",
1731 _mdfd_segpath(reln, forknum, nextsegno).str,
1732 blkno)));
1733 }
1734 }
1735
1736 return v;
1737}
uint32 BlockNumber
Definition: block.h:31
int errcode_for_file_access(void)
Definition: elog.c:876
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define FATAL
Definition: elog.h:41
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
#define FILE_POSSIBLY_DELETED(err)
Definition: fd.h:78
#define MCXT_ALLOC_ZERO
Definition: fe_memutils.h:30
Assert(PointerIsAligned(start, uint64))
const char * str
void * palloc_aligned(Size size, Size alignto, int flags)
Definition: mcxt.c:1514
#define EXTENSION_CREATE_RECOVERY
Definition: md.c:108
static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:1743
static MdPathStr _mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
Definition: md.c:1551
#define EXTENSION_DONT_OPEN
Definition: md.c:110
void mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
Definition: md.c:467
static MdfdVec * _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno, int oflags)
Definition: md.c:1571
#define EXTENSION_RETURN_NULL
Definition: md.c:104
#define EXTENSION_CREATE
Definition: md.c:106
#define EXTENSION_FAIL
Definition: md.c:102
static MdfdVec * mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
Definition: md.c:644
#define PG_IO_ALIGN_SIZE
BlockNumber mdfd_segno
Definition: md.c:83
bool InRecovery
Definition: xlogutils.c:50

References _mdfd_openseg(), _mdfd_segpath(), _mdnblocks(), Assert(), elog, ereport, errcode_for_file_access(), errmsg(), ERROR, EXTENSION_CREATE, EXTENSION_CREATE_RECOVERY, EXTENSION_DONT_OPEN, EXTENSION_FAIL, EXTENSION_RETURN_NULL, FATAL, FILE_POSSIBLY_DELETED, InRecovery, MCXT_ALLOC_ZERO, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, mdextend(), _MdfdVec::mdfd_segno, mdopenfork(), palloc_aligned(), pfree(), PG_IO_ALIGN_SIZE, and str.

Referenced by mdextend(), mdprefetch(), mdreadv(), mdwriteback(), mdwritev(), and mdzeroextend().

◆ _mdfd_open_flags()

static int _mdfd_open_flags ( void  )
inlinestatic

Definition at line 156 of file md.c.

157{
158 int flags = O_RDWR | PG_BINARY;
159
160 if (io_direct_flags & IO_DIRECT_DATA)
161 flags |= PG_O_DIRECT;
162
163 return flags;
164}
#define PG_BINARY
Definition: c.h:1244
int io_direct_flags
Definition: fd.c:167
#define IO_DIRECT_DATA
Definition: fd.h:54
#define PG_O_DIRECT
Definition: fd.h:97

References IO_DIRECT_DATA, io_direct_flags, PG_BINARY, and PG_O_DIRECT.

Referenced by _mdfd_openseg(), mdcreate(), mdopenfork(), and mdsyncfiletag().

◆ _mdfd_openseg()

static MdfdVec * _mdfd_openseg ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  segno,
int  oflags 
)
static

Definition at line 1571 of file md.c.

1573{
1574 MdfdVec *v;
1575 File fd;
1576 MdPathStr fullpath;
1577
1578 fullpath = _mdfd_segpath(reln, forknum, segno);
1579
1580 /* open the file */
1581 fd = PathNameOpenFile(fullpath.str, _mdfd_open_flags() | oflags);
1582
1583 if (fd < 0)
1584 return NULL;
1585
1586 /*
1587 * Segments are always opened in order from lowest to highest, so we must
1588 * be adding a new one at the end.
1589 */
1590 Assert(segno == reln->md_num_open_segs[forknum]);
1591
1592 _fdvec_resize(reln, forknum, segno + 1);
1593
1594 /* fill the entry */
1595 v = &reln->md_seg_fds[forknum][segno];
1596 v->mdfd_vfd = fd;
1597 v->mdfd_segno = segno;
1598
1599 Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
1600
1601 /* all done */
1602 return v;
1603}
File PathNameOpenFile(const char *fileName, int fileFlags)
Definition: fd.c:1576
int File
Definition: fd.h:51
static int _mdfd_open_flags(void)
Definition: md.c:156
static void _fdvec_resize(SMgrRelation reln, ForkNumber forknum, int nseg)
Definition: md.c:1503
static int fd(const char *x, int i)
Definition: preproc-init.c:105
Definition: md.c:128
char str[MD_PATH_STR_MAXLEN+1]
Definition: md.c:129
File mdfd_vfd
Definition: md.c:82

References _fdvec_resize(), _mdfd_open_flags(), _mdfd_segpath(), _mdnblocks(), Assert(), fd(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, PathNameOpenFile(), and MdPathStr::str.

Referenced by _mdfd_getseg(), mdimmedsync(), mdnblocks(), and mdregistersync().

◆ _mdfd_segpath()

static MdPathStr _mdfd_segpath ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  segno 
)
static

Definition at line 1551 of file md.c.

1552{
1553 RelPathStr path;
1554 MdPathStr fullpath;
1555
1556 path = relpath(reln->smgr_rlocator, forknum);
1557
1558 if (segno > 0)
1559 sprintf(fullpath.str, "%s.%u", path.str, segno);
1560 else
1561 strcpy(fullpath.str, path.str);
1562
1563 return fullpath;
1564}
#define sprintf
Definition: port.h:241
#define relpath(rlocator, forknum)
Definition: relpath.h:150
char str[REL_PATH_STR_MAXLEN+1]
Definition: relpath.h:123
RelFileLocatorBackend smgr_rlocator
Definition: smgr.h:37

References relpath, SMgrRelationData::smgr_rlocator, sprintf, MdPathStr::str, and RelPathStr::str.

Referenced by _mdfd_getseg(), _mdfd_openseg(), and mdsyncfiletag().

◆ _mdnblocks()

static BlockNumber _mdnblocks ( SMgrRelation  reln,
ForkNumber  forknum,
MdfdVec *  seg 
)
static

Definition at line 1743 of file md.c.

1744{
1745 off_t len;
1746
1747 len = FileSize(seg->mdfd_vfd);
1748 if (len < 0)
1749 ereport(ERROR,
1750 (errcode_for_file_access(),
1751 errmsg("could not seek to end of file \"%s\": %m",
1752 FilePathName(seg->mdfd_vfd))));
1753 /* note that this calculation will ignore any partial block at EOF */
1754 return (BlockNumber) (len / BLCKSZ);
1755}
char * FilePathName(File file)
Definition: fd.c:2485
off_t FileSize(File file)
Definition: fd.c:2433
const void size_t len

References ereport, errcode_for_file_access(), errmsg(), ERROR, FilePathName(), FileSize(), len, and _MdfdVec::mdfd_vfd.

Referenced by _mdfd_getseg(), _mdfd_openseg(), mdextend(), mdnblocks(), mdopenfork(), and mdzeroextend().

◆ buffers_to_iovec()

static int buffers_to_iovec ( struct iovec *  iov,
void **  buffers,
int  nblocks 
)
static

Definition at line 764 of file md.c.

765{
766 struct iovec *iovp;
767 int iovcnt;
768
769 Assert(nblocks >= 1);
770
771 /* If this build supports direct I/O, buffers must be I/O aligned. */
772 for (int i = 0; i < nblocks; ++i)
773 {
774 if (PG_O_DIRECT != 0 && PG_IO_ALIGN_SIZE <= BLCKSZ)
775 Assert((uintptr_t) buffers[i] ==
776 TYPEALIGN(PG_IO_ALIGN_SIZE, buffers[i]));
777 }
778
779 /* Start the first iovec off with the first buffer. */
780 iovp = &iov[0];
781 iovp->iov_base = buffers[0];
782 iovp->iov_len = BLCKSZ;
783 iovcnt = 1;
784
785 /* Try to merge the rest. */
786 for (int i = 1; i < nblocks; ++i)
787 {
788 void *buffer = buffers[i];
789
790 if (((char *) iovp->iov_base + iovp->iov_len) == buffer)
791 {
792 /* Contiguous with the last iovec. */
793 iovp->iov_len += BLCKSZ;
794 }
795 else
796 {
797 /* Need a new iovec. */
798 iovp++;
799 iovp->iov_base = buffer;
800 iovp->iov_len = BLCKSZ;
801 iovcnt++;
802 }
803 }
804
805 return iovcnt;
806}
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:775
int i
Definition: isn.c:74

References Assert(), i, PG_IO_ALIGN_SIZE, PG_O_DIRECT, and TYPEALIGN.

Referenced by mdreadv(), and mdwritev().

◆ do_truncate()

static int do_truncate ( const char *  path)
static

Definition at line 333 of file md.c.

334{
335 int save_errno;
336 int ret;
337
338 ret = pg_truncate(path, 0);
339
340 /* Log a warning here to avoid repetition in callers. */
341 if (ret < 0 && errno != ENOENT)
342 {
343 save_errno = errno;
344 ereport(WARNING,
345 (errcode_for_file_access(),
346 errmsg("could not truncate file \"%s\": %m", path)));
347 errno = save_errno;
348 }
349
350 return ret;
351}
#define WARNING
Definition: elog.h:36
int pg_truncate(const char *path, off_t length)
Definition: fd.c:719

References ereport, errcode_for_file_access(), errmsg(), pg_truncate(), and WARNING.

Referenced by mdunlinkfork().

◆ DropRelationFiles()

void DropRelationFiles ( RelFileLocator *  delrels,
int  ndelrels,
bool  isRedo 
)

Definition at line 1471 of file md.c.

1472{
1473 SMgrRelation *srels;
1474 int i;
1475
1476 srels = palloc(sizeof(SMgrRelation) * ndelrels);
1477 for (i = 0; i < ndelrels; i++)
1478 {
1479 SMgrRelation srel = smgropen(delrels[i], INVALID_PROC_NUMBER);
1480
1481 if (isRedo)
1482 {
1483 ForkNumber fork;
1484
1485 for (fork = 0; fork <= MAX_FORKNUM; fork++)
1486 XLogDropRelation(delrels[i], fork);
1487 }
1488 srels[i] = srel;
1489 }
1490
1491 smgrdounlinkall(srels, ndelrels, isRedo);
1492
1493 for (i = 0; i < ndelrels; i++)
1494 smgrclose(srels[i]);
1495 pfree(srels);
1496}
void * palloc(Size size)
Definition: mcxt.c:1317
#define INVALID_PROC_NUMBER
Definition: procnumber.h:26
ForkNumber
Definition: relpath.h:56
#define MAX_FORKNUM
Definition: relpath.h:70
SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber backend)
Definition: smgr.c:222
void smgrclose(SMgrRelation reln)
Definition: smgr.c:356
void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
Definition: smgr.c:520
void XLogDropRelation(RelFileLocator rlocator, ForkNumber forknum)
Definition: xlogutils.c:630

References i, INVALID_PROC_NUMBER, MAX_FORKNUM, palloc(), pfree(), smgrclose(), smgrdounlinkall(), smgropen(), and XLogDropRelation().

Referenced by FinishPreparedTransaction(), xact_redo_abort(), and xact_redo_commit().

◆ ForgetDatabaseSyncRequests()

void ForgetDatabaseSyncRequests ( Oid  dbid)

Definition at line 1453 of file md.c.

1454{
1455 FileTag tag;
1456 RelFileLocator rlocator;
1457
1458 rlocator.dbOid = dbid;
1459 rlocator.spcOid = 0;
1460 rlocator.relNumber = 0;
1461
1462 INIT_MD_FILETAG(tag, rlocator, InvalidForkNumber, InvalidBlockNumber);
1463
1464 RegisterSyncRequest(&tag, SYNC_FILTER_REQUEST, true /* retryOnError */ );
1465}
#define InvalidBlockNumber
Definition: block.h:33
#define INIT_MD_FILETAG(a, xx_rlocator, xx_forknum, xx_segno)
Definition: md.c:90
@ InvalidForkNumber
Definition: relpath.h:57
RelFileNumber relNumber
bool RegisterSyncRequest(const FileTag *ftag, SyncRequestType type, bool retryOnError)
Definition: sync.c:580
@ SYNC_FILTER_REQUEST
Definition: sync.h:28

References RelFileLocator::dbOid, INIT_MD_FILETAG, InvalidBlockNumber, InvalidForkNumber, RegisterSyncRequest(), RelFileLocator::relNumber, RelFileLocator::spcOid, and SYNC_FILTER_REQUEST.

Referenced by createdb_failure_callback(), dbase_redo(), and dropdb().

◆ mdclose()

void mdclose ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 693 of file md.c.

694{
695 int nopensegs = reln->md_num_open_segs[forknum];
696
697 /* No work if already closed */
698 if (nopensegs == 0)
699 return;
700
701 /* close segments starting from the end */
702 while (nopensegs > 0)
703 {
704 MdfdVec *v = &reln->md_seg_fds[forknum][nopensegs - 1];
705
706 FileClose(v->mdfd_vfd);
707 _fdvec_resize(reln, forknum, nopensegs - 1);
708 nopensegs--;
709 }
710}
void FileClose(File file)
Definition: fd.c:1979

References _fdvec_resize(), FileClose(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, and _MdfdVec::mdfd_vfd.

Referenced by mdexists().

◆ mdcreate()

void mdcreate ( SMgrRelation  reln,
ForkNumber  forknum,
bool  isRedo 
)

Definition at line 202 of file md.c.

203{
204 MdfdVec *mdfd;
205 RelPathStr path;
206 File fd;
207
208 if (isRedo && reln->md_num_open_segs[forknum] > 0)
209 return; /* created and opened already... */
210
211 Assert(reln->md_num_open_segs[forknum] == 0);
212
213 /*
214 * We may be using the target table space for the first time in this
215 * database, so create a per-database subdirectory if needed.
216 *
217 * XXX this is a fairly ugly violation of module layering, but this seems
218 * to be the best place to put the check. Maybe TablespaceCreateDbspace
219 * should be here and not in commands/tablespace.c? But that would imply
220 * importing a lot of stuff that smgr.c oughtn't know, either.
221 */
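222 TablespaceCreateDbspace(reln->smgr_rlocator.locator.spcOid,
223 reln->smgr_rlocator.locator.dbOid,
224 isRedo);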
225
226 path = relpath(reln->smgr_rlocator, forknum);
227
228 fd = PathNameOpenFile(path.str, _mdfd_open_flags() | O_CREAT | O_EXCL);
229
230 if (fd < 0)
231 {
232 int save_errno = errno;
233
234 if (isRedo)
235 fd = PathNameOpenFile(path.str, _mdfd_open_flags());
236 if (fd < 0)
237 {
238 /* be sure to report the error reported by create, not open */
239 errno = save_errno;
240 ereport(ERROR,
241 (errcode_for_file_access(),
242 errmsg("could not create file \"%s\": %m", path.str)));
243 }
244 }
245
246 _fdvec_resize(reln, forknum, 1);
247 mdfd = &reln->md_seg_fds[forknum][0];
248 mdfd->mdfd_vfd = fd;
249 mdfd->mdfd_segno = 0;
250
251 if (!SmgrIsTemp(reln))
252 register_dirty_segment(reln, forknum, mdfd);
253}
void TablespaceCreateDbspace(Oid spcOid, Oid dbOid, bool isRedo)
Definition: tablespace.c:112
static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:1378
#define SmgrIsTemp(smgr)
Definition: smgr.h:73
RelFileLocator locator

References _fdvec_resize(), _mdfd_open_flags(), Assert(), RelFileLocator::dbOid, ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), RelFileLocatorBackend::locator, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, PathNameOpenFile(), register_dirty_segment(), relpath, SMgrRelationData::smgr_rlocator, SmgrIsTemp, RelFileLocator::spcOid, RelPathStr::str, and TablespaceCreateDbspace().

◆ mdexists()

bool mdexists ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 183 of file md.c.

184{
185 /*
186 * Close it first, to ensure that we notice if the fork has been unlinked
187 * since we opened it. As an optimization, we can skip that in recovery,
188 * which already closes relations when dropping them.
189 */
190 if (!InRecovery)
191 mdclose(reln, forknum);
192
193 return (mdopenfork(reln, forknum, EXTENSION_RETURN_NULL) != NULL);
194}
void mdclose(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:693

References EXTENSION_RETURN_NULL, InRecovery, mdclose(), and mdopenfork().

◆ mdextend()

void mdextend ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
const void *  buffer,
bool  skipFsync 
)

Definition at line 467 of file md.c.

469{
470 off_t seekpos;
471 int nbytes;
472 MdfdVec *v;
473
474 /* If this build supports direct I/O, the buffer must be I/O aligned. */
475 if (PG_O_DIRECT != 0 && PG_IO_ALIGN_SIZE <= BLCKSZ)
476 Assert((uintptr_t) buffer == TYPEALIGN(PG_IO_ALIGN_SIZE, buffer));
477
478 /* This assert is too expensive to have on normally ... */
479#ifdef CHECK_WRITE_VS_EXTEND
480 Assert(blocknum >= mdnblocks(reln, forknum));
481#endif
482
483 /*
484 * If a relation manages to grow to 2^32-1 blocks, refuse to extend it any
485 * more --- we mustn't create a block whose number actually is
486 * InvalidBlockNumber. (Note that this failure should be unreachable
487 * because of upstream checks in bufmgr.c.)
488 */
489 if (blocknum == InvalidBlockNumber)
490 ereport(ERROR,
491 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
492 errmsg("cannot extend file \"%s\" beyond %u blocks",
493 relpath(reln->smgr_rlocator, forknum).str,
494 InvalidBlockNumber)));
495
496 v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE);
497
498 seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
499
500 Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
501
502 if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ)
503 {
504 if (nbytes < 0)
505 ereport(ERROR,
506 (errcode_for_file_access(),
507 errmsg("could not extend file \"%s\": %m",
508 FilePathName(v->mdfd_vfd)),
509 errhint("Check free disk space.")));
510 /* short write: complain appropriately */
511 ereport(ERROR,
512 (errcode(ERRCODE_DISK_FULL),
513 errmsg("could not extend file \"%s\": wrote only %d of %d bytes at block %u",
514 FilePathName(v->mdfd_vfd),
515 nbytes, BLCKSZ, blocknum),
516 errhint("Check free disk space.")));
517 }
518
519 if (!skipFsync && !SmgrIsTemp(reln))
520 register_dirty_segment(reln, forknum, v);
521
522 Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
523}
int errhint(const char *fmt,...)
Definition: elog.c:1317
int errcode(int sqlerrcode)
Definition: elog.c:853
static ssize_t FileWrite(File file, const void *buffer, size_t amount, off_t offset, uint32 wait_event_info)
Definition: fd.h:208
BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:1112
static MdfdVec * _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, bool skipFsync, int behavior)
Definition: md.c:1614

References _mdfd_getseg(), _mdnblocks(), Assert(), ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXTENSION_CREATE, FilePathName(), FileWrite(), InvalidBlockNumber, _MdfdVec::mdfd_vfd, mdnblocks(), PG_IO_ALIGN_SIZE, PG_O_DIRECT, register_dirty_segment(), relpath, SMgrRelationData::smgr_rlocator, SmgrIsTemp, and TYPEALIGN.

Referenced by _mdfd_getseg().

◆ mdfiletagmatches()

bool mdfiletagmatches ( const FileTag *  ftag,
const FileTag *  candidate 
)

Definition at line 1834 of file md.c.

1835{
1836 /*
1837 * For now we only use filter requests as a way to drop all scheduled
1838 * callbacks relating to a given database, when dropping the database.
1839 * We'll return true for all candidates that have the same database OID as
1840 * the ftag from the SYNC_FILTER_REQUEST request, so they're forgotten.
1841 */
1842 return ftag->rlocator.dbOid == candidate->rlocator.dbOid;
1843}
RelFileLocator rlocator
Definition: sync.h:54

References RelFileLocator::dbOid, and FileTag::rlocator.

◆ mdimmedsync()

void mdimmedsync ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 1316 of file md.c.

1317{
1318 int segno;
1319 int min_inactive_seg;
1320
1321 /*
1322 * NOTE: mdnblocks makes sure we have opened all active segments, so that
1323 * the loop below will get them all!
1324 */
1325 mdnblocks(reln, forknum);
1326
1327 min_inactive_seg = segno = reln->md_num_open_segs[forknum];
1328
1329 /*
1330 * Temporarily open inactive segments, then close them after sync. There
1331 * may be some inactive segments left opened after fsync() error, but that
1332 * is harmless. We don't bother to clean them up and take a risk of
1333 * further trouble. The next mdclose() will soon close them.
1334 */
1335 while (_mdfd_openseg(reln, forknum, segno, 0) != NULL)
1336 segno++;
1337
1338 while (segno > 0)
1339 {
1340 MdfdVec *v = &reln->md_seg_fds[forknum][segno - 1];
1341
1342 /*
1343 * fsyncs done through mdimmedsync() should be tracked in a separate
1344 * IOContext than those done through mdsyncfiletag() to differentiate
1345 * between unavoidable client backend fsyncs (e.g. those done during
1346 * index build) and those which ideally would have been done by the
1347 * checkpointer. Since other IO operations bypassing the buffer
1348 * manager could also be tracked in such an IOContext, wait until
1349 * these are also tracked to track immediate fsyncs.
1350 */
1351 if (FileSync(v->mdfd_vfd, WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC) < 0)
1352 ereport(data_sync_elevel(ERROR),
1353 (errcode_for_file_access(),
1354 errmsg("could not fsync file \"%s\": %m",
1355 FilePathName(v->mdfd_vfd))));
1356
1357 /* Close inactive segments immediately */
1358 if (segno > min_inactive_seg)
1359 {
1360 FileClose(v->mdfd_vfd);
1361 _fdvec_resize(reln, forknum, segno - 1);
1362 }
1363
1364 segno--;
1365 }
1366}
int FileSync(File file, uint32 wait_event_info)
Definition: fd.c:2321
int data_sync_elevel(int elevel)
Definition: fd.c:3961

References _fdvec_resize(), _mdfd_openseg(), data_sync_elevel(), ereport, errcode_for_file_access(), errmsg(), ERROR, FileClose(), FilePathName(), FileSync(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, and mdnblocks().

◆ mdinit()

void mdinit ( void  )

Definition at line 170 of file md.c.

171{
172 MdCxt = AllocSetContextCreate(TopMemoryContext,
173 "MdSmgr",
174 ALLOCSET_DEFAULT_SIZES);
175}
MemoryContext TopMemoryContext
Definition: mcxt.c:149
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, MdCxt, and TopMemoryContext.

◆ mdmaxcombine()

uint32 mdmaxcombine ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum 
)

Definition at line 813 of file md.c.

815{
816 BlockNumber segoff;
817
818 segoff = blocknum % ((BlockNumber) RELSEG_SIZE);
819
820 return RELSEG_SIZE - segoff;
821}

◆ mdnblocks()

BlockNumber mdnblocks ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 1112 of file md.c.

1113{
1114 MdfdVec *v;
1115 BlockNumber nblocks;
1116 BlockNumber segno;
1117
1118 mdopenfork(reln, forknum, EXTENSION_FAIL);
1119
1120 /* mdopen has opened the first segment */
1121 Assert(reln->md_num_open_segs[forknum] > 0);
1122
1123 /*
1124 * Start from the last open segments, to avoid redundant seeks. We have
1125 * previously verified that these segments are exactly RELSEG_SIZE long,
1126 * and it's useless to recheck that each time.
1127 *
1128 * NOTE: this assumption could only be wrong if another backend has
1129 * truncated the relation. We rely on higher code levels to handle that
1130 * scenario by closing and re-opening the md fd, which is handled via
1131 * relcache flush. (Since the checkpointer doesn't participate in
1132 * relcache flush, it could have segment entries for inactive segments;
1133 * that's OK because the checkpointer never needs to compute relation
1134 * size.)
1135 */
1136 segno = reln->md_num_open_segs[forknum] - 1;
1137 v = &reln->md_seg_fds[forknum][segno];
1138
1139 for (;;)
1140 {
1141 nblocks = _mdnblocks(reln, forknum, v);
1142 if (nblocks > ((BlockNumber) RELSEG_SIZE))
1143 elog(FATAL, "segment too big");
1144 if (nblocks < ((BlockNumber) RELSEG_SIZE))
1145 return (segno * ((BlockNumber) RELSEG_SIZE)) + nblocks;
1146
1147 /*
1148 * If segment is exactly RELSEG_SIZE, advance to next one.
1149 */
1150 segno++;
1151
1152 /*
1153 * We used to pass O_CREAT here, but that has the disadvantage that it
1154 * might create a segment which has vanished through some operating
1155 * system misadventure. In such a case, creating the segment here
1156 * undermines _mdfd_getseg's attempts to notice and report an error
1157 * upon access to a missing segment.
1158 */
1159 v = _mdfd_openseg(reln, forknum, segno, 0);
1160 if (v == NULL)
1161 return segno * ((BlockNumber) RELSEG_SIZE);
1162 }
1163}

References _mdfd_openseg(), _mdnblocks(), Assert(), elog, EXTENSION_FAIL, FATAL, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, and mdopenfork().

Referenced by mdextend(), mdimmedsync(), mdregistersync(), mdwritev(), and mdzeroextend().

◆ mdopen()

void mdopen ( SMgrRelation  reln)

Definition at line 682 of file md.c.

/*
 * mdopen() -- Reset the per-fork open-segment counters of reln.
 *
 * Sets md_num_open_segs[] to zero for every fork number from 0 through
 * MAX_FORKNUM.  No file is opened or closed here; only the counters are
 * cleared.
 */
683{
684 /* mark it not open */
685 for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)
686 reln->md_num_open_segs[forknum] = 0;
687}

References MAX_FORKNUM, and SMgrRelationData::md_num_open_segs.

◆ mdopenfork()

static MdfdVec * mdopenfork ( SMgrRelation  reln,
ForkNumber  forknum,
int  behavior 
)
static

Definition at line 644 of file md.c.

/*
 * mdopenfork() -- Open the first segment file of one fork of a relation.
 *
 * Returns the MdfdVec describing segment 0 of the fork.  If the fork already
 * has at least one open segment, the cached entry is returned without
 * touching the filesystem.  Otherwise the file named by relpath() is opened
 * and recorded as segment 0.
 *
 * On open failure, NULL is returned only when the caller set
 * EXTENSION_RETURN_NULL in "behavior" and FILE_POSSIBLY_DELETED(errno) holds;
 * every other failure is reported with ereport(ERROR).
 */
645{
646 MdfdVec *mdfd;
647 RelPathStr path;
648 File fd;
649
650 /* No work if already open */
651 if (reln->md_num_open_segs[forknum] > 0)
652 return &reln->md_seg_fds[forknum][0];
653
654 path = relpath(reln->smgr_rlocator, forknum);
655
656 fd = PathNameOpenFile(path.str, _mdfd_open_flags());
657
658 if (fd < 0)
659 {
660 if ((behavior & EXTENSION_RETURN_NULL) &&
661 FILE_POSSIBLY_DELETED(errno))
662 return NULL;
663 ereport(ERROR,
664 (errcode_for_file_access(),
665 errmsg("could not open file \"%s\": %m", path.str)));
666 }
667
/* Make room for exactly one segment entry and fill it in for segment 0. */
668 _fdvec_resize(reln, forknum, 1);
669 mdfd = &reln->md_seg_fds[forknum][0];
670 mdfd->mdfd_vfd = fd;
671 mdfd->mdfd_segno = 0;
672
673 Assert(_mdnblocks(reln, forknum, mdfd) <= ((BlockNumber) RELSEG_SIZE));
674
675 return mdfd;
676}

References _fdvec_resize(), _mdfd_open_flags(), _mdnblocks(), Assert(), ereport, errcode_for_file_access(), errmsg(), ERROR, EXTENSION_RETURN_NULL, fd(), FILE_POSSIBLY_DELETED, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, PathNameOpenFile(), relpath, SMgrRelationData::smgr_rlocator, and RelPathStr::str.

Referenced by _mdfd_getseg(), mdexists(), and mdnblocks().

◆ mdprefetch()

bool mdprefetch ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
int  nblocks 
)

Definition at line 716 of file md.c.

/*
 * mdprefetch() -- Issue prefetch requests for a run of blocks.
 *
 * Walks the range [blocknum, blocknum + nblocks) one segment file at a time
 * and calls FilePrefetch() for the part of the range that lies in each
 * segment.
 *
 * Returns false if the range would extend past MaxBlockNumber, or if
 * _mdfd_getseg() returns NULL (it is asked to do so only when InRecovery;
 * otherwise it is called with EXTENSION_FAIL).  Returns true in all other
 * cases, including builds without USE_PREFETCH, where the body is empty.
 */
718{
719#ifdef USE_PREFETCH
720
721 Assert((io_direct_flags & IO_DIRECT_DATA) == 0);
722
723 if ((uint64) blocknum + nblocks > (uint64) MaxBlockNumber + 1)
724 return false;
725
726 while (nblocks > 0)
727 {
728 off_t seekpos;
729 MdfdVec *v;
730 int nblocks_this_segment;
731
732 v = _mdfd_getseg(reln, forknum, blocknum, false,
733 InRecovery ? EXTENSION_RETURN_NULL : EXTENSION_FAIL);
734 if (v == NULL)
735 return false;
736
/* Byte offset of blocknum inside its segment file. */
737 seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
738
739 Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
740
/* Clamp the request so it does not run past the end of this segment. */
741 nblocks_this_segment =
742 Min(nblocks,
743 RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE)));
744
/* The result of FilePrefetch() is deliberately discarded. */
745 (void) FilePrefetch(v->mdfd_vfd, seekpos, BLCKSZ * nblocks_this_segment,
746 WAIT_EVENT_DATA_FILE_PREFETCH);
747
748 blocknum += nblocks_this_segment;
749 nblocks -= nblocks_this_segment;
750 }
751#endif /* USE_PREFETCH */
752
753 return true;
754}
#define MaxBlockNumber
Definition: block.h:35
#define Min(x, y)
Definition: c.h:975
uint64_t uint64
Definition: c.h:503
int FilePrefetch(File file, off_t offset, off_t amount, uint32 wait_event_info)
Definition: fd.c:2078

References _mdfd_getseg(), Assert(), EXTENSION_FAIL, EXTENSION_RETURN_NULL, FilePrefetch(), InRecovery, IO_DIRECT_DATA, io_direct_flags, MaxBlockNumber, _MdfdVec::mdfd_vfd, and Min.

◆ mdreadv()

void mdreadv ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
void **  buffers,
BlockNumber  nblocks 
)

Definition at line 827 of file md.c.

/*
 * mdreadv() -- Read the specified blocks of a relation fork into buffers[].
 *
 * For each pass of the outer loop the target segment is located with
 * _mdfd_getseg(), the buffers are described by an iovec array, and
 * FileReadV() is called repeatedly until the whole request has been
 * transferred (short reads are retried from the new position).
 *
 * A request that does not fit in one segment, or needs more than
 * lengthof(iov) buffers, is rejected with elog(ERROR).
 *
 * A read error is reported with ereport(ERROR).  A read of zero bytes (EOF)
 * is an error too, unless zero_damaged_pages is set or we are InRecovery, in
 * which case the blocks not yet transferred are zero-filled.
 */
829{
830 while (nblocks > 0)
831 {
832 struct iovec iov[PG_IOV_MAX];
833 int iovcnt;
834 off_t seekpos;
835 int nbytes;
836 MdfdVec *v;
837 BlockNumber nblocks_this_segment;
838 size_t transferred_this_segment;
839 size_t size_this_segment;
840
841 v = _mdfd_getseg(reln, forknum, blocknum, false,
842 EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
843
/* Byte offset of blocknum inside its segment file. */
844 seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
845
846 Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
847
848 nblocks_this_segment =
849 Min(nblocks,
850 RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE)));
851 nblocks_this_segment = Min(nblocks_this_segment, lengthof(iov));
852
853 if (nblocks_this_segment != nblocks)
854 elog(ERROR, "read crosses segment boundary");
855
856 iovcnt = buffers_to_iovec(iov, buffers, nblocks_this_segment);
857 size_this_segment = nblocks_this_segment * BLCKSZ;
858 transferred_this_segment = 0;
859
860 /*
861 * Inner loop to continue after a short read. We'll keep going until
862 * we hit EOF rather than assuming that a short read means we hit the
863 * end.
864 */
865 for (;;)
866 {
867 TRACE_POSTGRESQL_SMGR_MD_READ_START(forknum, blocknum,
868 reln->smgr_rlocator.locator.spcOid,
869 reln->smgr_rlocator.locator.dbOid,
870 reln->smgr_rlocator.locator.relNumber,
871 reln->smgr_rlocator.backend);
872 nbytes = FileReadV(v->mdfd_vfd, iov, iovcnt, seekpos,
873 WAIT_EVENT_DATA_FILE_READ);
874 TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum,
875 reln->smgr_rlocator.locator.spcOid,
876 reln->smgr_rlocator.locator.dbOid,
877 reln->smgr_rlocator.locator.relNumber,
878 reln->smgr_rlocator.backend,
879 nbytes,
880 size_this_segment - transferred_this_segment);
881
882#ifdef SIMULATE_SHORT_READ
883 nbytes = Min(nbytes, 4096);
884#endif
885
886 if (nbytes < 0)
887 ereport(ERROR,
888 (errcode_for_file_access(),
889 errmsg("could not read blocks %u..%u in file \"%s\": %m",
890 blocknum,
891 blocknum + nblocks_this_segment - 1,
892 FilePathName(v->mdfd_vfd))));
893
894 if (nbytes == 0)
895 {
896 /*
897 * We are at or past EOF, or we read a partial block at EOF.
898 * Normally this is an error; upper levels should never try to
899 * read a nonexistent block. However, if zero_damaged_pages
900 * is ON or we are InRecovery, we should instead return zeroes
901 * without complaining. This allows, for example, the case of
902 * trying to update a block that was later truncated away.
903 */
904 if (zero_damaged_pages || InRecovery)
905 {
/* Zero every buffer from the first not-fully-read block onwards. */
906 for (BlockNumber i = transferred_this_segment / BLCKSZ;
907 i < nblocks_this_segment;
908 ++i)
909 memset(buffers[i], 0, BLCKSZ);
910 break;
911 }
912 else
913 ereport(ERROR,
914 (errcode(ERRCODE_DATA_CORRUPTED),
915 errmsg("could not read blocks %u..%u in file \"%s\": read only %zu of %zu bytes",
916 blocknum,
917 blocknum + nblocks_this_segment - 1,
918 FilePathName(v->mdfd_vfd),
919 transferred_this_segment,
920 size_this_segment)));
921 }
922
923 /* One loop should usually be enough. */
924 transferred_this_segment += nbytes;
925 Assert(transferred_this_segment <= size_this_segment);
926 if (transferred_this_segment == size_this_segment)
927 break;
928
929 /* Adjust position and vectors after a short read. */
930 seekpos += nbytes;
931 iovcnt = compute_remaining_iovec(iov, iov, iovcnt, nbytes);
932 }
933
934 nblocks -= nblocks_this_segment;
935 buffers += nblocks_this_segment;
936 blocknum += nblocks_this_segment;
937 }
938}
bool zero_damaged_pages
Definition: bufmgr.c:140
#define lengthof(array)
Definition: c.h:759
ssize_t FileReadV(File file, const struct iovec *iov, int iovcnt, off_t offset, uint32 wait_event_info)
Definition: fd.c:2160
int compute_remaining_iovec(struct iovec *destination, const struct iovec *source, int iovcnt, size_t transferred)
Definition: file_utils.c:614
static int buffers_to_iovec(struct iovec *iov, void **buffers, int nblocks)
Definition: md.c:764
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
#define PG_IOV_MAX
Definition: pg_iovec.h:41

References _mdfd_getseg(), Assert(), RelFileLocatorBackend::backend, buffers_to_iovec(), compute_remaining_iovec(), RelFileLocator::dbOid, elog, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errmsg(), ERROR, EXTENSION_CREATE_RECOVERY, EXTENSION_FAIL, FilePathName(), FileReadV(), i, InRecovery, lengthof, RelFileLocatorBackend::locator, _MdfdVec::mdfd_vfd, Min, PG_IOV_MAX, RelFileLocator::relNumber, SMgrRelationData::smgr_rlocator, RelFileLocator::spcOid, and zero_damaged_pages.

◆ mdregistersync()

void mdregistersync ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 1265 of file md.c.

/*
 * mdregistersync() -- Mark every segment of a relation fork as needing fsync.
 *
 * Calls mdnblocks() so that all active segments are open, then opens any
 * further segment files that exist beyond them ("inactive" segments).  Each
 * segment, from the last to the first, is handed to
 * register_dirty_segment(); the inactive segments opened here are closed
 * again as the loop passes them.
 */
1266{
1267 int segno;
1268 int min_inactive_seg;
1269
1270 /*
1271 * NOTE: mdnblocks makes sure we have opened all active segments, so that
1272 * the loop below will get them all!
1273 */
1274 mdnblocks(reln, forknum);
1275
/* Segment numbers >= this value were not open before the loop below. */
1276 min_inactive_seg = segno = reln->md_num_open_segs[forknum];
1277
1278 /*
1279 * Temporarily open inactive segments, then close them after sync. There
1280 * may be some inactive segments left opened after error, but that is
1281 * harmless. We don't bother to clean them up and take a risk of further
1282 * trouble. The next mdclose() will soon close them.
1283 */
1284 while (_mdfd_openseg(reln, forknum, segno, 0) != NULL)
1285 segno++;
1286
/* segno is now the total segment count; walk the array back to front. */
1287 while (segno > 0)
1288 {
1289 MdfdVec *v = &reln->md_seg_fds[forknum][segno - 1];
1290
1291 register_dirty_segment(reln, forknum, v);
1292
1293 /* Close inactive segments immediately */
1294 if (segno > min_inactive_seg)
1295 {
1296 FileClose(v->mdfd_vfd);
1297 _fdvec_resize(reln, forknum, segno - 1);
1298 }
1299
1300 segno--;
1301 }
1302}

References _fdvec_resize(), _mdfd_openseg(), FileClose(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, mdnblocks(), and register_dirty_segment().

◆ mdsyncfiletag()

int mdsyncfiletag ( const FileTag *  ftag,
char *  path 
)

Definition at line 1764 of file md.c.

/*
 * mdsyncfiletag() -- Sync the segment file identified by a FileTag.
 *
 * If the requested segment is already open in the SMgrRelation obtained from
 * smgropen(), its descriptor is used and left open.  Otherwise the segment
 * path is built with _mdfd_segpath(), opened, and closed again after the
 * sync.  In both cases the file's path is copied into the caller-supplied
 * "path" buffer.
 *
 * Returns the result of FileSync(), or -1 if the file had to be opened and
 * the open failed.  errno as set by FileSync() is saved and restored around
 * the close and the I/O statistics call, so it is what the caller sees.
 */
1765{
1766 SMgrRelation reln = smgropen(ftag->rlocator, INVALID_PROC_NUMBER);
1767 File file;
1768 instr_time io_start;
1769 bool need_to_close;
1770 int result,
1771 save_errno;
1772
1773 /* See if we already have the file open, or need to open it. */
1774 if (ftag->segno < reln->md_num_open_segs[ftag->forknum])
1775 {
1776 file = reln->md_seg_fds[ftag->forknum][ftag->segno].mdfd_vfd;
1777 strlcpy(path, FilePathName(file), MAXPGPATH);
1778 need_to_close = false;
1779 }
1780 else
1781 {
1782 MdPathStr p;
1783
1784 p = _mdfd_segpath(reln, ftag->forknum, ftag->segno);
1785 strlcpy(path, p.str, MD_PATH_STR_MAXLEN);
1786
1787 file = PathNameOpenFile(path, _mdfd_open_flags());
1788 if (file < 0)
1789 return -1;
1790 need_to_close = true;
1791 }
1792
1793 io_start = pgstat_prepare_io_time(track_io_timing);
1794
1795 /* Sync the file. */
1796 result = FileSync(file, WAIT_EVENT_DATA_FILE_SYNC);
1797 save_errno = errno;
1798
1799 if (need_to_close)
1800 FileClose(file);
1801
1802 pgstat_count_io_op_time(IOOBJECT_RELATION, IOCONTEXT_NORMAL,
1803 IOOP_FSYNC, io_start, 1, 0);
1804
1805 errno = save_errno;
1806 return result;
1807}
bool track_io_timing
Definition: bufmgr.c:143
#define MD_PATH_STR_MAXLEN
Definition: md.c:121
#define MAXPGPATH
@ IOOBJECT_RELATION
Definition: pgstat.h:274
@ IOCONTEXT_NORMAL
Definition: pgstat.h:286
@ IOOP_FSYNC
Definition: pgstat.h:305
instr_time pgstat_prepare_io_time(bool track_io_guc)
Definition: pgstat_io.c:90
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt, uint64 bytes)
Definition: pgstat_io.c:121
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
int16 forknum
Definition: sync.h:53
uint64 segno
Definition: sync.h:55

References _mdfd_open_flags(), _mdfd_segpath(), FileClose(), FilePathName(), FileSync(), FileTag::forknum, INVALID_PROC_NUMBER, IOCONTEXT_NORMAL, IOOBJECT_RELATION, IOOP_FSYNC, MAXPGPATH, SMgrRelationData::md_num_open_segs, MD_PATH_STR_MAXLEN, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, PathNameOpenFile(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), FileTag::rlocator, FileTag::segno, smgropen(), MdPathStr::str, strlcpy(), and track_io_timing.

◆ mdtruncate()

void mdtruncate ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  curnblk,
BlockNumber  nblocks 
)

Definition at line 1176 of file md.c.

/*
 * mdtruncate() -- Truncate a relation fork from curnblk to nblocks blocks.
 *
 * A request to grow the fork (nblocks > curnblk) is silently ignored when
 * InRecovery and raises an error otherwise; nblocks == curnblk is a no-op.
 *
 * Otherwise the open segments are visited from the last to the first:
 *   - a segment that starts beyond nblocks is truncated to zero length,
 *     closed, and dropped from the fd array (the file itself is kept);
 *   - the segment containing the new end is truncated to the right length;
 *   - the loop stops at the first segment that needs no change.
 * Every truncated segment is passed to register_dirty_segment() unless the
 * relation is temporary.  A failing FileTruncate() raises an error.
 */
1178{
1179 BlockNumber priorblocks;
1180 int curopensegs;
1181
1182 if (nblocks > curnblk)
1183 {
1184 /* Bogus request ... but no complaint if InRecovery */
1185 if (InRecovery)
1186 return;
1187 ereport(ERROR,
1188 (errmsg("could not truncate file \"%s\" to %u blocks: it's only %u blocks now",
1189 relpath(reln->smgr_rlocator, forknum).str,
1190 nblocks, curnblk)));
1191 }
1192 if (nblocks == curnblk)
1193 return; /* no work */
1194
1195 /*
1196 * Truncate segments, starting at the last one. Starting at the end makes
1197 * managing the memory for the fd array easier, should there be errors.
1198 */
1199 curopensegs = reln->md_num_open_segs[forknum];
1200 while (curopensegs > 0)
1201 {
1202 MdfdVec *v;
1203
/* Number of blocks held by all segments before the current one. */
1204 priorblocks = (curopensegs - 1) * RELSEG_SIZE;
1205
1206 v = &reln->md_seg_fds[forknum][curopensegs - 1];
1207
1208 if (priorblocks > nblocks)
1209 {
1210 /*
1211 * This segment is no longer active. We truncate the file, but do
1212 * not delete it, for reasons explained in the header comments.
1213 */
1214 if (FileTruncate(v->mdfd_vfd, 0, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0)
1215 ereport(ERROR,
1216 (errcode_for_file_access(),
1217 errmsg("could not truncate file \"%s\": %m",
1218 FilePathName(v->mdfd_vfd))));
1219
1220 if (!SmgrIsTemp(reln))
1221 register_dirty_segment(reln, forknum, v);
1222
1223 /* we never drop the 1st segment */
1224 Assert(v != &reln->md_seg_fds[forknum][0]);
1225
1226 FileClose(v->mdfd_vfd);
1227 _fdvec_resize(reln, forknum, curopensegs - 1);
1228 }
1229 else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks)
1230 {
1231 /*
1232 * This is the last segment we want to keep. Truncate the file to
1233 * the right length. NOTE: if nblocks is exactly a multiple K of
1234 * RELSEG_SIZE, we will truncate the K+1st segment to 0 length but
1235 * keep it. This adheres to the invariant given in the header
1236 * comments.
1237 */
1238 BlockNumber lastsegblocks = nblocks - priorblocks;
1239
1240 if (FileTruncate(v->mdfd_vfd, (off_t) lastsegblocks * BLCKSZ, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0)
1241 ereport(ERROR,
1242 (errcode_for_file_access(),
1243 errmsg("could not truncate file \"%s\" to %u blocks: %m",
1244 FilePathName(v->mdfd_vfd),
1245 nblocks)));
1246 if (!SmgrIsTemp(reln))
1247 register_dirty_segment(reln, forknum, v);
1248 }
1249 else
1250 {
1251 /*
1252 * We still need this segment, so nothing to do for this and any
1253 * earlier segment.
1254 */
1255 break;
1256 }
1257 curopensegs--;
1258 }
1259}
int FileTruncate(File file, off_t offset, uint32 wait_event_info)
Definition: fd.c:2450

References _fdvec_resize(), Assert(), ereport, errcode_for_file_access(), errmsg(), ERROR, FileClose(), FilePathName(), FileTruncate(), InRecovery, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, register_dirty_segment(), relpath, SMgrRelationData::smgr_rlocator, and SmgrIsTemp.

◆ mdunlink()

void mdunlink ( RelFileLocatorBackend  rlocator,
ForkNumber  forknum,
bool  isRedo 
)

Definition at line 317 of file md.c.

/*
 * mdunlink() -- Unlink one fork, or all forks, of a relation.
 *
 * When forknum is InvalidForkNumber, mdunlinkfork() is called for every fork
 * number from 0 through MAX_FORKNUM; otherwise it is called once for the
 * given fork.  rlocator and isRedo are passed through unchanged.
 */
318{
319 /* Now do the per-fork work */
320 if (forknum == InvalidForkNumber)
321 {
322 for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
323 mdunlinkfork(rlocator, forknum, isRedo);
324 }
325 else
326 mdunlinkfork(rlocator, forknum, isRedo);
327}
static void mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
Definition: md.c:354

References InvalidForkNumber, MAX_FORKNUM, and mdunlinkfork().

◆ mdunlinkfiletag()

int mdunlinkfiletag ( const FileTag *  ftag,
char *  path 
)

Definition at line 1816 of file md.c.

/*
 * mdunlinkfiletag() -- Unlink the file identified by a FileTag.
 *
 * Builds the path of the MAIN_FORKNUM file for ftag->rlocator with
 * relpathperm(), copies it into the caller-supplied "path" buffer (at most
 * MAXPGPATH bytes, via strlcpy) and calls unlink() on it.  Only
 * ftag->rlocator is read here; the tag's fork and segment numbers are not
 * consulted.
 *
 * Returns the result of unlink().
 */
1817{
1818 RelPathStr p;
1819
1820 /* Compute the path. */
1821 p = relpathperm(ftag->rlocator, MAIN_FORKNUM);
1822 strlcpy(path, p.str, MAXPGPATH);
1823
1824 /* Try to unlink the file. */
1825 return unlink(path);
1826}
@ MAIN_FORKNUM
Definition: relpath.h:58
#define relpathperm(rlocator, forknum)
Definition: relpath.h:146

References MAIN_FORKNUM, MAXPGPATH, relpathperm, FileTag::rlocator, RelPathStr::str, and strlcpy().

◆ mdunlinkfork()

static void mdunlinkfork ( RelFileLocatorBackend  rlocator,
ForkNumber  forknum,
bool  isRedo 
)
static

Definition at line 354 of file md.c.

/*
 * mdunlinkfork() -- Remove all segment files of one fork of a relation.
 *
 * The first segment is unlinked immediately when isRedo is set, in binary
 * upgrade mode, for any fork other than MAIN_FORKNUM, or for a temporary
 * relation.  Otherwise it is only truncated and an unlink request is
 * registered for later.  For non-temporary relations the file is truncated
 * first and pending sync requests for it are forgotten.
 *
 * Additional segments ("<path>.1", "<path>.2", ...) are then truncated (if
 * not temporary) and unlinked until ENOENT is seen or an unlink fails.
 *
 * Failures to unlink, other than ENOENT, are reported at WARNING level; the
 * function itself returns nothing.
 */
355{
356 RelPathStr path;
357 int ret;
358 int save_errno;
359
360 path = relpath(rlocator, forknum);
361
362 /*
363 * Truncate and then unlink the first segment, or just register a request
364 * to unlink it later, as described in the comments for mdunlink().
365 */
366 if (isRedo || IsBinaryUpgrade || forknum != MAIN_FORKNUM ||
367 RelFileLocatorBackendIsTemp(rlocator))
368 {
369 if (!RelFileLocatorBackendIsTemp(rlocator))
370 {
371 /* Prevent other backends' fds from holding on to the disk space */
372 ret = do_truncate(path.str);
373
374 /* Forget any pending sync requests for the first segment */
375 save_errno = errno;
376 register_forget_request(rlocator, forknum, 0 /* first seg */ );
377 errno = save_errno;
378 }
379 else
380 ret = 0;
381
382 /* Next unlink the file, unless it was already found to be missing */
383 if (ret >= 0 || errno != ENOENT)
384 {
385 ret = unlink(path.str);
386 if (ret < 0 && errno != ENOENT)
387 {
/* Keep unlink()'s errno intact for the check further below. */
388 save_errno = errno;
389 ereport(WARNING,
390 (errcode_for_file_access(),
391 errmsg("could not remove file \"%s\": %m", path.str)));
392 errno = save_errno;
393 }
394 }
395 }
396 else
397 {
398 /* Prevent other backends' fds from holding on to the disk space */
399 ret = do_truncate(path.str);
400
401 /* Register request to unlink first segment later */
402 save_errno = errno;
403 register_unlink_segment(rlocator, forknum, 0 /* first seg */ );
404 errno = save_errno;
405 }
406
407 /*
408 * Delete any additional segments.
409 *
410 * Note that because we loop until getting ENOENT, we will correctly
411 * remove all inactive segments as well as active ones. Ideally we'd
412 * continue the loop until getting exactly that errno, but that risks an
413 * infinite loop if the problem is directory-wide (for instance, if we
414 * suddenly can't read the data directory itself). We compromise by
415 * continuing after a non-ENOENT truncate error, but stopping after any
416 * unlink error. If there is indeed a directory-wide problem, additional
417 * unlink attempts wouldn't work anyway.
418 */
419 if (ret >= 0 || errno != ENOENT)
420 {
421 MdPathStr segpath;
422 BlockNumber segno;
423
424 for (segno = 1;; segno++)
425 {
426 sprintf(segpath.str, "%s.%u", path.str, segno);
427
428 if (!RelFileLocatorBackendIsTemp(rlocator))
429 {
430 /*
431 * Prevent other backends' fds from holding on to the disk
432 * space. We're done if we see ENOENT, though.
433 */
434 if (do_truncate(segpath.str) < 0 && errno == ENOENT)
435 break;
436
437 /*
438 * Forget any pending sync requests for this segment before we
439 * try to unlink.
440 */
441 register_forget_request(rlocator, forknum, segno);
442 }
443
444 if (unlink(segpath.str) < 0)
445 {
446 /* ENOENT is expected after the last segment... */
447 if (errno != ENOENT)
448 ereport(WARNING,
449 (errcode_for_file_access(),
450 errmsg("could not remove file \"%s\": %m", segpath.str)));
451 break;
452 }
453 }
454 }
455}
bool IsBinaryUpgrade
Definition: globals.c:120
static void register_forget_request(RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno)
Definition: md.c:1439
static void register_unlink_segment(RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno)
Definition: md.c:1422
static int do_truncate(const char *path)
Definition: md.c:333
#define RelFileLocatorBackendIsTemp(rlocator)

References do_truncate(), ereport, errcode_for_file_access(), errmsg(), IsBinaryUpgrade, MAIN_FORKNUM, register_forget_request(), register_unlink_segment(), RelFileLocatorBackendIsTemp, relpath, sprintf, MdPathStr::str, RelPathStr::str, and WARNING.

Referenced by mdunlink().

◆ mdwriteback()

void mdwriteback ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
BlockNumber  nblocks 
)

Definition at line 1053 of file md.c.

/*
 * mdwriteback() -- Ask the kernel to start writing back a range of blocks.
 *
 * The range [blocknum, blocknum + nblocks) is split at segment boundaries
 * and FileWriteback() is called once per affected segment.  Segments are
 * looked up with EXTENSION_DONT_OPEN, so a segment that is not already open
 * is not opened; in that case _mdfd_getseg() yields NULL and the function
 * returns without processing the remainder of the range.
 */
1055{
1056 Assert((io_direct_flags & IO_DIRECT_DATA) == 0);
1057
1058 /*
1059 * Issue flush requests in as few requests as possible; have to split at
1060 * segment boundaries though, since those are actually separate files.
1061 */
1062 while (nblocks > 0)
1063 {
1064 BlockNumber nflush = nblocks;
1065 off_t seekpos;
1066 MdfdVec *v;
1067 int segnum_start,
1068 segnum_end;
1069
1070 v = _mdfd_getseg(reln, forknum, blocknum, true /* not used */ ,
1071 EXTENSION_DONT_OPEN);
1072
1073 /*
1074 * We might be flushing buffers of already removed relations, that's
1075 * ok, just ignore that case. If the segment file wasn't open already
1076 * (ie from a recent mdwrite()), then we don't want to re-open it, to
1077 * avoid a race with PROCSIGNAL_BARRIER_SMGRRELEASE that might leave
1078 * us with a descriptor to a file that is about to be unlinked.
1079 */
1080 if (!v)
1081 return;
1082
1083 /* compute offset inside the current segment */
1084 segnum_start = blocknum / RELSEG_SIZE;
1085
1086 /* compute number of desired writes within the current segment */
1087 segnum_end = (blocknum + nblocks - 1) / RELSEG_SIZE;
1088 if (segnum_start != segnum_end)
1089 nflush = RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE));
1090
1091 Assert(nflush >= 1);
1092 Assert(nflush <= nblocks);
1093
1094 seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
1095
1096 FileWriteback(v->mdfd_vfd, seekpos, (off_t) BLCKSZ * nflush, WAIT_EVENT_DATA_FILE_FLUSH);
1097
1098 nblocks -= nflush;
1099 blocknum += nflush;
1100 }
1101}
void FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info)
Definition: fd.c:2134

References _mdfd_getseg(), Assert(), EXTENSION_DONT_OPEN, FileWriteback(), IO_DIRECT_DATA, io_direct_flags, and _MdfdVec::mdfd_vfd.

◆ mdwritev()

void mdwritev ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
const void **  buffers,
BlockNumber  nblocks,
bool  skipFsync 
)

Definition at line 948 of file md.c.

/*
 * mdwritev() -- Write the supplied blocks at the given position of a fork.
 *
 * For each pass of the outer loop the target segment is located with
 * _mdfd_getseg(), the buffers are described by an iovec array, and
 * FileWriteV() is called repeatedly until the whole request has been
 * written (short writes are retried from the new position).
 *
 * A request that does not fit in one segment, or needs more than
 * lengthof(iov) buffers, is rejected with elog(ERROR).  A write error is
 * reported with ereport(ERROR), with a disk-space hint when errno is ENOSPC.
 *
 * Unless skipFsync is set or the relation is temporary, the written segment
 * is passed to register_dirty_segment().
 */
950{
951 /* This assert is too expensive to have on normally ... */
952#ifdef CHECK_WRITE_VS_EXTEND
953 Assert((uint64) blocknum + (uint64) nblocks <= (uint64) mdnblocks(reln, forknum));
954#endif
955
956 while (nblocks > 0)
957 {
958 struct iovec iov[PG_IOV_MAX];
959 int iovcnt;
960 off_t seekpos;
961 int nbytes;
962 MdfdVec *v;
963 BlockNumber nblocks_this_segment;
964 size_t transferred_this_segment;
965 size_t size_this_segment;
966
967 v = _mdfd_getseg(reln, forknum, blocknum, skipFsync,
968 EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
969
/* Byte offset of blocknum inside its segment file. */
970 seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
971
972 Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
973
974 nblocks_this_segment =
975 Min(nblocks,
976 RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE)));
977 nblocks_this_segment = Min(nblocks_this_segment, lengthof(iov));
978
979 if (nblocks_this_segment != nblocks)
980 elog(ERROR, "write crosses segment boundary");
981
982 iovcnt = buffers_to_iovec(iov, (void **) buffers, nblocks_this_segment);
983 size_this_segment = nblocks_this_segment * BLCKSZ;
984 transferred_this_segment = 0;
985
986 /*
987 * Inner loop to continue after a short write. If the reason is that
988 * we're out of disk space, a future attempt should get an ENOSPC
989 * error from the kernel.
990 */
991 for (;;)
992 {
993 TRACE_POSTGRESQL_SMGR_MD_WRITE_START(forknum, blocknum,
994 reln->smgr_rlocator.locator.spcOid,
995 reln->smgr_rlocator.locator.dbOid,
996 reln->smgr_rlocator.locator.relNumber,
997 reln->smgr_rlocator.backend);
998 nbytes = FileWriteV(v->mdfd_vfd, iov, iovcnt, seekpos,
999 WAIT_EVENT_DATA_FILE_WRITE);
1000 TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum,
1001 reln->smgr_rlocator.locator.spcOid,
1002 reln->smgr_rlocator.locator.dbOid,
1003 reln->smgr_rlocator.locator.relNumber,
1004 reln->smgr_rlocator.backend,
1005 nbytes,
1006 size_this_segment - transferred_this_segment);
1007
1008#ifdef SIMULATE_SHORT_WRITE
1009 nbytes = Min(nbytes, 4096);
1010#endif
1011
1012 if (nbytes < 0)
1013 {
/* Capture the ENOSPC test before anything can change errno. */
1014 bool enospc = errno == ENOSPC;
1015
1016 ereport(ERROR,
1017 (errcode_for_file_access(),
1018 errmsg("could not write blocks %u..%u in file \"%s\": %m",
1019 blocknum,
1020 blocknum + nblocks_this_segment - 1,
1021 FilePathName(v->mdfd_vfd)),
1022 enospc ? errhint("Check free disk space.") : 0));
1023 }
1024
1025 /* One loop should usually be enough. */
1026 transferred_this_segment += nbytes;
1027 Assert(transferred_this_segment <= size_this_segment);
1028 if (transferred_this_segment == size_this_segment)
1029 break;
1030
1031 /* Adjust position and iovecs after a short write. */
1032 seekpos += nbytes;
1033 iovcnt = compute_remaining_iovec(iov, iov, iovcnt, nbytes);
1034 }
1035
1036 if (!skipFsync && !SmgrIsTemp(reln))
1037 register_dirty_segment(reln, forknum, v);
1038
1039 nblocks -= nblocks_this_segment;
1040 buffers += nblocks_this_segment;
1041 blocknum += nblocks_this_segment;
1042 }
1043}
ssize_t FileWriteV(File file, const struct iovec *iov, int iovcnt, off_t offset, uint32 wait_event_info)
Definition: fd.c:2216

References _mdfd_getseg(), Assert(), RelFileLocatorBackend::backend, buffers_to_iovec(), compute_remaining_iovec(), RelFileLocator::dbOid, elog, ereport, errcode_for_file_access(), errhint(), errmsg(), ERROR, EXTENSION_CREATE_RECOVERY, EXTENSION_FAIL, FilePathName(), FileWriteV(), lengthof, RelFileLocatorBackend::locator, _MdfdVec::mdfd_vfd, mdnblocks(), Min, PG_IOV_MAX, register_dirty_segment(), RelFileLocator::relNumber, SMgrRelationData::smgr_rlocator, SmgrIsTemp, and RelFileLocator::spcOid.

◆ mdzeroextend()

void mdzeroextend ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
int  nblocks,
bool  skipFsync 
)

Definition at line 532 of file md.c.

/*
 * mdzeroextend() -- Extend a relation fork by nblocks zero-filled blocks,
 * starting at blocknum.
 *
 * The extension is refused with an error if it would create a block numbered
 * InvalidBlockNumber or higher.  Otherwise the range is processed one
 * segment at a time; segments are obtained with EXTENSION_CREATE.  Within a
 * segment, runs of more than 8 blocks are allocated with FileFallocate() and
 * shorter runs are written with FileZero().  A failure of either call is
 * reported with ereport(ERROR).
 *
 * Unless skipFsync is set or the relation is temporary, each extended
 * segment is passed to register_dirty_segment().
 */
534{
535 MdfdVec *v;
536 BlockNumber curblocknum = blocknum;
537 int remblocks = nblocks;
538
539 Assert(nblocks > 0);
540
541 /* This assert is too expensive to have on normally ... */
542#ifdef CHECK_WRITE_VS_EXTEND
543 Assert(blocknum >= mdnblocks(reln, forknum));
544#endif
545
546 /*
547 * If a relation manages to grow to 2^32-1 blocks, refuse to extend it any
548 * more --- we mustn't create a block whose number actually is
549 * InvalidBlockNumber or larger.
550 */
551 if ((uint64) blocknum + nblocks >= (uint64) InvalidBlockNumber)
552 ereport(ERROR,
553 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
554 errmsg("cannot extend file \"%s\" beyond %u blocks",
555 relpath(reln->smgr_rlocator, forknum).str,
556 InvalidBlockNumber)));
557
558 while (remblocks > 0)
559 {
560 BlockNumber segstartblock = curblocknum % ((BlockNumber) RELSEG_SIZE);
561 off_t seekpos = (off_t) BLCKSZ * segstartblock;
562 int numblocks;
563
/* Clamp this step so it ends at the segment boundary at the latest. */
564 if (segstartblock + remblocks > RELSEG_SIZE)
565 numblocks = RELSEG_SIZE - segstartblock;
566 else
567 numblocks = remblocks;
568
569 v = _mdfd_getseg(reln, forknum, curblocknum, skipFsync, EXTENSION_CREATE);
570
571 Assert(segstartblock < RELSEG_SIZE);
572 Assert(segstartblock + numblocks <= RELSEG_SIZE);
573
574 /*
575 * If available and useful, use posix_fallocate() (via
576 * FileFallocate()) to extend the relation. That's often more
577 * efficient than using write(), as it commonly won't cause the kernel
578 * to allocate page cache space for the extended pages.
579 *
580 * However, we don't use FileFallocate() for small extensions, as it
581 * defeats delayed allocation on some filesystems. Not clear where
582 * that decision should be made though? For now just use a cutoff of
583 * 8, anything between 4 and 8 worked OK in some local testing.
584 */
585 if (numblocks > 8)
586 {
587 int ret;
588
589 ret = FileFallocate(v->mdfd_vfd,
590 seekpos, (off_t) BLCKSZ * numblocks,
591 WAIT_EVENT_DATA_FILE_EXTEND);
592 if (ret != 0)
593 {
594 ereport(ERROR,
595 errcode_for_file_access(),
596 errmsg("could not extend file \"%s\" with FileFallocate(): %m",
597 FilePathName(v->mdfd_vfd)),
598 errhint("Check free disk space."));
599 }
600 }
601 else
602 {
603 int ret;
604
605 /*
606 * Even if we don't want to use fallocate, we can still extend a
607 * bit more efficiently than writing each 8kB block individually.
608 * pg_pwrite_zeros() (via FileZero()) uses pg_pwritev_with_retry()
609 * to avoid multiple writes or needing a zeroed buffer for the
610 * whole length of the extension.
611 */
612 ret = FileZero(v->mdfd_vfd,
613 seekpos, (off_t) BLCKSZ * numblocks,
614 WAIT_EVENT_DATA_FILE_EXTEND);
615 if (ret < 0)
616 ereport(ERROR,
617 errcode_for_file_access(),
618 errmsg("could not extend file \"%s\": %m",
619 FilePathName(v->mdfd_vfd)),
620 errhint("Check free disk space."));
621 }
622
623 if (!skipFsync && !SmgrIsTemp(reln))
624 register_dirty_segment(reln, forknum, v);
625
626 Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
627
628 remblocks -= numblocks;
629 curblocknum += numblocks;
630 }
631}
int FileFallocate(File file, off_t offset, off_t amount, uint32 wait_event_info)
Definition: fd.c:2393
int FileZero(File file, off_t offset, off_t amount, uint32 wait_event_info)
Definition: fd.c:2348

References _mdfd_getseg(), _mdnblocks(), Assert(), ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXTENSION_CREATE, FileFallocate(), FilePathName(), FileZero(), InvalidBlockNumber, _MdfdVec::mdfd_vfd, mdnblocks(), register_dirty_segment(), relpath, SMgrRelationData::smgr_rlocator, and SmgrIsTemp.

◆ register_dirty_segment()

static void register_dirty_segment ( SMgrRelation  reln,
ForkNumber  forknum,
MdfdVec *  seg 
)
static

Definition at line 1378 of file md.c.

/*
 * register_dirty_segment() -- Arrange for a segment file to be fsync'd.
 *
 * Builds a FileTag for the segment and submits a SYNC_REQUEST without
 * retrying.  If RegisterSyncRequest() reports failure, a DEBUG1 message is
 * logged and the file is synced right here with FileSync(); a failing
 * FileSync() is reported at the level chosen by data_sync_elevel(ERROR).
 * The locally performed fsync is counted in the I/O statistics under
 * IOCONTEXT_NORMAL.
 *
 * Must not be called for temporary relations (asserted).
 */
1379{
1380 FileTag tag;
1381
1382 INIT_MD_FILETAG(tag, reln->smgr_rlocator.locator, forknum, seg->mdfd_segno);
1383
1384 /* Temp relations should never be fsync'd */
1385 Assert(!SmgrIsTemp(reln));
1386
1387 if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false /* retryOnError */ ))
1388 {
1389 instr_time io_start;
1390
1391 ereport(DEBUG1,
1392 (errmsg_internal("could not forward fsync request because request queue is full")));
1393
1394 io_start = pgstat_prepare_io_time(track_io_timing);
1395
1396 if (FileSync(seg->mdfd_vfd, WAIT_EVENT_DATA_FILE_SYNC) < 0)
1397 ereport(data_sync_elevel(ERROR),
1398 (errcode_for_file_access(),
1399 errmsg("could not fsync file \"%s\": %m",
1400 FilePathName(seg->mdfd_vfd))));
1401
1402 /*
1403 * We have no way of knowing if the current IOContext is
1404 * IOCONTEXT_NORMAL or IOCONTEXT_[BULKREAD, BULKWRITE, VACUUM] at this
1405 * point, so count the fsync as being in the IOCONTEXT_NORMAL
1406 * IOContext. This is probably okay, because the number of backend
1407 * fsyncs doesn't say anything about the efficacy of the
1408 * BufferAccessStrategy. And counting both fsyncs done in
1409 * IOCONTEXT_NORMAL and IOCONTEXT_[BULKREAD, BULKWRITE, VACUUM] under
1410 * IOCONTEXT_NORMAL is likely clearer when investigating the number of
1411 * backend fsyncs.
1412 */
1413 pgstat_count_io_op_time(IOOBJECT_RELATION, IOCONTEXT_NORMAL,
1414 IOOP_FSYNC, io_start, 1, 0);
1415 }
1416}
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1157
#define DEBUG1
Definition: elog.h:30
@ SYNC_REQUEST
Definition: sync.h:25

References Assert(), data_sync_elevel(), DEBUG1, ereport, errcode_for_file_access(), errmsg(), errmsg_internal(), ERROR, FilePathName(), FileSync(), INIT_MD_FILETAG, IOCONTEXT_NORMAL, IOOBJECT_RELATION, IOOP_FSYNC, RelFileLocatorBackend::locator, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, pgstat_count_io_op_time(), pgstat_prepare_io_time(), RegisterSyncRequest(), SMgrRelationData::smgr_rlocator, SmgrIsTemp, SYNC_REQUEST, and track_io_timing.

Referenced by mdcreate(), mdextend(), mdregistersync(), mdtruncate(), mdwritev(), and mdzeroextend().

◆ register_forget_request()

static void register_forget_request ( RelFileLocatorBackend  rlocator,
ForkNumber  forknum,
BlockNumber  segno 
)
static

Definition at line 1439 of file md.c.

/*
 * register_forget_request() -- Cancel pending sync requests for a segment.
 *
 * Builds a FileTag from the relation locator, fork and segment number and
 * submits it as a SYNC_FORGET_REQUEST with retryOnError set to true.  The
 * return value of RegisterSyncRequest() is not examined.
 */
1441{
1442 FileTag tag;
1443
1444 INIT_MD_FILETAG(tag, rlocator.locator, forknum, segno);
1445
1446 RegisterSyncRequest(&tag, SYNC_FORGET_REQUEST, true /* retryOnError */ );
1447}
@ SYNC_FORGET_REQUEST
Definition: sync.h:27

References INIT_MD_FILETAG, RelFileLocatorBackend::locator, RegisterSyncRequest(), and SYNC_FORGET_REQUEST.

Referenced by mdunlinkfork().

◆ register_unlink_segment()

static void register_unlink_segment ( RelFileLocatorBackend  rlocator,
ForkNumber  forknum,
BlockNumber  segno 
)
static

Definition at line 1422 of file md.c.

/*
 * register_unlink_segment() -- Schedule a segment file to be unlinked later.
 *
 * Builds a FileTag from the relation locator, fork and segment number and
 * submits it as a SYNC_UNLINK_REQUEST with retryOnError set to true.  The
 * return value of RegisterSyncRequest() is not examined.
 *
 * Must not be called for temporary relations (asserted).
 */
1424{
1425 FileTag tag;
1426
1427 INIT_MD_FILETAG(tag, rlocator.locator, forknum, segno);
1428
1429 /* Should never be used with temp relations */
1430 Assert(!RelFileLocatorBackendIsTemp(rlocator));
1431
1432 RegisterSyncRequest(&tag, SYNC_UNLINK_REQUEST, true /* retryOnError */ );
1433}
@ SYNC_UNLINK_REQUEST
Definition: sync.h:26

References Assert(), INIT_MD_FILETAG, RelFileLocatorBackend::locator, RegisterSyncRequest(), RelFileLocatorBackendIsTemp, and SYNC_UNLINK_REQUEST.

Referenced by mdunlinkfork().

Variable Documentation

◆ MdCxt

MemoryContext MdCxt
static

Definition at line 86 of file md.c.

Referenced by _fdvec_resize(), and mdinit().