PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
md.c File Reference
#include "postgres.h"
#include <unistd.h>
#include <fcntl.h>
#include <sys/file.h>
#include "miscadmin.h"
#include "access/xlog.h"
#include "catalog/catalog.h"
#include "pgstat.h"
#include "portability/instr_time.h"
#include "postmaster/bgwriter.h"
#include "storage/fd.h"
#include "storage/bufmgr.h"
#include "storage/relfilenode.h"
#include "storage/smgr.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"
#include "pg_trace.h"
Include dependency graph for md.c:

Go to the source code of this file.

Data Structures

struct  _MdfdVec
 
struct  PendingOperationEntry
 
struct  PendingUnlinkEntry
 

Macros

#define FSYNCS_PER_ABSORB   10
 
#define UNLINKS_PER_ABSORB   10
 
#define FORGET_RELATION_FSYNC   (InvalidBlockNumber)
 
#define FORGET_DATABASE_FSYNC   (InvalidBlockNumber-1)
 
#define UNLINK_RELATION_REQUEST   (InvalidBlockNumber-2)
 
#define FILE_POSSIBLY_DELETED(err)   ((err) == ENOENT)
 
#define EXTENSION_FAIL   (1 << 0)
 
#define EXTENSION_RETURN_NULL   (1 << 1)
 
#define EXTENSION_CREATE   (1 << 2)
 
#define EXTENSION_CREATE_RECOVERY   (1 << 3)
 
#define EXTENSION_DONT_CHECK_SIZE   (1 << 4)
 

Typedefs

typedef struct _MdfdVec MdfdVec
 
typedef uint16 CycleCtr
 

Functions

static void mdunlinkfork (RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo)
 
static MdfdVec * mdopen (SMgrRelation reln, ForkNumber forknum, int behavior)
 
static void register_dirty_segment (SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
 
static void register_unlink (RelFileNodeBackend rnode)
 
static void _fdvec_resize (SMgrRelation reln, ForkNumber forknum, int nseg)
 
static char * _mdfd_segpath (SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
 
static MdfdVec * _mdfd_openseg (SMgrRelation reln, ForkNumber forkno, BlockNumber segno, int oflags)
 
static MdfdVec * _mdfd_getseg (SMgrRelation reln, ForkNumber forkno, BlockNumber blkno, bool skipFsync, int behavior)
 
static BlockNumber _mdnblocks (SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
 
void mdinit (void)
 
void SetForwardFsyncRequests (void)
 
bool mdexists (SMgrRelation reln, ForkNumber forkNum)
 
void mdcreate (SMgrRelation reln, ForkNumber forkNum, bool isRedo)
 
void mdunlink (RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo)
 
void mdextend (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync)
 
void mdclose (SMgrRelation reln, ForkNumber forknum)
 
void mdprefetch (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
 
void mdwriteback (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
 
void mdread (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer)
 
void mdwrite (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync)
 
BlockNumber mdnblocks (SMgrRelation reln, ForkNumber forknum)
 
void mdtruncate (SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
 
void mdimmedsync (SMgrRelation reln, ForkNumber forknum)
 
void mdsync (void)
 
void mdpreckpt (void)
 
void mdpostckpt (void)
 
void RememberFsyncRequest (RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
 
void ForgetRelationFsyncRequests (RelFileNode rnode, ForkNumber forknum)
 
void ForgetDatabaseFsyncRequests (Oid dbid)
 

Variables

static MemoryContext MdCxt
 
static HTAB * pendingOpsTable = NULL
 
static List * pendingUnlinks = NIL
 
static MemoryContext pendingOpsCxt
 
static CycleCtr mdsync_cycle_ctr = 0
 
static CycleCtr mdckpt_cycle_ctr = 0
 

Macro Definition Documentation

#define EXTENSION_CREATE   (1 << 2)

Definition at line 169 of file md.c.

Referenced by _mdfd_getseg(), and mdextend().

#define EXTENSION_CREATE_RECOVERY   (1 << 3)

Definition at line 171 of file md.c.

Referenced by _mdfd_getseg(), mdread(), and mdwrite().

#define EXTENSION_DONT_CHECK_SIZE   (1 << 4)

Definition at line 179 of file md.c.

Referenced by _mdfd_getseg(), and mdsync().

#define EXTENSION_FAIL   (1 << 0)

Definition at line 165 of file md.c.

Referenced by _mdfd_getseg(), mdnblocks(), mdprefetch(), mdread(), and mdwrite().

#define EXTENSION_RETURN_NULL   (1 << 1)

Definition at line 167 of file md.c.

Referenced by _mdfd_getseg(), mdexists(), mdopen(), mdsync(), and mdwriteback().

#define FILE_POSSIBLY_DELETED (   err)    ((err) == ENOENT)

Definition at line 66 of file md.c.

Referenced by _mdfd_getseg(), mdopen(), and mdsync().

#define FORGET_DATABASE_FSYNC   (InvalidBlockNumber-1)

Definition at line 55 of file md.c.

Referenced by ForgetDatabaseFsyncRequests(), and RememberFsyncRequest().

#define FORGET_RELATION_FSYNC   (InvalidBlockNumber)

Definition at line 54 of file md.c.

Referenced by ForgetRelationFsyncRequests(), and RememberFsyncRequest().

#define FSYNCS_PER_ABSORB   10

Definition at line 44 of file md.c.

Referenced by mdsync().

#define UNLINK_RELATION_REQUEST   (InvalidBlockNumber-2)

Definition at line 56 of file md.c.

Referenced by register_unlink(), and RememberFsyncRequest().

#define UNLINKS_PER_ABSORB   10

Definition at line 45 of file md.c.

Referenced by mdpostckpt().

Typedef Documentation

typedef uint16 CycleCtr

Definition at line 137 of file md.c.

Function Documentation

static void _fdvec_resize ( SMgrRelation  reln,
ForkNumber  forknum,
int  nseg 
)
static

Definition at line 1712 of file md.c.

References SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, MemoryContextAlloc(), pfree(), and repalloc().

Referenced by _mdfd_openseg(), mdclose(), mdcreate(), mdopen(), and mdtruncate().

1715 {
1716  if (nseg == 0)
1717  {
1718  if (reln->md_num_open_segs[forknum] > 0)
1719  {
1720  pfree(reln->md_seg_fds[forknum]);
1721  reln->md_seg_fds[forknum] = NULL;
1722  }
1723  }
1724  else if (reln->md_num_open_segs[forknum] == 0)
1725  {
1726  reln->md_seg_fds[forknum] =
1727  MemoryContextAlloc(MdCxt, sizeof(MdfdVec) * nseg);
1728  }
1729  else
1730  {
1731  /*
1732  * It doesn't seem worthwhile complicating the code by having a more
1733  * aggressive growth strategy here; the number of segments doesn't
1734  * grow that fast, and the memory context internally will sometimes
1735  * avoid doing an actual reallocation.
1736  */
1737  reln->md_seg_fds[forknum] =
1738  repalloc(reln->md_seg_fds[forknum],
1739  sizeof(MdfdVec) * nseg);
1740  }
1741 
1742  reln->md_num_open_segs[forknum] = nseg;
1743 }
static MemoryContext MdCxt
Definition: md.c:115
void pfree(void *pointer)
Definition: mcxt.c:949
Definition: md.c:109
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:962
int md_num_open_segs[MAX_FORKNUM+1]
Definition: smgr.h:71
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:706
struct _MdfdVec * md_seg_fds[MAX_FORKNUM+1]
Definition: smgr.h:72
static MdfdVec * _mdfd_getseg ( SMgrRelation  reln,
ForkNumber  forkno,
BlockNumber  blkno,
bool  skipFsync,
int  behavior 
)
static

Definition at line 1813 of file md.c.

References _mdfd_openseg(), _mdfd_segpath(), _mdnblocks(), Assert, elog, ereport, errcode_for_file_access(), errmsg(), ERROR, EXTENSION_CREATE, EXTENSION_CREATE_RECOVERY, EXTENSION_DONT_CHECK_SIZE, EXTENSION_FAIL, EXTENSION_RETURN_NULL, FATAL, FILE_POSSIBLY_DELETED, InRecovery, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, mdextend(), _MdfdVec::mdfd_segno, mdopen(), palloc0(), and pfree().

Referenced by mdextend(), mdprefetch(), mdread(), mdsync(), mdwrite(), and mdwriteback().

1815 {
1816  MdfdVec *v;
1817  BlockNumber targetseg;
1818  BlockNumber nextsegno;
1819 
1820  /* some way to handle non-existent segments needs to be specified */
1821  Assert(behavior &
1822  (EXTENSION_FAIL | EXTENSION_CREATE | EXTENSION_RETURN_NULL));
1823 
1824  targetseg = blkno / ((BlockNumber) RELSEG_SIZE);
1825 
1826  /* if an existing and opened segment, we're done */
1827  if (targetseg < reln->md_num_open_segs[forknum])
1828  {
1829  v = &reln->md_seg_fds[forknum][targetseg];
1830  return v;
1831  }
1832 
1833  /*
1834  * The target segment is not yet open. Iterate over all the segments
1835  * between the last opened and the target segment. This way missing
1836  * segments either raise an error, or get created (according to
1837  * 'behavior'). Start with either the last opened, or the first segment if
1838  * none was opened before.
1839  */
1840  if (reln->md_num_open_segs[forknum] > 0)
1841  v = &reln->md_seg_fds[forknum][reln->md_num_open_segs[forknum] - 1];
1842  else
1843  {
1844  v = mdopen(reln, forknum, behavior);
1845  if (!v)
1846  return NULL; /* if behavior & EXTENSION_RETURN_NULL */
1847  }
1848 
1849  for (nextsegno = reln->md_num_open_segs[forknum];
1850  nextsegno <= targetseg; nextsegno++)
1851  {
1852  BlockNumber nblocks = _mdnblocks(reln, forknum, v);
1853  int flags = 0;
1854 
1855  Assert(nextsegno == v->mdfd_segno + 1);
1856 
1857  if (nblocks > ((BlockNumber) RELSEG_SIZE))
1858  elog(FATAL, "segment too big");
1859 
1860  if ((behavior & EXTENSION_CREATE) ||
1861  (InRecovery && (behavior & EXTENSION_CREATE_RECOVERY)))
1862  {
1863  /*
1864  * Normally we will create new segments only if authorized by the
1865  * caller (i.e., we are doing mdextend()). But when doing WAL
1866  * recovery, create segments anyway; this allows cases such as
1867  * replaying WAL data that has a write into a high-numbered
1868  * segment of a relation that was later deleted. We want to go
1869  * ahead and create the segments so we can finish out the replay.
1870  * However if the caller has specified
1871  * EXTENSION_REALLY_RETURN_NULL, then extension is not desired
1872  * even in recovery; we won't reach this point in that case.
1873  *
1874  * We have to maintain the invariant that segments before the last
1875  * active segment are of size RELSEG_SIZE; therefore, if
1876  * extending, pad them out with zeroes if needed. (This only
1877  * matters if in recovery, or if the caller is extending the
1878  * relation discontiguously, but that can happen in hash indexes.)
1879  */
1880  if (nblocks < ((BlockNumber) RELSEG_SIZE))
1881  {
1882  char *zerobuf = palloc0(BLCKSZ);
1883 
1884  mdextend(reln, forknum,
1885  nextsegno * ((BlockNumber) RELSEG_SIZE) - 1,
1886  zerobuf, skipFsync);
1887  pfree(zerobuf);
1888  }
1889  flags = O_CREAT;
1890  }
1891  else if (!(behavior & EXTENSION_DONT_CHECK_SIZE) &&
1892  nblocks < ((BlockNumber) RELSEG_SIZE))
1893  {
1894  /*
1895  * When not extending (or explicitly including truncated
1896  * segments), only open the next segment if the current one is
1897  * exactly RELSEG_SIZE. If not (this branch), either return NULL
1898  * or fail.
1899  */
1900  if (behavior & EXTENSION_RETURN_NULL)
1901  {
1902  /*
1903  * Some callers discern between reasons for _mdfd_getseg()
1904  * returning NULL based on errno. As there's no failing
1905  * syscall involved in this case, explicitly set errno to
1906  * ENOENT, as that seems the closest interpretation.
1907  */
1908  errno = ENOENT;
1909  return NULL;
1910  }
1911 
1912  ereport(ERROR,
1913  (errcode_for_file_access(),
1914  errmsg("could not open file \"%s\" (target block %u): previous segment is only %u blocks",
1915  _mdfd_segpath(reln, forknum, nextsegno),
1916  blkno, nblocks)));
1917  }
1918 
1919  v = _mdfd_openseg(reln, forknum, nextsegno, flags);
1920 
1921  if (v == NULL)
1922  {
1923  if ((behavior & EXTENSION_RETURN_NULL) &&
1924  FILE_POSSIBLY_DELETED(errno))
1925  return NULL;
1926  ereport(ERROR,
1927  (errcode_for_file_access(),
1928  errmsg("could not open file \"%s\" (target block %u): %m",
1929  _mdfd_segpath(reln, forknum, nextsegno),
1930  blkno)));
1931  }
1932  }
1933 
1934  return v;
1935 }
#define EXTENSION_DONT_CHECK_SIZE
Definition: md.c:179
BlockNumber mdfd_segno
Definition: md.c:112
bool InRecovery
Definition: xlog.c:194
uint32 BlockNumber
Definition: block.h:31
#define EXTENSION_FAIL
Definition: md.c:165
void mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync)
Definition: md.c:495
void pfree(void *pointer)
Definition: mcxt.c:949
#define ERROR
Definition: elog.h:43
#define EXTENSION_RETURN_NULL
Definition: md.c:167
#define FATAL
Definition: elog.h:52
int errcode_for_file_access(void)
Definition: elog.c:598
#define EXTENSION_CREATE_RECOVERY
Definition: md.c:171
#define ereport(elevel, rest)
Definition: elog.h:122
static MdfdVec * mdopen(SMgrRelation reln, ForkNumber forknum, int behavior)
Definition: md.c:574
void * palloc0(Size size)
Definition: mcxt.c:877
Definition: md.c:109
static MdfdVec * _mdfd_openseg(SMgrRelation reln, ForkNumber forkno, BlockNumber segno, int oflags)
Definition: md.c:1773
#define EXTENSION_CREATE
Definition: md.c:169
#define Assert(condition)
Definition: c.h:681
static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:1941
int md_num_open_segs[MAX_FORKNUM+1]
Definition: smgr.h:71
int errmsg(const char *fmt,...)
Definition: elog.c:797
struct _MdfdVec * md_seg_fds[MAX_FORKNUM+1]
Definition: smgr.h:72
#define elog
Definition: elog.h:219
#define FILE_POSSIBLY_DELETED(err)
Definition: md.c:66
static char * _mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
Definition: md.c:1750
static MdfdVec * _mdfd_openseg ( SMgrRelation  reln,
ForkNumber  forkno,
BlockNumber  segno,
int  oflags 
)
static

Definition at line 1773 of file md.c.

References _fdvec_resize(), _mdfd_segpath(), _mdnblocks(), Assert, fd(), SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, PathNameOpenFile(), pfree(), and PG_BINARY.

Referenced by _mdfd_getseg(), and mdnblocks().

1775 {
1776  MdfdVec *v;
1777  int fd;
1778  char *fullpath;
1779 
1780  fullpath = _mdfd_segpath(reln, forknum, segno);
1781 
1782  /* open the file */
1783  fd = PathNameOpenFile(fullpath, O_RDWR | PG_BINARY | oflags);
1784 
1785  pfree(fullpath);
1786 
1787  if (fd < 0)
1788  return NULL;
1789 
1790  if (segno <= reln->md_num_open_segs[forknum])
1791  _fdvec_resize(reln, forknum, segno + 1);
1792 
1793  /* fill the entry */
1794  v = &reln->md_seg_fds[forknum][segno];
1795  v->mdfd_vfd = fd;
1796  v->mdfd_segno = segno;
1797 
1798  Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
1799 
1800  /* all done */
1801  return v;
1802 }
File PathNameOpenFile(const char *fileName, int fileFlags)
Definition: fd.c:1315
BlockNumber mdfd_segno
Definition: md.c:112
uint32 BlockNumber
Definition: block.h:31
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define PG_BINARY
Definition: c.h:1044
void pfree(void *pointer)
Definition: mcxt.c:949
static void _fdvec_resize(SMgrRelation reln, ForkNumber forknum, int nseg)
Definition: md.c:1712
Definition: md.c:109
#define Assert(condition)
Definition: c.h:681
static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:1941
struct _MdfdVec * md_seg_fds[MAX_FORKNUM+1]
Definition: smgr.h:72
File mdfd_vfd
Definition: md.c:111
static char * _mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
Definition: md.c:1750
static char * _mdfd_segpath ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  segno 
)
static

Definition at line 1750 of file md.c.

References pfree(), psprintf(), relpath, and SMgrRelationData::smgr_rnode.

Referenced by _mdfd_getseg(), _mdfd_openseg(), and mdsync().

1751 {
1752  char *path,
1753  *fullpath;
1754 
1755  path = relpath(reln->smgr_rnode, forknum);
1756 
1757  if (segno > 0)
1758  {
1759  fullpath = psprintf("%s.%u", path, segno);
1760  pfree(path);
1761  }
1762  else
1763  fullpath = path;
1764 
1765  return fullpath;
1766 }
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
void pfree(void *pointer)
Definition: mcxt.c:949
RelFileNodeBackend smgr_rnode
Definition: smgr.h:43
#define relpath(rnode, forknum)
Definition: relpath.h:71
static BlockNumber _mdnblocks ( SMgrRelation  reln,
ForkNumber  forknum,
MdfdVec *  seg 
)
static

Definition at line 1941 of file md.c.

References ereport, errcode_for_file_access(), errmsg(), ERROR, FilePathName(), FileSeek(), and _MdfdVec::mdfd_vfd.

Referenced by _mdfd_getseg(), _mdfd_openseg(), mdextend(), mdnblocks(), and mdopen().

1942 {
1943  off_t len;
1944 
1945  len = FileSeek(seg->mdfd_vfd, 0L, SEEK_END);
1946  if (len < 0)
1947  ereport(ERROR,
1948  (errcode_for_file_access(),
1949  errmsg("could not seek to end of file \"%s\": %m",
1950  FilePathName(seg->mdfd_vfd))));
1951  /* note that this calculation will ignore any partial block at EOF */
1952  return (BlockNumber) (len / BLCKSZ);
1953 }
uint32 BlockNumber
Definition: block.h:31
char * FilePathName(File file)
Definition: fd.c:1997
#define ERROR
Definition: elog.h:43
int errcode_for_file_access(void)
Definition: elog.c:598
#define ereport(elevel, rest)
Definition: elog.h:122
int errmsg(const char *fmt,...)
Definition: elog.c:797
off_t FileSeek(File file, off_t offset, int whence)
Definition: fd.c:1874
File mdfd_vfd
Definition: md.c:111
void ForgetDatabaseFsyncRequests ( Oid  dbid)

Definition at line 1685 of file md.c.

References RelFileNode::dbNode, FORGET_DATABASE_FSYNC, ForwardFsyncRequest(), InvalidForkNumber, IsUnderPostmaster, pg_usleep(), RelFileNode::relNode, RememberFsyncRequest(), and RelFileNode::spcNode.

Referenced by dbase_redo(), and dropdb().

1686 {
1687  RelFileNode rnode;
1688 
1689  rnode.dbNode = dbid;
1690  rnode.spcNode = 0;
1691  rnode.relNode = 0;
1692 
1693  if (pendingOpsTable)
1694  {
1695  /* standalone backend or startup process: fsync state is local */
1696  RememberFsyncRequest(rnode, InvalidForkNumber, FORGET_DATABASE_FSYNC);
1697  }
1698  else if (IsUnderPostmaster)
1699  {
1700  /* see notes in ForgetRelationFsyncRequests */
1701  while (!ForwardFsyncRequest(rnode, InvalidForkNumber,
1702  FORGET_DATABASE_FSYNC))
1703  pg_usleep(10000L); /* 10 msec seems a good number */
1704  }
1705 }
void RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
Definition: md.c:1514
bool ForwardFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
static HTAB * pendingOpsTable
Definition: md.c:155
void pg_usleep(long microsec)
Definition: signal.c:53
#define FORGET_DATABASE_FSYNC
Definition: md.c:55
bool IsUnderPostmaster
Definition: globals.c:101
void ForgetRelationFsyncRequests ( RelFileNode  rnode,
ForkNumber  forknum 
)

Definition at line 1652 of file md.c.

References FORGET_RELATION_FSYNC, ForwardFsyncRequest(), IsUnderPostmaster, pg_usleep(), and RememberFsyncRequest().

Referenced by mdunlink().

1653 {
1654  if (pendingOpsTable)
1655  {
1656  /* standalone backend or startup process: fsync state is local */
1657  RememberFsyncRequest(rnode, forknum, FORGET_RELATION_FSYNC);
1658  }
1659  else if (IsUnderPostmaster)
1660  {
1661  /*
1662  * Notify the checkpointer about it. If we fail to queue the cancel
1663  * message, we have to sleep and try again ... ugly, but hopefully
1664  * won't happen often.
1665  *
1666  * XXX should we CHECK_FOR_INTERRUPTS in this loop? Escaping with an
1667  * error would leave the no-longer-used file still present on disk,
1668  * which would be bad, so I'm inclined to assume that the checkpointer
1669  * will always empty the queue soon.
1670  */
1671  while (!ForwardFsyncRequest(rnode, forknum, FORGET_RELATION_FSYNC))
1672  pg_usleep(10000L); /* 10 msec seems a good number */
1673 
1674  /*
1675  * Note we don't wait for the checkpointer to actually absorb the
1676  * cancel message; see mdsync() for the implications.
1677  */
1678  }
1679 }
void RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
Definition: md.c:1514
bool ForwardFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
static HTAB * pendingOpsTable
Definition: md.c:155
#define FORGET_RELATION_FSYNC
Definition: md.c:54
void pg_usleep(long microsec)
Definition: signal.c:53
bool IsUnderPostmaster
Definition: globals.c:101
void mdclose ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 628 of file md.c.

References _fdvec_resize(), FileClose(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, and _MdfdVec::mdfd_vfd.

Referenced by mdexists().

629 {
630  int nopensegs = reln->md_num_open_segs[forknum];
631 
632  /* No work if already closed */
633  if (nopensegs == 0)
634  return;
635 
636  /* close segments starting from the end */
637  while (nopensegs > 0)
638  {
639  MdfdVec *v = &reln->md_seg_fds[forknum][nopensegs - 1];
640 
641  /* if not closed already */
642  if (v->mdfd_vfd >= 0)
643  {
644  FileClose(v->mdfd_vfd);
645  v->mdfd_vfd = -1;
646  }
647 
648  nopensegs--;
649  }
650 
651  /* resize just once, avoids pointless reallocations */
652  _fdvec_resize(reln, forknum, 0);
653 }
static void _fdvec_resize(SMgrRelation reln, ForkNumber forknum, int nseg)
Definition: md.c:1712
Definition: md.c:109
void FileClose(File file)
Definition: fd.c:1516
int md_num_open_segs[MAX_FORKNUM+1]
Definition: smgr.h:71
struct _MdfdVec * md_seg_fds[MAX_FORKNUM+1]
Definition: smgr.h:72
File mdfd_vfd
Definition: md.c:111
void mdcreate ( SMgrRelation  reln,
ForkNumber  forkNum,
bool  isRedo 
)

Definition at line 294 of file md.c.

References _fdvec_resize(), Assert, ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), IsBootstrapProcessingMode, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, PathNameOpenFile(), pfree(), PG_BINARY, relpath, and SMgrRelationData::smgr_rnode.

295 {
296  MdfdVec *mdfd;
297  char *path;
298  File fd;
299 
300  if (isRedo && reln->md_num_open_segs[forkNum] > 0)
301  return; /* created and opened already... */
302 
303  Assert(reln->md_num_open_segs[forkNum] == 0);
304 
305  path = relpath(reln->smgr_rnode, forkNum);
306 
307  fd = PathNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY);
308 
309  if (fd < 0)
310  {
311  int save_errno = errno;
312 
313  /*
314  * During bootstrap, there are cases where a system relation will be
315  * accessed (by internal backend processes) before the bootstrap
316  * script nominally creates it. Therefore, allow the file to exist
317  * already, even if isRedo is not set. (See also mdopen)
318  */
319  if (isRedo || IsBootstrapProcessingMode())
320  fd = PathNameOpenFile(path, O_RDWR | PG_BINARY);
321  if (fd < 0)
322  {
323  /* be sure to report the error reported by create, not open */
324  errno = save_errno;
325  ereport(ERROR,
326  (errcode_for_file_access(),
327  errmsg("could not create file \"%s\": %m", path)));
328  }
329  }
330 
331  pfree(path);
332 
333  _fdvec_resize(reln, forkNum, 1);
334  mdfd = &reln->md_seg_fds[forkNum][0];
335  mdfd->mdfd_vfd = fd;
336  mdfd->mdfd_segno = 0;
337 }
File PathNameOpenFile(const char *fileName, int fileFlags)
Definition: fd.c:1315
BlockNumber mdfd_segno
Definition: md.c:112
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define PG_BINARY
Definition: c.h:1044
void pfree(void *pointer)
Definition: mcxt.c:949
#define ERROR
Definition: elog.h:43
RelFileNodeBackend smgr_rnode
Definition: smgr.h:43
int errcode_for_file_access(void)
Definition: elog.c:598
#define ereport(elevel, rest)
Definition: elog.h:122
static void _fdvec_resize(SMgrRelation reln, ForkNumber forknum, int nseg)
Definition: md.c:1712
Definition: md.c:109
#define Assert(condition)
Definition: c.h:681
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:367
int md_num_open_segs[MAX_FORKNUM+1]
Definition: smgr.h:71
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define relpath(rnode, forknum)
Definition: relpath.h:71
struct _MdfdVec * md_seg_fds[MAX_FORKNUM+1]
Definition: smgr.h:72
File mdfd_vfd
Definition: md.c:111
int File
Definition: fd.h:49
bool mdexists ( SMgrRelation  reln,
ForkNumber  forkNum 
)

Definition at line 277 of file md.c.

References EXTENSION_RETURN_NULL, mdclose(), and mdopen().

278 {
279  /*
280  * Close it first, to ensure that we notice if the fork has been unlinked
281  * since we opened it.
282  */
283  mdclose(reln, forkNum);
284 
285  return (mdopen(reln, forkNum, EXTENSION_RETURN_NULL) != NULL);
286 }
#define EXTENSION_RETURN_NULL
Definition: md.c:167
static MdfdVec * mdopen(SMgrRelation reln, ForkNumber forknum, int behavior)
Definition: md.c:574
void mdclose(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:628
void mdextend ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
char *  buffer,
bool  skipFsync 
)

Definition at line 495 of file md.c.

References _mdfd_getseg(), _mdnblocks(), Assert, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXTENSION_CREATE, FilePathName(), FileSeek(), FileWrite(), InvalidBlockNumber, _MdfdVec::mdfd_vfd, mdnblocks(), register_dirty_segment(), relpath, SMgrRelationData::smgr_rnode, SmgrIsTemp, and WAIT_EVENT_DATA_FILE_EXTEND.

Referenced by _mdfd_getseg().

497 {
498  off_t seekpos;
499  int nbytes;
500  MdfdVec *v;
501 
502  /* This assert is too expensive to have on normally ... */
503 #ifdef CHECK_WRITE_VS_EXTEND
504  Assert(blocknum >= mdnblocks(reln, forknum));
505 #endif
506 
507  /*
508  * If a relation manages to grow to 2^32-1 blocks, refuse to extend it any
509  * more --- we mustn't create a block whose number actually is
510  * InvalidBlockNumber.
511  */
512  if (blocknum == InvalidBlockNumber)
513  ereport(ERROR,
514  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
515  errmsg("cannot extend file \"%s\" beyond %u blocks",
516  relpath(reln->smgr_rnode, forknum),
517  InvalidBlockNumber)));
518 
519  v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE);
520 
521  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
522 
523  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
524 
525  /*
526  * Note: because caller usually obtained blocknum by calling mdnblocks,
527  * which did a seek(SEEK_END), this seek is often redundant and will be
528  * optimized away by fd.c. It's not redundant, however, if there is a
529  * partial page at the end of the file. In that case we want to try to
530  * overwrite the partial page with a full page. It's also not redundant
531  * if bufmgr.c had to dump another buffer of the same file to make room
532  * for the new page's buffer.
533  */
534  if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
535  ereport(ERROR,
536  (errcode_for_file_access(),
537  errmsg("could not seek to block %u in file \"%s\": %m",
538  blocknum, FilePathName(v->mdfd_vfd))));
539 
540  if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ)
541  {
542  if (nbytes < 0)
543  ereport(ERROR,
544  (errcode_for_file_access(),
545  errmsg("could not extend file \"%s\": %m",
546  FilePathName(v->mdfd_vfd)),
547  errhint("Check free disk space.")));
548  /* short write: complain appropriately */
549  ereport(ERROR,
550  (errcode(ERRCODE_DISK_FULL),
551  errmsg("could not extend file \"%s\": wrote only %d of %d bytes at block %u",
552  FilePathName(v->mdfd_vfd),
553  nbytes, BLCKSZ, blocknum),
554  errhint("Check free disk space.")));
555  }
556 
557  if (!skipFsync && !SmgrIsTemp(reln))
558  register_dirty_segment(reln, forknum, v);
559 
560  Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
561 }
static MdfdVec * _mdfd_getseg(SMgrRelation reln, ForkNumber forkno, BlockNumber blkno, bool skipFsync, int behavior)
Definition: md.c:1813
int errhint(const char *fmt,...)
Definition: elog.c:987
BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:873
int errcode(int sqlerrcode)
Definition: elog.c:575
uint32 BlockNumber
Definition: block.h:31
char * FilePathName(File file)
Definition: fd.c:1997
#define SmgrIsTemp(smgr)
Definition: smgr.h:80
#define ERROR
Definition: elog.h:43
RelFileNodeBackend smgr_rnode
Definition: smgr.h:43
int errcode_for_file_access(void)
Definition: elog.c:598
int FileWrite(File file, char *buffer, int amount, uint32 wait_event_info)
Definition: fd.c:1732
#define ereport(elevel, rest)
Definition: elog.h:122
Definition: md.c:109
#define EXTENSION_CREATE
Definition: md.c:169
#define Assert(condition)
Definition: c.h:681
WalTimeSample buffer[LAG_TRACKER_BUFFER_SIZE]
Definition: walsender.c:214
static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:1941
#define InvalidBlockNumber
Definition: block.h:33
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define relpath(rnode, forknum)
Definition: relpath.h:71
off_t FileSeek(File file, off_t offset, int whence)
Definition: fd.c:1874
File mdfd_vfd
Definition: md.c:111
static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:1429
void mdimmedsync ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 1025 of file md.c.

References ereport, errcode_for_file_access(), errmsg(), ERROR, FilePathName(), FileSync(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, mdnblocks(), and WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC.

1026 {
1027  int segno;
1028 
1029  /*
1030  * NOTE: mdnblocks makes sure we have opened all active segments, so that
1031  * fsync loop will get them all!
1032  */
1033  mdnblocks(reln, forknum);
1034 
1035  segno = reln->md_num_open_segs[forknum];
1036 
1037  while (segno > 0)
1038  {
1039  MdfdVec *v = &reln->md_seg_fds[forknum][segno - 1];
1040 
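1041  if (FileSync(v->mdfd_vfd, WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC) < 0)
1042  ereport(ERROR,
1043  (errcode_for_file_access(),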
1044  errmsg("could not fsync file \"%s\": %m",
1045  FilePathName(v->mdfd_vfd))));
1046  segno--;
1047  }
1048 }
BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:873
char * FilePathName(File file)
Definition: fd.c:1997
#define ERROR
Definition: elog.h:43
int FileSync(File file, uint32 wait_event_info)
Definition: fd.c:1853
int errcode_for_file_access(void)
Definition: elog.c:598
#define ereport(elevel, rest)
Definition: elog.h:122
Definition: md.c:109
int md_num_open_segs[MAX_FORKNUM+1]
Definition: smgr.h:71
int errmsg(const char *fmt,...)
Definition: elog.c:797
struct _MdfdVec * md_seg_fds[MAX_FORKNUM+1]
Definition: smgr.h:72
File mdfd_vfd
Definition: md.c:111
void mdinit ( void  )

Definition at line 206 of file md.c.

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate(), AmCheckpointerProcess, AmStartupProcess, HASHCTL::entrysize, HASH_BLOBS, HASH_CONTEXT, hash_create(), HASH_ELEM, HASHCTL::hcxt, IsUnderPostmaster, HASHCTL::keysize, MemoryContextAllowInCriticalSection(), MemSet, NIL, pendingOpsCxt, and TopMemoryContext.

207 {
208  MdCxt = AllocSetContextCreate(TopMemoryContext,
209  "MdSmgr",
210  ALLOCSET_DEFAULT_SIZES);
211 
212  /*
213  * Create pending-operations hashtable if we need it. Currently, we need
214  * it if we are standalone (not under a postmaster) or if we are a startup
215  * or checkpointer auxiliary process.
216  */
217  if (!IsUnderPostmaster || AmStartupProcess() || AmCheckpointerProcess())
218  {
219  HASHCTL hash_ctl;
220 
221  /*
222  * XXX: The checkpointer needs to add entries to the pending ops table
223  * when absorbing fsync requests. That is done within a critical
224  * section, which isn't usually allowed, but we make an exception. It
225  * means that there's a theoretical possibility that you run out of
226  * memory while absorbing fsync requests, which leads to a PANIC.
227  * Fortunately the hash table is small so that's unlikely to happen in
228  * practice.
229  */
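230  pendingOpsCxt = AllocSetContextCreate(MdCxt,
231  "Pending ops context",
232  ALLOCSET_DEFAULT_SIZES);
233  MemoryContextAllowInCriticalSection(pendingOpsCxt, true);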
234 
235  MemSet(&hash_ctl, 0, sizeof(hash_ctl));
236  hash_ctl.keysize = sizeof(RelFileNode);
237  hash_ctl.entrysize = sizeof(PendingOperationEntry);
238  hash_ctl.hcxt = pendingOpsCxt;
239  pendingOpsTable = hash_create("Pending Ops Table",
240  100L,
241  &hash_ctl,
242  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
243  pendingUnlinks = NIL;
244  }
245 }
#define NIL
Definition: pg_list.h:69
#define AmStartupProcess()
Definition: miscadmin.h:405
#define HASH_CONTEXT
Definition: hsearch.h:93
void MemoryContextAllowInCriticalSection(MemoryContext context, bool allow)
Definition: mcxt.c:374
#define HASH_ELEM
Definition: hsearch.h:87
MemoryContext hcxt
Definition: hsearch.h:78
Size entrysize
Definition: hsearch.h:73
#define MemSet(start, val, len)
Definition: c.h:863
static HTAB * pendingOpsTable
Definition: md.c:155
static MemoryContext MdCxt
Definition: md.c:115
#define AmCheckpointerProcess()
Definition: miscadmin.h:407
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:165
struct RelFileNode RelFileNode
bool IsUnderPostmaster
Definition: globals.c:101
MemoryContext TopMemoryContext
Definition: mcxt.c:43
#define HASH_BLOBS
Definition: hsearch.h:88
MemoryContext AllocSetContextCreate(MemoryContext parent, const char *name, Size minContextSize, Size initBlockSize, Size maxBlockSize)
Definition: aset.c:322
HTAB * hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
Definition: dynahash.c:316
Size keysize
Definition: hsearch.h:72
static MemoryContext pendingOpsCxt
Definition: md.c:157
static List * pendingUnlinks
Definition: md.c:156
BlockNumber mdnblocks ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 873 of file md.c.

References _mdfd_openseg(), _mdnblocks(), Assert, elog, EXTENSION_FAIL, FATAL, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, and mdopen().

Referenced by mdextend(), mdimmedsync(), mdtruncate(), and mdwrite().

/*
 * mdnblocks() -- return the number of blocks in the given fork.
 *
 * Starts at the last segment already open for the fork and walks forward,
 * opening further segments, until it finds one shorter than RELSEG_SIZE or
 * the next segment cannot be opened.  A segment longer than RELSEG_SIZE is
 * reported with elog(FATAL).
 */
874 {
875  MdfdVec *v = mdopen(reln, forknum, EXTENSION_FAIL);
876  BlockNumber nblocks;
877  BlockNumber segno = 0;
878 
879  /* mdopen has opened the first segment */
880  Assert(reln->md_num_open_segs[forknum] > 0);
881 
882  /*
883  * Start from the last open segments, to avoid redundant seeks. We have
884  * previously verified that these segments are exactly RELSEG_SIZE long,
885  * and it's useless to recheck that each time.
886  *
887  * NOTE: this assumption could only be wrong if another backend has
888  * truncated the relation. We rely on higher code levels to handle that
889  * scenario by closing and re-opening the md fd, which is handled via
890  * relcache flush. (Since the checkpointer doesn't participate in
891  * relcache flush, it could have segment entries for inactive segments;
892  * that's OK because the checkpointer never needs to compute relation
893  * size.)
894  */
895  segno = reln->md_num_open_segs[forknum] - 1;
896  v = &reln->md_seg_fds[forknum][segno];
897 
898  for (;;)
899  {
900  nblocks = _mdnblocks(reln, forknum, v);
901  if (nblocks > ((BlockNumber) RELSEG_SIZE))
902  elog(FATAL, "segment too big");
903  if (nblocks < ((BlockNumber) RELSEG_SIZE))
904  return (segno * ((BlockNumber) RELSEG_SIZE)) + nblocks;
905 
906  /*
907  * If segment is exactly RELSEG_SIZE, advance to next one.
908  */
909  segno++;
910 
911  /*
912  * We used to pass O_CREAT here, but that has the disadvantage that
913  * it might create a segment which has vanished through some operating
914  * system misadventure. In such a case, creating the segment here
915  * undermines _mdfd_getseg's attempts to notice and report an error
916  * upon access to a missing segment.
917  */
918  v = _mdfd_openseg(reln, forknum, segno, 0);
919  if (v == NULL)
920  return segno * ((BlockNumber) RELSEG_SIZE);
921  }
922 }
uint32 BlockNumber
Definition: block.h:31
#define EXTENSION_FAIL
Definition: md.c:165
#define FATAL
Definition: elog.h:52
static MdfdVec * mdopen(SMgrRelation reln, ForkNumber forknum, int behavior)
Definition: md.c:574
Definition: md.c:109
static MdfdVec * _mdfd_openseg(SMgrRelation reln, ForkNumber forkno, BlockNumber segno, int oflags)
Definition: md.c:1773
#define Assert(condition)
Definition: c.h:681
static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:1941
int md_num_open_segs[MAX_FORKNUM+1]
Definition: smgr.h:71
struct _MdfdVec * md_seg_fds[MAX_FORKNUM+1]
Definition: smgr.h:72
#define elog
Definition: elog.h:219
static MdfdVec * mdopen ( SMgrRelation  reln,
ForkNumber  forknum,
int  behavior 
)
static

Definition at line 574 of file md.c.

References _fdvec_resize(), _mdnblocks(), Assert, ereport, errcode_for_file_access(), errmsg(), ERROR, EXTENSION_RETURN_NULL, fd(), FILE_POSSIBLY_DELETED, IsBootstrapProcessingMode, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, PathNameOpenFile(), pfree(), PG_BINARY, relpath, and SMgrRelationData::smgr_rnode.

Referenced by _mdfd_getseg(), mdexists(), and mdnblocks().

/*
 * mdopen() -- open segment 0 of the given fork, unless it is already open.
 *
 * Returns a pointer to the fork's first MdfdVec entry.  If the file cannot
 * be opened, returns NULL when behavior includes EXTENSION_RETURN_NULL and
 * errno satisfies FILE_POSSIBLY_DELETED; otherwise raises ERROR.
 *
 * NOTE(review): this listing skips source lines 596 and 607 (presumably the
 * condition guarding the O_CREAT retry and the first ereport argument) --
 * confirm against md.c before relying on the exact control flow shown here.
 */
575 {
576  MdfdVec *mdfd;
577  char *path;
578  File fd;
579 
580  /* No work if already open */
581  if (reln->md_num_open_segs[forknum] > 0)
582  return &reln->md_seg_fds[forknum][0];
583 
584  path = relpath(reln->smgr_rnode, forknum);
585 
586  fd = PathNameOpenFile(path, O_RDWR | PG_BINARY);
587 
588  if (fd < 0)
589  {
590  /*
591  * During bootstrap, there are cases where a system relation will be
592  * accessed (by internal backend processes) before the bootstrap
593  * script nominally creates it. Therefore, accept mdopen() as a
594  * substitute for mdcreate() in bootstrap mode only. (See mdcreate)
595  */
597  fd = PathNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY);
598  if (fd < 0)
599  {
600  if ((behavior & EXTENSION_RETURN_NULL) &&
601  FILE_POSSIBLY_DELETED(errno))
602  {
603  pfree(path);
604  return NULL;
605  }
606  ereport(ERROR,
608  errmsg("could not open file \"%s\": %m", path)));
609  }
610  }
611 
612  pfree(path);
613 
/* Record the newly opened file as segment 0 of this fork */
614  _fdvec_resize(reln, forknum, 1);
615  mdfd = &reln->md_seg_fds[forknum][0];
616  mdfd->mdfd_vfd = fd;
617  mdfd->mdfd_segno = 0;
618 
619  Assert(_mdnblocks(reln, forknum, mdfd) <= ((BlockNumber) RELSEG_SIZE));
620 
621  return mdfd;
622 }
File PathNameOpenFile(const char *fileName, int fileFlags)
Definition: fd.c:1315
BlockNumber mdfd_segno
Definition: md.c:112
uint32 BlockNumber
Definition: block.h:31
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define PG_BINARY
Definition: c.h:1044
void pfree(void *pointer)
Definition: mcxt.c:949
#define ERROR
Definition: elog.h:43
#define EXTENSION_RETURN_NULL
Definition: md.c:167
RelFileNodeBackend smgr_rnode
Definition: smgr.h:43
int errcode_for_file_access(void)
Definition: elog.c:598
#define ereport(elevel, rest)
Definition: elog.h:122
static void _fdvec_resize(SMgrRelation reln, ForkNumber forknum, int nseg)
Definition: md.c:1712
Definition: md.c:109
#define Assert(condition)
Definition: c.h:681
static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:1941
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:367
int md_num_open_segs[MAX_FORKNUM+1]
Definition: smgr.h:71
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define relpath(rnode, forknum)
Definition: relpath.h:71
struct _MdfdVec * md_seg_fds[MAX_FORKNUM+1]
Definition: smgr.h:72
File mdfd_vfd
Definition: md.c:111
int File
Definition: fd.h:49
#define FILE_POSSIBLY_DELETED(err)
Definition: md.c:66
void mdpostckpt ( void  )

Definition at line 1361 of file md.c.

References AbsorbFsyncRequests(), PendingUnlinkEntry::cycle_ctr, ereport, errcode_for_file_access(), errmsg(), linitial, list_delete_first(), MAIN_FORKNUM, mdckpt_cycle_ctr, NIL, pfree(), relpathperm, PendingUnlinkEntry::rnode, UNLINKS_PER_ABSORB, and WARNING.

/*
 * mdpostckpt() -- process the pendingUnlinks list.
 *
 * Loops while pendingUnlinks is non-empty, unlinking the MAIN_FORKNUM file
 * of each entry, and stops early at the first entry whose cycle_ctr equals
 * the current mdckpt_cycle_ctr.  ENOENT from unlink() is ignored; any other
 * unlink failure is reported as a WARNING only.
 *
 * NOTE(review): this listing skips source lines 1368, 1396, 1402 and 1413
 * (presumably the declaration of "entry", an ereport argument, the list
 * removal, and the AbsorbFsyncRequests() call) -- confirm against md.c.
 */
1362 {
1363  int absorb_counter;
1364 
1365  absorb_counter = UNLINKS_PER_ABSORB;
1366  while (pendingUnlinks != NIL)
1367  {
1369  char *path;
1370 
1371  /*
1372  * New entries are appended to the end, so if the entry is new we've
1373  * reached the end of old entries.
1374  *
1375  * Note: if just the right number of consecutive checkpoints fail, we
1376  * could be fooled here by cycle_ctr wraparound. However, the only
1377  * consequence is that we'd delay unlinking for one more checkpoint,
1378  * which is perfectly tolerable.
1379  */
1380  if (entry->cycle_ctr == mdckpt_cycle_ctr)
1381  break;
1382 
1383  /* Unlink the file */
1384  path = relpathperm(entry->rnode, MAIN_FORKNUM);
1385  if (unlink(path) < 0)
1386  {
1387  /*
1388  * There's a race condition, when the database is dropped at the
1389  * same time that we process the pending unlink requests. If the
1390  * DROP DATABASE deletes the file before we do, we will get ENOENT
1391  * here. rmtree() also has to ignore ENOENT errors, to deal with
1392  * the possibility that we delete the file first.
1393  */
1394  if (errno != ENOENT)
1395  ereport(WARNING,
1397  errmsg("could not remove file \"%s\": %m", path)));
1398  }
1399  pfree(path);
1400 
1401  /* And remove the list entry */
1403  pfree(entry);
1404 
1405  /*
1406  * As in mdsync, we don't want to stop absorbing fsync requests for a
1407  * long time when there are many deletions to be done. We can safely
1408  * call AbsorbFsyncRequests() at this point in the loop (note it might
1409  * try to delete list entries).
1410  */
1411  if (--absorb_counter <= 0)
1412  {
1414  absorb_counter = UNLINKS_PER_ABSORB;
1415  }
1416  }
1417 }
#define NIL
Definition: pg_list.h:69
#define relpathperm(rnode, forknum)
Definition: relpath.h:67
CycleCtr cycle_ctr
Definition: md.c:152
static CycleCtr mdckpt_cycle_ctr
Definition: md.c:160
void pfree(void *pointer)
Definition: mcxt.c:949
#define linitial(l)
Definition: pg_list.h:111
RelFileNode rnode
Definition: md.c:151
int errcode_for_file_access(void)
Definition: elog.c:598
#define ereport(elevel, rest)
Definition: elog.h:122
#define WARNING
Definition: elog.h:40
int errmsg(const char *fmt,...)
Definition: elog.c:797
void AbsorbFsyncRequests(void)
#define UNLINKS_PER_ABSORB
Definition: md.c:45
static List * pendingUnlinks
Definition: md.c:156
List * list_delete_first(List *list)
Definition: list.c:666
void mdpreckpt ( void  )

Definition at line 1346 of file md.c.

References mdckpt_cycle_ctr.

/*
 * mdpreckpt() -- advance mdckpt_cycle_ctr.
 *
 * The function does nothing else; the counter is compared against each
 * pending unlink entry's cycle_ctr in mdpostckpt().
 */
1347 {
1348  /*
1349  * Any unlink requests arriving after this point will be assigned the next
1350  * cycle counter, and won't be unlinked until next checkpoint.
1351  */
1352  mdckpt_cycle_ctr++;
1353 }
static CycleCtr mdckpt_cycle_ctr
Definition: md.c:160
void mdprefetch ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum 
)

Definition at line 659 of file md.c.

References _mdfd_getseg(), Assert, EXTENSION_FAIL, FilePrefetch(), _MdfdVec::mdfd_vfd, and WAIT_EVENT_DATA_FILE_PREFETCH.

/*
 * mdprefetch() -- request a prefetch of one block of the given fork.
 *
 * The body is compiled only when USE_PREFETCH is defined; otherwise the
 * function does nothing.
 */
660 {
661 #ifdef USE_PREFETCH
662  off_t seekpos;
663  MdfdVec *v;
664 
665  v = _mdfd_getseg(reln, forknum, blocknum, false, EXTENSION_FAIL);
666 
/* Byte offset of the requested block within its segment file */
667  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
668 
669  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
670 
/* The result of FilePrefetch() is deliberately discarded */
671  (void) FilePrefetch(v->mdfd_vfd, seekpos, BLCKSZ, WAIT_EVENT_DATA_FILE_PREFETCH);
672 #endif /* USE_PREFETCH */
673 }
static MdfdVec * _mdfd_getseg(SMgrRelation reln, ForkNumber forkno, BlockNumber blkno, bool skipFsync, int behavior)
Definition: md.c:1813
uint32 BlockNumber
Definition: block.h:31
#define EXTENSION_FAIL
Definition: md.c:165
Definition: md.c:109
int FilePrefetch(File file, off_t offset, int amount, uint32 wait_event_info)
Definition: fd.c:1613
#define Assert(condition)
Definition: c.h:681
File mdfd_vfd
Definition: md.c:111
void mdread ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
char *  buffer 
)

Definition at line 731 of file md.c.

References _mdfd_getseg(), Assert, RelFileNodeBackend::backend, RelFileNode::dbNode, ereport, errcode(), errcode_for_file_access(), errmsg(), ERROR, EXTENSION_CREATE_RECOVERY, EXTENSION_FAIL, FilePathName(), FileRead(), FileSeek(), InRecovery, _MdfdVec::mdfd_vfd, MemSet, RelFileNodeBackend::node, RelFileNode::relNode, SMgrRelationData::smgr_rnode, RelFileNode::spcNode, WAIT_EVENT_DATA_FILE_READ, and zero_damaged_pages.

/*
 * mdread() -- read one block (BLCKSZ bytes) of the given fork into buffer.
 *
 * A failed seek or a negative FileRead() result raises ERROR.  A short read
 * is handled by the final branch, which either zero-fills the buffer or
 * raises ERROR with ERRCODE_DATA_CORRUPTED.
 *
 * NOTE(review): this listing skips source lines 745, 753, 771 and 783
 * (presumably the behavior flags passed to _mdfd_getseg, two ereport
 * arguments, and the condition selecting zero-fill) -- confirm against md.c.
 */
733 {
734  off_t seekpos;
735  int nbytes;
736  MdfdVec *v;
737 
738  TRACE_POSTGRESQL_SMGR_MD_READ_START(forknum, blocknum,
739  reln->smgr_rnode.node.spcNode,
740  reln->smgr_rnode.node.dbNode,
741  reln->smgr_rnode.node.relNode,
742  reln->smgr_rnode.backend);
743 
744  v = _mdfd_getseg(reln, forknum, blocknum, false,
746 
/* Byte offset of the requested block within its segment file */
747  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
748 
749  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
750 
751  if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
752  ereport(ERROR,
754  errmsg("could not seek to block %u in file \"%s\": %m",
755  blocknum, FilePathName(v->mdfd_vfd))));
756 
757  nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ, WAIT_EVENT_DATA_FILE_READ);
758 
759  TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum,
760  reln->smgr_rnode.node.spcNode,
761  reln->smgr_rnode.node.dbNode,
762  reln->smgr_rnode.node.relNode,
763  reln->smgr_rnode.backend,
764  nbytes,
765  BLCKSZ);
766 
767  if (nbytes != BLCKSZ)
768  {
769  if (nbytes < 0)
770  ereport(ERROR,
772  errmsg("could not read block %u in file \"%s\": %m",
773  blocknum, FilePathName(v->mdfd_vfd))));
774 
775  /*
776  * Short read: we are at or past EOF, or we read a partial block at
777  * EOF. Normally this is an error; upper levels should never try to
778  * read a nonexistent block. However, if zero_damaged_pages is ON or
779  * we are InRecovery, we should instead return zeroes without
780  * complaining. This allows, for example, the case of trying to
781  * update a block that was later truncated away.
782  */
784  MemSet(buffer, 0, BLCKSZ);
785  else
786  ereport(ERROR,
787  (errcode(ERRCODE_DATA_CORRUPTED),
788  errmsg("could not read block %u in file \"%s\": read only %d of %d bytes",
789  blocknum, FilePathName(v->mdfd_vfd),
790  nbytes, BLCKSZ)));
791  }
792 }
static MdfdVec * _mdfd_getseg(SMgrRelation reln, ForkNumber forkno, BlockNumber blkno, bool skipFsync, int behavior)
Definition: md.c:1813
bool InRecovery
Definition: xlog.c:194
int errcode(int sqlerrcode)
Definition: elog.c:575
#define MemSet(start, val, len)
Definition: c.h:863
uint32 BlockNumber
Definition: block.h:31
char * FilePathName(File file)
Definition: fd.c:1997
#define EXTENSION_FAIL
Definition: md.c:165
#define ERROR
Definition: elog.h:43
RelFileNodeBackend smgr_rnode
Definition: smgr.h:43
int errcode_for_file_access(void)
Definition: elog.c:598
#define EXTENSION_CREATE_RECOVERY
Definition: md.c:171
#define ereport(elevel, rest)
Definition: elog.h:122
int FileRead(File file, char *buffer, int amount, uint32 wait_event_info)
Definition: fd.c:1668
Definition: md.c:109
RelFileNode node
Definition: relfilenode.h:74
#define Assert(condition)
Definition: c.h:681
WalTimeSample buffer[LAG_TRACKER_BUFFER_SIZE]
Definition: walsender.c:214
BackendId backend
Definition: relfilenode.h:75
int errmsg(const char *fmt,...)
Definition: elog.c:797
off_t FileSeek(File file, off_t offset, int whence)
Definition: fd.c:1874
File mdfd_vfd
Definition: md.c:111
bool zero_damaged_pages
Definition: bufmgr.c:108
void mdsync ( void  )

Definition at line 1054 of file md.c.

References _mdfd_getseg(), _mdfd_segpath(), AbsorbFsyncRequests(), Assert, bms_first_member(), bms_free(), PendingOperationEntry::canceled, CheckpointStats, CheckpointStatsData::ckpt_agg_sync_time, CheckpointStatsData::ckpt_longest_sync, CheckpointStatsData::ckpt_sync_rels, PendingOperationEntry::cycle_ctr, DEBUG1, elog, enableFsync, ereport, errcode_for_file_access(), errmsg(), ERROR, EXTENSION_DONT_CHECK_SIZE, EXTENSION_RETURN_NULL, FILE_POSSIBLY_DELETED, FilePathName(), FileSync(), FSYNCS_PER_ABSORB, HASH_REMOVE, hash_search(), hash_seq_init(), hash_seq_search(), INSTR_TIME_GET_MICROSEC, INSTR_TIME_SET_CURRENT, INSTR_TIME_SUBTRACT, InvalidBackendId, log_checkpoints, longest(), MAX_FORKNUM, _MdfdVec::mdfd_vfd, mdsync_cycle_ctr, pfree(), PendingOperationEntry::requests, PendingOperationEntry::rnode, smgropen(), and WAIT_EVENT_DATA_FILE_SYNC.

Referenced by SetForwardFsyncRequests().

/*
 * mdsync() -- process the fsync requests accumulated in pendingOpsTable.
 *
 * Raises ERROR if pendingOpsTable has not been created.  Entries stamped
 * with the newly advanced mdsync_cycle_ctr are skipped and left for a later
 * call.  The static flag mdsync_in_progress stays true if a call does not
 * run to completion; the next call then re-stamps every entry with the
 * current counter before advancing it.  On completion the number of synced
 * files and the aggregate sync time are stored in CheckpointStats.
 *
 * NOTE(review): this listing skips source lines 1087, 1187, 1230-1231, 1236,
 * 1276, 1281, 1290 and 1324 (presumably the AbsorbFsyncRequests() calls, the
 * behavior flags for _mdfd_getseg, the FileSync() test, ereport arguments,
 * and the ckpt_longest_sync assignment) -- confirm against md.c.
 */
1055 {
1056  static bool mdsync_in_progress = false;
1057 
1058  HASH_SEQ_STATUS hstat;
1059  PendingOperationEntry *entry;
1060  int absorb_counter;
1061 
1062  /* Statistics on sync times */
1063  int processed = 0;
1064  instr_time sync_start,
1065  sync_end,
1066  sync_diff;
1067  uint64 elapsed;
1068  uint64 longest = 0;
1069  uint64 total_elapsed = 0;
1070 
1071  /*
1072  * This is only called during checkpoints, and checkpoints should only
1073  * occur in processes that have created a pendingOpsTable.
1074  */
1075  if (!pendingOpsTable)
1076  elog(ERROR, "cannot sync without a pendingOpsTable");
1077 
1078  /*
1079  * If we are in the checkpointer, the sync had better include all fsync
1080  * requests that were queued by backends up to this point. The tightest
1081  * race condition that could occur is that a buffer that must be written
1082  * and fsync'd for the checkpoint could have been dumped by a backend just
1083  * before it was visited by BufferSync(). We know the backend will have
1084  * queued an fsync request before clearing the buffer's dirtybit, so we
1085  * are safe as long as we do an Absorb after completing BufferSync().
1086  */
1088 
1089  /*
1090  * To avoid excess fsync'ing (in the worst case, maybe a never-terminating
1091  * checkpoint), we want to ignore fsync requests that are entered into the
1092  * hashtable after this point --- they should be processed next time,
1093  * instead. We use mdsync_cycle_ctr to tell old entries apart from new
1094  * ones: new ones will have cycle_ctr equal to the incremented value of
1095  * mdsync_cycle_ctr.
1096  *
1097  * In normal circumstances, all entries present in the table at this point
1098  * will have cycle_ctr exactly equal to the current (about to be old)
1099  * value of mdsync_cycle_ctr. However, if we fail partway through the
1100  * fsync'ing loop, then older values of cycle_ctr might remain when we
1101  * come back here to try again. Repeated checkpoint failures would
1102  * eventually wrap the counter around to the point where an old entry
1103  * might appear new, causing us to skip it, possibly allowing a checkpoint
1104  * to succeed that should not have. To forestall wraparound, any time the
1105  * previous mdsync() failed to complete, run through the table and
1106  * forcibly set cycle_ctr = mdsync_cycle_ctr.
1107  *
1108  * Think not to merge this loop with the main loop, as the problem is
1109  * exactly that that loop may fail before having visited all the entries.
1110  * From a performance point of view it doesn't matter anyway, as this path
1111  * will never be taken in a system that's functioning normally.
1112  */
1113  if (mdsync_in_progress)
1114  {
1115  /* prior try failed, so update any stale cycle_ctr values */
1116  hash_seq_init(&hstat, pendingOpsTable);
1117  while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL)
1118  {
1119  entry->cycle_ctr = mdsync_cycle_ctr;
1120  }
1121  }
1122 
1123  /* Advance counter so that new hashtable entries are distinguishable */
1124  mdsync_cycle_ctr++;
1125 
1126  /* Set flag to detect failure if we don't reach the end of the loop */
1127  mdsync_in_progress = true;
1128 
1129  /* Now scan the hashtable for fsync requests to process */
1130  absorb_counter = FSYNCS_PER_ABSORB;
1131  hash_seq_init(&hstat, pendingOpsTable);
1132  while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL)
1133  {
1134  ForkNumber forknum;
1135 
1136  /*
1137  * If the entry is new then don't process it this time; it might
1138  * contain multiple fsync-request bits, but they are all new. Note
1139  * "continue" bypasses the hash-remove call at the bottom of the loop.
1140  */
1141  if (entry->cycle_ctr == mdsync_cycle_ctr)
1142  continue;
1143 
1144  /* Else assert we haven't missed it */
1145  Assert((CycleCtr) (entry->cycle_ctr + 1) == mdsync_cycle_ctr);
1146 
1147  /*
1148  * Scan over the forks and segments represented by the entry.
1149  *
1150  * The bitmap manipulations are slightly tricky, because we can call
1151  * AbsorbFsyncRequests() inside the loop and that could result in
1152  * bms_add_member() modifying and even re-palloc'ing the bitmapsets.
1153  * This is okay because we unlink each bitmapset from the hashtable
1154  * entry before scanning it. That means that any incoming fsync
1155  * requests will be processed now if they reach the table before we
1156  * begin to scan their fork.
1157  */
1158  for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
1159  {
1160  Bitmapset *requests = entry->requests[forknum];
1161  int segno;
1162 
1163  entry->requests[forknum] = NULL;
1164  entry->canceled[forknum] = false;
1165 
1166  while ((segno = bms_first_member(requests)) >= 0)
1167  {
1168  int failures;
1169 
1170  /*
1171  * If fsync is off then we don't have to bother opening the
1172  * file at all. (We delay checking until this point so that
1173  * changing fsync on the fly behaves sensibly.)
1174  */
1175  if (!enableFsync)
1176  continue;
1177 
1178  /*
1179  * If in checkpointer, we want to absorb pending requests
1180  * every so often to prevent overflow of the fsync request
1181  * queue. It is unspecified whether newly-added entries will
1182  * be visited by hash_seq_search, but we don't care since we
1183  * don't need to process them anyway.
1184  */
1185  if (--absorb_counter <= 0)
1186  {
1188  absorb_counter = FSYNCS_PER_ABSORB;
1189  }
1190 
1191  /*
1192  * The fsync table could contain requests to fsync segments
1193  * that have been deleted (unlinked) by the time we get to
1194  * them. Rather than just hoping an ENOENT (or EACCES on
1195  * Windows) error can be ignored, what we do on error is
1196  * absorb pending requests and then retry. Since mdunlink()
1197  * queues a "cancel" message before actually unlinking, the
1198  * fsync request is guaranteed to be marked canceled after the
1199  * absorb if it really was this case. DROP DATABASE likewise
1200  * has to tell us to forget fsync requests before it starts
1201  * deletions.
1202  */
1203  for (failures = 0;; failures++) /* loop exits at "break" */
1204  {
1205  SMgrRelation reln;
1206  MdfdVec *seg;
1207  char *path;
1208  int save_errno;
1209 
1210  /*
1211  * Find or create an smgr hash entry for this relation.
1212  * This may seem a bit unclean -- md calling smgr? But
1213  * it's really the best solution. It ensures that the
1214  * open file reference isn't permanently leaked if we get
1215  * an error here. (You may say "but an unreferenced
1216  * SMgrRelation is still a leak!" Not really, because the
1217  * only case in which a checkpoint is done by a process
1218  * that isn't about to shut down is in the checkpointer,
1219  * and it will periodically do smgrcloseall(). This fact
1220  * justifies our not closing the reln in the success path
1221  * either, which is a good thing since in non-checkpointer
1222  * cases we couldn't safely do that.)
1223  */
1224  reln = smgropen(entry->rnode, InvalidBackendId);
1225 
1226  /* Attempt to open and fsync the target segment */
1227  seg = _mdfd_getseg(reln, forknum,
1228  (BlockNumber) segno * (BlockNumber) RELSEG_SIZE,
1229  false,
1232 
1233  INSTR_TIME_SET_CURRENT(sync_start);
1234 
1235  if (seg != NULL &&
1237  {
1238  /* Success; update statistics about sync timing */
1239  INSTR_TIME_SET_CURRENT(sync_end);
1240  sync_diff = sync_end;
1241  INSTR_TIME_SUBTRACT(sync_diff, sync_start);
1242  elapsed = INSTR_TIME_GET_MICROSEC(sync_diff);
1243  if (elapsed > longest)
1244  longest = elapsed;
1245  total_elapsed += elapsed;
1246  processed++;
1247  if (log_checkpoints)
1248  elog(DEBUG1, "checkpoint sync: number=%d file=%s time=%.3f msec",
1249  processed,
1250  FilePathName(seg->mdfd_vfd),
1251  (double) elapsed / 1000);
1252 
1253  break; /* out of retry loop */
1254  }
1255 
1256  /* Compute file name for use in message */
1257  save_errno = errno;
1258  path = _mdfd_segpath(reln, forknum, (BlockNumber) segno);
1259  errno = save_errno;
1260 
1261  /*
1262  * It is possible that the relation has been dropped or
1263  * truncated since the fsync request was entered.
1264  * Therefore, allow ENOENT, but only if we didn't fail
1265  * already on this file. This applies both for
1266  * _mdfd_getseg() and for FileSync, since fd.c might have
1267  * closed the file behind our back.
1268  *
1269  * XXX is there any point in allowing more than one retry?
1270  * Don't see one at the moment, but easy to change the
1271  * test here if so.
1272  */
1273  if (!FILE_POSSIBLY_DELETED(errno) ||
1274  failures > 0)
1275  ereport(ERROR,
1277  errmsg("could not fsync file \"%s\": %m",
1278  path)));
1279  else
1280  ereport(DEBUG1,
1282  errmsg("could not fsync file \"%s\" but retrying: %m",
1283  path)));
1284  pfree(path);
1285 
1286  /*
1287  * Absorb incoming requests and check to see if a cancel
1288  * arrived for this relation fork.
1289  */
1291  absorb_counter = FSYNCS_PER_ABSORB; /* might as well... */
1292 
1293  if (entry->canceled[forknum])
1294  break;
1295  } /* end retry loop */
1296  }
1297  bms_free(requests);
1298  }
1299 
1300  /*
1301  * We've finished everything that was requested before we started to
1302  * scan the entry. If no new requests have been inserted meanwhile,
1303  * remove the entry. Otherwise, update its cycle counter, as all the
1304  * requests now in it must have arrived during this cycle.
1305  */
1306  for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
1307  {
1308  if (entry->requests[forknum] != NULL)
1309  break;
1310  }
1311  if (forknum <= MAX_FORKNUM)
1312  entry->cycle_ctr = mdsync_cycle_ctr;
1313  else
1314  {
1315  /* Okay to remove it */
1316  if (hash_search(pendingOpsTable, &entry->rnode,
1317  HASH_REMOVE, NULL) == NULL)
1318  elog(ERROR, "pendingOpsTable corrupted");
1319  }
1320  } /* end loop over hashtable entries */
1321 
1322  /* Return sync performance metrics for report at checkpoint end */
1323  CheckpointStats.ckpt_sync_rels = processed;
1325  CheckpointStats.ckpt_agg_sync_time = total_elapsed;
1326 
1327  /* Flag successful completion of mdsync */
1328  mdsync_in_progress = false;
1329 }
uint64 ckpt_agg_sync_time
Definition: xlog.h:213
bool log_checkpoints
Definition: xlog.c:102
int bms_first_member(Bitmapset *a)
Definition: bitmapset.c:885
int ckpt_sync_rels
Definition: xlog.h:211
#define DEBUG1
Definition: elog.h:25
static MdfdVec * _mdfd_getseg(SMgrRelation reln, ForkNumber forkno, BlockNumber blkno, bool skipFsync, int behavior)
Definition: md.c:1813
#define EXTENSION_DONT_CHECK_SIZE
Definition: md.c:179
static CycleCtr mdsync_cycle_ctr
Definition: md.c:159
#define FSYNCS_PER_ABSORB
Definition: md.c:44
struct timeval instr_time
Definition: instr_time.h:147
RelFileNode rnode
Definition: md.c:141
uint32 BlockNumber
Definition: block.h:31
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:902
char * FilePathName(File file)
Definition: fd.c:1997
CycleCtr cycle_ctr
Definition: md.c:142
static HTAB * pendingOpsTable
Definition: md.c:155
bool canceled[MAX_FORKNUM+1]
Definition: md.c:146
void pfree(void *pointer)
Definition: mcxt.c:949
#define ERROR
Definition: elog.h:43
#define EXTENSION_RETURN_NULL
Definition: md.c:167
#define INSTR_TIME_SUBTRACT(x, y)
Definition: instr_time.h:167
int FileSync(File file, uint32 wait_event_info)
Definition: fd.c:1853
int errcode_for_file_access(void)
Definition: elog.c:598
#define ereport(elevel, rest)
Definition: elog.h:122
SMgrRelation smgropen(RelFileNode rnode, BackendId backend)
Definition: smgr.c:137
ForkNumber
Definition: relpath.h:24
#define InvalidBackendId
Definition: backendid.h:23
Definition: md.c:109
void bms_free(Bitmapset *a)
Definition: bitmapset.c:201
#define Assert(condition)
Definition: c.h:681
#define INSTR_TIME_GET_MICROSEC(t)
Definition: instr_time.h:202
CheckpointStatsData CheckpointStats
Definition: xlog.c:175
#define MAX_FORKNUM
Definition: relpath.h:39
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1385
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1375
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:153
static chr * longest(struct vars *v, struct dfa *d, chr *start, chr *stop, int *hitstopp)
Definition: rege_dfa.c:42
bool enableFsync
Definition: globals.c:111
int errmsg(const char *fmt,...)
Definition: elog.c:797
Bitmapset * requests[MAX_FORKNUM+1]
Definition: md.c:144
#define elog
Definition: elog.h:219
void AbsorbFsyncRequests(void)
File mdfd_vfd
Definition: md.c:111
uint16 CycleCtr
Definition: md.c:137
#define FILE_POSSIBLY_DELETED(err)
Definition: md.c:66
static char * _mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
Definition: md.c:1750
uint64 ckpt_longest_sync
Definition: xlog.h:212
void mdtruncate ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  nblocks 
)

Definition at line 928 of file md.c.

References _fdvec_resize(), Assert, ereport, errcode_for_file_access(), errmsg(), ERROR, FileClose(), FilePathName(), FileTruncate(), InRecovery, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, mdnblocks(), register_dirty_segment(), relpath, SMgrRelationData::smgr_rnode, SmgrIsTemp, and WAIT_EVENT_DATA_FILE_TRUNCATE.

/*
 * mdtruncate() -- truncate the given fork to nblocks blocks.
 *
 * A request larger than the current size raises ERROR, except when
 * InRecovery, where it is silently ignored.  A request equal to the current
 * size is a no-op.  Otherwise open segments are visited from last to first:
 * segments wholly beyond the new end are closed and dropped from the fd
 * array, and the segment containing the new end is truncated to the right
 * length.  For non-temp relations each touched segment is passed to
 * register_dirty_segment().
 *
 * NOTE(review): this listing skips source lines 971, 973 and 999 (presumably
 * the FileTruncate() test for inactive segments and two ereport arguments)
 * -- confirm against md.c.
 */
929 {
930  BlockNumber curnblk;
931  BlockNumber priorblocks;
932  int curopensegs;
933 
934  /*
935  * NOTE: mdnblocks makes sure we have opened all active segments, so that
936  * truncation loop will get them all!
937  */
938  curnblk = mdnblocks(reln, forknum);
939  if (nblocks > curnblk)
940  {
941  /* Bogus request ... but no complaint if InRecovery */
942  if (InRecovery)
943  return;
944  ereport(ERROR,
945  (errmsg("could not truncate file \"%s\" to %u blocks: it's only %u blocks now",
946  relpath(reln->smgr_rnode, forknum),
947  nblocks, curnblk)));
948  }
949  if (nblocks == curnblk)
950  return; /* no work */
951 
952  /*
953  * Truncate segments, starting at the last one. Starting at the end makes
954  * managing the memory for the fd array easier, should there be errors.
955  */
956  curopensegs = reln->md_num_open_segs[forknum];
957  while (curopensegs > 0)
958  {
959  MdfdVec *v;
960 
/*
 * NOTE(review): unlike line 986 below, RELSEG_SIZE is not cast to
 * BlockNumber here, so if it is a plain int constant the product is
 * computed in int before conversion -- confirm this cannot overflow for
 * relations with very many segments.
 */
961  priorblocks = (curopensegs - 1) * RELSEG_SIZE;
962 
963  v = &reln->md_seg_fds[forknum][curopensegs - 1];
964 
965  if (priorblocks > nblocks)
966  {
967  /*
968  * This segment is no longer active. We truncate the file, but do
969  * not delete it, for reasons explained in the header comments.
970  */
972  ereport(ERROR,
974  errmsg("could not truncate file \"%s\": %m",
975  FilePathName(v->mdfd_vfd))));
976 
977  if (!SmgrIsTemp(reln))
978  register_dirty_segment(reln, forknum, v);
979 
980  /* we never drop the 1st segment */
981  Assert(v != &reln->md_seg_fds[forknum][0]);
982 
983  FileClose(v->mdfd_vfd);
984  _fdvec_resize(reln, forknum, curopensegs - 1);
985  }
986  else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks)
987  {
988  /*
989  * This is the last segment we want to keep. Truncate the file to
990  * the right length. NOTE: if nblocks is exactly a multiple K of
991  * RELSEG_SIZE, we will truncate the K+1st segment to 0 length but
992  * keep it. This adheres to the invariant given in the header
993  * comments.
994  */
995  BlockNumber lastsegblocks = nblocks - priorblocks;
996 
997  if (FileTruncate(v->mdfd_vfd, (off_t) lastsegblocks * BLCKSZ, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0)
998  ereport(ERROR,
1000  errmsg("could not truncate file \"%s\" to %u blocks: %m",
1001  FilePathName(v->mdfd_vfd),
1002  nblocks)));
1003  if (!SmgrIsTemp(reln))
1004  register_dirty_segment(reln, forknum, v);
1005  }
1006  else
1007  {
1008  /*
1009  * We still need this segment, so nothing to do for this and any
1010  * earlier segment.
1011  */
1012  break;
1013  }
1014  curopensegs--;
1015  }
1016 }
BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:873
bool InRecovery
Definition: xlog.c:194
uint32 BlockNumber
Definition: block.h:31
char * FilePathName(File file)
Definition: fd.c:1997
#define SmgrIsTemp(smgr)
Definition: smgr.h:80
#define ERROR
Definition: elog.h:43
RelFileNodeBackend smgr_rnode
Definition: smgr.h:43
int errcode_for_file_access(void)
Definition: elog.c:598
#define ereport(elevel, rest)
Definition: elog.h:122
static void _fdvec_resize(SMgrRelation reln, ForkNumber forknum, int nseg)
Definition: md.c:1712
Definition: md.c:109
void FileClose(File file)
Definition: fd.c:1516
#define Assert(condition)
Definition: c.h:681
int md_num_open_segs[MAX_FORKNUM+1]
Definition: smgr.h:71
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define relpath(rnode, forknum)
Definition: relpath.h:71
struct _MdfdVec * md_seg_fds[MAX_FORKNUM+1]
Definition: smgr.h:72
int FileTruncate(File file, off_t offset, uint32 wait_event_info)
Definition: fd.c:1962
File mdfd_vfd
Definition: md.c:111
static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:1429
void mdunlink ( RelFileNodeBackend  rnode,
ForkNumber  forkNum,
bool  isRedo 
)

Definition at line 387 of file md.c.

References ForgetRelationFsyncRequests(), InvalidForkNumber, MAX_FORKNUM, mdunlinkfork(), RelFileNodeBackend::node, and RelFileNodeBackendIsTemp.

388 {
389  /*
390  * We have to clean out any pending fsync requests for the doomed
391  * relation, else the next mdsync() will fail. There can't be any such
392  * requests for a temp relation, though. We can send just one request
393  * even when deleting multiple forks, since the fsync queuing code accepts
394  * the "InvalidForkNumber = all forks" convention.
395  */
396  if (!RelFileNodeBackendIsTemp(rnode))
397  ForgetRelationFsyncRequests(rnode.node, forkNum);
398 
399  /* Now do the per-fork work */
400  if (forkNum == InvalidForkNumber)
401  {
402  for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++)
403  mdunlinkfork(rnode, forkNum, isRedo);
404  }
405  else
406  mdunlinkfork(rnode, forkNum, isRedo);
407 }
#define RelFileNodeBackendIsTemp(rnode)
Definition: relfilenode.h:78
void ForgetRelationFsyncRequests(RelFileNode rnode, ForkNumber forknum)
Definition: md.c:1652
static void mdunlinkfork(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo)
Definition: md.c:410
RelFileNode node
Definition: relfilenode.h:74
#define MAX_FORKNUM
Definition: relpath.h:39
static void mdunlinkfork ( RelFileNodeBackend  rnode,
ForkNumber  forkNum,
bool  isRedo 
)
static

Definition at line 410 of file md.c.

References CloseTransientFile(), ereport, errcode_for_file_access(), errmsg(), fd(), ftruncate, MAIN_FORKNUM, OpenTransientFile(), palloc(), pfree(), PG_BINARY, register_unlink(), RelFileNodeBackendIsTemp, relpath, and WARNING.

Referenced by mdunlink().

411 {
412  char *path;
413  int ret;
414 
415  path = relpath(rnode, forkNum);
416 
417  /*
418  * Delete or truncate the first segment.
419  */
420  if (isRedo || forkNum != MAIN_FORKNUM || RelFileNodeBackendIsTemp(rnode))
421  {
422  ret = unlink(path);
423  if (ret < 0 && errno != ENOENT)
424  ereport(WARNING,
425  (errcode_for_file_access(),
426  errmsg("could not remove file \"%s\": %m", path)));
427  }
428  else
429  {
430  /* truncate(2) would be easier here, but Windows hasn't got it */
431  int fd;
432 
433  fd = OpenTransientFile(path, O_RDWR | PG_BINARY);
434  if (fd >= 0)
435  {
436  int save_errno;
437 
438  ret = ftruncate(fd, 0);
439  save_errno = errno;
440  CloseTransientFile(fd);
441  errno = save_errno;
442  }
443  else
444  ret = -1;
445  if (ret < 0 && errno != ENOENT)
446  ereport(WARNING,
447  (errcode_for_file_access(),
448  errmsg("could not truncate file \"%s\": %m", path)));
449 
450  /* Register request to unlink first segment later */
451  register_unlink(rnode);
452  }
453 
454  /*
455  * Delete any additional segments.
456  */
457  if (ret >= 0)
458  {
459  char *segpath = (char *) palloc(strlen(path) + 12);
460  BlockNumber segno;
461 
462  /*
463  * Note that because we loop until getting ENOENT, we will correctly
464  * remove all inactive segments as well as active ones.
465  */
466  for (segno = 1;; segno++)
467  {
468  sprintf(segpath, "%s.%u", path, segno);
469  if (unlink(segpath) < 0)
470  {
471  /* ENOENT is expected after the last segment... */
472  if (errno != ENOENT)
473  ereport(WARNING,
474  (errcode_for_file_access(),
475  errmsg("could not remove file \"%s\": %m", segpath)));
476  break;
477  }
478  }
479  pfree(segpath);
480  }
481 
482  pfree(path);
483 }
#define RelFileNodeBackendIsTemp(rnode)
Definition: relfilenode.h:78
uint32 BlockNumber
Definition: block.h:31
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define PG_BINARY
Definition: c.h:1044
void pfree(void *pointer)
Definition: mcxt.c:949
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2167
int errcode_for_file_access(void)
Definition: elog.c:598
#define ereport(elevel, rest)
Definition: elog.h:122
int CloseTransientFile(int fd)
Definition: fd.c:2337
#define WARNING
Definition: elog.h:40
#define ftruncate(a, b)
Definition: win32.h:59
void * palloc(Size size)
Definition: mcxt.c:848
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define relpath(rnode, forknum)
Definition: relpath.h:71
static void register_unlink(RelFileNodeBackend rnode)
Definition: md.c:1465
void mdwrite ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
char *  buffer,
bool  skipFsync 
)

Definition at line 802 of file md.c.

References _mdfd_getseg(), Assert, RelFileNodeBackend::backend, RelFileNode::dbNode, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXTENSION_CREATE_RECOVERY, EXTENSION_FAIL, FilePathName(), FileSeek(), FileWrite(), _MdfdVec::mdfd_vfd, mdnblocks(), RelFileNodeBackend::node, register_dirty_segment(), RelFileNode::relNode, SMgrRelationData::smgr_rnode, SmgrIsTemp, RelFileNode::spcNode, and WAIT_EVENT_DATA_FILE_WRITE.

804 {
805  off_t seekpos;
806  int nbytes;
807  MdfdVec *v;
808 
809  /* This assert is too expensive to have on normally ... */
810 #ifdef CHECK_WRITE_VS_EXTEND
811  Assert(blocknum < mdnblocks(reln, forknum));
812 #endif
813 
814  TRACE_POSTGRESQL_SMGR_MD_WRITE_START(forknum, blocknum,
815  reln->smgr_rnode.node.spcNode,
816  reln->smgr_rnode.node.dbNode,
817  reln->smgr_rnode.node.relNode,
818  reln->smgr_rnode.backend);
819 
820  v = _mdfd_getseg(reln, forknum, blocknum, skipFsync,
821  EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
822 
823  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
824 
825  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
826 
827  if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
828  ereport(ERROR,
829  (errcode_for_file_access(),
830  errmsg("could not seek to block %u in file \"%s\": %m",
831  blocknum, FilePathName(v->mdfd_vfd))));
832 
833  nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, WAIT_EVENT_DATA_FILE_WRITE);
834 
835  TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum,
836  reln->smgr_rnode.node.spcNode,
837  reln->smgr_rnode.node.dbNode,
838  reln->smgr_rnode.node.relNode,
839  reln->smgr_rnode.backend,
840  nbytes,
841  BLCKSZ);
842 
843  if (nbytes != BLCKSZ)
844  {
845  if (nbytes < 0)
846  ereport(ERROR,
847  (errcode_for_file_access(),
848  errmsg("could not write block %u in file \"%s\": %m",
849  blocknum, FilePathName(v->mdfd_vfd))));
850  /* short write: complain appropriately */
851  ereport(ERROR,
852  (errcode(ERRCODE_DISK_FULL),
853  errmsg("could not write block %u in file \"%s\": wrote only %d of %d bytes",
854  blocknum,
855  FilePathName(v->mdfd_vfd),
856  nbytes, BLCKSZ),
857  errhint("Check free disk space.")));
858  }
859 
860  if (!skipFsync && !SmgrIsTemp(reln))
861  register_dirty_segment(reln, forknum, v);
862 }
static MdfdVec * _mdfd_getseg(SMgrRelation reln, ForkNumber forkno, BlockNumber blkno, bool skipFsync, int behavior)
Definition: md.c:1813
int errhint(const char *fmt,...)
Definition: elog.c:987
BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:873
int errcode(int sqlerrcode)
Definition: elog.c:575
uint32 BlockNumber
Definition: block.h:31
char * FilePathName(File file)
Definition: fd.c:1997
#define EXTENSION_FAIL
Definition: md.c:165
#define SmgrIsTemp(smgr)
Definition: smgr.h:80
#define ERROR
Definition: elog.h:43
RelFileNodeBackend smgr_rnode
Definition: smgr.h:43
int errcode_for_file_access(void)
Definition: elog.c:598
#define EXTENSION_CREATE_RECOVERY
Definition: md.c:171
int FileWrite(File file, char *buffer, int amount, uint32 wait_event_info)
Definition: fd.c:1732
#define ereport(elevel, rest)
Definition: elog.h:122
Definition: md.c:109
RelFileNode node
Definition: relfilenode.h:74
#define Assert(condition)
Definition: c.h:681
WalTimeSample buffer[LAG_TRACKER_BUFFER_SIZE]
Definition: walsender.c:214
BackendId backend
Definition: relfilenode.h:75
int errmsg(const char *fmt,...)
Definition: elog.c:797
off_t FileSeek(File file, off_t offset, int whence)
Definition: fd.c:1874
File mdfd_vfd
Definition: md.c:111
static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:1429
void mdwriteback ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
BlockNumber  nblocks 
)

Definition at line 682 of file md.c.

References _mdfd_getseg(), Assert, EXTENSION_RETURN_NULL, FileWriteback(), _MdfdVec::mdfd_vfd, and WAIT_EVENT_DATA_FILE_FLUSH.

684 {
685  /*
686  * Issue flush requests in as few requests as possible; have to split at
687  * segment boundaries though, since those are actually separate files.
688  */
689  while (nblocks > 0)
690  {
691  BlockNumber nflush = nblocks;
692  off_t seekpos;
693  MdfdVec *v;
694  int segnum_start,
695  segnum_end;
696 
697  v = _mdfd_getseg(reln, forknum, blocknum, true /* not used */ ,
698  EXTENSION_RETURN_NULL);
699 
700  /*
701  * We might be flushing buffers of already removed relations, that's
702  * ok, just ignore that case.
703  */
704  if (!v)
705  return;
706 
707  /* compute offset inside the current segment */
708  segnum_start = blocknum / RELSEG_SIZE;
709 
710  /* compute number of desired writes within the current segment */
711  segnum_end = (blocknum + nblocks - 1) / RELSEG_SIZE;
712  if (segnum_start != segnum_end)
713  nflush = RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE));
714 
715  Assert(nflush >= 1);
716  Assert(nflush <= nblocks);
717 
718  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
719 
720  FileWriteback(v->mdfd_vfd, seekpos, (off_t) BLCKSZ * nflush, WAIT_EVENT_DATA_FILE_FLUSH);
721 
722  nblocks -= nflush;
723  blocknum += nflush;
724  }
725 }
static MdfdVec * _mdfd_getseg(SMgrRelation reln, ForkNumber forkno, BlockNumber blkno, bool skipFsync, int behavior)
Definition: md.c:1813
uint32 BlockNumber
Definition: block.h:31
#define EXTENSION_RETURN_NULL
Definition: md.c:167
Definition: md.c:109
#define Assert(condition)
Definition: c.h:681
void FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info)
Definition: fd.c:1641
File mdfd_vfd
Definition: md.c:111
static void register_dirty_segment ( SMgrRelation  reln,
ForkNumber  forknum,
MdfdVec seg 
)
static

Definition at line 1429 of file md.c.

References Assert, DEBUG1, ereport, errcode_for_file_access(), errmsg(), ERROR, FilePathName(), FileSync(), ForwardFsyncRequest(), _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, RelFileNodeBackend::node, RememberFsyncRequest(), SMgrRelationData::smgr_rnode, SmgrIsTemp, and WAIT_EVENT_DATA_FILE_SYNC.

Referenced by mdextend(), mdtruncate(), and mdwrite().

1430 {
1431  /* Temp relations should never be fsync'd */
1432  Assert(!SmgrIsTemp(reln));
1433 
1434  if (pendingOpsTable)
1435  {
1436  /* push it into local pending-ops table */
1437  RememberFsyncRequest(reln->smgr_rnode.node, forknum, seg->mdfd_segno);
1438  }
1439  else
1440  {
1441  if (ForwardFsyncRequest(reln->smgr_rnode.node, forknum, seg->mdfd_segno))
1442  return; /* passed it off successfully */
1443 
1444  ereport(DEBUG1,
1445  (errmsg("could not forward fsync request because request queue is full")));
1446 
1447  if (FileSync(seg->mdfd_vfd, WAIT_EVENT_DATA_FILE_SYNC) < 0)
1448  ereport(ERROR,
1449  (errcode_for_file_access(),
1450  errmsg("could not fsync file \"%s\": %m",
1451  FilePathName(seg->mdfd_vfd))));
1452  }
1453 }
void RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
Definition: md.c:1514
#define DEBUG1
Definition: elog.h:25
bool ForwardFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
BlockNumber mdfd_segno
Definition: md.c:112
char * FilePathName(File file)
Definition: fd.c:1997
static HTAB * pendingOpsTable
Definition: md.c:155
#define SmgrIsTemp(smgr)
Definition: smgr.h:80
#define ERROR
Definition: elog.h:43
RelFileNodeBackend smgr_rnode
Definition: smgr.h:43
int FileSync(File file, uint32 wait_event_info)
Definition: fd.c:1853
int errcode_for_file_access(void)
Definition: elog.c:598
#define ereport(elevel, rest)
Definition: elog.h:122
RelFileNode node
Definition: relfilenode.h:74
#define Assert(condition)
Definition: c.h:681
int errmsg(const char *fmt,...)
Definition: elog.c:797
File mdfd_vfd
Definition: md.c:111
static void register_unlink ( RelFileNodeBackend  rnode)
static

Definition at line 1465 of file md.c.

References Assert, ForwardFsyncRequest(), IsUnderPostmaster, MAIN_FORKNUM, RelFileNodeBackend::node, pg_usleep(), RelFileNodeBackendIsTemp, RememberFsyncRequest(), and UNLINK_RELATION_REQUEST.

Referenced by mdunlinkfork().

1466 {
1467  /* Should never be used with temp relations */
1468  Assert(!RelFileNodeBackendIsTemp(rnode));
1469 
1470  if (pendingOpsTable)
1471  {
1472  /* push it into local pending-ops table */
1473  RememberFsyncRequest(rnode.node, MAIN_FORKNUM,
1474  UNLINK_RELATION_REQUEST);
1475  }
1476  else
1477  {
1478  /*
1479  * Notify the checkpointer about it. If we fail to queue the request
1480  * message, we have to sleep and try again, because we can't simply
1481  * delete the file now. Ugly, but hopefully won't happen often.
1482  *
1483  * XXX should we just leave the file orphaned instead?
1484  */
1485  Assert(IsUnderPostmaster);
1486  while (!ForwardFsyncRequest(rnode.node, MAIN_FORKNUM,
1487  UNLINK_RELATION_REQUEST))
1488  pg_usleep(10000L); /* 10 msec seems a good number */
1489  }
1490 }
#define UNLINK_RELATION_REQUEST
Definition: md.c:56
void RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
Definition: md.c:1514
#define RelFileNodeBackendIsTemp(rnode)
Definition: relfilenode.h:78
bool ForwardFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
static HTAB * pendingOpsTable
Definition: md.c:155
void pg_usleep(long microsec)
Definition: signal.c:53
bool IsUnderPostmaster
Definition: globals.c:101
RelFileNode node
Definition: relfilenode.h:74
#define Assert(condition)
Definition: c.h:681
void RememberFsyncRequest ( RelFileNode  rnode,
ForkNumber  forknum,
BlockNumber  segno 
)

Definition at line 1514 of file md.c.

References Assert, bms_add_member(), bms_free(), PendingOperationEntry::canceled, PendingOperationEntry::cycle_ctr, PendingUnlinkEntry::cycle_ctr, RelFileNode::dbNode, FORGET_DATABASE_FSYNC, FORGET_RELATION_FSYNC, HASH_ENTER, HASH_FIND, hash_search(), hash_seq_init(), hash_seq_search(), InvalidForkNumber, lappend(), lfirst, list_delete_cell(), list_head(), lnext, MAIN_FORKNUM, MAX_FORKNUM, mdckpt_cycle_ctr, mdsync_cycle_ctr, MemoryContextSwitchTo(), MemSet, next, palloc(), pfree(), PendingOperationEntry::requests, PendingOperationEntry::rnode, PendingUnlinkEntry::rnode, and UNLINK_RELATION_REQUEST.

Referenced by AbsorbFsyncRequests(), ForgetDatabaseFsyncRequests(), ForgetRelationFsyncRequests(), register_dirty_segment(), and register_unlink().

1515 {
1516  Assert(pendingOpsTable);
1517 
1518  if (segno == FORGET_RELATION_FSYNC)
1519  {
1520  /* Remove any pending requests for the relation (one or all forks) */
1521  PendingOperationEntry *entry;
1522 
1523  entry = (PendingOperationEntry *) hash_search(pendingOpsTable,
1524  &rnode,
1525  HASH_FIND,
1526  NULL);
1527  if (entry)
1528  {
1529  /*
1530  * We can't just delete the entry since mdsync could have an
1531  * active hashtable scan. Instead we delete the bitmapsets; this
1532  * is safe because of the way mdsync is coded. We also set the
1533  * "canceled" flags so that mdsync can tell that a cancel arrived
1534  * for the fork(s).
1535  */
1536  if (forknum == InvalidForkNumber)
1537  {
1538  /* remove requests for all forks */
1539  for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
1540  {
1541  bms_free(entry->requests[forknum]);
1542  entry->requests[forknum] = NULL;
1543  entry->canceled[forknum] = true;
1544  }
1545  }
1546  else
1547  {
1548  /* remove requests for single fork */
1549  bms_free(entry->requests[forknum]);
1550  entry->requests[forknum] = NULL;
1551  entry->canceled[forknum] = true;
1552  }
1553  }
1554  }
1555  else if (segno == FORGET_DATABASE_FSYNC)
1556  {
1557  /* Remove any pending requests for the entire database */
1558  HASH_SEQ_STATUS hstat;
1559  PendingOperationEntry *entry;
1560  ListCell *cell,
1561  *prev,
1562  *next;
1563 
1564  /* Remove fsync requests */
1565  hash_seq_init(&hstat, pendingOpsTable);
1566  while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL)
1567  {
1568  if (entry->rnode.dbNode == rnode.dbNode)
1569  {
1570  /* remove requests for all forks */
1571  for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
1572  {
1573  bms_free(entry->requests[forknum]);
1574  entry->requests[forknum] = NULL;
1575  entry->canceled[forknum] = true;
1576  }
1577  }
1578  }
1579 
1580  /* Remove unlink requests */
1581  prev = NULL;
1582  for (cell = list_head(pendingUnlinks); cell; cell = next)
1583  {
1584  PendingUnlinkEntry *entry = (PendingUnlinkEntry *) lfirst(cell);
1585 
1586  next = lnext(cell);
1587  if (entry->rnode.dbNode == rnode.dbNode)
1588  {
1589  pendingUnlinks = list_delete_cell(pendingUnlinks, cell, prev);
1590  pfree(entry);
1591  }
1592  else
1593  prev = cell;
1594  }
1595  }
1596  else if (segno == UNLINK_RELATION_REQUEST)
1597  {
1598  /* Unlink request: put it in the linked list */
1599  MemoryContext oldcxt = MemoryContextSwitchTo(pendingOpsCxt);
1600  PendingUnlinkEntry *entry;
1601 
1602  /* PendingUnlinkEntry doesn't store forknum, since it's always MAIN */
1603  Assert(forknum == MAIN_FORKNUM);
1604 
1605  entry = palloc(sizeof(PendingUnlinkEntry));
1606  entry->rnode = rnode;
1607  entry->cycle_ctr = mdckpt_cycle_ctr;
1608 
1609  pendingUnlinks = lappend(pendingUnlinks, entry);
1610 
1611  MemoryContextSwitchTo(oldcxt);
1612  }
1613  else
1614  {
1615  /* Normal case: enter a request to fsync this segment */
1616  MemoryContext oldcxt = MemoryContextSwitchTo(pendingOpsCxt);
1617  PendingOperationEntry *entry;
1618  bool found;
1619 
1620  entry = (PendingOperationEntry *) hash_search(pendingOpsTable,
1621  &rnode,
1622  HASH_ENTER,
1623  &found);
1624  /* if new entry, initialize it */
1625  if (!found)
1626  {
1627  entry->cycle_ctr = mdsync_cycle_ctr;
1628  MemSet(entry->requests, 0, sizeof(entry->requests));
1629  MemSet(entry->canceled, 0, sizeof(entry->canceled));
1630  }
1631 
1632  /*
1633  * NB: it's intentional that we don't change cycle_ctr if the entry
1634  * already exists. The cycle_ctr must represent the oldest fsync
1635  * request that could be in the entry.
1636  */
1637 
1638  entry->requests[forknum] = bms_add_member(entry->requests[forknum],
1639  (int) segno);
1640 
1641  MemoryContextSwitchTo(oldcxt);
1642  }
1643 }
#define UNLINK_RELATION_REQUEST
Definition: md.c:56
CycleCtr cycle_ctr
Definition: md.c:152
static int32 next
Definition: blutils.c:210
static CycleCtr mdsync_cycle_ctr
Definition: md.c:159
RelFileNode rnode
Definition: md.c:141
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
static CycleCtr mdckpt_cycle_ctr
Definition: md.c:160
#define MemSet(start, val, len)
Definition: c.h:863
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:902
CycleCtr cycle_ctr
Definition: md.c:142
static HTAB * pendingOpsTable
Definition: md.c:155
bool canceled[MAX_FORKNUM+1]
Definition: md.c:146
#define FORGET_RELATION_FSYNC
Definition: md.c:54
void pfree(void *pointer)
Definition: mcxt.c:949
RelFileNode rnode
Definition: md.c:151
#define FORGET_DATABASE_FSYNC
Definition: md.c:55
static ListCell * list_head(const List *l)
Definition: pg_list.h:77
#define lnext(lc)
Definition: pg_list.h:105
List * lappend(List *list, void *datum)
Definition: list.c:128
List * list_delete_cell(List *list, ListCell *cell, ListCell *prev)
Definition: list.c:528
void bms_free(Bitmapset *a)
Definition: bitmapset.c:201
#define Assert(condition)
Definition: c.h:681
#define lfirst(lc)
Definition: pg_list.h:106
#define MAX_FORKNUM
Definition: relpath.h:39
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1385
Bitmapset * bms_add_member(Bitmapset *a, int x)
Definition: bitmapset.c:698
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1375
void * palloc(Size size)
Definition: mcxt.c:848
Bitmapset * requests[MAX_FORKNUM+1]
Definition: md.c:144
static MemoryContext pendingOpsCxt
Definition: md.c:157
static List * pendingUnlinks
Definition: md.c:156
void SetForwardFsyncRequests ( void  )

Definition at line 254 of file md.c.

References Assert, hash_destroy(), mdsync(), and NIL.

Referenced by StartupXLOG().

255 {
256  /* Perform any pending fsyncs we may have queued up, then drop table */
257  if (pendingOpsTable)
258  {
259  mdsync();
260  hash_destroy(pendingOpsTable);
261  }
262  pendingOpsTable = NULL;
263 
264  /*
265  * We should not have any pending unlink requests, since mdunlink doesn't
266  * queue unlink requests when isRedo.
267  */
268  Assert(pendingUnlinks == NIL);
269 }
#define NIL
Definition: pg_list.h:69
void hash_destroy(HTAB *hashp)
Definition: dynahash.c:810
void mdsync(void)
Definition: md.c:1054
static HTAB * pendingOpsTable
Definition: md.c:155
#define Assert(condition)
Definition: c.h:681
static List * pendingUnlinks
Definition: md.c:156

Variable Documentation

CycleCtr mdckpt_cycle_ctr = 0
static

Definition at line 160 of file md.c.

Referenced by mdpostckpt(), mdpreckpt(), and RememberFsyncRequest().

MemoryContext MdCxt
static

Definition at line 115 of file md.c.

CycleCtr mdsync_cycle_ctr = 0
static

Definition at line 159 of file md.c.

Referenced by mdsync(), and RememberFsyncRequest().

MemoryContext pendingOpsCxt
static

Definition at line 157 of file md.c.

Referenced by mdinit().

HTAB* pendingOpsTable = NULL
static

Definition at line 155 of file md.c.

List* pendingUnlinks = NIL
static

Definition at line 156 of file md.c.