PostgreSQL Source Code  git master
md.h File Reference
#include "storage/block.h"
#include "storage/relfilelocator.h"
#include "storage/smgr.h"
#include "storage/sync.h"
Include dependency graph for md.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

void mdinit (void)
 
void mdopen (SMgrRelation reln)
 
void mdclose (SMgrRelation reln, ForkNumber forknum)
 
void mdcreate (SMgrRelation reln, ForkNumber forknum, bool isRedo)
 
bool mdexists (SMgrRelation reln, ForkNumber forknum)
 
void mdunlink (RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
 
void mdextend (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
 
void mdzeroextend (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync)
 
bool mdprefetch (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
 
void mdread (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void *buffer)
 
void mdwrite (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
 
void mdwriteback (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
 
BlockNumber mdnblocks (SMgrRelation reln, ForkNumber forknum)
 
void mdtruncate (SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
 
void mdimmedsync (SMgrRelation reln, ForkNumber forknum)
 
void ForgetDatabaseSyncRequests (Oid dbid)
 
void DropRelationFiles (RelFileLocator *delrels, int ndelrels, bool isRedo)
 
int mdsyncfiletag (const FileTag *ftag, char *path)
 
int mdunlinkfiletag (const FileTag *ftag, char *path)
 
bool mdfiletagmatches (const FileTag *ftag, const FileTag *candidate)
 

Function Documentation

◆ DropRelationFiles()

void DropRelationFiles ( RelFileLocator *  delrels,
int  ndelrels,
bool  isRedo 
)

Definition at line 1252 of file md.c.

1253 {
1254  SMgrRelation *srels;
1255  int i;
1256 
1257  srels = palloc(sizeof(SMgrRelation) * ndelrels);
1258  for (i = 0; i < ndelrels; i++)
1259  {
1260  SMgrRelation srel = smgropen(delrels[i], InvalidBackendId);
1261 
1262  if (isRedo)
1263  {
1264  ForkNumber fork;
1265 
1266  for (fork = 0; fork <= MAX_FORKNUM; fork++)
1267  XLogDropRelation(delrels[i], fork);
1268  }
1269  srels[i] = srel;
1270  }
1271 
1272  smgrdounlinkall(srels, ndelrels, isRedo);
1273 
1274  for (i = 0; i < ndelrels; i++)
1275  smgrclose(srels[i]);
1276  pfree(srels);
1277 }
#define InvalidBackendId
Definition: backendid.h:23
int i
Definition: isn.c:73
void pfree(void *pointer)
Definition: mcxt.c:1456
void * palloc(Size size)
Definition: mcxt.c:1226
ForkNumber
Definition: relpath.h:48
#define MAX_FORKNUM
Definition: relpath.h:62
void smgrclose(SMgrRelation reln)
Definition: smgr.c:260
SMgrRelation smgropen(RelFileLocator rlocator, BackendId backend)
Definition: smgr.c:150
void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
Definition: smgr.c:425
void XLogDropRelation(RelFileLocator rlocator, ForkNumber forknum)
Definition: xlogutils.c:643

References i, InvalidBackendId, MAX_FORKNUM, palloc(), pfree(), smgrclose(), smgrdounlinkall(), smgropen(), and XLogDropRelation().

Referenced by FinishPreparedTransaction(), xact_redo_abort(), and xact_redo_commit().

◆ ForgetDatabaseSyncRequests()

void ForgetDatabaseSyncRequests ( Oid  dbid)

Definition at line 1234 of file md.c.

1235 {
1236  FileTag tag;
1237  RelFileLocator rlocator;
1238 
1239  rlocator.dbOid = dbid;
1240  rlocator.spcOid = 0;
1241  rlocator.relNumber = 0;
1242 
1243  INIT_MD_FILETAG(tag, rlocator, InvalidForkNumber, InvalidBlockNumber);
1244 
1245  RegisterSyncRequest(&tag, SYNC_FILTER_REQUEST, true /* retryOnError */ );
1246 }
#define InvalidBlockNumber
Definition: block.h:33
#define INIT_MD_FILETAG(a, xx_rlocator, xx_forknum, xx_segno)
Definition: md.c:92
@ InvalidForkNumber
Definition: relpath.h:49
FileTag
Definition: sync.h:51
RelFileNumber relNumber
bool RegisterSyncRequest(const FileTag *ftag, SyncRequestType type, bool retryOnError)
Definition: sync.c:585
@ SYNC_FILTER_REQUEST
Definition: sync.h:28

References RelFileLocator::dbOid, INIT_MD_FILETAG, InvalidBlockNumber, InvalidForkNumber, RegisterSyncRequest(), RelFileLocator::relNumber, RelFileLocator::spcOid, and SYNC_FILTER_REQUEST.

Referenced by createdb_failure_callback(), dbase_redo(), and dropdb().

◆ mdclose()

void mdclose ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 693 of file md.c.

694 {
695  int nopensegs = reln->md_num_open_segs[forknum];
696 
697  /* No work if already closed */
698  if (nopensegs == 0)
699  return;
700 
701  /* close segments starting from the end */
702  while (nopensegs > 0)
703  {
704  MdfdVec *v = &reln->md_seg_fds[forknum][nopensegs - 1];
705 
706  FileClose(v->mdfd_vfd);
707  _fdvec_resize(reln, forknum, nopensegs - 1);
708  nopensegs--;
709  }
710 }
void FileClose(File file)
Definition: fd.c:1955
static void _fdvec_resize(SMgrRelation reln, ForkNumber forknum, int nseg)
Definition: md.c:1284
int md_num_open_segs[MAX_FORKNUM+1]
Definition: smgr.h:68
struct _MdfdVec * md_seg_fds[MAX_FORKNUM+1]
Definition: smgr.h:69
_MdfdVec
Definition: md.c:83
File mdfd_vfd
Definition: md.c:84

References _fdvec_resize(), FileClose(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, and _MdfdVec::mdfd_vfd.

Referenced by mdexists().

◆ mdcreate()

void mdcreate ( SMgrRelation  reln,
ForkNumber  forknum,
bool  isRedo 
)

Definition at line 192 of file md.c.

193 {
194  MdfdVec *mdfd;
195  char *path;
196  File fd;
197 
198  if (isRedo && reln->md_num_open_segs[forknum] > 0)
199  return; /* created and opened already... */
200 
201  Assert(reln->md_num_open_segs[forknum] == 0);
202 
203  /*
204  * We may be using the target table space for the first time in this
205  * database, so create a per-database subdirectory if needed.
206  *
207  * XXX this is a fairly ugly violation of module layering, but this seems
208  * to be the best place to put the check. Maybe TablespaceCreateDbspace
209  * should be here and not in commands/tablespace.c? But that would imply
210  * importing a lot of stuff that smgr.c oughtn't know, either.
211  */
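212  TablespaceCreateDbspace(reln->smgr_rlocator.locator.spcOid,
213  reln->smgr_rlocator.locator.dbOid,
214  isRedo);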
215 
216  path = relpath(reln->smgr_rlocator, forknum);
217 
218  fd = PathNameOpenFile(path, _mdfd_open_flags() | O_CREAT | O_EXCL);
219 
220  if (fd < 0)
221  {
222  int save_errno = errno;
223 
224  if (isRedo)
225  fd = PathNameOpenFile(path, _mdfd_open_flags());
226  if (fd < 0)
227  {
228  /* be sure to report the error reported by create, not open */
229  errno = save_errno;
230  ereport(ERROR,
231  (errcode_for_file_access(),
232  errmsg("could not create file \"%s\": %m", path)));
233  }
234  }
235 
236  pfree(path);
237 
238  _fdvec_resize(reln, forknum, 1);
239  mdfd = &reln->md_seg_fds[forknum][0];
240  mdfd->mdfd_vfd = fd;
241  mdfd->mdfd_segno = 0;
242 
243  if (!SmgrIsTemp(reln))
244  register_dirty_segment(reln, forknum, mdfd);
245 }
void TablespaceCreateDbspace(Oid spcOid, Oid dbOid, bool isRedo)
Definition: tablespace.c:118
int errcode_for_file_access(void)
Definition: elog.c:881
int errmsg(const char *fmt,...)
Definition: elog.c:1069
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
File PathNameOpenFile(const char *fileName, int fileFlags)
Definition: fd.c:1552
int File
Definition: fd.h:49
Assert(fmt[strlen(fmt) - 1] !='\n')
static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:1159
static int _mdfd_open_flags(void)
Definition: md.c:146
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define relpath(rlocator, forknum)
Definition: relpath.h:94
#define SmgrIsTemp(smgr)
Definition: smgr.h:77
RelFileLocator locator
RelFileLocatorBackend smgr_rlocator
Definition: smgr.h:42
BlockNumber mdfd_segno
Definition: md.c:85

References _fdvec_resize(), _mdfd_open_flags(), Assert(), RelFileLocator::dbOid, ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), RelFileLocatorBackend::locator, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, PathNameOpenFile(), pfree(), register_dirty_segment(), relpath, SMgrRelationData::smgr_rlocator, SmgrIsTemp, RelFileLocator::spcOid, and TablespaceCreateDbspace().

◆ mdexists()

bool mdexists ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 173 of file md.c.

174 {
175  /*
176  * Close it first, to ensure that we notice if the fork has been unlinked
177  * since we opened it. As an optimization, we can skip that in recovery,
178  * which already closes relations when dropping them.
179  */
180  if (!InRecovery)
181  mdclose(reln, forknum);
182 
183  return (mdopenfork(reln, forknum, EXTENSION_RETURN_NULL) != NULL);
184 }
void mdclose(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:693
#define EXTENSION_RETURN_NULL
Definition: md.c:106
static MdfdVec * mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
Definition: md.c:639
bool InRecovery
Definition: xlogutils.c:53

References EXTENSION_RETURN_NULL, InRecovery, mdclose(), and mdopenfork().

◆ mdextend()

void mdextend ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
const void *  buffer,
bool  skipFsync 
)

Definition at line 462 of file md.c.

464 {
465  off_t seekpos;
466  int nbytes;
467  MdfdVec *v;
468 
469  /* If this build supports direct I/O, the buffer must be I/O aligned. */
470  if (PG_O_DIRECT != 0 && PG_IO_ALIGN_SIZE <= BLCKSZ)
471  Assert((uintptr_t) buffer == TYPEALIGN(PG_IO_ALIGN_SIZE, buffer));
472 
473  /* This assert is too expensive to have on normally ... */
474 #ifdef CHECK_WRITE_VS_EXTEND
475  Assert(blocknum >= mdnblocks(reln, forknum));
476 #endif
477 
478  /*
479  * If a relation manages to grow to 2^32-1 blocks, refuse to extend it any
480  * more --- we mustn't create a block whose number actually is
481  * InvalidBlockNumber. (Note that this failure should be unreachable
482  * because of upstream checks in bufmgr.c.)
483  */
484  if (blocknum == InvalidBlockNumber)
485  ereport(ERROR,
486  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
487  errmsg("cannot extend file \"%s\" beyond %u blocks",
488  relpath(reln->smgr_rlocator, forknum),
489  InvalidBlockNumber)));
490 
491  v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE);
492 
493  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
494 
495  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
496 
497  if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ)
498  {
499  if (nbytes < 0)
500  ereport(ERROR,
501  (errcode_for_file_access(),
502  errmsg("could not extend file \"%s\": %m",
503  FilePathName(v->mdfd_vfd)),
504  errhint("Check free disk space.")));
505  /* short write: complain appropriately */
506  ereport(ERROR,
507  (errcode(ERRCODE_DISK_FULL),
508  errmsg("could not extend file \"%s\": wrote only %d of %d bytes at block %u",
509  FilePathName(v->mdfd_vfd),
510  nbytes, BLCKSZ, blocknum),
511  errhint("Check free disk space.")));
512  }
513 
514  if (!skipFsync && !SmgrIsTemp(reln))
515  register_dirty_segment(reln, forknum, v);
516 
517  Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
518 }
uint32 BlockNumber
Definition: block.h:31
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:793
int errhint(const char *fmt,...)
Definition: elog.c:1316
int errcode(int sqlerrcode)
Definition: elog.c:858
int FileWrite(File file, const void *buffer, size_t amount, off_t offset, uint32 wait_event_info)
Definition: fd.c:2169
char * FilePathName(File file)
Definition: fd.c:2431
#define PG_O_DIRECT
Definition: fd.h:95
static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:1521
BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:938
static MdfdVec * _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, bool skipFsync, int behavior)
Definition: md.c:1390
#define EXTENSION_CREATE
Definition: md.c:108
#define PG_IO_ALIGN_SIZE

References _mdfd_getseg(), _mdnblocks(), Assert(), ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXTENSION_CREATE, FilePathName(), FileWrite(), InvalidBlockNumber, _MdfdVec::mdfd_vfd, mdnblocks(), PG_IO_ALIGN_SIZE, PG_O_DIRECT, register_dirty_segment(), relpath, SMgrRelationData::smgr_rlocator, SmgrIsTemp, and TYPEALIGN.

Referenced by _mdfd_getseg().

◆ mdfiletagmatches()

bool mdfiletagmatches ( const FileTag *  ftag,
const FileTag *  candidate 
)

Definition at line 1614 of file md.c.

1615 {
1616  /*
1617  * For now we only use filter requests as a way to drop all scheduled
1618  * callbacks relating to a given database, when dropping the database.
1619  * We'll return true for all candidates that have the same database OID as
1620  * the ftag from the SYNC_FILTER_REQUEST request, so they're forgotten.
1621  */
1622  return ftag->rlocator.dbOid == candidate->rlocator.dbOid;
1623 }
RelFileLocator rlocator
Definition: sync.h:54

References RelFileLocator::dbOid, and FileTag::rlocator.

◆ mdimmedsync()

void mdimmedsync ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 1097 of file md.c.

1098 {
1099  int segno;
1100  int min_inactive_seg;
1101 
1102  /*
1103  * NOTE: mdnblocks makes sure we have opened all active segments, so that
1104  * fsync loop will get them all!
1105  */
1106  mdnblocks(reln, forknum);
1107 
1108  min_inactive_seg = segno = reln->md_num_open_segs[forknum];
1109 
1110  /*
1111  * Temporarily open inactive segments, then close them after sync. There
1112  * may be some inactive segments left opened after fsync() error, but that
1113  * is harmless. We don't bother to clean them up and take a risk of
1114  * further trouble. The next mdclose() will soon close them.
1115  */
1116  while (_mdfd_openseg(reln, forknum, segno, 0) != NULL)
1117  segno++;
1118 
1119  while (segno > 0)
1120  {
1121  MdfdVec *v = &reln->md_seg_fds[forknum][segno - 1];
1122 
1123  /*
1124  * fsyncs done through mdimmedsync() should be tracked in a separate
1125  * IOContext than those done through mdsyncfiletag() to differentiate
1126  * between unavoidable client backend fsyncs (e.g. those done during
1127  * index build) and those which ideally would have been done by the
1128  * checkpointer. Since other IO operations bypassing the buffer
1129  * manager could also be tracked in such an IOContext, wait until
1130  * these are also tracked to track immediate fsyncs.
1131  */
1132  if (FileSync(v->mdfd_vfd, WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC) < 0)
1133  ereport(data_sync_elevel(ERROR),
1134  (errcode_for_file_access(),
1135  errmsg("could not fsync file \"%s\": %m",
1136  FilePathName(v->mdfd_vfd))));
1137 
1138  /* Close inactive segments immediately */
1139  if (segno > min_inactive_seg)
1140  {
1141  FileClose(v->mdfd_vfd);
1142  _fdvec_resize(reln, forknum, segno - 1);
1143  }
1144 
1145  segno--;
1146  }
1147 }
int FileSync(File file, uint32 wait_event_info)
Definition: fd.c:2267
int data_sync_elevel(int elevel)
Definition: fd.c:3906
static MdfdVec * _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno, int oflags)
Definition: md.c:1345

References _fdvec_resize(), _mdfd_openseg(), data_sync_elevel(), ereport, errcode_for_file_access(), errmsg(), ERROR, FileClose(), FilePathName(), FileSync(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, and mdnblocks().

◆ mdinit()

void mdinit ( void  )

Definition at line 160 of file md.c.

161 {
162  MdCxt = AllocSetContextCreate(TopMemoryContext,
163  "MdSmgr",
164  ALLOCSET_DEFAULT_SIZES);
165 }
MemoryContext TopMemoryContext
Definition: mcxt.c:141
static MemoryContext MdCxt
Definition: md.c:88
#define AllocSetContextCreate
Definition: memutils.h:126
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:150

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, MdCxt, and TopMemoryContext.

◆ mdnblocks()

BlockNumber mdnblocks ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 938 of file md.c.

939 {
940  MdfdVec *v;
941  BlockNumber nblocks;
942  BlockNumber segno;
943 
944  mdopenfork(reln, forknum, EXTENSION_FAIL);
945 
946  /* mdopen has opened the first segment */
947  Assert(reln->md_num_open_segs[forknum] > 0);
948 
949  /*
950  * Start from the last open segments, to avoid redundant seeks. We have
951  * previously verified that these segments are exactly RELSEG_SIZE long,
952  * and it's useless to recheck that each time.
953  *
954  * NOTE: this assumption could only be wrong if another backend has
955  * truncated the relation. We rely on higher code levels to handle that
956  * scenario by closing and re-opening the md fd, which is handled via
957  * relcache flush. (Since the checkpointer doesn't participate in
958  * relcache flush, it could have segment entries for inactive segments;
959  * that's OK because the checkpointer never needs to compute relation
960  * size.)
961  */
962  segno = reln->md_num_open_segs[forknum] - 1;
963  v = &reln->md_seg_fds[forknum][segno];
964 
965  for (;;)
966  {
967  nblocks = _mdnblocks(reln, forknum, v);
968  if (nblocks > ((BlockNumber) RELSEG_SIZE))
969  elog(FATAL, "segment too big");
970  if (nblocks < ((BlockNumber) RELSEG_SIZE))
971  return (segno * ((BlockNumber) RELSEG_SIZE)) + nblocks;
972 
973  /*
974  * If segment is exactly RELSEG_SIZE, advance to next one.
975  */
976  segno++;
977 
978  /*
979  * We used to pass O_CREAT here, but that has the disadvantage that it
980  * might create a segment which has vanished through some operating
981  * system misadventure. In such a case, creating the segment here
982  * undermines _mdfd_getseg's attempts to notice and report an error
983  * upon access to a missing segment.
984  */
985  v = _mdfd_openseg(reln, forknum, segno, 0);
986  if (v == NULL)
987  return segno * ((BlockNumber) RELSEG_SIZE);
988  }
989 }
#define FATAL
Definition: elog.h:41
#define EXTENSION_FAIL
Definition: md.c:104

References _mdfd_openseg(), _mdnblocks(), Assert(), elog(), EXTENSION_FAIL, FATAL, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, and mdopenfork().

Referenced by mdextend(), mdimmedsync(), mdtruncate(), mdwrite(), and mdzeroextend().

◆ mdopen()

void mdopen ( SMgrRelation  reln)

Definition at line 682 of file md.c.

683 {
684  /* mark it not open */
685  for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)
686  reln->md_num_open_segs[forknum] = 0;
687 }

References MAX_FORKNUM, and SMgrRelationData::md_num_open_segs.

◆ mdprefetch()

bool mdprefetch ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum 
)

Definition at line 716 of file md.c.

717 {
718 #ifdef USE_PREFETCH
719  off_t seekpos;
720  MdfdVec *v;
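721 
722  Assert((io_direct_flags & IO_DIRECT_DATA) == 0);
723 
724  v = _mdfd_getseg(reln, forknum, blocknum, false,
725  InRecovery ? EXTENSION_RETURN_NULL : EXTENSION_FAIL);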
726  if (v == NULL)
727  return false;
728 
729  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
730 
731  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
732 
733  (void) FilePrefetch(v->mdfd_vfd, seekpos, BLCKSZ, WAIT_EVENT_DATA_FILE_PREFETCH);
734 #endif /* USE_PREFETCH */
735 
736  return true;
737 }
int io_direct_flags
Definition: fd.c:168
int FilePrefetch(File file, off_t offset, off_t amount, uint32 wait_event_info)
Definition: fd.c:2055
#define IO_DIRECT_DATA
Definition: fd.h:52

References _mdfd_getseg(), Assert(), EXTENSION_FAIL, EXTENSION_RETURN_NULL, FilePrefetch(), InRecovery, IO_DIRECT_DATA, io_direct_flags, and _MdfdVec::mdfd_vfd.

◆ mdread()

void mdread ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
void *  buffer 
)

Definition at line 743 of file md.c.

745 {
746  off_t seekpos;
747  int nbytes;
748  MdfdVec *v;
749 
750  /* If this build supports direct I/O, the buffer must be I/O aligned. */
751  if (PG_O_DIRECT != 0 && PG_IO_ALIGN_SIZE <= BLCKSZ)
752  Assert((uintptr_t) buffer == TYPEALIGN(PG_IO_ALIGN_SIZE, buffer));
753 
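754  TRACE_POSTGRESQL_SMGR_MD_READ_START(forknum, blocknum,
755  reln->smgr_rlocator.locator.spcOid,
756  reln->smgr_rlocator.locator.dbOid,
757  reln->smgr_rlocator.locator.relNumber,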
758  reln->smgr_rlocator.backend);
759 
760  v = _mdfd_getseg(reln, forknum, blocknum, false,
761  EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
762 
763  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
764 
765  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
766 
767  nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_READ);
768 
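769  TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum,
770  reln->smgr_rlocator.locator.spcOid,
771  reln->smgr_rlocator.locator.dbOid,
772  reln->smgr_rlocator.locator.relNumber,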
773  reln->smgr_rlocator.backend,
774  nbytes,
775  BLCKSZ);
776 
777  if (nbytes != BLCKSZ)
778  {
779  if (nbytes < 0)
780  ereport(ERROR,
781  (errcode_for_file_access(),
782  errmsg("could not read block %u in file \"%s\": %m",
783  blocknum, FilePathName(v->mdfd_vfd))));
784 
785  /*
786  * Short read: we are at or past EOF, or we read a partial block at
787  * EOF. Normally this is an error; upper levels should never try to
788  * read a nonexistent block. However, if zero_damaged_pages is ON or
789  * we are InRecovery, we should instead return zeroes without
790  * complaining. This allows, for example, the case of trying to
791  * update a block that was later truncated away.
792  */
793  if (zero_damaged_pages || InRecovery)
794  MemSet(buffer, 0, BLCKSZ);
795  else
796  ereport(ERROR,
797  (errcode(ERRCODE_DATA_CORRUPTED),
798  errmsg("could not read block %u in file \"%s\": read only %d of %d bytes",
799  blocknum, FilePathName(v->mdfd_vfd),
800  nbytes, BLCKSZ)));
801  }
802 }
bool zero_damaged_pages
Definition: bufmgr.c:136
#define MemSet(start, val, len)
Definition: c.h:1009
int FileRead(File file, void *buffer, size_t amount, off_t offset, uint32 wait_event_info)
Definition: fd.c:2113
#define EXTENSION_CREATE_RECOVERY
Definition: md.c:110
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41

References _mdfd_getseg(), Assert(), RelFileLocatorBackend::backend, RelFileLocator::dbOid, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errmsg(), ERROR, EXTENSION_CREATE_RECOVERY, EXTENSION_FAIL, FilePathName(), FileRead(), InRecovery, RelFileLocatorBackend::locator, _MdfdVec::mdfd_vfd, MemSet, PG_IO_ALIGN_SIZE, PG_O_DIRECT, RelFileLocator::relNumber, SMgrRelationData::smgr_rlocator, RelFileLocator::spcOid, TYPEALIGN, and zero_damaged_pages.

◆ mdsyncfiletag()

int mdsyncfiletag ( const FileTag *  ftag,
char *  path 
)

Definition at line 1542 of file md.c.

1543 {
1544  SMgrRelation reln = smgropen(ftag->rlocator, InvalidBackendId);
1545  File file;
1546  instr_time io_start;
1547  bool need_to_close;
1548  int result,
1549  save_errno;
1550 
1551  /* See if we already have the file open, or need to open it. */
1552  if (ftag->segno < reln->md_num_open_segs[ftag->forknum])
1553  {
1554  file = reln->md_seg_fds[ftag->forknum][ftag->segno].mdfd_vfd;
1555  strlcpy(path, FilePathName(file), MAXPGPATH);
1556  need_to_close = false;
1557  }
1558  else
1559  {
1560  char *p;
1561 
1562  p = _mdfd_segpath(reln, ftag->forknum, ftag->segno);
1563  strlcpy(path, p, MAXPGPATH);
1564  pfree(p);
1565 
1566  file = PathNameOpenFile(path, _mdfd_open_flags());
1567  if (file < 0)
1568  return -1;
1569  need_to_close = true;
1570  }
1571 
1572  io_start = pgstat_prepare_io_time();
1573 
1574  /* Sync the file. */
1575  result = FileSync(file, WAIT_EVENT_DATA_FILE_SYNC);
1576  save_errno = errno;
1577 
1578  if (need_to_close)
1579  FileClose(file);
1580 
1581  pgstat_count_io_op_time(IOOBJECT_RELATION, IOCONTEXT_NORMAL,
1582  IOOP_FSYNC, io_start, 1);
1583 
1584  errno = save_errno;
1585  return result;
1586 }
static char * _mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
Definition: md.c:1322
#define MAXPGPATH
@ IOOBJECT_RELATION
Definition: pgstat.h:277
@ IOCONTEXT_NORMAL
Definition: pgstat.h:287
@ IOOP_FSYNC
Definition: pgstat.h:297
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt)
Definition: pgstat_io.c:112
instr_time pgstat_prepare_io_time(void)
Definition: pgstat_io.c:96
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
int16 forknum
Definition: sync.h:53
uint64 segno
Definition: sync.h:55

References _mdfd_open_flags(), _mdfd_segpath(), FileClose(), FilePathName(), FileSync(), FileTag::forknum, InvalidBackendId, IOCONTEXT_NORMAL, IOOBJECT_RELATION, IOOP_FSYNC, MAXPGPATH, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, PathNameOpenFile(), pfree(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), FileTag::rlocator, FileTag::segno, smgropen(), and strlcpy().

◆ mdtruncate()

void mdtruncate ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  nblocks 
)

Definition at line 995 of file md.c.

996 {
997  BlockNumber curnblk;
998  BlockNumber priorblocks;
999  int curopensegs;
1000 
1001  /*
1002  * NOTE: mdnblocks makes sure we have opened all active segments, so that
1003  * truncation loop will get them all!
1004  */
1005  curnblk = mdnblocks(reln, forknum);
1006  if (nblocks > curnblk)
1007  {
1008  /* Bogus request ... but no complaint if InRecovery */
1009  if (InRecovery)
1010  return;
1011  ereport(ERROR,
1012  (errmsg("could not truncate file \"%s\" to %u blocks: it's only %u blocks now",
1013  relpath(reln->smgr_rlocator, forknum),
1014  nblocks, curnblk)));
1015  }
1016  if (nblocks == curnblk)
1017  return; /* no work */
1018 
1019  /*
1020  * Truncate segments, starting at the last one. Starting at the end makes
1021  * managing the memory for the fd array easier, should there be errors.
1022  */
1023  curopensegs = reln->md_num_open_segs[forknum];
1024  while (curopensegs > 0)
1025  {
1026  MdfdVec *v;
1027 
1028  priorblocks = (curopensegs - 1) * RELSEG_SIZE;
1029 
1030  v = &reln->md_seg_fds[forknum][curopensegs - 1];
1031 
1032  if (priorblocks > nblocks)
1033  {
1034  /*
1035  * This segment is no longer active. We truncate the file, but do
1036  * not delete it, for reasons explained in the header comments.
1037  */
1038  if (FileTruncate(v->mdfd_vfd, 0, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0)
1039  ereport(ERROR,
1040  (errcode_for_file_access(),
1041  errmsg("could not truncate file \"%s\": %m",
1042  FilePathName(v->mdfd_vfd))));
1043 
1044  if (!SmgrIsTemp(reln))
1045  register_dirty_segment(reln, forknum, v);
1046 
1047  /* we never drop the 1st segment */
1048  Assert(v != &reln->md_seg_fds[forknum][0]);
1049 
1050  FileClose(v->mdfd_vfd);
1051  _fdvec_resize(reln, forknum, curopensegs - 1);
1052  }
1053  else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks)
1054  {
1055  /*
1056  * This is the last segment we want to keep. Truncate the file to
1057  * the right length. NOTE: if nblocks is exactly a multiple K of
1058  * RELSEG_SIZE, we will truncate the K+1st segment to 0 length but
1059  * keep it. This adheres to the invariant given in the header
1060  * comments.
1061  */
1062  BlockNumber lastsegblocks = nblocks - priorblocks;
1063 
1064  if (FileTruncate(v->mdfd_vfd, (off_t) lastsegblocks * BLCKSZ, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0)
1065  ereport(ERROR,
1066  (errcode_for_file_access(),
1067  errmsg("could not truncate file \"%s\" to %u blocks: %m",
1068  FilePathName(v->mdfd_vfd),
1069  nblocks)));
1070  if (!SmgrIsTemp(reln))
1071  register_dirty_segment(reln, forknum, v);
1072  }
1073  else
1074  {
1075  /*
1076  * We still need this segment, so nothing to do for this and any
1077  * earlier segment.
1078  */
1079  break;
1080  }
1081  curopensegs--;
1082  }
1083 }
int FileTruncate(File file, off_t offset, uint32 wait_event_info)
Definition: fd.c:2396

References _fdvec_resize(), Assert(), ereport, errcode_for_file_access(), errmsg(), ERROR, FileClose(), FilePathName(), FileTruncate(), InRecovery, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, mdnblocks(), register_dirty_segment(), relpath, SMgrRelationData::smgr_rlocator, and SmgrIsTemp.

◆ mdunlink()

void mdunlink ( RelFileLocatorBackend  rlocator,
ForkNumber  forknum,
bool  isRedo 
)

Definition at line 309 of file md.c.

310 {
311  /* Now do the per-fork work */
312  if (forknum == InvalidForkNumber)
313  {
314  for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
315  mdunlinkfork(rlocator, forknum, isRedo);
316  }
317  else
318  mdunlinkfork(rlocator, forknum, isRedo);
319 }
static void mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
Definition: md.c:346

References InvalidForkNumber, MAX_FORKNUM, and mdunlinkfork().

◆ mdunlinkfiletag()

int mdunlinkfiletag ( const FileTag *  ftag,
char *  path 
)

Definition at line 1595 of file md.c.

1596 {
1597  char *p;
1598 
1599  /* Compute the path. */
1600  p = relpathperm(ftag->rlocator, MAIN_FORKNUM);
1601  strlcpy(path, p, MAXPGPATH);
1602  pfree(p);
1603 
1604  /* Try to unlink the file. */
1605  return unlink(path);
1606 }
@ MAIN_FORKNUM
Definition: relpath.h:50
#define relpathperm(rlocator, forknum)
Definition: relpath.h:90

References MAIN_FORKNUM, MAXPGPATH, pfree(), relpathperm, FileTag::rlocator, and strlcpy().

◆ mdwrite()

void mdwrite ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
const void *  buffer,
bool  skipFsync 
)

Definition at line 812 of file md.c.

814 {
815  off_t seekpos;
816  int nbytes;
817  MdfdVec *v;
818 
819  /* If this build supports direct I/O, the buffer must be I/O aligned. */
820  if (PG_O_DIRECT != 0 && PG_IO_ALIGN_SIZE <= BLCKSZ)
821  Assert((uintptr_t) buffer == TYPEALIGN(PG_IO_ALIGN_SIZE, buffer));
822 
823  /* This assert is too expensive to have on normally ... */
824 #ifdef CHECK_WRITE_VS_EXTEND
825  Assert(blocknum < mdnblocks(reln, forknum));
826 #endif
827 
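828  TRACE_POSTGRESQL_SMGR_MD_WRITE_START(forknum, blocknum,
829  reln->smgr_rlocator.locator.spcOid,
830  reln->smgr_rlocator.locator.dbOid,
831  reln->smgr_rlocator.locator.relNumber,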
832  reln->smgr_rlocator.backend);
833 
834  v = _mdfd_getseg(reln, forknum, blocknum, skipFsync,
835  EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
836 
837  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
838 
839  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
840 
841  nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_WRITE);
842 
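843  TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum,
844  reln->smgr_rlocator.locator.spcOid,
845  reln->smgr_rlocator.locator.dbOid,
846  reln->smgr_rlocator.locator.relNumber,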
847  reln->smgr_rlocator.backend,
848  nbytes,
849  BLCKSZ);
850 
851  if (nbytes != BLCKSZ)
852  {
853  if (nbytes < 0)
854  ereport(ERROR,
855  (errcode_for_file_access(),
856  errmsg("could not write block %u in file \"%s\": %m",
857  blocknum, FilePathName(v->mdfd_vfd))));
858  /* short write: complain appropriately */
859  ereport(ERROR,
860  (errcode(ERRCODE_DISK_FULL),
861  errmsg("could not write block %u in file \"%s\": wrote only %d of %d bytes",
862  blocknum,
863  FilePathName(v->mdfd_vfd),
864  nbytes, BLCKSZ),
865  errhint("Check free disk space.")));
866  }
867 
868  if (!skipFsync && !SmgrIsTemp(reln))
869  register_dirty_segment(reln, forknum, v);
870 }

References _mdfd_getseg(), Assert(), RelFileLocatorBackend::backend, RelFileLocator::dbOid, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXTENSION_CREATE_RECOVERY, EXTENSION_FAIL, FilePathName(), FileWrite(), RelFileLocatorBackend::locator, _MdfdVec::mdfd_vfd, mdnblocks(), PG_IO_ALIGN_SIZE, PG_O_DIRECT, register_dirty_segment(), RelFileLocator::relNumber, SMgrRelationData::smgr_rlocator, SmgrIsTemp, RelFileLocator::spcOid, and TYPEALIGN.

◆ mdwriteback()

void mdwriteback ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
BlockNumber  nblocks 
)

Definition at line 879 of file md.c.

881 {
882  Assert((io_direct_flags & IO_DIRECT_DATA) == 0);
883 
884  /*
885  * Issue flush requests in as few requests as possible; have to split at
886  * segment boundaries though, since those are actually separate files.
887  */
888  while (nblocks > 0)
889  {
890  BlockNumber nflush = nblocks;
891  off_t seekpos;
892  MdfdVec *v;
893  int segnum_start,
894  segnum_end;
895 
896  v = _mdfd_getseg(reln, forknum, blocknum, true /* not used */ ,
897  EXTENSION_DONT_OPEN);
898 
899  /*
900  * We might be flushing buffers of already removed relations, that's
901  * ok, just ignore that case. If the segment file wasn't open already
902  * (ie from a recent mdwrite()), then we don't want to re-open it, to
903  * avoid a race with PROCSIGNAL_BARRIER_SMGRRELEASE that might leave
904  * us with a descriptor to a file that is about to be unlinked.
905  */
906  if (!v)
907  return;
908 
909  /* compute offset inside the current segment */
910  segnum_start = blocknum / RELSEG_SIZE;
911 
912  /* compute number of desired writes within the current segment */
913  segnum_end = (blocknum + nblocks - 1) / RELSEG_SIZE;
914  if (segnum_start != segnum_end)
915  nflush = RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE));
916 
917  Assert(nflush >= 1);
918  Assert(nflush <= nblocks);
919 
920  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
921 
922  FileWriteback(v->mdfd_vfd, seekpos, (off_t) BLCKSZ * nflush, WAIT_EVENT_DATA_FILE_FLUSH);
923 
924  nblocks -= nflush;
925  blocknum += nflush;
926  }
927 }
void FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info)
Definition: fd.c:2087
#define EXTENSION_DONT_OPEN
Definition: md.c:120

References _mdfd_getseg(), Assert(), EXTENSION_DONT_OPEN, FileWriteback(), IO_DIRECT_DATA, io_direct_flags, and _MdfdVec::mdfd_vfd.

◆ mdzeroextend()

void mdzeroextend ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
int  nblocks,
bool  skipFsync 
)

Definition at line 527 of file md.c.

529 {
530  MdfdVec *v;
531  BlockNumber curblocknum = blocknum;
532  int remblocks = nblocks;
533 
534  Assert(nblocks > 0);
535 
536  /* This assert is too expensive to have on normally ... */
537 #ifdef CHECK_WRITE_VS_EXTEND
538  Assert(blocknum >= mdnblocks(reln, forknum));
539 #endif
540 
541  /*
542  * If a relation manages to grow to 2^32-1 blocks, refuse to extend it any
543  * more --- we mustn't create a block whose number actually is
544  * InvalidBlockNumber or larger.
545  */
546  if ((uint64) blocknum + nblocks >= (uint64) InvalidBlockNumber)
547  ereport(ERROR,
548  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
549  errmsg("cannot extend file \"%s\" beyond %u blocks",
550  relpath(reln->smgr_rlocator, forknum),
551  InvalidBlockNumber)));
552 
553  while (remblocks > 0)
554  {
555  BlockNumber segstartblock = curblocknum % ((BlockNumber) RELSEG_SIZE);
556  off_t seekpos = (off_t) BLCKSZ * segstartblock;
557  int numblocks;
558 
559  if (segstartblock + remblocks > RELSEG_SIZE)
560  numblocks = RELSEG_SIZE - segstartblock;
561  else
562  numblocks = remblocks;
563 
564  v = _mdfd_getseg(reln, forknum, curblocknum, skipFsync, EXTENSION_CREATE);
565 
566  Assert(segstartblock < RELSEG_SIZE);
567  Assert(segstartblock + numblocks <= RELSEG_SIZE);
568 
569  /*
570  * If available and useful, use posix_fallocate() (via
571  * FileFallocate()) to extend the relation. That's often more
572  * efficient than using write(), as it commonly won't cause the kernel
573  * to allocate page cache space for the extended pages.
574  *
575  * However, we don't use FileFallocate() for small extensions, as it
576  * defeats delayed allocation on some filesystems. Not clear where
577  * that decision should be made though? For now just use a cutoff of
578  * 8, anything between 4 and 8 worked OK in some local testing.
579  */
580  if (numblocks > 8)
581  {
582  int ret;
583 
584  ret = FileFallocate(v->mdfd_vfd,
585  seekpos, (off_t) BLCKSZ * numblocks,
586  WAIT_EVENT_DATA_FILE_EXTEND);
587  if (ret != 0)
588  {
589  ereport(ERROR,
590  errcode_for_file_access(),
591  errmsg("could not extend file \"%s\" with FileFallocate(): %m",
592  FilePathName(v->mdfd_vfd)),
593  errhint("Check free disk space."));
594  }
595  }
596  else
597  {
598  int ret;
599 
600  /*
601  * Even if we don't want to use fallocate, we can still extend a
602  * bit more efficiently than writing each 8kB block individually.
603  * pg_pwrite_zeros() (via FileZero()) uses pg_pwritev_with_retry()
604  * to avoid multiple writes or needing a zeroed buffer for the
605  * whole length of the extension.
606  */
607  ret = FileZero(v->mdfd_vfd,
608  seekpos, (off_t) BLCKSZ * numblocks,
609  WAIT_EVENT_DATA_FILE_EXTEND);
610  if (ret < 0)
611  ereport(ERROR,
612  errcode_for_file_access(),
613  errmsg("could not extend file \"%s\": %m",
614  FilePathName(v->mdfd_vfd)),
615  errhint("Check free disk space."));
616  }
617 
618  if (!skipFsync && !SmgrIsTemp(reln))
619  register_dirty_segment(reln, forknum, v);
620 
621  Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
622 
623  remblocks -= numblocks;
624  curblocknum += numblocks;
625  }
626 }
int FileFallocate(File file, off_t offset, off_t amount, uint32 wait_event_info)
Definition: fd.c:2339
int FileZero(File file, off_t offset, off_t amount, uint32 wait_event_info)
Definition: fd.c:2294

References _mdfd_getseg(), _mdnblocks(), Assert(), ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXTENSION_CREATE, FileFallocate(), FilePathName(), FileZero(), InvalidBlockNumber, _MdfdVec::mdfd_vfd, mdnblocks(), register_dirty_segment(), relpath, SMgrRelationData::smgr_rlocator, and SmgrIsTemp.