PostgreSQL Source Code  git master
md.h File Reference
#include "storage/block.h"
#include "storage/relfilelocator.h"
#include "storage/smgr.h"
#include "storage/sync.h"
Include dependency graph for md.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

void mdinit (void)
 
void mdopen (SMgrRelation reln)
 
void mdclose (SMgrRelation reln, ForkNumber forknum)
 
void mdcreate (SMgrRelation reln, ForkNumber forknum, bool isRedo)
 
bool mdexists (SMgrRelation reln, ForkNumber forknum)
 
void mdunlink (RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
 
void mdextend (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
 
bool mdprefetch (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
 
void mdread (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void *buffer)
 
void mdwrite (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
 
void mdwriteback (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
 
BlockNumber mdnblocks (SMgrRelation reln, ForkNumber forknum)
 
void mdtruncate (SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
 
void mdimmedsync (SMgrRelation reln, ForkNumber forknum)
 
void ForgetDatabaseSyncRequests (Oid dbid)
 
void DropRelationFiles (RelFileLocator *delrels, int ndelrels, bool isRedo)
 
int mdsyncfiletag (const FileTag *ftag, char *path)
 
int mdunlinkfiletag (const FileTag *ftag, char *path)
 
bool mdfiletagmatches (const FileTag *ftag, const FileTag *candidate)
 

Function Documentation

◆ DropRelationFiles()

void DropRelationFiles ( RelFileLocator *  delrels,
int  ndelrels,
bool  isRedo 
)

Definition at line 1110 of file md.c.

1111 {
1112  SMgrRelation *srels;
1113  int i;
1114 
1115  srels = palloc(sizeof(SMgrRelation) * ndelrels);
1116  for (i = 0; i < ndelrels; i++)
1117  {
1118  SMgrRelation srel = smgropen(delrels[i], InvalidBackendId);
1119 
1120  if (isRedo)
1121  {
1122  ForkNumber fork;
1123 
1124  for (fork = 0; fork <= MAX_FORKNUM; fork++)
1125  XLogDropRelation(delrels[i], fork);
1126  }
1127  srels[i] = srel;
1128  }
1129 
1130  smgrdounlinkall(srels, ndelrels, isRedo);
1131 
1132  for (i = 0; i < ndelrels; i++)
1133  smgrclose(srels[i]);
1134  pfree(srels);
1135 }
#define InvalidBackendId
Definition: backendid.h:23
int i
Definition: isn.c:73
void pfree(void *pointer)
Definition: mcxt.c:1436
void * palloc(Size size)
Definition: mcxt.c:1210
ForkNumber
Definition: relpath.h:48
#define MAX_FORKNUM
Definition: relpath.h:62
void smgrclose(SMgrRelation reln)
Definition: smgr.c:256
SMgrRelation smgropen(RelFileLocator rlocator, BackendId backend)
Definition: smgr.c:146
void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
Definition: smgr.c:420
void XLogDropRelation(RelFileLocator rlocator, ForkNumber forknum)
Definition: xlogutils.c:658

References i, InvalidBackendId, MAX_FORKNUM, palloc(), pfree(), smgrclose(), smgrdounlinkall(), smgropen(), and XLogDropRelation().

Referenced by FinishPreparedTransaction(), xact_redo_abort(), and xact_redo_commit().

◆ ForgetDatabaseSyncRequests()

void ForgetDatabaseSyncRequests ( Oid  dbid)

Definition at line 1092 of file md.c.

1093 {
1094  FileTag tag;
1095  RelFileLocator rlocator;
1096 
1097  rlocator.dbOid = dbid;
1098  rlocator.spcOid = 0;
1099  rlocator.relNumber = 0;
1100 
1101  INIT_MD_FILETAG(tag, rlocator, InvalidForkNumber, InvalidBlockNumber);
1102 
1103  RegisterSyncRequest(&tag, SYNC_FILTER_REQUEST, true /* retryOnError */ );
1104 }
#define InvalidBlockNumber
Definition: block.h:33
#define INIT_MD_FILETAG(a, xx_rlocator, xx_forknum, xx_segno)
Definition: md.c:92
@ InvalidForkNumber
Definition: relpath.h:49
Definition: sync.h:51
RelFileNumber relNumber
bool RegisterSyncRequest(const FileTag *ftag, SyncRequestType type, bool retryOnError)
Definition: sync.c:587
@ SYNC_FILTER_REQUEST
Definition: sync.h:28

References RelFileLocator::dbOid, INIT_MD_FILETAG, InvalidBlockNumber, InvalidForkNumber, RegisterSyncRequest(), RelFileLocator::relNumber, RelFileLocator::spcOid, and SYNC_FILTER_REQUEST.

Referenced by createdb_failure_callback(), dbase_redo(), and dropdb().

◆ mdclose()

void mdclose ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 568 of file md.c.

569 {
570  int nopensegs = reln->md_num_open_segs[forknum];
571 
572  /* No work if already closed */
573  if (nopensegs == 0)
574  return;
575 
576  /* close segments starting from the end */
577  while (nopensegs > 0)
578  {
579  MdfdVec *v = &reln->md_seg_fds[forknum][nopensegs - 1];
580 
581  FileClose(v->mdfd_vfd);
582  _fdvec_resize(reln, forknum, nopensegs - 1);
583  nopensegs--;
584  }
585 }
void FileClose(File file)
Definition: fd.c:1884
static void _fdvec_resize(SMgrRelation reln, ForkNumber forknum, int nseg)
Definition: md.c:1142
int md_num_open_segs[MAX_FORKNUM+1]
Definition: smgr.h:68
struct _MdfdVec * md_seg_fds[MAX_FORKNUM+1]
Definition: smgr.h:69
Definition: md.c:83
File mdfd_vfd
Definition: md.c:84

References _fdvec_resize(), FileClose(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, and _MdfdVec::mdfd_vfd.

Referenced by mdexists().

◆ mdcreate()

void mdcreate ( SMgrRelation  reln,
ForkNumber  forknum,
bool  isRedo 
)

Definition at line 182 of file md.c.

183 {
184  MdfdVec *mdfd;
185  char *path;
186  File fd;
187 
188  if (isRedo && reln->md_num_open_segs[forknum] > 0)
189  return; /* created and opened already... */
190 
191  Assert(reln->md_num_open_segs[forknum] == 0);
192 
193  /*
194  * We may be using the target table space for the first time in this
195  * database, so create a per-database subdirectory if needed.
196  *
197  * XXX this is a fairly ugly violation of module layering, but this seems
198  * to be the best place to put the check. Maybe TablespaceCreateDbspace
199  * should be here and not in commands/tablespace.c? But that would imply
200  * importing a lot of stuff that smgr.c oughtn't know, either.
201  */
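202  TablespaceCreateDbspace(reln->smgr_rlocator.locator.spcOid,
203  reln->smgr_rlocator.locator.dbOid,
204  isRedo);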
205 
206  path = relpath(reln->smgr_rlocator, forknum);
207 
208  fd = PathNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY);
209 
210  if (fd < 0)
211  {
212  int save_errno = errno;
213 
214  if (isRedo)
215  fd = PathNameOpenFile(path, O_RDWR | PG_BINARY);
216  if (fd < 0)
217  {
218  /* be sure to report the error reported by create, not open */
219  errno = save_errno;
220  ereport(ERROR,
221  (errcode_for_file_access(),
222  errmsg("could not create file \"%s\": %m", path)));
223  }
224  }
225 
226  pfree(path);
227 
228  _fdvec_resize(reln, forknum, 1);
229  mdfd = &reln->md_seg_fds[forknum][0];
230  mdfd->mdfd_vfd = fd;
231  mdfd->mdfd_segno = 0;
232 }
void TablespaceCreateDbspace(Oid spcOid, Oid dbOid, bool isRedo)
Definition: tablespace.c:118
#define PG_BINARY
Definition: c.h:1260
int errcode_for_file_access(void)
Definition: elog.c:881
int errmsg(const char *fmt,...)
Definition: elog.c:1069
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
File PathNameOpenFile(const char *fileName, int fileFlags)
Definition: fd.c:1481
int File
Definition: fd.h:54
Assert(fmt[strlen(fmt) - 1] !='\n')
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define relpath(rlocator, forknum)
Definition: relpath.h:94
RelFileLocator locator
RelFileLocatorBackend smgr_rlocator
Definition: smgr.h:42
BlockNumber mdfd_segno
Definition: md.c:85

References _fdvec_resize(), Assert(), RelFileLocator::dbOid, ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), RelFileLocatorBackend::locator, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_segno, _MdfdVec::mdfd_vfd, PathNameOpenFile(), pfree(), PG_BINARY, relpath, SMgrRelationData::smgr_rlocator, RelFileLocator::spcOid, and TablespaceCreateDbspace().

◆ mdexists()

bool mdexists ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 163 of file md.c.

164 {
165  /*
166  * Close it first, to ensure that we notice if the fork has been unlinked
167  * since we opened it. As an optimization, we can skip that in recovery,
168  * which already closes relations when dropping them.
169  */
170  if (!InRecovery)
171  mdclose(reln, forknum);
172 
173  return (mdopenfork(reln, forknum, EXTENSION_RETURN_NULL) != NULL);
174 }
void mdclose(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:568
#define EXTENSION_RETURN_NULL
Definition: md.c:106
static MdfdVec * mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
Definition: md.c:514
bool InRecovery
Definition: xlogutils.c:53

References EXTENSION_RETURN_NULL, InRecovery, mdclose(), and mdopenfork().

◆ mdextend()

void mdextend ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
const void *  buffer,
bool  skipFsync 
)

Definition at line 449 of file md.c.

451 {
452  off_t seekpos;
453  int nbytes;
454  MdfdVec *v;
455 
456  /* This assert is too expensive to have on normally ... */
457 #ifdef CHECK_WRITE_VS_EXTEND
458  Assert(blocknum >= mdnblocks(reln, forknum));
459 #endif
460 
461  /*
462  * If a relation manages to grow to 2^32-1 blocks, refuse to extend it any
463  * more --- we mustn't create a block whose number actually is
464  * InvalidBlockNumber. (Note that this failure should be unreachable
465  * because of upstream checks in bufmgr.c.)
466  */
467  if (blocknum == InvalidBlockNumber)
468  ereport(ERROR,
469  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
470  errmsg("cannot extend file \"%s\" beyond %u blocks",
471  relpath(reln->smgr_rlocator, forknum),
472  InvalidBlockNumber)));
473 
474  v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE);
475 
476  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
477 
478  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
479 
480  if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ)
481  {
482  if (nbytes < 0)
483  ereport(ERROR,
484  (errcode_for_file_access(),
485  errmsg("could not extend file \"%s\": %m",
486  FilePathName(v->mdfd_vfd)),
487  errhint("Check free disk space.")));
488  /* short write: complain appropriately */
489  ereport(ERROR,
490  (errcode(ERRCODE_DISK_FULL),
491  errmsg("could not extend file \"%s\": wrote only %d of %d bytes at block %u",
492  FilePathName(v->mdfd_vfd),
493  nbytes, BLCKSZ, blocknum),
494  errhint("Check free disk space.")));
495  }
496 
497  if (!skipFsync && !SmgrIsTemp(reln))
498  register_dirty_segment(reln, forknum, v);
499 
500  Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
501 }
uint32 BlockNumber
Definition: block.h:31
int errhint(const char *fmt,...)
Definition: elog.c:1316
int errcode(int sqlerrcode)
Definition: elog.c:858
int FileWrite(File file, const void *buffer, size_t amount, off_t offset, uint32 wait_event_info)
Definition: fd.c:2091
char * FilePathName(File file)
Definition: fd.c:2262
static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:1378
BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:801
static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition: md.c:1022
static MdfdVec * _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, bool skipFsync, int behavior)
Definition: md.c:1248
#define EXTENSION_CREATE
Definition: md.c:108
#define SmgrIsTemp(smgr)
Definition: smgr.h:77
@ WAIT_EVENT_DATA_FILE_EXTEND
Definition: wait_event.h:177

References _mdfd_getseg(), _mdnblocks(), Assert(), ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXTENSION_CREATE, FilePathName(), FileWrite(), InvalidBlockNumber, _MdfdVec::mdfd_vfd, mdnblocks(), register_dirty_segment(), relpath, SMgrRelationData::smgr_rlocator, SmgrIsTemp, and WAIT_EVENT_DATA_FILE_EXTEND.

Referenced by _mdfd_getseg().

◆ mdfiletagmatches()

bool mdfiletagmatches ( const FileTag *  ftag,
const FileTag *  candidate 
)

Definition at line 1467 of file md.c.

1468 {
1469  /*
1470  * For now we only use filter requests as a way to drop all scheduled
1471  * callbacks relating to a given database, when dropping the database.
1472  * We'll return true for all candidates that have the same database OID as
1473  * the ftag from the SYNC_FILTER_REQUEST request, so they're forgotten.
1474  */
1475  return ftag->rlocator.dbOid == candidate->rlocator.dbOid;
1476 }
RelFileLocator rlocator
Definition: sync.h:54

References RelFileLocator::dbOid, and FileTag::rlocator.

◆ mdimmedsync()

void mdimmedsync ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 960 of file md.c.

961 {
962  int segno;
963  int min_inactive_seg;
964 
965  /*
966  * NOTE: mdnblocks makes sure we have opened all active segments, so that
967  * fsync loop will get them all!
968  */
969  mdnblocks(reln, forknum);
970 
971  min_inactive_seg = segno = reln->md_num_open_segs[forknum];
972 
973  /*
974  * Temporarily open inactive segments, then close them after sync. There
975  * may be some inactive segments left opened after fsync() error, but that
976  * is harmless. We don't bother to clean them up and take a risk of
977  * further trouble. The next mdclose() will soon close them.
978  */
979  while (_mdfd_openseg(reln, forknum, segno, 0) != NULL)
980  segno++;
981 
982  while (segno > 0)
983  {
984  MdfdVec *v = &reln->md_seg_fds[forknum][segno - 1];
985 
986  /*
987  * fsyncs done through mdimmedsync() should be tracked in a separate
988  * IOContext than those done through mdsyncfiletag() to differentiate
989  * between unavoidable client backend fsyncs (e.g. those done during
990  * index build) and those which ideally would have been done by the
991  * checkpointer. Since other IO operations bypassing the buffer
992  * manager could also be tracked in such an IOContext, wait until
993  * these are also tracked to track immediate fsyncs.
994  */
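995  if (FileSync(v->mdfd_vfd, WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC) < 0)
996  ereport(data_sync_elevel(ERROR),
997  (errcode_for_file_access(),
998  errmsg("could not fsync file \"%s\": %m",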
999  FilePathName(v->mdfd_vfd))));
1000 
1001  /* Close inactive segments immediately */
1002  if (segno > min_inactive_seg)
1003  {
1004  FileClose(v->mdfd_vfd);
1005  _fdvec_resize(reln, forknum, segno - 1);
1006  }
1007 
1008  segno--;
1009  }
1010 }
int FileSync(File file, uint32 wait_event_info)
Definition: fd.c:2189
int data_sync_elevel(int elevel)
Definition: fd.c:3737
static MdfdVec * _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno, int oflags)
Definition: md.c:1203
@ WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC
Definition: wait_event.h:179

References _fdvec_resize(), _mdfd_openseg(), data_sync_elevel(), ereport, errcode_for_file_access(), errmsg(), ERROR, FileClose(), FilePathName(), FileSync(), SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, mdnblocks(), and WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC.

◆ mdinit()

void mdinit ( void  )

Definition at line 150 of file md.c.

151 {
152  MdCxt = AllocSetContextCreate(TopMemoryContext,
153  "MdSmgr",
154  ALLOCSET_DEFAULT_SIZES);
155 }
MemoryContext TopMemoryContext
Definition: mcxt.c:141
static MemoryContext MdCxt
Definition: md.c:88
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:153

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, MdCxt, and TopMemoryContext.

◆ mdnblocks()

BlockNumber mdnblocks ( SMgrRelation  reln,
ForkNumber  forknum 
)

Definition at line 801 of file md.c.

802 {
803  MdfdVec *v;
804  BlockNumber nblocks;
805  BlockNumber segno;
806 
807  mdopenfork(reln, forknum, EXTENSION_FAIL);
808 
809  /* mdopen has opened the first segment */
810  Assert(reln->md_num_open_segs[forknum] > 0);
811 
812  /*
813  * Start from the last open segments, to avoid redundant seeks. We have
814  * previously verified that these segments are exactly RELSEG_SIZE long,
815  * and it's useless to recheck that each time.
816  *
817  * NOTE: this assumption could only be wrong if another backend has
818  * truncated the relation. We rely on higher code levels to handle that
819  * scenario by closing and re-opening the md fd, which is handled via
820  * relcache flush. (Since the checkpointer doesn't participate in
821  * relcache flush, it could have segment entries for inactive segments;
822  * that's OK because the checkpointer never needs to compute relation
823  * size.)
824  */
825  segno = reln->md_num_open_segs[forknum] - 1;
826  v = &reln->md_seg_fds[forknum][segno];
827 
828  for (;;)
829  {
830  nblocks = _mdnblocks(reln, forknum, v);
831  if (nblocks > ((BlockNumber) RELSEG_SIZE))
832  elog(FATAL, "segment too big");
833  if (nblocks < ((BlockNumber) RELSEG_SIZE))
834  return (segno * ((BlockNumber) RELSEG_SIZE)) + nblocks;
835 
836  /*
837  * If segment is exactly RELSEG_SIZE, advance to next one.
838  */
839  segno++;
840 
841  /*
842  * We used to pass O_CREAT here, but that has the disadvantage that it
843  * might create a segment which has vanished through some operating
844  * system misadventure. In such a case, creating the segment here
845  * undermines _mdfd_getseg's attempts to notice and report an error
846  * upon access to a missing segment.
847  */
848  v = _mdfd_openseg(reln, forknum, segno, 0);
849  if (v == NULL)
850  return segno * ((BlockNumber) RELSEG_SIZE);
851  }
852 }
#define FATAL
Definition: elog.h:41
#define EXTENSION_FAIL
Definition: md.c:104

References _mdfd_openseg(), _mdnblocks(), Assert(), elog(), EXTENSION_FAIL, FATAL, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, and mdopenfork().

Referenced by mdextend(), mdimmedsync(), mdtruncate(), and mdwrite().

◆ mdopen()

void mdopen ( SMgrRelation  reln)

Definition at line 557 of file md.c.

558 {
559  /* mark it not open */
560  for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)
561  reln->md_num_open_segs[forknum] = 0;
562 }

References MAX_FORKNUM, and SMgrRelationData::md_num_open_segs.

◆ mdprefetch()

bool mdprefetch ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum 
)

Definition at line 591 of file md.c.

592 {
593 #ifdef USE_PREFETCH
594  off_t seekpos;
595  MdfdVec *v;
596 
597  v = _mdfd_getseg(reln, forknum, blocknum, false,
598  InRecovery ? EXTENSION_RETURN_NULL : EXTENSION_FAIL);
599  if (v == NULL)
600  return false;
601 
602  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
603 
604  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
605 
606  (void) FilePrefetch(v->mdfd_vfd, seekpos, BLCKSZ, WAIT_EVENT_DATA_FILE_PREFETCH);
607 #endif /* USE_PREFETCH */
608 
609  return true;
610 }
int FilePrefetch(File file, off_t offset, off_t amount, uint32 wait_event_info)
Definition: fd.c:1984
@ WAIT_EVENT_DATA_FILE_PREFETCH
Definition: wait_event.h:180

References _mdfd_getseg(), Assert(), EXTENSION_FAIL, EXTENSION_RETURN_NULL, FilePrefetch(), InRecovery, _MdfdVec::mdfd_vfd, and WAIT_EVENT_DATA_FILE_PREFETCH.

◆ mdread()

void mdread ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
void *  buffer 
)

Definition at line 671 of file md.c.

673 {
674  off_t seekpos;
675  int nbytes;
676  MdfdVec *v;
677 
678  TRACE_POSTGRESQL_SMGR_MD_READ_START(forknum, blocknum,
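679  reln->smgr_rlocator.locator.spcOid,
680  reln->smgr_rlocator.locator.dbOid,
681  reln->smgr_rlocator.locator.relNumber,
682  reln->smgr_rlocator.backend);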
683 
684  v = _mdfd_getseg(reln, forknum, blocknum, false,
685  EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
686 
687  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
688 
689  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
690 
691  nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_READ);
692 
693  TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum,
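694  reln->smgr_rlocator.locator.spcOid,
695  reln->smgr_rlocator.locator.dbOid,
696  reln->smgr_rlocator.locator.relNumber,
697  reln->smgr_rlocator.backend,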
698  nbytes,
699  BLCKSZ);
700 
701  if (nbytes != BLCKSZ)
702  {
703  if (nbytes < 0)
704  ereport(ERROR,
705  (errcode_for_file_access(),
706  errmsg("could not read block %u in file \"%s\": %m",
707  blocknum, FilePathName(v->mdfd_vfd))));
708 
709  /*
710  * Short read: we are at or past EOF, or we read a partial block at
711  * EOF. Normally this is an error; upper levels should never try to
712  * read a nonexistent block. However, if zero_damaged_pages is ON or
713  * we are InRecovery, we should instead return zeroes without
714  * complaining. This allows, for example, the case of trying to
715  * update a block that was later truncated away.
716  */
717  if (zero_damaged_pages || InRecovery)
718  MemSet(buffer, 0, BLCKSZ);
719  else
720  ereport(ERROR,
721  (errcode(ERRCODE_DATA_CORRUPTED),
722  errmsg("could not read block %u in file \"%s\": read only %d of %d bytes",
723  blocknum, FilePathName(v->mdfd_vfd),
724  nbytes, BLCKSZ)));
725  }
726 }
bool zero_damaged_pages
Definition: bufmgr.c:134
#define MemSet(start, val, len)
Definition: c.h:1004
int FileRead(File file, void *buffer, size_t amount, off_t offset, uint32 wait_event_info)
Definition: fd.c:2035
#define EXTENSION_CREATE_RECOVERY
Definition: md.c:110
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
@ WAIT_EVENT_DATA_FILE_READ
Definition: wait_event.h:181

References _mdfd_getseg(), Assert(), RelFileLocatorBackend::backend, RelFileLocator::dbOid, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errmsg(), ERROR, EXTENSION_CREATE_RECOVERY, EXTENSION_FAIL, FilePathName(), FileRead(), InRecovery, RelFileLocatorBackend::locator, _MdfdVec::mdfd_vfd, MemSet, RelFileLocator::relNumber, SMgrRelationData::smgr_rlocator, RelFileLocator::spcOid, WAIT_EVENT_DATA_FILE_READ, and zero_damaged_pages.

◆ mdsyncfiletag()

int mdsyncfiletag ( const FileTag *  ftag,
char *  path 
)

Definition at line 1399 of file md.c.

1400 {
1401  SMgrRelation reln = smgropen(ftag->rlocator, InvalidBackendId);
1402  File file;
1403  bool need_to_close;
1404  int result,
1405  save_errno;
1406 
1407  /* See if we already have the file open, or need to open it. */
1408  if (ftag->segno < reln->md_num_open_segs[ftag->forknum])
1409  {
1410  file = reln->md_seg_fds[ftag->forknum][ftag->segno].mdfd_vfd;
1411  strlcpy(path, FilePathName(file), MAXPGPATH);
1412  need_to_close = false;
1413  }
1414  else
1415  {
1416  char *p;
1417 
1418  p = _mdfd_segpath(reln, ftag->forknum, ftag->segno);
1419  strlcpy(path, p, MAXPGPATH);
1420  pfree(p);
1421 
1422  file = PathNameOpenFile(path, O_RDWR | PG_BINARY);
1423  if (file < 0)
1424  return -1;
1425  need_to_close = true;
1426  }
1427 
1428  /* Sync the file. */
1429  result = FileSync(file, WAIT_EVENT_DATA_FILE_SYNC);
1430  save_errno = errno;
1431 
1432  if (need_to_close)
1433  FileClose(file);
1434 
1435  pgstat_count_io_op(IOOBJECT_RELATION, IOCONTEXT_NORMAL, IOOP_FSYNC);
1436 
1437  errno = save_errno;
1438  return result;
1439 }
static char * _mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
Definition: md.c:1180
#define MAXPGPATH
@ IOOBJECT_RELATION
Definition: pgstat.h:278
@ IOCONTEXT_NORMAL
Definition: pgstat.h:288
@ IOOP_FSYNC
Definition: pgstat.h:298
void pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op)
Definition: pgstat_io.c:66
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
int16 forknum
Definition: sync.h:53
uint32 segno
Definition: sync.h:55
@ WAIT_EVENT_DATA_FILE_SYNC
Definition: wait_event.h:182

References _mdfd_segpath(), FileClose(), FilePathName(), FileSync(), FileTag::forknum, InvalidBackendId, IOCONTEXT_NORMAL, IOOBJECT_RELATION, IOOP_FSYNC, MAXPGPATH, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, PathNameOpenFile(), pfree(), PG_BINARY, pgstat_count_io_op(), FileTag::rlocator, FileTag::segno, smgropen(), strlcpy(), and WAIT_EVENT_DATA_FILE_SYNC.

◆ mdtruncate()

void mdtruncate ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  nblocks 
)

Definition at line 858 of file md.c.

859 {
860  BlockNumber curnblk;
861  BlockNumber priorblocks;
862  int curopensegs;
863 
864  /*
865  * NOTE: mdnblocks makes sure we have opened all active segments, so that
866  * truncation loop will get them all!
867  */
868  curnblk = mdnblocks(reln, forknum);
869  if (nblocks > curnblk)
870  {
871  /* Bogus request ... but no complaint if InRecovery */
872  if (InRecovery)
873  return;
874  ereport(ERROR,
875  (errmsg("could not truncate file \"%s\" to %u blocks: it's only %u blocks now",
876  relpath(reln->smgr_rlocator, forknum),
877  nblocks, curnblk)));
878  }
879  if (nblocks == curnblk)
880  return; /* no work */
881 
882  /*
883  * Truncate segments, starting at the last one. Starting at the end makes
884  * managing the memory for the fd array easier, should there be errors.
885  */
886  curopensegs = reln->md_num_open_segs[forknum];
887  while (curopensegs > 0)
888  {
889  MdfdVec *v;
890 
891  priorblocks = (curopensegs - 1) * RELSEG_SIZE;
892 
893  v = &reln->md_seg_fds[forknum][curopensegs - 1];
894 
895  if (priorblocks > nblocks)
896  {
897  /*
898  * This segment is no longer active. We truncate the file, but do
899  * not delete it, for reasons explained in the header comments.
900  */
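901  if (FileTruncate(v->mdfd_vfd, 0, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0)
902  ereport(ERROR,
903  (errcode_for_file_access(),
904  errmsg("could not truncate file \"%s\": %m",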
905  FilePathName(v->mdfd_vfd))));
906 
907  if (!SmgrIsTemp(reln))
908  register_dirty_segment(reln, forknum, v);
909 
910  /* we never drop the 1st segment */
911  Assert(v != &reln->md_seg_fds[forknum][0]);
912 
913  FileClose(v->mdfd_vfd);
914  _fdvec_resize(reln, forknum, curopensegs - 1);
915  }
916  else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks)
917  {
918  /*
919  * This is the last segment we want to keep. Truncate the file to
920  * the right length. NOTE: if nblocks is exactly a multiple K of
921  * RELSEG_SIZE, we will truncate the K+1st segment to 0 length but
922  * keep it. This adheres to the invariant given in the header
923  * comments.
924  */
925  BlockNumber lastsegblocks = nblocks - priorblocks;
926 
927  if (FileTruncate(v->mdfd_vfd, (off_t) lastsegblocks * BLCKSZ, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0)
928  ereport(ERROR,
929  (errcode_for_file_access(),
930  errmsg("could not truncate file \"%s\" to %u blocks: %m",
931  FilePathName(v->mdfd_vfd),
932  nblocks)));
933  if (!SmgrIsTemp(reln))
934  register_dirty_segment(reln, forknum, v);
935  }
936  else
937  {
938  /*
939  * We still need this segment, so nothing to do for this and any
940  * earlier segment.
941  */
942  break;
943  }
944  curopensegs--;
945  }
946 }
int FileTruncate(File file, off_t offset, uint32 wait_event_info)
Definition: fd.c:2227
@ WAIT_EVENT_DATA_FILE_TRUNCATE
Definition: wait_event.h:183

References _fdvec_resize(), Assert(), ereport, errcode_for_file_access(), errmsg(), ERROR, FileClose(), FilePathName(), FileTruncate(), InRecovery, SMgrRelationData::md_num_open_segs, SMgrRelationData::md_seg_fds, _MdfdVec::mdfd_vfd, mdnblocks(), register_dirty_segment(), relpath, SMgrRelationData::smgr_rlocator, SmgrIsTemp, and WAIT_EVENT_DATA_FILE_TRUNCATE.

◆ mdunlink()

void mdunlink ( RelFileLocatorBackend  rlocator,
ForkNumber  forknum,
bool  isRedo 
)

Definition at line 296 of file md.c.

297 {
298  /* Now do the per-fork work */
299  if (forknum == InvalidForkNumber)
300  {
301  for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
302  mdunlinkfork(rlocator, forknum, isRedo);
303  }
304  else
305  mdunlinkfork(rlocator, forknum, isRedo);
306 }
static void mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
Definition: md.c:333

References InvalidForkNumber, MAX_FORKNUM, and mdunlinkfork().

◆ mdunlinkfiletag()

int mdunlinkfiletag ( const FileTag *  ftag,
char *  path 
)

Definition at line 1448 of file md.c.

1449 {
1450  char *p;
1451 
1452  /* Compute the path. */
1453  p = relpathperm(ftag->rlocator, MAIN_FORKNUM);
1454  strlcpy(path, p, MAXPGPATH);
1455  pfree(p);
1456 
1457  /* Try to unlink the file. */
1458  return unlink(path);
1459 }
@ MAIN_FORKNUM
Definition: relpath.h:50
#define relpathperm(rlocator, forknum)
Definition: relpath.h:90

References MAIN_FORKNUM, MAXPGPATH, pfree(), relpathperm, FileTag::rlocator, and strlcpy().

◆ mdwrite()

void mdwrite ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
const void *  buffer,
bool  skipFsync 
)

Definition at line 736 of file md.c.

738 {
739  off_t seekpos;
740  int nbytes;
741  MdfdVec *v;
742 
743  /* This assert is too expensive to have on normally ... */
744 #ifdef CHECK_WRITE_VS_EXTEND
745  Assert(blocknum < mdnblocks(reln, forknum));
746 #endif
747 
748  TRACE_POSTGRESQL_SMGR_MD_WRITE_START(forknum, blocknum,
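749  reln->smgr_rlocator.locator.spcOid,
750  reln->smgr_rlocator.locator.dbOid,
751  reln->smgr_rlocator.locator.relNumber,
752  reln->smgr_rlocator.backend);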
753 
754  v = _mdfd_getseg(reln, forknum, blocknum, skipFsync,
755  EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
756 
757  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
758 
759  Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
760 
761  nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_WRITE);
762 
763  TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum,
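764  reln->smgr_rlocator.locator.spcOid,
765  reln->smgr_rlocator.locator.dbOid,
766  reln->smgr_rlocator.locator.relNumber,
767  reln->smgr_rlocator.backend,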
768  nbytes,
769  BLCKSZ);
770 
771  if (nbytes != BLCKSZ)
772  {
773  if (nbytes < 0)
774  ereport(ERROR,
775  (errcode_for_file_access(),
776  errmsg("could not write block %u in file \"%s\": %m",
777  blocknum, FilePathName(v->mdfd_vfd))));
778  /* short write: complain appropriately */
779  ereport(ERROR,
780  (errcode(ERRCODE_DISK_FULL),
781  errmsg("could not write block %u in file \"%s\": wrote only %d of %d bytes",
782  blocknum,
783  FilePathName(v->mdfd_vfd),
784  nbytes, BLCKSZ),
785  errhint("Check free disk space.")));
786  }
787 
788  if (!skipFsync && !SmgrIsTemp(reln))
789  register_dirty_segment(reln, forknum, v);
790 }
@ WAIT_EVENT_DATA_FILE_WRITE
Definition: wait_event.h:184

References _mdfd_getseg(), Assert(), RelFileLocatorBackend::backend, RelFileLocator::dbOid, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXTENSION_CREATE_RECOVERY, EXTENSION_FAIL, FilePathName(), FileWrite(), RelFileLocatorBackend::locator, _MdfdVec::mdfd_vfd, mdnblocks(), register_dirty_segment(), RelFileLocator::relNumber, SMgrRelationData::smgr_rlocator, SmgrIsTemp, RelFileLocator::spcOid, and WAIT_EVENT_DATA_FILE_WRITE.

◆ mdwriteback()

void mdwriteback ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
BlockNumber  nblocks 
)

Definition at line 619 of file md.c.

621 {
622  /*
623  * Issue flush requests in as few requests as possible; have to split at
624  * segment boundaries though, since those are actually separate files.
625  */
626  while (nblocks > 0)
627  {
628  BlockNumber nflush = nblocks;
629  off_t seekpos;
630  MdfdVec *v;
631  int segnum_start,
632  segnum_end;
633 
634  v = _mdfd_getseg(reln, forknum, blocknum, true /* not used */ ,
635  EXTENSION_DONT_OPEN);
636 
637  /*
638  * We might be flushing buffers of already removed relations, that's
639  * ok, just ignore that case. If the segment file wasn't open already
640  * (ie from a recent mdwrite()), then we don't want to re-open it, to
641  * avoid a race with PROCSIGNAL_BARRIER_SMGRRELEASE that might leave
642  * us with a descriptor to a file that is about to be unlinked.
643  */
644  if (!v)
645  return;
646 
647  /* compute offset inside the current segment */
648  segnum_start = blocknum / RELSEG_SIZE;
649 
650  /* compute number of desired writes within the current segment */
651  segnum_end = (blocknum + nblocks - 1) / RELSEG_SIZE;
652  if (segnum_start != segnum_end)
653  nflush = RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE));
654 
655  Assert(nflush >= 1);
656  Assert(nflush <= nblocks);
657 
658  seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
659 
660  FileWriteback(v->mdfd_vfd, seekpos, (off_t) BLCKSZ * nflush, WAIT_EVENT_DATA_FILE_FLUSH);
661 
662  nblocks -= nflush;
663  blocknum += nflush;
664  }
665 }
void FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info)
Definition: fd.c:2012
#define EXTENSION_DONT_OPEN
Definition: md.c:120
@ WAIT_EVENT_DATA_FILE_FLUSH
Definition: wait_event.h:178

References _mdfd_getseg(), Assert(), EXTENSION_DONT_OPEN, FileWriteback(), _MdfdVec::mdfd_vfd, and WAIT_EVENT_DATA_FILE_FLUSH.