90#define INIT_MD_FILETAG(a,xx_rlocator,xx_forknum,xx_segno) \
92 memset(&(a), 0, sizeof(FileTag)), \
93 (a).handler = SYNC_HANDLER_MD, \
94 (a).rlocator = (xx_rlocator), \
95 (a).forknum = (xx_forknum), \
96 (a).segno = (xx_segno) \
102#define EXTENSION_FAIL (1 << 0)
104#define EXTENSION_RETURN_NULL (1 << 1)
106#define EXTENSION_CREATE (1 << 2)
108#define EXTENSION_CREATE_RECOVERY (1 << 3)
110#define EXTENSION_DONT_OPEN (1 << 5)
212 int save_errno = errno;
222 errmsg(
"could not create file \"%s\": %m", path)));
304 for (forknum = 0; forknum <=
MAX_FORKNUM; forknum++)
323 if (ret < 0 && errno != ENOENT)
328 errmsg(
"could not truncate file \"%s\": %m", path)));
342 path =
relpath(rlocator, forknum);
365 if (ret >= 0 || errno != ENOENT)
368 if (ret < 0 && errno != ENOENT)
373 errmsg(
"could not remove file \"%s\": %m", path)));
401 if (ret >= 0 || errno != ENOENT)
403 char *segpath = (
char *)
palloc(strlen(path) + 12);
406 for (segno = 1;; segno++)
408 sprintf(segpath,
"%s.%u", path, segno);
426 if (unlink(segpath) < 0)
432 errmsg(
"could not remove file \"%s\": %m", segpath)));
453 const void *buffer,
bool skipFsync)
464#ifdef CHECK_WRITE_VS_EXTEND
476 (
errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
477 errmsg(
"cannot extend file \"%s\" beyond %u blocks",
483 seekpos = (off_t) BLCKSZ * (blocknum % ((
BlockNumber) RELSEG_SIZE));
485 Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
487 if ((nbytes =
FileWrite(v->
mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ)
492 errmsg(
"could not extend file \"%s\": %m",
494 errhint(
"Check free disk space.")));
498 errmsg(
"could not extend file \"%s\": wrote only %d of %d bytes at block %u",
500 nbytes, BLCKSZ, blocknum),
501 errhint(
"Check free disk space.")));
522 int remblocks = nblocks;
527#ifdef CHECK_WRITE_VS_EXTEND
538 (
errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
539 errmsg(
"cannot extend file \"%s\" beyond %u blocks",
543 while (remblocks > 0)
546 off_t seekpos = (off_t) BLCKSZ * segstartblock;
549 if (segstartblock + remblocks > RELSEG_SIZE)
550 numblocks = RELSEG_SIZE - segstartblock;
552 numblocks = remblocks;
556 Assert(segstartblock < RELSEG_SIZE);
557 Assert(segstartblock + numblocks <= RELSEG_SIZE);
575 seekpos, (off_t) BLCKSZ * numblocks,
576 WAIT_EVENT_DATA_FILE_EXTEND);
581 errmsg(
"could not extend file \"%s\" with FileFallocate(): %m",
583 errhint(
"Check free disk space."));
598 seekpos, (off_t) BLCKSZ * numblocks,
599 WAIT_EVENT_DATA_FILE_EXTEND);
603 errmsg(
"could not extend file \"%s\": %m",
605 errhint(
"Check free disk space."));
613 remblocks -= numblocks;
614 curblocknum += numblocks;
653 errmsg(
"could not open file \"%s\": %m", path)));
675 for (
int forknum = 0; forknum <=
MAX_FORKNUM; forknum++)
692 while (nopensegs > 0)
720 int nblocks_this_segment;
727 seekpos = (off_t) BLCKSZ * (blocknum % ((
BlockNumber) RELSEG_SIZE));
729 Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
731 nblocks_this_segment =
733 RELSEG_SIZE - (blocknum % ((
BlockNumber) RELSEG_SIZE)));
736 WAIT_EVENT_DATA_FILE_PREFETCH);
738 blocknum += nblocks_this_segment;
739 nblocks -= nblocks_this_segment;
762 for (
int i = 0;
i < nblocks; ++
i)
765 Assert((uintptr_t) buffers[
i] ==
771 iovp->iov_base = buffers[0];
772 iovp->iov_len = BLCKSZ;
776 for (
int i = 1;
i < nblocks; ++
i)
778 void *buffer = buffers[
i];
780 if (((
char *) iovp->iov_base + iovp->iov_len) == buffer)
783 iovp->iov_len += BLCKSZ;
789 iovp->iov_base = buffer;
790 iovp->iov_len = BLCKSZ;
810 return RELSEG_SIZE - segoff;
828 size_t transferred_this_segment;
829 size_t size_this_segment;
834 seekpos = (off_t) BLCKSZ * (blocknum % ((
BlockNumber) RELSEG_SIZE));
836 Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
838 nblocks_this_segment =
840 RELSEG_SIZE - (blocknum % ((
BlockNumber) RELSEG_SIZE)));
841 nblocks_this_segment =
Min(nblocks_this_segment,
lengthof(iov));
843 if (nblocks_this_segment != nblocks)
844 elog(
ERROR,
"read crosses segment boundary");
847 size_this_segment = nblocks_this_segment * BLCKSZ;
848 transferred_this_segment = 0;
857 TRACE_POSTGRESQL_SMGR_MD_READ_START(forknum, blocknum,
863 WAIT_EVENT_DATA_FILE_READ);
864 TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum,
870 size_this_segment - transferred_this_segment);
872#ifdef SIMULATE_SHORT_READ
873 nbytes =
Min(nbytes, 4096);
879 errmsg(
"could not read blocks %u..%u in file \"%s\": %m",
881 blocknum + nblocks_this_segment - 1,
897 i < nblocks_this_segment;
899 memset(buffers[
i], 0, BLCKSZ);
905 errmsg(
"could not read blocks %u..%u in file \"%s\": read only %zu of %zu bytes",
907 blocknum + nblocks_this_segment - 1,
909 transferred_this_segment,
910 size_this_segment)));
914 transferred_this_segment += nbytes;
915 Assert(transferred_this_segment <= size_this_segment);
916 if (transferred_this_segment == size_this_segment)
924 nblocks -= nblocks_this_segment;
925 buffers += nblocks_this_segment;
926 blocknum += nblocks_this_segment;
939 const void **buffers,
BlockNumber nblocks,
bool skipFsync)
942#ifdef CHECK_WRITE_VS_EXTEND
954 size_t transferred_this_segment;
955 size_t size_this_segment;
960 seekpos = (off_t) BLCKSZ * (blocknum % ((
BlockNumber) RELSEG_SIZE));
962 Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
964 nblocks_this_segment =
966 RELSEG_SIZE - (blocknum % ((
BlockNumber) RELSEG_SIZE)));
967 nblocks_this_segment =
Min(nblocks_this_segment,
lengthof(iov));
969 if (nblocks_this_segment != nblocks)
970 elog(
ERROR,
"write crosses segment boundary");
973 size_this_segment = nblocks_this_segment * BLCKSZ;
974 transferred_this_segment = 0;
983 TRACE_POSTGRESQL_SMGR_MD_WRITE_START(forknum, blocknum,
989 WAIT_EVENT_DATA_FILE_WRITE);
990 TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum,
996 size_this_segment - transferred_this_segment);
998#ifdef SIMULATE_SHORT_WRITE
999 nbytes =
Min(nbytes, 4096);
1004 bool enospc = errno == ENOSPC;
1008 errmsg(
"could not write blocks %u..%u in file \"%s\": %m",
1010 blocknum + nblocks_this_segment - 1,
1012 enospc ?
errhint(
"Check free disk space.") : 0));
1016 transferred_this_segment += nbytes;
1017 Assert(transferred_this_segment <= size_this_segment);
1018 if (transferred_this_segment == size_this_segment)
1029 nblocks -= nblocks_this_segment;
1030 buffers += nblocks_this_segment;
1031 blocknum += nblocks_this_segment;
1074 segnum_start = blocknum / RELSEG_SIZE;
1077 segnum_end = (blocknum + nblocks - 1) / RELSEG_SIZE;
1078 if (segnum_start != segnum_end)
1079 nflush = RELSEG_SIZE - (blocknum % ((
BlockNumber) RELSEG_SIZE));
1082 Assert(nflush <= nblocks);
1084 seekpos = (off_t) BLCKSZ * (blocknum % ((
BlockNumber) RELSEG_SIZE));
1135 return (segno * ((
BlockNumber) RELSEG_SIZE)) + nblocks;
1172 if (nblocks > curnblk)
1178 (
errmsg(
"could not truncate file \"%s\" to %u blocks: it's only %u blocks now",
1180 nblocks, curnblk)));
1182 if (nblocks == curnblk)
1190 while (curopensegs > 0)
1194 priorblocks = (curopensegs - 1) * RELSEG_SIZE;
1196 v = &reln->
md_seg_fds[forknum][curopensegs - 1];
1198 if (priorblocks > nblocks)
1207 errmsg(
"could not truncate file \"%s\": %m",
1219 else if (priorblocks + ((
BlockNumber) RELSEG_SIZE) > nblocks)
1228 BlockNumber lastsegblocks = nblocks - priorblocks;
1230 if (
FileTruncate(v->
mdfd_vfd, (off_t) lastsegblocks * BLCKSZ, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0)
1233 errmsg(
"could not truncate file \"%s\" to %u blocks: %m",
1258 int min_inactive_seg;
1284 if (segno > min_inactive_seg)
1309 int min_inactive_seg;
1344 errmsg(
"could not fsync file \"%s\": %m",
1348 if (segno > min_inactive_seg)
1382 (
errmsg_internal(
"could not forward fsync request because request queue is full")));
1389 errmsg(
"could not fsync file \"%s\": %m",
1448 rlocator.
dbOid = dbid;
1467 for (
i = 0;
i < ndelrels;
i++)
1483 for (
i = 0;
i < ndelrels;
i++)
1550 fullpath =
psprintf(
"%s.%u", path, segno);
1610 bool skipFsync,
int behavior)
1624 if (targetseg < reln->md_num_open_segs[forknum])
1651 nextsegno <= targetseg; nextsegno++)
1685 zerobuf, skipFsync);
1711 errmsg(
"could not open file \"%s\" (target block %u): previous segment is only %u blocks",
1725 errmsg(
"could not open file \"%s\" (target block %u): %m",
1746 errmsg(
"could not seek to end of file \"%s\": %m",
1773 need_to_close =
false;
1786 need_to_close =
true;
1792 result =
FileSync(file, WAIT_EVENT_DATA_FILE_SYNC);
1822 return unlink(path);
void TablespaceCreateDbspace(Oid spcOid, Oid dbOid, bool isRedo)
#define InvalidBlockNumber
#define TYPEALIGN(ALIGNVAL, LEN)
#define Assert(condition)
int errmsg_internal(const char *fmt,...)
int errcode_for_file_access(void)
int errhint(const char *fmt,...)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
void FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info)
char * FilePathName(File file)
int FileSync(File file, uint32 wait_event_info)
void FileClose(File file)
int FileFallocate(File file, off_t offset, off_t amount, uint32 wait_event_info)
int FilePrefetch(File file, off_t offset, off_t amount, uint32 wait_event_info)
int data_sync_elevel(int elevel)
File PathNameOpenFile(const char *fileName, int fileFlags)
ssize_t FileWriteV(File file, const struct iovec *iov, int iovcnt, off_t offset, uint32 wait_event_info)
int FileZero(File file, off_t offset, off_t amount, uint32 wait_event_info)
off_t FileSize(File file)
ssize_t FileReadV(File file, const struct iovec *iov, int iovcnt, off_t offset, uint32 wait_event_info)
int FileTruncate(File file, off_t offset, uint32 wait_event_info)
int pg_truncate(const char *path, off_t length)
#define FILE_POSSIBLY_DELETED(err)
static ssize_t FileWrite(File file, const void *buffer, size_t amount, off_t offset, uint32 wait_event_info)
int compute_remaining_iovec(struct iovec *destination, const struct iovec *source, int iovcnt, size_t transferred)
void * MemoryContextAlloc(MemoryContext context, Size size)
void * repalloc(void *pointer, Size size)
void pfree(void *pointer)
MemoryContext TopMemoryContext
void * palloc_aligned(Size size, Size alignto, int flags)
void mdunlink(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
static void register_forget_request(RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno)
#define EXTENSION_CREATE_RECOVERY
void mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber curnblk, BlockNumber nblocks)
static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
static void mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
void mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void **buffers, BlockNumber nblocks, bool skipFsync)
bool mdfiletagmatches(const FileTag *ftag, const FileTag *candidate)
bool mdexists(SMgrRelation reln, ForkNumber forknum)
void mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks)
static void register_unlink_segment(RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno)
#define EXTENSION_DONT_OPEN
BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum)
int mdunlinkfiletag(const FileTag *ftag, char *path)
static MemoryContext MdCxt
void mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
void mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
static int do_truncate(const char *path)
void mdclose(SMgrRelation reln, ForkNumber forknum)
void mdzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync)
static MdfdVec * _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno, int oflags)
static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
int mdsyncfiletag(const FileTag *ftag, char *path)
void mdwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
uint32 mdmaxcombine(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
static MdfdVec * _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, bool skipFsync, int behavior)
#define EXTENSION_RETURN_NULL
static char * _mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
bool mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks)
void mdregistersync(SMgrRelation reln, ForkNumber forknum)
void mdopen(SMgrRelation reln)
static int _mdfd_open_flags(void)
#define INIT_MD_FILETAG(a, xx_rlocator, xx_forknum, xx_segno)
static MdfdVec * mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
void DropRelationFiles(RelFileLocator *delrels, int ndelrels, bool isRedo)
static int buffers_to_iovec(struct iovec *iov, void **buffers, int nblocks)
static void _fdvec_resize(SMgrRelation reln, ForkNumber forknum, int nseg)
void ForgetDatabaseSyncRequests(Oid dbid)
void mdimmedsync(SMgrRelation reln, ForkNumber forknum)
#define AllocSetContextCreate
#define ALLOCSET_DEFAULT_SIZES
#define ERRCODE_DATA_CORRUPTED
instr_time pgstat_prepare_io_time(bool track_io_guc)
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt, uint64 bytes)
size_t strlcpy(char *dst, const char *src, size_t siz)
static int fd(const char *x, int i)
#define INVALID_PROC_NUMBER
char * psprintf(const char *fmt,...)
#define RelFileLocatorBackendIsTemp(rlocator)
#define relpath(rlocator, forknum)
#define relpathperm(rlocator, forknum)
SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber backend)
void smgrclose(SMgrRelation reln)
void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
int md_num_open_segs[MAX_FORKNUM+1]
struct _MdfdVec * md_seg_fds[MAX_FORKNUM+1]
RelFileLocatorBackend smgr_rlocator
bool RegisterSyncRequest(const FileTag *ftag, SyncRequestType type, bool retryOnError)
void XLogDropRelation(RelFileLocator rlocator, ForkNumber forknum)